takane
Validators for ArtifactDB file formats
Loading...
Searching...
No Matches
data_frame_factor.hpp
Go to the documentation of this file.
1#ifndef TAKANE_DATA_FRAME_FACTOR_HPP
2#define TAKANE_DATA_FRAME_FACTOR_HPP
3
4#include <string>
5#include <stdexcept>
6#include <filesystem>
7
8#include "ritsuko/hdf5/hdf5.hpp"
9
10#include "utils_public.hpp"
11#include "utils_string.hpp"
12#include "utils_factor.hpp"
13#include "utils_json.hpp"
14#include "utils_other.hpp"
15
21namespace takane {
22
26void validate(const std::filesystem::path&, const ObjectMetadata&, Options&);
27size_t height(const std::filesystem::path&, const ObjectMetadata&, Options&);
28bool satisfies_interface(const std::string&, const std::string&, const Options&);
37namespace data_frame_factor {
38
47inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
48 const auto& vstring = internal_json::extract_version_for_type(metadata.other, "data_frame_factor");
49 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true);
50 if (version.major != 1) {
51 throw std::runtime_error("unsupported version string '" + vstring + "'");
52 }
53
54 // Validating the levels.
55 auto lpath = path / "levels";
56 auto lmeta = read_object_metadata(lpath);
57 if (!satisfies_interface(lmeta.type, "DATA_FRAME", options)) {
58 throw std::runtime_error("expected 'levels' to be an object that satifies the 'DATA_FRAME' interface");
59 }
60
61 try {
62 ::takane::validate(lpath, lmeta, options);
63 } catch (std::exception& e) {
64 throw std::runtime_error("failed to validate 'levels'; " + std::string(e.what()));
65 }
66 size_t num_levels = ::takane::height(lpath, lmeta, options);
67
69 if (options.data_frame_factor_any_duplicated(lpath, lmeta, options)) {
70 throw std::runtime_error("'levels' should not contain duplicated rows");
71 }
72 }
73
74 auto handle = ritsuko::hdf5::open_file(path / "contents.h5");
75 auto ghandle = ritsuko::hdf5::open_group(handle, "data_frame_factor");
76 size_t num_codes = internal_factor::validate_factor_codes(ghandle, "codes", num_levels, options.hdf5_buffer_size, /* allow_missing = */ false);
77
78 internal_other::validate_mcols(path, "element_annotations", num_codes, options);
79 internal_other::validate_metadata(path, "other_annotations", options);
80
81 internal_string::validate_names(ghandle, "names", num_codes, options.hdf5_buffer_size);
82}
83
90inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] Options& options) {
91 auto handle = ritsuko::hdf5::open_file(path / "contents.h5");
92 auto ghandle = handle.openGroup("data_frame_factor");
93 auto dhandle = ghandle.openDataSet("codes");
94 return ritsuko::hdf5::get_1d_length(dhandle.getSpace(), false);
95}
96
97}
98
99}
100
101#endif
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition data_frame_factor.hpp:47
size_t height(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition data_frame_factor.hpp:90
takane validation functions.
Definition _derived_from.hpp:15
size_t height(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _height.hpp:88
bool satisfies_interface(const std::string &type, const std::string &interface, const Options &options)
Definition _satisfies_interface.hpp:67
ObjectMetadata read_object_metadata(const std::filesystem::path &path)
Definition utils_public.hpp:74
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _validate.hpp:107
Object metadata, including the type and other fields.
Definition utils_public.hpp:26
std::unordered_map< std::string, std::shared_ptr< millijson::Base > > other
Definition utils_public.hpp:35
Validation options.
Definition utils_public.hpp:94
hsize_t hdf5_buffer_size
Definition utils_public.hpp:103
std::function< bool(const std::filesystem::path &, const ObjectMetadata &, Options &options)> data_frame_factor_any_duplicated
Definition utils_public.hpp:191
Exported utilities.