takane
Validators for ArtifactDB file formats
Loading...
Searching...
No Matches
delayed_array.hpp
Go to the documentation of this file.
1#ifndef TAKANE_DELAYED_ARRAY_HPP
2#define TAKANE_DELAYED_ARRAY_HPP
3
4#include "ritsuko/hdf5/hdf5.hpp"
5#include "ritsuko/ritsuko.hpp"
6#include "chihaya/chihaya.hpp"
7
8#include "utils_public.hpp"
9#include "utils_other.hpp"
10
11#include <vector>
12#include <string>
13#include <stdexcept>
14#include <filesystem>
15#include <cstdint>
16
22namespace takane {
23
// Forward declarations of the generic takane entry points. They are needed
// here because the seed validator inside delayed_array::validate() calls
// ::takane::validate() and ::takane::dimensions() on each external seed
// before their definitions have been seen.
27void validate(const std::filesystem::path&, const ObjectMetadata&, Options&);
28std::vector<size_t> dimensions(const std::filesystem::path&, const ObjectMetadata&, Options&);
37namespace delayed_array {
38
42namespace internal {
43
44// For efficiency purposes, we just mutate the existing
45// 'options.delayed_array_options' rather than making a copy. In this case, we
46// need to set 'details_only' to either true or false, depending on whether we
47// want to do the full validation; it's important to subsequently reset it back
48// to its original setting in the destructor.
49struct DetailsOnlyResetter {
50 DetailsOnlyResetter(chihaya::Options& o) : options(o), old(options.details_only) {}
51 ~DetailsOnlyResetter() {
52 options.details_only = old;
53 }
54private:
55 chihaya::Options& options;
56 bool old;
57};
58
59}
69inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
70 const std::string type_name = "delayed_array"; // use a separate variable to avoid dangling reference warnings from GCC.
71 const auto& vstring = internal_json::extract_version_for_type(metadata.other, type_name);
72 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true);
73 if (version.major != 1) {
74 throw std::runtime_error("unsupported version '" + vstring + "'");
75 }
76
77 uint64_t max = 0;
78 {
79 std::string custom_name = "custom takane seed array";
80 auto& custom_options = options.delayed_array_options;
81 bool custom_found = (custom_options.array_validate_registry.find(custom_name) != custom_options.array_validate_registry.end());
82
83 // For efficiency purposes, we just mutate the existing
84 // 'options.delayed_array_options' rather than making a copy. We need
85 // to add a validator for the 'custom takane seed array' type, which
86 // checks for valid references to external arrays in 'seeds/'.
87 //
88 // Note that we respect any existing 'custom takane seed array' setting
89 // - possibly from recursive calls to 'delayed_array::validate()', but
90 // also possibly from user-provided overrides, in which case we assume
91 // that the caller really knows what they're doing.
92 //
93 // Anyway, all this means that we only mutate chihaya::Options if there
94 // is no existing custom takane function. However, if we do so, we need
95 // to restore the original state before function exit, hence the
96 // destructor for RAII-based clean-up.
97 struct ValidateResetter {
98 ValidateResetter(chihaya::Options& o, const std::string& n, bool f) : options(o), name(n), found(f) {}
99 ~ValidateResetter() {
100 if (!found) {
101 options.array_validate_registry.erase(name);
102 }
103 }
104 private:
105 chihaya::Options& options;
106 const std::string& name;
107 bool found;
108 };
109 [[maybe_unused]] ValidateResetter v(custom_options, custom_name, custom_found);
110
111 if (!custom_found) {
112 custom_options.array_validate_registry[custom_name] = [&](const H5::Group& handle, const ritsuko::Version& version, chihaya::Options& ch_options) -> chihaya::ArrayDetails {
113 auto details = chihaya::custom_array::validate(handle, version, ch_options);
114
115 auto dhandle = ritsuko::hdf5::open_dataset(handle, "index");
116 if (ritsuko::hdf5::exceeds_integer_limit(dhandle, 64, false)) {
117 throw std::runtime_error("'index' should have a datatype that fits into a 64-bit unsigned integer");
118 }
119
120 auto index = ritsuko::hdf5::load_scalar_numeric_dataset<uint64_t>(dhandle);
121 auto seed_path = path / "seeds" / std::to_string(index);
122 auto seed_meta = read_object_metadata(seed_path);
123 ::takane::validate(seed_path, seed_meta, options);
124
125 auto seed_dims = ::takane::dimensions(seed_path, seed_meta, options);
126 if (seed_dims.size() != details.dimensions.size()) {
127 throw std::runtime_error("dimensionality of 'seeds/" + std::to_string(index) + "' is not consistent with 'dimensions'");
128 }
129
130 for (size_t d = 0, ndims = seed_dims.size(); d < ndims; ++d) {
131 if (seed_dims[d] != details.dimensions[d]) {
132 throw std::runtime_error("dimension extents of 'seeds/" + std::to_string(index) + "' is not consistent with 'dimensions'");
133 }
134 }
135
136 if (index >= max) {
137 max = index + 1;
138 }
139 return details;
140 };
141 }
142
143 auto apath = path / "array.h5";
144 auto fhandle = ritsuko::hdf5::open_file(apath);
145 auto ghandle = ritsuko::hdf5::open_group(fhandle, "delayed_array");
146 ritsuko::Version chihaya_version = chihaya::extract_version(ghandle);
147 if (chihaya_version.lt(1, 1, 0)) {
148 throw std::runtime_error("version of the chihaya specification should be no less than 1.1");
149 }
150
151 // Again, using RAII to reset the 'details_only' flag to its original
152 // state after we're done with it.
153 [[maybe_unused]] internal::DetailsOnlyResetter o(custom_options);
154 custom_options.details_only = false;
155
156 chihaya::validate(ghandle, chihaya_version, custom_options);
157 }
158
159 size_t found = 0;
160 auto seed_path = path / "seeds";
161 if (std::filesystem::exists(seed_path)) {
162 found = internal_other::count_directory_entries(seed_path);
163 }
164 if (max != found) {
165 throw std::runtime_error("number of objects in 'seeds' is not consistent with the number of 'index' references in 'array.h5'");
166 }
167}
168
175inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, Options& options) {
176 auto& chihaya_options = options.delayed_array_options;
177 [[maybe_unused]] internal::DetailsOnlyResetter o(chihaya_options);
178 chihaya_options.details_only = true;
179
180 auto apath = path / "array.h5";
181 auto fhandle = ritsuko::hdf5::open_file(apath);
182 auto ghandle = ritsuko::hdf5::open_group(fhandle, "delayed_array");
183 auto output = chihaya::validate(ghandle, chihaya_options);
184 return output.dimensions[0];
185}
186
193inline std::vector<size_t> dimensions(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, Options& options) {
194 auto& chihaya_options = options.delayed_array_options;
195 [[maybe_unused]] internal::DetailsOnlyResetter o(chihaya_options);
196 chihaya_options.details_only = true;
197
198 auto apath = path / "array.h5";
199 auto fhandle = ritsuko::hdf5::open_file(apath);
200 auto ghandle = ritsuko::hdf5::open_group(fhandle, "delayed_array");
201 auto output = chihaya::validate(ghandle, chihaya_options);
202 return std::vector<size_t>(output.dimensions.begin(), output.dimensions.end());
203}
204
205}
206
207}
208
209#endif
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition delayed_array.hpp:69
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition delayed_array.hpp:193
size_t height(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition delayed_array.hpp:175
takane validation functions.
Definition _derived_from.hpp:15
ObjectMetadata read_object_metadata(const std::filesystem::path &path)
Definition utils_public.hpp:74
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _validate.hpp:107
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _dimensions.hpp:69
Object metadata, including the type and other fields.
Definition utils_public.hpp:26
std::unordered_map< std::string, std::shared_ptr< millijson::Base > > other
Definition utils_public.hpp:35
Validation options.
Definition utils_public.hpp:94
chihaya::Options delayed_array_options
Definition utils_public.hpp:234
Exported utilities.