takane
Validators for ArtifactDB file formats
Loading...
Searching...
No Matches
delayed_array.hpp
Go to the documentation of this file.
1#ifndef TAKANE_DELAYED_ARRAY_HPP
2#define TAKANE_DELAYED_ARRAY_HPP
3
4#include "ritsuko/hdf5/hdf5.hpp"
5#include "ritsuko/ritsuko.hpp"
6#include "chihaya/chihaya.hpp"
7
8#include "utils_public.hpp"
9#include "utils_other.hpp"
10
11#include <vector>
12#include <string>
13#include <stdexcept>
14#include <filesystem>
15#include <cstdint>
16
22namespace takane {
23
27void validate(const std::filesystem::path&, const ObjectMetadata&, Options&);
28std::vector<size_t> dimensions(const std::filesystem::path&, const ObjectMetadata&, Options&);
37namespace delayed_array {
38
42namespace internal {
43
44// For efficiency purposes, we just mutate the existing
45// 'options.delayed_array_options' rather than making a copy. In this case, we
46// need to set 'details_only' to either true or false, depending on whether we
47// want to do the full validation; it's important to subsequently reset it back
48// to its original setting in the destructor.
49struct DetailsOnlyResetter {
50 DetailsOnlyResetter(chihaya::Options& o) : options(o), old(options.details_only) {}
51 ~DetailsOnlyResetter() {
52 options.details_only = old;
53 }
54private:
55 chihaya::Options& options;
56 bool old;
57};
58
59}
69inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
70 auto vstring = internal_json::extract_version_for_type(metadata.other, "delayed_array");
71 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true);
72 if (version.major != 1) {
73 throw std::runtime_error("unsupported version '" + vstring + "'");
74 }
75
76 uint64_t max = 0;
77 {
78 std::string custom_name = "custom takane seed array";
79 auto& custom_options = options.delayed_array_options;
80 bool custom_found = (custom_options.array_validate_registry.find(custom_name) != custom_options.array_validate_registry.end());
81
82 // For efficiency purposes, we just mutate the existing
83 // 'options.delayed_array_options' rather than making a copy. We need
84 // to add a validator for the 'custom takane seed array' type, which
85 // checks for valid references to external arrays in 'seeds/'.
86 //
87 // Note that we respect any existing 'custom takane seed array' setting
88 // - possibly from recursive calls to 'delayed_array::validate()', but
89 // also possibly from user-provided overrides, in which case we assume
90 // that the caller really knows what they're doing.
91 //
92 // Anyway, all this means that we only mutate chihaya::Options if there
93 // is no existing custom takane function. However, if we do so, we need
94 // to restore the original state before function exit, hence the
95 // destructor for RAII-based clean-up.
96 struct ValidateResetter {
97 ValidateResetter(chihaya::Options& o, const std::string& n, bool f) : options(o), name(n), found(f) {}
98 ~ValidateResetter() {
99 if (!found) {
100 options.array_validate_registry.erase(name);
101 }
102 }
103 private:
104 chihaya::Options& options;
105 const std::string& name;
106 bool found;
107 };
108 [[maybe_unused]] ValidateResetter v(custom_options, custom_name, custom_found);
109
110 if (!custom_found) {
111 custom_options.array_validate_registry[custom_name] = [&](const H5::Group& handle, const ritsuko::Version& version, chihaya::Options& ch_options) -> chihaya::ArrayDetails {
112 auto details = chihaya::custom_array::validate(handle, version, ch_options);
113
114 auto dhandle = ritsuko::hdf5::open_dataset(handle, "index");
115 if (ritsuko::hdf5::exceeds_integer_limit(dhandle, 64, false)) {
116 throw std::runtime_error("'index' should have a datatype that fits into a 64-bit unsigned integer");
117 }
118
119 auto index = ritsuko::hdf5::load_scalar_numeric_dataset<uint64_t>(dhandle);
120 auto seed_path = path / "seeds" / std::to_string(index);
121 auto seed_meta = read_object_metadata(seed_path);
122 ::takane::validate(seed_path, seed_meta, options);
123
124 auto seed_dims = ::takane::dimensions(seed_path, seed_meta, options);
125 if (seed_dims.size() != details.dimensions.size()) {
126 throw std::runtime_error("dimensionality of 'seeds/" + std::to_string(index) + "' is not consistent with 'dimensions'");
127 }
128
129 for (size_t d = 0, ndims = seed_dims.size(); d < ndims; ++d) {
130 if (seed_dims[d] != details.dimensions[d]) {
131 throw std::runtime_error("dimension extents of 'seeds/" + std::to_string(index) + "' is not consistent with 'dimensions'");
132 }
133 }
134
135 if (index >= max) {
136 max = index + 1;
137 }
138 return details;
139 };
140 }
141
142 auto apath = path / "array.h5";
143 auto fhandle = ritsuko::hdf5::open_file(apath);
144 auto ghandle = ritsuko::hdf5::open_group(fhandle, "delayed_array");
145 ritsuko::Version chihaya_version = chihaya::extract_version(ghandle);
146 if (chihaya_version.lt(1, 1, 0)) {
147 throw std::runtime_error("version of the chihaya specification should be no less than 1.1");
148 }
149
150 // Again, using RAII to reset the 'details_only' flag to its original
151 // state after we're done with it.
152 [[maybe_unused]] internal::DetailsOnlyResetter o(custom_options);
153 custom_options.details_only = false;
154
155 chihaya::validate(ghandle, chihaya_version, custom_options);
156 }
157
158 size_t found = 0;
159 auto seed_path = path / "seeds";
160 if (std::filesystem::exists(seed_path)) {
161 found = internal_other::count_directory_entries(seed_path);
162 }
163 if (max != found) {
164 throw std::runtime_error("number of objects in 'seeds' is not consistent with the number of 'index' references in 'array.h5'");
165 }
166}
167
174inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, Options& options) {
175 auto& chihaya_options = options.delayed_array_options;
176 [[maybe_unused]] internal::DetailsOnlyResetter o(chihaya_options);
177 chihaya_options.details_only = true;
178
179 auto apath = path / "array.h5";
180 auto fhandle = ritsuko::hdf5::open_file(apath);
181 auto ghandle = ritsuko::hdf5::open_group(fhandle, "delayed_array");
182 auto output = chihaya::validate(ghandle, chihaya_options);
183 return output.dimensions[0];
184}
185
192inline std::vector<size_t> dimensions(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, Options& options) {
193 auto& chihaya_options = options.delayed_array_options;
194 [[maybe_unused]] internal::DetailsOnlyResetter o(chihaya_options);
195 chihaya_options.details_only = true;
196
197 auto apath = path / "array.h5";
198 auto fhandle = ritsuko::hdf5::open_file(apath);
199 auto ghandle = ritsuko::hdf5::open_group(fhandle, "delayed_array");
200 auto output = chihaya::validate(ghandle, chihaya_options);
201 return std::vector<size_t>(output.dimensions.begin(), output.dimensions.end());
202}
203
204}
205
206}
207
208#endif
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition delayed_array.hpp:69
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition delayed_array.hpp:192
size_t height(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition delayed_array.hpp:174
takane validation functions.
Definition _derived_from.hpp:15
ObjectMetadata read_object_metadata(const std::filesystem::path &path)
Definition utils_public.hpp:74
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _validate.hpp:107
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _dimensions.hpp:69
Object metadata, including the type and other fields.
Definition utils_public.hpp:26
std::unordered_map< std::string, std::shared_ptr< millijson::Base > > other
Definition utils_public.hpp:35
Validation options.
Definition utils_public.hpp:94
chihaya::Options delayed_array_options
Definition utils_public.hpp:234
Exported utilities.