42 const std::string type_name =
"multi_sample_dataset";
43 const auto& dmap = internal_json::extract_typed_object_from_metadata(metadata.
other, type_name);
45 const std::string version_name =
"version";
46 const std::string& vstring = internal_json::extract_string_from_typed_object(dmap, version_name, type_name);
47 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(),
true);
48 if (version.major != 1) {
49 throw std::runtime_error(
"unsupported version string '" + vstring +
"'");
53 auto sd_path = path /
"sample_data";
56 throw std::runtime_error(
"object in 'sample_data' should satisfy the 'DATA_FRAME' interface");
60 }
catch (std::exception& e) {
61 throw std::runtime_error(
"failed to validate 'sample_data'; " + std::string(e.what()));
66 std::vector<size_t> num_columns;
67 auto edir = path /
"experiments";
68 if (std::filesystem::exists(edir)) {
69 size_t num_experiments = internal_summarized_experiment::check_names_json(edir);
70 num_columns.reserve(num_experiments);
72 for (
size_t e = 0; e < num_experiments; ++e) {
73 auto ename = std::to_string(e);
74 auto epath = edir / ename;
78 throw std::runtime_error(
"object in 'experiments/" + ename +
"' should satisfy the 'SUMMARIZED_EXPERIMENT' interface");
83 }
catch (std::exception& e) {
84 throw std::runtime_error(
"failed to validate 'experiments/" + ename +
"'; " + std::string(e.what()));
88 num_columns.push_back(dims[1]);
91 size_t num_dir_obj = internal_other::count_directory_entries(edir);
92 if (num_dir_obj - 1 != num_experiments) {
93 throw std::runtime_error(
"more objects than expected inside the 'experiments' subdirectory");
98 if (num_columns.size() > 0) {
100 auto handle = ritsuko::hdf5::open_file(path /
"sample_map.h5");
101 auto ghandle = ritsuko::hdf5::open_group(handle, type_name.c_str());
103 for (
size_t e = 0, end = num_columns.size(); e < end; ++e) {
104 auto ename = std::to_string(e);
105 auto dhandle = ritsuko::hdf5::open_dataset(ghandle, ename.c_str());
106 if (ritsuko::hdf5::exceeds_integer_limit(dhandle, 64,
false)) {
107 throw std::runtime_error(
"'multi_sample_dataset/" + ename +
"' should have a datatype that fits into a 64-bit unsigned integer");
110 auto len = ritsuko::hdf5::get_1d_length(dhandle.getSpace(),
false);
111 if (len != num_columns[e]) {
112 throw std::runtime_error(
"length of 'multi_sample_dataset/" + ename +
"' should equal the number of columns of 'experiments/" + ename +
"'");
115 ritsuko::hdf5::Stream1dNumericDataset<uint64_t> stream(&dhandle, len, options.
hdf5_buffer_size);
116 for (hsize_t i = 0; i < len; ++i, stream.next()) {
117 auto x = stream.get();
118 if (
static_cast<size_t>(x) >= num_samples) {
119 throw std::runtime_error(
"indices in 'multi_sample_dataset/" + ename +
"' should be less than the number of samples");
124 if (num_columns.size() != ghandle.getNumObjs()) {
125 throw std::runtime_error(
"more objects present in the 'multi_sample_dataset' group than expected");
127 }
catch (std::exception& e) {
128 throw std::runtime_error(
"failed to validate the sample mapping; " + std::string(e.what()));
132 internal_other::validate_metadata(path,
"other_data", options);
size_t height(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _height.hpp:88
bool satisfies_interface(const std::string &type, const std::string &interface, const Options &options)
Definition _satisfies_interface.hpp:67
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _validate.hpp:107
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _dimensions.hpp:69