42 const auto& dmap = internal_json::extract_typed_object_from_metadata(metadata.
other,
"multi_sample_dataset");
44 const std::string& vstring = internal_json::extract_string_from_typed_object(dmap,
"version",
"multi_sample_dataset");
45 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(),
true);
46 if (version.major != 1) {
47 throw std::runtime_error(
"unsupported version string '" + vstring +
"'");
51 auto sd_path = path /
"sample_data";
54 throw std::runtime_error(
"object in 'sample_data' should satisfy the 'DATA_FRAME' interface");
58 }
catch (std::exception& e) {
59 throw std::runtime_error(
"failed to validate 'sample_data'; " + std::string(e.what()));
64 std::vector<size_t> num_columns;
65 auto edir = path /
"experiments";
66 if (std::filesystem::exists(edir)) {
67 size_t num_experiments = internal_summarized_experiment::check_names_json(edir);
68 num_columns.reserve(num_experiments);
70 for (
size_t e = 0; e < num_experiments; ++e) {
71 auto ename = std::to_string(e);
72 auto epath = edir / ename;
76 throw std::runtime_error(
"object in 'experiments/" + ename +
"' should satisfy the 'SUMMARIZED_EXPERIMENT' interface");
81 }
catch (std::exception& e) {
82 throw std::runtime_error(
"failed to validate 'experiments/" + ename +
"'; " + std::string(e.what()));
86 num_columns.push_back(dims[1]);
89 size_t num_dir_obj = internal_other::count_directory_entries(edir);
90 if (num_dir_obj - 1 != num_experiments) {
91 throw std::runtime_error(
"more objects than expected inside the 'experiments' subdirectory");
96 if (num_columns.size() > 0) {
98 auto handle = ritsuko::hdf5::open_file(path /
"sample_map.h5");
99 auto ghandle = ritsuko::hdf5::open_group(handle,
"multi_sample_dataset");
101 for (
size_t e = 0, end = num_columns.size(); e < end; ++e) {
102 auto ename = std::to_string(e);
103 auto dhandle = ritsuko::hdf5::open_dataset(ghandle, ename.c_str());
104 if (ritsuko::hdf5::exceeds_integer_limit(dhandle, 64,
false)) {
105 throw std::runtime_error(
"'multi_sample_dataset/" + ename +
"' should have a datatype that fits into a 64-bit unsigned integer");
108 auto len = ritsuko::hdf5::get_1d_length(dhandle.getSpace(),
false);
109 if (len != num_columns[e]) {
110 throw std::runtime_error(
"length of 'multi_sample_dataset/" + ename +
"' should equal the number of columns of 'experiments/" + ename +
"'");
113 ritsuko::hdf5::Stream1dNumericDataset<uint64_t> stream(&dhandle, len, options.
hdf5_buffer_size);
114 for (hsize_t i = 0; i < len; ++i, stream.next()) {
115 auto x = stream.get();
116 if (
static_cast<size_t>(x) >= num_samples) {
117 throw std::runtime_error(
"indices in 'multi_sample_dataset/" + ename +
"' should be less than the number of samples");
122 if (num_columns.size() != ghandle.getNumObjs()) {
123 throw std::runtime_error(
"more objects present in the 'multi_sample_dataset' group than expected");
125 }
catch (std::exception& e) {
126 throw std::runtime_error(
"failed to validate the sample mapping; " + std::string(e.what()));
130 internal_other::validate_metadata(path,
"other_data", options);
size_t height(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _height.hpp:88
bool satisfies_interface(const std::string &type, const std::string &interface, const Options &options)
Definition _satisfies_interface.hpp:67
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _validate.hpp:107
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _dimensions.hpp:69