1#ifndef TAKANE_SPATIAL_EXPERIMENT_HPP
2#define TAKANE_SPATIAL_EXPERIMENT_HPP
4#include "ritsuko/hdf5/hdf5.hpp"
8#include "utils_factor.hpp"
10#include "utils_other.hpp"
11#include "utils_files.hpp"
15#include <unordered_set>
30bool derived_from(
const std::string&,
const std::string&,
const Options& options);
31void validate(
const std::filesystem::path&,
const ObjectMetadata&, Options& options);
41namespace spatial_experiment {
48inline void validate_coordinates(
const std::filesystem::path& path,
size_t ncols, Options& options) {
49 auto coord_path = path /
"coordinates";
51 if (!
derived_from(coord_meta.type,
"dense_array", options)) {
52 throw std::runtime_error(
"'coordinates' should be a dense array");
59 }
catch (std::exception& e) {
60 throw std::runtime_error(
"failed to validate 'coordinates'; " + std::string(e.what()));
64 if (cdims.size() != 2) {
65 throw std::runtime_error(
"'coordinates' should be a 2-dimensional dense array");
66 }
else if (cdims[1] != 2 && cdims[1] != 3) {
67 throw std::runtime_error(
"'coordinates' should have 2 or 3 columns");
68 }
else if (cdims[0] != ncols) {
69 throw std::runtime_error(
"number of rows in 'coordinates' should equal the number of columns in the 'spatial_experiment'");
73 auto handle = ritsuko::hdf5::open_file(coord_path /
"array.h5");
74 auto ghandle = ritsuko::hdf5::open_group(handle,
"dense_array");
75 auto dhandle = ritsuko::hdf5::open_dataset(ghandle,
"data");
76 auto dclass = dhandle.getTypeClass();
77 if (dclass != H5T_INTEGER && dclass != H5T_FLOAT) {
78 throw std::runtime_error(
"values in 'coordinates' should be numeric");
82inline void validate_image(
const std::filesystem::path& path,
size_t i,
const std::string& format, Options& options,
const ritsuko::Version& version) {
83 auto ipath = path / std::to_string(i);
84 if (format ==
"PNG") {
86 image_file::internal::validate_png(ipath);
87 }
else if (format ==
"TIFF") {
89 image_file::internal::validate_tiff(ipath);
90 }
else if (format ==
"OTHER" && version.ge(1, 1, 0)) {
93 throw std::runtime_error(
"object in '" + ipath.string() +
"' should satisfy the 'IMAGE' interface");
97 throw std::runtime_error(
"image format '" + format +
"' is not currently supported");
101inline void validate_images(
const std::filesystem::path& path,
size_t ncols, Options& options,
const ritsuko::Version& version) {
102 auto image_dir = path /
"images";
103 if (!std::filesystem::exists(image_dir) && version.ge(1, 2, 0)) {
108 auto mappath = image_dir /
"mapping.h5";
109 auto ihandle = ritsuko::hdf5::open_file(mappath);
110 auto ghandle = ritsuko::hdf5::open_group(ihandle,
"spatial_experiment");
112 std::vector<std::string> image_formats;
115 struct SampleMapMessenger {
116 static std::string level() {
return "sample name"; }
117 static std::string levels() {
return "sample names"; }
118 static std::string codes() {
return "sample assignments"; }
121 auto num_samples = internal_factor::validate_factor_levels<SampleMapMessenger>(ghandle,
"sample_names", options.hdf5_buffer_size);
122 auto num_codes = internal_factor::validate_factor_codes<SampleMapMessenger>(ghandle,
"column_samples", num_samples, options.hdf5_buffer_size,
true);
123 if (num_codes != ncols) {
124 throw std::runtime_error(
"length of 'column_samples' should equal the number of columns in the spatial experiment");
128 auto sample_handle = ritsuko::hdf5::open_dataset(ghandle,
"image_samples");
129 if (ritsuko::hdf5::exceeds_integer_limit(sample_handle, 64,
false)) {
130 throw std::runtime_error(
"expected a datatype for 'image_samples' that fits in a 64-bit unsigned integer");
132 auto num_images = ritsuko::hdf5::get_1d_length(sample_handle.getSpace(),
false);
134 auto id_handle = ritsuko::hdf5::open_dataset(ghandle,
"image_ids");
135 if (!ritsuko::hdf5::is_utf8_string(id_handle)) {
136 throw std::runtime_error(
"expected 'image_ids' to have a datatype that can be represented by a UTF-8 encoded string");
138 if (ritsuko::hdf5::get_1d_length(id_handle.getSpace(),
false) != num_images) {
139 throw std::runtime_error(
"expected 'image_ids' to have the same length as 'image_samples'");
142 auto scale_handle = ritsuko::hdf5::open_dataset(ghandle,
"image_scale_factors");
143 if (ritsuko::hdf5::exceeds_float_limit(scale_handle, 64)) {
144 throw std::runtime_error(
"expected a datatype for 'image_scale_factors' that fits in a 64-bit float");
146 if (ritsuko::hdf5::get_1d_length(scale_handle.getSpace(),
false) != num_images) {
147 throw std::runtime_error(
"expected 'image_scale_factors' to have the same length as 'image_samples'");
150 ritsuko::hdf5::Stream1dNumericDataset<uint64_t> sample_stream(&sample_handle, num_images, options.hdf5_buffer_size);
151 ritsuko::hdf5::Stream1dStringDataset id_stream(&id_handle, num_images, options.hdf5_buffer_size);
152 ritsuko::hdf5::Stream1dNumericDataset<double> scale_stream(&scale_handle, num_images, options.hdf5_buffer_size);
153 std::vector<std::unordered_set<std::string> > collected(num_samples);
155 for (hsize_t i = 0; i < num_images; ++i) {
156 auto sample = sample_stream.get();
157 if (sample >= num_samples) {
158 throw std::runtime_error(
"entries of 'image_samples' should be less than the number of samples");
160 sample_stream.next();
162 auto& present = collected[sample];
163 auto id = id_stream.steal();
164 if (present.find(
id) != present.end()) {
165 throw std::runtime_error(
"'image_ids' contains duplicated image IDs for the same sample + ('" +
id +
"')");
167 present.insert(std::move(
id));
170 auto sc = scale_stream.get();
171 if (!std::isfinite(sc) || sc <= 0) {
172 throw std::runtime_error(
"entries of 'image_scale_factors' should be finite and positive");
177 if (version.ge(1, 3, 0) && !ghandle.exists(
"image_formats")) {
178 image_formats.resize(num_images,
"OTHER");
181 auto format_handle = ritsuko::hdf5::open_dataset(ghandle,
"image_formats");
182 if (!ritsuko::hdf5::is_utf8_string(format_handle)) {
183 throw std::runtime_error(
"expected 'image_formats' to have a datatype that can be represented by a UTF-8 encoded string");
185 if (ritsuko::hdf5::get_1d_length(format_handle.getSpace(),
false) != num_images) {
186 throw std::runtime_error(
"expected 'image_formats' to have the same length as 'image_samples'");
188 image_formats.reserve(num_images);
190 ritsuko::hdf5::Stream1dStringDataset format_stream(&format_handle, num_images, options.hdf5_buffer_size);
192 for (hsize_t i = 0; i < num_images; ++i) {
193 auto fmt = format_stream.steal();
194 image_formats.push_back(std::move(fmt));
195 format_stream.next();
199 for (
const auto& x : collected) {
201 throw std::runtime_error(
"each sample should map to one or more images in 'image_samples'");
205 }
catch (std::exception& e) {
206 throw std::runtime_error(
"failed to validate '" + mappath.string() +
"'; " + std::string(e.what()));
210 size_t num_images = image_formats.size();
211 for (
size_t i = 0; i < num_images; ++i) {
212 validate_image(image_dir, i, image_formats[i], options, version);
215 size_t num_dir_obj = internal_other::count_directory_entries(image_dir);
216 if (num_dir_obj - 1 != num_images) {
217 throw std::runtime_error(
"more objects than expected inside the 'images' subdirectory");
234 const std::string type_name =
"spatial_experiment";
235 const std::string& vstring = internal_json::extract_version_for_type(metadata.
other, type_name);
236 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(),
true);
237 if (version.major != 1) {
238 throw std::runtime_error(
"unsupported version string '" + vstring +
"'");
242 internal::validate_coordinates(path, dims[1], options);
243 internal::validate_images(path, dims[1], options, version);
Validation for standard image files.
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition single_cell_experiment.hpp:43
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition spatial_experiment.hpp:231
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition summarized_experiment.hpp:136
takane validation functions.
Definition _derived_from.hpp:15
bool satisfies_interface(const std::string &type, const std::string &interface, const Options &options)
Definition _satisfies_interface.hpp:68
ObjectMetadata read_object_metadata(const std::filesystem::path &path)
Definition utils_public.hpp:74
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _validate.hpp:109
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _dimensions.hpp:69
bool derived_from(const std::string &type, const std::string &base, const Options &options)
Definition _derived_from.hpp:80
Validation for single cell experiments.
Validation options.
Definition utils_public.hpp:94