1#ifndef TAKANE_SPATIAL_EXPERIMENT_HPP
2#define TAKANE_SPATIAL_EXPERIMENT_HPP
4#include "ritsuko/hdf5/hdf5.hpp"
7#include "utils_factor.hpp"
9#include "utils_other.hpp"
10#include "utils_files.hpp"
14#include <unordered_set>
// Forward declaration only; no body appears in this listing.
// Takes two strings plus read-only validation Options and returns a bool.
// Used further down as derived_from(coord_meta.type, "dense_array", options)
// to decide whether the 'coordinates' object is acceptable.
29bool derived_from(
const std::string&,
const std::string&,
const Options& options);
// Forward declaration only; no body appears at this point of the listing.
// Takes a filesystem path, an ObjectMetadata and a mutable Options reference;
// returns nothing.
30void validate(
const std::filesystem::path&,
const ObjectMetadata&, Options& options);
40namespace spatial_experiment {
// Validates the 'coordinates' child located under `path`.
//
// Parameters:
//   path    - parent directory; the child is resolved as path / "coordinates".
//   ncols   - value that the first extent of 'coordinates' (cdims[0]) must equal.
//   options - forwarded to derived_from().
//
// Throws std::runtime_error when any of the visible checks below fails.
//
// NOTE(review): this listing has gaps (the embedded source line numbers jump
// 48 -> 50, 51 -> 58, 59 -> 63, 68 -> 72, 77 -> end). The declarations of
// `coord_meta` and `cdims`, the `try {` opener and several closing braces are
// therefore not visible here; confirm against the real header before editing.
47inline void validate_coordinates(
const std::filesystem::path& path,
size_t ncols, Options& options) {
48 auto coord_path = path /
"coordinates";
// The object's declared type must derive from "dense_array".
// `coord_meta` is declared on a line not shown in this listing.
50 if (!
derived_from(coord_meta.type,
"dense_array", options)) {
51 throw std::runtime_error(
"'coordinates' should be a dense array");
// NOTE(review): the brace below closes a block whose opener is not visible
// (presumably a `try {` wrapping a validation call - confirm). Any
// std::exception caught here is rethrown as std::runtime_error with a
// "failed to validate 'coordinates'; " prefix.
58 }
catch (std::exception& e) {
59 throw std::runtime_error(
"failed to validate 'coordinates'; " + std::string(e.what()));
// Shape checks on `cdims` (declared on a line not shown): exactly two
// extents, second extent equal to 2 or 3, first extent equal to `ncols`.
63 if (cdims.size() != 2) {
64 throw std::runtime_error(
"'coordinates' should be a 2-dimensional dense array");
65 }
else if (cdims[1] != 2 && cdims[1] != 3) {
66 throw std::runtime_error(
"'coordinates' should have 2 or 3 columns");
67 }
else if (cdims[0] != ncols) {
68 throw std::runtime_error(
"number of rows in 'coordinates' should equal the number of columns in the 'spatial_experiment'");
// Open coord_path / "array.h5", then group "dense_array", then dataset
// "data", and require its HDF5 type class to be integer or float.
72 auto handle = ritsuko::hdf5::open_file(coord_path /
"array.h5");
73 auto ghandle = ritsuko::hdf5::open_group(handle,
"dense_array");
74 auto dhandle = ritsuko::hdf5::open_dataset(ghandle,
"data");
75 auto dclass = dhandle.getTypeClass();
76 if (dclass != H5T_INTEGER && dclass != H5T_FLOAT) {
77 throw std::runtime_error(
"values in 'coordinates' should be numeric");
// Validates a single image entry found at path / std::to_string(i).
//
// Parameters:
//   path    - directory holding the image entries.
//   i       - index of the image; its decimal string is the entry name.
//   format  - format label; "PNG", "TIFF" and "OTHER" are handled below.
//   options - not used on any line visible in this listing.
//   version - consulted only for the "OTHER" branch (version.ge(1, 1, 0)).
//
// Throws std::runtime_error for a bad TIFF signature, for the 'IMAGE'
// interface failure, and for any format label not handled above.
//
// NOTE(review): the listing has gaps (embedded line numbers jump 82 -> 84,
// 98 -> 101, 101 -> 104, 104 -> 109), so some conditions and closing braces
// are not visible; confirm against the real header.
81inline void validate_image(
const std::filesystem::path& path,
size_t i,
const std::string& format, Options& options,
const ritsuko::Version& version) {
82 auto ipath = path / std::to_string(i);
// "PNG": hand the 8 expected leading bytes to internal_files::check_signature
// together with the label "PNG" (bytes 80, 78, 71 are ASCII 'P', 'N', 'G').
84 if (format ==
"PNG") {
87 std::array<unsigned char, 8> expected { 137, 80, 78, 71, 13, 10, 26, 10 };
88 internal_files::check_signature(ipath, expected.data(), expected.size(),
"PNG");
90 }
else if (format ==
"TIFF") {
// "TIFF": read 4 bytes via internal_files::extract_signature and accept
// either ASCII "II" followed by 0x2A 0x00, or ASCII "MM" followed by 0x00 0x2A.
92 std::array<unsigned char, 4> observed;
93 internal_files::extract_signature(ipath, observed.data(), observed.size());
95 std::array<unsigned char, 4> iisig = { 0x49, 0x49, 0x2A, 0x00 };
96 std::array<unsigned char, 4> mmsig = { 0x4D, 0x4D, 0x00, 0x2A };
97 if (observed != iisig && observed != mmsig) {
98 throw std::runtime_error(
"incorrect TIFF file signature for '" + ipath.string() +
"'");
101 }
else if (format ==
"OTHER" && version.ge(1, 1, 0)) {
// NOTE(review): the condition guarding this throw (embedded lines 102-103)
// is not visible; presumably it tests whether the object at `ipath`
// satisfies the 'IMAGE' interface - confirm.
104 throw std::runtime_error(
"object in '" + ipath.string() +
"' should satisfy the 'IMAGE' interface");
// Fallback for any format label not handled by the branches above
// (the enclosing `else` is presumably on a line not shown).
109 throw std::runtime_error(
"image format '" + format +
"' is not currently supported");
// Validates the 'images' subdirectory of `path`: the contents of
// images/mapping.h5 first, then every image entry, then the entry count.
//
// Parameters:
//   path    - parent directory; images live under path / "images".
//   ncols   - required length of the 'column_samples' codes.
//   options - supplies hdf5_buffer_size and is forwarded to validate_image().
//   version - gates the missing-directory branch (version.ge(1, 2, 0)) and is
//             forwarded to validate_image().
//
// Throws std::runtime_error when any visible check fails; failures raised
// while reading mapping.h5 are rethrown with the mapping file path prefixed.
//
// NOTE(review): the listing has gaps (embedded line numbers jump 115 -> 120,
// 122 -> 124, 129 -> 132, 188 -> 191, 193 -> 197, 199 -> 202, 202 -> 204,
// 204 -> 208, ...). The `try {` opener, some stream `next()` calls, at least
// one `if` condition and many closing braces are not visible; confirm against
// the real header before changing any logic.
113inline void validate_images(
const std::filesystem::path& path,
size_t ncols, Options& options,
const ritsuko::Version& version) {
114 auto image_dir = path /
"images";
// NOTE(review): the body of this branch is not visible; presumably a missing
// 'images' directory is accepted (early return) from version 1.2.0 - confirm.
115 if (!std::filesystem::exists(image_dir) && version.ge(1, 2, 0)) {
// Open images/mapping.h5 and its "spatial_experiment" group.
120 auto mappath = image_dir /
"mapping.h5";
121 auto ihandle = ritsuko::hdf5::open_file(mappath);
122 auto ghandle = ritsuko::hdf5::open_group(ihandle,
"spatial_experiment");
// Filled inside the per-image loop below and read again after the catch
// handler, which is why it is declared out here.
124 std::vector<std::string> image_formats;
// Supplies the wording ("sample name", "sample names", "sample assignments")
// as a template argument to the internal_factor validators.
126 struct SampleMapMessenger {
127 static std::string level() {
return "sample name"; }
128 static std::string levels() {
return "sample names"; }
129 static std::string codes() {
return "sample assignments"; }
// 'sample_names' yields num_samples; 'column_samples' is validated against
// num_samples and must have exactly `ncols` entries.
132 auto num_samples = internal_factor::validate_factor_levels<SampleMapMessenger>(ghandle,
"sample_names", options.hdf5_buffer_size);
133 auto num_codes = internal_factor::validate_factor_codes<SampleMapMessenger>(ghandle,
"column_samples", num_samples, options.hdf5_buffer_size,
true);
134 if (num_codes != ncols) {
135 throw std::runtime_error(
"length of 'column_samples' should equal the number of columns in the spatial experiment");
// 'image_samples': datatype must fit in a 64-bit unsigned integer; its 1-d
// length defines num_images, which the three datasets below must match.
139 auto sample_handle = ritsuko::hdf5::open_dataset(ghandle,
"image_samples");
140 if (ritsuko::hdf5::exceeds_integer_limit(sample_handle, 64,
false)) {
141 throw std::runtime_error(
"expected a datatype for 'image_samples' that fits in a 64-bit unsigned integer");
143 auto num_images = ritsuko::hdf5::get_1d_length(sample_handle.getSpace(),
false);
// 'image_ids': UTF-8 compatible strings, same length as 'image_samples'.
145 auto id_handle = ritsuko::hdf5::open_dataset(ghandle,
"image_ids");
146 if (!ritsuko::hdf5::is_utf8_string(id_handle)) {
147 throw std::runtime_error(
"expected 'image_ids' to have a datatype that can be represented by a UTF-8 encoded string");
149 if (ritsuko::hdf5::get_1d_length(id_handle.getSpace(),
false) != num_images) {
150 throw std::runtime_error(
"expected 'image_ids' to have the same length as 'image_samples'");
// 'image_scale_factors': datatype must fit in a 64-bit float, same length.
153 auto scale_handle = ritsuko::hdf5::open_dataset(ghandle,
"image_scale_factors");
154 if (ritsuko::hdf5::exceeds_float_limit(scale_handle, 64)) {
155 throw std::runtime_error(
"expected a datatype for 'image_scale_factors' that fits in a 64-bit float");
157 if (ritsuko::hdf5::get_1d_length(scale_handle.getSpace(),
false) != num_images) {
158 throw std::runtime_error(
"expected 'image_scale_factors' to have the same length as 'image_samples'");
// 'image_formats': UTF-8 compatible strings, same length.
161 auto format_handle = ritsuko::hdf5::open_dataset(ghandle,
"image_formats");
162 if (!ritsuko::hdf5::is_utf8_string(format_handle)) {
163 throw std::runtime_error(
"expected 'image_formats' to have a datatype that can be represented by a UTF-8 encoded string");
165 if (ritsuko::hdf5::get_1d_length(format_handle.getSpace(),
false) != num_images) {
166 throw std::runtime_error(
"expected 'image_formats' to have the same length as 'image_samples'");
// One stream per dataset, each constructed with num_images and
// options.hdf5_buffer_size. `collected` holds, per sample index, the set of
// image IDs seen so far.
169 ritsuko::hdf5::Stream1dNumericDataset<uint64_t> sample_stream(&sample_handle, num_images, options.hdf5_buffer_size);
170 ritsuko::hdf5::Stream1dStringDataset id_stream(&id_handle, num_images, options.hdf5_buffer_size);
171 ritsuko::hdf5::Stream1dNumericDataset<double> scale_stream(&scale_handle, num_images, options.hdf5_buffer_size);
172 ritsuko::hdf5::Stream1dStringDataset format_stream(&format_handle, num_images, options.hdf5_buffer_size);
173 std::vector<std::unordered_set<std::string> > collected(num_samples);
174 image_formats.reserve(num_images);
// Per-image pass over the four streams.
176 for (hsize_t i = 0; i < num_images; ++i) {
// The sample index must be a valid index into `collected`.
177 auto sample = sample_stream.get();
178 if (sample >= num_samples) {
179 throw std::runtime_error(
"entries of 'image_samples' should be less than the number of samples");
181 sample_stream.next();
// An image ID may appear only once within the same sample.
// NOTE(review): the message below contains a stray " + " inside the string
// literal ("...same sample + ('"); looks like a typo in the runtime text.
183 auto& present = collected[sample];
184 auto id = id_stream.steal();
185 if (present.find(
id) != present.end()) {
186 throw std::runtime_error(
"'image_ids' contains duplicated image IDs for the same sample + ('" +
id +
"')");
188 present.insert(std::move(
id));
// Scale factors must be finite and strictly greater than zero.
// NOTE(review): the advancing calls for id_stream and scale_stream are not
// visible in this listing (embedded lines 189-190 and 194-196 are missing).
191 auto sc = scale_stream.get();
192 if (!std::isfinite(sc) || sc <= 0) {
193 throw std::runtime_error(
"entries of 'image_scale_factors' should be finite and positive");
// Keep the format label so each image can be checked after the loop.
197 auto fmt = format_stream.steal();
198 image_formats.push_back(std::move(fmt));
199 format_stream.next();
// NOTE(review): the condition guarding this throw (embedded line 203) is not
// visible; presumably it fires when a sample's ID set `x` is empty - confirm.
202 for (
const auto& x : collected) {
204 throw std::runtime_error(
"each sample should map to one or more images in 'image_samples'");
// Closes a block whose opener is not visible (presumably `try {`); caught
// exceptions are rethrown with the mapping file path in the message.
208 }
catch (std::exception& e) {
209 throw std::runtime_error(
"failed to validate '" + mappath.string() +
"'; " + std::string(e.what()));
// Validate every image entry by index, using the format labels collected above.
213 size_t num_images = image_formats.size();
214 for (
size_t i = 0; i < num_images; ++i) {
215 validate_image(image_dir, i, image_formats[i], options, version);
// The directory entry count minus one must equal the number of images.
// NOTE(review): the "- 1" presumably accounts for mapping.h5 - confirm.
218 size_t num_dir_obj = internal_other::count_directory_entries(image_dir);
219 if (num_dir_obj - 1 != num_images) {
220 throw std::runtime_error(
"more objects than expected inside the 'images' subdirectory");
// NOTE(review): body fragment only - the enclosing function signature is not
// visible in this listing (embedded lines 234-236 are missing), and neither
// is the declaration of `dims` (embedded lines 241-243). Presumably this is
// the body of spatial_experiment::validate(path, metadata, options) - confirm.
//
// Reads the version string recorded for "spatial_experiment" in
// metadata.other, parses it, and rejects any major version other than 1.
237 const std::string& vstring = internal_json::extract_version_for_type(metadata.
other,
"spatial_experiment");
238 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(),
true);
239 if (version.major != 1) {
240 throw std::runtime_error(
"unsupported version string '" + vstring +
"'");
// Both helpers receive dims[1] as their `ncols` argument.
244 internal::validate_coordinates(path, dims[1], options);
245 internal::validate_images(path, dims[1], options, version);
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition single_cell_experiment.hpp:43
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition spatial_experiment.hpp:234
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition summarized_experiment.hpp:133
takane validation functions.
Definition _derived_from.hpp:15
bool satisfies_interface(const std::string &type, const std::string &interface, const Options &options)
Definition _satisfies_interface.hpp:67
ObjectMetadata read_object_metadata(const std::filesystem::path &path)
Definition utils_public.hpp:74
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _validate.hpp:107
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _dimensions.hpp:69
bool derived_from(const std::string &type, const std::string &base, const Options &options)
Definition _derived_from.hpp:80
Validation for single cell experiments.
Validation options.
Definition utils_public.hpp:94