takane
Validators for ArtifactDB file formats
Loading...
Searching...
No Matches
spatial_experiment.hpp
Go to the documentation of this file.
1#ifndef TAKANE_SPATIAL_EXPERIMENT_HPP
2#define TAKANE_SPATIAL_EXPERIMENT_HPP
3
4#include "ritsuko/hdf5/hdf5.hpp"
5
7#include "image_file.hpp"
8#include "utils_factor.hpp"
9#include "utils_public.hpp"
10#include "utils_other.hpp"
11#include "utils_files.hpp"
12
13#include <filesystem>
14#include <stdexcept>
15#include <unordered_set>
16#include <string>
17#include <vector>
18#include <cmath>
19
25namespace takane {
26
// Forward declarations of the generic takane entry points used by this header.
// Their definitions live in other headers (_derived_from.hpp, _validate.hpp and
// _satisfies_interface.hpp respectively); they are only declared here so that the
// spatial experiment validators below can call them.
30bool derived_from(const std::string&, const std::string&, const Options& options);
31void validate(const std::filesystem::path&, const ObjectMetadata&, Options& options);
32bool satisfies_interface(const std::string&, const std::string&, const Options& options);
41namespace spatial_experiment {
42
46namespace internal {
47
48inline void validate_coordinates(const std::filesystem::path& path, size_t ncols, Options& options) {
49 auto coord_path = path / "coordinates";
50 auto coord_meta = read_object_metadata(coord_path);
51 if (!derived_from(coord_meta.type, "dense_array", options)) {
52 throw std::runtime_error("'coordinates' should be a dense array");
53 }
54
55 // Validating the coordinates; currently these must be a dense array of
56 // points, but could also be polygons/hulls in the future.
57 try {
58 ::takane::validate(coord_path, coord_meta, options);
59 } catch (std::exception& e) {
60 throw std::runtime_error("failed to validate 'coordinates'; " + std::string(e.what()));
61 }
62
63 auto cdims = ::takane::dimensions(coord_path, coord_meta, options);
64 if (cdims.size() != 2) {
65 throw std::runtime_error("'coordinates' should be a 2-dimensional dense array");
66 } else if (cdims[1] != 2 && cdims[1] != 3) {
67 throw std::runtime_error("'coordinates' should have 2 or 3 columns");
68 } else if (cdims[0] != ncols) {
69 throw std::runtime_error("number of rows in 'coordinates' should equal the number of columns in the 'spatial_experiment'");
70 }
71
72 // Checking that the values are numeric.
73 auto handle = ritsuko::hdf5::open_file(coord_path / "array.h5");
74 auto ghandle = ritsuko::hdf5::open_group(handle, "dense_array");
75 auto dhandle = ritsuko::hdf5::open_dataset(ghandle, "data");
76 auto dclass = dhandle.getTypeClass();
77 if (dclass != H5T_INTEGER && dclass != H5T_FLOAT) {
78 throw std::runtime_error("values in 'coordinates' should be numeric");
79 }
80}
81
82inline void validate_image(const std::filesystem::path& path, size_t i, const std::string& format, Options& options, const ritsuko::Version& version) {
83 auto ipath = path / std::to_string(i);
84 if (format == "PNG") {
85 ipath += ".png";
86 image_file::internal::validate_png(ipath);
87 } else if (format == "TIFF") {
88 ipath += ".tif";
89 image_file::internal::validate_tiff(ipath);
90 } else if (format == "OTHER" && version.ge(1, 1, 0)) {
91 auto imeta = read_object_metadata(ipath);
92 if (!satisfies_interface(imeta.type, "IMAGE", options)) {
93 throw std::runtime_error("object in '" + ipath.string() + "' should satisfy the 'IMAGE' interface");
94 }
95 ::takane::validate(ipath, imeta, options);
96 } else {
97 throw std::runtime_error("image format '" + format + "' is not currently supported");
98 }
99}
100
101inline void validate_images(const std::filesystem::path& path, size_t ncols, Options& options, const ritsuko::Version& version) {
102 auto image_dir = path / "images";
103 if (!std::filesystem::exists(image_dir) && version.ge(1, 2, 0)) {
104 // No images at all, which is permitted.
105 return;
106 }
107
108 auto mappath = image_dir / "mapping.h5";
109 auto ihandle = ritsuko::hdf5::open_file(mappath);
110 auto ghandle = ritsuko::hdf5::open_group(ihandle, "spatial_experiment");
111
112 std::vector<std::string> image_formats;
113
114 try {
115 struct SampleMapMessenger {
116 static std::string level() { return "sample name"; }
117 static std::string levels() { return "sample names"; }
118 static std::string codes() { return "sample assignments"; }
119 };
120
121 auto num_samples = internal_factor::validate_factor_levels<SampleMapMessenger>(ghandle, "sample_names", options.hdf5_buffer_size);
122 auto num_codes = internal_factor::validate_factor_codes<SampleMapMessenger>(ghandle, "column_samples", num_samples, options.hdf5_buffer_size, true);
123 if (num_codes != ncols) {
124 throw std::runtime_error("length of 'column_samples' should equal the number of columns in the spatial experiment");
125 }
126
127 // Scanning through the image information.
128 auto sample_handle = ritsuko::hdf5::open_dataset(ghandle, "image_samples");
129 if (ritsuko::hdf5::exceeds_integer_limit(sample_handle, 64, false)) {
130 throw std::runtime_error("expected a datatype for 'image_samples' that fits in a 64-bit unsigned integer");
131 }
132 auto num_images = ritsuko::hdf5::get_1d_length(sample_handle.getSpace(), false);
133
134 auto id_handle = ritsuko::hdf5::open_dataset(ghandle, "image_ids");
135 if (!ritsuko::hdf5::is_utf8_string(id_handle)) {
136 throw std::runtime_error("expected 'image_ids' to have a datatype that can be represented by a UTF-8 encoded string");
137 }
138 if (ritsuko::hdf5::get_1d_length(id_handle.getSpace(), false) != num_images) {
139 throw std::runtime_error("expected 'image_ids' to have the same length as 'image_samples'");
140 }
141
142 auto scale_handle = ritsuko::hdf5::open_dataset(ghandle, "image_scale_factors");
143 if (ritsuko::hdf5::exceeds_float_limit(scale_handle, 64)) {
144 throw std::runtime_error("expected a datatype for 'image_scale_factors' that fits in a 64-bit float");
145 }
146 if (ritsuko::hdf5::get_1d_length(scale_handle.getSpace(), false) != num_images) {
147 throw std::runtime_error("expected 'image_scale_factors' to have the same length as 'image_samples'");
148 }
149
150 ritsuko::hdf5::Stream1dNumericDataset<uint64_t> sample_stream(&sample_handle, num_images, options.hdf5_buffer_size);
151 ritsuko::hdf5::Stream1dStringDataset id_stream(&id_handle, num_images, options.hdf5_buffer_size);
152 ritsuko::hdf5::Stream1dNumericDataset<double> scale_stream(&scale_handle, num_images, options.hdf5_buffer_size);
153 std::vector<std::unordered_set<std::string> > collected(num_samples);
154
155 for (hsize_t i = 0; i < num_images; ++i) {
156 auto sample = sample_stream.get();
157 if (sample >= num_samples) {
158 throw std::runtime_error("entries of 'image_samples' should be less than the number of samples");
159 }
160 sample_stream.next();
161
162 auto& present = collected[sample];
163 auto id = id_stream.steal();
164 if (present.find(id) != present.end()) {
165 throw std::runtime_error("'image_ids' contains duplicated image IDs for the same sample + ('" + id + "')");
166 }
167 present.insert(std::move(id));
168 id_stream.next();
169
170 auto sc = scale_stream.get();
171 if (!std::isfinite(sc) || sc <= 0) {
172 throw std::runtime_error("entries of 'image_scale_factors' should be finite and positive");
173 }
174 scale_stream.next();
175 }
176
177 if (version.ge(1, 3, 0) && !ghandle.exists("image_formats")) {
178 image_formats.resize(num_images, "OTHER");
179
180 } else {
181 auto format_handle = ritsuko::hdf5::open_dataset(ghandle, "image_formats");
182 if (!ritsuko::hdf5::is_utf8_string(format_handle)) {
183 throw std::runtime_error("expected 'image_formats' to have a datatype that can be represented by a UTF-8 encoded string");
184 }
185 if (ritsuko::hdf5::get_1d_length(format_handle.getSpace(), false) != num_images) {
186 throw std::runtime_error("expected 'image_formats' to have the same length as 'image_samples'");
187 }
188 image_formats.reserve(num_images);
189
190 ritsuko::hdf5::Stream1dStringDataset format_stream(&format_handle, num_images, options.hdf5_buffer_size);
191
192 for (hsize_t i = 0; i < num_images; ++i) {
193 auto fmt = format_stream.steal();
194 image_formats.push_back(std::move(fmt));
195 format_stream.next();
196 }
197 }
198
199 for (const auto& x : collected) {
200 if (x.empty()) {
201 throw std::runtime_error("each sample should map to one or more images in 'image_samples'");
202 }
203 }
204
205 } catch (std::exception& e) {
206 throw std::runtime_error("failed to validate '" + mappath.string() + "'; " + std::string(e.what()));
207 }
208
209 // Now validating the images themselves.
210 size_t num_images = image_formats.size();
211 for (size_t i = 0; i < num_images; ++i) {
212 validate_image(image_dir, i, image_formats[i], options, version);
213 }
214
215 size_t num_dir_obj = internal_other::count_directory_entries(image_dir);
216 if (num_dir_obj - 1 != num_images) { // -1 to account for the mapping.h5 file itself.
217 throw std::runtime_error("more objects than expected inside the 'images' subdirectory");
218 }
219}
220
221}
231inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
232 ::takane::single_cell_experiment::validate(path, metadata, options);
233
234 const std::string type_name = "spatial_experiment"; // use a separate variable to avoid dangling reference warnings from GCC.
235 const std::string& vstring = internal_json::extract_version_for_type(metadata.other, type_name);
236 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true);
237 if (version.major != 1) {
238 throw std::runtime_error("unsupported version string '" + vstring + "'");
239 }
240
241 auto dims = ::takane::summarized_experiment::dimensions(path, metadata, options);
242 internal::validate_coordinates(path, dims[1], options);
243 internal::validate_images(path, dims[1], options, version);
244}
245
246}
247
248}
249
250#endif
Validation for standard image files.
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition single_cell_experiment.hpp:43
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition spatial_experiment.hpp:231
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition summarized_experiment.hpp:136
takane validation functions.
Definition _derived_from.hpp:15
bool satisfies_interface(const std::string &type, const std::string &interface, const Options &options)
Definition _satisfies_interface.hpp:68
ObjectMetadata read_object_metadata(const std::filesystem::path &path)
Definition utils_public.hpp:74
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _validate.hpp:109
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _dimensions.hpp:69
bool derived_from(const std::string &type, const std::string &base, const Options &options)
Definition _derived_from.hpp:80
Validation for single cell experiments.
Object metadata, including the type and other fields.
Definition utils_public.hpp:26
std::unordered_map< std::string, std::shared_ptr< millijson::Base > > other
Definition utils_public.hpp:35
Validation options.
Definition utils_public.hpp:94
Exported utilities.