takane
Validators for ArtifactDB file formats
Loading...
Searching...
No Matches
spatial_experiment.hpp
Go to the documentation of this file.
1#ifndef TAKANE_SPATIAL_EXPERIMENT_HPP
2#define TAKANE_SPATIAL_EXPERIMENT_HPP
3
#include "ritsuko/hdf5/hdf5.hpp"

#include "utils_factor.hpp"
#include "utils_public.hpp"
#include "utils_other.hpp"
#include "utils_files.hpp"

#include <filesystem>
#include <stdexcept>
#include <unordered_set>
#include <string>
#include <vector>
#include <cmath>
#include <array>
#include <cstddef>
#include <cstdint>
18
24namespace takane {
25
// Forward declarations of the generic takane entry points that this header
// calls; they are only declared here and are defined in other headers.
bool derived_from(const std::string&, const std::string&, const Options& options);
void validate(const std::filesystem::path&, const ObjectMetadata&, Options& options);
bool satisfies_interface(const std::string&, const std::string&, const Options& options);
40namespace spatial_experiment {
41
45namespace internal {
46
47inline void validate_coordinates(const std::filesystem::path& path, size_t ncols, Options& options) {
48 auto coord_path = path / "coordinates";
49 auto coord_meta = read_object_metadata(coord_path);
50 if (!derived_from(coord_meta.type, "dense_array", options)) {
51 throw std::runtime_error("'coordinates' should be a dense array");
52 }
53
54 // Validating the coordinates; currently these must be a dense array of
55 // points, but could also be polygons/hulls in the future.
56 try {
57 ::takane::validate(coord_path, coord_meta, options);
58 } catch (std::exception& e) {
59 throw std::runtime_error("failed to validate 'coordinates'; " + std::string(e.what()));
60 }
61
62 auto cdims = ::takane::dimensions(coord_path, coord_meta, options);
63 if (cdims.size() != 2) {
64 throw std::runtime_error("'coordinates' should be a 2-dimensional dense array");
65 } else if (cdims[1] != 2 && cdims[1] != 3) {
66 throw std::runtime_error("'coordinates' should have 2 or 3 columns");
67 } else if (cdims[0] != ncols) {
68 throw std::runtime_error("number of rows in 'coordinates' should equal the number of columns in the 'spatial_experiment'");
69 }
70
71 // Checking that the values are numeric.
72 auto handle = ritsuko::hdf5::open_file(coord_path / "array.h5");
73 auto ghandle = ritsuko::hdf5::open_group(handle, "dense_array");
74 auto dhandle = ritsuko::hdf5::open_dataset(ghandle, "data");
75 auto dclass = dhandle.getTypeClass();
76 if (dclass != H5T_INTEGER && dclass != H5T_FLOAT) {
77 throw std::runtime_error("values in 'coordinates' should be numeric");
78 }
79}
80
81inline void validate_image(const std::filesystem::path& path, size_t i, const std::string& format, Options& options, const ritsuko::Version& version) {
82 auto ipath = path / std::to_string(i);
83
84 if (format == "PNG") {
85 ipath += ".png";
86 // Magic number from http://www.libpng.org/pub/png/spec/1.2/png-1.2-pdg.html#PNG-file-signature
87 std::array<unsigned char, 8> expected { 137, 80, 78, 71, 13, 10, 26, 10 };
88 internal_files::check_signature(ipath, expected.data(), expected.size(), "PNG");
89
90 } else if (format == "TIFF") {
91 ipath += ".tif";
92 std::array<unsigned char, 4> observed;
93 internal_files::extract_signature(ipath, observed.data(), observed.size());
94 // Magic numbers from https://en.wikipedia.org/wiki/Magic_number_(programming)
95 std::array<unsigned char, 4> iisig = { 0x49, 0x49, 0x2A, 0x00 };
96 std::array<unsigned char, 4> mmsig = { 0x4D, 0x4D, 0x00, 0x2A };
97 if (observed != iisig && observed != mmsig) {
98 throw std::runtime_error("incorrect TIFF file signature for '" + ipath.string() + "'");
99 }
100
101 } else if (format == "OTHER" && version.ge(1, 1, 0)) {
102 auto imeta = read_object_metadata(ipath);
103 if (!satisfies_interface(imeta.type, "IMAGE", options)) {
104 throw std::runtime_error("object in '" + ipath.string() + "' should satisfy the 'IMAGE' interface");
105 }
106 ::takane::validate(ipath, imeta, options);
107
108 } else {
109 throw std::runtime_error("image format '" + format + "' is not currently supported");
110 }
111}
112
113inline void validate_images(const std::filesystem::path& path, size_t ncols, Options& options, const ritsuko::Version& version) {
114 auto image_dir = path / "images";
115 if (!std::filesystem::exists(image_dir) && version.ge(1, 2, 0)) {
116 // No images at all, which is permitted.
117 return;
118 }
119
120 auto mappath = image_dir / "mapping.h5";
121 auto ihandle = ritsuko::hdf5::open_file(mappath);
122 auto ghandle = ritsuko::hdf5::open_group(ihandle, "spatial_experiment");
123
124 std::vector<std::string> image_formats;
125 try {
126 struct SampleMapMessenger {
127 static std::string level() { return "sample name"; }
128 static std::string levels() { return "sample names"; }
129 static std::string codes() { return "sample assignments"; }
130 };
131
132 auto num_samples = internal_factor::validate_factor_levels<SampleMapMessenger>(ghandle, "sample_names", options.hdf5_buffer_size);
133 auto num_codes = internal_factor::validate_factor_codes<SampleMapMessenger>(ghandle, "column_samples", num_samples, options.hdf5_buffer_size, true);
134 if (num_codes != ncols) {
135 throw std::runtime_error("length of 'column_samples' should equal the number of columns in the spatial experiment");
136 }
137
138 // Scanning through the image information.
139 auto sample_handle = ritsuko::hdf5::open_dataset(ghandle, "image_samples");
140 if (ritsuko::hdf5::exceeds_integer_limit(sample_handle, 64, false)) {
141 throw std::runtime_error("expected a datatype for 'image_samples' that fits in a 64-bit unsigned integer");
142 }
143 auto num_images = ritsuko::hdf5::get_1d_length(sample_handle.getSpace(), false);
144
145 auto id_handle = ritsuko::hdf5::open_dataset(ghandle, "image_ids");
146 if (!ritsuko::hdf5::is_utf8_string(id_handle)) {
147 throw std::runtime_error("expected 'image_ids' to have a datatype that can be represented by a UTF-8 encoded string");
148 }
149 if (ritsuko::hdf5::get_1d_length(id_handle.getSpace(), false) != num_images) {
150 throw std::runtime_error("expected 'image_ids' to have the same length as 'image_samples'");
151 }
152
153 auto scale_handle = ritsuko::hdf5::open_dataset(ghandle, "image_scale_factors");
154 if (ritsuko::hdf5::exceeds_float_limit(scale_handle, 64)) {
155 throw std::runtime_error("expected a datatype for 'image_scale_factors' that fits in a 64-bit float");
156 }
157 if (ritsuko::hdf5::get_1d_length(scale_handle.getSpace(), false) != num_images) {
158 throw std::runtime_error("expected 'image_scale_factors' to have the same length as 'image_samples'");
159 }
160
161 auto format_handle = ritsuko::hdf5::open_dataset(ghandle, "image_formats");
162 if (!ritsuko::hdf5::is_utf8_string(format_handle)) {
163 throw std::runtime_error("expected 'image_formats' to have a datatype that can be represented by a UTF-8 encoded string");
164 }
165 if (ritsuko::hdf5::get_1d_length(format_handle.getSpace(), false) != num_images) {
166 throw std::runtime_error("expected 'image_formats' to have the same length as 'image_samples'");
167 }
168
169 ritsuko::hdf5::Stream1dNumericDataset<uint64_t> sample_stream(&sample_handle, num_images, options.hdf5_buffer_size);
170 ritsuko::hdf5::Stream1dStringDataset id_stream(&id_handle, num_images, options.hdf5_buffer_size);
171 ritsuko::hdf5::Stream1dNumericDataset<double> scale_stream(&scale_handle, num_images, options.hdf5_buffer_size);
172 ritsuko::hdf5::Stream1dStringDataset format_stream(&format_handle, num_images, options.hdf5_buffer_size);
173 std::vector<std::unordered_set<std::string> > collected(num_samples);
174 image_formats.reserve(num_images);
175
176 for (hsize_t i = 0; i < num_images; ++i) {
177 auto sample = sample_stream.get();
178 if (sample >= num_samples) {
179 throw std::runtime_error("entries of 'image_samples' should be less than the number of samples");
180 }
181 sample_stream.next();
182
183 auto& present = collected[sample];
184 auto id = id_stream.steal();
185 if (present.find(id) != present.end()) {
186 throw std::runtime_error("'image_ids' contains duplicated image IDs for the same sample + ('" + id + "')");
187 }
188 present.insert(std::move(id));
189 id_stream.next();
190
191 auto sc = scale_stream.get();
192 if (!std::isfinite(sc) || sc <= 0) {
193 throw std::runtime_error("entries of 'image_scale_factors' should be finite and positive");
194 }
195 scale_stream.next();
196
197 auto fmt = format_stream.steal();
198 image_formats.push_back(std::move(fmt));
199 format_stream.next();
200 }
201
202 for (const auto& x : collected) {
203 if (x.empty()) {
204 throw std::runtime_error("each sample should map to one or more images in 'image_samples'");
205 }
206 }
207
208 } catch (std::exception& e) {
209 throw std::runtime_error("failed to validate '" + mappath.string() + "'; " + std::string(e.what()));
210 }
211
212 // Now validating the images themselves.
213 size_t num_images = image_formats.size();
214 for (size_t i = 0; i < num_images; ++i) {
215 validate_image(image_dir, i, image_formats[i], options, version);
216 }
217
218 size_t num_dir_obj = internal_other::count_directory_entries(image_dir);
219 if (num_dir_obj - 1 != num_images) { // -1 to account for the mapping.h5 file itself.
220 throw std::runtime_error("more objects than expected inside the 'images' subdirectory");
221 }
222}
223
224}
234inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
235 ::takane::single_cell_experiment::validate(path, metadata, options);
236
237 const std::string& vstring = internal_json::extract_version_for_type(metadata.other, "spatial_experiment");
238 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true);
239 if (version.major != 1) {
240 throw std::runtime_error("unsupported version string '" + vstring + "'");
241 }
242
243 auto dims = ::takane::summarized_experiment::dimensions(path, metadata, options);
244 internal::validate_coordinates(path, dims[1], options);
245 internal::validate_images(path, dims[1], options, version);
246}
247
248}
249
250}
251
252#endif
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition single_cell_experiment.hpp:43
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition spatial_experiment.hpp:234
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition summarized_experiment.hpp:133
takane validation functions.
Definition _derived_from.hpp:15
bool satisfies_interface(const std::string &type, const std::string &interface, const Options &options)
Definition _satisfies_interface.hpp:67
ObjectMetadata read_object_metadata(const std::filesystem::path &path)
Definition utils_public.hpp:74
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _validate.hpp:107
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _dimensions.hpp:69
bool derived_from(const std::string &type, const std::string &base, const Options &options)
Definition _derived_from.hpp:80
Validation for single cell experiments.
Object metadata, including the type and other fields.
Definition utils_public.hpp:26
std::unordered_map< std::string, std::shared_ptr< millijson::Base > > other
Definition utils_public.hpp:35
Validation options.
Definition utils_public.hpp:94
Exported utilities.