takane
Validators for ArtifactDB file formats
Loading...
Searching...
No Matches
single_cell_experiment.hpp
Go to the documentation of this file.
1#ifndef TAKANE_SINGLE_CELL_EXPERIMENT_HPP
2#define TAKANE_SINGLE_CELL_EXPERIMENT_HPP
3
4#include "millijson/millijson.hpp"
5#include "byteme/byteme.hpp"
6
9
10#include <filesystem>
11#include <stdexcept>
12#include <unordered_set>
13#include <string>
14
20namespace takane {
21
25void validate(const std::filesystem::path&, const ObjectMetadata&, Options& options);
26std::vector<size_t> dimensions(const std::filesystem::path&, const ObjectMetadata&, Options& options);
27bool satisfies_interface(const std::string&, const std::string&, const Options&);
36namespace single_cell_experiment {
37
43inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
45
46 auto sedims = ::takane::summarized_experiment::dimensions(path, metadata, options);
47 size_t num_cols = sedims[1];
48
49 const std::string type_name = "single_cell_experiment"; // use a separate variable to avoid dangling reference warnings from GCC.
50 const auto& scemap = internal_json::extract_typed_object_from_metadata(metadata.other, type_name);
51
52 const std::string version_name = "version"; // again, avoid dangling reference warnings.
53 const std::string& vstring = internal_json::extract_string_from_typed_object(scemap, version_name, type_name);
54 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true);
55 if (version.major != 1) {
56 throw std::runtime_error("unsupported version string '" + vstring + "'");
57 }
58
59 // Check the reduced dimensions.
60 auto rddir = path / "reduced_dimensions";
61 if (std::filesystem::exists(rddir)) {
62 auto num_rd = internal_summarized_experiment::check_names_json(rddir);
63
64 for (size_t i = 0; i < num_rd; ++i) {
65 auto rdname = std::to_string(i);
66 auto rdpath = rddir / rdname;
67 auto rdmeta = read_object_metadata(rdpath);
68 ::takane::validate(rdpath, rdmeta, options);
69
70 auto dims = ::takane::dimensions(rdpath, rdmeta, options);
71 if (dims.size() < 1) {
72 throw std::runtime_error("object in 'reduced_dimensions/" + rdname + "' should have at least one dimension");
73 }
74 if (dims[0] != num_cols) {
75 throw std::runtime_error("object in 'reduced_dimensions/" + rdname + "' should have the same number of rows as the columns of its parent '" + metadata.type + "'");
76 }
77 }
78
79 size_t num_dir_obj = internal_other::count_directory_entries(rddir);
80 if (num_dir_obj - 1 != num_rd) { // -1 to account for the names.json file itself.
81 throw std::runtime_error("more objects than expected inside the 'reduced_dimensions' subdirectory");
82 }
83 }
84
85 // Check the alternative experiments.
86 auto aedir = path / "alternative_experiments";
87 std::unordered_set<std::string> alt_names;
88 if (std::filesystem::exists(aedir)) {
89 internal_summarized_experiment::check_names_json(aedir, alt_names);
90 size_t num_ae = alt_names.size();
91
92 for (size_t i = 0; i < num_ae; ++i) {
93 auto aename = std::to_string(i);
94 auto aepath = aedir / aename;
95 auto aemeta = read_object_metadata(aepath);
96 if (!satisfies_interface(aemeta.type, "SUMMARIZED_EXPERIMENT", options)) {
97 throw std::runtime_error("object in 'alternative_experiments/" + aename + "' should satisfy the 'SUMMARIZED_EXPERIMENT' interface");
98 }
99
100 ::takane::validate(aepath, aemeta, options);
101 auto dims = ::takane::dimensions(aepath, aemeta, options);
102 if (dims[1] != num_cols) {
103 throw std::runtime_error("object in 'alternative_experiments/" + aename + "' should have the same number of columns as its parent '" + metadata.type + "'");
104 }
105 }
106
107 size_t num_dir_obj = internal_other::count_directory_entries(aedir);
108 if (num_dir_obj - 1 != num_ae) { // -1 to account for the names.json file itself.
109 throw std::runtime_error("more objects than expected inside the 'alternative_experiments' subdirectory");
110 }
111 }
112
113 // Validating the main experiment name.
114 auto mIt = scemap.find("main_experiment_name");
115 if (mIt != scemap.end()) {
116 const auto& ver = mIt->second;
117 if (ver->type() != millijson::STRING) {
118 throw std::runtime_error("expected 'main_experiment_name' to be a string");
119 }
120 const auto& mname = reinterpret_cast<const millijson::String*>(ver.get())->value;
121 if (mname.empty()) {
122 throw std::runtime_error("expected 'main_experiment_name' to be a non-empty string");
123 }
124 if (alt_names.find(mname) != alt_names.end()) {
125 throw std::runtime_error("expected 'main_experiment_name' to not overlap with 'alternative_experiment' names (found '" + mname + "')");
126 }
127 }
128}
129
130}
131
132}
133
134#endif
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition ranged_summarized_experiment.hpp:41
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition single_cell_experiment.hpp:43
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition summarized_experiment.hpp:135
takane validation functions.
Definition _derived_from.hpp:15
bool satisfies_interface(const std::string &type, const std::string &interface, const Options &options)
Definition _satisfies_interface.hpp:67
ObjectMetadata read_object_metadata(const std::filesystem::path &path)
Definition utils_public.hpp:74
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _validate.hpp:107
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _dimensions.hpp:69
Validation for ranged summarized experiments.
Object metadata, including the type and other fields.
Definition utils_public.hpp:26
std::unordered_map< std::string, std::shared_ptr< millijson::Base > > other
Definition utils_public.hpp:35
std::string type
Definition utils_public.hpp:30
Validation options.
Definition utils_public.hpp:94
Validation for summarized experiments.