takane
Validators for ArtifactDB file formats
Loading...
Searching...
No Matches
single_cell_experiment.hpp
Go to the documentation of this file.
1#ifndef TAKANE_SINGLE_CELL_EXPERIMENT_HPP
2#define TAKANE_SINGLE_CELL_EXPERIMENT_HPP
3
4#include "millijson/millijson.hpp"
5
8
9#include <filesystem>
10#include <stdexcept>
11#include <unordered_set>
12#include <string>
13
19namespace takane {
20
// Forward declarations of the generic takane entry points. Their definitions
// live in other headers; they are declared here so that the single cell
// experiment validator below can call them on the objects nested inside the
// 'reduced_dimensions' and 'alternative_experiments' subdirectories.
24void validate(const std::filesystem::path&, const ObjectMetadata&, Options& options);
25std::vector<size_t> dimensions(const std::filesystem::path&, const ObjectMetadata&, Options& options);
26bool satisfies_interface(const std::string&, const std::string&, const Options&);
35namespace single_cell_experiment {
36
42inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
44
45 auto sedims = ::takane::summarized_experiment::dimensions(path, metadata, options);
46 size_t num_cols = sedims[1];
47
48 const std::string type_name = "single_cell_experiment"; // use a separate variable to avoid dangling reference warnings from GCC.
49 const auto& scemap = internal_json::extract_typed_object_from_metadata(metadata.other, type_name);
50
51 const std::string version_name = "version"; // again, avoid dangling reference warnings.
52 const std::string& vstring = internal_json::extract_string_from_typed_object(scemap, version_name, type_name);
53 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true);
54 if (version.major != 1) {
55 throw std::runtime_error("unsupported version string '" + vstring + "'");
56 }
57
58 // Check the reduced dimensions.
59 auto rddir = path / "reduced_dimensions";
60 if (std::filesystem::exists(rddir)) {
61 auto num_rd = internal_summarized_experiment::check_names_json(rddir);
62
63 for (size_t i = 0; i < num_rd; ++i) {
64 auto rdname = std::to_string(i);
65 auto rdpath = rddir / rdname;
66 auto rdmeta = read_object_metadata(rdpath);
67 ::takane::validate(rdpath, rdmeta, options);
68
69 auto dims = ::takane::dimensions(rdpath, rdmeta, options);
70 if (dims.size() < 1) {
71 throw std::runtime_error("object in 'reduced_dimensions/" + rdname + "' should have at least one dimension");
72 }
73 if (dims[0] != num_cols) {
74 throw std::runtime_error("object in 'reduced_dimensions/" + rdname + "' should have the same number of rows as the columns of its parent '" + metadata.type + "'");
75 }
76 }
77
78 size_t num_dir_obj = internal_other::count_directory_entries(rddir);
79 if (num_dir_obj - 1 != num_rd) { // -1 to account for the names.json file itself.
80 throw std::runtime_error("more objects than expected inside the 'reduced_dimensions' subdirectory");
81 }
82 }
83
84 // Check the alternative experiments.
85 auto aedir = path / "alternative_experiments";
86 std::unordered_set<std::string> alt_names;
87 if (std::filesystem::exists(aedir)) {
88 internal_summarized_experiment::check_names_json(aedir, alt_names);
89 size_t num_ae = alt_names.size();
90
91 for (size_t i = 0; i < num_ae; ++i) {
92 auto aename = std::to_string(i);
93 auto aepath = aedir / aename;
94 auto aemeta = read_object_metadata(aepath);
95 if (!satisfies_interface(aemeta.type, "SUMMARIZED_EXPERIMENT", options)) {
96 throw std::runtime_error("object in 'alternative_experiments/" + aename + "' should satisfy the 'SUMMARIZED_EXPERIMENT' interface");
97 }
98
99 ::takane::validate(aepath, aemeta, options);
100 auto dims = ::takane::dimensions(aepath, aemeta, options);
101 if (dims[1] != num_cols) {
102 throw std::runtime_error("object in 'alternative_experiments/" + aename + "' should have the same number of columns as its parent '" + metadata.type + "'");
103 }
104 }
105
106 size_t num_dir_obj = internal_other::count_directory_entries(aedir);
107 if (num_dir_obj - 1 != num_ae) { // -1 to account for the names.json file itself.
108 throw std::runtime_error("more objects than expected inside the 'alternative_experiments' subdirectory");
109 }
110 }
111
112 // Validating the main experiment name.
113 auto mIt = scemap.find("main_experiment_name");
114 if (mIt != scemap.end()) {
115 const auto& ver = mIt->second;
116 if (ver->type() != millijson::STRING) {
117 throw std::runtime_error("expected 'main_experiment_name' to be a string");
118 }
119 const auto& mname = reinterpret_cast<const millijson::String*>(ver.get())->value();
120 if (mname.empty()) {
121 throw std::runtime_error("expected 'main_experiment_name' to be a non-empty string");
122 }
123 if (alt_names.find(mname) != alt_names.end()) {
124 throw std::runtime_error("expected 'main_experiment_name' to not overlap with 'alternative_experiment' names (found '" + mname + "')");
125 }
126 }
127}
128
129}
130
131}
132
133#endif
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition ranged_summarized_experiment.hpp:40
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition single_cell_experiment.hpp:42
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition summarized_experiment.hpp:135
takane validation functions.
Definition _derived_from.hpp:15
bool satisfies_interface(const std::string &type, const std::string &interface, const Options &options)
Definition _satisfies_interface.hpp:68
ObjectMetadata read_object_metadata(const std::filesystem::path &path)
Definition utils_public.hpp:73
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _validate.hpp:109
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _dimensions.hpp:69
Validation for ranged summarized experiments.
Object metadata, including the type and other fields.
Definition utils_public.hpp:25
std::unordered_map< std::string, std::shared_ptr< millijson::Base > > other
Definition utils_public.hpp:34
std::string type
Definition utils_public.hpp:29
Validation options.
Definition utils_public.hpp:93
Validation for summarized experiments.