takane
Validators for ArtifactDB file formats
Loading...
Searching...
No Matches
single_cell_experiment.hpp
Go to the documentation of this file.
1#ifndef TAKANE_SINGLE_CELL_EXPERIMENT_HPP
2#define TAKANE_SINGLE_CELL_EXPERIMENT_HPP
3
4#include "millijson/millijson.hpp"
5#include "byteme/byteme.hpp"
6
9
10#include <filesystem>
11#include <stdexcept>
12#include <unordered_set>
13#include <string>
14
20namespace takane {
21
// Forward declarations of the generic takane functions that this header calls
// on nested objects (the reduced dimensions and alternative experiments) before
// their definitions are visible.
25void validate(const std::filesystem::path&, const ObjectMetadata&, Options& options);
26std::vector<size_t> dimensions(const std::filesystem::path&, const ObjectMetadata&, Options& options);
27bool satisfies_interface(const std::string&, const std::string&, const Options&);
36namespace single_cell_experiment {
37
43inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
45
46 auto sedims = ::takane::summarized_experiment::dimensions(path, metadata, options);
47 size_t num_cols = sedims[1];
48
49 const auto& scemap = internal_json::extract_typed_object_from_metadata(metadata.other, "single_cell_experiment");
50
51 const std::string& vstring = internal_json::extract_string_from_typed_object(scemap, "version", "single_cell_experiment");
52 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true);
53 if (version.major != 1) {
54 throw std::runtime_error("unsupported version string '" + vstring + "'");
55 }
56
57 // Check the reduced dimensions.
58 auto rddir = path / "reduced_dimensions";
59 if (std::filesystem::exists(rddir)) {
60 auto num_rd = internal_summarized_experiment::check_names_json(rddir);
61
62 for (size_t i = 0; i < num_rd; ++i) {
63 auto rdname = std::to_string(i);
64 auto rdpath = rddir / rdname;
65 auto rdmeta = read_object_metadata(rdpath);
66 ::takane::validate(rdpath, rdmeta, options);
67
68 auto dims = ::takane::dimensions(rdpath, rdmeta, options);
69 if (dims.size() < 1) {
70 throw std::runtime_error("object in 'reduced_dimensions/" + rdname + "' should have at least one dimension");
71 }
72 if (dims[0] != num_cols) {
73 throw std::runtime_error("object in 'reduced_dimensions/" + rdname + "' should have the same number of rows as the columns of its parent '" + metadata.type + "'");
74 }
75 }
76
77 size_t num_dir_obj = internal_other::count_directory_entries(rddir);
78 if (num_dir_obj - 1 != num_rd) { // -1 to account for the names.json file itself.
79 throw std::runtime_error("more objects than expected inside the 'reduced_dimensions' subdirectory");
80 }
81 }
82
83 // Check the alternative experiments.
84 auto aedir = path / "alternative_experiments";
85 std::unordered_set<std::string> alt_names;
86 if (std::filesystem::exists(aedir)) {
87 internal_summarized_experiment::check_names_json(aedir, alt_names);
88 size_t num_ae = alt_names.size();
89
90 for (size_t i = 0; i < num_ae; ++i) {
91 auto aename = std::to_string(i);
92 auto aepath = aedir / aename;
93 auto aemeta = read_object_metadata(aepath);
94 if (!satisfies_interface(aemeta.type, "SUMMARIZED_EXPERIMENT", options)) {
95 throw std::runtime_error("object in 'alternative_experiments/" + aename + "' should satisfy the 'SUMMARIZED_EXPERIMENT' interface");
96 }
97
98 ::takane::validate(aepath, aemeta, options);
99 auto dims = ::takane::dimensions(aepath, aemeta, options);
100 if (dims[1] != num_cols) {
101 throw std::runtime_error("object in 'alternative_experiments/" + aename + "' should have the same number of columns as its parent '" + metadata.type + "'");
102 }
103 }
104
105 size_t num_dir_obj = internal_other::count_directory_entries(aedir);
106 if (num_dir_obj - 1 != num_ae) { // -1 to account for the names.json file itself.
107 throw std::runtime_error("more objects than expected inside the 'alternative_experiments' subdirectory");
108 }
109 }
110
111 // Validating the main experiment name.
112 auto mIt = scemap.find("main_experiment_name");
113 if (mIt != scemap.end()) {
114 const auto& ver = mIt->second;
115 if (ver->type() != millijson::STRING) {
116 throw std::runtime_error("expected 'main_experiment_name' to be a string");
117 }
118 const auto& mname = reinterpret_cast<const millijson::String*>(ver.get())->value;
119 if (mname.empty()) {
120 throw std::runtime_error("expected 'main_experiment_name' to be a non-empty string");
121 }
122 if (alt_names.find(mname) != alt_names.end()) {
123 throw std::runtime_error("expected 'main_experiment_name' to not overlap with 'alternative_experiment' names (found '" + mname + "')");
124 }
125 }
126}
127
128}
129
130}
131
132#endif
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition ranged_summarized_experiment.hpp:41
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition single_cell_experiment.hpp:43
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition summarized_experiment.hpp:133
takane validation functions.
Definition _derived_from.hpp:15
bool satisfies_interface(const std::string &type, const std::string &interface, const Options &options)
Definition _satisfies_interface.hpp:67
ObjectMetadata read_object_metadata(const std::filesystem::path &path)
Definition utils_public.hpp:74
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _validate.hpp:107
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _dimensions.hpp:69
Validation for ranged summarized experiments.
Object metadata, including the type and other fields.
Definition utils_public.hpp:26
std::unordered_map< std::string, std::shared_ptr< millijson::Base > > other
Definition utils_public.hpp:35
std::string type
Definition utils_public.hpp:30
Validation options.
Definition utils_public.hpp:94
Validation for summarized experiments.