takane
Validators for ArtifactDB file formats
Loading...
Searching...
No Matches
summarized_experiment.hpp
Go to the documentation of this file.
1#ifndef TAKANE_SUMMARIZED_EXPERIMENT_HPP
2#define TAKANE_SUMMARIZED_EXPERIMENT_HPP
3
4#include "millijson/millijson.hpp"
5
6#include "utils_public.hpp"
7#include "utils_other.hpp"
8#include "utils_summarized_experiment.hpp"
9
10#include <filesystem>
11#include <stdexcept>
12#include <string>
13
19namespace takane {
20
24void validate(const std::filesystem::path&, const ObjectMetadata&, Options& options);
25size_t height(const std::filesystem::path&, const ObjectMetadata&, Options& options);
26std::vector<size_t> dimensions(const std::filesystem::path&, const ObjectMetadata&, Options& options);
27bool satisfies_interface(const std::string&, const std::string&, const Options&);
36namespace summarized_experiment {
37
43inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
44 const std::string type_name = "summarized_experiment"; // use a separate variable to avoid dangling reference warnings from GCC.
45 const auto& semap = internal_json::extract_typed_object_from_metadata(metadata.other, type_name);
46
47 const std::string version_name = "version"; // again, avoid dangling reference warnings.
48 const std::string& vstring = internal_json::extract_string_from_typed_object(semap, version_name, type_name);
49 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true);
50 if (version.major != 1) {
51 throw std::runtime_error("unsupported version string '" + vstring + "'");
52 }
53
54 // Validating the dimensions.
55 auto dims = internal_summarized_experiment::extract_dimensions_json(semap, type_name);
56 size_t num_rows = dims.first;
57 size_t num_cols = dims.second;
58
59 // Checking the assays. The directory is also allowed to not exist,
60 // in which case we have no assays.
61 auto adir = path / "assays";
62 if (std::filesystem::exists(adir)) {
63 size_t num_assays = internal_summarized_experiment::check_names_json(adir);
64 for (size_t i = 0; i < num_assays; ++i) {
65 auto aname = std::to_string(i);
66 auto apath = adir / aname;
67 auto ameta = read_object_metadata(apath);
68 ::takane::validate(apath, ameta, options);
69
70 auto dims = ::takane::dimensions(apath, ameta, options);
71 if (dims.size() < 2) {
72 throw std::runtime_error("object in 'assays/" + aname + "' should have two or more dimensions");
73 }
74 if (dims[0] != num_rows) {
75 throw std::runtime_error("object in 'assays/" + aname + "' should have the same number of rows as its parent '" + metadata.type + "'");
76 }
77 if (dims[1] != num_cols) {
78 throw std::runtime_error("object in 'assays/" + aname + "' should have the same number of columns as its parent '" + metadata.type + "'");
79 }
80 }
81
82 size_t num_dir_obj = internal_other::count_directory_entries(adir);
83 if (num_dir_obj - 1 != num_assays) { // -1 to account for the names.json file itself.
84 throw std::runtime_error("more objects than expected inside the 'assays' subdirectory");
85 }
86 }
87
88 auto rd_path = path / "row_data";
89 if (std::filesystem::exists(rd_path)) {
90 auto rdmeta = read_object_metadata(rd_path);
91 if (!satisfies_interface(rdmeta.type, "DATA_FRAME", options)) {
92 throw std::runtime_error("object in 'row_data' should satisfy the 'DATA_FRAME' interface");
93 }
94 ::takane::validate(rd_path, rdmeta, options);
95 if (::takane::height(rd_path, rdmeta, options) != num_rows) {
96 throw std::runtime_error("data frame at 'row_data' should have number of rows equal to that of the '" + metadata.type + "'");
97 }
98 }
99
100 auto cd_path = path / "column_data";
101 if (std::filesystem::exists(cd_path)) {
102 auto cdmeta = read_object_metadata(cd_path);
103 if (!satisfies_interface(cdmeta.type, "DATA_FRAME", options)) {
104 throw std::runtime_error("object in 'column_data' should satisfy the 'DATA_FRAME' interface");
105 }
106 ::takane::validate(cd_path, cdmeta, options);
107 if (::takane::height(cd_path, cdmeta, options) != num_cols) {
108 throw std::runtime_error("data frame at 'column_data' should have number of rows equal to the number of columns of its parent '" + metadata.type + "'");
109 }
110 }
111
112 internal_other::validate_metadata(path, "other_data", options);
113}
114
121inline size_t height([[maybe_unused]] const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] Options& options) {
122 const std::string type_name = "summarized_experiment"; // use a separate variable to avoid dangling reference warnings from GCC.
123 // Assume it's all valid, so we go straight for the kill.
124 const auto& semap = internal_json::extract_object(metadata.other, type_name);
125 auto dims = internal_summarized_experiment::extract_dimensions_json(semap, type_name);
126 return dims.first;
127}
128
135inline std::vector<size_t> dimensions([[maybe_unused]] const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] Options& options) {
136 const std::string type_name = "summarized_experiment"; // use a separate variable to avoid dangling reference warnings from GCC.
137 // Assume it's all valid, so we go straight for the kill.
138 const auto& semap = internal_json::extract_object(metadata.other, type_name);
139 auto dims = internal_summarized_experiment::extract_dimensions_json(semap, type_name);
140 return std::vector<size_t>{ dims.first, dims.second };
141}
142
143}
144
145}
146
147#endif
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition summarized_experiment.hpp:43
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition summarized_experiment.hpp:135
size_t height(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition summarized_experiment.hpp:121
takane validation functions.
Definition _derived_from.hpp:15
size_t height(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _height.hpp:88
bool satisfies_interface(const std::string &type, const std::string &interface, const Options &options)
Definition _satisfies_interface.hpp:68
ObjectMetadata read_object_metadata(const std::filesystem::path &path)
Definition utils_public.hpp:73
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _validate.hpp:109
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _dimensions.hpp:69
Object metadata, including the type and other fields.
Definition utils_public.hpp:25
std::unordered_map< std::string, std::shared_ptr< millijson::Base > > other
Definition utils_public.hpp:34
std::string type
Definition utils_public.hpp:29
Validation options.
Definition utils_public.hpp:93
Exported utilities.