takane
Validators for ArtifactDB file formats
Loading...
Searching...
No Matches
summarized_experiment.hpp
Go to the documentation of this file.
1#ifndef TAKANE_SUMMARIZED_EXPERIMENT_HPP
2#define TAKANE_SUMMARIZED_EXPERIMENT_HPP
3
4#include "millijson/millijson.hpp"
5#include "byteme/byteme.hpp"
6
7#include "utils_public.hpp"
8#include "utils_other.hpp"
9#include "utils_summarized_experiment.hpp"
10
11#include <filesystem>
12#include <stdexcept>
13#include <string>
14
20namespace takane {
21
25void validate(const std::filesystem::path&, const ObjectMetadata&, Options& options);
26size_t height(const std::filesystem::path&, const ObjectMetadata&, Options& options);
27std::vector<size_t> dimensions(const std::filesystem::path&, const ObjectMetadata&, Options& options);
28bool satisfies_interface(const std::string&, const std::string&, const Options&);
37namespace summarized_experiment {
38
44inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
45 const auto& semap = internal_json::extract_typed_object_from_metadata(metadata.other, "summarized_experiment");
46
47 const std::string& vstring = internal_json::extract_string_from_typed_object(semap, "version", "summarized_experiment");
48 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true);
49 if (version.major != 1) {
50 throw std::runtime_error("unsupported version string '" + vstring + "'");
51 }
52
53 // Validating the dimensions.
54 auto dims = internal_summarized_experiment::extract_dimensions_json(semap, "summarized_experiment");
55 size_t num_rows = dims.first;
56 size_t num_cols = dims.second;
57
58 // Checking the assays. The directory is also allowed to not exist,
59 // in which case we have no assays.
60 auto adir = path / "assays";
61 if (std::filesystem::exists(adir)) {
62 size_t num_assays = internal_summarized_experiment::check_names_json(adir);
63 for (size_t i = 0; i < num_assays; ++i) {
64 auto aname = std::to_string(i);
65 auto apath = adir / aname;
66 auto ameta = read_object_metadata(apath);
67 ::takane::validate(apath, ameta, options);
68
69 auto dims = ::takane::dimensions(apath, ameta, options);
70 if (dims.size() < 2) {
71 throw std::runtime_error("object in 'assays/" + aname + "' should have two or more dimensions");
72 }
73 if (dims[0] != num_rows) {
74 throw std::runtime_error("object in 'assays/" + aname + "' should have the same number of rows as its parent '" + metadata.type + "'");
75 }
76 if (dims[1] != num_cols) {
77 throw std::runtime_error("object in 'assays/" + aname + "' should have the same number of columns as its parent '" + metadata.type + "'");
78 }
79 }
80
81 size_t num_dir_obj = internal_other::count_directory_entries(adir);
82 if (num_dir_obj - 1 != num_assays) { // -1 to account for the names.json file itself.
83 throw std::runtime_error("more objects than expected inside the 'assays' subdirectory");
84 }
85 }
86
87 auto rd_path = path / "row_data";
88 if (std::filesystem::exists(rd_path)) {
89 auto rdmeta = read_object_metadata(rd_path);
90 if (!satisfies_interface(rdmeta.type, "DATA_FRAME", options)) {
91 throw std::runtime_error("object in 'row_data' should satisfy the 'DATA_FRAME' interface");
92 }
93 ::takane::validate(rd_path, rdmeta, options);
94 if (::takane::height(rd_path, rdmeta, options) != num_rows) {
95 throw std::runtime_error("data frame at 'row_data' should have number of rows equal to that of the '" + metadata.type + "'");
96 }
97 }
98
99 auto cd_path = path / "column_data";
100 if (std::filesystem::exists(cd_path)) {
101 auto cdmeta = read_object_metadata(cd_path);
102 if (!satisfies_interface(cdmeta.type, "DATA_FRAME", options)) {
103 throw std::runtime_error("object in 'column_data' should satisfy the 'DATA_FRAME' interface");
104 }
105 ::takane::validate(cd_path, cdmeta, options);
106 if (::takane::height(cd_path, cdmeta, options) != num_cols) {
107 throw std::runtime_error("data frame at 'column_data' should have number of rows equal to the number of columns of its parent '" + metadata.type + "'");
108 }
109 }
110
111 internal_other::validate_metadata(path, "other_data", options);
112}
113
120inline size_t height([[maybe_unused]] const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] Options& options) {
121 // Assume it's all valid, so we go straight for the kill.
122 const auto& semap = internal_json::extract_object(metadata.other, "summarized_experiment");
123 auto dims = internal_summarized_experiment::extract_dimensions_json(semap, "summarized_experiment");
124 return dims.first;
125}
126
133inline std::vector<size_t> dimensions([[maybe_unused]] const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] Options& options) {
134 // Assume it's all valid, so we go straight for the kill.
135 const auto& semap = internal_json::extract_object(metadata.other, "summarized_experiment");
136 auto dims = internal_summarized_experiment::extract_dimensions_json(semap, "summarized_experiment");
137 return std::vector<size_t>{ dims.first, dims.second };
138}
139
140}
141
142}
143
144#endif
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition summarized_experiment.hpp:44
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition summarized_experiment.hpp:133
size_t height(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition summarized_experiment.hpp:120
takane validation functions.
Definition _derived_from.hpp:15
size_t height(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _height.hpp:88
bool satisfies_interface(const std::string &type, const std::string &interface, const Options &options)
Definition _satisfies_interface.hpp:67
ObjectMetadata read_object_metadata(const std::filesystem::path &path)
Definition utils_public.hpp:74
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _validate.hpp:107
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _dimensions.hpp:69
Object metadata, including the type and other fields.
Definition utils_public.hpp:26
std::unordered_map< std::string, std::shared_ptr< millijson::Base > > other
Definition utils_public.hpp:35
std::string type
Definition utils_public.hpp:30
Validation options.
Definition utils_public.hpp:94
Exported utilities.