takane
Validators for ArtifactDB file formats
Loading...
Searching...
No Matches
summarized_experiment.hpp
Go to the documentation of this file.
1#ifndef TAKANE_SUMMARIZED_EXPERIMENT_HPP
2#define TAKANE_SUMMARIZED_EXPERIMENT_HPP
3
4#include "millijson/millijson.hpp"
5#include "byteme/byteme.hpp"
6
7#include "utils_public.hpp"
8#include "utils_other.hpp"
9#include "utils_summarized_experiment.hpp"
10
11#include <filesystem>
12#include <stdexcept>
13#include <string>
14
20namespace takane {
21
// Forward declarations of the generic takane entry points, which are defined in other headers.
// The functions in the summarized_experiment namespace below call these (mostly via an explicit
// ::takane:: qualification) on the child objects stored inside the summarized experiment's directory.
25void validate(const std::filesystem::path&, const ObjectMetadata&, Options& options);
26size_t height(const std::filesystem::path&, const ObjectMetadata&, Options& options);
27std::vector<size_t> dimensions(const std::filesystem::path&, const ObjectMetadata&, Options& options);
28bool satisfies_interface(const std::string&, const std::string&, const Options&);
37namespace summarized_experiment {
38
44inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
45 const std::string type_name = "summarized_experiment"; // use a separate variable to avoid dangling reference warnings from GCC.
46 const auto& semap = internal_json::extract_typed_object_from_metadata(metadata.other, type_name);
47
48 const std::string version_name = "version"; // again, avoid dangling reference warnings.
49 const std::string& vstring = internal_json::extract_string_from_typed_object(semap, version_name, type_name);
50 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true);
51 if (version.major != 1) {
52 throw std::runtime_error("unsupported version string '" + vstring + "'");
53 }
54
55 // Validating the dimensions.
56 auto dims = internal_summarized_experiment::extract_dimensions_json(semap, type_name);
57 size_t num_rows = dims.first;
58 size_t num_cols = dims.second;
59
60 // Checking the assays. The directory is also allowed to not exist,
61 // in which case we have no assays.
62 auto adir = path / "assays";
63 if (std::filesystem::exists(adir)) {
64 size_t num_assays = internal_summarized_experiment::check_names_json(adir);
65 for (size_t i = 0; i < num_assays; ++i) {
66 auto aname = std::to_string(i);
67 auto apath = adir / aname;
68 auto ameta = read_object_metadata(apath);
69 ::takane::validate(apath, ameta, options);
70
71 auto dims = ::takane::dimensions(apath, ameta, options);
72 if (dims.size() < 2) {
73 throw std::runtime_error("object in 'assays/" + aname + "' should have two or more dimensions");
74 }
75 if (dims[0] != num_rows) {
76 throw std::runtime_error("object in 'assays/" + aname + "' should have the same number of rows as its parent '" + metadata.type + "'");
77 }
78 if (dims[1] != num_cols) {
79 throw std::runtime_error("object in 'assays/" + aname + "' should have the same number of columns as its parent '" + metadata.type + "'");
80 }
81 }
82
83 size_t num_dir_obj = internal_other::count_directory_entries(adir);
84 if (num_dir_obj - 1 != num_assays) { // -1 to account for the names.json file itself.
85 throw std::runtime_error("more objects than expected inside the 'assays' subdirectory");
86 }
87 }
88
89 auto rd_path = path / "row_data";
90 if (std::filesystem::exists(rd_path)) {
91 auto rdmeta = read_object_metadata(rd_path);
92 if (!satisfies_interface(rdmeta.type, "DATA_FRAME", options)) {
93 throw std::runtime_error("object in 'row_data' should satisfy the 'DATA_FRAME' interface");
94 }
95 ::takane::validate(rd_path, rdmeta, options);
96 if (::takane::height(rd_path, rdmeta, options) != num_rows) {
97 throw std::runtime_error("data frame at 'row_data' should have number of rows equal to that of the '" + metadata.type + "'");
98 }
99 }
100
101 auto cd_path = path / "column_data";
102 if (std::filesystem::exists(cd_path)) {
103 auto cdmeta = read_object_metadata(cd_path);
104 if (!satisfies_interface(cdmeta.type, "DATA_FRAME", options)) {
105 throw std::runtime_error("object in 'column_data' should satisfy the 'DATA_FRAME' interface");
106 }
107 ::takane::validate(cd_path, cdmeta, options);
108 if (::takane::height(cd_path, cdmeta, options) != num_cols) {
109 throw std::runtime_error("data frame at 'column_data' should have number of rows equal to the number of columns of its parent '" + metadata.type + "'");
110 }
111 }
112
113 internal_other::validate_metadata(path, "other_data", options);
114}
115
122inline size_t height([[maybe_unused]] const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] Options& options) {
123 // Assume it's all valid, so we go straight for the kill.
124 const auto& semap = internal_json::extract_object(metadata.other, "summarized_experiment");
125 auto dims = internal_summarized_experiment::extract_dimensions_json(semap, "summarized_experiment");
126 return dims.first;
127}
128
135inline std::vector<size_t> dimensions([[maybe_unused]] const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] Options& options) {
136 // Assume it's all valid, so we go straight for the kill.
137 const auto& semap = internal_json::extract_object(metadata.other, "summarized_experiment");
138 auto dims = internal_summarized_experiment::extract_dimensions_json(semap, "summarized_experiment");
139 return std::vector<size_t>{ dims.first, dims.second };
140}
141
142}
143
144}
145
146#endif
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition summarized_experiment.hpp:44
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition summarized_experiment.hpp:135
size_t height(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition summarized_experiment.hpp:122
takane validation functions.
Definition _derived_from.hpp:15
size_t height(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _height.hpp:88
bool satisfies_interface(const std::string &type, const std::string &interface, const Options &options)
Definition _satisfies_interface.hpp:67
ObjectMetadata read_object_metadata(const std::filesystem::path &path)
Definition utils_public.hpp:74
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _validate.hpp:107
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _dimensions.hpp:69
Object metadata, including the type and other fields.
Definition utils_public.hpp:26
std::unordered_map< std::string, std::shared_ptr< millijson::Base > > other
Definition utils_public.hpp:35
std::string type
Definition utils_public.hpp:30
Validation options.
Definition utils_public.hpp:94
Exported utilities.