takane
Validators for ArtifactDB file formats
Loading...
Searching...
No Matches
simple_list.hpp
Go to the documentation of this file.
1#ifndef TAKANE_SIMPLE_LIST_HPP
2#define TAKANE_SIMPLE_LIST_HPP
3
#include <cmath>
#include <filesystem>
#include <limits>
#include <stdexcept>
#include <string>

#include "uzuki2/uzuki2.hpp"
#include "byteme/byteme.hpp"

#include "utils_public.hpp"
#include "utils_other.hpp"
13
19namespace takane {
20
24void validate(const std::filesystem::path&, Options&);
33namespace simple_list {
34
38namespace internal {
39
40inline std::string extract_format(const internal_json::JsonObjectMap& map) {
41 auto fIt = map.find("format");
42 if (fIt == map.end()) {
43 return "hdf5";
44 }
45 const auto& val = fIt->second;
46 if (val->type() != millijson::STRING) {
47 throw std::runtime_error("'simple_list.format' in the object metadata should be a JSON string");
48 }
49 return reinterpret_cast<millijson::String*>(val.get())->value;
50}
51
52inline std::pair<bool, size_t> extract_length(const internal_json::JsonObjectMap& map) {
53 auto lIt = map.find("length");
54 if (lIt == map.end()) {
55 return std::pair<bool, size_t>(false, 0);
56 }
57 const auto& val = lIt->second;
58 if (val->type() != millijson::NUMBER) {
59 throw std::runtime_error("'simple_list.length' in the object metadata should be a JSON number");
60 }
61 return std::pair<bool, size_t>(true, reinterpret_cast<millijson::Number*>(val.get())->value);
62}
63
64}
74inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
75 const std::string type_name = "simple_list"; // use a separate variable to avoid dangling reference warnings from GCC.
76 const auto& metamap = internal_json::extract_typed_object_from_metadata(metadata.other, type_name);
77
78 const std::string version_name = "version"; // again, avoid dangling reference warnings.
79 const std::string& vstring = internal_json::extract_string_from_typed_object(metamap, version_name, type_name);
80 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true);
81 if (version.major != 1) {
82 throw std::runtime_error("unsupported version string '" + vstring + "'");
83 }
84
85 std::string format = internal::extract_format(metamap);
86
87 auto other_dir = path / "other_contents";
88 int num_external = 0;
89 if (std::filesystem::exists(other_dir)) {
90 auto status = std::filesystem::status(other_dir);
91 if (status.type() != std::filesystem::file_type::directory) {
92 throw std::runtime_error("expected 'other_contents' to be a directory");
93 }
94
95 num_external = internal_other::count_directory_entries(other_dir);
96 for (int e = 0; e < num_external; ++e) {
97 auto epath = other_dir / std::to_string(e);
98 if (!std::filesystem::exists(epath)) {
99 throw std::runtime_error("expected an external list object at '" + std::filesystem::relative(epath, path).string() + "'");
100 }
101
102 try {
103 ::takane::validate(epath, options);
104 } catch (std::exception& e) {
105 throw std::runtime_error("failed to validate external list object at '" + std::filesystem::relative(epath, path).string() + "'; " + std::string(e.what()));
106 }
107 }
108 }
109
110 size_t len;
111 if (format == "json.gz") {
112 uzuki2::json::Options opt;
113 opt.parallel = options.parallel_reads;
114 auto gzreader = internal_other::open_reader<byteme::GzipFileReader>(path / "list_contents.json.gz");
115 auto loaded = uzuki2::json::parse<uzuki2::DummyProvisioner>(gzreader, uzuki2::DummyExternals(num_external), std::move(opt));
116 len = reinterpret_cast<const uzuki2::List*>(loaded.get())->size();
117
118 } else if (format == "hdf5") {
119 auto handle = ritsuko::hdf5::open_file(path / "list_contents.h5");
120 auto ghandle = ritsuko::hdf5::open_group(handle, type_name.c_str());
121 auto loaded = uzuki2::hdf5::parse<uzuki2::DummyProvisioner>(ghandle, uzuki2::DummyExternals(num_external));
122 len = reinterpret_cast<const uzuki2::List*>(loaded.get())->size();
123
124 } else {
125 throw std::runtime_error("unknown format '" + format + "'");
126 }
127
128 if (version.ge(1, 1, 0)) {
129 auto len_info = internal::extract_length(metamap);
130 if (len_info.first) {
131 if (len_info.second != len) {
132 throw std::runtime_error("'simple_list.length' differs from the length of the list");
133 }
134 }
135 }
136}
137
144inline size_t height(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
145 const auto& metamap = internal_json::extract_typed_object_from_metadata(metadata.other, "simple_list");
146
147 auto len_info = internal::extract_length(metamap);
148 if (len_info.first) {
149 return len_info.second;
150 }
151
152 std::string format = internal::extract_format(metamap);
153 if (format == "hdf5") {
154 auto handle = ritsuko::hdf5::open_file(path / "list_contents.h5");
155 auto lhandle = handle.openGroup("simple_list");
156 auto vhandle = lhandle.openGroup("data");
157 return vhandle.getNumObjs();
158
159 } else {
160 // Not much choice but to parse the entire list here. We do so using the
161 // dummy, which still has enough self-awareness to hold its own length.
162 auto other_dir = path / "other_contents";
163 int num_external = 0;
164 if (std::filesystem::exists(other_dir)) {
165 num_external = internal_other::count_directory_entries(other_dir);
166 }
167
168 uzuki2::json::Options opt;
169 opt.parallel = options.parallel_reads;
170 auto gzreader = internal_other::open_reader<byteme::GzipFileReader>(path / "list_contents.json.gz");
171 auto ptr = uzuki2::json::parse<uzuki2::DummyProvisioner>(gzreader, uzuki2::DummyExternals(num_external), std::move(opt));
172 return reinterpret_cast<const uzuki2::List*>(ptr.get())->size();
173 }
174}
175
176}
177
178}
179
180#endif
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition simple_list.hpp:74
size_t height(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition simple_list.hpp:144
takane validation functions.
Definition _derived_from.hpp:15
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _validate.hpp:107
Object metadata, including the type and other fields.
Definition utils_public.hpp:26
std::unordered_map< std::string, std::shared_ptr< millijson::Base > > other
Definition utils_public.hpp:35
Validation options.
Definition utils_public.hpp:94
bool parallel_reads
Definition utils_public.hpp:98
Exported utilities.