takane
Validators for ArtifactDB file formats
Loading...
Searching...
No Matches
simple_list.hpp
Go to the documentation of this file.
1#ifndef TAKANE_SIMPLE_LIST_HPP
2#define TAKANE_SIMPLE_LIST_HPP
3
4#include <string>
5#include <stdexcept>
6#include <filesystem>
7
8#include "uzuki2/uzuki2.hpp"
9#include "byteme/byteme.hpp"
10
11#include "utils_public.hpp"
12#include "utils_other.hpp"
13
19namespace takane {
20
// Forward declaration of the generic takane::validate() entry point. It is
// declared here (rather than included) so that simple_list::validate() below
// can call it on each external list object found under 'other_contents'.
24void validate(const std::filesystem::path&, Options&);
33namespace simple_list {
34
38namespace internal {
39
40inline std::string extract_format(const internal_json::JsonObjectMap& map) {
41 auto fIt = map.find("format");
42 if (fIt == map.end()) {
43 return "hdf5";
44 }
45 const auto& val = fIt->second;
46 if (val->type() != millijson::STRING) {
47 throw std::runtime_error("'simple_list.format' in the object metadata should be a JSON string");
48 }
49 return reinterpret_cast<millijson::String*>(val.get())->value;
50}
51
52inline std::pair<bool, size_t> extract_length(const internal_json::JsonObjectMap& map) {
53 auto lIt = map.find("length");
54 if (lIt == map.end()) {
55 return std::pair<bool, size_t>(false, 0);
56 }
57 const auto& val = lIt->second;
58 if (val->type() != millijson::NUMBER) {
59 throw std::runtime_error("'simple_list.length' in the object metadata should be a JSON number");
60 }
61 return std::pair<bool, size_t>(true, reinterpret_cast<millijson::Number*>(val.get())->value);
62}
63
64}
74inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
75 const auto& metamap = internal_json::extract_typed_object_from_metadata(metadata.other, "simple_list");
76
77 const std::string& vstring = internal_json::extract_string_from_typed_object(metamap, "version", "simple_list");
78 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true);
79 if (version.major != 1) {
80 throw std::runtime_error("unsupported version string '" + vstring + "'");
81 }
82
83 std::string format = internal::extract_format(metamap);
84
85 auto other_dir = path / "other_contents";
86 int num_external = 0;
87 if (std::filesystem::exists(other_dir)) {
88 auto status = std::filesystem::status(other_dir);
89 if (status.type() != std::filesystem::file_type::directory) {
90 throw std::runtime_error("expected 'other_contents' to be a directory");
91 }
92
93 num_external = internal_other::count_directory_entries(other_dir);
94 for (int e = 0; e < num_external; ++e) {
95 auto epath = other_dir / std::to_string(e);
96 if (!std::filesystem::exists(epath)) {
97 throw std::runtime_error("expected an external list object at '" + std::filesystem::relative(epath, path).string() + "'");
98 }
99
100 try {
101 ::takane::validate(epath, options);
102 } catch (std::exception& e) {
103 throw std::runtime_error("failed to validate external list object at '" + std::filesystem::relative(epath, path).string() + "'; " + std::string(e.what()));
104 }
105 }
106 }
107
108 size_t len;
109 if (format == "json.gz") {
110 uzuki2::json::Options opt;
111 opt.parallel = options.parallel_reads;
112 auto gzreader = internal_other::open_reader<byteme::GzipFileReader>(path / "list_contents.json.gz");
113 auto loaded = uzuki2::json::parse<uzuki2::DummyProvisioner>(gzreader, uzuki2::DummyExternals(num_external), std::move(opt));
114 len = reinterpret_cast<const uzuki2::List*>(loaded.get())->size();
115
116 } else if (format == "hdf5") {
117 auto handle = ritsuko::hdf5::open_file(path / "list_contents.h5");
118 auto ghandle = ritsuko::hdf5::open_group(handle, "simple_list");
119 auto loaded = uzuki2::hdf5::parse<uzuki2::DummyProvisioner>(ghandle, uzuki2::DummyExternals(num_external));
120 len = reinterpret_cast<const uzuki2::List*>(loaded.get())->size();
121
122 } else {
123 throw std::runtime_error("unknown format '" + format + "'");
124 }
125
126 if (version.ge(1, 1, 0)) {
127 auto len_info = internal::extract_length(metamap);
128 if (len_info.first) {
129 if (len_info.second != len) {
130 throw std::runtime_error("'simple_list.length' differs from the length of the list");
131 }
132 }
133 }
134}
135
142inline size_t height(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
143 const auto& metamap = internal_json::extract_typed_object_from_metadata(metadata.other, "simple_list");
144
145 auto len_info = internal::extract_length(metamap);
146 if (len_info.first) {
147 return len_info.second;
148 }
149
150 std::string format = internal::extract_format(metamap);
151 if (format == "hdf5") {
152 auto handle = ritsuko::hdf5::open_file(path / "list_contents.h5");
153 auto lhandle = handle.openGroup("simple_list");
154 auto vhandle = lhandle.openGroup("data");
155 return vhandle.getNumObjs();
156
157 } else {
158 // Not much choice but to parse the entire list here. We do so using the
159 // dummy, which still has enough self-awareness to hold its own length.
160 auto other_dir = path / "other_contents";
161 int num_external = 0;
162 if (std::filesystem::exists(other_dir)) {
163 num_external = internal_other::count_directory_entries(other_dir);
164 }
165
166 uzuki2::json::Options opt;
167 opt.parallel = options.parallel_reads;
168 auto gzreader = internal_other::open_reader<byteme::GzipFileReader>(path / "list_contents.json.gz");
169 auto ptr = uzuki2::json::parse<uzuki2::DummyProvisioner>(gzreader, uzuki2::DummyExternals(num_external), std::move(opt));
170 return reinterpret_cast<const uzuki2::List*>(ptr.get())->size();
171 }
172}
173
174}
175
176}
177
178#endif
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition simple_list.hpp:74
size_t height(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition simple_list.hpp:142
takane validation functions.
Definition _derived_from.hpp:15
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition _validate.hpp:107
Object metadata, including the type and other fields.
Definition utils_public.hpp:26
std::unordered_map< std::string, std::shared_ptr< millijson::Base > > other
Definition utils_public.hpp:35
Validation options.
Definition utils_public.hpp:94
bool parallel_reads
Definition utils_public.hpp:98
Exported utilities.