takane
Validators for ArtifactDB file formats
Loading...
Searching...
No Matches
atomic_vector.hpp
Go to the documentation of this file.
1#ifndef TAKANE_ATOMIC_VECTOR_HPP
2#define TAKANE_ATOMIC_VECTOR_HPP
3
4#include <string>
5#include <stdexcept>
6#include <filesystem>
7
8#include "ritsuko/hdf5/hdf5.hpp"
9#include "ritsuko/hdf5/vls/vls.hpp"
10
11#include "utils_public.hpp"
12#include "utils_string.hpp"
13#include "utils_json.hpp"
14
20namespace takane {
21
26namespace atomic_vector {
27
33inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
34 const std::string type_name = "atomic_vector"; // use a separate variable to avoid dangling reference warnings from GCC.
35 const auto& vstring = internal_json::extract_version_for_type(metadata.other, type_name);
36 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true);
37 if (version.major != 1) {
38 throw std::runtime_error("unsupported version string '" + vstring + "'");
39 }
40
41 auto handle = ritsuko::hdf5::open_file(path / "contents.h5");
42 auto ghandle = ritsuko::hdf5::open_group(handle, type_name.c_str());
43 auto type = ritsuko::hdf5::open_and_load_scalar_string_attribute(ghandle, "type");
44 hsize_t vlen = 0;
45
46 const char* missing_attr_name = "missing-value-placeholder";
47
48 if (type == "vls") {
49 if (version.lt(1, 1, 0)) {
50 throw std::runtime_error("unsupported type '" + type + "'");
51 }
52
53 auto phandle = ritsuko::hdf5::vls::open_pointers(ghandle, "pointers", 64, 64);
54 vlen = ritsuko::hdf5::get_1d_length(phandle.getSpace(), false);
55 auto hhandle = ritsuko::hdf5::vls::open_heap(ghandle, "heap");
56 auto hlen = ritsuko::hdf5::get_1d_length(hhandle.getSpace(), false);
57 ritsuko::hdf5::vls::validate_1d_array<uint64_t, uint64_t>(phandle, vlen, hlen, options.hdf5_buffer_size);
58
59 if (phandle.attrExists(missing_attr_name)) {
60 auto attr = phandle.openAttribute(missing_attr_name);
61 ritsuko::hdf5::check_string_missing_placeholder_attribute(attr);
62 }
63
64 } else {
65 auto dhandle = ritsuko::hdf5::open_dataset(ghandle, "values");
66 vlen = ritsuko::hdf5::get_1d_length(dhandle.getSpace(), false);
67
68 if (type == "string") {
69 if (!ritsuko::hdf5::is_utf8_string(dhandle)) {
70 throw std::runtime_error("expected a datatype for 'values' that can be represented by a UTF-8 encoded string");
71 }
72 auto missingness = ritsuko::hdf5::open_and_load_optional_string_missing_placeholder(dhandle, missing_attr_name);
73 std::string format = internal_string::fetch_format_attribute(ghandle);
74 internal_string::validate_string_format(dhandle, vlen, format, missingness, options.hdf5_buffer_size);
75
76 } else {
77 if (type == "integer") {
78 if (ritsuko::hdf5::exceeds_integer_limit(dhandle, 32, true)) {
79 throw std::runtime_error("expected a datatype for 'values' that fits in a 32-bit signed integer");
80 }
81 } else if (type == "boolean") {
82 if (ritsuko::hdf5::exceeds_integer_limit(dhandle, 32, true)) {
83 throw std::runtime_error("expected a datatype for 'values' that fits in a 32-bit signed integer");
84 }
85 } else if (type == "number") {
86 if (ritsuko::hdf5::exceeds_float_limit(dhandle, 64)) {
87 throw std::runtime_error("expected a datatype for 'values' that fits in a 64-bit float");
88 }
89 } else {
90 throw std::runtime_error("unsupported type '" + type + "'");
91 }
92
93 if (dhandle.attrExists(missing_attr_name)) {
94 auto missing_attr = dhandle.openAttribute(missing_attr_name);
95 ritsuko::hdf5::check_numeric_missing_placeholder_attribute(dhandle, missing_attr);
96 }
97 }
98 }
99
100 internal_string::validate_names(ghandle, "names", vlen, options.hdf5_buffer_size);
101}
102
109inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] Options& options) {
110 auto handle = ritsuko::hdf5::open_file(path / "contents.h5");
111 auto ghandle = handle.openGroup("atomic_vector");
112 auto type = ritsuko::hdf5::open_and_load_scalar_string_attribute(ghandle, "type");
113
114 if (type == "vls") {
115 auto phandle = ghandle.openDataSet("pointers");
116 return ritsuko::hdf5::get_1d_length(phandle.getSpace(), false);
117 } else {
118 auto dhandle = ghandle.openDataSet("values");
119 return ritsuko::hdf5::get_1d_length(dhandle.getSpace(), false);
120 }
121}
122
123}
124
125}
126
127#endif
size_t height(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition atomic_vector.hpp:109
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition atomic_vector.hpp:33
takane validation functions.
Definition _derived_from.hpp:15
Object metadata, including the type and other fields.
Definition utils_public.hpp:26
std::unordered_map< std::string, std::shared_ptr< millijson::Base > > other
Definition utils_public.hpp:35
Validation options.
Definition utils_public.hpp:94
hsize_t hdf5_buffer_size
Definition utils_public.hpp:103
Exported utilities.