takane
Validators for ArtifactDB file formats
Loading...
Searching...
No Matches
dense_array.hpp
Go to the documentation of this file.
1#ifndef TAKANE_DENSE_ARRAY_HPP
2#define TAKANE_DENSE_ARRAY_HPP
3
4#include "ritsuko/hdf5/hdf5.hpp"
5#include "ritsuko/ritsuko.hpp"
6
7#include "utils_public.hpp"
8#include "utils_array.hpp"
9
10#include <vector>
11#include <string>
12#include <stdexcept>
13#include <filesystem>
14#include <cstdint>
15
21namespace takane {
22
27namespace dense_array {
28
32namespace internal {
33
34inline bool is_transposed(const H5::Group& ghandle) {
35 if (!ghandle.attrExists("transposed")) {
36 return false;
37 }
38
39 auto attr = ghandle.openAttribute("transposed");
40 if (!ritsuko::hdf5::is_scalar(attr)) {
41 throw std::runtime_error("expected 'transposed' attribute to be a scalar");
42 }
43 if (ritsuko::hdf5::exceeds_integer_limit(attr, 32, true)) {
44 throw std::runtime_error("expected 'transposed' attribute to have a datatype that fits in a 32-bit signed integer");
45 }
46
47 return ritsuko::hdf5::load_scalar_numeric_attribute<int32_t>(attr) != 0;
48}
49
50}
60inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
61 auto vstring = internal_json::extract_version_for_type(metadata.other, "dense_array");
62 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true);
63 if (version.major != 1) {
64 throw std::runtime_error("unsupported version '" + vstring + "'");
65 }
66
67 auto handle = ritsuko::hdf5::open_file(path / "array.h5");
68 auto ghandle = ritsuko::hdf5::open_group(handle, "dense_array");
69 internal::is_transposed(ghandle); // just a check, not used here.
70 auto dhandle = ritsuko::hdf5::open_dataset(ghandle, "data");
71
72 auto dspace = dhandle.getSpace();
73 size_t ndims = dspace.getSimpleExtentNdims();
74 if (ndims == 0) {
75 throw std::runtime_error("expected 'data' array to have at least one dimension");
76 }
77 std::vector<hsize_t> extents(ndims);
78 dspace.getSimpleExtentDims(extents.data());
79
80 auto type = ritsuko::hdf5::open_and_load_scalar_string_attribute(ghandle, "type");
81 if (type == "integer") {
82 if (ritsuko::hdf5::exceeds_integer_limit(dhandle, 32, true)) {
83 throw std::runtime_error("expected integer array to have a datatype that fits into a 32-bit signed integer");
84 }
85 } else if (type == "boolean") {
86 if (ritsuko::hdf5::exceeds_integer_limit(dhandle, 32, true)) {
87 throw std::runtime_error("expected boolean array to have a datatype that fits into a 32-bit signed integer");
88 }
89 } else if (type == "number") {
90 if (ritsuko::hdf5::exceeds_float_limit(dhandle, 64)) {
91 throw std::runtime_error("expected number array to have a datatype that fits into a 64-bit float");
92 }
93 } else if (type == "string") {
94 if (!ritsuko::hdf5::is_utf8_string(dhandle)) {
95 throw std::runtime_error("expected string array to have a datatype that can be represented by a UTF-8 encoded string");
96 }
97 ritsuko::hdf5::validate_nd_string_dataset(dhandle, extents, options.hdf5_buffer_size);
98 } else {
99 throw std::runtime_error("unknown array type '" + type + "'");
100 }
101
102 if (dhandle.attrExists("missing-value-placeholder")) {
103 auto attr = dhandle.openAttribute("missing-value-placeholder");
104 ritsuko::hdf5::check_missing_placeholder_attribute(dhandle, attr);
105 }
106
107 if (ghandle.exists("names")) {
108 internal_array::check_dimnames(ghandle, "names", extents, options);
109 }
110}
111
118inline size_t height(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] Options& options) {
119 auto handle = ritsuko::hdf5::open_file(path / "array.h5");
120 auto ghandle = ritsuko::hdf5::open_group(handle, "dense_array");
121
122 auto dhandle = ritsuko::hdf5::open_dataset(ghandle, "data");
123 auto dspace = dhandle.getSpace();
124 size_t ndims = dspace.getSimpleExtentNdims();
125 std::vector<hsize_t> extents(ndims);
126 dspace.getSimpleExtentDims(extents.data());
127
128 if (internal::is_transposed(ghandle)) {
129 return extents.back();
130 } else {
131 return extents.front();
132 }
133}
134
141inline std::vector<size_t> dimensions(const std::filesystem::path& path, [[maybe_unused]] const ObjectMetadata& metadata, [[maybe_unused]] Options& options) {
142 auto handle = ritsuko::hdf5::open_file(path / "array.h5");
143 auto ghandle = ritsuko::hdf5::open_group(handle, "dense_array");
144
145 auto dhandle = ritsuko::hdf5::open_dataset(ghandle, "data");
146 auto dspace = dhandle.getSpace();
147 size_t ndims = dspace.getSimpleExtentNdims();
148 std::vector<hsize_t> extents(ndims);
149 dspace.getSimpleExtentDims(extents.data());
150
151 if (internal::is_transposed(ghandle)) {
152 return std::vector<size_t>(extents.rbegin(), extents.rend());
153 } else {
154 return std::vector<size_t>(extents.begin(), extents.end());
155 }
156}
157
158}
159
160}
161
162#endif
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition dense_array.hpp:60
size_t height(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition dense_array.hpp:118
std::vector< size_t > dimensions(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition dense_array.hpp:141
takane validation functions.
Definition _derived_from.hpp:15
Object metadata, including the type and other fields.
Definition utils_public.hpp:26
std::unordered_map< std::string, std::shared_ptr< millijson::Base > > other
Definition utils_public.hpp:35
Validation options.
Definition utils_public.hpp:94
hsize_t hdf5_buffer_size
Definition utils_public.hpp:103
Exported utilities.