ritsuko
Helper utilities for ArtifactDB C++ code
Loading...
Searching...
No Matches
Stream1dStringDataset.hpp
Go to the documentation of this file.
1#ifndef RITSUKO_HDF5_STREAM_1D_STRING_DATASET_HPP
2#define RITSUKO_HDF5_STREAM_1D_STRING_DATASET_HPP
3
4#include "H5Cpp.h"
5
6#include <vector>
7#include <string>
8#include <stdexcept>
9
11#include "get_1d_length.hpp"
12#include "get_name.hpp"
14#include "_strings.hpp"
15
21namespace ritsuko {
22
23namespace hdf5 {
24
32public:
39 Stream1dStringDataset(const H5::DataSet* ptr, hsize_t length, hsize_t buffer_size) :
40 ptr(ptr),
41 full_length(length),
42 block_size(pick_1d_block_size(ptr->getCreatePlist(), full_length, buffer_size)),
43 mspace(1, &block_size),
44 dspace(1, &full_length),
45 dtype(ptr->getDataType()),
46 is_variable(dtype.isVariableStr())
47 {
48 if (is_variable) {
49 var_buffer.resize(block_size);
50 } else {
51 fixed_length = dtype.getSize();
52 fix_buffer.resize(fixed_length * block_size);
53 }
54 final_buffer.resize(block_size);
55 }
56
63 Stream1dStringDataset(const H5::DataSet* ptr, hsize_t buffer_size) :
64 Stream1dStringDataset(ptr, get_1d_length(ptr->getSpace(), false), buffer_size)
65 {}
66
67public:
71 std::string get() {
72 while (consumed >= available) {
73 consumed -= available;
74 load();
75 }
76 return final_buffer[consumed];
77 }
78
84 std::string steal() {
85 while (consumed >= available) {
86 consumed -= available;
87 load();
88 }
89 return std::move(final_buffer[consumed]);
90 }
91
97 void next(size_t jump = 1) {
98 consumed += jump;
99 }
100
104 hsize_t length() const {
105 return full_length;
106 }
107
111 hsize_t position() const {
112 return consumed + last_loaded;
113 }
114
115private:
// Dataset being streamed; only the pointer is held, the handle itself is not copied.
116 const H5::DataSet* ptr;
// full_length: total number of elements in the stream.
// block_size: maximum number of elements read per load(), chosen by pick_1d_block_size().
117 hsize_t full_length, block_size;
// Memory dataspace with extent 'block_size'; load() selects its first 'available' elements.
118 H5::DataSpace mspace;
// File dataspace with extent 'full_length'; load() selects the block starting at 'last_loaded'.
119 H5::DataSpace dspace;
120
// Datatype reported by the dataset, used as the memory type for every read.
121 H5::DataType dtype;
// Whether 'dtype' is a variable-length string type (dtype.isVariableStr()).
122 bool is_variable;
// Per-element char* targets for variable-length reads; sized only when is_variable is true.
123 std::vector<char*> var_buffer;
// Size of 'dtype' as reported by getSize(); only set for fixed-width strings.
124 size_t fixed_length = 0;
// Contiguous byte buffer of fixed_length * block_size chars for fixed-width reads.
125 std::vector<char> fix_buffer;
// Decoded strings for the block currently in memory; get()/steal() index into this.
126 std::vector<std::string> final_buffer;
127
// Offset into the dataset at which the NEXT block starts, i.e. one past the end of the
// most recently loaded block (load() adds 'available' to it after each read).
128 hsize_t last_loaded = 0;
// Offset of the current position relative to the start of the block in memory; may be
// >= 'available' until get()/steal() catches up by loading further blocks.
129 hsize_t consumed = 0;
// Number of valid elements in the block currently in memory (0 before the first load).
130 hsize_t available = 0;
131
132 void load() {
133 if (last_loaded >= full_length) {
134 throw std::runtime_error("requesting data beyond the end of the dataset at '" + get_name(*ptr) + "'");
135 }
136 available = std::min(full_length - last_loaded, block_size);
137 constexpr hsize_t zero = 0;
138 mspace.selectHyperslab(H5S_SELECT_SET, &available, &zero);
139 dspace.selectHyperslab(H5S_SELECT_SET, &available, &last_loaded);
140
141 if (is_variable) {
142 ptr->read(var_buffer.data(), dtype, mspace, dspace);
143 [[maybe_unused]] VariableStringCleaner deletor(dtype.getId(), mspace.getId(), var_buffer.data());
144 for (hsize_t i = 0; i < available; ++i) {
145 if (var_buffer[i] == NULL) {
146 throw std::runtime_error("detected a NULL pointer for a variable length string in '" + get_name(*ptr) + "'");
147 }
148 auto& curstr = final_buffer[i];
149 curstr.clear();
150 curstr.insert(0, var_buffer[i]);
151 }
152
153 } else {
154 auto bptr = fix_buffer.data();
155 ptr->read(bptr, dtype, mspace, dspace);
156 for (size_t i = 0; i < available; ++i, bptr += fixed_length) {
157 auto& curstr = final_buffer[i];
158 curstr.clear();
159 curstr.insert(curstr.end(), bptr, bptr + find_string_length(bptr, fixed_length));
160 }
161 }
162
163 last_loaded += available;
164 }
165};
166
167}
168
169}
170
171#endif
Choose an HDF5 datatype.
Stream a 1-dimensional HDF5 string dataset into memory.
Definition Stream1dStringDataset.hpp:31
Stream1dStringDataset(const H5::DataSet *ptr, hsize_t buffer_size)
Definition Stream1dStringDataset.hpp:63
hsize_t position() const
Definition Stream1dStringDataset.hpp:111
void next(size_t jump=1)
Definition Stream1dStringDataset.hpp:97
Stream1dStringDataset(const H5::DataSet *ptr, hsize_t length, hsize_t buffer_size)
Definition Stream1dStringDataset.hpp:39
std::string steal()
Definition Stream1dStringDataset.hpp:84
hsize_t length() const
Definition Stream1dStringDataset.hpp:104
std::string get()
Definition Stream1dStringDataset.hpp:71
Get the length of a 1-dimensional HDF5 dataset.
Get the name of an HDF5 object.
std::string get_name(const Handle_ &handle)
Definition get_name.hpp:24
hsize_t pick_1d_block_size(const H5::DSetCreatPropList &cplist, hsize_t full_length, hsize_t buffer_size=10000)
Definition pick_1d_block_size.hpp:26
hsize_t get_1d_length(const H5::DataSpace &space, bool allow_scalar)
Definition get_1d_length.hpp:25
Assorted helper functions for parsing and validation.
Definition choose_missing_placeholder.hpp:15
Pick a block size for a 1-dimensional HDF5 dataset.