ritsuko
Helper utilities for ArtifactDB C++ code
|
Assorted helper functions for HDF5 parsing. More...
Namespaces | |
namespace | vls |
Assorted functions for handling ritsuko's custom VLS arrays. | |
Classes | |
struct | IterateNdDataset |
Iterate through an N-dimensional dataset by block. More... | |
class | Stream1dNumericDataset |
Stream a numeric 1-dimensional HDF5 dataset into memory. More... | |
class | Stream1dStringDataset |
Stream a 1-dimensional HDF5 string dataset into memory. More... | |
Functions | |
template<typename Type_ > | |
const H5::PredType & | as_numeric_datatype () |
bool | exceeds_integer_limit (const H5::IntType &itype, size_t precision, bool is_signed) |
bool | exceeds_integer_limit (const H5::DataSet &handle, size_t precision, bool is_signed) |
bool | exceeds_integer_limit (const H5::Attribute &handle, size_t precision, bool is_signed) |
bool | exceeds_float_limit (const H5::DataSet &handle, size_t precision) |
bool | exceeds_float_limit (const H5::Attribute &handle, size_t precision) |
hsize_t | get_1d_length (const H5::DataSpace &space, bool allow_scalar) |
hsize_t | get_1d_length (const H5::DataSet &handle, bool allow_scalar) |
hsize_t | get_1d_length (const H5::Attribute &handle, bool allow_scalar) |
bool | is_scalar (const H5::DataSpace &space) |
bool | is_scalar (const H5::DataSet &handle) |
bool | is_scalar (const H5::Attribute &handle) |
std::vector< hsize_t > | get_dimensions (const H5::DataSpace &space, bool allow_scalar) |
std::vector< hsize_t > | get_dimensions (const H5::DataSet &handle, bool allow_scalar) |
std::vector< hsize_t > | get_dimensions (const H5::Attribute &handle, bool allow_scalar) |
template<class Handle_ > | |
std::string | get_name (const Handle_ &handle) |
bool | is_utf8_string (const H5::StrType &stype) |
bool | is_utf8_string (const H5::DataSet &handle) |
bool | is_utf8_string (const H5::Attribute &handle) |
std::string | load_scalar_string_attribute (const H5::Attribute &attr) |
std::vector< std::string > | load_1d_string_attribute (const H5::Attribute &attr, hsize_t full_length) |
std::vector< std::string > | load_1d_string_attribute (const H5::Attribute &attr) |
template<typename Type_ > | |
Type_ | load_scalar_numeric_attribute (const H5::Attribute &attr) |
template<typename Type_ > | |
std::vector< Type_ > | load_1d_numeric_attribute (const H5::Attribute &attr, hsize_t full_length) |
template<typename Type_ > | |
std::vector< Type_ > | load_1d_numeric_attribute (const H5::Attribute &attr) |
std::string | load_scalar_string_dataset (const H5::DataSet &handle) |
std::vector< std::string > | load_1d_string_dataset (const H5::DataSet &handle, hsize_t full_length, hsize_t buffer_size) |
std::vector< std::string > | load_1d_string_dataset (const H5::DataSet &handle, hsize_t buffer_size) |
template<typename Type_ > | |
Type_ | load_scalar_numeric_dataset (const H5::DataSet &handle) |
template<typename Type_ > | |
std::vector< Type_ > | load_1d_numeric_dataset (const H5::DataSet &handle, hsize_t full_length, hsize_t buffer_size) |
template<typename Type_ > | |
std::vector< Type_ > | load_1d_numeric_dataset (const H5::DataSet &handle, hsize_t buffer_size) |
template<class H5Object_ > | |
H5::Attribute | open_scalar_attribute (const H5Object_ &handle, const char *name) |
template<class H5Object_ > | |
std::string | open_and_load_scalar_string_attribute (const H5Object_ &handle, const char *name, bool utf8=true) |
void | check_numeric_missing_placeholder_attribute (const H5::DataSet &dset, const H5::Attribute &attr, bool type_class_only=false) |
template<typename Type_ > | |
std::optional< Type_ > | open_and_load_optional_numeric_missing_placeholder (const H5::DataSet &handle, const char *attr_name) |
void | check_string_missing_placeholder_attribute (const H5::Attribute &attr) |
std::optional< std::string > | open_and_load_optional_string_missing_placeholder (const H5::DataSet &handle, const char *attr_name) |
template<class Path_ > | |
H5::H5File | open_file (const Path_ &path) |
H5::Group | open_group (const H5::Group &handle, const char *name) |
H5::DataSet | open_dataset (const H5::Group &handle, const char *name) |
template<class Object_ > | |
H5::Attribute | open_attribute (const Object_ &handle, const char *name) |
hsize_t | pick_1d_block_size (const H5::DSetCreatPropList &cplist, hsize_t full_length, hsize_t buffer_size=10000) |
std::vector< hsize_t > | pick_nd_block_dimensions (const H5::DSetCreatPropList &cplist, const std::vector< hsize_t > &dimensions, hsize_t buffer_size=10000) |
void | validate_scalar_string_dataset (const H5::DataSet &handle) |
void | validate_1d_string_dataset (const H5::DataSet &handle, hsize_t full_length, hsize_t buffer_size) |
void | validate_1d_string_dataset (const H5::DataSet &handle, hsize_t buffer_size) |
void | validate_nd_string_dataset (const H5::DataSet &handle, const std::vector< hsize_t > &dimensions, hsize_t buffer_size) |
void | validate_nd_string_dataset (const H5::DataSet &handle, hsize_t buffer_size) |
void | validate_scalar_string_attribute (const H5::Attribute &attr) |
void | validate_1d_string_attribute (const H5::Attribute &attr, hsize_t full_length) |
void | validate_1d_string_attribute (const H5::Attribute &attr) |
Assorted helper functions for HDF5 parsing.
const H5::PredType & ritsuko::hdf5::as_numeric_datatype | ( | ) |
Choose the HDF5 datatype object corresponding to a particular C++ numeric type.
Type_ | A numeric C++ type. This can be any of the fixed-width integers, float or double . Some of the non-fixed integer types are also supported. |
|
inline |
Check the validity of a missing placeholder attribute on a numeric dataset. An error is raised if the attribute is not a scalar or has a different type (or type class, if type_class_only = true
) to the dataset.
dset | Dataset handle. |
attr | Handle for the attribute containing the missing placeholder, typically attached to dset . |
type_class_only | Whether to only require identical type classes for the placeholder. If false, the types between dset and attr must be identical. |
|
inline |
Check the validity of a missing placeholder attribute on a string dataset. An error is raised if the attribute is not a scalar or has a different type class. For variable length string attributes, this function will also check that the string is not NULL.
attr | Handle for the attribute containing the missing placeholder. |
|
inline |
Overload of exceeds_float_limit()
that accepts a HDF5 attribute handle.
handle | Handle for a HDF5 attribute. |
precision | Number of bits in the limiting float type. |
true
is also returned for non-numeric attributes.
|
inline |
Check if a HDF5 datatype could hold values beyond the range of a limiting (IEEE754-compliant) float type. This is used by validators to ensure that a dataset can be represented in memory by the limiting type.
Note that the limiting float type is assumed to be IEEE754-compliant. If the HDF5 datatype is not also IEEE754-compliant, it will be considered out-of-range regardless of its precision. This is necessary as non-IEEE754 floats could have an arbitrary split of bits between the exponent and significand, such that two float datatypes with the same number of bits could represent a different set of numbers. (Though this seems unlikely in practice, as all CPU-specific predefined float types in later HDF5 versions are already aliases of the IEEE types.)
handle | Handle for a HDF5 dataset. |
precision | Number of bits in the limiting float type. |
true
is also returned for non-numeric datasets.
|
inline |
Overload of exceeds_integer_limit()
that accepts a HDF5 attribute handle.
handle | Handle for a HDF5 attribute. |
precision | Number of bits in the limiting integer type, assuming 2's complement. |
is_signed | Whether the limiting integer type is signed. |
|
inline |
Overload of exceeds_integer_limit()
that accepts a HDF5 dataset handle.
handle | Handle for a HDF5 dataset. |
precision | Number of bits in the limiting integer type, assuming 2's complement. |
is_signed | Whether the limiting integer type is signed. |
|
inline |
Check if a HDF5 datatype could hold values beyond the range of a limiting integer type. This is used by validators to ensure that a dataset can be represented in memory by the limiting type.
itype | HDF5 integer datatype. |
precision | Number of bits in the limiting integer type, assuming 2's complement. |
is_signed | Whether the limiting integer type is signed. |
true
is also returned for non-integer datasets.
|
inline |
Overload of get_1d_length()
that accepts an attribute handle.
handle | Handle to a HDF5 attribute. |
allow_scalar | Whether to allow scalars. |
|
inline |
Overload of get_1d_length()
that accepts a dataset handle.
handle | Handle to a HDF5 dataset. |
allow_scalar | Whether to allow scalars. |
|
inline |
Get the length of a 1-dimensional HDF5 dataset.
space | The data space of the dataset. |
allow_scalar | Whether to allow scalars. |
If allow_scalar = true
, zero is returned in the presence of a scalar dataset, otherwise an error is raised.
|
inline |
Overload of get_dimensions()
that accepts an attribute handle.
handle | Handle to a HDF5 attribute. |
allow_scalar | Whether to allow scalars. |
|
inline |
Overload of get_dimensions()
that accepts a dataset handle.
handle | Handle to a HDF5 dataset. |
allow_scalar | Whether to allow scalars. |
|
inline |
Get the dimensions of a dataset.
space | The data space of the dataset. |
allow_scalar | Whether to allow scalars. |
If allow_scalar = true
, a zero-length vector is returned in the presence of a scalar dataset, otherwise an error is raised. std::string ritsuko::hdf5::get_name | ( | const Handle_ & | handle | ) |
Get the name of a HDF5 object from its handle, usually for printing informative error messages.
Handle_ | Type of HDF5 handle, usually a Group , DataSet or Attribute . |
handle | Handle to a HDF5 object. |
|
inline |
Overload of is_scalar()
that accepts an attribute handle.
handle | Handle to a HDF5 attribute. |
handle
represents a scalar attribute.
|
inline |
Overload of is_scalar()
that accepts a dataset handle.
handle | Handle to a HDF5 dataset. |
handle
represents a scalar dataset.
|
inline |
space | The data space of the dataset. |
space
represents a scalar dataset.
|
inline |
Overload of is_utf8_string()
that accepts a HDF5 attribute handle.
handle | Handle for a HDF5 attribute. |
|
inline |
Overload of is_utf8_string()
that accepts a HDF5 dataset handle.
handle | Handle for a HDF5 dataset. |
|
inline |
Check if a HDF5 string datatype uses an encoding that is compatible with UTF-8.
Note that this returns true
even if the string datatype uses ASCII encoding, given that ASCII is a subset of UTF-8. As a result, this function is mostly performative as all valid HDF5 strings are encoded in either ASCII or UTF-8; nonetheless, we run these checks to be explicit and to protect against the future addition of more encodings.
stype | HDF5 string datatype. |
stype
uses UTF-8 (or ASCII) encoding. std::vector< Type_ > ritsuko::hdf5::load_1d_numeric_attribute | ( | const H5::Attribute & | attr | ) |
Overload of load_1d_numeric_attribute()
that determines the length of the attribute via get_1d_length()
.
Type_ | Type for holding the data in memory, see as_numeric_datatype() for supported types. |
attr | Handle to a numeric attribute. Callers are responsible for checking that the datatype of attr is appropriate for Type_ , e.g., with exceeds_integer_limit() . |
std::vector< Type_ > ritsuko::hdf5::load_1d_numeric_attribute | ( | const H5::Attribute & | attr, |
hsize_t | full_length ) |
Type_ | Type for holding the data in memory, see as_numeric_datatype() for supported types. |
attr | Handle to a numeric attribute. Callers are responsible for checking that the datatype of attr is appropriate for Type_ , e.g., with exceeds_integer_limit() . |
full_length | Length of the attribute in attr , usually obtained by get_1d_length() . |
std::vector< Type_ > ritsuko::hdf5::load_1d_numeric_dataset | ( | const H5::DataSet & | handle, |
hsize_t | buffer_size ) |
Overload of load_1d_numeric_dataset()
that determines the length via get_1d_length()
.
Type_ | Type of the number in memory. |
handle | Handle to the HDF5 dataset. |
buffer_size | Size of the buffer for holding loaded numbers. |
std::vector< Type_ > ritsuko::hdf5::load_1d_numeric_dataset | ( | const H5::DataSet & | handle, |
hsize_t | full_length, | ||
hsize_t | buffer_size ) |
Load a 1-dimensional numeric dataset into a vector.
Type_ | Type of the number in memory. |
handle | Handle to the HDF5 dataset. |
full_length | Length of the dataset as a 1-dimensional vector. |
buffer_size | Size of the buffer for holding loaded numbers. |
|
inline |
Overload of load_1d_string_attribute()
that determines the length of the attribute via get_1d_length()
.
attr | Handle to a 1-dimensional string attribute. Callers are responsible for checking that attr contains a string datatype class. |
|
inline |
check_ | Whether to check that attr is a 1-dimensional string attribute. |
attr | Handle to a 1-dimensional string attribute. Callers are responsible for checking that attr contains a string datatype class. |
full_length | Length of the attribute in attr , usually obtained by get_1d_length() . |
|
inline |
Overload of load_1d_string_dataset()
that determines the length via get_1d_length()
.
handle | Handle to the 1-dimensional HDF5 dataset. |
buffer_size | Size of the buffer for holding loaded strings. |
|
inline |
Load a 1-dimensional string dataset into a vector of strings.
handle | Handle to the 1-dimensional HDF5 dataset. |
full_length | Length of the dataset as a 1-dimensional vector. |
buffer_size | Size of the buffer for holding loaded strings. |
Type_ ritsuko::hdf5::load_scalar_numeric_attribute | ( | const H5::Attribute & | attr | ) |
Type_ | Type for holding the data in memory, see as_numeric_datatype() for supported types. |
attr | Handle to a scalar numeric attribute. Callers are responsible for checking that the datatype of attr is appropriate for Type_ , e.g., with exceeds_integer_limit() . |
Type_ ritsuko::hdf5::load_scalar_numeric_dataset | ( | const H5::DataSet & | handle | ) |
Load a scalar numeric dataset into a single number.
Type_ | Type of the number in memory. |
handle | Handle to the HDF5 scalar dataset. |
|
inline |
attr | Handle to a scalar string attribute. Callers are responsible for checking that attr contains a string datatype class. |
|
inline |
Load a scalar string dataset into a single string.
handle | Handle to the HDF5 scalar dataset. |
std::optional< Type_ > ritsuko::hdf5::open_and_load_optional_numeric_missing_placeholder | ( | const H5::DataSet & | handle, |
const char * | attr_name ) |
Check if a missing placeholder attribute is present, and if so, open it and load its value. This will also call check_numeric_missing_placeholder_attribute()
to validate the placeholder's properties.
Type_ | Type to use to store the data in memory, see as_numeric_datatype() for supported types. |
handle | Dataset handle. |
attr_name | Name of the attribute containing the missing value placeholder. |
|
inline |
Check if a missing string placeholder attribute is present, and if so, open it and load its value. This will also call check_string_missing_placeholder_attribute()
to validate the placeholder's properties.
handle | Dataset handle. |
attr_name | Name of the attribute containing the missing value placeholder. |
std::string ritsuko::hdf5::open_and_load_scalar_string_attribute | ( | const H5Object_ & | handle, |
const char * | name, | ||
bool | utf8 = true ) |
H5Object_ | Type of the HDF5 handle, usually a DataSet or Group . |
handle | HDF5 dataset or group handle. |
name | Name of the attribute. |
utf8 | Whether to check for a UTF-8 encoding. |
name
is not scalar or does not use a string datatype. If utf8 = true
, it will also fail if the datatype does not use a UTF-8 compatible encoding. H5::Attribute ritsuko::hdf5::open_attribute | ( | const Object_ & | handle, |
const char * | name ) |
Object_ | Type of the HDF5 handle, usually a DataSet or Group . |
handle | HDF5 dataset or group handle. |
name | Name of the attribute. |
name
does not refer to an attribute.
|
inline |
handle | Group containing the dataset. |
name | Name of the dataset inside the group. |
name
does not refer to a dataset.
|
inline |
path | Path to a HDF5 file. |
Path_ | Type of the path, either as a C-style array, a std::string , or a filesystem::path . |
path
does not exist.
|
inline |
handle | Parent group (or file). |
name | Name of the group. |
name
does not refer to a group.
|
inline |
H5Object_ | Type of the HDF5 handle, usually a DataSet or Group . |
handle | HDF5 dataset or group handle. |
name | Name of the attribute. |
name
does not refer to a scalar attribute.
|
inline |
Pick a block size to use for 1-dimensional iteration over a dataset. For compressed datasets, this aims to be the largest multiple of the chunk size that fits into a buffer.
cplist | The creation property list for this dataset. |
full_length | Length of this dataset, e.g., from get_1d_length() . |
buffer_size | Size of the buffer in terms of the number of elements. Smaller values reduce peak memory usage at the cost of more iterations. |
|
inline |
Pick block dimensions to use for iteration over an N-dimensional dataset. For compressed datasets, this aims to be the largest multiple of the chunk size that fits into a buffer.
cplist | The creation property list for this dataset. |
dimensions | Dimensions of this dataset. |
buffer_size | Size of the buffer in terms of the number of elements. Smaller values reduce peak memory usage at the cost of more iterations. |
|
inline |
Overload for validate_1d_string_attribute()
that automatically determines its length via get_1d_length()
.
attr | Handle to the HDF5 string attribute. |
|
inline |
Check that a 1-dimensional string attribute is valid. Currently, this involves checking that there are no NULL
entries for variable-length string datatypes. For fixed-width string attributes, this function is a no-op.
attr | Handle to the HDF5 string attribute. |
full_length | Length of the attribute as a 1-dimensional vector. |
|
inline |
Overload for validate_1d_string_dataset()
that automatically determines its length via get_1d_length()
.
handle | Handle to the HDF5 string dataset. |
buffer_size | Size of the buffer for holding loaded strings. |
|
inline |
Check that a 1-dimensional string dataset is valid. Currently, this involves checking that there are no NULL
entries for variable-length string datatypes. For fixed-width string datasets, this function is a no-op.
handle | Handle to the HDF5 string dataset. |
full_length | Length of the dataset as a 1-dimensional vector. |
buffer_size | Size of the buffer for holding loaded strings. |
|
inline |
Check that an N-dimensional string dataset is valid. Currently, this involves checking that there are no NULL
entries for variable-length string datatypes. For fixed-width string datasets, this function is a no-op.
handle | Handle to the HDF5 string dataset. |
dimensions | Dimensions of the dataset. |
buffer_size | Size of the buffer for holding loaded strings. |
|
inline |
Overload for validate_nd_string_dataset()
that automatically determines the dimensions.
handle | Handle to the HDF5 string dataset. |
buffer_size | Size of the buffer for holding loaded strings. |
|
inline |
Check that a scalar string attribute is valid. Currently, this involves checking that there are no NULL
entries for variable-length string datatypes. For fixed-width string attributes, this function is a no-op.
attr | Handle to the HDF5 string attribute. |
|
inline |
Check that a scalar string dataset is valid. Currently, this involves checking that there are no NULL
entries for variable-length string datatypes. For fixed-width string datasets, this function is a no-op.
handle | Handle to the HDF5 string dataset. |