1#ifndef CHIHAYA_SPARSE_MATRIX_HPP
2#define CHIHAYA_SPARSE_MATRIX_HPP
5#include "ritsuko/hdf5/hdf5.hpp"
11#include "utils_misc.hpp"
12#include "utils_type.hpp"
13#include "utils_dimnames.hpp"
26namespace sparse_matrix {
// Validates the contents of the sparse matrix indices dataset against the
// already-loaded indptr offsets, throwing std::runtime_error on the first violation.
//
// Index_    : element type used when streaming values out of ihandle.
// ihandle   : HDF5 dataset holding the indices; read through a Stream1dNumericDataset.
// indptrs   : offsets; indptrs[p] and indptrs[p + 1] bound the entries of primary element p.
// primary   : number of primary-dimension elements to iterate over.
// secondary : exclusive upper bound for every index value.
// csc       : only selects the column/row wording used in the error messages here.
//
// NOTE(review): this text looks like an extracted source listing. Listing line
// numbers are fused onto the start of statements, and several original lines
// (guard conditions, the declaration of previous, closing braces) are not visible.
// Confirm against the real header before relying on these comments.
33template<
typename Index_>
34void validate_indices(
const H5::DataSet& ihandle,
const std::vector<uint64_t>& indptrs,
size_t primary,
size_t secondary,
bool csc) {
// The stream is told to expect indptrs.back() values in total.
// NOTE(review): 1000000 is presumably the streaming buffer size - confirm in ritsuko.
35 ritsuko::hdf5::Stream1dNumericDataset<Index_> stream(&ihandle, indptrs.back(), 1000000);
// One pass per primary element; the stream is consumed sequentially across all passes.
37 for (
size_t p = 0; p < primary; ++p) {
38 auto start = indptrs[p];
39 auto end = indptrs[p + 1];
// NOTE(review): the condition guarding this throw is missing from the listing;
// judging by the message it compares start against end - confirm.
41 throw std::runtime_error(
"entries of 'indptr' must be sorted");
// Walk the entries of this primary element, advancing the stream in lock-step with x.
46 for (
auto x = start; x < end; ++x, stream.next()) {
47 auto i = stream.get();
// NOTE(review): the guard for this throw is missing; presumably a negativity check on i.
49 throw std::runtime_error(
"entries of 'indices' should be non-negative");
// The first entry of each primary element (x == start) is exempt from the ordering check.
// NOTE(review): the declaration and update of previous are not visible in this listing.
51 if (x > start && i <= previous) {
52 throw std::runtime_error(
"'indices' should be strictly increasing within each " + (csc ? std::string(
"column") : std::string(
"row")));
// Every index must address a valid position along the secondary dimension.
54 if (
static_cast<size_t>(i) >= secondary) {
55 throw std::runtime_error(
"entries of 'indices' should be less than the number of " + (csc ? std::string(
"row") : std::string(
"column")) +
"s");
// Body of the sparse matrix validation routine. It checks the shape, data,
// by_column, indices, indptr and dimnames members reachable from handle and
// returns an ArrayDetails built from the detected type and the two dimensions.
// Failures are reported by throwing std::runtime_error.
//
// NOTE(review): the function signature, the try opener, the else branches, the
// closing braces and the declarations of nnz, array_type and csc are not visible
// in this listing; listing line numbers are fused onto the statements.
// Confirm the structure against the real header.

// --- shape: must hold exactly two extents ---
76 std::vector<uint64_t> dims(2);
80 auto shandle = ritsuko::hdf5::open_dataset(handle,
"shape");
81 auto len = ritsuko::hdf5::get_1d_length(shandle,
false);
// NOTE(review): the guard for this throw is missing; presumably it tests len against 2.
83 throw std::runtime_error(
"'shape' should have length 2");
// Versions before 1.1.0: shape is read as native int, so negative values must be rejected.
86 if (version.lt(1, 1, 0)) {
87 if (shandle.getTypeClass() != H5T_INTEGER) {
88 throw std::runtime_error(
"'shape' should be integer");
90 std::vector<int> dims_tmp(2);
91 shandle.read(dims_tmp.data(), H5::PredType::NATIVE_INT);
92 if (dims_tmp[0] < 0 || dims_tmp[1] < 0) {
93 throw std::runtime_error(
"'shape' should contain non-negative values");
95 std::copy(dims_tmp.begin(), dims_tmp.end(), dims.begin());
// NOTE(review): presumably the else branch (version 1.1.0 and later): the stored
// type must fit a 64-bit unsigned integer and is read directly into dims.
97 if (ritsuko::hdf5::exceeds_integer_limit(shandle, 64,
false)) {
98 throw std::runtime_error(
"'shape' should have a datatype that can fit into a 64-bit unsigned integer");
100 shandle.read(dims.data(), H5::PredType::NATIVE_UINT64);
// --- data: its length defines nnz, and its type defines the array type ---
106 auto dhandle = ritsuko::hdf5::open_dataset(handle,
"data");
109 nnz = ritsuko::hdf5::get_1d_length(dhandle,
false);
// Versions before 1.1.0 derive the type from the HDF5 type class, overridden to
// BOOLEAN when is_boolean reports true for the dataset.
111 if (version.lt(1, 1, 0)) {
112 array_type = internal_type::translate_type_0_0(dhandle.getTypeClass());
113 if (internal_type::is_boolean(dhandle)) {
114 array_type = BOOLEAN;
// NOTE(review): presumably the else branch: the type comes from the type string
// attribute and is then checked against the dataset.
117 auto type = ritsuko::hdf5::open_and_load_scalar_string_attribute(dhandle,
"type");
118 array_type = internal_type::translate_type_1_1(type);
119 internal_type::check_type_1_1(dhandle, array_type);
// Only integer, boolean and float data are accepted.
122 if (array_type != INTEGER && array_type != BOOLEAN && array_type != FLOAT) {
123 throw std::runtime_error(
"dataset should be integer, float or boolean");
126 internal_misc::validate_missing_placeholder(dhandle, version);
// Any exception from the data checks is rethrown with a prefix naming the member.
127 }
catch (std::exception& e) {
128 throw std::runtime_error(
"failed to validate 'data'; " + std::string(e.what()));
// The remaining structural checks run only when options.details_only is false.
132 if (!options.details_only) {
// --- by_column: only read for version 1.1.0 and later; non-zero selects csc ---
134 if (!version.lt(1, 1, 0)) {
135 auto bhandle = ritsuko::hdf5::open_dataset(handle,
"by_column");
136 if (!ritsuko::hdf5::is_scalar(bhandle)) {
137 throw std::runtime_error(
"'by_column' should be a scalar");
// NOTE(review): the trailing true presumably requests a signed limit, matching the message.
139 if (ritsuko::hdf5::exceeds_integer_limit(bhandle, 8,
true)) {
140 throw std::runtime_error(
"datatype of 'by_column' should fit into an 8-bit signed integer");
142 csc = (ritsuko::hdf5::load_scalar_numeric_dataset<int8_t>(bhandle) != 0);
// --- indices: type check depends on version; length must match data ---
146 auto ihandle = ritsuko::hdf5::open_dataset(handle,
"indices");
148 if (version.lt(1, 1, 0)) {
149 if (ihandle.getTypeClass() != H5T_INTEGER) {
150 throw std::runtime_error(
"'indices' should be integer");
153 if (ritsuko::hdf5::exceeds_integer_limit(ihandle, 64,
false)) {
154 throw std::runtime_error(
"datatype of 'indices' should fit into a 64-bit unsigned integer");
158 if (nnz != ritsuko::hdf5::get_1d_length(ihandle,
false)) {
159 throw std::runtime_error(
"'indices' and 'data' should have the same length");
// --- indptr: same version-dependent type check as indices ---
162 auto iphandle = ritsuko::hdf5::open_dataset(handle,
"indptr");
163 if (version.lt(1, 1, 0)) {
164 if (iphandle.getTypeClass() != H5T_INTEGER) {
165 throw std::runtime_error(
"'indptr' should be integer");
168 if (ritsuko::hdf5::exceeds_integer_limit(iphandle, 64,
false)) {
169 throw std::runtime_error(
"datatype of 'indptr' should fit into a 64-bit unsigned integer");
// csc picks which extent is the primary (pointer) dimension and which is the secondary.
173 auto primary = (csc ? dims[1] : dims[0]);
174 auto secondary = (csc ? dims[0] : dims[1]);
// NOTE(review): primary is a 64-bit unsigned value read from the file; primary + 1
// wraps to 0 at the maximum value, which would size indptrs to zero before
// indptrs[0] is read below. Worth confirming whether an upstream check prevents this.
175 if (ritsuko::hdf5::get_1d_length(iphandle,
false) !=
static_cast<size_t>(primary + 1)) {
176 throw std::runtime_error(
"'indptr' should have length equal to the number of " + (csc ? std::string(
"columns") : std::string(
"rows")) +
" plus 1");
// Load all offsets, then check the two boundary values: first is 0, last equals nnz.
178 std::vector<uint64_t> indptrs(primary + 1);
179 iphandle.read(indptrs.data(), H5::PredType::NATIVE_UINT64);
180 if (indptrs[0] != 0) {
181 throw std::runtime_error(
"first entry of 'indptr' should be 0 for a sparse matrix");
183 if (indptrs.back() !=
static_cast<uint64_t
>(nnz)) {
184 throw std::runtime_error(
"last entry of 'indptr' should be equal to the length of 'data'");
// Indices are streamed as int for versions before 1.1.0 and as uint64_t otherwise,
// mirroring the integer types used to read shape above.
187 if (version.lt(1, 1, 0)) {
188 internal::validate_indices<int>(ihandle, indptrs, primary, secondary, csc);
190 internal::validate_indices<uint64_t>(ihandle, indptrs, primary, secondary, csc);
// --- dimnames: optional; validated against dims only when present ---
195 if (handle.exists(
"dimnames")) {
196 internal_dimnames::validate(handle, dims, version);
// Report the detected type together with the two extents converted to size_t.
200 return ArrayDetails(array_type, std::vector<size_t>(dims.begin(), dims.end()));
ArrayDetails validate(const H5::Group &handle, const ritsuko::Version &version, Options &options)
Definition: sparse_matrix.hpp:75
Namespace for all chihaya functions.
Definition: binary_arithmetic.hpp:22
ArrayType
Definition: utils_public.hpp:27
Details about an array.
Definition: utils_public.hpp:36
Validation options.
Definition: utils_public.hpp:66
Various public utilities.