96 const auto& vstring = internal_json::extract_version_for_type(metadata.
other,
"genomic_ranges");
97 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(),
true);
98 if (version.major != 1) {
99 throw std::runtime_error(
"unsupported version string '" + vstring +
"'");
103 auto limits = internal::find_sequence_limits(path /
"sequence_information", options);
104 size_t num_sequences = limits.seqlen.size();
107 auto handle = ritsuko::hdf5::open_file(path /
"ranges.h5");
108 auto ghandle = ritsuko::hdf5::open_group(handle,
"genomic_ranges");
109 auto id_handle = ritsuko::hdf5::open_dataset(ghandle,
"sequence");
110 auto num_ranges = ritsuko::hdf5::get_1d_length(id_handle,
false);
111 if (ritsuko::hdf5::exceeds_integer_limit(id_handle, 64,
false)) {
112 throw std::runtime_error(
"expected 'sequence' to have a datatype that fits into a 64-bit unsigned integer");
114 ritsuko::hdf5::Stream1dNumericDataset<uint64_t> id_stream(&id_handle, num_ranges, options.
hdf5_buffer_size);
116 auto start_handle = ritsuko::hdf5::open_dataset(ghandle,
"start");
117 if (num_ranges != ritsuko::hdf5::get_1d_length(start_handle,
false)) {
118 throw std::runtime_error(
"'start' and 'sequence' should have the same length");
120 if (ritsuko::hdf5::exceeds_integer_limit(start_handle, 64,
true)) {
121 throw std::runtime_error(
"expected 'start' to have a datatype that fits into a 64-bit signed integer");
123 ritsuko::hdf5::Stream1dNumericDataset<int64_t> start_stream(&start_handle, num_ranges, options.
hdf5_buffer_size);
125 auto width_handle = ritsuko::hdf5::open_dataset(ghandle,
"width");
126 if (num_ranges != ritsuko::hdf5::get_1d_length(width_handle,
false)) {
127 throw std::runtime_error(
"'width' and 'sequence' should have the same length");
129 if (ritsuko::hdf5::exceeds_integer_limit(width_handle, 64,
false)) {
130 throw std::runtime_error(
"expected 'width' to have a datatype that fits into a 64-bit unsigned integer");
132 ritsuko::hdf5::Stream1dNumericDataset<uint64_t> width_stream(&width_handle, num_ranges, options.
hdf5_buffer_size);
134 constexpr uint64_t end_limit = std::numeric_limits<int64_t>::max();
135 for (
size_t i = 0; i < num_ranges; ++i, id_stream.next(), start_stream.next(), width_stream.next()) {
136 auto id = id_stream.get();
137 if (
id >= num_sequences) {
138 throw std::runtime_error(
"'sequence' must be less than the number of sequences (got " + std::to_string(
id) +
")");
141 auto start = start_stream.get();
142 auto width = width_stream.get();
145 if (limits.has_circular[
id] && !limits.circular[
id]) {
147 throw std::runtime_error(
"non-positive start position (" + std::to_string(start) +
") for non-circular sequence");
150 if (limits.has_seqlen[
id]) {
152 auto spos =
static_cast<uint64_t
>(start);
153 auto limit = limits.seqlen[id];
155 throw std::runtime_error(
"start position beyond sequence length (" + std::to_string(start) +
" > " + std::to_string(limit) +
") for non-circular sequence");
159 if (limit - spos + 1 < width) {
160 throw std::runtime_error(
"end position beyond sequence length (" +
161 std::to_string(start) +
" + " + std::to_string(width) +
" > " + std::to_string(limit) +
162 ") for non-circular sequence");
167 bool exceeded =
false;
170 exceeded = (end_limit -
static_cast<uint64_t
>(start) < width);
173 exceeded = (end_limit +
static_cast<uint64_t
>(-start) < width);
176 throw std::runtime_error(
"end position beyond the range of a 64-bit integer (" + std::to_string(start) +
" + " + std::to_string(width) +
")");
181 auto strand_handle = ritsuko::hdf5::open_dataset(ghandle,
"strand");
182 if (num_ranges != ritsuko::hdf5::get_1d_length(strand_handle,
false)) {
183 throw std::runtime_error(
"'strand' and 'sequence' should have the same length");
185 if (ritsuko::hdf5::exceeds_integer_limit(strand_handle, 32,
true)) {
186 throw std::runtime_error(
"expected 'strand' to have a datatype that fits into a 32-bit signed integer");
189 ritsuko::hdf5::Stream1dNumericDataset<int32_t> strand_stream(&strand_handle, num_ranges, options.
hdf5_buffer_size);
190 for (hsize_t i = 0; i < num_ranges; ++i, strand_stream.next()) {
191 auto x = strand_stream.get();
192 if (x < -1 || x > 1) {
193 throw std::runtime_error(
"values of 'strand' should be one of 0, -1, or 1 (got " + std::to_string(x) +
")");
198 internal_other::validate_mcols(path,
"range_annotations", num_ranges, options);
199 internal_other::validate_metadata(path,
"other_annotations", options);
201 internal_string::validate_names(ghandle,
"name", num_ranges, options.
hdf5_buffer_size);