99 const std::string type_name =
"genomic_ranges";
100 const auto& vstring = internal_json::extract_version_for_type(metadata.
other, type_name);
101 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(),
true);
102 if (version.major != 1) {
103 throw std::runtime_error(
"unsupported version string '" + vstring +
"'");
107 auto limits = internal::find_sequence_limits(path /
"sequence_information", options);
108 size_t num_sequences = limits.seqlen.size();
111 auto handle = ritsuko::hdf5::open_file(path /
"ranges.h5");
112 auto ghandle = ritsuko::hdf5::open_group(handle, type_name.c_str());
113 auto id_handle = ritsuko::hdf5::open_dataset(ghandle,
"sequence");
114 auto num_ranges = ritsuko::hdf5::get_1d_length(id_handle,
false);
115 if (ritsuko::hdf5::exceeds_integer_limit(id_handle, 64,
false)) {
116 throw std::runtime_error(
"expected 'sequence' to have a datatype that fits into a 64-bit unsigned integer");
118 ritsuko::hdf5::Stream1dNumericDataset<uint64_t> id_stream(&id_handle, num_ranges, options.
hdf5_buffer_size);
120 auto start_handle = ritsuko::hdf5::open_dataset(ghandle,
"start");
121 if (num_ranges != ritsuko::hdf5::get_1d_length(start_handle,
false)) {
122 throw std::runtime_error(
"'start' and 'sequence' should have the same length");
124 if (ritsuko::hdf5::exceeds_integer_limit(start_handle, 64,
true)) {
125 throw std::runtime_error(
"expected 'start' to have a datatype that fits into a 64-bit signed integer");
127 ritsuko::hdf5::Stream1dNumericDataset<int64_t> start_stream(&start_handle, num_ranges, options.
hdf5_buffer_size);
129 auto width_handle = ritsuko::hdf5::open_dataset(ghandle,
"width");
130 if (num_ranges != ritsuko::hdf5::get_1d_length(width_handle,
false)) {
131 throw std::runtime_error(
"'width' and 'sequence' should have the same length");
133 if (ritsuko::hdf5::exceeds_integer_limit(width_handle, 64,
false)) {
134 throw std::runtime_error(
"expected 'width' to have a datatype that fits into a 64-bit unsigned integer");
136 ritsuko::hdf5::Stream1dNumericDataset<uint64_t> width_stream(&width_handle, num_ranges, options.
hdf5_buffer_size);
138 constexpr uint64_t end_limit = std::numeric_limits<int64_t>::max();
139 for (
size_t i = 0; i < num_ranges; ++i, id_stream.next(), start_stream.next(), width_stream.next()) {
140 auto id = id_stream.get();
141 if (
id >= num_sequences) {
142 throw std::runtime_error(
"'sequence' must be less than the number of sequences (got " + std::to_string(
id) +
")");
145 auto start = start_stream.get();
146 auto width = width_stream.get();
149 if (limits.has_circular[
id] && !limits.circular[
id]) {
151 throw std::runtime_error(
"non-positive start position (" + std::to_string(start) +
") for non-circular sequence");
154 if (limits.has_seqlen[
id]) {
156 auto spos =
static_cast<uint64_t
>(start);
157 auto limit = limits.seqlen[id];
159 throw std::runtime_error(
"start position beyond sequence length (" + std::to_string(start) +
" > " + std::to_string(limit) +
") for non-circular sequence");
163 if (limit - spos + 1 < width) {
164 throw std::runtime_error(
"end position beyond sequence length (" +
165 std::to_string(start) +
" + " + std::to_string(width) +
" > " + std::to_string(limit) +
166 ") for non-circular sequence");
171 bool exceeded =
false;
174 exceeded = (end_limit -
static_cast<uint64_t
>(start) < width);
177 exceeded = (end_limit +
static_cast<uint64_t
>(-start) < width);
180 throw std::runtime_error(
"end position beyond the range of a 64-bit integer (" + std::to_string(start) +
" + " + std::to_string(width) +
")");
185 auto strand_handle = ritsuko::hdf5::open_dataset(ghandle,
"strand");
186 if (num_ranges != ritsuko::hdf5::get_1d_length(strand_handle,
false)) {
187 throw std::runtime_error(
"'strand' and 'sequence' should have the same length");
189 if (ritsuko::hdf5::exceeds_integer_limit(strand_handle, 32,
true)) {
190 throw std::runtime_error(
"expected 'strand' to have a datatype that fits into a 32-bit signed integer");
193 ritsuko::hdf5::Stream1dNumericDataset<int32_t> strand_stream(&strand_handle, num_ranges, options.
hdf5_buffer_size);
194 for (hsize_t i = 0; i < num_ranges; ++i, strand_stream.next()) {
195 auto x = strand_stream.get();
196 if (x < -1 || x > 1) {
197 throw std::runtime_error(
"values of 'strand' should be one of 0, -1, or 1 (got " + std::to_string(x) +
")");
202 internal_other::validate_mcols(path,
"range_annotations", num_ranges, options);
203 internal_other::validate_metadata(path,
"other_annotations", options);
205 internal_string::validate_names(ghandle,
"name", num_ranges, options.
hdf5_buffer_size);