// Fetch the version string recorded for the "sequence_information" type in
// metadata.other.
34 auto vstring = internal_json::extract_version_for_type(metadata.
other,
"sequence_information");
// Parse the string into a version object whose `major` field is inspected below.
35 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(),
true);
// Only major version 1 is accepted; anything else is rejected with the
// offending string quoted in the error message.
36 if (version.major != 1) {
37 throw std::runtime_error(
"unsupported version string '" + vstring +
"'");
// Open the file "info.h5" located under `path`.
40 auto handle = ritsuko::hdf5::open_file(path /
"info.h5");
// Open the "sequence_information" group in that file; the 'name', 'length',
// 'circular' and 'genome' datasets are all opened from this group handle.
41 auto ghandle = ritsuko::hdf5::open_group(handle,
"sequence_information");
// 'name' dataset: must have a datatype representable as a UTF-8 string, and
// must not contain the same value twice.
45 auto nhandle = ritsuko::hdf5::open_dataset(ghandle,
"name");
46 if (!ritsuko::hdf5::is_utf8_string(nhandle)) {
47 throw std::runtime_error(
"expected 'name' to have a datatype that can be represented by a UTF-8 encoded string");
// Record the 1-d length of 'name' in nseq; the other datasets are compared
// against this value.
50 nseq = ritsuko::hdf5::get_1d_length(nhandle.getSpace(),
false);
// Names seen so far, used to detect duplicates.
51 std::unordered_set<std::string> collected;
// Stream over the nseq entries of 'name'; the stream is constructed with
// options.hdf5_buffer_size (presumably its read-buffer size - confirm against
// ritsuko's Stream1dStringDataset).
52 ritsuko::hdf5::Stream1dStringDataset stream(&nhandle, nseq, options.
hdf5_buffer_size);
// Visit each entry once, advancing the stream alongside the counter.
53 for (
size_t s = 0; s < nseq; ++s, stream.next()) {
// Take the current string out of the stream.
54 auto x = stream.steal();
// A name already present in the set is a duplicate and is reported by value.
55 if (collected.find(x) != collected.end()) {
56 throw std::runtime_error(
"detected duplicated sequence name '" + x +
"'");
// First occurrence: move the string into the set rather than copying it.
58 collected.insert(std::move(x));
// Name of the optional attribute that is looked up on the 'length', 'circular'
// and 'genome' datasets and, when present, validated.
62 const char* missing_attr_name =
"missing-value-placeholder";
// 'length' dataset: datatype must fit in a 64-bit unsigned integer (per the
// error message below) and its 1-d length must equal nseq.
65 auto lhandle = ritsuko::hdf5::open_dataset(ghandle,
"length");
66 if (ritsuko::hdf5::exceeds_integer_limit(lhandle, 64,
false)) {
67 throw std::runtime_error(
"expected a datatype for 'length' that fits in a 64-bit unsigned integer");
// nseq holds the length of 'name', so this enforces one entry per name.
69 if (ritsuko::hdf5::get_1d_length(lhandle.getSpace(),
false) != nseq) {
70 throw std::runtime_error(
"expected lengths of 'length' and 'name' to be equal");
// The placeholder attribute is optional; it is only validated when it exists.
72 if (lhandle.attrExists(missing_attr_name)) {
73 auto ahandle = lhandle.openAttribute(missing_attr_name);
// Delegate validation of the attribute against its dataset to ritsuko.
74 ritsuko::hdf5::check_missing_placeholder_attribute(lhandle, ahandle);
// 'circular' dataset: datatype must fit in a 32-bit signed integer (per the
// error message below) and its 1-d length must equal nseq.
79 auto chandle = ritsuko::hdf5::open_dataset(ghandle,
"circular");
80 if (ritsuko::hdf5::exceeds_integer_limit(chandle, 32,
true)) {
81 throw std::runtime_error(
"expected a datatype for 'circular' that fits in a 32-bit signed integer");
// NOTE(review): the comparison is against nseq, which was read from 'name',
// while the message names 'length'. The two agree only because 'length' is
// itself checked against nseq - confirm the wording is intended.
83 if (ritsuko::hdf5::get_1d_length(chandle.getSpace(),
false) != nseq) {
84 throw std::runtime_error(
"expected lengths of 'length' and 'circular' to be equal");
// The placeholder attribute is optional; it is only validated when it exists.
86 if (chandle.attrExists(missing_attr_name)) {
87 auto ahandle = chandle.openAttribute(missing_attr_name);
// Delegate validation of the attribute against its dataset to ritsuko.
88 ritsuko::hdf5::check_missing_placeholder_attribute(chandle, ahandle);
// 'genome' dataset: must have a datatype representable as a UTF-8 string and
// its 1-d length must equal nseq.
93 auto gnhandle = ritsuko::hdf5::open_dataset(ghandle,
"genome");
94 if (!ritsuko::hdf5::is_utf8_string(gnhandle)) {
95 throw std::runtime_error(
"expected 'genome' to have a datatype that can be represented by a UTF-8 encoded string");
// NOTE(review): the comparison is against nseq, which was read from 'name',
// while the message names 'length'. The two agree only because 'length' is
// itself checked against nseq - confirm the wording is intended.
97 if (ritsuko::hdf5::get_1d_length(gnhandle.getSpace(),
false) != nseq) {
98 throw std::runtime_error(
"expected lengths of 'length' and 'genome' to be equal");
// The placeholder attribute is optional; it is only validated when it exists.
100 if (gnhandle.attrExists(missing_attr_name)) {
101 auto ahandle = gnhandle.openAttribute(missing_attr_name);
// Delegate validation of the attribute against its dataset to ritsuko.
102 ritsuko::hdf5::check_missing_placeholder_attribute(gnhandle, ahandle);