34 const std::string type_name =
"sequence_information";
35 const auto& vstring = internal_json::extract_version_for_type(metadata.
other, type_name);
36 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(),
true);
37 if (version.major != 1) {
38 throw std::runtime_error(
"unsupported version string '" + vstring +
"'");
41 auto handle = ritsuko::hdf5::open_file(path /
"info.h5");
42 auto ghandle = ritsuko::hdf5::open_group(handle, type_name.c_str());
46 auto nhandle = ritsuko::hdf5::open_dataset(ghandle,
"name");
47 if (!ritsuko::hdf5::is_utf8_string(nhandle)) {
48 throw std::runtime_error(
"expected 'name' to have a datatype that can be represented by a UTF-8 encoded string");
51 nseq = ritsuko::hdf5::get_1d_length(nhandle.getSpace(),
false);
52 std::unordered_set<std::string> collected;
53 ritsuko::hdf5::Stream1dStringDataset stream(&nhandle, nseq, options.
hdf5_buffer_size);
54 for (
size_t s = 0; s < nseq; ++s, stream.next()) {
55 auto x = stream.steal();
56 if (collected.find(x) != collected.end()) {
57 throw std::runtime_error(
"detected duplicated sequence name '" + x +
"'");
59 collected.insert(std::move(x));
63 const char* missing_attr_name =
"missing-value-placeholder";
66 auto lhandle = ritsuko::hdf5::open_dataset(ghandle,
"length");
67 if (ritsuko::hdf5::exceeds_integer_limit(lhandle, 64,
false)) {
68 throw std::runtime_error(
"expected a datatype for 'length' that fits in a 64-bit unsigned integer");
70 if (ritsuko::hdf5::get_1d_length(lhandle.getSpace(),
false) != nseq) {
71 throw std::runtime_error(
"expected lengths of 'length' and 'name' to be equal");
73 if (lhandle.attrExists(missing_attr_name)) {
74 auto ahandle = lhandle.openAttribute(missing_attr_name);
75 ritsuko::hdf5::check_numeric_missing_placeholder_attribute(lhandle, ahandle);
80 auto chandle = ritsuko::hdf5::open_dataset(ghandle,
"circular");
81 if (ritsuko::hdf5::exceeds_integer_limit(chandle, 32,
true)) {
82 throw std::runtime_error(
"expected a datatype for 'circular' that fits in a 32-bit signed integer");
84 if (ritsuko::hdf5::get_1d_length(chandle.getSpace(),
false) != nseq) {
85 throw std::runtime_error(
"expected lengths of 'length' and 'circular' to be equal");
87 if (chandle.attrExists(missing_attr_name)) {
88 auto ahandle = chandle.openAttribute(missing_attr_name);
89 ritsuko::hdf5::check_numeric_missing_placeholder_attribute(chandle, ahandle);
94 auto gnhandle = ritsuko::hdf5::open_dataset(ghandle,
"genome");
95 if (!ritsuko::hdf5::is_utf8_string(gnhandle)) {
96 throw std::runtime_error(
"expected 'genome' to have a datatype that can be represented by a UTF-8 encoded string");
98 if (ritsuko::hdf5::get_1d_length(gnhandle.getSpace(),
false) != nseq) {
99 throw std::runtime_error(
"expected lengths of 'length' and 'genome' to be equal");
101 if (gnhandle.attrExists(missing_attr_name)) {
102 auto ahandle = gnhandle.openAttribute(missing_attr_name);
103 ritsuko::hdf5::check_string_missing_placeholder_attribute(ahandle);