takane
Validators for ArtifactDB file formats
Loading...
Searching...
No Matches
gff_file.hpp
Go to the documentation of this file.
1#ifndef TAKANE_GFF_FILE_HPP
2#define TAKANE_GFF_FILE_HPP
3
4#include "utils_files.hpp"
5#include "ritsuko/ritsuko.hpp"
6
7#include <filesystem>
8#include <stdexcept>
9#include <string>
10
16namespace takane {
17
22namespace gff_file {
23
32inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
33 const std::string type_name = "gff_file"; // use a separate variable to avoid dangling reference warnings from GCC.
34 const auto& gffmap = internal_json::extract_typed_object_from_metadata(metadata.other, type_name);
35
36 const std::string version_name = "version"; // again, avoid dangling reference warnings.
37 const std::string& vstring = internal_json::extract_string_from_typed_object(gffmap, version_name, type_name);
38 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true);
39 if (version.major != 1) {
40 throw std::runtime_error("unsupported version string '" + vstring + "'");
41 }
42
43 auto fpath = path / "file.";
44 const std::string format_name = "format"; // again, avoid dangling reference warnings.
45 const std::string& fstring = internal_json::extract_string_from_typed_object(gffmap, format_name, type_name);
46 if (fstring == "GFF2") {
47 fpath += "gff2";
48 } else if (fstring == "GFF3") {
49 fpath += "gff3";
50 } else {
51 throw std::runtime_error("unknown value '" + fstring + "' for 'gff_file.format' property");
52 }
53
54 // Check if it's indexed.
55 bool indexed = internal_files::is_indexed(gffmap);
56 fpath += ".";
57 if (indexed) {
58 fpath += "bgz";
59 } else {
60 fpath += "gz";
61 }
62
63 // Check magic numbers.
64 internal_files::check_gzip_signature(fpath);
65
66 if (fstring == "GFF3") {
67 const std::string expected = "##gff-version 3";
68 const size_t expected_len = expected.size();
69
70 auto reader = internal_other::open_reader<byteme::GzipFileReader>(fpath, expected_len);
71 byteme::PerByte<> pb(&reader);
72 bool okay = pb.valid();
73
74 for (size_t i = 0; i < expected_len; ++i) {
75 if (!okay) {
76 throw std::runtime_error("incomplete GFF3 file signature for '" + fpath.string() + "'");
77 }
78 if (pb.get() != expected[i]) {
79 throw std::runtime_error("incorrect GFF3 file signature for '" + fpath.string() + "'");
80 }
81 okay = pb.advance();
82 }
83 }
84
85 if (indexed) {
86 auto ixpath = fpath;
87 ixpath += ".tbi";
88 internal_files::check_gzip_signature(ixpath);
89 internal_files::check_signature<byteme::GzipFileReader>(ixpath, "TBI\1", 4, "tabix");
90 }
91
92 if (options.gff_file_strict_check) {
93 options.gff_file_strict_check(path, metadata, options, indexed);
94 }
95}
96
97}
98
99}
100
101#endif
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition gff_file.hpp:32
takane validation functions.
Definition _derived_from.hpp:15
Object metadata, including the type and other fields.
Definition utils_public.hpp:26
std::unordered_map< std::string, std::shared_ptr< millijson::Base > > other
Definition utils_public.hpp:35
Validation options.
Definition utils_public.hpp:94
std::function< void(const std::filesystem::path &, const ObjectMetadata &, Options &, bool)> gff_file_strict_check
Definition utils_public.hpp:215