takane
Validators for ArtifactDB file formats
Loading...
Searching...
No Matches
fastq_file.hpp
Go to the documentation of this file.
1#ifndef TAKANE_FASTQ_FILE_HPP
2#define TAKANE_FASTQ_FILE_HPP
3
4#include "utils_files.hpp"
5#include "ritsuko/ritsuko.hpp"
6#include "byteme/byteme.hpp"
7
8#include <filesystem>
9#include <stdexcept>
10#include <string>
11
17namespace takane {
18
23namespace fastq_file {
24
33inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, Options& options) {
34 const std::string type_name = "fastq_file"; // use a separate variable to avoid dangling reference warnings from GCC.
35 const auto& fqmap = internal_json::extract_typed_object_from_metadata(metadata.other, type_name);
36
37 const std::string version_name = "version"; // again, avoid dangling reference warnings.
38 const std::string& vstring = internal_json::extract_string_from_typed_object(fqmap, version_name, type_name);
39 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true);
40 if (version.major != 1) {
41 throw std::runtime_error("unsupported version string '" + vstring + "'");
42 }
43
44 internal_files::check_sequence_type(fqmap, type_name.c_str());
45
46 // Checking the quality type and offset.
47 {
48 const std::string qtype_name = "quality_type"; // again, avoid dangling reference warnings.
49 const std::string& qtype = internal_json::extract_string(fqmap, qtype_name, [&](std::exception& e) -> void {
50 throw std::runtime_error("failed to extract 'fastq_file." + qtype_name + "' from the object metadata; " + std::string(e.what()));
51 });
52
53 if (qtype == "phred") {
54 auto oIt = fqmap.find("quality_offset");
55 if (oIt == fqmap.end()) {
56 throw std::runtime_error("expected a 'fastq_file.quality_offset' property");
57 }
58
59 const auto& val = oIt->second;
60 if (val->type() != millijson::NUMBER) {
61 throw std::runtime_error("'fastq_file.quality_offset' property should be a JSON number");
62 }
63
64 double offset = reinterpret_cast<const millijson::Number*>(val.get())->value();
65 if (offset != 33 && offset != 64) {
66 throw std::runtime_error("'fastq_file.quality_offset' property should be either 33 or 64");
67 }
68 } else if (qtype != "solexa") {
69 throw std::runtime_error("unknown value '" + qtype + "' for the 'fastq_file." + qtype_name + "' property");
70 }
71 }
72
73 // Check if it's indexed.
74 bool indexed = internal_files::is_indexed(fqmap);
75 auto fpath = path / "file.fastq.";
76 if (indexed) {
77 fpath += "bgz";
78 } else {
79 fpath += "gz";
80 }
81
82 internal_files::check_gzip_signature(fpath);
83 {
84 auto reader = internal_other::open_reader<byteme::GzipFileReader>(fpath, byteme::GzipFileReaderOptions());
85 char first_val;
86 if (reader->read(reinterpret_cast<unsigned char*>(&first_val), 1) == 0 || first_val != '@') {
87 throw std::runtime_error("FASTQ file does not start with '@'");
88 }
89 }
90
91 if (indexed) {
92 auto fixpath = path / "file.fastq.fai";
93 if (!std::filesystem::exists(fixpath)) {
94 throw std::runtime_error("missing FASTQ index file");
95 }
96
97 auto ixpath = fpath;
98 ixpath += ".gzi";
99 if (!std::filesystem::exists(ixpath)) {
100 throw std::runtime_error("missing BGZF index file");
101 }
102 }
103
104 if (options.fastq_file_strict_check) {
105 options.fastq_file_strict_check(path, metadata, options, indexed);
106 }
107}
108
109}
110
111}
112
113#endif
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition fastq_file.hpp:33
takane validation functions.
Definition _derived_from.hpp:15
Object metadata, including the type and other fields.
Definition utils_public.hpp:25
std::unordered_map< std::string, std::shared_ptr< millijson::Base > > other
Definition utils_public.hpp:34
Validation options.
Definition utils_public.hpp:93
std::function< void(const std::filesystem::path &, const ObjectMetadata &, Options &, bool)> fastq_file_strict_check
Definition utils_public.hpp:206