takane
Validators for ArtifactDB file formats
Loading...
Searching...
No Matches
fastq_file.hpp
Go to the documentation of this file.
1#ifndef TAKANE_FASTQ_FILE_HPP
2#define TAKANE_FASTQ_FILE_HPP
3
4#include "utils_files.hpp"
5#include "ritsuko/ritsuko.hpp"
6
7#include <filesystem>
8#include <stdexcept>
9#include <string>
10
16namespace takane {
17
22namespace fastq_file {
23
32inline void validate(const std::filesystem::path& path, const ObjectMetadata& metadata, [[maybe_unused]] Options& options) {
33 const auto& fqmap = internal_json::extract_typed_object_from_metadata(metadata.other, "fastq_file");
34
35 const std::string& vstring = internal_json::extract_string_from_typed_object(fqmap, "version", "fastq_file");
36 auto version = ritsuko::parse_version_string(vstring.c_str(), vstring.size(), /* skip_patch = */ true);
37 if (version.major != 1) {
38 throw std::runtime_error("unsupported version string '" + vstring + "'");
39 }
40
41 internal_files::check_sequence_type(fqmap, "fastq_file");
42
43 // Checking the quality type and offset.
44 {
45 const std::string& qtype = internal_json::extract_string(fqmap, "quality_type", [&](std::exception& e) -> void {
46 throw std::runtime_error("failed to extract 'fastq_file.quality_type' from the object metadata; " + std::string(e.what()));
47 });
48
49 if (qtype == "phred") {
50 auto oIt = fqmap.find("quality_offset");
51 if (oIt == fqmap.end()) {
52 throw std::runtime_error("expected a 'fastq_file.quality_offset' property");
53 }
54
55 const auto& val = oIt->second;
56 if (val->type() != millijson::NUMBER) {
57 throw std::runtime_error("'fastq_file.quality_offset' property should be a JSON number");
58 }
59
60 double offset = reinterpret_cast<const millijson::Number*>(val.get())->value;
61 if (offset != 33 && offset != 64) {
62 throw std::runtime_error("'fastq_file.quality_offset' property should be either 33 or 64");
63 }
64 } else if (qtype != "solexa") {
65 throw std::runtime_error("unknown value '" + qtype + "' for the 'fastq_file.quality_type' property");
66 }
67 }
68
69 // Check if it's indexed.
70 bool indexed = internal_files::is_indexed(fqmap);
71 auto fpath = path / "file.fastq.";
72 if (indexed) {
73 fpath += "bgz";
74 } else {
75 fpath += "gz";
76 }
77
78 internal_files::check_gzip_signature(fpath);
79 auto reader = internal_other::open_reader<byteme::GzipFileReader>(fpath, 10);
80 byteme::PerByte<> pb(&reader);
81 if (!pb.valid() || pb.get() != '@') {
82 throw std::runtime_error("FASTQ file does not start with '@'");
83 }
84
85 if (indexed) {
86 auto fixpath = path / "file.fastq.fai";
87 if (!std::filesystem::exists(fixpath)) {
88 throw std::runtime_error("missing FASTQ index file");
89 }
90
91 auto ixpath = fpath;
92 ixpath += ".gzi";
93 if (!std::filesystem::exists(ixpath)) {
94 throw std::runtime_error("missing BGZF index file");
95 }
96 }
97
98 if (options.fastq_file_strict_check) {
99 options.fastq_file_strict_check(path, metadata, options, indexed);
100 }
101}
102
103}
104
105}
106
107#endif
void validate(const std::filesystem::path &path, const ObjectMetadata &metadata, Options &options)
Definition fastq_file.hpp:32
takane validation functions.
Definition _derived_from.hpp:15
Object metadata, including the type and other fields.
Definition utils_public.hpp:26
std::unordered_map< std::string, std::shared_ptr< millijson::Base > > other
Definition utils_public.hpp:35
Validation options.
Definition utils_public.hpp:94