btllib
 All Classes Namespaces Functions Variables
seq_reader_multiline_fastq_module.hpp
1 #ifndef BTLLIB_SEQ_READER_MULTILINE_FASTQ_MODULE_HPP
2 #define BTLLIB_SEQ_READER_MULTILINE_FASTQ_MODULE_HPP
3 
4 #include "btllib/cstring.hpp"
5 #include "btllib/status.hpp"
6 
7 #include <cstdlib>
8 
9 namespace btllib {
10 
12 class SeqReaderMultilineFastqModule
13 {
14 
15 private:
16  friend class SeqReader;
17 
18  enum class Stage
19  {
20  HEADER,
21  SEQ,
22  TRANSITION,
23  SEP,
24  QUAL
25  };
26 
27  Stage stage = Stage::HEADER;
28  CString tmp;
29 
30  static bool buffer_valid(const char* buffer, size_t size);
31  template<typename ReaderType, typename RecordType>
32  bool read_buffer(ReaderType& reader, RecordType& record);
33  template<typename ReaderType, typename RecordType>
34  bool read_transition(ReaderType& reader, RecordType& record);
35  template<typename ReaderType, typename RecordType>
36  bool read_file(ReaderType& reader, RecordType& record);
37 };
38 
39 template<typename ReaderType, typename RecordType>
40 inline bool
41 SeqReaderMultilineFastqModule::read_buffer(ReaderType& reader,
42  RecordType& record)
43 {
44  record.header.clear();
45  record.seq.clear();
46  record.qual.clear();
47  if (reader.buffer.start < reader.buffer.end) {
48  int c;
49  for (;;) {
50  switch (stage) {
51  case Stage::HEADER: {
52  if (!reader.readline_buffer_append(record.header)) {
53  return false;
54  }
55  stage = Stage::SEQ;
56  }
57  // fall through
58  case Stage::SEQ: {
59  if (!reader.readline_buffer_append(record.seq)) {
60  return false;
61  }
62  rtrim(record.seq);
63  stage = Stage::TRANSITION;
64  }
65  // fall through
66  case Stage::TRANSITION: {
67  c = reader.getc_buffer();
68  if (c == EOF) {
69  return false;
70  }
71  reader.ungetc_buffer(c);
72  if (c == '+') {
73  stage = Stage::SEP;
74  } else {
75  stage = Stage::SEQ;
76  }
77  break;
78  }
79  case Stage::SEP: {
80  if (!reader.readline_buffer_append(tmp)) {
81  return false;
82  }
83  stage = Stage::QUAL;
84  tmp.clear();
85  }
86  // fallthrough
87  case Stage::QUAL: {
88  if (!reader.readline_buffer_append(record.qual)) {
89  return false;
90  }
91  rtrim(record.qual);
92  if (record.qual.size() == record.seq.size()) {
93  stage = Stage::HEADER;
94  return true;
95  }
96  check_error(record.qual.size() > record.seq.size(),
97  "SeqReader: Multiline FASTQ reader: Quality string is "
98  "longer than sequence string.");
99  break;
100  }
101  default: {
102  log_error("SeqReader has entered an invalid state.");
103  std::exit(EXIT_FAILURE); // NOLINT(concurrency-mt-unsafe)
104  }
105  }
106  }
107  }
108  return false;
109 }
110 
111 template<typename ReaderType, typename RecordType>
112 inline bool
113 SeqReaderMultilineFastqModule::read_transition(ReaderType& reader,
114  RecordType& record)
115 {
116  if (std::ferror(reader.source) == 0 && std::feof(reader.source) == 0) {
117  const auto p = std::fgetc(reader.source);
118  if (p != EOF) {
119  const auto ret = std::ungetc(p, reader.source);
120  check_error(ret == EOF, "SeqReaderMultilineFastqModule: ungetc failed.");
121  int c;
122  for (;;) {
123  switch (stage) {
124  case Stage::HEADER: {
125  reader.readline_file_append(record.header, reader.source);
126  stage = Stage::SEQ;
127  }
128  // fall through
129  case Stage::SEQ: {
130  reader.readline_file_append(record.seq, reader.source);
131  rtrim(record.seq);
132  stage = Stage::TRANSITION;
133  }
134  // fall through
135  case Stage::TRANSITION: {
136  c = std::fgetc(reader.source);
137  if (c == EOF) {
138  return false;
139  }
140  const auto ret = std::ungetc(c, reader.source);
141  check_error(ret == EOF,
142  "SeqReaderMultilineFastqModule: ungetc failed.");
143  if (c == '+') {
144  stage = Stage::SEP;
145  } else {
146  stage = Stage::SEQ;
147  }
148  break;
149  }
150  case Stage::SEP: {
151  reader.readline_file_append(tmp, reader.source);
152  stage = Stage::QUAL;
153  tmp.clear();
154  }
155  // fallthrough
156  case Stage::QUAL: {
157  reader.readline_file_append(record.qual, reader.source);
158  rtrim(record.qual);
159  if (record.qual.size() == record.seq.size()) {
160  stage = Stage::HEADER;
161  return true;
162  }
163  check_error(record.qual.size() > record.seq.size(),
164  "SeqReader: Multiline FASTQ reader: Quality string is "
165  "longer than sequence string.");
166  break;
167  }
168  default: {
169  log_error("SeqReader has entered an invalid state.");
170  std::exit(EXIT_FAILURE); // NOLINT(concurrency-mt-unsafe)
171  }
172  }
173  }
174  }
175  }
176  return false;
177 }
178 
179 template<typename ReaderType, typename RecordType>
180 inline bool
181 SeqReaderMultilineFastqModule::read_file(ReaderType& reader, RecordType& record)
182 {
183  if (!reader.file_at_end(reader.source)) {
184  reader.readline_file(record.header, reader.source);
185  int c;
186  reader.readline_file(record.seq, reader.source);
187  rtrim(record.seq);
188  for (;;) {
189  c = std::fgetc(reader.source);
190  check_error(c == EOF,
191  "SeqReader: Multiline FASTQ reader: Unexpected end.");
192  const auto ret = std::ungetc(c, reader.source);
193  check_error(ret == EOF, "SeqReaderMultilineFastqModule: ungetc failed.");
194  if (c == '+') {
195  reader.readline_file(tmp, reader.source);
196  reader.readline_file(record.qual, reader.source);
197  rtrim(record.qual);
198  size_t prevlen;
199  while (record.qual.size() < record.seq.size()) {
200  prevlen = record.qual.size();
201  reader.readline_file_append(record.qual, reader.source);
202  check_error(prevlen == record.qual.size(),
203  "SeqReader: Multiline FASTQ reader: Failed to read the "
204  "quality string.");
205  rtrim(record.qual);
206  }
207  check_error(record.qual.size() > record.seq.size(),
208  "SeqReader: Multiline FASTQ reader: Quality string is "
209  "longer than sequence string.");
210  return true;
211  }
212  reader.readline_file_append(record.seq, reader.source);
213  rtrim(record.seq);
214  }
215  }
216  return false;
217 }
219 
220 } // namespace btllib
221 
222 #endif
void rtrim(std::string &s)
void check_error(bool condition, const std::string &msg)
void log_error(const std::string &msg)