1 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*-
2 //
3 // Copyright 2010-2013, Julian Catchen <jcatchen@uoregon.edu>
4 //
5 // This file is part of Stacks.
6 //
7 // Stacks is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // Stacks is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 // GNU General Public License for more details.
16 //
17 // You should have received a copy of the GNU General Public License
18 // along with Stacks.  If not, see <http://www.gnu.org/licenses/>.
19 //
20 
21 #ifndef __FASTQI_H__
22 #define __FASTQI_H__
23 
24 #include "input.h"
25 
26 class Fastq: public Input {
27 
28 public:
Fastq(const char * path)29     Fastq(const char *path) : Input(path) { };
Fastq(string path)30     Fastq(string path) : Input(path.c_str()) { };
~Fastq()31     ~Fastq() {};
32     Seq *next_seq();
33     int  next_seq(Seq &s);
34 };
35 
next_seq()36 Seq *Fastq::next_seq() {
37     //
38     // Check the contents of the line buffer. When we finish reading a FASTQ record
39     // the buffer will either contain whitespace or the header of the next FASTQ
40     // record.
41     //
42     while (this->line[0] != '@' && this->fh.good() ) {
43         this->fh.getline(this->line, max_len);
44     }
45 
46     if (!this->fh.good()) {
47         return NULL;
48     }
49 
50     //
51     // Check if there is a carraige return in the buffer
52     //
53     uint len = strlen(this->line);
54     if (len > 0 && this->line[len - 1] == '\r') this->line[len - 1] = '\0';
55 
56     //
57     // Initialize the Seq structure and store the FASTQ ID
58     //
59     Seq *s = new Seq;
60     s->id = new char[strlen(this->line) + 1];
61     strcpy(s->id, this->line + 1);
62 
63     //
64     // Read the sequence from the file
65     //
66     this->fh.getline(this->line, max_len);
67 
68     if (!this->fh.good()) {
69         return NULL;
70     }
71 
72     len = strlen(this->line);
73     if (len > 0 && this->line[len - 1] == '\r') this->line[len - 1] = '\0';
74 
75     s->seq = new char[len + 1];
76     strcpy(s->seq, this->line);
77 
78     //
79     // Read the repeat of the ID
80     //
81     this->fh.getline(this->line, max_len);
82 
83     if (this->line[0] != '+' || !this->fh.good()) {
84         return NULL;
85     }
86 
87     //
88     // Read the quality score from the file
89     //
90     this->fh.getline(this->line, max_len);
91 
92     if (!this->fh.good() && !this->fh.eof()) {
93         return NULL;
94     }
95 
96     len = strlen(this->line);
97     if (len > 0 && this->line[len - 1] == '\r') this->line[len - 1] = '\0';
98 
99     s->qual = new char[len + 1];
100     strcpy(s->qual, this->line);
101 
102     //
103     // Clear the line buffer so it is set up for the next record. If a '@'
104     // appears in the quality scores read, it will break parsing next time
105     // it is called.
106     //
107     this->line[0] = '\0';
108 
109     return s;
110 }
111 
next_seq(Seq & s)112 int Fastq::next_seq(Seq &s) {
113     //
114     // Check the contents of the line buffer. When we finish reading a FASTQ record
115     // the buffer will either contain whitespace or the header of the next FASTQ
116     // record.
117     //
118     while (this->line[0] != '@' && this->fh.good() ) {
119         this->fh.getline(this->line, max_len);
120     }
121 
122     if (!this->fh.good()) {
123         return 0;
124     }
125 
126     //
127     // Check if there is a carraige return in the buffer
128     //
129     uint len = strlen(this->line);
130     if (this->line[len - 1] == '\r') this->line[len - 1] = '\0';
131 
132     //
133     // Store the FASTQ ID
134     //
135     strcpy(s.id, this->line + 1);
136 
137     //
138     // Read the sequence from the file
139     //
140     this->fh.getline(this->line, max_len);
141 
142     if (!this->fh.good()) {
143         return 0;
144     }
145 
146     len = strlen(this->line);
147     if (this->line[len - 1] == '\r') this->line[len - 1] = '\0';
148 
149     strcpy(s.seq, this->line);
150 
151     //
152     // Read the repeat of the ID
153     //
154     this->fh.getline(this->line, max_len);
155 
156     if (this->line[0] != '+' || !this->fh.good()) {
157         return 0;
158     }
159 
160     //
161     // Read the quality score from the file
162     //
163     this->fh.getline(this->line, max_len);
164 
165     if (!this->fh.good() && !this->fh.eof()) {
166         return 0;
167     }
168 
169     len = strlen(this->line);
170     if (this->line[len - 1] == '\r') this->line[len - 1] = '\0';
171 
172     strcpy(s.qual, this->line);
173 
174     //
175     // Clear the line buffer so it is set up for the next record. If a '@'
176     // appears in the quality scores read, it will break parsing next time
177     // it is called.
178     //
179     this->line[0] = '\0';
180 
181     return 1;
182 }
183 
184 #endif // __FASTQI_H__
185