1 // -*-mode:c++; c-style:k&r; c-basic-offset:4;-*-
2 //
3 // Copyright 2013, Julian Catchen <jcatchen@uoregon.edu>
4 //
5 // This file is part of Stacks.
6 //
7 // Stacks is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // Stacks is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
16 //
17 // You should have received a copy of the GNU General Public License
18 // along with Stacks. If not, see <http://www.gnu.org/licenses/>.
19 //
20
21 #ifndef __GZFASTQ_H__
22 #define __GZFASTQ_H__
23
24 #ifdef HAVE_LIBZ
25
26 #include <cerrno>
27 #include <zlib.h>
28 #include "input.h"
29
30 class GzFastq: public Input {
31 gzFile gz_fh;
32
33 public:
GzFastq(string path)34 GzFastq(string path) : Input() {
35 this->gz_fh = gzopen(path.c_str(), "rb");
36 if (!this->gz_fh) {
37 cerr << "Failed to open gzipped file '" << path << "': " << strerror(errno) << ".\n";
38 exit(EXIT_FAILURE);
39 }
40 #if ZLIB_VERNUM >= 0x1240
41 gzbuffer(this->gz_fh, libz_buffer_size);
42 #endif
43 };
GzFastq(const char * path)44 GzFastq(const char *path) : Input() {
45 this->gz_fh = gzopen(path, "rb");
46 if (!this->gz_fh) {
47 cerr << "Failed to open gzipped file '" << path << "': " << strerror(errno) << ".\n";
48 exit(EXIT_FAILURE);
49 }
50 #if ZLIB_VERNUM >= 0x1240
51 gzbuffer(this->gz_fh, libz_buffer_size);
52 #endif
53 };
~GzFastq()54 ~GzFastq() {
55 gzclose(this->gz_fh);
56 };
57 Seq *next_seq();
58 int next_seq(Seq &s);
59 };
60
next_seq()61 Seq *GzFastq::next_seq() {
62 char *res = NULL;
63
64 //
65 // Check the contents of the line buffer. When we finish reading a FASTQ record
66 // the buffer will either contain whitespace or the header of the next FASTQ
67 // record.
68 //
69 this->line[0] = '\0';
70 do {
71 res = gzgets(this->gz_fh, this->line, max_len);
72 } while (this->line[0] != '@' && res != NULL);
73
74 if (res == NULL) {
75 return NULL;
76 }
77
78 //
79 // Check if there is a carraige return in the buffer
80 //
81 uint len = strlen(this->line);
82 if (len > 0 && this->line[len - 1] == '\n') this->line[len - 1] = '\0';
83 if (len > 0 && this->line[len - 2] == '\r') this->line[len - 2] = '\0';
84
85 //
86 // Initialize the Seq structure and store the FASTQ ID
87 //
88 Seq *s = new Seq;
89 s->id = new char[strlen(this->line) + 1];
90 strcpy(s->id, this->line + 1);
91
92 //
93 // Read the sequence from the file
94 //
95 gzgets(this->gz_fh, this->line, max_len);
96
97 if (gzeof(this->gz_fh)) {
98 return NULL;
99 }
100
101 len = strlen(this->line);
102 if (len > 0 && this->line[len - 1] == '\n') this->line[len - 1] = '\0';
103 if (len > 0 && this->line[len - 2] == '\r') this->line[len - 2] = '\0';
104
105 s->seq = new char[len + 1];
106 strcpy(s->seq, this->line);
107
108 //
109 // Read the repeat of the ID
110 //
111 this->line[0] = '\0';
112 res = gzgets(this->gz_fh, this->line, max_len);
113
114 if (this->line[0] != '+' || res == NULL) {
115 return NULL;
116 }
117
118 //
119 // Read the quality score from the file
120 //
121 this->line[0] = '\0';
122 res = gzgets(this->gz_fh, this->line, max_len);
123
124 if (res == NULL && strlen(this->line) == 0) {
125 return NULL;
126 }
127
128 len = strlen(this->line);
129 if (len > 0 && this->line[len - 1] == '\n') this->line[len - 1] = '\0';
130 if (len > 0 && this->line[len - 2] == '\r') this->line[len - 2] = '\0';
131
132 s->qual = new char[len + 1];
133 strcpy(s->qual, this->line);
134
135 //
136 // Clear the line buffer so it is set up for the next record. If a '@'
137 // appears in the quality scores read, it will break parsing next time
138 // it is called.
139 //
140 this->line[0] = '\0';
141
142 return s;
143 }
144
next_seq(Seq & s)145 int GzFastq::next_seq(Seq &s) {
146 char *res = NULL;
147
148 //
149 // Check the contents of the line buffer. When we finish reading a FASTQ record
150 // the buffer will either contain whitespace or the header of the next FASTQ
151 // record.
152 //
153 this->line[0] = '\0';
154 do {
155 res = gzgets(this->gz_fh, this->line, max_len);
156 } while (this->line[0] != '@' && res != NULL);
157
158 if (res == NULL) {
159 return 0;
160 }
161
162 //
163 // Check if there is a carraige return in the buffer
164 //
165 uint len = strlen(this->line);
166 if (len > 0 && this->line[len - 1] == '\n') this->line[len - 1] = '\0';
167 if (len > 0 && this->line[len - 2] == '\r') this->line[len - 2] = '\0';
168
169 //
170 // Store the FASTQ ID
171 //
172 strcpy(s.id, this->line + 1);
173
174 //
175 // Read the sequence from the file
176 //
177 this->line[0] = '\0';
178 res = gzgets(this->gz_fh, this->line, max_len);
179
180 if (res == NULL) {
181 return 0;
182 }
183
184 len = strlen(this->line);
185 if (len > 0 && this->line[len - 1] == '\n') this->line[len - 1] = '\0';
186 if (len > 0 && this->line[len - 2] == '\r') this->line[len - 2] = '\0';
187
188 strcpy(s.seq, this->line);
189
190 //
191 // Read the repeat of the ID
192 //
193 this->line[0] = '\0';
194 res = gzgets(this->gz_fh, this->line, max_len);
195
196 if (this->line[0] != '+' || res == NULL) {
197 return 0;
198 }
199
200 //
201 // Read the quality score from the file
202 //
203 this->line[0] = '\0';
204 res = gzgets(this->gz_fh, this->line, max_len);
205
206 if (res == NULL && strlen(this->line) == 0) {
207 return 0;
208 }
209
210 len = strlen(this->line);
211 if (len > 0 && this->line[len - 1] == '\n') this->line[len - 1] = '\0';
212 if (len > 0 && this->line[len - 2] == '\r') this->line[len - 2] = '\0';
213
214 strcpy(s.qual, this->line);
215
216 //
217 // Clear the line buffer so it is set up for the next record. If a '@'
218 // appears in the quality scores read, it will break parsing next time
219 // it is called.
220 //
221 this->line[0] = '\0';
222
223 return 1;
224 }
225
226 #else // If HAVE_LIBZ is undefined and zlib library is not present.
227
228 #include "input.h"
229
230 class GzFastq: public Input {
231 public:
GzFastq(const char * path)232 GzFastq(const char *path) : Input() { cerr << "Gzip support was not enabled when Stacks was compiled.\n"; };
GzFastq(string path)233 GzFastq(string path) : Input() { cerr << "Gzip support was not enabled when Stacks was compiled.\n"; };
~GzFastq()234 ~GzFastq() {};
next_seq()235 Seq *next_seq() { return NULL; };
next_seq(Seq &)236 int next_seq(Seq &) { return 0; };
237 };
238
239 #endif // HAVE_LIBZ
240
241 #endif // __GZFASTQ_H__
242