1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>
33 // ==========================================================================
34 
35 #ifndef TESTS_SEQ_IO_TEST_EASY_SEQ_IO_H_
36 #define TESTS_SEQ_IO_TEST_EASY_SEQ_IO_H_
37 
38 #include <seqan/basic.h>
39 #include <seqan/sequence.h>
40 #include <seqan/seq_io.h>
41 
42 // ---------------------------------------------------------------------------
43 // Test recognition of supported file types.
44 // ---------------------------------------------------------------------------
45 
// Opens the gzip-compressed FASTA fixture and checks that the reported format
// is Fasta.  The body is only compiled when SEQAN_HAS_ZLIB evaluates to true;
// otherwise the test is empty.
SEQAN_DEFINE_TEST(test_seq_io_sequence_file_recognize_file_type_gz_fasta)
{
#if SEQAN_HAS_ZLIB
    // Locate the compressed fixture via getAbsolutePath().
    seqan::CharString gzPath(getAbsolutePath("/tests/seq_io/test_dna.fa.gz"));

    // Construct the input file directly from that path.
    SeqFileIn gzInput(toCString(gzPath));

    // The format reported for the file must compare equal to Fasta.
    SEQAN_ASSERT(isEqual(format(gzInput), Fasta()));
#endif  // #if SEQAN_HAS_ZLIB
}
59 
// Opens the bzip2-compressed FASTA fixture and checks that the reported format
// is Fasta.  The body is only compiled when SEQAN_HAS_BZIP2 evaluates to true;
// otherwise the test is empty.
SEQAN_DEFINE_TEST(test_seq_io_sequence_file_recognize_file_type_bz2_fasta)
{
#if SEQAN_HAS_BZIP2
    // Locate the compressed fixture via getAbsolutePath().
    seqan::CharString bz2Path(getAbsolutePath("/tests/seq_io/test_dna.fa.bz2"));

    // Construct the input file directly from that path.
    SeqFileIn bz2Input(toCString(bz2Path));

    // The format reported for the file must compare equal to Fasta.
    SEQAN_ASSERT(isEqual(format(bz2Input), Fasta()));
#endif  // #if SEQAN_HAS_BZIP2
}
73 
74 // ---------------------------------------------------------------------------
75 // Test recognition of supported file formats.
76 // ---------------------------------------------------------------------------
77 
// Opens the plain-text FASTA fixture and checks that the reported format is
// Fasta.
SEQAN_DEFINE_TEST(test_seq_io_sequence_file_recognize_file_format_text_fasta)
{
    // Locate the fixture via getAbsolutePath().
    seqan::CharString fastaPath(getAbsolutePath("/tests/seq_io/test_dna.fa"));

    // Construct the input file directly from that path.
    SeqFileIn fastaInput(toCString(fastaPath));

    // The format reported for the file must compare equal to Fasta.
    SEQAN_ASSERT(isEqual(format(fastaInput), Fasta()));
}
89 
// Opens the plain-text FASTQ fixture and checks that the reported format is
// Fastq.
SEQAN_DEFINE_TEST(test_seq_io_sequence_file_recognize_file_format_text_fastq)
{
    // Locate the fixture via getAbsolutePath().
    seqan::CharString fastqPath(getAbsolutePath("/tests/seq_io/test_dna.fq"));

    // Construct the input file directly from that path.
    SeqFileIn fastqInput(toCString(fastqPath));

    // The format reported for the file must compare equal to Fastq.
    SEQAN_ASSERT(isEqual(format(fastqInput), Fastq()));
}
101 
102 // ---------------------------------------------------------------------------
103 // Test reading with different interfaces.
104 // ---------------------------------------------------------------------------
105 
106 template <typename TId, typename TSeq>
testSeqIOSequenceFileReadRecordTextFasta()107 void testSeqIOSequenceFileReadRecordTextFasta()
108 {
109     // Build path to file.
110     seqan::CharString filePath = getAbsolutePath("/tests/seq_io/test_dna.fa");
111 
112     // Create SequenceStream object.
113     SeqFileIn seqIO(toCString(filePath));
114 
115     // Check that the file type and format are set correctly.
116     TId id;
117     TSeq seq;
118 
119     readRecord(id, seq, seqIO);
120     SEQAN_ASSERT_EQ(id, "seq1");
121     SEQAN_ASSERT_EQ(seq, "CGATCGATAAT");
122 
123     readRecord(id, seq, seqIO);
124     SEQAN_ASSERT_EQ(id, "seq2");
125     SEQAN_ASSERT_EQ(seq, "CCTCTCTCTCCCT");
126 
127     readRecord(id, seq, seqIO);
128     SEQAN_ASSERT_EQ(id, "seq3");
129     SEQAN_ASSERT_EQ(seq, "CCCCCCCC");
130 
131     SEQAN_ASSERT(atEnd(seqIO));
132 }
133 
// Runs the record-wise FASTA reading helper for every combination of SeqAn and
// standard-library string types used as id and sequence containers.
SEQAN_DEFINE_TEST(test_seq_io_sequence_file_read_record_text_fasta)
{
    // SeqAn id, SeqAn sequence.
    testSeqIOSequenceFileReadRecordTextFasta<seqan::CharString, seqan::Dna5String>();
    // std id, std sequence.
    testSeqIOSequenceFileReadRecordTextFasta<std::string, std::string>();
    // std id, SeqAn sequence.
    testSeqIOSequenceFileReadRecordTextFasta<std::string, seqan::Dna5String>();
    // SeqAn id, std sequence.
    testSeqIOSequenceFileReadRecordTextFasta<seqan::CharString, std::string>();
}
141 
// Reads the whole FASTA fixture with a single readRecords() call and checks
// the number of records as well as every id and sequence.
SEQAN_DEFINE_TEST(test_seq_io_sequence_file_read_all_text_fasta)
{
    // Build path to file.
    seqan::CharString filePath = getAbsolutePath("/tests/seq_io/test_dna.fa");

    // Open the input file.
    SeqFileIn seqIO(toCString(filePath));

    // Containers receiving all records at once.
    seqan::StringSet<seqan::CharString> ids;
    seqan::StringSet<seqan::Dna5String> seqs;

    readRecords(ids, seqs, seqIO);

    // Verify both container sizes before indexing into either of them, so a
    // short read fails with an assertion instead of an out-of-range access.
    SEQAN_ASSERT_EQ(length(ids), 3u);
    SEQAN_ASSERT_EQ(length(seqs), 3u);

    SEQAN_ASSERT_EQ(ids[0], "seq1");
    SEQAN_ASSERT_EQ(seqs[0], "CGATCGATAAT");
    SEQAN_ASSERT_EQ(ids[1], "seq2");
    SEQAN_ASSERT_EQ(seqs[1], "CCTCTCTCTCCCT");
    SEQAN_ASSERT_EQ(ids[2], "seq3");
    SEQAN_ASSERT_EQ(seqs[2], "CCCCCCCC");

    // The file must have been consumed completely.
    SEQAN_ASSERT(atEnd(seqIO));
}
165 
166 // ---------------------------------------------------------------------------
167 // Test writing with different interfaces.
168 // ---------------------------------------------------------------------------
169 
// Writes three FASTA records one at a time with writeRecord() and compares the
// resulting file with the FASTA fixture.
SEQAN_DEFINE_TEST(test_seq_io_sequence_file_write_record_text_fasta)
{
    // Build path to the temporary output file; the ".fa" suffix is appended
    // to the temporary file name.
    seqan::CharString filePath = SEQAN_TEMP_FILENAME();
    append(filePath, ".fa");

    // Open the output file.
    SeqFileOut seqIO(toCString(filePath));

    // Records to write.
    seqan::StringSet<seqan::CharString> ids;
    appendValue(ids, "seq1");
    appendValue(ids, "seq2");
    appendValue(ids, "seq3");
    seqan::StringSet<seqan::Dna5String> seqs;
    appendValue(seqs, "CGATCGATAAT");
    appendValue(seqs, "CCTCTCTCTCCCT");
    appendValue(seqs, "CCCCCCCC");

    // Exercise the single-record interface; the batch interface writeRecords()
    // is covered by test_seq_io_sequence_file_write_all_text_fasta.
    SEQAN_ASSERT_EQ(length(ids), length(seqs));
    for (unsigned i = 0; i < length(ids); ++i)
        writeRecord(seqIO, ids[i], seqs[i]);

    close(seqIO);  // Make sure we can read this later.

    seqan::CharString pathToExpected = getAbsolutePath("/tests/seq_io/test_dna.fa");
    SEQAN_ASSERT_MSG(seqan::_compareTextFilesAlt(toCString(pathToExpected), toCString(filePath)), "Output should match example.");
}
196 
// Writes three FASTA records with a single writeRecords() call and compares
// the resulting file with the FASTA fixture.
SEQAN_DEFINE_TEST(test_seq_io_sequence_file_write_all_text_fasta)
{
    // Temporary output file name with a ".fa" suffix appended.
    seqan::CharString outPath(SEQAN_TEMP_FILENAME());
    append(outPath, ".fa");

    // Open the output file.
    SeqFileOut fastaOut(toCString(outPath));

    // Fill ids and sequences side by side, one record per pair of lines.
    seqan::StringSet<seqan::CharString> recordIds;
    seqan::StringSet<seqan::Dna5String> recordSeqs;
    appendValue(recordIds, "seq1");
    appendValue(recordSeqs, "CGATCGATAAT");
    appendValue(recordIds, "seq2");
    appendValue(recordSeqs, "CCTCTCTCTCCCT");
    appendValue(recordIds, "seq3");
    appendValue(recordSeqs, "CCCCCCCC");

    // Write everything in one go.
    writeRecords(fastaOut, recordIds, recordSeqs);

    // Close the file before it is read back for the comparison.
    close(fastaOut);

    seqan::CharString expectedPath(getAbsolutePath("/tests/seq_io/test_dna.fa"));
    SEQAN_ASSERT_MSG(seqan::_compareTextFilesAlt(toCString(expectedPath), toCString(outPath)), "Output should match example.");
}
223 
// Round-trips the FASTQ fixture: reads all records into Dna5QString sequences,
// writes them back one at a time with the three-argument writeRecord() (no
// separate quality container) and compares the output with the fixture.
SEQAN_DEFINE_TEST(test_seq_io_sequence_file_write_record_text_fastq_no_qual)
{
    // Build paths: temporary output file with ".fq" suffix, and the fixture
    // that serves both as input and as expected output.
    seqan::CharString filePath = SEQAN_TEMP_FILENAME();
    append(filePath, ".fq");
    seqan::CharString pathToExpected = getAbsolutePath("/tests/seq_io/test_dna.fq");

    // Open input and output files.
    SeqFileIn seqIn(toCString(pathToExpected));
    SeqFileOut seqOut(toCString(filePath));

    // Containers for the records read from the fixture.
    seqan::StringSet<seqan::CharString> ids;
    seqan::StringSet<seqan::Dna5QString> seqs;

    readRecords(ids, seqs, seqIn);

    // Exercise the single-record interface; the batch interface writeRecords()
    // is covered by test_seq_io_sequence_file_write_all_text_fastq_no_qual.
    SEQAN_ASSERT_EQ(length(ids), length(seqs));
    for (unsigned i = 0; i < length(ids); ++i)
        writeRecord(seqOut, ids[i], seqs[i]);

    close(seqIn);
    close(seqOut);  // Make sure we can read this later.

    SEQAN_ASSERT_MSG(seqan::_compareTextFilesAlt(toCString(pathToExpected), toCString(filePath)), "Output should match example.");
}
247 
// Writes three FASTQ records one at a time with the four-argument
// writeRecord() (id, sequence, separate quality string) and compares the
// resulting file with the FASTQ fixture.
SEQAN_DEFINE_TEST(test_seq_io_sequence_file_write_record_text_fastq_with_qual)
{
    // Build path to the temporary output file; the ".fq" suffix is appended
    // to the temporary file name.
    seqan::CharString filePath = SEQAN_TEMP_FILENAME();
    append(filePath, ".fq");

    // Open the output file.
    SeqFileOut seqIO(toCString(filePath));

    // Records to write.
    seqan::StringSet<seqan::CharString> ids;
    appendValue(ids, "seq1");
    appendValue(ids, "seq2");
    appendValue(ids, "seq3");
    seqan::StringSet<seqan::Dna5String> seqs;
    appendValue(seqs, "CGATCGATAAT");
    appendValue(seqs, "CCTCTCTCTCCCT");
    appendValue(seqs, "CCCCCCCC");
    seqan::StringSet<seqan::CharString> quals;
    appendValue(quals, "IIIIIIIIIII");
    appendValue(quals, "IIIIIIIIIIIII");
    appendValue(quals, "IIIIIIII");

    // Exercise the single-record interface; the batch interface writeRecords()
    // is covered by test_seq_io_sequence_file_write_all_text_fastq_with_qual.
    SEQAN_ASSERT_EQ(length(ids), length(seqs));
    SEQAN_ASSERT_EQ(length(ids), length(quals));
    for (unsigned i = 0; i < length(ids); ++i)
        writeRecord(seqIO, ids[i], seqs[i], quals[i]);

    close(seqIO);  // Make sure we can read this later.

    seqan::CharString pathToExpected = getAbsolutePath("/tests/seq_io/test_dna.fq");
    SEQAN_ASSERT_MSG(seqan::_compareTextFilesAlt(toCString(pathToExpected), toCString(filePath)), "Output should match example.");
}
278 
// Writes three records held as Dna5QString to a FASTQ file with a single
// three-argument writeRecords() call (no separate quality container) and
// compares the resulting file with the FASTQ fixture.
SEQAN_DEFINE_TEST(test_seq_io_sequence_file_write_all_text_fastq_no_qual)
{
    // Temporary output file name with a ".fq" suffix appended.
    seqan::CharString outPath(SEQAN_TEMP_FILENAME());
    append(outPath, ".fq");

    // Open the output file.
    SeqFileOut fastqOut(toCString(outPath));

    // Fill ids and sequences side by side, one record per pair of lines.
    seqan::StringSet<seqan::CharString> recordIds;
    seqan::StringSet<seqan::Dna5QString> recordSeqs;
    appendValue(recordIds, "seq1");
    appendValue(recordSeqs, "CGATCGATAAT");
    appendValue(recordIds, "seq2");
    appendValue(recordSeqs, "CCTCTCTCTCCCT");
    appendValue(recordIds, "seq3");
    appendValue(recordSeqs, "CCCCCCCC");

    // Write everything in one go.
    writeRecords(fastqOut, recordIds, recordSeqs);

    // Close the file before it is read back for the comparison.
    close(fastqOut);

    seqan::CharString expectedPath(getAbsolutePath("/tests/seq_io/test_dna.fq"));
    SEQAN_ASSERT_MSG(seqan::_compareTextFilesAlt(toCString(expectedPath), toCString(outPath)), "Output should match example.");
}
305 
// Writes three FASTQ records (ids, sequences and separate quality strings)
// with a single four-argument writeRecords() call and compares the resulting
// file with the FASTQ fixture.
SEQAN_DEFINE_TEST(test_seq_io_sequence_file_write_all_text_fastq_with_qual)
{
    // Temporary output file name with a ".fq" suffix appended.
    seqan::CharString outPath(SEQAN_TEMP_FILENAME());
    append(outPath, ".fq");

    // Open the output file.
    SeqFileOut fastqOut(toCString(outPath));

    // Fill ids, sequences and qualities side by side, one record per group.
    seqan::StringSet<seqan::CharString> recordIds;
    seqan::StringSet<seqan::Dna5String> recordSeqs;
    seqan::StringSet<seqan::CharString> recordQuals;

    appendValue(recordIds, "seq1");
    appendValue(recordSeqs, "CGATCGATAAT");
    appendValue(recordQuals, "IIIIIIIIIII");

    appendValue(recordIds, "seq2");
    appendValue(recordSeqs, "CCTCTCTCTCCCT");
    appendValue(recordQuals, "IIIIIIIIIIIII");

    appendValue(recordIds, "seq3");
    appendValue(recordSeqs, "CCCCCCCC");
    appendValue(recordQuals, "IIIIIIII");

    // Write everything in one go.
    writeRecords(fastqOut, recordIds, recordSeqs, recordQuals);

    // Close the file before it is read back for the comparison.
    close(fastqOut);

    seqan::CharString expectedPath(getAbsolutePath("/tests/seq_io/test_dna.fq"));
    SEQAN_ASSERT_MSG(seqan::_compareTextFilesAlt(toCString(expectedPath), toCString(outPath)), "Output should match example.");
}
336 
337 // ---------------------------------------------------------------------------
338 // Test isOpen() functionality.
339 // ---------------------------------------------------------------------------
340 
// Checks isOpen() on an input file across its life cycle: not open after
// default construction, open after open(), not open again after close().
SEQAN_DEFINE_TEST(test_seq_io_sequence_file_isOpen_fileIn)
{
    // Build path to file, using the same helper as the other tests in this
    // file instead of assembling the path by hand.
    seqan::CharString filePath = getAbsolutePath("/tests/seq_io/test_dna.fa");

    // A default-constructed input file is expected to report "not open".
    SeqFileIn seqI;
    SEQAN_ASSERT(!isOpen(seqI));

    // open file
    open(seqI, toCString(filePath));
    SEQAN_ASSERT(isOpen(seqI));

    // close file
    close(seqI);
    SEQAN_ASSERT(!isOpen(seqI));
}
359 
// Checks isOpen() on an output file across its life cycle: not open after
// default construction, open after open(), not open again after close().
SEQAN_DEFINE_TEST(test_seq_io_sequence_file_isOpen_fileOut)
{
    // Temporary output file name with a ".fa" suffix appended.
    seqan::CharString outPath(SEQAN_TEMP_FILENAME());
    append(outPath, ".fa");

    // Freshly default-constructed: must not report as open.
    SeqFileOut fastaOut;
    SEQAN_ASSERT_NOT(isOpen(fastaOut));

    // After open() the file must report as open.
    open(fastaOut, toCString(outPath));
    SEQAN_ASSERT(isOpen(fastaOut));

    // After close() it must report as not open again.
    close(fastaOut);
    SEQAN_ASSERT_NOT(isOpen(fastaOut));
}
378 
379 #endif  // TESTS_SEQ_IO_TEST_EASY_SEQ_IO_H_
380