1 /*
2 * Copyright (C) 2011-2013 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "VcfFileTest.h"
19 #include "VcfFileReader.h"
20 #include "VcfFileWriter.h"
21 #include "VcfHeaderTest.h"
22 #include <assert.h>
23
// Expected "#CHROM" header lines and kept-sample counts that testVcfReadFile()
// compares against after opening testFiles/vcfFile.vcf with a sample
// include or exclude list.
// NOTE(review): the column separators in these literals are reproduced exactly
// as they appear in this view (single spaces). VCF header columns are normally
// tab-delimited -- confirm the whitespace against the repository copy.

// Include list testFiles/subset1.txt.
const std::string HEADER_LINE_SUBSET1(
    "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002");
const int NUM_SAMPLES_SUBSET1(2);

// Include list testFiles/subset2.txt.
const std::string HEADER_LINE_SUBSET2(
    "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00002 NA00003");
const int NUM_SAMPLES_SUBSET2(2);

// Exclude list testFiles/subset1.txt.  There is no dedicated count constant
// for this case; the test derives it as NUM_SAMPLES - NUM_SAMPLES_SUBSET1.
const std::string HEADER_LINE_EXCLUDE_SUBSET1(
    "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00003");

// Exclude list testFiles/exclude2.txt.
const std::string HEADER_LINE_EXCLUDE2(
    "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00003");
const int NUM_SAMPLES_EXCLUDE2(2);
31
testVcfFile()32 void testVcfFile()
33 {
34 testVcfReadFile();
35 testVcfWriteFile();
36
37 testVcfReadSection();
38 testVcfReadSectionNoIndex();
39 testVcfReadSectionBadIndex();
40 }
41
42
testVcfReadFile()43 void testVcfReadFile()
44 {
45 // VcfFileHeader header;
46
    // Default-construct the reader, header, and record; no file is opened yet.
48 VcfFileReader reader;
49 VcfHeader header;
50 VcfRecord record;
51 // Try reading without opening.
52 bool caughtException = false;
53 try
54 {
55 assert(reader.readRecord(record) == false);
56 }
57 catch (std::exception& e)
58 {
59 caughtException = true;
60 }
61
62 assert(caughtException);
63
64
65 // Try opening a file that doesn't exist.
66 caughtException = false;
67 try
68 {
69 assert(reader.open("fileDoesNotExist.txt", header) == false);
70 }
71 catch (std::exception& e)
72 {
73 caughtException = true;
74 }
75 assert(caughtException);
76 // "testFiles/testVcf.vcf");
77 // assert(vcfInConstructorReadDefault.WriteHeader(header) == false);
78 // assert(vcfInConstructorReadDefault.ReadHeader(header) == true);
79
80 // // Test open for write via the constructor.
81 // VcfFile vcfInConstructorWrite("results/newWrite.vcf", VcfFile::WRITE,
82 // ErrorHandler::RETURN);
83 // assert(vcfInConstructorWrite.ReadHeader(header) == false);
84 // assert(vcfInConstructorWrite.WriteHeader(header) == true);
85
86 // // Test open for read via the constructor
87 // VcfFile vcfInConstructorRead("testFiles/testVcf.vcf", VcfFile::READ);
88 // bool caughtException = false;
89 // try
90 // {
91 // assert(vcfInConstructorRead.WriteHeader(header) == false);
92 // }
93 // catch (std::exception& e)
94 // {
95 // caughtException = true;
96 // }
97 // assert(caughtException);
98 // assert(vcfInConstructorRead.ReadHeader(header) == true);
99
100 // // Test open for write via child class.
101 // VcfFileWriter vcfWriteConstructor("results/newWrite1.vcf");
102 // caughtException = false;
103 // try
104 // {
105 // assert(vcfWriteConstructor.ReadHeader(header) == false);
106 // }
107 // catch (std::exception& e)
108 // {
109 // caughtException = true;
110 // }
111 // assert(caughtException);
112 // assert(vcfWriteConstructor.WriteHeader(header) == true);
113
114 // // Test open for read via child class.
115 // VcfFileReader vcfReadConstructor("testFiles/testVcf.vcf");
116 // caughtException = false;
117 // try
118 // {
119 // assert(vcfReadConstructor.WriteHeader(header) == false);
120 // }
121 // catch (std::exception& e)
122 // {
123 // caughtException = true;
124 // }
125 // assert(caughtException);
126 // assert(vcfReadConstructor.ReadHeader(header) == true);
127
128
129 ////////////////////////////////
130 // Test the subset logic.
131 VcfRecordGenotype* sampleInfo = NULL;
132
133 reader.open("testFiles/vcfFile.vcf", header, "testFiles/subset1.txt",
134 NULL, NULL, ";");
135
136 assert(header.getHeaderLine() == HEADER_LINE_SUBSET1);
137 assert(header.getNumSamples() == NUM_SAMPLES_SUBSET1);
138 assert(header.getSampleName(2) == NULL);
139 assert(header.getSampleName(0) == SAMPLES[0]);
140 assert(header.getSampleName(1) == SAMPLES[1]);
141 assert(header.getSampleIndex(SAMPLES[1].c_str()) == 1);
142 assert(header.getSampleIndex(SAMPLES[0].c_str()) == 0);
143 assert(header.getSampleIndex(SAMPLES[2].c_str()) == -1);
144
145 assert(strcmp(reader.getFileName(), "testFiles/vcfFile.vcf") == 0);
146
147 // Read the records to make sure they were subset.
148 assert(reader.readRecord(record));
149 assert(record.getGT(0,0) == 0);
150 assert(record.getGT(1,1) == 0);
151 assert(record.getGT(1,0) == 1);
152 assert(record.getGT(0,1) == 0);
153 assert(record.getGT(1,2) == VcfGenotypeSample::INVALID_GT);
154 assert(record.getGT(2,0) == VcfGenotypeSample::INVALID_GT);
155 assert(strcmp(record.getAlleles(0), "G") == 0);
156 assert(strcmp(record.getAlleles(1), "A") == 0);
157 assert(record.getIntAllele(0) == 3);
158 assert(record.getIntAllele(1) == 1);
159 assert(record.getNumAlts() == 1);
160 try
161 {
162 caughtException = false;
163 assert(record.getIntAllele(2) == 0);
164 }
165 catch (std::exception& e)
166 {
167 caughtException = true;
168 }
169 try
170 {
171 caughtException = false;
172 assert(record.getAlleles(2) == NULL);
173 }
174 catch (std::exception& e)
175 {
176 caughtException = true;
177 }
178 assert(caughtException);
179 assert(record.allPhased() == true);
180 assert(record.allUnphased() == false);
181 assert(record.hasAllGenotypeAlleles() == true);
182 sampleInfo = &(record.getGenotypeInfo());
183 assert(sampleInfo->getNumSamples() == 2);
184 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
185 assert(*(sampleInfo->getString("GT", 1)) == "1|0");
186 assert(sampleInfo->getString("GT", 2) == NULL);
187 assert(sampleInfo->isPhased(0) == true);
188 assert(sampleInfo->isPhased(1) == true);
189 assert(sampleInfo->isPhased(2) == false);
190 assert(sampleInfo->isUnphased(0) == false);
191 assert(sampleInfo->isUnphased(1) == false);
192 assert(sampleInfo->isUnphased(2) == false);
193 assert(record.passedAllFilters() == true);
194 assert(record.getNumAlts() == 1);
195
196 assert(reader.readRecord(record));
197 assert(record.getGT(0,0) == 0);
198 assert(record.getGT(1,1) == 1);
199 assert(record.getGT(1,0) == 0);
200 assert(record.getGT(0,1) == 0);
201 assert(record.getGT(1,2) == VcfGenotypeSample::INVALID_GT);
202 assert(record.getGT(2,0) == VcfGenotypeSample::INVALID_GT);
203 assert(record.getNumAlts() == 1);
204 assert(strcmp(record.getAlleles(0), "T") == 0);
205 assert(strcmp(record.getAlleles(1), "A") == 0);
206 try
207 {
208 caughtException = false;
209 assert(record.getAlleles(2) == NULL);
210 }
211 catch (std::exception& e)
212 {
213 caughtException = true;
214 }
215 assert(caughtException);
216 assert(record.allPhased() == false);
217 assert(record.allUnphased() == false);
218 assert(record.hasAllGenotypeAlleles() == true);
219 sampleInfo = &(record.getGenotypeInfo());
220 assert(sampleInfo->getNumSamples() == 2);
221 assert(*(sampleInfo->getString("GT", 0)) == "0/0");
222 assert(*(sampleInfo->getString("GT", 1)) == "0|1");
223 assert(sampleInfo->getString("GT", 2) == NULL);
224 assert(sampleInfo->isPhased(0) == false);
225 assert(sampleInfo->isPhased(1) == true);
226 assert(sampleInfo->isPhased(2) == false);
227 assert(sampleInfo->isUnphased(0) == true);
228 assert(sampleInfo->isUnphased(1) == false);
229 assert(sampleInfo->isUnphased(2) == false);
230 assert(record.passedAllFilters() == false);
231 assert(record.getNumAlts() == 1);
232
233 assert(reader.readRecord(record));
234 assert(record.getGT(0,0) == 1);
235 assert(record.getGT(1,1) == 1);
236 assert(record.getGT(1,0) == 2);
237 assert(record.getGT(0,1) == 2);
238 assert(record.getGT(1,2) == VcfGenotypeSample::INVALID_GT);
239 assert(record.getGT(2,0) == VcfGenotypeSample::INVALID_GT);
240 assert(strcmp(record.getAlleles(0), "A") == 0);
241 assert(strcmp(record.getAlleles(1), "G") == 0);
242 assert(strcmp(record.getAlleles(2), "T") == 0);
243 assert(record.getIntAllele(2) == 4);
244 assert(record.getNumAlts() == 2);
245 try
246 {
247 caughtException = false;
248 assert(record.getAlleles(3) == NULL);
249 }
250 catch (std::exception& e)
251 {
252 caughtException = true;
253 }
254 assert(caughtException);
255 assert(record.allPhased() == true);
256 assert(record.allUnphased() == false);
257 assert(record.hasAllGenotypeAlleles() == true);
258 sampleInfo = &(record.getGenotypeInfo());
259 assert(sampleInfo->getNumSamples() == 2);
260 assert(*(sampleInfo->getString("GT", 0)) == "1|2");
261 assert(*(sampleInfo->getString("GT", 1)) == "2|1");
262 assert(sampleInfo->getString("GT", 2) == NULL);
263 assert(sampleInfo->isPhased(0) == true);
264 assert(sampleInfo->isPhased(1) == true);
265 assert(sampleInfo->isPhased(2) == false);
266 assert(sampleInfo->isUnphased(0) == false);
267 assert(sampleInfo->isUnphased(1) == false);
268 assert(sampleInfo->isUnphased(2) == false);
269 assert(record.passedAllFilters() == true);
270 assert(record.getNumAlts() == 2);
271
272 assert(reader.readRecord(record));
273 assert(strcmp(record.getAlleles(0), "T") == 0);
274 try
275 {
276 caughtException = false;
277 assert(record.getAlleles(1) == NULL);
278 }
279 catch (std::exception& e)
280 {
281 caughtException = true;
282 }
283 assert(caughtException);
284 assert(record.getNumAlts() == 0);
285 assert(record.allPhased() == true);
286 assert(record.allUnphased() == false);
287 assert(record.hasAllGenotypeAlleles() == true);
288 sampleInfo = &(record.getGenotypeInfo());
289 assert(sampleInfo->getNumSamples() == 2);
290 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
291 assert(*(sampleInfo->getString("GT", 1)) == "0|0");
292 assert(sampleInfo->getString("GT", 2) == NULL);
293 assert(sampleInfo->isPhased(0) == true);
294 assert(sampleInfo->isPhased(1) == true);
295 assert(sampleInfo->isPhased(2) == false);
296 assert(sampleInfo->isUnphased(0) == false);
297 assert(sampleInfo->isUnphased(1) == false);
298 assert(sampleInfo->isUnphased(2) == false);
299 assert(record.passedAllFilters() == true);
300 assert(record.getNumAlts() == 0);
301
302 assert(reader.readRecord(record));
303 assert(strcmp(record.getAlleles(0), "GTC") == 0);
304 assert(record.getIntAllele(0) == 3);
305 assert(strcmp(record.getAlleles(1), "G") == 0);
306 assert(record.getIntAllele(1) == 3);
307 assert(record.getNumAlts() == 2);
308 assert(strcmp(record.getAlleles(2), "GTCT") == 0);
309 assert(record.getIntAllele(2) == 3);
310 try
311 {
312 caughtException = false;
313 assert(record.getAlleles(3) == NULL);
314 }
315 catch (std::exception& e)
316 {
317 caughtException = true;
318 }
319 assert(caughtException);
320 assert(record.allPhased() == false);
321 assert(record.allUnphased() == true);
322 assert(record.hasAllGenotypeAlleles() == true);
323 sampleInfo = &(record.getGenotypeInfo());
324 assert(sampleInfo->getNumSamples() == 2);
325 assert(*(sampleInfo->getString("GT", 0)) == "0/1");
326 assert(*(sampleInfo->getString("GT", 1)) == "0/2");
327 assert(sampleInfo->getString("GT", 2) == NULL);
328 assert(sampleInfo->isPhased(0) == false);
329 assert(sampleInfo->isPhased(1) == false);
330 assert(sampleInfo->isPhased(2) == false);
331 assert(sampleInfo->isUnphased(0) == true);
332 assert(sampleInfo->isUnphased(1) == true);
333 assert(sampleInfo->isUnphased(2) == false);
334 assert(record.passedAllFilters() == true);
335 assert(record.getNumAlts() == 2);
336
337 assert(reader.readRecord(record));
338 assert(strcmp(record.getAlleles(0), "GTC") == 0);
339 assert(record.getNumAlts() == 2);
340 assert(strcmp(record.getAlleles(1), "G") == 0);
341 assert(strcmp(record.getAlleles(2), "GTCT") == 0);
342 try
343 {
344 caughtException = false;
345 assert(record.getAlleles(3) == NULL);
346 }
347 catch (std::exception& e)
348 {
349 caughtException = true;
350 }
351 assert(caughtException);
352 assert(record.allPhased() == false);
353 assert(record.allUnphased() == false);
354 assert(record.hasAllGenotypeAlleles() == false);
355 sampleInfo = &(record.getGenotypeInfo());
356 assert(sampleInfo->getNumSamples() == 2);
357 assert(sampleInfo->getString("GT", 0) == NULL);
358 assert(sampleInfo->getString("GT", 1) == NULL);
359 assert(sampleInfo->getString("GT", 2) == NULL);
360 assert(sampleInfo->isPhased(0) == false);
361 assert(sampleInfo->isPhased(1) == false);
362 assert(sampleInfo->isPhased(2) == false);
363 assert(sampleInfo->isUnphased(0) == false);
364 assert(sampleInfo->isUnphased(1) == false);
365 assert(sampleInfo->isUnphased(2) == false);
366 assert(record.passedAllFilters() == true);
367 assert(record.getNumAlts() == 2);
368
369 assert(reader.readRecord(record));
370 assert(record.getGT(0,0) == 0);
371 assert(record.getGT(1,1) == VcfGenotypeSample::MISSING_GT);
372 assert(record.getGT(1,0) == 0);
373 assert(record.getGT(0,1) == 1);
374 assert(record.getGT(1,2) == VcfGenotypeSample::INVALID_GT);
375 assert(record.getGT(2,0) == VcfGenotypeSample::INVALID_GT);
376 assert(record.getNumAlts() == 1);
377 assert(strcmp(record.getAlleles(0), "GTC") == 0);
378 assert(strcmp(record.getAlleles(1), "G") == 0);
379 try
380 {
381 caughtException = false;
382 assert(record.getAlleles(2) == NULL);
383 }
384 catch (std::exception& e)
385 {
386 caughtException = true;
387 }
388 assert(caughtException);
389 assert(record.allPhased() == true);
390 assert(record.allUnphased() == false);
391 assert(record.hasAllGenotypeAlleles() == false);
392 sampleInfo = &(record.getGenotypeInfo());
393 assert(sampleInfo->getNumSamples() == 2);
394 assert(*(sampleInfo->getString("GT", 0)) == "0|1");
395 assert(*(sampleInfo->getString("GT", 1)) == "0|.");
396 assert(sampleInfo->getString("GT", 2) == NULL);
397 assert(sampleInfo->isPhased(0) == true);
398 assert(sampleInfo->isPhased(1) == true);
399 assert(sampleInfo->isPhased(2) == false);
400 assert(sampleInfo->isUnphased(0) == false);
401 assert(sampleInfo->isUnphased(1) == false);
402 assert(sampleInfo->isUnphased(2) == false);
403 assert(record.passedAllFilters() == true);
404 assert(record.getNumAlts() == 1);
405
406 assert(reader.readRecord(record) == false);
407
408 assert(reader.getNumKeptRecords() == 7);
409 assert(reader.getNumRecords() == 7);
410
411 reader.close();
412
413 //////////////////////////
414 // Subset with a different file.
415 reader.open("testFiles/vcfFile.vcf", header, "testFiles/subset2.txt",
416 NULL, NULL);
417
418 assert(header.getHeaderLine() == HEADER_LINE_SUBSET2);
419 assert(header.getNumSamples() == NUM_SAMPLES_SUBSET2);
420 assert(header.getSampleName(2) == NULL);
421 assert(header.getSampleName(0) == SAMPLES[1]);
422 assert(header.getSampleName(1) == SAMPLES[2]);
423 assert(header.getSampleIndex(SAMPLES[1].c_str()) == 0);
424 assert(header.getSampleIndex(SAMPLES[0].c_str()) == -1);
425 assert(header.getSampleIndex(SAMPLES[2].c_str()) == 1);
426
427 // Read the records to make sure they were subset.
428 assert(reader.readRecord(record));
429 assert(record.allPhased() == false);
430 assert(record.allUnphased() == false);
431 assert(record.hasAllGenotypeAlleles() == true);
432 sampleInfo = &(record.getGenotypeInfo());
433 assert(sampleInfo->getNumSamples() == 2);
434 assert(*(sampleInfo->getString("GT", 0)) == "1|0");
435 assert(*(sampleInfo->getString("GT", 1)) == "1/1");
436 assert(sampleInfo->getString("GT", 2) == NULL);
437 assert(sampleInfo->allPhased() == false);
438 assert(sampleInfo->allUnphased() == false);
439 assert(sampleInfo->hasAllGenotypeAlleles() == true);
440 assert(sampleInfo->isPhased(0) == true);
441 assert(sampleInfo->isPhased(1) == false);
442 assert(sampleInfo->isPhased(2) == false);
443 assert(sampleInfo->isUnphased(0) == false);
444 assert(sampleInfo->isUnphased(1) == true);
445 assert(sampleInfo->isUnphased(2) == false);
446
447 assert(reader.readRecord(record));
448 assert(record.allPhased() == false);
449 assert(record.allUnphased() == false);
450 assert(record.hasAllGenotypeAlleles() == true);
451 sampleInfo = &(record.getGenotypeInfo());
452 assert(sampleInfo->getNumSamples() == 2);
453 assert(*(sampleInfo->getString("GT", 0)) == "0|1");
454 assert(*(sampleInfo->getString("GT", 1)) == "0/0");
455 assert(sampleInfo->getString("GT", 2) == NULL);
456 assert(sampleInfo->allPhased() == false);
457 assert(sampleInfo->allUnphased() == false);
458 assert(sampleInfo->hasAllGenotypeAlleles() == true);
459 assert(sampleInfo->isPhased(0) == true);
460 assert(sampleInfo->isPhased(1) == false);
461 assert(sampleInfo->isPhased(2) == false);
462 assert(sampleInfo->isUnphased(0) == false);
463 assert(sampleInfo->isUnphased(1) == true);
464 assert(sampleInfo->isUnphased(2) == false);
465
466 assert(reader.readRecord(record));
467 assert(record.allPhased() == false);
468 assert(record.allUnphased() == false);
469 assert(record.hasAllGenotypeAlleles() == true);
470 sampleInfo = &(record.getGenotypeInfo());
471 assert(sampleInfo->getNumSamples() == 2);
472 assert(*(sampleInfo->getString("GT", 0)) == "2|1");
473 assert(*(sampleInfo->getString("GT", 1)) == "2/2");
474 assert(sampleInfo->getString("GT", 2) == NULL);
475 assert(sampleInfo->allPhased() == false);
476 assert(sampleInfo->allUnphased() == false);
477 assert(sampleInfo->hasAllGenotypeAlleles() == true);
478 assert(sampleInfo->isPhased(0) == true);
479 assert(sampleInfo->isPhased(1) == false);
480 assert(sampleInfo->isPhased(2) == false);
481 assert(sampleInfo->isUnphased(0) == false);
482 assert(sampleInfo->isUnphased(1) == true);
483 assert(sampleInfo->isUnphased(2) == false);
484
485 assert(reader.readRecord(record));
486 assert(record.allPhased() == false);
487 assert(record.allUnphased() == false);
488 assert(record.hasAllGenotypeAlleles() == true);
489 sampleInfo = &(record.getGenotypeInfo());
490 assert(sampleInfo->getNumSamples() == 2);
491 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
492 assert(*(sampleInfo->getString("GT", 1)) == "0/0");
493 assert(sampleInfo->getString("GT", 2) == NULL);
494 assert(sampleInfo->allPhased() == false);
495 assert(sampleInfo->allUnphased() == false);
496 assert(sampleInfo->hasAllGenotypeAlleles() == true);
497 assert(sampleInfo->isPhased(0) == true);
498 assert(sampleInfo->isPhased(1) == false);
499 assert(sampleInfo->isPhased(2) == false);
500 assert(sampleInfo->isUnphased(0) == false);
501 assert(sampleInfo->isUnphased(1) == true);
502 assert(sampleInfo->isUnphased(2) == false);
503
504 assert(reader.readRecord(record));
505 assert(record.allPhased() == false);
506 assert(record.allUnphased() == true);
507 assert(record.hasAllGenotypeAlleles() == true);
508 sampleInfo = &(record.getGenotypeInfo());
509 assert(sampleInfo->getNumSamples() == 2);
510 assert(*(sampleInfo->getString("GT", 0)) == "0/2");
511 assert(*(sampleInfo->getString("GT", 1)) == "1/1");
512 assert(sampleInfo->getString("GT", 2) == NULL);
513 assert(sampleInfo->allPhased() == false);
514 assert(sampleInfo->allUnphased() == true);
515 assert(sampleInfo->hasAllGenotypeAlleles() == true);
516 assert(sampleInfo->isPhased(0) == false);
517 assert(sampleInfo->isPhased(1) == false);
518 assert(sampleInfo->isPhased(2) == false);
519 assert(sampleInfo->isUnphased(0) == true);
520 assert(sampleInfo->isUnphased(1) == true);
521 assert(sampleInfo->isUnphased(2) == false);
522
523 assert(reader.readRecord(record));
524 assert(record.allPhased() == false);
525 assert(record.allUnphased() == false);
526 assert(record.hasAllGenotypeAlleles() == false);
527 sampleInfo = &(record.getGenotypeInfo());
528 assert(sampleInfo->getNumSamples() == 2);
529 assert(sampleInfo->getString("GT", 0) == NULL);
530 assert(sampleInfo->getString("GT", 1) == NULL);
531 assert(sampleInfo->getString("GT", 2) == NULL);
532 assert(sampleInfo->allPhased() == false);
533 assert(sampleInfo->allUnphased() == false);
534 assert(sampleInfo->hasAllGenotypeAlleles() == false);
535 assert(sampleInfo->isPhased(0) == false);
536 assert(sampleInfo->isPhased(1) == false);
537 assert(sampleInfo->isPhased(2) == false);
538 assert(sampleInfo->isUnphased(0) == false);
539 assert(sampleInfo->isUnphased(1) == false);
540 assert(sampleInfo->isUnphased(2) == false);
541
542 assert(reader.readRecord(record));
543 assert(record.allPhased() == true);
544 assert(record.allUnphased() == false);
545 assert(record.hasAllGenotypeAlleles() == false);
546 sampleInfo = &(record.getGenotypeInfo());
547 assert(sampleInfo->getNumSamples() == 2);
548 assert(*(sampleInfo->getString("GT", 0)) == "0|.");
549 assert(*(sampleInfo->getString("GT", 1)) == "1|1");
550 assert(sampleInfo->getString("GT", 2) == NULL);
551 assert(sampleInfo->allPhased() == true);
552 assert(sampleInfo->allUnphased() == false);
553 assert(sampleInfo->hasAllGenotypeAlleles() == false);
554 assert(sampleInfo->isPhased(0) == true);
555 assert(sampleInfo->isPhased(1) == true);
556 assert(sampleInfo->isPhased(2) == false);
557 assert(sampleInfo->isUnphased(0) == false);
558 assert(sampleInfo->isUnphased(1) == false);
559 assert(sampleInfo->isUnphased(2) == false);
560
561 assert(reader.readRecord(record) == false);
562
563 assert(reader.getNumKeptRecords() == 7);
564 assert(reader.getNumRecords() == 7);
565
566
567 //////////////////////////
568 // Subset using an exclude file
569 reader.open("testFiles/vcfFile.vcf", header, NULL, NULL,
570 "testFiles/subset1.txt", ";");
571
572 assert(header.getHeaderLine() == HEADER_LINE_EXCLUDE_SUBSET1);
573 assert(header.getNumSamples() == NUM_SAMPLES - NUM_SAMPLES_SUBSET1);
574 assert(header.getSampleName(2) == NULL);
575 assert(header.getSampleName(0) == SAMPLES[2]);
576 assert(header.getSampleName(1) == NULL);
577 assert(header.getSampleIndex(SAMPLES[1].c_str()) == -1);
578 assert(header.getSampleIndex(SAMPLES[0].c_str()) == -1);
579 assert(header.getSampleIndex(SAMPLES[2].c_str()) == 0);
580
581 // Read the records to make sure they were subset.
582 assert(reader.readRecord(record));
583 assert(record.allPhased() == false);
584 assert(record.allUnphased() == true);
585 assert(record.hasAllGenotypeAlleles() == true);
586 sampleInfo = &(record.getGenotypeInfo());
587 assert(sampleInfo->getNumSamples() == 1);
588 assert(*(sampleInfo->getString("GT", 0)) == "1/1");
589 assert(sampleInfo->getString("GT", 1) == NULL);
590 assert(sampleInfo->getString("GT", 2) == NULL);
591 assert(sampleInfo->allPhased() == false);
592 assert(sampleInfo->allUnphased() == true);
593 assert(sampleInfo->hasAllGenotypeAlleles() == true);
594 assert(sampleInfo->isPhased(0) == false);
595 assert(sampleInfo->isPhased(1) == false);
596 assert(sampleInfo->isPhased(2) == false);
597 assert(sampleInfo->isUnphased(0) == true);
598 assert(sampleInfo->isUnphased(1) == false);
599 assert(sampleInfo->isUnphased(2) == false);
600
601 assert(reader.readRecord(record));
602 assert(record.allPhased() == false);
603 assert(record.allUnphased() == true);
604 assert(record.hasAllGenotypeAlleles() == true);
605 sampleInfo = &(record.getGenotypeInfo());
606 assert(sampleInfo->getNumSamples() == 1);
607 assert(*(sampleInfo->getString("GT", 0)) == "0/0");
608 assert(sampleInfo->getString("GT", 1) == NULL);
609 assert(sampleInfo->getString("GT", 2) == NULL);
610 assert(sampleInfo->allPhased() == false);
611 assert(sampleInfo->allUnphased() == true);
612 assert(sampleInfo->hasAllGenotypeAlleles() == true);
613 assert(sampleInfo->isPhased(0) == false);
614 assert(sampleInfo->isPhased(1) == false);
615 assert(sampleInfo->isPhased(2) == false);
616 assert(sampleInfo->isUnphased(0) == true);
617 assert(sampleInfo->isUnphased(1) == false);
618 assert(sampleInfo->isUnphased(2) == false);
619
620 assert(reader.readRecord(record));
621 assert(record.allPhased() == false);
622 assert(record.allUnphased() == true);
623 assert(record.hasAllGenotypeAlleles() == true);
624 sampleInfo = &(record.getGenotypeInfo());
625 assert(sampleInfo->getNumSamples() == 1);
626 assert(*(sampleInfo->getString("GT", 0)) == "2/2");
627 assert(sampleInfo->getString("GT", 1) == NULL);
628 assert(sampleInfo->getString("GT", 2) == NULL);
629 assert(sampleInfo->allPhased() == false);
630 assert(sampleInfo->allUnphased() == true);
631 assert(sampleInfo->hasAllGenotypeAlleles() == true);
632 assert(sampleInfo->isPhased(0) == false);
633 assert(sampleInfo->isPhased(1) == false);
634 assert(sampleInfo->isPhased(2) == false);
635 assert(sampleInfo->isUnphased(0) == true);
636 assert(sampleInfo->isUnphased(1) == false);
637 assert(sampleInfo->isUnphased(2) == false);
638
639 assert(reader.readRecord(record));
640 assert(record.allPhased() == false);
641 assert(record.allUnphased() == true);
642 assert(record.hasAllGenotypeAlleles() == true);
643 sampleInfo = &(record.getGenotypeInfo());
644 assert(sampleInfo->getNumSamples() == 1);
645 assert(*(sampleInfo->getString("GT", 0)) == "0/0");
646 assert(sampleInfo->getString("GT", 1) == NULL);
647 assert(sampleInfo->getString("GT", 2) == NULL);
648 assert(sampleInfo->allPhased() == false);
649 assert(sampleInfo->allUnphased() == true);
650 assert(sampleInfo->hasAllGenotypeAlleles() == true);
651 assert(sampleInfo->isPhased(0) == false);
652 assert(sampleInfo->isPhased(1) == false);
653 assert(sampleInfo->isPhased(2) == false);
654 assert(sampleInfo->isUnphased(0) == true);
655 assert(sampleInfo->isUnphased(1) == false);
656 assert(sampleInfo->isUnphased(2) == false);
657
658 assert(reader.readRecord(record));
659 assert(record.allPhased() == false);
660 assert(record.allUnphased() == true);
661 assert(record.hasAllGenotypeAlleles() == true);
662 sampleInfo = &(record.getGenotypeInfo());
663 assert(sampleInfo->getNumSamples() == 1);
664 assert(*(sampleInfo->getString("GT", 0)) == "1/1");
665 assert(sampleInfo->getString("GT", 1) == NULL);
666 assert(sampleInfo->getString("GT", 2) == NULL);
667 assert(sampleInfo->allPhased() == false);
668 assert(sampleInfo->allUnphased() == true);
669 assert(sampleInfo->hasAllGenotypeAlleles() == true);
670 assert(sampleInfo->isPhased(0) == false);
671 assert(sampleInfo->isPhased(1) == false);
672 assert(sampleInfo->isPhased(2) == false);
673 assert(sampleInfo->isUnphased(0) == true);
674 assert(sampleInfo->isUnphased(1) == false);
675 assert(sampleInfo->isUnphased(2) == false);
676
677 assert(reader.readRecord(record));
678 assert(record.allPhased() == false);
679 assert(record.allUnphased() == false);
680 assert(record.hasAllGenotypeAlleles() == false);
681 sampleInfo = &(record.getGenotypeInfo());
682 assert(sampleInfo->getNumSamples() == 1);
683 assert(sampleInfo->getString("GT", 0) == NULL);
684 assert(sampleInfo->getString("GT", 1) == NULL);
685 assert(sampleInfo->getString("GT", 2) == NULL);
686 assert(sampleInfo->allPhased() == false);
687 assert(sampleInfo->allUnphased() == false);
688 assert(sampleInfo->hasAllGenotypeAlleles() == false);
689 assert(sampleInfo->isPhased(0) == false);
690 assert(sampleInfo->isPhased(1) == false);
691 assert(sampleInfo->isPhased(2) == false);
692 assert(sampleInfo->isUnphased(0) == false);
693 assert(sampleInfo->isUnphased(1) == false);
694 assert(sampleInfo->isUnphased(2) == false);
695
696 assert(reader.readRecord(record));
697 assert(record.allPhased() == true);
698 assert(record.allUnphased() == false);
699 assert(record.hasAllGenotypeAlleles() == true);
700 sampleInfo = &(record.getGenotypeInfo());
701 assert(sampleInfo->getNumSamples() == 1);
702 assert(*(sampleInfo->getString("GT", 0)) == "1|1");
703 assert(sampleInfo->getString("GT", 1) == NULL);
704 assert(sampleInfo->getString("GT", 2) == NULL);
705 assert(sampleInfo->allPhased() == true);
706 assert(sampleInfo->allUnphased() == false);
707 assert(sampleInfo->hasAllGenotypeAlleles() == true);
708 assert(sampleInfo->isPhased(0) == true);
709 assert(sampleInfo->isPhased(1) == false);
710 assert(sampleInfo->isPhased(2) == false);
711 assert(sampleInfo->isUnphased(0) == false);
712 assert(sampleInfo->isUnphased(1) == false);
713 assert(sampleInfo->isUnphased(2) == false);
714
715 assert(reader.readRecord(record) == false);
716
717 assert(reader.getNumKeptRecords() == 7);
718 assert(reader.getNumRecords() == 7);
719
720
721 //////////////////////////
722 // Subset with a different exclude.
723 reader.open("testFiles/vcfFile.vcf", header, NULL, NULL,
724 "testFiles/exclude2.txt");
725
726 assert(header.getHeaderLine() == HEADER_LINE_EXCLUDE2);
727 assert(header.getNumSamples() == NUM_SAMPLES_EXCLUDE2);
728 assert(header.getSampleName(2) == NULL);
729 assert(header.getSampleName(0) == SAMPLES[0]);
730 assert(header.getSampleName(1) == SAMPLES[2]);
731 assert(header.getSampleIndex(SAMPLES[1].c_str()) == -1);
732 assert(header.getSampleIndex(SAMPLES[0].c_str()) == 0);
733 assert(header.getSampleIndex(SAMPLES[2].c_str()) == 1);
734
735 // Read the records to make sure they were subset.
736 assert(reader.readRecord(record));
737 assert(record.allPhased() == false);
738 assert(record.allUnphased() == false);
739 assert(record.hasAllGenotypeAlleles() == true);
740 sampleInfo = &(record.getGenotypeInfo());
741 assert(sampleInfo->getNumSamples() == 2);
742 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
743 assert(*(sampleInfo->getString("GT", 1)) == "1/1");
744 assert(sampleInfo->getString("GT", 2) == NULL);
745 assert(sampleInfo->allPhased() == false);
746 assert(sampleInfo->allUnphased() == false);
747 assert(sampleInfo->hasAllGenotypeAlleles() == true);
748 assert(sampleInfo->isPhased(0) == true);
749 assert(sampleInfo->isPhased(1) == false);
750 assert(sampleInfo->isPhased(2) == false);
751 assert(sampleInfo->isUnphased(0) == false);
752 assert(sampleInfo->isUnphased(1) == true);
753 assert(sampleInfo->isUnphased(2) == false);
754
755 assert(reader.readRecord(record));
756 assert(record.allPhased() == false);
757 assert(record.allUnphased() == true);
758 assert(record.hasAllGenotypeAlleles() == true);
759 sampleInfo = &(record.getGenotypeInfo());
760 assert(sampleInfo->getNumSamples() == 2);
761 assert(*(sampleInfo->getString("GT", 0)) == "0/0");
762 assert(*(sampleInfo->getString("GT", 1)) == "0/0");
763 assert(sampleInfo->getString("GT", 2) == NULL);
764 assert(sampleInfo->allPhased() == false);
765 assert(sampleInfo->allUnphased() == true);
766 assert(sampleInfo->hasAllGenotypeAlleles() == true);
767 assert(sampleInfo->isPhased(0) == false);
768 assert(sampleInfo->isPhased(1) == false);
769 assert(sampleInfo->isPhased(2) == false);
770 assert(sampleInfo->isUnphased(0) == true);
771 assert(sampleInfo->isUnphased(1) == true);
772 assert(sampleInfo->isUnphased(2) == false);
773
774 assert(reader.readRecord(record));
775 assert(record.allPhased() == false);
776 assert(record.allUnphased() == false);
777 assert(record.hasAllGenotypeAlleles() == true);
778 sampleInfo = &(record.getGenotypeInfo());
779 assert(sampleInfo->getNumSamples() == 2);
780 assert(*(sampleInfo->getString("GT", 0)) == "1|2");
781 assert(*(sampleInfo->getString("GT", 1)) == "2/2");
782 assert(sampleInfo->getString("GT", 2) == NULL);
783 assert(sampleInfo->allPhased() == false);
784 assert(sampleInfo->allUnphased() == false);
785 assert(sampleInfo->hasAllGenotypeAlleles() == true);
786 assert(sampleInfo->isPhased(0) == true);
787 assert(sampleInfo->isPhased(1) == false);
788 assert(sampleInfo->isPhased(2) == false);
789 assert(sampleInfo->isUnphased(0) == false);
790 assert(sampleInfo->isUnphased(1) == true);
791 assert(sampleInfo->isUnphased(2) == false);
792
793 assert(reader.readRecord(record));
794 assert(record.allPhased() == false);
795 assert(record.allUnphased() == false);
796 assert(record.hasAllGenotypeAlleles() == true);
797 sampleInfo = &(record.getGenotypeInfo());
798 assert(sampleInfo->getNumSamples() == 2);
799 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
800 assert(*(sampleInfo->getString("GT", 1)) == "0/0");
801 assert(sampleInfo->getString("GT", 2) == NULL);
802 assert(sampleInfo->allPhased() == false);
803 assert(sampleInfo->allUnphased() == false);
804 assert(sampleInfo->hasAllGenotypeAlleles() == true);
805 assert(sampleInfo->isPhased(0) == true);
806 assert(sampleInfo->isPhased(1) == false);
807 assert(sampleInfo->isPhased(2) == false);
808 assert(sampleInfo->isUnphased(0) == false);
809 assert(sampleInfo->isUnphased(1) == true);
810 assert(sampleInfo->isUnphased(2) == false);
811
812 assert(reader.readRecord(record));
813 assert(record.allPhased() == false);
814 assert(record.allUnphased() == true);
815 assert(record.hasAllGenotypeAlleles() == true);
816 sampleInfo = &(record.getGenotypeInfo());
817 assert(sampleInfo->getNumSamples() == 2);
818 assert(*(sampleInfo->getString("GT", 0)) == "0/1");
819 assert(*(sampleInfo->getString("GT", 1)) == "1/1");
820 assert(sampleInfo->getString("GT", 2) == NULL);
821 assert(sampleInfo->allPhased() == false);
822 assert(sampleInfo->allUnphased() == true);
823 assert(sampleInfo->hasAllGenotypeAlleles() == true);
824 assert(sampleInfo->isPhased(0) == false);
825 assert(sampleInfo->isPhased(1) == false);
826 assert(sampleInfo->isPhased(2) == false);
827 assert(sampleInfo->isUnphased(0) == true);
828 assert(sampleInfo->isUnphased(1) == true);
829 assert(sampleInfo->isUnphased(2) == false);
830
831 assert(reader.readRecord(record));
832 assert(record.allPhased() == false);
833 assert(record.allUnphased() == false);
834 assert(record.hasAllGenotypeAlleles() == false);
835 sampleInfo = &(record.getGenotypeInfo());
836 assert(sampleInfo->getNumSamples() == 2);
837 assert(sampleInfo->getString("GT", 0) == NULL);
838 assert(sampleInfo->getString("GT", 1) == NULL);
839 assert(sampleInfo->getString("GT", 2) == NULL);
840 assert(sampleInfo->allPhased() == false);
841 assert(sampleInfo->allUnphased() == false);
842 assert(sampleInfo->hasAllGenotypeAlleles() == false);
843 assert(sampleInfo->isPhased(0) == false);
844 assert(sampleInfo->isPhased(1) == false);
845 assert(sampleInfo->isPhased(2) == false);
846 assert(sampleInfo->isUnphased(0) == false);
847 assert(sampleInfo->isUnphased(1) == false);
848 assert(sampleInfo->isUnphased(2) == false);
849
850 assert(reader.readRecord(record));
851 assert(record.allPhased() == true);
852 assert(record.allUnphased() == false);
853 assert(record.hasAllGenotypeAlleles() == true);
854 sampleInfo = &(record.getGenotypeInfo());
855 assert(sampleInfo->getNumSamples() == 2);
856 assert(*(sampleInfo->getString("GT", 0)) == "0|1");
857 assert(*(sampleInfo->getString("GT", 1)) == "1|1");
858 assert(sampleInfo->getString("GT", 2) == NULL);
859 assert(sampleInfo->allPhased() == true);
860 assert(sampleInfo->allUnphased() == false);
861 assert(sampleInfo->hasAllGenotypeAlleles() == true);
862 assert(sampleInfo->isPhased(0) == true);
863 assert(sampleInfo->isPhased(1) == true);
864 assert(sampleInfo->isPhased(2) == false);
865 assert(sampleInfo->isUnphased(0) == false);
866 assert(sampleInfo->isUnphased(1) == false);
867 assert(sampleInfo->isUnphased(2) == false);
868
869 assert(reader.readRecord(record) == false);
870
871 assert(reader.getNumKeptRecords() == 7);
872 assert(reader.getNumRecords() == 7);
873
874
875 //////////////////////////
876 // Subset with an exclude sample.
877 reader.open("testFiles/vcfFile.vcf", header, NULL, "NA00002",
878 NULL);
879
880 assert(header.getHeaderLine() == HEADER_LINE_EXCLUDE2);
881 assert(header.getNumSamples() == NUM_SAMPLES_EXCLUDE2);
882 assert(header.getSampleName(2) == NULL);
883 assert(header.getSampleName(0) == SAMPLES[0]);
884 assert(header.getSampleName(1) == SAMPLES[2]);
885 assert(header.getSampleIndex(SAMPLES[1].c_str()) == -1);
886 assert(header.getSampleIndex(SAMPLES[0].c_str()) == 0);
887 assert(header.getSampleIndex(SAMPLES[2].c_str()) == 1);
888
889 // Read the records to make sure they were subset.
890 assert(reader.readRecord(record));
891 assert(record.allPhased() == false);
892 assert(record.allUnphased() == false);
893 assert(record.hasAllGenotypeAlleles() == true);
894 sampleInfo = &(record.getGenotypeInfo());
895 assert(sampleInfo->getNumSamples() == 2);
896 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
897 assert(*(sampleInfo->getString("GT", 1)) == "1/1");
898 assert(sampleInfo->getString("GT", 2) == NULL);
899 assert(sampleInfo->allPhased() == false);
900 assert(sampleInfo->allUnphased() == false);
901 assert(sampleInfo->hasAllGenotypeAlleles() == true);
902 assert(sampleInfo->isPhased(0) == true);
903 assert(sampleInfo->isPhased(1) == false);
904 assert(sampleInfo->isPhased(2) == false);
905 assert(sampleInfo->isUnphased(0) == false);
906 assert(sampleInfo->isUnphased(1) == true);
907 assert(sampleInfo->isUnphased(2) == false);
908
909 assert(reader.readRecord(record));
910 assert(record.allPhased() == false);
911 assert(record.allUnphased() == true);
912 assert(record.hasAllGenotypeAlleles() == true);
913 sampleInfo = &(record.getGenotypeInfo());
914 assert(sampleInfo->getNumSamples() == 2);
915 assert(*(sampleInfo->getString("GT", 0)) == "0/0");
916 assert(*(sampleInfo->getString("GT", 1)) == "0/0");
917 assert(sampleInfo->getString("GT", 2) == NULL);
918 assert(sampleInfo->allPhased() == false);
919 assert(sampleInfo->allUnphased() == true);
920 assert(sampleInfo->hasAllGenotypeAlleles() == true);
921 assert(sampleInfo->isPhased(0) == false);
922 assert(sampleInfo->isPhased(1) == false);
923 assert(sampleInfo->isPhased(2) == false);
924 assert(sampleInfo->isUnphased(0) == true);
925 assert(sampleInfo->isUnphased(1) == true);
926 assert(sampleInfo->isUnphased(2) == false);
927
928 assert(reader.readRecord(record));
929 assert(record.allPhased() == false);
930 assert(record.allUnphased() == false);
931 assert(record.hasAllGenotypeAlleles() == true);
932 sampleInfo = &(record.getGenotypeInfo());
933 assert(sampleInfo->getNumSamples() == 2);
934 assert(*(sampleInfo->getString("GT", 0)) == "1|2");
935 assert(*(sampleInfo->getString("GT", 1)) == "2/2");
936 assert(sampleInfo->getString("GT", 2) == NULL);
937 assert(sampleInfo->allPhased() == false);
938 assert(sampleInfo->allUnphased() == false);
939 assert(sampleInfo->hasAllGenotypeAlleles() == true);
940 assert(sampleInfo->isPhased(0) == true);
941 assert(sampleInfo->isPhased(1) == false);
942 assert(sampleInfo->isPhased(2) == false);
943 assert(sampleInfo->isUnphased(0) == false);
944 assert(sampleInfo->isUnphased(1) == true);
945 assert(sampleInfo->isUnphased(2) == false);
946
947 assert(reader.readRecord(record));
948 assert(record.allPhased() == false);
949 assert(record.allUnphased() == false);
950 assert(record.hasAllGenotypeAlleles() == true);
951 sampleInfo = &(record.getGenotypeInfo());
952 assert(sampleInfo->getNumSamples() == 2);
953 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
954 assert(*(sampleInfo->getString("GT", 1)) == "0/0");
955 assert(sampleInfo->getString("GT", 2) == NULL);
956 assert(sampleInfo->allPhased() == false);
957 assert(sampleInfo->allUnphased() == false);
958 assert(sampleInfo->hasAllGenotypeAlleles() == true);
959 assert(sampleInfo->isPhased(0) == true);
960 assert(sampleInfo->isPhased(1) == false);
961 assert(sampleInfo->isPhased(2) == false);
962 assert(sampleInfo->isUnphased(0) == false);
963 assert(sampleInfo->isUnphased(1) == true);
964 assert(sampleInfo->isUnphased(2) == false);
965
966 assert(reader.readRecord(record));
967 assert(record.allPhased() == false);
968 assert(record.allUnphased() == true);
969 assert(record.hasAllGenotypeAlleles() == true);
970 sampleInfo = &(record.getGenotypeInfo());
971 assert(sampleInfo->getNumSamples() == 2);
972 assert(*(sampleInfo->getString("GT", 0)) == "0/1");
973 assert(*(sampleInfo->getString("GT", 1)) == "1/1");
974 assert(sampleInfo->getString("GT", 2) == NULL);
975 assert(sampleInfo->allPhased() == false);
976 assert(sampleInfo->allUnphased() == true);
977 assert(sampleInfo->hasAllGenotypeAlleles() == true);
978 assert(sampleInfo->isPhased(0) == false);
979 assert(sampleInfo->isPhased(1) == false);
980 assert(sampleInfo->isPhased(2) == false);
981 assert(sampleInfo->isUnphased(0) == true);
982 assert(sampleInfo->isUnphased(1) == true);
983 assert(sampleInfo->isUnphased(2) == false);
984
985 assert(reader.readRecord(record));
986 assert(record.allPhased() == false);
987 assert(record.allUnphased() == false);
988 assert(record.hasAllGenotypeAlleles() == false);
989 sampleInfo = &(record.getGenotypeInfo());
990 assert(sampleInfo->getNumSamples() == 2);
991 assert(sampleInfo->getString("GT", 0) == NULL);
992 assert(sampleInfo->getString("GT", 1) == NULL);
993 assert(sampleInfo->getString("GT", 2) == NULL);
994 assert(sampleInfo->allPhased() == false);
995 assert(sampleInfo->allUnphased() == false);
996 assert(sampleInfo->hasAllGenotypeAlleles() == false);
997 assert(sampleInfo->isPhased(0) == false);
998 assert(sampleInfo->isPhased(1) == false);
999 assert(sampleInfo->isPhased(2) == false);
1000 assert(sampleInfo->isUnphased(0) == false);
1001 assert(sampleInfo->isUnphased(1) == false);
1002 assert(sampleInfo->isUnphased(2) == false);
1003
1004 assert(reader.readRecord(record));
1005 assert(record.allPhased() == true);
1006 assert(record.allUnphased() == false);
1007 assert(record.hasAllGenotypeAlleles() == true);
1008 sampleInfo = &(record.getGenotypeInfo());
1009 assert(sampleInfo->getNumSamples() == 2);
1010 assert(*(sampleInfo->getString("GT", 0)) == "0|1");
1011 assert(*(sampleInfo->getString("GT", 1)) == "1|1");
1012 assert(sampleInfo->getString("GT", 2) == NULL);
1013 assert(sampleInfo->allPhased() == true);
1014 assert(sampleInfo->allUnphased() == false);
1015 assert(sampleInfo->hasAllGenotypeAlleles() == true);
1016 assert(sampleInfo->isPhased(0) == true);
1017 assert(sampleInfo->isPhased(1) == true);
1018 assert(sampleInfo->isPhased(2) == false);
1019 assert(sampleInfo->isUnphased(0) == false);
1020 assert(sampleInfo->isUnphased(1) == false);
1021 assert(sampleInfo->isUnphased(2) == false);
1022
1023 assert(reader.readRecord(record) == false);
1024
1025 assert(reader.getNumKeptRecords() == 7);
1026 assert(reader.getNumRecords() == 7);
1027
1028
1029 //////////////////////////
1030 // Subset using an exclude file and exclude sample.
1031 reader.open("testFiles/vcfFile.vcf", header, NULL, "NA00001",
1032 "testFiles/exclude2.txt");
1033
1034 assert(header.getHeaderLine() == HEADER_LINE_EXCLUDE_SUBSET1);
1035 assert(header.getNumSamples() == NUM_SAMPLES - NUM_SAMPLES_SUBSET1);
1036 assert(header.getSampleName(2) == NULL);
1037 assert(header.getSampleName(0) == SAMPLES[2]);
1038 assert(header.getSampleName(1) == NULL);
1039 assert(header.getSampleIndex(SAMPLES[1].c_str()) == -1);
1040 assert(header.getSampleIndex(SAMPLES[0].c_str()) == -1);
1041 assert(header.getSampleIndex(SAMPLES[2].c_str()) == 0);
1042
1043 // Read the records to make sure they were subset.
1044 assert(reader.readRecord(record));
1045 assert(record.allPhased() == false);
1046 assert(record.allUnphased() == true);
1047 assert(record.hasAllGenotypeAlleles() == true);
1048 sampleInfo = &(record.getGenotypeInfo());
1049 assert(sampleInfo->getNumSamples() == 1);
1050 assert(*(sampleInfo->getString("GT", 0)) == "1/1");
1051 assert(sampleInfo->getString("GT", 1) == NULL);
1052 assert(sampleInfo->getString("GT", 2) == NULL);
1053 assert(sampleInfo->allPhased() == false);
1054 assert(sampleInfo->allUnphased() == true);
1055 assert(sampleInfo->hasAllGenotypeAlleles() == true);
1056 assert(sampleInfo->isPhased(0) == false);
1057 assert(sampleInfo->isPhased(1) == false);
1058 assert(sampleInfo->isPhased(2) == false);
1059 assert(sampleInfo->isUnphased(0) == true);
1060 assert(sampleInfo->isUnphased(1) == false);
1061 assert(sampleInfo->isUnphased(2) == false);
1062
1063 assert(reader.readRecord(record));
1064 assert(record.allPhased() == false);
1065 assert(record.allUnphased() == true);
1066 assert(record.hasAllGenotypeAlleles() == true);
1067 sampleInfo = &(record.getGenotypeInfo());
1068 assert(sampleInfo->getNumSamples() == 1);
1069 assert(*(sampleInfo->getString("GT", 0)) == "0/0");
1070 assert(sampleInfo->getString("GT", 1) == NULL);
1071 assert(sampleInfo->getString("GT", 2) == NULL);
1072 assert(sampleInfo->allPhased() == false);
1073 assert(sampleInfo->allUnphased() == true);
1074 assert(sampleInfo->hasAllGenotypeAlleles() == true);
1075 assert(sampleInfo->isPhased(0) == false);
1076 assert(sampleInfo->isPhased(1) == false);
1077 assert(sampleInfo->isPhased(2) == false);
1078 assert(sampleInfo->isUnphased(0) == true);
1079 assert(sampleInfo->isUnphased(1) == false);
1080 assert(sampleInfo->isUnphased(2) == false);
1081
1082 assert(reader.readRecord(record));
1083 assert(record.allPhased() == false);
1084 assert(record.allUnphased() == true);
1085 assert(record.hasAllGenotypeAlleles() == true);
1086 sampleInfo = &(record.getGenotypeInfo());
1087 assert(sampleInfo->getNumSamples() == 1);
1088 assert(*(sampleInfo->getString("GT", 0)) == "2/2");
1089 assert(sampleInfo->getString("GT", 1) == NULL);
1090 assert(sampleInfo->getString("GT", 2) == NULL);
1091 assert(sampleInfo->allPhased() == false);
1092 assert(sampleInfo->allUnphased() == true);
1093 assert(sampleInfo->hasAllGenotypeAlleles() == true);
1094 assert(sampleInfo->isPhased(0) == false);
1095 assert(sampleInfo->isPhased(1) == false);
1096 assert(sampleInfo->isPhased(2) == false);
1097 assert(sampleInfo->isUnphased(0) == true);
1098 assert(sampleInfo->isUnphased(1) == false);
1099 assert(sampleInfo->isUnphased(2) == false);
1100
1101 assert(reader.readRecord(record));
1102 assert(record.allPhased() == false);
1103 assert(record.allUnphased() == true);
1104 assert(record.hasAllGenotypeAlleles() == true);
1105 sampleInfo = &(record.getGenotypeInfo());
1106 assert(sampleInfo->getNumSamples() == 1);
1107 assert(*(sampleInfo->getString("GT", 0)) == "0/0");
1108 assert(sampleInfo->getString("GT", 1) == NULL);
1109 assert(sampleInfo->getString("GT", 2) == NULL);
1110 assert(sampleInfo->allPhased() == false);
1111 assert(sampleInfo->allUnphased() == true);
1112 assert(sampleInfo->hasAllGenotypeAlleles() == true);
1113 assert(sampleInfo->isPhased(0) == false);
1114 assert(sampleInfo->isPhased(1) == false);
1115 assert(sampleInfo->isPhased(2) == false);
1116 assert(sampleInfo->isUnphased(0) == true);
1117 assert(sampleInfo->isUnphased(1) == false);
1118 assert(sampleInfo->isUnphased(2) == false);
1119
1120 assert(reader.readRecord(record));
1121 assert(record.allPhased() == false);
1122 assert(record.allUnphased() == true);
1123 assert(record.hasAllGenotypeAlleles() == true);
1124 sampleInfo = &(record.getGenotypeInfo());
1125 assert(sampleInfo->getNumSamples() == 1);
1126 assert(*(sampleInfo->getString("GT", 0)) == "1/1");
1127 assert(sampleInfo->getString("GT", 1) == NULL);
1128 assert(sampleInfo->getString("GT", 2) == NULL);
1129 assert(sampleInfo->allPhased() == false);
1130 assert(sampleInfo->allUnphased() == true);
1131 assert(sampleInfo->hasAllGenotypeAlleles() == true);
1132 assert(sampleInfo->isPhased(0) == false);
1133 assert(sampleInfo->isPhased(1) == false);
1134 assert(sampleInfo->isPhased(2) == false);
1135 assert(sampleInfo->isUnphased(0) == true);
1136 assert(sampleInfo->isUnphased(1) == false);
1137 assert(sampleInfo->isUnphased(2) == false);
1138
1139 assert(reader.readRecord(record));
1140 assert(record.allPhased() == false);
1141 assert(record.allUnphased() == false);
1142 assert(record.hasAllGenotypeAlleles() == false);
1143 sampleInfo = &(record.getGenotypeInfo());
1144 assert(sampleInfo->getNumSamples() == 1);
1145 assert(sampleInfo->getString("GT", 0) == NULL);
1146 assert(sampleInfo->getString("GT", 1) == NULL);
1147 assert(sampleInfo->getString("GT", 2) == NULL);
1148 assert(sampleInfo->allPhased() == false);
1149 assert(sampleInfo->allUnphased() == false);
1150 assert(sampleInfo->hasAllGenotypeAlleles() == false);
1151 assert(sampleInfo->isPhased(0) == false);
1152 assert(sampleInfo->isPhased(1) == false);
1153 assert(sampleInfo->isPhased(2) == false);
1154 assert(sampleInfo->isUnphased(0) == false);
1155 assert(sampleInfo->isUnphased(1) == false);
1156 assert(sampleInfo->isUnphased(2) == false);
1157
1158 assert(reader.readRecord(record));
1159 assert(record.allPhased() == true);
1160 assert(record.allUnphased() == false);
1161 assert(record.hasAllGenotypeAlleles() == true);
1162 sampleInfo = &(record.getGenotypeInfo());
1163 assert(sampleInfo->getNumSamples() == 1);
1164 assert(*(sampleInfo->getString("GT", 0)) == "1|1");
1165 assert(sampleInfo->getString("GT", 1) == NULL);
1166 assert(sampleInfo->getString("GT", 2) == NULL);
1167 assert(sampleInfo->allPhased() == true);
1168 assert(sampleInfo->allUnphased() == false);
1169 assert(sampleInfo->hasAllGenotypeAlleles() == true);
1170 assert(sampleInfo->isPhased(0) == true);
1171 assert(sampleInfo->isPhased(1) == false);
1172 assert(sampleInfo->isPhased(2) == false);
1173 assert(sampleInfo->isUnphased(0) == false);
1174 assert(sampleInfo->isUnphased(1) == false);
1175 assert(sampleInfo->isUnphased(2) == false);
1176
1177 assert(reader.readRecord(record) == false);
1178
1179 assert(reader.getNumKeptRecords() == 7);
1180 assert(reader.getNumRecords() == 7);
1181
1182
1183 //////////////////////////
1184 // Subset using an exclude file and exclude sample.
1185 // Add variant discard
1186 reader.open("testFiles/vcfFile.vcf", header, NULL, "NA00001",
1187 "testFiles/exclude2.txt");
1188 reader.setExcludeIDs("testFiles/excludeIDs.txt");
1189
1190 assert(header.getHeaderLine() == HEADER_LINE_EXCLUDE_SUBSET1);
1191 assert(header.getNumSamples() == NUM_SAMPLES - NUM_SAMPLES_SUBSET1);
1192 assert(header.getSampleName(2) == NULL);
1193 assert(header.getSampleName(0) == SAMPLES[2]);
1194 assert(header.getSampleName(1) == NULL);
1195 assert(header.getSampleIndex(SAMPLES[1].c_str()) == -1);
1196 assert(header.getSampleIndex(SAMPLES[0].c_str()) == -1);
1197 assert(header.getSampleIndex(SAMPLES[2].c_str()) == 0);
1198
1199 // Read the records to make sure they were subset.
1200 assert(reader.readRecord(record));
1201 assert(strcmp(record.getIDStr(), ".") == 0);
1202 assert(record.allPhased() == false);
1203 assert(record.allUnphased() == true);
1204 assert(record.hasAllGenotypeAlleles() == true);
1205 sampleInfo = &(record.getGenotypeInfo());
1206 assert(sampleInfo->getNumSamples() == 1);
1207 assert(*(sampleInfo->getString("GT", 0)) == "0/0");
1208 assert(sampleInfo->getString("GT", 1) == NULL);
1209 assert(sampleInfo->getString("GT", 2) == NULL);
1210 assert(sampleInfo->allPhased() == false);
1211 assert(sampleInfo->allUnphased() == true);
1212 assert(sampleInfo->hasAllGenotypeAlleles() == true);
1213 assert(sampleInfo->isPhased(0) == false);
1214 assert(sampleInfo->isPhased(1) == false);
1215 assert(sampleInfo->isPhased(2) == false);
1216 assert(sampleInfo->isUnphased(0) == true);
1217 assert(sampleInfo->isUnphased(1) == false);
1218 assert(sampleInfo->isUnphased(2) == false);
1219
1220 assert(reader.readRecord(record));
1221 assert(strcmp(record.getIDStr(), "rs6040355") == 0);
1222 assert(record.allPhased() == false);
1223 assert(record.allUnphased() == true);
1224 assert(record.hasAllGenotypeAlleles() == true);
1225 sampleInfo = &(record.getGenotypeInfo());
1226 assert(sampleInfo->getNumSamples() == 1);
1227 assert(*(sampleInfo->getString("GT", 0)) == "2/2");
1228 assert(sampleInfo->getString("GT", 1) == NULL);
1229 assert(sampleInfo->getString("GT", 2) == NULL);
1230 assert(sampleInfo->allPhased() == false);
1231 assert(sampleInfo->allUnphased() == true);
1232 assert(sampleInfo->hasAllGenotypeAlleles() == true);
1233 assert(sampleInfo->isPhased(0) == false);
1234 assert(sampleInfo->isPhased(1) == false);
1235 assert(sampleInfo->isPhased(2) == false);
1236 assert(sampleInfo->isUnphased(0) == true);
1237 assert(sampleInfo->isUnphased(1) == false);
1238 assert(sampleInfo->isUnphased(2) == false);
1239
1240 assert(reader.readRecord(record));
1241 assert(strcmp(record.getIDStr(), ".") == 0);
1242 assert(record.allPhased() == false);
1243 assert(record.allUnphased() == true);
1244 assert(record.hasAllGenotypeAlleles() == true);
1245 sampleInfo = &(record.getGenotypeInfo());
1246 assert(sampleInfo->getNumSamples() == 1);
1247 assert(*(sampleInfo->getString("GT", 0)) == "0/0");
1248 assert(sampleInfo->getString("GT", 1) == NULL);
1249 assert(sampleInfo->getString("GT", 2) == NULL);
1250 assert(sampleInfo->allPhased() == false);
1251 assert(sampleInfo->allUnphased() == true);
1252 assert(sampleInfo->hasAllGenotypeAlleles() == true);
1253 assert(sampleInfo->isPhased(0) == false);
1254 assert(sampleInfo->isPhased(1) == false);
1255 assert(sampleInfo->isPhased(2) == false);
1256 assert(sampleInfo->isUnphased(0) == true);
1257 assert(sampleInfo->isUnphased(1) == false);
1258 assert(sampleInfo->isUnphased(2) == false);
1259
1260 assert(reader.readRecord(record) == false);
1261
1262 assert(reader.getNumKeptRecords() == 3);
1263 assert(reader.getNumRecords() == 7);
1264
1265 //////////////////////////
1266 // Subset using an exclude file and exclude sample.
1267 // Add variant include filtering (setIncludeIDs) rather than discard.
1268 reader.open("testFiles/vcfFile.vcf", header, NULL, "NA00001",
1269 "testFiles/exclude2.txt");
1270 reader.setIncludeIDs("testFiles/includeIDs.txt");
1271
1272 assert(header.getHeaderLine() == HEADER_LINE_EXCLUDE_SUBSET1);
1273 assert(header.getNumSamples() == NUM_SAMPLES - NUM_SAMPLES_SUBSET1);
1274 assert(header.getSampleName(2) == NULL);
1275 assert(header.getSampleName(0) == SAMPLES[2]);
1276 assert(header.getSampleName(1) == NULL);
1277 assert(header.getSampleIndex(SAMPLES[1].c_str()) == -1);
1278 assert(header.getSampleIndex(SAMPLES[0].c_str()) == -1);
1279 assert(header.getSampleIndex(SAMPLES[2].c_str()) == 0);
1280
1281 // Read the records to make sure they were subset.
1282 assert(reader.readRecord(record));
1283 assert(strcmp(record.getIDStr(), ".") == 0);
1284 assert(record.allPhased() == false);
1285 assert(record.allUnphased() == true);
1286 assert(record.hasAllGenotypeAlleles() == true);
1287 sampleInfo = &(record.getGenotypeInfo());
1288 assert(sampleInfo->getNumSamples() == 1);
1289 assert(*(sampleInfo->getString("GT", 0)) == "0/0");
1290 assert(sampleInfo->getString("GT", 1) == NULL);
1291 assert(sampleInfo->getString("GT", 2) == NULL);
1292 assert(sampleInfo->allPhased() == false);
1293 assert(sampleInfo->allUnphased() == true);
1294 assert(sampleInfo->hasAllGenotypeAlleles() == true);
1295 assert(sampleInfo->isPhased(0) == false);
1296 assert(sampleInfo->isPhased(1) == false);
1297 assert(sampleInfo->isPhased(2) == false);
1298 assert(sampleInfo->isUnphased(0) == true);
1299 assert(sampleInfo->isUnphased(1) == false);
1300 assert(sampleInfo->isUnphased(2) == false);
1301
1302 assert(reader.readRecord(record));
1303 assert(strcmp(record.getIDStr(), "rs6040355") == 0);
1304 assert(record.allPhased() == false);
1305 assert(record.allUnphased() == true);
1306 assert(record.hasAllGenotypeAlleles() == true);
1307 sampleInfo = &(record.getGenotypeInfo());
1308 assert(sampleInfo->getNumSamples() == 1);
1309 assert(*(sampleInfo->getString("GT", 0)) == "2/2");
1310 assert(sampleInfo->getString("GT", 1) == NULL);
1311 assert(sampleInfo->getString("GT", 2) == NULL);
1312 assert(sampleInfo->allPhased() == false);
1313 assert(sampleInfo->allUnphased() == true);
1314 assert(sampleInfo->hasAllGenotypeAlleles() == true);
1315 assert(sampleInfo->isPhased(0) == false);
1316 assert(sampleInfo->isPhased(1) == false);
1317 assert(sampleInfo->isPhased(2) == false);
1318 assert(sampleInfo->isUnphased(0) == true);
1319 assert(sampleInfo->isUnphased(1) == false);
1320 assert(sampleInfo->isUnphased(2) == false);
1321
1322 assert(reader.readRecord(record));
1323 assert(strcmp(record.getIDStr(), ".") == 0);
1324 assert(record.allPhased() == false);
1325 assert(record.allUnphased() == true);
1326 assert(record.hasAllGenotypeAlleles() == true);
1327 sampleInfo = &(record.getGenotypeInfo());
1328 assert(sampleInfo->getNumSamples() == 1);
1329 assert(*(sampleInfo->getString("GT", 0)) == "0/0");
1330 assert(sampleInfo->getString("GT", 1) == NULL);
1331 assert(sampleInfo->getString("GT", 2) == NULL);
1332 assert(sampleInfo->allPhased() == false);
1333 assert(sampleInfo->allUnphased() == true);
1334 assert(sampleInfo->hasAllGenotypeAlleles() == true);
1335 assert(sampleInfo->isPhased(0) == false);
1336 assert(sampleInfo->isPhased(1) == false);
1337 assert(sampleInfo->isPhased(2) == false);
1338 assert(sampleInfo->isUnphased(0) == true);
1339 assert(sampleInfo->isUnphased(1) == false);
1340 assert(sampleInfo->isUnphased(2) == false);
1341
1342 assert(reader.readRecord(record) == false);
1343
1344 assert(reader.getNumKeptRecords() == 3);
1345 assert(reader.getNumRecords() == 7);
1346
1347 //////////////////////////
1348 // Add in discarding non-phased.
1349 reader.setDiscardRules(VcfFileReader::DISCARD_NON_PHASED);
1350 reader.open("testFiles/vcfFile.vcf", header, "testFiles/subset1.txt",
1351 NULL, NULL, ";");
1352
1353 assert(header.getHeaderLine() == HEADER_LINE_SUBSET1);
1354 assert(header.getNumSamples() == NUM_SAMPLES_SUBSET1);
1355 assert(header.getSampleName(2) == NULL);
1356 assert(header.getSampleName(0) == SAMPLES[0]);
1357 assert(header.getSampleName(1) == SAMPLES[1]);
1358 assert(header.getSampleIndex(SAMPLES[1].c_str()) == 1);
1359 assert(header.getSampleIndex(SAMPLES[0].c_str()) == 0);
1360 assert(header.getSampleIndex(SAMPLES[2].c_str()) == -1);
1361
1362 // Read the records to make sure they were subset.
1363 assert(reader.readRecord(record));
1364 sampleInfo = &(record.getGenotypeInfo());
1365 assert(sampleInfo->getNumSamples() == 2);
1366 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
1367 assert(*(sampleInfo->getString("GT", 1)) == "1|0");
1368 assert(sampleInfo->getString("GT", 2) == NULL);
1369 assert(sampleInfo->isPhased(0) == true);
1370 assert(sampleInfo->isPhased(1) == true);
1371 assert(sampleInfo->isPhased(2) == false);
1372 assert(sampleInfo->isUnphased(0) == false);
1373 assert(sampleInfo->isUnphased(1) == false);
1374 assert(sampleInfo->isUnphased(2) == false);
1375
1376 assert(reader.readRecord(record));
1377 sampleInfo = &(record.getGenotypeInfo());
1378 assert(sampleInfo->getNumSamples() == 2);
1379 assert(*(sampleInfo->getString("GT", 0)) == "1|2");
1380 assert(*(sampleInfo->getString("GT", 1)) == "2|1");
1381 assert(sampleInfo->getString("GT", 2) == NULL);
1382 assert(sampleInfo->isPhased(0) == true);
1383 assert(sampleInfo->isPhased(1) == true);
1384 assert(sampleInfo->isPhased(2) == false);
1385 assert(sampleInfo->isUnphased(0) == false);
1386 assert(sampleInfo->isUnphased(1) == false);
1387 assert(sampleInfo->isUnphased(2) == false);
1388
1389 assert(reader.readRecord(record));
1390 sampleInfo = &(record.getGenotypeInfo());
1391 assert(sampleInfo->getNumSamples() == 2);
1392 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
1393 assert(*(sampleInfo->getString("GT", 1)) == "0|0");
1394 assert(sampleInfo->getString("GT", 2) == NULL);
1395 assert(sampleInfo->isPhased(0) == true);
1396 assert(sampleInfo->isPhased(1) == true);
1397 assert(sampleInfo->isPhased(2) == false);
1398 assert(sampleInfo->isUnphased(0) == false);
1399 assert(sampleInfo->isUnphased(1) == false);
1400 assert(sampleInfo->isUnphased(2) == false);
1401
1402 assert(reader.readRecord(record));
1403 sampleInfo = &(record.getGenotypeInfo());
1404 assert(sampleInfo->getNumSamples() == 2);
1405 assert(*(sampleInfo->getString("GT", 0)) == "0|1");
1406 assert(*(sampleInfo->getString("GT", 1)) == "0|.");
1407 assert(sampleInfo->getString("GT", 2) == NULL);
1408 assert(sampleInfo->isPhased(0) == true);
1409 assert(sampleInfo->isPhased(1) == true);
1410 assert(sampleInfo->isPhased(2) == false);
1411 assert(sampleInfo->isUnphased(0) == false);
1412 assert(sampleInfo->isUnphased(1) == false);
1413 assert(sampleInfo->isUnphased(2) == false);
1414
1415 assert(reader.readRecord(record) == false);
1416
1417 assert(reader.getNumKeptRecords() == 4);
1418 assert(reader.getNumRecords() == 7);
1419
1420 reader.close();
1421
1422 //////////////////////////
1423 // Discard missing GTs.
1424 reader.setDiscardRules(VcfFileReader::DISCARD_MISSING_GT);
1425 reader.open("testFiles/vcfFile.vcf", header, "testFiles/subset1.txt",
1426 NULL, NULL, ";");
1427
1428 assert(header.getHeaderLine() == HEADER_LINE_SUBSET1);
1429 assert(header.getNumSamples() == NUM_SAMPLES_SUBSET1);
1430 assert(header.getSampleName(2) == NULL);
1431 assert(header.getSampleName(0) == SAMPLES[0]);
1432 assert(header.getSampleName(1) == SAMPLES[1]);
1433 assert(header.getSampleIndex(SAMPLES[1].c_str()) == 1);
1434 assert(header.getSampleIndex(SAMPLES[0].c_str()) == 0);
1435 assert(header.getSampleIndex(SAMPLES[2].c_str()) == -1);
1436
1437 // Read the records to make sure they were subset.
1438 assert(reader.readRecord(record));
1439 sampleInfo = &(record.getGenotypeInfo());
1440 assert(sampleInfo->getNumSamples() == 2);
1441 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
1442 assert(*(sampleInfo->getString("GT", 1)) == "1|0");
1443 assert(sampleInfo->getString("GT", 2) == NULL);
1444 assert(sampleInfo->isPhased(0) == true);
1445 assert(sampleInfo->isPhased(1) == true);
1446 assert(sampleInfo->isPhased(2) == false);
1447 assert(sampleInfo->isUnphased(0) == false);
1448 assert(sampleInfo->isUnphased(1) == false);
1449 assert(sampleInfo->isUnphased(2) == false);
1450
1451 assert(reader.readRecord(record));
1452 sampleInfo = &(record.getGenotypeInfo());
1453 assert(sampleInfo->getNumSamples() == 2);
1454 assert(*(sampleInfo->getString("GT", 0)) == "0/0");
1455 assert(*(sampleInfo->getString("GT", 1)) == "0|1");
1456 assert(sampleInfo->getString("GT", 2) == NULL);
1457 assert(sampleInfo->isPhased(0) == false);
1458 assert(sampleInfo->isPhased(1) == true);
1459 assert(sampleInfo->isPhased(2) == false);
1460 assert(sampleInfo->isUnphased(0) == true);
1461 assert(sampleInfo->isUnphased(1) == false);
1462 assert(sampleInfo->isUnphased(2) == false);
1463
1464 assert(reader.readRecord(record));
1465 sampleInfo = &(record.getGenotypeInfo());
1466 assert(sampleInfo->getNumSamples() == 2);
1467 assert(*(sampleInfo->getString("GT", 0)) == "1|2");
1468 assert(*(sampleInfo->getString("GT", 1)) == "2|1");
1469 assert(sampleInfo->getString("GT", 2) == NULL);
1470 assert(sampleInfo->isPhased(0) == true);
1471 assert(sampleInfo->isPhased(1) == true);
1472 assert(sampleInfo->isPhased(2) == false);
1473 assert(sampleInfo->isUnphased(0) == false);
1474 assert(sampleInfo->isUnphased(1) == false);
1475 assert(sampleInfo->isUnphased(2) == false);
1476
1477 assert(reader.readRecord(record));
1478 sampleInfo = &(record.getGenotypeInfo());
1479 assert(sampleInfo->getNumSamples() == 2);
1480 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
1481 assert(*(sampleInfo->getString("GT", 1)) == "0|0");
1482 assert(sampleInfo->getString("GT", 2) == NULL);
1483 assert(sampleInfo->isPhased(0) == true);
1484 assert(sampleInfo->isPhased(1) == true);
1485 assert(sampleInfo->isPhased(2) == false);
1486 assert(sampleInfo->isUnphased(0) == false);
1487 assert(sampleInfo->isUnphased(1) == false);
1488 assert(sampleInfo->isUnphased(2) == false);
1489
1490 assert(reader.readRecord(record));
1491 sampleInfo = &(record.getGenotypeInfo());
1492 assert(sampleInfo->getNumSamples() == 2);
1493 assert(*(sampleInfo->getString("GT", 0)) == "0/1");
1494 assert(*(sampleInfo->getString("GT", 1)) == "0/2");
1495 assert(sampleInfo->getString("GT", 2) == NULL);
1496 assert(sampleInfo->isPhased(0) == false);
1497 assert(sampleInfo->isPhased(1) == false);
1498 assert(sampleInfo->isPhased(2) == false);
1499 assert(sampleInfo->isUnphased(0) == true);
1500 assert(sampleInfo->isUnphased(1) == true);
1501 assert(sampleInfo->isUnphased(2) == false);
1502
1503 assert(reader.readRecord(record) == false);
1504
1505 assert(reader.getNumKeptRecords() == 5);
1506 assert(reader.getNumRecords() == 7);
1507
1508 //////////////////////////
1509 // Discard missing GTs & non-Phased.
1510 reader.setDiscardRules(VcfFileReader::DISCARD_MISSING_GT |
1511 VcfFileReader::DISCARD_NON_PHASED);
1512 reader.open("testFiles/vcfFile.vcf", header, "testFiles/subset1.txt",
1513 NULL, NULL, ";");
1514
1515 assert(header.getHeaderLine() == HEADER_LINE_SUBSET1);
1516 assert(header.getNumSamples() == NUM_SAMPLES_SUBSET1);
1517 assert(header.getSampleName(2) == NULL);
1518 assert(header.getSampleName(0) == SAMPLES[0]);
1519 assert(header.getSampleName(1) == SAMPLES[1]);
1520 assert(header.getSampleIndex(SAMPLES[1].c_str()) == 1);
1521 assert(header.getSampleIndex(SAMPLES[0].c_str()) == 0);
1522 assert(header.getSampleIndex(SAMPLES[2].c_str()) == -1);
1523
1524 // Read the records to make sure they were subset.
1525 assert(reader.readRecord(record));
1526 sampleInfo = &(record.getGenotypeInfo());
1527 assert(sampleInfo->getNumSamples() == 2);
1528 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
1529 assert(*(sampleInfo->getString("GT", 1)) == "1|0");
1530 assert(sampleInfo->getString("GT", 2) == NULL);
1531 assert(sampleInfo->isPhased(0) == true);
1532 assert(sampleInfo->isPhased(1) == true);
1533 assert(sampleInfo->isPhased(2) == false);
1534 assert(sampleInfo->isUnphased(0) == false);
1535 assert(sampleInfo->isUnphased(1) == false);
1536 assert(sampleInfo->isUnphased(2) == false);
1537
1538 assert(reader.readRecord(record));
1539 sampleInfo = &(record.getGenotypeInfo());
1540 assert(sampleInfo->getNumSamples() == 2);
1541 assert(*(sampleInfo->getString("GT", 0)) == "1|2");
1542 assert(*(sampleInfo->getString("GT", 1)) == "2|1");
1543 assert(sampleInfo->getString("GT", 2) == NULL);
1544 assert(sampleInfo->isPhased(0) == true);
1545 assert(sampleInfo->isPhased(1) == true);
1546 assert(sampleInfo->isPhased(2) == false);
1547 assert(sampleInfo->isUnphased(0) == false);
1548 assert(sampleInfo->isUnphased(1) == false);
1549 assert(sampleInfo->isUnphased(2) == false);
1550
1551 assert(reader.readRecord(record));
1552 sampleInfo = &(record.getGenotypeInfo());
1553 assert(sampleInfo->getNumSamples() == 2);
1554 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
1555 assert(*(sampleInfo->getString("GT", 1)) == "0|0");
1556 assert(sampleInfo->getString("GT", 2) == NULL);
1557 assert(sampleInfo->isPhased(0) == true);
1558 assert(sampleInfo->isPhased(1) == true);
1559 assert(sampleInfo->isPhased(2) == false);
1560 assert(sampleInfo->isUnphased(0) == false);
1561 assert(sampleInfo->isUnphased(1) == false);
1562 assert(sampleInfo->isUnphased(2) == false);
1563
1564 assert(reader.readRecord(record) == false);
1565
1566 assert(reader.getNumKeptRecords() == 3);
1567 assert(reader.getNumRecords() == 7);
1568
1569 reader.close();
1570
1571
1572 ////////////////////////////////
1573 // Test Discarding filtered without subsetting.
1574 reader.open("testFiles/vcfFile.vcf", header);
1575
1576 reader.setDiscardRules(VcfFileReader::DISCARD_FILTERED);
1577
1578 assert(header.getHeaderLine() == HEADER_LINE);
1579 assert(header.getNumSamples() == NUM_SAMPLES);
1580 assert(header.getSampleName(2) == SAMPLES[2]);
1581 assert(header.getSampleName(0) == SAMPLES[0]);
1582 assert(header.getSampleName(1) == SAMPLES[1]);
1583 assert(header.getSampleIndex(SAMPLES[1].c_str()) == 1);
1584 assert(header.getSampleIndex(SAMPLES[0].c_str()) == 0);
1585 assert(header.getSampleIndex(SAMPLES[2].c_str()) == 2);
1586
1587 // Read the records.
1588 assert(reader.readRecord(record));
1589 assert(record.allPhased() == false);
1590 assert(record.allUnphased() == false);
1591 assert(record.hasAllGenotypeAlleles() == true);
1592 sampleInfo = &(record.getGenotypeInfo());
1593 assert(sampleInfo->getNumSamples() == 3);
1594 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
1595 assert(*(sampleInfo->getString("GT", 1)) == "1|0");
1596 assert(*(sampleInfo->getString("GT", 2)) == "1/1");
1597 assert(sampleInfo->isPhased(0) == true);
1598 assert(sampleInfo->isPhased(1) == true);
1599 assert(sampleInfo->isPhased(2) == false);
1600 assert(sampleInfo->isUnphased(0) == false);
1601 assert(sampleInfo->isUnphased(1) == false);
1602 assert(sampleInfo->isUnphased(2) == true);
1603 assert(record.passedAllFilters() == true);
1604 assert(record.getNumAlts() == 1);
1605
1606 assert(reader.readRecord(record));
1607 assert(record.allPhased() == false);
1608 assert(record.allUnphased() == false);
1609 assert(record.hasAllGenotypeAlleles() == true);
1610 sampleInfo = &(record.getGenotypeInfo());
1611 assert(sampleInfo->getNumSamples() == 3);
1612 assert(*(sampleInfo->getString("GT", 0)) == "1|2");
1613 assert(*(sampleInfo->getString("GT", 1)) == "2|1");
1614 assert(*(sampleInfo->getString("GT", 2)) == "2/2");
1615 assert(sampleInfo->isPhased(0) == true);
1616 assert(sampleInfo->isPhased(1) == true);
1617 assert(sampleInfo->isPhased(2) == false);
1618 assert(sampleInfo->isUnphased(0) == false);
1619 assert(sampleInfo->isUnphased(1) == false);
1620 assert(sampleInfo->isUnphased(2) == true);
1621 assert(record.passedAllFilters() == true);
1622 assert(record.getNumAlts() == 2);
1623
1624 assert(reader.readRecord(record));
1625 assert(record.allPhased() == false);
1626 assert(record.allUnphased() == false);
1627 assert(record.hasAllGenotypeAlleles() == true);
1628 sampleInfo = &(record.getGenotypeInfo());
1629 assert(sampleInfo->getNumSamples() == 3);
1630 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
1631 assert(*(sampleInfo->getString("GT", 1)) == "0|0");
1632 assert(*(sampleInfo->getString("GT", 2)) == "0/0");
1633 assert(sampleInfo->isPhased(0) == true);
1634 assert(sampleInfo->isPhased(1) == true);
1635 assert(sampleInfo->isPhased(2) == false);
1636 assert(sampleInfo->isUnphased(0) == false);
1637 assert(sampleInfo->isUnphased(1) == false);
1638 assert(sampleInfo->isUnphased(2) == true);
1639 assert(record.passedAllFilters() == true);
1640 assert(record.getNumAlts() == 0);
1641
1642 assert(reader.readRecord(record));
1643 assert(record.allPhased() == false);
1644 assert(record.allUnphased() == true);
1645 assert(record.hasAllGenotypeAlleles() == true);
1646 sampleInfo = &(record.getGenotypeInfo());
1647 assert(sampleInfo->getNumSamples() == 3);
1648 assert(*(sampleInfo->getString("GT", 0)) == "0/1");
1649 assert(*(sampleInfo->getString("GT", 1)) == "0/2");
1650 assert(*(sampleInfo->getString("GT", 2)) == "1/1");
1651 assert(sampleInfo->isPhased(0) == false);
1652 assert(sampleInfo->isPhased(1) == false);
1653 assert(sampleInfo->isPhased(2) == false);
1654 assert(sampleInfo->isUnphased(0) == true);
1655 assert(sampleInfo->isUnphased(1) == true);
1656 assert(sampleInfo->isUnphased(2) == true);
1657 assert(record.passedAllFilters() == true);
1658 assert(record.getNumAlts() == 2);
1659
1660 assert(reader.readRecord(record));
1661 assert(record.allPhased() == false);
1662 assert(record.allUnphased() == false);
1663 assert(record.hasAllGenotypeAlleles() == false);
1664 sampleInfo = &(record.getGenotypeInfo());
1665 assert(sampleInfo->getNumSamples() == 3);
1666 assert(sampleInfo->getString("GT", 0) == NULL);
1667 assert(sampleInfo->getString("GT", 1) == NULL);
1668 assert(sampleInfo->getString("GT", 2) == NULL);
1669 assert(sampleInfo->isPhased(0) == false);
1670 assert(sampleInfo->isPhased(1) == false);
1671 assert(sampleInfo->isPhased(2) == false);
1672 assert(sampleInfo->isUnphased(0) == false);
1673 assert(sampleInfo->isUnphased(1) == false);
1674 assert(sampleInfo->isUnphased(2) == false);
1675 assert(record.passedAllFilters() == true);
1676 assert(record.getNumAlts() == 2);
1677
1678 assert(reader.readRecord(record));
1679 assert(record.allPhased() == true);
1680 assert(record.allUnphased() == false);
1681 assert(record.hasAllGenotypeAlleles() == false);
1682 sampleInfo = &(record.getGenotypeInfo());
1683 assert(sampleInfo->getNumSamples() == 3);
1684 assert(*(sampleInfo->getString("GT", 0)) == "0|1");
1685 assert(*(sampleInfo->getString("GT", 1)) == "0|.");
1686 assert(*(sampleInfo->getString("GT", 2)) == "1|1");
1687 assert(sampleInfo->isPhased(0) == true);
1688 assert(sampleInfo->isPhased(1) == true);
1689 assert(sampleInfo->isPhased(2) == true);
1690 assert(sampleInfo->isUnphased(0) == false);
1691 assert(sampleInfo->isUnphased(1) == false);
1692 assert(sampleInfo->isUnphased(2) == false);
1693 assert(record.passedAllFilters() == true);
1694 assert(record.getNumAlts() == 1);
1695
1696 assert(reader.readRecord(record) == false);
1697
1698 assert(reader.getNumKeptRecords() == 6);
1699 assert(reader.getNumRecords() == 7);
1700
1701 reader.close();
1702
1703 ////////////////////////////////
1704 // Test Discarding multiple Alts without subsetting.
1705 reader.open("testFiles/vcfFile.vcf", header);
1706
1707 reader.setDiscardRules(VcfFileReader::DISCARD_MULTIPLE_ALTS);
1708
1709 assert(header.getHeaderLine() == HEADER_LINE);
1710 assert(header.getNumSamples() == NUM_SAMPLES);
1711 assert(header.getSampleName(2) == SAMPLES[2]);
1712 assert(header.getSampleName(0) == SAMPLES[0]);
1713 assert(header.getSampleName(1) == SAMPLES[1]);
1714 assert(header.getSampleIndex(SAMPLES[1].c_str()) == 1);
1715 assert(header.getSampleIndex(SAMPLES[0].c_str()) == 0);
1716 assert(header.getSampleIndex(SAMPLES[2].c_str()) == 2);
1717
1718 // Read the records.
1719 assert(reader.readRecord(record));
1720 assert(record.allPhased() == false);
1721 assert(record.allUnphased() == false);
1722 assert(record.hasAllGenotypeAlleles() == true);
1723 sampleInfo = &(record.getGenotypeInfo());
1724 assert(sampleInfo->getNumSamples() == 3);
1725 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
1726 assert(*(sampleInfo->getString("GT", 1)) == "1|0");
1727 assert(*(sampleInfo->getString("GT", 2)) == "1/1");
1728 assert(sampleInfo->isPhased(0) == true);
1729 assert(sampleInfo->isPhased(1) == true);
1730 assert(sampleInfo->isPhased(2) == false);
1731 assert(sampleInfo->isUnphased(0) == false);
1732 assert(sampleInfo->isUnphased(1) == false);
1733 assert(sampleInfo->isUnphased(2) == true);
1734 assert(record.passedAllFilters() == true);
1735 assert(record.getNumAlts() == 1);
1736
1737 assert(reader.readRecord(record));
1738 assert(record.allPhased() == false);
1739 assert(record.allUnphased() == false);
1740 assert(record.hasAllGenotypeAlleles() == true);
1741 sampleInfo = &(record.getGenotypeInfo());
1742 assert(sampleInfo->getNumSamples() == 3);
1743 assert(*(sampleInfo->getString("GT", 0)) == "0/0");
1744 assert(*(sampleInfo->getString("GT", 1)) == "0|1");
1745 assert(*(sampleInfo->getString("GT", 2)) == "0/0");
1746 assert(sampleInfo->isPhased(0) == false);
1747 assert(sampleInfo->isPhased(1) == true);
1748 assert(sampleInfo->isPhased(2) == false);
1749 assert(sampleInfo->isUnphased(0) == true);
1750 assert(sampleInfo->isUnphased(1) == false);
1751 assert(sampleInfo->isUnphased(2) == true);
1752 assert(record.passedAllFilters() == false);
1753 assert(record.getNumAlts() == 1);
1754
1755 assert(reader.readRecord(record));
1756 assert(record.allPhased() == false);
1757 assert(record.allUnphased() == false);
1758 assert(record.hasAllGenotypeAlleles() == true);
1759 sampleInfo = &(record.getGenotypeInfo());
1760 assert(sampleInfo->getNumSamples() == 3);
1761 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
1762 assert(*(sampleInfo->getString("GT", 1)) == "0|0");
1763 assert(*(sampleInfo->getString("GT", 2)) == "0/0");
1764 assert(sampleInfo->isPhased(0) == true);
1765 assert(sampleInfo->isPhased(1) == true);
1766 assert(sampleInfo->isPhased(2) == false);
1767 assert(sampleInfo->isUnphased(0) == false);
1768 assert(sampleInfo->isUnphased(1) == false);
1769 assert(sampleInfo->isUnphased(2) == true);
1770 assert(record.passedAllFilters() == true);
1771 assert(record.getNumAlts() == 0);
1772
1773 assert(reader.readRecord(record));
1774 assert(record.getGT(0,0) == 0);
1775 assert(record.getGT(1,1) == VcfGenotypeSample::MISSING_GT);
1776 assert(record.getGT(1,0) == 0);
1777 assert(record.getGT(0,1) == 1);
1778 assert(record.getGT(2,0) == 1);
1779 assert(record.getGT(2,1) == 1);
1780 assert(record.getGT(1,2) == VcfGenotypeSample::INVALID_GT);
1781 assert(record.getGT(3,0) == VcfGenotypeSample::INVALID_GT);
1782 assert(record.allPhased() == true);
1783 assert(record.allUnphased() == false);
1784 assert(record.hasAllGenotypeAlleles() == false);
1785 sampleInfo = &(record.getGenotypeInfo());
1786 assert(sampleInfo->getNumSamples() == 3);
1787 assert(*(sampleInfo->getString("GT", 0)) == "0|1");
1788 assert(*(sampleInfo->getString("GT", 1)) == "0|.");
1789 assert(*(sampleInfo->getString("GT", 2)) == "1|1");
1790 assert(sampleInfo->isPhased(0) == true);
1791 assert(sampleInfo->isPhased(1) == true);
1792 assert(sampleInfo->isPhased(2) == true);
1793 assert(sampleInfo->isUnphased(0) == false);
1794 assert(sampleInfo->isUnphased(1) == false);
1795 assert(sampleInfo->isUnphased(2) == false);
1796 assert(record.passedAllFilters() == true);
1797 assert(record.getNumAlts() == 1);
1798
1799 assert(reader.readRecord(record) == false);
1800
1801 assert(reader.getNumKeptRecords() == 4);
1802 assert(reader.getNumRecords() == 7);
1803
1804 reader.close();
1805
1806 ////////////////////////////////
1807 // Test subsetting and discarding multiple Alts, filter failures,
1808 // non-phased, and missing genotypes.
1809 reader.open("testFiles/vcfFile.vcf", header);
1810
1811 reader.setDiscardRules(VcfFileReader::DISCARD_MULTIPLE_ALTS|
1812 VcfFileReader::DISCARD_FILTERED |
1813 VcfFileReader::DISCARD_MISSING_GT |
1814 VcfFileReader::DISCARD_NON_PHASED);
1815 reader.open("testFiles/vcfFile.vcf", header, "testFiles/subset1.txt",
1816 NULL, NULL, ";");
1817
1818 assert(header.getHeaderLine() == HEADER_LINE_SUBSET1);
1819 assert(header.getNumSamples() == NUM_SAMPLES_SUBSET1);
1820 assert(header.getSampleName(2) == NULL);
1821 assert(header.getSampleName(0) == SAMPLES[0]);
1822 assert(header.getSampleName(1) == SAMPLES[1]);
1823 assert(header.getSampleIndex(SAMPLES[1].c_str()) == 1);
1824 assert(header.getSampleIndex(SAMPLES[0].c_str()) == 0);
1825 assert(header.getSampleIndex(SAMPLES[2].c_str()) == -1);
1826
1827 // Read the records to make sure they were subset.
1828 assert(reader.readRecord(record));
1829 assert(record.allPhased() == true);
1830 assert(record.allUnphased() == false);
1831 assert(record.hasAllGenotypeAlleles() == true);
1832 sampleInfo = &(record.getGenotypeInfo());
1833 assert(sampleInfo->getNumSamples() == 2);
1834 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
1835 assert(*(sampleInfo->getString("GT", 1)) == "1|0");
1836 assert(sampleInfo->getString("GT", 2) == NULL);
1837 assert(sampleInfo->isPhased(0) == true);
1838 assert(sampleInfo->isPhased(1) == true);
1839 assert(sampleInfo->isPhased(2) == false);
1840 assert(sampleInfo->isUnphased(0) == false);
1841 assert(sampleInfo->isUnphased(1) == false);
1842 assert(sampleInfo->isUnphased(2) == false);
1843 assert(record.passedAllFilters() == true);
1844 assert(record.getNumAlts() == 1);
1845
1846 assert(reader.readRecord(record));
1847 assert(record.allPhased() == true);
1848 assert(record.allUnphased() == false);
1849 assert(record.hasAllGenotypeAlleles() == true);
1850 sampleInfo = &(record.getGenotypeInfo());
1851 assert(sampleInfo->getNumSamples() == 2);
1852 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
1853 assert(*(sampleInfo->getString("GT", 1)) == "0|0");
1854 assert(sampleInfo->getString("GT", 2) == NULL);
1855 assert(sampleInfo->isPhased(0) == true);
1856 assert(sampleInfo->isPhased(1) == true);
1857 assert(sampleInfo->isPhased(2) == false);
1858 assert(sampleInfo->isUnphased(0) == false);
1859 assert(sampleInfo->isUnphased(1) == false);
1860 assert(sampleInfo->isUnphased(2) == false);
1861 assert(record.passedAllFilters() == true);
1862 assert(record.getNumAlts() == 0);
1863
1864 assert(reader.readRecord(record) == false);
1865
1866 assert(reader.getNumKeptRecords() == 2);
1867 assert(reader.getNumRecords() == 7);
1868
1869 reader.close();
1870
1871 //////////////////////////
1872 // Discard missing GTs & non-Phased and filtering
1873 // AND discard without at least 2 alternates with no additional subsetting.
1874 reader.setDiscardRules(VcfFileReader::DISCARD_MISSING_GT |
1875 VcfFileReader::DISCARD_NON_PHASED);
1876 reader.addDiscardMinAltAlleleCount(2, NULL);
1877 reader.open("testFiles/vcfFile.vcf", header, "testFiles/subset1.txt",
1878 NULL, NULL, ";");
1879
1880 assert(header.getHeaderLine() == HEADER_LINE_SUBSET1);
1881 assert(header.getNumSamples() == NUM_SAMPLES_SUBSET1);
1882 assert(header.getSampleName(2) == NULL);
1883 assert(header.getSampleName(0) == SAMPLES[0]);
1884 assert(header.getSampleName(1) == SAMPLES[1]);
1885 assert(header.getSampleIndex(SAMPLES[1].c_str()) == 1);
1886 assert(header.getSampleIndex(SAMPLES[0].c_str()) == 0);
1887 assert(header.getSampleIndex(SAMPLES[2].c_str()) == -1);
1888
1889 // Read the records to make sure they were subset.
1890 assert(reader.readRecord(record));
1891 sampleInfo = &(record.getGenotypeInfo());
1892 assert(sampleInfo->getNumSamples() == 2);
1893 assert(*(sampleInfo->getString("GT", 0)) == "1|2");
1894 assert(*(sampleInfo->getString("GT", 1)) == "2|1");
1895 assert(sampleInfo->getString("GT", 2) == NULL);
1896 assert(sampleInfo->isPhased(0) == true);
1897 assert(sampleInfo->isPhased(1) == true);
1898 assert(sampleInfo->isPhased(2) == false);
1899 assert(sampleInfo->isUnphased(0) == false);
1900 assert(sampleInfo->isUnphased(1) == false);
1901 assert(sampleInfo->isUnphased(2) == false);
1902
1903 assert(reader.readRecord(record) == false);
1904
1905 assert(reader.getNumKeptRecords() == 1);
1906 assert(reader.getNumRecords() == 7);
1907
1908 reader.close();
1909
1910 ////////////////////////////////
1911 // Test Discarding multiple Alts without subsetting
1912 // and discard any without at least 3 alts.
1913 reader.open("testFiles/vcfFile.vcf", header);
1914 reader.addDiscardMinAltAlleleCount(3, NULL);
1915 reader.setDiscardRules(VcfFileReader::DISCARD_MULTIPLE_ALTS);
1916
1917 assert(header.getHeaderLine() == HEADER_LINE);
1918 assert(header.getNumSamples() == NUM_SAMPLES);
1919 assert(header.getSampleName(2) == SAMPLES[2]);
1920 assert(header.getSampleName(0) == SAMPLES[0]);
1921 assert(header.getSampleName(1) == SAMPLES[1]);
1922 assert(header.getSampleIndex(SAMPLES[1].c_str()) == 1);
1923 assert(header.getSampleIndex(SAMPLES[0].c_str()) == 0);
1924 assert(header.getSampleIndex(SAMPLES[2].c_str()) == 2);
1925
1926 // Read the records.
1927 assert(reader.readRecord(record));
1928 assert(record.allPhased() == false);
1929 assert(record.allUnphased() == false);
1930 assert(record.hasAllGenotypeAlleles() == true);
1931 sampleInfo = &(record.getGenotypeInfo());
1932 assert(sampleInfo->getNumSamples() == 3);
1933 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
1934 assert(*(sampleInfo->getString("GT", 1)) == "1|0");
1935 assert(*(sampleInfo->getString("GT", 2)) == "1/1");
1936 assert(sampleInfo->isPhased(0) == true);
1937 assert(sampleInfo->isPhased(1) == true);
1938 assert(sampleInfo->isPhased(2) == false);
1939 assert(sampleInfo->isUnphased(0) == false);
1940 assert(sampleInfo->isUnphased(1) == false);
1941 assert(sampleInfo->isUnphased(2) == true);
1942 assert(record.passedAllFilters() == true);
1943 assert(record.getNumAlts() == 1);
1944
1945 assert(reader.readRecord(record));
1946 assert(record.getGT(0,0) == 0);
1947 assert(record.getGT(1,1) == VcfGenotypeSample::MISSING_GT);
1948 assert(record.getGT(1,0) == 0);
1949 assert(record.getGT(0,1) == 1);
1950 assert(record.getGT(2,0) == 1);
1951 assert(record.getGT(2,1) == 1);
1952 assert(record.getGT(1,2) == VcfGenotypeSample::INVALID_GT);
1953 assert(record.getGT(3,0) == VcfGenotypeSample::INVALID_GT);
1954 assert(record.allPhased() == true);
1955 assert(record.allUnphased() == false);
1956 assert(record.hasAllGenotypeAlleles() == false);
1957 sampleInfo = &(record.getGenotypeInfo());
1958 assert(sampleInfo->getNumSamples() == 3);
1959 assert(*(sampleInfo->getString("GT", 0)) == "0|1");
1960 assert(*(sampleInfo->getString("GT", 1)) == "0|.");
1961 assert(*(sampleInfo->getString("GT", 2)) == "1|1");
1962 assert(sampleInfo->isPhased(0) == true);
1963 assert(sampleInfo->isPhased(1) == true);
1964 assert(sampleInfo->isPhased(2) == true);
1965 assert(sampleInfo->isUnphased(0) == false);
1966 assert(sampleInfo->isUnphased(1) == false);
1967 assert(sampleInfo->isUnphased(2) == false);
1968 assert(record.passedAllFilters() == true);
1969 assert(record.getNumAlts() == 1);
1970
1971 assert(reader.readRecord(record) == false);
1972
1973 assert(reader.getNumKeptRecords() == 2);
1974 assert(reader.getNumRecords() == 7);
1975
1976 reader.close();
1977
1978 ////////////////////////////////
1979 // Test Discarding multiple Alts without subsetting
1980 // and discard any without at least 3 alts, counting only samples NA00001 & NA00003 (NA00002 is excluded).
1981 reader.open("testFiles/vcfFile.vcf", header);
1982 VcfSubsetSamples minAltAlleleSubset;
1983 minAltAlleleSubset.init(header, true);
1984 minAltAlleleSubset.addExcludeSample("NA00002");
1985 reader.addDiscardMinAltAlleleCount(3, &minAltAlleleSubset);
1986 reader.setDiscardRules(VcfFileReader::DISCARD_MULTIPLE_ALTS);
1987
1988 assert(header.getHeaderLine() == HEADER_LINE);
1989 assert(header.getNumSamples() == 3);
1990 assert(header.getSampleName(0) == SAMPLES[0]);
1991 assert(header.getSampleName(2) == SAMPLES[2]);
1992 assert(header.getSampleName(1) == SAMPLES[1]);
1993 assert(header.getSampleIndex(SAMPLES[0].c_str()) == 0);
1994 assert(header.getSampleIndex(SAMPLES[1].c_str()) == 1);
1995 assert(header.getSampleIndex(SAMPLES[2].c_str()) == 2);
1996
1997 // Read the records.
1998 assert(reader.readRecord(record));
1999 assert(record.getGT(0,0) == 0);
2000 assert(record.getGT(1,1) == VcfGenotypeSample::MISSING_GT);
2001 assert(record.getGT(1,0) == 0);
2002 assert(record.getGT(0,1) == 1);
2003 assert(record.getGT(2,0) == 1);
2004 assert(record.getGT(2,1) == 1);
2005 assert(record.getGT(1,2) == VcfGenotypeSample::INVALID_GT);
2006 assert(record.getGT(3,0) == VcfGenotypeSample::INVALID_GT);
2007 assert(record.allPhased() == true);
2008 assert(record.allUnphased() == false);
2009 assert(record.hasAllGenotypeAlleles() == false);
2010 sampleInfo = &(record.getGenotypeInfo());
2011 assert(sampleInfo->getNumSamples() == 3);
2012 assert(*(sampleInfo->getString("GT", 0)) == "0|1");
2013 assert(*(sampleInfo->getString("GT", 1)) == "0|.");
2014 assert(*(sampleInfo->getString("GT", 2)) == "1|1");
2015 assert(sampleInfo->isPhased(0) == true);
2016 assert(sampleInfo->isPhased(1) == true);
2017 assert(sampleInfo->isPhased(2) == true);
2018 assert(sampleInfo->isUnphased(0) == false);
2019 assert(sampleInfo->isUnphased(1) == false);
2020 assert(sampleInfo->isUnphased(2) == false);
2021 assert(record.passedAllFilters() == true);
2022 assert(record.getNumAlts() == 1);
2023
2024 assert(reader.readRecord(record) == false);
2025
2026 assert(reader.getNumKeptRecords() == 1);
2027 assert(reader.getNumRecords() == 7);
2028
2029 reader.close();
2030
2031
2032 ////////////////////////////////
2033 // Test Discarding minor allele count < 1 without subsetting.
2034 reader.rmDiscardMinAltAlleleCount();
2035 reader.setDiscardRules(0);
2036 reader.addDiscardMinMinorAlleleCount(1, NULL);
2037
2038 reader.open("testFiles/vcfFile.vcf", header);
2039
2040 assert(header.getHeaderLine() == HEADER_LINE);
2041 assert(header.getNumSamples() == NUM_SAMPLES);
2042 assert(header.getSampleName(2) == SAMPLES[2]);
2043 assert(header.getSampleName(0) == SAMPLES[0]);
2044 assert(header.getSampleName(1) == SAMPLES[1]);
2045 assert(header.getSampleIndex(SAMPLES[1].c_str()) == 1);
2046 assert(header.getSampleIndex(SAMPLES[0].c_str()) == 0);
2047 assert(header.getSampleIndex(SAMPLES[2].c_str()) == 2);
2048
2049 // Read the records.
2050 assert(reader.readRecord(record));
2051 assert(record.allPhased() == false);
2052 assert(record.allUnphased() == false);
2053 assert(record.hasAllGenotypeAlleles() == true);
2054 sampleInfo = &(record.getGenotypeInfo());
2055 assert(sampleInfo->getNumSamples() == 3);
2056 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
2057 assert(*(sampleInfo->getString("GT", 1)) == "1|0");
2058 assert(*(sampleInfo->getString("GT", 2)) == "1/1");
2059 assert(sampleInfo->isPhased(0) == true);
2060 assert(sampleInfo->isPhased(1) == true);
2061 assert(sampleInfo->isPhased(2) == false);
2062 assert(sampleInfo->isUnphased(0) == false);
2063 assert(sampleInfo->isUnphased(1) == false);
2064 assert(sampleInfo->isUnphased(2) == true);
2065 assert(record.passedAllFilters() == true);
2066 assert(record.getNumAlts() == 1);
2067
2068 assert(reader.readRecord(record));
2069 assert(record.allPhased() == false);
2070 assert(record.allUnphased() == false);
2071 assert(record.hasAllGenotypeAlleles() == true);
2072 sampleInfo = &(record.getGenotypeInfo());
2073 assert(sampleInfo->getNumSamples() == 3);
2074 assert(*(sampleInfo->getString("GT", 0)) == "0/0");
2075 assert(*(sampleInfo->getString("GT", 1)) == "0|1");
2076 assert(*(sampleInfo->getString("GT", 2)) == "0/0");
2077 assert(sampleInfo->isPhased(0) == false);
2078 assert(sampleInfo->isPhased(1) == true);
2079 assert(sampleInfo->isPhased(2) == false);
2080 assert(sampleInfo->isUnphased(0) == true);
2081 assert(sampleInfo->isUnphased(1) == false);
2082 assert(sampleInfo->isUnphased(2) == true);
2083 assert(record.passedAllFilters() == false);
2084 assert(record.getNumAlts() == 1);
2085
2086 assert(reader.readRecord(record));
2087 assert(record.allPhased() == false);
2088 assert(record.allUnphased() == false);
2089 assert(record.hasAllGenotypeAlleles() == true);
2090 sampleInfo = &(record.getGenotypeInfo());
2091 assert(sampleInfo->getNumSamples() == 3);
2092 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
2093 assert(*(sampleInfo->getString("GT", 1)) == "0|0");
2094 assert(*(sampleInfo->getString("GT", 2)) == "0/0");
2095 assert(sampleInfo->isPhased(0) == true);
2096 assert(sampleInfo->isPhased(1) == true);
2097 assert(sampleInfo->isPhased(2) == false);
2098 assert(sampleInfo->isUnphased(0) == false);
2099 assert(sampleInfo->isUnphased(1) == false);
2100 assert(sampleInfo->isUnphased(2) == true);
2101 assert(record.passedAllFilters() == true);
2102 assert(record.getNumAlts() == 0);
2103
2104 assert(reader.readRecord(record));
2105 assert(record.allPhased() == false);
2106 assert(record.allUnphased() == true);
2107 assert(record.hasAllGenotypeAlleles() == true);
2108 sampleInfo = &(record.getGenotypeInfo());
2109 assert(sampleInfo->getNumSamples() == 3);
2110 assert(*(sampleInfo->getString("GT", 0)) == "0/1");
2111 assert(*(sampleInfo->getString("GT", 1)) == "0/2");
2112 assert(*(sampleInfo->getString("GT", 2)) == "1/1");
2113 assert(sampleInfo->isPhased(0) == false);
2114 assert(sampleInfo->isPhased(1) == false);
2115 assert(sampleInfo->isPhased(2) == false);
2116 assert(sampleInfo->isUnphased(0) == true);
2117 assert(sampleInfo->isUnphased(1) == true);
2118 assert(sampleInfo->isUnphased(2) == true);
2119 assert(record.passedAllFilters() == true);
2120 assert(record.getNumAlts() == 2);
2121
2122 assert(reader.readRecord(record));
2123 assert(record.allPhased() == true);
2124 assert(record.allUnphased() == false);
2125 assert(record.hasAllGenotypeAlleles() == false);
2126 sampleInfo = &(record.getGenotypeInfo());
2127 assert(sampleInfo->getNumSamples() == 3);
2128 assert(*(sampleInfo->getString("GT", 0)) == "0|1");
2129 assert(*(sampleInfo->getString("GT", 1)) == "0|.");
2130 assert(*(sampleInfo->getString("GT", 2)) == "1|1");
2131 assert(sampleInfo->isPhased(0) == true);
2132 assert(sampleInfo->isPhased(1) == true);
2133 assert(sampleInfo->isPhased(2) == true);
2134 assert(sampleInfo->isUnphased(0) == false);
2135 assert(sampleInfo->isUnphased(1) == false);
2136 assert(sampleInfo->isUnphased(2) == false);
2137 assert(record.passedAllFilters() == true);
2138 assert(record.getNumAlts() == 1);
2139
2140 assert(reader.readRecord(record) == false);
2141
2142 assert(reader.getNumKeptRecords() == 5);
2143 assert(reader.getNumRecords() == 7);
2144
2145 reader.close();
2146
2147 ////////////////////////////////
2148 // Test Discarding minor allele count < 2 without subsetting.
2149 reader.addDiscardMinMinorAlleleCount(2, NULL);
2150
2151 reader.open("testFiles/vcfFile.vcf", header);
2152
2153 assert(header.getHeaderLine() == HEADER_LINE);
2154 assert(header.getNumSamples() == NUM_SAMPLES);
2155 assert(header.getSampleName(2) == SAMPLES[2]);
2156 assert(header.getSampleName(0) == SAMPLES[0]);
2157 assert(header.getSampleName(1) == SAMPLES[1]);
2158 assert(header.getSampleIndex(SAMPLES[1].c_str()) == 1);
2159 assert(header.getSampleIndex(SAMPLES[0].c_str()) == 0);
2160 assert(header.getSampleIndex(SAMPLES[2].c_str()) == 2);
2161
2162 // Read the records.
2163 assert(reader.readRecord(record));
2164 assert(record.allPhased() == false);
2165 assert(record.allUnphased() == false);
2166 assert(record.hasAllGenotypeAlleles() == true);
2167 sampleInfo = &(record.getGenotypeInfo());
2168 assert(sampleInfo->getNumSamples() == 3);
2169 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
2170 assert(*(sampleInfo->getString("GT", 1)) == "1|0");
2171 assert(*(sampleInfo->getString("GT", 2)) == "1/1");
2172 assert(sampleInfo->isPhased(0) == true);
2173 assert(sampleInfo->isPhased(1) == true);
2174 assert(sampleInfo->isPhased(2) == false);
2175 assert(sampleInfo->isUnphased(0) == false);
2176 assert(sampleInfo->isUnphased(1) == false);
2177 assert(sampleInfo->isUnphased(2) == true);
2178 assert(record.passedAllFilters() == true);
2179 assert(record.getNumAlts() == 1);
2180
2181 assert(reader.readRecord(record));
2182 assert(record.allPhased() == false);
2183 assert(record.allUnphased() == false);
2184 assert(record.hasAllGenotypeAlleles() == true);
2185 sampleInfo = &(record.getGenotypeInfo());
2186 assert(sampleInfo->getNumSamples() == 3);
2187 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
2188 assert(*(sampleInfo->getString("GT", 1)) == "0|0");
2189 assert(*(sampleInfo->getString("GT", 2)) == "0/0");
2190 assert(sampleInfo->isPhased(0) == true);
2191 assert(sampleInfo->isPhased(1) == true);
2192 assert(sampleInfo->isPhased(2) == false);
2193 assert(sampleInfo->isUnphased(0) == false);
2194 assert(sampleInfo->isUnphased(1) == false);
2195 assert(sampleInfo->isUnphased(2) == true);
2196 assert(record.passedAllFilters() == true);
2197 assert(record.getNumAlts() == 0);
2198
2199 assert(reader.readRecord(record));
2200 assert(record.allPhased() == true);
2201 assert(record.allUnphased() == false);
2202 assert(record.hasAllGenotypeAlleles() == false);
2203 sampleInfo = &(record.getGenotypeInfo());
2204 assert(sampleInfo->getNumSamples() == 3);
2205 assert(*(sampleInfo->getString("GT", 0)) == "0|1");
2206 assert(*(sampleInfo->getString("GT", 1)) == "0|.");
2207 assert(*(sampleInfo->getString("GT", 2)) == "1|1");
2208 assert(sampleInfo->isPhased(0) == true);
2209 assert(sampleInfo->isPhased(1) == true);
2210 assert(sampleInfo->isPhased(2) == true);
2211 assert(sampleInfo->isUnphased(0) == false);
2212 assert(sampleInfo->isUnphased(1) == false);
2213 assert(sampleInfo->isUnphased(2) == false);
2214 assert(record.passedAllFilters() == true);
2215 assert(record.getNumAlts() == 1);
2216
2217 assert(reader.readRecord(record) == false);
2218
2219 assert(reader.getNumKeptRecords() == 3);
2220 assert(reader.getNumRecords() == 7);
2221
2222 reader.close();
2223
2224
2225 ////////////////////////////////
2226 // Test Discarding minor allele count < 1 with subsetting.
2227 VcfSubsetSamples minMinorAlleleSubset;
2228 minMinorAlleleSubset.init(header, true);
2229 minMinorAlleleSubset.addExcludeSample("NA00002");
2230 reader.addDiscardMinMinorAlleleCount(1, &minMinorAlleleSubset);
2231
2232 reader.open("testFiles/vcfFile.vcf", header);
2233
2234 assert(header.getHeaderLine() == HEADER_LINE);
2235 assert(header.getNumSamples() == NUM_SAMPLES);
2236 assert(header.getSampleName(2) == SAMPLES[2]);
2237 assert(header.getSampleName(0) == SAMPLES[0]);
2238 assert(header.getSampleName(1) == SAMPLES[1]);
2239 assert(header.getSampleIndex(SAMPLES[1].c_str()) == 1);
2240 assert(header.getSampleIndex(SAMPLES[0].c_str()) == 0);
2241 assert(header.getSampleIndex(SAMPLES[2].c_str()) == 2);
2242
2243 // Read the records.
2244 assert(reader.readRecord(record));
2245 assert(record.allPhased() == false);
2246 assert(record.allUnphased() == false);
2247 assert(record.hasAllGenotypeAlleles() == true);
2248 sampleInfo = &(record.getGenotypeInfo());
2249 assert(sampleInfo->getNumSamples() == 3);
2250 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
2251 assert(*(sampleInfo->getString("GT", 1)) == "1|0");
2252 assert(*(sampleInfo->getString("GT", 2)) == "1/1");
2253 assert(sampleInfo->isPhased(0) == true);
2254 assert(sampleInfo->isPhased(1) == true);
2255 assert(sampleInfo->isPhased(2) == false);
2256 assert(sampleInfo->isUnphased(0) == false);
2257 assert(sampleInfo->isUnphased(1) == false);
2258 assert(sampleInfo->isUnphased(2) == true);
2259 assert(record.passedAllFilters() == true);
2260 assert(record.getNumAlts() == 1);
2261
2262 assert(reader.readRecord(record));
2263 assert(record.allPhased() == false);
2264 assert(record.allUnphased() == false);
2265 assert(record.hasAllGenotypeAlleles() == true);
2266 sampleInfo = &(record.getGenotypeInfo());
2267 assert(sampleInfo->getNumSamples() == 3);
2268 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
2269 assert(*(sampleInfo->getString("GT", 1)) == "0|0");
2270 assert(*(sampleInfo->getString("GT", 2)) == "0/0");
2271 assert(sampleInfo->isPhased(0) == true);
2272 assert(sampleInfo->isPhased(1) == true);
2273 assert(sampleInfo->isPhased(2) == false);
2274 assert(sampleInfo->isUnphased(0) == false);
2275 assert(sampleInfo->isUnphased(1) == false);
2276 assert(sampleInfo->isUnphased(2) == true);
2277 assert(record.passedAllFilters() == true);
2278 assert(record.getNumAlts() == 0);
2279
2280 assert(reader.readRecord(record));
2281 assert(record.allPhased() == true);
2282 assert(record.allUnphased() == false);
2283 assert(record.hasAllGenotypeAlleles() == false);
2284 sampleInfo = &(record.getGenotypeInfo());
2285 assert(sampleInfo->getNumSamples() == 3);
2286 assert(*(sampleInfo->getString("GT", 0)) == "0|1");
2287 assert(*(sampleInfo->getString("GT", 1)) == "0|.");
2288 assert(*(sampleInfo->getString("GT", 2)) == "1|1");
2289 assert(sampleInfo->isPhased(0) == true);
2290 assert(sampleInfo->isPhased(1) == true);
2291 assert(sampleInfo->isPhased(2) == true);
2292 assert(sampleInfo->isUnphased(0) == false);
2293 assert(sampleInfo->isUnphased(1) == false);
2294 assert(sampleInfo->isUnphased(2) == false);
2295 assert(record.passedAllFilters() == true);
2296 assert(record.getNumAlts() == 1);
2297
2298 assert(reader.readRecord(record) == false);
2299
2300 assert(reader.getNumKeptRecords() == 3);
2301 assert(reader.getNumRecords() == 7);
2302
2303 reader.close();
2304
2305 ////////////////////////////////
2306 // Test Discarding minor allele count < 2 with subsetting.
2307 reader.addDiscardMinMinorAlleleCount(2, &minMinorAlleleSubset);
2308
2309 reader.open("testFiles/vcfFile.vcf", header);
2310
2311 assert(header.getHeaderLine() == HEADER_LINE);
2312 assert(header.getNumSamples() == NUM_SAMPLES);
2313 assert(header.getSampleName(2) == SAMPLES[2]);
2314 assert(header.getSampleName(0) == SAMPLES[0]);
2315 assert(header.getSampleName(1) == SAMPLES[1]);
2316 assert(header.getSampleIndex(SAMPLES[1].c_str()) == 1);
2317 assert(header.getSampleIndex(SAMPLES[0].c_str()) == 0);
2318 assert(header.getSampleIndex(SAMPLES[2].c_str()) == 2);
2319
2320 // Read the records.
2321 assert(reader.readRecord(record));
2322 assert(record.allPhased() == false);
2323 assert(record.allUnphased() == false);
2324 assert(record.hasAllGenotypeAlleles() == true);
2325 sampleInfo = &(record.getGenotypeInfo());
2326 assert(sampleInfo->getNumSamples() == 3);
2327 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
2328 assert(*(sampleInfo->getString("GT", 1)) == "1|0");
2329 assert(*(sampleInfo->getString("GT", 2)) == "1/1");
2330 assert(sampleInfo->isPhased(0) == true);
2331 assert(sampleInfo->isPhased(1) == true);
2332 assert(sampleInfo->isPhased(2) == false);
2333 assert(sampleInfo->isUnphased(0) == false);
2334 assert(sampleInfo->isUnphased(1) == false);
2335 assert(sampleInfo->isUnphased(2) == true);
2336 assert(record.passedAllFilters() == true);
2337 assert(record.getNumAlts() == 1);
2338
2339 assert(reader.readRecord(record));
2340 assert(record.allPhased() == false);
2341 assert(record.allUnphased() == false);
2342 assert(record.hasAllGenotypeAlleles() == true);
2343 sampleInfo = &(record.getGenotypeInfo());
2344 assert(sampleInfo->getNumSamples() == 3);
2345 assert(*(sampleInfo->getString("GT", 0)) == "0|0");
2346 assert(*(sampleInfo->getString("GT", 1)) == "0|0");
2347 assert(*(sampleInfo->getString("GT", 2)) == "0/0");
2348 assert(sampleInfo->isPhased(0) == true);
2349 assert(sampleInfo->isPhased(1) == true);
2350 assert(sampleInfo->isPhased(2) == false);
2351 assert(sampleInfo->isUnphased(0) == false);
2352 assert(sampleInfo->isUnphased(1) == false);
2353 assert(sampleInfo->isUnphased(2) == true);
2354 assert(record.passedAllFilters() == true);
2355 assert(record.getNumAlts() == 0);
2356
2357 assert(reader.readRecord(record) == false);
2358
2359 assert(reader.getNumKeptRecords() == 2);
2360 assert(reader.getNumRecords() == 7);
2361
2362 reader.close();
2363 }
2364
2365
testVcfWriteFile()2366 void testVcfWriteFile()
2367 {
2368 VcfFileWriter writer;
2369 VcfFileReader reader;
2370 VcfHeader header;
2371 VcfRecord record;
2372
2373 assert(reader.open("testFiles/vcfFile.vcf", header) == true);
2374 assert(writer.open("results/vcfFile.vcf", header, InputFile::DEFAULT)
2375 == true);
2376 while(reader.readRecord(record))
2377 {
2378 // Write the record.
2379 assert(writer.writeRecord(record));
2380 }
2381
2382 assert(reader.open("testFiles/vcfFile.vcf", header) == true);
2383 assert(writer.open("results/vcfFileNoInfo.vcf", header,
2384 InputFile::DEFAULT) == true);
2385 while(reader.readRecord(record))
2386 {
2387 // Test Clearing the INFO field.
2388 record.getInfo().clear();
2389 // Write the record.
2390 assert(writer.writeRecord(record));
2391 }
2392
2393 assert(reader.open("testFiles/vcfFile.vcf", header) == true);
2394 assert(writer.open("results/vcfFileNoInfoBGZF.vcf", header) == true);
2395 while(reader.readRecord(record))
2396 {
2397 // Test Clearing the INFO field.
2398 record.getInfo().clear();
2399 // Write the record.
2400 assert(writer.writeRecord(record));
2401 }
2402
2403 assert(reader.open("testFiles/vcfFile.vcf", header) == true);
2404 VcfRecordGenotype::addStoreField("GT");
2405 assert(writer.open("results/vcfFileNoInfoKeepGT.vcf", header,
2406 InputFile::DEFAULT) == true);
2407 while(reader.readRecord(record))
2408 {
2409 // Test Clearing the INFO field.
2410 record.getInfo().clear();
2411 // Write the record.
2412 assert(writer.writeRecord(record));
2413 }
2414
2415 assert(reader.open("testFiles/vcfFile.vcf", header) == true);
2416 // Undo the storing of GT.
2417 VcfRecordGenotype::storeAllFields();
2418 VcfRecordGenotype::addStoreField("GQ");
2419 VcfRecordGenotype::addStoreField("XX");
2420 VcfRecordGenotype::addStoreField("HQ");
2421 assert(writer.open("results/vcfFileNoInfoKeepGQHQ.vcf", header,
2422 InputFile::DEFAULT) == true);
2423
2424 assert(strcmp(writer.getFileName(), "results/vcfFileNoInfoKeepGQHQ.vcf") == 0);
2425
2426 while(reader.readRecord(record))
2427 {
2428 // Test Clearing the INFO field.
2429 record.getInfo().clear();
2430 // Write the record.
2431 assert(writer.writeRecord(record));
2432 }
2433 }
2434
2435
testVcfReadSection()2436 void testVcfReadSection()
2437 {
2438 // Test open for read via the constructor with return.
2439 VcfFileReader reader;
2440 VcfHeader header;
2441 VcfRecord record;
2442
2443 const Tabix* tabixPtr = NULL;
2444
2445 tabixPtr = reader.getVcfIndex();
2446 assert(tabixPtr == NULL);
2447
2448 ////////////////////////////////
2449 // Test the read section logic.
2450 reader.open("testFiles/testTabix.vcf.bgzf", header);
2451 tabixPtr = reader.getVcfIndex();
2452 assert(tabixPtr == NULL);
2453 reader.readVcfIndex();
2454
2455 //////////////////
2456 // Test index accessors.
2457 tabixPtr = reader.getVcfIndex();
2458 assert(tabixPtr != NULL);
2459 assert(tabixPtr->getFormat() == Tabix::FORMAT_VCF);
2460 assert(tabixPtr->getNumRefs() == 2);
2461 assert(strcmp(tabixPtr->getRefName(0), "1") == 0);
2462 assert(strcmp(tabixPtr->getRefName(1), "3") == 0);
2463 bool caughtException = false;
2464 try
2465 {
2466 tabixPtr->getRefName(2);
2467 }
2468 catch(std::exception& e)
2469 {
2470 caughtException = true;
2471 }
2472 assert(caughtException);
2473 caughtException = false;
2474 try
2475 {
2476 tabixPtr->getRefName(-1);
2477 }
2478 catch(std::exception& e)
2479 {
2480 caughtException = true;
2481 }
2482 assert(caughtException == true);
2483 caughtException = false;
2484
2485 reader.set1BasedReadSection("10", 16384, 32767);
2486 assert(reader.readRecord(record) == false);
2487
2488 reader.set1BasedReadSection("1", 16384, 32767);
2489 assert(reader.readRecord(record) == false);
2490
2491 reader.set1BasedReadSection("1", 16384, 32768);
2492 assert(reader.readRecord(record) == false);
2493
2494 reader.set1BasedReadSection("1", 16384, 32769);
2495 assert(reader.readRecord(record) == true);
2496 assert(record.get1BasedPosition() == 32768);
2497 assert(reader.readRecord(record) == false);
2498
2499 reader.set1BasedReadSection("1", 32768, 32769);
2500 assert(reader.readRecord(record) == true);
2501 assert(record.get1BasedPosition() == 32768);
2502 assert(reader.readRecord(record) == false);
2503
2504 reader.set1BasedReadSection("1", 32769, 32767);
2505 assert(reader.readRecord(record) == false);
2506 assert(reader.readRecord(record) == false);
2507
2508 reader.set1BasedReadSection("1", 32769, 65537);
2509 assert(reader.readRecord(record) == false);
2510 assert(reader.readRecord(record) == false);
2511 assert(reader.readRecord(record) == false);
2512
2513 reader.set1BasedReadSection("1", 32769, 65537);
2514 assert(reader.readRecord(record) == false);
2515
2516 assert(reader.set1BasedReadSection("1", 32768, 65538));
2517 assert(reader.readRecord(record) == true);
2518 assert(record.get1BasedPosition() == 32768);
2519 assert(reader.readRecord(record) == true);
2520 assert(record.get1BasedPosition() == 65537);
2521 assert(reader.readRecord(record) == false);
2522 assert(reader.readRecord(record) == false);
2523
2524 assert(reader.set1BasedReadSection("1", 32769, 65538));
2525 assert(reader.readRecord(record) == true);
2526 assert(record.get1BasedPosition() == 65537);
2527 assert(reader.readRecord(record) == false);
2528 assert(reader.readRecord(record) == false);
2529
2530 assert(reader.set1BasedReadSection("1", 0, 65538));
2531 assert(reader.readRecord(record) == true);
2532 assert(record.get1BasedPosition() == 32768);
2533 assert(reader.readRecord(record) == true);
2534 assert(record.get1BasedPosition() == 65537);
2535 assert(reader.readRecord(record) == false);
2536 assert(reader.readRecord(record) == false);
2537
2538
2539 ////////////////////////////////////////
2540 // Test selecting whole chroms
2541
2542 assert(reader.setReadSection("10"));
2543 assert(reader.readRecord(record) == false);
2544
2545 assert(reader.setReadSection("1"));
2546 assert(reader.readRecord(record) == true);
2547 assert(record.get1BasedPosition() == 32768);
2548 assert(reader.readRecord(record) == true);
2549 assert(record.get1BasedPosition() == 65537);
2550 assert(reader.readRecord(record) == false);
2551 assert(reader.readRecord(record) == false);
2552
2553 assert(reader.setReadSection("3"));
2554 assert(reader.readRecord(record) == true);
2555 assert(record.get1BasedPosition() == 32768);
2556 assert(reader.readRecord(record) == true);
2557 assert(record.get1BasedPosition() == 32780);
2558 assert(reader.readRecord(record) == false);
2559 assert(reader.readRecord(record) == false);
2560
2561 ////////////////////////////////////////
2562 // Test selecting sections with deletions
2563 reader.set1BasedReadSection("3", 16384, 32767);
2564 assert(reader.readRecord(record) == false);
2565
2566 reader.set1BasedReadSection("3", 16384, 32768);
2567 assert(reader.readRecord(record) == false);
2568
2569 reader.set1BasedReadSection("3", 16384, 32769);
2570 assert(reader.readRecord(record) == true);
2571 assert(record.get1BasedPosition() == 32768);
2572 assert(reader.readRecord(record) == false);
2573
2574 reader.set1BasedReadSection("3", 32768, 32769);
2575 assert(reader.readRecord(record) == true);
2576 assert(record.get1BasedPosition() == 32768);
2577 assert(reader.readRecord(record) == false);
2578
2579 reader.set1BasedReadSection("3", 32769, 32767);
2580 assert(reader.readRecord(record) == false);
2581 assert(reader.readRecord(record) == false);
2582
2583 reader.set1BasedReadSection("3", 32769, 65537);
2584 assert(reader.readRecord(record) == true);
2585 assert(record.get1BasedPosition() == 32780);
2586 assert(reader.readRecord(record) == false);
2587 assert(reader.readRecord(record) == false);
2588
2589 reader.set1BasedReadSection("3", 32769, 65537);
2590 assert(reader.readRecord(record) == true);
2591 assert(record.get1BasedPosition() == 32780);
2592 assert(reader.readRecord(record) == false);
2593
2594 reader.set1BasedReadSection("3", 32770, 65537);
2595 assert(reader.readRecord(record) == true);
2596 assert(record.get1BasedPosition() == 32780);
2597 assert(reader.readRecord(record) == false);
2598
2599 reader.set1BasedReadSection("3", 32771, 65537);
2600 assert(reader.readRecord(record) == true);
2601 assert(record.get1BasedPosition() == 32780);
2602 assert(reader.readRecord(record) == false);
2603
2604 reader.set1BasedReadSection("3", 32780, 65537);
2605 assert(reader.readRecord(record) == true);
2606 assert(record.get1BasedPosition() == 32780);
2607 assert(reader.readRecord(record) == false);
2608
2609 reader.set1BasedReadSection("3", 32781, 65537);
2610 assert(reader.readRecord(record) == false);
2611
2612 assert(reader.set1BasedReadSection("3", 32768, 65538));
2613 assert(reader.readRecord(record) == true);
2614 assert(record.get1BasedPosition() == 32768);
2615 assert(reader.readRecord(record) == true);
2616 assert(record.get1BasedPosition() == 32780);
2617 assert(reader.readRecord(record) == false);
2618 assert(reader.readRecord(record) == false);
2619
2620 assert(reader.set1BasedReadSection("3", 32769, 65538));
2621 assert(reader.readRecord(record) == true);
2622 assert(record.get1BasedPosition() == 32780);
2623 assert(reader.readRecord(record) == false);
2624 assert(reader.readRecord(record) == false);
2625
2626 assert(reader.set1BasedReadSection("3", 32770, 65538));
2627 assert(reader.readRecord(record) == true);
2628 assert(record.get1BasedPosition() == 32780);
2629 assert(reader.readRecord(record) == false);
2630 assert(reader.readRecord(record) == false);
2631
2632 assert(reader.set1BasedReadSection("3", 32771, 65538));
2633 assert(reader.readRecord(record) == true);
2634 assert(record.get1BasedPosition() == 32780);
2635 assert(reader.readRecord(record) == false);
2636 assert(reader.readRecord(record) == false);
2637
2638 assert(reader.set1BasedReadSection("3", 0, 65538));
2639 assert(reader.readRecord(record) == true);
2640 assert(record.get1BasedPosition() == 32768);
2641 assert(reader.readRecord(record) == true);
2642 assert(record.get1BasedPosition() == 32780);
2643 assert(reader.readRecord(record) == false);
2644 assert(reader.readRecord(record) == false);
2645
2646 ////////////////////////////////////////
2647 // Test selecting sections with deletions for overlapping
2648 reader.set1BasedReadSection("3", 16384, 32767, true);
2649 assert(reader.readRecord(record) == false);
2650
2651 reader.set1BasedReadSection("3", 16384, 32768, true);
2652 assert(reader.readRecord(record) == false);
2653
2654 reader.set1BasedReadSection("3", 16384, 32769, true);
2655 assert(reader.readRecord(record) == true);
2656 assert(record.get1BasedPosition() == 32768);
2657 assert(reader.readRecord(record) == false);
2658
2659 reader.set1BasedReadSection("3", 32768, 32769, true);
2660 assert(reader.readRecord(record) == true);
2661 assert(record.get1BasedPosition() == 32768);
2662 assert(reader.readRecord(record) == false);
2663
2664 reader.set1BasedReadSection("3", 32769, 32767, true);
2665 assert(reader.readRecord(record) == false);
2666 assert(reader.readRecord(record) == false);
2667
2668 reader.set1BasedReadSection("3", 32769, 65537, true);
2669 assert(reader.readRecord(record) == true);
2670 assert(record.get1BasedPosition() == 32768);
2671 assert(reader.readRecord(record) == true);
2672 assert(record.get1BasedPosition() == 32780);
2673 assert(reader.readRecord(record) == false);
2674 assert(reader.readRecord(record) == false);
2675
2676 reader.set1BasedReadSection("3", 32769, 65537, true);
2677 assert(reader.readRecord(record) == true);
2678 assert(record.get1BasedPosition() == 32768);
2679 assert(reader.readRecord(record) == true);
2680 assert(record.get1BasedPosition() == 32780);
2681 assert(reader.readRecord(record) == false);
2682
2683 reader.set1BasedReadSection("3", 32770, 65537, true);
2684 assert(reader.readRecord(record) == true);
2685 assert(record.get1BasedPosition() == 32768);
2686 assert(reader.readRecord(record) == true);
2687 assert(record.get1BasedPosition() == 32780);
2688 assert(reader.readRecord(record) == false);
2689
2690 reader.set1BasedReadSection("3", 32771, 65537, true);
2691 assert(reader.readRecord(record) == true);
2692 assert(record.get1BasedPosition() == 32780);
2693 assert(reader.readRecord(record) == false);
2694
2695 reader.set1BasedReadSection("3", 32780, 65537, true);
2696 assert(reader.readRecord(record) == true);
2697 assert(record.get1BasedPosition() == 32780);
2698 assert(reader.readRecord(record) == false);
2699
2700 reader.set1BasedReadSection("3", 32781, 65537, true);
2701 assert(reader.readRecord(record) == false);
2702
2703 assert(reader.set1BasedReadSection("3", 32768, 65538, true));
2704 assert(reader.readRecord(record) == true);
2705 assert(record.get1BasedPosition() == 32768);
2706 assert(reader.readRecord(record) == true);
2707 assert(record.get1BasedPosition() == 32780);
2708 assert(reader.readRecord(record) == false);
2709 assert(reader.readRecord(record) == false);
2710
2711 assert(reader.set1BasedReadSection("3", 32769, 65538, true));
2712 assert(reader.readRecord(record) == true);
2713 assert(record.get1BasedPosition() == 32768);
2714 assert(reader.readRecord(record) == true);
2715 assert(record.get1BasedPosition() == 32780);
2716 assert(reader.readRecord(record) == false);
2717 assert(reader.readRecord(record) == false);
2718
2719 assert(reader.set1BasedReadSection("3", 32770, 65538, true));
2720 assert(reader.readRecord(record) == true);
2721 assert(record.get1BasedPosition() == 32768);
2722 assert(reader.readRecord(record) == true);
2723 assert(record.get1BasedPosition() == 32780);
2724 assert(reader.readRecord(record) == false);
2725 assert(reader.readRecord(record) == false);
2726
2727 assert(reader.set1BasedReadSection("3", 32771, 65538, true));
2728 assert(reader.readRecord(record) == true);
2729 assert(record.get1BasedPosition() == 32780);
2730 assert(reader.readRecord(record) == false);
2731 assert(reader.readRecord(record) == false);
2732
2733 assert(reader.set1BasedReadSection("3", 0, 65538, true));
2734 assert(reader.readRecord(record) == true);
2735 assert(record.get1BasedPosition() == 32768);
2736 assert(reader.readRecord(record) == true);
2737 assert(record.get1BasedPosition() == 32780);
2738 assert(reader.readRecord(record) == false);
2739 assert(reader.readRecord(record) == false);
2740
2741 reader.close();
2742 }
2743
2744
testVcfReadSectionNoIndex()2745 void testVcfReadSectionNoIndex()
2746 {
2747 // Test open for read via the constructor with return.
2748 VcfFileReader reader;
2749 VcfHeader header;
2750 VcfRecord record;
2751 bool caughtException = false;
2752
2753 ////////////////////////////////
2754 // Test the read section logic.
2755 reader.open("testFiles/testTabix.vcf.bgzf", header);
2756
2757 reader.set1BasedReadSection("10", 16384, 32767);
2758 assert(reader.readRecord(record) == false);
2759
2760 // Can't call setReadSection after reading a record a second time with no index.
2761 try
2762 {
2763 caughtException = false;
2764 assert(reader.set1BasedReadSection("1", 16384, 32769));
2765 assert(reader.readRecord(record) == true);
2766 }
2767 catch (std::exception& e)
2768 {
2769 caughtException = true;
2770 }
2771 assert(caughtException == true);
2772
2773 // Reopen to begining to check chrom 1.
2774 reader.open("testFiles/testTabix.vcf.bgzf", header);
2775
2776 reader.set1BasedReadSection("1", 16384, 32768);
2777 assert(reader.readRecord(record) == false);
2778
2779 // Reopen to begining to check valid range.
2780 reader.open("testFiles/testTabix.vcf.bgzf", header);
2781 reader.set1BasedReadSection("1", 16384, 32769);
2782 assert(reader.readRecord(record) == true);
2783 assert(record.get1BasedPosition() == 32768);
2784 assert(reader.readRecord(record) == false);
2785
2786 // Reopen to begining to check another range.
2787 reader.open("testFiles/testTabix.vcf.bgzf", header);
2788 reader.set1BasedReadSection("1", 32769, 32767);
2789 assert(reader.readRecord(record) == false);
2790 assert(reader.readRecord(record) == false);
2791
2792 // Reopen to begining to check another range.
2793 reader.open("testFiles/testTabix.vcf.bgzf", header);
2794 reader.set1BasedReadSection("1", 32769, 65537);
2795 assert(reader.readRecord(record) == false);
2796
2797 // Reopen to begining to check another range.
2798 reader.open("testFiles/testTabix.vcf.bgzf", header);
2799 assert(reader.set1BasedReadSection("1", 32768, 65538));
2800 assert(reader.readRecord(record) == true);
2801 assert(record.get1BasedPosition() == 32768);
2802 assert(reader.readRecord(record) == true);
2803 assert(record.get1BasedPosition() == 65537);
2804 assert(reader.readRecord(record) == false);
2805 assert(reader.readRecord(record) == false);
2806
2807 // Reopen to begining to check another range.
2808 reader.open("testFiles/testTabix.vcf.bgzf", header);
2809 assert(reader.set1BasedReadSection("1", 32769, 65538));
2810 assert(reader.readRecord(record) == true);
2811 assert(record.get1BasedPosition() == 65537);
2812 assert(reader.readRecord(record) == false);
2813 assert(reader.readRecord(record) == false);
2814
2815 // Reopen to begining to check another range.
2816 reader.open("testFiles/testTabix.vcf.bgzf", header);
2817 assert(reader.set1BasedReadSection("1", 0, 65538));
2818 assert(reader.readRecord(record) == true);
2819 assert(record.get1BasedPosition() == 32768);
2820 assert(reader.readRecord(record) == true);
2821 assert(record.get1BasedPosition() == 65537);
2822 assert(reader.readRecord(record) == false);
2823 assert(reader.readRecord(record) == false);
2824
2825
2826 ////////////////////////////////////////
2827 // Test selecting whole chroms
2828 try
2829 {
2830 caughtException = false;
2831 assert(reader.setReadSection("1"));
2832 assert(reader.readRecord(record) == true);
2833 }
2834 catch (std::exception& e)
2835 {
2836 caughtException = true;
2837 }
2838 assert(caughtException);
2839
2840 // Reopen to begining to check another range.
2841 reader.open("testFiles/testTabix.vcf.bgzf", header);
2842 assert(reader.setReadSection("10"));
2843 assert(reader.readRecord(record) == false);
2844
2845 assert(strcmp(reader.getFileName(), "testFiles/testTabix.vcf.bgzf") == 0);
2846
2847 // Reopen to begining to check another range.
2848 reader.open("testFiles/testTabix.vcf.bgzf", header);
2849 assert(reader.setReadSection("1"));
2850 assert(reader.readRecord(record) == true);
2851 assert(record.get1BasedPosition() == 32768);
2852 assert(reader.readRecord(record) == true);
2853 assert(record.get1BasedPosition() == 65537);
2854 assert(reader.readRecord(record) == false);
2855 assert(reader.readRecord(record) == false);
2856
2857 // Reopen to begining to check another range.
2858 reader.open("testFiles/testTabix.vcf.bgzf", header);
2859 assert(reader.setReadSection("3"));
2860 assert(reader.readRecord(record) == true);
2861 assert(record.get1BasedPosition() == 32768);
2862 assert(reader.readRecord(record) == true);
2863 assert(record.get1BasedPosition() == 32780);
2864 assert(reader.readRecord(record) == false);
2865 assert(reader.readRecord(record) == false);
2866
2867 ////////////////////////////////////////
2868 // Test selecting sections with deletions
2869 // Reopen to begining to check another range.
2870 reader.open("testFiles/testTabix.vcf.bgzf", header);
2871 reader.set1BasedReadSection("3", 16384, 32767);
2872 assert(reader.readRecord(record) == false);
2873
2874 // Reopen to begining to check another range.
2875 reader.open("testFiles/testTabix.vcf.bgzf", header);
2876 reader.set1BasedReadSection("3", 16384, 32768);
2877 assert(reader.readRecord(record) == false);
2878
2879 // Reopen to begining to check another range.
2880 reader.open("testFiles/testTabix.vcf.bgzf", header);
2881 reader.set1BasedReadSection("3", 16384, 32769);
2882 assert(reader.readRecord(record) == true);
2883 assert(record.get1BasedPosition() == 32768);
2884 assert(reader.readRecord(record) == false);
2885
2886 // Reopen to begining to check another range.
2887 reader.open("testFiles/testTabix.vcf.bgzf", header);
2888 reader.set1BasedReadSection("3", 32768, 32769);
2889 assert(reader.readRecord(record) == true);
2890 assert(record.get1BasedPosition() == 32768);
2891 assert(reader.readRecord(record) == false);
2892
2893 ////////////////////////////////////////
2894 // Test selecting sections with deletions for overlapping
2895 // Reopen to begining to check another range.
2896 reader.open("testFiles/testTabix.vcf.bgzf", header);
2897 reader.set1BasedReadSection("3", 16384, 32767, true);
2898 assert(reader.readRecord(record) == false);
2899
2900 // Reopen to begining to check another range.
2901 reader.open("testFiles/testTabix.vcf.bgzf", header);
2902 reader.set1BasedReadSection("3", 16384, 32768, true);
2903 assert(reader.readRecord(record) == false);
2904
2905 // Reopen to begining to check another range.
2906 reader.open("testFiles/testTabix.vcf.bgzf", header);
2907 reader.set1BasedReadSection("3", 16384, 32769, true);
2908 assert(reader.readRecord(record) == true);
2909 assert(record.get1BasedPosition() == 32768);
2910 assert(reader.readRecord(record) == false);
2911
2912 // Reopen to begining to check another range.
2913 reader.open("testFiles/testTabix.vcf.bgzf", header);
2914 assert(reader.set1BasedReadSection("3", 32771, 65538, true));
2915 assert(reader.readRecord(record) == true);
2916 assert(record.get1BasedPosition() == 32780);
2917 assert(reader.readRecord(record) == false);
2918 assert(reader.readRecord(record) == false);
2919
2920 // Reopen to begining to check another range.
2921 reader.open("testFiles/testTabix.vcf.bgzf", header);
2922 assert(reader.set1BasedReadSection("3", 0, 65538, true));
2923 assert(reader.readRecord(record) == true);
2924 assert(record.get1BasedPosition() == 32768);
2925 assert(reader.readRecord(record) == true);
2926 assert(record.get1BasedPosition() == 32780);
2927 assert(reader.readRecord(record) == false);
2928 assert(reader.readRecord(record) == false);
2929
2930 reader.close();
2931 }
2932
2933
testVcfReadSectionBadIndex()2934 void testVcfReadSectionBadIndex()
2935 {
2936 // Test open for read via the constructor with return.
2937 VcfFileReader reader;
2938 VcfFileWriter writer;
2939 VcfHeader header;
2940 VcfRecord record;
2941
2942 ////////////////////////////////
2943 // Test the read section logic.
2944 reader.open("testFiles/testTabixBadIndex.vcf.bgzf", header);
2945 bool hitError = false;
2946 try
2947 {
2948 reader.readVcfIndex();
2949 }
2950 catch(std::exception& e)
2951 {
2952 hitError = true;
2953 std::string expectedError = "FAIL_PARSE: ERROR: Tabix file not in VCF format: testFiles/testTabixBadIndex.vcf.bgzf.tbi\nFAIL_IO: Failed to read the vcf Index file: testFiles/testTabixBadIndex.bgzf.tbi";
2954 assert(expectedError == e.what());
2955 }
2956 assert(hitError);
2957 }
2958