1 /* test/test-vcf-api.c -- VCF test harness.
2
3 Copyright (C) 2013, 2014, 2017-2021 Genome Research Ltd.
4
5 Author: Petr Danecek <pd3@sanger.ac.uk>
6
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE. */
24
25 #include <config.h>
26
27 #include <errno.h>
28 #include <stdio.h>
29 #include <string.h>
30
31 #include "../htslib/hts.h"
32 #include "../htslib/vcf.h"
33 #include "../htslib/kstring.h"
34 #include "../htslib/kseq.h"
35
error(const char * format,...)36 void error(const char *format, ...)
37 {
38 va_list ap;
39 va_start(ap, format);
40 vfprintf(stderr, format, ap);
41 va_end(ap);
42 if (strrchr(format, '\n') == NULL) fputc('\n', stderr);
43 exit(-1);
44 }
45
46 #define STRINGIFY(x) #x
47 #define check0(x) ((x) == 0 ? (void) 0 : error("Failed: %s", STRINGIFY(x)))
48
check_alleles(bcf1_t * rec,const char ** alleles,int num)49 static int check_alleles(bcf1_t *rec, const char **alleles, int num) {
50 int i;
51 if (rec->n_allele != num) {
52 fprintf(stderr, "Wrong number of alleles - expected %d, got %d\n",
53 num, rec->n_allele);
54 return -1;
55 }
56 if (bcf_unpack(rec, BCF_UN_STR) != 0)
57 return -1;
58 for (i = 0; i < num; i++) {
59 if (0 != strcmp(alleles[i], rec->d.allele[i])) {
60 fprintf(stderr,
61 "Mismatch for allele %d : expected '%s' got '%s'\n",
62 i, alleles[i], rec->d.allele[i]);
63 return -1;
64 }
65 }
66 return 0;
67 }
68
test_update_alleles(bcf_hdr_t * hdr,bcf1_t * rec)69 static void test_update_alleles(bcf_hdr_t *hdr, bcf1_t *rec)
70 {
71 // Exercise bcf_update_alleles() a bit
72 const char *alleles1[2] = { "G", "A" };
73 const char *alleles2[3] = { "C", "TGCA", "CATG" };
74 #define rep10(x) x x x x x x x x x x
75 const char *alleles3[3] = { rep10("ATTCTAGATC"), "TGCA",
76 rep10("CTATTATCTCTAATGACATG") };
77 #undef rep10
78 const char *alleles4[3] = { alleles3[2], NULL, alleles3[0] };
79 // Add some alleles
80 check0(bcf_update_alleles(hdr, rec, alleles1, 2));
81 check0(check_alleles(rec, alleles1, 2));
82 // Erase them
83 check0(bcf_update_alleles(hdr, rec, NULL, 0));
84 check0(check_alleles(rec, NULL, 0));
85 // Expand to three
86 check0(bcf_update_alleles(hdr, rec, alleles2, 3));
87 check0(check_alleles(rec, alleles2, 3));
88 // Now try some bigger ones (should force a realloc)
89 check0(bcf_update_alleles(hdr, rec, alleles3, 3));
90 check0(check_alleles(rec, alleles3, 3));
91 // Ensure it works even if one of the alleles points into the
92 // existing structure
93 alleles4[1] = rec->d.allele[1];
94 check0(bcf_update_alleles(hdr, rec, alleles4, 3));
95 alleles4[1] = alleles3[1]; // Will have been clobbered by the update
96 check0(check_alleles(rec, alleles4, 3));
97 // Ensure it works when the alleles point into the existing data,
98 // rec->d.allele is used to define the input array and the
99 // order of the entries is changed. The result of this should
100 // be the same as alleles2.
101 char *tmp = rec->d.allele[0] + strlen(rec->d.allele[0]) - 4;
102 rec->d.allele[0] = rec->d.allele[2] + strlen(rec->d.allele[2]) - 1;
103 rec->d.allele[2] = tmp;
104 check0(bcf_update_alleles(hdr, rec, (const char **) rec->d.allele, 3));
105 check0(check_alleles(rec, alleles2, 3));
106 }
107
write_bcf(char * fname)108 void write_bcf(char *fname)
109 {
110 // Init
111 htsFile *fp = hts_open(fname,"wb");
112 if (!fp) error("Failed to open \"%s\" : %s", fname, strerror(errno));
113 bcf_hdr_t *hdr = bcf_hdr_init("w");
114 if (!hdr) error("bcf_hdr_init : %s", strerror(errno));
115 bcf1_t *rec = bcf_init1();
116 if (!rec) error("bcf_init1 : %s", strerror(errno));
117
118 // Check no-op on fresh bcf1_t
119 check0(bcf_update_alleles(hdr, rec, NULL, 0));
120
121 // Create VCF header
122 kstring_t str = {0,0,0};
123 check0(bcf_hdr_append(hdr, "##fileDate=20090805"));
124 check0(bcf_hdr_append(hdr, "##FORMAT=<ID=UF,Number=1,Type=Integer,Description=\"Unused FORMAT\">"));
125 check0(bcf_hdr_append(hdr, "##INFO=<ID=UI,Number=1,Type=Integer,Description=\"Unused INFO\">"));
126 check0(bcf_hdr_append(hdr, "##FILTER=<ID=Flt,Description=\"Unused FILTER\">"));
127 check0(bcf_hdr_append(hdr, "##unused=<XX=AA,Description=\"Unused generic\">"));
128 check0(bcf_hdr_append(hdr, "##unused=unformatted text 1"));
129 check0(bcf_hdr_append(hdr, "##unused=unformatted text 2"));
130 check0(bcf_hdr_append(hdr, "##contig=<ID=Unused,length=1>"));
131 check0(bcf_hdr_append(hdr, "##source=myImputationProgramV3.1"));
132 check0(bcf_hdr_append(hdr, "##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta"));
133 check0(bcf_hdr_append(hdr, "##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species=\"Homo sapiens\",taxonomy=x>"));
134 check0(bcf_hdr_append(hdr, "##phasing=partial"));
135 check0(bcf_hdr_append(hdr, "##INFO=<ID=NS,Number=1,Type=Integer,Description=\"Number of Samples With Data\">"));
136 check0(bcf_hdr_append(hdr, "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">"));
137 check0(bcf_hdr_append(hdr, "##INFO=<ID=NEG,Number=.,Type=Integer,Description=\"Test -ve Numbers\">"));
138 check0(bcf_hdr_append(hdr, "##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency\">"));
139 check0(bcf_hdr_append(hdr, "##INFO=<ID=AA,Number=1,Type=String,Description=\"Ancestral Allele\">"));
140 check0(bcf_hdr_append(hdr, "##INFO=<ID=DB,Number=0,Type=Flag,Description=\"dbSNP membership, build 129\">"));
141 check0(bcf_hdr_append(hdr, "##INFO=<ID=H2,Number=0,Type=Flag,Description=\"HapMap2 membership\">"));
142 check0(bcf_hdr_append(hdr, "##FILTER=<ID=q10,Description=\"Quality below 10\">"));
143 check0(bcf_hdr_append(hdr, "##FILTER=<ID=s50,Description=\"Less than half of samples have data\">"));
144 check0(bcf_hdr_append(hdr, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">"));
145 check0(bcf_hdr_append(hdr, "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">"));
146 check0(bcf_hdr_append(hdr, "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Read Depth\">"));
147 check0(bcf_hdr_append(hdr, "##FORMAT=<ID=HQ,Number=2,Type=Integer,Description=\"Haplotype Quality\">"));
148 check0(bcf_hdr_append(hdr, "##FORMAT=<ID=TS,Number=1,Type=String,Description=\"Test String 1\">"));
149
150 // Try a few header modifications
151 bcf_hdr_remove(hdr, BCF_HL_CTG, "Unused");
152 check0(bcf_hdr_append(hdr, "##contig=<ID=Unused,length=62435964>"));
153 bcf_hdr_remove(hdr, BCF_HL_FMT, "TS");
154 check0(bcf_hdr_append(hdr, "##FORMAT=<ID=TS,Number=1,Type=String,Description=\"Test String\">"));
155 bcf_hdr_remove(hdr, BCF_HL_INFO, "NEG");
156 check0(bcf_hdr_append(hdr, "##INFO=<ID=NEG,Number=.,Type=Integer,Description=\"Test Negative Numbers\">"));
157 bcf_hdr_remove(hdr, BCF_HL_FLT, "s50");
158 check0(bcf_hdr_append(hdr, "##FILTER=<ID=s50,Description=\"Less than 50% of samples have data\">"));
159
160 check0(bcf_hdr_add_sample(hdr, "NA00001"));
161 check0(bcf_hdr_add_sample(hdr, "NA00002"));
162 check0(bcf_hdr_add_sample(hdr, "NA00003"));
163 check0(bcf_hdr_add_sample(hdr, NULL)); // to update internal structures
164 if ( bcf_hdr_write(fp, hdr)!=0 ) error("Failed to write to %s\n", fname);
165
166
167 // Add a record
168 // 20 14370 rs6054257 G A 29 PASS NS=3;DP=14;NEG=-127;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,.
169 // .. CHROM
170 rec->rid = bcf_hdr_name2id(hdr, "20");
171 // .. POS
172 rec->pos = 14369;
173 // .. ID
174 check0(bcf_update_id(hdr, rec, "rs6054257"));
175 // .. REF and ALT
176 test_update_alleles(hdr, rec);
177 const char *alleles[2] = { "G", "A" };
178 check0(bcf_update_alleles_str(hdr, rec, "G,A"));
179 check0(check_alleles(rec, alleles, 2));
180 // .. QUAL
181 rec->qual = 29;
182 // .. FILTER
183 int32_t tmpi = bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS");
184 check0(bcf_update_filter(hdr, rec, &tmpi, 1));
185 // .. INFO
186 tmpi = 3;
187 check0(bcf_update_info_int32(hdr, rec, "NS", &tmpi, 1));
188 tmpi = 500;
189 check0(bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1));
190 tmpi = 100000;
191 check0(bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1));
192 tmpi = 14;
193 check0(bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1));
194 tmpi = -127;
195 check0(bcf_update_info_int32(hdr, rec, "NEG", &tmpi, 1));
196 float tmpf = 0.5;
197 check0(bcf_update_info_float(hdr, rec, "AF", &tmpf, 1));
198 check0(bcf_update_info_flag(hdr, rec, "DB", NULL, 1));
199 check0(bcf_update_info_flag(hdr, rec, "H2", NULL, 1));
200 // .. FORMAT
201 int32_t *tmpia = (int*)malloc(bcf_hdr_nsamples(hdr)*2*sizeof(int));
202 tmpia[0] = bcf_gt_phased(0);
203 tmpia[1] = bcf_gt_phased(0);
204 tmpia[2] = bcf_gt_phased(1);
205 tmpia[3] = bcf_gt_phased(0);
206 tmpia[4] = bcf_gt_unphased(1);
207 tmpia[5] = bcf_gt_unphased(1);
208 check0(bcf_update_genotypes(hdr, rec, tmpia, bcf_hdr_nsamples(hdr)*2));
209 tmpia[0] = 48;
210 tmpia[1] = 48;
211 tmpia[2] = 43;
212 check0(bcf_update_format_int32(hdr, rec, "GQ", tmpia, bcf_hdr_nsamples(hdr)));
213 tmpia[0] = 0;
214 tmpia[1] = 0;
215 tmpia[2] = 1;
216 check0(bcf_update_format_int32(hdr, rec, "DP", tmpia, bcf_hdr_nsamples(hdr)));
217 tmpia[0] = 1;
218 tmpia[1] = 100000;
219 tmpia[2] = 1;
220 check0(bcf_update_format_int32(hdr, rec, "DP", tmpia, bcf_hdr_nsamples(hdr)));
221 tmpia[0] = 1;
222 tmpia[1] = 8;
223 tmpia[2] = 5;
224 check0(bcf_update_format_int32(hdr, rec, "DP", tmpia, bcf_hdr_nsamples(hdr)));
225 tmpia[0] = 51;
226 tmpia[1] = 51;
227 tmpia[2] = 51;
228 tmpia[3] = 51;
229 tmpia[4] = bcf_int32_missing;
230 tmpia[5] = bcf_int32_missing;
231 check0(bcf_update_format_int32(hdr, rec, "HQ", tmpia, bcf_hdr_nsamples(hdr)*2));
232 char *tmp_str[] = {"String1","SomeOtherString2","YetAnotherString3"};
233 check0(bcf_update_format_string(hdr, rec, "TS", (const char**)tmp_str, 3));
234 tmp_str[0] = "LongerStringRequiringBufferReallocation";
235 check0(bcf_update_format_string(hdr, rec, "TS", (const char**)tmp_str, 3));
236 tmp_str[0] = "String1";
237 check0(bcf_update_format_string(hdr, rec, "TS", (const char**)tmp_str, 3));
238 if ( bcf_write1(fp, hdr, rec)!=0 ) error("Failed to write to %s\n", fname);
239
240 // 20 1110696 . A G,T 67 . NS=2;DP=10;NEG=-128;AF=0.333,.;AA=T;DB GT 2 1 ./.
241 bcf_clear1(rec);
242 rec->rid = bcf_hdr_name2id(hdr, "20");
243 rec->pos = 1110695;
244 check0(bcf_update_alleles_str(hdr, rec, "A,G,T"));
245 rec->qual = 67;
246 tmpi = 2;
247 check0(bcf_update_info_int32(hdr, rec, "NS", &tmpi, 1));
248 tmpi = 10;
249 check0(bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1));
250 tmpi = -128;
251 check0(bcf_update_info_int32(hdr, rec, "NEG", &tmpi, 1));
252 float *tmpfa = (float*)malloc(2*sizeof(float));
253 tmpfa[0] = 0.333;
254 bcf_float_set_missing(tmpfa[1]);
255 check0(bcf_update_info_float(hdr, rec, "AF", tmpfa, 2));
256 check0(bcf_update_info_string(hdr, rec, "AA", "SHORT"));
257 check0(bcf_update_info_string(hdr, rec, "AA", "LONGSTRING"));
258 check0(bcf_update_info_string(hdr, rec, "AA", "T"));
259 check0(bcf_update_info_flag(hdr, rec, "DB", NULL, 1));
260 tmpia[0] = bcf_gt_phased(2);
261 tmpia[1] = bcf_int32_vector_end;
262 tmpia[2] = bcf_gt_phased(1);
263 tmpia[3] = bcf_int32_vector_end;
264 tmpia[4] = bcf_gt_missing;
265 tmpia[5] = bcf_gt_missing;
266 check0(bcf_update_genotypes(hdr, rec, tmpia, bcf_hdr_nsamples(hdr)*2));
267 if ( bcf_write1(fp, hdr, rec)!=0 ) error("Failed to write to %s\n", fname);
268
269 free(tmpia);
270 free(tmpfa);
271
272 // Clean
273 free(str.s);
274 bcf_destroy1(rec);
275 bcf_hdr_destroy(hdr);
276 int ret;
277 if ( (ret=hts_close(fp)) )
278 {
279 fprintf(stderr,"hts_close(%s): non-zero status %d\n",fname,ret);
280 exit(ret);
281 }
282 }
283
bcf_to_vcf(char * fname)284 void bcf_to_vcf(char *fname)
285 {
286 htsFile *fp = hts_open(fname,"rb");
287 if (!fp) error("Failed to open \"%s\" : %s", fname, strerror(errno));
288 bcf_hdr_t *hdr = bcf_hdr_read(fp);
289 if (!hdr) error("bcf_hdr_read : %s", strerror(errno));
290 bcf1_t *rec = bcf_init1();
291 if (!rec) error("bcf_init1 : %s", strerror(errno));
292
293 char *gz_fname = (char*) malloc(strlen(fname)+4);
294 if (!gz_fname) error("malloc : %s", strerror(errno));
295 snprintf(gz_fname,strlen(fname)+4,"%s.gz",fname);
296 htsFile *out = hts_open(gz_fname,"wg");
297 if (!out) error("Couldn't open \"%s\" : %s\n", gz_fname, strerror(errno));
298
299 bcf_hdr_t *hdr_out = bcf_hdr_dup(hdr);
300 bcf_hdr_remove(hdr_out,BCF_HL_STR,"unused");
301 bcf_hdr_remove(hdr_out,BCF_HL_GEN,"unused");
302 bcf_hdr_remove(hdr_out,BCF_HL_FLT,"Flt");
303 bcf_hdr_remove(hdr_out,BCF_HL_INFO,"UI");
304 bcf_hdr_remove(hdr_out,BCF_HL_FMT,"UF");
305 bcf_hdr_remove(hdr_out,BCF_HL_CTG,"Unused");
306 if ( bcf_hdr_write(out, hdr_out)!=0 ) error("Failed to write to %s\n", fname);
307 int r;
308 while ((r = bcf_read1(fp, hdr, rec)) >= 0)
309 {
310 if ( bcf_write1(out, hdr_out, rec)!=0 ) error("Failed to write to %s\n", fname);
311
312 // Test problems caused by bcf1_sync: the data block
313 // may be realloced, also the unpacked structures must
314 // get updated.
315 check0(bcf_unpack(rec, BCF_UN_STR));
316 check0(bcf_update_id(hdr, rec, 0));
317 check0(bcf_update_format_int32(hdr, rec, "GQ", NULL, 0));
318
319 bcf1_t *dup = bcf_dup(rec); // force bcf1_sync call
320 if ( bcf_write1(out, hdr_out, dup)!=0 ) error("Failed to write to %s\n", fname);
321 bcf_destroy1(dup);
322
323 check0(bcf_update_alleles_str(hdr_out, rec, "G,A"));
324 int32_t tmpi = 99;
325 check0(bcf_update_info_int32(hdr_out, rec, "DP", &tmpi, 1));
326 int32_t tmpia[] = {9,9,9};
327 check0(bcf_update_format_int32(hdr_out, rec, "DP", tmpia, 3));
328
329 if ( bcf_write1(out, hdr_out, rec)!=0 ) error("Failed to write to %s\n", fname);
330 }
331 if (r < -1) error("bcf_read1");
332
333 bcf_destroy1(rec);
334 bcf_hdr_destroy(hdr);
335 bcf_hdr_destroy(hdr_out);
336 int ret;
337 if ( (ret=hts_close(fp)) )
338 {
339 fprintf(stderr,"hts_close(%s): non-zero status %d\n",fname,ret);
340 exit(ret);
341 }
342 if ( (ret=hts_close(out)) )
343 {
344 fprintf(stderr,"hts_close(%s): non-zero status %d\n",gz_fname,ret);
345 exit(ret);
346 }
347
348
349 // read gzip, write stdout
350 htsFile *gz_in = hts_open(gz_fname, "r");
351 if ( !gz_in )
352 {
353 fprintf(stderr,"Could not read: %s\n", gz_fname);
354 exit(1);
355 }
356
357 kstring_t line = {0,0,0};
358 while ( hts_getline(gz_in, KS_SEP_LINE, &line)>0 )
359 {
360 kputc('\n',&line);
361 fwrite(line.s,1,line.l,stdout);
362 }
363
364 if ( (ret=hts_close(gz_in)) )
365 {
366 fprintf(stderr,"hts_close(%s): non-zero status %d\n",gz_fname,ret);
367 exit(ret);
368 }
369 free(line.s);
370 free(gz_fname);
371 }
372
iterator(const char * fname)373 void iterator(const char *fname)
374 {
375 htsFile *fp = hts_open(fname, "r");
376 if (!fp) error("Failed to open \"%s\" : %s", fname, strerror(errno));
377 bcf_hdr_t *hdr = bcf_hdr_read(fp);
378 if (!hdr) error("bcf_hdr_read : %s", strerror(errno));
379 hts_idx_t *idx;
380 hts_itr_t *iter;
381
382 bcf_index_build(fname, 0);
383 idx = bcf_index_load(fname);
384
385 iter = bcf_itr_queryi(idx, bcf_hdr_name2id(hdr, "20"), 1110600, 1110800);
386 bcf_itr_destroy(iter);
387
388 iter = bcf_itr_querys(idx, hdr, "20:1110600-1110800");
389 bcf_itr_destroy(iter);
390
391 hts_idx_destroy(idx);
392 bcf_hdr_destroy(hdr);
393 int ret;
394 if ( (ret=hts_close(fp)) )
395 {
396 fprintf(stderr,"hts_close(%s): non-zero status %d\n",fname,ret);
397 exit(ret);
398 }
399 }
400
test_get_info_values(const char * fname)401 void test_get_info_values(const char *fname)
402 {
403 htsFile *fp = hts_open(fname, "r");
404 if (!fp) error("Failed to open \"%s\" : %s", fname, strerror(errno));
405 bcf_hdr_t *hdr = bcf_hdr_read(fp);
406 if (!hdr) error("bcf_hdr_read : %s", strerror(errno));
407 bcf1_t *line = bcf_init();
408 if (!line) error("bcf_init : %s", strerror(errno));
409 int r;
410 while ((r = bcf_read(fp, hdr, line)) == 0)
411 {
412 float *afs = 0;
413 int32_t *negs = NULL;
414 int count = 0;
415 int ret = bcf_get_info_float(hdr, line, "AF", &afs, &count);
416
417 if (line->pos == 14369)
418 {
419 if (ret != 1 || afs[0] != 0.5f)
420 {
421 fprintf(stderr, "AF on position 14370 should be 0.5\n");
422 exit(-1);
423 }
424 }
425 else
426 {
427 if (ret != 2 || afs[0] != 0.333f || !bcf_float_is_missing(afs[1]))
428 {
429 fprintf(stderr, "AF on position 1110696 should be 0.333, missing\n");
430 exit(-1);
431 }
432 }
433
434 free(afs);
435
436 int32_t expected = (line->pos == 14369)? -127 : -128;
437 count = 0;
438 ret = bcf_get_info_int32(hdr, line, "NEG", &negs, &count);
439 if (ret != 1 || negs[0] != expected)
440 {
441 if (ret < 0)
442 fprintf(stderr, "NEG should be %d, got error ret=%d\n", expected, ret);
443 else if (ret == 0)
444 fprintf(stderr, "NEG should be %d, got no entries\n", expected);
445 else
446 fprintf(stderr, "NEG should be %d, got %d entries (first is %d)\n", expected, ret, negs[0]);
447 exit(1);
448 }
449 free(negs);
450 }
451 if (r < -1) error("bcf_read");
452
453 bcf_destroy(line);
454 bcf_hdr_destroy(hdr);
455 hts_close(fp);
456 }
457
write_format_values(const char * fname)458 void write_format_values(const char *fname)
459 {
460 // Init
461 htsFile *fp = hts_open(fname, "wb");
462 if (!fp) error("Failed to open \"%s\" : %s", fname, strerror(errno));
463 bcf_hdr_t *hdr = bcf_hdr_init("w");
464 if (!hdr) error("bcf_hdr_init : %s", strerror(errno));
465 bcf1_t *rec = bcf_init1();
466 if (!rec) error("bcf_init1 : %s", strerror(errno));
467
468 // Create VCF header
469 check0(bcf_hdr_append(hdr, "##contig=<ID=1>"));
470 check0(bcf_hdr_append(hdr, "##FORMAT=<ID=TF,Number=1,Type=Float,Description=\"Test Float\">"));
471 check0(bcf_hdr_add_sample(hdr, "S"));
472 check0(bcf_hdr_add_sample(hdr, NULL)); // to update internal structures
473 if ( bcf_hdr_write(fp, hdr)!=0 ) error("Failed to write to %s\n", fname);
474
475 // Add a record
476 // .. FORMAT
477 float test[4];
478 bcf_float_set_missing(test[0]);
479 test[1] = 47.11f;
480 bcf_float_set_vector_end(test[2]);
481 test[3] = -1.2e-13;
482 check0(bcf_update_format_float(hdr, rec, "TF", test, 4));
483 if ( bcf_write1(fp, hdr, rec)!=0 ) error("Failed to write to %s\n", fname);
484
485 bcf_destroy1(rec);
486 bcf_hdr_destroy(hdr);
487 int ret;
488 if ((ret = hts_close(fp)))
489 {
490 fprintf(stderr, "hts_close(%s): non-zero status %d\n", fname, ret);
491 exit(ret);
492 }
493 }
494
check_format_values(const char * fname)495 void check_format_values(const char *fname)
496 {
497 htsFile *fp = hts_open(fname, "r");
498 bcf_hdr_t *hdr = bcf_hdr_read(fp);
499 bcf1_t *line = bcf_init();
500
501 while (bcf_read(fp, hdr, line) == 0)
502 {
503 float *values = 0;
504 int count = 0;
505 int ret = bcf_get_format_float(hdr, line, "TF", &values, &count);
506
507 // NOTE the return value from bcf_get_format_float is different from
508 // bcf_get_info_float in the sense that vector-end markers also count.
509 if (ret != 4 ||
510 count < ret ||
511 !bcf_float_is_missing(values[0]) ||
512 values[1] != 47.11f ||
513 !bcf_float_is_vector_end(values[2]) ||
514 !bcf_float_is_vector_end(values[3]))
515 {
516 fprintf(stderr, "bcf_get_format_float didn't produce the expected output.\n");
517 exit(-1);
518 }
519
520 free(values);
521 }
522
523 bcf_destroy(line);
524 bcf_hdr_destroy(hdr);
525 hts_close(fp);
526 }
527
test_get_format_values(const char * fname)528 void test_get_format_values(const char *fname)
529 {
530 write_format_values(fname);
531 check_format_values(fname);
532 }
533
test_invalid_end_tag(void)534 void test_invalid_end_tag(void)
535 {
536 static const char vcf_data[] = "data:,"
537 "##fileformat=VCFv4.1\n"
538 "##contig=<ID=X,length=155270560>\n"
539 "##INFO=<ID=END,Number=1,Type=Integer,Description=\"End coordinate of this variant\">\n"
540 "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n"
541 "X\t86470037\trs59780433a\tTTTCA\tTGGTT,T\t.\t.\tEND=85725113\n"
542 "X\t86470038\trs59780433b\tT\tTGGTT,T\t.\t.\tEND=86470047\n";
543
544 htsFile *fp;
545 bcf_hdr_t *hdr;
546 bcf1_t *rec;
547 int ret;
548 int32_t tmpi;
549 enum htsLogLevel logging = hts_get_log_level();
550
551 // Silence warning messages
552 hts_set_log_level(HTS_LOG_ERROR);
553
554 fp = hts_open(vcf_data, "r");
555 if (!fp) error("Failed to open vcf data : %s", strerror(errno));
556 rec = bcf_init1();
557 if (!rec) error("Failed to allocate BCF record : %s", strerror(errno));
558
559 hdr = bcf_hdr_read(fp);
560 if (!hdr) error("Failed to read BCF header : %s", strerror(errno));
561
562 check0(bcf_read(fp, hdr, rec));
563 // rec->rlen should ignore the bogus END tag value on the first read
564 if (rec->rlen != 5) {
565 error("Incorrect rlen - expected 5 got %"PRIhts_pos"\n", rec->rlen);
566 }
567
568 check0(bcf_read(fp, hdr, rec));
569 // While on the second it should use it
570 if (rec->rlen != 10) {
571 error("Incorrect rlen - expected 10 got %"PRIhts_pos"\n", rec->rlen);
572 }
573
574 // Try to break it - will change rlen
575 tmpi = 85725113;
576 check0(bcf_update_info_int32(hdr, rec, "END", &tmpi, 1));
577
578 if (rec->rlen != 1) {
579 error("Incorrect rlen - expected 1 got %"PRIhts_pos"\n", rec->rlen);
580 }
581
582 ret = bcf_read(fp, hdr, rec);
583 if (ret != -1) {
584 error("Unexpected return code %d from bcf_read at EOF", ret);
585 }
586
587 bcf_destroy1(rec);
588 bcf_hdr_destroy(hdr);
589 ret = hts_close(fp);
590 if (ret != 0) {
591 error("Unexpected return code %d from hts_close", ret);
592 }
593
594 hts_set_log_level(logging);
595 }
596
test_open_format()597 void test_open_format() {
598 char mode[5];
599 int ret;
600 strcpy(mode, "r");
601 ret = vcf_open_mode(mode+1, "mode1.bcf", NULL);
602 if (strncmp(mode, "rb", 2) || ret)
603 error("Mode '%s' does not match the expected value '%s'", mode, "rb");
604 mode[1] = 0;
605 ret = vcf_open_mode(mode+1, "mode1.vcf", NULL);
606 if (strncmp(mode, "r", 1) || ret)
607 error("Mode '%s' does not match the expected value '%s'", mode, "r");
608 mode[1] = 0;
609 ret = vcf_open_mode(mode+1, "mode1.vcf.gz", NULL);
610 if (strncmp(mode, "rz", 2) || ret)
611 error("Mode '%s' does not match the expected value '%s'", mode, "rz");
612 mode[1] = 0;
613 ret = vcf_open_mode(mode+1, "mode1.vcf.bgz", NULL);
614 if (strncmp(mode, "rz", 2) || ret)
615 error("Mode '%s' does not match the expected value '%s'", mode, "rz");
616 mode[1] = 0;
617 ret = vcf_open_mode(mode+1, "mode1.xcf", NULL);
618 if (!ret)
619 error("Expected failure for wrong extension 'xcf'");
620 mode[1] = 0;
621 ret = vcf_open_mode(mode+1, "mode1.vcf.gbz", NULL);
622 if (!ret)
623 error("Expected failure for wrong extension 'vcf.gbz'");
624 mode[1] = 0;
625 ret = vcf_open_mode(mode+1, "mode1.bvcf.bgz", NULL);
626 if (!ret)
627 error("Expected failure for wrong extension 'vcf.bvcf.bgz'");
628 }
629
main(int argc,char ** argv)630 int main(int argc, char **argv)
631 {
632 char *fname = argc>1 ? argv[1] : "rmme.bcf";
633
634 // format test. quiet unless there's a failure
635 test_get_format_values(fname);
636
637 // main test. writes to stdout
638 write_bcf(fname);
639 bcf_to_vcf(fname);
640 iterator(fname);
641
642 // additional tests. quiet unless there's a failure.
643 test_get_info_values(fname);
644 test_invalid_end_tag();
645 test_open_format();
646 return 0;
647 }
648