1 /*  test/test-vcf-api.c -- VCF test harness.
2 
3     Copyright (C) 2013, 2014, 2017-2021 Genome Research Ltd.
4 
5     Author: Petr Danecek <pd3@sanger.ac.uk>
6 
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13 
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16 
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 DEALINGS IN THE SOFTWARE.  */
24 
25 #include <config.h>
26 
27 #include <errno.h>
28 #include <stdio.h>
29 #include <string.h>
30 
31 #include "../htslib/hts.h"
32 #include "../htslib/vcf.h"
33 #include "../htslib/kstring.h"
34 #include "../htslib/kseq.h"
35 
error(const char * format,...)36 void error(const char *format, ...)
37 {
38     va_list ap;
39     va_start(ap, format);
40     vfprintf(stderr, format, ap);
41     va_end(ap);
42     if (strrchr(format, '\n') == NULL) fputc('\n', stderr);
43     exit(-1);
44 }
45 
46 #define STRINGIFY(x) #x
47 #define check0(x) ((x) == 0 ? (void) 0 : error("Failed: %s", STRINGIFY(x)))
48 
check_alleles(bcf1_t * rec,const char ** alleles,int num)49 static int check_alleles(bcf1_t *rec, const char **alleles, int num) {
50     int i;
51     if (rec->n_allele !=  num) {
52         fprintf(stderr, "Wrong number of alleles - expected %d, got %d\n",
53                 num, rec->n_allele);
54         return -1;
55     }
56     if (bcf_unpack(rec, BCF_UN_STR) != 0)
57         return -1;
58     for (i = 0; i < num; i++) {
59         if (0 != strcmp(alleles[i], rec->d.allele[i])) {
60             fprintf(stderr,
61                     "Mismatch for allele %d : expected '%s' got '%s'\n",
62                     i, alleles[i], rec->d.allele[i]);
63             return -1;
64         }
65     }
66     return 0;
67 }
68 
test_update_alleles(bcf_hdr_t * hdr,bcf1_t * rec)69 static void test_update_alleles(bcf_hdr_t *hdr, bcf1_t *rec)
70 {
71     // Exercise bcf_update_alleles() a bit
72     const char *alleles1[2] = { "G", "A" };
73     const char *alleles2[3] = { "C", "TGCA", "CATG" };
74 #define rep10(x) x x x x x x x x x x
75     const char *alleles3[3] = { rep10("ATTCTAGATC"), "TGCA",
76                                 rep10("CTATTATCTCTAATGACATG") };
77 #undef rep10
78     const char *alleles4[3] = { alleles3[2], NULL, alleles3[0] };
79     // Add some alleles
80     check0(bcf_update_alleles(hdr, rec, alleles1, 2));
81     check0(check_alleles(rec, alleles1, 2));
82     // Erase them
83     check0(bcf_update_alleles(hdr, rec, NULL, 0));
84     check0(check_alleles(rec, NULL, 0));
85     // Expand to three
86     check0(bcf_update_alleles(hdr, rec, alleles2, 3));
87     check0(check_alleles(rec, alleles2, 3));
88     // Now try some bigger ones (should force a realloc)
89     check0(bcf_update_alleles(hdr, rec, alleles3, 3));
90     check0(check_alleles(rec, alleles3, 3));
91     // Ensure it works even if one of the alleles points into the
92     // existing structure
93     alleles4[1] = rec->d.allele[1];
94     check0(bcf_update_alleles(hdr, rec, alleles4, 3));
95     alleles4[1] = alleles3[1]; // Will have been clobbered by the update
96     check0(check_alleles(rec, alleles4, 3));
97     // Ensure it works when the alleles point into the existing data,
98     // rec->d.allele is used to define the input array and the
99     // order of the entries is changed.  The result of this should
100     // be the same as alleles2.
101     char *tmp = rec->d.allele[0] + strlen(rec->d.allele[0]) - 4;
102     rec->d.allele[0] = rec->d.allele[2] + strlen(rec->d.allele[2]) - 1;
103     rec->d.allele[2] = tmp;
104     check0(bcf_update_alleles(hdr, rec, (const char **) rec->d.allele, 3));
105     check0(check_alleles(rec, alleles2, 3));
106 }
107 
write_bcf(char * fname)108 void write_bcf(char *fname)
109 {
110     // Init
111     htsFile *fp    = hts_open(fname,"wb");
112     if (!fp) error("Failed to open \"%s\" : %s", fname, strerror(errno));
113     bcf_hdr_t *hdr = bcf_hdr_init("w");
114     if (!hdr) error("bcf_hdr_init : %s", strerror(errno));
115     bcf1_t *rec    = bcf_init1();
116     if (!rec) error("bcf_init1 : %s", strerror(errno));
117 
118     // Check no-op on fresh bcf1_t
119     check0(bcf_update_alleles(hdr, rec, NULL, 0));
120 
121     // Create VCF header
122     kstring_t str = {0,0,0};
123     check0(bcf_hdr_append(hdr, "##fileDate=20090805"));
124     check0(bcf_hdr_append(hdr, "##FORMAT=<ID=UF,Number=1,Type=Integer,Description=\"Unused FORMAT\">"));
125     check0(bcf_hdr_append(hdr, "##INFO=<ID=UI,Number=1,Type=Integer,Description=\"Unused INFO\">"));
126     check0(bcf_hdr_append(hdr, "##FILTER=<ID=Flt,Description=\"Unused FILTER\">"));
127     check0(bcf_hdr_append(hdr, "##unused=<XX=AA,Description=\"Unused generic\">"));
128     check0(bcf_hdr_append(hdr, "##unused=unformatted text 1"));
129     check0(bcf_hdr_append(hdr, "##unused=unformatted text 2"));
130     check0(bcf_hdr_append(hdr, "##contig=<ID=Unused,length=1>"));
131     check0(bcf_hdr_append(hdr, "##source=myImputationProgramV3.1"));
132     check0(bcf_hdr_append(hdr, "##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta"));
133     check0(bcf_hdr_append(hdr, "##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species=\"Homo sapiens\",taxonomy=x>"));
134     check0(bcf_hdr_append(hdr, "##phasing=partial"));
135     check0(bcf_hdr_append(hdr, "##INFO=<ID=NS,Number=1,Type=Integer,Description=\"Number of Samples With Data\">"));
136     check0(bcf_hdr_append(hdr, "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">"));
137     check0(bcf_hdr_append(hdr, "##INFO=<ID=NEG,Number=.,Type=Integer,Description=\"Test -ve Numbers\">"));
138     check0(bcf_hdr_append(hdr, "##INFO=<ID=AF,Number=A,Type=Float,Description=\"Allele Frequency\">"));
139     check0(bcf_hdr_append(hdr, "##INFO=<ID=AA,Number=1,Type=String,Description=\"Ancestral Allele\">"));
140     check0(bcf_hdr_append(hdr, "##INFO=<ID=DB,Number=0,Type=Flag,Description=\"dbSNP membership, build 129\">"));
141     check0(bcf_hdr_append(hdr, "##INFO=<ID=H2,Number=0,Type=Flag,Description=\"HapMap2 membership\">"));
142     check0(bcf_hdr_append(hdr, "##FILTER=<ID=q10,Description=\"Quality below 10\">"));
143     check0(bcf_hdr_append(hdr, "##FILTER=<ID=s50,Description=\"Less than half of samples have data\">"));
144     check0(bcf_hdr_append(hdr, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">"));
145     check0(bcf_hdr_append(hdr, "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">"));
146     check0(bcf_hdr_append(hdr, "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Read Depth\">"));
147     check0(bcf_hdr_append(hdr, "##FORMAT=<ID=HQ,Number=2,Type=Integer,Description=\"Haplotype Quality\">"));
148     check0(bcf_hdr_append(hdr, "##FORMAT=<ID=TS,Number=1,Type=String,Description=\"Test String 1\">"));
149 
150     // Try a few header modifications
151     bcf_hdr_remove(hdr, BCF_HL_CTG, "Unused");
152     check0(bcf_hdr_append(hdr, "##contig=<ID=Unused,length=62435964>"));
153     bcf_hdr_remove(hdr, BCF_HL_FMT, "TS");
154     check0(bcf_hdr_append(hdr, "##FORMAT=<ID=TS,Number=1,Type=String,Description=\"Test String\">"));
155     bcf_hdr_remove(hdr, BCF_HL_INFO, "NEG");
156     check0(bcf_hdr_append(hdr, "##INFO=<ID=NEG,Number=.,Type=Integer,Description=\"Test Negative Numbers\">"));
157     bcf_hdr_remove(hdr, BCF_HL_FLT, "s50");
158     check0(bcf_hdr_append(hdr, "##FILTER=<ID=s50,Description=\"Less than 50% of samples have data\">"));
159 
160     check0(bcf_hdr_add_sample(hdr, "NA00001"));
161     check0(bcf_hdr_add_sample(hdr, "NA00002"));
162     check0(bcf_hdr_add_sample(hdr, "NA00003"));
163     check0(bcf_hdr_add_sample(hdr, NULL));      // to update internal structures
164     if ( bcf_hdr_write(fp, hdr)!=0 ) error("Failed to write to %s\n", fname);
165 
166 
167     // Add a record
168     // 20     14370   rs6054257 G      A       29   PASS   NS=3;DP=14;NEG=-127;AF=0.5;DB;H2           GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,.
169     // .. CHROM
170     rec->rid = bcf_hdr_name2id(hdr, "20");
171     // .. POS
172     rec->pos = 14369;
173     // .. ID
174     check0(bcf_update_id(hdr, rec, "rs6054257"));
175     // .. REF and ALT
176     test_update_alleles(hdr, rec);
177     const char *alleles[2] = { "G", "A" };
178     check0(bcf_update_alleles_str(hdr, rec, "G,A"));
179     check0(check_alleles(rec, alleles, 2));
180     // .. QUAL
181     rec->qual = 29;
182     // .. FILTER
183     int32_t tmpi = bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS");
184     check0(bcf_update_filter(hdr, rec, &tmpi, 1));
185     // .. INFO
186     tmpi = 3;
187     check0(bcf_update_info_int32(hdr, rec, "NS", &tmpi, 1));
188     tmpi = 500;
189     check0(bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1));
190     tmpi = 100000;
191     check0(bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1));
192     tmpi = 14;
193     check0(bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1));
194     tmpi = -127;
195     check0(bcf_update_info_int32(hdr, rec, "NEG", &tmpi, 1));
196     float tmpf = 0.5;
197     check0(bcf_update_info_float(hdr, rec, "AF", &tmpf, 1));
198     check0(bcf_update_info_flag(hdr, rec, "DB", NULL, 1));
199     check0(bcf_update_info_flag(hdr, rec, "H2", NULL, 1));
200     // .. FORMAT
201     int32_t *tmpia = (int*)malloc(bcf_hdr_nsamples(hdr)*2*sizeof(int));
202     tmpia[0] = bcf_gt_phased(0);
203     tmpia[1] = bcf_gt_phased(0);
204     tmpia[2] = bcf_gt_phased(1);
205     tmpia[3] = bcf_gt_phased(0);
206     tmpia[4] = bcf_gt_unphased(1);
207     tmpia[5] = bcf_gt_unphased(1);
208     check0(bcf_update_genotypes(hdr, rec, tmpia, bcf_hdr_nsamples(hdr)*2));
209     tmpia[0] = 48;
210     tmpia[1] = 48;
211     tmpia[2] = 43;
212     check0(bcf_update_format_int32(hdr, rec, "GQ", tmpia, bcf_hdr_nsamples(hdr)));
213     tmpia[0] = 0;
214     tmpia[1] = 0;
215     tmpia[2] = 1;
216     check0(bcf_update_format_int32(hdr, rec, "DP", tmpia, bcf_hdr_nsamples(hdr)));
217     tmpia[0] = 1;
218     tmpia[1] = 100000;
219     tmpia[2] = 1;
220     check0(bcf_update_format_int32(hdr, rec, "DP", tmpia, bcf_hdr_nsamples(hdr)));
221     tmpia[0] = 1;
222     tmpia[1] = 8;
223     tmpia[2] = 5;
224     check0(bcf_update_format_int32(hdr, rec, "DP", tmpia, bcf_hdr_nsamples(hdr)));
225     tmpia[0] = 51;
226     tmpia[1] = 51;
227     tmpia[2] = 51;
228     tmpia[3] = 51;
229     tmpia[4] = bcf_int32_missing;
230     tmpia[5] = bcf_int32_missing;
231     check0(bcf_update_format_int32(hdr, rec, "HQ", tmpia, bcf_hdr_nsamples(hdr)*2));
232     char *tmp_str[] = {"String1","SomeOtherString2","YetAnotherString3"};
233     check0(bcf_update_format_string(hdr, rec, "TS", (const char**)tmp_str, 3));
234     tmp_str[0] = "LongerStringRequiringBufferReallocation";
235     check0(bcf_update_format_string(hdr, rec, "TS", (const char**)tmp_str, 3));
236     tmp_str[0] = "String1";
237     check0(bcf_update_format_string(hdr, rec, "TS", (const char**)tmp_str, 3));
238     if ( bcf_write1(fp, hdr, rec)!=0 ) error("Failed to write to %s\n", fname);
239 
240     // 20     1110696 . A      G,T     67   .   NS=2;DP=10;NEG=-128;AF=0.333,.;AA=T;DB GT 2 1   ./.
241     bcf_clear1(rec);
242     rec->rid = bcf_hdr_name2id(hdr, "20");
243     rec->pos = 1110695;
244     check0(bcf_update_alleles_str(hdr, rec, "A,G,T"));
245     rec->qual = 67;
246     tmpi = 2;
247     check0(bcf_update_info_int32(hdr, rec, "NS", &tmpi, 1));
248     tmpi = 10;
249     check0(bcf_update_info_int32(hdr, rec, "DP", &tmpi, 1));
250     tmpi = -128;
251     check0(bcf_update_info_int32(hdr, rec, "NEG", &tmpi, 1));
252     float *tmpfa = (float*)malloc(2*sizeof(float));
253     tmpfa[0] = 0.333;
254     bcf_float_set_missing(tmpfa[1]);
255     check0(bcf_update_info_float(hdr, rec, "AF", tmpfa, 2));
256     check0(bcf_update_info_string(hdr, rec, "AA", "SHORT"));
257     check0(bcf_update_info_string(hdr, rec, "AA", "LONGSTRING"));
258     check0(bcf_update_info_string(hdr, rec, "AA", "T"));
259     check0(bcf_update_info_flag(hdr, rec, "DB", NULL, 1));
260     tmpia[0] = bcf_gt_phased(2);
261     tmpia[1] = bcf_int32_vector_end;
262     tmpia[2] = bcf_gt_phased(1);
263     tmpia[3] = bcf_int32_vector_end;
264     tmpia[4] = bcf_gt_missing;
265     tmpia[5] = bcf_gt_missing;
266     check0(bcf_update_genotypes(hdr, rec, tmpia, bcf_hdr_nsamples(hdr)*2));
267     if ( bcf_write1(fp, hdr, rec)!=0 ) error("Failed to write to %s\n", fname);
268 
269     free(tmpia);
270     free(tmpfa);
271 
272     // Clean
273     free(str.s);
274     bcf_destroy1(rec);
275     bcf_hdr_destroy(hdr);
276     int ret;
277     if ( (ret=hts_close(fp)) )
278     {
279         fprintf(stderr,"hts_close(%s): non-zero status %d\n",fname,ret);
280         exit(ret);
281     }
282 }
283 
bcf_to_vcf(char * fname)284 void bcf_to_vcf(char *fname)
285 {
286     htsFile *fp    = hts_open(fname,"rb");
287     if (!fp) error("Failed to open \"%s\" : %s", fname, strerror(errno));
288     bcf_hdr_t *hdr = bcf_hdr_read(fp);
289     if (!hdr) error("bcf_hdr_read : %s", strerror(errno));
290     bcf1_t *rec    = bcf_init1();
291     if (!rec) error("bcf_init1 : %s", strerror(errno));
292 
293     char *gz_fname = (char*) malloc(strlen(fname)+4);
294     if (!gz_fname) error("malloc : %s", strerror(errno));
295     snprintf(gz_fname,strlen(fname)+4,"%s.gz",fname);
296     htsFile *out   = hts_open(gz_fname,"wg");
297     if (!out) error("Couldn't open \"%s\" : %s\n", gz_fname, strerror(errno));
298 
299     bcf_hdr_t *hdr_out = bcf_hdr_dup(hdr);
300     bcf_hdr_remove(hdr_out,BCF_HL_STR,"unused");
301     bcf_hdr_remove(hdr_out,BCF_HL_GEN,"unused");
302     bcf_hdr_remove(hdr_out,BCF_HL_FLT,"Flt");
303     bcf_hdr_remove(hdr_out,BCF_HL_INFO,"UI");
304     bcf_hdr_remove(hdr_out,BCF_HL_FMT,"UF");
305     bcf_hdr_remove(hdr_out,BCF_HL_CTG,"Unused");
306     if ( bcf_hdr_write(out, hdr_out)!=0 ) error("Failed to write to %s\n", fname);
307     int r;
308     while ((r = bcf_read1(fp, hdr, rec)) >= 0)
309     {
310         if ( bcf_write1(out, hdr_out, rec)!=0 ) error("Failed to write to %s\n", fname);
311 
312         // Test problems caused by bcf1_sync: the data block
313         // may be realloced, also the unpacked structures must
314         // get updated.
315         check0(bcf_unpack(rec, BCF_UN_STR));
316         check0(bcf_update_id(hdr, rec, 0));
317         check0(bcf_update_format_int32(hdr, rec, "GQ", NULL, 0));
318 
319         bcf1_t *dup = bcf_dup(rec);     // force bcf1_sync call
320         if ( bcf_write1(out, hdr_out, dup)!=0 ) error("Failed to write to %s\n", fname);
321         bcf_destroy1(dup);
322 
323         check0(bcf_update_alleles_str(hdr_out, rec, "G,A"));
324         int32_t tmpi = 99;
325         check0(bcf_update_info_int32(hdr_out, rec, "DP", &tmpi, 1));
326         int32_t tmpia[] = {9,9,9};
327         check0(bcf_update_format_int32(hdr_out, rec, "DP", tmpia, 3));
328 
329         if ( bcf_write1(out, hdr_out, rec)!=0 ) error("Failed to write to %s\n", fname);
330     }
331     if (r < -1) error("bcf_read1");
332 
333     bcf_destroy1(rec);
334     bcf_hdr_destroy(hdr);
335     bcf_hdr_destroy(hdr_out);
336     int ret;
337     if ( (ret=hts_close(fp)) )
338     {
339         fprintf(stderr,"hts_close(%s): non-zero status %d\n",fname,ret);
340         exit(ret);
341     }
342     if ( (ret=hts_close(out)) )
343     {
344         fprintf(stderr,"hts_close(%s): non-zero status %d\n",gz_fname,ret);
345         exit(ret);
346     }
347 
348 
349     // read gzip, write stdout
350     htsFile *gz_in = hts_open(gz_fname, "r");
351     if ( !gz_in )
352     {
353         fprintf(stderr,"Could not read: %s\n", gz_fname);
354         exit(1);
355     }
356 
357     kstring_t line = {0,0,0};
358     while ( hts_getline(gz_in, KS_SEP_LINE, &line)>0 )
359     {
360         kputc('\n',&line);
361         fwrite(line.s,1,line.l,stdout);
362     }
363 
364     if ( (ret=hts_close(gz_in)) )
365     {
366         fprintf(stderr,"hts_close(%s): non-zero status %d\n",gz_fname,ret);
367         exit(ret);
368     }
369     free(line.s);
370     free(gz_fname);
371 }
372 
iterator(const char * fname)373 void iterator(const char *fname)
374 {
375     htsFile *fp = hts_open(fname, "r");
376     if (!fp) error("Failed to open \"%s\" : %s", fname, strerror(errno));
377     bcf_hdr_t *hdr = bcf_hdr_read(fp);
378     if (!hdr) error("bcf_hdr_read : %s", strerror(errno));
379     hts_idx_t *idx;
380     hts_itr_t *iter;
381 
382     bcf_index_build(fname, 0);
383     idx = bcf_index_load(fname);
384 
385     iter = bcf_itr_queryi(idx, bcf_hdr_name2id(hdr, "20"), 1110600, 1110800);
386     bcf_itr_destroy(iter);
387 
388     iter = bcf_itr_querys(idx, hdr, "20:1110600-1110800");
389     bcf_itr_destroy(iter);
390 
391     hts_idx_destroy(idx);
392     bcf_hdr_destroy(hdr);
393     int ret;
394     if ( (ret=hts_close(fp)) )
395     {
396         fprintf(stderr,"hts_close(%s): non-zero status %d\n",fname,ret);
397         exit(ret);
398     }
399 }
400 
test_get_info_values(const char * fname)401 void test_get_info_values(const char *fname)
402 {
403     htsFile *fp = hts_open(fname, "r");
404     if (!fp) error("Failed to open \"%s\" : %s", fname, strerror(errno));
405     bcf_hdr_t *hdr = bcf_hdr_read(fp);
406     if (!hdr) error("bcf_hdr_read : %s", strerror(errno));
407     bcf1_t *line = bcf_init();
408     if (!line) error("bcf_init : %s", strerror(errno));
409     int r;
410     while ((r = bcf_read(fp, hdr, line)) == 0)
411     {
412         float *afs = 0;
413         int32_t *negs = NULL;
414         int count = 0;
415         int ret = bcf_get_info_float(hdr, line, "AF", &afs, &count);
416 
417         if (line->pos == 14369)
418         {
419             if (ret != 1 || afs[0] != 0.5f)
420             {
421                 fprintf(stderr, "AF on position 14370 should be 0.5\n");
422                 exit(-1);
423             }
424         }
425         else
426         {
427             if (ret != 2 || afs[0] != 0.333f || !bcf_float_is_missing(afs[1]))
428             {
429                 fprintf(stderr, "AF on position 1110696 should be 0.333, missing\n");
430                 exit(-1);
431             }
432         }
433 
434         free(afs);
435 
436         int32_t expected = (line->pos == 14369)? -127 : -128;
437         count = 0;
438         ret = bcf_get_info_int32(hdr, line, "NEG", &negs, &count);
439         if (ret != 1 || negs[0] != expected)
440         {
441             if (ret < 0)
442                 fprintf(stderr, "NEG should be %d, got error ret=%d\n", expected, ret);
443             else if (ret == 0)
444                 fprintf(stderr, "NEG should be %d, got no entries\n", expected);
445             else
446                 fprintf(stderr, "NEG should be %d, got %d entries (first is %d)\n", expected, ret, negs[0]);
447             exit(1);
448         }
449         free(negs);
450     }
451     if (r < -1) error("bcf_read");
452 
453     bcf_destroy(line);
454     bcf_hdr_destroy(hdr);
455     hts_close(fp);
456 }
457 
write_format_values(const char * fname)458 void write_format_values(const char *fname)
459 {
460     // Init
461     htsFile *fp = hts_open(fname, "wb");
462     if (!fp) error("Failed to open \"%s\" : %s", fname, strerror(errno));
463     bcf_hdr_t *hdr = bcf_hdr_init("w");
464     if (!hdr) error("bcf_hdr_init : %s", strerror(errno));
465     bcf1_t *rec = bcf_init1();
466     if (!rec) error("bcf_init1 : %s", strerror(errno));
467 
468     // Create VCF header
469     check0(bcf_hdr_append(hdr, "##contig=<ID=1>"));
470     check0(bcf_hdr_append(hdr, "##FORMAT=<ID=TF,Number=1,Type=Float,Description=\"Test Float\">"));
471     check0(bcf_hdr_add_sample(hdr, "S"));
472     check0(bcf_hdr_add_sample(hdr, NULL)); // to update internal structures
473     if ( bcf_hdr_write(fp, hdr)!=0 ) error("Failed to write to %s\n", fname);
474 
475     // Add a record
476     // .. FORMAT
477     float test[4];
478     bcf_float_set_missing(test[0]);
479     test[1] = 47.11f;
480     bcf_float_set_vector_end(test[2]);
481     test[3] = -1.2e-13;
482     check0(bcf_update_format_float(hdr, rec, "TF", test, 4));
483     if ( bcf_write1(fp, hdr, rec)!=0 ) error("Failed to write to %s\n", fname);
484 
485     bcf_destroy1(rec);
486     bcf_hdr_destroy(hdr);
487     int ret;
488     if ((ret = hts_close(fp)))
489     {
490         fprintf(stderr, "hts_close(%s): non-zero status %d\n", fname, ret);
491         exit(ret);
492     }
493 }
494 
check_format_values(const char * fname)495 void check_format_values(const char *fname)
496 {
497     htsFile *fp = hts_open(fname, "r");
498     bcf_hdr_t *hdr = bcf_hdr_read(fp);
499     bcf1_t *line = bcf_init();
500 
501     while (bcf_read(fp, hdr, line) == 0)
502     {
503         float *values = 0;
504         int count = 0;
505         int ret = bcf_get_format_float(hdr, line, "TF", &values, &count);
506 
507         // NOTE the return value from bcf_get_format_float is different from
508         // bcf_get_info_float in the sense that vector-end markers also count.
509         if (ret != 4 ||
510             count < ret ||
511             !bcf_float_is_missing(values[0]) ||
512             values[1] != 47.11f ||
513             !bcf_float_is_vector_end(values[2]) ||
514             !bcf_float_is_vector_end(values[3]))
515         {
516             fprintf(stderr, "bcf_get_format_float didn't produce the expected output.\n");
517             exit(-1);
518         }
519 
520         free(values);
521     }
522 
523     bcf_destroy(line);
524     bcf_hdr_destroy(hdr);
525     hts_close(fp);
526 }
527 
test_get_format_values(const char * fname)528 void test_get_format_values(const char *fname)
529 {
530     write_format_values(fname);
531     check_format_values(fname);
532 }
533 
test_invalid_end_tag(void)534 void test_invalid_end_tag(void)
535 {
536     static const char vcf_data[] = "data:,"
537         "##fileformat=VCFv4.1\n"
538         "##contig=<ID=X,length=155270560>\n"
539         "##INFO=<ID=END,Number=1,Type=Integer,Description=\"End coordinate of this variant\">\n"
540         "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n"
541         "X\t86470037\trs59780433a\tTTTCA\tTGGTT,T\t.\t.\tEND=85725113\n"
542         "X\t86470038\trs59780433b\tT\tTGGTT,T\t.\t.\tEND=86470047\n";
543 
544     htsFile *fp;
545     bcf_hdr_t *hdr;
546     bcf1_t *rec;
547     int ret;
548     int32_t tmpi;
549     enum htsLogLevel logging = hts_get_log_level();
550 
551     // Silence warning messages
552     hts_set_log_level(HTS_LOG_ERROR);
553 
554     fp = hts_open(vcf_data, "r");
555     if (!fp) error("Failed to open vcf data : %s", strerror(errno));
556     rec = bcf_init1();
557     if (!rec) error("Failed to allocate BCF record : %s", strerror(errno));
558 
559     hdr = bcf_hdr_read(fp);
560     if (!hdr) error("Failed to read BCF header : %s", strerror(errno));
561 
562     check0(bcf_read(fp, hdr, rec));
563     // rec->rlen should ignore the bogus END tag value on the first read
564     if (rec->rlen != 5) {
565         error("Incorrect rlen - expected 5 got %"PRIhts_pos"\n", rec->rlen);
566     }
567 
568     check0(bcf_read(fp, hdr, rec));
569     // While on the second it should use it
570     if (rec->rlen != 10) {
571         error("Incorrect rlen - expected 10 got %"PRIhts_pos"\n", rec->rlen);
572     }
573 
574     // Try to break it - will change rlen
575     tmpi = 85725113;
576     check0(bcf_update_info_int32(hdr, rec, "END", &tmpi, 1));
577 
578     if (rec->rlen != 1) {
579         error("Incorrect rlen - expected 1 got %"PRIhts_pos"\n", rec->rlen);
580     }
581 
582     ret = bcf_read(fp, hdr, rec);
583     if (ret != -1) {
584         error("Unexpected return code %d from bcf_read at EOF", ret);
585     }
586 
587     bcf_destroy1(rec);
588     bcf_hdr_destroy(hdr);
589     ret = hts_close(fp);
590     if (ret != 0) {
591         error("Unexpected return code %d from hts_close", ret);
592     }
593 
594     hts_set_log_level(logging);
595 }
596 
test_open_format()597 void test_open_format() {
598     char mode[5];
599     int ret;
600     strcpy(mode, "r");
601     ret = vcf_open_mode(mode+1, "mode1.bcf", NULL);
602     if (strncmp(mode, "rb", 2) || ret)
603         error("Mode '%s' does not match the expected value '%s'", mode, "rb");
604     mode[1] = 0;
605     ret = vcf_open_mode(mode+1, "mode1.vcf", NULL);
606     if (strncmp(mode, "r", 1) || ret)
607         error("Mode '%s' does not match the expected value '%s'", mode, "r");
608     mode[1] = 0;
609     ret = vcf_open_mode(mode+1, "mode1.vcf.gz", NULL);
610     if (strncmp(mode, "rz", 2) || ret)
611         error("Mode '%s' does not match the expected value '%s'", mode, "rz");
612     mode[1] = 0;
613     ret = vcf_open_mode(mode+1, "mode1.vcf.bgz", NULL);
614     if (strncmp(mode, "rz", 2) || ret)
615         error("Mode '%s' does not match the expected value '%s'", mode, "rz");
616     mode[1] = 0;
617     ret = vcf_open_mode(mode+1, "mode1.xcf", NULL);
618     if (!ret)
619         error("Expected failure for wrong extension 'xcf'");
620     mode[1] = 0;
621     ret = vcf_open_mode(mode+1, "mode1.vcf.gbz", NULL);
622     if (!ret)
623         error("Expected failure for wrong extension 'vcf.gbz'");
624     mode[1] = 0;
625     ret = vcf_open_mode(mode+1, "mode1.bvcf.bgz", NULL);
626     if (!ret)
627         error("Expected failure for wrong extension 'vcf.bvcf.bgz'");
628 }
629 
main(int argc,char ** argv)630 int main(int argc, char **argv)
631 {
632     char *fname = argc>1 ? argv[1] : "rmme.bcf";
633 
634     // format test. quiet unless there's a failure
635     test_get_format_values(fname);
636 
637     // main test. writes to stdout
638     write_bcf(fname);
639     bcf_to_vcf(fname);
640     iterator(fname);
641 
642     // additional tests. quiet unless there's a failure.
643     test_get_info_values(fname);
644     test_invalid_end_tag();
645     test_open_format();
646     return 0;
647 }
648