1 /* The MIT License
2 
3    Copyright (c) 2018-2021 Genome Research Ltd.
4 
5    Author: Petr Danecek <pd3@sanger.ac.uk>
6 
7    Permission is hereby granted, free of charge, to any person obtaining a copy
8    of this software and associated documentation files (the "Software"), to deal
9    in the Software without restriction, including without limitation the rights
10    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11    copies of the Software, and to permit persons to whom the Software is
12    furnished to do so, subject to the following conditions:
13 
14    The above copyright notice and this permission notice shall be included in
15    all copies or substantial portions of the Software.
16 
17    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23    THE SOFTWARE.
24 
25  */
26 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <getopt.h>
30 #include <strings.h>
31 #include <errno.h>
32 #include <unistd.h>     // for isatty
33 #include <inttypes.h>
34 #include <htslib/hts.h>
35 #include <htslib/vcf.h>
36 #include <htslib/kstring.h>
37 #include <htslib/kseq.h>
38 #include <htslib/kfunc.h>
39 #include <htslib/synced_bcf_reader.h>
40 #include "bcftools.h"
41 #include "filter.h"
42 
43 
44 // Logic of the filters: include or exclude sites which match the filters?
45 #define FLT_INCLUDE 1
46 #define FLT_EXCLUDE 2
47 
48 #define PRINT_PASSOC  (1<<0)
49 #define PRINT_FASSOC  (1<<1)
50 #define PRINT_NASSOC  (1<<2)
51 #define PRINT_NOVELAL (1<<3)
52 #define PRINT_NOVELGT (1<<4)
53 
54 typedef struct
55 {
56     int argc, filter_logic, regions_is_file, targets_is_file, output_type, force_samples, clevel;
57     int regions_overlap, targets_overlap;
58     uint32_t annots;
59     char **argv, *output_fname, *fname, *regions, *targets, *filter_str, *annots_str;
60     char *control_samples_str, *case_samples_str, *max_AC_str;
61     int *control_smpl, *case_smpl, ncontrol_smpl, ncase_smpl;
62     filter_t *filter;
63     bcf_srs_t *sr;
64     bcf_hdr_t *hdr, *hdr_out;
65     htsFile *out_fh;
66     int32_t *gts;
67     int mgts;
68     uint32_t *control_gts;
69     int ncontrol_gts, mcontrol_gts, ntotal, nskipped, ntested, ncase_al, ncase_gt;
70     kstring_t case_als_smpl, case_gts_smpl;
71     int max_AC, nals[4];    // nals: number of control-ref, control-alt, case-ref and case-alt alleles in the region
72 }
73 args_t;
74 
75 args_t args;
76 
about(void)77 const char *about(void)
78 {
79     return "Find novel alleles and genotypes in two groups of samples.\n";
80 }
81 
usage_text(void)82 static const char *usage_text(void)
83 {
84     return
85         "\n"
86         "About: Runs a basic association test, per-site or in a region, and checks for novel alleles and\n"
87         "       genotypes in two groups of samples. Adds the following INFO annotations:\n"
88         "       - PASSOC  .. Fisher's exact test probability of genotypic association (REF vs non-REF allele)\n"
89         "       - FASSOC  .. proportion of non-REF allele in controls and cases\n"
90         "       - NASSOC  .. number of control-ref, control-alt, case-ref and case-alt alleles\n"
91         "       - NOVELAL .. lists samples with a novel allele not observed in the control group\n"
92         "       - NOVELGT .. lists samples with a novel genotype not observed in the control group\n"
93         "Usage: bcftools +contrast [Plugin Options]\n"
94         "Plugin options:\n"
95         "   -a, --annots LIST                List of annotations to output [PASSOC,FASSOC,NOVELAL]\n"
96         "   -0, --control-samples LIST|FILE  File or comma-separated list of control (background) samples\n"
97         "   -1, --case-samples LIST|FILE     File or comma-separated list of samples where novel allele or genotype is expected\n"
98         "   -e, --exclude EXPR               Exclude sites and samples for which the expression is true\n"
99         "   -f, --max-allele-freq NUM        Calculate enrichment of rare alleles. Floating point numbers between 0 and 1 are\n"
100         "                                        interpreted as ALT allele frequencies, integers as ALT allele counts\n"
101         "       --force-samples              Continue even if some samples listed in the -0,-1 files are missing from the VCF\n"
102         "   -i, --include EXPR               Include sites and samples for which the expression is true\n"
103         "   -o, --output FILE                Output file name [stdout]\n"
104         "   -O, --output-type u|b|v|z[0-9]   u/b: un/compressed BCF, v/z: un/compressed VCF, 0-9: compression level [v]\n"
105         "   -r, --regions REG                Restrict to comma-separated list of regions\n"
106         "   -R, --regions-file FILE          Restrict to regions listed in a file\n"
107         "       --regions-overlap 0|1|2      Include if POS in the region (0), record overlaps (1), variant overlaps (2) [1]\n"
108         "   -t, --targets REG                Similar to -r but streams rather than index-jumps\n"
109         "   -T, --targets-file FILE          Similar to -R but streams rather than index-jumps\n"
110         "       --targets-overlap 0|1|2      Include if POS in the region (0), record overlaps (1), variant overlaps (2) [0]\n"
111         "\n"
112         "Example:\n"
113         "   # Test if any of the samples a,b is different from the samples c,d,e\n"
114         "   bcftools +contrast -0 c,d,e -1 a,b file.bcf\n"
115         "\n"
116         "   # Same as above, but read samples from a file. In case of a name collision, the sample name\n"
117         "   # has precedence: the existence of a file with a list of samples is not checked unless no such\n"
118         "   # sample exists in the VCF. Use a full path (e.g. \"./string\" instead of \"string\") to avoid\n"
119         "   # name clashes\n"
120         "   bcftools +contrast -0 samples0.txt -1 samples1.txt file.bcf\n"
121         "\n"
122         "   # The same as above but checks for enrichment of rare alleles, AF<0.001 in this example, in a region\n"
123         "   bcftools +contrast -r 20:1000-2000 -f 0.001 -0 samples0.txt -1 samples1.txt file.bcf\n"
124         "\n";
125 }
126 
cmp_int(const void * a,const void * b)127 static int cmp_int(const void *a, const void *b)
128 {
129     if ( *((int*)a) < *((int*)b) ) return -1;
130     if ( *((int*)a) > *((int*)b) ) return -1;
131     return 0;
132 }
read_sample_list_or_file(bcf_hdr_t * hdr,const char * str,int ** smpl,int * nsmpl,int force_samples)133 static void read_sample_list_or_file(bcf_hdr_t *hdr, const char *str, int **smpl, int *nsmpl, int force_samples)
134 {
135     char **str_list = NULL;
136     int i,j, *list = NULL, nlist = 0, is_file, nskipped = 0;
137 
138     for (is_file=0; is_file<=1; is_file++)
139     {
140         if ( str_list )
141         {
142             for (i=0; i<nlist; i++) free(str_list[i]);
143             free(str_list);
144             free(list);
145             str_list = NULL;
146             list = NULL;
147             nlist = 0;
148         }
149 
150         str_list = hts_readlist(str, is_file, &nlist);
151         if ( !str_list )
152         {
153             if ( force_samples ) continue;
154             error("The sample \"%s\", is not present in the VCF\n", str);
155         }
156 
157         list = (int*) malloc(sizeof(int)*nlist);
158         for (i=0,j=0; i<nlist; i++,j++)
159         {
160             list[j] = bcf_hdr_id2int(hdr, BCF_DT_SAMPLE, str_list[i]);
161             if ( list[j] >= 0 ) continue;
162             if ( is_file )
163             {
164                 if ( !force_samples ) error("The sample \"%s\" is not present in the VCF. Use --force-samples to proceed anyway.\n", str_list[i]);
165                 j--;
166                 nskipped++;
167                 continue;
168             }
169             break;
170         }
171         if ( i==nlist ) break;
172     }
173     for (i=0; i<nlist; i++) free(str_list[i]);
174     nlist -= nskipped;
175     if ( !nlist && !force_samples ) error("None of the samples are present in the VCF: %s\n", str);
176     if ( nskipped ) fprintf(stderr,"Warning: using %d sample%s, %d from %s %s not present in the VCF\n", nlist,nlist>1?"s":"",nskipped,str,nskipped>1?"are":"is");
177     free(str_list);
178     qsort(list,nlist,sizeof(*list),cmp_int);
179     *smpl = list;
180     *nsmpl = nlist;
181 }
182 
init_data(args_t * args)183 static void init_data(args_t *args)
184 {
185     int ntmp, i;
186     char **tmp = hts_readlist(args->annots_str, 0, &ntmp);
187     for (i=0; i<ntmp; i++)
188     {
189         if ( !strcasecmp("PASSOC",tmp[i]) ) args->annots |= PRINT_PASSOC;
190         else if ( !strcasecmp("FASSOC",tmp[i]) ) args->annots |= PRINT_FASSOC;
191         else if ( !strcasecmp("NASSOC",tmp[i]) ) args->annots |= PRINT_NASSOC;
192         else if ( !strcasecmp("NOVELAL",tmp[i]) ) args->annots |= PRINT_NOVELAL;
193         else if ( !strcasecmp("NOVELGT",tmp[i]) ) args->annots |= PRINT_NOVELGT;
194         else error("The annotation is not recognised: %s\n", tmp[i]);
195         free(tmp[i]);
196     }
197     free(tmp);
198 
199     args->sr = bcf_sr_init();
200     if ( args->regions )
201     {
202         args->sr->require_index = 1;
203         bcf_sr_set_opt(args->sr,BCF_SR_REGIONS_OVERLAP,args->regions_overlap);
204         if ( bcf_sr_set_regions(args->sr, args->regions, args->regions_is_file)<0 ) error("Failed to read the regions: %s\n",args->regions);
205     }
206     if ( args->targets )
207     {
208         bcf_sr_set_opt(args->sr,BCF_SR_TARGETS_OVERLAP,args->targets_overlap);
209         if ( bcf_sr_set_targets(args->sr, args->targets, args->targets_is_file, 0)<0 ) error("Failed to read the targets: %s\n",args->targets);
210     }
211     if ( !bcf_sr_add_reader(args->sr,args->fname) ) error("Error: %s\n", bcf_sr_strerror(args->sr->errnum));
212     args->hdr = bcf_sr_get_header(args->sr,0);
213     args->hdr_out = bcf_hdr_dup(args->hdr);
214     if ( args->annots & PRINT_PASSOC )
215         bcf_hdr_append(args->hdr_out, "##INFO=<ID=PASSOC,Number=1,Type=Float,Description=\"Fisher's exact test probability of genotypic association (REF vs non-REF allele)\">");
216     if ( args->annots & PRINT_FASSOC )
217         bcf_hdr_append(args->hdr_out, "##INFO=<ID=FASSOC,Number=2,Type=Float,Description=\"Proportion of non-REF allele in controls and cases\">");
218     if ( args->annots & PRINT_NASSOC )
219         bcf_hdr_append(args->hdr_out, "##INFO=<ID=NASSOC,Number=4,Type=Integer,Description=\"Number of control-ref, control-alt, case-ref and case-alt alleles\">");
220     if ( args->annots & PRINT_NOVELAL )
221         bcf_hdr_append(args->hdr_out, "##INFO=<ID=NOVELAL,Number=.,Type=String,Description=\"List of samples with novel alleles. Note that samples listed here are not listed in NOVELGT again.\">");
222     if ( args->annots & PRINT_NOVELGT )
223         bcf_hdr_append(args->hdr_out, "##INFO=<ID=NOVELGT,Number=.,Type=String,Description=\"List of samples with novel genotypes\">");
224 
225     if ( args->filter_str )
226         args->filter = filter_init(args->hdr, args->filter_str);
227 
228     read_sample_list_or_file(args->hdr, args->control_samples_str, &args->control_smpl, &args->ncontrol_smpl, args->force_samples);
229     read_sample_list_or_file(args->hdr, args->case_samples_str, &args->case_smpl, &args->ncase_smpl, args->force_samples);
230 
231     char wmode[8];
232     set_wmode(wmode,args->output_type,args->output_fname,args->clevel);
233     args->out_fh = hts_open(args->output_fname ? args->output_fname : "-", wmode);
234     if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
235     if ( bcf_hdr_write(args->out_fh, args->hdr_out)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
236 
237     if ( args->max_AC_str )
238     {
239         char *tmp;
240         args->max_AC = strtol(args->max_AC_str, &tmp, 10);
241         if ( tmp==args->max_AC_str || *tmp )
242         {
243             double val = strtod(args->max_AC_str, &tmp);
244             if ( tmp==args->max_AC_str || *tmp ) error("Could not parse the argument: -f, --max-allele-freq %s\n", args->max_AC_str);
245             if ( val<0 || val>1 ) error("Expected integer or float from the range [0,1]: -f, --max-allele-freq %s\n", args->max_AC_str);
246             args->max_AC = val * bcf_hdr_nsamples(args->hdr);
247             if ( !args->max_AC ) args->max_AC = 1;
248         }
249     }
250 }
destroy_data(args_t * args)251 static void destroy_data(args_t *args)
252 {
253     bcf_hdr_destroy(args->hdr_out);
254     if ( hts_close(args->out_fh)!=0 ) error("[%s] Error: close failed .. %s\n", __func__,args->output_fname);
255     free(args->case_als_smpl.s);
256     free(args->case_gts_smpl.s);
257     free(args->gts);
258     free(args->control_gts);
259     free(args->control_smpl);
260     free(args->case_smpl);
261     if ( args->filter ) filter_destroy(args->filter);
262     bcf_sr_destroy(args->sr);
263     free(args);
264 }
binary_search(uint32_t val,uint32_t * dat,int ndat)265 static inline int binary_search(uint32_t val, uint32_t *dat, int ndat)
266 {
267     int i = -1, imin = 0, imax = ndat - 1;
268     while ( imin<=imax )
269     {
270         i = (imin+imax)/2;
271         if ( dat[i] < val ) imin = i + 1;
272         else if ( dat[i] > val ) imax = i - 1;
273         else return 1;
274     }
275     return 0;
276 }
binary_insert(uint32_t val,uint32_t ** dat,int * ndat,int * mdat)277 static inline void binary_insert(uint32_t val, uint32_t **dat, int *ndat, int *mdat)
278 {
279     int i = -1, imin = 0, imax = *ndat - 1;
280     while ( imin<=imax )
281     {
282         i = (imin+imax)/2;
283         if ( (*dat)[i] < val ) imin = i + 1;
284         else if ( (*dat)[i] > val ) imax = i - 1;
285         else return;
286     }
287     while ( i>=0 && (*dat)[i]>val ) i--;
288 
289     (*ndat)++;
290     hts_expand(uint32_t, (*ndat), (*mdat), (*dat));
291 
292     if ( *ndat > 1 )
293         memmove(*dat + i + 1, *dat + i, sizeof(uint32_t)*(*ndat - i - 1));
294 
295     (*dat)[i+1] = val;
296 }
process_record(args_t * args,bcf1_t * rec)297 static int process_record(args_t *args, bcf1_t *rec)
298 {
299     args->ntotal++;
300 
301     static int warned = 0;
302     int ngts = bcf_get_genotypes(args->hdr, rec, &args->gts, &args->mgts);
303     ngts /= rec->n_sample;
304     if ( ngts>2 ) error("todo: ploidy=%d\n", ngts);
305 
306     args->ncontrol_gts = 0;
307     uint32_t control_als = 0;
308     int32_t nals[4] = {0,0,0,0};    // ctrl-ref, ctrl-alt, case-ref, case-alt
309     int i,j;
310     for (i=0; i<args->ncontrol_smpl; i++)
311     {
312         uint32_t gt  = 0;
313         int32_t *ptr = args->gts + args->control_smpl[i]*ngts;
314         for (j=0; j<ngts; j++)
315         {
316             if ( ptr[j]==bcf_int32_vector_end ) break;
317             if ( bcf_gt_is_missing(ptr[j]) ) continue;
318             int ial = bcf_gt_allele(ptr[j]);
319             if ( ial > 31 )
320             {
321                 if ( !warned )
322                 {
323                     fprintf(stderr,"Too many alleles (>32) at %s:%"PRId64", skipping the site.\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
324                     warned = 1;
325                 }
326                 args->nskipped++;
327                 return -1;
328             }
329             control_als |= 1<<ial;
330             gt |= 1<<ial;
331             if ( ial ) nals[1]++;
332             else nals[0]++;
333         }
334         if ( args->annots & PRINT_NOVELGT )
335             binary_insert(gt, &args->control_gts, &args->ncontrol_gts, &args->mcontrol_gts);
336     }
337     if ( !control_als && args->ncontrol_smpl )
338     {
339         // all are missing
340         args->nskipped++;
341         return -1;
342     }
343 
344     args->case_als_smpl.l = 0;
345     args->case_gts_smpl.l = 0;
346 
347     int has_gt = 0;
348     for (i=0; i<args->ncase_smpl; i++)
349     {
350         int case_al = 0;
351         uint32_t gt  = 0;
352         int32_t *ptr = args->gts + args->case_smpl[i]*ngts;
353         for (j=0; j<ngts; j++)
354         {
355             if ( ptr[j]==bcf_int32_vector_end ) break;
356             if ( bcf_gt_is_missing(ptr[j]) ) continue;
357             int ial = bcf_gt_allele(ptr[j]);
358             if ( ial > 31 )
359             {
360                 if ( !warned )
361                 {
362                     fprintf(stderr,"Too many alleles (>32) at %s:%"PRId64", skipping. (todo?)\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1);
363                     warned = 1;
364                 }
365                 args->nskipped++;
366                 return -1;
367             }
368             if ( !(control_als & (1<<ial)) ) case_al = 1;
369             gt |= 1<<ial;
370             if ( ial ) nals[3]++;
371             else nals[2]++;
372         }
373         if ( !gt ) continue;
374         has_gt = 1;
375 
376         char *smpl = args->hdr->samples[ args->case_smpl[i] ];
377         if ( case_al && (args->annots & PRINT_NOVELAL) )
378         {
379             if ( args->case_als_smpl.l ) kputc(',', &args->case_als_smpl);
380             kputs(smpl, &args->case_als_smpl);
381         }
382         else if ( (args->annots & PRINT_NOVELGT) && !binary_search(gt, args->control_gts, args->ncontrol_gts) )
383         {
384             if ( args->case_gts_smpl.l ) kputc(',', &args->case_gts_smpl);
385             kputs(smpl, &args->case_gts_smpl);
386         }
387     }
388     if ( !has_gt && args->ncase_smpl )
389     {
390         // all are missing
391         args->nskipped++;
392         return -1;
393     }
394 
395     if ( args->max_AC )
396     {
397         if ( nals[0]+nals[2] > nals[1]+nals[3] )
398         {
399             if ( nals[1]+nals[3] <= args->max_AC )
400                 for (i=0; i<4; i++) args->nals[i] += nals[i];
401         }
402         else
403         {
404             if ( nals[0]+nals[2] <= args->max_AC )
405             {
406                 args->nals[0] += nals[1];
407                 args->nals[1] += nals[0];
408                 args->nals[2] += nals[3];
409                 args->nals[3] += nals[2];
410             }
411         }
412     }
413 
414     float vals[2];
415     if ( (args->annots & PRINT_PASSOC) && args->ncontrol_smpl && args->ncase_smpl )
416     {
417         double left, right, fisher;
418         kt_fisher_exact(nals[0],nals[1],nals[2],nals[3], &left,&right,&fisher);
419         vals[0] = fisher;
420         bcf_update_info_float(args->hdr_out, rec, "PASSOC", vals, 1);
421     }
422     if ( (args->annots & PRINT_FASSOC) && args->ncontrol_smpl && args->ncase_smpl )
423     {
424         if ( nals[0]+nals[1] ) vals[0] = (float)nals[1]/(nals[0]+nals[1]);
425         else bcf_float_set_missing(vals[0]);
426         if ( nals[2]+nals[3] ) vals[1] = (float)nals[3]/(nals[2]+nals[3]);
427         else bcf_float_set_missing(vals[1]);
428         bcf_update_info_float(args->hdr_out, rec, "FASSOC", vals, 2);
429     }
430     if ( args->annots & PRINT_NASSOC )
431         bcf_update_info_int32(args->hdr_out, rec, "NASSOC", nals, 4);
432 
433     if ( args->case_als_smpl.l )
434     {
435         bcf_update_info_string(args->hdr_out, rec, "NOVELAL", args->case_als_smpl.s);
436         args->ncase_al++;
437     }
438     if ( args->case_gts_smpl.l )
439     {
440         bcf_update_info_string(args->hdr_out, rec, "NOVELGT", args->case_gts_smpl.s);
441         args->ncase_gt++;
442     }
443     args->ntested++;
444     return 0;
445 }
446 
run(int argc,char ** argv)447 int run(int argc, char **argv)
448 {
449     args_t *args = (args_t*) calloc(1,sizeof(args_t));
450     args->argc   = argc; args->argv = argv;
451     args->output_fname = "-";
452     args->annots_str = "PASSOC,FASSOC";
453     args->regions_overlap = 1;
454     args->targets_overlap = 0;
455     args->clevel = -1;
456     static struct option loptions[] =
457     {
458         {"max-allele-freq",required_argument,0,'f'},
459         {"annots",required_argument,0,'a'},
460         {"force-samples",no_argument,0,1},
461         {"bg-samples",required_argument,0,'0'},     // renamed to --control-samples, leaving it in for backward compatibility
462         {"control-samples",required_argument,0,'0'},
463         {"novel-samples",required_argument,0,'1'},  // renamed to --case-samples, leaving it in for backward compatibility
464         {"case-samples",required_argument,0,'1'},
465         {"include",required_argument,0,'i'},
466         {"exclude",required_argument,0,'e'},
467         {"output",required_argument,NULL,'o'},
468         {"output-type",required_argument,NULL,'O'},
469         {"regions",1,0,'r'},
470         {"regions-file",1,0,'R'},
471         {"regions-overlap",required_argument,NULL,3},
472         {"targets",1,0,'t'},
473         {"targets-file",1,0,'T'},
474         {"targets-overlap",required_argument,NULL,4},
475         {NULL,0,NULL,0}
476     };
477     int c;
478     char *tmp;
479     while ((c = getopt_long(argc, argv, "O:o:i:e:r:R:t:T:0:1:a:f:",loptions,NULL)) >= 0)
480     {
481         switch (c)
482         {
483             case  1 : args->force_samples = 1; break;
484             case 'f': args->max_AC_str = optarg; break;
485             case 'a': args->annots_str = optarg; break;
486             case '0': args->control_samples_str = optarg; break;
487             case '1': args->case_samples_str = optarg; break;
488             case 'e':
489                 if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
490                 args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
491             case 'i':
492                 if ( args->filter_str ) error("Error: only one -i or -e expression can be given, and they cannot be combined\n");
493                 args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
494             case 't': args->targets = optarg; break;
495             case 'T': args->targets = optarg; args->targets_is_file = 1; break;
496             case 'r': args->regions = optarg; break;
497             case 'R': args->regions = optarg; args->regions_is_file = 1; break;
498             case 'o': args->output_fname = optarg; break;
499             case 'O':
500                       switch (optarg[0]) {
501                           case 'b': args->output_type = FT_BCF_GZ; break;
502                           case 'u': args->output_type = FT_BCF; break;
503                           case 'z': args->output_type = FT_VCF_GZ; break;
504                           case 'v': args->output_type = FT_VCF; break;
505                           default:
506                           {
507                               args->clevel = strtol(optarg,&tmp,10);
508                               if ( *tmp || args->clevel<0 || args->clevel>9 ) error("The output type \"%s\" not recognised\n", optarg);
509                           }
510                       };
511                       if ( optarg[1] )
512                       {
513                           args->clevel = strtol(optarg+1,&tmp,10);
514                           if ( *tmp || args->clevel<0 || args->clevel>9 ) error("Could not parse argument: --compression-level %s\n", optarg+1);
515                       }
516                       break;
517             case  3 :
518                 if ( !strcasecmp(optarg,"0") ) args->regions_overlap = 0;
519                 else if ( !strcasecmp(optarg,"1") ) args->regions_overlap = 1;
520                 else if ( !strcasecmp(optarg,"2") ) args->regions_overlap = 2;
521                 else error("Could not parse: --regions-overlap %s\n",optarg);
522                 break;
523             case  4 :
524                 if ( !strcasecmp(optarg,"0") ) args->targets_overlap = 0;
525                 else if ( !strcasecmp(optarg,"1") ) args->targets_overlap = 1;
526                 else if ( !strcasecmp(optarg,"2") ) args->targets_overlap = 2;
527                 else error("Could not parse: --targets-overlap %s\n",optarg);
528                 break;
529             case 'h':
530             case '?':
531             default: error("%s", usage_text()); break;
532         }
533     }
534     if ( optind==argc )
535     {
536         if ( !isatty(fileno((FILE *)stdin)) ) args->fname = "-";  // reading from stdin
537         else { error("%s",usage_text()); }
538     }
539     else if ( optind+1!=argc ) error("%s",usage_text());
540     else args->fname = argv[optind];
541 
542     if ( !args->control_samples_str ) error("Error: missing the -0, --control-samples option\n");
543     if ( !args->case_samples_str ) error("Error: missing the -1, --case-samples option\n");
544 
545     init_data(args);
546 
547     while ( bcf_sr_next_line(args->sr) )
548     {
549         bcf1_t *rec = bcf_sr_get_line(args->sr,0);
550         if ( args->filter )
551         {
552             int pass = filter_test(args->filter, rec, NULL);
553             if ( args->filter_logic & FLT_EXCLUDE ) pass = pass ? 0 : 1;
554             if ( !pass ) continue;
555         }
556         process_record(args, rec);
557         if ( bcf_write(args->out_fh, args->hdr_out, rec)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
558     }
559 
560     fprintf(stderr,"Total/processed/skipped/case_allele/case_gt:\t%d\t%d\t%d\t%d\t%d\n", args->ntotal, args->ntested, args->nskipped, args->ncase_al, args->ncase_gt);
561     if ( args->max_AC )
562     {
563         double val1, val2, fisher;
564         kt_fisher_exact(args->nals[0],args->nals[1],args->nals[2],args->nals[3], &val1,&val2,&fisher);
565         val1 = args->nals[0]+args->nals[1] ? (float)args->nals[1]/(args->nals[0]+args->nals[1]) : 0;
566         val2 = args->nals[2]+args->nals[3] ? (float)args->nals[3]/(args->nals[2]+args->nals[3]) : 0;
567         fprintf(stderr,"max_AC/PASSOC/FASSOC/NASSOC:\t%d\t%e\t%f,%f\t%d,%d,%d,%d\n",args->max_AC,fisher,val1,val2,args->nals[0],args->nals[1],args->nals[2],args->nals[3]);
568     }
569     destroy_data(args);
570 
571     return 0;
572 }
573