1 /*
2     Copyright (C) 2016-2021 Genome Research Ltd.
3 
4     Author: Petr Danecek <pd3@sanger.ac.uk>
5 
6     Permission is hereby granted, free of charge, to any person obtaining a copy
7     of this software and associated documentation files (the "Software"), to deal
8     in the Software without restriction, including without limitation the rights
9     to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10     copies of the Software, and to permit persons to whom the Software is
11     furnished to do so, subject to the following conditions:
12 
13     The above copyright notice and this permission notice shall be included in
14     all copies or substantial portions of the Software.
15 
16     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19     AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21     OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22     THE SOFTWARE.
23 */
24 
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <strings.h>
28 #include <getopt.h>
29 #include <stdarg.h>
30 #include <stdint.h>
31 #include <htslib/vcf.h>
32 #include <htslib/synced_bcf_reader.h>
33 #include <htslib/vcfutils.h>
34 #include <inttypes.h>
35 #include <unistd.h>
36 #include "bcftools.h"
37 #include "filter.h"
38 
39 // Logic of the filters: include or exclude sites which match the filters?
40 #define FLT_INCLUDE 1
41 #define FLT_EXCLUDE 2
42 
43 #define GUESS_GT 1
44 #define GUESS_PL 2
45 #define GUESS_GL 4
46 
47 typedef struct
48 {
49     uint64_t ncount;
50     double phap, pdip;
51 }
52 count_t;
53 
54 typedef struct
55 {
56     char *chr;
57     uint32_t start, end;
58     count_t *counts;    // per-sample counts: counts[isample]
59 }
60 stats_t;
61 
62 typedef struct
63 {
64     int argc;
65     char **argv, *af_tag;
66     double af_dflt;
67     stats_t stats;
68     filter_t *filter;
69     char *filter_str;
70     int filter_logic;   // include or exclude sites which match the filters? One of FLT_INCLUDE/FLT_EXCLUDE
71     const uint8_t *smpl_pass;
72     int nsample, verbose, tag, include_indels;
73     int *counts, ncounts;       // number of observed GTs with given ploidy, used when -g is not given
74     double *tmpf, *pl2p, gt_err_prob;
75     float *af;
76     int maf;
77     int32_t *arr, narr, nfarr;
78     float *farr;
79     bcf_srs_t *sr;
80     bcf_hdr_t *hdr;
81 }
82 args_t;
83 
about(void)84 const char *about(void)
85 {
86     return "Determine sample sex by checking genotype likelihoods in haploid regions.\n";
87 }
88 
usage_text(void)89 static const char *usage_text(void)
90 {
91     return
92         "\n"
93         "About: Determine sample sex by checking genotype likelihoods (GL,PL) or genotypes (GT)\n"
94         "       in the non-PAR region of chrX. The HWE is assumed, so given the alternate allele\n"
95         "       frequency fA and the genotype likelihoods pRR,pRA,pAA, the probabilities are\n"
96         "       calculated as\n"
97         "           P(dip) = pRR*(1-fA)^2 + pAA*fA^2 + 2*pRA*(1-fA)*fA\n"
98         "           P(hap) = pRR*(1-fA) + pAA*fA\n"
99         "       When genotype likelihoods are not available, the -e option is used to account\n"
100         "       for genotyping errors with -t GT. The alternate allele frequency fA is estimated\n"
101         "       directly from the data (the default) or can be provided by an INFO tag.\n"
102         "       The results can be visualized using the accompanied guess-ploidy.py script.\n"
103         "       Note that this plugin is intended to replace the former vcf2sex plugin.\n"
104         "\n"
105         "Usage: bcftools +guess-ploidy <file.vcf.gz> [Plugin Options]\n"
106         "Plugin options:\n"
107         "       --AF-dflt <float>           the default alternate allele frequency [0.5]\n"
108         "       --AF-tag <TAG>              use TAG for allele frequency\n"
109         "   -e, --error-rate <float>        probability of GT being wrong (with -t GT) [1e-3]\n"
110         "       --exclude <expr>            exclude sites for which the expression is true\n"
111         "   -i, --include-indels            do not skip indel sites\n"
112         "       --include <expr>            include only sites for which the expression is true\n"
113         "   -g, --genome <str>              shortcut to select nonPAR region for common genomes b37|hg19|b38|hg38\n"
114         "   -r, --regions <chr:beg-end>     restrict to comma-separated list of regions\n"
115         "   -R, --regions-file <file>       restrict to regions listed in a file\n"
116         "   -t, --tag <tag>                 genotype or genotype likelihoods: GT, PL, GL [PL]\n"
117         "   -v, --verbose                   verbose output (specify twice to increase verbosity)\n"
118         "\n"
119         "Region shortcuts:\n"
120         "   b37  .. -r X:2699521-154931043      # GRCh37 no-chr prefix\n"
121         "   b38  .. -r X:2781480-155701381      # GRCh38 no-chr prefix\n"
122         "   hg19 .. -r chrX:2699521-154931043   # GRCh37 chr prefix\n"
123         "   hg38 .. -r chrX:2781480-155701381   # GRCh38 chr prefix\n"
124         "\n"
125         "Examples:\n"
126         "   bcftools +guess-ploidy -g b37 in.vcf.gz\n"
127         "   bcftools +guess-ploidy in.vcf.gz -t GL -r chrX:2699521-154931043\n"
128         "   bcftools view file.vcf.gz -r chrX:2699521-154931043 | bcftools +guess-ploidy\n"
129         "   bcftools +guess-ploidy in.bcf -v > ploidy.txt && guess-ploidy.py ploidy.txt img\n"
130         "\n";
131 }
132 
smpl_pass(args_t * args,int ismpl)133 static inline int smpl_pass(args_t *args, int ismpl)
134 {
135     if ( !args->smpl_pass ) return 1;
136     int pass = args->smpl_pass[ismpl];
137     if ( args->filter_logic & FLT_EXCLUDE ) pass = pass ? 0 : 1;
138     if ( pass ) return 1;
139     return 0;
140 }
141 
process_region_guess(args_t * args)142 void process_region_guess(args_t *args)
143 {
144     while ( bcf_sr_next_line(args->sr) )
145     {
146         bcf1_t *rec = bcf_sr_get_line(args->sr,0);
147         if ( rec->n_allele==1 ) continue;
148         if ( !args->include_indels && !(bcf_get_variant_types(rec)&VCF_SNP) ) continue;
149 
150         if ( args->filter )
151         {
152             int pass = filter_test(args->filter, rec, &args->smpl_pass);
153             if ( args->filter_logic & FLT_EXCLUDE ) pass = pass ? 0 : 1;
154             if ( !args->smpl_pass && !pass ) continue;     // site-level filtering, not per-sample filtering
155         }
156 
157         double freq[2] = {0,0}, sum;
158         int ismpl,i;
159         if ( args->tag & GUESS_GT )   // use GTs to guess the ploidy, considering only one ALT
160         {
161             int ngt = bcf_get_genotypes(args->hdr,rec,&args->arr,&args->narr);
162             if ( ngt<=0 ) continue;
163             ngt /= args->nsample;
164             for (ismpl=0; ismpl<args->nsample; ismpl++)
165             {
166                 if ( !smpl_pass(args,ismpl) ) continue;
167                 int32_t *ptr = args->arr + ismpl*ngt;
168                 double *tmp = args->tmpf + ismpl*3;
169 
170                 if ( ptr[0]==bcf_gt_missing )
171                 {
172                     tmp[0] = -1;
173                     continue;
174                 }
175                 if ( ptr[1]==bcf_int32_vector_end )
176                 {
177                     if ( bcf_gt_allele(ptr[0])==0 ) // haploid R
178                     {
179                         tmp[0] = 1 - 2*args->gt_err_prob;
180                         tmp[1] = tmp[2] = args->gt_err_prob;
181                     }
182                     else    // haploid A
183                     {
184                         tmp[0] = tmp[1] = args->gt_err_prob;
185                         tmp[2] = 1 - 2*args->gt_err_prob;
186                     }
187                     continue;
188                 }
189                 if ( bcf_gt_allele(ptr[0])==0 && bcf_gt_allele(ptr[1])==0 ) // RR
190                 {
191                     tmp[0] = 1 - 2*args->gt_err_prob;
192                     tmp[1] = tmp[2] = args->gt_err_prob;
193                 }
194                 else if ( bcf_gt_allele(ptr[0])==bcf_gt_allele(ptr[1]) ) // AA
195                 {
196                     tmp[0] = tmp[1] = args->gt_err_prob;
197                     tmp[2] = 1 - 2*args->gt_err_prob;
198                 }
199                 else  // RA or hetAA, treating as RA
200                 {
201                     tmp[1] = 1 - 2*args->gt_err_prob;
202                     tmp[0] = tmp[2] = args->gt_err_prob;
203                 }
204                 freq[0] += 2*tmp[0]+tmp[1];
205                 freq[1] += tmp[1]+2*tmp[2];
206             }
207         }
208         else if ( args->tag & GUESS_PL )    // use PL guess the ploidy, restrict to first ALT allele
209         {
210             int npl = bcf_get_format_int32(args->hdr,rec,"PL",&args->arr,&args->narr);
211             if ( npl<=0 ) continue;
212             npl /= args->nsample;
213             int ndip_gt = rec->n_allele*(rec->n_allele+1)/2;
214             if ( npl==ndip_gt )             // diploid
215             {
216                 for (ismpl=0; ismpl<args->nsample; ismpl++)
217                 {
218                     if ( !smpl_pass(args,ismpl) ) continue;
219                     int32_t *ptr = args->arr + ismpl*npl;
220                     double *tmp = args->tmpf + ismpl*3;
221 
222                     // restrict to first ALT
223                     if ( ptr[0]==bcf_int32_missing || ptr[1]==bcf_int32_missing || ptr[2]==bcf_int32_missing )
224                     {
225                         tmp[0] = -1;
226                         continue;
227                     }
228                     if ( ptr[0]==ptr[1] && ptr[0]==ptr[2] ) // non-informative
229                     {
230                         tmp[0] = -1;
231                         continue;
232                     }
233                     if ( ptr[2]==bcf_int32_vector_end )
234                     {
235                         tmp[0] = (ptr[0]<0 || ptr[0]>=256) ? args->pl2p[255] : args->pl2p[ptr[0]];
236                         tmp[1] = args->pl2p[255];
237                         tmp[2] = (ptr[1]<0 || ptr[1]>=256) ? args->pl2p[255] : args->pl2p[ptr[1]];
238                     }
239                     else
240                         for (i=0; i<3; i++)
241                             tmp[i] = (ptr[i]<0 || ptr[i]>=256) ? args->pl2p[255] : args->pl2p[ptr[i]];
242 
243                     sum = 0;
244                     for (i=0; i<3; i++) sum += tmp[i];
245                     for (i=0; i<3; i++) tmp[i] /= sum;
246 
247                     if ( ptr[2]==bcf_int32_vector_end )
248                     {
249                         freq[0] += tmp[0];
250                         freq[1] += tmp[2];
251                     }
252                     else
253                     {
254                         freq[0] += 2*tmp[0]+tmp[1];
255                         freq[1] += tmp[1]+2*tmp[2];
256                     }
257                 }
258             }
259             else if ( npl==rec->n_allele )  // all samples haploid
260             {
261                 for (ismpl=0; ismpl<args->nsample; ismpl++)
262                 {
263                     if ( !smpl_pass(args,ismpl) ) continue;
264                     int32_t *ptr = args->arr + ismpl*npl;
265                     double *tmp = args->tmpf + ismpl*3;
266 
267                     // restrict to first ALT
268                     if ( ptr[0]==bcf_int32_missing || ptr[1]==bcf_int32_missing )
269                     {
270                         tmp[0] = -1;
271                         continue;
272                     }
273                     tmp[0] = (ptr[0]<0 || ptr[0]>=256) ? args->pl2p[255] : args->pl2p[ptr[0]];
274                     tmp[1] = args->pl2p[255];
275                     tmp[2] = (ptr[1]<0 || ptr[1]>=256) ? args->pl2p[255] : args->pl2p[ptr[1]];
276 
277                     sum = 0;
278                     for (i=0; i<3; i++) sum += tmp[i];
279                     for (i=0; i<3; i++) tmp[i] /= sum;
280 
281                     freq[0] += tmp[0];
282                     freq[1] += tmp[2];
283                 }
284             }
285             else
286                 continue;   // neither diploid nor haploid
287         }
288         else    // use GL
289         {
290             int ngl = bcf_get_format_float(args->hdr,rec,"GL",&args->farr,&args->nfarr);
291             if ( ngl<=0 ) continue;
292             ngl /= args->nsample;
293             int ndip_gt = rec->n_allele*(rec->n_allele+1)/2;
294             if ( ngl==ndip_gt )             // diploid
295             {
296                 for (ismpl=0; ismpl<args->nsample; ismpl++)
297                 {
298                     if ( !smpl_pass(args,ismpl) ) continue;
299                     float *ptr = args->farr + ismpl*ngl;
300                     double *tmp = args->tmpf + ismpl*3;
301 
302                     // restrict to first ALT
303                     if ( bcf_float_is_missing(ptr[0]) || bcf_float_is_missing(ptr[1]) || bcf_float_is_missing(ptr[2]) )
304                     {
305                         tmp[0] = -1;
306                         continue;
307                     }
308                     if ( ptr[0]==ptr[1] && ptr[0]==ptr[2] ) // non-informative
309                     {
310                         tmp[0] = -1;
311                         continue;
312                     }
313                     if ( bcf_float_is_vector_end(ptr[2]) )
314                     {
315                         tmp[0] = pow(10.,ptr[0]);
316                         tmp[1] = 1e-26;             // arbitrary small value for a het
317                         tmp[2] = pow(10.,ptr[1]);
318                     }
319                     else
320                         for (i=0; i<3; i++)
321                             tmp[i] = pow(10.,ptr[i]);
322 
323                     sum = 0;
324                     for (i=0; i<3; i++) sum += tmp[i];
325                     for (i=0; i<3; i++) tmp[i] /= sum;
326 
327                     if ( bcf_float_is_vector_end(ptr[2]) )
328                     {
329                         freq[0] += tmp[0];
330                         freq[1] += tmp[2];
331                     }
332                     else
333                     {
334                         freq[0] += 2*tmp[0]+tmp[1];
335                         freq[1] += tmp[1]+2*tmp[2];
336                     }
337                 }
338             }
339             else if ( ngl==rec->n_allele )  // all samples haploid
340             {
341                 for (ismpl=0; ismpl<args->nsample; ismpl++)
342                 {
343                     if ( !smpl_pass(args,ismpl) ) continue;
344                     float *ptr = args->farr + ismpl*ngl;
345                     double *tmp = args->tmpf + ismpl*3;
346 
347                     // restrict to first ALT
348                     if ( bcf_float_is_missing(ptr[0]) || bcf_float_is_missing(ptr[1]) )
349                     {
350                         tmp[0] = -1;
351                         continue;
352                     }
353                     tmp[0] = pow(10.,ptr[0]);
354                     tmp[1] = 1e-26;
355                     tmp[2] = pow(10.,ptr[1]);
356 
357                     sum = 0;
358                     for (i=0; i<3; i++) sum += tmp[i];
359                     for (i=0; i<3; i++) tmp[i] /= sum;
360 
361                     freq[0] += tmp[0];
362                     freq[1] += tmp[2];
363                 }
364             }
365             else
366                 continue;   // neither diploid nor haploid
367         }
368         if ( args->af_tag )
369         {
370             int ret = bcf_get_info_float(args->hdr,rec,args->af_tag,&args->af, &args->maf);
371             if ( ret>0 ) { freq[0] = 1 - args->af[0]; freq[1] = args->af[0]; }
372         }
373 
374         if ( !freq[0] && !freq[1] ) { freq[0] = 1 - args->af_dflt; freq[1] = args->af_dflt; }
375         sum = freq[0] + freq[1];
376         freq[0] /= sum;
377         freq[1] /= sum;
378         for (ismpl=0; ismpl<args->nsample; ismpl++)
379         {
380             if ( !smpl_pass(args,ismpl) ) continue;
381             count_t *counts = &args->stats.counts[ismpl];
382             double *tmp = args->tmpf + ismpl*3;
383             if ( tmp[0] < 0 ) continue;
384             double phap = freq[0]*tmp[0] + freq[1]*tmp[2];
385             double pdip = freq[0]*freq[0]*tmp[0] + 2*freq[0]*freq[1]*tmp[1] + freq[1]*freq[1]*tmp[2];
386             counts->phap += log(phap);
387             counts->pdip += log(pdip);
388             counts->ncount++;
389             if ( args->verbose>1 )
390                 printf("DBG\t%s\t%"PRId64"\t%s\t%e\t%e\t%e\t%e\t%e\t%e\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1,bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,ismpl),
391                     freq[1],tmp[0],tmp[1],tmp[2],phap,pdip);
392         }
393     }
394 }
395 
run(int argc,char ** argv)396 int run(int argc, char **argv)
397 {
398     args_t *args = (args_t*) calloc(1,sizeof(args_t));
399     args->tag    = GUESS_PL;
400     args->argc   = argc; args->argv = argv;
401     args->gt_err_prob = 1e-3;
402     args->af_dflt = 0.5;
403     char *region  = NULL;
404     int region_is_file = 0;
405     static struct option loptions[] =
406     {
407         {"AF-tag",required_argument,NULL,0},
408         {"AF-dflt",required_argument,NULL,1},
409         {"exclude",required_argument,NULL,2},
410         {"include",required_argument,NULL,3},
411         {"verbose",no_argument,NULL,'v'},
412         {"include-indels",no_argument,NULL,'i'},
413         {"error-rate",required_argument,NULL,'e'},
414         {"tag",required_argument,NULL,'t'},
415         {"genome",required_argument,NULL,'g'},
416         {"regions",required_argument,NULL,'r'},
417         {"regions-file",required_argument,NULL,'R'},
418         {"background",required_argument,NULL,'b'},
419         {NULL,0,NULL,0}
420     };
421     int c;
422     char *tmp;
423     while ((c = getopt_long(argc, argv, "vr:R:t:e:ig:",loptions,NULL)) >= 0)
424     {
425         switch (c)
426         {
427             case 0: args->af_tag = optarg; break;
428             case 1:
429                     args->af_dflt = strtod(optarg,&tmp);
430                     if ( *tmp ) error("Could not parse: --AF-dflt %s\n", optarg);
431                     break;
432             case 2:
433                 if ( args->filter_str ) error("Error: only one --include or --exclude expression can be given, and they cannot be combined\n");
434                 args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
435             case 3:
436                 if ( args->filter_str ) error("Error: only one --include or --exclude expression can be given, and they cannot be combined\n");
437                 args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
438             case 'i': args->include_indels = 1; break;
439             case 'e':
440                 args->gt_err_prob = strtod(optarg,&tmp);
441                 if ( *tmp ) error("Could not parse: -e %s\n", optarg);
442                 if ( args->gt_err_prob<0 || args->gt_err_prob>1 ) error("Expected value from the interval [0,1]: -e %s\n", optarg);
443                 break;
444             case 'g':
445                 if ( !strcasecmp(optarg,"b37") ) region = "X:2699521-154931043";
446                 else if ( !strcasecmp(optarg,"b38") ) region = "X:2781480-155701381";
447                 else if ( !strcasecmp(optarg,"hg19") ) region = "chrX:2699521-154931043";
448                 else if ( !strcasecmp(optarg,"hg38") ) region = "chrX:2781480-155701381";
449                 else error("The argument not recognised, expected --genome b37, b38, hg19 or hg38: %s\n", optarg);
450                 break;
451             case 'R': region_is_file = 1; // fall-through
452             case 'r': region = optarg; break;
453             case 'v': args->verbose++; break;
454             case 't':
455                 if ( !strcasecmp(optarg,"GT") ) args->tag = GUESS_GT;
456                 else if ( !strcasecmp(optarg,"PL") ) args->tag = GUESS_PL;
457                 else if ( !strcasecmp(optarg,"GL") ) args->tag = GUESS_GL;
458                 else error("The argument not recognised, expected --tag GT, PL or GL: %s\n", optarg);
459                 break;
460             case 'h':
461             case '?':
462             default: error("%s", usage_text()); break;
463         }
464     }
465     if ( args->filter_logic == (FLT_EXCLUDE|FLT_INCLUDE) ) error("Only one of --include or --exclude can be given.\n");
466 
467     char *fname = NULL;
468     if ( optind==argc )
469     {
470         if ( !isatty(fileno((FILE *)stdin)) ) fname = "-";  // reading from stdin
471         else { error("%s",usage_text()); }
472     }
473     else if ( optind+1!=argc ) error("%s",usage_text());
474     else fname = argv[optind];
475 
476     args->sr = bcf_sr_init();
477     if ( strcmp("-",fname) )
478     {
479         if ( region )
480         {
481             args->sr->require_index = 1;
482             if ( bcf_sr_set_regions(args->sr, region, region_is_file)<0 )
483                 error("Failed to read the regions: %s\n",region);
484         }
485     }
486     else
487     {
488         if ( region )
489         {
490             if ( bcf_sr_set_targets(args->sr, region, region_is_file, 0)<0 )
491                 error("Failed to read the targets: %s\n",region);
492         }
493     }
494     if ( !bcf_sr_add_reader(args->sr,fname) ) error("Error: %s\n", bcf_sr_strerror(args->sr->errnum));
495     args->hdr = args->sr->readers[0].header;
496     args->nsample = bcf_hdr_nsamples(args->hdr);
497     args->stats.counts = (count_t*) calloc(args->nsample,sizeof(count_t));
498 
499     if ( args->filter_str )
500         args->filter = filter_init(args->hdr, args->filter_str);
501 
502     if ( args->af_tag && !bcf_hdr_idinfo_exists(args->hdr,BCF_HL_INFO,bcf_hdr_id2int(args->hdr,BCF_DT_ID,args->af_tag)) )
503         error("No such INFO tag: %s\n", args->af_tag);
504 
505     if ( args->tag&GUESS_PL && bcf_hdr_id2int(args->hdr, BCF_DT_ID, "PL")<0 )
506     {
507         fprintf(stderr, "Warning: PL tag not found in header, switching to GL\n");
508         args->tag = GUESS_GL;
509     }
510 
511     if ( args->tag&GUESS_GL && bcf_hdr_id2int(args->hdr, BCF_DT_ID, "GL")<0 )
512     {
513         fprintf(stderr, "Warning: GL tag not found in header, switching to GT\n");
514         args->tag = GUESS_GT;
515     }
516 
517     if ( args->tag&GUESS_GT && bcf_hdr_id2int(args->hdr, BCF_DT_ID, "GT")<0 )
518         error("Error: GT tag not found in header\n");
519 
520     int i;
521     if ( args->tag&GUESS_PL )
522     {
523         args->pl2p = (double*) calloc(256,sizeof(double));
524         for (i=0; i<256; i++) args->pl2p[i] = pow(10., -i/10.);
525     }
526     if ( args->tag&GUESS_PL || args->tag&GUESS_GL || args->tag&GUESS_GT )
527         args->tmpf = (double*) malloc(sizeof(*args->tmpf)*3*args->nsample);
528 
529     if ( args->verbose )
530     {
531         printf("# This file was produced by: bcftools +guess-ploidy(%s+htslib-%s)\n", bcftools_version(),hts_version());
532         printf("# The command line was:\tbcftools +%s", args->argv[0]);
533         for (i=1; i<args->argc; i++)
534             printf(" %s",args->argv[i]);
535         printf("\n");
536         printf("# [1]SEX\t[2]Sample\t[3]Predicted sex\t[4]log P(Haploid)/nSites\t[5]log P(Diploid)/nSites\t[6]nSites\t[7]Score: F < 0 < M ($4-$5)\n");
537         if ( args->verbose>1 )
538             printf("# [1]DBG\t[2]Chr\t[3]Pos\t[4]Sample\t[5]AF\t[6]pRR\t[7]pRA\t[8]pAA\t[9]P(Haploid)\t[10]P(Diploid)\n");
539     }
540 
541     process_region_guess(args);
542 
543     for (i=0; i<args->nsample; i++)
544     {
545         double phap = args->stats.counts[i].ncount ? args->stats.counts[i].phap / args->stats.counts[i].ncount : 0.5;
546         double pdip = args->stats.counts[i].ncount ? args->stats.counts[i].pdip / args->stats.counts[i].ncount : 0.5;
547         char predicted_sex = 'U';
548         if (phap>pdip) predicted_sex = 'M';
549         else if (phap<pdip) predicted_sex = 'F';
550         if ( args->verbose )
551         {
552             printf("SEX\t%s\t%c\t%f\t%f\t%"PRId64"\t%f\n", args->hdr->samples[i],predicted_sex,
553                     phap,pdip,args->stats.counts[i].ncount,phap-pdip);
554         }
555         else
556             printf("%s\t%c\n", args->hdr->samples[i],predicted_sex);
557     }
558 
559     if ( args->filter )
560         filter_destroy(args->filter);
561 
562     bcf_sr_destroy(args->sr);
563     free(args->pl2p);
564     free(args->tmpf);
565     free(args->counts);
566     free(args->stats.counts);
567     free(args->arr);
568     free(args->farr);
569     free(args->af);
570     free(args);
571     return 0;
572 }
573