1 /*
2 Copyright (C) 2016-2021 Genome Research Ltd.
3
4 Author: Petr Danecek <pd3@sanger.ac.uk>
5
6 Permission is hereby granted, free of charge, to any person obtaining a copy
7 of this software and associated documentation files (the "Software"), to deal
8 in the Software without restriction, including without limitation the rights
9 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 copies of the Software, and to permit persons to whom the Software is
11 furnished to do so, subject to the following conditions:
12
13 The above copyright notice and this permission notice shall be included in
14 all copies or substantial portions of the Software.
15
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 THE SOFTWARE.
23 */
24
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <strings.h>
28 #include <getopt.h>
29 #include <stdarg.h>
30 #include <stdint.h>
31 #include <htslib/vcf.h>
32 #include <htslib/synced_bcf_reader.h>
33 #include <htslib/vcfutils.h>
34 #include <inttypes.h>
35 #include <unistd.h>
36 #include "bcftools.h"
37 #include "filter.h"
38
39 // Logic of the filters: include or exclude sites which match the filters?
40 #define FLT_INCLUDE 1
41 #define FLT_EXCLUDE 2
42
43 #define GUESS_GT 1
44 #define GUESS_PL 2
45 #define GUESS_GL 4
46
47 typedef struct
48 {
49 uint64_t ncount;
50 double phap, pdip;
51 }
52 count_t;
53
54 typedef struct
55 {
56 char *chr;
57 uint32_t start, end;
58 count_t *counts; // per-sample counts: counts[isample]
59 }
60 stats_t;
61
62 typedef struct
63 {
64 int argc;
65 char **argv, *af_tag;
66 double af_dflt;
67 stats_t stats;
68 filter_t *filter;
69 char *filter_str;
70 int filter_logic; // include or exclude sites which match the filters? One of FLT_INCLUDE/FLT_EXCLUDE
71 const uint8_t *smpl_pass;
72 int nsample, verbose, tag, include_indels;
73 int *counts, ncounts; // number of observed GTs with given ploidy, used when -g is not given
74 double *tmpf, *pl2p, gt_err_prob;
75 float *af;
76 int maf;
77 int32_t *arr, narr, nfarr;
78 float *farr;
79 bcf_srs_t *sr;
80 bcf_hdr_t *hdr;
81 }
82 args_t;
83
about(void)84 const char *about(void)
85 {
86 return "Determine sample sex by checking genotype likelihoods in haploid regions.\n";
87 }
88
usage_text(void)89 static const char *usage_text(void)
90 {
91 return
92 "\n"
93 "About: Determine sample sex by checking genotype likelihoods (GL,PL) or genotypes (GT)\n"
94 " in the non-PAR region of chrX. The HWE is assumed, so given the alternate allele\n"
95 " frequency fA and the genotype likelihoods pRR,pRA,pAA, the probabilities are\n"
96 " calculated as\n"
97 " P(dip) = pRR*(1-fA)^2 + pAA*fA^2 + 2*pRA*(1-fA)*fA\n"
98 " P(hap) = pRR*(1-fA) + pAA*fA\n"
99 " When genotype likelihoods are not available, the -e option is used to account\n"
100 " for genotyping errors with -t GT. The alternate allele frequency fA is estimated\n"
101 " directly from the data (the default) or can be provided by an INFO tag.\n"
102 " The results can be visualized using the accompanied guess-ploidy.py script.\n"
103 " Note that this plugin is intended to replace the former vcf2sex plugin.\n"
104 "\n"
105 "Usage: bcftools +guess-ploidy <file.vcf.gz> [Plugin Options]\n"
106 "Plugin options:\n"
107 " --AF-dflt <float> the default alternate allele frequency [0.5]\n"
108 " --AF-tag <TAG> use TAG for allele frequency\n"
109 " -e, --error-rate <float> probability of GT being wrong (with -t GT) [1e-3]\n"
110 " --exclude <expr> exclude sites for which the expression is true\n"
111 " -i, --include-indels do not skip indel sites\n"
112 " --include <expr> include only sites for which the expression is true\n"
113 " -g, --genome <str> shortcut to select nonPAR region for common genomes b37|hg19|b38|hg38\n"
114 " -r, --regions <chr:beg-end> restrict to comma-separated list of regions\n"
115 " -R, --regions-file <file> restrict to regions listed in a file\n"
116 " -t, --tag <tag> genotype or genotype likelihoods: GT, PL, GL [PL]\n"
117 " -v, --verbose verbose output (specify twice to increase verbosity)\n"
118 "\n"
119 "Region shortcuts:\n"
120 " b37 .. -r X:2699521-154931043 # GRCh37 no-chr prefix\n"
121 " b38 .. -r X:2781480-155701381 # GRCh38 no-chr prefix\n"
122 " hg19 .. -r chrX:2699521-154931043 # GRCh37 chr prefix\n"
123 " hg38 .. -r chrX:2781480-155701381 # GRCh38 chr prefix\n"
124 "\n"
125 "Examples:\n"
126 " bcftools +guess-ploidy -g b37 in.vcf.gz\n"
127 " bcftools +guess-ploidy in.vcf.gz -t GL -r chrX:2699521-154931043\n"
128 " bcftools view file.vcf.gz -r chrX:2699521-154931043 | bcftools +guess-ploidy\n"
129 " bcftools +guess-ploidy in.bcf -v > ploidy.txt && guess-ploidy.py ploidy.txt img\n"
130 "\n";
131 }
132
smpl_pass(args_t * args,int ismpl)133 static inline int smpl_pass(args_t *args, int ismpl)
134 {
135 if ( !args->smpl_pass ) return 1;
136 int pass = args->smpl_pass[ismpl];
137 if ( args->filter_logic & FLT_EXCLUDE ) pass = pass ? 0 : 1;
138 if ( pass ) return 1;
139 return 0;
140 }
141
process_region_guess(args_t * args)142 void process_region_guess(args_t *args)
143 {
144 while ( bcf_sr_next_line(args->sr) )
145 {
146 bcf1_t *rec = bcf_sr_get_line(args->sr,0);
147 if ( rec->n_allele==1 ) continue;
148 if ( !args->include_indels && !(bcf_get_variant_types(rec)&VCF_SNP) ) continue;
149
150 if ( args->filter )
151 {
152 int pass = filter_test(args->filter, rec, &args->smpl_pass);
153 if ( args->filter_logic & FLT_EXCLUDE ) pass = pass ? 0 : 1;
154 if ( !args->smpl_pass && !pass ) continue; // site-level filtering, not per-sample filtering
155 }
156
157 double freq[2] = {0,0}, sum;
158 int ismpl,i;
159 if ( args->tag & GUESS_GT ) // use GTs to guess the ploidy, considering only one ALT
160 {
161 int ngt = bcf_get_genotypes(args->hdr,rec,&args->arr,&args->narr);
162 if ( ngt<=0 ) continue;
163 ngt /= args->nsample;
164 for (ismpl=0; ismpl<args->nsample; ismpl++)
165 {
166 if ( !smpl_pass(args,ismpl) ) continue;
167 int32_t *ptr = args->arr + ismpl*ngt;
168 double *tmp = args->tmpf + ismpl*3;
169
170 if ( ptr[0]==bcf_gt_missing )
171 {
172 tmp[0] = -1;
173 continue;
174 }
175 if ( ptr[1]==bcf_int32_vector_end )
176 {
177 if ( bcf_gt_allele(ptr[0])==0 ) // haploid R
178 {
179 tmp[0] = 1 - 2*args->gt_err_prob;
180 tmp[1] = tmp[2] = args->gt_err_prob;
181 }
182 else // haploid A
183 {
184 tmp[0] = tmp[1] = args->gt_err_prob;
185 tmp[2] = 1 - 2*args->gt_err_prob;
186 }
187 continue;
188 }
189 if ( bcf_gt_allele(ptr[0])==0 && bcf_gt_allele(ptr[1])==0 ) // RR
190 {
191 tmp[0] = 1 - 2*args->gt_err_prob;
192 tmp[1] = tmp[2] = args->gt_err_prob;
193 }
194 else if ( bcf_gt_allele(ptr[0])==bcf_gt_allele(ptr[1]) ) // AA
195 {
196 tmp[0] = tmp[1] = args->gt_err_prob;
197 tmp[2] = 1 - 2*args->gt_err_prob;
198 }
199 else // RA or hetAA, treating as RA
200 {
201 tmp[1] = 1 - 2*args->gt_err_prob;
202 tmp[0] = tmp[2] = args->gt_err_prob;
203 }
204 freq[0] += 2*tmp[0]+tmp[1];
205 freq[1] += tmp[1]+2*tmp[2];
206 }
207 }
208 else if ( args->tag & GUESS_PL ) // use PL guess the ploidy, restrict to first ALT allele
209 {
210 int npl = bcf_get_format_int32(args->hdr,rec,"PL",&args->arr,&args->narr);
211 if ( npl<=0 ) continue;
212 npl /= args->nsample;
213 int ndip_gt = rec->n_allele*(rec->n_allele+1)/2;
214 if ( npl==ndip_gt ) // diploid
215 {
216 for (ismpl=0; ismpl<args->nsample; ismpl++)
217 {
218 if ( !smpl_pass(args,ismpl) ) continue;
219 int32_t *ptr = args->arr + ismpl*npl;
220 double *tmp = args->tmpf + ismpl*3;
221
222 // restrict to first ALT
223 if ( ptr[0]==bcf_int32_missing || ptr[1]==bcf_int32_missing || ptr[2]==bcf_int32_missing )
224 {
225 tmp[0] = -1;
226 continue;
227 }
228 if ( ptr[0]==ptr[1] && ptr[0]==ptr[2] ) // non-informative
229 {
230 tmp[0] = -1;
231 continue;
232 }
233 if ( ptr[2]==bcf_int32_vector_end )
234 {
235 tmp[0] = (ptr[0]<0 || ptr[0]>=256) ? args->pl2p[255] : args->pl2p[ptr[0]];
236 tmp[1] = args->pl2p[255];
237 tmp[2] = (ptr[1]<0 || ptr[1]>=256) ? args->pl2p[255] : args->pl2p[ptr[1]];
238 }
239 else
240 for (i=0; i<3; i++)
241 tmp[i] = (ptr[i]<0 || ptr[i]>=256) ? args->pl2p[255] : args->pl2p[ptr[i]];
242
243 sum = 0;
244 for (i=0; i<3; i++) sum += tmp[i];
245 for (i=0; i<3; i++) tmp[i] /= sum;
246
247 if ( ptr[2]==bcf_int32_vector_end )
248 {
249 freq[0] += tmp[0];
250 freq[1] += tmp[2];
251 }
252 else
253 {
254 freq[0] += 2*tmp[0]+tmp[1];
255 freq[1] += tmp[1]+2*tmp[2];
256 }
257 }
258 }
259 else if ( npl==rec->n_allele ) // all samples haploid
260 {
261 for (ismpl=0; ismpl<args->nsample; ismpl++)
262 {
263 if ( !smpl_pass(args,ismpl) ) continue;
264 int32_t *ptr = args->arr + ismpl*npl;
265 double *tmp = args->tmpf + ismpl*3;
266
267 // restrict to first ALT
268 if ( ptr[0]==bcf_int32_missing || ptr[1]==bcf_int32_missing )
269 {
270 tmp[0] = -1;
271 continue;
272 }
273 tmp[0] = (ptr[0]<0 || ptr[0]>=256) ? args->pl2p[255] : args->pl2p[ptr[0]];
274 tmp[1] = args->pl2p[255];
275 tmp[2] = (ptr[1]<0 || ptr[1]>=256) ? args->pl2p[255] : args->pl2p[ptr[1]];
276
277 sum = 0;
278 for (i=0; i<3; i++) sum += tmp[i];
279 for (i=0; i<3; i++) tmp[i] /= sum;
280
281 freq[0] += tmp[0];
282 freq[1] += tmp[2];
283 }
284 }
285 else
286 continue; // neither diploid nor haploid
287 }
288 else // use GL
289 {
290 int ngl = bcf_get_format_float(args->hdr,rec,"GL",&args->farr,&args->nfarr);
291 if ( ngl<=0 ) continue;
292 ngl /= args->nsample;
293 int ndip_gt = rec->n_allele*(rec->n_allele+1)/2;
294 if ( ngl==ndip_gt ) // diploid
295 {
296 for (ismpl=0; ismpl<args->nsample; ismpl++)
297 {
298 if ( !smpl_pass(args,ismpl) ) continue;
299 float *ptr = args->farr + ismpl*ngl;
300 double *tmp = args->tmpf + ismpl*3;
301
302 // restrict to first ALT
303 if ( bcf_float_is_missing(ptr[0]) || bcf_float_is_missing(ptr[1]) || bcf_float_is_missing(ptr[2]) )
304 {
305 tmp[0] = -1;
306 continue;
307 }
308 if ( ptr[0]==ptr[1] && ptr[0]==ptr[2] ) // non-informative
309 {
310 tmp[0] = -1;
311 continue;
312 }
313 if ( bcf_float_is_vector_end(ptr[2]) )
314 {
315 tmp[0] = pow(10.,ptr[0]);
316 tmp[1] = 1e-26; // arbitrary small value for a het
317 tmp[2] = pow(10.,ptr[1]);
318 }
319 else
320 for (i=0; i<3; i++)
321 tmp[i] = pow(10.,ptr[i]);
322
323 sum = 0;
324 for (i=0; i<3; i++) sum += tmp[i];
325 for (i=0; i<3; i++) tmp[i] /= sum;
326
327 if ( bcf_float_is_vector_end(ptr[2]) )
328 {
329 freq[0] += tmp[0];
330 freq[1] += tmp[2];
331 }
332 else
333 {
334 freq[0] += 2*tmp[0]+tmp[1];
335 freq[1] += tmp[1]+2*tmp[2];
336 }
337 }
338 }
339 else if ( ngl==rec->n_allele ) // all samples haploid
340 {
341 for (ismpl=0; ismpl<args->nsample; ismpl++)
342 {
343 if ( !smpl_pass(args,ismpl) ) continue;
344 float *ptr = args->farr + ismpl*ngl;
345 double *tmp = args->tmpf + ismpl*3;
346
347 // restrict to first ALT
348 if ( bcf_float_is_missing(ptr[0]) || bcf_float_is_missing(ptr[1]) )
349 {
350 tmp[0] = -1;
351 continue;
352 }
353 tmp[0] = pow(10.,ptr[0]);
354 tmp[1] = 1e-26;
355 tmp[2] = pow(10.,ptr[1]);
356
357 sum = 0;
358 for (i=0; i<3; i++) sum += tmp[i];
359 for (i=0; i<3; i++) tmp[i] /= sum;
360
361 freq[0] += tmp[0];
362 freq[1] += tmp[2];
363 }
364 }
365 else
366 continue; // neither diploid nor haploid
367 }
368 if ( args->af_tag )
369 {
370 int ret = bcf_get_info_float(args->hdr,rec,args->af_tag,&args->af, &args->maf);
371 if ( ret>0 ) { freq[0] = 1 - args->af[0]; freq[1] = args->af[0]; }
372 }
373
374 if ( !freq[0] && !freq[1] ) { freq[0] = 1 - args->af_dflt; freq[1] = args->af_dflt; }
375 sum = freq[0] + freq[1];
376 freq[0] /= sum;
377 freq[1] /= sum;
378 for (ismpl=0; ismpl<args->nsample; ismpl++)
379 {
380 if ( !smpl_pass(args,ismpl) ) continue;
381 count_t *counts = &args->stats.counts[ismpl];
382 double *tmp = args->tmpf + ismpl*3;
383 if ( tmp[0] < 0 ) continue;
384 double phap = freq[0]*tmp[0] + freq[1]*tmp[2];
385 double pdip = freq[0]*freq[0]*tmp[0] + 2*freq[0]*freq[1]*tmp[1] + freq[1]*freq[1]*tmp[2];
386 counts->phap += log(phap);
387 counts->pdip += log(pdip);
388 counts->ncount++;
389 if ( args->verbose>1 )
390 printf("DBG\t%s\t%"PRId64"\t%s\t%e\t%e\t%e\t%e\t%e\t%e\n", bcf_seqname(args->hdr,rec),(int64_t) rec->pos+1,bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,ismpl),
391 freq[1],tmp[0],tmp[1],tmp[2],phap,pdip);
392 }
393 }
394 }
395
run(int argc,char ** argv)396 int run(int argc, char **argv)
397 {
398 args_t *args = (args_t*) calloc(1,sizeof(args_t));
399 args->tag = GUESS_PL;
400 args->argc = argc; args->argv = argv;
401 args->gt_err_prob = 1e-3;
402 args->af_dflt = 0.5;
403 char *region = NULL;
404 int region_is_file = 0;
405 static struct option loptions[] =
406 {
407 {"AF-tag",required_argument,NULL,0},
408 {"AF-dflt",required_argument,NULL,1},
409 {"exclude",required_argument,NULL,2},
410 {"include",required_argument,NULL,3},
411 {"verbose",no_argument,NULL,'v'},
412 {"include-indels",no_argument,NULL,'i'},
413 {"error-rate",required_argument,NULL,'e'},
414 {"tag",required_argument,NULL,'t'},
415 {"genome",required_argument,NULL,'g'},
416 {"regions",required_argument,NULL,'r'},
417 {"regions-file",required_argument,NULL,'R'},
418 {"background",required_argument,NULL,'b'},
419 {NULL,0,NULL,0}
420 };
421 int c;
422 char *tmp;
423 while ((c = getopt_long(argc, argv, "vr:R:t:e:ig:",loptions,NULL)) >= 0)
424 {
425 switch (c)
426 {
427 case 0: args->af_tag = optarg; break;
428 case 1:
429 args->af_dflt = strtod(optarg,&tmp);
430 if ( *tmp ) error("Could not parse: --AF-dflt %s\n", optarg);
431 break;
432 case 2:
433 if ( args->filter_str ) error("Error: only one --include or --exclude expression can be given, and they cannot be combined\n");
434 args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
435 case 3:
436 if ( args->filter_str ) error("Error: only one --include or --exclude expression can be given, and they cannot be combined\n");
437 args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
438 case 'i': args->include_indels = 1; break;
439 case 'e':
440 args->gt_err_prob = strtod(optarg,&tmp);
441 if ( *tmp ) error("Could not parse: -e %s\n", optarg);
442 if ( args->gt_err_prob<0 || args->gt_err_prob>1 ) error("Expected value from the interval [0,1]: -e %s\n", optarg);
443 break;
444 case 'g':
445 if ( !strcasecmp(optarg,"b37") ) region = "X:2699521-154931043";
446 else if ( !strcasecmp(optarg,"b38") ) region = "X:2781480-155701381";
447 else if ( !strcasecmp(optarg,"hg19") ) region = "chrX:2699521-154931043";
448 else if ( !strcasecmp(optarg,"hg38") ) region = "chrX:2781480-155701381";
449 else error("The argument not recognised, expected --genome b37, b38, hg19 or hg38: %s\n", optarg);
450 break;
451 case 'R': region_is_file = 1; // fall-through
452 case 'r': region = optarg; break;
453 case 'v': args->verbose++; break;
454 case 't':
455 if ( !strcasecmp(optarg,"GT") ) args->tag = GUESS_GT;
456 else if ( !strcasecmp(optarg,"PL") ) args->tag = GUESS_PL;
457 else if ( !strcasecmp(optarg,"GL") ) args->tag = GUESS_GL;
458 else error("The argument not recognised, expected --tag GT, PL or GL: %s\n", optarg);
459 break;
460 case 'h':
461 case '?':
462 default: error("%s", usage_text()); break;
463 }
464 }
465 if ( args->filter_logic == (FLT_EXCLUDE|FLT_INCLUDE) ) error("Only one of --include or --exclude can be given.\n");
466
467 char *fname = NULL;
468 if ( optind==argc )
469 {
470 if ( !isatty(fileno((FILE *)stdin)) ) fname = "-"; // reading from stdin
471 else { error("%s",usage_text()); }
472 }
473 else if ( optind+1!=argc ) error("%s",usage_text());
474 else fname = argv[optind];
475
476 args->sr = bcf_sr_init();
477 if ( strcmp("-",fname) )
478 {
479 if ( region )
480 {
481 args->sr->require_index = 1;
482 if ( bcf_sr_set_regions(args->sr, region, region_is_file)<0 )
483 error("Failed to read the regions: %s\n",region);
484 }
485 }
486 else
487 {
488 if ( region )
489 {
490 if ( bcf_sr_set_targets(args->sr, region, region_is_file, 0)<0 )
491 error("Failed to read the targets: %s\n",region);
492 }
493 }
494 if ( !bcf_sr_add_reader(args->sr,fname) ) error("Error: %s\n", bcf_sr_strerror(args->sr->errnum));
495 args->hdr = args->sr->readers[0].header;
496 args->nsample = bcf_hdr_nsamples(args->hdr);
497 args->stats.counts = (count_t*) calloc(args->nsample,sizeof(count_t));
498
499 if ( args->filter_str )
500 args->filter = filter_init(args->hdr, args->filter_str);
501
502 if ( args->af_tag && !bcf_hdr_idinfo_exists(args->hdr,BCF_HL_INFO,bcf_hdr_id2int(args->hdr,BCF_DT_ID,args->af_tag)) )
503 error("No such INFO tag: %s\n", args->af_tag);
504
505 if ( args->tag&GUESS_PL && bcf_hdr_id2int(args->hdr, BCF_DT_ID, "PL")<0 )
506 {
507 fprintf(stderr, "Warning: PL tag not found in header, switching to GL\n");
508 args->tag = GUESS_GL;
509 }
510
511 if ( args->tag&GUESS_GL && bcf_hdr_id2int(args->hdr, BCF_DT_ID, "GL")<0 )
512 {
513 fprintf(stderr, "Warning: GL tag not found in header, switching to GT\n");
514 args->tag = GUESS_GT;
515 }
516
517 if ( args->tag&GUESS_GT && bcf_hdr_id2int(args->hdr, BCF_DT_ID, "GT")<0 )
518 error("Error: GT tag not found in header\n");
519
520 int i;
521 if ( args->tag&GUESS_PL )
522 {
523 args->pl2p = (double*) calloc(256,sizeof(double));
524 for (i=0; i<256; i++) args->pl2p[i] = pow(10., -i/10.);
525 }
526 if ( args->tag&GUESS_PL || args->tag&GUESS_GL || args->tag&GUESS_GT )
527 args->tmpf = (double*) malloc(sizeof(*args->tmpf)*3*args->nsample);
528
529 if ( args->verbose )
530 {
531 printf("# This file was produced by: bcftools +guess-ploidy(%s+htslib-%s)\n", bcftools_version(),hts_version());
532 printf("# The command line was:\tbcftools +%s", args->argv[0]);
533 for (i=1; i<args->argc; i++)
534 printf(" %s",args->argv[i]);
535 printf("\n");
536 printf("# [1]SEX\t[2]Sample\t[3]Predicted sex\t[4]log P(Haploid)/nSites\t[5]log P(Diploid)/nSites\t[6]nSites\t[7]Score: F < 0 < M ($4-$5)\n");
537 if ( args->verbose>1 )
538 printf("# [1]DBG\t[2]Chr\t[3]Pos\t[4]Sample\t[5]AF\t[6]pRR\t[7]pRA\t[8]pAA\t[9]P(Haploid)\t[10]P(Diploid)\n");
539 }
540
541 process_region_guess(args);
542
543 for (i=0; i<args->nsample; i++)
544 {
545 double phap = args->stats.counts[i].ncount ? args->stats.counts[i].phap / args->stats.counts[i].ncount : 0.5;
546 double pdip = args->stats.counts[i].ncount ? args->stats.counts[i].pdip / args->stats.counts[i].ncount : 0.5;
547 char predicted_sex = 'U';
548 if (phap>pdip) predicted_sex = 'M';
549 else if (phap<pdip) predicted_sex = 'F';
550 if ( args->verbose )
551 {
552 printf("SEX\t%s\t%c\t%f\t%f\t%"PRId64"\t%f\n", args->hdr->samples[i],predicted_sex,
553 phap,pdip,args->stats.counts[i].ncount,phap-pdip);
554 }
555 else
556 printf("%s\t%c\n", args->hdr->samples[i],predicted_sex);
557 }
558
559 if ( args->filter )
560 filter_destroy(args->filter);
561
562 bcf_sr_destroy(args->sr);
563 free(args->pl2p);
564 free(args->tmpf);
565 free(args->counts);
566 free(args->stats.counts);
567 free(args->arr);
568 free(args->farr);
569 free(args->af);
570 free(args);
571 return 0;
572 }
573