1 /* The MIT License
2 
3    Copyright (c) 2015 Genome Research Ltd.
4 
5    Author: Petr Danecek <pd3@sanger.ac.uk>
6 
7    Permission is hereby granted, free of charge, to any person obtaining a copy
8    of this software and associated documentation files (the "Software"), to deal
9    in the Software without restriction, including without limitation the rights
10    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11    copies of the Software, and to permit persons to whom the Software is
12    furnished to do so, subject to the following conditions:
13 
14    The above copyright notice and this permission notice shall be included in
15    all copies or substantial portions of the Software.
16 
17    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23    THE SOFTWARE.
24 
25  */
26 
27 /*
28     Trio haplotypes: mother (A,B), father (C,D), child (E,F)
29     Modeling the following states:
30         01|23|02
31         01|23|03
32         01|23|12
33         01|23|13
34         01|23|20
35         01|23|30
36         01|23|21
37         01|23|31
38     with the likelihoods of two haplotypes A,B segments sharing an allele:
39         P(01|A==B)  .. e (P of error)
40         P(00|A==B)  .. 1-e
41     and
42         P(ab,cd,ef|E=A,F=C) = P(ea|E=A)*P(fc|F=C)
43 
44 
45     Unrelated samples: (A,B) and (C,D)
46     Modeling the states:
47         xxxx .. A!=C,A!=D,B!=C,B!=D
48         0x0x .. A=C,B!=D
49         0xx0 .. A=D,B!=C
50         x00x .. B=C,A!=D
51         x0x0 .. B=D,A!=C
52         0101 .. A=C,B=D
53         0110 .. A=D,B=C
54     with the likelihoods
55         P(01|A!=B)  .. f*(1-f)
56         P(00|A!=B)  .. (1-f)*(1-f)
57         P(11|A!=B)  .. f*f
58 
59     Assuming 2x30 crossovers, P=2e-8.
60  */
61 
62 #include <stdio.h>
63 #include <stdlib.h>
64 #include <getopt.h>
65 #include <math.h>
66 #include <htslib/hts.h>
67 #include <htslib/vcf.h>
68 #include <errno.h>
69 #include "bcftools.h"
70 #include "HMM.h"
71 
72 #define C_TRIO 1
73 #define C_UNRL 2
74 
75 // states for unrelated samples
76 #define UNRL_xxxx  0
77 #define UNRL_0x0x  1
78 #define UNRL_0xx0  2
79 #define UNRL_x00x  3
80 #define UNRL_x0x0  4
81 #define UNRL_0101  5
82 #define UNRL_0110  6
83 
84 // trio states
85 #define TRIO_AC 0
86 #define TRIO_AD 1
87 #define TRIO_BC 2
88 #define TRIO_BD 3
89 #define TRIO_CA 4
90 #define TRIO_DA 5
91 #define TRIO_CB 6
92 #define TRIO_DB 7
93 
94 typedef struct _args_t
95 {
96     bcf_hdr_t *hdr;
97     hmm_t *hmm;
98     double *eprob, *tprob, pij, pgt_err;
99     uint32_t *sites;
100     int32_t *gt_arr;
101     int nsites, msites, ngt_arr, prev_rid;
102     int mode, nstates, nhet_father, nhet_mother;
103     int imother,ifather,ichild, isample,jsample;
104     void (*set_observed_prob) (bcf1_t *rec);
105     char *prefix;
106     FILE *fp;
107 }
108 args_t;
109 
110 static args_t args;
111 
112 #define SW_MOTHER 1
113 #define SW_FATHER 2
114 static int hap_switch[8][8];
115 
116 static void set_observed_prob_trio(bcf1_t *rec);
117 static void set_observed_prob_unrelated(bcf1_t *rec);
118 static void init_hmm_trio(args_t *args);
119 static void init_hmm_unrelated(args_t *args);
120 
121 
about(void)122 const char *about(void)
123 {
124     return "Color shared chromosomal segments, requires phased GTs.\n";
125 }
126 
usage(void)127 const char *usage(void)
128 {
129     return
130         "\n"
131         "About: Color shared chromosomal segments, requires phased GTs. The output\n"
132         "       can be visualized using the color-chrs.pl script.\n"
133         "Usage: bcftools +color-chrs [General Options] -- [Plugin Options]\n"
134         "Options:\n"
135         "   run \"bcftools plugin\" for a list of common options\n"
136         "\n"
137         "Plugin options:\n"
138         "   -p, --prefix <path>     output files prefix\n"
139         "   -t, --trio <m,f,c>      names of mother, father and the child\n"
140         "   -u, --unrelated <a,b>   names of two unrelated samples\n"
141         "\n"
142         "Example:\n"
143         "   bcftools +color-chrs in.vcf --\n"
144         "\n";
145 }
146 
init(int argc,char ** argv,bcf_hdr_t * in,bcf_hdr_t * out)147 int init(int argc, char **argv, bcf_hdr_t *in, bcf_hdr_t *out)
148 {
149     char *trio_samples = NULL, *unrelated_samples = NULL;
150     memset(&args,0,sizeof(args_t));
151     args.prev_rid = -1;
152     args.hdr = in;
153     args.pij = 2e-8;
154     args.pgt_err = 1e-9;
155 
156     static struct option loptions[] =
157     {
158         {"prefix",1,0,'p'},
159         {"trio",1,0,'t'},
160         {"unrelated",1,0,'u'},
161         {0,0,0,0}
162     };
163     int c;
164     while ((c = getopt_long(argc, argv, "?ht:u:p:",loptions,NULL)) >= 0)
165     {
166         switch (c)
167         {
168             case 'p': args.prefix = optarg; break;
169             case 't': trio_samples = optarg; break;
170             case 'u': unrelated_samples = optarg; break;
171             case 'h':
172             case '?':
173             default: error("%s", usage()); break;
174         }
175     }
176     if ( optind != argc ) error("%s",usage());
177     if ( trio_samples && unrelated_samples ) error("Expected only one of the -t/-u options\n");
178     if ( !trio_samples && !unrelated_samples ) error("Expected one of the -t/-u options\n");
179     if ( !args.prefix ) error("Expected the -p option\n");
180 
181     int ret = bcf_hdr_set_samples(args.hdr, trio_samples ? trio_samples : unrelated_samples, 0);
182     if ( ret<0 ) error("Could not parse samples: %s\n", trio_samples ? trio_samples : unrelated_samples);
183     else if ( ret>0 ) error("%d-th sample not found: %s\n", ret,trio_samples ? trio_samples : unrelated_samples);
184 
185     if ( trio_samples )
186     {
187         int i,n = 0;
188         char **list = hts_readlist(trio_samples, 0, &n);
189         if ( n!=3 ) error("Expected three sample names with -t\n");
190         args.imother = bcf_hdr_id2int(args.hdr, BCF_DT_SAMPLE, list[0]);
191         args.ifather = bcf_hdr_id2int(args.hdr, BCF_DT_SAMPLE, list[1]);
192         args.ichild  = bcf_hdr_id2int(args.hdr, BCF_DT_SAMPLE, list[2]);
193         for (i=0; i<n; i++) free(list[i]);
194         free(list);
195         args.set_observed_prob = set_observed_prob_trio;
196         args.mode = C_TRIO;
197         init_hmm_trio(&args);
198     }
199     else
200     {
201         int i,n = 0;
202         char **list = hts_readlist(unrelated_samples, 0, &n);
203         if ( n!=2 ) error("Expected two sample names with -u\n");
204         args.isample = bcf_hdr_id2int(args.hdr, BCF_DT_SAMPLE, list[0]);
205         args.jsample = bcf_hdr_id2int(args.hdr, BCF_DT_SAMPLE, list[1]);
206         for (i=0; i<n; i++) free(list[i]);
207         free(list);
208         args.set_observed_prob = set_observed_prob_unrelated;
209         args.mode = C_UNRL;
210         init_hmm_unrelated(&args);
211     }
212     return 1;
213 }
214 
init_hmm_trio(args_t * args)215 static void init_hmm_trio(args_t *args)
216 {
217     int i,j;
218     args->nstates = 8;
219     args->tprob   = (double*) malloc(sizeof(double)*args->nstates*args->nstates);
220 
221     for (i=0; i<args->nstates; i++)
222         for (j=0; j<args->nstates; j++) hap_switch[i][j] = 0;
223 
224     hap_switch[TRIO_AD][TRIO_AC] = SW_FATHER;
225     hap_switch[TRIO_BC][TRIO_AC] = SW_MOTHER;
226     hap_switch[TRIO_BD][TRIO_AC] = SW_MOTHER | SW_FATHER;
227     hap_switch[TRIO_AC][TRIO_AD] = SW_FATHER;
228     hap_switch[TRIO_BC][TRIO_AD] = SW_MOTHER | SW_FATHER;
229     hap_switch[TRIO_BD][TRIO_AD] = SW_MOTHER;
230     hap_switch[TRIO_AC][TRIO_BC] = SW_MOTHER;
231     hap_switch[TRIO_AD][TRIO_BC] = SW_MOTHER | SW_FATHER;
232     hap_switch[TRIO_BD][TRIO_BC] = SW_FATHER;
233     hap_switch[TRIO_AC][TRIO_BD] = SW_MOTHER | SW_FATHER;
234     hap_switch[TRIO_AD][TRIO_BD] = SW_MOTHER;
235     hap_switch[TRIO_BC][TRIO_BD] = SW_FATHER;
236 
237     hap_switch[TRIO_DA][TRIO_CA] = SW_FATHER;
238     hap_switch[TRIO_CB][TRIO_CA] = SW_MOTHER;
239     hap_switch[TRIO_DB][TRIO_CA] = SW_MOTHER | SW_FATHER;
240     hap_switch[TRIO_CA][TRIO_DA] = SW_FATHER;
241     hap_switch[TRIO_CB][TRIO_DA] = SW_MOTHER | SW_FATHER;
242     hap_switch[TRIO_DB][TRIO_DA] = SW_MOTHER;
243     hap_switch[TRIO_CA][TRIO_CB] = SW_MOTHER;
244     hap_switch[TRIO_DA][TRIO_CB] = SW_MOTHER | SW_FATHER;
245     hap_switch[TRIO_DB][TRIO_CB] = SW_FATHER;
246     hap_switch[TRIO_CA][TRIO_DB] = SW_MOTHER | SW_FATHER;
247     hap_switch[TRIO_DA][TRIO_DB] = SW_MOTHER;
248     hap_switch[TRIO_CB][TRIO_DB] = SW_FATHER;
249 
250     for (i=0; i<args->nstates; i++)
251     {
252         for (j=0; j<args->nstates; j++)
253         {
254             if ( !hap_switch[i][j] ) MAT(args->tprob,args->nstates,i,j) = 0;
255             else
256             {
257                 MAT(args->tprob,args->nstates,i,j) = 1;
258                 if ( hap_switch[i][j] & SW_MOTHER ) MAT(args->tprob,args->nstates,i,j) *= args->pij;
259                 if ( hap_switch[i][j] & SW_FATHER ) MAT(args->tprob,args->nstates,i,j) *= args->pij;
260             }
261         }
262     }
263     for (i=0; i<args->nstates; i++)
264     {
265         double sum = 0;
266         for (j=0; j<args->nstates; j++)
267         {
268             if ( i!=j ) sum += MAT(args->tprob,args->nstates,i,j);
269         }
270         MAT(args->tprob,args->nstates,i,i) = 1 - sum;
271     }
272 
273     #if 0
274     for (i=0; i<args->nstates; i++)
275     {
276         for (j=0; j<args->nstates; j++)
277             fprintf(stderr,"\t%d",hap_switch[j][i]);
278         fprintf(stderr,"\n");
279     }
280     for (i=0; i<args->nstates; i++)
281     {
282         for (j=0; j<args->nstates; j++)
283             fprintf(stderr,"\t%e",MAT(args->tprob,args->nstates,j,i));
284         fprintf(stderr,"\n");
285     }
286     #endif
287 
288     args->hmm = hmm_init(args->nstates, args->tprob, 10000);
289 }
init_hmm_unrelated(args_t * args)290 static void init_hmm_unrelated(args_t *args)
291 {
292     int i,j;
293     args->nstates = 7;
294     args->tprob   = (double*) malloc(sizeof(double)*args->nstates*args->nstates);
295 
296     for (i=0; i<args->nstates; i++)
297     {
298         for (j=0; j<args->nstates; j++)
299             MAT(args->tprob,args->nstates,i,j) = args->pij;
300     }
301     MAT(args->tprob,args->nstates,UNRL_0101,UNRL_xxxx) = args->pij*args->pij;
302     MAT(args->tprob,args->nstates,UNRL_0110,UNRL_xxxx) = args->pij*args->pij;
303     MAT(args->tprob,args->nstates,UNRL_x0x0,UNRL_0x0x) = args->pij*args->pij;
304     MAT(args->tprob,args->nstates,UNRL_0110,UNRL_0x0x) = args->pij*args->pij;
305     MAT(args->tprob,args->nstates,UNRL_x00x,UNRL_0xx0) = args->pij*args->pij;
306     MAT(args->tprob,args->nstates,UNRL_0101,UNRL_0xx0) = args->pij*args->pij;
307     MAT(args->tprob,args->nstates,UNRL_0101,UNRL_x00x) = args->pij*args->pij;
308     MAT(args->tprob,args->nstates,UNRL_0110,UNRL_x0x0) = args->pij*args->pij;
309     MAT(args->tprob,args->nstates,UNRL_0110,UNRL_0101) = args->pij*args->pij;
310 
311     for (i=0; i<args->nstates; i++)
312     {
313         for (j=i+1; j<args->nstates; j++)
314             MAT(args->tprob,args->nstates,i,j) = MAT(args->tprob,args->nstates,j,i);
315     }
316     for (i=0; i<args->nstates; i++)
317     {
318         double sum = 0;
319         for (j=0; j<args->nstates; j++)
320             if ( i!=j ) sum += MAT(args->tprob,args->nstates,i,j);
321         MAT(args->tprob,args->nstates,i,i) = 1 - sum;
322     }
323 
324     #if 0
325     for (i=0; i<args->nstates; i++)
326     {
327         for (j=0; j<args->nstates; j++)
328             fprintf(stderr,"\t%e",MAT(args->tprob,args->nstates,j,i));
329         fprintf(stderr,"\n");
330     }
331     #endif
332 
333     args->hmm = hmm_init(args->nstates, args->tprob, 10000);
334 }
prob_shared(float af,int a,int b)335 static inline double prob_shared(float af, int a, int b)
336 {
337     return a==b ? 1-args.pgt_err : args.pgt_err;
338 }
prob_not_shared(float af,int a,int b)339 static inline double prob_not_shared(float af, int a, int b)
340 {
341     if ( a!=b ) return af*(1-af);
342     else if ( a==0 ) return (1-af)*(1-af);
343     else return af*af;
344 }
set_observed_prob_unrelated(bcf1_t * rec)345 static void set_observed_prob_unrelated(bcf1_t *rec)
346 {
347     float af = 0.5;  // alternate allele frequency
348 
349     int ngt = bcf_get_genotypes(args.hdr, rec, &args.gt_arr, &args.ngt_arr);
350     if ( ngt<0 ) return;
351     if ( ngt!=4 ) return;   // chrX
352 
353     int32_t a,b,c,d;
354     a = args.gt_arr[2*args.isample];
355     b = args.gt_arr[2*args.isample+1];
356     c = args.gt_arr[2*args.jsample];
357     d = args.gt_arr[2*args.jsample+1];
358     if ( bcf_gt_is_missing(a) || bcf_gt_is_missing(b) ) return;
359     if ( bcf_gt_is_missing(c) || bcf_gt_is_missing(d) ) return;
360     if ( !bcf_gt_is_phased(a) && !bcf_gt_is_phased(b) ) return; // only the second allele should be set when phased
361     if ( !bcf_gt_is_phased(c) && !bcf_gt_is_phased(d) ) return;
362     a = bcf_gt_allele(a);
363     b = bcf_gt_allele(b);
364     c = bcf_gt_allele(c);
365     d = bcf_gt_allele(d);
366 
367     int m = args.msites;
368     args.nsites++;
369     hts_expand(uint32_t,args.nsites,args.msites,args.sites);
370     if ( m!=args.msites ) args.eprob = (double*) realloc(args.eprob, sizeof(double)*args.msites*args.nstates);
371 
372     args.sites[args.nsites-1] = rec->pos;
373     double *prob = args.eprob + args.nstates*(args.nsites-1);
374     prob[UNRL_xxxx] = prob_not_shared(af,a,c) * prob_not_shared(af,a,d) * prob_not_shared(af,b,c) * prob_not_shared(af,b,d);
375     prob[UNRL_0x0x] = prob_shared(af,a,c) * prob_not_shared(af,b,d);
376     prob[UNRL_0xx0] = prob_shared(af,a,d) * prob_not_shared(af,b,c);
377     prob[UNRL_x00x] = prob_shared(af,b,c) * prob_not_shared(af,a,d);
378     prob[UNRL_x0x0] = prob_shared(af,b,d) * prob_not_shared(af,a,c);
379     prob[UNRL_0101] = prob_shared(af,a,c) * prob_shared(af,b,d);
380     prob[UNRL_0110] = prob_shared(af,a,d) * prob_shared(af,b,c);
381 
382 #if 0
383     static int x = 0;
384     if ( !x++)
385     {
386         printf("p(0==0) .. %f\n", prob_shared(af,0,0));
387         printf("p(0!=0) .. %f\n", prob_not_shared(af,0,0));
388         printf("p(0==1) .. %f\n", prob_shared(af,0,1));
389         printf("p(0!=1) .. %f\n", prob_not_shared(af,0,1));
390     }
391     printf("%d|%d %d|%d  x:%f 11:%f 12:%f 21:%f 22:%f 11,22:%f 12,21:%f  %d\n", a,b,c,d,
392             prob[UNRL_xxxx], prob[UNRL_0x0x], prob[UNRL_0xx0], prob[UNRL_x00x], prob[UNRL_x0x0], prob[UNRL_0101], prob[UNRL_0110], rec->pos+1);
393 #endif
394 }
set_observed_prob_trio(bcf1_t * rec)395 static void set_observed_prob_trio(bcf1_t *rec)
396 {
397     int ngt = bcf_get_genotypes(args.hdr, rec, &args.gt_arr, &args.ngt_arr);
398     if ( ngt<0 ) return;
399     if ( ngt!=6 ) return;   // chrX
400 
401     int32_t a,b,c,d,e,f;
402     a = args.gt_arr[2*args.imother];
403     b = args.gt_arr[2*args.imother+1];
404     c = args.gt_arr[2*args.ifather];
405     d = args.gt_arr[2*args.ifather+1];
406     e = args.gt_arr[2*args.ichild];
407     f = args.gt_arr[2*args.ichild+1];
408     if ( bcf_gt_is_missing(a) || bcf_gt_is_missing(b) ) return;
409     if ( bcf_gt_is_missing(c) || bcf_gt_is_missing(d) ) return;
410     if ( bcf_gt_is_missing(e) || bcf_gt_is_missing(f) ) return;
411     if ( !bcf_gt_is_phased(a) && !bcf_gt_is_phased(b) ) return; // only the second allele should be set when phased
412     if ( !bcf_gt_is_phased(c) && !bcf_gt_is_phased(d) ) return;
413     if ( !bcf_gt_is_phased(e) && !bcf_gt_is_phased(f) ) return;
414     a = bcf_gt_allele(a);
415     b = bcf_gt_allele(b);
416     c = bcf_gt_allele(c);
417     d = bcf_gt_allele(d);
418     e = bcf_gt_allele(e);
419     f = bcf_gt_allele(f);
420 
421     int mother = (1<<a) | (1<<b);
422     int father = (1<<c) | (1<<d);
423     int child  = (1<<e) | (1<<f);
424     if ( !(mother&child) || !(father&child) )  return;      // Mendelian-inconsistent site, skip
425 
426     if ( a!=b ) args.nhet_mother++;
427     if ( c!=d ) args.nhet_father++;
428 
429     int m = args.msites;
430     args.nsites++;
431     hts_expand(uint32_t,args.nsites,args.msites,args.sites);
432     if ( m!=args.msites ) args.eprob = (double*) realloc(args.eprob, sizeof(double)*args.msites*args.nstates);
433 
434     args.sites[args.nsites-1] = rec->pos;
435     double *prob = args.eprob + args.nstates*(args.nsites-1);
436     prob[TRIO_AC] = prob_shared(0,e,a) * prob_shared(0,f,c);
437     prob[TRIO_AD] = prob_shared(0,e,a) * prob_shared(0,f,d);
438     prob[TRIO_BC] = prob_shared(0,e,b) * prob_shared(0,f,c);
439     prob[TRIO_BD] = prob_shared(0,e,b) * prob_shared(0,f,d);
440     prob[TRIO_CA] = prob_shared(0,e,c) * prob_shared(0,f,a);
441     prob[TRIO_DA] = prob_shared(0,e,d) * prob_shared(0,f,a);
442     prob[TRIO_CB] = prob_shared(0,e,c) * prob_shared(0,f,b);
443     prob[TRIO_DB] = prob_shared(0,e,d) * prob_shared(0,f,b);
444 }
445 
flush_viterbi(args_t * args)446 void flush_viterbi(args_t *args)
447 {
448     const char *s1, *s2, *s3 = NULL;
449     if ( args->mode==C_UNRL )
450     {
451         s1 = bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,args->isample);
452         s2 = bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,args->jsample);
453     }
454     else if ( args->mode==C_TRIO )
455     {
456         s1 = bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,args->imother);
457         s3 = bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,args->ifather);
458         s2 = bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,args->ichild);
459     }
460     else abort();
461 
462     if ( !args->fp )
463     {
464         kstring_t str = {0,0,0};
465         kputs(args->prefix, &str);
466         kputs(".dat", &str);
467         args->fp = fopen(str.s,"w");
468         if ( !args->fp ) error("%s: %s\n", str.s,strerror(errno));
469         free(str.s);
470         fprintf(args->fp,"# SG, shared segment\t[2]Chromosome\t[3]Start\t[4]End\t[5]%s:1\t[6]%s:2\n",s2,s2);
471         fprintf(args->fp,"# SW, number of switches\t[3]Sample\t[4]Chromosome\t[5]nHets\t[5]nSwitches\t[6]switch rate\n");
472     }
473 
474     hmm_run_viterbi(args->hmm,args->nsites,args->eprob,args->sites);
475     uint8_t *vpath = hmm_get_viterbi_path(args->hmm);
476     int i, iprev = -1, prev_state = -1, nstates = hmm_get_nstates(args->hmm);
477     int nswitch_mother = 0, nswitch_father = 0;
478     for (i=0; i<args->nsites; i++)
479     {
480         int state = vpath[i*nstates];
481         if ( state!=prev_state || i+1==args->nsites )
482         {
483             uint32_t start = iprev>=0 ? args->sites[iprev]+1 : 1, end = i>0 ? args->sites[i-1] : 1;
484             const char *chr = bcf_hdr_id2name(args->hdr,args->prev_rid);
485             if ( args->mode==C_UNRL )
486             {
487                 switch (prev_state)
488                 {
489                     case UNRL_0x0x:
490                         fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:1\t-\n", chr,start,end,s1); break;
491                     case UNRL_0xx0:
492                         fprintf(args->fp,"SG\t%s\t%d\t%d\t-\t%s:1\n", chr,start,end,s1); break;
493                     case UNRL_x00x:
494                         fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:2\t-\n", chr,start,end,s1); break;
495                     case UNRL_x0x0:
496                         fprintf(args->fp,"SG\t%s\t%d\t%d\t-\t%s:2\n", chr,start,end,s1); break;
497                     case UNRL_0101:
498                         fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:1\t%s:2\n", chr,start,end,s1,s1); break;
499                     case UNRL_0110:
500                         fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:2\t%s:1\n", chr,start,end,s1,s1); break;
501                 }
502             }
503             else if ( args->mode==C_TRIO )
504             {
505                 switch (prev_state)
506                 {
507                     case TRIO_AC:
508                         fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:1\t%s:1\n", chr,start,end,s1,s3); break;
509                     case TRIO_AD:
510                         fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:1\t%s:2\n", chr,start,end,s1,s3); break;
511                     case TRIO_BC:
512                         fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:2\t%s:1\n", chr,start,end,s1,s3); break;
513                     case TRIO_BD:
514                         fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:2\t%s:2\n", chr,start,end,s1,s3); break;
515                     case TRIO_CA:
516                         fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:1\t%s:1\n", chr,start,end,s3,s1); break;
517                     case TRIO_DA:
518                         fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:2\t%s:1\n", chr,start,end,s3,s1); break;
519                     case TRIO_CB:
520                         fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:1\t%s:2\n", chr,start,end,s3,s1); break;
521                     case TRIO_DB:
522                         fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:2\t%s:2\n", chr,start,end,s3,s1); break;
523                 }
524                 if ( hap_switch[state][prev_state] & SW_MOTHER ) nswitch_mother++;
525                 if ( hap_switch[state][prev_state] & SW_FATHER ) nswitch_father++;
526             }
527             iprev = i-1;
528         }
529         prev_state = state;
530     }
531     float mrate = args->nhet_mother>1 ? (float)nswitch_mother/(args->nhet_mother-1) : 0;
532     float frate = args->nhet_father>1 ? (float)nswitch_father/(args->nhet_father-1) : 0;
533     fprintf(args->fp,"SW\t%s\t%s\t%d\t%d\t%f\n", s1,bcf_hdr_id2name(args->hdr,args->prev_rid),args->nhet_mother,nswitch_mother,mrate);
534     fprintf(args->fp,"SW\t%s\t%s\t%d\t%d\t%f\n", s3,bcf_hdr_id2name(args->hdr,args->prev_rid),args->nhet_father,nswitch_father,frate);
535     args->nsites = 0;
536     args->nhet_father = args->nhet_mother = 0;
537 }
538 
process(bcf1_t * rec)539 bcf1_t *process(bcf1_t *rec)
540 {
541     if ( args.prev_rid==-1 ) args.prev_rid = rec->rid;
542     if ( args.prev_rid!=rec->rid ) flush_viterbi(&args);
543     args.prev_rid = rec->rid;
544     args.set_observed_prob(rec);
545     return NULL;
546 }
547 
destroy(void)548 void destroy(void)
549 {
550     flush_viterbi(&args);
551     fclose(args.fp);
552 
553     free(args.gt_arr);
554     free(args.tprob);
555     free(args.sites);
556     free(args.eprob);
557     hmm_destroy(args.hmm);
558 }
559 
560 
561 
562