1 /* The MIT License
2
3 Copyright (c) 2015 Genome Research Ltd.
4
5 Author: Petr Danecek <pd3@sanger.ac.uk>
6
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice shall be included in
15 all copies or substantial portions of the Software.
16
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 THE SOFTWARE.
24
25 */
26
27 /*
28 Trio haplotypes: mother (A,B), father (C,D), child (E,F)
29 Modeling the following states:
30 01|23|02
31 01|23|03
32 01|23|12
33 01|23|13
34 01|23|20
35 01|23|30
36 01|23|21
37 01|23|31
38 with the likelihoods of two haplotypes A,B segments sharing an allele:
39 P(01|A==B) .. e (P of error)
40 P(00|A==B) .. 1-e
41 and
42 P(ab,cd,ef|E=A,F=C) = P(ea|E=A)*P(fc|F=C)
43
44
45 Unrelated samples: (A,B) and (C,D)
46 Modeling the states:
47 xxxx .. A!=C,A!=D,B!=C,B!=D
48 0x0x .. A=C,B!=D
49 0xx0 .. A=D,B!=C
50 x00x .. B=C,A!=D
51 x0x0 .. B=D,A!=C
52 0101 .. A=C,B=D
53 0110 .. A=D,B=C
54 with the likelihoods
55 P(01|A!=B) .. f*(1-f)
56 P(00|A!=B) .. (1-f)*(1-f)
57 P(11|A!=B) .. f*f
58
59 Assuming 2x30 crossovers, P=2e-8.
60 */
61
62 #include <stdio.h>
63 #include <stdlib.h>
64 #include <getopt.h>
65 #include <math.h>
66 #include <htslib/hts.h>
67 #include <htslib/vcf.h>
68 #include <errno.h>
69 #include "bcftools.h"
70 #include "HMM.h"
71
72 #define C_TRIO 1
73 #define C_UNRL 2
74
75 // states for unrelated samples
76 #define UNRL_xxxx 0
77 #define UNRL_0x0x 1
78 #define UNRL_0xx0 2
79 #define UNRL_x00x 3
80 #define UNRL_x0x0 4
81 #define UNRL_0101 5
82 #define UNRL_0110 6
83
84 // trio states
85 #define TRIO_AC 0
86 #define TRIO_AD 1
87 #define TRIO_BC 2
88 #define TRIO_BD 3
89 #define TRIO_CA 4
90 #define TRIO_DA 5
91 #define TRIO_CB 6
92 #define TRIO_DB 7
93
94 typedef struct _args_t
95 {
96 bcf_hdr_t *hdr;
97 hmm_t *hmm;
98 double *eprob, *tprob, pij, pgt_err;
99 uint32_t *sites;
100 int32_t *gt_arr;
101 int nsites, msites, ngt_arr, prev_rid;
102 int mode, nstates, nhet_father, nhet_mother;
103 int imother,ifather,ichild, isample,jsample;
104 void (*set_observed_prob) (bcf1_t *rec);
105 char *prefix;
106 FILE *fp;
107 }
108 args_t;
109
110 static args_t args;
111
112 #define SW_MOTHER 1
113 #define SW_FATHER 2
114 static int hap_switch[8][8];
115
116 static void set_observed_prob_trio(bcf1_t *rec);
117 static void set_observed_prob_unrelated(bcf1_t *rec);
118 static void init_hmm_trio(args_t *args);
119 static void init_hmm_unrelated(args_t *args);
120
121
about(void)122 const char *about(void)
123 {
124 return "Color shared chromosomal segments, requires phased GTs.\n";
125 }
126
usage(void)127 const char *usage(void)
128 {
129 return
130 "\n"
131 "About: Color shared chromosomal segments, requires phased GTs. The output\n"
132 " can be visualized using the color-chrs.pl script.\n"
133 "Usage: bcftools +color-chrs [General Options] -- [Plugin Options]\n"
134 "Options:\n"
135 " run \"bcftools plugin\" for a list of common options\n"
136 "\n"
137 "Plugin options:\n"
138 " -p, --prefix <path> output files prefix\n"
139 " -t, --trio <m,f,c> names of mother, father and the child\n"
140 " -u, --unrelated <a,b> names of two unrelated samples\n"
141 "\n"
142 "Example:\n"
143 " bcftools +color-chrs in.vcf --\n"
144 "\n";
145 }
146
init(int argc,char ** argv,bcf_hdr_t * in,bcf_hdr_t * out)147 int init(int argc, char **argv, bcf_hdr_t *in, bcf_hdr_t *out)
148 {
149 char *trio_samples = NULL, *unrelated_samples = NULL;
150 memset(&args,0,sizeof(args_t));
151 args.prev_rid = -1;
152 args.hdr = in;
153 args.pij = 2e-8;
154 args.pgt_err = 1e-9;
155
156 static struct option loptions[] =
157 {
158 {"prefix",1,0,'p'},
159 {"trio",1,0,'t'},
160 {"unrelated",1,0,'u'},
161 {0,0,0,0}
162 };
163 int c;
164 while ((c = getopt_long(argc, argv, "?ht:u:p:",loptions,NULL)) >= 0)
165 {
166 switch (c)
167 {
168 case 'p': args.prefix = optarg; break;
169 case 't': trio_samples = optarg; break;
170 case 'u': unrelated_samples = optarg; break;
171 case 'h':
172 case '?':
173 default: error("%s", usage()); break;
174 }
175 }
176 if ( optind != argc ) error("%s",usage());
177 if ( trio_samples && unrelated_samples ) error("Expected only one of the -t/-u options\n");
178 if ( !trio_samples && !unrelated_samples ) error("Expected one of the -t/-u options\n");
179 if ( !args.prefix ) error("Expected the -p option\n");
180
181 int ret = bcf_hdr_set_samples(args.hdr, trio_samples ? trio_samples : unrelated_samples, 0);
182 if ( ret<0 ) error("Could not parse samples: %s\n", trio_samples ? trio_samples : unrelated_samples);
183 else if ( ret>0 ) error("%d-th sample not found: %s\n", ret,trio_samples ? trio_samples : unrelated_samples);
184
185 if ( trio_samples )
186 {
187 int i,n = 0;
188 char **list = hts_readlist(trio_samples, 0, &n);
189 if ( n!=3 ) error("Expected three sample names with -t\n");
190 args.imother = bcf_hdr_id2int(args.hdr, BCF_DT_SAMPLE, list[0]);
191 args.ifather = bcf_hdr_id2int(args.hdr, BCF_DT_SAMPLE, list[1]);
192 args.ichild = bcf_hdr_id2int(args.hdr, BCF_DT_SAMPLE, list[2]);
193 for (i=0; i<n; i++) free(list[i]);
194 free(list);
195 args.set_observed_prob = set_observed_prob_trio;
196 args.mode = C_TRIO;
197 init_hmm_trio(&args);
198 }
199 else
200 {
201 int i,n = 0;
202 char **list = hts_readlist(unrelated_samples, 0, &n);
203 if ( n!=2 ) error("Expected two sample names with -u\n");
204 args.isample = bcf_hdr_id2int(args.hdr, BCF_DT_SAMPLE, list[0]);
205 args.jsample = bcf_hdr_id2int(args.hdr, BCF_DT_SAMPLE, list[1]);
206 for (i=0; i<n; i++) free(list[i]);
207 free(list);
208 args.set_observed_prob = set_observed_prob_unrelated;
209 args.mode = C_UNRL;
210 init_hmm_unrelated(&args);
211 }
212 return 1;
213 }
214
init_hmm_trio(args_t * args)215 static void init_hmm_trio(args_t *args)
216 {
217 int i,j;
218 args->nstates = 8;
219 args->tprob = (double*) malloc(sizeof(double)*args->nstates*args->nstates);
220
221 for (i=0; i<args->nstates; i++)
222 for (j=0; j<args->nstates; j++) hap_switch[i][j] = 0;
223
224 hap_switch[TRIO_AD][TRIO_AC] = SW_FATHER;
225 hap_switch[TRIO_BC][TRIO_AC] = SW_MOTHER;
226 hap_switch[TRIO_BD][TRIO_AC] = SW_MOTHER | SW_FATHER;
227 hap_switch[TRIO_AC][TRIO_AD] = SW_FATHER;
228 hap_switch[TRIO_BC][TRIO_AD] = SW_MOTHER | SW_FATHER;
229 hap_switch[TRIO_BD][TRIO_AD] = SW_MOTHER;
230 hap_switch[TRIO_AC][TRIO_BC] = SW_MOTHER;
231 hap_switch[TRIO_AD][TRIO_BC] = SW_MOTHER | SW_FATHER;
232 hap_switch[TRIO_BD][TRIO_BC] = SW_FATHER;
233 hap_switch[TRIO_AC][TRIO_BD] = SW_MOTHER | SW_FATHER;
234 hap_switch[TRIO_AD][TRIO_BD] = SW_MOTHER;
235 hap_switch[TRIO_BC][TRIO_BD] = SW_FATHER;
236
237 hap_switch[TRIO_DA][TRIO_CA] = SW_FATHER;
238 hap_switch[TRIO_CB][TRIO_CA] = SW_MOTHER;
239 hap_switch[TRIO_DB][TRIO_CA] = SW_MOTHER | SW_FATHER;
240 hap_switch[TRIO_CA][TRIO_DA] = SW_FATHER;
241 hap_switch[TRIO_CB][TRIO_DA] = SW_MOTHER | SW_FATHER;
242 hap_switch[TRIO_DB][TRIO_DA] = SW_MOTHER;
243 hap_switch[TRIO_CA][TRIO_CB] = SW_MOTHER;
244 hap_switch[TRIO_DA][TRIO_CB] = SW_MOTHER | SW_FATHER;
245 hap_switch[TRIO_DB][TRIO_CB] = SW_FATHER;
246 hap_switch[TRIO_CA][TRIO_DB] = SW_MOTHER | SW_FATHER;
247 hap_switch[TRIO_DA][TRIO_DB] = SW_MOTHER;
248 hap_switch[TRIO_CB][TRIO_DB] = SW_FATHER;
249
250 for (i=0; i<args->nstates; i++)
251 {
252 for (j=0; j<args->nstates; j++)
253 {
254 if ( !hap_switch[i][j] ) MAT(args->tprob,args->nstates,i,j) = 0;
255 else
256 {
257 MAT(args->tprob,args->nstates,i,j) = 1;
258 if ( hap_switch[i][j] & SW_MOTHER ) MAT(args->tprob,args->nstates,i,j) *= args->pij;
259 if ( hap_switch[i][j] & SW_FATHER ) MAT(args->tprob,args->nstates,i,j) *= args->pij;
260 }
261 }
262 }
263 for (i=0; i<args->nstates; i++)
264 {
265 double sum = 0;
266 for (j=0; j<args->nstates; j++)
267 {
268 if ( i!=j ) sum += MAT(args->tprob,args->nstates,i,j);
269 }
270 MAT(args->tprob,args->nstates,i,i) = 1 - sum;
271 }
272
273 #if 0
274 for (i=0; i<args->nstates; i++)
275 {
276 for (j=0; j<args->nstates; j++)
277 fprintf(stderr,"\t%d",hap_switch[j][i]);
278 fprintf(stderr,"\n");
279 }
280 for (i=0; i<args->nstates; i++)
281 {
282 for (j=0; j<args->nstates; j++)
283 fprintf(stderr,"\t%e",MAT(args->tprob,args->nstates,j,i));
284 fprintf(stderr,"\n");
285 }
286 #endif
287
288 args->hmm = hmm_init(args->nstates, args->tprob, 10000);
289 }
init_hmm_unrelated(args_t * args)290 static void init_hmm_unrelated(args_t *args)
291 {
292 int i,j;
293 args->nstates = 7;
294 args->tprob = (double*) malloc(sizeof(double)*args->nstates*args->nstates);
295
296 for (i=0; i<args->nstates; i++)
297 {
298 for (j=0; j<args->nstates; j++)
299 MAT(args->tprob,args->nstates,i,j) = args->pij;
300 }
301 MAT(args->tprob,args->nstates,UNRL_0101,UNRL_xxxx) = args->pij*args->pij;
302 MAT(args->tprob,args->nstates,UNRL_0110,UNRL_xxxx) = args->pij*args->pij;
303 MAT(args->tprob,args->nstates,UNRL_x0x0,UNRL_0x0x) = args->pij*args->pij;
304 MAT(args->tprob,args->nstates,UNRL_0110,UNRL_0x0x) = args->pij*args->pij;
305 MAT(args->tprob,args->nstates,UNRL_x00x,UNRL_0xx0) = args->pij*args->pij;
306 MAT(args->tprob,args->nstates,UNRL_0101,UNRL_0xx0) = args->pij*args->pij;
307 MAT(args->tprob,args->nstates,UNRL_0101,UNRL_x00x) = args->pij*args->pij;
308 MAT(args->tprob,args->nstates,UNRL_0110,UNRL_x0x0) = args->pij*args->pij;
309 MAT(args->tprob,args->nstates,UNRL_0110,UNRL_0101) = args->pij*args->pij;
310
311 for (i=0; i<args->nstates; i++)
312 {
313 for (j=i+1; j<args->nstates; j++)
314 MAT(args->tprob,args->nstates,i,j) = MAT(args->tprob,args->nstates,j,i);
315 }
316 for (i=0; i<args->nstates; i++)
317 {
318 double sum = 0;
319 for (j=0; j<args->nstates; j++)
320 if ( i!=j ) sum += MAT(args->tprob,args->nstates,i,j);
321 MAT(args->tprob,args->nstates,i,i) = 1 - sum;
322 }
323
324 #if 0
325 for (i=0; i<args->nstates; i++)
326 {
327 for (j=0; j<args->nstates; j++)
328 fprintf(stderr,"\t%e",MAT(args->tprob,args->nstates,j,i));
329 fprintf(stderr,"\n");
330 }
331 #endif
332
333 args->hmm = hmm_init(args->nstates, args->tprob, 10000);
334 }
prob_shared(float af,int a,int b)335 static inline double prob_shared(float af, int a, int b)
336 {
337 return a==b ? 1-args.pgt_err : args.pgt_err;
338 }
prob_not_shared(float af,int a,int b)339 static inline double prob_not_shared(float af, int a, int b)
340 {
341 if ( a!=b ) return af*(1-af);
342 else if ( a==0 ) return (1-af)*(1-af);
343 else return af*af;
344 }
set_observed_prob_unrelated(bcf1_t * rec)345 static void set_observed_prob_unrelated(bcf1_t *rec)
346 {
347 float af = 0.5; // alternate allele frequency
348
349 int ngt = bcf_get_genotypes(args.hdr, rec, &args.gt_arr, &args.ngt_arr);
350 if ( ngt<0 ) return;
351 if ( ngt!=4 ) return; // chrX
352
353 int32_t a,b,c,d;
354 a = args.gt_arr[2*args.isample];
355 b = args.gt_arr[2*args.isample+1];
356 c = args.gt_arr[2*args.jsample];
357 d = args.gt_arr[2*args.jsample+1];
358 if ( bcf_gt_is_missing(a) || bcf_gt_is_missing(b) ) return;
359 if ( bcf_gt_is_missing(c) || bcf_gt_is_missing(d) ) return;
360 if ( !bcf_gt_is_phased(a) && !bcf_gt_is_phased(b) ) return; // only the second allele should be set when phased
361 if ( !bcf_gt_is_phased(c) && !bcf_gt_is_phased(d) ) return;
362 a = bcf_gt_allele(a);
363 b = bcf_gt_allele(b);
364 c = bcf_gt_allele(c);
365 d = bcf_gt_allele(d);
366
367 int m = args.msites;
368 args.nsites++;
369 hts_expand(uint32_t,args.nsites,args.msites,args.sites);
370 if ( m!=args.msites ) args.eprob = (double*) realloc(args.eprob, sizeof(double)*args.msites*args.nstates);
371
372 args.sites[args.nsites-1] = rec->pos;
373 double *prob = args.eprob + args.nstates*(args.nsites-1);
374 prob[UNRL_xxxx] = prob_not_shared(af,a,c) * prob_not_shared(af,a,d) * prob_not_shared(af,b,c) * prob_not_shared(af,b,d);
375 prob[UNRL_0x0x] = prob_shared(af,a,c) * prob_not_shared(af,b,d);
376 prob[UNRL_0xx0] = prob_shared(af,a,d) * prob_not_shared(af,b,c);
377 prob[UNRL_x00x] = prob_shared(af,b,c) * prob_not_shared(af,a,d);
378 prob[UNRL_x0x0] = prob_shared(af,b,d) * prob_not_shared(af,a,c);
379 prob[UNRL_0101] = prob_shared(af,a,c) * prob_shared(af,b,d);
380 prob[UNRL_0110] = prob_shared(af,a,d) * prob_shared(af,b,c);
381
382 #if 0
383 static int x = 0;
384 if ( !x++)
385 {
386 printf("p(0==0) .. %f\n", prob_shared(af,0,0));
387 printf("p(0!=0) .. %f\n", prob_not_shared(af,0,0));
388 printf("p(0==1) .. %f\n", prob_shared(af,0,1));
389 printf("p(0!=1) .. %f\n", prob_not_shared(af,0,1));
390 }
391 printf("%d|%d %d|%d x:%f 11:%f 12:%f 21:%f 22:%f 11,22:%f 12,21:%f %d\n", a,b,c,d,
392 prob[UNRL_xxxx], prob[UNRL_0x0x], prob[UNRL_0xx0], prob[UNRL_x00x], prob[UNRL_x0x0], prob[UNRL_0101], prob[UNRL_0110], rec->pos+1);
393 #endif
394 }
set_observed_prob_trio(bcf1_t * rec)395 static void set_observed_prob_trio(bcf1_t *rec)
396 {
397 int ngt = bcf_get_genotypes(args.hdr, rec, &args.gt_arr, &args.ngt_arr);
398 if ( ngt<0 ) return;
399 if ( ngt!=6 ) return; // chrX
400
401 int32_t a,b,c,d,e,f;
402 a = args.gt_arr[2*args.imother];
403 b = args.gt_arr[2*args.imother+1];
404 c = args.gt_arr[2*args.ifather];
405 d = args.gt_arr[2*args.ifather+1];
406 e = args.gt_arr[2*args.ichild];
407 f = args.gt_arr[2*args.ichild+1];
408 if ( bcf_gt_is_missing(a) || bcf_gt_is_missing(b) ) return;
409 if ( bcf_gt_is_missing(c) || bcf_gt_is_missing(d) ) return;
410 if ( bcf_gt_is_missing(e) || bcf_gt_is_missing(f) ) return;
411 if ( !bcf_gt_is_phased(a) && !bcf_gt_is_phased(b) ) return; // only the second allele should be set when phased
412 if ( !bcf_gt_is_phased(c) && !bcf_gt_is_phased(d) ) return;
413 if ( !bcf_gt_is_phased(e) && !bcf_gt_is_phased(f) ) return;
414 a = bcf_gt_allele(a);
415 b = bcf_gt_allele(b);
416 c = bcf_gt_allele(c);
417 d = bcf_gt_allele(d);
418 e = bcf_gt_allele(e);
419 f = bcf_gt_allele(f);
420
421 int mother = (1<<a) | (1<<b);
422 int father = (1<<c) | (1<<d);
423 int child = (1<<e) | (1<<f);
424 if ( !(mother&child) || !(father&child) ) return; // Mendelian-inconsistent site, skip
425
426 if ( a!=b ) args.nhet_mother++;
427 if ( c!=d ) args.nhet_father++;
428
429 int m = args.msites;
430 args.nsites++;
431 hts_expand(uint32_t,args.nsites,args.msites,args.sites);
432 if ( m!=args.msites ) args.eprob = (double*) realloc(args.eprob, sizeof(double)*args.msites*args.nstates);
433
434 args.sites[args.nsites-1] = rec->pos;
435 double *prob = args.eprob + args.nstates*(args.nsites-1);
436 prob[TRIO_AC] = prob_shared(0,e,a) * prob_shared(0,f,c);
437 prob[TRIO_AD] = prob_shared(0,e,a) * prob_shared(0,f,d);
438 prob[TRIO_BC] = prob_shared(0,e,b) * prob_shared(0,f,c);
439 prob[TRIO_BD] = prob_shared(0,e,b) * prob_shared(0,f,d);
440 prob[TRIO_CA] = prob_shared(0,e,c) * prob_shared(0,f,a);
441 prob[TRIO_DA] = prob_shared(0,e,d) * prob_shared(0,f,a);
442 prob[TRIO_CB] = prob_shared(0,e,c) * prob_shared(0,f,b);
443 prob[TRIO_DB] = prob_shared(0,e,d) * prob_shared(0,f,b);
444 }
445
flush_viterbi(args_t * args)446 void flush_viterbi(args_t *args)
447 {
448 const char *s1, *s2, *s3 = NULL;
449 if ( args->mode==C_UNRL )
450 {
451 s1 = bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,args->isample);
452 s2 = bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,args->jsample);
453 }
454 else if ( args->mode==C_TRIO )
455 {
456 s1 = bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,args->imother);
457 s3 = bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,args->ifather);
458 s2 = bcf_hdr_int2id(args->hdr,BCF_DT_SAMPLE,args->ichild);
459 }
460 else abort();
461
462 if ( !args->fp )
463 {
464 kstring_t str = {0,0,0};
465 kputs(args->prefix, &str);
466 kputs(".dat", &str);
467 args->fp = fopen(str.s,"w");
468 if ( !args->fp ) error("%s: %s\n", str.s,strerror(errno));
469 free(str.s);
470 fprintf(args->fp,"# SG, shared segment\t[2]Chromosome\t[3]Start\t[4]End\t[5]%s:1\t[6]%s:2\n",s2,s2);
471 fprintf(args->fp,"# SW, number of switches\t[3]Sample\t[4]Chromosome\t[5]nHets\t[5]nSwitches\t[6]switch rate\n");
472 }
473
474 hmm_run_viterbi(args->hmm,args->nsites,args->eprob,args->sites);
475 uint8_t *vpath = hmm_get_viterbi_path(args->hmm);
476 int i, iprev = -1, prev_state = -1, nstates = hmm_get_nstates(args->hmm);
477 int nswitch_mother = 0, nswitch_father = 0;
478 for (i=0; i<args->nsites; i++)
479 {
480 int state = vpath[i*nstates];
481 if ( state!=prev_state || i+1==args->nsites )
482 {
483 uint32_t start = iprev>=0 ? args->sites[iprev]+1 : 1, end = i>0 ? args->sites[i-1] : 1;
484 const char *chr = bcf_hdr_id2name(args->hdr,args->prev_rid);
485 if ( args->mode==C_UNRL )
486 {
487 switch (prev_state)
488 {
489 case UNRL_0x0x:
490 fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:1\t-\n", chr,start,end,s1); break;
491 case UNRL_0xx0:
492 fprintf(args->fp,"SG\t%s\t%d\t%d\t-\t%s:1\n", chr,start,end,s1); break;
493 case UNRL_x00x:
494 fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:2\t-\n", chr,start,end,s1); break;
495 case UNRL_x0x0:
496 fprintf(args->fp,"SG\t%s\t%d\t%d\t-\t%s:2\n", chr,start,end,s1); break;
497 case UNRL_0101:
498 fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:1\t%s:2\n", chr,start,end,s1,s1); break;
499 case UNRL_0110:
500 fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:2\t%s:1\n", chr,start,end,s1,s1); break;
501 }
502 }
503 else if ( args->mode==C_TRIO )
504 {
505 switch (prev_state)
506 {
507 case TRIO_AC:
508 fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:1\t%s:1\n", chr,start,end,s1,s3); break;
509 case TRIO_AD:
510 fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:1\t%s:2\n", chr,start,end,s1,s3); break;
511 case TRIO_BC:
512 fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:2\t%s:1\n", chr,start,end,s1,s3); break;
513 case TRIO_BD:
514 fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:2\t%s:2\n", chr,start,end,s1,s3); break;
515 case TRIO_CA:
516 fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:1\t%s:1\n", chr,start,end,s3,s1); break;
517 case TRIO_DA:
518 fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:2\t%s:1\n", chr,start,end,s3,s1); break;
519 case TRIO_CB:
520 fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:1\t%s:2\n", chr,start,end,s3,s1); break;
521 case TRIO_DB:
522 fprintf(args->fp,"SG\t%s\t%d\t%d\t%s:2\t%s:2\n", chr,start,end,s3,s1); break;
523 }
524 if ( hap_switch[state][prev_state] & SW_MOTHER ) nswitch_mother++;
525 if ( hap_switch[state][prev_state] & SW_FATHER ) nswitch_father++;
526 }
527 iprev = i-1;
528 }
529 prev_state = state;
530 }
531 float mrate = args->nhet_mother>1 ? (float)nswitch_mother/(args->nhet_mother-1) : 0;
532 float frate = args->nhet_father>1 ? (float)nswitch_father/(args->nhet_father-1) : 0;
533 fprintf(args->fp,"SW\t%s\t%s\t%d\t%d\t%f\n", s1,bcf_hdr_id2name(args->hdr,args->prev_rid),args->nhet_mother,nswitch_mother,mrate);
534 fprintf(args->fp,"SW\t%s\t%s\t%d\t%d\t%f\n", s3,bcf_hdr_id2name(args->hdr,args->prev_rid),args->nhet_father,nswitch_father,frate);
535 args->nsites = 0;
536 args->nhet_father = args->nhet_mother = 0;
537 }
538
process(bcf1_t * rec)539 bcf1_t *process(bcf1_t *rec)
540 {
541 if ( args.prev_rid==-1 ) args.prev_rid = rec->rid;
542 if ( args.prev_rid!=rec->rid ) flush_viterbi(&args);
543 args.prev_rid = rec->rid;
544 args.set_observed_prob(rec);
545 return NULL;
546 }
547
destroy(void)548 void destroy(void)
549 {
550 flush_viterbi(&args);
551 fclose(args.fp);
552
553 free(args.gt_arr);
554 free(args.tprob);
555 free(args.sites);
556 free(args.eprob);
557 hmm_destroy(args.hmm);
558 }
559
560
561
562