1 /* The MIT License
2
3 Copyright (c) 2016 Adrian Tan <atks@umich.edu>
4
5 Permission is hereby granted, free of charge, to any person obtaining a copy
6 of this software and associated documentation files (the "Software"), to deal
7 in the Software without restriction, including without limitation the rights
8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 copies of the Software, and to permit persons to whom the Software is
10 furnished to do so, subject to the following conditions:
11
12 The above copyright notice and this permission notice shall be included in
13 all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 THE SOFTWARE.
22 */
23
24 #include "snp_genotyping_record.h"
25
26 /**
27 * Constructor.
28 * @v - VCF record.
29 */
SNPGenotypingRecord()30 SNPGenotypingRecord::SNPGenotypingRecord() {};
31
32 /**
33 * Constructor.
34 * @v - VCF record.
35 */
SNPGenotypingRecord(bcf_hdr_t * h,bcf1_t * v,int32_t nsamples,int32_t ploidy,Estimator * est)36 SNPGenotypingRecord::SNPGenotypingRecord(bcf_hdr_t *h, bcf1_t *v, int32_t nsamples, int32_t ploidy, Estimator* est)
37 {
38 clear();
39
40 this->h = h;
41 //this->v = v;
42 this->rid = bcf_get_rid(v);
43 this->pos1 = bcf_get_pos1(v);
44 this->nsamples = nsamples;
45
46 this->alleles = {0,0,0};
47 char** tmp_alleles = bcf_get_allele(v);
48 for (size_t i=0; i< bcf_get_n_allele(v); ++i) {
49 if (i) kputc(',', &this->alleles);
50 kputs(tmp_alleles[i], &this->alleles);
51 v_alleles.push_back(tmp_alleles[i]);
52 }
53
54 n_filter = 0;
55
56 if (vtype==VT_SNP && v_alleles.size()==2)
57 {
58 //rid = bcf_get_rid(v);
59 beg1 = bcf_get_pos1(v);
60 end1 = beg1;
61
62 if (bcf_has_filter(h, v, const_cast<char*>("overlap_snp"))==1)
63 n_filter |= FILTER_MASK_OVERLAP_SNP;
64
65 if (bcf_has_filter(h, v, const_cast<char*>("overlap_indel"))==1)
66 n_filter |= FILTER_MASK_OVERLAP_INDEL;
67
68 if (bcf_has_filter(h, v, const_cast<char*>("overlap_vntr"))==1)
69 n_filter |= FILTER_MASK_OVERLAP_VNTR;
70 }
71
72 pls = (uint8_t*)calloc( nsamples*3, sizeof(uint8_t) );
73 ads = (uint8_t*)calloc( nsamples*3, sizeof(uint8_t) );
74 }
75
76 /**
77 * Destructor.
78 */
~SNPGenotypingRecord()79 SNPGenotypingRecord::~SNPGenotypingRecord()
80 {
81 //if (v) bcf_destroy(v);
82 if ( pls ) free(pls);
83 if ( ads ) free(ads);
84 if ( alleles.l > 0 ) free(alleles.s);
85 }
86
87 /**
88 * Clears this record.
89 */
clearTemp()90 void SNPGenotypingRecord::clearTemp()
91 {
92 tmp_dp_q20 = 0;
93 tmp_dp_ra = 0;
94 tmp_bq_s1 = tmp_bq_s2 = 0;
95 tmp_mq_s1 = tmp_mq_s2 = 0;
96 tmp_cy_s1 = tmp_cy_s2 = 0;
97 tmp_st_s1 = tmp_st_s2 = 0;
98 tmp_al_s1 = tmp_bq_al = tmp_mq_al = tmp_cy_al = tmp_st_al = tmp_nm_al = 0;
99 tmp_nm_s1 = tmp_nm_s2 = 0;
100 tmp_oth_exp_q20 = tmp_oth_obs_q20 = 0;
101 tmp_pls[0] = tmp_pls[1] = tmp_pls[2] = 1.;
102 tmp_ads[0] = tmp_ads[1] = tmp_ads[2] = 0;
103 }
104
105 /**
106 * Clear fields.
107 */
clear()108 void SNPGenotypingRecord::clear()
109 {
110 vtype = -1;
111
112 pls = ads = NULL;
113
114 bqr_num = bqr_den = 0;
115 mqr_num = mqr_den = 0;
116 cyr_num = cyr_den = 0;
117 str_num = str_den = 0;
118 nmr_num = nmr_den = 0;
119 ior_num = ior_den = 0;
120 nm0_num = nm0_den = 0;
121 nm1_num = nm1_den = 0;
122 abe_num = abe_den = 0;
123 abz_num = abz_den = 0;
124 ns_nref = dp_sum = max_gq = 0;
125 clearTemp();
126 }
127
128 /**
129 * Destructor.
130 */
flush_variant(bcf_hdr_t * hdr)131 bcf1_t* SNPGenotypingRecord::flush_variant(bcf_hdr_t* hdr)
132 {
133 bcf1_t *nv = bcf_init();
134 bcf_clear(nv);
135 bcf_set_n_sample(nv, nsamples);
136
137 bcf_set_rid(nv, rid);
138 bcf_set_pos1(nv, pos1);
139 bcf_update_alleles_str(hdr, nv, alleles.s);
140
141 int32_t* gt = (int32_t*) calloc ( nsamples * 2, sizeof(int32_t) );
142 int32_t* pl = (int32_t*) calloc ( nsamples * 3, sizeof(int32_t) );
143 int32_t* ad = (int32_t*) calloc ( nsamples * 2, sizeof(int32_t) );
144 int32_t* td = (int32_t*) calloc ( nsamples * 1, sizeof(int32_t) );
145 int32_t* gq = (int32_t*) calloc ( nsamples * 1, sizeof(int32_t) );
146 float MLE_HWE_AF[2];
147 float MLE_HWE_GF[3];
148 double gp, gp_sum, max_gp;
149 int32_t best_gt;
150 int32_t best_a1, best_a2;
151 int32_t* pls_i;
152 int32_t an = 0;
153 int32_t acs[2] = {0,0};
154 int32_t gcs[3] = {0,0,0};
155 float afs[3];
156 int32_t max_gq = 0;
157 int64_t dp_sum = 0;
158 int32_t ploidy = 2;
159
160 std::copy( pls, pls + (nsamples*3), pl );
161 for(int32_t i=0; i < nsamples; ++i)
162 {
163 dp_sum += ( td[i] = ( (ad[2*i] = ads[3*i]) + (ad[2*i+1] = ads[3*i+1]) + ads[3*i+2] ) );
164 }
165
166 int32_t n = 0;
167 int32_t adSumHet[2] = {0,0};
168 Estimator::compute_gl_af_hwe(pl, nsamples, ploidy, 2, MLE_HWE_AF, MLE_HWE_GF, n, 1e-20);
169
170 for(int32_t i=0; i < nsamples; ++i)
171 {
172 int32_t* pli = &pl[ i * 3 ];
173 max_gp = gp_sum = gp = ( LogTool::pl2prob(pli[0]) * MLE_HWE_AF[0] * MLE_HWE_AF[0] );
174 best_gt = 0; best_a1 = 0; best_a2 = 0;
175 for(size_t l=1; l < 2; ++l)
176 {
177 for(size_t m=0; m <= l; ++m)
178 {
179 gp = ( LogTool::pl2prob(pli[ l*(l+1)/2 + m]) * MLE_HWE_AF[l] * MLE_HWE_AF[m] * (l == m ? 1 : 2) );
180 gp_sum += gp;
181 if ( max_gp < gp )
182 {
183 max_gp = gp;
184 best_gt = l*(l+1)/2 + m;
185 best_a1 = m;
186 best_a2 = l;
187 }
188 }
189 }
190
191 if ( best_gt == 1 )
192 {
193 adSumHet[0] += ad[2*i];
194 adSumHet[1] += ad[2*i+1];
195 }
196
197 double prob = 1.-max_gp/gp_sum; // to calculate GQ
198 if ( prob <= 3.162278e-26 )
199 {
200 prob = 3.162278e-26;
201 }
202
203 if ( prob > 1 )
204 {
205 prob = 1;
206 }
207
208 gq[i] = (int32_t)LogTool::prob2pl(prob);
209
210 if ( ( best_gt > 0 ) && ( max_gq < gq[i] ) )
211 {
212 max_gq = gq[i];
213 }
214
215 gt[2*i] = ((best_a1 + 1) << 1);
216 gt[2*i+1] = ((best_a2 + 1) << 1);
217 an += 2; // still use diploid representation of chrX for now.
218 ++acs[best_a1];
219 ++acs[best_a2];
220 ++gcs[best_gt];
221 }
222
223 for(size_t i=0; i < 2; ++i)
224 {
225 afs[i] = acs[i]/(float)an;
226 }
227
228 bcf_update_format_int32(hdr, nv, "GT", gt, nsamples * 2);
229 bcf_update_format_int32(hdr, nv, "GQ", gq, nsamples );
230 bcf_update_format_int32(hdr, nv, "AD", ad, nsamples * 2);
231 bcf_update_format_int32(hdr, nv, "DP", td, nsamples );
232 bcf_update_format_int32(hdr, nv, "PL", pl, nsamples * 3);
233
234 float avgdp = (float)dp_sum / (float)nsamples;
235
236 nv->qual = (float) max_gq;
237 bcf_update_info_float(hdr, nv, "AVGDP", &avgdp, 1);
238 bcf_update_info_int32(hdr, nv, "AC", &acs[1], 1);
239 bcf_update_info_int32(hdr, nv, "AN", &an, 1);
240 bcf_update_info_float(hdr, nv, "AF", &afs[1], 1);
241 bcf_update_info_int32(hdr, nv, "GC", gcs, 3);
242 bcf_update_info_int32(hdr, nv, "GN", &nsamples, 1);
243
244 if (n)
245 {
246 float* MLE_HWE_AF_PTR = &MLE_HWE_AF[1];
247 bcf_update_info_float(hdr, nv, "HWEAF", MLE_HWE_AF_PTR, 1);
248 }
249
250 // calculate the allele frequencies under HWD
251 float MLE_AF[2];
252 float MLE_GF[3];
253 n = 0;
254 Estimator::compute_gl_af(pl, nsamples, ploidy, 2, MLE_AF, MLE_GF, n, 1e-20);
255 if (n)
256 {
257 //float* MLE_AF_PTR = &MLE_AF[1];
258 //bcf_update_info_float(odw->hdr, nv, "HWDAF", MLE_AF_PTR, n_alleles-1);
259 bcf_update_info_float(hdr, nv, "HWDGF", &MLE_GF, 3);
260 }
261
262 float fic = 0;
263 n = 0;
264 Estimator::compute_gl_fic(pl, nsamples, ploidy, MLE_HWE_AF, 2, MLE_GF, fic, n);
265 if ( std::isnan((double)fic) ) fic = 0;
266 if (n)
267 {
268 bcf_update_info_float(hdr, nv, "IBC", &fic, 1);
269 }
270
271 // calculate the LRT statistics related to HWE
272 float lrts;
273 float logp;
274 int32_t df;
275 n = 0;
276 Estimator::compute_hwe_lrt(pl, nsamples, ploidy, 2, MLE_HWE_GF, MLE_GF, n, lrts, logp, df);
277 if (n)
278 {
279 if ( fic < 0 ) logp = 0-logp;
280 {
281 bcf_update_info_float(hdr, nv, "HWE_SLP", &logp, 1);
282 }
283 }
284
285 float abe = (adSumHet[0] + 0.5)/(adSumHet[0] + adSumHet[1] + 1.0);
286 bcf_update_info_float(hdr, nv, "ABE", &abe, 1);
287
288 // update filter
289 if ( n_filter & FILTER_MASK_OVERLAP_SNP )
290 bcf_add_filter(hdr, nv, bcf_hdr_id2int(hdr, BCF_DT_ID, "overlap_snp"));
291 if ( n_filter & FILTER_MASK_OVERLAP_INDEL )
292 bcf_add_filter(hdr, nv, bcf_hdr_id2int(hdr, BCF_DT_ID, "overlap_indel"));
293 if ( n_filter & FILTER_MASK_OVERLAP_VNTR )
294 bcf_add_filter(hdr, nv, bcf_hdr_id2int(hdr, BCF_DT_ID, "overlap_vntr"));
295
296 //bcf_update_info_int32(odw->hdr, nv, "NS_NREF", &v_ns_nrefs[k], 1);
297 abe_num /= (abe_den+1e-6); bcf_update_info_float(hdr, nv, "ABE", &abe_num, 1);
298 abz_num /= sqrt(abz_den+1e-6); bcf_update_info_float(hdr, nv, "ABZ", &abz_num, 1);
299 bqr_num /= sqrt(bqr_den+1e-6); bcf_update_info_float(hdr, nv, "BQZ", &bqr_num, 1);
300 mqr_num /= sqrt(mqr_den+1e-6); bcf_update_info_float(hdr, nv, "MQZ", &mqr_num, 1);
301 cyr_num /= sqrt(cyr_den+1e-6); bcf_update_info_float(hdr, nv, "CYZ", &cyr_num, 1);
302 str_num /= sqrt(str_den+1e-6); bcf_update_info_float(hdr, nv, "STZ", &str_num, 1);
303 nmr_num /= sqrt(nmr_den+1e-6); bcf_update_info_float(hdr, nv, "NMZ", &nmr_num, 1);
304 ior_num = log(ior_num/ior_den+1e-6)/log(10.); bcf_update_info_float(hdr, nv, "IOR", &ior_num, 1);
305 nm1_num /= (nm1_den+1e-6); bcf_update_info_float(hdr, nv, "NM1", &nm1_num, 1);
306 nm0_num /= (nm0_den+1e-6); bcf_update_info_float(hdr, nv, "NM0", &nm0_num, 1);
307
308 //odw->write(nv);
309 //bcf_destroy(nv);
310 free(gt);
311 free(gq);
312 free(pl);
313 free(ad);
314 free(td);
315
316 free(pls); pls = NULL;
317 free(ads); ads = NULL;
318
319 free(alleles.s);
320
321 return nv;
322 }
323
flush_sample(int32_t sampleIndex)324 void SNPGenotypingRecord::flush_sample(int32_t sampleIndex)
325 {
326 uint8_t* p_pls = &pls[sampleIndex*3];
327 uint8_t* p_ads = &ads[sampleIndex*3];
328
329 int32_t imax = ( tmp_pls[0] > tmp_pls[1] ) ? ( tmp_pls[0] > tmp_pls[2] ? 0 : 2 ) : ( tmp_pls[1] > tmp_pls[2] ? 1 : 2);
330 for(int32_t i=0; i < 3; ++i)
331 {
332 uint32_t l = LogTool::prob2pl(tmp_pls[i]/tmp_pls[imax]);
333 p_pls[i] = ((l > 255) ? 255 : l);
334 p_ads[i] = ((tmp_ads[i] > 255) ? 255 : (uint8_t)tmp_ads[i]);
335 }
336
337 float sqrt_dp_ra = sqrt((float)tmp_dp_ra);
338 float ior = (float)(tmp_oth_obs_q20 / (tmp_oth_exp_q20 + 1e-6));
339 float nm1 = tmp_al_s1 == 0 ? 0 : tmp_nm_al / (float)tmp_al_s1;
340 float nm0 = (tmp_dp_ra - tmp_al_s1) == 0 ? 0 : (tmp_nm_s1-tmp_nm_al) / (float)(tmp_dp_ra - tmp_al_s1);
341 float w_dp_ra = log(tmp_dp_ra+1.); //sqrt(dp_ra);
342 float w_dp_q20 = log(tmp_dp_q20+1.); //sqrt(dp_q20);
343 float w_al_s1 = log(tmp_al_s1+1.); //sqrt(al_s1);
344 float w_ref_s1 = log(tmp_dp_ra - tmp_al_s1+1.);
345
346 if ( p_pls[1] == 0 )
347 { // het genotypes
348 abe_num += (w_dp_ra * (tmp_dp_ra - tmp_al_s1 + 0.05) / (double)(tmp_dp_ra + 0.1));
349 abe_den += w_dp_ra;
350
351 // E(r) = 0.5(r+a) V(r) = 0.25(r+a)
352 abz_num += w_dp_ra * (tmp_dp_ra - tmp_al_s1 - tmp_dp_ra*0.5)/sqrt(0.25 * tmp_dp_ra + 1e-3);
353 abz_den += (w_dp_ra * w_dp_ra);
354
355 float bqr = sqrt_dp_ra * Estimator::compute_correlation( tmp_dp_ra, tmp_bq_al, tmp_bq_s1, tmp_bq_s2, tmp_al_s1, tmp_al_s1, .1 );
356 float mqr = sqrt_dp_ra * Estimator::compute_correlation( tmp_dp_ra, tmp_mq_al, tmp_mq_s1, tmp_mq_s2, tmp_al_s1, tmp_al_s1, .1 );
357 float cyr = sqrt_dp_ra * Estimator::compute_correlation_f( tmp_dp_ra, tmp_cy_al, tmp_cy_s1, tmp_cy_s2, (float)tmp_al_s1, (float)tmp_al_s1, .1 );
358 float str = sqrt_dp_ra * Estimator::compute_correlation( tmp_dp_ra, tmp_st_al, tmp_st_s1, tmp_st_s1, tmp_al_s1, tmp_al_s1, .1 );
359 float nmr = sqrt_dp_ra * Estimator::compute_correlation( tmp_dp_ra, tmp_nm_al, tmp_nm_s1, tmp_nm_s2, tmp_al_s1, tmp_al_s1, .1 );
360
361 // Use Stouffer's method to combine the z-scores, but weighted by log of sample size
362 bqr_num += (bqr * w_dp_ra); bqr_den += (w_dp_ra * w_dp_ra);
363 mqr_num += (mqr * w_dp_ra); mqr_den += (w_dp_ra * w_dp_ra);
364 cyr_num += (cyr * w_dp_ra); cyr_den += (w_dp_ra * w_dp_ra);
365 str_num += (str * w_dp_ra); str_den += (w_dp_ra * w_dp_ra);
366 nmr_num += (nmr * w_dp_ra); nmr_den += (w_dp_ra * w_dp_ra);
367 }
368
369 ior_num += (ior * w_dp_q20); ior_den += w_dp_q20;
370 nm1_num += (nm1 * w_al_s1); nm1_den += w_al_s1;
371 nm0_num += (nm0 * w_ref_s1); nm0_den += w_ref_s1;
372
373 clearTemp();
374 }
375
376 /**
377 * Adds an allele based with collected sufficient statistics.
378 */
add_allele(double contam,int32_t allele,uint8_t mapq,bool fwd,uint32_t q,int32_t cycle,uint32_t nm)379 void SNPGenotypingRecord::add_allele(double contam, int32_t allele, uint8_t mapq, bool fwd, uint32_t q, int32_t cycle, uint32_t nm)
380 {
381 double pe = LogTool::pl2prob(q);
382 double pm = 1 - pe;
383
384 if (q>40)
385 {
386 q = 40;
387 }
388
389 if ( allele == 0 )
390 {
391 ++tmp_ads[0];
392 tmp_pls[0] *= ( pm * (1-contam) + pe * contam / 3 );
393 tmp_pls[1] *= ( pm / 2 + pe / 6 );
394 tmp_pls[2] *= ( pm * contam + pe * (1-contam) / 3 );
395 }
396 else if ( allele > 0 ) // currently, bi-allelic only
397 {
398 ++tmp_ads[1];
399 tmp_pls[0] *= ( pm * contam + pe * (1-contam) / 3 );
400 tmp_pls[1] *= ( pm / 2 + pe / 6 );
401 tmp_pls[2] *= ( pm * (1-contam) + pe * contam / 3 );
402 }
403 else
404 {
405 ++tmp_ads[2];
406 }
407 double sump = tmp_pls[0] + tmp_pls[1] + tmp_pls[2] + 1e-300;
408 tmp_pls[0] /= sump;
409 tmp_pls[1] /= sump;
410 tmp_pls[2] /= sump;
411
412 if ( allele >= 0 )
413 {
414 if ( q > 20 )
415 {
416 tmp_oth_exp_q20 += (LogTool::pl2prob(q) * 2. / 3.);
417 ++tmp_dp_q20;
418 }
419
420 float log_td = (cycle > 0) ? 0-logf((float)cycle) : 0;
421
422 ++tmp_dp_ra;
423 tmp_bq_s1 += q;
424 tmp_bq_s2 += (q*q);
425 tmp_mq_s1 += mapq;
426 tmp_mq_s2 += (mapq * mapq);
427 tmp_cy_s1 += log_td;
428 tmp_cy_s2 += (log_td * log_td);
429 tmp_st_s1 += fwd;
430
431 if ( allele > 0 )
432 {
433 ++tmp_al_s1;
434 tmp_bq_al += q;
435 tmp_mq_al += mapq;
436 tmp_cy_al += log_td;
437 tmp_st_al += fwd;
438 tmp_nm_al += (nm-1);
439 tmp_nm_s1 += (nm-1);
440 tmp_nm_s2 += (nm-1)*(nm-1);
441 }
442 else
443 {
444 tmp_nm_s1 += nm;
445 tmp_nm_s2 += (nm * nm);
446 }
447 }
448 else
449 {
450 if (q>20)
451 {
452 tmp_oth_exp_q20 += (LogTool::pl2prob(q) * 2. / 3.);
453 ++tmp_oth_obs_q20;
454 ++tmp_dp_q20;
455 }
456 }
457 }
458
459 /**
460 * Collects sufficient statistics from read for variants to be genotyped.
461 */
process_read(AugmentedBAMRecord & as,int32_t sampleIndex,double contam)462 void SNPGenotypingRecord::process_read(AugmentedBAMRecord& as, int32_t sampleIndex, double contam)
463 {
464 if (v_alleles.size()==2)
465 {
466 bam1_t *s = as.s;
467
468 char strand = bam_is_rev(s) ? 'R' : 'F';
469 int32_t allele = 0;
470 //uint32_t bpos1 = bam_get_pos1(s);
471 uint8_t* seq = bam_get_seq(s);
472 uint8_t* qual = bam_get_qual(s);
473 int32_t rlen = bam_get_l_qseq(s);
474 uint8_t mapq = bam_get_mapq(s);
475 uint32_t q = 30;
476 int32_t cycle = 0;
477
478 std::vector<uint32_t>& aug_cigar = as.aug_cigar;
479 std::vector<std::string>& aug_ref = as.aug_ref;
480 std::vector<std::string>& aug_alt = as.aug_alt;
481
482 int32_t vpos1 = pos1;
483 int32_t cpos1 = bam_get_pos1(s);
484 int32_t rpos0 = 0;
485
486 for (uint32_t i=0; i<aug_cigar.size(); ++i)
487 {
488 uint32_t oplen = bam_cigar_oplen(aug_cigar[i]);
489 char opchr = bam_cigar_opchr(aug_cigar[i]);
490
491 if (opchr=='S')
492 {
493 rpos0 += oplen;
494 }
495 else if (opchr=='=')
496 {
497 if (vpos1>=cpos1 && vpos1<=(cpos1+oplen-1))
498 {
499 rpos0 += vpos1-cpos1;
500 allele = 0;
501 q = qual[rpos0];
502 cycle = rpos0<(rlen>>1) ? (rpos0+1) : -(rlen - rpos0 + 1);
503 break;
504 }
505 else
506 {
507 cpos1 += oplen;
508 rpos0 += oplen;
509 }
510 }
511 else if (opchr=='X')
512 {
513 if (vpos1==cpos1)
514 {
515 allele = (aug_alt[i].at(0) == v_alleles[1].at(0)) ? 1 : -1;
516 q = qual[rpos0];
517 cycle = rpos0<(rlen>>1) ? (rpos0+1) : -(rlen - rpos0 + 1);
518 break;
519 }
520
521 ++cpos1;
522 ++rpos0;
523 }
524 else if (opchr=='I')
525 {
526 rpos0 += oplen;
527 }
528 else if (opchr=='D')
529 {
530 cpos1 += oplen;
531 }
532 else
533 {
534 notice("unrecognized cigar state %d", opchr);
535 }
536 }
537
538 uint32_t no_mismatches = as.no_mismatches;
539 if (allele!=0 && q<20)
540 {
541 ++no_mismatches;
542 }
543
544 if (allele!=0 && no_mismatches==0)
545 {
546 //no_mismatches = 1;
547 std::cerr << "something wrong\n";
548 }
549
550 add_allele( contam, allele, mapq, strand == 'F', q, cycle, no_mismatches );
551 }
552 else if (v_alleles.size()>2) //multiallelic
553 {
554 bam1_t *s = as.s;
555
556 char strand = bam_is_rev(s) ? 'R' : 'F';
557 int32_t allele = 0;
558 //uint32_t bpos1 = bam_get_pos1(s);
559 uint8_t* seq = bam_get_seq(s);
560 uint8_t* qual = bam_get_qual(s);
561 int32_t rlen = bam_get_l_qseq(s);
562 uint8_t mapq = bam_get_mapq(s);
563 uint32_t q = 30;
564 int32_t cycle = 0;
565
566 std::vector<uint32_t>& aug_cigar = as.aug_cigar;
567 std::vector<std::string>& aug_ref = as.aug_ref;
568 std::vector<std::string>& aug_alt = as.aug_alt;
569
570 int32_t vpos1 = pos1;
571 int32_t cpos1 = bam_get_pos1(s);
572 int32_t rpos0 = 0;
573
574 for (uint32_t i=0; i<aug_cigar.size(); ++i)
575 {
576 uint32_t oplen = bam_cigar_oplen(aug_cigar[i]);
577 char opchr = bam_cigar_opchr(aug_cigar[i]);
578
579 if (opchr=='S')
580 {
581 rpos0 += oplen;
582 }
583 else if (opchr=='=')
584 {
585 if (vpos1>=cpos1 && vpos1<=(cpos1+oplen-1))
586 {
587 rpos0 += vpos1-cpos1;
588 allele = 0;
589 q = qual[rpos0];
590 cycle = rpos0<(rlen>>1) ? (rpos0+1) : -(rlen - rpos0 + 1);
591 break;
592 }
593 else
594 {
595 cpos1 += oplen;
596 rpos0 += oplen;
597 }
598 }
599 else if (opchr=='X')
600 {
601 if (vpos1==cpos1)
602 {
603 allele = (aug_alt[i].at(0) == v_alleles[1].at(0)) ? 1 : -1;
604 q = qual[rpos0];
605 cycle = rpos0<(rlen>>1) ? (rpos0+1) : -(rlen - rpos0 + 1);
606 break;
607 }
608
609 ++cpos1;
610 ++rpos0;
611 }
612 else if (opchr=='I')
613 {
614 rpos0 += oplen;
615 }
616 else if (opchr=='D')
617 {
618 cpos1 += oplen;
619 }
620 else
621 {
622 notice("unrecognized cigar state %d", opchr);
623 }
624 }
625
626 uint32_t no_mismatches = as.no_mismatches;
627 if (allele!=0 && q<20)
628 {
629 ++no_mismatches;
630 }
631
632 if (allele!=0 && no_mismatches==0)
633 {
634 //no_mismatches = 1;
635 std::cerr << "something wrong\n";
636 }
637
638 add_allele( contam, allele, mapq, strand == 'F', q, cycle, no_mismatches );
639
640 //abort();
641 }
642 }
643