1 /* The MIT License
2 
3    Copyright (c) 2016 Adrian Tan <atks@umich.edu>
4 
5    Permission is hereby granted, free of charge, to any person obtaining a copy
6    of this software and associated documentation files (the "Software"), to deal
7    in the Software without restriction, including without limitation the rights
8    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9    copies of the Software, and to permit persons to whom the Software is
10    furnished to do so, subject to the following conditions:
11 
12    The above copyright notice and this permission notice shall be included in
13    all copies or substantial portions of the Software.
14 
15    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21    THE SOFTWARE.
22 */
23 
24 #include "snp_genotyping_record.h"
25 
26 /**
27  * Constructor.
28  * @v - VCF record.
29  */
SNPGenotypingRecord()30 SNPGenotypingRecord::SNPGenotypingRecord() {};
31 
32 /**
33  * Constructor.
34  * @v - VCF record.
35  */
SNPGenotypingRecord(bcf_hdr_t * h,bcf1_t * v,int32_t nsamples,int32_t ploidy,Estimator * est)36 SNPGenotypingRecord::SNPGenotypingRecord(bcf_hdr_t *h, bcf1_t *v, int32_t nsamples, int32_t ploidy, Estimator* est)
37 {
38     clear();
39 
40     this->h = h;
41     //this->v = v;
42     this->rid = bcf_get_rid(v);
43     this->pos1 = bcf_get_pos1(v);
44     this->nsamples = nsamples;
45 
46     this->alleles = {0,0,0};
47     char** tmp_alleles = bcf_get_allele(v);
48     for (size_t i=0; i< bcf_get_n_allele(v); ++i) {
49       if (i) kputc(',', &this->alleles);
50       kputs(tmp_alleles[i], &this->alleles);
51       v_alleles.push_back(tmp_alleles[i]);
52     }
53 
54     n_filter = 0;
55 
56     if (vtype==VT_SNP && v_alleles.size()==2)
57     {
58         //rid = bcf_get_rid(v);
59         beg1 = bcf_get_pos1(v);
60         end1 = beg1;
61 
62         if (bcf_has_filter(h, v, const_cast<char*>("overlap_snp"))==1)
63             n_filter |= FILTER_MASK_OVERLAP_SNP;
64 
65         if (bcf_has_filter(h, v, const_cast<char*>("overlap_indel"))==1)
66             n_filter |= FILTER_MASK_OVERLAP_INDEL;
67 
68         if (bcf_has_filter(h, v, const_cast<char*>("overlap_vntr"))==1)
69             n_filter |= FILTER_MASK_OVERLAP_VNTR;
70     }
71 
72     pls = (uint8_t*)calloc( nsamples*3, sizeof(uint8_t) );
73     ads = (uint8_t*)calloc( nsamples*3, sizeof(uint8_t) );
74 }
75 
76 /**
77  * Destructor.
78  */
~SNPGenotypingRecord()79 SNPGenotypingRecord::~SNPGenotypingRecord()
80 {
81   //if (v) bcf_destroy(v);
82     if ( pls ) free(pls);
83     if ( ads ) free(ads);
84     if ( alleles.l > 0 ) free(alleles.s);
85 }
86 
87 /**
88  * Clears this record.
89  */
clearTemp()90 void SNPGenotypingRecord::clearTemp()
91 {
92     tmp_dp_q20 = 0;
93     tmp_dp_ra = 0;
94     tmp_bq_s1 = tmp_bq_s2 = 0;
95     tmp_mq_s1 = tmp_mq_s2 = 0;
96     tmp_cy_s1 = tmp_cy_s2 = 0;
97     tmp_st_s1 = tmp_st_s2 = 0;
98     tmp_al_s1 = tmp_bq_al = tmp_mq_al = tmp_cy_al = tmp_st_al = tmp_nm_al = 0;
99     tmp_nm_s1 = tmp_nm_s2 = 0;
100     tmp_oth_exp_q20 = tmp_oth_obs_q20 = 0;
101     tmp_pls[0] = tmp_pls[1] = tmp_pls[2] = 1.;
102     tmp_ads[0] = tmp_ads[1] = tmp_ads[2] = 0;
103 }
104 
105 /**
106  * Clear fields.
107  */
clear()108 void SNPGenotypingRecord::clear()
109 {
110     vtype = -1;
111 
112     pls = ads = NULL;
113 
114     bqr_num = bqr_den = 0;
115     mqr_num = mqr_den = 0;
116     cyr_num = cyr_den = 0;
117     str_num = str_den = 0;
118     nmr_num = nmr_den = 0;
119     ior_num = ior_den = 0;
120     nm0_num = nm0_den = 0;
121     nm1_num = nm1_den = 0;
122     abe_num = abe_den = 0;
123     abz_num = abz_den = 0;
124     ns_nref = dp_sum = max_gq = 0;
125     clearTemp();
126 }
127 
128 /**
129  * Destructor.
130  */
flush_variant(bcf_hdr_t * hdr)131 bcf1_t* SNPGenotypingRecord::flush_variant(bcf_hdr_t* hdr)
132 {
133     bcf1_t *nv = bcf_init();
134     bcf_clear(nv);
135     bcf_set_n_sample(nv, nsamples);
136 
137     bcf_set_rid(nv, rid);
138     bcf_set_pos1(nv, pos1);
139     bcf_update_alleles_str(hdr, nv, alleles.s);
140 
141     int32_t* gt = (int32_t*) calloc ( nsamples * 2, sizeof(int32_t) );
142     int32_t* pl = (int32_t*) calloc ( nsamples * 3, sizeof(int32_t) );
143     int32_t* ad = (int32_t*) calloc ( nsamples * 2, sizeof(int32_t) );
144     int32_t* td = (int32_t*) calloc ( nsamples * 1, sizeof(int32_t) );
145     int32_t* gq = (int32_t*) calloc ( nsamples * 1, sizeof(int32_t) );
146     float MLE_HWE_AF[2];
147     float MLE_HWE_GF[3];
148     double gp, gp_sum, max_gp;
149     int32_t best_gt;
150     int32_t best_a1, best_a2;
151     int32_t* pls_i;
152     int32_t an = 0;
153     int32_t acs[2] = {0,0};
154     int32_t gcs[3] = {0,0,0};
155     float afs[3];
156     int32_t max_gq = 0;
157     int64_t dp_sum = 0;
158     int32_t ploidy = 2;
159 
160     std::copy( pls, pls + (nsamples*3), pl );
161     for(int32_t i=0; i < nsamples; ++i)
162     {
163         dp_sum += ( td[i] = ( (ad[2*i] = ads[3*i]) + (ad[2*i+1] = ads[3*i+1]) + ads[3*i+2] ) );
164     }
165 
166     int32_t n = 0;
167     int32_t adSumHet[2] = {0,0};
168     Estimator::compute_gl_af_hwe(pl, nsamples, ploidy, 2, MLE_HWE_AF, MLE_HWE_GF, n, 1e-20);
169 
170     for(int32_t i=0; i < nsamples; ++i)
171     {
172         int32_t* pli = &pl[ i * 3 ];
173         max_gp = gp_sum = gp = ( LogTool::pl2prob(pli[0]) * MLE_HWE_AF[0] * MLE_HWE_AF[0] );
174         best_gt = 0; best_a1 = 0; best_a2 = 0;
175         for(size_t l=1; l < 2; ++l)
176         {
177             for(size_t m=0; m <= l; ++m)
178             {
179                 gp = ( LogTool::pl2prob(pli[ l*(l+1)/2 + m]) * MLE_HWE_AF[l] * MLE_HWE_AF[m] * (l == m ? 1 : 2) );
180                 gp_sum += gp;
181                 if ( max_gp < gp )
182                 {
183                     max_gp = gp;
184                     best_gt = l*(l+1)/2 + m;
185                     best_a1 = m;
186                     best_a2 = l;
187                 }
188             }
189         }
190 
191         if ( best_gt == 1 )
192         {
193             adSumHet[0] += ad[2*i];
194             adSumHet[1] += ad[2*i+1];
195         }
196 
197         double prob = 1.-max_gp/gp_sum;  // to calculate GQ
198         if ( prob <= 3.162278e-26 )
199         {
200             prob = 3.162278e-26;
201         }
202 
203         if ( prob > 1 )
204         {
205             prob = 1;
206         }
207 
208         gq[i] = (int32_t)LogTool::prob2pl(prob);
209 
210         if ( ( best_gt > 0 ) && ( max_gq < gq[i] ) )
211         {
212             max_gq = gq[i];
213         }
214 
215         gt[2*i]   = ((best_a1 + 1) << 1);
216         gt[2*i+1] = ((best_a2 + 1) << 1);
217         an += 2;             // still use diploid representation of chrX for now.
218         ++acs[best_a1];
219         ++acs[best_a2];
220         ++gcs[best_gt];
221     }
222 
223     for(size_t i=0; i < 2; ++i)
224     {
225         afs[i] = acs[i]/(float)an;
226     }
227 
228     bcf_update_format_int32(hdr, nv, "GT", gt, nsamples * 2);
229     bcf_update_format_int32(hdr, nv, "GQ", gq, nsamples );
230     bcf_update_format_int32(hdr, nv, "AD", ad, nsamples * 2);
231     bcf_update_format_int32(hdr, nv, "DP", td, nsamples );
232     bcf_update_format_int32(hdr, nv, "PL", pl, nsamples * 3);
233 
234     float avgdp = (float)dp_sum / (float)nsamples;
235 
236     nv->qual = (float) max_gq;
237     bcf_update_info_float(hdr, nv, "AVGDP", &avgdp, 1);
238     bcf_update_info_int32(hdr, nv, "AC", &acs[1], 1);
239     bcf_update_info_int32(hdr, nv, "AN", &an, 1);
240     bcf_update_info_float(hdr, nv, "AF", &afs[1], 1);
241     bcf_update_info_int32(hdr, nv, "GC", gcs, 3);
242     bcf_update_info_int32(hdr, nv, "GN", &nsamples, 1);
243 
244     if (n)
245     {
246         float* MLE_HWE_AF_PTR = &MLE_HWE_AF[1];
247         bcf_update_info_float(hdr, nv, "HWEAF", MLE_HWE_AF_PTR, 1);
248     }
249 
250     // calculate the allele frequencies under HWD
251     float MLE_AF[2];
252     float MLE_GF[3];
253     n = 0;
254     Estimator::compute_gl_af(pl, nsamples, ploidy, 2, MLE_AF, MLE_GF,  n, 1e-20);
255     if (n)
256     {
257         //float* MLE_AF_PTR = &MLE_AF[1];
258         //bcf_update_info_float(odw->hdr, nv, "HWDAF", MLE_AF_PTR, n_alleles-1);
259         bcf_update_info_float(hdr, nv, "HWDGF", &MLE_GF, 3);
260     }
261 
262     float fic = 0;
263     n = 0;
264     Estimator::compute_gl_fic(pl, nsamples, ploidy, MLE_HWE_AF, 2, MLE_GF, fic, n);
265     if ( std::isnan((double)fic) ) fic = 0;
266     if (n)
267     {
268         bcf_update_info_float(hdr, nv, "IBC", &fic, 1);
269     }
270 
271     // calculate the LRT statistics related to HWE
272     float lrts;
273     float logp;
274     int32_t df;
275     n = 0;
276     Estimator::compute_hwe_lrt(pl, nsamples, ploidy, 2, MLE_HWE_GF, MLE_GF, n, lrts, logp, df);
277     if (n)
278     {
279         if ( fic < 0 ) logp = 0-logp;
280         {
281             bcf_update_info_float(hdr, nv, "HWE_SLP", &logp, 1);
282         }
283     }
284 
285     float abe = (adSumHet[0] + 0.5)/(adSumHet[0] + adSumHet[1] + 1.0);
286     bcf_update_info_float(hdr, nv, "ABE", &abe, 1);
287 
288     // update filter
289     if ( n_filter & FILTER_MASK_OVERLAP_SNP )
290         bcf_add_filter(hdr, nv, bcf_hdr_id2int(hdr, BCF_DT_ID, "overlap_snp"));
291     if ( n_filter & FILTER_MASK_OVERLAP_INDEL )
292         bcf_add_filter(hdr, nv, bcf_hdr_id2int(hdr, BCF_DT_ID, "overlap_indel"));
293     if ( n_filter & FILTER_MASK_OVERLAP_VNTR )
294         bcf_add_filter(hdr, nv, bcf_hdr_id2int(hdr, BCF_DT_ID, "overlap_vntr"));
295 
296     //bcf_update_info_int32(odw->hdr, nv, "NS_NREF", &v_ns_nrefs[k], 1);
297     abe_num /= (abe_den+1e-6); bcf_update_info_float(hdr, nv, "ABE",  &abe_num, 1);
298     abz_num /= sqrt(abz_den+1e-6); bcf_update_info_float(hdr, nv, "ABZ",  &abz_num, 1);
299     bqr_num /= sqrt(bqr_den+1e-6); bcf_update_info_float(hdr, nv, "BQZ", &bqr_num, 1);
300     mqr_num /= sqrt(mqr_den+1e-6); bcf_update_info_float(hdr, nv, "MQZ", &mqr_num, 1);
301     cyr_num /= sqrt(cyr_den+1e-6); bcf_update_info_float(hdr, nv, "CYZ", &cyr_num, 1);
302     str_num /= sqrt(str_den+1e-6); bcf_update_info_float(hdr, nv, "STZ", &str_num, 1);
303     nmr_num /= sqrt(nmr_den+1e-6); bcf_update_info_float(hdr, nv, "NMZ", &nmr_num, 1);
304     ior_num = log(ior_num/ior_den+1e-6)/log(10.); bcf_update_info_float(hdr, nv, "IOR", &ior_num, 1);
305     nm1_num /= (nm1_den+1e-6); bcf_update_info_float(hdr, nv, "NM1", &nm1_num, 1);
306     nm0_num /= (nm0_den+1e-6); bcf_update_info_float(hdr, nv, "NM0", &nm0_num, 1);
307 
308     //odw->write(nv);
309     //bcf_destroy(nv);
310     free(gt);
311     free(gq);
312     free(pl);
313     free(ad);
314     free(td);
315 
316     free(pls); pls = NULL;
317     free(ads); ads = NULL;
318 
319     free(alleles.s);
320 
321     return nv;
322 }
323 
flush_sample(int32_t sampleIndex)324 void SNPGenotypingRecord::flush_sample(int32_t sampleIndex)
325 {
326     uint8_t* p_pls = &pls[sampleIndex*3];
327     uint8_t* p_ads = &ads[sampleIndex*3];
328 
329     int32_t imax = ( tmp_pls[0] > tmp_pls[1] ) ? ( tmp_pls[0] > tmp_pls[2] ? 0 : 2 ) : ( tmp_pls[1] > tmp_pls[2] ? 1 : 2);
330     for(int32_t i=0; i < 3; ++i)
331     {
332         uint32_t l = LogTool::prob2pl(tmp_pls[i]/tmp_pls[imax]);
333         p_pls[i] = ((l > 255) ? 255 : l);
334         p_ads[i] = ((tmp_ads[i] > 255) ? 255 : (uint8_t)tmp_ads[i]);
335     }
336 
337     float sqrt_dp_ra = sqrt((float)tmp_dp_ra);
338     float ior = (float)(tmp_oth_obs_q20 / (tmp_oth_exp_q20 + 1e-6));
339     float nm1 = tmp_al_s1 == 0 ? 0 : tmp_nm_al / (float)tmp_al_s1;
340     float nm0 = (tmp_dp_ra - tmp_al_s1) == 0 ? 0 : (tmp_nm_s1-tmp_nm_al) / (float)(tmp_dp_ra - tmp_al_s1);
341     float w_dp_ra  = log(tmp_dp_ra+1.); //sqrt(dp_ra);
342     float w_dp_q20 = log(tmp_dp_q20+1.); //sqrt(dp_q20);
343     float w_al_s1  = log(tmp_al_s1+1.); //sqrt(al_s1);
344     float w_ref_s1 = log(tmp_dp_ra - tmp_al_s1+1.);
345 
346     if ( p_pls[1] == 0 )
347     { // het genotypes
348         abe_num += (w_dp_ra * (tmp_dp_ra - tmp_al_s1 + 0.05) / (double)(tmp_dp_ra + 0.1));
349         abe_den += w_dp_ra;
350 
351         // E(r) = 0.5(r+a) V(r) = 0.25(r+a)
352         abz_num += w_dp_ra * (tmp_dp_ra - tmp_al_s1 - tmp_dp_ra*0.5)/sqrt(0.25 * tmp_dp_ra + 1e-3);
353         abz_den += (w_dp_ra * w_dp_ra);
354 
355         float bqr = sqrt_dp_ra * Estimator::compute_correlation( tmp_dp_ra, tmp_bq_al, tmp_bq_s1, tmp_bq_s2, tmp_al_s1, tmp_al_s1, .1 );
356         float mqr = sqrt_dp_ra * Estimator::compute_correlation( tmp_dp_ra, tmp_mq_al, tmp_mq_s1, tmp_mq_s2, tmp_al_s1, tmp_al_s1, .1 );
357         float cyr = sqrt_dp_ra * Estimator::compute_correlation_f( tmp_dp_ra, tmp_cy_al, tmp_cy_s1, tmp_cy_s2, (float)tmp_al_s1, (float)tmp_al_s1, .1 );
358         float str = sqrt_dp_ra * Estimator::compute_correlation( tmp_dp_ra, tmp_st_al, tmp_st_s1, tmp_st_s1, tmp_al_s1, tmp_al_s1, .1 );
359         float nmr = sqrt_dp_ra * Estimator::compute_correlation( tmp_dp_ra, tmp_nm_al, tmp_nm_s1, tmp_nm_s2, tmp_al_s1, tmp_al_s1, .1 );
360 
361         // Use Stouffer's method to combine the z-scores, but weighted by log of sample size
362         bqr_num += (bqr * w_dp_ra); bqr_den += (w_dp_ra * w_dp_ra);
363         mqr_num += (mqr * w_dp_ra); mqr_den += (w_dp_ra * w_dp_ra);
364         cyr_num += (cyr * w_dp_ra); cyr_den += (w_dp_ra * w_dp_ra);
365         str_num += (str * w_dp_ra); str_den += (w_dp_ra * w_dp_ra);
366         nmr_num += (nmr * w_dp_ra); nmr_den += (w_dp_ra * w_dp_ra);
367     }
368 
369     ior_num += (ior * w_dp_q20); ior_den += w_dp_q20;
370     nm1_num += (nm1 * w_al_s1);  nm1_den += w_al_s1;
371     nm0_num += (nm0 * w_ref_s1); nm0_den += w_ref_s1;
372 
373     clearTemp();
374 }
375 
376 /**
377  * Adds an allele based with collected sufficient statistics.
378  */
add_allele(double contam,int32_t allele,uint8_t mapq,bool fwd,uint32_t q,int32_t cycle,uint32_t nm)379 void SNPGenotypingRecord::add_allele(double contam, int32_t allele, uint8_t mapq, bool fwd, uint32_t q, int32_t cycle, uint32_t nm)
380 {
381     double pe = LogTool::pl2prob(q);
382     double pm = 1 - pe;
383 
384     if (q>40)
385     {
386         q = 40;
387     }
388 
389     if ( allele == 0 )
390     {
391         ++tmp_ads[0];
392         tmp_pls[0] *= ( pm * (1-contam) + pe * contam / 3 );
393         tmp_pls[1] *= ( pm / 2 + pe / 6 );
394         tmp_pls[2] *= ( pm * contam + pe * (1-contam) / 3 );
395     }
396     else if ( allele > 0 ) // currently, bi-allelic only
397     {
398         ++tmp_ads[1];
399         tmp_pls[0] *= ( pm * contam + pe * (1-contam) / 3 );
400         tmp_pls[1] *= ( pm / 2 + pe / 6 );
401         tmp_pls[2] *= ( pm * (1-contam) + pe * contam / 3 );
402     }
403     else
404     {
405         ++tmp_ads[2];
406     }
407     double sump = tmp_pls[0] + tmp_pls[1] + tmp_pls[2] + 1e-300;
408     tmp_pls[0] /= sump;
409     tmp_pls[1] /= sump;
410     tmp_pls[2] /= sump;
411 
412     if ( allele >= 0 )
413     {
414         if ( q > 20 )
415         {
416           tmp_oth_exp_q20 += (LogTool::pl2prob(q) * 2. / 3.);
417           ++tmp_dp_q20;
418         }
419 
420         float log_td = (cycle > 0) ? 0-logf((float)cycle) : 0;
421 
422         ++tmp_dp_ra;
423         tmp_bq_s1 += q;
424         tmp_bq_s2 += (q*q);
425         tmp_mq_s1 += mapq;
426         tmp_mq_s2 += (mapq * mapq);
427         tmp_cy_s1 += log_td;
428         tmp_cy_s2 += (log_td * log_td);
429         tmp_st_s1 += fwd;
430 
431         if ( allele > 0 )
432         {
433             ++tmp_al_s1;
434             tmp_bq_al += q;
435             tmp_mq_al += mapq;
436             tmp_cy_al += log_td;
437             tmp_st_al += fwd;
438             tmp_nm_al += (nm-1);
439             tmp_nm_s1 += (nm-1);
440             tmp_nm_s2 += (nm-1)*(nm-1);
441         }
442         else
443         {
444           tmp_nm_s1 += nm;
445           tmp_nm_s2 += (nm * nm);
446         }
447     }
448     else
449     {
450         if (q>20)
451         {
452           tmp_oth_exp_q20 += (LogTool::pl2prob(q) * 2. / 3.);
453           ++tmp_oth_obs_q20;
454           ++tmp_dp_q20;
455         }
456     }
457 }
458 
459 /**
460  * Collects sufficient statistics from read for variants to be genotyped.
461  */
process_read(AugmentedBAMRecord & as,int32_t sampleIndex,double contam)462 void SNPGenotypingRecord::process_read(AugmentedBAMRecord& as, int32_t sampleIndex, double contam)
463 {
464     if (v_alleles.size()==2)
465     {
466         bam1_t *s = as.s;
467 
468         char strand = bam_is_rev(s) ? 'R' : 'F';
469         int32_t allele = 0;
470         //uint32_t bpos1 = bam_get_pos1(s);
471         uint8_t* seq = bam_get_seq(s);
472         uint8_t* qual = bam_get_qual(s);
473         int32_t rlen = bam_get_l_qseq(s);
474         uint8_t mapq = bam_get_mapq(s);
475         uint32_t q = 30;
476         int32_t cycle = 0;
477 
478         std::vector<uint32_t>& aug_cigar = as.aug_cigar;
479         std::vector<std::string>& aug_ref = as.aug_ref;
480         std::vector<std::string>& aug_alt = as.aug_alt;
481 
482         int32_t vpos1 = pos1;
483         int32_t cpos1 = bam_get_pos1(s);
484         int32_t rpos0 = 0;
485 
486         for (uint32_t i=0; i<aug_cigar.size(); ++i)
487         {
488             uint32_t oplen = bam_cigar_oplen(aug_cigar[i]);
489             char opchr = bam_cigar_opchr(aug_cigar[i]);
490 
491             if (opchr=='S')
492             {
493                 rpos0 += oplen;
494             }
495             else if (opchr=='=')
496             {
497                 if (vpos1>=cpos1 && vpos1<=(cpos1+oplen-1))
498                 {
499                     rpos0 += vpos1-cpos1;
500                     allele = 0;
501                     q = qual[rpos0];
502                     cycle = rpos0<(rlen>>1) ? (rpos0+1) : -(rlen - rpos0 + 1);
503                     break;
504                 }
505                 else
506                 {
507                     cpos1 += oplen;
508                     rpos0 += oplen;
509                 }
510             }
511             else if (opchr=='X')
512             {
513                 if (vpos1==cpos1)
514                 {
515                     allele = (aug_alt[i].at(0) == v_alleles[1].at(0)) ? 1 : -1;
516                     q = qual[rpos0];
517                     cycle = rpos0<(rlen>>1) ? (rpos0+1) : -(rlen - rpos0 + 1);
518                     break;
519                 }
520 
521                 ++cpos1;
522                 ++rpos0;
523             }
524             else if (opchr=='I')
525             {
526                 rpos0 += oplen;
527             }
528             else if (opchr=='D')
529             {
530                 cpos1 += oplen;
531             }
532             else
533             {
534                 notice("unrecognized cigar state %d", opchr);
535             }
536         }
537 
538         uint32_t no_mismatches = as.no_mismatches;
539         if (allele!=0 && q<20)
540         {
541             ++no_mismatches;
542         }
543 
544         if (allele!=0 && no_mismatches==0)
545         {
546             //no_mismatches = 1;
547             std::cerr << "something wrong\n";
548         }
549 
550         add_allele( contam, allele, mapq, strand == 'F', q, cycle, no_mismatches );
551     }
552     else if (v_alleles.size()>2) //multiallelic
553     {
554         bam1_t *s = as.s;
555 
556         char strand = bam_is_rev(s) ? 'R' : 'F';
557         int32_t allele = 0;
558         //uint32_t bpos1 = bam_get_pos1(s);
559         uint8_t* seq = bam_get_seq(s);
560         uint8_t* qual = bam_get_qual(s);
561         int32_t rlen = bam_get_l_qseq(s);
562         uint8_t mapq = bam_get_mapq(s);
563         uint32_t q = 30;
564         int32_t cycle = 0;
565 
566         std::vector<uint32_t>& aug_cigar = as.aug_cigar;
567         std::vector<std::string>& aug_ref = as.aug_ref;
568         std::vector<std::string>& aug_alt = as.aug_alt;
569 
570         int32_t vpos1 = pos1;
571         int32_t cpos1 = bam_get_pos1(s);
572         int32_t rpos0 = 0;
573 
574         for (uint32_t i=0; i<aug_cigar.size(); ++i)
575         {
576             uint32_t oplen = bam_cigar_oplen(aug_cigar[i]);
577             char opchr = bam_cigar_opchr(aug_cigar[i]);
578 
579             if (opchr=='S')
580             {
581                 rpos0 += oplen;
582             }
583             else if (opchr=='=')
584             {
585                 if (vpos1>=cpos1 && vpos1<=(cpos1+oplen-1))
586                 {
587                     rpos0 += vpos1-cpos1;
588                     allele = 0;
589                     q = qual[rpos0];
590                     cycle = rpos0<(rlen>>1) ? (rpos0+1) : -(rlen - rpos0 + 1);
591                     break;
592                 }
593                 else
594                 {
595                     cpos1 += oplen;
596                     rpos0 += oplen;
597                 }
598             }
599             else if (opchr=='X')
600             {
601                 if (vpos1==cpos1)
602                 {
603                     allele = (aug_alt[i].at(0) == v_alleles[1].at(0)) ? 1 : -1;
604                     q = qual[rpos0];
605                     cycle = rpos0<(rlen>>1) ? (rpos0+1) : -(rlen - rpos0 + 1);
606                     break;
607                 }
608 
609                 ++cpos1;
610                 ++rpos0;
611             }
612             else if (opchr=='I')
613             {
614                 rpos0 += oplen;
615             }
616             else if (opchr=='D')
617             {
618                 cpos1 += oplen;
619             }
620             else
621             {
622                 notice("unrecognized cigar state %d", opchr);
623             }
624         }
625 
626         uint32_t no_mismatches = as.no_mismatches;
627         if (allele!=0 && q<20)
628         {
629             ++no_mismatches;
630         }
631 
632         if (allele!=0 && no_mismatches==0)
633         {
634             //no_mismatches = 1;
635             std::cerr << "something wrong\n";
636         }
637 
638         add_allele( contam, allele, mapq, strand == 'F', q, cycle, no_mismatches );
639 
640         //abort();
641     }
642 }
643