1 /* The MIT License
2 
3    Copyright (c) 2016 Hyun Min Kang <hmkang.umich.edu> and Adrian Tan <atks@umich.edu>
4 
5    Permission is hereby granted, free of charge, to any person obtaining a copy
6    of this software and associated documentation files (the "Software"), to deal
7    in the Software without restriction, including without limitation the rights
8    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9    copies of the Software, and to permit persons to whom the Software is
10    furnished to do so, subject to the following conditions:
11 
12    The above copyright notice and this permission notice shall be included in
13    all copies or substantial portions of the Software.
14 
15    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21    THE SOFTWARE.
22 */
23 
24 #include "complex_genotyping_record.h"
25 
26 /**
27  * Constructor.
28  */
ComplexGenotypingRecord(bcf_hdr_t * h,bcf1_t * v,int32_t vtype,int32_t nsamples)29 ComplexGenotypingRecord::ComplexGenotypingRecord(bcf_hdr_t *h, bcf1_t *v, int32_t vtype, int32_t nsamples)
30 {
31     clear();
32 
33     this->h = h;
34     //this->v = v;
35     this->rid = bcf_get_rid(v);
36     this->pos1 = bcf_get_pos1(v);
37     this->vtype = vtype;
38     this->nsamples = nsamples;
39 
40     this->alleles = {0,0,0};
41     char** tmp_alleles = bcf_get_allele(v);
42     for (size_t i=0; i< bcf_get_n_allele(v); ++i) {
43       if (i) kputc(',', &this->alleles);
44       kputs(tmp_alleles[i], &this->alleles);
45       v_alleles.push_back(tmp_alleles[i]);
46     }
47 
48     n_filter = 0;
49 
50     if (vtype==VT_SNP && v_alleles.size()==2)
51     {
52         //rid = bcf_get_rid(v);
53         beg1 = bcf_get_pos1(v);
54         end1 = beg1;
55 
56         if (bcf_has_filter(h, v, const_cast<char*>("overlap_snp"))==1)
57             n_filter |= FILTER_MASK_OVERLAP_SNP;
58 
59         if (bcf_has_filter(h, v, const_cast<char*>("overlap_indel"))==1)
60             n_filter |= FILTER_MASK_OVERLAP_INDEL;
61 
62         if (bcf_has_filter(h, v, const_cast<char*>("overlap_vntr"))==1)
63             n_filter |= FILTER_MASK_OVERLAP_VNTR;
64     }
65     else if (vtype==VT_INDEL && v_alleles.size()==2)
66     {
67         //rid = bcf_get_rid(v);
68         dlen = strlen(tmp_alleles[1])-strlen(tmp_alleles[0]);
69         len = abs(dlen);
70 
71         int32_t *flanks_pos1 = NULL;
72         int32_t n = 0;
73 
74         if (bcf_get_info_int32(h, v, "FLANKS", &flanks_pos1, &n)>0)
75         {
76             this->beg1 = flanks_pos1[0];
77             this->end1 = flanks_pos1[1];
78             free(flanks_pos1);
79         }
80         else
81         {
82             this->beg1 = bcf_get_pos1(v) - 3;
83             this->end1 = bcf_get_end1(v) + 3;
84         }
85 
86         if (dlen>0)
87         {
88             indel.append(&tmp_alleles[1][1]);
89         }
90         else
91         {
92             indel.append(&tmp_alleles[0][1]);
93         }
94 
95         if (bcf_has_filter(h, v, const_cast<char*>("overlap_snp"))==1)
96             n_filter |= FILTER_MASK_OVERLAP_SNP;
97 
98         if (bcf_has_filter(h, v, const_cast<char*>("overlap_indel"))==1)
99             n_filter |= FILTER_MASK_OVERLAP_INDEL;
100 
101         if (bcf_has_filter(h, v, const_cast<char*>("overlap_vntr"))==1)
102             n_filter |= FILTER_MASK_OVERLAP_VNTR;
103     }
104     else if (vtype==VT_VNTR)
105     {
106         rid = bcf_get_rid(v);
107         beg1 = bcf_get_pos1(v) - 1;
108         end1 = bcf_get_end1(v) + 1;
109 
110         char *motif = NULL;
111         int32_t n = 0;
112 
113         if (bcf_get_info_string(h, v, "MOTIF", &motif, &n)>0)
114         {
115             this->motif.assign(motif);
116             free(motif);
117         }
118     }
119     else
120     {
121         return;
122     }
123 
124     pls = (uint8_t*)calloc( nsamples*3, sizeof(uint8_t) );
125     ads = (uint8_t*)calloc( nsamples*3, sizeof(uint8_t) );
126 }
127 
128 /**
129  * Clears this record.
130  */
clearTemp()131 void ComplexGenotypingRecord::clearTemp()
132 {
133     tmp_dp_q20 = 0;
134     tmp_dp_ra = 0;
135     tmp_bq_s1 = tmp_bq_s2 = 0;
136     tmp_mq_s1 = tmp_mq_s2 = 0;
137     tmp_cy_s1 = tmp_cy_s2 = 0;
138     tmp_st_s1 = tmp_st_s2 = 0;
139     tmp_al_s1 = tmp_bq_al = tmp_mq_al = tmp_cy_al = tmp_st_al = tmp_nm_al = 0;
140     tmp_nm_s1 = tmp_nm_s2 = 0;
141     tmp_oth_exp_q20 = tmp_oth_obs_q20 = 0;
142     tmp_pls[0] = tmp_pls[1] = tmp_pls[2] = 1.;
143     tmp_ads[0] = tmp_ads[1] = tmp_ads[2] = 0;
144 }
145 
clear()146 void ComplexGenotypingRecord::clear()
147 {
148     vtype = -1;
149 
150     pls = ads = NULL;
151 
152     bqr_num = bqr_den = 0;
153     mqr_num = mqr_den = 0;
154     cyr_num = cyr_den = 0;
155     str_num = str_den = 0;
156     nmr_num = nmr_den = 0;
157     ior_num = ior_den = 0;
158     nm0_num = nm0_den = 0;
159     nm1_num = nm1_den = 0;
160     abe_num = abe_den = 0;
161     abz_num = abz_den = 0;
162     ns_nref = dp_sum = max_gq = 0;
163     clearTemp();
164 }
165 
166 /**
167  * Destructor.
168  */
~ComplexGenotypingRecord()169 ComplexGenotypingRecord::~ComplexGenotypingRecord()
170 {
171   //if (v) bcf_destroy(v);
172     if ( pls ) free(pls);
173     if ( ads ) free(ads);
174     if ( alleles.l > 0 ) free(alleles.s);
175 }
176 
177 /**
178  * Destructor.
179  */
flush_variant(bcf_hdr_t * hdr)180 bcf1_t* ComplexGenotypingRecord::flush_variant(bcf_hdr_t* hdr)
181 {
182     bcf1_t *nv = bcf_init();
183     bcf_clear(nv);
184     bcf_set_n_sample(nv, nsamples);
185 
186     bcf_set_rid(nv, rid);
187     bcf_set_pos1(nv, pos1);
188     bcf_update_alleles_str(hdr, nv, alleles.s);
189 
190     int32_t* gt = (int32_t*) calloc ( nsamples * 2, sizeof(int32_t) );
191     int32_t* pl = (int32_t*) calloc ( nsamples * 3, sizeof(int32_t) );
192     int32_t* ad = (int32_t*) calloc ( nsamples * 2, sizeof(int32_t) );
193     int32_t* td = (int32_t*) calloc ( nsamples * 1, sizeof(int32_t) );
194     int32_t* gq = (int32_t*) calloc ( nsamples * 1, sizeof(int32_t) );
195     float MLE_HWE_AF[2];
196     float MLE_HWE_GF[3];
197     double gp, gp_sum, max_gp;
198     int32_t best_gt;
199     int32_t best_a1, best_a2;
200     int32_t* pls_i;
201     int32_t an = 0;
202     int32_t acs[2] = {0,0};
203     int32_t gcs[3] = {0,0,0};
204     float afs[3];
205     int32_t max_gq = 0;
206     int64_t dp_sum = 0;
207     int32_t ploidy = 2;
208 
209     std::copy( pls, pls + (nsamples*3), pl );
210     for(int32_t i=0; i < nsamples; ++i)
211     {
212         dp_sum += ( td[i] = ( (ad[2*i] = ads[3*i]) + (ad[2*i+1] = ads[3*i+1]) + ads[3*i+2] ) );
213     }
214 
215     int32_t n = 0;
216     int32_t adSumHet[2] = {0,0};
217     Estimator::compute_gl_af_hwe(pl, nsamples, ploidy, 2, MLE_HWE_AF, MLE_HWE_GF, n, 1e-20);
218 
219     for(int32_t i=0; i < nsamples; ++i)
220     {
221         int32_t* pli = &pl[ i * 3 ];
222         max_gp = gp_sum = gp = ( LogTool::pl2prob(pli[0]) * MLE_HWE_AF[0] * MLE_HWE_AF[0] );
223         best_gt = 0; best_a1 = 0; best_a2 = 0;
224         for(size_t l=1; l < 2; ++l)
225         {
226             for(size_t m=0; m <= l; ++m)
227             {
228                 gp = ( LogTool::pl2prob(pli[ l*(l+1)/2 + m]) * MLE_HWE_AF[l] * MLE_HWE_AF[m] * (l == m ? 1 : 2) );
229                 gp_sum += gp;
230                 if ( max_gp < gp )
231                 {
232                     max_gp = gp;
233                     best_gt = l*(l+1)/2 + m;
234                     best_a1 = m;
235                     best_a2 = l;
236                 }
237             }
238         }
239 
240         if ( best_gt == 1 )
241         {
242             adSumHet[0] += ad[2*i];
243             adSumHet[1] += ad[2*i+1];
244         }
245 
246         double prob = 1.-max_gp/gp_sum;  // to calculate GQ
247         if ( prob <= 3.162278e-26 )
248         {
249             prob = 3.162278e-26;
250         }
251 
252         if ( prob > 1 )
253         {
254             prob = 1;
255         }
256 
257         gq[i] = (int32_t)LogTool::prob2pl(prob);
258 
259         if ( ( best_gt > 0 ) && ( max_gq < gq[i] ) )
260         {
261             max_gq = gq[i];
262         }
263 
264         gt[2*i]   = ((best_a1 + 1) << 1);
265         gt[2*i+1] = ((best_a2 + 1) << 1);
266         an += 2;             // still use diploid representation of chrX for now.
267         ++acs[best_a1];
268         ++acs[best_a2];
269         ++gcs[best_gt];
270     }
271 
272     for(size_t i=0; i < 2; ++i)
273     {
274         afs[i] = acs[i]/(float)an;
275     }
276 
277     bcf_update_format_int32(hdr, nv, "GT", gt, nsamples * 2);
278     bcf_update_format_int32(hdr, nv, "GQ", gq, nsamples );
279     bcf_update_format_int32(hdr, nv, "AD", ad, nsamples * 2);
280     bcf_update_format_int32(hdr, nv, "DP", td, nsamples );
281     bcf_update_format_int32(hdr, nv, "PL", pl, nsamples * 3);
282 
283     float avgdp = (float)dp_sum / (float)nsamples;
284 
285     nv->qual = (float) max_gq;
286     bcf_update_info_float(hdr, nv, "AVGDP", &avgdp, 1);
287     bcf_update_info_int32(hdr, nv, "AC", &acs[1], 1);
288     bcf_update_info_int32(hdr, nv, "AN", &an, 1);
289     bcf_update_info_float(hdr, nv, "AF", &afs[1], 1);
290     bcf_update_info_int32(hdr, nv, "GC", gcs, 3);
291     bcf_update_info_int32(hdr, nv, "GN", &nsamples, 1);
292 
293     if (n)
294     {
295         float* MLE_HWE_AF_PTR = &MLE_HWE_AF[1];
296         bcf_update_info_float(hdr, nv, "HWEAF", MLE_HWE_AF_PTR, 1);
297     }
298 
299     // calculate the allele frequencies under HWD
300     float MLE_AF[2];
301     float MLE_GF[3];
302     n = 0;
303     Estimator::compute_gl_af(pl, nsamples, ploidy, 2, MLE_AF, MLE_GF,  n, 1e-20);
304     if (n)
305     {
306         //float* MLE_AF_PTR = &MLE_AF[1];
307         //bcf_update_info_float(odw->hdr, nv, "HWDAF", MLE_AF_PTR, n_alleles-1);
308         bcf_update_info_float(hdr, nv, "HWDGF", &MLE_GF, 3);
309     }
310 
311     float fic = 0;
312     n = 0;
313     Estimator::compute_gl_fic(pl, nsamples, ploidy, MLE_HWE_AF, 2, MLE_GF, fic, n);
314     if ( std::isnan((double)fic) ) fic = 0;
315     if (n)
316     {
317         bcf_update_info_float(hdr, nv, "IBC", &fic, 1);
318     }
319 
320     // calculate the LRT statistics related to HWE
321     float lrts;
322     float logp;
323     int32_t df;
324     n = 0;
325     Estimator::compute_hwe_lrt(pl, nsamples, ploidy, 2, MLE_HWE_GF, MLE_GF, n, lrts, logp, df);
326     if (n)
327     {
328         if ( fic < 0 ) logp = 0-logp;
329         {
330             bcf_update_info_float(hdr, nv, "HWE_SLP", &logp, 1);
331         }
332     }
333 
334     float abe = (adSumHet[0] + 0.5)/(adSumHet[0] + adSumHet[1] + 1.0);
335     bcf_update_info_float(hdr, nv, "ABE", &abe, 1);
336 
337     // update filter
338     if ( n_filter & FILTER_MASK_OVERLAP_SNP )
339     bcf_add_filter(hdr, nv, bcf_hdr_id2int(hdr, BCF_DT_ID, "overlap_snp"));
340     if ( n_filter & FILTER_MASK_OVERLAP_INDEL )
341     bcf_add_filter(hdr, nv, bcf_hdr_id2int(hdr, BCF_DT_ID, "overlap_indel"));
342     if ( n_filter & FILTER_MASK_OVERLAP_VNTR )
343     bcf_add_filter(hdr, nv, bcf_hdr_id2int(hdr, BCF_DT_ID, "overlap_vntr"));
344 
345     //bcf_update_info_int32(odw->hdr, nv, "NS_NREF", &v_ns_nrefs[k], 1);
346     abe_num /= (abe_den+1e-6); bcf_update_info_float(hdr, nv, "ABE",  &abe_num, 1);
347     abz_num /= sqrt(abz_den+1e-6); bcf_update_info_float(hdr, nv, "ABZ",  &abz_num, 1);
348     bqr_num /= sqrt(bqr_den+1e-6); bcf_update_info_float(hdr, nv, "BQZ", &bqr_num, 1);
349     mqr_num /= sqrt(mqr_den+1e-6); bcf_update_info_float(hdr, nv, "MQZ", &mqr_num, 1);
350     cyr_num /= sqrt(cyr_den+1e-6); bcf_update_info_float(hdr, nv, "CYZ", &cyr_num, 1);
351     str_num /= sqrt(str_den+1e-6); bcf_update_info_float(hdr, nv, "STZ", &str_num, 1);
352     nmr_num /= sqrt(nmr_den+1e-6); bcf_update_info_float(hdr, nv, "NMZ", &nmr_num, 1);
353     ior_num = log(ior_num/ior_den+1e-6)/log(10.); bcf_update_info_float(hdr, nv, "IOR", &ior_num, 1);
354     nm1_num /= (nm1_den+1e-6); bcf_update_info_float(hdr, nv, "NM1", &nm1_num, 1);
355     nm0_num /= (nm0_den+1e-6); bcf_update_info_float(hdr, nv, "NM0", &nm0_num, 1);
356 
357     //odw->write(nv);
358     //bcf_destroy(nv);
359     free(gt);
360     free(gq);
361     free(pl);
362     free(ad);
363     free(td);
364 
365     free(pls); pls = NULL;
366     free(ads); ads = NULL;
367 
368     free(alleles.s);
369 
370     return nv;
371 }
372 
flush_sample(int32_t sampleIndex)373 void ComplexGenotypingRecord::flush_sample(int32_t sampleIndex)
374 {
375     uint8_t* p_pls = &pls[sampleIndex*3];
376     uint8_t* p_ads = &ads[sampleIndex*3];
377 
378     int32_t imax = ( tmp_pls[0] > tmp_pls[1] ) ? ( tmp_pls[0] > tmp_pls[2] ? 0 : 2 ) : ( tmp_pls[1] > tmp_pls[2] ? 1 : 2);
379     for(int32_t i=0; i < 3; ++i)
380     {
381         uint32_t l = LogTool::prob2pl(tmp_pls[i]/tmp_pls[imax]);
382         p_pls[i] = ((l > 255) ? 255 : l);
383         p_ads[i] = ((tmp_ads[i] > 255) ? 255 : (uint8_t)tmp_ads[i]);
384     }
385 
386     float sqrt_dp_ra = sqrt((float)tmp_dp_ra);
387     float ior = (float)(tmp_oth_obs_q20 / (tmp_oth_exp_q20 + 1e-6));
388     float nm1 = tmp_al_s1 == 0 ? 0 : tmp_nm_al / (float)tmp_al_s1;
389     float nm0 = (tmp_dp_ra - tmp_al_s1) == 0 ? 0 : (tmp_nm_s1-tmp_nm_al) / (float)(tmp_dp_ra - tmp_al_s1);
390     float w_dp_ra  = log(tmp_dp_ra+1.); //sqrt(dp_ra);
391     float w_dp_q20 = log(tmp_dp_q20+1.); //sqrt(dp_q20);
392     float w_al_s1  = log(tmp_al_s1+1.); //sqrt(al_s1);
393     float w_ref_s1 = log(tmp_dp_ra - tmp_al_s1+1.);
394 
395     if ( p_pls[1] == 0 )
396     { // het genotypes
397         abe_num += (w_dp_ra * (tmp_dp_ra - tmp_al_s1 + 0.05) / (double)(tmp_dp_ra + 0.1));
398         abe_den += w_dp_ra;
399 
400         // E(r) = 0.5(r+a) V(r) = 0.25(r+a)
401         abz_num += w_dp_ra * (tmp_dp_ra - tmp_al_s1 - tmp_dp_ra*0.5)/sqrt(0.25 * tmp_dp_ra + 1e-3);
402         abz_den += (w_dp_ra * w_dp_ra);
403 
404         float bqr = sqrt_dp_ra * Estimator::compute_correlation( tmp_dp_ra, tmp_bq_al, tmp_bq_s1, tmp_bq_s2, tmp_al_s1, tmp_al_s1, .1 );
405         float mqr = sqrt_dp_ra * Estimator::compute_correlation( tmp_dp_ra, tmp_mq_al, tmp_mq_s1, tmp_mq_s2, tmp_al_s1, tmp_al_s1, .1 );
406         float cyr = sqrt_dp_ra * Estimator::compute_correlation_f( tmp_dp_ra, tmp_cy_al, tmp_cy_s1, tmp_cy_s2, (float)tmp_al_s1, (float)tmp_al_s1, .1 );
407         float str = sqrt_dp_ra * Estimator::compute_correlation( tmp_dp_ra, tmp_st_al, tmp_st_s1, tmp_st_s1, tmp_al_s1, tmp_al_s1, .1 );
408         float nmr = sqrt_dp_ra * Estimator::compute_correlation( tmp_dp_ra, tmp_nm_al, tmp_nm_s1, tmp_nm_s2, tmp_al_s1, tmp_al_s1, .1 );
409 
410         // Use Stouffer's method to combine the z-scores, but weighted by log of sample size
411         bqr_num += (bqr * w_dp_ra); bqr_den += (w_dp_ra * w_dp_ra);
412         mqr_num += (mqr * w_dp_ra); mqr_den += (w_dp_ra * w_dp_ra);
413         cyr_num += (cyr * w_dp_ra); cyr_den += (w_dp_ra * w_dp_ra);
414         str_num += (str * w_dp_ra); str_den += (w_dp_ra * w_dp_ra);
415         nmr_num += (nmr * w_dp_ra); nmr_den += (w_dp_ra * w_dp_ra);
416     }
417 
418     ior_num += (ior * w_dp_q20); ior_den += w_dp_q20;
419     nm1_num += (nm1 * w_al_s1);  nm1_den += w_al_s1;
420     nm0_num += (nm0 * w_ref_s1); nm0_den += w_ref_s1;
421 
422     clearTemp();
423 }
424 
425 /**
426  * Adds an allele based with collected sufficient statistics.
427  */
add_allele(double contam,int32_t allele,uint8_t mapq,bool fwd,uint32_t q,int32_t cycle,uint32_t nm)428 void ComplexGenotypingRecord::add_allele(double contam, int32_t allele, uint8_t mapq, bool fwd, uint32_t q, int32_t cycle, uint32_t nm)
429 {
430     double pe = LogTool::pl2prob(q);
431     double pm = 1 - pe;
432 
433     if (q>40)
434     {
435         q = 40;
436     }
437 
438     if ( allele == 0 )
439     {
440         ++tmp_ads[0];
441         tmp_pls[0] *= ( pm * (1-contam) + pe * contam / 3 );
442         tmp_pls[1] *= ( pm / 2 + pe / 6 );
443         tmp_pls[2] *= ( pm * contam + pe * (1-contam) / 3 );
444     }
445     else if ( allele > 0 ) // currently, bi-allelic only
446     {
447         ++tmp_ads[1];
448         tmp_pls[0] *= ( pm * contam + pe * (1-contam) / 3 );
449         tmp_pls[1] *= ( pm / 2 + pe / 6 );
450         tmp_pls[2] *= ( pm * (1-contam) + pe * contam / 3 );
451     }
452     else
453     {
454         ++tmp_ads[2];
455     }
456     double sump = tmp_pls[0] + tmp_pls[1] + tmp_pls[2] + 1e-300;
457     tmp_pls[0] /= sump;
458     tmp_pls[1] /= sump;
459     tmp_pls[2] /= sump;
460 
461     if ( allele >= 0 )
462     {
463         if ( q > 20 )
464         {
465           tmp_oth_exp_q20 += (LogTool::pl2prob(q) * 2. / 3.);
466           ++tmp_dp_q20;
467         }
468 
469         float log_td = (cycle > 0) ? 0-logf((float)cycle) : 0;
470 
471         ++tmp_dp_ra;
472         tmp_bq_s1 += q;
473         tmp_bq_s2 += (q*q);
474         tmp_mq_s1 += mapq;
475         tmp_mq_s2 += (mapq * mapq);
476         tmp_cy_s1 += log_td;
477         tmp_cy_s2 += (log_td * log_td);
478         tmp_st_s1 += fwd;
479 
480         if ( allele > 0 )
481         {
482             ++tmp_al_s1;
483             tmp_bq_al += q;
484             tmp_mq_al += mapq;
485             tmp_cy_al += log_td;
486             tmp_st_al += fwd;
487             tmp_nm_al += (nm-1);
488             tmp_nm_s1 += (nm-1);
489             tmp_nm_s2 += (nm-1)*(nm-1);
490         }
491         else
492         {
493           tmp_nm_s1 += nm;
494           tmp_nm_s2 += (nm * nm);
495         }
496     }
497     else
498     {
499         if (q>20)
500         {
501           tmp_oth_exp_q20 += (LogTool::pl2prob(q) * 2. / 3.);
502           ++tmp_oth_obs_q20;
503           ++tmp_dp_q20;
504         }
505     }
506 }
507 
508 /**
509  * Collects sufficient statistics from read for variants to be genotyped.
510  */
process_read(AugmentedBAMRecord & as,int32_t sampleIndex,double contam)511 void ComplexGenotypingRecord::process_read(AugmentedBAMRecord& as, int32_t sampleIndex, double contam)
512 {
513     if (v_alleles.size()==2)
514     {
515         if (as.beg1 <= beg1 && end1 <= as.end1)
516         {
517             bam1_t *s = as.s;
518 
519             char strand = bam_is_rev(s) ? 'R' : 'F';
520             int32_t allele = 0;
521             //uint32_t bpos1 = bam_get_pos1(s);
522             uint8_t* seq = bam_get_seq(s);
523             uint8_t* qual = bam_get_qual(s);
524             uint32_t rlen = bam_get_l_qseq(s);
525             uint8_t mapq = bam_get_mapq(s);
526 
527             uint32_t q = len*30;
528             uint32_t cycle = 10;
529 
530             std::vector<uint32_t>& aug_cigar = as.aug_cigar;
531             std::vector<std::string>& aug_ref = as.aug_ref;
532             std::vector<std::string>& aug_alt = as.aug_alt;
533 
534             int32_t vpos1 = pos1;
535 
536             int32_t cpos1 = bam_get_pos1(s);
537             int32_t rpos0 = 0;
538 
539             for (uint32_t i=0; i<aug_cigar.size(); ++i)
540             {
541                 uint32_t oplen = bam_cigar_oplen(aug_cigar[i]);
542                 char opchr = bam_cigar_opchr(aug_cigar[i]);
543 
544                 if (opchr=='S')
545                 {
546                     rpos0 += oplen;
547                 }
548                 else if (opchr=='=')
549                 {
550                     if (vpos1>=cpos1 && vpos1<=(cpos1+oplen-1))
551                     {
552                         cycle = strand == 'F' ? (rpos0+1) : (rlen - rpos0);
553                     }
554                     cpos1 += oplen;
555                     rpos0 += oplen;
556                 }
557                 else if (opchr=='X')
558                 {
559                     if (cpos1-1==vpos1)
560                     {
561                         cycle = strand == 'F' ? (rpos0+1) : (rlen - rpos0);
562                     }
563                     ++cpos1;
564                     ++rpos0;
565                 }
566                 else if (opchr=='I')
567                 {
568                     if (dlen>0 && cpos1-1==vpos1)
569                     {
570                         if (indel==aug_alt[i])
571                         {
572                             q = len*30;
573                             allele = 1;
574                         }
575                         else
576                         {
577                             q = abs(len-(int32_t)aug_ref[i].size())*30;
578                             allele = -1;
579                         }
580 
581                         cycle = strand == 'F' ? (rpos0+1) : (rlen - rpos0);
582                         break;
583                     }
584                     else if (dlen<0 && cpos1-1==vpos1)
585                     {
586                         q = 30;
587                         allele = -3;
588                         cycle = strand == 'F' ? (rpos0+1) : (rlen - rpos0);
589                         break;
590                     }
591 
592                     rpos0 += oplen;
593                 }
594                 else if (opchr=='D')
595                 {
596                     if (dlen<0 && cpos1-1==vpos1)
597                     {
598                         if (indel==aug_ref[i])
599                         {
600                             q = len*30;
601                             allele = 1;
602                         }
603                         else
604                         {
605                             q = abs(len-(int32_t)aug_ref[i].size())*30;
606                             allele = -1;
607                         }
608 
609                         cycle = strand == 'F' ? (rpos0+1) : (rlen - rpos0);
610                         break;
611                     }
612                     else if (dlen>0 && cpos1-1==vpos1)
613                     {
614                         q = 30;
615                         allele = -2;
616 
617                         cycle = strand == 'F' ? (rpos0+1) : (rlen - rpos0);
618                         break;
619                     }
620 
621                 }
622                 else
623                 {
624                     std::cerr << "unrecognized cigar state " << opchr << "\n";
625                 }
626             }
627 
628             add_allele( contam, allele, mapq, strand == 'F', q, rand() % 75, as.no_mismatches );
629         }
630         else
631         {
632             bam1_t *s = as.s;
633             uint8_t mapq = bam_get_mapq(s);
634 
635             add_allele( contam, -1, mapq, bam_is_rev(as.s) ? false : true, 20, rand() % 75, as.no_mismatches );
636         }
637     }
638     else //multiallelic
639     {
640         if (as.beg1 <= beg1 && end1 <= as.end1)
641         {
642             bam1_t *s = as.s;
643 
644             char strand = bam_is_rev(s) ? 'R' : 'F';
645             int32_t allele = 0;
646             //uint32_t bpos1 = bam_get_pos1(s);
647             uint8_t* seq = bam_get_seq(s);
648             uint8_t* qual = bam_get_qual(s);
649             uint32_t rlen = bam_get_l_qseq(s);
650             uint8_t mapq = bam_get_mapq(s);
651 
652             uint32_t q = len*30;
653             uint32_t cycle = 10;
654 
655             std::vector<uint32_t>& aug_cigar = as.aug_cigar;
656             std::vector<std::string>& aug_ref = as.aug_ref;
657             std::vector<std::string>& aug_alt = as.aug_alt;
658 
659             int32_t vpos1 = pos1;
660 
661             int32_t cpos1 = bam_get_pos1(s);
662             int32_t rpos0 = 0;
663 
664             for (uint32_t i=0; i<aug_cigar.size(); ++i)
665             {
666                 uint32_t oplen = bam_cigar_oplen(aug_cigar[i]);
667                 char opchr = bam_cigar_opchr(aug_cigar[i]);
668 
669                 if (opchr=='S')
670                 {
671                     rpos0 += oplen;
672                 }
673                 else if (opchr=='=')
674                 {
675                     if (vpos1>=cpos1 && vpos1<=(cpos1+oplen-1))
676                     {
677                         cycle = strand == 'F' ? (rpos0+1) : (rlen - rpos0);
678                     }
679                     cpos1 += oplen;
680                     rpos0 += oplen;
681                 }
682                 else if (opchr=='X')
683                 {
684                     if (cpos1-1==vpos1)
685                     {
686                         cycle = strand == 'F' ? (rpos0+1) : (rlen - rpos0);
687                     }
688                     ++cpos1;
689                     ++rpos0;
690                 }
691                 else if (opchr=='I')
692                 {
693                     if (dlen>0 && cpos1-1==vpos1)
694                     {
695                         if (indel==aug_alt[i])
696                         {
697                             q = len*30;
698                             allele = 1;
699                         }
700                         else
701                         {
702                             q = abs(len-(int32_t)aug_ref[i].size())*30;
703                             allele = -1;
704                         }
705 
706                         cycle = strand == 'F' ? (rpos0+1) : (rlen - rpos0);
707                         break;
708                     }
709                     else if (dlen<0 && cpos1-1==vpos1)
710                     {
711                         q = 30;
712                         allele = -3;
713                         cycle = strand == 'F' ? (rpos0+1) : (rlen - rpos0);
714                         break;
715                     }
716 
717                     rpos0 += oplen;
718                 }
719                 else if (opchr=='D')
720                 {
721                     if (dlen<0 && cpos1-1==vpos1)
722                     {
723                         if (indel==aug_ref[i])
724                         {
725                             q = len*30;
726                             allele = 1;
727                         }
728                         else
729                         {
730                             q = abs(len-(int32_t)aug_ref[i].size())*30;
731                             allele = -1;
732                         }
733 
734                         cycle = strand == 'F' ? (rpos0+1) : (rlen - rpos0);
735                         break;
736                     }
737                     else if (dlen>0 && cpos1-1==vpos1)
738                     {
739                         q = 30;
740                         allele = -2;
741 
742                         cycle = strand == 'F' ? (rpos0+1) : (rlen - rpos0);
743                         break;
744                     }
745 
746                 }
747                 else
748                 {
749                     std::cerr << "unrecognized cigar state " << opchr << "\n";
750                 }
751             }
752 
753             add_allele( contam, allele, mapq, strand == 'F', q, rand() % 75, as.no_mismatches );
754         }
755         else
756         {
757             bam1_t *s = as.s;
758             uint8_t mapq = bam_get_mapq(s);
759 
760             add_allele( contam, -1, mapq, bam_is_rev(as.s) ? false : true, 20, rand() % 75, as.no_mismatches );
761         }
762     }
763 }
764