1 /* The MIT License
2 
3    Copyright (c) 2013 Adrian Tan <atks@umich.edu>
4 
5    Permission is hereby granted, free of charge, to any person obtaining a copy
6    of this software and associated documentation files (the "Software"), to deal
7    in the Software without restriction, including without limitation the rights
8    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9    copies of the Software, and to permit persons to whom the Software is
10    furnished to do so, subject to the following conditions:
11 
12    The above copyright notice and this permission notice shall be included in
13    all copies or substantial portions of the Software.
14 
15    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21    THE SOFTWARE.
22 */
23 
24 #include "variant.h"
25 
26 /**
27  * Constructor.
28  */
Variant(bcf_hdr_t * h,bcf1_t * v)29 Variant::Variant(bcf_hdr_t* h, bcf1_t* v)
30 {
31     this->h = h;
32     this->v = v;
33 
34     type = classify(h, v);
35 
36     chrom = bcf_get_chrom(h, v);
37     rid = bcf_get_rid(v);
38     pos1 = bcf_get_pos1(v);
39 
40     no_overlapping_snps = 0;
41     no_overlapping_indels = 0;
42     no_overlapping_vntrs = 0;
43 
44     is_new_multiallelic =  false;
45 
46     //attempts to update relevant information on variants
47     if (type==VT_SNP)
48     {
49         beg1 = bcf_get_pos1(v);
50         end1 = bcf_get_pos1(v);
51     }
52     else if (type==VT_INDEL)
53     {
54         beg1 = bcf_get_pos1(v);
55         end1 = bcf_get_info_int(h, v, "END", 0);
56 
57         //annotate ends
58         if (!end1) end1 = bcf_get_end1(v);
59     }
60     //complex variants
61     else if (type & (VT_SNP|VT_MNP|VT_INDEL|VT_CLUMPED))
62     {
63         beg1 = bcf_get_pos1(v);
64         end1 = bcf_get_info_int(h, v, "END", 0);
65         if (!end1) end1 = bcf_get_end1(v);
66     }
67     else if (type==VT_VNTR)
68     {
69         beg1 = bcf_get_pos1(v);
70         end1 = bcf_get_info_int(h, v, "END", 0);
71         if (!end1) end1 = bcf_get_end1(v);
72 
73         update_vntr_from_info_fields(h, v);
74 
75         vs.push_back(v);
76         vntr_vs.push_back(v);
77     }
78     else if (type==VT_SV)
79     {
80         beg1 = bcf_get_pos1(v);
81         end1 = bcf_get_info_int(h, v, "END", 0);
82         if (!end1) end1 = bcf_get_end1(v);
83     }
84     else
85     {
86         std::cerr << "unexpected type in variant construction\n";
87         print();
88         exit(1);
89     }
90 }
91 
92 /**
93  * Constructor for a variant object that is to be composed from several variants.
94  * ??????? WHY DO THIS????????
95  */
Variant(Variant * v1,Variant * v2)96 Variant::Variant(Variant* v1, Variant* v2)
97 {
98     type = VT_UNDEFINED;
99 
100     chrom = v1->chrom;
101     rid = v1->rid;
102     pos1 = std::min(v1->pos1, v2->pos1);
103     beg1 = std::min(v1->beg1, v2->beg1);
104     end1 = std::max(v1->end1, v2->end1);
105 
106     no_overlapping_snps = 0;
107     no_overlapping_indels = 0;
108     no_overlapping_vntrs = 0;
109 
110     vs.push_back(v1->v);
111     vs.push_back(v2->v);
112 
113     if (v1->type==VT_SNP)
114     {
115         snp_vs.push_back(v1->v);
116     }
117     else if (v1->type==VT_INDEL)
118     {
119         indel_vs.push_back(v1->v);
120     }
121     else if (v1->type==VT_VNTR)
122     {
123         vntr_vs.push_back(v1->v);
124     }
125 
126     if (v2->type==VT_SNP)
127     {
128         snp_vs.push_back(v2->v);
129     }
130     else if (v2->type==VT_INDEL)
131     {
132         indel_vs.push_back(v2->v);
133     }
134     else if (v2->type==VT_VNTR)
135     {
136         vntr_vs.push_back(v2->v);
137     }
138 }
139 
140 /**
141  * Constructor.
142  */
Variant()143 Variant::Variant()
144 {
145     clear();
146 }
147 
148 /**
149  * Destructor.
150  */
~Variant()151 Variant::~Variant()
152 {
153 //    if (v) bcf_destroy(v);
154 //    for (uint32_t i=0; i<vs.size(); ++i)
155 //    {
156 //        if (vs[i]) bcf_destroy(vs[i]);
157 //    }
158     clear();
159 };
160 
161 /**
162  * Clears variant information.
163  */
clear()164 void Variant::clear()
165 {
166     type = VT_REF;
167 
168     h = NULL;
169     v = NULL;
170 
171     is_new_multiallelic = false;
172     is_involved_in_a_multiallelic = false;
173     associated_new_multiallelic = NULL;
174     updated_multiallelic = false;
175 
176     chrom.clear();
177     rid = 0;
178     pos1 = 0;
179     beg1 = 0;
180     end1 = 0;
181     ts = 0;
182     tv = 0;
183     ins = 0;
184     del = 0;
185     max_dlen = 0;
186     min_dlen = 0;
187 
188     no_overlapping_snps = 0;
189     no_overlapping_indels = 0;
190     no_overlapping_vntrs = 0;
191 
192     contains_N = false;
193     alleles.clear();
194     vntr.clear();
195     vs.clear();
196     snp_vs.clear();
197     indel_vs.clear();
198     vntr_vs.clear();
199     consolidated_vntr_vs.clear();
200 };
201 
202 /**
203  * Classifies variants.
204  */
classify(bcf_hdr_t * h,bcf1_t * v)205 int32_t Variant::classify(bcf_hdr_t *h, bcf1_t *v)
206 {
207     clear();
208 
209     this->h = h;
210     this->v = v;
211 
212     bcf_unpack(v, BCF_UN_STR);
213     chrom.assign(bcf_get_chrom(h, v));
214     rid = bcf_get_rid(v);
215     pos1 = bcf_get_pos1(v);
216     end1 = bcf_get_end1(v);
217     char** allele = bcf_get_allele(v);
218     int32_t n_allele = bcf_get_n_allele(v);
219     int32_t pos0 = pos1-1;
220 
221     bool homogeneous_length = true;
222     char* ref = allele[0];
223     int32_t rlen = strlen(ref);
224 
225     if (strchr(ref, 'N'))
226     {
227         contains_N = true;
228     }
229 
230     //if only ref allele, skip this entire for loop
231     for (size_t i=1; i<n_allele; ++i)
232     {
233         int32_t allele_type = VT_REF;
234 
235         //check for symbolic alternative alleles
236         if (strchr(allele[i],'<'))
237         {
238             size_t len = strlen(allele[i]);
239             if (len>=5)
240             {
241                 //VN/d+
242                 if (allele[i][0]=='<' && allele[i][1]=='V' && allele[i][2]=='N' && allele[i][len-1]=='>' )
243                 {
244                     for (size_t j=3; j<len-1; ++j)
245                     {
246                         if (allele[i][j]<'0' || allele[i][j]>'9')
247                         {
248                             allele_type = VT_VNTR;
249                         }
250                     }
251                 }
252                 //VNTR
253                 else if (len==6 &&
254                          allele[i][0]=='<' &&
255                          allele[i][1]=='V' && allele[i][2]=='N' && allele[i][3]=='T' && allele[i][4]=='R' &&
256                          allele[i][5]=='>' )
257                 {
258                      allele_type = VT_VNTR;
259                 }
260                 //STR
261                 else if (len==5 &&
262                          allele[i][0]=='<' &&
263                          allele[i][1]=='S' && allele[i][2]=='T' && allele[i][3]=='R' &&
264                          allele[i][4]=='>' )
265                 {
266                      allele_type = VT_VNTR;
267                 }
268                 //ST/d+
269                 else if (allele[i][0]=='<' && allele[i][1]=='S' && allele[i][2]=='T' && allele[i][len-1]=='>' )
270                 {
271                     type = VT_VNTR;
272 
273                     for (size_t j=3; j<len-1; ++j)
274                     {
275                         if ((allele[i][j]<'0' || allele[i][j]>'9') && allele[i][j]!='.')
276                         {
277                             type = VT_SV;
278                         }
279                     }
280                 }
281             }
282 
283             if (allele_type==VT_VNTR)
284             {
285                 allele_type = VT_VNTR;
286                 type |= allele_type;
287                 alleles.push_back(Allele(allele_type));
288             }
289             else
290             {
291                 allele_type = VT_SV;
292                 type |= allele_type;
293                 std::string sv_type(allele[i]);
294                 alleles.push_back(Allele(allele_type, sv_type));
295             }
296         }
297         //checks for chromosomal breakpoints
298         else if (strchr(allele[i],'[')||strchr(allele[i],']'))
299         {
300             allele_type = VT_SV;
301             type |= allele_type;
302             std::string sv_type("<BND>");
303             alleles.push_back(Allele(allele_type, sv_type));
304         }
305         //non variant record
306         else if (allele[i][0]=='.' || strcmp(allele[i],allele[0])==0)
307         {
308             type = VT_REF;
309         }
310         //explicit sequence of bases
311         else
312         {
313             kstring_t REF = {0,0,0};
314             kstring_t ALT = {0,0,0};
315 
316             ref = allele[0];
317             char* alt = allele[i];
318             int32_t alen = strlen(alt);
319 
320             if (strchr(alt, 'N'))
321             {
322                 contains_N = true;
323             }
324 
325             if (rlen!=alen)
326             {
327                 homogeneous_length = false;
328             }
329 
330             //trimming
331             //this is required in particular for the
332             //characterization of multiallelics and
333             //in general, any unnormalized variant
334             int32_t rl = rlen;
335             int32_t al = alen;
336             //trim right
337             while (rl!=1 && al!=1)
338             {
339                 if (ref[rl-1]==alt[al-1])
340                 {
341                     --rl;
342                     --al;
343                 }
344                 else
345                 {
346                     break;
347                 }
348             }
349 
350             //trim left
351             while (rl !=1 && al!=1)
352             {
353                 if (ref[0]==alt[0])
354                 {
355                     ++ref;
356                     ++alt;
357                     --rl;
358                     --al;
359                 }
360                 else
361                 {
362                     break;
363                 }
364             }
365 
366             kputsn(ref, rl, &REF);
367             kputsn(alt, al, &ALT);
368 
369             ref = REF.s;
370             alt = ALT.s;
371 
372             int32_t mlen = std::min(rl, al);
373             int32_t dlen = al-rl;
374             int32_t diff = 0;
375             int32_t ts = 0;
376             int32_t tv = 0;
377 
378             if (mlen==1 && dlen)
379             {
380                 char ls, le, ss;
381 
382                 if (rl>al)
383                 {
384                      ls = ref[0];
385                      le = ref[rl-1];
386                      ss = alt[0];
387                 }
388                 else
389                 {
390                      ls = alt[0];
391                      le = alt[al-1];
392                      ss = ref[0];
393                 }
394 
395                 if (ls!=ss && le!=ss)
396                 {
397                     ++diff;
398 
399                     if ((ls=='G' && ss=='A') ||
400                         (ls=='A' && ss=='G') ||
401                         (ls=='C' && ss=='T') ||
402                         (ls=='T' && ss=='C'))
403                     {
404                         ++ts;
405                     }
406                     else
407                     {
408                         ++tv;
409                     }
410                 }
411             }
412             else
413             {
414                 for (int32_t j=0; j<mlen; ++j)
415                 {
416                     if (ref[j]!=alt[j])
417                     {
418                         ++diff;
419 
420                         if ((ref[j]=='G' && alt[j]=='A') ||
421                             (ref[j]=='A' && alt[j]=='G') ||
422                             (ref[j]=='C' && alt[j]=='T') ||
423                             (ref[j]=='T' && alt[j]=='C'))
424                         {
425                             ++ts;
426                         }
427                         else
428                         {
429                             ++tv;
430                         }
431                     }
432                 }
433             }
434 
435             //substitution variants
436             if (mlen==diff)
437             {
438                 allele_type |= mlen==1 ? VT_SNP : VT_MNP;
439             }
440 
441             //indel variants
442             if (dlen)
443             {
444                 allele_type |= VT_INDEL;
445             }
446 
447             //clumped SNPs and MNPs
448             if (diff && diff < mlen) //internal gaps
449             {
450                 allele_type |= VT_CLUMPED;
451             }
452 
453             type |= allele_type;
454             alleles.push_back(Allele(type, diff, alen, dlen, mlen, ts, tv));
455             ts += ts;
456             tv += tv;
457             ins = dlen>0?1:0;
458             del = dlen<0?1:0;
459 
460             if (REF.m) free(REF.s);
461             if (ALT.m) free(ALT.s);
462         }
463     }
464 
465     if (type==VT_VNTR)
466     {
467         update_vntr_from_info_fields(h, v);
468     }
469 
470     //additionally define MNPs by length of all alleles
471     if (!(type&(VT_VNTR|VT_SV)) && type!=VT_REF)
472     {
473         if (homogeneous_length && rlen>1 && n_allele>1)
474         {
475             type |= VT_MNP;
476         }
477     }
478 
479     return type;
480 }
481 
482 /**
483  * Updates VNTR related information from INFO fields.
484  */
update_vntr_from_info_fields()485 void Variant::update_vntr_from_info_fields()
486 {
487     vntr.motif = bcf_get_rid(v);
488     char** allele = bcf_get_allele(v);
489 //    vntr.exact_repeat_tract.assign(allele[0]);
490 //   std::string tags[16] = {"MOTIF", "RU", "BASIS", "MLEN", "BLEN", "REPEAT_TRACT", "COMP", "ENTROPY", "ENTROPY2", "KL_DIVERGENCE", "KL_DIVERGENCE2", "RL", "LL", "RU_COUNTS", "SCORE", "TRF_SCORE"};
491 
492     vntr.motif = bcf_get_info_str(h, v, "MOTIF");
493     vntr.ru = bcf_get_info_str(h, v, "RU");
494     vntr.basis = bcf_get_info_str(h, v, "BASIS");
495     if (vntr.basis=="") vntr.basis = VNTR::get_basis(vntr.motif);
496     vntr.mlen = vntr.motif.size();
497     vntr.blen = (int32_t) vntr.basis.size();
498     std::vector<int32_t> i_vec = bcf_get_info_int_vec(h, v, "REPEAT_TRACT", 2, 0);
499     vntr.beg1 = i_vec[0];
500     vntr.end1 = i_vec[1];
501     i_vec = bcf_get_info_int_vec(h, v, "COMP", 4, 0);
502     vntr.comp[0] = i_vec[0];
503     vntr.comp[1] = i_vec[1];
504     vntr.comp[2] = i_vec[2];
505     vntr.comp[3] = i_vec[3];
506     vntr.entropy = bcf_get_info_flt(h, v, "ENTROPY");
507     vntr.entropy2 = bcf_get_info_flt(h, v, "ENTROPY2");
508     vntr.kl_divergence = bcf_get_info_flt(h, v, "KL_DIVERGENCE");
509     vntr.kl_divergence2 = bcf_get_info_flt(h, v, "KL_DIVERGENCE2");
510     vntr.rl = bcf_get_info_int(h, v, "RL");
511     vntr.ll = bcf_get_info_int(h, v, "LL");
512     i_vec = bcf_get_info_int_vec(h, v, "RU_COUNTS", 2, 0);
513     vntr.no_perfect_ru = i_vec[0];
514     vntr.no_ru = i_vec[1];
515     vntr.score = bcf_get_info_flt(h, v, "SCORE");
516     vntr.trf_score = bcf_get_info_int(h, v, "TRF_SCORE");
517 
518     vntr.exact_motif = bcf_get_info_str(h, v, "EX_MOTIF");
519     vntr.exact_ru = bcf_get_info_str(h, v, "EX_RU");
520     vntr.exact_basis = bcf_get_info_str(h, v, "EX_BASIS");
521     vntr.exact_mlen = (int32_t) vntr.exact_motif.size();
522     vntr.exact_blen = (int32_t) vntr.exact_basis.size();
523     i_vec = bcf_get_info_int_vec(h, v, "EX_REPEAT_TRACT", 2, 0);
524     vntr.exact_beg1 = i_vec[0];
525     vntr.exact_end1 = i_vec[1];
526     i_vec = bcf_get_info_int_vec(h, v, "EX_COMP", 4, 0);
527     vntr.exact_comp[0] = i_vec[0];
528     vntr.exact_comp[1] = i_vec[1];
529     vntr.exact_comp[2] = i_vec[2];
530     vntr.exact_comp[3] = i_vec[3];
531     vntr.exact_entropy = bcf_get_info_flt(h, v, "EX_ENTROPY");
532     vntr.exact_entropy2 = bcf_get_info_flt(h, v, "EX_ENTROPY2");
533     vntr.exact_kl_divergence = bcf_get_info_flt(h, v, "EX_KL_DIVERGENCE");
534     vntr.exact_kl_divergence2 = bcf_get_info_flt(h, v, "EX_KL_DIVERGENCE2");
535     vntr.exact_rl = bcf_get_info_int(h, v, "EX_RL");
536     vntr.exact_ll = bcf_get_info_int(h, v, "EX_LL");
537     i_vec = bcf_get_info_int_vec(h, v, "EX_RU_COUNTS", 2, 0);
538     vntr.exact_no_perfect_ru = i_vec[0];
539     vntr.exact_no_ru = i_vec[1];
540     vntr.exact_score = bcf_get_info_flt(h, v, "EX_SCORE");
541     vntr.exact_trf_score = bcf_get_info_int(h, v, "EX_TRF_SCORE");
542 
543     vntr.fuzzy_motif = bcf_get_info_str(h, v, "FZ_MOTIF");
544     vntr.fuzzy_ru = bcf_get_info_str(h, v, "FZ_RU");
545     vntr.fuzzy_basis = bcf_get_info_str(h, v, "FZ_BASIS");
546     vntr.fuzzy_mlen = (int32_t) vntr.fuzzy_motif.size();
547     vntr.fuzzy_blen = (int32_t) vntr.fuzzy_basis.size();
548     i_vec = bcf_get_info_int_vec(h, v, "FZ_REPEAT_TRACT", 2, 0);
549     vntr.fuzzy_beg1 = i_vec[0];
550     vntr.fuzzy_end1 = i_vec[1];
551     i_vec = bcf_get_info_int_vec(h, v, "FZ_COMP", 4, 0);
552     vntr.fuzzy_comp[0] = i_vec[0];
553     vntr.fuzzy_comp[1] = i_vec[1];
554     vntr.fuzzy_comp[2] = i_vec[2];
555     vntr.fuzzy_comp[3] = i_vec[3];
556     vntr.fuzzy_entropy = bcf_get_info_flt(h, v, "FZ_ENTROPY");
557     vntr.fuzzy_entropy2 = bcf_get_info_flt(h, v, "FZ_ENTROPY2");
558     vntr.fuzzy_kl_divergence = bcf_get_info_flt(h, v, "FZ_KL_DIVERGENCE");
559     vntr.fuzzy_kl_divergence2 = bcf_get_info_flt(h, v, "FZ_KL_DIVERGENCE2");
560     vntr.fuzzy_rl = bcf_get_info_int(h, v, "FZ_RL");
561     vntr.fuzzy_ll = bcf_get_info_int(h, v, "FZ_LL");
562     i_vec = bcf_get_info_int_vec(h, v, "FZ_RU_COUNTS", 2, 0);
563     vntr.fuzzy_no_perfect_ru = i_vec[0];
564     vntr.fuzzy_no_ru = i_vec[1];
565     vntr.fuzzy_score = bcf_get_info_flt(h, v, "FZ_SCORE");
566     vntr.fuzzy_trf_score = bcf_get_info_int(h, v, "FZ_TRF_SCORE");
567 }
568 
569 /**
570  * Updates VNTR related information from INFO fields.
571  */
update_vntr_from_info_fields(bcf_hdr_t * h,bcf1_t * v)572 void Variant::update_vntr_from_info_fields(bcf_hdr_t *h, bcf1_t *v)
573 {
574     this->h = h;
575     this->v = v;
576 
577     update_vntr_from_info_fields();
578 }
579 
580 /**
581  * Updates a bcf1_t object with VNTR information.
582  */
update_bcf_with_vntr_info(bcf_hdr_t * h,bcf1_t * v)583 void Variant::update_bcf_with_vntr_info(bcf_hdr_t *h, bcf1_t *v)
584 {
585     bcf_update_info_flag(h, v, "FUZZY", NULL, 1);
586 
587     //VNTR position and sequences
588     bcf_set_pos1(v, vntr.fuzzy_beg1);
589     kstring_t s = {0,0,0};
590     s.l = 0;
591     kputs(vntr.fuzzy_repeat_tract.c_str(), &s);
592     kputc(',', &s);
593     kputs("<VNTR>", &s);
594     bcf_update_alleles_str(h, v, s.s);
595 
596     //VNTR motif
597     bcf_update_info_string(h, v, "MOTIF", vntr.motif.c_str());
598     bcf_update_info_string(h, v, "RU", vntr.ru.c_str());
599 
600     //VNTR characteristics
601     bcf_update_info_float(h, v, "FZ_CONCORDANCE", &vntr.fuzzy_score, 1);
602     bcf_update_info_float(h, v, "FZ_RL", &vntr.fuzzy_rl, 1);
603     bcf_update_info_float(h, v, "FZ_LL", &vntr.fuzzy_ll, 1);
604     int32_t flank_pos1[2] = {vntr.exact_beg1-1, vntr.exact_end1+1};
605     bcf_update_info_int32(h, v, "FLANKS", &flank_pos1, 2);
606 
607     //flank positions
608     int32_t fuzzy_flank_pos1[2] = {vntr.fuzzy_beg1-1, vntr.fuzzy_end1+1};
609     bcf_update_info_int32(h, v, "FZ_FLANKS", &fuzzy_flank_pos1, 2);
610     int32_t ru_count[2] = {vntr.fuzzy_no_perfect_ru, vntr.fuzzy_no_ru};
611     bcf_update_info_int32(h, v, "FZ_RU_COUNTS", &ru_count, 2);
612 
613     if (vntr.is_large_repeat_tract) bcf_update_info_flag(h, v, "LARGE_REPEAT_REGION", NULL, 1);
614 
615     if (s.m) free(s.s);
616 }
617 
618 /**
619  * Prints variant information.
620  */
print()621 void Variant::print()
622 {
623     std::cerr << "type  : " << vtype2string(type) << "\n";
624     std::cerr << "\n";
625     if (h!=NULL && v!=NULL) bcf_print_liten(h, v);
626     std::cerr << "chrom : " << chrom << "\n";
627     std::cerr << "rid   : " << rid << "\n";
628     std::cerr << "beg1  : " << beg1 << "\n";
629     std::cerr << "end1  : " << end1 << "\n";
630 
631     std::cerr << "motif: " << vntr.motif << "\n";
632     std::cerr << "rlen : " << vntr.motif.size() << "\n";
633 
634     for (int32_t i=0; i<alleles.size(); ++i)
635     {
636         std::cerr << "\tallele: " << i << "\n";
637         std::cerr << "\t  type: " << vtype2string(alleles[i].type) << "\n";
638         std::cerr << "\t  diff: " << alleles[i].diff << "\n";
639         std::cerr << "\t  alen: " << alleles[i].alen << "\n";
640         std::cerr << "\t  dlen: " << alleles[i].dlen << "\n";
641     }
642 };
643 
644 /**
645  * Gets a string representation of the variant.
646  */
get_variant_string()647 std::string Variant::get_variant_string()
648 {
649     kstring_t var = {0,0,0};
650     bcf_unpack(v, BCF_UN_STR);
651     var.l = 0;
652     kputs(bcf_get_chrom(h, v), &var);
653     kputc(':', &var);
654     kputw(bcf_get_pos1(v), &var);
655     kputc(':', &var);
656     for (size_t i=0; i<bcf_get_n_allele(v); ++i)
657     {
658         if (i) kputc('/', &var);
659         kputs(bcf_get_alt(v, i), &var);
660     }
661 
662     std::string str(var.s);
663 
664     if (var.m) free(var.s);
665 
666     return str;
667 }
668 
669 /**
670  * Gets a string representation of the underlying VNTR by exact alignment.
671  */
get_vntr_string(kstring_t * s)672 void Variant::get_vntr_string(kstring_t* s)
673 {
674     s->l = 0;
675     kputs(chrom.c_str(), s);
676     kputc(':', s);
677     kputw(vntr.exact_beg1, s);
678     kputc(':', s);
679     kputs(vntr.exact_repeat_tract.c_str(), s);
680     kputc(':', s);
681     kputs("<VNTR>", s);
682     kputc(':', s);
683     kputs(vntr.motif.c_str(), s);
684 };
685 
686 /**
687  * Gets a string representation of the underlying VNTR by fuzzy alignment.
688  */
get_fuzzy_vntr_string(kstring_t * s)689 void Variant::get_fuzzy_vntr_string(kstring_t* s)
690 {
691     s->l = 0;
692     kputs(chrom.c_str(), s);
693     kputc(':', s);
694     kputw(vntr.fuzzy_beg1, s);
695     kputc(':', s);
696     kputs(vntr.fuzzy_repeat_tract.c_str(), s);
697     kputc(':', s);
698     kputs("<VNTR>", s);
699     kputc(':', s);
700     kputs(vntr.motif.c_str(), s);
701 };
702 
703 /**
704  * Converts VTYPE to string.
705  */
vtype2string(int32_t VTYPE)706 std::string Variant::vtype2string(int32_t VTYPE)
707 {
708     std::string s;
709 
710     if (!VTYPE)
711     {
712         s += (s.size()==0) ? "" : "/";
713         s += "REF";
714     }
715 
716     if (VTYPE & VT_SNP)
717     {
718         s += (s.size()==0) ? "" : "/";
719         s += "SNP";
720     }
721 
722     if (VTYPE & VT_MNP)
723     {
724         s += (s.size()==0) ? "" : "/";
725         s += "MNP";
726     }
727 
728     if (VTYPE & VT_INDEL)
729     {
730         s += (s.size()==0) ? "" : "/";
731         s += "INDEL";
732     }
733 
734     if (VTYPE & VT_CLUMPED)
735     {
736         s += (s.size()==0) ? "" : "/";
737         s += "CLUMPED";
738     }
739 
740     if (VTYPE & VT_VNTR)
741     {
742         s += (s.size()==0) ? "" : "/";
743         s += "VNTR";
744     }
745 
746     if (VTYPE & VT_SV)
747     {
748         s += (s.size()==0) ? "" : "/";
749         s += "SV";
750     }
751 
752     return s;
753 }