1 /* The MIT License
2
3 Copyright (c) 2013 Adrian Tan <atks@umich.edu>
4
5 Permission is hereby granted, free of charge, to any person obtaining a copy
6 of this software and associated documentation files (the "Software"), to deal
7 in the Software without restriction, including without limitation the rights
8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 copies of the Software, and to permit persons to whom the Software is
10 furnished to do so, subject to the following conditions:
11
12 The above copyright notice and this permission notice shall be included in
13 all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 THE SOFTWARE.
22 */
23
24 #include "variant.h"
25
26 /**
27 * Constructor.
28 */
Variant(bcf_hdr_t * h,bcf1_t * v)29 Variant::Variant(bcf_hdr_t* h, bcf1_t* v)
30 {
31 this->h = h;
32 this->v = v;
33
34 type = classify(h, v);
35
36 chrom = bcf_get_chrom(h, v);
37 rid = bcf_get_rid(v);
38 pos1 = bcf_get_pos1(v);
39
40 no_overlapping_snps = 0;
41 no_overlapping_indels = 0;
42 no_overlapping_vntrs = 0;
43
44 is_new_multiallelic = false;
45
46 //attempts to update relevant information on variants
47 if (type==VT_SNP)
48 {
49 beg1 = bcf_get_pos1(v);
50 end1 = bcf_get_pos1(v);
51 }
52 else if (type==VT_INDEL)
53 {
54 beg1 = bcf_get_pos1(v);
55 end1 = bcf_get_info_int(h, v, "END", 0);
56
57 //annotate ends
58 if (!end1) end1 = bcf_get_end1(v);
59 }
60 //complex variants
61 else if (type & (VT_SNP|VT_MNP|VT_INDEL|VT_CLUMPED))
62 {
63 beg1 = bcf_get_pos1(v);
64 end1 = bcf_get_info_int(h, v, "END", 0);
65 if (!end1) end1 = bcf_get_end1(v);
66 }
67 else if (type==VT_VNTR)
68 {
69 beg1 = bcf_get_pos1(v);
70 end1 = bcf_get_info_int(h, v, "END", 0);
71 if (!end1) end1 = bcf_get_end1(v);
72
73 update_vntr_from_info_fields(h, v);
74
75 vs.push_back(v);
76 vntr_vs.push_back(v);
77 }
78 else if (type==VT_SV)
79 {
80 beg1 = bcf_get_pos1(v);
81 end1 = bcf_get_info_int(h, v, "END", 0);
82 if (!end1) end1 = bcf_get_end1(v);
83 }
84 else
85 {
86 std::cerr << "unexpected type in variant construction\n";
87 print();
88 exit(1);
89 }
90 }
91
92 /**
93 * Constructor for a variant object that is to be composed from several variants.
94 * ??????? WHY DO THIS????????
95 */
Variant(Variant * v1,Variant * v2)96 Variant::Variant(Variant* v1, Variant* v2)
97 {
98 type = VT_UNDEFINED;
99
100 chrom = v1->chrom;
101 rid = v1->rid;
102 pos1 = std::min(v1->pos1, v2->pos1);
103 beg1 = std::min(v1->beg1, v2->beg1);
104 end1 = std::max(v1->end1, v2->end1);
105
106 no_overlapping_snps = 0;
107 no_overlapping_indels = 0;
108 no_overlapping_vntrs = 0;
109
110 vs.push_back(v1->v);
111 vs.push_back(v2->v);
112
113 if (v1->type==VT_SNP)
114 {
115 snp_vs.push_back(v1->v);
116 }
117 else if (v1->type==VT_INDEL)
118 {
119 indel_vs.push_back(v1->v);
120 }
121 else if (v1->type==VT_VNTR)
122 {
123 vntr_vs.push_back(v1->v);
124 }
125
126 if (v2->type==VT_SNP)
127 {
128 snp_vs.push_back(v2->v);
129 }
130 else if (v2->type==VT_INDEL)
131 {
132 indel_vs.push_back(v2->v);
133 }
134 else if (v2->type==VT_VNTR)
135 {
136 vntr_vs.push_back(v2->v);
137 }
138 }
139
140 /**
141 * Constructor.
142 */
Variant()143 Variant::Variant()
144 {
145 clear();
146 }
147
148 /**
149 * Destructor.
150 */
~Variant()151 Variant::~Variant()
152 {
153 // if (v) bcf_destroy(v);
154 // for (uint32_t i=0; i<vs.size(); ++i)
155 // {
156 // if (vs[i]) bcf_destroy(vs[i]);
157 // }
158 clear();
159 };
160
161 /**
162 * Clears variant information.
163 */
clear()164 void Variant::clear()
165 {
166 type = VT_REF;
167
168 h = NULL;
169 v = NULL;
170
171 is_new_multiallelic = false;
172 is_involved_in_a_multiallelic = false;
173 associated_new_multiallelic = NULL;
174 updated_multiallelic = false;
175
176 chrom.clear();
177 rid = 0;
178 pos1 = 0;
179 beg1 = 0;
180 end1 = 0;
181 ts = 0;
182 tv = 0;
183 ins = 0;
184 del = 0;
185 max_dlen = 0;
186 min_dlen = 0;
187
188 no_overlapping_snps = 0;
189 no_overlapping_indels = 0;
190 no_overlapping_vntrs = 0;
191
192 contains_N = false;
193 alleles.clear();
194 vntr.clear();
195 vs.clear();
196 snp_vs.clear();
197 indel_vs.clear();
198 vntr_vs.clear();
199 consolidated_vntr_vs.clear();
200 };
201
202 /**
203 * Classifies variants.
204 */
classify(bcf_hdr_t * h,bcf1_t * v)205 int32_t Variant::classify(bcf_hdr_t *h, bcf1_t *v)
206 {
207 clear();
208
209 this->h = h;
210 this->v = v;
211
212 bcf_unpack(v, BCF_UN_STR);
213 chrom.assign(bcf_get_chrom(h, v));
214 rid = bcf_get_rid(v);
215 pos1 = bcf_get_pos1(v);
216 end1 = bcf_get_end1(v);
217 char** allele = bcf_get_allele(v);
218 int32_t n_allele = bcf_get_n_allele(v);
219 int32_t pos0 = pos1-1;
220
221 bool homogeneous_length = true;
222 char* ref = allele[0];
223 int32_t rlen = strlen(ref);
224
225 if (strchr(ref, 'N'))
226 {
227 contains_N = true;
228 }
229
230 //if only ref allele, skip this entire for loop
231 for (size_t i=1; i<n_allele; ++i)
232 {
233 int32_t allele_type = VT_REF;
234
235 //check for symbolic alternative alleles
236 if (strchr(allele[i],'<'))
237 {
238 size_t len = strlen(allele[i]);
239 if (len>=5)
240 {
241 //VN/d+
242 if (allele[i][0]=='<' && allele[i][1]=='V' && allele[i][2]=='N' && allele[i][len-1]=='>' )
243 {
244 for (size_t j=3; j<len-1; ++j)
245 {
246 if (allele[i][j]<'0' || allele[i][j]>'9')
247 {
248 allele_type = VT_VNTR;
249 }
250 }
251 }
252 //VNTR
253 else if (len==6 &&
254 allele[i][0]=='<' &&
255 allele[i][1]=='V' && allele[i][2]=='N' && allele[i][3]=='T' && allele[i][4]=='R' &&
256 allele[i][5]=='>' )
257 {
258 allele_type = VT_VNTR;
259 }
260 //STR
261 else if (len==5 &&
262 allele[i][0]=='<' &&
263 allele[i][1]=='S' && allele[i][2]=='T' && allele[i][3]=='R' &&
264 allele[i][4]=='>' )
265 {
266 allele_type = VT_VNTR;
267 }
268 //ST/d+
269 else if (allele[i][0]=='<' && allele[i][1]=='S' && allele[i][2]=='T' && allele[i][len-1]=='>' )
270 {
271 type = VT_VNTR;
272
273 for (size_t j=3; j<len-1; ++j)
274 {
275 if ((allele[i][j]<'0' || allele[i][j]>'9') && allele[i][j]!='.')
276 {
277 type = VT_SV;
278 }
279 }
280 }
281 }
282
283 if (allele_type==VT_VNTR)
284 {
285 allele_type = VT_VNTR;
286 type |= allele_type;
287 alleles.push_back(Allele(allele_type));
288 }
289 else
290 {
291 allele_type = VT_SV;
292 type |= allele_type;
293 std::string sv_type(allele[i]);
294 alleles.push_back(Allele(allele_type, sv_type));
295 }
296 }
297 //checks for chromosomal breakpoints
298 else if (strchr(allele[i],'[')||strchr(allele[i],']'))
299 {
300 allele_type = VT_SV;
301 type |= allele_type;
302 std::string sv_type("<BND>");
303 alleles.push_back(Allele(allele_type, sv_type));
304 }
305 //non variant record
306 else if (allele[i][0]=='.' || strcmp(allele[i],allele[0])==0)
307 {
308 type = VT_REF;
309 }
310 //explicit sequence of bases
311 else
312 {
313 kstring_t REF = {0,0,0};
314 kstring_t ALT = {0,0,0};
315
316 ref = allele[0];
317 char* alt = allele[i];
318 int32_t alen = strlen(alt);
319
320 if (strchr(alt, 'N'))
321 {
322 contains_N = true;
323 }
324
325 if (rlen!=alen)
326 {
327 homogeneous_length = false;
328 }
329
330 //trimming
331 //this is required in particular for the
332 //characterization of multiallelics and
333 //in general, any unnormalized variant
334 int32_t rl = rlen;
335 int32_t al = alen;
336 //trim right
337 while (rl!=1 && al!=1)
338 {
339 if (ref[rl-1]==alt[al-1])
340 {
341 --rl;
342 --al;
343 }
344 else
345 {
346 break;
347 }
348 }
349
350 //trim left
351 while (rl !=1 && al!=1)
352 {
353 if (ref[0]==alt[0])
354 {
355 ++ref;
356 ++alt;
357 --rl;
358 --al;
359 }
360 else
361 {
362 break;
363 }
364 }
365
366 kputsn(ref, rl, &REF);
367 kputsn(alt, al, &ALT);
368
369 ref = REF.s;
370 alt = ALT.s;
371
372 int32_t mlen = std::min(rl, al);
373 int32_t dlen = al-rl;
374 int32_t diff = 0;
375 int32_t ts = 0;
376 int32_t tv = 0;
377
378 if (mlen==1 && dlen)
379 {
380 char ls, le, ss;
381
382 if (rl>al)
383 {
384 ls = ref[0];
385 le = ref[rl-1];
386 ss = alt[0];
387 }
388 else
389 {
390 ls = alt[0];
391 le = alt[al-1];
392 ss = ref[0];
393 }
394
395 if (ls!=ss && le!=ss)
396 {
397 ++diff;
398
399 if ((ls=='G' && ss=='A') ||
400 (ls=='A' && ss=='G') ||
401 (ls=='C' && ss=='T') ||
402 (ls=='T' && ss=='C'))
403 {
404 ++ts;
405 }
406 else
407 {
408 ++tv;
409 }
410 }
411 }
412 else
413 {
414 for (int32_t j=0; j<mlen; ++j)
415 {
416 if (ref[j]!=alt[j])
417 {
418 ++diff;
419
420 if ((ref[j]=='G' && alt[j]=='A') ||
421 (ref[j]=='A' && alt[j]=='G') ||
422 (ref[j]=='C' && alt[j]=='T') ||
423 (ref[j]=='T' && alt[j]=='C'))
424 {
425 ++ts;
426 }
427 else
428 {
429 ++tv;
430 }
431 }
432 }
433 }
434
435 //substitution variants
436 if (mlen==diff)
437 {
438 allele_type |= mlen==1 ? VT_SNP : VT_MNP;
439 }
440
441 //indel variants
442 if (dlen)
443 {
444 allele_type |= VT_INDEL;
445 }
446
447 //clumped SNPs and MNPs
448 if (diff && diff < mlen) //internal gaps
449 {
450 allele_type |= VT_CLUMPED;
451 }
452
453 type |= allele_type;
454 alleles.push_back(Allele(type, diff, alen, dlen, mlen, ts, tv));
455 ts += ts;
456 tv += tv;
457 ins = dlen>0?1:0;
458 del = dlen<0?1:0;
459
460 if (REF.m) free(REF.s);
461 if (ALT.m) free(ALT.s);
462 }
463 }
464
465 if (type==VT_VNTR)
466 {
467 update_vntr_from_info_fields(h, v);
468 }
469
470 //additionally define MNPs by length of all alleles
471 if (!(type&(VT_VNTR|VT_SV)) && type!=VT_REF)
472 {
473 if (homogeneous_length && rlen>1 && n_allele>1)
474 {
475 type |= VT_MNP;
476 }
477 }
478
479 return type;
480 }
481
482 /**
483 * Updates VNTR related information from INFO fields.
484 */
update_vntr_from_info_fields()485 void Variant::update_vntr_from_info_fields()
486 {
487 vntr.motif = bcf_get_rid(v);
488 char** allele = bcf_get_allele(v);
489 // vntr.exact_repeat_tract.assign(allele[0]);
490 // std::string tags[16] = {"MOTIF", "RU", "BASIS", "MLEN", "BLEN", "REPEAT_TRACT", "COMP", "ENTROPY", "ENTROPY2", "KL_DIVERGENCE", "KL_DIVERGENCE2", "RL", "LL", "RU_COUNTS", "SCORE", "TRF_SCORE"};
491
492 vntr.motif = bcf_get_info_str(h, v, "MOTIF");
493 vntr.ru = bcf_get_info_str(h, v, "RU");
494 vntr.basis = bcf_get_info_str(h, v, "BASIS");
495 if (vntr.basis=="") vntr.basis = VNTR::get_basis(vntr.motif);
496 vntr.mlen = vntr.motif.size();
497 vntr.blen = (int32_t) vntr.basis.size();
498 std::vector<int32_t> i_vec = bcf_get_info_int_vec(h, v, "REPEAT_TRACT", 2, 0);
499 vntr.beg1 = i_vec[0];
500 vntr.end1 = i_vec[1];
501 i_vec = bcf_get_info_int_vec(h, v, "COMP", 4, 0);
502 vntr.comp[0] = i_vec[0];
503 vntr.comp[1] = i_vec[1];
504 vntr.comp[2] = i_vec[2];
505 vntr.comp[3] = i_vec[3];
506 vntr.entropy = bcf_get_info_flt(h, v, "ENTROPY");
507 vntr.entropy2 = bcf_get_info_flt(h, v, "ENTROPY2");
508 vntr.kl_divergence = bcf_get_info_flt(h, v, "KL_DIVERGENCE");
509 vntr.kl_divergence2 = bcf_get_info_flt(h, v, "KL_DIVERGENCE2");
510 vntr.rl = bcf_get_info_int(h, v, "RL");
511 vntr.ll = bcf_get_info_int(h, v, "LL");
512 i_vec = bcf_get_info_int_vec(h, v, "RU_COUNTS", 2, 0);
513 vntr.no_perfect_ru = i_vec[0];
514 vntr.no_ru = i_vec[1];
515 vntr.score = bcf_get_info_flt(h, v, "SCORE");
516 vntr.trf_score = bcf_get_info_int(h, v, "TRF_SCORE");
517
518 vntr.exact_motif = bcf_get_info_str(h, v, "EX_MOTIF");
519 vntr.exact_ru = bcf_get_info_str(h, v, "EX_RU");
520 vntr.exact_basis = bcf_get_info_str(h, v, "EX_BASIS");
521 vntr.exact_mlen = (int32_t) vntr.exact_motif.size();
522 vntr.exact_blen = (int32_t) vntr.exact_basis.size();
523 i_vec = bcf_get_info_int_vec(h, v, "EX_REPEAT_TRACT", 2, 0);
524 vntr.exact_beg1 = i_vec[0];
525 vntr.exact_end1 = i_vec[1];
526 i_vec = bcf_get_info_int_vec(h, v, "EX_COMP", 4, 0);
527 vntr.exact_comp[0] = i_vec[0];
528 vntr.exact_comp[1] = i_vec[1];
529 vntr.exact_comp[2] = i_vec[2];
530 vntr.exact_comp[3] = i_vec[3];
531 vntr.exact_entropy = bcf_get_info_flt(h, v, "EX_ENTROPY");
532 vntr.exact_entropy2 = bcf_get_info_flt(h, v, "EX_ENTROPY2");
533 vntr.exact_kl_divergence = bcf_get_info_flt(h, v, "EX_KL_DIVERGENCE");
534 vntr.exact_kl_divergence2 = bcf_get_info_flt(h, v, "EX_KL_DIVERGENCE2");
535 vntr.exact_rl = bcf_get_info_int(h, v, "EX_RL");
536 vntr.exact_ll = bcf_get_info_int(h, v, "EX_LL");
537 i_vec = bcf_get_info_int_vec(h, v, "EX_RU_COUNTS", 2, 0);
538 vntr.exact_no_perfect_ru = i_vec[0];
539 vntr.exact_no_ru = i_vec[1];
540 vntr.exact_score = bcf_get_info_flt(h, v, "EX_SCORE");
541 vntr.exact_trf_score = bcf_get_info_int(h, v, "EX_TRF_SCORE");
542
543 vntr.fuzzy_motif = bcf_get_info_str(h, v, "FZ_MOTIF");
544 vntr.fuzzy_ru = bcf_get_info_str(h, v, "FZ_RU");
545 vntr.fuzzy_basis = bcf_get_info_str(h, v, "FZ_BASIS");
546 vntr.fuzzy_mlen = (int32_t) vntr.fuzzy_motif.size();
547 vntr.fuzzy_blen = (int32_t) vntr.fuzzy_basis.size();
548 i_vec = bcf_get_info_int_vec(h, v, "FZ_REPEAT_TRACT", 2, 0);
549 vntr.fuzzy_beg1 = i_vec[0];
550 vntr.fuzzy_end1 = i_vec[1];
551 i_vec = bcf_get_info_int_vec(h, v, "FZ_COMP", 4, 0);
552 vntr.fuzzy_comp[0] = i_vec[0];
553 vntr.fuzzy_comp[1] = i_vec[1];
554 vntr.fuzzy_comp[2] = i_vec[2];
555 vntr.fuzzy_comp[3] = i_vec[3];
556 vntr.fuzzy_entropy = bcf_get_info_flt(h, v, "FZ_ENTROPY");
557 vntr.fuzzy_entropy2 = bcf_get_info_flt(h, v, "FZ_ENTROPY2");
558 vntr.fuzzy_kl_divergence = bcf_get_info_flt(h, v, "FZ_KL_DIVERGENCE");
559 vntr.fuzzy_kl_divergence2 = bcf_get_info_flt(h, v, "FZ_KL_DIVERGENCE2");
560 vntr.fuzzy_rl = bcf_get_info_int(h, v, "FZ_RL");
561 vntr.fuzzy_ll = bcf_get_info_int(h, v, "FZ_LL");
562 i_vec = bcf_get_info_int_vec(h, v, "FZ_RU_COUNTS", 2, 0);
563 vntr.fuzzy_no_perfect_ru = i_vec[0];
564 vntr.fuzzy_no_ru = i_vec[1];
565 vntr.fuzzy_score = bcf_get_info_flt(h, v, "FZ_SCORE");
566 vntr.fuzzy_trf_score = bcf_get_info_int(h, v, "FZ_TRF_SCORE");
567 }
568
569 /**
570 * Updates VNTR related information from INFO fields.
571 */
update_vntr_from_info_fields(bcf_hdr_t * h,bcf1_t * v)572 void Variant::update_vntr_from_info_fields(bcf_hdr_t *h, bcf1_t *v)
573 {
574 this->h = h;
575 this->v = v;
576
577 update_vntr_from_info_fields();
578 }
579
580 /**
581 * Updates a bcf1_t object with VNTR information.
582 */
update_bcf_with_vntr_info(bcf_hdr_t * h,bcf1_t * v)583 void Variant::update_bcf_with_vntr_info(bcf_hdr_t *h, bcf1_t *v)
584 {
585 bcf_update_info_flag(h, v, "FUZZY", NULL, 1);
586
587 //VNTR position and sequences
588 bcf_set_pos1(v, vntr.fuzzy_beg1);
589 kstring_t s = {0,0,0};
590 s.l = 0;
591 kputs(vntr.fuzzy_repeat_tract.c_str(), &s);
592 kputc(',', &s);
593 kputs("<VNTR>", &s);
594 bcf_update_alleles_str(h, v, s.s);
595
596 //VNTR motif
597 bcf_update_info_string(h, v, "MOTIF", vntr.motif.c_str());
598 bcf_update_info_string(h, v, "RU", vntr.ru.c_str());
599
600 //VNTR characteristics
601 bcf_update_info_float(h, v, "FZ_CONCORDANCE", &vntr.fuzzy_score, 1);
602 bcf_update_info_float(h, v, "FZ_RL", &vntr.fuzzy_rl, 1);
603 bcf_update_info_float(h, v, "FZ_LL", &vntr.fuzzy_ll, 1);
604 int32_t flank_pos1[2] = {vntr.exact_beg1-1, vntr.exact_end1+1};
605 bcf_update_info_int32(h, v, "FLANKS", &flank_pos1, 2);
606
607 //flank positions
608 int32_t fuzzy_flank_pos1[2] = {vntr.fuzzy_beg1-1, vntr.fuzzy_end1+1};
609 bcf_update_info_int32(h, v, "FZ_FLANKS", &fuzzy_flank_pos1, 2);
610 int32_t ru_count[2] = {vntr.fuzzy_no_perfect_ru, vntr.fuzzy_no_ru};
611 bcf_update_info_int32(h, v, "FZ_RU_COUNTS", &ru_count, 2);
612
613 if (vntr.is_large_repeat_tract) bcf_update_info_flag(h, v, "LARGE_REPEAT_REGION", NULL, 1);
614
615 if (s.m) free(s.s);
616 }
617
618 /**
619 * Prints variant information.
620 */
print()621 void Variant::print()
622 {
623 std::cerr << "type : " << vtype2string(type) << "\n";
624 std::cerr << "\n";
625 if (h!=NULL && v!=NULL) bcf_print_liten(h, v);
626 std::cerr << "chrom : " << chrom << "\n";
627 std::cerr << "rid : " << rid << "\n";
628 std::cerr << "beg1 : " << beg1 << "\n";
629 std::cerr << "end1 : " << end1 << "\n";
630
631 std::cerr << "motif: " << vntr.motif << "\n";
632 std::cerr << "rlen : " << vntr.motif.size() << "\n";
633
634 for (int32_t i=0; i<alleles.size(); ++i)
635 {
636 std::cerr << "\tallele: " << i << "\n";
637 std::cerr << "\t type: " << vtype2string(alleles[i].type) << "\n";
638 std::cerr << "\t diff: " << alleles[i].diff << "\n";
639 std::cerr << "\t alen: " << alleles[i].alen << "\n";
640 std::cerr << "\t dlen: " << alleles[i].dlen << "\n";
641 }
642 };
643
644 /**
645 * Gets a string representation of the variant.
646 */
get_variant_string()647 std::string Variant::get_variant_string()
648 {
649 kstring_t var = {0,0,0};
650 bcf_unpack(v, BCF_UN_STR);
651 var.l = 0;
652 kputs(bcf_get_chrom(h, v), &var);
653 kputc(':', &var);
654 kputw(bcf_get_pos1(v), &var);
655 kputc(':', &var);
656 for (size_t i=0; i<bcf_get_n_allele(v); ++i)
657 {
658 if (i) kputc('/', &var);
659 kputs(bcf_get_alt(v, i), &var);
660 }
661
662 std::string str(var.s);
663
664 if (var.m) free(var.s);
665
666 return str;
667 }
668
669 /**
670 * Gets a string representation of the underlying VNTR by exact alignment.
671 */
get_vntr_string(kstring_t * s)672 void Variant::get_vntr_string(kstring_t* s)
673 {
674 s->l = 0;
675 kputs(chrom.c_str(), s);
676 kputc(':', s);
677 kputw(vntr.exact_beg1, s);
678 kputc(':', s);
679 kputs(vntr.exact_repeat_tract.c_str(), s);
680 kputc(':', s);
681 kputs("<VNTR>", s);
682 kputc(':', s);
683 kputs(vntr.motif.c_str(), s);
684 };
685
686 /**
687 * Gets a string representation of the underlying VNTR by fuzzy alignment.
688 */
get_fuzzy_vntr_string(kstring_t * s)689 void Variant::get_fuzzy_vntr_string(kstring_t* s)
690 {
691 s->l = 0;
692 kputs(chrom.c_str(), s);
693 kputc(':', s);
694 kputw(vntr.fuzzy_beg1, s);
695 kputc(':', s);
696 kputs(vntr.fuzzy_repeat_tract.c_str(), s);
697 kputc(':', s);
698 kputs("<VNTR>", s);
699 kputc(':', s);
700 kputs(vntr.motif.c_str(), s);
701 };
702
703 /**
704 * Converts VTYPE to string.
705 */
vtype2string(int32_t VTYPE)706 std::string Variant::vtype2string(int32_t VTYPE)
707 {
708 std::string s;
709
710 if (!VTYPE)
711 {
712 s += (s.size()==0) ? "" : "/";
713 s += "REF";
714 }
715
716 if (VTYPE & VT_SNP)
717 {
718 s += (s.size()==0) ? "" : "/";
719 s += "SNP";
720 }
721
722 if (VTYPE & VT_MNP)
723 {
724 s += (s.size()==0) ? "" : "/";
725 s += "MNP";
726 }
727
728 if (VTYPE & VT_INDEL)
729 {
730 s += (s.size()==0) ? "" : "/";
731 s += "INDEL";
732 }
733
734 if (VTYPE & VT_CLUMPED)
735 {
736 s += (s.size()==0) ? "" : "/";
737 s += "CLUMPED";
738 }
739
740 if (VTYPE & VT_VNTR)
741 {
742 s += (s.size()==0) ? "" : "/";
743 s += "VNTR";
744 }
745
746 if (VTYPE & VT_SV)
747 {
748 s += (s.size()==0) ? "" : "/";
749 s += "SV";
750 }
751
752 return s;
753 }