1 /* The MIT License
2
3 Copyright (c) 2017 Adrian Tan <atks@umich.edu>
4
5 Permission is hereby granted, free of charge, to any person obtaining a copy
6 of this software and associated documentation files (the "Software"), to deal
7 in the Software without restriction, including without limitation the rights
8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 copies of the Software, and to permit persons to whom the Software is
10 furnished to do so, subject to the following conditions:
11
12 The above copyright notice and this permission notice shall be included in
13 all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 THE SOFTWARE.
22 */
23
24 #include "indel_annotator.h"
25
26 /**
27 * Constructor.
28 */
IndelAnnotator(std::string & ref_fasta_file,bool debug)29 IndelAnnotator::IndelAnnotator(std::string& ref_fasta_file, bool debug)
30 {
31 vm = new VariantManip(ref_fasta_file.c_str());
32
33 fai = fai_load(ref_fasta_file.c_str());
34 if (fai==NULL)
35 {
36 fprintf(stderr, "[%s:%d %s] Cannot load genome index: %s\n", __FILE__, __LINE__, __FUNCTION__, ref_fasta_file.c_str());
37 exit(1);
38 }
39
40 cre = new CandidateRegionExtractor(ref_fasta_file, debug);
41 cmp = new CandidateMotifPicker(debug);
42 fd = new FlankDetector(ref_fasta_file, debug);
43
44 this->debug = debug;
45 };
46
47 /**
48 * Destructor.
49 */
~IndelAnnotator()50 IndelAnnotator::~IndelAnnotator()
51 {
52 delete vm;
53 fai_destroy(fai);
54 }
55
56 /**
57 * Annotates VNTR characteristics.
58 * @mode - e for exact alignment annotation
59 * - f for fuzzy alignment annotation
60 * -
61 */
annotate(Variant & variant)62 void IndelAnnotator::annotate(Variant& variant)
63 {
64 VNTR& vntr = variant.vntr;
65
66 bcf_hdr_t* h = variant.h;
67 bcf1_t* v = variant.v;
68
69 //update chromosome and position
70 variant.rid = bcf_get_rid(v);
71 variant.pos1 = bcf_get_pos1(v);
72
73 //this is for reannotating an VNTR record
74 //this is more for the purpose of evaluation to
75 //check if vt's algorithm is concordant with
76 //VNTRs from other sources.
77 if (variant.type==VT_VNTR)
78 {
79 if (debug) std::cerr << "============================================\n";
80 if (debug) std::cerr << "ANNOTATING VNTR/STR \n";
81
82 //1. pick candidate region
83 cre->pick_candidate_region(variant, REFERENCE, FINAL);
84
85 //2. set motifs from info field
86 cmp->set_motif_from_info_field(variant);
87
88 //3. compute purity scores
89 fd->compute_purity_score(variant, FINAL);
90
91 //4. compute composition and sequence statistics
92 fd->compute_composition_and_entropy(variant, FINAL);
93 }
94 //main purpose - annotation of Indels.
95 else if (variant.type&VT_INDEL)
96 {
97 //the basic steps in annotating a TR
98 //
99 //1. extract a region that has a chance of containing the repeat units
100 //2. choose a set of candidate motifs and pick motif
101 //3. detect repeat region and evaluate
102 //4. iterate 2 and 3
103
104 if (debug) std::cerr << "============================================\n";
105 if (debug) std::cerr << "ANNOTATING INDEL\n";
106
107
108
109
110
111 //1. selects candidate region by fuzzy left and right alignment
112 cre->pick_candidate_region(variant, EXACT_LEFT_RIGHT_ALIGNMENT, EXACT);
113 cmp->update_exact_repeat_unit(variant);
114
115 //2. detect candidate motifs from a reference sequence
116 cmp->generate_candidate_motifs(variant);
117
118 //this cannot possibly fail as next_motif() guarantees it
119 if (!cmp->next_motif(variant, CHECK_MOTIF_PRESENCE_IN_ALLELE))
120 {
121 //fall back on exact motif chosen.
122 VNTR& vntr = variant.vntr;
123 vntr.fuzzy_motif = vntr.exact_motif;
124 vntr.fuzzy_ru = vntr.exact_ru;
125 vntr.fuzzy_basis = vntr.exact_basis;
126 vntr.fuzzy_mlen = vntr.exact_mlen;
127 vntr.fuzzy_blen = vntr.exact_blen;;
128
129 if (debug) std::cerr << "updating fuzzy motif with exact motifs\n";
130 }
131
132 fd->detect_flanks(variant, EXACT);
133 fd->compute_purity_score(variant, EXACT);
134 fd->compute_composition_and_entropy(variant, EXACT);
135
136 fd->detect_flanks(variant, FUZZY);
137 fd->compute_purity_score(variant, FUZZY);
138 fd->compute_composition_and_entropy(variant, FUZZY);
139
140 //introduce reiteration based on concordance and exact concordance.
141
142 if (debug)
143 {
144 std::cerr << "\n";
145 vntr.print();
146 std::cerr << "\n";
147 }
148
149 if (debug) std::cerr << "============================================\n";
150 return;
151
152 }
153 }