1 /* The MIT License
2    Copyright (c) 2014 Adrian Tan <atks@umich.edu>
3    Permission is hereby granted, free of charge, to any person obtaining a copy
4    of this software and associated documentation files (the "Software"), to deal
5    in the Software without restriction, including without limitation the rights
6    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7    copies of the Software, and to permit persons to whom the Software is
8    furnished to do so, subject to the following conditions:
9    The above copyright notice and this permission notice shall be included in
10    all copies or substantial portions of the Software.
11    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
17    THE SOFTWARE.
18 */
19 
20 #include "sv_tree.h"
21 
22 /**
23  * Constructor.
24  */
SVNode()25 SVNode::SVNode()
26 {
27     this->parent = NULL;
28     this->type = {0,0,0};
29     this->depth = 0;
30     this->count = 0;
31     this->mcount = 0;
32 };
33 
34 /**
35  * Constructor.
36  */
SVNode(const char * type)37 SVNode::SVNode(const char* type)
38 {
39     this->parent = NULL;
40     this->type = {0,0,0};
41     this->depth = 0;
42     this->count = 0;
43     this->mcount = 0;
44     kputs(type, &this->type);
45 };
46 
47 /**
48  * Constructor.
49  */
SVNode(const char * type,int32_t depth)50 SVNode::SVNode(const char* type, int32_t depth)
51 {
52     this->parent = NULL;
53     this->type = {0,0,0};
54     this->depth = depth;
55     this->count = 0;
56     this->mcount = 0;
57     kputs(type, &this->type);
58 };
59 
60 /**
61  * Destructor.
62  */
~SVNode()63 SVNode::~SVNode()
64 {
65     for (int32_t i=0; i<children.size(); ++i)
66     {
67         delete children[i];
68     }
69     children.clear();
70     this->parent = NULL;
71 
72     if (type.m) free(type.s);
73 };
74 
75 /**
76  * Set depth.
77  */
set_depth(int32_t depth)78 void SVNode::set_depth(int32_t depth)
79 {
80     this->depth = depth;
81 };
82 
83 /**
84  * Increment count.
85  */
increment_count()86 void SVNode::increment_count()
87 {
88     ++count;
89 
90     if (parent!=NULL)
91     {
92         parent->increment_count();
93     }
94 };
95 
96 /**
97  * Increment mcount.
98  */
increment_mcount()99 void SVNode::increment_mcount()
100 {
101     ++mcount;
102 
103     if (parent!=NULL)
104     {
105         parent->increment_mcount();
106     }
107 };
108 
109 /**
110  * Clear values.
111  */
clear()112 void SVNode::clear()
113 {
114     if (type.m) free(type.s);
115     count = 0;
116 };
117 
118 /**
119  * Print values.
120  */
print()121 void SVNode::print()
122 {
123     for (int32_t i=0; i<depth; ++i) std::cerr << "\t";
124 
125     std::cerr << sv_type2string(type.s) << " (" << count << ")\n";
126 
127     for (int32_t i=0; i<children.size(); ++i)
128     {
129         children[i]->print();
130     }
131 };
132 
133 /**
134  *  For translating reserved keywords.
135  */
sv_type2string(char * sv_type)136 std::string SVNode::sv_type2string(char* sv_type)
137 {
138     if (!strcmp(sv_type,"TRA"))
139     {
140         return "translocation";
141     }
142     else if (!strcmp(sv_type,"DEL"))
143     {
144         return "deletion";
145     }
146     else if (!strcmp(sv_type,"INS"))
147     {
148         return "insertion";
149     }
150     else if (!strcmp(sv_type,"INV"))
151     {
152         return "inversion";
153     }
154     else if (!strcmp(sv_type,"ME"))
155     {
156         return "mobile element";
157     }
158     else if (!strcmp(sv_type,"MT"))
159     {
160         return "numt";
161     }
162     else if (!strcmp(sv_type,"DUP"))
163     {
164         return "duplication";
165     }
166     else if (!strcmp(sv_type,"TANDEM"))
167     {
168         return "tandem repeats";
169     }
170     else if (!strcmp(sv_type,"CNV"))
171     {
172         return "copy number variation";
173     }
174     else
175     {
176         return sv_type;
177     }
178 };
179 
180 /**
181  * Constructor.
182  */
SVTree()183 SVTree::SVTree()
184 {
185     root = new SVNode("root", 0);
186     max_depth = 0;
187     mixed_sv_count = 0;
188     m = kh_init(xdict);
189 
190     this->add("<TRA>");
191     this->add("<DEL>");
192     this->add("<INS>");
193     this->add("<DUP>");
194     this->add("<INV>");
195     this->add("<CNV>");
196     this->add("<DUP:TANDEM>");
197     this->add("<DEL:ME>");
198     this->add("<INS:ME>");
199     this->add("<INS:MT>");
200     this->add("<INS:ME:ALU>");
201     this->add("<INS:ME:LINE1>");
202     this->add("<INS:ME:SVA>");
203 //    this->add("<RSCLIP>");
204 //    this->add("<LSCLIP>");
205 };
206 
207 /**
208  * Destructor.
209  */
~SVTree()210 SVTree::~SVTree()
211 {
212     if (root)
213     {
214         delete root;
215     }
216     root = NULL;
217 
218    // kh_destroy(xdict, m);
219 };
220 
221 /**
222  * Adds a new tag, returns true if successful.
223  */
add(const char * sv_type)224 bool SVTree::add(const char* sv_type)
225 {
226     //update hash
227     khiter_t k;
228     int32_t ret = 0;
229 
230     if ((k=kh_get(xdict, m, sv_type))==kh_end(m))
231     {
232         std::vector<std::string> vec;
233         split(vec, "<:>", sv_type);
234 
235         SVNode* cnode = root;
236         for (size_t i=0; i<vec.size(); ++i)
237         {
238             bool found_type = false;
239             for (size_t j=0; j<cnode->children.size(); ++j)
240             {
241                 if (!strcmp(cnode->children[j]->type.s, vec[i].c_str()))
242                 {
243                     cnode = cnode->children[j];
244                     found_type = true;
245                     break;
246                 }
247             }
248 
249             if (!found_type)
250             {
251                 max_depth = i+1>max_depth?i+1:max_depth;
252                 SVNode* newnode = new SVNode(vec[i].c_str(), i+1);
253                 char* new_sv_type = strdup(sv_type);
254                 k = kh_put(xdict, m, new_sv_type, &ret);
255                 if (!ret)
256                 {
257                     //already present
258                     free(new_sv_type);
259                 }
260                 kh_value(m, k) = newnode;
261                 cnode->children.push_back(newnode);
262                 newnode->parent = cnode;
263                 cnode = newnode;
264             }
265         }
266 
267         return true;
268     }
269 
270     return false;
271 }
272 
273 /**
274  * Observes and update the count of a new tag.
275  */
count(Variant & variant)276 void SVTree::count(Variant& variant)
277 {
278     khiter_t k;
279     int32_t ret = 0;
280     bool mixed_sv = false;
281 
282     for (size_t i=0; i<variant.alleles.size(); ++i)
283     {
284         const char* sv_type = variant.alleles[i].sv_type.c_str();
285         if ((k=kh_get(xdict, m, sv_type))==kh_end(m))
286         {
287             this->add(sv_type);
288             k=kh_get(xdict, m, sv_type);
289         }
290 
291         if (variant.alleles[i].sv_type!=variant.alleles[0].sv_type)
292         {
293             mixed_sv = true;
294         }
295     }
296 
297     if (mixed_sv)
298     {
299         ++mixed_sv_count;
300     }
301     else
302     {
303         if (variant.alleles.size()==1)
304         {
305             kh_value(m, k)->increment_count();
306         }
307         else
308         {
309             kh_value(m, k)->increment_mcount();
310         }
311     }
312 };
313 
314 /**
315  * Enumerates the children in a depth first order.
316  */
enumerate_dfs()317 std::vector<SVNode*> SVTree::enumerate_dfs()
318 {
319     std::vector<SVNode*> s;
320     enumerate_dfs(s, root);
321     return s;
322 };
323 
324 /**
325  * Helper function for enumerating the children in a depth first order.
326  */
enumerate_dfs(std::vector<SVNode * > & s,SVNode * node)327 void SVTree::enumerate_dfs(std::vector<SVNode*>& s, SVNode* node)
328 {
329     s.push_back(node);
330     for (size_t i=0; i<node->children.size(); ++i)
331     {
332         enumerate_dfs(s, node->children[i]);
333     }
334 };
335 
336 /**
337  * Print this tree.
338  */
print()339 void SVTree::print()
340 {
341     root->print();
342 };