1 /* The MIT License
2 Copyright (c) 2014 Adrian Tan <atks@umich.edu>
3 Permission is hereby granted, free of charge, to any person obtaining a copy
4 of this software and associated documentation files (the "Software"), to deal
5 in the Software without restriction, including without limitation the rights
6 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 copies of the Software, and to permit persons to whom the Software is
8 furnished to do so, subject to the following conditions:
9 The above copyright notice and this permission notice shall be included in
10 all copies or substantial portions of the Software.
11 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
12 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
13 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
14 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
15 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
16 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
17 THE SOFTWARE.
18 */
19
20 #include "sv_tree.h"
21
22 /**
23 * Constructor.
24 */
SVNode()25 SVNode::SVNode()
26 {
27 this->parent = NULL;
28 this->type = {0,0,0};
29 this->depth = 0;
30 this->count = 0;
31 this->mcount = 0;
32 };
33
34 /**
35 * Constructor.
36 */
SVNode(const char * type)37 SVNode::SVNode(const char* type)
38 {
39 this->parent = NULL;
40 this->type = {0,0,0};
41 this->depth = 0;
42 this->count = 0;
43 this->mcount = 0;
44 kputs(type, &this->type);
45 };
46
47 /**
48 * Constructor.
49 */
SVNode(const char * type,int32_t depth)50 SVNode::SVNode(const char* type, int32_t depth)
51 {
52 this->parent = NULL;
53 this->type = {0,0,0};
54 this->depth = depth;
55 this->count = 0;
56 this->mcount = 0;
57 kputs(type, &this->type);
58 };
59
60 /**
61 * Destructor.
62 */
~SVNode()63 SVNode::~SVNode()
64 {
65 for (int32_t i=0; i<children.size(); ++i)
66 {
67 delete children[i];
68 }
69 children.clear();
70 this->parent = NULL;
71
72 if (type.m) free(type.s);
73 };
74
75 /**
76 * Set depth.
77 */
set_depth(int32_t depth)78 void SVNode::set_depth(int32_t depth)
79 {
80 this->depth = depth;
81 };
82
83 /**
84 * Increment count.
85 */
increment_count()86 void SVNode::increment_count()
87 {
88 ++count;
89
90 if (parent!=NULL)
91 {
92 parent->increment_count();
93 }
94 };
95
96 /**
97 * Increment mcount.
98 */
increment_mcount()99 void SVNode::increment_mcount()
100 {
101 ++mcount;
102
103 if (parent!=NULL)
104 {
105 parent->increment_mcount();
106 }
107 };
108
109 /**
110 * Clear values.
111 */
clear()112 void SVNode::clear()
113 {
114 if (type.m) free(type.s);
115 count = 0;
116 };
117
118 /**
119 * Print values.
120 */
print()121 void SVNode::print()
122 {
123 for (int32_t i=0; i<depth; ++i) std::cerr << "\t";
124
125 std::cerr << sv_type2string(type.s) << " (" << count << ")\n";
126
127 for (int32_t i=0; i<children.size(); ++i)
128 {
129 children[i]->print();
130 }
131 };
132
133 /**
134 * For translating reserved keywords.
135 */
sv_type2string(char * sv_type)136 std::string SVNode::sv_type2string(char* sv_type)
137 {
138 if (!strcmp(sv_type,"TRA"))
139 {
140 return "translocation";
141 }
142 else if (!strcmp(sv_type,"DEL"))
143 {
144 return "deletion";
145 }
146 else if (!strcmp(sv_type,"INS"))
147 {
148 return "insertion";
149 }
150 else if (!strcmp(sv_type,"INV"))
151 {
152 return "inversion";
153 }
154 else if (!strcmp(sv_type,"ME"))
155 {
156 return "mobile element";
157 }
158 else if (!strcmp(sv_type,"MT"))
159 {
160 return "numt";
161 }
162 else if (!strcmp(sv_type,"DUP"))
163 {
164 return "duplication";
165 }
166 else if (!strcmp(sv_type,"TANDEM"))
167 {
168 return "tandem repeats";
169 }
170 else if (!strcmp(sv_type,"CNV"))
171 {
172 return "copy number variation";
173 }
174 else
175 {
176 return sv_type;
177 }
178 };
179
180 /**
181 * Constructor.
182 */
SVTree()183 SVTree::SVTree()
184 {
185 root = new SVNode("root", 0);
186 max_depth = 0;
187 mixed_sv_count = 0;
188 m = kh_init(xdict);
189
190 this->add("<TRA>");
191 this->add("<DEL>");
192 this->add("<INS>");
193 this->add("<DUP>");
194 this->add("<INV>");
195 this->add("<CNV>");
196 this->add("<DUP:TANDEM>");
197 this->add("<DEL:ME>");
198 this->add("<INS:ME>");
199 this->add("<INS:MT>");
200 this->add("<INS:ME:ALU>");
201 this->add("<INS:ME:LINE1>");
202 this->add("<INS:ME:SVA>");
203 // this->add("<RSCLIP>");
204 // this->add("<LSCLIP>");
205 };
206
207 /**
208 * Destructor.
209 */
~SVTree()210 SVTree::~SVTree()
211 {
212 if (root)
213 {
214 delete root;
215 }
216 root = NULL;
217
218 // kh_destroy(xdict, m);
219 };
220
221 /**
222 * Adds a new tag, returns true if successful.
223 */
add(const char * sv_type)224 bool SVTree::add(const char* sv_type)
225 {
226 //update hash
227 khiter_t k;
228 int32_t ret = 0;
229
230 if ((k=kh_get(xdict, m, sv_type))==kh_end(m))
231 {
232 std::vector<std::string> vec;
233 split(vec, "<:>", sv_type);
234
235 SVNode* cnode = root;
236 for (size_t i=0; i<vec.size(); ++i)
237 {
238 bool found_type = false;
239 for (size_t j=0; j<cnode->children.size(); ++j)
240 {
241 if (!strcmp(cnode->children[j]->type.s, vec[i].c_str()))
242 {
243 cnode = cnode->children[j];
244 found_type = true;
245 break;
246 }
247 }
248
249 if (!found_type)
250 {
251 max_depth = i+1>max_depth?i+1:max_depth;
252 SVNode* newnode = new SVNode(vec[i].c_str(), i+1);
253 char* new_sv_type = strdup(sv_type);
254 k = kh_put(xdict, m, new_sv_type, &ret);
255 if (!ret)
256 {
257 //already present
258 free(new_sv_type);
259 }
260 kh_value(m, k) = newnode;
261 cnode->children.push_back(newnode);
262 newnode->parent = cnode;
263 cnode = newnode;
264 }
265 }
266
267 return true;
268 }
269
270 return false;
271 }
272
273 /**
274 * Observes and update the count of a new tag.
275 */
count(Variant & variant)276 void SVTree::count(Variant& variant)
277 {
278 khiter_t k;
279 int32_t ret = 0;
280 bool mixed_sv = false;
281
282 for (size_t i=0; i<variant.alleles.size(); ++i)
283 {
284 const char* sv_type = variant.alleles[i].sv_type.c_str();
285 if ((k=kh_get(xdict, m, sv_type))==kh_end(m))
286 {
287 this->add(sv_type);
288 k=kh_get(xdict, m, sv_type);
289 }
290
291 if (variant.alleles[i].sv_type!=variant.alleles[0].sv_type)
292 {
293 mixed_sv = true;
294 }
295 }
296
297 if (mixed_sv)
298 {
299 ++mixed_sv_count;
300 }
301 else
302 {
303 if (variant.alleles.size()==1)
304 {
305 kh_value(m, k)->increment_count();
306 }
307 else
308 {
309 kh_value(m, k)->increment_mcount();
310 }
311 }
312 };
313
314 /**
315 * Enumerates the children in a depth first order.
316 */
enumerate_dfs()317 std::vector<SVNode*> SVTree::enumerate_dfs()
318 {
319 std::vector<SVNode*> s;
320 enumerate_dfs(s, root);
321 return s;
322 };
323
324 /**
325 * Helper function for enumerating the children in a depth first order.
326 */
enumerate_dfs(std::vector<SVNode * > & s,SVNode * node)327 void SVTree::enumerate_dfs(std::vector<SVNode*>& s, SVNode* node)
328 {
329 s.push_back(node);
330 for (size_t i=0; i<node->children.size(); ++i)
331 {
332 enumerate_dfs(s, node->children[i]);
333 }
334 };
335
336 /**
337 * Print this tree.
338 */
print()339 void SVTree::print()
340 {
341 root->print();
342 };