1 /***************************************************************************
2 * Copyright (C) 2006 by BUI Quang Minh, Steffen Klaere, Arndt von Haeseler *
3 * minh.bui@univie.ac.at *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19 ***************************************************************************/
20 #include "mexttree.h"
21 #include "alignment/alignment.h"
22
generateRandomTree(TreeGenType tree_type,Params & params,bool binary)23 void MExtTree::generateRandomTree(TreeGenType tree_type, Params ¶ms, bool binary) {
24 Alignment *alignment = NULL;
25 if (params.aln_file) {
26 // generate random tree with leaf sets taken from an alignment
27 alignment = createAlignment(params.aln_file, params.sequence_type, params.intype, params.model_name);
28 params.sub_size = alignment->getNSeq();
29 }
30 if (params.sub_size < 3) {
31 outError(ERR_FEW_TAXA);
32 }
33 switch (tree_type) {
34 case YULE_HARDING:
35 generateYuleHarding(params, binary);
36 break;
37 case UNIFORM:
38 generateUniform(params.sub_size, binary);
39 break;
40 case CATERPILLAR:
41 generateCaterpillar(params.sub_size);
42 break;
43 case BALANCED:
44 generateBalanced(params.sub_size);
45 break;
46 case STAR_TREE:
47 generateStarTree(params);
48 break;
49 default:
50 break;
51 }
52 if (!alignment) return;
53 NodeVector taxa;
54 getTaxa(taxa);
55 ASSERT(taxa.size() == params.sub_size);
56 for (NodeVector::iterator it = taxa.begin(); it != taxa.end(); it++)
57 (*it)->name = alignment->getSeqName((*it)->id);
58 }
59
setZeroInternalBranches(int num_zero_len)60 void MExtTree::setZeroInternalBranches(int num_zero_len) {
61 NodeVector nodes, nodes2;
62 generateNNIBraches(nodes, nodes2);
63 if (num_zero_len > nodes.size()) outError("The specified number of zero branches is too much");
64 for (int i = 0; i < num_zero_len;) {
65 int id = random_int(nodes.size());
66 if (!nodes[id]) continue;
67 i++;
68 nodes[id]->findNeighbor(nodes2[id])->length = 0.0;
69 nodes2[id]->findNeighbor(nodes[id])->length = 0.0;
70 nodes[id] = NULL;
71 nodes2[id] = NULL;
72 }
73 }
74
generateCaterpillar(int size)75 void MExtTree::generateCaterpillar(int size) {
76 if (size < 3)
77 outError(ERR_FEW_TAXA);
78 root = newNode();
79 int i;
80 NodeVector myleaves;
81 NodeVector innodes;
82 Node *node;
83 double len;
84
85 innodes.push_back(root);
86 // create initial tree with 3 leaves
87 for (i = 0; i < 3; i++)
88 {
89 node = newNode();
90 len = random_double();
91 root->addNeighbor(node, len);
92 node->addNeighbor(root, len);
93 myleaves.push_back(node);
94 }
95
96 // additionally add a leaf
97 for (i = 3; i < size; i++)
98 {
99 int index;
100 index = i-1;
101
102 node = myleaves[index];
103 innodes.push_back(node);
104 // add the first leaf
105 Node *newleaf = newNode();
106 len = random_double();
107 node->addNeighbor(newleaf, len);
108 newleaf->addNeighbor(node, len);
109 myleaves[index] = newleaf;
110
111 // add the second leaf
112 newleaf = newNode();
113 len = random_double();
114 node->addNeighbor(newleaf, len);
115 newleaf->addNeighbor(node, len);
116 myleaves.push_back(newleaf);
117
118 }
119
120 root = myleaves[0];
121 // indexing the leaves
122 setLeavesName(myleaves);
123
124 leafNum = myleaves.size();
125 nodeNum = leafNum;
126 initializeTree();
127
128 }
129
130
generateBalanced(int size)131 void MExtTree::generateBalanced(int size) {
132 if (size < 3)
133 outError(ERR_FEW_TAXA);
134 root = newNode();
135 int i;
136 NodeVector myleaves;
137 Node *node;
138 double len;
139
140 myleaves.push_back(root);
141 // create initial tree with 2 leaves
142 node = newNode();
143 len = random_double();
144 root->addNeighbor(node, len);
145 node->addNeighbor(root, len);
146 myleaves.push_back(node);
147
148 while (myleaves.size() < size) {
149
150 int cur_size = myleaves.size();
151 // additionally add a leaf
152 for (i = 0; i < cur_size && myleaves.size() < size; i++)
153 {
154 int index = i;
155
156 node = myleaves[index];
157 // add the first leaf
158 Node *newleaf = newNode();
159 len = random_double();
160 node->addNeighbor(newleaf, len);
161 newleaf->addNeighbor(node, len);
162 myleaves[index] = newleaf;
163
164 // add the second leaf
165 newleaf = newNode();
166 len = random_double();
167 node->addNeighbor(newleaf, len);
168 newleaf->addNeighbor(node, len);
169 myleaves.push_back(newleaf);
170
171 }
172 }
173
174 root = myleaves[0];
175 // indexing the leaves
176 setLeavesName(myleaves);
177
178 leafNum = myleaves.size();
179 nodeNum = leafNum;
180 initializeTree();
181
182 }
183
184 /**
185 generate a random tree following uniform model
186 */
generateUniform(int size,bool binary)187 void MExtTree::generateUniform(int size, bool binary)
188 {
189 if (size < 3)
190 outError(ERR_FEW_TAXA);
191 int i;
192
193 // list of left- and right-end of branches
194 NodeVector leftend, rightend, myleaves;
195 Node *node;
196 double len;
197
198 root = newNode(0, "0");
199 // create initial tree with 2 leaves
200 node = newNode(1, "1");
201 len = random_double();
202 root->addNeighbor(node, len);
203 node->addNeighbor(root, len);
204
205 leftend.push_back(root);
206 rightend.push_back(node);
207
208 myleaves.push_back(root);
209 myleaves.push_back(node);
210
211 // additionally add a leaf
212 for (i = 2; i < size; i++)
213 {
214 int index;
215 index = random_int(2*i-3);
216 //cout << "step " << i << " left = " << leftend[index]->id << " right = " << rightend[index]->id << endl;
217
218 // add an internal node
219 Node *newnode = newNode(size+i-2);
220 // reconnect the left end
221 node = leftend[index];
222 for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
223 if ((*it)->node == rightend[index]) {
224 len = random_double();
225 (*it)->node = newnode;
226 (*it)->length = len;
227 newnode->addNeighbor(node, len);
228 //cout << " left " << leftend[index]->id << " " << newnode->id << endl;
229 break;
230 }
231 // reconnect the right end
232 node = rightend[index];
233 for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
234 if ((*it)->node == leftend[index]) {
235 len = random_double();
236 (*it)->node = newnode;
237 (*it)->length = len;
238 newnode->addNeighbor(node, len);
239 //cout << " right " << rightend[index]->id << " " << newnode->id << endl;
240 break;
241 }
242
243 // add a new leaf
244 Node *newleaf = newNode(i, i);
245 len = random_double();
246 newnode->addNeighbor(newleaf, len);
247 newleaf->addNeighbor(newnode, len);
248
249 // update the leftend and rightend list
250 leftend.push_back(newnode);
251 rightend.push_back(rightend[index]);
252
253 leftend.push_back(newnode);
254 rightend.push_back(newleaf);
255
256 rightend[index] = newnode;
257
258 myleaves.push_back(newleaf);
259
260 }
261
262 // indexing the leaves
263 setLeavesName(myleaves);
264
265 leafNum = size;
266 nodeNum = leafNum;
267 initializeTree();
268
269 }
270
271 /**
272 generate a random tree following Yule Harding model
273 */
generateYuleHarding(Params & params,bool binary)274 void MExtTree::generateYuleHarding(Params ¶ms, bool binary) {
275 int size = params.sub_size;
276 if (size < 3)
277 outError(ERR_FEW_TAXA);
278 root = newNode();
279 int i;
280 NodeVector myleaves;
281 NodeVector innodes;
282 Node *node;
283 double len;
284
285 innodes.push_back(root);
286 // create initial tree with 3 leaves
287 for (i = 0; i < 3; i++) {
288 node = newNode();
289 len = randomLen(params);
290 root->addNeighbor(node, len);
291 node->addNeighbor(root, len);
292 myleaves.push_back(node);
293 }
294
295 // additionally add a leaf
296 for (i = 3; i < size; i++)
297 {
298 int index;
299 if (binary) {
300 index = random_int(i);
301 } else {
302 index = random_int(i + innodes.size());
303 }
304 if (index < i) {
305 node = myleaves[index];
306 innodes.push_back(node);
307 // add the first leaf
308 Node *newleaf = newNode();
309 len = randomLen(params);
310 node->addNeighbor(newleaf, len);
311 newleaf->addNeighbor(node, len);
312 myleaves[index] = newleaf;
313
314 // add the second leaf
315 newleaf = newNode();
316 len = randomLen(params);
317 node->addNeighbor(newleaf, len);
318 newleaf->addNeighbor(node, len);
319 myleaves.push_back(newleaf);
320 }
321 else {
322 node = innodes[index-i];
323 // add only 1 new leaf
324 Node *newleaf = newNode();
325 len = randomLen(params);
326 node->addNeighbor(newleaf, len);
327 newleaf->addNeighbor(node, len);
328 myleaves.push_back(newleaf);
329
330 }
331
332 }
333
334 root = myleaves[0];
335 // indexing the leaves
336 setLeavesName(myleaves);
337
338 leafNum = myleaves.size();
339 nodeNum = leafNum;
340 initializeTree();
341
342
343 }
344
generateConstrainedYuleHarding(Params & params,MTree * constraint_tree,StrVector & taxnames)345 void MExtTree::generateConstrainedYuleHarding(Params ¶ms, MTree* constraint_tree, StrVector &taxnames) {
346 int size = taxnames.size();
347 if (size < 3)
348 outError(ERR_FEW_TAXA);
349 NodeVector myleaves;
350 NodeVector innodes;
351 StrVector names;
352 StringIntMap namemap;
353 StrVector::iterator it;
354
355 // copy constraint tree and resolve multifurcation
356 copyTree(constraint_tree);
357 resolveMultifurcation();
358
359 getTaxa(myleaves);
360 getTaxaName(names);
361 for (it = names.begin(); it != names.end(); it++)
362 namemap[*it] = 1;
363
364 // add the remaining taxa names
365 for (it = taxnames.begin(); it != taxnames.end(); it++)
366 if (namemap.find(*it) == namemap.end())
367 names.push_back(*it);
368 ASSERT(names.size() == taxnames.size());
369 my_random_shuffle(names.begin()+leafNum, names.end());
370
371 // additionally add a leaf
372 for (; leafNum < size; leafNum++)
373 {
374 int index;
375 index = random_int(leafNum);
376 Node *leaf = myleaves[index];
377 Node *dad = leaf->neighbors[0]->node;
378 // add the first leaf
379
380 Node *newleaf = newNode(leafNum, names[leafNum].c_str());
381 Node *node = newNode();
382
383 // redirect the current leaf
384 node->addNeighbor(leaf, -1.0);
385 leaf->updateNeighbor(dad, node);
386
387 // add the new leaf
388 node->addNeighbor(newleaf, -1.0);
389 newleaf->addNeighbor(node, -1.0);
390
391 // connect dad and new node
392 dad->updateNeighbor(leaf, node);
393 node->addNeighbor(dad, -1.0);
394
395 myleaves.push_back(newleaf);
396 }
397
398 // assign random branch lengths
399 myleaves.clear();
400 innodes.clear();
401 getBranches(myleaves, innodes);
402 for (int i = 0; i < myleaves.size(); i++) {
403 double len = randomLen(params);
404 myleaves[i]->findNeighbor(innodes[i])->length = len;
405 innodes[i]->findNeighbor(myleaves[i])->length = len;
406 }
407
408
409 nodeNum = leafNum;
410 initializeTree();
411
412 }
413
generateStarTree(Params & params)414 void MExtTree::generateStarTree(Params ¶ms) {
415 generateYuleHarding(params);
416 NodeVector nodes, nodes2;
417 generateNNIBraches(nodes, nodes2);
418 for (int i = 0; i < nodes.size(); i++) {
419 nodes[i]->findNeighbor(nodes2[i])->length = 0.0;
420 nodes2[i]->findNeighbor(nodes[i])->length = 0.0;
421 }
422
423 }
424
generateRandomBranchLengths(Params & params,Node * node,Node * dad)425 void MExtTree::generateRandomBranchLengths(Params ¶ms, Node *node, Node *dad) {
426 if (!node) node = root;
427 FOR_NEIGHBOR_IT(node, dad, it) {
428 double len = randomLen(params);
429 (*it)->length = len;
430 (*it)->node->findNeighbor(node)->length = len;
431 generateRandomBranchLengths(params, (*it)->node, node);
432 }
433 }
434
435
setLeavesName(NodeVector & myleaves)436 void MExtTree::setLeavesName(NodeVector &myleaves) {
437 for (int i = 0; i < myleaves.size(); i++)
438 {
439 myleaves[i]->id = i;
440 stringstream str;
441 str << 'T' << myleaves[i]->id;
442 myleaves[i]->name = str.str();
443 }
444 }
445
446
createCluster(NodeVector & taxa,mmatrix (int)& clusters,Node * node,Node * dad)447 void MExtTree::createCluster(NodeVector &taxa, mmatrix(int) &clusters, Node *node, Node *dad) {
448 if (node == NULL) node = root;
449 FOR_NEIGHBOR_IT(node, dad, it) {
450 // if both end-nodes are bifurcating
451 Node *child = (*it)->node;
452 if (!child->isLeaf()) child->name = "";
453 if (node->degree() == 3 && child->degree() == 3) {
454 int count = 0;
455 FOR_NEIGHBOR_DECLARE(child, node, it2)
456 createCluster(count++, (*it2)->node, child);
457 if (!rooted) {
458 FOR_NEIGHBOR(node, child, it2)
459 createCluster(count++, (*it2)->node, node);
460 } else createCluster(count++, node, child);
461
462
463 clusters.resize(clusters.size()+1);
464 for (NodeVector::iterator nit = taxa.begin(); nit != taxa.end(); nit++) {
465 clusters.back().push_back((int)((*nit)->height));
466 }
467 child->name = "";
468 child->name += clusters.size();
469 }
470 createCluster(taxa, clusters, child, node);
471 }
472 }
473
createCluster(int clu_num,Node * node,Node * dad)474 void MExtTree::createCluster(int clu_num, Node *node, Node *dad) {
475 if (node->isLeaf()) node->height = clu_num;
476 FOR_NEIGHBOR_IT(node, dad, it) {
477 createCluster(clu_num, (*it)->node, node);
478 }
479 }
480
481
collapseLowBranchSupport(DoubleVector & minsup,Node * node,Node * dad)482 void MExtTree::collapseLowBranchSupport(DoubleVector &minsup, Node *node, Node *dad) {
483 if (!node) node = root;
484 FOR_NEIGHBOR_IT(node, dad, it) {
485 collapseLowBranchSupport(minsup, (*it)->node, node);
486 }
487 if (!node->isLeaf() && dad && node->name != "") {
488 DoubleVector vec;
489 convert_double_vec(node->name.c_str(), vec, '/');
490 if (vec.size() != minsup.size()) {
491 cout << "Branch with name " << node->name << " ignored" << endl;
492 return;
493 }
494 for (int i = 0; i < vec.size(); i++)
495 if (vec[i] < minsup[i]) {
496 // support smaller than threshold, mark this branch for deletion
497 dad->findNeighbor(node)->length = -1.0;
498 node->findNeighbor(dad)->length = -1.0;
499 break;
500 }
501 }
502 }
503