1 /***************************************************************************
2  *   Copyright (C) 2006 by BUI Quang Minh, Steffen Klaere, Arndt von Haeseler   *
3  *   minh.bui@univie.ac.at   *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
19  ***************************************************************************/
20 #include "mexttree.h"
21 #include "alignment/alignment.h"
22 
generateRandomTree(TreeGenType tree_type,Params & params,bool binary)23 void MExtTree::generateRandomTree(TreeGenType tree_type, Params &params, bool binary) {
24 	Alignment *alignment = NULL;
25 	if (params.aln_file) {
26 		// generate random tree with leaf sets taken from an alignment
27 		alignment = createAlignment(params.aln_file, params.sequence_type, params.intype, params.model_name);
28 		params.sub_size = alignment->getNSeq();
29 	}
30 	if (params.sub_size < 3) {
31 		outError(ERR_FEW_TAXA);
32 	}
33 	switch (tree_type) {
34 	case YULE_HARDING:
35 		generateYuleHarding(params, binary);
36 		break;
37 	case UNIFORM:
38 		generateUniform(params.sub_size, binary);
39 		break;
40 	case CATERPILLAR:
41 		generateCaterpillar(params.sub_size);
42 		break;
43 	case BALANCED:
44 		generateBalanced(params.sub_size);
45 		break;
46 	case STAR_TREE:
47 		generateStarTree(params);
48 		break;
49 	default:
50 		break;
51 	}
52 	if (!alignment) return;
53 	NodeVector taxa;
54 	getTaxa(taxa);
55 	ASSERT(taxa.size() == params.sub_size);
56 	for (NodeVector::iterator it = taxa.begin(); it != taxa.end(); it++)
57 		(*it)->name = alignment->getSeqName((*it)->id);
58 }
59 
setZeroInternalBranches(int num_zero_len)60 void MExtTree::setZeroInternalBranches(int num_zero_len) {
61 	NodeVector nodes, nodes2;
62 	generateNNIBraches(nodes, nodes2);
63 	if (num_zero_len > nodes.size()) outError("The specified number of zero branches is too much");
64 	for (int i = 0; i < num_zero_len;) {
65 		int id = random_int(nodes.size());
66 		if (!nodes[id]) continue;
67 		i++;
68 		nodes[id]->findNeighbor(nodes2[id])->length = 0.0;
69 		nodes2[id]->findNeighbor(nodes[id])->length = 0.0;
70 		nodes[id] = NULL;
71 		nodes2[id] = NULL;
72 	}
73 }
74 
generateCaterpillar(int size)75 void MExtTree::generateCaterpillar(int size) {
76 	if (size < 3)
77 		outError(ERR_FEW_TAXA);
78 	root = newNode();
79 	int i;
80 	NodeVector myleaves;
81 	NodeVector innodes;
82 	Node *node;
83 	double len;
84 
85 	innodes.push_back(root);
86 	// create initial tree with 3 leaves
87 	for (i = 0; i < 3; i++)
88 	{
89 		node = newNode();
90 		len = random_double();
91 		root->addNeighbor(node, len);
92 		node->addNeighbor(root, len);
93 		myleaves.push_back(node);
94 	}
95 
96 	// additionally add a leaf
97 	for (i = 3; i < size; i++)
98 	{
99 		int index;
100 		index = i-1;
101 
102 		node = myleaves[index];
103 		innodes.push_back(node);
104 		// add the first leaf
105 		Node *newleaf = newNode();
106 		len = random_double();
107 		node->addNeighbor(newleaf, len);
108 		newleaf->addNeighbor(node, len);
109 		myleaves[index] = newleaf;
110 
111 		// add the second leaf
112 		newleaf = newNode();
113 		len = random_double();
114 		node->addNeighbor(newleaf, len);
115 		newleaf->addNeighbor(node, len);
116 		myleaves.push_back(newleaf);
117 
118 	}
119 
120 	root = myleaves[0];
121 	// indexing the leaves
122 	setLeavesName(myleaves);
123 
124 	leafNum = myleaves.size();
125 	nodeNum = leafNum;
126 	initializeTree();
127 
128 }
129 
130 
generateBalanced(int size)131 void MExtTree::generateBalanced(int size) {
132 	if (size < 3)
133 		outError(ERR_FEW_TAXA);
134 	root = newNode();
135 	int i;
136 	NodeVector myleaves;
137 	Node *node;
138 	double len;
139 
140 	myleaves.push_back(root);
141 	// create initial tree with 2 leaves
142 	node = newNode();
143 	len = random_double();
144 	root->addNeighbor(node, len);
145 	node->addNeighbor(root, len);
146 	myleaves.push_back(node);
147 
148 	while (myleaves.size() < size) {
149 
150 		int cur_size = myleaves.size();
151 		// additionally add a leaf
152 		for (i = 0; i < cur_size && myleaves.size() < size; i++)
153 		{
154 			int index = i;
155 
156 			node = myleaves[index];
157 			// add the first leaf
158 			Node *newleaf = newNode();
159 			len = random_double();
160 			node->addNeighbor(newleaf, len);
161 			newleaf->addNeighbor(node, len);
162 			myleaves[index] = newleaf;
163 
164 			// add the second leaf
165 			newleaf = newNode();
166 			len = random_double();
167 			node->addNeighbor(newleaf, len);
168 			newleaf->addNeighbor(node, len);
169 			myleaves.push_back(newleaf);
170 
171 		}
172 	}
173 
174 	root = myleaves[0];
175 	// indexing the leaves
176 	setLeavesName(myleaves);
177 
178 	leafNum = myleaves.size();
179 	nodeNum = leafNum;
180 	initializeTree();
181 
182 }
183 
184 /**
185 	generate a random tree following uniform model
186 */
generateUniform(int size,bool binary)187 void MExtTree::generateUniform(int size, bool binary)
188 {
189 	if (size < 3)
190 		outError(ERR_FEW_TAXA);
191 	int i;
192 
193 	// list of left- and right-end of branches
194 	NodeVector leftend, rightend, myleaves;
195 	Node *node;
196 	double len;
197 
198 	root = newNode(0, "0");
199 	// create initial tree with 2 leaves
200 	node = newNode(1, "1");
201 	len = random_double();
202 	root->addNeighbor(node, len);
203 	node->addNeighbor(root, len);
204 
205 	leftend.push_back(root);
206 	rightend.push_back(node);
207 
208 	myleaves.push_back(root);
209 	myleaves.push_back(node);
210 
211 	// additionally add a leaf
212 	for (i = 2; i < size; i++)
213 	{
214 		int index;
215 		index = random_int(2*i-3);
216 		//cout << "step " << i << " left = " << leftend[index]->id << " right = " << rightend[index]->id << endl;
217 
218 		// add an internal node
219 		Node *newnode = newNode(size+i-2);
220 		// reconnect the left end
221 		node = leftend[index];
222 		for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
223 			if ((*it)->node == rightend[index]) {
224 				len = random_double();
225 				(*it)->node = newnode;
226 				(*it)->length = len;
227 				newnode->addNeighbor(node, len);
228 				//cout << "  left " << leftend[index]->id << " " << newnode->id << endl;
229 				break;
230 			}
231 		// reconnect the right end
232 		node = rightend[index];
233 		for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
234 			if ((*it)->node == leftend[index]) {
235 				len = random_double();
236 				(*it)->node = newnode;
237 				(*it)->length = len;
238 				newnode->addNeighbor(node, len);
239 				//cout << "  right " << rightend[index]->id  << " " << newnode->id  << endl;
240 				break;
241 			}
242 
243 		// add a new leaf
244 		Node *newleaf = newNode(i, i);
245 		len = random_double();
246 		newnode->addNeighbor(newleaf, len);
247 		newleaf->addNeighbor(newnode, len);
248 
249 		// update the leftend and rightend list
250 		leftend.push_back(newnode);
251 		rightend.push_back(rightend[index]);
252 
253 		leftend.push_back(newnode);
254 		rightend.push_back(newleaf);
255 
256 		rightend[index] = newnode;
257 
258 		myleaves.push_back(newleaf);
259 
260 	}
261 
262 	// indexing the leaves
263 	setLeavesName(myleaves);
264 
265 	leafNum = size;
266 	nodeNum = leafNum;
267 	initializeTree();
268 
269 }
270 
271 /**
272 	generate a random tree following Yule Harding model
273 */
generateYuleHarding(Params & params,bool binary)274 void MExtTree::generateYuleHarding(Params &params, bool binary) {
275 	int size = params.sub_size;
276 	if (size < 3)
277 		outError(ERR_FEW_TAXA);
278 	root = newNode();
279 	int i;
280 	NodeVector myleaves;
281 	NodeVector innodes;
282 	Node *node;
283 	double len;
284 
285 	innodes.push_back(root);
286 	// create initial tree with 3 leaves
287 	for (i = 0; i < 3; i++) {
288 		node = newNode();
289 		len = randomLen(params);
290 		root->addNeighbor(node, len);
291 		node->addNeighbor(root, len);
292 		myleaves.push_back(node);
293 	}
294 
295 	// additionally add a leaf
296 	for (i = 3; i < size; i++)
297 	{
298 		int index;
299 		if (binary) {
300 			index = random_int(i);
301 		} else {
302  			index = random_int(i + innodes.size());
303 		}
304 		if (index < i) {
305 			node = myleaves[index];
306 			innodes.push_back(node);
307 			// add the first leaf
308 			Node *newleaf = newNode();
309 			len = randomLen(params);
310 			node->addNeighbor(newleaf, len);
311 			newleaf->addNeighbor(node, len);
312 			myleaves[index] = newleaf;
313 
314 			// add the second leaf
315 			newleaf = newNode();
316 			len = randomLen(params);
317 			node->addNeighbor(newleaf, len);
318 			newleaf->addNeighbor(node, len);
319 			myleaves.push_back(newleaf);
320 		}
321 		else {
322 			node = innodes[index-i];
323 			// add only 1 new leaf
324 			Node *newleaf = newNode();
325 			len = randomLen(params);
326 			node->addNeighbor(newleaf, len);
327 			newleaf->addNeighbor(node, len);
328 			myleaves.push_back(newleaf);
329 
330 		}
331 
332 	}
333 
334 	root = myleaves[0];
335 	// indexing the leaves
336 	setLeavesName(myleaves);
337 
338 	leafNum = myleaves.size();
339 	nodeNum = leafNum;
340 	initializeTree();
341 
342 
343 }
344 
generateConstrainedYuleHarding(Params & params,MTree * constraint_tree,StrVector & taxnames)345 void MExtTree::generateConstrainedYuleHarding(Params &params, MTree* constraint_tree, StrVector &taxnames) {
346 	int size = taxnames.size();
347 	if (size < 3)
348 		outError(ERR_FEW_TAXA);
349 	NodeVector myleaves;
350 	NodeVector innodes;
351     StrVector names;
352     StringIntMap namemap;
353     StrVector::iterator it;
354 
355     // copy constraint tree and resolve multifurcation
356     copyTree(constraint_tree);
357     resolveMultifurcation();
358 
359     getTaxa(myleaves);
360     getTaxaName(names);
361     for (it = names.begin(); it != names.end(); it++)
362         namemap[*it] = 1;
363 
364     // add the remaining taxa names
365     for (it = taxnames.begin(); it != taxnames.end(); it++)
366         if (namemap.find(*it) == namemap.end())
367             names.push_back(*it);
368     ASSERT(names.size() == taxnames.size());
369     my_random_shuffle(names.begin()+leafNum, names.end());
370 
371 	// additionally add a leaf
372 	for (; leafNum < size; leafNum++)
373 	{
374 		int index;
375 		index = random_int(leafNum);
376         Node *leaf = myleaves[index];
377         Node *dad = leaf->neighbors[0]->node;
378         // add the first leaf
379 
380         Node *newleaf = newNode(leafNum, names[leafNum].c_str());
381         Node *node = newNode();
382 
383         // redirect the current leaf
384         node->addNeighbor(leaf, -1.0);
385         leaf->updateNeighbor(dad, node);
386 
387         // add the new leaf
388         node->addNeighbor(newleaf, -1.0);
389         newleaf->addNeighbor(node, -1.0);
390 
391         // connect dad and new node
392         dad->updateNeighbor(leaf, node);
393         node->addNeighbor(dad, -1.0);
394 
395         myleaves.push_back(newleaf);
396 	}
397 
398     // assign random branch lengths
399     myleaves.clear();
400     innodes.clear();
401     getBranches(myleaves, innodes);
402     for (int i = 0; i < myleaves.size(); i++) {
403         double len = randomLen(params);
404         myleaves[i]->findNeighbor(innodes[i])->length = len;
405         innodes[i]->findNeighbor(myleaves[i])->length = len;
406     }
407 
408 
409 	nodeNum = leafNum;
410 	initializeTree();
411 
412 }
413 
generateStarTree(Params & params)414 void MExtTree::generateStarTree(Params &params) {
415 	generateYuleHarding(params);
416 	NodeVector nodes, nodes2;
417 	generateNNIBraches(nodes, nodes2);
418 	for (int i = 0; i < nodes.size(); i++) {
419 		nodes[i]->findNeighbor(nodes2[i])->length = 0.0;
420 		nodes2[i]->findNeighbor(nodes[i])->length = 0.0;
421 	}
422 
423 }
424 
generateRandomBranchLengths(Params & params,Node * node,Node * dad)425 void MExtTree::generateRandomBranchLengths(Params &params, Node *node, Node *dad) {
426 	if (!node) node = root;
427 	FOR_NEIGHBOR_IT(node, dad, it) {
428 		double len = randomLen(params);
429 		(*it)->length = len;
430 		(*it)->node->findNeighbor(node)->length = len;
431 		generateRandomBranchLengths(params, (*it)->node, node);
432 	}
433 }
434 
435 
setLeavesName(NodeVector & myleaves)436 void MExtTree::setLeavesName(NodeVector &myleaves) {
437 	for (int i = 0; i < myleaves.size(); i++)
438 	{
439 		myleaves[i]->id = i;
440 		stringstream str;
441 		str << 'T' << myleaves[i]->id;
442 		myleaves[i]->name = str.str();
443 	}
444 }
445 
446 
createCluster(NodeVector & taxa,mmatrix (int)& clusters,Node * node,Node * dad)447 void MExtTree::createCluster(NodeVector &taxa, mmatrix(int) &clusters, Node *node, Node *dad) {
448 	if (node == NULL) node = root;
449 	FOR_NEIGHBOR_IT(node, dad, it) {
450 		// if both end-nodes are bifurcating
451 		Node *child = (*it)->node;
452 		if (!child->isLeaf()) child->name = "";
453 		if (node->degree() == 3 && child->degree() == 3) {
454 			int count = 0;
455 			FOR_NEIGHBOR_DECLARE(child, node, it2)
456 				createCluster(count++, (*it2)->node, child);
457 			if (!rooted) {
458 				FOR_NEIGHBOR(node, child, it2)
459 					createCluster(count++, (*it2)->node, node);
460 			} else createCluster(count++, node, child);
461 
462 
463 			clusters.resize(clusters.size()+1);
464 			for (NodeVector::iterator nit = taxa.begin(); nit != taxa.end(); nit++) {
465 				clusters.back().push_back((int)((*nit)->height));
466 			}
467 			child->name = "";
468 			child->name += clusters.size();
469 		}
470 		createCluster(taxa, clusters, child, node);
471 	}
472 }
473 
createCluster(int clu_num,Node * node,Node * dad)474 void MExtTree::createCluster(int clu_num, Node *node, Node *dad) {
475 	if (node->isLeaf()) node->height = clu_num;
476 	FOR_NEIGHBOR_IT(node, dad, it) {
477 		createCluster(clu_num, (*it)->node, node);
478 	}
479 }
480 
481 
collapseLowBranchSupport(DoubleVector & minsup,Node * node,Node * dad)482 void MExtTree::collapseLowBranchSupport(DoubleVector &minsup, Node *node, Node *dad) {
483     if (!node) node = root;
484     FOR_NEIGHBOR_IT(node, dad, it) {
485         collapseLowBranchSupport(minsup, (*it)->node, node);
486     }
487     if (!node->isLeaf() && dad && node->name != "") {
488         DoubleVector vec;
489         convert_double_vec(node->name.c_str(), vec, '/');
490         if (vec.size() != minsup.size()) {
491             cout << "Branch with name " << node->name << " ignored" << endl;
492             return;
493         }
494         for (int i = 0; i < vec.size(); i++)
495             if (vec[i] < minsup[i]) {
496                 // support smaller than threshold, mark this branch for deletion
497                 dad->findNeighbor(node)->length = -1.0;
498                 node->findNeighbor(dad)->length = -1.0;
499                 break;
500             }
501     }
502 }
503