1 /***************************************************************************
2 * Copyright (C) 2006 by BUI Quang Minh, Steffen Klaere, Arndt von Haeseler *
3 * minh.bui@univie.ac.at *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19 ***************************************************************************/
20 #include "mtree.h"
21 #include <iostream>
22 //#include <fstream>
23 #include <iterator>
24 //#include <mtree.h>
25 #include "pda/splitgraph.h"
26 #include "utils/tools.h"
27 #include "mtreeset.h"
28 using namespace std;
29
30 /*********************************************
31 class MTree
32 *********************************************/
33
MTree()34 MTree::MTree() {
35 root = NULL;
36 leafNum = 0;
37 nodeNum = 0;
38 rooted = false;
39 if (Params::getInstance().min_branch_length <= 0)
40 num_precision = 6;
41 else
42 num_precision = max((int)ceil(-log10(Params::getInstance().min_branch_length))+1, 6);
43 len_scale = 1.0;
44 fig_char = "|-+++";
45 }
46
MTree(const char * userTreeFile,bool & is_rooted)47 MTree::MTree(const char *userTreeFile, bool &is_rooted)
48 {
49 init(userTreeFile, is_rooted);
50 }
51
init(const char * userTreeFile,bool & is_rooted)52 void MTree::init(const char *userTreeFile, bool &is_rooted) {
53 if (Params::getInstance().min_branch_length <= 0)
54 num_precision = 6;
55 else
56 num_precision = max((int)ceil(-log10(Params::getInstance().min_branch_length))+1, 6);
57 len_scale = 1.0;
58 readTree(userTreeFile, is_rooted);
59 //printInfo();
60 fig_char = "|-+++";
61 }
62
63
64 /**
65 constructor, get from another tree
66 */
MTree(MTree & tree)67 MTree::MTree(MTree &tree) {
68 init(tree);
69 }
70
MTree(string & treeString,vector<string> & taxaNames,bool isRooted)71 MTree::MTree(string& treeString, vector<string>& taxaNames, bool isRooted) {
72 stringstream str;
73 str << treeString;
74 str.seekg(0, ios::beg);
75 readTree(str, isRooted);
76 assignIDs(taxaNames);
77 assignLeafID();
78 }
79
MTree(string & treeString,bool isRooted)80 MTree::MTree(string& treeString, bool isRooted) {
81 stringstream str;
82 str << treeString;
83 str.seekg(0, ios::beg);
84 readTree(str, isRooted);
85 assignLeafID();
86 }
87
init(MTree & tree)88 void MTree::init(MTree &tree) {
89 root = tree.root;
90 leafNum = tree.leafNum;
91 nodeNum = tree.nodeNum;
92 rooted = tree.rooted;
93 //userFile = tree.userFile;
94 // have to delete the root when exchange to another object
95 tree.root = NULL;
96 num_precision = tree.num_precision;
97 len_scale = tree.len_scale;
98 fig_char = tree.fig_char;
99 }
100
assignIDs(vector<string> & taxaNames)101 void MTree::assignIDs(vector<string>& taxaNames) {
102 bool err = false;
103 int nseq = taxaNames.size();
104 for (int seq = 0; seq < nseq; seq++) {
105 string seq_name = taxaNames[seq];
106 Node *node = findLeafName(seq_name);
107 if (!node) {
108 string str = "Sequence ";
109 str += seq_name;
110 str += " does not appear in the tree";
111 err = true;
112 outError(str, false);
113 } else {
114 ASSERT(node->isLeaf());
115 node->id = seq;
116 }
117 }
118 StrVector taxname;
119 getTaxaName(taxname);
120 for (StrVector::iterator it = taxname.begin(); it != taxname.end(); it++) {
121 bool foundTaxa = false;
122 for (vector<string>::iterator it2 = taxaNames.begin(); it2 != taxaNames.end(); it2++) {
123 if ( *it == *it2 ) {
124 foundTaxa = true;
125 break;
126 }
127 }
128 if (!foundTaxa) {
129 outError((string) "Tree taxon " + (*it) + " does not appear in the input taxa names", false);
130 err = true;
131 }
132 }
133 if (err) outError("Tree taxa and input taxa names do not match (see above)");
134 }
135
copyTree(MTree * tree)136 void MTree::copyTree(MTree *tree) {
137 if (root) freeNode();
138 stringstream ss;
139 tree->printTree(ss);
140 readTree(ss, tree->rooted);
141 rooted = tree->rooted;
142 }
143
copyTree(MTree * tree,string & taxa_set)144 void MTree::copyTree(MTree *tree, string &taxa_set) {
145 rooted = tree->rooted;
146 if (rooted) {
147 ASSERT(tree->rooted);
148 taxa_set.push_back(1);
149 }
150 // if (tree->leafNum != taxa_set.length())
151 // outError("#leaves and taxa_set do not match!");
152 leafNum = nodeNum = branchNum = 0;
153 for (string::iterator it = taxa_set.begin(); it != taxa_set.end(); it++)
154 nodeNum += (*it);
155 double new_len;
156 if (root) freeNode();
157 root = NULL;
158 root = copyTree(tree, taxa_set, new_len);
159 if (rooted)
160 ASSERT(root->name == ROOT_NAME);
161 }
162
copyTree(MTree * tree,string & taxa_set,double & len,Node * node,Node * dad)163 Node* MTree::copyTree(MTree *tree, string &taxa_set, double &len, Node *node, Node *dad) {
164 if (!node) {
165 if (taxa_set[tree->root->id]) {
166 node = tree->root;
167 } else {
168 for (int i = 0; i < tree->leafNum; i++)
169 if (taxa_set[i]) {
170 node = tree->findNodeID(i);
171 break;
172 }
173 }
174 }
175 Node *new_node = NULL;
176 NodeVector new_nodes;
177 DoubleVector new_lens;
178 if (node->isLeaf()) {
179 len = 0.0;
180 if (taxa_set[node->id]) {
181 new_node = newNode(leafNum++, node->name.c_str());
182 }
183 if (dad) return new_node;
184 }
185 if (new_node) {
186 new_nodes.push_back(new_node);
187 new_lens.push_back(len);
188 }
189 FOR_NEIGHBOR_IT(node, dad, it) {
190 double new_len;
191 new_node = copyTree(tree, taxa_set, new_len, (*it)->node, node);
192 if (new_node) {
193 new_nodes.push_back(new_node);
194 new_lens.push_back((*it)->length + new_len);
195 }
196 }
197 if (new_nodes.empty()) return NULL;
198 if (new_nodes.size() == 1) {
199 len = new_lens[0];
200 return new_nodes[0];
201 }
202 if (!dad && new_nodes.size() == 2) {
203 double sum_len = new_lens[0] + new_lens[1];
204 new_nodes[0]->addNeighbor(new_nodes[1], sum_len, branchNum);
205 new_nodes[1]->addNeighbor(new_nodes[0], sum_len, branchNum);
206 branchNum++;
207 return new_nodes[0];
208 }
209 Node* int_node = newNode(nodeNum++, node->name.c_str());
210 len = 0.0;
211 for (int i = 0; i < new_nodes.size(); i++) {
212 int_node->addNeighbor(new_nodes[i], new_lens[i], branchNum);
213 new_nodes[i]->addNeighbor(int_node, new_lens[i], branchNum);
214 branchNum++;
215 }
216 return int_node;
217 }
218
extractBifurcatingSubTree(Node * node,Node * dad)219 void MTree::extractBifurcatingSubTree(Node *node, Node *dad) {
220 if (!node) node = root;
221 if (node->degree() > 3) {
222 int id1, id2, id3;
223 id1 = node->findNeighborIt(dad) - node->neighbors.begin();
224 do {
225 id2 = random_int(node->degree());
226 } while (id2 == id1);
227
228 // make sure that id1 < id2
229 if (id1 > id2) {
230 int tmp = id1;
231 id1 = id2;
232 id2 = tmp;
233 }
234 do {
235 id3 = random_int(node->degree());
236 } while (id3 == id1 || id3 == id2);
237 //make sure that id1 < id2 < id3
238 if (id3 < id2) {
239 if (id3 < id1) {
240 // id3 < id1 < id2
241 int tmp = id1;
242 id1 = id3;
243 id3 = id2;
244 id2 = tmp;
245 } else {
246 // id1 < id3 < id2
247 int tmp = id2;
248 id2 = id3;
249 id3 = tmp;
250 }
251 }
252 // remove all neighbors except id1, id2, id3
253 for (int i = 0; i != node->neighbors.size(); i++)
254 if (i != id1 && i != id2 && i != id3) {
255 freeNode(node->neighbors[i]->node, node);
256 delete node->neighbors[i];
257 }
258 node->neighbors[0] = node->neighbors[id1];
259 node->neighbors[1] = node->neighbors[id2];
260 node->neighbors[2] = node->neighbors[id3];
261 node->neighbors.erase(node->neighbors.begin()+3, node->neighbors.end());
262 }
263 FOR_NEIGHBOR_IT(node, dad, it) {
264 if (!(*it)->node->isLeaf())
265 extractBifurcatingSubTree((*it)->node, node);
266 }
267 }
268
resolveMultifurcation()269 void MTree::resolveMultifurcation() {
270 // randomly resolve multifurcating node
271
272 NodeVector nodes;
273 getInternalNodes(nodes);
274 for (NodeVector::iterator it = nodes.begin(); it != nodes.end(); it++)
275 while ((*it)->degree() > 3) {
276 Node *new_node = newNode();
277 int id1 = random_int((*it)->degree());
278 int id2;
279 do {
280 id2 = random_int((*it)->degree());
281 } while (id2 == id1);
282
283 // make sure that id1 < id2
284 if (id1 > id2) {
285 int tmp = id1;
286 id1 = id2;
287 id2 = tmp;
288 }
289 Neighbor *nei1 = (*it)->neighbors[id1];
290 Neighbor *nei2 = (*it)->neighbors[id2];
291
292 // connect id1 with new_node
293 nei1->node->updateNeighbor((*it), new_node);
294 new_node->neighbors.push_back(nei1);
295
296 // connect id2 with new_node
297 nei2->node->updateNeighbor((*it), new_node);
298 new_node->neighbors.push_back(nei2);
299
300 // connect new_node with old node
301 new_node->addNeighbor((*it), -1.0);
302 (*it)->neighbors.erase((*it)->neighbors.begin() + id2);
303 (*it)->neighbors.erase((*it)->neighbors.begin() + id1);
304 (*it)->addNeighbor(new_node, -1.0);
305 }
306 }
307
newNode(int node_id,const char * node_name)308 Node* MTree::newNode(int node_id, const char* node_name) {
309 return new Node(node_id, node_name);
310 }
311
newNode(int node_id,int node_name)312 Node* MTree::newNode(int node_id, int node_name) {
313 return new Node(node_id, node_name);
314 }
315
isBifurcating(Node * node,Node * dad)316 bool MTree::isBifurcating(Node *node, Node *dad) {
317 if (!node) node = root;
318 if (!node->isLeaf() && node->degree() != 3) return false;
319 FOR_NEIGHBOR_IT(node, dad, it) {
320 if (!(*it)->node->isLeaf() && (*it)->node->degree() != 3) return false;
321 if (!isBifurcating((*it)->node, node)) return false;
322 }
323 return true;
324 }
325
printBranchLengths(ostream & out,Node * node,Node * dad)326 void MTree::printBranchLengths(ostream &out, Node *node, Node *dad)
327 {
328 if (node == NULL) {
329 node = root;
330 sortTaxa();
331 }
332 FOR_NEIGHBOR_IT(node, dad, it) {
333 if (node->name != "") out << node->name; else out << node->id;
334 out << "\t";
335 if ((*it)->node->name != "") out << (*it)->node->name; else out << (*it)->node->id;
336 out << "\t" << (*it)->length << endl;
337 printBranchLengths(out, (*it)->node, node);
338 }
339 }
340
countZeroBranches(Node * node,Node * dad,double epsilon)341 int MTree::countZeroBranches(Node *node, Node *dad, double epsilon) {
342 int count = 0;
343 if (node == NULL) node = root;
344 FOR_NEIGHBOR_IT(node, dad, it) {
345 if ((*it)->length <= epsilon) count++;
346 count += countZeroBranches((*it)->node, node, epsilon);
347 }
348 return count;
349 }
350
countZeroInternalBranches(Node * node,Node * dad,double epsilon)351 int MTree::countZeroInternalBranches(Node *node, Node *dad, double epsilon) {
352 int count = 0;
353 if (node == NULL) node = root;
354 FOR_NEIGHBOR_IT(node, dad, it) {
355 if ((*it)->length <= epsilon && !(*it)->node->isLeaf() && !node->isLeaf()) count++;
356 count += countZeroInternalBranches((*it)->node, node, epsilon);
357 }
358 return count;
359
360 }
361
countLongBranches(Node * node,Node * dad,double upper_limit)362 int MTree::countLongBranches(Node *node, Node *dad, double upper_limit) {
363 int count = 0;
364 if (node == NULL) node = root;
365 FOR_NEIGHBOR_IT(node, dad, it) {
366 if ((*it)->length >= upper_limit) count++;
367 count += countLongBranches((*it)->node, node, upper_limit);
368 }
369 return count;
370 }
371
372
printTree(const char * ofile,int brtype)373 void MTree::printTree(const char *ofile, int brtype)
374 {
375 try {
376 ofstream out;
377 out.exceptions(ios::failbit | ios::badbit);
378 if (brtype & WT_APPEND)
379 out.open(ofile, ios_base::out | ios_base::app);
380 else
381 out.open(ofile);
382 printTree(out, brtype);
383 out.close();
384 if (verbose_mode >= VB_DEBUG)
385 cout << "Tree was printed to " << ofile << endl;
386 } catch (ios::failure) {
387 outError(ERR_WRITE_OUTPUT, ofile);
388 }
389 }
390
printNexus(string ofile,int brtype,string nexus_comment)391 void MTree::printNexus(string ofile, int brtype, string nexus_comment)
392 {
393 try {
394 ofstream out;
395 out.exceptions(ios::failbit | ios::badbit);
396 if (brtype & WT_APPEND)
397 out.open(ofile, ios_base::out | ios_base::app);
398 else
399 out.open(ofile);
400 out << "#NEXUS" << endl;
401 if (!nexus_comment.empty())
402 out << "[ " << nexus_comment << " ]" << endl;
403 out << "begin trees;" << endl;
404 out << " tree tree_1 = ";
405 printTree(out, brtype | WT_BR_ATTR);
406 out << endl;
407 out << "end;" << endl;
408 out.close();
409 if (verbose_mode >= VB_DEBUG)
410 cout << "Tree was printed to " << ofile << endl;
411 } catch (ios::failure) {
412 outError(ERR_WRITE_OUTPUT, ofile);
413 }
414 }
415
printTree(ostream & out,int brtype)416 void MTree::printTree(ostream &out, int brtype) {
417 if (root->isLeaf()) {
418 if (root->neighbors[0]->node->isLeaf()) {
419 // tree has only 2 taxa!
420 out << "(";
421 printTree(out, brtype, root);
422 out << ",";
423 if (brtype & WT_TAXON_ID)
424 out << root->neighbors[0]->node->id;
425 else
426 out << root->neighbors[0]->node->name;
427
428 if (brtype & WT_BR_LEN)
429 out << ":0";
430 out << ")";
431 } else
432 // tree has more than 2 taxa
433 printTree(out, brtype, root->neighbors[0]->node);
434 } else
435 printTree(out, brtype, root);
436
437 out << ";";
438 if (brtype & WT_NEWLINE) out << endl;
439 }
440
441 struct IntString {
442 int id;
443 string str;
444 };
445
446 /**
447 nodecmp, for pruning algorithm
448 */
449 struct IntStringCmp
450 {
451 /**
452 nodecmp, for pruning algorithm
453 */
operator ()IntStringCmp454 bool operator()(const IntString* s1, const IntString* s2) const
455 {
456 return (s1->id) < (s2->id);
457 }
458 };
459
460 typedef set<IntString*, IntStringCmp> IntStringSet;
461
printBranchLength(ostream & out,int brtype,bool print_slash,Neighbor * length_nei)462 void MTree::printBranchLength(ostream &out, int brtype, bool print_slash, Neighbor *length_nei) {
463 if (length_nei->length == -1.0)
464 return; // NA branch length
465 int prec = 10;
466 double length = length_nei->length;
467 if (brtype & WT_BR_SCALE) length *= len_scale;
468 if (brtype & WT_BR_LEN_SHORT) prec = 6;
469 if (brtype & WT_BR_LEN_ROUNDING) length = round(length);
470 out.precision(prec);
471 if ((brtype & WT_BR_ATTR) && !length_nei->attributes.empty()) {
472 // print branch attributes
473 out << "[&";
474 bool first = true;
475 for (auto attr : length_nei->attributes) {
476 if (!first)
477 out << ",";
478 out << attr.first << "=\"" << attr.second << '"';
479 first = false;
480 }
481 out << "]";
482 }
483
484 if (brtype & WT_BR_LEN) {
485 if (brtype & WT_BR_LEN_FIXED_WIDTH)
486 out << ":" << fixed << length;
487 else
488 out << ":" << length;
489 } else if (brtype & WT_BR_CLADE && length_nei->node->name != ROOT_NAME) {
490 if (print_slash)
491 out << "/";
492 out << length;
493 }
494 }
495
printTree(ostream & out,int brtype,Node * node,Node * dad)496 int MTree::printTree(ostream &out, int brtype, Node *node, Node *dad)
497 {
498 int smallest_taxid = leafNum;
499 out.precision(num_precision);
500 if (!node) node = root;
501 if (node->isLeaf()) {
502 smallest_taxid = node->id;
503 if (brtype & WT_TAXON_ID)
504 out << node->id;
505 else
506 out << node->name;
507
508 if (brtype & WT_BR_LEN) {
509 out.setf( std::ios::fixed, std:: ios::floatfield ); // some sofware does handle number format like '1.234e-6'
510 // out.precision(10); // increase precision to avoid zero branch (like in RAxML)
511 printBranchLength(out, brtype, false, node->neighbors[0]);
512 // double len = node->neighbors[0]->length;
513 // if (brtype & WT_BR_SCALE) len *= len_scale;
514 // if (brtype & WT_BR_LEN_ROUNDING) len = round(len);
515 // if (brtype & WT_BR_LEN_FIXED_WIDTH)
516 // out << ":" << fixed << len;
517 // else
518 // out << ":" << len;
519 }
520 } else {
521 // internal node
522 out << "(";
523 bool first = true;
524 Neighbor *length_nei = NULL;
525 //for (int i = 0; i < node->neighbors.size(); i++)
526 //if (node->neighbors[i]->node != dad)
527 if (! (brtype & WT_SORT_TAXA)) {
528 FOR_NEIGHBOR_IT(node, dad, it) {
529 if ((*it)->node->name != ROOT_NAME) {
530 if (!first)
531 out << ",";
532 int taxid = printTree(out, brtype, (*it)->node, node);
533 if (taxid < smallest_taxid) smallest_taxid = taxid;
534 first = false;
535 } else
536 length_nei = (*it);
537 } else {
538 length_nei = (*it);
539 }
540 } else {
541 IntStringSet strout;
542 FOR_NEIGHBOR_IT(node, dad, it) {
543 if ((*it)->node->name != ROOT_NAME) {
544 ostringstream ss;
545 IntString *str = new IntString;
546 str->id = printTree(ss, brtype, (*it)->node, node);
547 //ss.flush();
548 str->str = ss.str();
549 strout.insert(str);
550 } else
551 length_nei = (*it);
552 } else {
553 length_nei = (*it);
554 }
555 smallest_taxid = (*strout.begin())->id;
556 IntStringSet::iterator iss;
557 for (iss = strout.begin(); iss != strout.end(); iss++) {
558 if (!first) out << ",";
559 out << (*iss)->str;
560 first = false;
561 }
562 for (iss = strout.begin(); iss != strout.end(); iss++)
563 delete (*iss);
564 }
565 out << ")";
566 if (brtype & WT_INT_NODE)
567 out << node->id;
568 else if (!node->name.empty() && (brtype & WT_BR_ATTR) == 0)
569 out << node->name;
570 if (dad != NULL || length_nei) {
571 printBranchLength(out, brtype, !node->name.empty(), length_nei);
572 }
573 }
574 return smallest_taxid;
575 }
576
577
printSubTree(ostream & out,NodeVector & subtree)578 void MTree::printSubTree(ostream &out, NodeVector &subtree) {
579 if (root->isLeaf())
580 printSubTree(out, subtree, root->neighbors[0]->node);
581 else
582 printSubTree(out, subtree, root);
583 out << ";";
584 }
585
printSubTree(ostream & out,NodeVector & subtree,Node * node,Node * dad)586 void MTree::printSubTree(ostream &out, NodeVector &subtree, Node *node, Node *dad) {
587 if (!node) node = root;
588
589 NeighborVec::iterator it;
590 double length = 0.0, dad_length = 0.0;
591 // go down if only 1 child available
592 Node *child = NULL;
593 int degree;
594 do {
595 degree = 0;
596 FOR_NEIGHBOR(node, dad, it) {
597 if (subtree[(*it)->node->id] != NULL) {
598 degree++;
599 child = (*it)->node;
600 }
601 } else dad_length = (*it)->length;
602
603 if (degree == 1) {
604 dad = node;
605 node = child;
606 length += dad_length;
607 }
608 } while (degree == 1 && !node->isLeaf());
609
610 if (node->isLeaf())
611 out << node->name << ":" << node->neighbors[0]->length + length;
612 else
613 {
614 // internal node
615 out << "(";
616 bool first = true;
617
618 FOR_NEIGHBOR(node, dad, it) {
619 if (subtree[(*it)->node->id] != NULL) {
620 if ((*it)->node->name != ROOT_NAME) {
621 if (!first)
622 out << ",";
623 printSubTree(out, subtree, (*it)->node, node);
624 first = false;
625 } else
626 length += (*it)->length;
627 }
628 } else {
629 length += (*it)->length;
630 }
631 out << ")";
632 if (!node->name.empty())
633 out << node->name;
634 if (dad != NULL || length > 1e-20)
635 out << ":" << length;
636 }
637 }
638
639
printTaxa(const char * ofile)640 void MTree::printTaxa(const char *ofile)
641 {
642 try {
643 ofstream out;
644 out.exceptions(ios::failbit | ios::badbit);
645 out.open(ofile);
646 if (root->isLeaf())
647 printTaxa(out, root->neighbors[0]->node);
648 else
649 printTaxa(out);
650 out.close();
651 cout << "Taxa list was printed to " << ofile << endl;
652 } catch (ios::failure) {
653 outError(ERR_WRITE_OUTPUT, ofile);
654 }
655 }
656
printTaxa(ostream & out,Node * node,Node * dad)657 void MTree::printTaxa(ostream &out, Node *node, Node *dad)
658 {
659 if (!node) node = root;
660 if (node->isLeaf())
661 out << node->name << endl;
662 else
663 {
664 // internal node
665 //for (int i = 0; i < node->neighbors.size(); i++)
666 //if (node->neighbors[i]->node != dad)
667 FOR_NEIGHBOR_IT(node, dad, it) {
668 printTaxa(out, (*it)->node, node);
669 }
670 }
671 }
672
printTaxa(ostream & out,NodeVector & subtree)673 void MTree::printTaxa(ostream &out, NodeVector &subtree) {
674 for (int i = 0; i < leafNum; i++)
675 if (subtree[i] != NULL) {
676 out << subtree[i]->name << endl;
677 }
678 }
679
readTree(const char * infile,bool & is_rooted)680 void MTree::readTree(const char *infile, bool &is_rooted) {
681 ifstream in;
682 try {
683 in.exceptions(ios::failbit | ios::badbit);
684 in.open(infile);
685 readTree(in, is_rooted);
686 in.close();
687 } catch (ios::failure) {
688 outError(ERR_READ_INPUT, infile);
689 }
690
691 rooted = is_rooted;
692
693 if (verbose_mode >= VB_MED)
694 cout << "Tree contains " << leafNum - is_rooted <<
695 " taxa and " << nodeNum-1-is_rooted << " branches" << (is_rooted ? " (rooted)" : "") << endl;
696 }
697
698 /*
699 void MTree::readTreeString(string tree_string, bool is_rooted) {
700 stringstream str;
701 str << tree_string;
702 str.seekg(0, ios::beg);
703 freeNode();
704 readTree(str, is_rooted);
705 }
706 */
707
708
readTree(istream & in,bool & is_rooted)709 void MTree::readTree(istream &in, bool &is_rooted)
710 {
711 in_line = 1;
712 in_column = 1;
713 in_comment = "";
714 try {
715 char ch;
716 ch = readNextChar(in);
717 if (ch != '(') {
718 cout << in.rdbuf() << endl;
719 throw "Tree file does not start with an opening-bracket '('";
720 }
721
722 leafNum = 0;
723
724 DoubleVector branch_len;
725 Node *node;
726 parseFile(in, ch, node, branch_len);
727 // 2018-01-05: assuming rooted tree if root node has two children
728 if (is_rooted || (!branch_len.empty() && branch_len[0] != 0.0) || node->degree() == 2) {
729 if (branch_len.empty())
730 branch_len.push_back(-1.0);
731 if (branch_len[0] == -1.0) branch_len[0] = 0.0;
732 if (branch_len[0] < 0.0)
733 throw ERR_NEG_BRANCH;
734 rooted = is_rooted = true;
735 root = newNode(leafNum, ROOT_NAME);
736 root->addNeighbor(node, branch_len);
737 node->addNeighbor(root, branch_len);
738 leafNum++;
739 rooted = true;
740 } else { // assign root to one of the neighbor of node, if any
741 FOR_NEIGHBOR_IT(node, NULL, it)
742 if ((*it)->node->isLeaf()) {
743 root = (*it)->node;
744 break;
745 }
746 }
747 // make sure that root is a leaf
748 ASSERT(root->isLeaf());
749
750 if (in.eof() || ch != ';')
751 throw "Tree file must be ended with a semi-colon ';'";
752 } catch (bad_alloc) {
753 outError(ERR_NO_MEMORY);
754 } catch (const char *str) {
755 outError(str, reportInputInfo());
756 } catch (string str) {
757 outError(str.c_str(), reportInputInfo());
758 } catch (ios::failure) {
759 outError(ERR_READ_INPUT, reportInputInfo());
760 } catch (...) {
761 // anything else
762 outError(ERR_READ_ANY, reportInputInfo());
763 }
764
765 nodeNum = leafNum;
766 initializeTree();
767
768 //bool stop = false;
769 //checkValidTree(stop);
770 }
771
initializeTree(Node * node,Node * dad)772 void MTree::initializeTree(Node *node, Node* dad)
773 {
774 if (!node) {
775 node = root;
776 nodeNum = leafNum;
777 branchNum = 0;
778 }
779 if (!node->isLeaf())
780 {
781 node->id = nodeNum;
782 nodeNum++;
783 //node->name = node->id;
784
785 }
786 //for (int i = 0; i < node->neighbors.size(); i++)
787 //if (node->neighbors[i]->node != dad)
788 FOR_NEIGHBOR_IT(node, dad, it) {
789 (*it)->id = branchNum;
790 (*it)->node->findNeighbor(node)->id = branchNum;
791 branchNum++;
792 initializeTree((*it)->node, node);
793 }
794 }
795
parseBranchLength(string & lenstr,DoubleVector & branch_len)796 void MTree::parseBranchLength(string &lenstr, DoubleVector &branch_len) {
797 // branch_len.push_back(convert_double(lenstr.c_str()));
798 double len = convert_double(lenstr.c_str());
799 if (in_comment.empty()) {
800 branch_len.push_back(len);
801 return;
802 }
803 convert_double_vec(in_comment.c_str(), branch_len, BRANCH_LENGTH_SEPARATOR);
804 // char* str = (char*)in_comment.c_str() + 1;
805 // int pos;
806 // for (int i = 1; str[0] == 'L'; i++) {
807 // str++;
808 // int id = convert_int(str, pos);
809 // if (id != i)
810 // throw "Wrong ID in " + string(str);
811 // if (str[pos] != '=')
812 // throw "= is expected in " + string(str);
813 // str += pos+1;
814 // double val = convert_double(str, pos);
815 // branch_len.push_back(val);
816 // if (str[pos] == ',') {
817 // str += pos+1;
818 // continue;
819 // } else
820 // break;
821 // }
822 }
823
824
parseFile(istream & infile,char & ch,Node * & root,DoubleVector & branch_len)825 void MTree::parseFile(istream &infile, char &ch, Node* &root, DoubleVector &branch_len)
826 {
827 Node *node;
828 int maxlen = 1000;
829 string seqname;
830 int seqlen;
831 DoubleVector brlen;
832 branch_len.clear();
833
834 root = newNode();
835
836 if (ch == '(') {
837 // internal node
838 ch = readNextChar(infile);
839 while (ch != ')' && !infile.eof())
840 {
841 node = NULL;
842 parseFile(infile, ch, node, brlen);
843 //if (brlen == -1.0)
844 //throw "Found branch with no length.";
845 //if (brlen < 0.0)
846 //throw ERR_NEG_BRANCH;
847 root->addNeighbor(node, brlen);
848 node->addNeighbor(root, brlen);
849 if (infile.eof())
850 throw "Expecting ')', but end of file instead";
851 if (ch == ',')
852 ch = readNextChar(infile);
853 else if (ch != ')') {
854 string err = "Expecting ')', but found '";
855 err += ch;
856 err += "' instead";
857 throw err;
858 }
859 }
860 if (!infile.eof()) ch = readNextChar(infile);
861 }
862 // now read the node name
863 seqlen = 0;
864 char end_ch = 0;
865 if (ch == '\'' || ch == '"') end_ch = ch;
866 seqname = "";
867
868 while (!infile.eof() && seqlen < maxlen)
869 {
870 if (end_ch == 0) {
871 if (is_newick_token(ch) || controlchar(ch)) break;
872 }
873 seqname += ch;
874 seqlen++;
875 // seqname[seqlen++] = ch;
876 ch = infile.get();
877 in_column++;
878 if (end_ch != 0 && ch == end_ch) {
879 seqname += ch;
880 seqlen++;
881 // seqname[seqlen++] = ch;
882 break;
883 }
884 }
885 if ((controlchar(ch) || ch == '[' || ch == end_ch) && !infile.eof())
886 ch = readNextChar(infile, ch);
887 if (seqlen == maxlen)
888 throw "Too long name ( > 1000)";
889 if (root->isLeaf())
890 renameString(seqname);
891 // seqname[seqlen] = 0;
892 if (seqlen == 0 && root->isLeaf())
893 throw "Redundant double-bracket ‘((…))’ with closing bracket ending at";
894 if (seqlen > 0)
895 root->name.append(seqname);
896 if (root->isLeaf()) {
897 // is a leaf, assign its ID
898 root->id = leafNum;
899 if (leafNum == 0)
900 MTree::root = root;
901 leafNum++;
902 }
903
904 if (ch == ';' || infile.eof())
905 return;
906 if (ch == ':')
907 {
908 string saved_comment = in_comment;
909 ch = readNextChar(infile);
910 if (in_comment.empty())
911 in_comment = saved_comment;
912 seqlen = 0;
913 seqname = "";
914 while (!is_newick_token(ch) && !controlchar(ch) && !infile.eof() && seqlen < maxlen)
915 {
916 // seqname[seqlen] = ch;
917 seqname += ch;
918 seqlen++;
919 ch = infile.get();
920 in_column++;
921 }
922 if ((controlchar(ch) || ch == '[') && !infile.eof())
923 ch = readNextChar(infile, ch);
924 if (seqlen == maxlen || infile.eof())
925 throw "branch length format error.";
926 // seqname[seqlen] = 0;
927 parseBranchLength(seqname, branch_len);
928 // convert_double_vec(seqname.c_str(), branch_len, BRANCH_LENGTH_SEPARATOR);
929 }
930 }
931
932
933
934 /**
935 check tree is bifurcating tree (every leaf with level 1 or 3)
936 */
checkValidTree(bool & stop,Node * node,Node * dad)937 void MTree::checkValidTree(bool &stop, Node *node, Node *dad)
938 {
939 if (!node) node = root;
940 if (node->degree() != 1 && node->degree() != 3) {
941 cout << "Tree is not bifurcating." << endl;
942 stop = true;
943 return;
944 }
945 //for (int i = 0; i < node->neighbors.size(); i++)
946 //if (node->neighbors[i]->node != dad) {
947 FOR_NEIGHBOR_IT(node, dad, it) {
948 checkValidTree(stop, (*it)->node, node);
949 if (stop)
950 return;
951 }
952 }
953
treeLength(Node * node,Node * dad)954 double MTree::treeLength(Node *node, Node *dad)
955 {
956 if (!node) node = root;
957 double sum = 0;
958 FOR_NEIGHBOR_IT(node, dad, it) {
959 sum += (*it)->length + treeLength((*it)->node, node);
960 }
961 return sum;
962 }
963
treeLengthInternal(double epsilon,Node * node,Node * dad)964 double MTree::treeLengthInternal( double epsilon, Node *node, Node *dad)
965 {
966 if (!node) node = root;
967 double sum = 0;
968 FOR_NEIGHBOR_IT(node, dad, it) {
969 if (!(*it)->node->isLeaf() && !node->isLeaf())
970 {
971 if (treeLength((*it)->node, node) > epsilon) {
972 sum += (*it)->length + treeLengthInternal(epsilon, (*it)->node, node);
973 }
974 }
975 else {
976 if (treeLength((*it)->node, node) > epsilon) {
977 sum += treeLengthInternal(epsilon, (*it)->node, node);
978 }
979 }
980 }
981 return sum;
982 }
983
treeDepth(Node * node,Node * dad)984 double MTree::treeDepth(Node *node, Node *dad)
985 {
986 if (!node) node = root;
987 double maxsum = 0.0;
988 FOR_NEIGHBOR_IT(node, dad, it) {
989 double len = (*it)->length;
990 if (len < 0.0) len = 0.0;
991 double sum = len + treeDepth((*it)->node, node);
992 if (sum > maxsum) maxsum = sum;
993 }
994 return maxsum;
995 }
996
getNonCherryLeaves(NodeVector & noncherry,NodeVector & cherry,Node * node,Node * dad)997 void MTree::getNonCherryLeaves(NodeVector &noncherry, NodeVector &cherry, Node *node, Node *dad) {
998 if (!node) node = root;
999 if (node->isLeaf()) {
1000 if (node->isInCherry()) {
1001 cherry.push_back(node);
1002 } else {
1003 noncherry.push_back(node);
1004 }
1005 }
1006 FOR_NEIGHBOR_IT(node, dad, it) {
1007 getNonCherryLeaves(noncherry, cherry, (*it)->node, node);
1008 }
1009 }
1010
getTaxa(NodeVector & taxa,Node * node,Node * dad)1011 void MTree::getTaxa(NodeVector &taxa, Node *node, Node *dad) {
1012 if (!node) node = root;
1013 if (node->isLeaf()) {
1014 taxa.push_back(node);
1015 }
1016 //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1017 //if ((*it)->node != dad) {
1018 FOR_NEIGHBOR_IT(node, dad, it) {
1019 getTaxa(taxa, (*it)->node, node);
1020 }
1021 }
1022
getAllNodesInSubtree(Node * node,Node * dad,NodeVector & nodeList)1023 void MTree::getAllNodesInSubtree(Node *node, Node *dad, NodeVector &nodeList) {
1024 ASSERT(node);
1025 nodeList.push_back(node);
1026 if (node->isLeaf()) {
1027 return;
1028 }
1029 FOR_NEIGHBOR_IT(node, dad, it) {
1030 getAllNodesInSubtree((*it)->node, node, nodeList);
1031 }
1032 }
1033
getNumTaxa(Node * node,Node * dad)1034 int MTree::getNumTaxa(Node *node, Node *dad) {
1035 int numLeaf = 0;
1036 if (!node) {
1037 node = root;
1038 numLeaf = 1;
1039 } else {
1040 if (node->isLeaf()) {
1041 return 1;
1042 }
1043 }
1044
1045 FOR_NEIGHBOR_IT(node, dad, it) {
1046 numLeaf += getNumTaxa((*it)->node, node);
1047 }
1048 return numLeaf;
1049 }
1050
getInternalNodes(NodeVector & nodes,Node * node,Node * dad)1051 void MTree::getInternalNodes(NodeVector &nodes, Node *node, Node *dad) {
1052 if (!node) node = root;
1053 //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1054 //if ((*it)->node != dad) {
1055 FOR_NEIGHBOR_IT(node, dad, it)
1056 if (!(*it)->node->isLeaf()) {
1057 getInternalNodes(nodes, (*it)->node, node);
1058 nodes.push_back((*it)->node);
1059 }
1060 }
1061
getMultifurcatingNodes(NodeVector & nodes,Node * node,Node * dad)1062 void MTree::getMultifurcatingNodes(NodeVector &nodes, Node *node, Node *dad) {
1063 if (!node) node = root;
1064 //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1065 //if ((*it)->node != dad) {
1066 FOR_NEIGHBOR_IT(node, dad, it)
1067 if (!(*it)->node->isLeaf()) {
1068 if ((*it)->node->degree() > 3)
1069 nodes.push_back((*it)->node);
1070 getMultifurcatingNodes(nodes, (*it)->node, node);
1071 }
1072 }
1073
generateNNIBraches(NodeVector & nodes1,NodeVector & nodes2,SplitGraph * excludeSplits,Node * node,Node * dad)1074 void MTree::generateNNIBraches(NodeVector &nodes1, NodeVector &nodes2, SplitGraph* excludeSplits, Node *node, Node *dad) {
1075 if (!node) node = root;
1076 //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1077 //if ((*it)->node != dad) {
1078 FOR_NEIGHBOR_IT(node, dad, it)
1079 if (!(*it)->node->isLeaf()) {
1080 generateNNIBraches(nodes1, nodes2, excludeSplits, (*it)->node, node);
1081 if (!node->isLeaf()) {
1082 if (excludeSplits != NULL && excludeSplits->size() != 0) {
1083 Split* sp = getSplit(node, (*it)->node);
1084 if (excludeSplits->containSplit(*sp)) {
1085 delete sp;
1086 continue;
1087 }
1088 delete sp;
1089 }
1090 if (node->id < (*it)->node->id) {
1091 nodes1.push_back(node);
1092 nodes2.push_back((*it)->node);
1093 } else {
1094 nodes1.push_back((*it)->node);
1095 nodes2.push_back(node);
1096 }
1097 }
1098 }
1099 }
1100
1101 //bool MTree::branchExist(Node* node1, Node* node2, NodeVector& nodes1, NodeVector& nodes2) {
1102 // assert(nodes1.size() == nodes2.size());
1103 // bool existed = false;
1104 // for (int i = 0; i < nodes1.size(); i++) {
1105 // if (nodes1[i] == node1) {
1106 // if (nodes2[i] == node2) {
1107 // existed = true;
1108 // break;
1109 // }
1110 // }
1111 // if (nodes1[i] == node2) {
1112 // if (nodes2[i] == node1) {
1113 // existed = true;
1114 // break;
1115 // }
1116 // }
1117 // }
1118 // return existed;
1119 //}
1120
getSurroundingInnerBranches(Node * node,Node * dad,int depth,Branches & surrBranches)1121 void MTree::getSurroundingInnerBranches(Node *node, Node *dad, int depth, Branches &surrBranches) {
1122 if (depth == 0)
1123 return;
1124 FOR_NEIGHBOR_IT(node, dad, it) {
1125 if (!(*it)->node->isLeaf()) {
1126 Branch curBranch;
1127 curBranch.first = node;
1128 curBranch.second = (*it)->node;
1129 int branchID = pairInteger(node->id, (*it)->node->id);
1130 if (surrBranches.find(branchID) == surrBranches.end())
1131 surrBranches.insert(pair<int,Branch>(branchID, curBranch));
1132 getSurroundingInnerBranches((*it)->node, node, depth-1, surrBranches);
1133 }
1134 }
1135 }
1136
isInnerBranch(Node * node1,Node * node2)1137 bool MTree::isInnerBranch(Node* node1, Node* node2) {
1138 return(node1->degree() >= 3 && node2->degree() >= 3 && isABranch(node1, node2));
1139 }
1140
isABranch(Node * node1,Node * node2)1141 bool MTree::isABranch(Node* node1, Node* node2) {
1142 return (node1->findNeighbor(node2) != NULL && node2->findNeighbor(node1) != NULL);
1143 }
1144
getBranches(NodeVector & nodes,NodeVector & nodes2,Node * node,Node * dad,bool post_traversal)1145 void MTree::getBranches(NodeVector &nodes, NodeVector &nodes2, Node *node, Node *dad, bool post_traversal) {
1146 if (!node) node = root;
1147 //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1148 //if ((*it)->node != dad) {
1149 FOR_NEIGHBOR_IT(node, dad, it) {
1150 if (!post_traversal) {
1151 if (node->id < (*it)->node->id) {
1152 nodes.push_back(node);
1153 nodes2.push_back((*it)->node);
1154 } else {
1155 nodes.push_back((*it)->node);
1156 nodes2.push_back(node);
1157 }
1158 }
1159 getBranches(nodes, nodes2, (*it)->node, node, post_traversal);
1160 if (post_traversal) {
1161 if (node->id < (*it)->node->id) {
1162 nodes.push_back(node);
1163 nodes2.push_back((*it)->node);
1164 } else {
1165 nodes.push_back((*it)->node);
1166 nodes2.push_back(node);
1167 }
1168 }
1169 }
1170 }
1171
getBranches(int max_dist,NodeVector & nodes,NodeVector & nodes2,Node * node,Node * dad)1172 void MTree::getBranches(int max_dist, NodeVector &nodes, NodeVector &nodes2, Node *node, Node *dad) {
1173 if (!node) node = root;
1174 //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1175 //if ((*it)->node != dad) {
1176 FOR_NEIGHBOR_IT(node, dad, it) {
1177 if (node->id < (*it)->node->id) {
1178 nodes.push_back(node);
1179 nodes2.push_back((*it)->node);
1180 } else {
1181 nodes.push_back((*it)->node);
1182 nodes2.push_back(node);
1183 }
1184 if (max_dist > 1)
1185 getBranches(max_dist-1, nodes, nodes2, (*it)->node, node);
1186 }
1187 }
1188
getBranches(BranchVector & branches,Node * node,Node * dad,bool post_traversal)1189 void MTree::getBranches(BranchVector& branches, Node *node, Node *dad, bool post_traversal) {
1190 if (!node) node = root;
1191 FOR_NEIGHBOR_IT(node, dad, it) {
1192 if (!post_traversal) {
1193 Branch branch;
1194 branch.first = node;
1195 branch.second = (*it)->node;
1196 branches.push_back(branch);
1197 }
1198 getBranches(branches, (*it)->node, node, post_traversal);
1199 if (post_traversal) {
1200 Branch branch;
1201 branch.first = node;
1202 branch.second = (*it)->node;
1203 branches.push_back(branch);
1204 }
1205 }
1206 }
1207
getInnerBranches(Branches & branches,Node * node,Node * dad)1208 void MTree::getInnerBranches(Branches& branches, Node *node, Node *dad) {
1209 if (!node) node = root;
1210 FOR_NEIGHBOR_IT(node, dad, it) {
1211 if (isInnerBranch((*it)->node, node)) {
1212 Branch branch;
1213 branch.first = node;
1214 branch.second = (*it)->node;
1215 branches.insert(pair<int, Branch>(pairInteger(branch.first->id, branch.second->id), branch));
1216 }
1217 getInnerBranches(branches, (*it)->node, node);
1218 }
1219 }
1220
getInnerBranches(BranchVector & branches,Node * node,Node * dad,bool post_traversal)1221 void MTree::getInnerBranches(BranchVector& branches, Node *node, Node *dad, bool post_traversal) {
1222 if (!node) node = root;
1223 FOR_NEIGHBOR_IT(node, dad, it) {
1224 if (!node->isLeaf() && !(*it)->node->isLeaf() && !post_traversal) {
1225 Branch branch;
1226 branch.first = node;
1227 branch.second = (*it)->node;
1228 branches.push_back(branch);
1229 }
1230 getInnerBranches(branches, (*it)->node, node, post_traversal);
1231 if (!node->isLeaf() && !(*it)->node->isLeaf() && post_traversal) {
1232 Branch branch;
1233 branch.first = node;
1234 branch.second = (*it)->node;
1235 branches.push_back(branch);
1236 }
1237 }
1238 }
1239
getBranchLengths(vector<DoubleVector> & len,Node * node,Node * dad)1240 void MTree::getBranchLengths(vector<DoubleVector> &len, Node *node, Node *dad) {
1241 if (!node) {
1242 node = root;
1243 ASSERT(len.size() == branchNum);
1244 }
1245 //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1246 //if ((*it)->node != dad) {
1247 FOR_NEIGHBOR_IT(node, dad, it) {
1248 (*it)->getLength(len[(*it)->id]);
1249 getBranchLengths(len, (*it)->node, node);
1250 }
1251 }
1252
setBranchLengths(vector<DoubleVector> & len,Node * node,Node * dad)1253 void MTree::setBranchLengths(vector<DoubleVector> &len, Node *node, Node *dad) {
1254 if (!node) {
1255 node = root;
1256 ASSERT(len.size() == branchNum);
1257 }
1258 //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1259 //if ((*it)->node != dad) {
1260 FOR_NEIGHBOR_IT(node, dad, it) {
1261 (*it)->setLength(len[(*it)->id]);
1262 (*it)->node->findNeighbor(node)->setLength(len[(*it)->id]);
1263 setBranchLengths(len, (*it)->node, node);
1264 }
1265 }
1266
getOrderedTaxa(NodeVector & taxa,Node * node,Node * dad)1267 void MTree::getOrderedTaxa(NodeVector &taxa, Node *node, Node *dad) {
1268 if (!node) node = root;
1269 if (node->isLeaf()) {
1270 if (taxa.empty()) taxa.resize(leafNum);
1271 taxa[node->id] = node;
1272 }
1273 //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1274 //if ((*it)->node != dad) {
1275 FOR_NEIGHBOR_IT(node, dad, it) {
1276 getOrderedTaxa(taxa, (*it)->node, node);
1277 }
1278 }
1279
getTaxaName(vector<string> & taxname,Node * node,Node * dad)1280 void MTree::getTaxaName(vector<string> &taxname, Node *node, Node *dad) {
1281 if (!node) node = root;
1282 if (node->isLeaf()) {
1283 if (taxname.empty()) taxname.resize(leafNum);
1284 taxname[node->id] = node->name;
1285 }
1286 //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1287 //if ((*it)->node != dad) {
1288 FOR_NEIGHBOR_IT(node, dad, it) {
1289 getTaxaName(taxname, (*it)->node, node);
1290 }
1291 }
1292
getNodeName(set<string> & nodename,Node * node,Node * dad)1293 void MTree::getNodeName(set<string> &nodename, Node *node, Node *dad) {
1294 if (!node) node = root;
1295 if (!node->name.empty())
1296 nodename.insert(node->name);
1297 //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1298 //if ((*it)->node != dad) {
1299 FOR_NEIGHBOR_IT(node, dad, it) {
1300 getNodeName(nodename, (*it)->node, node);
1301 }
1302 }
1303
getUnorderedTaxaName(vector<string> & taxname,Node * node,Node * dad)1304 void MTree::getUnorderedTaxaName(vector<string> &taxname, Node *node, Node *dad) {
1305 if (!node) node = root;
1306 if (node->isLeaf()) {
1307 taxname.push_back(node->name);
1308 }
1309 //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1310 //if ((*it)->node != dad) {
1311 FOR_NEIGHBOR_IT(node, dad, it) {
1312 getUnorderedTaxaName(taxname, (*it)->node, node);
1313 }
1314 }
1315
getTaxaID(vector<int> & taxa,Node * node,Node * dad)1316 void MTree::getTaxaID(vector<int> &taxa, Node *node, Node *dad) {
1317 if (!node) node = root;
1318 if (node->isLeaf()) {
1319 taxa.push_back(node->id);
1320 }
1321 //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1322 //if ((*it)->node != dad) {
1323 FOR_NEIGHBOR_IT(node, dad, it) {
1324 getTaxaID(taxa, (*it)->node, node);
1325 }
1326 }
1327
containsSplits(SplitGraph & splits)1328 bool MTree::containsSplits(SplitGraph& splits) {
1329 SplitGraph treeSplits;
1330 convertSplits(treeSplits);
1331 //check if treeSplits contains all splits in splits
1332 for (SplitGraph::iterator it = splits.begin(); it != splits.end(); it++) {
1333 if (!treeSplits.containSplit(**it))
1334 return false;
1335 }
1336 //treeSplits.report(cout);
1337 //splits.report(cout);
1338 return true;
1339 }
1340
getSplit(Node * node1,Node * node2)1341 Split* MTree::getSplit(Node* node1, Node* node2) {
1342 Neighbor* node12 = node1->findNeighbor(node2);
1343 return node12->split;
1344 }
1345
_getSplit(Node * node1,Node * node2)1346 Split* MTree::_getSplit(Node* node1, Node* node2) {
1347 Split* sp = new Split(leafNum);
1348 getTaxa(*sp, node1, node2);
1349 if (sp->shouldInvert())
1350 sp->invert();
1351 return sp;
1352 }
1353
convertSplits(SplitGraph & sg,Split * resp,NodeVector * nodes,Node * node,Node * dad)1354 void MTree::convertSplits(SplitGraph &sg, Split *resp, NodeVector *nodes, Node *node, Node *dad) {
1355 if (!node) node = root;
1356 ASSERT(resp->getNTaxa() == leafNum);
1357 bool has_child = false;
1358 FOR_NEIGHBOR_IT(node, dad, it) {
1359 //vector<int> taxa;
1360 //getTaxaID((*it)->node, node, taxa);
1361
1362 Split *sp = new Split(leafNum, (*it)->length);
1363 convertSplits(sg, sp, nodes, (*it)->node, node);
1364 *resp += *sp;
1365 if (sp->shouldInvert())
1366 sp->invert();
1367 /* ignore nodes with degree of 2 because such split will be added before */
1368 if (node->degree() != 2) {
1369 sg.push_back(sp);
1370 if (nodes) nodes->push_back((*it)->node);
1371 }
1372 has_child = true;
1373 }
1374 if (!has_child)
1375 resp->addTaxon(node->id);
1376 }
1377
convertSplits(vector<string> & taxname,SplitGraph & sg,NodeVector * nodes,Node * node,Node * dad)1378 void MTree::convertSplits(vector<string> &taxname, SplitGraph &sg, NodeVector *nodes, Node *node, Node *dad) {
1379 if (!sg.taxa) {
1380 sg.taxa = new NxsTaxaBlock();
1381 for (vector<string>::iterator it = taxname.begin(); it != taxname.end(); it++)
1382 sg.taxa->AddTaxonLabel(NxsString(it->c_str()));
1383 }
1384 if (!sg.splits)
1385 sg.splits = new MSplitsBlock(&sg);
1386 if (!sg.pda)
1387 sg.pda = new MPdaBlock(&sg);
1388
1389 // make the cycle
1390 getTaxaID(sg.splits->cycle);
1391 // make the splits
1392 Split sp(leafNum);
1393 convertSplits(sg, &sp, nodes, node, dad);
1394 }
1395
convertSplits(SplitGraph & sg,NodeVector * nodes,Node * node,Node * dad)1396 void MTree::convertSplits(SplitGraph &sg, NodeVector *nodes, Node *node, Node *dad) {
1397
1398 // make the taxa name
1399 vector<string> taxname;
1400 taxname.resize(leafNum);
1401 getTaxaName(taxname);
1402
1403 convertSplits(taxname, sg, nodes, node, dad);
1404 }
1405
splitnumtaxacmp(const Split * a,const Split * b)1406 inline int splitnumtaxacmp(const Split* a, const Split* b)
1407 {
1408 return (a->countTaxa() < b->countTaxa());
1409 }
1410
convertToTree(SplitGraph & sg)1411 void MTree::convertToTree(SplitGraph &sg) {
1412 SplitGraph::iterator it;
1413 int taxid;
1414 int count;
1415 BoolVector has_tax;
1416 has_tax.resize(sg.getNTaxa(), false);
1417 // first add trivial splits if not existed
1418 for (it = sg.begin(); it != sg.end(); it++) {
1419 taxid = (*it)->trivial();
1420 if (taxid >= 0) has_tax[taxid] = true;
1421 }
1422 for (count = 0; count < has_tax.size(); count++)
1423 if (!has_tax[count]) {
1424 Split *sp = new Split(sg.getNTaxa());
1425 sp->addTaxon(count);
1426 sg.push_back(sp);
1427 }
1428 // sort splits by the number of taxa they contain
1429 sort(sg.begin(), sg.end(), splitnumtaxacmp);
1430
1431 // initialize the tree
1432 rooted = false;
1433 leafNum = sg.getNTaxa();
1434 nodeNum = leafNum;
1435
1436 // create the ground nodes, first as the leaves
1437 NodeVector leaves;
1438 vector<Split*> cladetaxa;
1439 leaves.resize(leafNum, NULL);
1440 cladetaxa.resize(leafNum, NULL);
1441 // first add all trivial splits into tree
1442 for (it = sg.begin(), count = 0; it != sg.end(); it++, count++) {
1443 //(*it)->report(cout);
1444 taxid = (*it)->trivial();
1445 if (taxid < 0) break;
1446 ASSERT(leaves[taxid] == NULL);
1447 leaves[taxid] = newNode(taxid, sg.getTaxa()->GetTaxonLabel(taxid).c_str());
1448 leaves[taxid]->addNeighbor(NULL, (*it)->getWeight());
1449 cladetaxa[taxid] = (*it);
1450 }
1451 // now fill in all missing taxa with zero terminal branch
1452 for (taxid = 0; taxid < leafNum; taxid++)
1453 ASSERT(leaves[taxid]);
1454
1455 // now add non-trivial splits, cotinue with the interrupted iterator
1456 for (/*it = sg.begin()*/; it != sg.end(); it++) {
1457 //(*it)->report(cout);
1458 Split *mysp = *it;
1459 Node *newnode = newNode(nodeNum);
1460 int count = 0;
1461
1462 for (taxid = 0; taxid < leaves.size(); )
1463 if (cladetaxa[taxid]->subsetOf(*mysp)) // clade is a subset of current split
1464 {
1465 count += cladetaxa[taxid]->countTaxa();
1466 double len = leaves[taxid]->updateNeighbor(NULL, newnode);
1467 newnode->addNeighbor(leaves[taxid], len);
1468 leaves[taxid] = leaves.back();
1469 leaves.pop_back();
1470 cladetaxa[taxid] = cladetaxa.back();
1471 cladetaxa.pop_back();
1472 } else taxid++;
1473 ASSERT(count == mysp->countTaxa());
1474 cladetaxa.push_back(mysp);
1475 leaves.push_back(newnode);
1476
1477 newnode->addNeighbor(NULL, mysp->getWeight());
1478 nodeNum++;
1479 }
1480 ASSERT(leaves.size() >= 3);
1481 Node *newnode = newNode(nodeNum);
1482 for (taxid = 0; taxid < leaves.size(); taxid++) {
1483 double len = leaves[taxid]->updateNeighbor(NULL, newnode);
1484 newnode->addNeighbor(leaves[taxid], len);
1485 }
1486 root = newnode;
1487 nodeNum++;
1488 cladetaxa.clear();
1489 string root_name = ROOT_NAME;
1490 newnode = findLeafName(root_name);
1491 if (newnode) {
1492 rooted = true;
1493 root = newnode;
1494 }
1495
1496 }
1497
findNodeName(string & name,Node * node,Node * dad)1498 Node *MTree::findNodeName(string &name, Node *node, Node *dad) {
1499 if (!node) node = root;
1500 if (node->name == name) return node;
1501 FOR_NEIGHBOR_IT(node, dad, it) {
1502 Node *res = findNodeName(name, (*it)->node, node);
1503 if (res) return res;
1504 }
1505 return NULL;
1506 }
1507
findNodeNames(unordered_set<string> & taxa_set,pair<Node *,Neighbor * > & res,Node * node,Node * dad)1508 bool MTree::findNodeNames(unordered_set<string> &taxa_set, pair<Node*,Neighbor*> &res, Node *node, Node *dad) {
1509 int presence = 0;
1510 Neighbor *target = NULL;
1511 FOR_NEIGHBOR_IT(node, dad, it) {
1512 if ((*it)->node->isLeaf()) {
1513 if (taxa_set.find((*it)->node->name) != taxa_set.end()) {
1514 presence++;
1515 } else target = (*it);
1516 } else {
1517 if (findNodeNames(taxa_set, res, (*it)->node, node))
1518 presence++;
1519 else
1520 target = *it;
1521 if (res.first)
1522 return false;
1523 }
1524 }
1525 // all presence or absence
1526 if (presence == node->neighbors.size()-1)
1527 return true;
1528 if (presence == 0)
1529 return false;
1530 // inbetween: detect it!
1531 res.first = node;
1532 res.second = target;
1533 if (target != node->neighbors[0]) {
1534 // move target into the first neighbor
1535 FOR_NEIGHBOR_IT(node, NULL, it)
1536 if ((*it) == target) {
1537 (*it) = node->neighbors[0];
1538 node->neighbors[0] = target;
1539 break;
1540 }
1541 }
1542 return false;
1543 }
1544
findLeafName(string & name,Node * node,Node * dad)1545 Node *MTree::findLeafName(string &name, Node *node, Node *dad) {
1546 if (!node) node = root;
1547 if (node->isLeaf() && node->name == name) return node;
1548 FOR_NEIGHBOR_IT(node, dad, it) {
1549 Node *res = findLeafName(name, (*it)->node, node);
1550 if (res) return res;
1551 }
1552 return NULL;
1553 }
1554
findNodeID(int id,Node * node,Node * dad)1555 Node *MTree::findNodeID(int id, Node *node, Node* dad) {
1556 if (!node) node = root;
1557 if (node->id == id) return node;
1558 FOR_NEIGHBOR_IT(node, dad, it) {
1559 Node *res = findNodeID(id, (*it)->node, node);
1560 if (res) return res;
1561 }
1562 return NULL;
1563 }
1564
1565
scaleLength(double norm,bool make_int,Node * node,Node * dad)1566 void MTree::scaleLength(double norm, bool make_int, Node *node, Node *dad) {
1567 if (!node) node = root;
1568 FOR_NEIGHBOR_DECLARE(node, NULL, it) {
1569 (*it)->length *= norm;
1570 if (make_int)
1571 (*it)->length = round((*it)->length);
1572 }
1573
1574 FOR_NEIGHBOR(node, dad, it) {
1575 scaleLength(norm, make_int, (*it)->node, node);
1576 }
1577 }
1578
transformBranchLenRAX(double factor,Node * node,Node * dad)1579 void MTree::transformBranchLenRAX(double factor, Node *node, Node *dad) {
1580 if (!node) node = root;
1581 FOR_NEIGHBOR_DECLARE(node, NULL, it) {
1582 (*it)->length /= factor;
1583 (*it)->length = exp(-(*it)->length);
1584 }
1585
1586 FOR_NEIGHBOR(node, dad, it) {
1587 transformBranchLenRAX(factor, (*it)->node, node);
1588 }
1589 }
1590
scaleCladeSupport(double norm,bool make_int,Node * node,Node * dad)1591 void MTree::scaleCladeSupport(double norm, bool make_int, Node *node, Node *dad) {
1592 if (!node) node = root;
1593 if (!node->isLeaf() && !node->name.empty()) {
1594 double supp = 0.0;
1595 try {
1596 supp = convert_double(node->name.c_str());
1597 } catch (string str) {
1598 outError(str);
1599 }
1600 supp *= norm;
1601 if (make_int)
1602 supp = round(supp);
1603 node->name = "";
1604 node->name += supp;
1605 }
1606
1607 FOR_NEIGHBOR_IT(node, dad, it) {
1608 scaleCladeSupport(norm, make_int, (*it)->node, node);
1609 }
1610 }
1611
1612
~MTree()1613 MTree::~MTree()
1614 {
1615 if (root != NULL)
1616 freeNode();
1617 root = NULL;
1618 }
1619
freeNode(Node * node,Node * dad)1620 int MTree::freeNode(Node *node, Node *dad)
1621 {
1622 if ( root == NULL )
1623 return 0;
1624 if (!node) node = root;
1625 NeighborVec::reverse_iterator it;
1626 int num_nodes = 1;
1627 for (it = node->neighbors.rbegin(); it != node->neighbors.rend(); it++)
1628 if ((*it)->node != dad) {
1629 num_nodes += freeNode((*it)->node, node);
1630 }
1631 delete node;
1632 return num_nodes;
1633 }
1634
readNextChar(istream & in,char current_ch)1635 char MTree::readNextChar(istream &in, char current_ch) {
1636 char ch;
1637 if (current_ch == '[')
1638 ch = current_ch;
1639 else {
1640 in.get(ch);
1641 in_column++;
1642 if (ch == 10) {
1643 in_line++;
1644 in_column = 1;
1645 }
1646 }
1647 while (controlchar(ch) && !in.eof()) {
1648 in.get(ch);
1649 in_column++;
1650 if (ch == 10) {
1651 in_line++;
1652 in_column = 1;
1653 }
1654 }
1655 in_comment = "";
1656 // ignore comment
1657 while (ch=='[' && !in.eof()) {
1658 while (ch!=']' && !in.eof()) {
1659 in.get(ch);
1660 if (ch != ']')
1661 in_comment += ch;
1662 in_column++;
1663 if (ch == 10) {
1664 in_line++;
1665 in_column = 1;
1666 }
1667 }
1668 if (ch != ']') throw "Comments not ended with ]";
1669 in_column++;
1670 in.get(ch);
1671 if (ch == 10) {
1672 in_line++;
1673 in_column = 1;
1674 }
1675 while (controlchar(ch) && !in.eof()) {
1676 in_column++;
1677 in.get(ch);
1678 if (ch == 10) {
1679 in_line++;
1680 in_column = 1;
1681 }
1682 }
1683 }
1684 return ch;
1685 }
1686
reportInputInfo()1687 string MTree::reportInputInfo() {
1688 string str = " (line ";
1689 str += convertIntToString(in_line) + " column " + convertIntToString(in_column-1) + ")";
1690 return str;
1691 }
1692
convertToUnrooted()1693 void MTree::convertToUnrooted() {
1694 ASSERT(rooted && root);
1695 ASSERT(root->isLeaf() && root->id == leafNum-1);
1696 Node *node = root->neighbors[0]->node;
1697 Node *taxon = findFirstTaxon();
1698
1699 rooted = false;
1700 leafNum--;
1701
1702 // delete root node
1703 if (node->degree() == 3) {
1704 // delete and join adjacent branches
1705 Node *node1 = NULL, *node2 = NULL;
1706 double len = 0.0;
1707 FOR_NEIGHBOR_IT(node, root, it) {
1708 if (!node1) node1 = (*it)->node; else node2 = (*it)->node;
1709 len += (*it)->length;
1710 }
1711 node1->updateNeighbor(node, node2, len);
1712 node2->updateNeighbor(node, node1, len);
1713 delete node;
1714 } else {
1715 // only delete root node
1716 auto it = node->findNeighborIt(root);
1717 delete *it;
1718 node->neighbors.erase(it);
1719
1720 }
1721
1722 delete root;
1723 // set a temporary taxon so that tree traversal works
1724 root = taxon;
1725
1726 initializeTree();
1727 // computeBranchDirection();
1728 }
1729
1730 typedef map<int, Neighbor*> IntNeighborMap;
1731
sortTaxa(Node * node,Node * dad)1732 int MTree::sortTaxa(Node *node, Node *dad) {
1733 if (!node) {
1734 node = root;
1735 if (node->isLeaf()) node = node->neighbors[0]->node;
1736 }
1737 if (node->isLeaf())
1738 return node->id;
1739 IntNeighborMap taxid_nei_map;
1740 FOR_NEIGHBOR_IT(node, dad, it) {
1741 int taxid = sortTaxa((*it)->node, node);
1742 taxid_nei_map.insert(IntNeighborMap::value_type(taxid, (*it)));
1743 }
1744 ;
1745 int i = 0;
1746 for (IntNeighborMap::iterator it = taxid_nei_map.begin(); it != taxid_nei_map.end(); it++, i++) {
1747 if (node->neighbors[i]->node == dad) i++;
1748 node->neighbors[i] = it->second;
1749 }
1750
1751 return taxid_nei_map.begin()->first;
1752 }
1753
setExtendedFigChar()1754 void MTree::setExtendedFigChar() {
1755 //fig_char[0] = 179;
1756 //fig_char[1] = 196;
1757 fig_char[2] = '/';
1758 //fig_char[3] = 195;
1759 fig_char[4] = '\\';
1760 }
1761
drawTree(ostream & out,int brtype,double zero_epsilon)1762 void MTree::drawTree(ostream &out, int brtype, double zero_epsilon) {
1763 IntVector sub_tree_br;
1764 if (verbose_mode >= VB_DEBUG) {
1765 printTree(cout);
1766 cout << endl;
1767 }
1768 Node *node = root;
1769 if (node->isLeaf()) node = node->neighbors[0]->node;
1770 double scale = 60.0/treeDepth(node);
1771 //if (verbose_mode >= VB_DEBUG)
1772 //cout << "Tree depth: " << scale<< endl;
1773 drawTree2(out, brtype, scale, sub_tree_br, zero_epsilon);
1774 /*
1775 if (brtype & WT_INT_NODE)
1776 drawTree2(out, brtype, scale, sub_tree_br, zero_epsilon);
1777 else
1778 drawTree(out, brtype, scale, sub_tree_br, zero_epsilon);
1779 */
1780 out << endl;
1781 }
1782
1783 /*
1784 void MTree::drawTree(ostream &out, int brtype, double brscale, IntVector &subtree_br, double zero_epsilon, Node *node, Node *dad) {
1785 int i, br_len = 3;
1786 if (!node) {
1787 node = root;
1788 if (node->isLeaf()) node = node->neighbors[0]->node;
1789 } else {
1790
1791 if (brtype & WT_BR_SCALE) {
1792 br_len = floor(node->findNeighbor(dad)->length * brscale)-1;
1793 if (br_len < 3) br_len = 3;
1794 //if (!node->isLeaf() && br_len < 4) br_len = 4;
1795 }
1796 out << '+';
1797 if ((brtype & WT_INT_NODE) && !node->isLeaf()) {
1798 string str = convertIntToString(node->id);
1799 for (i = 0; i < br_len-str.length(); i++) out << '-';
1800 out << node->id;
1801 } else
1802 for (i = 0; i < br_len; i++) out << '-';
1803 }
1804 if (node->isLeaf()) {
1805 out << node->name;
1806 if (brtype & WT_TAXON_ID)
1807 out << " (" << node->id << ")";
1808 out << endl;
1809 return;
1810 }
1811 int descendant_cnt = node->degree();
1812 if (dad) descendant_cnt--;
1813 int cnt = 0;
1814 subtree_br.push_back(br_len);
1815 FOR_NEIGHBOR_IT(node, dad, it) {
1816 if (cnt == descendant_cnt-1)
1817 subtree_br.back() = -subtree_br.back();
1818
1819 drawTree(out, brtype, brscale, subtree_br, zero_epsilon, (*it)->node, node);
1820 cnt++;
1821 if (cnt == descendant_cnt) break;
1822 for (IntVector::iterator it = subtree_br.begin()+1; it != subtree_br.end(); it++)
1823 {
1824 if ((*(it-1)) > 0) out << '|';
1825 else out << ' ';
1826 for (i = 0; i < abs(*it); i++) out << ' ';
1827 }
1828 }
1829 subtree_br.pop_back();
1830 }
1831 */
1832
drawTree2(ostream & out,int brtype,double brscale,IntVector & subtree_br,double zero_epsilon,Node * node,Node * dad)1833 void MTree::drawTree2(ostream &out, int brtype, double brscale, IntVector &subtree_br, double zero_epsilon, Node *node, Node *dad) {
1834 int i, br_len = 3;
1835 IntVector::iterator ii;
1836 bool zero_length = false;
1837
1838 //cout << "DrawTree2!" << endl;
1839 if (!node) {
1840 node = root;
1841 if (node->isLeaf()) node = node->neighbors[0]->node;
1842 } else {
1843 if (brtype & WT_BR_SCALE) {
1844 br_len = floor(node->findNeighbor(dad)->length * brscale)-1;
1845 if (br_len < 2) br_len = 2;
1846 }
1847 if (node->findNeighbor(dad)->length <= zero_epsilon) zero_length = true;
1848 }
1849 if (node->isLeaf()) {
1850 for (ii = subtree_br.begin()+1; ii != subtree_br.end(); ii++) {
1851 if (abs(*(ii-1)) > 1000) out << ' ';
1852 else out << fig_char[0];
1853 int num = abs(*ii);
1854 if (num > 1000) num -= 1000;
1855 for (i = 0; i < num; i++) out << ' ';
1856 }
1857 out << ((node==dad->neighbors.front()->node) ? fig_char[2] : ((node==dad->neighbors.back()->node) ? fig_char[4] : fig_char[3]));
1858 for (i = 0; i < br_len; i++)
1859 out << ((zero_length) ? '*' : fig_char[1]);
1860 out << node->name;
1861 if (brtype & WT_TAXON_ID)
1862 out << " (" << node->id << ")";
1863 if (brtype & WT_BR_ID)
1864 out << " [" << node->neighbors[0]->id << "]";
1865 if (brtype & WT_BR_LEN)
1866 out << " " << node->neighbors[0]->length;
1867 //out << " ";
1868 //copy (subtree_br.begin(), subtree_br.end(), ostream_iterator<int> (out, " "));
1869 out << endl;
1870 return;
1871 }
1872 int descendant_cnt = node->degree();
1873 if (dad) descendant_cnt--;
1874 int cnt = 0;
1875 bool first = true;
1876
1877 br_len = br_len+1000;
1878 FOR_NEIGHBOR_IT(node, dad, it) {
1879 if (cnt == descendant_cnt-1)
1880 br_len = -br_len;
1881 subtree_br.push_back(br_len);
1882
1883 drawTree2(out, brtype, brscale, subtree_br, zero_epsilon, (*it)->node, node);
1884 subtree_br.pop_back();
1885 if (br_len > 1000) br_len -= 1000;
1886 cnt++;
1887 if (cnt == descendant_cnt) break;
1888 if (subtree_br.size() > 1)
1889 for (ii = subtree_br.begin()+1; ii != subtree_br.end(); ii++) {
1890 if (abs(*(ii-1)) > 1000) out << ' ';
1891 else out << fig_char[0];
1892 if (ii == subtree_br.begin()) continue;
1893 int num = abs(*ii);
1894 if (num > 1000) num -= 1000;
1895 for (i = 0; i < num; i++) out << ' ';
1896 }
1897 if (first) {
1898 if (dad) {
1899 out << ((node==dad->neighbors.front()->node) ? fig_char[2] : ((node==dad->neighbors.back()->node) ? fig_char[4] : fig_char[3]));
1900 for (i = 0; i < abs(br_len); i++)
1901 out << ((zero_length) ? '*' : fig_char[1]);
1902 }
1903 if (brtype & WT_INT_NODE)
1904 out << node->id;
1905 else
1906 out << fig_char[0];
1907 if (!node->name.empty())
1908 out << " (" << node->name << ")";
1909 if (brtype & WT_BR_LEN && dad)
1910 out << " " << node->findNeighbor(dad)->length;
1911 if (brtype & WT_BR_ID && dad)
1912 out << " [" << node->findNeighbor(dad)->id << "]";
1913 if (!subtree_br.empty()) {
1914 if (subtree_br.back() >1000)
1915 subtree_br.back() -= 1000;
1916 else if (subtree_br.back() < 0)
1917 subtree_br.back() -= 1000;
1918 }
1919 } else {
1920 if (dad) {
1921 if (abs(subtree_br.back()) > 1000) out << ' ';
1922 else out << fig_char[0];
1923 for (i = 0; i < abs(br_len); i++)
1924 out << ' ';
1925 }
1926 out << fig_char[0];
1927 }
1928 //out << " ";
1929 //copy (subtree_br.begin(), subtree_br.end(), ostream_iterator<int> (out, " "));
1930 out << endl;
1931 first = false;
1932 }
1933 }
1934
equalTopology(MTree * tree)1935 bool MTree::equalTopology(MTree *tree) {
1936 ASSERT(root->isLeaf());
1937 Node *root2 = tree->findLeafName(root->name);
1938 if (!root2) return false;
1939 ostringstream ostr, ostr2;
1940 printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA);
1941 tree->printTree(ostr2, WT_TAXON_ID | WT_SORT_TAXA, root2);
1942 return ostr.str() == ostr2.str();
1943 }
1944
calcDist(char * filename)1945 void MTree::calcDist(char *filename) {
1946 vector<string> taxname;
1947 int i, j;
1948
1949 // allocate memory
1950 taxname.resize(leafNum);
1951 double *dist = new double [leafNum * leafNum];
1952 // calculate the distances
1953 calcDist(dist);
1954 // get the taxa name
1955 getTaxaName(taxname);
1956
1957 try {
1958 ofstream out;
1959 out.exceptions(ios::failbit | ios::badbit);
1960 out.open(filename);
1961
1962 // now write the distances in phylip .dist format
1963 out << leafNum << endl;
1964
1965 for (i = 0; i < leafNum; i++) {
1966 out << taxname[i] << " ";
1967 for (j = 0; j < leafNum; j++) {
1968 out << dist[i*leafNum + j] << " ";
1969 }
1970 out << endl;
1971 }
1972 out.close();
1973 } catch (ios::failure) {
1974 outError(ERR_WRITE_OUTPUT, filename);
1975 }
1976 delete [] dist;
1977 }
1978
calcDist(double * & dist,Node * node,Node * dad)1979 void MTree::calcDist(double* &dist, Node *node, Node *dad) {
1980 if (!node) node = root;
1981 if (node->isLeaf()) {
1982 calcDist(node, 0.0, dist, node, NULL);
1983 }
1984 //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1985 //if ((*it)->node != dad) {
1986 FOR_NEIGHBOR_IT(node, dad, it) {
1987 calcDist(dist, (*it)->node, node);
1988 }
1989 }
1990
calcDist(Node * aroot,double cur_len,double * & dist,Node * node,Node * dad)1991 void MTree::calcDist(Node *aroot, double cur_len, double* &dist, Node *node, Node *dad) {
1992 double branch_length;
1993 if (!node) node = root;
1994 if (node->isLeaf()) {
1995 dist[aroot->id * leafNum + node->id] = cur_len;
1996 dist[node->id * leafNum + aroot->id] = cur_len;
1997 }
1998 //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1999 //if ((*it)->node != dad) {
2000 FOR_NEIGHBOR_IT(node, dad, it) {
2001 branch_length = (*it)->length;
2002 calcDist(aroot, cur_len + branch_length, dist, (*it)->node, node);
2003 }
2004
2005 }
2006
2007
2008 /*********************************************
2009 class PDTaxaSet
2010 *********************************************/
2011
setSubTree(MTree & tree,NodeVector & subtree)2012 void PDTaxaSet::setSubTree(MTree &tree, NodeVector &subtree) {
2013 stringstream ostr;
2014 tree.printSubTree(ostr, subtree);
2015 tree_str = ostr.str();
2016 }
2017
setTree(MTree & tree)2018 void PDTaxaSet::setTree(MTree &tree) {
2019 // assign the taxa set
2020 tree.getTaxa(*this);
2021 // assign the score
2022 score = tree.treeLength();
2023
2024 // assign tree_str
2025 stringstream ostr;
2026 tree.printTree(ostr);
2027 tree_str = ostr.str();
2028 }
2029
2030
printTaxa(ostream & out)2031 void PDTaxaSet::printTaxa(ostream &out) {
2032 for (iterator it = begin(); it != end(); it++)
2033 if ((*it)->name != ROOT_NAME)
2034 out << (*it)->name << endl;
2035 }
2036
printTaxa(char * filename)2037 void PDTaxaSet::printTaxa(char *filename) {
2038 try {
2039 ofstream out;
2040 out.exceptions(ios::failbit | ios::badbit);
2041 out.open(filename);
2042 printTaxa(out);
2043 out.close();
2044 cout << "Taxa list was printed to " << filename << endl;
2045 } catch (ios::failure) {
2046 outError(ERR_WRITE_OUTPUT, filename);
2047 }
2048 }
2049
printTree(ostream & out)2050 void PDTaxaSet::printTree(ostream &out) {
2051 if (!tree_str.empty())
2052 out << tree_str << endl;
2053 }
2054
printTree(char * filename)2055 void PDTaxaSet::printTree(char *filename) {
2056 try {
2057 ofstream out;
2058 out.exceptions(ios::failbit | ios::badbit);
2059 out.open(filename);
2060 printTree(out);
2061 out.close();
2062 cout << "Tree was printed to " << filename << endl;
2063 } catch (ios::failure) {
2064 outError(ERR_WRITE_OUTPUT, filename);
2065 }
2066 }
2067
2068
makeIDSet(int ntaxa,Split & id_set)2069 void PDTaxaSet::makeIDSet(int ntaxa, Split &id_set) {
2070 id_set.setNTaxa(ntaxa);
2071 id_set.setWeight(score);
2072 for (iterator it = begin(); it != end(); it++)
2073 id_set.addTaxon((*it)->id);
2074 }
2075
writeInternalNodeNames(string & out_file)2076 void MTree::writeInternalNodeNames(string &out_file) {
2077 try {
2078 ofstream out(out_file.c_str());
2079 NodeVector nodes;
2080 getInternalNodes(nodes);
2081 for (NodeVector::iterator nit = nodes.begin(); nit != nodes.end(); nit++) {
2082 out << " " << (*nit)->name;
2083 }
2084 out << endl;
2085 out.close();
2086 } catch (ios::failure) {
2087 outError(ERR_WRITE_OUTPUT, out_file);
2088 }
2089 }
2090
assignLeafID(Node * node,Node * dad)2091 void MTree::assignLeafID(Node *node, Node *dad) {
2092 if (!node) node = root;
2093 if (node->isLeaf()) {
2094 node->id = atoi(node->name.c_str());
2095 ASSERT(node->id >= 0 && node->id < leafNum);
2096 }
2097 FOR_NEIGHBOR_IT(node, dad, it)
2098 assignLeafID((*it)->node, node);
2099 }
2100
assignLeafNameByID(Node * node,Node * dad)2101 void MTree::assignLeafNameByID(Node *node, Node *dad) {
2102 if (!node) node = root;
2103 if (node->isLeaf()) {
2104 // node->id = atoi(node->name.c_str());
2105 // assert(node->id >= 0 && node->id < leafNum);
2106 node->name = convertIntToString(node->id);
2107 }
2108 FOR_NEIGHBOR_IT(node, dad, it)
2109 assignLeafNameByID((*it)->node, node);
2110 }
2111
getTaxa(Split & taxa,Node * node,Node * dad)2112 void MTree::getTaxa(Split &taxa, Node *node, Node *dad) {
2113 if (!node) node = root;
2114 if (node->isLeaf()) {
2115 taxa.addTaxon(node->id);
2116 }
2117 FOR_NEIGHBOR_IT(node, dad, it)
2118 getTaxa(taxa, (*it)->node, node);
2119 }
2120
2121
extractQuadSubtrees(vector<Split * > & subtrees,BranchVector & branches,Node * node,Node * dad)2122 void MTree::extractQuadSubtrees(vector<Split*> &subtrees, BranchVector &branches, Node *node, Node *dad) {
2123 if (!node) node = root;
2124 FOR_NEIGHBOR_IT(node, dad, it) {
2125 extractQuadSubtrees(subtrees, branches, (*it)->node, node);
2126 if ((*it)->node->isLeaf()) continue;
2127 // internal branch
2128 ASSERT(node->degree() == 3 && (*it)->node->degree() == 3);
2129 int cnt = 0;
2130 Node *child = (*it)->node;
2131 string treestrings[4];
2132 int nodeid = 0;
2133 FOR_NEIGHBOR_DECLARE(child, node, it2) {
2134 Split *sp = new Split(leafNum);
2135 getTaxa(*sp, (*it2)->node, child);
2136 subtrees.push_back(sp);
2137 cnt += sp->countTaxa();
2138 if (Params::getInstance().print_df1_trees) {
2139 ostringstream ostr;
2140 printTree(ostr, WT_BR_LEN, (*it2)->node, child);
2141 treestrings[nodeid] = ostr.str();
2142 treestrings[nodeid] = treestrings[nodeid].substr(0, treestrings[nodeid].length()-1);
2143 nodeid++;
2144 }
2145 }
2146 FOR_NEIGHBOR(node, child, it2) {
2147 Split *sp = new Split(leafNum);
2148 getTaxa(*sp, (*it2)->node, node);
2149 subtrees.push_back(sp);
2150 cnt += sp->countTaxa();
2151 if (Params::getInstance().print_df1_trees) {
2152 ostringstream ostr;
2153 printTree(ostr, WT_BR_LEN, (*it2)->node, node);
2154 treestrings[nodeid] = ostr.str();
2155 treestrings[nodeid] = treestrings[nodeid].substr(0, treestrings[nodeid].length()-1);
2156 nodeid++;
2157 }
2158 }
2159 ASSERT(cnt == leafNum);
2160 branches.push_back({node, child});
2161 if (Params::getInstance().print_df1_trees) {
2162 ASSERT(nodeid == 4);
2163 // output NNI-1 tree
2164 string treeDF1 = "(" + treestrings[0] + "," + treestrings[2] + ",(" + treestrings[1] + "," + treestrings[3] + "));";
2165 string treeDF2 = "(" + treestrings[0] + "," + treestrings[3] + ",(" + treestrings[1] + "," + treestrings[2] + "));";
2166 PUT_ATTR(child->findNeighbor(node), treeDF1);
2167 PUT_ATTR(child->findNeighbor(node), treeDF2);
2168 }
2169 }
2170 }
2171
2172 /*
2173 void MTree::assignBranchSupport(const char *trees_file, map<int,BranchSupportInfo> &branch_supports) {
2174 cout << "Reading input trees file " << trees_file << endl;
2175 try {
2176 ifstream in;
2177 in.exceptions(ios::failbit | ios::badbit);
2178 in.open(trees_file);
2179 assignBranchSupport(in, branch_supports);
2180 in.close();
2181 } catch (ios::failure) {
2182 outError(ERR_READ_INPUT, trees_file);
2183 }
2184 }
2185
2186 void MTree::assignBranchSupport(istream &in, map<int,BranchSupportInfo> &branch_supports) {
2187 SplitGraph mysg;
2188 NodeVector mynodes;
2189 convertSplits(mysg, &mynodes, root->neighbors[0]->node);
2190 vector<Split*> subtrees;
2191 extractQuadSubtrees(subtrees, root->neighbors[0]->node);
2192 IntVector decisive_counts;
2193 decisive_counts.resize(mynodes.size(), 0);
2194 StrVector occurence_trees; // list of tree IDs where each split occurs
2195 if (verbose_mode >= VB_MED)
2196 occurence_trees.resize(mynodes.size());
2197 SplitGraph::iterator sit;
2198 for (sit = mysg.begin(); sit != mysg.end(); sit++)
2199 (*sit)->setWeight(0.0);
2200 int ntrees, taxid;
2201 for (ntrees = 1; !in.eof(); ntrees++) {
2202 MTree tree;
2203 bool is_rooted = false;
2204
2205 // read in the tree and convert into split system for indexing
2206 tree.readTree(in, is_rooted);
2207 if (verbose_mode >= VB_DEBUG)
2208 cout << ntrees << " " << endl;
2209 StrVector taxname;
2210 tree.getTaxaName(taxname);
2211 // create the map from taxa between 2 trees
2212 Split taxa_mask(leafNum);
2213 for (StrVector::iterator it = taxname.begin(); it != taxname.end(); it++) {
2214 taxid = mysg.findLeafName(*it);
2215 if (taxid < 0)
2216 outError("Taxon not found in full tree: ", *it);
2217 taxa_mask.addTaxon(taxid);
2218 }
2219 // make the taxa ordering right before converting to split system
2220 taxname.clear();
2221 int smallid;
2222 for (taxid = 0, smallid = 0; taxid < leafNum; taxid++)
2223 if (taxa_mask.containTaxon(taxid)) {
2224 taxname.push_back(mysg.getTaxa()->GetTaxonLabel(taxid));
2225 string name = (string)mysg.getTaxa()->GetTaxonLabel(taxid);
2226 tree.findLeafName(name)->id = smallid++;
2227 }
2228 ASSERT(taxname.size() == tree.leafNum);
2229
2230 SplitGraph sg;
2231 //NodeVector nodes;
2232 tree.convertSplits(sg);
2233 SplitIntMap hash_ss;
2234 for (sit = sg.begin(); sit != sg.end(); sit++)
2235 hash_ss.insertSplit((*sit), 1);
2236
2237 // now scan through all splits in current tree
2238 int id, qid;
2239 for (sit = mysg.begin(), id = 0, qid = 0; sit != mysg.end(); sit++, id++)
2240 if ((*sit)->trivial() < 0) // it is an internal split
2241 {
2242
2243 bool decisive = true;
2244 for (int i = 0; i < 4; i++) {
2245 if (!taxa_mask.overlap(*subtrees[qid+i])) {
2246 decisive = false;
2247 break;
2248 }
2249 }
2250 qid += 4;
2251 if (!decisive) continue;
2252
2253 decisive_counts[id]++;
2254 Split *subsp = (*sit)->extractSubSplit(taxa_mask);
2255 if (subsp->shouldInvert())
2256 subsp->invert();
2257 Split *sp = hash_ss.findSplit(subsp);
2258 if (sp && sp->trivial() < 0) {
2259 (*sit)->setWeight((*sit)->getWeight()+1.0);
2260 if (verbose_mode >= VB_MED)
2261 occurence_trees[id] += convertIntToString(ntrees) + " ";
2262 if (verbose_mode >= VB_MAX) {
2263 for (taxid = 0; taxid < (*sit)->getNTaxa(); taxid++)
2264 if ((*sit)->containTaxon(taxid))
2265 cout << " " << mysg.getTaxa()->GetTaxonLabel(taxid);
2266 cout << " --> ";
2267 for (taxid = 0; taxid < sp->getNTaxa(); taxid++)
2268 if (sp->containTaxon(taxid))
2269 cout << " " << taxname[taxid];
2270 cout << endl;
2271 }
2272 }
2273 delete subsp;
2274 }
2275
2276 char ch;
2277 in.exceptions(ios::goodbit);
2278 (in) >> ch;
2279 if (in.eof()) break;
2280 in.unget();
2281 in.exceptions(ios::failbit | ios::badbit);
2282
2283 }
2284
2285 cout << ntrees << " trees read" << endl;
2286
2287 for (int i = 0; i < mysg.size(); i++)
2288 if (!mynodes[i]->isLeaf())
2289 {
2290 BranchSupportInfo brsup;
2291 brsup.id = mynodes[i]->id;
2292 brsup.name = mynodes[i]->name;
2293 brsup.geneCF = mysg[i]->getWeight()/decisive_counts[i];
2294 brsup.geneN = decisive_counts[i];
2295 branch_supports[brsup.id] = brsup;
2296
2297 stringstream tmp;
2298 if (mysg[i]->getWeight() == 0.0)
2299 tmp << "0";
2300 else
2301 tmp << round((mysg[i]->getWeight()/decisive_counts[i])*1000)/10;
2302 if (verbose_mode >= VB_MED)
2303 tmp << "%" << decisive_counts[i];
2304
2305 if (Params::getInstance().newick_extended_format) {
2306 if (mynodes[i]->name.empty() || mynodes[i]->name.back() != ']')
2307 mynodes[i]->name += "[&CF=" + tmp.str() + "]";
2308 else
2309 mynodes[i]->name = mynodes[i]->name.substr(0, mynodes[i]->name.length()-1) + ",!CF=" + tmp.str() + "]";
2310 } else {
2311 if (!mynodes[i]->name.empty())
2312 mynodes[i]->name.append("/");
2313 mynodes[i]->name.append(tmp.str());
2314 }
2315 if (verbose_mode >= VB_MED) {
2316 cout << mynodes[i]->name << " " << occurence_trees[i] << endl;
2317 }
2318 }
2319 for (vector<Split*>::reverse_iterator it = subtrees.rbegin(); it != subtrees.rend(); it++)
2320 delete (*it);
2321 }
2322 */
2323
computeRFDist(const char * trees_file,DoubleVector & dist,int assign_sup)2324 void MTree::computeRFDist(const char *trees_file, DoubleVector &dist, int assign_sup) {
2325 cout << "Reading input trees file " << trees_file << endl;
2326 try {
2327 ifstream in;
2328 in.exceptions(ios::failbit | ios::badbit);
2329 in.open(trees_file);
2330 computeRFDist(in, dist, assign_sup);
2331 in.close();
2332 } catch (ios::failure) {
2333 outError(ERR_READ_INPUT, trees_file);
2334 }
2335 }
2336
computeRFDist(istream & in,DoubleVector & dist,int assign_sup,bool one_tree)2337 void MTree::computeRFDist(istream &in, DoubleVector &dist, int assign_sup, bool one_tree) {
2338 SplitGraph mysg;
2339 NodeVector nodes;
2340 convertSplits(mysg, &nodes, root->neighbors[0]->node);
2341 StringIntMap name_index;
2342 int ntrees, taxid;
2343 for (taxid = 0; taxid < mysg.getNTaxa(); taxid++)
2344 name_index[mysg.getTaxa()->GetTaxonLabel(taxid)] = taxid;
2345 NodeVector::iterator nit;
2346 if (assign_sup)
2347 for (nit = nodes.begin(); nit != nodes.end(); nit++)
2348 (*nit)->height = 0.0;
2349
2350 SplitGraph::iterator sit;
2351 for (sit = mysg.begin(); sit != mysg.end(); sit++)
2352 (*sit)->setWeight(0.0);
2353 for (ntrees = 1; !in.eof(); ntrees++) {
2354 MTree tree;
2355 bool is_rooted = false;
2356
2357 // read in the tree and convert into split system for indexing
2358 tree.readTree(in, is_rooted);
2359 if (verbose_mode >= VB_DEBUG)
2360 cout << ntrees << " " << endl;
2361 StrVector taxname;
2362 tree.getTaxaName(taxname);
2363 // create the map from taxa between 2 trees
2364 Split taxa_mask(leafNum);
2365 for (StrVector::iterator it = taxname.begin(); it != taxname.end(); it++) {
2366 if (name_index.find(*it) == name_index.end())
2367 outError("Taxon not found in full tree: ", *it);
2368 taxid = name_index[*it];
2369 taxa_mask.addTaxon(taxid);
2370 }
2371 // make the taxa ordering right before converting to split system
2372 taxname.clear();
2373 int smallid;
2374 for (taxid = 0, smallid = 0; taxid < leafNum; taxid++)
2375 if (taxa_mask.containTaxon(taxid)) {
2376 taxname.push_back(mysg.getTaxa()->GetTaxonLabel(taxid));
2377 string name = (string)mysg.getTaxa()->GetTaxonLabel(taxid);
2378 tree.findLeafName(name)->id = smallid++;
2379 }
2380 ASSERT(taxname.size() == tree.leafNum);
2381
2382 SplitGraph sg;
2383 //NodeVector nodes;
2384 tree.convertSplits(sg);
2385 SplitIntMap hash_ss;
2386 for (sit = sg.begin(); sit != sg.end(); sit++)
2387 hash_ss.insertSplit((*sit), 1);
2388
2389 // now scan through all splits in current tree
2390 int common_splits = 0;
2391 for (sit = mysg.begin(); sit != mysg.end(); sit++)
2392 if ((*sit)->trivial() < 0) // it is an internal split
2393 {
2394
2395 Split *subsp = (*sit)->extractSubSplit(taxa_mask);
2396 if (subsp->shouldInvert())
2397 subsp->invert();
2398 Split *sp = hash_ss.findSplit(subsp);
2399 if (sp) {
2400 common_splits++;
2401 //(*sit)->setWeight((*sit)->getWeight()+1.0);
2402 if (verbose_mode >= VB_MAX) {
2403 for (taxid = 0; taxid < (*sit)->getNTaxa(); taxid++)
2404 if ((*sit)->containTaxon(taxid))
2405 cout << " " << mysg.getTaxa()->GetTaxonLabel(taxid);
2406 cout << " --> ";
2407 for (taxid = 0; taxid < sp->getNTaxa(); taxid++)
2408 if (sp->containTaxon(taxid))
2409 cout << " " << taxname[taxid];
2410 cout << endl;
2411 }
2412 if (assign_sup && subsp->trivial() < 0)
2413 nodes[sit-mysg.begin()]->height++;
2414 }
2415 delete subsp;
2416 }
2417
2418 //cout << "common_splits = " << common_splits << endl;
2419 double max_dist = branchNum-leafNum + tree.branchNum-tree.leafNum;
2420 double rf_val = max_dist - 2*common_splits;
2421 if (Params::getInstance().normalize_tree_dist) {
2422 rf_val = rf_val / max_dist;
2423 }
2424 dist.push_back(rf_val);
2425 char ch;
2426 in.exceptions(ios::goodbit);
2427 (in) >> ch;
2428 if (in.eof()) break;
2429 in.unget();
2430 in.exceptions(ios::failbit | ios::badbit);
2431
2432 if (one_tree)
2433 break;
2434
2435 }
2436 if (assign_sup)
2437 for (nit = nodes.begin(); nit != nodes.end(); nit++)
2438 if (!(*nit)->isLeaf())
2439 (*nit)->name = convertIntToString((*nit)->height);
2440
2441 // cout << ntrees << " trees read" << endl;
2442
2443
2444 }
2445
reportDisagreedTrees(vector<string> & taxname,MTreeSet & trees,Split & mysplit)2446 void MTree::reportDisagreedTrees(vector<string> &taxname, MTreeSet &trees, Split &mysplit) {
2447 for (MTreeSet::iterator it = trees.begin(); it != trees.end(); it++) {
2448 MTree *tree = (*it);
2449 SplitGraph sg;
2450 tree->convertSplits(taxname, sg);
2451 if (!sg.containSplit(mysplit)) {
2452 tree->printTree(cout, 0); // don't print branch lengths
2453 cout << endl;
2454 }
2455 }
2456 }
2457
2458
createBootstrapSupport(vector<string> & taxname,MTreeSet & trees,SplitIntMap & hash_ss,char * tag,Node * node,Node * dad)2459 void MTree::createBootstrapSupport(vector<string> &taxname, MTreeSet &trees, SplitIntMap &hash_ss,
2460 char *tag, Node *node, Node *dad) {
2461 if (!node) node = root;
2462 FOR_NEIGHBOR_IT(node, dad, it) {
2463 if (!node->isLeaf() && !(*it)->node->isLeaf()) {
2464 vector<int> taxa;
2465 getTaxaID(taxa, (*it)->node, node);
2466 Split mysplit(leafNum, 0.0, taxa);
2467 if (mysplit.shouldInvert())
2468 mysplit.invert();
2469 //mysplit.report(cout);
2470 //SplitIntMap::iterator ass_it = hash_ss.find(&mysplit);
2471 Split *sp = hash_ss.findSplit(&mysplit);
2472 // if found smt
2473 if (sp != NULL) {
2474 //Split *sp = ass_it->first;
2475 /*char tmp[100];
2476 if ((*it)->node->name.empty()) {
2477 sprintf(tmp, "%d", round(sp->getWeight()));
2478 } else
2479 sprintf(tmp, "/%d", round(sp->getWeight()));*/
2480 stringstream tmp;
2481 if ((*it)->node->name.empty())
2482 tmp << sp->getWeight();
2483 else
2484 tmp << "/" << sp->getWeight();
2485
2486 // assign tag
2487 if (tag && (iEquals(tag, "ALL") || (*it)->node->name == tag))
2488 tmp << sp->getName();
2489 (*it)->node->name.append(tmp.str());
2490 } else {
2491 if (!(*it)->node->name.empty()) (*it)->node->name.append("/");
2492 (*it)->node->name.append("0");
2493 if (verbose_mode >= VB_MED) {
2494 cout << "split not found:" << endl;
2495 mysplit.report(cout);
2496 }
2497 }
2498 /* new stuff: report trees that do not contain the split */
2499 if (strncmp((*it)->node->name.c_str(), "INFO", 4) == 0) {
2500 cout << "Reporting trees not containing the split " << (*it)->node->name << endl;
2501 reportDisagreedTrees(taxname, trees, mysplit);
2502 }
2503 }
2504 createBootstrapSupport(taxname, trees, hash_ss, tag, (*it)->node, node);
2505 }
2506 }
2507
removeNode(Node * dad,Node * node)2508 void MTree::removeNode(Node *dad, Node *node) {
2509 // Node *child = (*it)->node;
2510 bool first = true;
2511 FOR_NEIGHBOR_IT(node, dad, it2) {
2512 if (first)
2513 dad->updateNeighbor(node, (*it2)->node, (*it2)->length);
2514 else
2515 dad->addNeighbor((*it2)->node, (*it2)->length);
2516 (*it2)->node->updateNeighbor(node, dad);
2517 first = false;
2518 }
2519 delete node;
2520
2521 // Node *child = (*it)->node;
2522 // bool first = true;
2523 // FOR_NEIGHBOR_IT(child, node, it2) {
2524 // if (first)
2525 // node->updateNeighbor(child, (*it2)->node, (*it2)->length);
2526 // else
2527 // node->addNeighbor((*it2)->node, (*it2)->length);
2528 // (*it2)->node->updateNeighbor(child, node);
2529 // first = false;
2530 // }
2531 // delete child;
2532 }
2533
2534
collapseZeroBranches(Node * node,Node * dad,double threshold)2535 int MTree::collapseZeroBranches(Node *node, Node *dad, double threshold) {
2536 if (!node) node = root;
2537 int count = 0;
2538 FOR_NEIGHBOR_DECLARE(node, dad, it) {
2539 count += collapseZeroBranches((*it)->node, node, threshold);
2540 }
2541 NeighborVec nei_vec;
2542 nei_vec.insert(nei_vec.begin(), node->neighbors.begin(), node->neighbors.end());
2543 for (it = nei_vec.begin(); it != nei_vec.end(); it++)
2544 if ((*it)->node != dad && (*it)->length <= threshold) {
2545 // delete the child node
2546 removeNode(node, (*it)->node);
2547 count++;
2548 }
2549 return count;
2550 }
2551
collapseInternalBranches(Node * node,Node * dad,double threshold)2552 int MTree::collapseInternalBranches(Node *node, Node *dad, double threshold) {
2553 if (!node) node = root;
2554 int count = 0;
2555 FOR_NEIGHBOR_DECLARE(node, dad, it) {
2556 count += collapseInternalBranches((*it)->node, node, threshold);
2557 }
2558 if (node->isLeaf())
2559 return count;
2560 NeighborVec nei_vec;
2561 nei_vec.insert(nei_vec.begin(), node->neighbors.begin(), node->neighbors.end());
2562 for (it = nei_vec.begin(); it != nei_vec.end(); it++)
2563 if ((*it)->node != dad && !(*it)->node->isLeaf() && (*it)->length <= threshold) {
2564 // delete the child node
2565 removeNode(node, (*it)->node);
2566 count++;
2567 }
2568 return count;
2569 }
2570
insertTaxa(StrVector & new_taxa,StrVector & existing_taxa)2571 void MTree::insertTaxa(StrVector &new_taxa, StrVector &existing_taxa) {
2572 if (new_taxa.empty()) return;
2573 IntVector id;
2574 int i;
2575 id.resize(new_taxa.size());
2576 for (i = 0; i < id.size(); i++)
2577 id[i] = i;
2578 // randomize order before reinsert back into tree
2579 my_random_shuffle(id.begin(), id.end());
2580
2581 for (int i = 0; i < new_taxa.size(); i++) {
2582 Node *old_taxon = findLeafName(existing_taxa[id[i]]);
2583 ASSERT(old_taxon);
2584 double len = old_taxon->neighbors[0]->length;
2585 Node *old_node = old_taxon->neighbors[0]->node;
2586 Node *new_taxon = newNode(leafNum+i, new_taxa[id[i]].c_str());
2587 Node *new_node = newNode();
2588 // link new_taxon - new_node
2589 new_taxon->addNeighbor(new_node, 0.0);
2590 new_node->addNeighbor(new_taxon, 0.0);
2591 // link old_taxon - new_node
2592 new_node->addNeighbor(old_taxon, 0.0);
2593 old_taxon->updateNeighbor(old_node, new_node, 0.0);
2594 // link old_node - new_node
2595 new_node->addNeighbor(old_node, len);
2596 old_node->updateNeighbor(old_taxon, new_node, len);
2597 }
2598
2599 leafNum = leafNum + new_taxa.size();
2600 initializeTree();
2601 }
2602
findFirstTaxon(Node * node,Node * dad)2603 Node *MTree::findFirstTaxon(Node *node, Node *dad) {
2604 if (!node) node = root;
2605 // Node *next;
2606 for (int i = 0; i < nodeNum; i++)
2607 FOR_NEIGHBOR_IT(node, dad, it) {
2608 if ((*it)->node->isLeaf()) return (*it)->node;
2609 dad = node;
2610 node = (*it)->node;
2611 break;
2612 }
2613 return NULL;
2614 }
2615
removeTaxa(StrVector & taxa_names)2616 int MTree::removeTaxa(StrVector &taxa_names) {
2617 if (taxa_names.empty())
2618 return 0;
2619 int count = 0;
2620 for (StrVector::iterator sit = taxa_names.begin(); sit != taxa_names.end(); sit++) {
2621 Node *node = findLeafName(*sit);
2622 if (!node) continue;
2623 count++;
2624 // if (!node)
2625 // outError((string)"Taxon " + (*sit) + " does not appear in the tree");
2626 if (node == root)
2627 { // find another root
2628 root = findFirstTaxon(root);
2629 }
2630
2631 Node *innode = node->neighbors[0]->node;
2632 Node *othernodes[2] = { NULL, NULL };
2633 int i;
2634 double length = 0;
2635
2636 bool should_merge = true;
2637
2638 FOR_NEIGHBOR_DECLARE(innode, node, it) {
2639 length += (*it)->length;
2640 if (othernodes[0] == NULL)
2641 othernodes[0] = (*it)->node;
2642 else if (othernodes[1] == NULL)
2643 othernodes[1] = (*it)->node;
2644 else
2645 should_merge = false;
2646 }
2647
2648 if (should_merge)
2649 {
2650 // merge two branches
2651 for (i = 0; i < 2; i++)
2652 for (it = othernodes[i]->neighbors.begin(); it != othernodes[i]->neighbors.end(); it++)
2653 if ((*it)->node == innode)
2654 {
2655 (*it)->node = othernodes[1-i];
2656 (*it)->length = length;
2657 }
2658 delete innode;
2659 } else {
2660 // simple delete the neighbor of innode
2661 for (it = innode->neighbors.begin(); it != innode->neighbors.end(); it++)
2662 if ((*it)->node == node) {
2663 innode->neighbors.erase(it);
2664 break;
2665 }
2666 }
2667 delete node;
2668 }
2669
2670 if (!count) return 0;
2671
2672 NodeVector taxa;
2673 getTaxa(taxa);
2674 ASSERT(taxa.size() > 0);
2675 // reassign taxon IDs
2676 int id = 0;
2677 for (NodeVector::iterator nit = taxa.begin(); nit != taxa.end(); nit++)
2678 if (*nit == root && rooted)
2679 (*nit)->id = taxa.size()-1;
2680 else
2681 (*nit)->id = id++;
2682 leafNum = taxa.size();
2683 initializeTree();
2684 return count;
2685 }
2686
getSplits(SplitGraph & splits,Node * node,Node * dad)2687 void MTree::getSplits(SplitGraph &splits, Node* node, Node* dad) {
2688 if (!node) {
2689 node = root;
2690 }
2691 FOR_NEIGHBOR_IT(node, dad, it) {
2692 getSplits(splits, (*it)->node, node);
2693 Split* mySplit = new Split(*((*it)->split));
2694 if (mySplit->shouldInvert())
2695 mySplit->invert();
2696 splits.push_back(mySplit);
2697 }
2698 }
2699
buildNodeSplit(Split * resp,Node * node,Node * dad)2700 void MTree::buildNodeSplit(Split *resp, Node *node, Node *dad) {
2701 if (!node) {
2702 node = root;
2703 // The neighbor that represents root
2704 Neighbor* rootNei = root->neighbors[0]->node->findNeighbor(root);
2705 if (rootNei->split == NULL) {
2706 rootNei->split = new Split(leafNum);
2707 } else {
2708 delete rootNei->split;
2709 rootNei->split = new Split(leafNum);
2710 }
2711 resp = rootNei->split;
2712 }
2713 bool has_child = false;
2714 FOR_NEIGHBOR_IT(node, dad, it) {
2715 if ((*it)->split == NULL) {
2716 (*it)->split = new Split(leafNum);
2717 } else {
2718 delete (*it)->split;
2719 (*it)->split = new Split(leafNum);
2720 }
2721 buildNodeSplit((*it)->split, (*it)->node, node);
2722 //(*it)->split->report(cout);
2723 *resp += *((*it)->split);
2724 has_child = true;
2725 }
2726
2727 if (dad != NULL) {
2728 Neighbor* dadNei = node->findNeighbor(dad);
2729 dadNei->split = new Split(*resp);
2730 dadNei->split->invert();
2731 }
2732
2733 if (!has_child) {
2734 resp->addTaxon(node->id);
2735 }
2736 }
2737
initializeSplitMap(Split * resp,Node * node,Node * dad)2738 void MTree::initializeSplitMap(Split *resp, Node *node, Node *dad) {
2739 if (!node) node = root;
2740 if (!resp) {
2741 resp = new Split(leafNum);
2742 }
2743 bool has_child = false;
2744 FOR_NEIGHBOR_IT(node, dad, it) {
2745 Split *sp = new Split(leafNum);
2746 initializeSplitMap(sp, (*it)->node, node);
2747 *resp += *sp;
2748 if (sp->shouldInvert())
2749 sp->invert();
2750 /* ignore nodes with degree of 2 because such split will be added before */
2751 if (node->degree() != 2) {
2752 Branch curBranch((*it)->node, node);
2753 splitBranchMap.insert(make_pair(sp, curBranch));
2754 }
2755 has_child = true;
2756 }
2757 if (!has_child) {
2758 resp->addTaxon(node->id);
2759 }
2760 }
2761
findFarthestLeaf(Node * node,Node * dad)2762 Node *MTree::findFarthestLeaf(Node *node, Node *dad) {
2763 if (rooted) // special treatment for rooted tree
2764 return root;
2765 if (!node)
2766 node = root;
2767
2768 if (dad && node->isLeaf()) {
2769 node->height = 0.0;
2770 return node;
2771 }
2772 Node *res = NULL;
2773 node->height = 0.0;
2774 FOR_NEIGHBOR_IT(node, dad, it) {
2775 Node *leaf = findFarthestLeaf((*it)->node, node);
2776 if (node->height < (*it)->node->height+1) {
2777 node->height = (*it)->node->height+1;
2778 res = leaf;
2779 }
2780 }
2781 return res;
2782 }
2783
getPreOrderBranches(NodeVector & nodes,NodeVector & nodes2,Node * node,Node * dad)2784 void MTree::getPreOrderBranches(NodeVector &nodes, NodeVector &nodes2, Node *node, Node *dad) {
2785 if (dad) {
2786 nodes.push_back(node);
2787 nodes2.push_back(dad);
2788 }
2789
2790 NeighborVec neivec = node->neighbors;
2791 NeighborVec::iterator i1, i2;
2792 for (i1 = neivec.begin(); i1 != neivec.end(); i1++)
2793 for (i2 = i1+1; i2 != neivec.end(); i2++)
2794 if ((*i1)->node->height > (*i2)->node->height) {
2795 Neighbor *nei = *i1;
2796 *i1 = *i2;
2797 *i2 = nei;
2798 }
2799 for (i1 = neivec.begin(); i1 != neivec.end(); i1++)
2800 if ((*i1)->node != dad)
2801 getPreOrderBranches(nodes, nodes2, (*i1)->node, node);
2802 // FOR_NEIGHBOR_IT(node, dad, it)
2803 // getPreOrderBranches(nodes, nodes2, (*it)->node, node);
2804 }
2805