1 /***************************************************************************
2  *   Copyright (C) 2006 by BUI Quang Minh, Steffen Klaere, Arndt von Haeseler   *
3  *   minh.bui@univie.ac.at   *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
19  ***************************************************************************/
20 #include "mtree.h"
21 #include <iostream>
22 //#include <fstream>
23 #include <iterator>
24 //#include <mtree.h>
25 #include "pda/splitgraph.h"
26 #include "utils/tools.h"
27 #include "mtreeset.h"
28 using namespace std;
29 
30 /*********************************************
31 	class MTree
32 *********************************************/
33 
MTree()34 MTree::MTree() {
35     root = NULL;
36     leafNum = 0;
37     nodeNum = 0;
38     rooted = false;
39     if (Params::getInstance().min_branch_length <= 0)
40         num_precision = 6;
41     else
42         num_precision = max((int)ceil(-log10(Params::getInstance().min_branch_length))+1, 6);
43     len_scale = 1.0;
44 	fig_char = "|-+++";
45 }
46 
MTree(const char * userTreeFile,bool & is_rooted)47 MTree::MTree(const char *userTreeFile, bool &is_rooted)
48 {
49     init(userTreeFile, is_rooted);
50 }
51 
init(const char * userTreeFile,bool & is_rooted)52 void MTree::init(const char *userTreeFile, bool &is_rooted) {
53     if (Params::getInstance().min_branch_length <= 0)
54         num_precision = 6;
55     else
56         num_precision = max((int)ceil(-log10(Params::getInstance().min_branch_length))+1, 6);
57     len_scale = 1.0;
58     readTree(userTreeFile, is_rooted);
59     //printInfo();
60 	fig_char = "|-+++";
61 }
62 
63 
64 /**
65 	constructor, get from another tree
66 */
MTree(MTree & tree)67 MTree::MTree(MTree &tree) {
68     init(tree);
69 }
70 
MTree(string & treeString,vector<string> & taxaNames,bool isRooted)71 MTree::MTree(string& treeString, vector<string>& taxaNames, bool isRooted) {
72     stringstream str;
73     str << treeString;
74     str.seekg(0, ios::beg);
75     readTree(str, isRooted);
76     assignIDs(taxaNames);
77     assignLeafID();
78 }
79 
MTree(string & treeString,bool isRooted)80 MTree::MTree(string& treeString, bool isRooted) {
81     stringstream str;
82     str << treeString;
83     str.seekg(0, ios::beg);
84     readTree(str, isRooted);
85     assignLeafID();
86 }
87 
init(MTree & tree)88 void MTree::init(MTree &tree) {
89     root = tree.root;
90     leafNum = tree.leafNum;
91     nodeNum = tree.nodeNum;
92     rooted = tree.rooted;
93     //userFile = tree.userFile;
94     // have to delete the root when exchange to another object
95     tree.root = NULL;
96     num_precision = tree.num_precision;
97     len_scale = tree.len_scale;
98     fig_char = tree.fig_char;
99 }
100 
assignIDs(vector<string> & taxaNames)101 void MTree::assignIDs(vector<string>& taxaNames) {
102     bool err = false;
103     int nseq = taxaNames.size();
104     for (int seq = 0; seq < nseq; seq++) {
105         string seq_name = taxaNames[seq];
106         Node *node = findLeafName(seq_name);
107         if (!node) {
108             string str = "Sequence ";
109             str += seq_name;
110             str += " does not appear in the tree";
111             err = true;
112             outError(str, false);
113         } else {
114             ASSERT(node->isLeaf());
115             node->id = seq;
116         }
117     }
118     StrVector taxname;
119     getTaxaName(taxname);
120     for (StrVector::iterator it = taxname.begin(); it != taxname.end(); it++) {
121         bool foundTaxa = false;
122         for (vector<string>::iterator it2 = taxaNames.begin(); it2 != taxaNames.end(); it2++) {
123             if ( *it == *it2 ) {
124                 foundTaxa = true;
125                 break;
126             }
127         }
128         if (!foundTaxa) {
129             outError((string) "Tree taxon " + (*it) + " does not appear in the input taxa names", false);
130             err = true;
131         }
132     }
133     if (err) outError("Tree taxa and input taxa names do not match (see above)");
134 }
135 
copyTree(MTree * tree)136 void MTree::copyTree(MTree *tree) {
137     if (root) freeNode();
138     stringstream ss;
139     tree->printTree(ss);
140     readTree(ss, tree->rooted);
141     rooted = tree->rooted;
142 }
143 
copyTree(MTree * tree,string & taxa_set)144 void MTree::copyTree(MTree *tree, string &taxa_set) {
145     rooted = tree->rooted;
146     if (rooted) {
147         ASSERT(tree->rooted);
148         taxa_set.push_back(1);
149     }
150 //    if (tree->leafNum != taxa_set.length())
151 //        outError("#leaves and taxa_set do not match!");
152     leafNum = nodeNum = branchNum = 0;
153     for (string::iterator it = taxa_set.begin(); it != taxa_set.end(); it++)
154         nodeNum += (*it);
155     double new_len;
156     if (root) freeNode();
157     root = NULL;
158     root = copyTree(tree, taxa_set, new_len);
159     if (rooted)
160         ASSERT(root->name == ROOT_NAME);
161 }
162 
copyTree(MTree * tree,string & taxa_set,double & len,Node * node,Node * dad)163 Node* MTree::copyTree(MTree *tree, string &taxa_set, double &len, Node *node, Node *dad) {
164     if (!node) {
165         if (taxa_set[tree->root->id]) {
166             node = tree->root;
167         } else {
168             for (int i = 0; i < tree->leafNum; i++)
169                 if (taxa_set[i]) {
170                     node = tree->findNodeID(i);
171                     break;
172                 }
173         }
174     }
175     Node *new_node = NULL;
176     NodeVector new_nodes;
177     DoubleVector new_lens;
178     if (node->isLeaf()) {
179         len = 0.0;
180         if (taxa_set[node->id]) {
181             new_node = newNode(leafNum++, node->name.c_str());
182         }
183         if (dad) return new_node;
184     }
185     if (new_node) {
186         new_nodes.push_back(new_node);
187         new_lens.push_back(len);
188     }
189     FOR_NEIGHBOR_IT(node, dad, it) {
190         double new_len;
191         new_node = copyTree(tree, taxa_set, new_len, (*it)->node, node);
192         if (new_node) {
193             new_nodes.push_back(new_node);
194             new_lens.push_back((*it)->length + new_len);
195         }
196     }
197     if (new_nodes.empty()) return NULL;
198     if (new_nodes.size() == 1) {
199         len = new_lens[0];
200         return new_nodes[0];
201     }
202     if (!dad && new_nodes.size() == 2) {
203         double sum_len = new_lens[0] + new_lens[1];
204         new_nodes[0]->addNeighbor(new_nodes[1], sum_len, branchNum);
205         new_nodes[1]->addNeighbor(new_nodes[0], sum_len, branchNum);
206         branchNum++;
207         return new_nodes[0];
208     }
209     Node* int_node = newNode(nodeNum++, node->name.c_str());
210     len = 0.0;
211     for (int i = 0; i < new_nodes.size(); i++) {
212         int_node->addNeighbor(new_nodes[i], new_lens[i], branchNum);
213         new_nodes[i]->addNeighbor(int_node, new_lens[i], branchNum);
214         branchNum++;
215     }
216     return int_node;
217 }
218 
extractBifurcatingSubTree(Node * node,Node * dad)219 void MTree::extractBifurcatingSubTree(Node *node, Node *dad) {
220     if (!node) node = root;
221     if (node->degree() > 3) {
222         int id1, id2, id3;
223         id1 = node->findNeighborIt(dad) - node->neighbors.begin();
224         do {
225             id2 = random_int(node->degree());
226         } while (id2 == id1);
227 
228         // make sure that id1 < id2
229         if (id1 > id2) {
230             int tmp = id1;
231             id1 = id2;
232             id2 = tmp;
233         }
234         do {
235             id3 = random_int(node->degree());
236         } while (id3 == id1 || id3 == id2);
237         //make sure that id1 < id2 < id3
238         if (id3 < id2) {
239             if (id3 < id1) {
240                 // id3 < id1 < id2
241                 int tmp = id1;
242                 id1 = id3;
243                 id3 = id2;
244                 id2 = tmp;
245             } else {
246                 // id1 < id3 < id2
247                 int tmp = id2;
248                 id2 = id3;
249                 id3 = tmp;
250             }
251         }
252         // remove all neighbors except id1, id2, id3
253         for (int i = 0; i != node->neighbors.size(); i++)
254             if (i != id1 && i != id2 && i != id3) {
255                 freeNode(node->neighbors[i]->node, node);
256                 delete node->neighbors[i];
257             }
258         node->neighbors[0] = node->neighbors[id1];
259         node->neighbors[1] = node->neighbors[id2];
260         node->neighbors[2] = node->neighbors[id3];
261         node->neighbors.erase(node->neighbors.begin()+3, node->neighbors.end());
262     }
263     FOR_NEIGHBOR_IT(node, dad, it) {
264         if (!(*it)->node->isLeaf())
265             extractBifurcatingSubTree((*it)->node, node);
266     }
267 }
268 
resolveMultifurcation()269 void MTree::resolveMultifurcation() {
270     // randomly resolve multifurcating node
271 
272     NodeVector nodes;
273     getInternalNodes(nodes);
274     for (NodeVector::iterator it = nodes.begin(); it != nodes.end(); it++)
275         while ((*it)->degree() > 3) {
276             Node *new_node = newNode();
277             int id1 = random_int((*it)->degree());
278             int id2;
279             do {
280                 id2 = random_int((*it)->degree());
281             } while (id2 == id1);
282 
283             // make sure that id1 < id2
284             if (id1 > id2) {
285                 int tmp = id1;
286                 id1 = id2;
287                 id2 = tmp;
288             }
289             Neighbor *nei1 = (*it)->neighbors[id1];
290             Neighbor *nei2 = (*it)->neighbors[id2];
291 
292             // connect id1 with new_node
293             nei1->node->updateNeighbor((*it), new_node);
294             new_node->neighbors.push_back(nei1);
295 
296             // connect id2 with new_node
297             nei2->node->updateNeighbor((*it), new_node);
298             new_node->neighbors.push_back(nei2);
299 
300             // connect new_node with old node
301             new_node->addNeighbor((*it), -1.0);
302             (*it)->neighbors.erase((*it)->neighbors.begin() + id2);
303             (*it)->neighbors.erase((*it)->neighbors.begin() + id1);
304             (*it)->addNeighbor(new_node, -1.0);
305         }
306 }
307 
newNode(int node_id,const char * node_name)308 Node* MTree::newNode(int node_id, const char* node_name) {
309     return new Node(node_id, node_name);
310 }
311 
newNode(int node_id,int node_name)312 Node* MTree::newNode(int node_id, int node_name) {
313     return new Node(node_id, node_name);
314 }
315 
isBifurcating(Node * node,Node * dad)316 bool MTree::isBifurcating(Node *node, Node *dad) {
317 	if (!node) node = root;
318 	if (!node->isLeaf() && node->degree() != 3) return false;
319 	FOR_NEIGHBOR_IT(node, dad, it) {
320 		if (!(*it)->node->isLeaf() && (*it)->node->degree() != 3) return false;
321 		if (!isBifurcating((*it)->node, node)) return false;
322 	}
323 	return true;
324 }
325 
printBranchLengths(ostream & out,Node * node,Node * dad)326 void MTree::printBranchLengths(ostream &out, Node *node, Node *dad)
327 {
328     if (node == NULL) {
329     	node = root;
330     	sortTaxa();
331     }
332     FOR_NEIGHBOR_IT(node, dad, it) {
333         if (node->name != "") out << node->name; else out << node->id;
334         out << "\t";
335         if ((*it)->node->name != "") out << (*it)->node->name; else out << (*it)->node->id;
336         out << "\t" << (*it)->length << endl;
337         printBranchLengths(out, (*it)->node, node);
338     }
339 }
340 
countZeroBranches(Node * node,Node * dad,double epsilon)341 int MTree::countZeroBranches(Node *node, Node *dad, double epsilon) {
342     int count = 0;
343     if (node == NULL) node = root;
344     FOR_NEIGHBOR_IT(node, dad, it) {
345         if ((*it)->length <= epsilon) count++;
346         count += countZeroBranches((*it)->node, node, epsilon);
347     }
348     return count;
349 }
350 
countZeroInternalBranches(Node * node,Node * dad,double epsilon)351 int MTree::countZeroInternalBranches(Node *node, Node *dad, double epsilon) {
352     int count = 0;
353     if (node == NULL) node = root;
354     FOR_NEIGHBOR_IT(node, dad, it) {
355         if ((*it)->length <= epsilon && !(*it)->node->isLeaf() && !node->isLeaf()) count++;
356         count += countZeroInternalBranches((*it)->node, node, epsilon);
357     }
358     return count;
359 
360 }
361 
countLongBranches(Node * node,Node * dad,double upper_limit)362 int MTree::countLongBranches(Node *node, Node *dad, double upper_limit) {
363     int count = 0;
364     if (node == NULL) node = root;
365     FOR_NEIGHBOR_IT(node, dad, it) {
366         if ((*it)->length >= upper_limit) count++;
367         count += countLongBranches((*it)->node, node, upper_limit);
368     }
369     return count;
370 }
371 
372 
printTree(const char * ofile,int brtype)373 void MTree::printTree(const char *ofile, int brtype)
374 {
375     try {
376         ofstream out;
377         out.exceptions(ios::failbit | ios::badbit);
378         if (brtype & WT_APPEND)
379             out.open(ofile, ios_base::out | ios_base::app);
380         else
381             out.open(ofile);
382         printTree(out, brtype);
383         out.close();
384         if (verbose_mode >= VB_DEBUG)
385             cout << "Tree was printed to " << ofile << endl;
386     } catch (ios::failure) {
387         outError(ERR_WRITE_OUTPUT, ofile);
388     }
389 }
390 
printNexus(string ofile,int brtype,string nexus_comment)391 void MTree::printNexus(string ofile, int brtype, string nexus_comment)
392 {
393     try {
394         ofstream out;
395         out.exceptions(ios::failbit | ios::badbit);
396         if (brtype & WT_APPEND)
397             out.open(ofile, ios_base::out | ios_base::app);
398         else
399             out.open(ofile);
400         out << "#NEXUS" << endl;
401         if (!nexus_comment.empty())
402             out << "[ " << nexus_comment << " ]" << endl;
403         out << "begin trees;" << endl;
404         out << "  tree tree_1 = ";
405         printTree(out, brtype | WT_BR_ATTR);
406         out << endl;
407         out << "end;" << endl;
408         out.close();
409         if (verbose_mode >= VB_DEBUG)
410             cout << "Tree was printed to " << ofile << endl;
411     } catch (ios::failure) {
412         outError(ERR_WRITE_OUTPUT, ofile);
413     }
414 }
415 
printTree(ostream & out,int brtype)416 void MTree::printTree(ostream &out, int brtype) {
417     if (root->isLeaf()) {
418         if (root->neighbors[0]->node->isLeaf()) {
419             // tree has only 2 taxa!
420             out << "(";
421             printTree(out, brtype, root);
422             out << ",";
423             if (brtype & WT_TAXON_ID)
424                 out << root->neighbors[0]->node->id;
425             else
426                 out << root->neighbors[0]->node->name;
427 
428             if (brtype & WT_BR_LEN)
429                 out << ":0";
430             out << ")";
431         } else
432             // tree has more than 2 taxa
433             printTree(out, brtype, root->neighbors[0]->node);
434     } else
435         printTree(out, brtype, root);
436 
437     out << ";";
438     if (brtype & WT_NEWLINE) out << endl;
439 }
440 
441 struct IntString {
442     int id;
443     string str;
444 };
445 
446 /**
447 	nodecmp, for pruning algorithm
448 */
449 struct IntStringCmp
450 {
451     /**
452     	nodecmp, for pruning algorithm
453     */
operator ()IntStringCmp454     bool operator()(const IntString* s1, const IntString* s2) const
455     {
456         return (s1->id) < (s2->id);
457     }
458 };
459 
460 typedef set<IntString*, IntStringCmp> IntStringSet;
461 
printBranchLength(ostream & out,int brtype,bool print_slash,Neighbor * length_nei)462 void MTree::printBranchLength(ostream &out, int brtype, bool print_slash, Neighbor *length_nei) {
463     if (length_nei->length == -1.0)
464         return; // NA branch length
465     int prec = 10;
466 	double length = length_nei->length;
467     if (brtype & WT_BR_SCALE) length *= len_scale;
468     if (brtype & WT_BR_LEN_SHORT) prec = 6;
469     if (brtype & WT_BR_LEN_ROUNDING) length = round(length);
470     out.precision(prec);
471     if ((brtype & WT_BR_ATTR) && !length_nei->attributes.empty()) {
472         // print branch attributes
473         out << "[&";
474         bool first = true;
475         for (auto attr : length_nei->attributes) {
476             if (!first)
477                 out << ",";
478             out << attr.first << "=\"" << attr.second << '"';
479             first = false;
480         }
481         out << "]";
482     }
483 
484     if (brtype & WT_BR_LEN) {
485         if (brtype & WT_BR_LEN_FIXED_WIDTH)
486             out << ":" << fixed << length;
487         else
488             out << ":" << length;
489     } else if (brtype & WT_BR_CLADE && length_nei->node->name != ROOT_NAME) {
490     	if (print_slash)
491     		out << "/";
492         out << length;
493     }
494 }
495 
printTree(ostream & out,int brtype,Node * node,Node * dad)496 int MTree::printTree(ostream &out, int brtype, Node *node, Node *dad)
497 {
498     int smallest_taxid = leafNum;
499     out.precision(num_precision);
500     if (!node) node = root;
501     if (node->isLeaf()) {
502         smallest_taxid = node->id;
503         if (brtype & WT_TAXON_ID)
504             out << node->id;
505         else
506             out << node->name;
507 
508         if (brtype & WT_BR_LEN) {
509         	out.setf( std::ios::fixed, std:: ios::floatfield ); // some sofware does handle number format like '1.234e-6'
510 //            out.precision(10); // increase precision to avoid zero branch (like in RAxML)
511             printBranchLength(out, brtype, false, node->neighbors[0]);
512 //        	double len = node->neighbors[0]->length;
513 //            if (brtype & WT_BR_SCALE) len *= len_scale;
514 //            if (brtype & WT_BR_LEN_ROUNDING) len = round(len);
515 //            if (brtype & WT_BR_LEN_FIXED_WIDTH)
516 //                out << ":" << fixed << len;
517 //            else
518 //                out << ":" << len;
519         }
520     } else {
521         // internal node
522         out << "(";
523         bool first = true;
524         Neighbor *length_nei = NULL;
525         //for (int i = 0; i < node->neighbors.size(); i++)
526         //if (node->neighbors[i]->node != dad)
527         if (! (brtype & WT_SORT_TAXA)) {
528             FOR_NEIGHBOR_IT(node, dad, it) {
529                 if ((*it)->node->name != ROOT_NAME) {
530                     if (!first)
531                         out << ",";
532                     int taxid = printTree(out, brtype, (*it)->node, node);
533                     if (taxid < smallest_taxid) smallest_taxid = taxid;
534                     first = false;
535                 } else
536                     length_nei = (*it);
537             } else {
538                 length_nei = (*it);
539             }
540         } else {
541             IntStringSet strout;
542             FOR_NEIGHBOR_IT(node, dad, it) {
543                 if ((*it)->node->name != ROOT_NAME) {
544                     ostringstream ss;
545                     IntString *str = new IntString;
546                     str->id = printTree(ss, brtype, (*it)->node, node);
547                     //ss.flush();
548                     str->str = ss.str();
549                     strout.insert(str);
550                 } else
551                 	length_nei = (*it);
552             } else {
553             	length_nei = (*it);
554             }
555             smallest_taxid = (*strout.begin())->id;
556             IntStringSet::iterator iss;
557             for (iss = strout.begin(); iss != strout.end(); iss++) {
558                 if (!first) out << ",";
559                 out << (*iss)->str;
560                 first = false;
561             }
562             for (iss = strout.begin(); iss != strout.end(); iss++)
563                 delete (*iss);
564         }
565         out << ")";
566         if (brtype & WT_INT_NODE)
567             out << node->id;
568         else if (!node->name.empty() && (brtype & WT_BR_ATTR) == 0)
569             out << node->name;
570         if (dad != NULL || length_nei) {
571         	printBranchLength(out, brtype, !node->name.empty(), length_nei);
572         }
573     }
574     return smallest_taxid;
575 }
576 
577 
printSubTree(ostream & out,NodeVector & subtree)578 void MTree::printSubTree(ostream &out, NodeVector &subtree) {
579     if (root->isLeaf())
580         printSubTree(out, subtree, root->neighbors[0]->node);
581     else
582         printSubTree(out, subtree, root);
583     out << ";";
584 }
585 
printSubTree(ostream & out,NodeVector & subtree,Node * node,Node * dad)586 void MTree::printSubTree(ostream &out, NodeVector &subtree, Node *node, Node *dad) {
587     if (!node) node = root;
588 
589     NeighborVec::iterator it;
590     double length = 0.0, dad_length = 0.0;
591     // go down if only 1 child available
592     Node *child = NULL;
593     int degree;
594     do {
595         degree = 0;
596         FOR_NEIGHBOR(node, dad, it) {
597             if (subtree[(*it)->node->id] != NULL) {
598                 degree++;
599                 child = (*it)->node;
600             }
601         } else dad_length = (*it)->length;
602 
603         if (degree == 1) {
604             dad = node;
605             node = child;
606             length += dad_length;
607         }
608     } while (degree == 1 && !node->isLeaf());
609 
610     if (node->isLeaf())
611         out << node->name << ":" << node->neighbors[0]->length + length;
612     else
613     {
614         // internal node
615         out << "(";
616         bool first = true;
617 
618         FOR_NEIGHBOR(node, dad, it)	{
619             if (subtree[(*it)->node->id] != NULL) {
620                 if ((*it)->node->name != ROOT_NAME) {
621                     if (!first)
622                         out << ",";
623                     printSubTree(out, subtree, (*it)->node, node);
624                     first = false;
625                 } else
626                     length += (*it)->length;
627             }
628         } else {
629             length += (*it)->length;
630         }
631         out << ")";
632         if (!node->name.empty())
633             out << node->name;
634         if (dad != NULL || length > 1e-20)
635             out << ":" << length;
636     }
637 }
638 
639 
printTaxa(const char * ofile)640 void MTree::printTaxa(const char *ofile)
641 {
642     try {
643         ofstream out;
644         out.exceptions(ios::failbit | ios::badbit);
645         out.open(ofile);
646         if (root->isLeaf())
647             printTaxa(out, root->neighbors[0]->node);
648         else
649             printTaxa(out);
650         out.close();
651         cout << "Taxa list was printed to " << ofile << endl;
652     } catch (ios::failure) {
653         outError(ERR_WRITE_OUTPUT, ofile);
654     }
655 }
656 
printTaxa(ostream & out,Node * node,Node * dad)657 void MTree::printTaxa(ostream &out, Node *node, Node *dad)
658 {
659     if (!node) node = root;
660     if (node->isLeaf())
661         out << node->name << endl;
662     else
663     {
664         // internal node
665         //for (int i = 0; i < node->neighbors.size(); i++)
666         //if (node->neighbors[i]->node != dad)
667         FOR_NEIGHBOR_IT(node, dad, it)	{
668             printTaxa(out, (*it)->node, node);
669         }
670     }
671 }
672 
printTaxa(ostream & out,NodeVector & subtree)673 void MTree::printTaxa(ostream &out, NodeVector &subtree) {
674     for (int i = 0; i < leafNum; i++)
675         if (subtree[i] != NULL) {
676             out << subtree[i]->name << endl;
677         }
678 }
679 
readTree(const char * infile,bool & is_rooted)680 void MTree::readTree(const char *infile, bool &is_rooted) {
681     ifstream in;
682     try {
683         in.exceptions(ios::failbit | ios::badbit);
684         in.open(infile);
685         readTree(in, is_rooted);
686         in.close();
687     } catch (ios::failure) {
688         outError(ERR_READ_INPUT, infile);
689     }
690 
691     rooted = is_rooted;
692 
693     if (verbose_mode >= VB_MED)
694         cout << "Tree contains " << leafNum - is_rooted <<
695              " taxa and " << nodeNum-1-is_rooted << " branches" << (is_rooted ? " (rooted)" : "") << endl;
696 }
697 
698 /*
699 void MTree::readTreeString(string tree_string, bool is_rooted) {
700 	stringstream str;
701 	str << tree_string;
702 	str.seekg(0, ios::beg);
703 	freeNode();
704 	readTree(str, is_rooted);
705 }
706 */
707 
708 
readTree(istream & in,bool & is_rooted)709 void MTree::readTree(istream &in, bool &is_rooted)
710 {
711     in_line = 1;
712     in_column = 1;
713     in_comment = "";
714     try {
715         char ch;
716         ch = readNextChar(in);
717         if (ch != '(') {
718         	cout << in.rdbuf() << endl;
719             throw "Tree file does not start with an opening-bracket '('";
720         }
721 
722         leafNum = 0;
723 
724         DoubleVector branch_len;
725         Node *node;
726         parseFile(in, ch, node, branch_len);
727         // 2018-01-05: assuming rooted tree if root node has two children
728         if (is_rooted || (!branch_len.empty() && branch_len[0] != 0.0) || node->degree() == 2) {
729             if (branch_len.empty())
730                 branch_len.push_back(-1.0);
731             if (branch_len[0] == -1.0) branch_len[0] = 0.0;
732             if (branch_len[0] < 0.0)
733                 throw ERR_NEG_BRANCH;
734             rooted = is_rooted = true;
735             root = newNode(leafNum, ROOT_NAME);
736             root->addNeighbor(node, branch_len);
737             node->addNeighbor(root, branch_len);
738             leafNum++;
739             rooted = true;
740         } else { // assign root to one of the neighbor of node, if any
741             FOR_NEIGHBOR_IT(node, NULL, it)
742             if ((*it)->node->isLeaf()) {
743                 root = (*it)->node;
744                 break;
745             }
746         }
747         // make sure that root is a leaf
748         ASSERT(root->isLeaf());
749 
750         if (in.eof() || ch != ';')
751             throw "Tree file must be ended with a semi-colon ';'";
752     } catch (bad_alloc) {
753         outError(ERR_NO_MEMORY);
754     } catch (const char *str) {
755         outError(str, reportInputInfo());
756     } catch (string str) {
757         outError(str.c_str(), reportInputInfo());
758     } catch (ios::failure) {
759         outError(ERR_READ_INPUT, reportInputInfo());
760     } catch (...) {
761         // anything else
762         outError(ERR_READ_ANY, reportInputInfo());
763     }
764 
765     nodeNum = leafNum;
766     initializeTree();
767 
768     //bool stop = false;
769     //checkValidTree(stop);
770 }
771 
initializeTree(Node * node,Node * dad)772 void MTree::initializeTree(Node *node, Node* dad)
773 {
774     if (!node) {
775         node = root;
776         nodeNum = leafNum;
777         branchNum = 0;
778     }
779     if (!node->isLeaf())
780     {
781         node->id = nodeNum;
782         nodeNum++;
783         //node->name = node->id;
784 
785     }
786     //for (int i = 0; i < node->neighbors.size(); i++)
787     //if (node->neighbors[i]->node != dad)
788     FOR_NEIGHBOR_IT(node, dad, it) {
789         (*it)->id = branchNum;
790         (*it)->node->findNeighbor(node)->id = branchNum;
791         branchNum++;
792         initializeTree((*it)->node, node);
793     }
794 }
795 
parseBranchLength(string & lenstr,DoubleVector & branch_len)796 void MTree::parseBranchLength(string &lenstr, DoubleVector &branch_len) {
797 //    branch_len.push_back(convert_double(lenstr.c_str()));
798     double len = convert_double(lenstr.c_str());
799     if (in_comment.empty()) {
800         branch_len.push_back(len);
801         return;
802     }
803     convert_double_vec(in_comment.c_str(), branch_len, BRANCH_LENGTH_SEPARATOR);
804 //    char* str = (char*)in_comment.c_str() + 1;
805 //    int pos;
806 //    for (int i = 1; str[0] == 'L'; i++) {
807 //        str++;
808 //        int id = convert_int(str, pos);
809 //        if (id != i)
810 //            throw "Wrong ID in " + string(str);
811 //        if (str[pos] != '=')
812 //            throw "= is expected in " + string(str);
813 //        str += pos+1;
814 //        double val = convert_double(str, pos);
815 //        branch_len.push_back(val);
816 //        if (str[pos] == ',') {
817 //            str += pos+1;
818 //            continue;
819 //        } else
820 //            break;
821 //    }
822 }
823 
824 
parseFile(istream & infile,char & ch,Node * & root,DoubleVector & branch_len)825 void MTree::parseFile(istream &infile, char &ch, Node* &root, DoubleVector &branch_len)
826 {
827     Node *node;
828     int maxlen = 1000;
829     string seqname;
830     int seqlen;
831     DoubleVector brlen;
832     branch_len.clear();
833 
834     root = newNode();
835 
836     if (ch == '(') {
837         // internal node
838         ch = readNextChar(infile);
839         while (ch != ')' && !infile.eof())
840         {
841             node = NULL;
842             parseFile(infile, ch, node, brlen);
843             //if (brlen == -1.0)
844             //throw "Found branch with no length.";
845             //if (brlen < 0.0)
846             //throw ERR_NEG_BRANCH;
847             root->addNeighbor(node, brlen);
848             node->addNeighbor(root, brlen);
849             if (infile.eof())
850                 throw "Expecting ')', but end of file instead";
851             if (ch == ',')
852                 ch = readNextChar(infile);
853             else if (ch != ')') {
854                 string err = "Expecting ')', but found '";
855                 err += ch;
856                 err += "' instead";
857                 throw err;
858             }
859         }
860         if (!infile.eof()) ch = readNextChar(infile);
861     }
862     // now read the node name
863     seqlen = 0;
864     char end_ch = 0;
865     if (ch == '\'' || ch == '"') end_ch = ch;
866     seqname = "";
867 
868     while (!infile.eof() && seqlen < maxlen)
869     {
870         if (end_ch == 0) {
871             if (is_newick_token(ch) || controlchar(ch)) break;
872         }
873         seqname += ch;
874         seqlen++;
875 //        seqname[seqlen++] = ch;
876         ch = infile.get();
877         in_column++;
878         if (end_ch != 0 && ch == end_ch) {
879             seqname += ch;
880             seqlen++;
881 //            seqname[seqlen++] = ch;
882             break;
883         }
884     }
885     if ((controlchar(ch) || ch == '[' || ch == end_ch) && !infile.eof())
886         ch = readNextChar(infile, ch);
887     if (seqlen == maxlen)
888         throw "Too long name ( > 1000)";
889     if (root->isLeaf())
890         renameString(seqname);
891 //    seqname[seqlen] = 0;
892     if (seqlen == 0 && root->isLeaf())
893         throw "Redundant double-bracket ‘((…))’ with closing bracket ending at";
894     if (seqlen > 0)
895         root->name.append(seqname);
896     if (root->isLeaf()) {
897         // is a leaf, assign its ID
898         root->id = leafNum;
899         if (leafNum == 0)
900             MTree::root = root;
901         leafNum++;
902     }
903 
904     if (ch == ';' || infile.eof())
905         return;
906     if (ch == ':')
907     {
908         string saved_comment = in_comment;
909         ch = readNextChar(infile);
910         if (in_comment.empty())
911             in_comment = saved_comment;
912         seqlen = 0;
913         seqname = "";
914         while (!is_newick_token(ch) && !controlchar(ch) && !infile.eof() && seqlen < maxlen)
915         {
916 //            seqname[seqlen] = ch;
917             seqname += ch;
918             seqlen++;
919             ch = infile.get();
920             in_column++;
921         }
922         if ((controlchar(ch) || ch == '[') && !infile.eof())
923             ch = readNextChar(infile, ch);
924         if (seqlen == maxlen || infile.eof())
925             throw "branch length format error.";
926 //        seqname[seqlen] = 0;
927         parseBranchLength(seqname, branch_len);
928 //        convert_double_vec(seqname.c_str(), branch_len, BRANCH_LENGTH_SEPARATOR);
929     }
930 }
931 
932 
933 
934 /**
935 	check tree is bifurcating tree (every leaf with level 1 or 3)
936 */
checkValidTree(bool & stop,Node * node,Node * dad)937 void MTree::checkValidTree(bool &stop, Node *node, Node *dad)
938 {
939     if (!node) node = root;
940     if (node->degree() != 1 && node->degree() != 3) {
941         cout << "Tree is not bifurcating." << endl;
942         stop = true;
943         return;
944     }
945     //for (int i = 0; i < node->neighbors.size(); i++)
946     //if (node->neighbors[i]->node != dad) {
947     FOR_NEIGHBOR_IT(node, dad, it) {
948         checkValidTree(stop, (*it)->node, node);
949         if (stop)
950             return;
951     }
952 }
953 
treeLength(Node * node,Node * dad)954 double MTree::treeLength(Node *node, Node *dad)
955 {
956     if (!node) node = root;
957     double sum = 0;
958     FOR_NEIGHBOR_IT(node, dad, it) {
959         sum += (*it)->length + treeLength((*it)->node, node);
960     }
961     return sum;
962 }
963 
treeLengthInternal(double epsilon,Node * node,Node * dad)964 double MTree::treeLengthInternal( double epsilon, Node *node, Node *dad)
965 {
966     if (!node) node = root;
967     double sum = 0;
968     FOR_NEIGHBOR_IT(node, dad, it) {
969     	if (!(*it)->node->isLeaf() && !node->isLeaf())
970     	{
971     		if (treeLength((*it)->node, node) > epsilon) {
972     			sum += (*it)->length + treeLengthInternal(epsilon, (*it)->node, node);
973     		}
974     	}
975     	else {
976     		if (treeLength((*it)->node, node) > epsilon) {
977     			sum += treeLengthInternal(epsilon, (*it)->node, node);
978     		}
979     	}
980     }
981     return sum;
982 }
983 
treeDepth(Node * node,Node * dad)984 double MTree::treeDepth(Node *node, Node *dad)
985 {
986     if (!node) node = root;
987     double maxsum = 0.0;
988     FOR_NEIGHBOR_IT(node, dad, it) {
989         double len = (*it)->length;
990         if (len < 0.0) len = 0.0;
991         double sum = len + treeDepth((*it)->node, node);
992         if (sum > maxsum) maxsum = sum;
993     }
994     return maxsum;
995 }
996 
getNonCherryLeaves(NodeVector & noncherry,NodeVector & cherry,Node * node,Node * dad)997 void MTree::getNonCherryLeaves(NodeVector &noncherry, NodeVector &cherry, Node *node, Node *dad) {
998     if (!node) node = root;
999     if (node->isLeaf()) {
1000     	if (node->isInCherry()) {
1001     		cherry.push_back(node);
1002     	} else {
1003             noncherry.push_back(node);
1004     	}
1005     }
1006     FOR_NEIGHBOR_IT(node, dad, it) {
1007     	getNonCherryLeaves(noncherry, cherry, (*it)->node, node);
1008     }
1009 }
1010 
getTaxa(NodeVector & taxa,Node * node,Node * dad)1011 void MTree::getTaxa(NodeVector &taxa, Node *node, Node *dad) {
1012     if (!node) node = root;
1013     if (node->isLeaf()) {
1014         taxa.push_back(node);
1015     }
1016     //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1017     //if ((*it)->node != dad)	{
1018     FOR_NEIGHBOR_IT(node, dad, it) {
1019         getTaxa(taxa, (*it)->node, node);
1020     }
1021 }
1022 
getAllNodesInSubtree(Node * node,Node * dad,NodeVector & nodeList)1023 void MTree::getAllNodesInSubtree(Node *node, Node *dad, NodeVector &nodeList) {
1024     ASSERT(node);
1025     nodeList.push_back(node);
1026     if (node->isLeaf()) {
1027         return;
1028     }
1029     FOR_NEIGHBOR_IT(node, dad, it) {
1030         getAllNodesInSubtree((*it)->node, node, nodeList);
1031     }
1032 }
1033 
getNumTaxa(Node * node,Node * dad)1034 int MTree::getNumTaxa(Node *node, Node *dad) {
1035     int numLeaf = 0;
1036     if (!node) {
1037     	node = root;
1038     	numLeaf = 1;
1039     } else {
1040         if (node->isLeaf()) {
1041             return 1;
1042         }
1043     }
1044 
1045     FOR_NEIGHBOR_IT(node, dad, it) {
1046         numLeaf += getNumTaxa((*it)->node, node);
1047     }
1048     return numLeaf;
1049 }
1050 
getInternalNodes(NodeVector & nodes,Node * node,Node * dad)1051 void MTree::getInternalNodes(NodeVector &nodes, Node *node, Node *dad) {
1052     if (!node) node = root;
1053     //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1054     //if ((*it)->node != dad)	{
1055     FOR_NEIGHBOR_IT(node, dad, it)
1056     if (!(*it)->node->isLeaf()) {
1057         getInternalNodes(nodes, (*it)->node, node);
1058         nodes.push_back((*it)->node);
1059     }
1060 }
1061 
getMultifurcatingNodes(NodeVector & nodes,Node * node,Node * dad)1062 void MTree::getMultifurcatingNodes(NodeVector &nodes, Node *node, Node *dad) {
1063     if (!node) node = root;
1064     //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1065     //if ((*it)->node != dad)    {
1066     FOR_NEIGHBOR_IT(node, dad, it)
1067     if (!(*it)->node->isLeaf()) {
1068         if ((*it)->node->degree() > 3)
1069             nodes.push_back((*it)->node);
1070         getMultifurcatingNodes(nodes, (*it)->node, node);
1071     }
1072 }
1073 
generateNNIBraches(NodeVector & nodes1,NodeVector & nodes2,SplitGraph * excludeSplits,Node * node,Node * dad)1074 void MTree::generateNNIBraches(NodeVector &nodes1, NodeVector &nodes2, SplitGraph* excludeSplits, Node *node, Node *dad) {
1075     if (!node) node = root;
1076     //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1077     //if ((*it)->node != dad)	{
1078     FOR_NEIGHBOR_IT(node, dad, it)
1079     if (!(*it)->node->isLeaf()) {
1080         generateNNIBraches(nodes1, nodes2, excludeSplits, (*it)->node, node);
1081         if (!node->isLeaf()) {
1082         	if (excludeSplits != NULL && excludeSplits->size() != 0) {
1083         		Split* sp = getSplit(node, (*it)->node);
1084         		if (excludeSplits->containSplit(*sp)) {
1085         			delete sp;
1086         			continue;
1087         		}
1088         		delete sp;
1089         	}
1090 			if (node->id < (*it)->node->id) {
1091 				nodes1.push_back(node);
1092 				nodes2.push_back((*it)->node);
1093 			} else {
1094 				nodes1.push_back((*it)->node);
1095 				nodes2.push_back(node);
1096 			}
1097         }
1098     }
1099 }
1100 
1101 //bool MTree::branchExist(Node* node1, Node* node2, NodeVector& nodes1, NodeVector& nodes2) {
1102 //	assert(nodes1.size() == nodes2.size());
1103 //	bool existed = false;
1104 //	for (int i = 0; i < nodes1.size(); i++) {
1105 //		if (nodes1[i] == node1) {
1106 //			if (nodes2[i] == node2) {
1107 //				existed = true;
1108 //				break;
1109 //			}
1110 //		}
1111 //		if (nodes1[i] == node2) {
1112 //			if (nodes2[i] == node1) {
1113 //				existed = true;
1114 //				break;
1115 //			}
1116 //		}
1117 //	}
1118 //	return existed;
1119 //}
1120 
getSurroundingInnerBranches(Node * node,Node * dad,int depth,Branches & surrBranches)1121 void MTree::getSurroundingInnerBranches(Node *node, Node *dad, int depth, Branches &surrBranches) {
1122     if (depth == 0)
1123       return;
1124     FOR_NEIGHBOR_IT(node, dad, it) {
1125         if (!(*it)->node->isLeaf()) {
1126             Branch curBranch;
1127             curBranch.first = node;
1128             curBranch.second = (*it)->node;
1129             int branchID = pairInteger(node->id, (*it)->node->id);
1130             if (surrBranches.find(branchID) == surrBranches.end())
1131                 surrBranches.insert(pair<int,Branch>(branchID, curBranch));
1132             getSurroundingInnerBranches((*it)->node, node, depth-1, surrBranches);
1133         }
1134     }
1135 }
1136 
isInnerBranch(Node * node1,Node * node2)1137 bool MTree::isInnerBranch(Node* node1, Node* node2) {
1138     return(node1->degree() >= 3 && node2->degree() >= 3 && isABranch(node1, node2));
1139 }
1140 
isABranch(Node * node1,Node * node2)1141 bool MTree::isABranch(Node* node1, Node* node2) {
1142     return (node1->findNeighbor(node2) != NULL && node2->findNeighbor(node1) != NULL);
1143 }
1144 
getBranches(NodeVector & nodes,NodeVector & nodes2,Node * node,Node * dad,bool post_traversal)1145 void MTree::getBranches(NodeVector &nodes, NodeVector &nodes2, Node *node, Node *dad, bool post_traversal) {
1146     if (!node) node = root;
1147     //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1148     //if ((*it)->node != dad)   {
1149     FOR_NEIGHBOR_IT(node, dad, it) {
1150         if (!post_traversal) {
1151             if (node->id < (*it)->node->id) {
1152                 nodes.push_back(node);
1153                 nodes2.push_back((*it)->node);
1154             } else {
1155                 nodes.push_back((*it)->node);
1156                 nodes2.push_back(node);
1157             }
1158         }
1159         getBranches(nodes, nodes2, (*it)->node, node, post_traversal);
1160         if (post_traversal) {
1161             if (node->id < (*it)->node->id) {
1162                 nodes.push_back(node);
1163                 nodes2.push_back((*it)->node);
1164             } else {
1165                 nodes.push_back((*it)->node);
1166                 nodes2.push_back(node);
1167             }
1168         }
1169     }
1170 }
1171 
getBranches(int max_dist,NodeVector & nodes,NodeVector & nodes2,Node * node,Node * dad)1172 void MTree::getBranches(int max_dist, NodeVector &nodes, NodeVector &nodes2, Node *node, Node *dad) {
1173     if (!node) node = root;
1174     //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1175     //if ((*it)->node != dad)   {
1176     FOR_NEIGHBOR_IT(node, dad, it) {
1177         if (node->id < (*it)->node->id) {
1178             nodes.push_back(node);
1179             nodes2.push_back((*it)->node);
1180         } else {
1181             nodes.push_back((*it)->node);
1182             nodes2.push_back(node);
1183         }
1184         if (max_dist > 1)
1185             getBranches(max_dist-1, nodes, nodes2, (*it)->node, node);
1186     }
1187 }
1188 
getBranches(BranchVector & branches,Node * node,Node * dad,bool post_traversal)1189 void MTree::getBranches(BranchVector& branches, Node *node, Node *dad, bool post_traversal) {
1190     if (!node) node = root;
1191     FOR_NEIGHBOR_IT(node, dad, it) {
1192         if (!post_traversal) {
1193             Branch branch;
1194             branch.first = node;
1195             branch.second = (*it)->node;
1196             branches.push_back(branch);
1197         }
1198         getBranches(branches, (*it)->node, node, post_traversal);
1199         if (post_traversal) {
1200             Branch branch;
1201             branch.first = node;
1202             branch.second = (*it)->node;
1203             branches.push_back(branch);
1204         }
1205     }
1206 }
1207 
getInnerBranches(Branches & branches,Node * node,Node * dad)1208 void MTree::getInnerBranches(Branches& branches, Node *node, Node *dad) {
1209     if (!node) node = root;
1210     FOR_NEIGHBOR_IT(node, dad, it) {
1211     	if (isInnerBranch((*it)->node, node)) {
1212             Branch branch;
1213             branch.first = node;
1214             branch.second = (*it)->node;
1215             branches.insert(pair<int, Branch>(pairInteger(branch.first->id, branch.second->id), branch));
1216     	}
1217     	getInnerBranches(branches, (*it)->node, node);
1218     }
1219 }
1220 
getInnerBranches(BranchVector & branches,Node * node,Node * dad,bool post_traversal)1221 void MTree::getInnerBranches(BranchVector& branches, Node *node, Node *dad, bool post_traversal) {
1222     if (!node) node = root;
1223     FOR_NEIGHBOR_IT(node, dad, it) {
1224         if (!node->isLeaf() && !(*it)->node->isLeaf() && !post_traversal) {
1225             Branch branch;
1226             branch.first = node;
1227             branch.second = (*it)->node;
1228             branches.push_back(branch);
1229         }
1230         getInnerBranches(branches, (*it)->node, node, post_traversal);
1231         if (!node->isLeaf() && !(*it)->node->isLeaf() && post_traversal) {
1232             Branch branch;
1233             branch.first = node;
1234             branch.second = (*it)->node;
1235             branches.push_back(branch);
1236         }
1237     }
1238 }
1239 
getBranchLengths(vector<DoubleVector> & len,Node * node,Node * dad)1240 void MTree::getBranchLengths(vector<DoubleVector> &len, Node *node, Node *dad) {
1241     if (!node) {
1242         node = root;
1243         ASSERT(len.size() == branchNum);
1244     }
1245     //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1246     //if ((*it)->node != dad)	{
1247     FOR_NEIGHBOR_IT(node, dad, it) {
1248         (*it)->getLength(len[(*it)->id]);
1249         getBranchLengths(len, (*it)->node, node);
1250     }
1251 }
1252 
setBranchLengths(vector<DoubleVector> & len,Node * node,Node * dad)1253 void MTree::setBranchLengths(vector<DoubleVector> &len, Node *node, Node *dad) {
1254     if (!node) {
1255         node = root;
1256         ASSERT(len.size() == branchNum);
1257     }
1258     //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1259     //if ((*it)->node != dad)	{
1260     FOR_NEIGHBOR_IT(node, dad, it) {
1261         (*it)->setLength(len[(*it)->id]);
1262         (*it)->node->findNeighbor(node)->setLength(len[(*it)->id]);
1263         setBranchLengths(len, (*it)->node, node);
1264     }
1265 }
1266 
getOrderedTaxa(NodeVector & taxa,Node * node,Node * dad)1267 void MTree::getOrderedTaxa(NodeVector &taxa, Node *node, Node *dad) {
1268     if (!node) node = root;
1269     if (node->isLeaf()) {
1270         if (taxa.empty()) taxa.resize(leafNum);
1271         taxa[node->id] = node;
1272     }
1273     //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1274     //if ((*it)->node != dad)	{
1275     FOR_NEIGHBOR_IT(node, dad, it) {
1276         getOrderedTaxa(taxa, (*it)->node, node);
1277     }
1278 }
1279 
getTaxaName(vector<string> & taxname,Node * node,Node * dad)1280 void MTree::getTaxaName(vector<string> &taxname, Node *node, Node *dad) {
1281     if (!node) node = root;
1282     if (node->isLeaf()) {
1283         if (taxname.empty()) taxname.resize(leafNum);
1284         taxname[node->id] = node->name;
1285     }
1286     //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1287     //if ((*it)->node != dad)	{
1288     FOR_NEIGHBOR_IT(node, dad, it) {
1289         getTaxaName(taxname, (*it)->node, node);
1290     }
1291 }
1292 
getNodeName(set<string> & nodename,Node * node,Node * dad)1293 void MTree::getNodeName(set<string> &nodename, Node *node, Node *dad) {
1294     if (!node) node = root;
1295     if (!node->name.empty())
1296         nodename.insert(node->name);
1297     //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1298     //if ((*it)->node != dad)    {
1299     FOR_NEIGHBOR_IT(node, dad, it) {
1300         getNodeName(nodename, (*it)->node, node);
1301     }
1302 }
1303 
getUnorderedTaxaName(vector<string> & taxname,Node * node,Node * dad)1304 void MTree::getUnorderedTaxaName(vector<string> &taxname, Node *node, Node *dad) {
1305     if (!node) node = root;
1306     if (node->isLeaf()) {
1307     	taxname.push_back(node->name);
1308     }
1309     //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1310     //if ((*it)->node != dad)	{
1311     FOR_NEIGHBOR_IT(node, dad, it) {
1312         getUnorderedTaxaName(taxname, (*it)->node, node);
1313     }
1314 }
1315 
getTaxaID(vector<int> & taxa,Node * node,Node * dad)1316 void MTree::getTaxaID(vector<int> &taxa, Node *node, Node *dad) {
1317     if (!node) node = root;
1318     if (node->isLeaf()) {
1319         taxa.push_back(node->id);
1320     }
1321     //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1322     //if ((*it)->node != dad)	{
1323     FOR_NEIGHBOR_IT(node, dad, it) {
1324         getTaxaID(taxa, (*it)->node, node);
1325     }
1326 }
1327 
containsSplits(SplitGraph & splits)1328 bool MTree::containsSplits(SplitGraph& splits) {
1329 	SplitGraph treeSplits;
1330 	convertSplits(treeSplits);
1331 	//check if treeSplits contains all splits in splits
1332 	for (SplitGraph::iterator it = splits.begin(); it != splits.end(); it++) {
1333 		if (!treeSplits.containSplit(**it))
1334 			return false;
1335 	}
1336 	//treeSplits.report(cout);
1337 	//splits.report(cout);
1338 	return true;
1339 }
1340 
getSplit(Node * node1,Node * node2)1341 Split* MTree::getSplit(Node* node1, Node* node2) {
1342     Neighbor* node12 = node1->findNeighbor(node2);
1343     return node12->split;
1344 }
1345 
_getSplit(Node * node1,Node * node2)1346 Split* MTree::_getSplit(Node* node1, Node* node2) {
1347     Split* sp = new Split(leafNum);
1348     getTaxa(*sp, node1, node2);
1349     if (sp->shouldInvert())
1350         sp->invert();
1351     return sp;
1352 }
1353 
convertSplits(SplitGraph & sg,Split * resp,NodeVector * nodes,Node * node,Node * dad)1354 void MTree::convertSplits(SplitGraph &sg, Split *resp, NodeVector *nodes, Node *node, Node *dad) {
1355     if (!node) node = root;
1356     ASSERT(resp->getNTaxa() == leafNum);
1357     bool has_child = false;
1358     FOR_NEIGHBOR_IT(node, dad, it) {
1359         //vector<int> taxa;
1360         //getTaxaID((*it)->node, node, taxa);
1361 
1362         Split *sp = new Split(leafNum, (*it)->length);
1363         convertSplits(sg, sp, nodes, (*it)->node, node);
1364         *resp += *sp;
1365         if (sp->shouldInvert())
1366             sp->invert();
1367 		 /* ignore nodes with degree of 2 because such split will be added before */
1368         if (node->degree() != 2) {
1369         	sg.push_back(sp);
1370         	if (nodes) nodes->push_back((*it)->node);
1371         }
1372         has_child = true;
1373     }
1374     if (!has_child)
1375         resp->addTaxon(node->id);
1376 }
1377 
convertSplits(vector<string> & taxname,SplitGraph & sg,NodeVector * nodes,Node * node,Node * dad)1378 void MTree::convertSplits(vector<string> &taxname, SplitGraph &sg, NodeVector *nodes, Node *node, Node *dad) {
1379     if (!sg.taxa) {
1380         sg.taxa = new NxsTaxaBlock();
1381         for (vector<string>::iterator it = taxname.begin(); it != taxname.end(); it++)
1382             sg.taxa->AddTaxonLabel(NxsString(it->c_str()));
1383     }
1384     if (!sg.splits)
1385         sg.splits = new MSplitsBlock(&sg);
1386     if (!sg.pda)
1387         sg.pda = new MPdaBlock(&sg);
1388 
1389     // make the cycle
1390     getTaxaID(sg.splits->cycle);
1391     // make the splits
1392     Split sp(leafNum);
1393     convertSplits(sg, &sp, nodes, node, dad);
1394 }
1395 
convertSplits(SplitGraph & sg,NodeVector * nodes,Node * node,Node * dad)1396 void MTree::convertSplits(SplitGraph &sg, NodeVector *nodes, Node *node, Node *dad) {
1397 
1398     // make the taxa name
1399     vector<string> taxname;
1400     taxname.resize(leafNum);
1401     getTaxaName(taxname);
1402 
1403     convertSplits(taxname, sg, nodes, node, dad);
1404 }
1405 
splitnumtaxacmp(const Split * a,const Split * b)1406 inline int splitnumtaxacmp(const Split* a, const Split* b)
1407 {
1408     return (a->countTaxa() < b->countTaxa());
1409 }
1410 
convertToTree(SplitGraph & sg)1411 void MTree::convertToTree(SplitGraph &sg) {
1412     SplitGraph::iterator it;
1413     int taxid;
1414     int count;
1415 	BoolVector has_tax;
1416 	has_tax.resize(sg.getNTaxa(), false);
1417 	// first add trivial splits if not existed
1418 	for (it = sg.begin(); it != sg.end(); it++) {
1419 		taxid = (*it)->trivial();
1420 		if (taxid >= 0) has_tax[taxid] = true;
1421 	}
1422 	for (count = 0; count < has_tax.size(); count++)
1423 		if (!has_tax[count]) {
1424 			Split *sp = new Split(sg.getNTaxa());
1425 			sp->addTaxon(count);
1426 			sg.push_back(sp);
1427 		}
1428     // sort splits by the number of taxa they contain
1429     sort(sg.begin(), sg.end(), splitnumtaxacmp);
1430 
1431     // initialize the tree
1432     rooted = false;
1433     leafNum = sg.getNTaxa();
1434     nodeNum = leafNum;
1435 
1436     // create the ground nodes, first as the leaves
1437     NodeVector leaves;
1438     vector<Split*> cladetaxa;
1439     leaves.resize(leafNum, NULL);
1440     cladetaxa.resize(leafNum, NULL);
1441     // first add all trivial splits into tree
1442     for (it = sg.begin(), count = 0; it != sg.end(); it++, count++) {
1443         //(*it)->report(cout);
1444         taxid = (*it)->trivial();
1445         if (taxid < 0) break;
1446         ASSERT(leaves[taxid] == NULL);
1447         leaves[taxid] = newNode(taxid, sg.getTaxa()->GetTaxonLabel(taxid).c_str());
1448         leaves[taxid]->addNeighbor(NULL, (*it)->getWeight());
1449         cladetaxa[taxid] = (*it);
1450     }
1451     // now fill in all missing taxa with zero terminal branch
1452     for (taxid = 0; taxid < leafNum; taxid++)
1453         ASSERT(leaves[taxid]);
1454 
1455     // now add non-trivial splits, cotinue with the interrupted iterator
1456     for (/*it = sg.begin()*/; it != sg.end(); it++) {
1457         //(*it)->report(cout);
1458         Split *mysp = *it;
1459         Node *newnode = newNode(nodeNum);
1460         int count = 0;
1461 
1462         for (taxid = 0; taxid < leaves.size(); )
1463             if (cladetaxa[taxid]->subsetOf(*mysp)) // clade is a subset of current split
1464             {
1465                 count += cladetaxa[taxid]->countTaxa();
1466                 double len = leaves[taxid]->updateNeighbor(NULL, newnode);
1467                 newnode->addNeighbor(leaves[taxid], len);
1468                 leaves[taxid] = leaves.back();
1469                 leaves.pop_back();
1470                 cladetaxa[taxid] = cladetaxa.back();
1471                 cladetaxa.pop_back();
1472             } else taxid++;
1473         ASSERT(count == mysp->countTaxa());
1474         cladetaxa.push_back(mysp);
1475         leaves.push_back(newnode);
1476 
1477         newnode->addNeighbor(NULL, mysp->getWeight());
1478         nodeNum++;
1479     }
1480     ASSERT(leaves.size() >= 3);
1481     Node *newnode = newNode(nodeNum);
1482     for (taxid = 0; taxid < leaves.size(); taxid++) {
1483         double len = leaves[taxid]->updateNeighbor(NULL, newnode);
1484         newnode->addNeighbor(leaves[taxid], len);
1485     }
1486     root = newnode;
1487     nodeNum++;
1488     cladetaxa.clear();
1489     string root_name = ROOT_NAME;
1490     newnode = findLeafName(root_name);
1491     if (newnode) {
1492         rooted = true;
1493         root = newnode;
1494     }
1495 
1496 }
1497 
findNodeName(string & name,Node * node,Node * dad)1498 Node *MTree::findNodeName(string &name, Node *node, Node *dad) {
1499     if (!node) node = root;
1500     if (node->name == name) return node;
1501     FOR_NEIGHBOR_IT(node, dad, it) {
1502         Node *res = findNodeName(name, (*it)->node, node);
1503         if (res) return res;
1504     }
1505     return NULL;
1506 }
1507 
findNodeNames(unordered_set<string> & taxa_set,pair<Node *,Neighbor * > & res,Node * node,Node * dad)1508 bool MTree::findNodeNames(unordered_set<string> &taxa_set, pair<Node*,Neighbor*> &res, Node *node, Node *dad) {
1509     int presence = 0;
1510     Neighbor *target = NULL;
1511     FOR_NEIGHBOR_IT(node, dad, it) {
1512         if ((*it)->node->isLeaf()) {
1513             if (taxa_set.find((*it)->node->name) != taxa_set.end()) {
1514                 presence++;
1515             } else target = (*it);
1516         } else {
1517             if (findNodeNames(taxa_set, res, (*it)->node, node))
1518                 presence++;
1519             else
1520                 target = *it;
1521             if (res.first)
1522                 return false;
1523         }
1524     }
1525     // all presence or absence
1526     if (presence == node->neighbors.size()-1)
1527         return true;
1528     if (presence == 0)
1529         return false;
1530     // inbetween: detect it!
1531     res.first = node;
1532     res.second = target;
1533     if (target != node->neighbors[0]) {
1534         // move target into the first neighbor
1535         FOR_NEIGHBOR_IT(node, NULL, it)
1536         if ((*it) == target) {
1537             (*it) = node->neighbors[0];
1538             node->neighbors[0] = target;
1539             break;
1540         }
1541     }
1542     return false;
1543 }
1544 
findLeafName(string & name,Node * node,Node * dad)1545 Node *MTree::findLeafName(string &name, Node *node, Node *dad) {
1546     if (!node) node = root;
1547     if (node->isLeaf() && node->name == name) return node;
1548     FOR_NEIGHBOR_IT(node, dad, it) {
1549         Node *res = findLeafName(name, (*it)->node, node);
1550         if (res) return res;
1551     }
1552     return NULL;
1553 }
1554 
findNodeID(int id,Node * node,Node * dad)1555 Node *MTree::findNodeID(int id, Node *node, Node* dad) {
1556     if (!node) node = root;
1557     if (node->id == id) return node;
1558     FOR_NEIGHBOR_IT(node, dad, it) {
1559         Node *res = findNodeID(id, (*it)->node, node);
1560         if (res) return res;
1561     }
1562     return NULL;
1563 }
1564 
1565 
scaleLength(double norm,bool make_int,Node * node,Node * dad)1566 void MTree::scaleLength(double norm, bool make_int, Node *node, Node *dad) {
1567     if (!node) node = root;
1568     FOR_NEIGHBOR_DECLARE(node, NULL, it) {
1569         (*it)->length *= norm;
1570         if (make_int)
1571             (*it)->length = round((*it)->length);
1572     }
1573 
1574     FOR_NEIGHBOR(node, dad, it) {
1575         scaleLength(norm, make_int, (*it)->node, node);
1576     }
1577 }
1578 
transformBranchLenRAX(double factor,Node * node,Node * dad)1579 void MTree::transformBranchLenRAX(double factor, Node *node, Node *dad) {
1580     if (!node) node = root;
1581     FOR_NEIGHBOR_DECLARE(node, NULL, it) {
1582         (*it)->length /= factor;
1583         (*it)->length = exp(-(*it)->length);
1584     }
1585 
1586     FOR_NEIGHBOR(node, dad, it) {
1587     	transformBranchLenRAX(factor, (*it)->node, node);
1588     }
1589 }
1590 
scaleCladeSupport(double norm,bool make_int,Node * node,Node * dad)1591 void MTree::scaleCladeSupport(double norm, bool make_int, Node *node, Node *dad) {
1592     if (!node) node = root;
1593     if (!node->isLeaf() && !node->name.empty()) {
1594         double supp = 0.0;
1595         try {
1596             supp = convert_double(node->name.c_str());
1597         } catch (string str) {
1598             outError(str);
1599         }
1600         supp *= norm;
1601         if (make_int)
1602             supp = round(supp);
1603         node->name = "";
1604         node->name += supp;
1605     }
1606 
1607     FOR_NEIGHBOR_IT(node, dad, it) {
1608         scaleCladeSupport(norm, make_int, (*it)->node, node);
1609     }
1610 }
1611 
1612 
~MTree()1613 MTree::~MTree()
1614 {
1615     if (root != NULL)
1616         freeNode();
1617     root = NULL;
1618 }
1619 
freeNode(Node * node,Node * dad)1620 int MTree::freeNode(Node *node, Node *dad)
1621 {
1622 	if ( root == NULL )
1623 		return 0;
1624     if (!node) node = root;
1625     NeighborVec::reverse_iterator it;
1626     int num_nodes = 1;
1627     for (it = node->neighbors.rbegin(); it != node->neighbors.rend(); it++)
1628         if ((*it)->node != dad) {
1629             num_nodes += freeNode((*it)->node, node);
1630         }
1631     delete node;
1632     return num_nodes;
1633 }
1634 
readNextChar(istream & in,char current_ch)1635 char MTree::readNextChar(istream &in, char current_ch) {
1636     char ch;
1637     if (current_ch == '[')
1638         ch = current_ch;
1639     else {
1640         in.get(ch);
1641         in_column++;
1642         if (ch == 10) {
1643             in_line++;
1644             in_column = 1;
1645         }
1646     }
1647     while (controlchar(ch) && !in.eof()) {
1648         in.get(ch);
1649         in_column++;
1650         if (ch == 10) {
1651             in_line++;
1652             in_column = 1;
1653         }
1654     }
1655     in_comment = "";
1656     // ignore comment
1657     while (ch=='[' && !in.eof()) {
1658         while (ch!=']' && !in.eof()) {
1659             in.get(ch);
1660             if (ch != ']')
1661                 in_comment += ch;
1662             in_column++;
1663             if (ch == 10) {
1664                 in_line++;
1665                 in_column = 1;
1666             }
1667         }
1668         if (ch != ']') throw "Comments not ended with ]";
1669         in_column++;
1670         in.get(ch);
1671         if (ch == 10) {
1672             in_line++;
1673             in_column = 1;
1674         }
1675         while (controlchar(ch) && !in.eof()) {
1676             in_column++;
1677             in.get(ch);
1678             if (ch == 10) {
1679                 in_line++;
1680                 in_column = 1;
1681             }
1682         }
1683     }
1684     return ch;
1685 }
1686 
reportInputInfo()1687 string MTree::reportInputInfo() {
1688     string str = " (line ";
1689     str += convertIntToString(in_line) + " column " + convertIntToString(in_column-1) + ")";
1690     return str;
1691 }
1692 
convertToUnrooted()1693 void MTree::convertToUnrooted() {
1694     ASSERT(rooted && root);
1695     ASSERT(root->isLeaf() && root->id == leafNum-1);
1696     Node *node = root->neighbors[0]->node;
1697     Node *taxon = findFirstTaxon();
1698 
1699     rooted = false;
1700     leafNum--;
1701 
1702     // delete root node
1703     if (node->degree() == 3) {
1704         // delete and join adjacent branches
1705         Node *node1 = NULL, *node2 = NULL;
1706         double len = 0.0;
1707         FOR_NEIGHBOR_IT(node, root, it) {
1708             if (!node1) node1 = (*it)->node; else node2 = (*it)->node;
1709             len += (*it)->length;
1710         }
1711         node1->updateNeighbor(node, node2, len);
1712         node2->updateNeighbor(node, node1, len);
1713         delete node;
1714     } else {
1715         // only delete root node
1716         auto it = node->findNeighborIt(root);
1717         delete *it;
1718         node->neighbors.erase(it);
1719 
1720     }
1721 
1722     delete root;
1723     // set a temporary taxon so that tree traversal works
1724     root = taxon;
1725 
1726     initializeTree();
1727     //    computeBranchDirection();
1728 }
1729 
1730 typedef map<int, Neighbor*> IntNeighborMap;
1731 
sortTaxa(Node * node,Node * dad)1732 int MTree::sortTaxa(Node *node, Node *dad) {
1733     if (!node) {
1734         node = root;
1735         if (node->isLeaf()) node = node->neighbors[0]->node;
1736     }
1737     if (node->isLeaf())
1738         return node->id;
1739     IntNeighborMap taxid_nei_map;
1740     FOR_NEIGHBOR_IT(node, dad, it) {
1741         int taxid = sortTaxa((*it)->node, node);
1742         taxid_nei_map.insert(IntNeighborMap::value_type(taxid, (*it)));
1743     }
1744     ;
1745     int i = 0;
1746     for (IntNeighborMap::iterator it = taxid_nei_map.begin(); it != taxid_nei_map.end(); it++, i++) {
1747         if (node->neighbors[i]->node == dad) i++;
1748         node->neighbors[i] = it->second;
1749     }
1750 
1751     return taxid_nei_map.begin()->first;
1752 }
1753 
setExtendedFigChar()1754 void MTree::setExtendedFigChar() {
1755 	//fig_char[0] = 179;
1756 	//fig_char[1] = 196;
1757 	fig_char[2] = '/';
1758 	//fig_char[3] = 195;
1759 	fig_char[4] = '\\';
1760 }
1761 
drawTree(ostream & out,int brtype,double zero_epsilon)1762 void MTree::drawTree(ostream &out, int brtype, double zero_epsilon) {
1763     IntVector sub_tree_br;
1764     if (verbose_mode >= VB_DEBUG) {
1765         printTree(cout);
1766         cout << endl;
1767     }
1768     Node *node = root;
1769     if (node->isLeaf()) node = node->neighbors[0]->node;
1770     double scale = 60.0/treeDepth(node);
1771     //if (verbose_mode >= VB_DEBUG)
1772     //cout << "Tree depth: " << scale<< endl;
1773     drawTree2(out, brtype, scale, sub_tree_br, zero_epsilon);
1774     /*
1775     if (brtype & WT_INT_NODE)
1776         drawTree2(out, brtype, scale, sub_tree_br, zero_epsilon);
1777     else
1778         drawTree(out, brtype, scale, sub_tree_br, zero_epsilon);
1779     */
1780     out << endl;
1781 }
1782 
1783 /*
1784 void MTree::drawTree(ostream &out, int brtype, double brscale, IntVector &subtree_br, double zero_epsilon, Node *node, Node *dad) {
1785     int i, br_len = 3;
1786     if (!node) {
1787         node = root;
1788         if (node->isLeaf()) node = node->neighbors[0]->node;
1789     } else {
1790 
1791         if (brtype & WT_BR_SCALE) {
1792             br_len = floor(node->findNeighbor(dad)->length * brscale)-1;
1793             if (br_len < 3) br_len = 3;
1794             //if (!node->isLeaf() && br_len < 4) br_len = 4;
1795         }
1796         out << '+';
1797         if ((brtype & WT_INT_NODE) && !node->isLeaf()) {
1798             string str = convertIntToString(node->id);
1799             for (i = 0; i < br_len-str.length(); i++) out << '-';
1800             out << node->id;
1801         } else
1802             for (i = 0; i < br_len; i++) out << '-';
1803     }
1804     if (node->isLeaf()) {
1805         out << node->name;
1806         if (brtype & WT_TAXON_ID)
1807             out << " (" << node->id << ")";
1808         out << endl;
1809         return;
1810     }
1811     int descendant_cnt = node->degree();
1812     if (dad) descendant_cnt--;
1813     int cnt = 0;
1814     subtree_br.push_back(br_len);
1815     FOR_NEIGHBOR_IT(node, dad, it) {
1816         if (cnt == descendant_cnt-1)
1817             subtree_br.back() = -subtree_br.back();
1818 
1819         drawTree(out, brtype, brscale, subtree_br, zero_epsilon, (*it)->node, node);
1820         cnt++;
1821         if (cnt == descendant_cnt) break;
1822         for (IntVector::iterator it = subtree_br.begin()+1; it != subtree_br.end(); it++)
1823         {
1824             if ((*(it-1)) > 0) out << '|';
1825             else out << ' ';
1826             for (i = 0; i < abs(*it); i++) out << ' ';
1827         }
1828     }
1829     subtree_br.pop_back();
1830 }
1831 */
1832 
drawTree2(ostream & out,int brtype,double brscale,IntVector & subtree_br,double zero_epsilon,Node * node,Node * dad)1833 void MTree::drawTree2(ostream &out, int brtype, double brscale, IntVector &subtree_br, double zero_epsilon, Node *node, Node *dad) {
1834     int i, br_len = 3;
1835     IntVector::iterator ii;
1836     bool zero_length = false;
1837 
1838     //cout << "DrawTree2!" << endl;
1839     if (!node) {
1840         node = root;
1841         if (node->isLeaf()) node = node->neighbors[0]->node;
1842     } else {
1843         if (brtype & WT_BR_SCALE) {
1844             br_len = floor(node->findNeighbor(dad)->length * brscale)-1;
1845             if (br_len < 2) br_len = 2;
1846         }
1847         if (node->findNeighbor(dad)->length <= zero_epsilon) zero_length = true;
1848     }
1849     if (node->isLeaf()) {
1850         for (ii = subtree_br.begin()+1; ii != subtree_br.end(); ii++) {
1851             if (abs(*(ii-1)) > 1000) out << ' ';
1852             else out << fig_char[0];
1853             int num = abs(*ii);
1854             if (num > 1000) num -= 1000;
1855             for (i = 0; i < num; i++) out << ' ';
1856         }
1857         out << ((node==dad->neighbors.front()->node) ? fig_char[2] : ((node==dad->neighbors.back()->node) ? fig_char[4] : fig_char[3]));
1858         for (i = 0; i < br_len; i++)
1859             out << ((zero_length) ? '*' : fig_char[1]);
1860         out << node->name;
1861         if (brtype & WT_TAXON_ID)
1862             out << " (" << node->id << ")";
1863         if (brtype & WT_BR_ID)
1864             out << " [" << node->neighbors[0]->id << "]";
1865         if (brtype & WT_BR_LEN)
1866             out << " " << node->neighbors[0]->length;
1867         //out << " ";
1868         //copy (subtree_br.begin(), subtree_br.end(), ostream_iterator<int> (out, " "));
1869         out << endl;
1870         return;
1871     }
1872     int descendant_cnt = node->degree();
1873     if (dad) descendant_cnt--;
1874     int cnt = 0;
1875     bool first = true;
1876 
1877     br_len = br_len+1000;
1878     FOR_NEIGHBOR_IT(node, dad, it) {
1879         if (cnt == descendant_cnt-1)
1880             br_len = -br_len;
1881         subtree_br.push_back(br_len);
1882 
1883         drawTree2(out, brtype, brscale, subtree_br, zero_epsilon, (*it)->node, node);
1884         subtree_br.pop_back();
1885         if (br_len > 1000) br_len -= 1000;
1886         cnt++;
1887         if (cnt == descendant_cnt) break;
1888         if (subtree_br.size() > 1)
1889             for (ii = subtree_br.begin()+1; ii != subtree_br.end(); ii++) {
1890                 if (abs(*(ii-1)) > 1000) out << ' ';
1891                 else out << fig_char[0];
1892                 if (ii == subtree_br.begin()) continue;
1893                 int num = abs(*ii);
1894                 if (num > 1000) num -= 1000;
1895                 for (i = 0; i < num; i++) out << ' ';
1896             }
1897         if (first) {
1898             if (dad) {
1899 				out << ((node==dad->neighbors.front()->node) ? fig_char[2] : ((node==dad->neighbors.back()->node) ? fig_char[4] : fig_char[3]));
1900                 for (i = 0; i < abs(br_len); i++)
1901                     out << ((zero_length) ? '*' : fig_char[1]);
1902             }
1903             if (brtype & WT_INT_NODE)
1904             	out << node->id;
1905             else
1906             	out << fig_char[0];
1907             if (!node->name.empty())
1908                 out << " (" << node->name << ")";
1909             if (brtype & WT_BR_LEN && dad)
1910                 out << " " << node->findNeighbor(dad)->length;
1911             if (brtype & WT_BR_ID && dad)
1912                 out << " [" << node->findNeighbor(dad)->id << "]";
1913             if (!subtree_br.empty()) {
1914                 if (subtree_br.back() >1000)
1915                     subtree_br.back() -= 1000;
1916                 else if (subtree_br.back() < 0)
1917                     subtree_br.back() -= 1000;
1918             }
1919         } else {
1920             if (dad) {
1921                 if (abs(subtree_br.back()) > 1000) out << ' ';
1922                 else out << fig_char[0];
1923                 for (i = 0; i < abs(br_len); i++)
1924                     out << ' ';
1925             }
1926             out << fig_char[0];
1927         }
1928         //out << " ";
1929         //copy (subtree_br.begin(), subtree_br.end(), ostream_iterator<int> (out, " "));
1930         out << endl;
1931         first = false;
1932     }
1933 }
1934 
equalTopology(MTree * tree)1935 bool MTree::equalTopology(MTree *tree) {
1936 	ASSERT(root->isLeaf());
1937 	Node *root2 = tree->findLeafName(root->name);
1938 	if (!root2) return false;
1939 	ostringstream ostr, ostr2;
1940 	printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA);
1941 	tree->printTree(ostr2, WT_TAXON_ID | WT_SORT_TAXA, root2);
1942 	return ostr.str() == ostr2.str();
1943 }
1944 
calcDist(char * filename)1945 void MTree::calcDist(char *filename) {
1946     vector<string> taxname;
1947     int i, j;
1948 
1949     // allocate memory
1950     taxname.resize(leafNum);
1951     double *dist = new double [leafNum * leafNum];
1952     // calculate the distances
1953     calcDist(dist);
1954     // get the taxa name
1955     getTaxaName(taxname);
1956 
1957     try {
1958         ofstream out;
1959         out.exceptions(ios::failbit | ios::badbit);
1960         out.open(filename);
1961 
1962         // now write the distances in phylip .dist format
1963         out << leafNum << endl;
1964 
1965         for (i = 0; i < leafNum; i++) {
1966             out << taxname[i] << "   ";
1967             for (j = 0; j < leafNum; j++) {
1968                 out << dist[i*leafNum + j] << "  ";
1969             }
1970             out << endl;
1971         }
1972         out.close();
1973     } catch (ios::failure) {
1974         outError(ERR_WRITE_OUTPUT, filename);
1975     }
1976     delete [] dist;
1977 }
1978 
calcDist(double * & dist,Node * node,Node * dad)1979 void MTree::calcDist(double* &dist, Node *node, Node *dad) {
1980     if (!node) node = root;
1981     if (node->isLeaf()) {
1982         calcDist(node, 0.0, dist, node, NULL);
1983     }
1984     //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1985     //if ((*it)->node != dad)	{
1986     FOR_NEIGHBOR_IT(node, dad, it) {
1987         calcDist(dist, (*it)->node, node);
1988     }
1989 }
1990 
calcDist(Node * aroot,double cur_len,double * & dist,Node * node,Node * dad)1991 void MTree::calcDist(Node *aroot, double cur_len, double* &dist, Node *node, Node *dad) {
1992     double branch_length;
1993 	if (!node) node = root;
1994     if (node->isLeaf()) {
1995         dist[aroot->id * leafNum + node->id] = cur_len;
1996         dist[node->id * leafNum + aroot->id] = cur_len;
1997     }
1998     //for (NeighborVec::iterator it = node->neighbors.begin(); it != node->neighbors.end(); it++)
1999     //if ((*it)->node != dad)	{
2000     FOR_NEIGHBOR_IT(node, dad, it) {
2001     	branch_length = (*it)->length;
2002         calcDist(aroot, cur_len + branch_length, dist, (*it)->node, node);
2003     }
2004 
2005 }
2006 
2007 
2008 /*********************************************
2009 	class PDTaxaSet
2010 *********************************************/
2011 
setSubTree(MTree & tree,NodeVector & subtree)2012 void PDTaxaSet::setSubTree(MTree &tree, NodeVector &subtree) {
2013     stringstream ostr;
2014     tree.printSubTree(ostr, subtree);
2015     tree_str = ostr.str();
2016 }
2017 
setTree(MTree & tree)2018 void PDTaxaSet::setTree(MTree &tree) {
2019     // assign the taxa set
2020     tree.getTaxa(*this);
2021     // assign the score
2022     score = tree.treeLength();
2023 
2024     // assign tree_str
2025     stringstream ostr;
2026     tree.printTree(ostr);
2027     tree_str = ostr.str();
2028 }
2029 
2030 
printTaxa(ostream & out)2031 void PDTaxaSet::printTaxa(ostream &out) {
2032     for (iterator it = begin(); it != end(); it++)
2033         if ((*it)->name != ROOT_NAME)
2034             out << (*it)->name << endl;
2035 }
2036 
printTaxa(char * filename)2037 void PDTaxaSet::printTaxa(char *filename) {
2038     try {
2039         ofstream out;
2040         out.exceptions(ios::failbit | ios::badbit);
2041         out.open(filename);
2042         printTaxa(out);
2043         out.close();
2044         cout << "Taxa list was printed to " << filename << endl;
2045     } catch (ios::failure) {
2046         outError(ERR_WRITE_OUTPUT, filename);
2047     }
2048 }
2049 
printTree(ostream & out)2050 void PDTaxaSet::printTree(ostream &out) {
2051     if (!tree_str.empty())
2052         out << tree_str << endl;
2053 }
2054 
printTree(char * filename)2055 void PDTaxaSet::printTree(char *filename) {
2056     try {
2057         ofstream out;
2058         out.exceptions(ios::failbit | ios::badbit);
2059         out.open(filename);
2060         printTree(out);
2061         out.close();
2062         cout << "Tree was printed to " << filename << endl;
2063     } catch (ios::failure) {
2064         outError(ERR_WRITE_OUTPUT, filename);
2065     }
2066 }
2067 
2068 
makeIDSet(int ntaxa,Split & id_set)2069 void PDTaxaSet::makeIDSet(int ntaxa, Split &id_set) {
2070     id_set.setNTaxa(ntaxa);
2071     id_set.setWeight(score);
2072     for (iterator it = begin(); it != end(); it++)
2073         id_set.addTaxon((*it)->id);
2074 }
2075 
writeInternalNodeNames(string & out_file)2076 void MTree::writeInternalNodeNames(string &out_file) {
2077     try {
2078         ofstream out(out_file.c_str());
2079         NodeVector nodes;
2080         getInternalNodes(nodes);
2081         for (NodeVector::iterator nit = nodes.begin(); nit != nodes.end(); nit++) {
2082             out  << " " << (*nit)->name;
2083         }
2084         out << endl;
2085         out.close();
2086     } catch (ios::failure) {
2087         outError(ERR_WRITE_OUTPUT, out_file);
2088     }
2089 }
2090 
assignLeafID(Node * node,Node * dad)2091 void MTree::assignLeafID(Node *node, Node *dad) {
2092     if (!node) node = root;
2093     if (node->isLeaf()) {
2094         node->id = atoi(node->name.c_str());
2095         ASSERT(node->id >= 0 && node->id < leafNum);
2096     }
2097     FOR_NEIGHBOR_IT(node, dad, it)
2098     assignLeafID((*it)->node, node);
2099 }
2100 
assignLeafNameByID(Node * node,Node * dad)2101 void MTree::assignLeafNameByID(Node *node, Node *dad) {
2102     if (!node) node = root;
2103     if (node->isLeaf()) {
2104 //        node->id = atoi(node->name.c_str());
2105 //        assert(node->id >= 0 && node->id < leafNum);
2106         node->name = convertIntToString(node->id);
2107     }
2108     FOR_NEIGHBOR_IT(node, dad, it)
2109         assignLeafNameByID((*it)->node, node);
2110 }
2111 
getTaxa(Split & taxa,Node * node,Node * dad)2112 void MTree::getTaxa(Split &taxa, Node *node, Node *dad) {
2113 	if (!node) node = root;
2114 	if (node->isLeaf()) {
2115 		taxa.addTaxon(node->id);
2116 	}
2117 	FOR_NEIGHBOR_IT(node, dad, it)
2118 		getTaxa(taxa, (*it)->node, node);
2119 }
2120 
2121 
extractQuadSubtrees(vector<Split * > & subtrees,BranchVector & branches,Node * node,Node * dad)2122 void MTree::extractQuadSubtrees(vector<Split*> &subtrees, BranchVector &branches, Node *node, Node *dad) {
2123 	if (!node) node = root;
2124 	FOR_NEIGHBOR_IT(node, dad, it) {
2125 		extractQuadSubtrees(subtrees, branches, (*it)->node, node);
2126 		if ((*it)->node->isLeaf()) continue;
2127 		// internal branch
2128 		ASSERT(node->degree() == 3 && (*it)->node->degree() == 3);
2129 		int cnt = 0;
2130 		Node *child = (*it)->node;
2131         string treestrings[4];
2132         int nodeid = 0;
2133 		FOR_NEIGHBOR_DECLARE(child, node, it2) {
2134 			Split *sp = new Split(leafNum);
2135 			getTaxa(*sp, (*it2)->node, child);
2136 			subtrees.push_back(sp);
2137 			cnt += sp->countTaxa();
2138             if (Params::getInstance().print_df1_trees) {
2139                 ostringstream ostr;
2140                 printTree(ostr, WT_BR_LEN, (*it2)->node, child);
2141                 treestrings[nodeid] = ostr.str();
2142                 treestrings[nodeid] = treestrings[nodeid].substr(0, treestrings[nodeid].length()-1);
2143                 nodeid++;
2144             }
2145 		}
2146 		FOR_NEIGHBOR(node, child, it2) {
2147 			Split *sp = new Split(leafNum);
2148 			getTaxa(*sp, (*it2)->node, node);
2149 			subtrees.push_back(sp);
2150 			cnt += sp->countTaxa();
2151             if (Params::getInstance().print_df1_trees) {
2152                 ostringstream ostr;
2153                 printTree(ostr, WT_BR_LEN, (*it2)->node, node);
2154                 treestrings[nodeid] = ostr.str();
2155                 treestrings[nodeid] = treestrings[nodeid].substr(0, treestrings[nodeid].length()-1);
2156                 nodeid++;
2157             }
2158 		}
2159 		ASSERT(cnt == leafNum);
2160         branches.push_back({node, child});
2161         if (Params::getInstance().print_df1_trees) {
2162             ASSERT(nodeid == 4);
2163             // output NNI-1 tree
2164             string treeDF1 = "(" + treestrings[0] + "," + treestrings[2] + ",(" + treestrings[1] + "," + treestrings[3] + "));";
2165             string treeDF2 = "(" + treestrings[0] + "," + treestrings[3] + ",(" + treestrings[1] + "," + treestrings[2] + "));";
2166             PUT_ATTR(child->findNeighbor(node), treeDF1);
2167             PUT_ATTR(child->findNeighbor(node), treeDF2);
2168         }
2169 	}
2170 }
2171 
2172 /*
2173 void MTree::assignBranchSupport(const char *trees_file, map<int,BranchSupportInfo> &branch_supports) {
2174 	cout << "Reading input trees file " << trees_file << endl;
2175 	try {
2176 		ifstream in;
2177         in.exceptions(ios::failbit | ios::badbit);
2178         in.open(trees_file);
2179         assignBranchSupport(in, branch_supports);
2180 		in.close();
2181 	} catch (ios::failure) {
2182 		outError(ERR_READ_INPUT, trees_file);
2183 	}
2184 }
2185 
2186 void MTree::assignBranchSupport(istream &in, map<int,BranchSupportInfo> &branch_supports) {
2187 	SplitGraph mysg;
2188 	NodeVector mynodes;
2189 	convertSplits(mysg, &mynodes, root->neighbors[0]->node);
2190 	vector<Split*> subtrees;
2191 	extractQuadSubtrees(subtrees, root->neighbors[0]->node);
2192 	IntVector decisive_counts;
2193 	decisive_counts.resize(mynodes.size(), 0);
2194 	StrVector occurence_trees; // list of tree IDs where each split occurs
2195 	if (verbose_mode >= VB_MED)
2196 		occurence_trees.resize(mynodes.size());
2197 	SplitGraph::iterator sit;
2198 	for (sit = mysg.begin(); sit != mysg.end(); sit++)
2199 		(*sit)->setWeight(0.0);
2200 	int ntrees, taxid;
2201 	for (ntrees = 1; !in.eof(); ntrees++) {
2202 		MTree tree;
2203 		bool is_rooted = false;
2204 
2205 		// read in the tree and convert into split system for indexing
2206 		tree.readTree(in, is_rooted);
2207 		if (verbose_mode >= VB_DEBUG)
2208 			cout << ntrees << " " << endl;
2209 		StrVector taxname;
2210 		tree.getTaxaName(taxname);
2211 		// create the map from taxa between 2 trees
2212 		Split taxa_mask(leafNum);
2213 		for (StrVector::iterator it = taxname.begin(); it != taxname.end(); it++) {
2214 			taxid = mysg.findLeafName(*it);
2215 			if (taxid < 0)
2216 				outError("Taxon not found in full tree: ", *it);
2217 			taxa_mask.addTaxon(taxid);
2218 		}
2219 		// make the taxa ordering right before converting to split system
2220 		taxname.clear();
2221 		int smallid;
2222 		for (taxid = 0, smallid = 0; taxid < leafNum; taxid++)
2223 			if (taxa_mask.containTaxon(taxid)) {
2224 				taxname.push_back(mysg.getTaxa()->GetTaxonLabel(taxid));
2225 				string name = (string)mysg.getTaxa()->GetTaxonLabel(taxid);
2226 				tree.findLeafName(name)->id = smallid++;
2227 			}
2228 		ASSERT(taxname.size() == tree.leafNum);
2229 
2230 		SplitGraph sg;
2231 		//NodeVector nodes;
2232 		tree.convertSplits(sg);
2233 		SplitIntMap hash_ss;
2234 		for (sit = sg.begin(); sit != sg.end(); sit++)
2235 			hash_ss.insertSplit((*sit), 1);
2236 
2237 		// now scan through all splits in current tree
2238 		int id, qid;
2239 		for (sit = mysg.begin(), id = 0, qid = 0; sit != mysg.end(); sit++, id++)
2240 		if ((*sit)->trivial() < 0) // it is an internal split
2241 		{
2242 
2243 			bool decisive = true;
2244 			for (int i = 0; i < 4; i++) {
2245 				if (!taxa_mask.overlap(*subtrees[qid+i])) {
2246 					decisive = false;
2247 					break;
2248 				}
2249 			}
2250 			qid += 4;
2251 			if (!decisive) continue;
2252 
2253 			decisive_counts[id]++;
2254 			Split *subsp = (*sit)->extractSubSplit(taxa_mask);
2255 			if (subsp->shouldInvert())
2256 				subsp->invert();
2257 			Split *sp = hash_ss.findSplit(subsp);
2258 			if (sp && sp->trivial() < 0) {
2259 				(*sit)->setWeight((*sit)->getWeight()+1.0);
2260 				if (verbose_mode >= VB_MED)
2261 					occurence_trees[id] += convertIntToString(ntrees) + " ";
2262 				if (verbose_mode >= VB_MAX) {
2263 					for (taxid = 0; taxid < (*sit)->getNTaxa(); taxid++)
2264 						if ((*sit)->containTaxon(taxid))
2265 							cout << " " << mysg.getTaxa()->GetTaxonLabel(taxid);
2266 					cout << " --> ";
2267 					for (taxid = 0; taxid < sp->getNTaxa(); taxid++)
2268 						if (sp->containTaxon(taxid))
2269 							cout << " " << taxname[taxid];
2270 					cout << endl;
2271 				}
2272 			}
2273 			delete subsp;
2274 		}
2275 
2276 		char ch;
2277 		in.exceptions(ios::goodbit);
2278 		(in) >> ch;
2279 		if (in.eof()) break;
2280 		in.unget();
2281 		in.exceptions(ios::failbit | ios::badbit);
2282 
2283 	}
2284 
2285 	cout << ntrees << " trees read" << endl;
2286 
2287 	for (int i = 0; i < mysg.size(); i++)
2288 	if (!mynodes[i]->isLeaf())
2289 	{
2290         BranchSupportInfo brsup;
2291         brsup.id = mynodes[i]->id;
2292         brsup.name = mynodes[i]->name;
2293         brsup.geneCF = mysg[i]->getWeight()/decisive_counts[i];
2294         brsup.geneN = decisive_counts[i];
2295         branch_supports[brsup.id] = brsup;
2296 
2297 		stringstream tmp;
2298 		if (mysg[i]->getWeight() == 0.0)
2299 			tmp << "0";
2300 		else
2301 			tmp << round((mysg[i]->getWeight()/decisive_counts[i])*1000)/10;
2302 		if (verbose_mode >= VB_MED)
2303 			tmp << "%" << decisive_counts[i];
2304 
2305         if (Params::getInstance().newick_extended_format) {
2306             if (mynodes[i]->name.empty() || mynodes[i]->name.back() != ']')
2307                 mynodes[i]->name += "[&CF=" + tmp.str() + "]";
2308             else
2309                 mynodes[i]->name = mynodes[i]->name.substr(0, mynodes[i]->name.length()-1) + ",!CF=" + tmp.str() + "]";
2310         } else {
2311             if (!mynodes[i]->name.empty())
2312                 mynodes[i]->name.append("/");
2313             mynodes[i]->name.append(tmp.str());
2314         }
2315 		if (verbose_mode >= VB_MED) {
2316 			cout << mynodes[i]->name << " " << occurence_trees[i] << endl;
2317 		}
2318 	}
2319 	for (vector<Split*>::reverse_iterator it = subtrees.rbegin(); it != subtrees.rend(); it++)
2320 		delete (*it);
2321 }
2322 */
2323 
computeRFDist(const char * trees_file,DoubleVector & dist,int assign_sup)2324 void MTree::computeRFDist(const char *trees_file, DoubleVector &dist, int assign_sup) {
2325 	cout << "Reading input trees file " << trees_file << endl;
2326 	try {
2327 		ifstream in;
2328         in.exceptions(ios::failbit | ios::badbit);
2329         in.open(trees_file);
2330         computeRFDist(in, dist, assign_sup);
2331 		in.close();
2332 	} catch (ios::failure) {
2333 		outError(ERR_READ_INPUT, trees_file);
2334 	}
2335 }
2336 
computeRFDist(istream & in,DoubleVector & dist,int assign_sup,bool one_tree)2337 void MTree::computeRFDist(istream &in, DoubleVector &dist, int assign_sup, bool one_tree) {
2338 	SplitGraph mysg;
2339     NodeVector nodes;
2340 	convertSplits(mysg, &nodes, root->neighbors[0]->node);
2341     StringIntMap name_index;
2342     int ntrees, taxid;
2343     for (taxid = 0; taxid < mysg.getNTaxa(); taxid++)
2344         name_index[mysg.getTaxa()->GetTaxonLabel(taxid)] = taxid;
2345     NodeVector::iterator nit;
2346     if (assign_sup)
2347         for (nit = nodes.begin(); nit != nodes.end(); nit++)
2348             (*nit)->height = 0.0;
2349 
2350 	SplitGraph::iterator sit;
2351 	for (sit = mysg.begin(); sit != mysg.end(); sit++)
2352 		(*sit)->setWeight(0.0);
2353 	for (ntrees = 1; !in.eof(); ntrees++) {
2354 		MTree tree;
2355 		bool is_rooted = false;
2356 
2357 		// read in the tree and convert into split system for indexing
2358 		tree.readTree(in, is_rooted);
2359 		if (verbose_mode >= VB_DEBUG)
2360 			cout << ntrees << " " << endl;
2361 		StrVector taxname;
2362 		tree.getTaxaName(taxname);
2363 		// create the map from taxa between 2 trees
2364 		Split taxa_mask(leafNum);
2365 		for (StrVector::iterator it = taxname.begin(); it != taxname.end(); it++) {
2366             if (name_index.find(*it) == name_index.end())
2367                 outError("Taxon not found in full tree: ", *it);
2368 			taxid = name_index[*it];
2369 			taxa_mask.addTaxon(taxid);
2370 		}
2371 		// make the taxa ordering right before converting to split system
2372 		taxname.clear();
2373 		int smallid;
2374 		for (taxid = 0, smallid = 0; taxid < leafNum; taxid++)
2375 			if (taxa_mask.containTaxon(taxid)) {
2376 				taxname.push_back(mysg.getTaxa()->GetTaxonLabel(taxid));
2377 				string name = (string)mysg.getTaxa()->GetTaxonLabel(taxid);
2378 				tree.findLeafName(name)->id = smallid++;
2379 			}
2380 		ASSERT(taxname.size() == tree.leafNum);
2381 
2382 		SplitGraph sg;
2383 		//NodeVector nodes;
2384 		tree.convertSplits(sg);
2385 		SplitIntMap hash_ss;
2386 		for (sit = sg.begin(); sit != sg.end(); sit++)
2387 			hash_ss.insertSplit((*sit), 1);
2388 
2389 		// now scan through all splits in current tree
2390 		int common_splits = 0;
2391 		for (sit = mysg.begin(); sit != mysg.end(); sit++)
2392 		if ((*sit)->trivial() < 0) // it is an internal split
2393 		{
2394 
2395 			Split *subsp = (*sit)->extractSubSplit(taxa_mask);
2396 			if (subsp->shouldInvert())
2397 				subsp->invert();
2398 			Split *sp = hash_ss.findSplit(subsp);
2399 			if (sp) {
2400 				common_splits++;
2401 				//(*sit)->setWeight((*sit)->getWeight()+1.0);
2402 				if (verbose_mode >= VB_MAX) {
2403 					for (taxid = 0; taxid < (*sit)->getNTaxa(); taxid++)
2404 						if ((*sit)->containTaxon(taxid))
2405 							cout << " " << mysg.getTaxa()->GetTaxonLabel(taxid);
2406 					cout << " --> ";
2407 					for (taxid = 0; taxid < sp->getNTaxa(); taxid++)
2408 						if (sp->containTaxon(taxid))
2409 							cout << " " << taxname[taxid];
2410 					cout << endl;
2411 				}
2412                 if (assign_sup && subsp->trivial() < 0)
2413                     nodes[sit-mysg.begin()]->height++;
2414 			}
2415 			delete subsp;
2416 		}
2417 
2418 		//cout << "common_splits = " << common_splits << endl;
2419         double max_dist = branchNum-leafNum + tree.branchNum-tree.leafNum;
2420         double rf_val = max_dist - 2*common_splits;
2421         if (Params::getInstance().normalize_tree_dist) {
2422             rf_val = rf_val / max_dist;
2423         }
2424 		dist.push_back(rf_val);
2425 		char ch;
2426 		in.exceptions(ios::goodbit);
2427 		(in) >> ch;
2428 		if (in.eof()) break;
2429 		in.unget();
2430 		in.exceptions(ios::failbit | ios::badbit);
2431 
2432         if (one_tree)
2433             break;
2434 
2435 	}
2436     if (assign_sup)
2437         for (nit = nodes.begin(); nit != nodes.end(); nit++)
2438             if (!(*nit)->isLeaf())
2439                 (*nit)->name = convertIntToString((*nit)->height);
2440 
2441 //	cout << ntrees << " trees read" << endl;
2442 
2443 
2444 }
2445 
reportDisagreedTrees(vector<string> & taxname,MTreeSet & trees,Split & mysplit)2446 void MTree::reportDisagreedTrees(vector<string> &taxname, MTreeSet &trees, Split &mysplit) {
2447 	for (MTreeSet::iterator it = trees.begin(); it != trees.end(); it++) {
2448 		MTree *tree = (*it);
2449 		SplitGraph sg;
2450 		tree->convertSplits(taxname, sg);
2451 		if (!sg.containSplit(mysplit)) {
2452 			tree->printTree(cout, 0); // don't print branch lengths
2453 			cout << endl;
2454 		}
2455 	}
2456 }
2457 
2458 
createBootstrapSupport(vector<string> & taxname,MTreeSet & trees,SplitIntMap & hash_ss,char * tag,Node * node,Node * dad)2459 void MTree::createBootstrapSupport(vector<string> &taxname, MTreeSet &trees, SplitIntMap &hash_ss,
2460     char *tag, Node *node, Node *dad) {
2461 	if (!node) node = root;
2462 	FOR_NEIGHBOR_IT(node, dad, it) {
2463 		if (!node->isLeaf() && !(*it)->node->isLeaf()) {
2464 			vector<int> taxa;
2465 			getTaxaID(taxa, (*it)->node, node);
2466 			Split mysplit(leafNum, 0.0, taxa);
2467 			if (mysplit.shouldInvert())
2468 				mysplit.invert();
2469 			//mysplit.report(cout);
2470 			//SplitIntMap::iterator ass_it = hash_ss.find(&mysplit);
2471 			Split *sp = hash_ss.findSplit(&mysplit);
2472 			// if found smt
2473 			if (sp != NULL) {
2474 				//Split *sp = ass_it->first;
2475 				/*char tmp[100];
2476 				if ((*it)->node->name.empty()) {
2477 					sprintf(tmp, "%d", round(sp->getWeight()));
2478 				} else
2479 					sprintf(tmp, "/%d", round(sp->getWeight()));*/
2480 				stringstream tmp;
2481 				if ((*it)->node->name.empty())
2482 				  tmp << sp->getWeight();
2483 				else
2484 				  tmp << "/" << sp->getWeight();
2485 
2486                 // assign tag
2487                 if (tag && (iEquals(tag, "ALL") || (*it)->node->name == tag))
2488                     tmp << sp->getName();
2489 				(*it)->node->name.append(tmp.str());
2490 			} else {
2491 				if (!(*it)->node->name.empty()) (*it)->node->name.append("/");
2492 				(*it)->node->name.append("0");
2493 				if (verbose_mode >= VB_MED) {
2494 					cout << "split not found:" << endl;
2495 					mysplit.report(cout);
2496 				}
2497 			}
2498 			/* new stuff: report trees that do not contain the split */
2499 			if (strncmp((*it)->node->name.c_str(), "INFO", 4) == 0) {
2500 				cout << "Reporting trees not containing the split " << (*it)->node->name << endl;
2501 				reportDisagreedTrees(taxname, trees, mysplit);
2502 			}
2503 		}
2504 		createBootstrapSupport(taxname, trees, hash_ss, tag, (*it)->node, node);
2505 	}
2506 }
2507 
removeNode(Node * dad,Node * node)2508 void MTree::removeNode(Node *dad, Node *node) {
2509 //    Node *child = (*it)->node;
2510     bool first = true;
2511     FOR_NEIGHBOR_IT(node, dad, it2) {
2512         if (first)
2513             dad->updateNeighbor(node, (*it2)->node, (*it2)->length);
2514         else
2515             dad->addNeighbor((*it2)->node, (*it2)->length);
2516         (*it2)->node->updateNeighbor(node, dad);
2517         first = false;
2518     }
2519     delete node;
2520 
2521 //    Node *child = (*it)->node;
2522 //    bool first = true;
2523 //    FOR_NEIGHBOR_IT(child, node, it2) {
2524 //        if (first)
2525 //            node->updateNeighbor(child, (*it2)->node, (*it2)->length);
2526 //        else
2527 //            node->addNeighbor((*it2)->node, (*it2)->length);
2528 //        (*it2)->node->updateNeighbor(child, node);
2529 //        first = false;
2530 //    }
2531 //    delete child;
2532 }
2533 
2534 
collapseZeroBranches(Node * node,Node * dad,double threshold)2535 int MTree::collapseZeroBranches(Node *node, Node *dad, double threshold) {
2536 	if (!node) node = root;
2537     int count = 0;
2538 	FOR_NEIGHBOR_DECLARE(node, dad, it) {
2539 		count += collapseZeroBranches((*it)->node, node, threshold);
2540 	}
2541 	NeighborVec nei_vec;
2542 	nei_vec.insert(nei_vec.begin(), node->neighbors.begin(), node->neighbors.end());
2543 	for (it = nei_vec.begin(); it != nei_vec.end(); it++)
2544 	if ((*it)->node != dad && (*it)->length <= threshold) {
2545 		// delete the child node
2546         removeNode(node, (*it)->node);
2547         count++;
2548 	}
2549     return count;
2550 }
2551 
collapseInternalBranches(Node * node,Node * dad,double threshold)2552 int MTree::collapseInternalBranches(Node *node, Node *dad, double threshold) {
2553 	if (!node) node = root;
2554     int count = 0;
2555 	FOR_NEIGHBOR_DECLARE(node, dad, it) {
2556 		count += collapseInternalBranches((*it)->node, node, threshold);
2557 	}
2558     if (node->isLeaf())
2559         return count;
2560 	NeighborVec nei_vec;
2561 	nei_vec.insert(nei_vec.begin(), node->neighbors.begin(), node->neighbors.end());
2562 	for (it = nei_vec.begin(); it != nei_vec.end(); it++)
2563 	if ((*it)->node != dad && !(*it)->node->isLeaf() && (*it)->length <= threshold) {
2564 		// delete the child node
2565         removeNode(node, (*it)->node);
2566         count++;
2567 	}
2568     return count;
2569 }
2570 
insertTaxa(StrVector & new_taxa,StrVector & existing_taxa)2571 void MTree::insertTaxa(StrVector &new_taxa, StrVector &existing_taxa) {
2572 	if (new_taxa.empty()) return;
2573 	IntVector id;
2574 	int i;
2575 	id.resize(new_taxa.size());
2576 	for (i = 0; i < id.size(); i++)
2577 		id[i] = i;
2578 	// randomize order before reinsert back into tree
2579 	my_random_shuffle(id.begin(), id.end());
2580 
2581 	for (int i = 0; i < new_taxa.size(); i++) {
2582 		Node *old_taxon = findLeafName(existing_taxa[id[i]]);
2583 		ASSERT(old_taxon);
2584 		double len = old_taxon->neighbors[0]->length;
2585 		Node *old_node = old_taxon->neighbors[0]->node;
2586 		Node *new_taxon = newNode(leafNum+i, new_taxa[id[i]].c_str());
2587 		Node *new_node = newNode();
2588 		// link new_taxon - new_node
2589 		new_taxon->addNeighbor(new_node, 0.0);
2590 		new_node->addNeighbor(new_taxon, 0.0);
2591 		// link old_taxon - new_node
2592 		new_node->addNeighbor(old_taxon, 0.0);
2593 		old_taxon->updateNeighbor(old_node, new_node, 0.0);
2594 		// link old_node - new_node
2595 		new_node->addNeighbor(old_node, len);
2596 		old_node->updateNeighbor(old_taxon, new_node, len);
2597 	}
2598 
2599     leafNum = leafNum + new_taxa.size();
2600     initializeTree();
2601 }
2602 
findFirstTaxon(Node * node,Node * dad)2603 Node *MTree::findFirstTaxon(Node *node, Node *dad) {
2604 	if (!node) node = root;
2605 //	Node *next;
2606 	for (int i = 0; i < nodeNum; i++)
2607 		FOR_NEIGHBOR_IT(node, dad, it) {
2608 			if ((*it)->node->isLeaf()) return (*it)->node;
2609 			dad = node;
2610 			node = (*it)->node;
2611             break;
2612 		}
2613 	return NULL;
2614 }
2615 
removeTaxa(StrVector & taxa_names)2616 int MTree::removeTaxa(StrVector &taxa_names) {
2617 	if (taxa_names.empty())
2618         return 0;
2619 	int count = 0;
2620 	for (StrVector::iterator sit = taxa_names.begin(); sit != taxa_names.end(); sit++) {
2621 		Node *node = findLeafName(*sit);
2622 		if (!node) continue;
2623 		count++;
2624 //		if (!node)
2625 //			outError((string)"Taxon " + (*sit) + " does not appear in the tree");
2626 		if (node == root)
2627 		{	// find another root
2628 			root = findFirstTaxon(root);
2629 		}
2630 
2631 		Node *innode = node->neighbors[0]->node;
2632 		Node *othernodes[2] = { NULL, NULL };
2633 		int i;
2634 		double length = 0;
2635 
2636 		bool should_merge = true;
2637 
2638 		FOR_NEIGHBOR_DECLARE(innode, node, it)	{
2639 			length += (*it)->length;
2640 			if (othernodes[0] == NULL)
2641 				othernodes[0] = (*it)->node;
2642 			else if (othernodes[1] == NULL)
2643 				othernodes[1] = (*it)->node;
2644 			else
2645 				should_merge = false;
2646 		}
2647 
2648 		if (should_merge)
2649 		{
2650 			// merge two branches
2651 			for (i = 0; i < 2; i++)
2652 				for (it = othernodes[i]->neighbors.begin(); it != othernodes[i]->neighbors.end(); it++)
2653 					if ((*it)->node == innode)
2654 					{
2655 						(*it)->node = othernodes[1-i];
2656 						(*it)->length = length;
2657 					}
2658             delete innode;
2659 		} else {
2660 			// simple delete the neighbor of innode
2661 			for (it = innode->neighbors.begin(); it != innode->neighbors.end(); it++)
2662 				if ((*it)->node == node) {
2663 					innode->neighbors.erase(it);
2664 					break;
2665 				}
2666 		}
2667 		delete node;
2668 	}
2669 
2670 	if (!count) return 0;
2671 
2672 	NodeVector taxa;
2673 	getTaxa(taxa);
2674 	ASSERT(taxa.size() > 0);
2675 	// reassign taxon IDs
2676 	int id = 0;
2677 	for (NodeVector::iterator nit = taxa.begin(); nit != taxa.end(); nit++)
2678         if (*nit == root && rooted)
2679             (*nit)->id = taxa.size()-1;
2680         else
2681             (*nit)->id = id++;
2682 	leafNum = taxa.size();
2683 	initializeTree();
2684     return count;
2685 }
2686 
getSplits(SplitGraph & splits,Node * node,Node * dad)2687 void MTree::getSplits(SplitGraph &splits, Node* node, Node* dad) {
2688    if (!node) {
2689        node = root;
2690    }
2691    FOR_NEIGHBOR_IT(node, dad, it) {
2692            getSplits(splits, (*it)->node, node);
2693            Split* mySplit = new Split(*((*it)->split));
2694            if (mySplit->shouldInvert())
2695                mySplit->invert();
2696            splits.push_back(mySplit);
2697        }
2698 }
2699 
buildNodeSplit(Split * resp,Node * node,Node * dad)2700 void MTree::buildNodeSplit(Split *resp, Node *node, Node *dad) {
2701     if (!node) {
2702         node = root;
2703         // The neighbor that represents root
2704         Neighbor* rootNei = root->neighbors[0]->node->findNeighbor(root);
2705         if (rootNei->split == NULL) {
2706             rootNei->split = new Split(leafNum);
2707         } else {
2708             delete rootNei->split;
2709             rootNei->split = new Split(leafNum);
2710         }
2711         resp = rootNei->split;
2712     }
2713     bool has_child = false;
2714     FOR_NEIGHBOR_IT(node, dad, it) {
2715             if ((*it)->split == NULL) {
2716                 (*it)->split = new Split(leafNum);
2717             } else {
2718                 delete (*it)->split;
2719                 (*it)->split = new Split(leafNum);
2720             }
2721             buildNodeSplit((*it)->split, (*it)->node, node);
2722             //(*it)->split->report(cout);
2723             *resp += *((*it)->split);
2724             has_child = true;
2725         }
2726 
2727     if (dad != NULL) {
2728         Neighbor* dadNei = node->findNeighbor(dad);
2729         dadNei->split = new Split(*resp);
2730         dadNei->split->invert();
2731     }
2732 
2733     if (!has_child) {
2734         resp->addTaxon(node->id);
2735     }
2736 }
2737 
initializeSplitMap(Split * resp,Node * node,Node * dad)2738 void MTree::initializeSplitMap(Split *resp, Node *node, Node *dad) {
2739     if (!node) node = root;
2740     if (!resp) {
2741         resp = new Split(leafNum);
2742     }
2743     bool has_child = false;
2744     FOR_NEIGHBOR_IT(node, dad, it) {
2745             Split *sp = new Split(leafNum);
2746             initializeSplitMap(sp, (*it)->node, node);
2747             *resp += *sp;
2748             if (sp->shouldInvert())
2749                 sp->invert();
2750             /* ignore nodes with degree of 2 because such split will be added before */
2751             if (node->degree() != 2) {
2752                 Branch curBranch((*it)->node, node);
2753                 splitBranchMap.insert(make_pair(sp, curBranch));
2754             }
2755             has_child = true;
2756         }
2757     if (!has_child) {
2758         resp->addTaxon(node->id);
2759     }
2760 }
2761 
findFarthestLeaf(Node * node,Node * dad)2762 Node *MTree::findFarthestLeaf(Node *node, Node *dad) {
2763     if (rooted) // special treatment for rooted tree
2764         return root;
2765     if (!node)
2766         node = root;
2767 
2768     if (dad && node->isLeaf()) {
2769         node->height = 0.0;
2770         return node;
2771     }
2772     Node *res = NULL;
2773     node->height = 0.0;
2774     FOR_NEIGHBOR_IT(node, dad, it) {
2775         Node *leaf = findFarthestLeaf((*it)->node, node);
2776         if (node->height < (*it)->node->height+1) {
2777             node->height = (*it)->node->height+1;
2778             res = leaf;
2779         }
2780     }
2781     return res;
2782 }
2783 
getPreOrderBranches(NodeVector & nodes,NodeVector & nodes2,Node * node,Node * dad)2784 void MTree::getPreOrderBranches(NodeVector &nodes, NodeVector &nodes2, Node *node, Node *dad) {
2785     if (dad) {
2786         nodes.push_back(node);
2787         nodes2.push_back(dad);
2788     }
2789 
2790     NeighborVec neivec = node->neighbors;
2791     NeighborVec::iterator i1, i2;
2792     for (i1 = neivec.begin(); i1 != neivec.end(); i1++)
2793         for (i2 = i1+1; i2 != neivec.end(); i2++)
2794             if ((*i1)->node->height > (*i2)->node->height) {
2795                 Neighbor *nei = *i1;
2796                 *i1 = *i2;
2797                 *i2 = nei;
2798             }
2799     for (i1 = neivec.begin(); i1 != neivec.end(); i1++)
2800         if ((*i1)->node != dad)
2801             getPreOrderBranches(nodes, nodes2, (*i1)->node, node);
2802 //    FOR_NEIGHBOR_IT(node, dad, it)
2803 //        getPreOrderBranches(nodes, nodes2, (*it)->node, node);
2804 }
2805