1 /***************************************************************************
2  *   Copyright (C) 2009 by BUI Quang Minh   *
3  *   minh.bui@univie.ac.at   *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  *   This program is distributed in the hope that it will be useful,       *
11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
13  *   GNU General Public License for more details.                          *
14  *                                                                         *
15  *   You should have received a copy of the GNU General Public License     *
16  *   along with this program; if not, write to the                         *
17  *   Free Software Foundation, Inc.,                                       *
18  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
19  ***************************************************************************/
20 #include "phylosupertree.h"
21 #include "alignment/superalignment.h"
22 #include "alignment/superalignmentpairwise.h"
23 #include "main/phylotesting.h"
24 #include "model/partitionmodel.h"
25 #include "utils/MPIHelper.h"
26 
PhyloSuperTree()27 PhyloSuperTree::PhyloSuperTree()
28  : IQTree()
29 {
30 	totalNNIs = evalNNIs = 0;
31     rescale_codon_brlen = false;
32 	// Initialize the counter for evaluated NNIs on subtrees. FOR THIS CASE IT WON'T BE initialized.
33 }
34 
PhyloSuperTree(SuperAlignment * alignment,bool new_iqtree)35 PhyloSuperTree::PhyloSuperTree(SuperAlignment *alignment, bool new_iqtree) :  IQTree(alignment) {
36     totalNNIs = evalNNIs = 0;
37 
38     rescale_codon_brlen = false;
39     bool has_codon = false;
40     vector<Alignment*>::iterator it;
41     for (it = alignment->partitions.begin(); it != alignment->partitions.end(); it++)
42         if ((*it)->seq_type != SEQ_CODON) {
43             rescale_codon_brlen = true;
44         } else
45             has_codon = true;
46 
47     rescale_codon_brlen &= has_codon;
48 
49     // Initialize the counter for evaluated NNIs on subtrees
50     int part = 0;
51 
52     StrVector model_names;
53     for (it = alignment->partitions.begin(); it != alignment->partitions.end(); it++, part++) {
54         PhyloTree *tree;
55         if (new_iqtree)
56             tree = new IQTree(*it);
57         else
58             tree = new PhyloTree(*it);
59         push_back(tree);
60         PartitionInfo info;
61         info.cur_ptnlh = NULL;
62         info.nniMoves[0].ptnlh = NULL;
63         info.nniMoves[1].ptnlh = NULL;
64         info.evalNNIs = 0.0;
65         part_info.push_back(info);
66     }
67 
68     aln = alignment;
69 
70 }
71 
PhyloSuperTree(SuperAlignment * alignment,PhyloSuperTree * super_tree)72 PhyloSuperTree::PhyloSuperTree(SuperAlignment *alignment, PhyloSuperTree *super_tree) :  IQTree(alignment) {
73 	totalNNIs = evalNNIs = 0;
74     rescale_codon_brlen = super_tree->rescale_codon_brlen;
75 	part_info = super_tree->part_info;
76 	for (vector<Alignment*>::iterator it = alignment->partitions.begin(); it != alignment->partitions.end(); it++) {
77 		PhyloTree *tree = new PhyloTree((*it));
78 		push_back(tree);
79 	}
80 	// Initialize the counter for evaluated NNIs on subtrees
81 	int part = 0;
82 	for (iterator it = begin(); it != end(); it++, part++) {
83 		part_info[part].evalNNIs = 0.0;
84 	}
85 
86 	aln = alignment;
87 }
88 
setModelFactory(ModelFactory * model_fac)89 void PhyloSuperTree::setModelFactory(ModelFactory *model_fac) {
90     PhyloTree::setModelFactory(model_fac);
91     if (model_fac) {
92         PhyloSuperTree *tree = (PhyloSuperTree*)model_fac->site_rate->phylo_tree;
93         for (int part = 0; part != size(); part++) {
94             at(part)->setModelFactory(tree->at(part)->getModelFactory());
95         }
96     } else {
97         for (int part = 0; part != size(); part++) {
98             at(part)->setModelFactory(NULL);
99         }
100     }
101 }
102 
setPartInfo(PhyloSuperTree * tree)103 void PhyloSuperTree::setPartInfo(PhyloSuperTree *tree) {
104     part_info = tree->part_info;
105     int part = 0;
106     for (iterator it = begin(); it != end(); it++, part++) {
107         part_info[part].evalNNIs = 0.0;
108     }
109     if (!params->bootstrap_spec) {
110         return;
111     }
112     // 2018-06-03: for -bsam GENE, number of partitions might be reduced
113     if (part_info.size() <= size())
114         return;
115     part_info.erase(part_info.begin()+size(), part_info.end());
116     for (part = 0; part < part_info.size(); part++) {
117         bool found = false;
118         for (int p = 0; p < tree->size(); p++)
119             if (tree->at(p)->aln->name == at(part)->aln->name) {
120                 part_info[part] = tree->part_info[p];
121                 part_info[part].evalNNIs = 0.0;
122                 found = true;
123                 break;
124             }
125         ASSERT(found);
126     }
127 }
128 
129 
setSuperAlignment(Alignment * alignment)130 void PhyloSuperTree::setSuperAlignment(Alignment *alignment) {
131     PhyloTree::setAlignment(alignment);
132 
133     SuperAlignment *saln = (SuperAlignment*)aln;
134     for (int i = 0; i < size(); i++)
135         at(i)->setAlignment(saln->partitions.at(i));
136 }
137 
setCheckpoint(Checkpoint * checkpoint)138 void PhyloSuperTree::setCheckpoint(Checkpoint *checkpoint) {
139 	IQTree::setCheckpoint(checkpoint);
140 	for (iterator it = begin(); it != end(); it++)
141 		(*it)->setCheckpoint(checkpoint);
142 }
143 
saveCheckpoint()144 void PhyloSuperTree::saveCheckpoint() {
145 //    checkpoint->startStruct("PhyloSuperTree");
146 //    int part = 0;
147 //    for (iterator it = begin(); it != end(); it++, part++) {
148 //    	string key = part_info[part].name + ".tree";
149 //    	checkpoint->put(key, (*it)->getTreeString());
150 //    }
151 //    checkpoint->endStruct();
152     IQTree::saveCheckpoint();
153 }
154 
restoreCheckpoint()155 void PhyloSuperTree::restoreCheckpoint() {
156     IQTree::restoreCheckpoint();
157 
158     // first get the newick string of super tree
159 //    checkpoint->startStruct("PhyloTree");
160 //    string newick;
161 //    CKP_RESTORE(newick);
162 //    checkpoint->endStruct();
163 //
164 //    if (newick.empty()) return;
165 //
166 //    // now get partition tree strings
167 //    checkpoint->startStruct("PhyloSuperTree");
168 //    int part = 0;
169 //    for (iterator it = begin(); it != end(); it++, part++) {
170 //    	string key = part_info[part].name + ".tree";
171 //    	string part_tree;
172 //    	if (!checkpoint->get(key, part_tree))
173 //    		outError("No tree for partition " + part_info[part].name + " found from checkpoint");
174 //    	newick += part_tree;
175 //    }
176 //
177 //    checkpoint->endStruct();
178 //
179 //    readTreeString(newick);
180 
181 }
182 
setParams(Params * params)183 void PhyloSuperTree::setParams(Params* params) {
184 	IQTree::setParams(params);
185 	for (iterator it = begin(); it != end(); it++) {
186 		(*it)->setParams(params);
187 	}
188 }
189 
initSettings(Params & params)190 void PhyloSuperTree::initSettings(Params &params) {
191 	IQTree::initSettings(params);
192     setLikelihoodKernel(params.SSE);
193     setNumThreads(params.num_threads);
194 	for (iterator it = begin(); it != end(); it++) {
195 		(*it)->params = &params;
196 		(*it)->optimize_by_newton = params.optimize_by_newton;
197 	}
198 
199 }
200 
setLikelihoodKernel(LikelihoodKernel lk)201 void PhyloSuperTree::setLikelihoodKernel(LikelihoodKernel lk) {
202     PhyloTree::setLikelihoodKernel(lk);
203     for (iterator it = begin(); it != end(); it++)
204         (*it)->setLikelihoodKernel(lk);
205 }
206 
setParsimonyKernel(LikelihoodKernel lk)207 void PhyloSuperTree::setParsimonyKernel(LikelihoodKernel lk) {
208     PhyloTree::setParsimonyKernel(lk);
209     for (iterator it = begin(); it != end(); it++)
210         (*it)->setParsimonyKernel(lk);
211 }
212 
changeLikelihoodKernel(LikelihoodKernel lk)213 void PhyloSuperTree::changeLikelihoodKernel(LikelihoodKernel lk) {
214 	PhyloTree::changeLikelihoodKernel(lk);
215 }
216 
setNumThreads(int num_threads)217 void PhyloSuperTree::setNumThreads(int num_threads) {
218     PhyloTree::setNumThreads((size() >= num_threads) ? num_threads : 1);
219     for (iterator it = begin(); it != end(); it++)
220         (*it)->setNumThreads((size() >= num_threads) ? 1 : num_threads);
221 }
222 
printResultTree(string suffix)223 void PhyloSuperTree::printResultTree(string suffix) {
224     if (MPIHelper::getInstance().isWorker()) {
225         return;
226     }
227     if (params->suppress_output_flags & OUT_TREEFILE)
228         return;
229 
230     IQTree::printResultTree(suffix);
231 
232     string tree_file_name = params->out_prefix;
233     tree_file_name += ".parttrees";
234     if (suffix.compare("") != 0) {
235         tree_file_name += "." + suffix;
236     }
237     try {
238         ofstream out;
239         out.exceptions(ios::failbit | ios::badbit);
240         out.open(tree_file_name);
241         for (iterator it = begin(); it != end(); it++)
242             (*it)->printTree(out, WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA | WT_NEWLINE);
243         out.close();
244     } catch (ios::failure) {
245         outError(ERR_WRITE_OUTPUT, tree_file_name);
246     }
247     if (verbose_mode >= VB_MED)
248         cout << "Partition trees printed to " << tree_file_name << endl;
249 }
250 
getTreeString()251 string PhyloSuperTree::getTreeString() {
252 	stringstream tree_stream;
253 	printTree(tree_stream, WT_TAXON_ID + WT_BR_LEN + WT_SORT_TAXA);
254 	for (iterator it = begin(); it != end(); it++)
255 		(*it)->printTree(tree_stream, WT_TAXON_ID + WT_BR_LEN + WT_SORT_TAXA);
256 	return tree_stream.str();
257 }
258 
readTreeString(const string & tree_string)259 void PhyloSuperTree::readTreeString(const string &tree_string) {
260 	stringstream str;
261 	str << tree_string;
262 	str.seekg(0, ios::beg);
263 	freeNode();
264     // bug fix 2017-11-30: in case taxon name happens to be ID
265 	MTree::readTree(str, rooted);
266     assignLeafNames();
267 //	setAlignment(aln);
268 	setRootNode(params->root);
269 	for (iterator it = begin(); it != end(); it++) {
270 		(*it)->freeNode();
271 		(*it)->readTree(str, (*it)->rooted);
272         (*it)->assignLeafNames();
273 //		(*it)->setAlignment((*it)->aln);
274 	}
275 	linkTrees();
276 //	if (isSuperTree()) {
277 //		((PhyloSuperTree*) this)->mapTrees();
278 //	}
279 	if (params->pll) {
280 		ASSERT(0);
281 		pllReadNewick(getTreeString());
282 	}
283 	resetCurScore();
284 
285 }
286 
287 
288 /**
289  * save branch lengths into a vector
290  */
saveBranchLengths(DoubleVector & lenvec,int startid,PhyloNode * node,PhyloNode * dad)291 void PhyloSuperTree::saveBranchLengths(DoubleVector &lenvec, int startid, PhyloNode *node, PhyloNode *dad) {
292     ASSERT(getMixlen() == 1); // supertree and treemixlen not allowed together
293 	int totalBranchNum = branchNum * getMixlen();
294 	iterator it;
295 	for (it = begin(); it != end(); it++) {
296 		totalBranchNum += (*it)->branchNum * (*it)->getMixlen();
297 	}
298 	lenvec.resize(startid + totalBranchNum);
299 
300 	PhyloTree::saveBranchLengths(lenvec, startid);
301 	startid += branchNum * getMixlen();
302 	for (iterator it = begin(); it != end(); it++) {
303 		(*it)->saveBranchLengths(lenvec, startid);
304 		startid += (*it)->branchNum * (*it)->getMixlen();
305 	}
306 }
307 /**
308  * restore branch lengths from a vector previously called with saveBranchLengths
309  */
restoreBranchLengths(DoubleVector & lenvec,int startid,PhyloNode * node,PhyloNode * dad)310 void PhyloSuperTree::restoreBranchLengths(DoubleVector &lenvec, int startid, PhyloNode *node, PhyloNode *dad) {
311 	PhyloTree::restoreBranchLengths(lenvec, startid);
312 	startid += branchNum * getMixlen();
313 	for (iterator it = begin(); it != end(); it++) {
314 		(*it)->restoreBranchLengths(lenvec, startid);
315 		startid += (*it)->branchNum * (*it)->getMixlen();
316 	}
317 }
318 
collapseInternalBranches(Node * node,Node * dad,double threshold)319 int PhyloSuperTree::collapseInternalBranches(Node *node, Node *dad, double threshold) {
320 	if (!node) node = root;
321     int count = 0;
322 	FOR_NEIGHBOR_DECLARE(node, dad, it) {
323 		count += collapseInternalBranches((*it)->node, node, threshold);
324 	}
325     if (node->isLeaf())
326         return count;
327 	NeighborVec nei_vec;
328 	nei_vec.insert(nei_vec.begin(), node->neighbors.begin(), node->neighbors.end());
329 	for (it = nei_vec.begin(); it != nei_vec.end(); it++)
330 	if ((*it)->node != dad && !(*it)->node->isLeaf() && (*it)->length <= threshold) {
331 
332         //delete branch of gene-trees
333         SuperNeighbor *snei = (SuperNeighbor*)(*it);
334         int part = 0;
335         for (part = 0; part != size(); part++)
336             if (snei->link_neighbors[part]) {
337                 SuperNeighbor* snei_back = (SuperNeighbor*)(*it)->node->findNeighbor(node);
338                 at(part)->removeNode(snei_back->link_neighbors[part]->node, snei->link_neighbors[part]->node);
339             }
340 
341 		// delete the child node
342         removeNode(node, (*it)->node);
343         count++;
344 	}
345     return count;
346 }
347 
348 
newNode(int node_id,const char * node_name)349 Node* PhyloSuperTree::newNode(int node_id, const char* node_name) {
350     return (Node*) (new SuperNode(node_id, node_name));
351 }
352 
newNode(int node_id,int node_name)353 Node* PhyloSuperTree::newNode(int node_id, int node_name) {
354     return (Node*) (new SuperNode(node_id, node_name));
355 }
356 
getAlnNPattern()357 int PhyloSuperTree::getAlnNPattern() {
358 	int num = 0;
359 	for (iterator it = begin(); it != end(); it++)
360 		num += (*it)->getAlnNPattern();
361 	return num;
362 }
363 
getAlnNSite()364 int PhyloSuperTree::getAlnNSite() {
365 	int num = 0;
366 	for (iterator it = begin(); it != end(); it++)
367 		num += (*it)->getAlnNSite();
368 	return num;
369 }
370 
computeDist(int seq1,int seq2,double initial_dist,double & var)371 double PhyloSuperTree::computeDist(int seq1, int seq2, double initial_dist, double &var) {
372     // if no model or site rate is specified, return JC distance
373     if (initial_dist == 0.0) {
374     	if (params->compute_obs_dist)
375             initial_dist = aln->computeObsDist(seq1, seq2);
376     	else
377     		initial_dist = aln->computeDist(seq1, seq2);
378     }
379     if (initial_dist == MAX_GENETIC_DIST) return initial_dist; // MANUEL: here no d2l is return
380     if (!model_factory || !site_rate) return initial_dist; // MANUEL: here no d2l is return
381 
382     // now optimize the distance based on the model and site rate
383     SuperAlignmentPairwise aln_pair(this, seq1, seq2);
384     return aln_pair.optimizeDist(initial_dist, var);
385 }
386 
linkBranch(int part,SuperNeighbor * nei,SuperNeighbor * dad_nei)387 void PhyloSuperTree::linkBranch(int part, SuperNeighbor *nei, SuperNeighbor *dad_nei) {
388 	SuperNode *node = (SuperNode*)dad_nei->node;
389 	SuperNode *dad = (SuperNode*)nei->node;
390 	nei->link_neighbors[part] = NULL;
391 	dad_nei->link_neighbors[part] = NULL;
392 	vector<PhyloNeighbor*> part_vec;
393 	vector<PhyloNeighbor*> child_part_vec;
394 
395 	FOR_NEIGHBOR_DECLARE(node, dad, it) {
396 		if (((SuperNeighbor*)*it)->link_neighbors[part]) {
397 			part_vec.push_back(((SuperNeighbor*)*it)->link_neighbors[part]);
398 			child_part_vec.push_back(((SuperNeighbor*)(*it)->node->findNeighbor(node))->link_neighbors[part]);
399 			ASSERT(child_part_vec.back()->node == child_part_vec.front()->node || child_part_vec.back()->id == child_part_vec.front()->id);
400 		}
401 	}
402 
403 	if (part_vec.empty())
404 		return;
405 	if (part_vec.size() == 1) {
406 		nei->link_neighbors[part] = child_part_vec[0];
407 		dad_nei->link_neighbors[part] = part_vec[0];
408 		return;
409 	}
410 	if (part_vec[0] == child_part_vec[1]) {
411 		// ping-pong, out of sub-tree
412 		ASSERT(part_vec[1] == child_part_vec[0]);
413 		return;
414 	}
415 	PhyloNode *node_part = (PhyloNode*) child_part_vec[0]->node;
416 	PhyloNode *dad_part = NULL;
417 	FOR_NEIGHBOR(node_part, NULL, it) {
418 		bool appear = false;
419 		for (vector<PhyloNeighbor*>::iterator it2 = part_vec.begin(); it2 != part_vec.end(); it2++){
420 			if ((*it2) == (*it)) {
421 				appear = true; break;
422 			}
423 		}
424 		if (!appear) {
425 			ASSERT(!dad_part);
426 			dad_part = (PhyloNode*)(*it)->node;
427 		}
428 	}
429 	nei->link_neighbors[part] = (PhyloNeighbor*)node_part->findNeighbor(dad_part);
430 	dad_nei->link_neighbors[part] = (PhyloNeighbor*)dad_part->findNeighbor(node_part);
431 }
432 
linkTree(int part,NodeVector & part_taxa,SuperNode * node,SuperNode * dad)433 void PhyloSuperTree::linkTree(int part, NodeVector &part_taxa, SuperNode *node, SuperNode *dad) {
434 	if (!node) {
435 		if (!root->isLeaf())
436 			node = (SuperNode*) root;
437 		else
438 			node = (SuperNode*)root->neighbors[0]->node;
439 		ASSERT(node);
440 		if (node->isLeaf()) // two-taxa tree
441 			dad = (SuperNode*)node->neighbors[0]->node;
442 	}
443 	SuperNeighbor *nei = NULL;
444 	SuperNeighbor *dad_nei = NULL;
445 	if (dad) {
446 		nei = (SuperNeighbor*)node->findNeighbor(dad);
447 		dad_nei = (SuperNeighbor*)dad->findNeighbor(node);
448 		if (nei->link_neighbors.empty()) nei->link_neighbors.resize(size());
449 		if (dad_nei->link_neighbors.empty()) dad_nei->link_neighbors.resize(size());
450 		nei->link_neighbors[part] = NULL;
451 		dad_nei->link_neighbors[part] = NULL;
452 	}
453 	if (node->isLeaf()) {
454 		ASSERT(dad);
455 		PhyloNode *node_part = (PhyloNode*)part_taxa[node->id];
456 		if (node_part) {
457 			PhyloNode *dad_part = (PhyloNode*)node_part->neighbors[0]->node;
458 			ASSERT(node_part->isLeaf());
459 			nei->link_neighbors[part] = (PhyloNeighbor*) node_part->neighbors[0];
460 			dad_nei->link_neighbors[part] = (PhyloNeighbor*)dad_part->findNeighbor(node_part);
461 		}
462 		return;
463 	}
464 
465 	FOR_NEIGHBOR_DECLARE(node, dad, it) {
466 		linkTree(part, part_taxa, (SuperNode*) (*it)->node, (SuperNode*) node);
467 	}
468 	if (!dad) return;
469 	linkBranch(part, nei, dad_nei);
470 }
471 
syncRooting()472 void PhyloSuperTree::syncRooting() {
473     if (empty() || !front()->root)
474         return;
475     // check if all trees are similarly rooted or unrooted
476     bool same_rooting = true;
477     iterator tree;
478     for (tree = begin(); tree != end(); tree++) {
479         if ((*tree)->rooted != front()->rooted) {
480             same_rooting = false;
481             break;
482         }
483     }
484 
485     if (same_rooting) {
486         if (!empty() && rooted != front()->rooted) {
487             if (rooted)
488                 convertToUnrooted();
489             else
490                 convertToRooted();
491         }
492     } else if (!rooted) {
493         // not same rooting between trees
494         convertToRooted();
495     }
496 }
497 
printMapInfo()498 void PhyloSuperTree::printMapInfo() {
499 	NodeVector nodes1, nodes2;
500 	getBranches(nodes1, nodes2);
501 	int part = 0;
502 	for (iterator it = begin(); it != end(); it++, part++) {
503 		cout << "Subtree for partition " << part << endl;
504 		(*it)->drawTree(cout, WT_BR_SCALE | WT_INT_NODE | WT_TAXON_ID | WT_NEWLINE);
505 		for (int i = 0; i < nodes1.size(); i++) {
506 			PhyloNeighbor *nei1 = ((SuperNeighbor*)nodes1[i]->findNeighbor(nodes2[i]))->link_neighbors[part];
507 			PhyloNeighbor *nei2 = ((SuperNeighbor*)nodes2[i]->findNeighbor(nodes1[i]))->link_neighbors[part];
508 			cout << nodes1[i]->findNeighbor(nodes2[i])->id << ":";
509 			if (nodes1[i]->isLeaf()) cout << nodes1[i]->name; else cout << nodes1[i]->id;
510 			cout << ",";
511 			if (nodes2[i]->isLeaf()) cout << nodes2[i]->name; else cout << nodes2[i]->id;
512 			cout << " -> ";
513 			if (nei2) {
514 				cout << nei2->id << ":";
515 				if (nei2->node->isLeaf())
516 					cout << nei2->node->name;
517 				else cout << nei2->node->id;
518 			}
519 			else cout << -1;
520 			cout << ",";
521 			if (nei1)
522 				if (nei1->node->isLeaf())
523 					cout << nei1->node->name;
524 				else cout << nei1->node->id;
525 			else cout << -1;
526 			cout << endl;
527 		}
528 	}
529 }
530 
531 
mapTrees()532 void PhyloSuperTree::mapTrees() {
533 	ASSERT(root);
534     syncRooting();
535 	int part = 0, i;
536 	if (verbose_mode >= VB_DEBUG)
537 		drawTree(cout,  WT_BR_SCALE | WT_INT_NODE | WT_TAXON_ID | WT_NEWLINE | WT_BR_ID);
538 	for (iterator it = begin(); it != end(); it++, part++) {
539 		string taxa_set;
540         Pattern taxa_pat = aln->getPattern(part);
541         taxa_set.insert(taxa_set.begin(), taxa_pat.begin(), taxa_pat.end());
542 		(*it)->copyTree(this, taxa_set);
543         if ((*it)->getModel()) {
544 			(*it)->initializeAllPartialLh();
545         }
546         (*it)->resetCurScore();
547 		NodeVector my_taxa, part_taxa;
548 		(*it)->getOrderedTaxa(my_taxa);
549 		part_taxa.resize(leafNum, NULL);
550 		for (i = 0; i < leafNum; i++) {
551             int id;
552             if (i < aln->getNSeq())
553                 id = ((SuperAlignment*)aln)->taxa_index[i][part];
554             else {
555                 ASSERT(rooted);
556                 if ((*it)->rooted)
557                     id = (*it)->leafNum-1;
558                 else
559                     id = -1;
560             }
561 			if (id >=0) part_taxa[i] = my_taxa[id];
562 		}
563 		if (verbose_mode >= VB_DEBUG) {
564 			cout << "Subtree for partition " << part << endl;
565 			(*it)->drawTree(cout,  WT_BR_SCALE | WT_INT_NODE | WT_TAXON_ID | WT_NEWLINE | WT_BR_ID);
566 		}
567 		linkTree(part, part_taxa);
568 	}
569 
570 	if (verbose_mode >= VB_DEBUG) printMapInfo();
571 }
572 
linkTrees()573 void PhyloSuperTree::linkTrees() {
574 	int part = 0;
575 	iterator it;
576 	for (it = begin(), part = 0; it != end(); it++, part++) {
577 		(*it)->initializeTree();
578 		(*it)->setAlignment((*it)->aln);
579         if ((*it)->getModel()) {
580 			(*it)->initializeAllPartialLh();
581         }
582         (*it)->resetCurScore();
583 		NodeVector my_taxa, part_taxa;
584 		(*it)->getOrderedTaxa(my_taxa);
585 		part_taxa.resize(leafNum, NULL);
586 		int i;
587 		for (i = 0; i < leafNum; i++) {
588             int id;
589             if (i < aln->getNSeq())
590                 id = ((SuperAlignment*)aln)->taxa_index[i][part];
591             else if ((*it)->rooted)
592                 id = (*it)->leafNum-1;
593             else
594                 id = -1;
595 			if (id >=0) part_taxa[i] = my_taxa[id];
596 		}
597 		linkTree(part, part_taxa);
598 	}
599 }
600 
initializeAllPartialLh()601 void PhyloSuperTree::initializeAllPartialLh() {
602 	for (iterator it = begin(); it != end(); it++) {
603 		(*it)->initializeAllPartialLh();
604 	}
605 }
606 
607 
deleteAllPartialLh()608 void PhyloSuperTree::deleteAllPartialLh() {
609 	for (iterator it = begin(); it != end(); it++) {
610 		(*it)->deleteAllPartialLh();
611 	}
612 }
613 
clearAllPartialLH(bool make_null)614 void PhyloSuperTree::clearAllPartialLH(bool make_null) {
615     for (iterator it = begin(); it != end(); it++) {
616         (*it)->clearAllPartialLH(make_null);
617     }
618 }
619 
computeParsimonyBranchObsolete(PhyloNeighbor * dad_branch,PhyloNode * dad,int * branch_subst)620 int PhyloSuperTree::computeParsimonyBranchObsolete(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) {
621     int score = 0, part = 0;
622     SuperNeighbor *dad_nei = (SuperNeighbor*)dad_branch;
623     SuperNeighbor *node_nei = (SuperNeighbor*)(dad_branch->node->findNeighbor(dad));
624 
625     if (branch_subst)
626         branch_subst = 0;
627     for (iterator it = begin(); it != end(); it++, part++) {
628         int this_subst = 0;
629         if (dad_nei->link_neighbors[part]) {
630             if (branch_subst)
631                 score += (*it)->computeParsimonyBranch(dad_nei->link_neighbors[part], (PhyloNode*)node_nei->link_neighbors[part]->node, &this_subst);
632             else
633                 score += (*it)->computeParsimonyBranch(dad_nei->link_neighbors[part], (PhyloNode*)node_nei->link_neighbors[part]->node);
634         } else
635             score += (*it)->computeParsimony();
636         if (branch_subst)
637             branch_subst += this_subst;
638     }
639     return score;
640 }
641 
computePartitionOrder()642 void PhyloSuperTree::computePartitionOrder() {
643     if (!part_order.empty())
644         return;
645     int i, ntrees = size();
646     part_order.resize(ntrees);
647     part_order_by_nptn.resize(ntrees);
648 #ifdef _OPENMP
649     int *id = new int[ntrees];
650     double *cost = new double[ntrees];
651 
652     for (i = 0; i < ntrees; i++) {
653         Alignment *part_aln = at(i)->aln;
654         cost[i] = -((double)part_aln->getNSeq())*part_aln->getNPattern()*part_aln->num_states;
655         id[i] = i;
656     }
657     quicksort(cost, 0, ntrees-1, id);
658     for (i = 0; i < ntrees; i++)
659         part_order[i] = id[i];
660 
661     // compute part_order by number of patterns
662     for (i = 0; i < ntrees; i++) {
663         Alignment *part_aln = at(i)->aln;
664         cost[i] = -((double)part_aln->getNPattern())*part_aln->num_states;
665         id[i] = i;
666     }
667     quicksort(cost, 0, ntrees-1, id);
668     for (i = 0; i < ntrees; i++)
669         part_order_by_nptn[i] = id[i];
670 
671     delete [] cost;
672     delete [] id;
673 
674     if (verbose_mode >= VB_MED) {
675         cout << "Partitions ordered by computation costs:" << endl;
676         cout << "#nexus" << endl << "begin sets;" << endl;
677         for (i = 0; i < ntrees; i++)
678             cout << "  charset " << at(part_order[i])->aln->name << " = " << at(part_order[i])->aln->position_spec << ";" << endl;
679         cout << "end;" << endl;
680     }
681 #else
682     for (i = 0; i < ntrees; i++) {
683         part_order[i] = i;
684         part_order_by_nptn[i] = i;
685     }
686 #endif // OPENMP
687 }
688 
computeLikelihood(double * pattern_lh)689 double PhyloSuperTree::computeLikelihood(double *pattern_lh) {
690 	double tree_lh = 0.0;
691 	int ntrees = size();
692 	if (pattern_lh) {
693 		//#ifdef _OPENMP
694 		//#pragma omp parallel for reduction(+: tree_lh)
695 		//#endif
696 		for (int i = 0; i < ntrees; i++) {
697 			part_info[i].cur_score = at(i)->computeLikelihood(pattern_lh);
698 			tree_lh += part_info[i].cur_score;
699 			pattern_lh += at(i)->getAlnNPattern();
700 		}
701 	} else {
702         if (part_order.empty()) computePartitionOrder();
703 		#ifdef _OPENMP
704 		#pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(num_threads > 1)
705 		#endif
706 		for (int j = 0; j < ntrees; j++) {
707             int i = part_order[j];
708 			part_info[i].cur_score = at(i)->computeLikelihood();
709 			tree_lh += part_info[i].cur_score;
710 		}
711 	}
712 	return tree_lh;
713 }
714 
getNumLhCat(SiteLoglType wsl)715 int PhyloSuperTree::getNumLhCat(SiteLoglType wsl) {
716     int ncat = 0, it_cat;
717     for (iterator it = begin(); it != end(); it++)
718         if ((it_cat = (*it)->getNumLhCat(wsl)) > ncat)
719             ncat = it_cat;
720     return ncat;
721 }
722 
723 
computePatternLikelihood(double * pattern_lh,double * cur_logl,double * ptn_lh_cat,SiteLoglType wsl)724 void PhyloSuperTree::computePatternLikelihood(double *pattern_lh, double *cur_logl, double *ptn_lh_cat, SiteLoglType wsl) {
725 	size_t offset = 0, offset_lh_cat = 0;
726 	iterator it;
727 	for (it = begin(); it != end(); it++) {
728 		if (ptn_lh_cat)
729 			(*it)->computePatternLikelihood(pattern_lh + offset, NULL, ptn_lh_cat + offset_lh_cat, wsl);
730 		else
731 			(*it)->computePatternLikelihood(pattern_lh + offset);
732 		offset += (*it)->aln->getNPattern();
733         offset_lh_cat += (*it)->aln->getNPattern() * (*it)->getNumLhCat(wsl);
734 	}
735 	if (cur_logl) { // sanity check
736 		double sum_logl = 0;
737 		offset = 0;
738 		for (it = begin(); it != end(); it++) {
739 			int nptn = (*it)->aln->getNPattern();
740 			for (int j = 0; j < nptn; j++)
741 				sum_logl += pattern_lh[offset + j] * (*it)->aln->at(j).frequency;
742 			offset += (*it)->aln->getNPattern();
743 		}
744 		if (fabs(sum_logl - *cur_logl) > 0.001) {
745             cout << *cur_logl << " " << sum_logl << endl;
746 //            outError("Wrong PhyloSuperTree::", __func__);
747 		}
748         ASSERT(fabs(sum_logl - *cur_logl) < 0.001);
749 	}
750 }
751 
computePatternProbabilityCategory(double * ptn_prob_cat,SiteLoglType wsl)752 void PhyloSuperTree::computePatternProbabilityCategory(double *ptn_prob_cat, SiteLoglType wsl) {
753 	size_t offset = 0;
754 	for (iterator it = begin(); it != end(); it++) {
755         (*it)->computePatternProbabilityCategory(ptn_prob_cat + offset, wsl);
756         offset += (*it)->aln->getNPattern() * (*it)->getNumLhCat(wsl);
757 	}
758 }
759 
optimizeAllBranches(int my_iterations,double tolerance,int maxNRStep)760 double PhyloSuperTree::optimizeAllBranches(int my_iterations, double tolerance, int maxNRStep) {
761 	double tree_lh = 0.0;
762 	int ntrees = size();
763     if (part_order.empty()) computePartitionOrder();
764 	#ifdef _OPENMP
765 	#pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(num_threads > 1)
766 	#endif
767 	for (int j = 0; j < ntrees; j++) {
768         int i = part_order[j];
769 		part_info[i].cur_score = at(i)->optimizeAllBranches(my_iterations, tolerance/min(ntrees,10), maxNRStep);
770 		tree_lh += part_info[i].cur_score;
771 		if (verbose_mode >= VB_MAX)
772 			at(i)->printTree(cout, WT_BR_LEN + WT_NEWLINE);
773 	}
774 
775 	if (my_iterations >= 100) computeBranchLengths();
776 	return tree_lh;
777 }
778 
~PhyloSuperTree()779 PhyloSuperTree::~PhyloSuperTree()
780 {
781 	for (vector<PartitionInfo>::reverse_iterator pit = part_info.rbegin(); pit != part_info.rend(); pit++) {
782 		if (pit->nniMoves[1].ptnlh)
783 			delete [] pit->nniMoves[1].ptnlh;
784 		pit->nniMoves[1].ptnlh = NULL;
785 		if (pit->nniMoves[0].ptnlh)
786 			delete [] pit->nniMoves[0].ptnlh;
787 		pit->nniMoves[0].ptnlh = NULL;
788 		if (pit->cur_ptnlh)
789 			delete [] pit->cur_ptnlh;
790 		pit->cur_ptnlh = NULL;
791 	}
792 	part_info.clear();
793 
794 	for (reverse_iterator it = rbegin(); it != rend(); it++)
795 		delete (*it);
796 	clear();
797 }
798 
799 
initPartitionInfo()800 void PhyloSuperTree::initPartitionInfo() {
801 	int part = 0;
802 	for (iterator it = begin(); it != end(); it++, part++) {
803 		part_info[part].cur_score = 0.0;
804 
805 		part_info[part].cur_brlen.resize((*it)->branchNum);
806 		if (params->nni5) {
807 			part_info[part].nni1_brlen.resize((*it)->branchNum * 5);
808 			part_info[part].nni2_brlen.resize((*it)->branchNum * 5);
809 		} else {
810 			part_info[part].nni1_brlen.resize((*it)->branchNum);
811 			part_info[part].nni2_brlen.resize((*it)->branchNum);
812 		}
813 
814 		(*it)->getBranchLengths(part_info[part].cur_brlen);
815 
816 		if (save_all_trees == 2 || params->write_intermediate_trees >= 2) {
817 			// initialize ptnlh for ultrafast bootstrap
818 			int nptn = (*it)->getAlnNPattern();
819 			if (!part_info[part].cur_ptnlh)
820 				part_info[part].cur_ptnlh = new double[nptn];
821 			if (!part_info[part].nniMoves[0].ptnlh)
822 				part_info[part].nniMoves[0].ptnlh = new double [nptn];
823 			if (!part_info[part].nniMoves[1].ptnlh)
824 				part_info[part].nniMoves[1].ptnlh = new double [nptn];
825 		} else {
826             part_info[part].cur_ptnlh = NULL;
827             part_info[part].nniMoves[0].ptnlh = NULL;
828             part_info[part].nniMoves[1].ptnlh = NULL;
829         }
830 	}
831 }
832 
getMaxPartNameLength()833 int PhyloSuperTree::getMaxPartNameLength() {
834 	int namelen = 0;
835 	for (iterator it = begin(); it != end(); it++)
836 		namelen = max((int)(*it)->aln->name.length(), namelen);
837 	return namelen;
838 }
839 
getBestNNIForBran(PhyloNode * node1,PhyloNode * node2,NNIMove * nniMoves)840 NNIMove PhyloSuperTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NNIMove *nniMoves) {
841     if (((PhyloNeighbor*)node1->findNeighbor(node2))->direction == TOWARD_ROOT) {
842         // swap node1 and node2 if the direction is not right, only for nonreversible models
843         PhyloNode *tmp = node1;
844         node1 = node2;
845         node2 = tmp;
846     }
847     NNIMove myMove;
848     //myMove.newloglh = 0;
849 	SuperNeighbor *nei1 = ((SuperNeighbor*)node1->findNeighbor(node2));
850 	SuperNeighbor *nei2 = ((SuperNeighbor*)node2->findNeighbor(node1));
851 	ASSERT(nei1 && nei2);
852 	SuperNeighbor *node1_nei = NULL;
853 	SuperNeighbor *node2_nei = NULL;
854 	SuperNeighbor *node2_nei_other = NULL;
855 	FOR_NEIGHBOR_DECLARE(node1, node2, node1_it)
856     if (((PhyloNeighbor*)*node1_it)->direction != TOWARD_ROOT)
857     {
858 		node1_nei = (SuperNeighbor*)(*node1_it);
859 		break;
860 	}
861 	FOR_NEIGHBOR_DECLARE(node2, node1, node2_it) {
862 		node2_nei = (SuperNeighbor*)(*node2_it);
863 		break;
864 	}
865 
866 	FOR_NEIGHBOR_IT(node2, node1, node2_it_other)
867 	if ((*node2_it_other) != node2_nei) {
868 		node2_nei_other = (SuperNeighbor*)(*node2_it_other);
869 		break;
870 	}
871 
872     // check for compatibility with constraint tree
873     bool nni_ok[2] = {true, true};
874     int nniid = 0;
875 	FOR_NEIGHBOR(node2, node1, node2_it) {
876         NNIMove nni;
877         nni.node1 = node1;
878         nni.node2 = node2;
879         nni.node1Nei_it = node1->findNeighborIt(node1_nei->node);
880         nni.node2Nei_it = node2_it;
881         nni_ok[nniid++] = constraintTree.isCompatible(nni);
882     }
883     ASSERT(nniid == 2);
884     myMove.node1 = myMove.node2 = NULL;
885     myMove.newloglh = -DBL_MAX;
886     // return if both NNIs do not satisfy constraint
887     if (!nni_ok[0] && !nni_ok[1]) {
888 //        ASSERT(!nniMoves);
889         if (nniMoves) {
890             nniMoves[0].newloglh = nniMoves[1].newloglh = -DBL_MAX;
891         }
892         return myMove;
893     }
894 
895 	//double bestScore = optimizeOneBranch(node1, node2, false);
896 
897 	int ntrees = size(), part;
898 	double nni_score1 = 0.0, nni_score2 = 0.0;
899 	int local_totalNNIs = 0, local_evalNNIs = 0;
900 
901     if (part_order.empty()) computePartitionOrder();
902 	#ifdef _OPENMP
903 	#pragma omp parallel for reduction(+: nni_score1, nni_score2, local_totalNNIs, local_evalNNIs) private(part) schedule(dynamic) if(num_threads>1)
904 	#endif
905 	for (int treeid = 0; treeid < ntrees; treeid++) {
906         part = part_order_by_nptn[treeid];
907 		bool is_nni = true;
908 		local_totalNNIs++;
909 		FOR_NEIGHBOR_DECLARE(node1, NULL, nit) {
910 			if (! ((SuperNeighbor*)*nit)->link_neighbors[part]) { is_nni = false; break; }
911 		}
912 		FOR_NEIGHBOR(node2, NULL, nit) {
913 			if (! ((SuperNeighbor*)*nit)->link_neighbors[part]) { is_nni = false; break; }
914 		}
915 		if (!is_nni && params->terrace_aware) {
916 			if (part_info[part].cur_score == 0.0)  {
917 				part_info[part].cur_score = at(part)->computeLikelihood();
918 				if (save_all_trees == 2 || nniMoves)
919 					at(part)->computePatternLikelihood(part_info[part].cur_ptnlh, &part_info[part].cur_score);
920 			}
921 			nni_score1 += part_info[part].cur_score;
922 			nni_score2 += part_info[part].cur_score;
923 			continue;
924 		}
925 
926 		local_evalNNIs++;
927 		part_info[part].evalNNIs++;
928 
929 		PhyloNeighbor *nei1_part = nei1->link_neighbors[part];
930 		PhyloNeighbor *nei2_part = nei2->link_neighbors[part];
931 
932 		int brid = nei1_part->id;
933 
934 		//NNIMove part_moves[2];
935 		//part_moves[0].node1Nei_it = NULL;
936 
937 		// setup subtree NNI correspondingly
938 		PhyloNode *node1_part = (PhyloNode*)nei2_part->node;
939 		PhyloNode *node2_part = (PhyloNode*)nei1_part->node;
940 		part_info[part].nniMoves[0].node1 = part_info[part].nniMoves[1].node1 = node1;
941 		part_info[part].nniMoves[0].node2 = part_info[part].nniMoves[1].node2 = node2;
942 		part_info[part].nniMoves[0].node1Nei_it = node1_part->findNeighborIt(node1_nei->link_neighbors[part]->node);
943 		part_info[part].nniMoves[0].node2Nei_it = node2_part->findNeighborIt(node2_nei->link_neighbors[part]->node);
944 
945 		part_info[part].nniMoves[1].node1Nei_it = node1_part->findNeighborIt(node1_nei->link_neighbors[part]->node);
946 		part_info[part].nniMoves[1].node2Nei_it = node2_part->findNeighborIt(node2_nei_other->link_neighbors[part]->node);
947 
948 		at(part)->getBestNNIForBran((PhyloNode*)nei2_part->node, (PhyloNode*)nei1_part->node, part_info[part].nniMoves);
949 		// detect the corresponding NNIs and swap if necessary (the swapping refers to the swapping of NNI order)
950 		if (!((*part_info[part].nniMoves[0].node1Nei_it == node1_nei->link_neighbors[part] &&
951 				*part_info[part].nniMoves[0].node2Nei_it == node2_nei->link_neighbors[part]) ||
952 			(*part_info[part].nniMoves[0].node1Nei_it != node1_nei->link_neighbors[part] &&
953 					*part_info[part].nniMoves[0].node2Nei_it != node2_nei->link_neighbors[part])))
954 		{
955 			outError("WRONG");
956 			NNIMove tmp = part_info[part].nniMoves[0];
957 			part_info[part].nniMoves[0] = part_info[part].nniMoves[1];
958 			part_info[part].nniMoves[1] = tmp;
959 		}
960 		nni_score1 += part_info[part].nniMoves[0].newloglh;
961 		nni_score2 += part_info[part].nniMoves[1].newloglh;
962 		int numlen = 1;
963 		if (params->nni5) numlen = 5;
964 		for (int i = 0; i < numlen; i++) {
965 			part_info[part].nni1_brlen[brid*numlen + i] = part_info[part].nniMoves[0].newLen[i];
966 			part_info[part].nni2_brlen[brid*numlen + i] = part_info[part].nniMoves[1].newLen[i];
967 		}
968 
969 	}
970 	totalNNIs += local_totalNNIs;
971 	evalNNIs += local_evalNNIs;
972 	double nni_scores[2] = {nni_score1, nni_score2};
973 
974     if (!nni_ok[0]) nni_scores[0] = -DBL_MAX;
975     if (!nni_ok[1]) nni_scores[1] = -DBL_MAX;
976 
977 	myMove.node1Nei_it = node1->findNeighborIt(node1_nei->node);
978 	myMove.node1 = node1;
979 	myMove.node2 = node2;
980 	if (nni_scores[0] > nni_scores[1]) {
981 		myMove.swap_id = 1;
982 		myMove.node2Nei_it = node2->findNeighborIt(node2_nei->node);
983 		myMove.newloglh = nni_scores[0];
984 	} else  {
985 		myMove.swap_id = 2;
986 		myMove.node2Nei_it = node2->findNeighborIt(node2_nei_other->node);
987 		myMove.newloglh = nni_scores[1];
988 	}
989 
990 	if (save_all_trees != 2 && !nniMoves) return myMove;
991 
992 	// for bootstrap now
993     //now setup pattern likelihoods per partition
994 	double *save_lh_factor = new double [ntrees];
995 	double *save_lh_factor_back = new double [ntrees];
996 	nniid = 0;
997 	FOR_NEIGHBOR(node2, node1, node2_it) if (nni_ok[nniid])
998     {
999 
1000 		// do the NNI
1001 		node2_nei = (SuperNeighbor*)(*node2_it);
1002         node1->updateNeighbor(node1_it, node2_nei);
1003         node2_nei->node->updateNeighbor(node2, node1);
1004         node2->updateNeighbor(node2_it, node1_nei);
1005         node1_nei->node->updateNeighbor(node1, node2);
1006 
1007         for (part = 0; part < ntrees; part++) {
1008 			bool is_nni = true;
1009 			FOR_NEIGHBOR_DECLARE(node1, NULL, nit) {
1010 				if (! ((SuperNeighbor*)*nit)->link_neighbors[part]) { is_nni = false; break; }
1011 			}
1012 			FOR_NEIGHBOR(node2, NULL, nit) {
1013 				if (! ((SuperNeighbor*)*nit)->link_neighbors[part]) { is_nni = false; break; }
1014 			}
1015 			if (!is_nni)
1016 				memcpy(at(part)->_pattern_lh, part_info[part].cur_ptnlh, at(part)->getAlnNPattern() * sizeof(double));
1017 			else
1018 				memcpy(at(part)->_pattern_lh, part_info[part].nniMoves[nniid].ptnlh, at(part)->getAlnNPattern() * sizeof(double));
1019     		save_lh_factor[part] = at(part)->current_it->lh_scale_factor;
1020     		save_lh_factor_back[part] = at(part)->current_it_back->lh_scale_factor;
1021     		at(part)->current_it->lh_scale_factor = 0.0;
1022     		at(part)->current_it_back->lh_scale_factor = 0.0;
1023         }
1024         if (nniMoves) {
1025         	nniMoves[nniid].newloglh = nni_scores[nniid];
1026        		computePatternLikelihood(nniMoves[nniid].ptnlh, &nni_scores[nniid]);
1027         }
1028         if (save_all_trees == 2)
1029         	saveCurrentTree(nni_scores[nniid]);
1030 
1031         // restore information
1032         for (part = 0; part < ntrees; part++) {
1033     		at(part)->current_it->lh_scale_factor = save_lh_factor[part];
1034     		at(part)->current_it_back->lh_scale_factor = save_lh_factor_back[part];
1035         }
1036 
1037         // swap back to recover the tree
1038         node1->updateNeighbor(node1_it, node1_nei);
1039         node1_nei->node->updateNeighbor(node2, node1);
1040         node2->updateNeighbor(node2_it, node2_nei);
1041         node2_nei->node->updateNeighbor(node1, node2);
1042         nniid++;
1043 
1044 	}
1045 
1046 	delete [] save_lh_factor_back;
1047 	delete [] save_lh_factor;
1048 	return myMove;
1049 }
1050 
doNNI(NNIMove & move,bool clearLH)1051 void PhyloSuperTree::doNNI(NNIMove &move, bool clearLH) {
1052 	SuperNeighbor *nei1 = (SuperNeighbor*)move.node1->findNeighbor(move.node2);
1053 	SuperNeighbor *nei2 = (SuperNeighbor*)move.node2->findNeighbor(move.node1);
1054 	SuperNeighbor *node1_nei = (SuperNeighbor*)*move.node1Nei_it;
1055 	SuperNeighbor *node2_nei = (SuperNeighbor*)*move.node2Nei_it;
1056 	int part = 0;
1057 	iterator it;
1058 	PhyloTree::doNNI(move, clearLH);
1059 
1060 	for (it = begin(), part = 0; it != end(); it++, part++) {
1061 		bool is_nni = true;
1062 		FOR_NEIGHBOR_DECLARE(move.node1, NULL, nit) {
1063 			if (! ((SuperNeighbor*)*nit)->link_neighbors[part]) { is_nni = false; break; }
1064 		}
1065 		FOR_NEIGHBOR(move.node2, NULL, nit) {
1066 			if (! ((SuperNeighbor*)*nit)->link_neighbors[part]) { is_nni = false; break; }
1067 		}
1068 		if (!is_nni) {
1069 			// relink the branch if it does not correspond to NNI for partition
1070 			linkBranch(part, nei1, nei2);
1071 			continue;
1072 		}
1073 
1074 		NNIMove part_move;
1075 		PhyloNeighbor *nei1_part = nei1->link_neighbors[part];
1076 		PhyloNeighbor *nei2_part = nei2->link_neighbors[part];
1077 		part_move.node1 = (PhyloNode*)nei2_part->node;
1078 		part_move.node2 = (PhyloNode*)nei1_part->node;
1079 		part_move.node1Nei_it = part_move.node1->findNeighborIt(node1_nei->link_neighbors[part]->node);
1080 		part_move.node2Nei_it = part_move.node2->findNeighborIt(node2_nei->link_neighbors[part]->node);
1081 
1082 		(*it)->doNNI(part_move, clearLH);
1083 
1084 	}
1085 
1086 }
1087 
changeNNIBrans(NNIMove & move)1088 void PhyloSuperTree::changeNNIBrans(NNIMove &move) {
1089 	SuperNeighbor *nei1 = (SuperNeighbor*)move.node1->findNeighbor(move.node2);
1090 	SuperNeighbor *nei2 = (SuperNeighbor*)move.node2->findNeighbor(move.node1);
1091 	iterator it;
1092 	int part;
1093 
1094 	for (it = begin(), part = 0; it != end(); it++, part++) {
1095 		bool is_nni = true;
1096 		FOR_NEIGHBOR_DECLARE(move.node1, NULL, nit) {
1097 			if (! ((SuperNeighbor*)*nit)->link_neighbors[part]) { is_nni = false; break; }
1098 		}
1099 		FOR_NEIGHBOR(move.node2, NULL, nit) {
1100 			if (! ((SuperNeighbor*)*nit)->link_neighbors[part]) { is_nni = false; break; }
1101 		}
1102 		if (!is_nni) {
1103 			continue;
1104 		}
1105 
1106 		NNIMove part_move;
1107 		PhyloNeighbor *nei1_part = nei1->link_neighbors[part];
1108 		PhyloNeighbor *nei2_part = nei2->link_neighbors[part];
1109 		int brid = nei1_part->id;
1110 		part_move.node1 = (PhyloNode*)nei2_part->node;
1111 		part_move.node2 = (PhyloNode*)nei1_part->node;
1112 		int numlen = 1;
1113 		if (params->nni5) numlen = 5;
1114 		if (move.swap_id == 1) {
1115 			for (int i = 0; i < numlen; i++)
1116 				part_move.newLen[i] = part_info[part].nni1_brlen[brid*numlen + i];
1117 		} else {
1118 			for (int i = 0; i < numlen; i++)
1119 				part_move.newLen[i] = part_info[part].nni2_brlen[brid*numlen + i];
1120 		}
1121 
1122 		(*it)->changeNNIBrans(part_move);
1123 
1124 	}
1125 
1126 }
1127 
1128 //void PhyloSuperTree::restoreAllBrans(PhyloNode *node, PhyloNode *dad) {
1129 //	int part = 0;
1130 //	for (iterator it = begin(); it != end(); it++, part++) {
1131 //		(*it)->setBranchLengths(part_info[part].cur_brlen);
1132 //	}
1133 //}
1134 
reinsertLeaves(PhyloNodeVector & del_leaves)1135 void PhyloSuperTree::reinsertLeaves(PhyloNodeVector &del_leaves) {
1136 	IQTree::reinsertLeaves(del_leaves);
1137 	mapTrees();
1138 }
1139 
computeBranchLengths()1140 void PhyloSuperTree::computeBranchLengths() {
1141 	if (verbose_mode >= VB_DEBUG)
1142 		cout << "Assigning branch lengths for full tree with weighted average..." << endl;
1143 	int part = 0, i;
1144     iterator it;
1145 
1146 	NodeVector nodes1, nodes2;
1147 	getBranches(nodes1, nodes2);
1148 	vector<SuperNeighbor*> neighbors1;
1149 	vector<SuperNeighbor*> neighbors2;
1150 	IntVector occurence;
1151 	occurence.resize(nodes1.size(), 0);
1152 	for (i = 0; i < nodes1.size(); i++) {
1153 		neighbors1.push_back((SuperNeighbor*)nodes1[i]->findNeighbor(nodes2[i]) );
1154 		neighbors2.push_back((SuperNeighbor*)nodes2[i]->findNeighbor(nodes1[i]) );
1155 		neighbors1.back()->length = 0.0;
1156 	}
1157 	for (it = begin(), part = 0; it != end(); it++, part++) {
1158 		IntVector brfreq;
1159 		brfreq.resize((*it)->branchNum, 0);
1160 		for (i = 0; i < nodes1.size(); i++) {
1161 			PhyloNeighbor *nei1 = neighbors1[i]->link_neighbors[part];
1162 			if (!nei1) continue;
1163 			brfreq[nei1->id]++;
1164 		}
1165 		for (i = 0; i < nodes1.size(); i++) {
1166 			PhyloNeighbor *nei1 = neighbors1[i]->link_neighbors[part];
1167 			if (!nei1) continue;
1168             if ((*it)->aln->seq_type == SEQ_CODON && rescale_codon_brlen) {
1169                 // rescale branch length by 3
1170                 neighbors1[i]->length += (nei1->length) * (*it)->aln->getNSite() / brfreq[nei1->id];
1171                 occurence[i] += (*it)->aln->getNSite()*3;
1172             } else {
1173                 neighbors1[i]->length += (nei1->length) * (*it)->aln->getNSite() / brfreq[nei1->id];
1174                 occurence[i] += (*it)->aln->getNSite();
1175             }
1176 			//cout << neighbors1[i]->id << "  " << nodes1[i]->id << nodes1[i]->name <<"," << nodes2[i]->id << nodes2[i]->name <<": " << (nei1->length) / brfreq[nei1->id] << endl;
1177 		}
1178 		//cout << endl;
1179 	}
1180 	for (i = 0; i < nodes1.size(); i++) {
1181 		if (occurence[i])
1182 			neighbors1[i]->length /= occurence[i];
1183 		neighbors2[i]->length = neighbors1[i]->length;
1184 	}
1185 }
1186 
getModelName()1187 string PhyloSuperTree::getModelName() {
1188 	return (string)"Partition model";
1189 }
1190 
extractSubtree(set<int> & ids)1191 PhyloTree *PhyloSuperTree::extractSubtree(set<int> &ids) {
1192 	string union_taxa;
1193 	for (auto it = ids.begin(); it != ids.end(); it++) {
1194 		int id = *it;
1195 		if (id < 0 || id >= size())
1196 			outError("Internal error ", __func__);
1197 		string taxa_set;
1198         Pattern taxa_pat = aln->getPattern(id);
1199         taxa_set.insert(taxa_set.begin(), taxa_pat.begin(), taxa_pat.end());
1200 		if (it == ids.begin()) union_taxa = taxa_set; else {
1201 			for (int j = 0; j < union_taxa.length(); j++)
1202 				if (taxa_set[j] == 1) union_taxa[j] = 1;
1203 		}
1204 	}
1205 	PhyloTree *tree = new PhyloTree;
1206 	tree->copyTree(this, union_taxa);
1207 	return tree;
1208 }
1209 
getMemoryRequired(size_t ncategory,bool full_mem)1210 uint64_t PhyloSuperTree::getMemoryRequired(size_t ncategory, bool full_mem) {
1211 //	uint64_t mem_size = PhyloTree::getMemoryRequired(ncategory);
1212 	// supertree does not need any memory for likelihood vectors!
1213 	uint64_t mem_size = 0;
1214 	for (iterator it = begin(); it != end(); it++)
1215 		mem_size += (*it)->getMemoryRequired(ncategory, full_mem);
1216 	return mem_size;
1217 }
1218 
1219 // get memory requirement for ModelFinder
getMemoryRequiredThreaded(size_t ncategory,bool full_mem)1220 uint64_t PhyloSuperTree::getMemoryRequiredThreaded(size_t ncategory, bool full_mem) {
1221     // only get the largest k partitions (k=#threads)
1222     int threads = (params->num_threads != 0) ? params->num_threads : params->num_threads_max;
1223     threads = min(threads, countPhysicalCPUCores());
1224     threads = min(threads, (int)size());
1225 
1226     // sort partition by computational cost for OpenMP effciency
1227     uint64_t *part_mem = new uint64_t[size()];
1228     int i;
1229     for (i = 0; i < size(); i++) {
1230         part_mem[i] = at(i)->getMemoryRequired(ncategory, full_mem);
1231     }
1232 
1233     // sort partition memory in increasing order
1234     quicksort<uint64_t, int>(part_mem, 0, size()-1);
1235 
1236     uint64_t mem = 0;
1237     for (i = size()-threads; i < size(); i++)
1238         mem += part_mem[i];
1239 
1240     delete [] part_mem;
1241 
1242     return mem;
1243 }
1244 
countEmptyBranches(PhyloNode * node,PhyloNode * dad)1245 int PhyloSuperTree::countEmptyBranches(PhyloNode *node, PhyloNode *dad) {
1246 	int count = 0;
1247     if (!node)
1248         node = (PhyloNode*)root;
1249 
1250     FOR_NEIGHBOR_IT(node, dad, it) {
1251     	SuperNeighbor *nei = (SuperNeighbor*)(*it);
1252     	bool isempty = true;
1253     	for (PhyloNeighborVec::iterator nit = nei->link_neighbors.begin(); nit != nei->link_neighbors.end(); nit++)
1254     		if ((*nit)) {
1255     			isempty = false;
1256     			break;
1257     		}
1258     	if (isempty) count++;
1259     	count += countEmptyBranches((PhyloNode*)(*it)->node, node);
1260     }
1261     return count;
1262 }
1263 
1264 /** remove identical sequences from the tree */
removeIdenticalSeqs(Params & params)1265 void PhyloSuperTree::removeIdenticalSeqs(Params &params) {
1266 	IQTree::removeIdenticalSeqs(params);
1267 	if (removed_seqs.empty()) return;
1268 	// now synchronize aln
1269 	int part = 0;
1270     SuperAlignment *saln = (SuperAlignment*)aln;
1271 	for (iterator it = begin(); it != end(); it++, part++) {
1272 		if (verbose_mode >= VB_MED) {
1273 			cout << "Partition " << saln->partitions[part]->name << " " << saln->partitions[part]->getNSeq() <<
1274 					" sequences from " << (*it)->aln->getNSeq() << " extracted" << endl;
1275 		}
1276 		(*it)->aln = saln->partitions[part];
1277 	}
1278 	if (verbose_mode >= VB_MED) {
1279 		cout << "Reduced alignment has " << aln->getNSeq() << " sequences with " << getAlnNSite() << " sites and "
1280 				<< getAlnNPattern() << " patterns" << endl;
1281 	}
1282 
1283 }
1284 
1285 /** reinsert identical sequences into the tree and reset original alignment */
reinsertIdenticalSeqs(Alignment * orig_aln)1286 void PhyloSuperTree::reinsertIdenticalSeqs(Alignment *orig_aln) {
1287 	if (removed_seqs.empty()) return;
1288 	IQTree::reinsertIdenticalSeqs(orig_aln);
1289 
1290 	// now synchronize aln
1291 	int part = 0;
1292     for (iterator it = begin(); it != end(); it++, part++) {
1293 //        (*it)->setAlignment(((SuperAlignment*)aln)->partitions[part]);
1294 		(*it)->aln = ((SuperAlignment*)aln)->partitions[part];
1295     }
1296 	mapTrees();
1297 
1298 
1299 }
1300 
fixNegativeBranch(bool force,Node * node,Node * dad)1301 int PhyloSuperTree::fixNegativeBranch(bool force, Node *node, Node *dad) {
1302 	mapTrees();
1303 	int fixed = 0;
1304 	for (iterator it = begin(); it != end(); it++) {
1305 		(*it)->initializeAllPartialPars();
1306 		(*it)->clearAllPartialLH();
1307 		fixed += (*it)->fixNegativeBranch(force);
1308 		(*it)->clearAllPartialLH();
1309 	}
1310 	computeBranchLengths();
1311 	return fixed;
1312 }
1313 
1314 /****************************************************************************
1315         ancestral sequence reconstruction
1316  ****************************************************************************/
1317 
initMarginalAncestralState(ostream & out,bool & orig_kernel_nonrev,double * & ptn_ancestral_prob,int * & ptn_ancestral_seq)1318 void PhyloSuperTree::initMarginalAncestralState(ostream &out, bool &orig_kernel_nonrev, double* &ptn_ancestral_prob, int* &ptn_ancestral_seq) {
1319     orig_kernel_nonrev = params->kernel_nonrev;
1320     if (!orig_kernel_nonrev) {
1321         // switch to nonrev kernel to compute _pattern_lh_cat_state
1322         params->kernel_nonrev = true;
1323         setLikelihoodKernel(sse);
1324         clearAllPartialLH();
1325     }
1326 
1327     size_t total_size = 0, total_ptn = 0;
1328 
1329     bool mixed_data = false;
1330 
1331     for (auto it = begin(); it != end(); it++) {
1332         size_t nptn = (*it)->aln->size();
1333         size_t nstates = (*it)->model->num_states;
1334         (*it)->_pattern_lh_cat_state = (*it)->newPartialLh();
1335         total_size += nptn*nstates;
1336         total_ptn += nptn;
1337         if (nstates != front()->model->num_states)
1338             mixed_data = true;
1339     }
1340 
1341     ptn_ancestral_prob = aligned_alloc<double>(total_size);
1342     ptn_ancestral_seq = aligned_alloc<int>(total_ptn);
1343 }
1344 
1345 /**
1346     compute ancestral sequence probability for an internal node by marginal reconstruction
1347     (Yang, Kumar and Nei 1995)
1348     @param dad_branch branch leading to an internal node where to obtain ancestral sequence
1349     @param dad dad of the target internal node
1350     @param[out] ptn_ancestral_prob pattern ancestral probability vector of dad_branch->node
1351 */
computeMarginalAncestralState(PhyloNeighbor * dad_branch,PhyloNode * dad,double * ptn_ancestral_prob,int * ptn_ancestral_seq)1352 void PhyloSuperTree::computeMarginalAncestralState(PhyloNeighbor *dad_branch, PhyloNode *dad,
1353     double *ptn_ancestral_prob, int *ptn_ancestral_seq) {
1354 
1355     SuperNeighbor *snei = (SuperNeighbor*)dad_branch;
1356     SuperNeighbor *snei_back = (SuperNeighbor*)dad_branch->node->findNeighbor(dad);
1357     int part = 0;
1358     for (auto it = begin(); it != end(); it++, part++) {
1359         size_t nptn = (*it)->getAlnNPattern();
1360         size_t nstates = (*it)->model->num_states;
1361         if (snei->link_neighbors[part]) {
1362             (*it)->computeMarginalAncestralState(snei->link_neighbors[part], (PhyloNode*)snei_back->link_neighbors[part]->node,
1363                 ptn_ancestral_prob, ptn_ancestral_seq);
1364         } else {
1365             // branch does not exist in partition tree
1366             double eqprob = 1.0/nstates;
1367             for (size_t ptn = 0; ptn < nptn; ptn++) {
1368                 for (size_t i = 0; i < nstates; i++)
1369                     ptn_ancestral_prob[ptn*nstates+i] = eqprob;
1370                 ptn_ancestral_seq[ptn] = (*it)->aln->STATE_UNKNOWN;
1371             }
1372         }
1373         ptn_ancestral_prob += nptn*nstates;
1374         ptn_ancestral_seq += nptn;
1375     }
1376 }
1377 
writeMarginalAncestralState(ostream & out,PhyloNode * node,double * ptn_ancestral_prob,int * ptn_ancestral_seq)1378 void PhyloSuperTree::writeMarginalAncestralState(ostream &out, PhyloNode *node,
1379     double *ptn_ancestral_prob, int *ptn_ancestral_seq) {
1380     int part = 1;
1381     for (auto it = begin(); it != end(); it++, part++) {
1382         size_t site, nsites = (*it)->getAlnNSite(), nstates = (*it)->model->num_states;
1383         for (site = 0; site < nsites; site++) {
1384             int ptn = (*it)->aln->getPatternID(site);
1385             out << node->name << "\t" << part << "\t" << site+1 << "\t";
1386             out << (*it)->aln->convertStateBackStr(ptn_ancestral_seq[ptn]);
1387             double *state_prob = ptn_ancestral_prob + ptn*nstates;
1388             for (size_t j = 0; j < nstates; j++) {
1389                 out << "\t" << state_prob[j];
1390             }
1391             out << endl;
1392         }
1393         size_t nptn = (*it)->getAlnNPattern();
1394         ptn_ancestral_prob += nptn*nstates;
1395         ptn_ancestral_seq += nptn;
1396     }
1397 }
1398 
1399 /**
1400     end computing ancestral sequence probability for an internal node by marginal reconstruction
1401 */
endMarginalAncestralState(bool orig_kernel_nonrev,double * & ptn_ancestral_prob,int * & ptn_ancestral_seq)1402 void PhyloSuperTree::endMarginalAncestralState(bool orig_kernel_nonrev,
1403     double* &ptn_ancestral_prob, int* &ptn_ancestral_seq) {
1404     if (!orig_kernel_nonrev) {
1405         // switch back to REV kernel
1406         params->kernel_nonrev = orig_kernel_nonrev;
1407         setLikelihoodKernel(sse);
1408         clearAllPartialLH();
1409     }
1410     aligned_free(ptn_ancestral_seq);
1411     aligned_free(ptn_ancestral_prob);
1412 
1413     for (auto it = rbegin(); it != rend(); it++) {
1414         aligned_free((*it)->_pattern_lh_cat_state);
1415         (*it)->_pattern_lh_cat_state = NULL;
1416     }
1417 }
1418 
writeSiteLh(ostream & out,SiteLoglType wsl,int partid)1419 void PhyloSuperTree::writeSiteLh(ostream &out, SiteLoglType wsl, int partid) {
1420     int part = 1;
1421     for (auto it = begin(); it != end(); it++, part++)
1422         (*it)->writeSiteLh(out, wsl, part);
1423 }
1424 
writeSiteRates(ostream & out,bool bayes,int partid)1425 void PhyloSuperTree::writeSiteRates(ostream &out, bool bayes, int partid) {
1426 
1427     int part = 1;
1428     for (iterator it = begin(); it != end(); it++, part++) {
1429         (*it)->writeSiteRates(out, bayes, part);
1430     }
1431 }
1432 
writeBranch(ostream & out,Node * node1,Node * node2)1433 void PhyloSuperTree::writeBranch(ostream &out, Node* node1, Node* node2) {
1434     SuperNeighbor *nei1 = (SuperNeighbor*)node1->findNeighbor(node2);
1435     StrVector taxnames;
1436     if (getNumTaxa(node1, node2) <= leafNum / 2)
1437         getTaxaName(taxnames, node1, node2);
1438     else
1439         getTaxaName(taxnames, node2, node1);
1440     out << nei1->id+1 << ",";
1441     bool first = true;
1442     for (int i = 0; i < taxnames.size(); i++)
1443         if (!taxnames[i].empty()) {
1444             if (!first) out << " ";
1445             out << taxnames[i];
1446             first = false;
1447         }
1448     out << "," << nei1->length;
1449     for (int part = 0; part != size(); part++) {
1450         bool present = true;
1451         FOR_NEIGHBOR_DECLARE(node1, NULL, it) {
1452             SuperNeighbor *nei = (SuperNeighbor*)(*it);
1453             if (!nei->link_neighbors[part])
1454                 present = false;
1455         }
1456         FOR_NEIGHBOR(node2, NULL, it) {
1457             SuperNeighbor *nei = (SuperNeighbor*)(*it);
1458             if (!nei->link_neighbors[part])
1459                 present = false;
1460         }
1461         out << ",";
1462         if (present)
1463             out << nei1->link_neighbors[part]->length;
1464     }
1465 }
1466 
writeBranches(ostream & out)1467 void PhyloSuperTree::writeBranches(ostream &out) {
1468     NodeVector nodes1, nodes2;
1469     getBranches(nodes1, nodes2);
1470     int i;
1471     out << "ID,Taxa,Len";
1472     for (i = 0; i < size(); i++)
1473         out << "," << at(i)->aln->name;
1474     out << endl;
1475     for (i = 0; i < nodes1.size(); i++) {
1476         writeBranch(out, nodes1[i], nodes2[i]);
1477         out << endl;
1478     }
1479 }
1480 
printBestPartitionParams(const char * filename)1481 void PhyloSuperTree::printBestPartitionParams(const char *filename) {
1482     try {
1483         ofstream out;
1484         out.exceptions(ios::failbit | ios::badbit);
1485         out.open(filename);
1486         out << "#nexus" << endl
1487         << "begin sets;" << endl;
1488         int part;
1489         SuperAlignment *saln = (SuperAlignment*)aln;
1490         for (part = 0; part < size(); part++) {
1491             string name = saln->partitions[part]->name;
1492             replace(name.begin(), name.end(), '+', '_');
1493             out << "  charset " << name << " = ";
1494             if (!saln->partitions[part]->aln_file.empty()) out << saln->partitions[part]->aln_file << ": ";
1495             if (saln->partitions[part]->seq_type == SEQ_CODON)
1496                 out << "CODON, ";
1497             string pos = saln->partitions[part]->position_spec;
1498             replace(pos.begin(), pos.end(), ',' , ' ');
1499             out << pos << ";" << endl;
1500         }
1501         out << "  charpartition mymodels =" << endl;
1502         for (part = 0; part < size(); part++) {
1503             string name = saln->partitions[part]->name;
1504             replace(name.begin(), name.end(), '+', '_');
1505             if (part > 0) out << "," << endl;
1506             out << "    " << at(part)->getModelNameParams() << ": " << name << "{" << at(part)->treeLength() << "}";
1507         }
1508         out << ";" << endl;
1509         out << "end;" << endl;
1510         out.close();
1511         cout << "Partition information was printed to " << filename << endl;
1512     } catch (ios::failure &) {
1513         outError(ERR_WRITE_OUTPUT, filename);
1514     }
1515 }
1516