1 /***************************************************************************
2 * Copyright (C) 2009 by BUI Quang Minh *
3 * minh.bui@univie.ac.at *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU General Public License for more details. *
14 * *
15 * You should have received a copy of the GNU General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19 ***************************************************************************/
20 #include "phylosupertree.h"
21 #include "alignment/superalignment.h"
22 #include "alignment/superalignmentpairwise.h"
23 #include "main/phylotesting.h"
24 #include "model/partitionmodel.h"
25 #include "utils/MPIHelper.h"
26
PhyloSuperTree()27 PhyloSuperTree::PhyloSuperTree()
28 : IQTree()
29 {
30 totalNNIs = evalNNIs = 0;
31 rescale_codon_brlen = false;
32 // Initialize the counter for evaluated NNIs on subtrees. FOR THIS CASE IT WON'T BE initialized.
33 }
34
PhyloSuperTree(SuperAlignment * alignment,bool new_iqtree)35 PhyloSuperTree::PhyloSuperTree(SuperAlignment *alignment, bool new_iqtree) : IQTree(alignment) {
36 totalNNIs = evalNNIs = 0;
37
38 rescale_codon_brlen = false;
39 bool has_codon = false;
40 vector<Alignment*>::iterator it;
41 for (it = alignment->partitions.begin(); it != alignment->partitions.end(); it++)
42 if ((*it)->seq_type != SEQ_CODON) {
43 rescale_codon_brlen = true;
44 } else
45 has_codon = true;
46
47 rescale_codon_brlen &= has_codon;
48
49 // Initialize the counter for evaluated NNIs on subtrees
50 int part = 0;
51
52 StrVector model_names;
53 for (it = alignment->partitions.begin(); it != alignment->partitions.end(); it++, part++) {
54 PhyloTree *tree;
55 if (new_iqtree)
56 tree = new IQTree(*it);
57 else
58 tree = new PhyloTree(*it);
59 push_back(tree);
60 PartitionInfo info;
61 info.cur_ptnlh = NULL;
62 info.nniMoves[0].ptnlh = NULL;
63 info.nniMoves[1].ptnlh = NULL;
64 info.evalNNIs = 0.0;
65 part_info.push_back(info);
66 }
67
68 aln = alignment;
69
70 }
71
PhyloSuperTree(SuperAlignment * alignment,PhyloSuperTree * super_tree)72 PhyloSuperTree::PhyloSuperTree(SuperAlignment *alignment, PhyloSuperTree *super_tree) : IQTree(alignment) {
73 totalNNIs = evalNNIs = 0;
74 rescale_codon_brlen = super_tree->rescale_codon_brlen;
75 part_info = super_tree->part_info;
76 for (vector<Alignment*>::iterator it = alignment->partitions.begin(); it != alignment->partitions.end(); it++) {
77 PhyloTree *tree = new PhyloTree((*it));
78 push_back(tree);
79 }
80 // Initialize the counter for evaluated NNIs on subtrees
81 int part = 0;
82 for (iterator it = begin(); it != end(); it++, part++) {
83 part_info[part].evalNNIs = 0.0;
84 }
85
86 aln = alignment;
87 }
88
setModelFactory(ModelFactory * model_fac)89 void PhyloSuperTree::setModelFactory(ModelFactory *model_fac) {
90 PhyloTree::setModelFactory(model_fac);
91 if (model_fac) {
92 PhyloSuperTree *tree = (PhyloSuperTree*)model_fac->site_rate->phylo_tree;
93 for (int part = 0; part != size(); part++) {
94 at(part)->setModelFactory(tree->at(part)->getModelFactory());
95 }
96 } else {
97 for (int part = 0; part != size(); part++) {
98 at(part)->setModelFactory(NULL);
99 }
100 }
101 }
102
setPartInfo(PhyloSuperTree * tree)103 void PhyloSuperTree::setPartInfo(PhyloSuperTree *tree) {
104 part_info = tree->part_info;
105 int part = 0;
106 for (iterator it = begin(); it != end(); it++, part++) {
107 part_info[part].evalNNIs = 0.0;
108 }
109 if (!params->bootstrap_spec) {
110 return;
111 }
112 // 2018-06-03: for -bsam GENE, number of partitions might be reduced
113 if (part_info.size() <= size())
114 return;
115 part_info.erase(part_info.begin()+size(), part_info.end());
116 for (part = 0; part < part_info.size(); part++) {
117 bool found = false;
118 for (int p = 0; p < tree->size(); p++)
119 if (tree->at(p)->aln->name == at(part)->aln->name) {
120 part_info[part] = tree->part_info[p];
121 part_info[part].evalNNIs = 0.0;
122 found = true;
123 break;
124 }
125 ASSERT(found);
126 }
127 }
128
129
setSuperAlignment(Alignment * alignment)130 void PhyloSuperTree::setSuperAlignment(Alignment *alignment) {
131 PhyloTree::setAlignment(alignment);
132
133 SuperAlignment *saln = (SuperAlignment*)aln;
134 for (int i = 0; i < size(); i++)
135 at(i)->setAlignment(saln->partitions.at(i));
136 }
137
setCheckpoint(Checkpoint * checkpoint)138 void PhyloSuperTree::setCheckpoint(Checkpoint *checkpoint) {
139 IQTree::setCheckpoint(checkpoint);
140 for (iterator it = begin(); it != end(); it++)
141 (*it)->setCheckpoint(checkpoint);
142 }
143
saveCheckpoint()144 void PhyloSuperTree::saveCheckpoint() {
145 // checkpoint->startStruct("PhyloSuperTree");
146 // int part = 0;
147 // for (iterator it = begin(); it != end(); it++, part++) {
148 // string key = part_info[part].name + ".tree";
149 // checkpoint->put(key, (*it)->getTreeString());
150 // }
151 // checkpoint->endStruct();
152 IQTree::saveCheckpoint();
153 }
154
restoreCheckpoint()155 void PhyloSuperTree::restoreCheckpoint() {
156 IQTree::restoreCheckpoint();
157
158 // first get the newick string of super tree
159 // checkpoint->startStruct("PhyloTree");
160 // string newick;
161 // CKP_RESTORE(newick);
162 // checkpoint->endStruct();
163 //
164 // if (newick.empty()) return;
165 //
166 // // now get partition tree strings
167 // checkpoint->startStruct("PhyloSuperTree");
168 // int part = 0;
169 // for (iterator it = begin(); it != end(); it++, part++) {
170 // string key = part_info[part].name + ".tree";
171 // string part_tree;
172 // if (!checkpoint->get(key, part_tree))
173 // outError("No tree for partition " + part_info[part].name + " found from checkpoint");
174 // newick += part_tree;
175 // }
176 //
177 // checkpoint->endStruct();
178 //
179 // readTreeString(newick);
180
181 }
182
setParams(Params * params)183 void PhyloSuperTree::setParams(Params* params) {
184 IQTree::setParams(params);
185 for (iterator it = begin(); it != end(); it++) {
186 (*it)->setParams(params);
187 }
188 }
189
initSettings(Params & params)190 void PhyloSuperTree::initSettings(Params ¶ms) {
191 IQTree::initSettings(params);
192 setLikelihoodKernel(params.SSE);
193 setNumThreads(params.num_threads);
194 for (iterator it = begin(); it != end(); it++) {
195 (*it)->params = ¶ms;
196 (*it)->optimize_by_newton = params.optimize_by_newton;
197 }
198
199 }
200
setLikelihoodKernel(LikelihoodKernel lk)201 void PhyloSuperTree::setLikelihoodKernel(LikelihoodKernel lk) {
202 PhyloTree::setLikelihoodKernel(lk);
203 for (iterator it = begin(); it != end(); it++)
204 (*it)->setLikelihoodKernel(lk);
205 }
206
setParsimonyKernel(LikelihoodKernel lk)207 void PhyloSuperTree::setParsimonyKernel(LikelihoodKernel lk) {
208 PhyloTree::setParsimonyKernel(lk);
209 for (iterator it = begin(); it != end(); it++)
210 (*it)->setParsimonyKernel(lk);
211 }
212
changeLikelihoodKernel(LikelihoodKernel lk)213 void PhyloSuperTree::changeLikelihoodKernel(LikelihoodKernel lk) {
214 PhyloTree::changeLikelihoodKernel(lk);
215 }
216
setNumThreads(int num_threads)217 void PhyloSuperTree::setNumThreads(int num_threads) {
218 PhyloTree::setNumThreads((size() >= num_threads) ? num_threads : 1);
219 for (iterator it = begin(); it != end(); it++)
220 (*it)->setNumThreads((size() >= num_threads) ? 1 : num_threads);
221 }
222
printResultTree(string suffix)223 void PhyloSuperTree::printResultTree(string suffix) {
224 if (MPIHelper::getInstance().isWorker()) {
225 return;
226 }
227 if (params->suppress_output_flags & OUT_TREEFILE)
228 return;
229
230 IQTree::printResultTree(suffix);
231
232 string tree_file_name = params->out_prefix;
233 tree_file_name += ".parttrees";
234 if (suffix.compare("") != 0) {
235 tree_file_name += "." + suffix;
236 }
237 try {
238 ofstream out;
239 out.exceptions(ios::failbit | ios::badbit);
240 out.open(tree_file_name);
241 for (iterator it = begin(); it != end(); it++)
242 (*it)->printTree(out, WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA | WT_NEWLINE);
243 out.close();
244 } catch (ios::failure) {
245 outError(ERR_WRITE_OUTPUT, tree_file_name);
246 }
247 if (verbose_mode >= VB_MED)
248 cout << "Partition trees printed to " << tree_file_name << endl;
249 }
250
getTreeString()251 string PhyloSuperTree::getTreeString() {
252 stringstream tree_stream;
253 printTree(tree_stream, WT_TAXON_ID + WT_BR_LEN + WT_SORT_TAXA);
254 for (iterator it = begin(); it != end(); it++)
255 (*it)->printTree(tree_stream, WT_TAXON_ID + WT_BR_LEN + WT_SORT_TAXA);
256 return tree_stream.str();
257 }
258
readTreeString(const string & tree_string)259 void PhyloSuperTree::readTreeString(const string &tree_string) {
260 stringstream str;
261 str << tree_string;
262 str.seekg(0, ios::beg);
263 freeNode();
264 // bug fix 2017-11-30: in case taxon name happens to be ID
265 MTree::readTree(str, rooted);
266 assignLeafNames();
267 // setAlignment(aln);
268 setRootNode(params->root);
269 for (iterator it = begin(); it != end(); it++) {
270 (*it)->freeNode();
271 (*it)->readTree(str, (*it)->rooted);
272 (*it)->assignLeafNames();
273 // (*it)->setAlignment((*it)->aln);
274 }
275 linkTrees();
276 // if (isSuperTree()) {
277 // ((PhyloSuperTree*) this)->mapTrees();
278 // }
279 if (params->pll) {
280 ASSERT(0);
281 pllReadNewick(getTreeString());
282 }
283 resetCurScore();
284
285 }
286
287
288 /**
289 * save branch lengths into a vector
290 */
saveBranchLengths(DoubleVector & lenvec,int startid,PhyloNode * node,PhyloNode * dad)291 void PhyloSuperTree::saveBranchLengths(DoubleVector &lenvec, int startid, PhyloNode *node, PhyloNode *dad) {
292 ASSERT(getMixlen() == 1); // supertree and treemixlen not allowed together
293 int totalBranchNum = branchNum * getMixlen();
294 iterator it;
295 for (it = begin(); it != end(); it++) {
296 totalBranchNum += (*it)->branchNum * (*it)->getMixlen();
297 }
298 lenvec.resize(startid + totalBranchNum);
299
300 PhyloTree::saveBranchLengths(lenvec, startid);
301 startid += branchNum * getMixlen();
302 for (iterator it = begin(); it != end(); it++) {
303 (*it)->saveBranchLengths(lenvec, startid);
304 startid += (*it)->branchNum * (*it)->getMixlen();
305 }
306 }
307 /**
308 * restore branch lengths from a vector previously called with saveBranchLengths
309 */
restoreBranchLengths(DoubleVector & lenvec,int startid,PhyloNode * node,PhyloNode * dad)310 void PhyloSuperTree::restoreBranchLengths(DoubleVector &lenvec, int startid, PhyloNode *node, PhyloNode *dad) {
311 PhyloTree::restoreBranchLengths(lenvec, startid);
312 startid += branchNum * getMixlen();
313 for (iterator it = begin(); it != end(); it++) {
314 (*it)->restoreBranchLengths(lenvec, startid);
315 startid += (*it)->branchNum * (*it)->getMixlen();
316 }
317 }
318
collapseInternalBranches(Node * node,Node * dad,double threshold)319 int PhyloSuperTree::collapseInternalBranches(Node *node, Node *dad, double threshold) {
320 if (!node) node = root;
321 int count = 0;
322 FOR_NEIGHBOR_DECLARE(node, dad, it) {
323 count += collapseInternalBranches((*it)->node, node, threshold);
324 }
325 if (node->isLeaf())
326 return count;
327 NeighborVec nei_vec;
328 nei_vec.insert(nei_vec.begin(), node->neighbors.begin(), node->neighbors.end());
329 for (it = nei_vec.begin(); it != nei_vec.end(); it++)
330 if ((*it)->node != dad && !(*it)->node->isLeaf() && (*it)->length <= threshold) {
331
332 //delete branch of gene-trees
333 SuperNeighbor *snei = (SuperNeighbor*)(*it);
334 int part = 0;
335 for (part = 0; part != size(); part++)
336 if (snei->link_neighbors[part]) {
337 SuperNeighbor* snei_back = (SuperNeighbor*)(*it)->node->findNeighbor(node);
338 at(part)->removeNode(snei_back->link_neighbors[part]->node, snei->link_neighbors[part]->node);
339 }
340
341 // delete the child node
342 removeNode(node, (*it)->node);
343 count++;
344 }
345 return count;
346 }
347
348
newNode(int node_id,const char * node_name)349 Node* PhyloSuperTree::newNode(int node_id, const char* node_name) {
350 return (Node*) (new SuperNode(node_id, node_name));
351 }
352
newNode(int node_id,int node_name)353 Node* PhyloSuperTree::newNode(int node_id, int node_name) {
354 return (Node*) (new SuperNode(node_id, node_name));
355 }
356
getAlnNPattern()357 int PhyloSuperTree::getAlnNPattern() {
358 int num = 0;
359 for (iterator it = begin(); it != end(); it++)
360 num += (*it)->getAlnNPattern();
361 return num;
362 }
363
getAlnNSite()364 int PhyloSuperTree::getAlnNSite() {
365 int num = 0;
366 for (iterator it = begin(); it != end(); it++)
367 num += (*it)->getAlnNSite();
368 return num;
369 }
370
computeDist(int seq1,int seq2,double initial_dist,double & var)371 double PhyloSuperTree::computeDist(int seq1, int seq2, double initial_dist, double &var) {
372 // if no model or site rate is specified, return JC distance
373 if (initial_dist == 0.0) {
374 if (params->compute_obs_dist)
375 initial_dist = aln->computeObsDist(seq1, seq2);
376 else
377 initial_dist = aln->computeDist(seq1, seq2);
378 }
379 if (initial_dist == MAX_GENETIC_DIST) return initial_dist; // MANUEL: here no d2l is return
380 if (!model_factory || !site_rate) return initial_dist; // MANUEL: here no d2l is return
381
382 // now optimize the distance based on the model and site rate
383 SuperAlignmentPairwise aln_pair(this, seq1, seq2);
384 return aln_pair.optimizeDist(initial_dist, var);
385 }
386
linkBranch(int part,SuperNeighbor * nei,SuperNeighbor * dad_nei)387 void PhyloSuperTree::linkBranch(int part, SuperNeighbor *nei, SuperNeighbor *dad_nei) {
388 SuperNode *node = (SuperNode*)dad_nei->node;
389 SuperNode *dad = (SuperNode*)nei->node;
390 nei->link_neighbors[part] = NULL;
391 dad_nei->link_neighbors[part] = NULL;
392 vector<PhyloNeighbor*> part_vec;
393 vector<PhyloNeighbor*> child_part_vec;
394
395 FOR_NEIGHBOR_DECLARE(node, dad, it) {
396 if (((SuperNeighbor*)*it)->link_neighbors[part]) {
397 part_vec.push_back(((SuperNeighbor*)*it)->link_neighbors[part]);
398 child_part_vec.push_back(((SuperNeighbor*)(*it)->node->findNeighbor(node))->link_neighbors[part]);
399 ASSERT(child_part_vec.back()->node == child_part_vec.front()->node || child_part_vec.back()->id == child_part_vec.front()->id);
400 }
401 }
402
403 if (part_vec.empty())
404 return;
405 if (part_vec.size() == 1) {
406 nei->link_neighbors[part] = child_part_vec[0];
407 dad_nei->link_neighbors[part] = part_vec[0];
408 return;
409 }
410 if (part_vec[0] == child_part_vec[1]) {
411 // ping-pong, out of sub-tree
412 ASSERT(part_vec[1] == child_part_vec[0]);
413 return;
414 }
415 PhyloNode *node_part = (PhyloNode*) child_part_vec[0]->node;
416 PhyloNode *dad_part = NULL;
417 FOR_NEIGHBOR(node_part, NULL, it) {
418 bool appear = false;
419 for (vector<PhyloNeighbor*>::iterator it2 = part_vec.begin(); it2 != part_vec.end(); it2++){
420 if ((*it2) == (*it)) {
421 appear = true; break;
422 }
423 }
424 if (!appear) {
425 ASSERT(!dad_part);
426 dad_part = (PhyloNode*)(*it)->node;
427 }
428 }
429 nei->link_neighbors[part] = (PhyloNeighbor*)node_part->findNeighbor(dad_part);
430 dad_nei->link_neighbors[part] = (PhyloNeighbor*)dad_part->findNeighbor(node_part);
431 }
432
linkTree(int part,NodeVector & part_taxa,SuperNode * node,SuperNode * dad)433 void PhyloSuperTree::linkTree(int part, NodeVector &part_taxa, SuperNode *node, SuperNode *dad) {
434 if (!node) {
435 if (!root->isLeaf())
436 node = (SuperNode*) root;
437 else
438 node = (SuperNode*)root->neighbors[0]->node;
439 ASSERT(node);
440 if (node->isLeaf()) // two-taxa tree
441 dad = (SuperNode*)node->neighbors[0]->node;
442 }
443 SuperNeighbor *nei = NULL;
444 SuperNeighbor *dad_nei = NULL;
445 if (dad) {
446 nei = (SuperNeighbor*)node->findNeighbor(dad);
447 dad_nei = (SuperNeighbor*)dad->findNeighbor(node);
448 if (nei->link_neighbors.empty()) nei->link_neighbors.resize(size());
449 if (dad_nei->link_neighbors.empty()) dad_nei->link_neighbors.resize(size());
450 nei->link_neighbors[part] = NULL;
451 dad_nei->link_neighbors[part] = NULL;
452 }
453 if (node->isLeaf()) {
454 ASSERT(dad);
455 PhyloNode *node_part = (PhyloNode*)part_taxa[node->id];
456 if (node_part) {
457 PhyloNode *dad_part = (PhyloNode*)node_part->neighbors[0]->node;
458 ASSERT(node_part->isLeaf());
459 nei->link_neighbors[part] = (PhyloNeighbor*) node_part->neighbors[0];
460 dad_nei->link_neighbors[part] = (PhyloNeighbor*)dad_part->findNeighbor(node_part);
461 }
462 return;
463 }
464
465 FOR_NEIGHBOR_DECLARE(node, dad, it) {
466 linkTree(part, part_taxa, (SuperNode*) (*it)->node, (SuperNode*) node);
467 }
468 if (!dad) return;
469 linkBranch(part, nei, dad_nei);
470 }
471
syncRooting()472 void PhyloSuperTree::syncRooting() {
473 if (empty() || !front()->root)
474 return;
475 // check if all trees are similarly rooted or unrooted
476 bool same_rooting = true;
477 iterator tree;
478 for (tree = begin(); tree != end(); tree++) {
479 if ((*tree)->rooted != front()->rooted) {
480 same_rooting = false;
481 break;
482 }
483 }
484
485 if (same_rooting) {
486 if (!empty() && rooted != front()->rooted) {
487 if (rooted)
488 convertToUnrooted();
489 else
490 convertToRooted();
491 }
492 } else if (!rooted) {
493 // not same rooting between trees
494 convertToRooted();
495 }
496 }
497
printMapInfo()498 void PhyloSuperTree::printMapInfo() {
499 NodeVector nodes1, nodes2;
500 getBranches(nodes1, nodes2);
501 int part = 0;
502 for (iterator it = begin(); it != end(); it++, part++) {
503 cout << "Subtree for partition " << part << endl;
504 (*it)->drawTree(cout, WT_BR_SCALE | WT_INT_NODE | WT_TAXON_ID | WT_NEWLINE);
505 for (int i = 0; i < nodes1.size(); i++) {
506 PhyloNeighbor *nei1 = ((SuperNeighbor*)nodes1[i]->findNeighbor(nodes2[i]))->link_neighbors[part];
507 PhyloNeighbor *nei2 = ((SuperNeighbor*)nodes2[i]->findNeighbor(nodes1[i]))->link_neighbors[part];
508 cout << nodes1[i]->findNeighbor(nodes2[i])->id << ":";
509 if (nodes1[i]->isLeaf()) cout << nodes1[i]->name; else cout << nodes1[i]->id;
510 cout << ",";
511 if (nodes2[i]->isLeaf()) cout << nodes2[i]->name; else cout << nodes2[i]->id;
512 cout << " -> ";
513 if (nei2) {
514 cout << nei2->id << ":";
515 if (nei2->node->isLeaf())
516 cout << nei2->node->name;
517 else cout << nei2->node->id;
518 }
519 else cout << -1;
520 cout << ",";
521 if (nei1)
522 if (nei1->node->isLeaf())
523 cout << nei1->node->name;
524 else cout << nei1->node->id;
525 else cout << -1;
526 cout << endl;
527 }
528 }
529 }
530
531
mapTrees()532 void PhyloSuperTree::mapTrees() {
533 ASSERT(root);
534 syncRooting();
535 int part = 0, i;
536 if (verbose_mode >= VB_DEBUG)
537 drawTree(cout, WT_BR_SCALE | WT_INT_NODE | WT_TAXON_ID | WT_NEWLINE | WT_BR_ID);
538 for (iterator it = begin(); it != end(); it++, part++) {
539 string taxa_set;
540 Pattern taxa_pat = aln->getPattern(part);
541 taxa_set.insert(taxa_set.begin(), taxa_pat.begin(), taxa_pat.end());
542 (*it)->copyTree(this, taxa_set);
543 if ((*it)->getModel()) {
544 (*it)->initializeAllPartialLh();
545 }
546 (*it)->resetCurScore();
547 NodeVector my_taxa, part_taxa;
548 (*it)->getOrderedTaxa(my_taxa);
549 part_taxa.resize(leafNum, NULL);
550 for (i = 0; i < leafNum; i++) {
551 int id;
552 if (i < aln->getNSeq())
553 id = ((SuperAlignment*)aln)->taxa_index[i][part];
554 else {
555 ASSERT(rooted);
556 if ((*it)->rooted)
557 id = (*it)->leafNum-1;
558 else
559 id = -1;
560 }
561 if (id >=0) part_taxa[i] = my_taxa[id];
562 }
563 if (verbose_mode >= VB_DEBUG) {
564 cout << "Subtree for partition " << part << endl;
565 (*it)->drawTree(cout, WT_BR_SCALE | WT_INT_NODE | WT_TAXON_ID | WT_NEWLINE | WT_BR_ID);
566 }
567 linkTree(part, part_taxa);
568 }
569
570 if (verbose_mode >= VB_DEBUG) printMapInfo();
571 }
572
linkTrees()573 void PhyloSuperTree::linkTrees() {
574 int part = 0;
575 iterator it;
576 for (it = begin(), part = 0; it != end(); it++, part++) {
577 (*it)->initializeTree();
578 (*it)->setAlignment((*it)->aln);
579 if ((*it)->getModel()) {
580 (*it)->initializeAllPartialLh();
581 }
582 (*it)->resetCurScore();
583 NodeVector my_taxa, part_taxa;
584 (*it)->getOrderedTaxa(my_taxa);
585 part_taxa.resize(leafNum, NULL);
586 int i;
587 for (i = 0; i < leafNum; i++) {
588 int id;
589 if (i < aln->getNSeq())
590 id = ((SuperAlignment*)aln)->taxa_index[i][part];
591 else if ((*it)->rooted)
592 id = (*it)->leafNum-1;
593 else
594 id = -1;
595 if (id >=0) part_taxa[i] = my_taxa[id];
596 }
597 linkTree(part, part_taxa);
598 }
599 }
600
initializeAllPartialLh()601 void PhyloSuperTree::initializeAllPartialLh() {
602 for (iterator it = begin(); it != end(); it++) {
603 (*it)->initializeAllPartialLh();
604 }
605 }
606
607
deleteAllPartialLh()608 void PhyloSuperTree::deleteAllPartialLh() {
609 for (iterator it = begin(); it != end(); it++) {
610 (*it)->deleteAllPartialLh();
611 }
612 }
613
clearAllPartialLH(bool make_null)614 void PhyloSuperTree::clearAllPartialLH(bool make_null) {
615 for (iterator it = begin(); it != end(); it++) {
616 (*it)->clearAllPartialLH(make_null);
617 }
618 }
619
computeParsimonyBranchObsolete(PhyloNeighbor * dad_branch,PhyloNode * dad,int * branch_subst)620 int PhyloSuperTree::computeParsimonyBranchObsolete(PhyloNeighbor *dad_branch, PhyloNode *dad, int *branch_subst) {
621 int score = 0, part = 0;
622 SuperNeighbor *dad_nei = (SuperNeighbor*)dad_branch;
623 SuperNeighbor *node_nei = (SuperNeighbor*)(dad_branch->node->findNeighbor(dad));
624
625 if (branch_subst)
626 branch_subst = 0;
627 for (iterator it = begin(); it != end(); it++, part++) {
628 int this_subst = 0;
629 if (dad_nei->link_neighbors[part]) {
630 if (branch_subst)
631 score += (*it)->computeParsimonyBranch(dad_nei->link_neighbors[part], (PhyloNode*)node_nei->link_neighbors[part]->node, &this_subst);
632 else
633 score += (*it)->computeParsimonyBranch(dad_nei->link_neighbors[part], (PhyloNode*)node_nei->link_neighbors[part]->node);
634 } else
635 score += (*it)->computeParsimony();
636 if (branch_subst)
637 branch_subst += this_subst;
638 }
639 return score;
640 }
641
computePartitionOrder()642 void PhyloSuperTree::computePartitionOrder() {
643 if (!part_order.empty())
644 return;
645 int i, ntrees = size();
646 part_order.resize(ntrees);
647 part_order_by_nptn.resize(ntrees);
648 #ifdef _OPENMP
649 int *id = new int[ntrees];
650 double *cost = new double[ntrees];
651
652 for (i = 0; i < ntrees; i++) {
653 Alignment *part_aln = at(i)->aln;
654 cost[i] = -((double)part_aln->getNSeq())*part_aln->getNPattern()*part_aln->num_states;
655 id[i] = i;
656 }
657 quicksort(cost, 0, ntrees-1, id);
658 for (i = 0; i < ntrees; i++)
659 part_order[i] = id[i];
660
661 // compute part_order by number of patterns
662 for (i = 0; i < ntrees; i++) {
663 Alignment *part_aln = at(i)->aln;
664 cost[i] = -((double)part_aln->getNPattern())*part_aln->num_states;
665 id[i] = i;
666 }
667 quicksort(cost, 0, ntrees-1, id);
668 for (i = 0; i < ntrees; i++)
669 part_order_by_nptn[i] = id[i];
670
671 delete [] cost;
672 delete [] id;
673
674 if (verbose_mode >= VB_MED) {
675 cout << "Partitions ordered by computation costs:" << endl;
676 cout << "#nexus" << endl << "begin sets;" << endl;
677 for (i = 0; i < ntrees; i++)
678 cout << " charset " << at(part_order[i])->aln->name << " = " << at(part_order[i])->aln->position_spec << ";" << endl;
679 cout << "end;" << endl;
680 }
681 #else
682 for (i = 0; i < ntrees; i++) {
683 part_order[i] = i;
684 part_order_by_nptn[i] = i;
685 }
686 #endif // OPENMP
687 }
688
computeLikelihood(double * pattern_lh)689 double PhyloSuperTree::computeLikelihood(double *pattern_lh) {
690 double tree_lh = 0.0;
691 int ntrees = size();
692 if (pattern_lh) {
693 //#ifdef _OPENMP
694 //#pragma omp parallel for reduction(+: tree_lh)
695 //#endif
696 for (int i = 0; i < ntrees; i++) {
697 part_info[i].cur_score = at(i)->computeLikelihood(pattern_lh);
698 tree_lh += part_info[i].cur_score;
699 pattern_lh += at(i)->getAlnNPattern();
700 }
701 } else {
702 if (part_order.empty()) computePartitionOrder();
703 #ifdef _OPENMP
704 #pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(num_threads > 1)
705 #endif
706 for (int j = 0; j < ntrees; j++) {
707 int i = part_order[j];
708 part_info[i].cur_score = at(i)->computeLikelihood();
709 tree_lh += part_info[i].cur_score;
710 }
711 }
712 return tree_lh;
713 }
714
getNumLhCat(SiteLoglType wsl)715 int PhyloSuperTree::getNumLhCat(SiteLoglType wsl) {
716 int ncat = 0, it_cat;
717 for (iterator it = begin(); it != end(); it++)
718 if ((it_cat = (*it)->getNumLhCat(wsl)) > ncat)
719 ncat = it_cat;
720 return ncat;
721 }
722
723
computePatternLikelihood(double * pattern_lh,double * cur_logl,double * ptn_lh_cat,SiteLoglType wsl)724 void PhyloSuperTree::computePatternLikelihood(double *pattern_lh, double *cur_logl, double *ptn_lh_cat, SiteLoglType wsl) {
725 size_t offset = 0, offset_lh_cat = 0;
726 iterator it;
727 for (it = begin(); it != end(); it++) {
728 if (ptn_lh_cat)
729 (*it)->computePatternLikelihood(pattern_lh + offset, NULL, ptn_lh_cat + offset_lh_cat, wsl);
730 else
731 (*it)->computePatternLikelihood(pattern_lh + offset);
732 offset += (*it)->aln->getNPattern();
733 offset_lh_cat += (*it)->aln->getNPattern() * (*it)->getNumLhCat(wsl);
734 }
735 if (cur_logl) { // sanity check
736 double sum_logl = 0;
737 offset = 0;
738 for (it = begin(); it != end(); it++) {
739 int nptn = (*it)->aln->getNPattern();
740 for (int j = 0; j < nptn; j++)
741 sum_logl += pattern_lh[offset + j] * (*it)->aln->at(j).frequency;
742 offset += (*it)->aln->getNPattern();
743 }
744 if (fabs(sum_logl - *cur_logl) > 0.001) {
745 cout << *cur_logl << " " << sum_logl << endl;
746 // outError("Wrong PhyloSuperTree::", __func__);
747 }
748 ASSERT(fabs(sum_logl - *cur_logl) < 0.001);
749 }
750 }
751
computePatternProbabilityCategory(double * ptn_prob_cat,SiteLoglType wsl)752 void PhyloSuperTree::computePatternProbabilityCategory(double *ptn_prob_cat, SiteLoglType wsl) {
753 size_t offset = 0;
754 for (iterator it = begin(); it != end(); it++) {
755 (*it)->computePatternProbabilityCategory(ptn_prob_cat + offset, wsl);
756 offset += (*it)->aln->getNPattern() * (*it)->getNumLhCat(wsl);
757 }
758 }
759
optimizeAllBranches(int my_iterations,double tolerance,int maxNRStep)760 double PhyloSuperTree::optimizeAllBranches(int my_iterations, double tolerance, int maxNRStep) {
761 double tree_lh = 0.0;
762 int ntrees = size();
763 if (part_order.empty()) computePartitionOrder();
764 #ifdef _OPENMP
765 #pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(num_threads > 1)
766 #endif
767 for (int j = 0; j < ntrees; j++) {
768 int i = part_order[j];
769 part_info[i].cur_score = at(i)->optimizeAllBranches(my_iterations, tolerance/min(ntrees,10), maxNRStep);
770 tree_lh += part_info[i].cur_score;
771 if (verbose_mode >= VB_MAX)
772 at(i)->printTree(cout, WT_BR_LEN + WT_NEWLINE);
773 }
774
775 if (my_iterations >= 100) computeBranchLengths();
776 return tree_lh;
777 }
778
~PhyloSuperTree()779 PhyloSuperTree::~PhyloSuperTree()
780 {
781 for (vector<PartitionInfo>::reverse_iterator pit = part_info.rbegin(); pit != part_info.rend(); pit++) {
782 if (pit->nniMoves[1].ptnlh)
783 delete [] pit->nniMoves[1].ptnlh;
784 pit->nniMoves[1].ptnlh = NULL;
785 if (pit->nniMoves[0].ptnlh)
786 delete [] pit->nniMoves[0].ptnlh;
787 pit->nniMoves[0].ptnlh = NULL;
788 if (pit->cur_ptnlh)
789 delete [] pit->cur_ptnlh;
790 pit->cur_ptnlh = NULL;
791 }
792 part_info.clear();
793
794 for (reverse_iterator it = rbegin(); it != rend(); it++)
795 delete (*it);
796 clear();
797 }
798
799
initPartitionInfo()800 void PhyloSuperTree::initPartitionInfo() {
801 int part = 0;
802 for (iterator it = begin(); it != end(); it++, part++) {
803 part_info[part].cur_score = 0.0;
804
805 part_info[part].cur_brlen.resize((*it)->branchNum);
806 if (params->nni5) {
807 part_info[part].nni1_brlen.resize((*it)->branchNum * 5);
808 part_info[part].nni2_brlen.resize((*it)->branchNum * 5);
809 } else {
810 part_info[part].nni1_brlen.resize((*it)->branchNum);
811 part_info[part].nni2_brlen.resize((*it)->branchNum);
812 }
813
814 (*it)->getBranchLengths(part_info[part].cur_brlen);
815
816 if (save_all_trees == 2 || params->write_intermediate_trees >= 2) {
817 // initialize ptnlh for ultrafast bootstrap
818 int nptn = (*it)->getAlnNPattern();
819 if (!part_info[part].cur_ptnlh)
820 part_info[part].cur_ptnlh = new double[nptn];
821 if (!part_info[part].nniMoves[0].ptnlh)
822 part_info[part].nniMoves[0].ptnlh = new double [nptn];
823 if (!part_info[part].nniMoves[1].ptnlh)
824 part_info[part].nniMoves[1].ptnlh = new double [nptn];
825 } else {
826 part_info[part].cur_ptnlh = NULL;
827 part_info[part].nniMoves[0].ptnlh = NULL;
828 part_info[part].nniMoves[1].ptnlh = NULL;
829 }
830 }
831 }
832
getMaxPartNameLength()833 int PhyloSuperTree::getMaxPartNameLength() {
834 int namelen = 0;
835 for (iterator it = begin(); it != end(); it++)
836 namelen = max((int)(*it)->aln->name.length(), namelen);
837 return namelen;
838 }
839
getBestNNIForBran(PhyloNode * node1,PhyloNode * node2,NNIMove * nniMoves)840 NNIMove PhyloSuperTree::getBestNNIForBran(PhyloNode *node1, PhyloNode *node2, NNIMove *nniMoves) {
841 if (((PhyloNeighbor*)node1->findNeighbor(node2))->direction == TOWARD_ROOT) {
842 // swap node1 and node2 if the direction is not right, only for nonreversible models
843 PhyloNode *tmp = node1;
844 node1 = node2;
845 node2 = tmp;
846 }
847 NNIMove myMove;
848 //myMove.newloglh = 0;
849 SuperNeighbor *nei1 = ((SuperNeighbor*)node1->findNeighbor(node2));
850 SuperNeighbor *nei2 = ((SuperNeighbor*)node2->findNeighbor(node1));
851 ASSERT(nei1 && nei2);
852 SuperNeighbor *node1_nei = NULL;
853 SuperNeighbor *node2_nei = NULL;
854 SuperNeighbor *node2_nei_other = NULL;
855 FOR_NEIGHBOR_DECLARE(node1, node2, node1_it)
856 if (((PhyloNeighbor*)*node1_it)->direction != TOWARD_ROOT)
857 {
858 node1_nei = (SuperNeighbor*)(*node1_it);
859 break;
860 }
861 FOR_NEIGHBOR_DECLARE(node2, node1, node2_it) {
862 node2_nei = (SuperNeighbor*)(*node2_it);
863 break;
864 }
865
866 FOR_NEIGHBOR_IT(node2, node1, node2_it_other)
867 if ((*node2_it_other) != node2_nei) {
868 node2_nei_other = (SuperNeighbor*)(*node2_it_other);
869 break;
870 }
871
872 // check for compatibility with constraint tree
873 bool nni_ok[2] = {true, true};
874 int nniid = 0;
875 FOR_NEIGHBOR(node2, node1, node2_it) {
876 NNIMove nni;
877 nni.node1 = node1;
878 nni.node2 = node2;
879 nni.node1Nei_it = node1->findNeighborIt(node1_nei->node);
880 nni.node2Nei_it = node2_it;
881 nni_ok[nniid++] = constraintTree.isCompatible(nni);
882 }
883 ASSERT(nniid == 2);
884 myMove.node1 = myMove.node2 = NULL;
885 myMove.newloglh = -DBL_MAX;
886 // return if both NNIs do not satisfy constraint
887 if (!nni_ok[0] && !nni_ok[1]) {
888 // ASSERT(!nniMoves);
889 if (nniMoves) {
890 nniMoves[0].newloglh = nniMoves[1].newloglh = -DBL_MAX;
891 }
892 return myMove;
893 }
894
895 //double bestScore = optimizeOneBranch(node1, node2, false);
896
897 int ntrees = size(), part;
898 double nni_score1 = 0.0, nni_score2 = 0.0;
899 int local_totalNNIs = 0, local_evalNNIs = 0;
900
901 if (part_order.empty()) computePartitionOrder();
902 #ifdef _OPENMP
903 #pragma omp parallel for reduction(+: nni_score1, nni_score2, local_totalNNIs, local_evalNNIs) private(part) schedule(dynamic) if(num_threads>1)
904 #endif
905 for (int treeid = 0; treeid < ntrees; treeid++) {
906 part = part_order_by_nptn[treeid];
907 bool is_nni = true;
908 local_totalNNIs++;
909 FOR_NEIGHBOR_DECLARE(node1, NULL, nit) {
910 if (! ((SuperNeighbor*)*nit)->link_neighbors[part]) { is_nni = false; break; }
911 }
912 FOR_NEIGHBOR(node2, NULL, nit) {
913 if (! ((SuperNeighbor*)*nit)->link_neighbors[part]) { is_nni = false; break; }
914 }
915 if (!is_nni && params->terrace_aware) {
916 if (part_info[part].cur_score == 0.0) {
917 part_info[part].cur_score = at(part)->computeLikelihood();
918 if (save_all_trees == 2 || nniMoves)
919 at(part)->computePatternLikelihood(part_info[part].cur_ptnlh, &part_info[part].cur_score);
920 }
921 nni_score1 += part_info[part].cur_score;
922 nni_score2 += part_info[part].cur_score;
923 continue;
924 }
925
926 local_evalNNIs++;
927 part_info[part].evalNNIs++;
928
929 PhyloNeighbor *nei1_part = nei1->link_neighbors[part];
930 PhyloNeighbor *nei2_part = nei2->link_neighbors[part];
931
932 int brid = nei1_part->id;
933
934 //NNIMove part_moves[2];
935 //part_moves[0].node1Nei_it = NULL;
936
937 // setup subtree NNI correspondingly
938 PhyloNode *node1_part = (PhyloNode*)nei2_part->node;
939 PhyloNode *node2_part = (PhyloNode*)nei1_part->node;
940 part_info[part].nniMoves[0].node1 = part_info[part].nniMoves[1].node1 = node1;
941 part_info[part].nniMoves[0].node2 = part_info[part].nniMoves[1].node2 = node2;
942 part_info[part].nniMoves[0].node1Nei_it = node1_part->findNeighborIt(node1_nei->link_neighbors[part]->node);
943 part_info[part].nniMoves[0].node2Nei_it = node2_part->findNeighborIt(node2_nei->link_neighbors[part]->node);
944
945 part_info[part].nniMoves[1].node1Nei_it = node1_part->findNeighborIt(node1_nei->link_neighbors[part]->node);
946 part_info[part].nniMoves[1].node2Nei_it = node2_part->findNeighborIt(node2_nei_other->link_neighbors[part]->node);
947
948 at(part)->getBestNNIForBran((PhyloNode*)nei2_part->node, (PhyloNode*)nei1_part->node, part_info[part].nniMoves);
949 // detect the corresponding NNIs and swap if necessary (the swapping refers to the swapping of NNI order)
950 if (!((*part_info[part].nniMoves[0].node1Nei_it == node1_nei->link_neighbors[part] &&
951 *part_info[part].nniMoves[0].node2Nei_it == node2_nei->link_neighbors[part]) ||
952 (*part_info[part].nniMoves[0].node1Nei_it != node1_nei->link_neighbors[part] &&
953 *part_info[part].nniMoves[0].node2Nei_it != node2_nei->link_neighbors[part])))
954 {
955 outError("WRONG");
956 NNIMove tmp = part_info[part].nniMoves[0];
957 part_info[part].nniMoves[0] = part_info[part].nniMoves[1];
958 part_info[part].nniMoves[1] = tmp;
959 }
960 nni_score1 += part_info[part].nniMoves[0].newloglh;
961 nni_score2 += part_info[part].nniMoves[1].newloglh;
962 int numlen = 1;
963 if (params->nni5) numlen = 5;
964 for (int i = 0; i < numlen; i++) {
965 part_info[part].nni1_brlen[brid*numlen + i] = part_info[part].nniMoves[0].newLen[i];
966 part_info[part].nni2_brlen[brid*numlen + i] = part_info[part].nniMoves[1].newLen[i];
967 }
968
969 }
970 totalNNIs += local_totalNNIs;
971 evalNNIs += local_evalNNIs;
972 double nni_scores[2] = {nni_score1, nni_score2};
973
974 if (!nni_ok[0]) nni_scores[0] = -DBL_MAX;
975 if (!nni_ok[1]) nni_scores[1] = -DBL_MAX;
976
977 myMove.node1Nei_it = node1->findNeighborIt(node1_nei->node);
978 myMove.node1 = node1;
979 myMove.node2 = node2;
980 if (nni_scores[0] > nni_scores[1]) {
981 myMove.swap_id = 1;
982 myMove.node2Nei_it = node2->findNeighborIt(node2_nei->node);
983 myMove.newloglh = nni_scores[0];
984 } else {
985 myMove.swap_id = 2;
986 myMove.node2Nei_it = node2->findNeighborIt(node2_nei_other->node);
987 myMove.newloglh = nni_scores[1];
988 }
989
990 if (save_all_trees != 2 && !nniMoves) return myMove;
991
992 // for bootstrap now
993 //now setup pattern likelihoods per partition
994 double *save_lh_factor = new double [ntrees];
995 double *save_lh_factor_back = new double [ntrees];
996 nniid = 0;
997 FOR_NEIGHBOR(node2, node1, node2_it) if (nni_ok[nniid])
998 {
999
1000 // do the NNI
1001 node2_nei = (SuperNeighbor*)(*node2_it);
1002 node1->updateNeighbor(node1_it, node2_nei);
1003 node2_nei->node->updateNeighbor(node2, node1);
1004 node2->updateNeighbor(node2_it, node1_nei);
1005 node1_nei->node->updateNeighbor(node1, node2);
1006
1007 for (part = 0; part < ntrees; part++) {
1008 bool is_nni = true;
1009 FOR_NEIGHBOR_DECLARE(node1, NULL, nit) {
1010 if (! ((SuperNeighbor*)*nit)->link_neighbors[part]) { is_nni = false; break; }
1011 }
1012 FOR_NEIGHBOR(node2, NULL, nit) {
1013 if (! ((SuperNeighbor*)*nit)->link_neighbors[part]) { is_nni = false; break; }
1014 }
1015 if (!is_nni)
1016 memcpy(at(part)->_pattern_lh, part_info[part].cur_ptnlh, at(part)->getAlnNPattern() * sizeof(double));
1017 else
1018 memcpy(at(part)->_pattern_lh, part_info[part].nniMoves[nniid].ptnlh, at(part)->getAlnNPattern() * sizeof(double));
1019 save_lh_factor[part] = at(part)->current_it->lh_scale_factor;
1020 save_lh_factor_back[part] = at(part)->current_it_back->lh_scale_factor;
1021 at(part)->current_it->lh_scale_factor = 0.0;
1022 at(part)->current_it_back->lh_scale_factor = 0.0;
1023 }
1024 if (nniMoves) {
1025 nniMoves[nniid].newloglh = nni_scores[nniid];
1026 computePatternLikelihood(nniMoves[nniid].ptnlh, &nni_scores[nniid]);
1027 }
1028 if (save_all_trees == 2)
1029 saveCurrentTree(nni_scores[nniid]);
1030
1031 // restore information
1032 for (part = 0; part < ntrees; part++) {
1033 at(part)->current_it->lh_scale_factor = save_lh_factor[part];
1034 at(part)->current_it_back->lh_scale_factor = save_lh_factor_back[part];
1035 }
1036
1037 // swap back to recover the tree
1038 node1->updateNeighbor(node1_it, node1_nei);
1039 node1_nei->node->updateNeighbor(node2, node1);
1040 node2->updateNeighbor(node2_it, node2_nei);
1041 node2_nei->node->updateNeighbor(node1, node2);
1042 nniid++;
1043
1044 }
1045
1046 delete [] save_lh_factor_back;
1047 delete [] save_lh_factor;
1048 return myMove;
1049 }
1050
doNNI(NNIMove & move,bool clearLH)1051 void PhyloSuperTree::doNNI(NNIMove &move, bool clearLH) {
1052 SuperNeighbor *nei1 = (SuperNeighbor*)move.node1->findNeighbor(move.node2);
1053 SuperNeighbor *nei2 = (SuperNeighbor*)move.node2->findNeighbor(move.node1);
1054 SuperNeighbor *node1_nei = (SuperNeighbor*)*move.node1Nei_it;
1055 SuperNeighbor *node2_nei = (SuperNeighbor*)*move.node2Nei_it;
1056 int part = 0;
1057 iterator it;
1058 PhyloTree::doNNI(move, clearLH);
1059
1060 for (it = begin(), part = 0; it != end(); it++, part++) {
1061 bool is_nni = true;
1062 FOR_NEIGHBOR_DECLARE(move.node1, NULL, nit) {
1063 if (! ((SuperNeighbor*)*nit)->link_neighbors[part]) { is_nni = false; break; }
1064 }
1065 FOR_NEIGHBOR(move.node2, NULL, nit) {
1066 if (! ((SuperNeighbor*)*nit)->link_neighbors[part]) { is_nni = false; break; }
1067 }
1068 if (!is_nni) {
1069 // relink the branch if it does not correspond to NNI for partition
1070 linkBranch(part, nei1, nei2);
1071 continue;
1072 }
1073
1074 NNIMove part_move;
1075 PhyloNeighbor *nei1_part = nei1->link_neighbors[part];
1076 PhyloNeighbor *nei2_part = nei2->link_neighbors[part];
1077 part_move.node1 = (PhyloNode*)nei2_part->node;
1078 part_move.node2 = (PhyloNode*)nei1_part->node;
1079 part_move.node1Nei_it = part_move.node1->findNeighborIt(node1_nei->link_neighbors[part]->node);
1080 part_move.node2Nei_it = part_move.node2->findNeighborIt(node2_nei->link_neighbors[part]->node);
1081
1082 (*it)->doNNI(part_move, clearLH);
1083
1084 }
1085
1086 }
1087
changeNNIBrans(NNIMove & move)1088 void PhyloSuperTree::changeNNIBrans(NNIMove &move) {
1089 SuperNeighbor *nei1 = (SuperNeighbor*)move.node1->findNeighbor(move.node2);
1090 SuperNeighbor *nei2 = (SuperNeighbor*)move.node2->findNeighbor(move.node1);
1091 iterator it;
1092 int part;
1093
1094 for (it = begin(), part = 0; it != end(); it++, part++) {
1095 bool is_nni = true;
1096 FOR_NEIGHBOR_DECLARE(move.node1, NULL, nit) {
1097 if (! ((SuperNeighbor*)*nit)->link_neighbors[part]) { is_nni = false; break; }
1098 }
1099 FOR_NEIGHBOR(move.node2, NULL, nit) {
1100 if (! ((SuperNeighbor*)*nit)->link_neighbors[part]) { is_nni = false; break; }
1101 }
1102 if (!is_nni) {
1103 continue;
1104 }
1105
1106 NNIMove part_move;
1107 PhyloNeighbor *nei1_part = nei1->link_neighbors[part];
1108 PhyloNeighbor *nei2_part = nei2->link_neighbors[part];
1109 int brid = nei1_part->id;
1110 part_move.node1 = (PhyloNode*)nei2_part->node;
1111 part_move.node2 = (PhyloNode*)nei1_part->node;
1112 int numlen = 1;
1113 if (params->nni5) numlen = 5;
1114 if (move.swap_id == 1) {
1115 for (int i = 0; i < numlen; i++)
1116 part_move.newLen[i] = part_info[part].nni1_brlen[brid*numlen + i];
1117 } else {
1118 for (int i = 0; i < numlen; i++)
1119 part_move.newLen[i] = part_info[part].nni2_brlen[brid*numlen + i];
1120 }
1121
1122 (*it)->changeNNIBrans(part_move);
1123
1124 }
1125
1126 }
1127
1128 //void PhyloSuperTree::restoreAllBrans(PhyloNode *node, PhyloNode *dad) {
1129 // int part = 0;
1130 // for (iterator it = begin(); it != end(); it++, part++) {
1131 // (*it)->setBranchLengths(part_info[part].cur_brlen);
1132 // }
1133 //}
1134
reinsertLeaves(PhyloNodeVector & del_leaves)1135 void PhyloSuperTree::reinsertLeaves(PhyloNodeVector &del_leaves) {
1136 IQTree::reinsertLeaves(del_leaves);
1137 mapTrees();
1138 }
1139
computeBranchLengths()1140 void PhyloSuperTree::computeBranchLengths() {
1141 if (verbose_mode >= VB_DEBUG)
1142 cout << "Assigning branch lengths for full tree with weighted average..." << endl;
1143 int part = 0, i;
1144 iterator it;
1145
1146 NodeVector nodes1, nodes2;
1147 getBranches(nodes1, nodes2);
1148 vector<SuperNeighbor*> neighbors1;
1149 vector<SuperNeighbor*> neighbors2;
1150 IntVector occurence;
1151 occurence.resize(nodes1.size(), 0);
1152 for (i = 0; i < nodes1.size(); i++) {
1153 neighbors1.push_back((SuperNeighbor*)nodes1[i]->findNeighbor(nodes2[i]) );
1154 neighbors2.push_back((SuperNeighbor*)nodes2[i]->findNeighbor(nodes1[i]) );
1155 neighbors1.back()->length = 0.0;
1156 }
1157 for (it = begin(), part = 0; it != end(); it++, part++) {
1158 IntVector brfreq;
1159 brfreq.resize((*it)->branchNum, 0);
1160 for (i = 0; i < nodes1.size(); i++) {
1161 PhyloNeighbor *nei1 = neighbors1[i]->link_neighbors[part];
1162 if (!nei1) continue;
1163 brfreq[nei1->id]++;
1164 }
1165 for (i = 0; i < nodes1.size(); i++) {
1166 PhyloNeighbor *nei1 = neighbors1[i]->link_neighbors[part];
1167 if (!nei1) continue;
1168 if ((*it)->aln->seq_type == SEQ_CODON && rescale_codon_brlen) {
1169 // rescale branch length by 3
1170 neighbors1[i]->length += (nei1->length) * (*it)->aln->getNSite() / brfreq[nei1->id];
1171 occurence[i] += (*it)->aln->getNSite()*3;
1172 } else {
1173 neighbors1[i]->length += (nei1->length) * (*it)->aln->getNSite() / brfreq[nei1->id];
1174 occurence[i] += (*it)->aln->getNSite();
1175 }
1176 //cout << neighbors1[i]->id << " " << nodes1[i]->id << nodes1[i]->name <<"," << nodes2[i]->id << nodes2[i]->name <<": " << (nei1->length) / brfreq[nei1->id] << endl;
1177 }
1178 //cout << endl;
1179 }
1180 for (i = 0; i < nodes1.size(); i++) {
1181 if (occurence[i])
1182 neighbors1[i]->length /= occurence[i];
1183 neighbors2[i]->length = neighbors1[i]->length;
1184 }
1185 }
1186
getModelName()1187 string PhyloSuperTree::getModelName() {
1188 return (string)"Partition model";
1189 }
1190
extractSubtree(set<int> & ids)1191 PhyloTree *PhyloSuperTree::extractSubtree(set<int> &ids) {
1192 string union_taxa;
1193 for (auto it = ids.begin(); it != ids.end(); it++) {
1194 int id = *it;
1195 if (id < 0 || id >= size())
1196 outError("Internal error ", __func__);
1197 string taxa_set;
1198 Pattern taxa_pat = aln->getPattern(id);
1199 taxa_set.insert(taxa_set.begin(), taxa_pat.begin(), taxa_pat.end());
1200 if (it == ids.begin()) union_taxa = taxa_set; else {
1201 for (int j = 0; j < union_taxa.length(); j++)
1202 if (taxa_set[j] == 1) union_taxa[j] = 1;
1203 }
1204 }
1205 PhyloTree *tree = new PhyloTree;
1206 tree->copyTree(this, union_taxa);
1207 return tree;
1208 }
1209
getMemoryRequired(size_t ncategory,bool full_mem)1210 uint64_t PhyloSuperTree::getMemoryRequired(size_t ncategory, bool full_mem) {
1211 // uint64_t mem_size = PhyloTree::getMemoryRequired(ncategory);
1212 // supertree does not need any memory for likelihood vectors!
1213 uint64_t mem_size = 0;
1214 for (iterator it = begin(); it != end(); it++)
1215 mem_size += (*it)->getMemoryRequired(ncategory, full_mem);
1216 return mem_size;
1217 }
1218
1219 // get memory requirement for ModelFinder
getMemoryRequiredThreaded(size_t ncategory,bool full_mem)1220 uint64_t PhyloSuperTree::getMemoryRequiredThreaded(size_t ncategory, bool full_mem) {
1221 // only get the largest k partitions (k=#threads)
1222 int threads = (params->num_threads != 0) ? params->num_threads : params->num_threads_max;
1223 threads = min(threads, countPhysicalCPUCores());
1224 threads = min(threads, (int)size());
1225
1226 // sort partition by computational cost for OpenMP effciency
1227 uint64_t *part_mem = new uint64_t[size()];
1228 int i;
1229 for (i = 0; i < size(); i++) {
1230 part_mem[i] = at(i)->getMemoryRequired(ncategory, full_mem);
1231 }
1232
1233 // sort partition memory in increasing order
1234 quicksort<uint64_t, int>(part_mem, 0, size()-1);
1235
1236 uint64_t mem = 0;
1237 for (i = size()-threads; i < size(); i++)
1238 mem += part_mem[i];
1239
1240 delete [] part_mem;
1241
1242 return mem;
1243 }
1244
countEmptyBranches(PhyloNode * node,PhyloNode * dad)1245 int PhyloSuperTree::countEmptyBranches(PhyloNode *node, PhyloNode *dad) {
1246 int count = 0;
1247 if (!node)
1248 node = (PhyloNode*)root;
1249
1250 FOR_NEIGHBOR_IT(node, dad, it) {
1251 SuperNeighbor *nei = (SuperNeighbor*)(*it);
1252 bool isempty = true;
1253 for (PhyloNeighborVec::iterator nit = nei->link_neighbors.begin(); nit != nei->link_neighbors.end(); nit++)
1254 if ((*nit)) {
1255 isempty = false;
1256 break;
1257 }
1258 if (isempty) count++;
1259 count += countEmptyBranches((PhyloNode*)(*it)->node, node);
1260 }
1261 return count;
1262 }
1263
1264 /** remove identical sequences from the tree */
removeIdenticalSeqs(Params & params)1265 void PhyloSuperTree::removeIdenticalSeqs(Params ¶ms) {
1266 IQTree::removeIdenticalSeqs(params);
1267 if (removed_seqs.empty()) return;
1268 // now synchronize aln
1269 int part = 0;
1270 SuperAlignment *saln = (SuperAlignment*)aln;
1271 for (iterator it = begin(); it != end(); it++, part++) {
1272 if (verbose_mode >= VB_MED) {
1273 cout << "Partition " << saln->partitions[part]->name << " " << saln->partitions[part]->getNSeq() <<
1274 " sequences from " << (*it)->aln->getNSeq() << " extracted" << endl;
1275 }
1276 (*it)->aln = saln->partitions[part];
1277 }
1278 if (verbose_mode >= VB_MED) {
1279 cout << "Reduced alignment has " << aln->getNSeq() << " sequences with " << getAlnNSite() << " sites and "
1280 << getAlnNPattern() << " patterns" << endl;
1281 }
1282
1283 }
1284
1285 /** reinsert identical sequences into the tree and reset original alignment */
reinsertIdenticalSeqs(Alignment * orig_aln)1286 void PhyloSuperTree::reinsertIdenticalSeqs(Alignment *orig_aln) {
1287 if (removed_seqs.empty()) return;
1288 IQTree::reinsertIdenticalSeqs(orig_aln);
1289
1290 // now synchronize aln
1291 int part = 0;
1292 for (iterator it = begin(); it != end(); it++, part++) {
1293 // (*it)->setAlignment(((SuperAlignment*)aln)->partitions[part]);
1294 (*it)->aln = ((SuperAlignment*)aln)->partitions[part];
1295 }
1296 mapTrees();
1297
1298
1299 }
1300
fixNegativeBranch(bool force,Node * node,Node * dad)1301 int PhyloSuperTree::fixNegativeBranch(bool force, Node *node, Node *dad) {
1302 mapTrees();
1303 int fixed = 0;
1304 for (iterator it = begin(); it != end(); it++) {
1305 (*it)->initializeAllPartialPars();
1306 (*it)->clearAllPartialLH();
1307 fixed += (*it)->fixNegativeBranch(force);
1308 (*it)->clearAllPartialLH();
1309 }
1310 computeBranchLengths();
1311 return fixed;
1312 }
1313
1314 /****************************************************************************
1315 ancestral sequence reconstruction
1316 ****************************************************************************/
1317
initMarginalAncestralState(ostream & out,bool & orig_kernel_nonrev,double * & ptn_ancestral_prob,int * & ptn_ancestral_seq)1318 void PhyloSuperTree::initMarginalAncestralState(ostream &out, bool &orig_kernel_nonrev, double* &ptn_ancestral_prob, int* &ptn_ancestral_seq) {
1319 orig_kernel_nonrev = params->kernel_nonrev;
1320 if (!orig_kernel_nonrev) {
1321 // switch to nonrev kernel to compute _pattern_lh_cat_state
1322 params->kernel_nonrev = true;
1323 setLikelihoodKernel(sse);
1324 clearAllPartialLH();
1325 }
1326
1327 size_t total_size = 0, total_ptn = 0;
1328
1329 bool mixed_data = false;
1330
1331 for (auto it = begin(); it != end(); it++) {
1332 size_t nptn = (*it)->aln->size();
1333 size_t nstates = (*it)->model->num_states;
1334 (*it)->_pattern_lh_cat_state = (*it)->newPartialLh();
1335 total_size += nptn*nstates;
1336 total_ptn += nptn;
1337 if (nstates != front()->model->num_states)
1338 mixed_data = true;
1339 }
1340
1341 ptn_ancestral_prob = aligned_alloc<double>(total_size);
1342 ptn_ancestral_seq = aligned_alloc<int>(total_ptn);
1343 }
1344
1345 /**
1346 compute ancestral sequence probability for an internal node by marginal reconstruction
1347 (Yang, Kumar and Nei 1995)
1348 @param dad_branch branch leading to an internal node where to obtain ancestral sequence
1349 @param dad dad of the target internal node
1350 @param[out] ptn_ancestral_prob pattern ancestral probability vector of dad_branch->node
1351 */
computeMarginalAncestralState(PhyloNeighbor * dad_branch,PhyloNode * dad,double * ptn_ancestral_prob,int * ptn_ancestral_seq)1352 void PhyloSuperTree::computeMarginalAncestralState(PhyloNeighbor *dad_branch, PhyloNode *dad,
1353 double *ptn_ancestral_prob, int *ptn_ancestral_seq) {
1354
1355 SuperNeighbor *snei = (SuperNeighbor*)dad_branch;
1356 SuperNeighbor *snei_back = (SuperNeighbor*)dad_branch->node->findNeighbor(dad);
1357 int part = 0;
1358 for (auto it = begin(); it != end(); it++, part++) {
1359 size_t nptn = (*it)->getAlnNPattern();
1360 size_t nstates = (*it)->model->num_states;
1361 if (snei->link_neighbors[part]) {
1362 (*it)->computeMarginalAncestralState(snei->link_neighbors[part], (PhyloNode*)snei_back->link_neighbors[part]->node,
1363 ptn_ancestral_prob, ptn_ancestral_seq);
1364 } else {
1365 // branch does not exist in partition tree
1366 double eqprob = 1.0/nstates;
1367 for (size_t ptn = 0; ptn < nptn; ptn++) {
1368 for (size_t i = 0; i < nstates; i++)
1369 ptn_ancestral_prob[ptn*nstates+i] = eqprob;
1370 ptn_ancestral_seq[ptn] = (*it)->aln->STATE_UNKNOWN;
1371 }
1372 }
1373 ptn_ancestral_prob += nptn*nstates;
1374 ptn_ancestral_seq += nptn;
1375 }
1376 }
1377
writeMarginalAncestralState(ostream & out,PhyloNode * node,double * ptn_ancestral_prob,int * ptn_ancestral_seq)1378 void PhyloSuperTree::writeMarginalAncestralState(ostream &out, PhyloNode *node,
1379 double *ptn_ancestral_prob, int *ptn_ancestral_seq) {
1380 int part = 1;
1381 for (auto it = begin(); it != end(); it++, part++) {
1382 size_t site, nsites = (*it)->getAlnNSite(), nstates = (*it)->model->num_states;
1383 for (site = 0; site < nsites; site++) {
1384 int ptn = (*it)->aln->getPatternID(site);
1385 out << node->name << "\t" << part << "\t" << site+1 << "\t";
1386 out << (*it)->aln->convertStateBackStr(ptn_ancestral_seq[ptn]);
1387 double *state_prob = ptn_ancestral_prob + ptn*nstates;
1388 for (size_t j = 0; j < nstates; j++) {
1389 out << "\t" << state_prob[j];
1390 }
1391 out << endl;
1392 }
1393 size_t nptn = (*it)->getAlnNPattern();
1394 ptn_ancestral_prob += nptn*nstates;
1395 ptn_ancestral_seq += nptn;
1396 }
1397 }
1398
1399 /**
1400 end computing ancestral sequence probability for an internal node by marginal reconstruction
1401 */
endMarginalAncestralState(bool orig_kernel_nonrev,double * & ptn_ancestral_prob,int * & ptn_ancestral_seq)1402 void PhyloSuperTree::endMarginalAncestralState(bool orig_kernel_nonrev,
1403 double* &ptn_ancestral_prob, int* &ptn_ancestral_seq) {
1404 if (!orig_kernel_nonrev) {
1405 // switch back to REV kernel
1406 params->kernel_nonrev = orig_kernel_nonrev;
1407 setLikelihoodKernel(sse);
1408 clearAllPartialLH();
1409 }
1410 aligned_free(ptn_ancestral_seq);
1411 aligned_free(ptn_ancestral_prob);
1412
1413 for (auto it = rbegin(); it != rend(); it++) {
1414 aligned_free((*it)->_pattern_lh_cat_state);
1415 (*it)->_pattern_lh_cat_state = NULL;
1416 }
1417 }
1418
writeSiteLh(ostream & out,SiteLoglType wsl,int partid)1419 void PhyloSuperTree::writeSiteLh(ostream &out, SiteLoglType wsl, int partid) {
1420 int part = 1;
1421 for (auto it = begin(); it != end(); it++, part++)
1422 (*it)->writeSiteLh(out, wsl, part);
1423 }
1424
writeSiteRates(ostream & out,bool bayes,int partid)1425 void PhyloSuperTree::writeSiteRates(ostream &out, bool bayes, int partid) {
1426
1427 int part = 1;
1428 for (iterator it = begin(); it != end(); it++, part++) {
1429 (*it)->writeSiteRates(out, bayes, part);
1430 }
1431 }
1432
writeBranch(ostream & out,Node * node1,Node * node2)1433 void PhyloSuperTree::writeBranch(ostream &out, Node* node1, Node* node2) {
1434 SuperNeighbor *nei1 = (SuperNeighbor*)node1->findNeighbor(node2);
1435 StrVector taxnames;
1436 if (getNumTaxa(node1, node2) <= leafNum / 2)
1437 getTaxaName(taxnames, node1, node2);
1438 else
1439 getTaxaName(taxnames, node2, node1);
1440 out << nei1->id+1 << ",";
1441 bool first = true;
1442 for (int i = 0; i < taxnames.size(); i++)
1443 if (!taxnames[i].empty()) {
1444 if (!first) out << " ";
1445 out << taxnames[i];
1446 first = false;
1447 }
1448 out << "," << nei1->length;
1449 for (int part = 0; part != size(); part++) {
1450 bool present = true;
1451 FOR_NEIGHBOR_DECLARE(node1, NULL, it) {
1452 SuperNeighbor *nei = (SuperNeighbor*)(*it);
1453 if (!nei->link_neighbors[part])
1454 present = false;
1455 }
1456 FOR_NEIGHBOR(node2, NULL, it) {
1457 SuperNeighbor *nei = (SuperNeighbor*)(*it);
1458 if (!nei->link_neighbors[part])
1459 present = false;
1460 }
1461 out << ",";
1462 if (present)
1463 out << nei1->link_neighbors[part]->length;
1464 }
1465 }
1466
writeBranches(ostream & out)1467 void PhyloSuperTree::writeBranches(ostream &out) {
1468 NodeVector nodes1, nodes2;
1469 getBranches(nodes1, nodes2);
1470 int i;
1471 out << "ID,Taxa,Len";
1472 for (i = 0; i < size(); i++)
1473 out << "," << at(i)->aln->name;
1474 out << endl;
1475 for (i = 0; i < nodes1.size(); i++) {
1476 writeBranch(out, nodes1[i], nodes2[i]);
1477 out << endl;
1478 }
1479 }
1480
printBestPartitionParams(const char * filename)1481 void PhyloSuperTree::printBestPartitionParams(const char *filename) {
1482 try {
1483 ofstream out;
1484 out.exceptions(ios::failbit | ios::badbit);
1485 out.open(filename);
1486 out << "#nexus" << endl
1487 << "begin sets;" << endl;
1488 int part;
1489 SuperAlignment *saln = (SuperAlignment*)aln;
1490 for (part = 0; part < size(); part++) {
1491 string name = saln->partitions[part]->name;
1492 replace(name.begin(), name.end(), '+', '_');
1493 out << " charset " << name << " = ";
1494 if (!saln->partitions[part]->aln_file.empty()) out << saln->partitions[part]->aln_file << ": ";
1495 if (saln->partitions[part]->seq_type == SEQ_CODON)
1496 out << "CODON, ";
1497 string pos = saln->partitions[part]->position_spec;
1498 replace(pos.begin(), pos.end(), ',' , ' ');
1499 out << pos << ";" << endl;
1500 }
1501 out << " charpartition mymodels =" << endl;
1502 for (part = 0; part < size(); part++) {
1503 string name = saln->partitions[part]->name;
1504 replace(name.begin(), name.end(), '+', '_');
1505 if (part > 0) out << "," << endl;
1506 out << " " << at(part)->getModelNameParams() << ": " << name << "{" << at(part)->treeLength() << "}";
1507 }
1508 out << ";" << endl;
1509 out << "end;" << endl;
1510 out.close();
1511 cout << "Partition information was printed to " << filename << endl;
1512 } catch (ios::failure &) {
1513 outError(ERR_WRITE_OUTPUT, filename);
1514 }
1515 }
1516