1 //
2 //  phylosupertreeunlinked.cpp
3 //  tree
4 //
5 //  Created by Minh Bui on 2/5/18.
6 //
7 
8 #include "phylosupertreeunlinked.h"
9 #include "utils/MPIHelper.h"
10 #include "utils/timeutil.h"
11 
12 extern ostream cmust;
13 
PhyloSuperTreeUnlinked(SuperAlignment * alignment)14 PhyloSuperTreeUnlinked::PhyloSuperTreeUnlinked(SuperAlignment *alignment)
15 : PhyloSuperTree(alignment, true)
16 {
17 }
18 
19 
readTree(istream & in,bool & is_rooted)20 void PhyloSuperTreeUnlinked::readTree(istream &in, bool &is_rooted) {
21     for (iterator it = begin(); it != end(); it++) {
22         (*it)->rooted = Params::getInstance().is_rooted;
23         (*it)->readTree(in, (*it)->rooted);
24         is_rooted |= (*it)->rooted;
25     }
26 }
27 
setAlignment(Alignment * alignment)28 void PhyloSuperTreeUnlinked::setAlignment(Alignment *alignment) {
29     ASSERT(alignment->isSuperAlignment());
30     SuperAlignment *saln = (SuperAlignment*)alignment;
31     ASSERT(saln->partitions.size() == size());
32     for (int i = 0; i < size(); i++)
33         at(i)->setAlignment(saln->partitions[i]);
34 }
35 
36 /**
37  * setup all necessary parameters  (declared as virtual needed for phylosupertree)
38  */
initSettings(Params & params)39 void PhyloSuperTreeUnlinked::initSettings(Params& params) {
40     PhyloSuperTree::initSettings(params);
41     for (auto it = begin(); it != end(); it++)
42         ((IQTree*)(*it))->initSettings(params);
43 }
44 
mapTrees()45 void PhyloSuperTreeUnlinked::mapTrees() {
46     // do nothing here as partition trees are unlinked
47 }
48 
computeParsimonyTree(const char * out_prefix,Alignment * alignment,int * rand_stream)49 int PhyloSuperTreeUnlinked::computeParsimonyTree(const char *out_prefix, Alignment *alignment, int *rand_stream) {
50     SuperAlignment *saln = (SuperAlignment*)alignment;
51     int score = 0;
52     int i;
53     ASSERT(saln->partitions.size() == size());
54     for (i = 0; i < size(); i++) {
55         score += at(i)->computeParsimonyTree(NULL, saln->partitions[i], rand_stream);
56     }
57     if (out_prefix) {
58         string file_name = out_prefix;
59         file_name += ".parstree";
60         try {
61             ofstream out;
62             out.open(file_name.c_str());
63             for (i = 0; i < size(); i++) {
64                 at(i)->printTree(out, WT_NEWLINE);
65             }
66             out.close();
67         } catch (...) {
68             outError("Cannot write to file ", file_name);
69         }
70     }
71     return score;
72 }
73 
wrapperFixNegativeBranch(bool force_change)74 int PhyloSuperTreeUnlinked::wrapperFixNegativeBranch(bool force_change) {
75     // Initialize branch lengths for the parsimony tree
76     int numFixed = 0;
77     for (auto tree = begin(); tree != end(); tree++) {
78         numFixed += (*tree)->fixNegativeBranch(force_change);
79         (*tree)->resetCurScore();
80     }
81     return numFixed;
82 }
83 
isBifurcating(Node * node,Node * dad)84 bool PhyloSuperTreeUnlinked::isBifurcating(Node *node, Node *dad) {
85     for (auto it = begin(); it != end(); it++)
86         if (!(*it)->isBifurcating())
87             return false;
88     return true;
89 }
90 
getTreeString()91 string PhyloSuperTreeUnlinked::getTreeString() {
92     stringstream tree_stream;
93     for (iterator it = begin(); it != end(); it++)
94         (*it)->printTree(tree_stream, WT_TAXON_ID + WT_BR_LEN + WT_SORT_TAXA);
95     return tree_stream.str();
96 }
97 
readTreeString(const string & tree_string)98 void PhyloSuperTreeUnlinked::readTreeString(const string &tree_string) {
99     stringstream str;
100     str << tree_string;
101     str.seekg(0, ios::beg);
102     for (iterator it = begin(); it != end(); it++) {
103         (*it)->freeNode();
104         (*it)->readTree(str, rooted);
105         (*it)->assignLeafNames();
106         (*it)->resetCurScore();
107     }
108 }
109 
saveCheckpoint()110 void PhyloSuperTreeUnlinked::saveCheckpoint() {
111     for (iterator it = begin(); it != end(); it++) {
112         checkpoint->startStruct((*it)->aln->name);
113         (*it)->saveCheckpoint();
114         checkpoint->endStruct();
115     }
116 }
117 
restoreCheckpoint()118 void PhyloSuperTreeUnlinked::restoreCheckpoint() {
119     for (iterator it = begin(); it != end(); it++) {
120         checkpoint->startStruct((*it)->aln->name);
121         (*it)->restoreCheckpoint();
122         checkpoint->endStruct();
123     }
124 }
125 
126 /**
127  * save branch lengths into a vector
128  */
saveBranchLengths(DoubleVector & lenvec,int startid,PhyloNode * node,PhyloNode * dad)129 void PhyloSuperTreeUnlinked::saveBranchLengths(DoubleVector &lenvec, int startid, PhyloNode *node, PhyloNode *dad) {
130     int totalBranchNum = 0;
131     iterator it;
132     for (it = begin(); it != end(); it++) {
133         totalBranchNum += (*it)->branchNum * (*it)->getMixlen();
134     }
135     lenvec.resize(startid + totalBranchNum);
136 
137     for (iterator it = begin(); it != end(); it++) {
138         (*it)->saveBranchLengths(lenvec, startid);
139         startid += (*it)->branchNum * (*it)->getMixlen();
140     }
141 }
142 /**
143  * restore branch lengths from a vector previously called with saveBranchLengths
144  */
restoreBranchLengths(DoubleVector & lenvec,int startid,PhyloNode * node,PhyloNode * dad)145 void PhyloSuperTreeUnlinked::restoreBranchLengths(DoubleVector &lenvec, int startid, PhyloNode *node, PhyloNode *dad) {
146     for (iterator it = begin(); it != end(); it++) {
147         (*it)->restoreBranchLengths(lenvec, startid);
148         startid += (*it)->branchNum * (*it)->getMixlen();
149     }
150 }
151 
setRootNode(const char * my_root,bool multi_taxa)152 void PhyloSuperTreeUnlinked::setRootNode(const char *my_root, bool multi_taxa) {
153     // DOES NOTHING
154 }
155 
computeBranchLengths()156 void PhyloSuperTreeUnlinked::computeBranchLengths() {
157     // DOES NOTHING
158 }
159 
printTree(ostream & out,int brtype)160 void PhyloSuperTreeUnlinked::printTree(ostream &out, int brtype) {
161     for (iterator tree = begin(); tree != end(); tree++)
162         (*tree)->printTree(out, brtype);
163 }
164 
printResultTree(string suffix)165 void PhyloSuperTreeUnlinked::printResultTree(string suffix) {
166     if (MPIHelper::getInstance().isWorker()) {
167         return;
168     }
169     if (params->suppress_output_flags & OUT_TREEFILE)
170         return;
171     string tree_file_name = params->out_prefix;
172     tree_file_name += ".treefile";
173     if (suffix.compare("") != 0) {
174         tree_file_name += "." + suffix;
175     }
176     ofstream out;
177     out.open(tree_file_name.c_str());
178     for (iterator tree = begin(); tree != end(); tree++)
179         (*tree)->printTree(out, WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA | WT_NEWLINE);
180     out.close();
181     if (verbose_mode >= VB_MED)
182         cout << "Best tree printed to " << tree_file_name << endl;
183 }
184 
treeLength(Node * node,Node * dad)185 double PhyloSuperTreeUnlinked::treeLength(Node *node, Node *dad) {
186     double len = 0.0;
187     for (iterator tree = begin(); tree != end(); tree++)
188         len += (*tree)->treeLength();
189     return len;
190 }
191 
treeLengthInternal(double epsilon,Node * node,Node * dad)192 double PhyloSuperTreeUnlinked::treeLengthInternal( double epsilon, Node *node, Node *dad) {
193     double len = 0.0;
194     for (iterator tree = begin(); tree != end(); tree++)
195         len += (*tree)->treeLengthInternal(epsilon);
196     return len;
197 }
198 
doNNISearch(bool write_info)199 pair<int, int> PhyloSuperTreeUnlinked::doNNISearch(bool write_info) {
200     int NNIs = 0, NNI_steps = 0;
201     double score = 0.0;
202 #pragma omp parallel for schedule(dynamic) num_threads(num_threads) if (num_threads > 1) reduction(+: NNIs, NNI_steps, score)
203     for (int i = 0; i < size(); i++) {
204         IQTree *part_tree = (IQTree*)at(part_order[i]);
205         Checkpoint *ckp = new Checkpoint;
206         getCheckpoint()->getSubCheckpoint(ckp, part_tree->aln->name);
207         part_tree->setCheckpoint(ckp);
208         auto num_NNIs = part_tree->doNNISearch(false);
209         NNIs += num_NNIs.first;
210         NNI_steps += num_NNIs.second;
211         score += part_tree->getCurScore();
212 #pragma omp critical
213         {
214         getCheckpoint()->putSubCheckpoint(ckp, part_tree->aln->name);
215         getCheckpoint()->dump();
216         }
217         delete ckp;
218         part_tree->setCheckpoint(getCheckpoint());
219     }
220 
221     setCurScore(score);
222     cout << "Log-likelihood: " << score << endl;
223     return std::make_pair(NNIs, NNI_steps);
224 }
225 
doTreeSearch()226 double PhyloSuperTreeUnlinked::doTreeSearch() {
227     double tree_lh = 0.0;
228     string bestTree;
229 
230     cout << "--------------------------------------------------------------------" << endl;
231     cout << "|                SEPARATE TREE SEARCH FOR PARTITIONS               |" << endl;
232     cout << "--------------------------------------------------------------------" << endl;
233 
234     if (part_order.empty())
235         computePartitionOrder();
236 
237     int saved_flag = params->suppress_output_flags;
238     params->suppress_output_flags |= OUT_TREEFILE + OUT_LOG;
239     VerboseMode saved_mode = verbose_mode;
240     verbose_mode = VB_QUIET;
241     bool saved_print_ufboot_trees = params->print_ufboot_trees;
242     params->print_ufboot_trees = false;
243 
244 #pragma omp parallel for schedule(dynamic) num_threads(num_threads) if (num_threads > 1) reduction(+: tree_lh)
245     for (int i = 0; i < size(); i++) {
246         IQTree *part_tree = (IQTree*)at(part_order[i]);
247         Checkpoint *ckp = new Checkpoint;
248         getCheckpoint()->getSubCheckpoint(ckp, part_tree->aln->name);
249         part_tree->setCheckpoint(ckp);
250         double score = part_tree->doTreeSearch();
251         tree_lh += score;
252 #pragma omp critical
253         {
254             getCheckpoint()->putSubCheckpoint(ckp, part_tree->aln->name);
255             getCheckpoint()->dump();
256             cmust << "Partition " << part_tree->aln->name
257                  << " / Iterations: " << part_tree->stop_rule.getCurIt()
258                  << " / LogL: " << score
259                  << " / Time: " << convert_time(getRealTime() - params->start_real_time)
260                  << endl;
261         }
262         delete ckp;
263         part_tree->setCheckpoint(getCheckpoint());
264     }
265 
266     verbose_mode = saved_mode;
267     params->suppress_output_flags= saved_flag;
268     params->print_ufboot_trees = saved_print_ufboot_trees;
269 
270     if (tree_lh < curScore)
271         cout << "BETTER TREE FOUND: " << tree_lh << endl;
272     curScore = tree_lh;
273 
274     bestTree = getTreeString();
275     addTreeToCandidateSet(bestTree, getCurScore(), false, MPIHelper::getInstance().getProcessID());
276     printResultTree();
277     intermediateTrees.update(bestTree, getCurScore());
278     candidateTrees.saveCheckpoint();
279     return curScore;
280 }
281 
summarizeBootstrap(Params & params)282 void PhyloSuperTreeUnlinked::summarizeBootstrap(Params &params) {
283     for (auto tree = begin(); tree != end(); tree++)
284         ((IQTree*)*tree)->summarizeBootstrap(params);
285 }
286 
writeUFBootTrees(Params & params)287 void PhyloSuperTreeUnlinked::writeUFBootTrees(Params &params) {
288     //    IntVector tree_weights;
289     int i, j;
290     string filename = params.out_prefix;
291     filename += ".ufboot";
292     ofstream out(filename.c_str());
293 
294     for (auto tree = begin(); tree != end(); tree++) {
295         MTreeSet trees;
296 
297         trees.init(((IQTree*)*tree)->boot_trees, (*tree)->rooted);
298         for (i = 0; i < trees.size(); i++) {
299             NodeVector taxa;
300             // change the taxa name from ID to real name
301             trees[i]->getOrderedTaxa(taxa);
302             for (j = 0; j < taxa.size(); j++)
303                 taxa[j]->name = aln->getSeqName(taxa[j]->id);
304             if (removed_seqs.size() > 0) {
305                 // reinsert removed seqs into each tree
306                 trees[i]->insertTaxa(removed_seqs, twin_seqs);
307             }
308             // now print to file
309             for (j = 0; j < trees.tree_weights[i]; j++)
310                 if (params.print_ufboot_trees == 1)
311                     trees[i]->printTree(out, WT_NEWLINE);
312                 else
313                     trees[i]->printTree(out, WT_NEWLINE + WT_BR_LEN);
314         }
315     }
316     cout << "UFBoot trees printed to " << filename << endl;
317     out.close();
318 }
319 
320 /**
321  Test all branches of the tree with aLRT SH-like interpretation
322  */
testAllBranches(int threshold,double best_score,double * pattern_lh,int reps,int lbp_reps,bool aLRT_test,bool aBayes_test,PhyloNode * node,PhyloNode * dad)323 int PhyloSuperTreeUnlinked::testAllBranches(int threshold, double best_score, double *pattern_lh,
324                             int reps, int lbp_reps, bool aLRT_test, bool aBayes_test,
325                             PhyloNode *node, PhyloNode *dad)
326 {
327     int id;
328     int num_low_support = 0;
329     double *ptn_lh[size()];
330     ptn_lh[0] = pattern_lh;
331     for (id = 1; id < size(); id++)
332         ptn_lh[id] = ptn_lh[id-1] + at(id-1)->getAlnNPattern();
333 #ifdef _OPENMP
334 #pragma omp parallel for reduction(+: num_low_support)
335 #endif
336     for (int id = 0; id < size(); id++) {
337         num_low_support += at(id)->testAllBranches(threshold, at(id)->getCurScore(), ptn_lh[id],
338                             reps, lbp_reps, aLRT_test, aBayes_test);
339     }
340     return num_low_support;
341 }
342 
testNumThreads()343 int PhyloSuperTreeUnlinked::testNumThreads() {
344 #ifdef _OPENMP
345     // unlinked partitions scales well with many cores
346     int bestProc = min(countPhysicalCPUCores(), params->num_threads_max);
347     bestProc = min(bestProc, (int)size());
348     cout << "BEST NUMBER OF THREADS: " << bestProc << endl << endl;
349     setNumThreads(bestProc);
350     return bestProc;
351 #else
352     return 1;
353 #endif
354 }
355