1 //
2 // phylosupertreeunlinked.cpp
3 // tree
4 //
5 // Created by Minh Bui on 2/5/18.
6 //
7
8 #include "phylosupertreeunlinked.h"
9 #include "utils/MPIHelper.h"
10 #include "utils/timeutil.h"
11
12 extern ostream cmust;
13
PhyloSuperTreeUnlinked(SuperAlignment * alignment)14 PhyloSuperTreeUnlinked::PhyloSuperTreeUnlinked(SuperAlignment *alignment)
15 : PhyloSuperTree(alignment, true)
16 {
17 }
18
19
readTree(istream & in,bool & is_rooted)20 void PhyloSuperTreeUnlinked::readTree(istream &in, bool &is_rooted) {
21 for (iterator it = begin(); it != end(); it++) {
22 (*it)->rooted = Params::getInstance().is_rooted;
23 (*it)->readTree(in, (*it)->rooted);
24 is_rooted |= (*it)->rooted;
25 }
26 }
27
setAlignment(Alignment * alignment)28 void PhyloSuperTreeUnlinked::setAlignment(Alignment *alignment) {
29 ASSERT(alignment->isSuperAlignment());
30 SuperAlignment *saln = (SuperAlignment*)alignment;
31 ASSERT(saln->partitions.size() == size());
32 for (int i = 0; i < size(); i++)
33 at(i)->setAlignment(saln->partitions[i]);
34 }
35
36 /**
37 * setup all necessary parameters (declared as virtual needed for phylosupertree)
38 */
initSettings(Params & params)39 void PhyloSuperTreeUnlinked::initSettings(Params& params) {
40 PhyloSuperTree::initSettings(params);
41 for (auto it = begin(); it != end(); it++)
42 ((IQTree*)(*it))->initSettings(params);
43 }
44
mapTrees()45 void PhyloSuperTreeUnlinked::mapTrees() {
46 // do nothing here as partition trees are unlinked
47 }
48
computeParsimonyTree(const char * out_prefix,Alignment * alignment,int * rand_stream)49 int PhyloSuperTreeUnlinked::computeParsimonyTree(const char *out_prefix, Alignment *alignment, int *rand_stream) {
50 SuperAlignment *saln = (SuperAlignment*)alignment;
51 int score = 0;
52 int i;
53 ASSERT(saln->partitions.size() == size());
54 for (i = 0; i < size(); i++) {
55 score += at(i)->computeParsimonyTree(NULL, saln->partitions[i], rand_stream);
56 }
57 if (out_prefix) {
58 string file_name = out_prefix;
59 file_name += ".parstree";
60 try {
61 ofstream out;
62 out.open(file_name.c_str());
63 for (i = 0; i < size(); i++) {
64 at(i)->printTree(out, WT_NEWLINE);
65 }
66 out.close();
67 } catch (...) {
68 outError("Cannot write to file ", file_name);
69 }
70 }
71 return score;
72 }
73
wrapperFixNegativeBranch(bool force_change)74 int PhyloSuperTreeUnlinked::wrapperFixNegativeBranch(bool force_change) {
75 // Initialize branch lengths for the parsimony tree
76 int numFixed = 0;
77 for (auto tree = begin(); tree != end(); tree++) {
78 numFixed += (*tree)->fixNegativeBranch(force_change);
79 (*tree)->resetCurScore();
80 }
81 return numFixed;
82 }
83
isBifurcating(Node * node,Node * dad)84 bool PhyloSuperTreeUnlinked::isBifurcating(Node *node, Node *dad) {
85 for (auto it = begin(); it != end(); it++)
86 if (!(*it)->isBifurcating())
87 return false;
88 return true;
89 }
90
getTreeString()91 string PhyloSuperTreeUnlinked::getTreeString() {
92 stringstream tree_stream;
93 for (iterator it = begin(); it != end(); it++)
94 (*it)->printTree(tree_stream, WT_TAXON_ID + WT_BR_LEN + WT_SORT_TAXA);
95 return tree_stream.str();
96 }
97
readTreeString(const string & tree_string)98 void PhyloSuperTreeUnlinked::readTreeString(const string &tree_string) {
99 stringstream str;
100 str << tree_string;
101 str.seekg(0, ios::beg);
102 for (iterator it = begin(); it != end(); it++) {
103 (*it)->freeNode();
104 (*it)->readTree(str, rooted);
105 (*it)->assignLeafNames();
106 (*it)->resetCurScore();
107 }
108 }
109
saveCheckpoint()110 void PhyloSuperTreeUnlinked::saveCheckpoint() {
111 for (iterator it = begin(); it != end(); it++) {
112 checkpoint->startStruct((*it)->aln->name);
113 (*it)->saveCheckpoint();
114 checkpoint->endStruct();
115 }
116 }
117
restoreCheckpoint()118 void PhyloSuperTreeUnlinked::restoreCheckpoint() {
119 for (iterator it = begin(); it != end(); it++) {
120 checkpoint->startStruct((*it)->aln->name);
121 (*it)->restoreCheckpoint();
122 checkpoint->endStruct();
123 }
124 }
125
126 /**
127 * save branch lengths into a vector
128 */
saveBranchLengths(DoubleVector & lenvec,int startid,PhyloNode * node,PhyloNode * dad)129 void PhyloSuperTreeUnlinked::saveBranchLengths(DoubleVector &lenvec, int startid, PhyloNode *node, PhyloNode *dad) {
130 int totalBranchNum = 0;
131 iterator it;
132 for (it = begin(); it != end(); it++) {
133 totalBranchNum += (*it)->branchNum * (*it)->getMixlen();
134 }
135 lenvec.resize(startid + totalBranchNum);
136
137 for (iterator it = begin(); it != end(); it++) {
138 (*it)->saveBranchLengths(lenvec, startid);
139 startid += (*it)->branchNum * (*it)->getMixlen();
140 }
141 }
142 /**
143 * restore branch lengths from a vector previously called with saveBranchLengths
144 */
restoreBranchLengths(DoubleVector & lenvec,int startid,PhyloNode * node,PhyloNode * dad)145 void PhyloSuperTreeUnlinked::restoreBranchLengths(DoubleVector &lenvec, int startid, PhyloNode *node, PhyloNode *dad) {
146 for (iterator it = begin(); it != end(); it++) {
147 (*it)->restoreBranchLengths(lenvec, startid);
148 startid += (*it)->branchNum * (*it)->getMixlen();
149 }
150 }
151
setRootNode(const char * my_root,bool multi_taxa)152 void PhyloSuperTreeUnlinked::setRootNode(const char *my_root, bool multi_taxa) {
153 // DOES NOTHING
154 }
155
computeBranchLengths()156 void PhyloSuperTreeUnlinked::computeBranchLengths() {
157 // DOES NOTHING
158 }
159
printTree(ostream & out,int brtype)160 void PhyloSuperTreeUnlinked::printTree(ostream &out, int brtype) {
161 for (iterator tree = begin(); tree != end(); tree++)
162 (*tree)->printTree(out, brtype);
163 }
164
printResultTree(string suffix)165 void PhyloSuperTreeUnlinked::printResultTree(string suffix) {
166 if (MPIHelper::getInstance().isWorker()) {
167 return;
168 }
169 if (params->suppress_output_flags & OUT_TREEFILE)
170 return;
171 string tree_file_name = params->out_prefix;
172 tree_file_name += ".treefile";
173 if (suffix.compare("") != 0) {
174 tree_file_name += "." + suffix;
175 }
176 ofstream out;
177 out.open(tree_file_name.c_str());
178 for (iterator tree = begin(); tree != end(); tree++)
179 (*tree)->printTree(out, WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA | WT_NEWLINE);
180 out.close();
181 if (verbose_mode >= VB_MED)
182 cout << "Best tree printed to " << tree_file_name << endl;
183 }
184
treeLength(Node * node,Node * dad)185 double PhyloSuperTreeUnlinked::treeLength(Node *node, Node *dad) {
186 double len = 0.0;
187 for (iterator tree = begin(); tree != end(); tree++)
188 len += (*tree)->treeLength();
189 return len;
190 }
191
treeLengthInternal(double epsilon,Node * node,Node * dad)192 double PhyloSuperTreeUnlinked::treeLengthInternal( double epsilon, Node *node, Node *dad) {
193 double len = 0.0;
194 for (iterator tree = begin(); tree != end(); tree++)
195 len += (*tree)->treeLengthInternal(epsilon);
196 return len;
197 }
198
doNNISearch(bool write_info)199 pair<int, int> PhyloSuperTreeUnlinked::doNNISearch(bool write_info) {
200 int NNIs = 0, NNI_steps = 0;
201 double score = 0.0;
202 #pragma omp parallel for schedule(dynamic) num_threads(num_threads) if (num_threads > 1) reduction(+: NNIs, NNI_steps, score)
203 for (int i = 0; i < size(); i++) {
204 IQTree *part_tree = (IQTree*)at(part_order[i]);
205 Checkpoint *ckp = new Checkpoint;
206 getCheckpoint()->getSubCheckpoint(ckp, part_tree->aln->name);
207 part_tree->setCheckpoint(ckp);
208 auto num_NNIs = part_tree->doNNISearch(false);
209 NNIs += num_NNIs.first;
210 NNI_steps += num_NNIs.second;
211 score += part_tree->getCurScore();
212 #pragma omp critical
213 {
214 getCheckpoint()->putSubCheckpoint(ckp, part_tree->aln->name);
215 getCheckpoint()->dump();
216 }
217 delete ckp;
218 part_tree->setCheckpoint(getCheckpoint());
219 }
220
221 setCurScore(score);
222 cout << "Log-likelihood: " << score << endl;
223 return std::make_pair(NNIs, NNI_steps);
224 }
225
doTreeSearch()226 double PhyloSuperTreeUnlinked::doTreeSearch() {
227 double tree_lh = 0.0;
228 string bestTree;
229
230 cout << "--------------------------------------------------------------------" << endl;
231 cout << "| SEPARATE TREE SEARCH FOR PARTITIONS |" << endl;
232 cout << "--------------------------------------------------------------------" << endl;
233
234 if (part_order.empty())
235 computePartitionOrder();
236
237 int saved_flag = params->suppress_output_flags;
238 params->suppress_output_flags |= OUT_TREEFILE + OUT_LOG;
239 VerboseMode saved_mode = verbose_mode;
240 verbose_mode = VB_QUIET;
241 bool saved_print_ufboot_trees = params->print_ufboot_trees;
242 params->print_ufboot_trees = false;
243
244 #pragma omp parallel for schedule(dynamic) num_threads(num_threads) if (num_threads > 1) reduction(+: tree_lh)
245 for (int i = 0; i < size(); i++) {
246 IQTree *part_tree = (IQTree*)at(part_order[i]);
247 Checkpoint *ckp = new Checkpoint;
248 getCheckpoint()->getSubCheckpoint(ckp, part_tree->aln->name);
249 part_tree->setCheckpoint(ckp);
250 double score = part_tree->doTreeSearch();
251 tree_lh += score;
252 #pragma omp critical
253 {
254 getCheckpoint()->putSubCheckpoint(ckp, part_tree->aln->name);
255 getCheckpoint()->dump();
256 cmust << "Partition " << part_tree->aln->name
257 << " / Iterations: " << part_tree->stop_rule.getCurIt()
258 << " / LogL: " << score
259 << " / Time: " << convert_time(getRealTime() - params->start_real_time)
260 << endl;
261 }
262 delete ckp;
263 part_tree->setCheckpoint(getCheckpoint());
264 }
265
266 verbose_mode = saved_mode;
267 params->suppress_output_flags= saved_flag;
268 params->print_ufboot_trees = saved_print_ufboot_trees;
269
270 if (tree_lh < curScore)
271 cout << "BETTER TREE FOUND: " << tree_lh << endl;
272 curScore = tree_lh;
273
274 bestTree = getTreeString();
275 addTreeToCandidateSet(bestTree, getCurScore(), false, MPIHelper::getInstance().getProcessID());
276 printResultTree();
277 intermediateTrees.update(bestTree, getCurScore());
278 candidateTrees.saveCheckpoint();
279 return curScore;
280 }
281
summarizeBootstrap(Params & params)282 void PhyloSuperTreeUnlinked::summarizeBootstrap(Params ¶ms) {
283 for (auto tree = begin(); tree != end(); tree++)
284 ((IQTree*)*tree)->summarizeBootstrap(params);
285 }
286
writeUFBootTrees(Params & params)287 void PhyloSuperTreeUnlinked::writeUFBootTrees(Params ¶ms) {
288 // IntVector tree_weights;
289 int i, j;
290 string filename = params.out_prefix;
291 filename += ".ufboot";
292 ofstream out(filename.c_str());
293
294 for (auto tree = begin(); tree != end(); tree++) {
295 MTreeSet trees;
296
297 trees.init(((IQTree*)*tree)->boot_trees, (*tree)->rooted);
298 for (i = 0; i < trees.size(); i++) {
299 NodeVector taxa;
300 // change the taxa name from ID to real name
301 trees[i]->getOrderedTaxa(taxa);
302 for (j = 0; j < taxa.size(); j++)
303 taxa[j]->name = aln->getSeqName(taxa[j]->id);
304 if (removed_seqs.size() > 0) {
305 // reinsert removed seqs into each tree
306 trees[i]->insertTaxa(removed_seqs, twin_seqs);
307 }
308 // now print to file
309 for (j = 0; j < trees.tree_weights[i]; j++)
310 if (params.print_ufboot_trees == 1)
311 trees[i]->printTree(out, WT_NEWLINE);
312 else
313 trees[i]->printTree(out, WT_NEWLINE + WT_BR_LEN);
314 }
315 }
316 cout << "UFBoot trees printed to " << filename << endl;
317 out.close();
318 }
319
320 /**
321 Test all branches of the tree with aLRT SH-like interpretation
322 */
testAllBranches(int threshold,double best_score,double * pattern_lh,int reps,int lbp_reps,bool aLRT_test,bool aBayes_test,PhyloNode * node,PhyloNode * dad)323 int PhyloSuperTreeUnlinked::testAllBranches(int threshold, double best_score, double *pattern_lh,
324 int reps, int lbp_reps, bool aLRT_test, bool aBayes_test,
325 PhyloNode *node, PhyloNode *dad)
326 {
327 int id;
328 int num_low_support = 0;
329 double *ptn_lh[size()];
330 ptn_lh[0] = pattern_lh;
331 for (id = 1; id < size(); id++)
332 ptn_lh[id] = ptn_lh[id-1] + at(id-1)->getAlnNPattern();
333 #ifdef _OPENMP
334 #pragma omp parallel for reduction(+: num_low_support)
335 #endif
336 for (int id = 0; id < size(); id++) {
337 num_low_support += at(id)->testAllBranches(threshold, at(id)->getCurScore(), ptn_lh[id],
338 reps, lbp_reps, aLRT_test, aBayes_test);
339 }
340 return num_low_support;
341 }
342
testNumThreads()343 int PhyloSuperTreeUnlinked::testNumThreads() {
344 #ifdef _OPENMP
345 // unlinked partitions scales well with many cores
346 int bestProc = min(countPhysicalCPUCores(), params->num_threads_max);
347 bestProc = min(bestProc, (int)size());
348 cout << "BEST NUMBER OF THREADS: " << bestProc << endl << endl;
349 setNumThreads(bestProc);
350 return bestProc;
351 #else
352 return 1;
353 #endif
354 }
355