1 /***************************************************************************
2 * Copyright (C) 2009-2015 by *
3 * BUI Quang Minh <minh.bui@univie.ac.at> *
4 * Lam-Tung Nguyen <nltung@gmail.com> *
5 * *
6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
21 ***************************************************************************/
22 #include "iqtree.h"
23 #include "phylotreemixlen.h"
24 #include "phylosupertree.h"
25 #include "phylosupertreeplen.h"
26 #include "model/partitionmodelplen.h"
27 #include "mexttree.h"
28 #include "utils/timeutil.h"
29 #include "model/modelmarkov.h"
30 #include "model/rategamma.h"
31 //#include "phylotreemixlen.h"
32 //#include "model/modelfactorymixlen.h"
33 #include <numeric>
34 #include "utils/tools.h"
35 #include "utils/MPIHelper.h"
36 #include "utils/pllnni.h"
37
38 Params *globalParams;
39 Alignment *globalAlignment;
40 extern StringIntMap pllTreeCounter;
41
IQTree()42 IQTree::IQTree() : PhyloTree() {
43 IQTree::init();
44 }
45
init()46 void IQTree::init() {
47 // PhyloTree::init();
48 k_represent = 0;
49 k_delete = k_delete_min = k_delete_max = k_delete_stay = 0;
50 dist_matrix = NULL;
51 var_matrix = NULL;
52 // curScore = 0.0; // Current score of the tree
53 cur_pars_score = -1;
54 // enable_parsimony = false;
55 estimate_nni_cutoff = false;
56 nni_cutoff = -1e6;
57 nni_sort = false;
58 testNNI = false;
59 // print_tree_lh = false;
60 // write_intermediate_trees = 0;
61 // max_candidate_trees = 0;
62 logl_cutoff = 0.0;
63 len_scale = 10000;
64 // save_all_br_lens = false;
65 duplication_counter = 0;
66 //boot_splits = new SplitGraph;
67 pll2iqtree_pattern_index = NULL;
68
69 treels_name = Params::getInstance().out_prefix;
70 treels_name += ".treels";
71 out_lh_file = Params::getInstance().out_prefix;
72 out_lh_file += ".treelh";
73 site_lh_file = Params::getInstance().out_prefix;
74 site_lh_file += ".sitelh";
75
76 if (Params::getInstance().print_tree_lh) {
77 out_treelh.open(out_lh_file.c_str());
78 out_sitelh.open(site_lh_file.c_str());
79 }
80
81 if (Params::getInstance().write_intermediate_trees)
82 out_treels.open(treels_name.c_str());
83 on_refine_btree = false;
84 contree_rfdist = -1;
85 boot_consense_logl = 0.0;
86
87 }
88
IQTree(Alignment * aln)89 IQTree::IQTree(Alignment *aln) : PhyloTree(aln) {
90 IQTree::init();
91 }
92
setCheckpoint(Checkpoint * checkpoint)93 void IQTree::setCheckpoint(Checkpoint *checkpoint) {
94 PhyloTree::setCheckpoint(checkpoint);
95 stop_rule.setCheckpoint(checkpoint);
96 candidateTrees.setCheckpoint(checkpoint);
97 for (auto it = boot_splits.begin(); it != boot_splits.end(); it++)
98 (*it)->setCheckpoint(checkpoint);
99 }
100
saveUFBoot(Checkpoint * checkpoint)101 void IQTree::saveUFBoot(Checkpoint *checkpoint) {
102 checkpoint->startStruct("UFBoot");
103 if (MPIHelper::getInstance().isWorker()) {
104 CKP_SAVE(sample_start);
105 CKP_SAVE(sample_end);
106 checkpoint->startList(boot_samples.size());
107 checkpoint->setListElement(sample_start-1);
108 for (int id = sample_start; id != sample_end; id++) {
109 checkpoint->addListElement();
110 stringstream ss;
111 ss.precision(10);
112 ss << boot_counts[id] << " " << boot_logl[id] << " " << boot_orig_logl[id] << " " << boot_trees[id];
113 checkpoint->put("", ss.str());
114 }
115 checkpoint->endList();
116 } else {
117 CKP_SAVE(logl_cutoff);
118 int boot_splits_size = boot_splits.size();
119 CKP_SAVE(boot_splits_size);
120 checkpoint->startList(boot_samples.size());
121 for (int id = 0; id != boot_samples.size(); id++) {
122 checkpoint->addListElement();
123 stringstream ss;
124 ss.precision(10);
125 ss << boot_counts[id] << " " << boot_logl[id] << " " << boot_orig_logl[id] << " " << boot_trees[id];
126 checkpoint->put("", ss.str());
127 }
128 checkpoint->endList();
129 }
130 checkpoint->endStruct();
131 }
132
saveCheckpoint()133 void IQTree::saveCheckpoint() {
134 stop_rule.saveCheckpoint();
135 candidateTrees.saveCheckpoint();
136
137 if (boot_samples.size() > 0 && !boot_trees.front().empty()) {
138 saveUFBoot(checkpoint);
139 // boot_splits
140 int id = 0;
141 for (vector<SplitGraph*>::iterator sit = boot_splits.begin(); sit != boot_splits.end(); sit++, id++) {
142 checkpoint->startStruct("UFBootSplit" + convertIntToString(id));
143 (*sit)->saveCheckpoint();
144 checkpoint->endStruct();
145 }
146 }
147
148 PhyloTree::saveCheckpoint();
149 CKP_SAVE(boot_consense_logl);
150 CKP_SAVE(contree_rfdist);
151 }
152
restoreUFBoot(Checkpoint * checkpoint)153 void IQTree::restoreUFBoot(Checkpoint *checkpoint) {
154 checkpoint->startStruct("UFBoot");
155 // save boot_samples and boot_trees
156 int id;
157 checkpoint->startList(params->gbo_replicates);
158 int sample_start, sample_end;
159 CKP_RESTORE(sample_start);
160 CKP_RESTORE(sample_end);
161 checkpoint->setListElement(sample_start-1);
162 for (id = sample_start; id != sample_end; id++) {
163 checkpoint->addListElement();
164 string str;
165 checkpoint->getString("", str);
166 ASSERT(!str.empty());
167 stringstream ss(str);
168 ss >> boot_counts[id] >> boot_logl[id] >> boot_orig_logl[id] >> boot_trees[id];
169 }
170 checkpoint->endList();
171 checkpoint->endStruct();
172 }
173
restoreCheckpoint()174 void IQTree::restoreCheckpoint() {
175 PhyloTree::restoreCheckpoint();
176 stop_rule.restoreCheckpoint();
177 candidateTrees.restoreCheckpoint();
178
179 if (params->gbo_replicates > 0 && checkpoint->hasKeyPrefix("UFBoot")) {
180 checkpoint->startStruct("UFBoot");
181 // CKP_RESTORE(max_candidate_trees);
182 CKP_RESTORE(logl_cutoff);
183 // save boot_samples and boot_trees
184 int id = 0;
185 checkpoint->startList(params->gbo_replicates);
186 boot_trees.resize(params->gbo_replicates);
187 boot_logl.resize(params->gbo_replicates);
188 boot_orig_logl.resize(params->gbo_replicates);
189 boot_counts.resize(params->gbo_replicates);
190 for (id = 0; id < params->gbo_replicates; id++) {
191 checkpoint->addListElement();
192 string str;
193 checkpoint->getString("", str);
194 stringstream ss(str);
195 ss >> boot_counts[id] >> boot_logl[id] >> boot_orig_logl[id] >> boot_trees[id];
196 }
197 checkpoint->endList();
198 int boot_splits_size = 0;
199 CKP_RESTORE(boot_splits_size);
200 checkpoint->endStruct();
201
202 // boot_splits
203 for (id = 0; id < boot_splits_size; id++) {
204 checkpoint->startStruct("UFBootSplit" + convertIntToString(id));
205 SplitGraph *sg = new SplitGraph;
206 sg->createBlocks();
207 StrVector taxname;
208 getTaxaName(taxname);
209 for (auto its = taxname.begin(); its != taxname.end(); its++)
210 sg->getTaxa()->AddTaxonLabel(NxsString(its->c_str()));
211 sg->setCheckpoint(checkpoint);
212 sg->restoreCheckpoint();
213 boot_splits.push_back(sg);
214 checkpoint->endStruct();
215 }
216 }
217
218 CKP_RESTORE(boot_consense_logl);
219 CKP_RESTORE(contree_rfdist);
220 }
221
initSettings(Params & params)222 void IQTree::initSettings(Params ¶ms) {
223
224 searchinfo.nni_type = params.nni_type;
225 optimize_by_newton = params.optimize_by_newton;
226 setLikelihoodKernel(params.SSE);
227 if (num_threads > 0)
228 setNumThreads(num_threads);
229 else
230 setNumThreads(params.num_threads);
231 candidateTrees.init(this->aln, 200);
232 intermediateTrees.init(this->aln, 200000);
233
234 if (params.min_iterations == -1) {
235 if (!params.gbo_replicates) {
236 if (params.stop_condition == SC_UNSUCCESS_ITERATION) {
237 params.min_iterations = aln->getNSeq() * 100;
238 } else if (aln->getNSeq() < 100) {
239 params.min_iterations = 200;
240 } else {
241 params.min_iterations = aln->getNSeq() * 2;
242 }
243 if (params.iteration_multiple > 1)
244 params.min_iterations = aln->getNSeq() * params.iteration_multiple;
245 } else {
246 params.min_iterations = 100;
247 }
248 }
249 if (!params.treeset_file.empty() && params.min_iterations == -1) {
250 params.min_iterations = 1;
251 params.stop_condition = SC_FIXED_ITERATION;
252 params.numInitTrees = 1;
253 }
254 if (params.gbo_replicates)
255 params.max_iterations = max(params.max_iterations, max(params.min_iterations, params.step_iterations));
256
257 k_represent = params.k_representative;
258
259 if (params.p_delete == -1.0) {
260 if (aln->getNSeq() < 4)
261 params.p_delete = 0.0; // delete nothing
262 else if (aln->getNSeq() == 4)
263 params.p_delete = 0.25; // just delete 1 leaf
264 else if (aln->getNSeq() == 5)
265 params.p_delete = 0.4; // just delete 2 leaves
266 else if (aln->getNSeq() < 51)
267 params.p_delete = 0.5;
268 else if (aln->getNSeq() < 100)
269 params.p_delete = 0.3;
270 else if (aln->getNSeq() < 200)
271 params.p_delete = 0.2;
272 else if (aln->getNSeq() < 400)
273 params.p_delete = 0.1;
274 else
275 params.p_delete = 0.05;
276 }
277 //tree.setProbDelete(params.p_delete);
278 if (params.p_delete != -1.0) {
279 k_delete = k_delete_min = k_delete_max = ceil(params.p_delete * leafNum);
280 } else {
281 k_delete = k_delete_min = 10;
282 k_delete_max = leafNum / 2;
283 if (k_delete_max > 100)
284 k_delete_max = 100;
285 if (k_delete_max < 20)
286 k_delete_max = 20;
287 k_delete_stay = ceil(leafNum / k_delete);
288 }
289
290 //tree.setIQPIterations(params.stop_condition, params.stop_confidence, params.min_iterations, params.max_iterations);
291
292 stop_rule.initialize(params);
293
294 //tree.setIQPAssessQuartet(params.iqp_assess_quartet);
295 iqp_assess_quartet = params.iqp_assess_quartet;
296 estimate_nni_cutoff = params.estimate_nni_cutoff;
297 nni_cutoff = params.nni_cutoff;
298 nni_sort = params.nni_sort;
299 testNNI = params.testNNI;
300
301 globalParams = ¶ms;
302 globalAlignment = aln;
303
304 //write_intermediate_trees = params.write_intermediate_trees;
305
306 if (Params::getInstance().write_intermediate_trees > 2 || params.gbo_replicates > 0) {
307 save_all_trees = 1;
308 }
309 if (params.gbo_replicates > 0) {
310 if (params.iqp_assess_quartet != IQP_BOOTSTRAP) {
311 save_all_trees = 2;
312 }
313 }
314 // if (params.gbo_replicates > 0 && params.do_compression)
315 // save_all_br_lens = true;
316 // print_tree_lh = params.print_tree_lh;
317 // max_candidate_trees = params.max_candidate_trees;
318 // if (max_candidate_trees == 0)
319 // max_candidate_trees = aln->getNSeq() * params.step_iterations;
320 setRootNode(params.root);
321
322 size_t i;
323
324 if (params.online_bootstrap && params.gbo_replicates > 0 && !isSuperTreeUnlinked()) {
325 if (aln->getNSeq() < 4)
326 outError("It makes no sense to perform bootstrap with less than 4 sequences.");
327
328 string bootaln_name = params.out_prefix;
329 bootaln_name += ".bootaln";
330 if (params.print_bootaln) {
331 ofstream bootalnout;
332 bootalnout.open(bootaln_name.c_str());
333 bootalnout.close();
334 }
335
336 // 2015-12-17: initialize random stream for creating bootstrap samples
337 // mainly so that checkpointing does not need to save bootstrap samples
338 int *saved_randstream = randstream;
339 init_random(params.ran_seed);
340
341 // cout << "Generating " << params.gbo_replicates << " samples for ultrafast "
342 // << RESAMPLE_NAME << " (seed: " << params.ran_seed << ")..." << endl;
343 // allocate memory for boot_samples
344 boot_samples.resize(params.gbo_replicates);
345 sample_start = 0;
346 sample_end = boot_samples.size();
347
348 // compute the sample_start and sample_end
349 if (MPIHelper::getInstance().getNumProcesses() > 1) {
350 int num_samples = boot_samples.size() / MPIHelper::getInstance().getNumProcesses();
351 if (boot_samples.size() % MPIHelper::getInstance().getNumProcesses() != 0)
352 num_samples++;
353 sample_start = MPIHelper::getInstance().getProcessID() * num_samples;
354 sample_end = sample_start + num_samples;
355 if (sample_end > boot_samples.size())
356 sample_end = boot_samples.size();
357 }
358
359 size_t orig_nptn = getAlnNPattern();
360 #ifdef BOOT_VAL_FLOAT
361 size_t nptn = get_safe_upper_limit_float(orig_nptn);
362 #else
363 size_t nptn = get_safe_upper_limit(orig_nptn);
364 #endif
365 BootValType *mem = aligned_alloc<BootValType>(nptn * (size_t)(params.gbo_replicates));
366 memset(mem, 0, nptn * (size_t)(params.gbo_replicates) * sizeof(BootValType));
367 for (i = 0; i < params.gbo_replicates; i++)
368 boot_samples[i] = mem + i*nptn;
369
370 if (boot_trees.empty()) {
371 boot_logl.resize(params.gbo_replicates, -DBL_MAX);
372 boot_orig_logl.resize(params.gbo_replicates, -DBL_MAX);
373 boot_trees.resize(params.gbo_replicates, "");
374 boot_counts.resize(params.gbo_replicates, 0);
375 } else {
376 cout << "CHECKPOINT: " << boot_trees.size() << " UFBoot trees and " << boot_splits.size() << " UFBootSplits restored" << endl;
377 }
378 VerboseMode saved_mode = verbose_mode;
379 verbose_mode = VB_QUIET;
380 for (i = 0; i < params.gbo_replicates; i++) {
381 if (params.print_bootaln) {
382 Alignment* bootstrap_alignment;
383 if (aln->isSuperAlignment())
384 bootstrap_alignment = new SuperAlignment;
385 else
386 bootstrap_alignment = new Alignment;
387 IntVector this_sample;
388 bootstrap_alignment->createBootstrapAlignment(aln, &this_sample, params.bootstrap_spec);
389 for (size_t j = 0; j < orig_nptn; j++)
390 boot_samples[i][j] = this_sample[j];
391 bootstrap_alignment->printAlignment(params.aln_output_format, bootaln_name.c_str(), true);
392 delete bootstrap_alignment;
393 } else {
394 IntVector this_sample;
395 aln->createBootstrapAlignment(this_sample, params.bootstrap_spec);
396 for (size_t j = 0; j < orig_nptn; j++)
397 boot_samples[i][j] = this_sample[j];
398 }
399 }
400 verbose_mode = saved_mode;
401 if (params.print_bootaln) {
402 cout << "Bootstrap alignments printed to " << bootaln_name << endl;
403 }
404
405 // cout << "Max candidate trees (tau): " << max_candidate_trees << endl;
406
407 // restore randstream
408 finish_random();
409 randstream = saved_randstream;
410
411 // Diep: initialize data members to be used in the Refinement Step
412 on_refine_btree = false;
413 saved_aln_on_refine_btree = NULL;
414 if(params.ufboot2corr){
415 boot_samples_int.resize(params.gbo_replicates);
416 for (size_t i = 0; i < params.gbo_replicates; i++) {
417 boot_samples_int[i].resize(nptn, 0);
418 for (size_t j = 0; j < orig_nptn; j++)
419 boot_samples_int[i][j] = boot_samples[i][j];
420 }
421 }
422
423 }
424
425 if (params.root_state) {
426 if (strlen(params.root_state) != 1)
427 outError("Root state must have exactly 1 character");
428 root_state = aln->convertState(params.root_state[0]);
429 if (root_state < 0 || root_state >= aln->num_states)
430 outError("Invalid root state");
431 }
432 }
433
~IQTree()434 IQTree::~IQTree() {
435 //if (bonus_values)
436 //delete bonus_values;
437 //bonus_values = NULL;
438
439 for (vector<SplitGraph*>::reverse_iterator it2 = boot_splits.rbegin(); it2 != boot_splits.rend(); it2++)
440 delete (*it2);
441 boot_splits.clear();
442 //if (boot_splits) delete boot_splits;
443
444 if (!boot_samples.empty()) {
445 aligned_free(boot_samples[0]); // free memory
446 boot_samples.clear();
447 }
448 }
449
450 extern const char *aa_model_names_rax[];
451
createPLLPartition(Params & params,ostream & pllPartitionFileHandle)452 void IQTree::createPLLPartition(Params ¶ms, ostream &pllPartitionFileHandle) {
453 if (isSuperTree()) {
454 PhyloSuperTree *siqtree = (PhyloSuperTree*) this;
455 // additional check for PLL hard limit
456 if (params.pll) {
457 if (siqtree->size() > PLL_NUM_BRANCHES)
458 outError("Number of partitions exceeds PLL limit, please increase PLL_NUM_BRANCHES constant in pll.h");
459 int i = 0;
460 int startPos = 1;
461
462 // prepare proper partition file
463 for (PhyloSuperTree::iterator it = siqtree->begin(); it != siqtree->end(); it++) {
464 i++;
465 int curLen = ((*it)->aln->seq_type == SEQ_CODON) ? (*it)->getAlnNSite()*3 : (*it)->getAlnNSite();
466 if ((*it)->aln->seq_type == SEQ_DNA || (*it)->aln->seq_type == SEQ_CODON) {
467 pllPartitionFileHandle << "DNA";
468 } else if ((*it)->aln->seq_type == SEQ_PROTEIN) {
469 if ((*it)->aln->model_name != "" && (*it)->aln->model_name.substr(0, 4) != "TEST" && (*it)->aln->model_name.substr(0, 2) != "MF") {
470 string modelStr = (*it)->aln->model_name.
471 substr(0, (*it)->aln->model_name.find_first_of("+{"));
472 if (modelStr == "LG4")
473 modelStr = "LG4M";
474 bool name_ok = false;
475 for (int j = 0; j < 18; j++)
476 if (modelStr == aa_model_names_rax[j]) {
477 name_ok = true;
478 break;
479 }
480 if (name_ok)
481 pllPartitionFileHandle << modelStr;
482 else
483 pllPartitionFileHandle << "WAG";
484 } else {
485 pllPartitionFileHandle << "WAG";
486 }
487 } else
488 outError("PLL only works with DNA/protein alignments");
489 pllPartitionFileHandle << ", p" << i << " = " << startPos << "-" << startPos + curLen - 1 << endl;
490 startPos = startPos + curLen;
491 }
492 } else {
493 // only prepare partition file for computing parsimony trees
494 SeqType datatype[] = {SEQ_DNA, SEQ_CODON, SEQ_PROTEIN};
495 PhyloSuperTree::iterator it;
496
497 for (int i = 0; i < sizeof(datatype)/sizeof(SeqType); i++) {
498 bool first = true;
499 int startPos = 1;
500 for (it = siqtree->begin(); it != siqtree->end(); it++)
501 if ((*it)->aln->seq_type == datatype[i]) {
502 if (first) {
503 if (datatype[i] == SEQ_DNA || datatype[i] == SEQ_CODON)
504 pllPartitionFileHandle << "DNA";
505 else
506 pllPartitionFileHandle << "WAG";
507 }
508 int curLen = (datatype[i] == SEQ_CODON) ? (*it)->getAlnNSite()*3 : (*it)->getAlnNSite();
509 if (first)
510 pllPartitionFileHandle << ", p" << i << " = ";
511 else
512 pllPartitionFileHandle << ", ";
513
514 pllPartitionFileHandle << startPos << "-" << startPos + curLen - 1;
515 startPos = startPos + curLen;
516 first = false;
517 } else {
518 startPos = startPos + (*it)->getAlnNSite();
519 }
520 if (!first) pllPartitionFileHandle << endl;
521 }
522 }
523 } else {
524 /* create a partition file */
525 string model;
526 if (aln->seq_type == SEQ_DNA || aln->seq_type == SEQ_CODON) {
527 model = "DNA";
528 } else if (aln->seq_type == SEQ_PROTEIN) {
529 if (params.pll && params.model_name != "" && params.model_name.substr(0, 4) != "TEST" && params.model_name.substr(0, 2) != "MF") {
530 model = params.model_name.substr(0, params.model_name.find_first_of("+{"));
531 } else {
532 model = "WAG";
533 }
534 } else {
535 model = "WAG";
536 //outError("PLL currently only supports DNA/protein alignments");
537 }
538 int nsite = (aln->seq_type == SEQ_CODON) ? getAlnNSite()*3 : getAlnNSite();
539 pllPartitionFileHandle << model << ", p1 = " << "1-" << nsite << endl;
540 }
541 }
542
computeInitialTree(LikelihoodKernel kernel)543 void IQTree::computeInitialTree(LikelihoodKernel kernel) {
544 double start = getRealTime();
545 string initTree;
546 string out_file = params->out_prefix;
547 int score;
548 if (params->stop_condition == SC_FIXED_ITERATION && params->numNNITrees > params->min_iterations)
549 params->numNNITrees = max(params->min_iterations, 1);
550 int fixed_number = 0;
551
552 if (params->sankoff_cost_file && !cost_matrix)
553 loadCostMatrixFile(params->sankoff_cost_file);
554 if (aln->ordered_pattern.empty())
555 aln->orderPatternByNumChars(PAT_VARIANT);
556
557 setParsimonyKernel(kernel);
558
559 if (params->user_file) {
560 // start the search with user-defined tree
561 cout << "Reading input tree file " << params->user_file << " ...";
562 bool myrooted = params->is_rooted;
563 readTree(params->user_file, myrooted);
564 if (myrooted && !isSuperTreeUnlinked()) {
565 cout << " rooted tree";
566 }
567 cout << endl;
568 setAlignment(aln);
569 if (isSuperTree())
570 wrapperFixNegativeBranch(params->fixed_branch_length == BRLEN_OPTIMIZE &&
571 params->partition_type == BRLEN_OPTIMIZE);
572 else
573 fixed_number = wrapperFixNegativeBranch(false);
574 params->numInitTrees = 1;
575 params->numNNITrees = 1;
576 if (params->pll)
577 pllReadNewick(getTreeString());
578 initTree = getTreeString();
579 CKP_SAVE(initTree);
580 saveCheckpoint();
581 } else if (CKP_RESTORE(initTree)) {
582 readTreeString(initTree);
583 cout << endl << "CHECKPOINT: Initial tree restored" << endl;
584 } else {
585 START_TREE_TYPE start_tree = params->start_tree;
586 // only own parsimony kernel supports constraint tree
587 if (!constraintTree.empty())
588 start_tree = STT_PARSIMONY;
589 switch (start_tree) {
590 case STT_PARSIMONY:
591 //initCostMatrix(CM_UNIFORM);
592 // Create parsimony tree using IQ-Tree kernel
593 cout << "Creating fast initial parsimony tree by random order stepwise addition..." << endl;
594 // aln->orderPatternByNumChars();
595 start = getRealTime();
596 score = computeParsimonyTree(params->out_prefix, aln, randstream);
597 cout << getRealTime() - start << " seconds, parsimony score: " << score
598 << " (based on " << aln->num_parsimony_sites << " sites)"<< endl;
599 // already fixed branch length
600 //wrapperFixNegativeBranch(false);
601
602 break;
603 case STT_RANDOM_TREE:
604 case STT_PLL_PARSIMONY:
605 cout << endl;
606 cout << "Create initial parsimony tree by phylogenetic likelihood library (PLL)... ";
607 pllInst->randomNumberSeed = params->ran_seed + MPIHelper::getInstance().getProcessID();
608 pllComputeRandomizedStepwiseAdditionParsimonyTree(pllInst, pllPartitions, params->sprDist);
609 resetBranches(pllInst);
610 pllTreeToNewick(pllInst->tree_string, pllInst, pllPartitions, pllInst->start->back,
611 PLL_FALSE, PLL_TRUE, PLL_FALSE, PLL_FALSE, PLL_FALSE, PLL_SUMMARIZE_LH, PLL_FALSE, PLL_FALSE);
612 PhyloTree::readTreeStringSeqName(string(pllInst->tree_string));
613 cout << getRealTime() - start << " seconds" << endl;
614 wrapperFixNegativeBranch(true);
615 break;
616 case STT_BIONJ:
617 // This is the old default option: using BIONJ as starting tree
618 computeBioNJ(*params, aln, dist_file);
619 cout << getRealTime() - start << " seconds" << endl;
620 params->numInitTrees = 1;
621 if (isSuperTree())
622 wrapperFixNegativeBranch(true);
623 else
624 fixed_number = wrapperFixNegativeBranch(false);
625 break;
626 case STT_USER_TREE:
627 ASSERT(0 && "User tree should be handled already");
628 break;
629 }
630 initTree = getTreeString();
631 CKP_SAVE(initTree);
632 saveCheckpoint();
633 checkpoint->dump(true);
634 }
635
636 if (!constraintTree.isCompatible(this))
637 outError("Initial tree is not compatible with constraint tree");
638
639 if (fixed_number) {
640 cout << "WARNING: " << fixed_number << " undefined/negative branch lengths are initialized with parsimony" << endl;
641 }
642
643 if (params->root) {
644 StrVector outgroup_names;
645 convert_string_vec(params->root, outgroup_names);
646 for (auto it = outgroup_names.begin(); it != outgroup_names.end(); it++)
647 if (aln->getSeqID(*it) < 0)
648 outError("Specified outgroup taxon " + *it + " not found");
649 }
650
651 if (params->write_init_tree) {
652 out_file += ".init_tree";
653 printTree(out_file.c_str(), WT_NEWLINE);
654 // printTree(getTreeString().c_str(), WT_NEWLINE);
655 }
656 }
657
addTreeToCandidateSet(string treeString,double score,bool updateStopRule,int sourceProcID)658 int IQTree::addTreeToCandidateSet(string treeString, double score, bool updateStopRule, int sourceProcID) {
659 double curBestScore = candidateTrees.getBestScore();
660 int pos = candidateTrees.update(treeString, score);
661 if (updateStopRule) {
662 stop_rule.setCurIt(stop_rule.getCurIt() + 1);
663 if (score > curBestScore) {
664 if (pos != -1) {
665 stop_rule.addImprovedIteration(stop_rule.getCurIt());
666 cout << "BETTER TREE FOUND at iteration " << stop_rule.getCurIt() << ": " << score << endl;
667 } else {
668 cout << "UPDATE BEST LOG-LIKELIHOOD: " << score << endl;
669 }
670 bestcandidate_changed = true;
671 // COMMENT OUT: not safe with MPI version
672 // printResultTree();
673 }
674
675 curScore = score;
676 printIterationInfo(sourceProcID);
677 }
678 return pos;
679 }
680
initCandidateTreeSet(int nParTrees,int nNNITrees)681 void IQTree::initCandidateTreeSet(int nParTrees, int nNNITrees) {
682
683 if (nParTrees > 0) {
684 if (params->start_tree == STT_RANDOM_TREE)
685 cout << "Generating " << nParTrees << " random trees... ";
686 else
687 cout << "Generating " << nParTrees << " parsimony trees... ";
688 cout.flush();
689 }
690 double startTime = getRealTime();
691 // int numDupPars = 0;
692 // bool orig_rooted = rooted;
693 // rooted = false;
694 int processID = MPIHelper::getInstance().getProcessID();
695
696 #ifdef _OPENMP
697 StrVector pars_trees;
698 if (params->start_tree == STT_PARSIMONY && nParTrees >= 1) {
699 pars_trees.resize(nParTrees);
700 #pragma omp parallel
701 {
702 int *rstream;
703 int ran_seed = params->ran_seed + processID * 1000 + omp_get_thread_num();
704 init_random(ran_seed, false, &rstream);
705 PhyloTree tree;
706 if (!constraintTree.empty()) {
707 tree.constraintTree.readConstraint(constraintTree);
708 }
709 tree.setParams(params);
710 tree.setParsimonyKernel(params->SSE);
711 tree.rooted = rooted;
712 #pragma omp for schedule(dynamic)
713 for (int i = 0; i < nParTrees; i++) {
714 tree.computeParsimonyTree(NULL, aln, rstream);
715 pars_trees[i] = tree.getTreeString();
716 }
717 finish_random(rstream);
718 }
719 }
720 #endif
721
722 int init_size = candidateTrees.size();
723
724 // unsigned long curNumTrees = candidateTrees.size();
725 for (int treeNr = 1; treeNr <= nParTrees; treeNr++) {
726 int parRandSeed = Params::getInstance().ran_seed + processID * nParTrees + treeNr;
727 string curParsTree;
728
729 /********* Create parsimony tree using PLL *********/
730 if (params->start_tree == STT_PLL_PARSIMONY) {
731 pllInst->randomNumberSeed = parRandSeed;
732 pllComputeRandomizedStepwiseAdditionParsimonyTree(pllInst, pllPartitions, params->sprDist);
733 resetBranches(pllInst);
734 pllTreeToNewick(pllInst->tree_string, pllInst, pllPartitions,
735 pllInst->start->back, PLL_FALSE, PLL_TRUE, PLL_FALSE,
736 PLL_FALSE, PLL_FALSE, PLL_SUMMARIZE_LH, PLL_FALSE, PLL_FALSE);
737 curParsTree = string(pllInst->tree_string);
738 PhyloTree::readTreeStringSeqName(curParsTree);
739 wrapperFixNegativeBranch(true);
740 curParsTree = getTreeString();
741 } else if (params->start_tree == STT_RANDOM_TREE) {
742 generateRandomTree(YULE_HARDING);
743 wrapperFixNegativeBranch(true);
744 if (rooted) {
745 rooted = false;
746 convertToRooted();
747 }
748 curParsTree = getTreeString();
749 } else if (params->start_tree == STT_PARSIMONY) {
750 /********* Create parsimony tree using IQ-TREE *********/
751 #ifdef _OPENMP
752 PhyloTree::readTreeString(pars_trees[treeNr-1]);
753 curParsTree = getTreeString();
754 #else
755 computeParsimonyTree(NULL, aln, randstream);
756 curParsTree = getTreeString();
757 #endif
758 }
759
760 int pos = addTreeToCandidateSet(curParsTree, -DBL_MAX, false, MPIHelper::getInstance().getProcessID());
761 // if a duplicated tree is generated, then randomize the tree
762 if (pos == -1) {
763 readTreeString(curParsTree);
764 doRandomNNIs();
765 // generateRandomTree(YULE_HARDING);
766 wrapperFixNegativeBranch(true);
767 string randTree = getTreeString();
768 // if (isMixlen()) {
769 // randTree = optimizeBranches(1);
770 // } else {
771 // curScore = -DBL_MAX;
772 // }
773 addTreeToCandidateSet(randTree, -DBL_MAX, false, MPIHelper::getInstance().getProcessID());
774 }
775 }
776
777 if (nParTrees > 0)
778 cout << getRealTime() - startTime << " second" << endl;
779
780 /****************************************************************************************
781 Compute logl of all initial trees
782 *****************************************************************************************/
783
784 vector<string> initTreeStrings = candidateTrees.getBestTreeStrings();
785 candidateTrees.clear();
786
787 if (init_size < initTreeStrings.size())
788 cout << "Computing log-likelihood of " << initTreeStrings.size() - init_size << " initial trees ... ";
789 startTime = getRealTime();
790
791 for (vector<string>::iterator it = initTreeStrings.begin(); it != initTreeStrings.end(); ++it) {
792 string treeString;
793 double score;
794 readTreeString(*it);
795 if (it-initTreeStrings.begin() >= init_size)
796 treeString = optimizeBranches(params->brlen_num_traversal);
797 else {
798 computeLogL();
799 treeString = getTreeString();
800 }
801 score = getCurScore();
802 candidateTrees.update(treeString,score);
803 }
804
805 if (Params::getInstance().writeDistImdTrees)
806 intermediateTrees.initTrees(candidateTrees);
807
808 if (init_size < initTreeStrings.size())
809 cout << getRealTime() - startTime << " seconds" << endl;
810
811 if (nParTrees > 0) {
812 cout << "Current best score: " << candidateTrees.getBestScore() << endl;
813 }
814
815 //---- BLOCKING COMMUNICATION
816 syncCandidateTrees(nNNITrees, false);
817
818
819 vector<string> bestInitTrees; // Set of best initial trees for doing NNIs
820
821 bestInitTrees = candidateTrees.getBestTreeStringsForProcess(nNNITrees);
822
823 cout << endl;
824 cout << "Do NNI search on " << bestInitTrees.size() << " best initial trees" << endl;
825 stop_rule.setCurIt(0);
826 if (candidateTrees.size() > Params::getInstance().numSupportTrees)
827 candidateTrees.clear();// TODO why clear here???
828
829 candidateTrees.setMaxSize(Params::getInstance().numSupportTrees);
830 vector<string>::iterator it;
831
832 for (it = bestInitTrees.begin(); it != bestInitTrees.end(); it++) {
833 readTreeString(*it);
834 // optimizeBranches();
835 // cout << "curScore: " << curScore << " Tree before NNI: " << getTreeString() << endl;
836 doNNISearch();
837 string treeString = getTreeString();
838 addTreeToCandidateSet(treeString, curScore, true, MPIHelper::getInstance().getProcessID());
839 if (Params::getInstance().writeDistImdTrees)
840 intermediateTrees.update(treeString, curScore);
841 }
842
843 // TODO turning this
844 if (isMixlen()) {
845 cout << "Optimizing model parameters for top " << min((int)candidateTrees.size(), params->popSize) << " candidate trees... " << endl;
846
847 startTime = getRealTime();
848 bestInitTrees = candidateTrees.getBestTreeStrings(params->popSize);
849 for (it = bestInitTrees.begin(); it != bestInitTrees.end(); it++) {
850 string tree;
851 readTreeString(*it);
852 //tree = optimizeBranches();
853 tree = optimizeModelParameters();
854 // cout << "Tree after brlen opt: " << tree << endl;
855 cout << "Tree " << distance(bestInitTrees.begin(), it)+1 << " / LogL: " << getCurScore() << endl;
856 candidateTrees.update(tree, getCurScore());
857 }
858 cout << getRealTime() - startTime << " seconds" << endl;
859 }
860
861 //---- BLOCKING COMMUNICATION
862 syncCandidateTrees(Params::getInstance().numSupportTrees, true);
863
864 }
865
generateParsimonyTree(int randomSeed)866 string IQTree::generateParsimonyTree(int randomSeed) {
867 string parsimonyTreeString;
868 if (params->start_tree == STT_PLL_PARSIMONY) {
869 pllInst->randomNumberSeed = randomSeed;
870 pllComputeRandomizedStepwiseAdditionParsimonyTree(pllInst,
871 pllPartitions, params->sprDist);
872 resetBranches(pllInst);
873 pllTreeToNewick(pllInst->tree_string, pllInst, pllPartitions,
874 pllInst->start->back, PLL_FALSE, PLL_TRUE, PLL_FALSE,
875 PLL_FALSE, PLL_FALSE, PLL_SUMMARIZE_LH, PLL_FALSE, PLL_FALSE);
876 parsimonyTreeString = string(pllInst->tree_string);
877 PhyloTree::readTreeString(parsimonyTreeString);
878 wrapperFixNegativeBranch(true);
879 parsimonyTreeString = getTreeString();
880 } else if (params->start_tree == STT_RANDOM_TREE) {
881 generateRandomTree(YULE_HARDING);
882 wrapperFixNegativeBranch(true);
883 parsimonyTreeString = getTreeString();
884 } else {
885 computeParsimonyTree(NULL, aln, randstream);
886 parsimonyTreeString = getTreeString();
887 }
888 return parsimonyTreeString;
889
890 }
891
initializePLL(Params & params)892 void IQTree::initializePLL(Params ¶ms) {
893 pllAttr.rateHetModel = PLL_GAMMA;
894 pllAttr.fastScaling = PLL_FALSE;
895 pllAttr.saveMemory = PLL_FALSE;
896 pllAttr.useRecom = PLL_FALSE;
897 pllAttr.randomNumberSeed = params.ran_seed;
898 pllAttr.numberOfThreads = max(params.num_threads, 1); /* This only affects the pthreads version */
899 if (pllInst != NULL) {
900 pllDestroyInstance(pllInst);
901 }
902 /* Create a PLL getInstance */
903 pllInst = pllCreateInstance(&pllAttr);
904
905 /* Read in the alignment file */
906 stringstream pllAln;
907 aln->printAlignment(IN_PHYLIP, pllAln);
908 string pllAlnStr = pllAln.str();
909 pllAlignment = pllParsePHYLIPString(pllAlnStr.c_str(), pllAlnStr.length());
910
911 /* Read in the partition information */
912 // BQM: to avoid printing file
913 stringstream pllPartitionFileHandle;
914 createPLLPartition(params, pllPartitionFileHandle);
915 pllQueue *partitionInfo = pllPartitionParseString(pllPartitionFileHandle.str().c_str());
916
917 /* Validate the partitions */
918 if (!pllPartitionsValidate(partitionInfo, pllAlignment)) {
919 outError("pllPartitionsValidate");
920 }
921
922 /* Commit the partitions and build a partitions structure */
923 pllPartitions = pllPartitionsCommit(partitionInfo, pllAlignment);
924
925 /* We don't need the the intermediate partition queue structure anymore */
926 pllQueuePartitionsDestroy(&partitionInfo);
927
928 /* eliminate duplicate sites from the alignment and update weights vector */
929 pllAlignmentRemoveDups(pllAlignment, pllPartitions);
930
931 pllTreeInitTopologyForAlignment(pllInst, pllAlignment);
932
933 /* Connect the alignment and partition structure with the tree structure */
934 if (!pllLoadAlignment(pllInst, pllAlignment, pllPartitions)) {
935 outError("Incompatible tree/alignment combination");
936 }
937 }
938
isInitializedPLL()939 bool IQTree::isInitializedPLL() {
940 return pllInst != NULL;
941 }
942
initializeModel(Params & params,string model_name,ModelsBlock * models_block)943 void IQTree::initializeModel(Params ¶ms, string model_name, ModelsBlock *models_block) {
944 try {
945 if (!getModelFactory()) {
946 if (isSuperTree()) {
947 if (params.partition_type == BRLEN_OPTIMIZE || params.partition_type == TOPO_UNLINKED) {
948 setModelFactory(new PartitionModel(params, (PhyloSuperTree*) this, models_block));
949 } else
950 setModelFactory(new PartitionModelPlen(params, (PhyloSuperTreePlen*) this, models_block));
951
952 // mapTrees again in case of different rooting
953 if (root)
954 ((PhyloSuperTree*)this)->mapTrees();
955
956 } else {
957 setModelFactory(new ModelFactory(params, model_name, this, models_block));
958 }
959 }
960 } catch (string & str) {
961 outError(str);
962 }
963 setModel(getModelFactory()->model);
964 setRate(getModelFactory()->site_rate);
965 getModelFactory()->setCheckpoint(checkpoint);
966 /*
967 * MDW: I don't understand why/how checkpointing is being used,
968 * however I'm having problems because a restoreCheckpoint
969 * happens before anything has been saved (in
970 * phyloanalysis.cpp:runTreeReconstruction). This fixes that
971 * problem, but as I don't understand what is going on, I am
972 * not at all confident this is the correct solution.
973 */
974
975 //!!! BQM: Because iqtree restore the checkpoint from the previous run! (not this current run)
976 // That's why saveCheckpoint should NOT be called now! I comment this line out.
977
978 // model->saveCheckpoint();
979
980 if (params.pll) {
981 if (getRate()->getNDiscreteRate() == 1) {
982 outError("Non-Gamma model is not yet supported by PLL.");
983 // TODO: change rateHetModel to PLL_CAT in case of non-Gamma model
984 }
985 if (getRate()->name.substr(0,2) == "+I")
986 outError("+Invar model is not yet supported by PLL.");
987 if (aln->seq_type == SEQ_DNA && getModel()->name != "GTR")
988 outError("non GTR model for DNA is not yet supported by PLL.");
989 pllInitModel(pllInst, pllPartitions);
990 }
991
992 if (aln->ordered_pattern.empty())
993 aln->orderPatternByNumChars(PAT_VARIANT);
994
995 }
getProbDelete()996 double IQTree::getProbDelete() {
997 return (double) k_delete / leafNum;
998 }
999
resetKDelete()1000 void IQTree::resetKDelete() {
1001 k_delete = k_delete_min;
1002 k_delete_stay = ceil(leafNum / k_delete);
1003 }
1004
increaseKDelete()1005 void IQTree::increaseKDelete() {
1006 if (k_delete >= k_delete_max)
1007 return;
1008 k_delete_stay--;
1009 if (k_delete_stay > 0)
1010 return;
1011 k_delete++;
1012 k_delete_stay = ceil(leafNum / k_delete);
1013 if (verbose_mode >= VB_MED)
1014 cout << "Increase k_delete to " << k_delete << endl;
1015 }
1016
1017 //void IQTree::setIQPIterations(STOP_CONDITION stop_condition, double stop_confidence, int min_iterations,
1018 // int max_iterations) {
1019 // stop_rule.setStopCondition(stop_condition);
1020 // stop_rule.setConfidenceValue(stop_confidence);
1021 // stop_rule.setIterationNum(min_iterations, max_iterations);
1022 //}
1023
findRepresentLeaves(vector<RepresentLeafSet * > & leaves_vec,int nei_id,PhyloNode * dad)1024 RepresentLeafSet* IQTree::findRepresentLeaves(vector<RepresentLeafSet*> &leaves_vec, int nei_id, PhyloNode *dad) {
1025 PhyloNode *node = (PhyloNode*) (dad->neighbors[nei_id]->node);
1026 int set_id = dad->id * 3 + nei_id;
1027 if (leaves_vec[set_id])
1028 return leaves_vec[set_id];
1029 RepresentLeafSet *leaves = new RepresentLeafSet;
1030 RepresentLeafSet * leaves_it[2] = { NULL, NULL };
1031 leaves_vec[set_id] = leaves;
1032 RepresentLeafSet::iterator last;
1033 RepresentLeafSet::iterator cur_it;
1034 int i, j;
1035 //double admit_height = 1000000;
1036
1037 leaves->clear();
1038 if (node->isLeaf()) {
1039 // set the depth to zero
1040 //node->height = 0.0;
1041 leaves->insert(new RepLeaf(node, 0));
1042 } else {
1043 for (i = 0, j = 0; i < node->neighbors.size(); i++)
1044 if (node->neighbors[i]->node != dad) {
1045 leaves_it[j++] = findRepresentLeaves(leaves_vec, i, node);
1046 }
1047 ASSERT(j == 2 && leaves_it[0] && leaves_it[1]);
1048 if (leaves_it[0]->empty() && leaves_it[1]->empty()) {
1049 cout << "wrong";
1050 }
1051 RepresentLeafSet::iterator lit[2] = { leaves_it[0]->begin(), leaves_it[1]->begin() };
1052 while (leaves->size() < k_represent) {
1053 int id = -1;
1054 if (lit[0] != leaves_it[0]->end() && lit[1] != leaves_it[1]->end()) {
1055 if ((*lit[0])->height < (*lit[1])->height)
1056 id = 0;
1057 else if ((*lit[0])->height > (*lit[1])->height)
1058 id = 1;
1059 else { // tie, choose at random
1060 id = random_int(2);
1061 }
1062 } else if (lit[0] != leaves_it[0]->end())
1063 id = 0;
1064 else if (lit[1] != leaves_it[1]->end())
1065 id = 1;
1066 else
1067 break;
1068 ASSERT(id < 2 && id >= 0);
1069 leaves->insert(new RepLeaf((*lit[id])->leaf, (*lit[id])->height + 1));
1070 lit[id]++;
1071 }
1072 }
1073 ASSERT(!leaves->empty());
1074 /*
1075 if (verbose_mode >= VB_MAX) {
1076 for (cur_it = leaves->begin(); cur_it != leaves->end(); cur_it++)
1077 cout << (*cur_it)->leaf->name << " ";
1078 cout << endl;
1079 }*/
1080 return leaves;
1081 }
1082
1083 /*
1084 void IQPTree::clearRepresentLeaves(vector<RepresentLeafSet*> &leaves_vec, Node *node, Node *dad) {
1085 int nei_id;
1086 for (nei_id = 0; nei_id < node->neighbors.size(); nei_id++)
1087 if (node->neighbors[nei_id]->node == dad) break;
1088 assert(nei_id < node->neighbors.size());
1089 int set_id = node->id * 3 + nei_id;
1090 if (leaves_vec[set_id]) {
1091 for (RepresentLeafSet::iterator rlit = leaves_vec[set_id]->begin(); rlit != leaves_vec[set_id]->end(); rlit++)
1092 delete (*rlit);
1093 delete leaves_vec[set_id];
1094 leaves_vec[set_id] = NULL;
1095 }
1096 FOR_NEIGHBOR_IT(node, dad, it) {
1097 clearRepresentLeaves(leaves_vec, (*it)->node, node);
1098 }
1099 }*/
1100
deleteNonCherryLeaves(PhyloNodeVector & del_leaves)1101 void IQTree::deleteNonCherryLeaves(PhyloNodeVector &del_leaves) {
1102 NodeVector cherry_taxa;
1103 NodeVector noncherry_taxa;
1104 // get the vector of non cherry taxa
1105 getNonCherryLeaves(noncherry_taxa, cherry_taxa);
1106 root = NULL;
1107 int num_taxa = aln->getNSeq();
1108 int num_delete = k_delete;
1109 if (num_delete > num_taxa - 4)
1110 num_delete = num_taxa - 4;
1111 if (verbose_mode >= VB_DEBUG) {
1112 cout << "Deleting " << num_delete << " leaves" << endl;
1113 }
1114 vector<unsigned int> indices_noncherry(noncherry_taxa.size());
1115 //iota(indices_noncherry.begin(), indices_noncherry.end(), 0);
1116 unsigned int startValue = 0;
1117 for (vector<unsigned int>::iterator it = indices_noncherry.begin(); it != indices_noncherry.end(); ++it) {
1118 (*it) = startValue;
1119 ++startValue;
1120 }
1121 my_random_shuffle(indices_noncherry.begin(), indices_noncherry.end());
1122 int i;
1123 for (i = 0; i < num_delete && i < noncherry_taxa.size(); i++) {
1124 PhyloNode *taxon = (PhyloNode*) noncherry_taxa[indices_noncherry[i]];
1125 del_leaves.push_back(taxon);
1126 deleteLeaf(taxon);
1127 //cout << taxon->id << ", ";
1128 }
1129 int j = 0;
1130 if (i < num_delete) {
1131 vector<unsigned int> indices_cherry(cherry_taxa.size());
1132 //iota(indices_cherry.begin(), indices_cherry.end(), 0);
1133 startValue = 0;
1134 for (vector<unsigned int>::iterator it = indices_cherry.begin(); it != indices_cherry.end(); ++it) {
1135 (*it) = startValue;
1136 ++startValue;
1137 }
1138 my_random_shuffle(indices_cherry.begin(), indices_cherry.end());
1139 while (i < num_delete) {
1140 PhyloNode *taxon = (PhyloNode*) cherry_taxa[indices_cherry[j]];
1141 del_leaves.push_back(taxon);
1142 deleteLeaf(taxon);
1143 i++;
1144 j++;
1145 }
1146 }
1147 root = cherry_taxa[j];
1148 }
1149
deleteLeaves(PhyloNodeVector & del_leaves)1150 void IQTree::deleteLeaves(PhyloNodeVector &del_leaves) {
1151 NodeVector taxa;
1152 // get the vector of taxa
1153 getTaxa(taxa);
1154 root = NULL;
1155 //int num_delete = floor(p_delete * taxa.size());
1156 int num_delete = k_delete;
1157 int i;
1158 if (num_delete > taxa.size() - 4)
1159 num_delete = taxa.size() - 4;
1160 if (verbose_mode >= VB_DEBUG) {
1161 cout << "Deleting " << num_delete << " leaves" << endl;
1162 }
1163 // now try to randomly delete some taxa of the probability of p_delete
1164 for (i = 0; i < num_delete;) {
1165 int id = random_int(taxa.size());
1166 if (!taxa[id])
1167 continue;
1168 else
1169 i++;
1170 PhyloNode *taxon = (PhyloNode*) taxa[id];
1171 del_leaves.push_back(taxon);
1172 deleteLeaf(taxon);
1173 taxa[id] = NULL;
1174 }
1175 // set root to the first taxon which was not deleted
1176 for (i = 0; i < taxa.size(); i++)
1177 if (taxa[i]) {
1178 root = taxa[i];
1179 break;
1180 }
1181 }
1182
assessQuartet(Node * leaf0,Node * leaf1,Node * leaf2,Node * del_leaf)1183 int IQTree::assessQuartet(Node *leaf0, Node *leaf1, Node *leaf2, Node *del_leaf) {
1184 ASSERT(dist_matrix);
1185 int nseq = aln->getNSeq();
1186 //int id0 = leaf0->id, id1 = leaf1->id, id2 = leaf2->id;
1187 double dist0 = dist_matrix[leaf0->id * nseq + del_leaf->id] + dist_matrix[leaf1->id * nseq + leaf2->id];
1188 double dist1 = dist_matrix[leaf1->id * nseq + del_leaf->id] + dist_matrix[leaf0->id * nseq + leaf2->id];
1189 double dist2 = dist_matrix[leaf2->id * nseq + del_leaf->id] + dist_matrix[leaf0->id * nseq + leaf1->id];
1190 if (dist0 < dist1 && dist0 < dist2)
1191 return 0;
1192 if (dist1 < dist2)
1193 return 1;
1194 return 2;
1195 }
1196
assessQuartetParsimony(Node * leaf0,Node * leaf1,Node * leaf2,Node * del_leaf)1197 int IQTree::assessQuartetParsimony(Node *leaf0, Node *leaf1, Node *leaf2, Node *del_leaf) {
1198 int score[3] = { 0, 0, 0 };
1199 for (Alignment::iterator it = aln->begin(); it != aln->end(); it++) {
1200 char ch0 = (*it)[leaf0->id];
1201 char ch1 = (*it)[leaf1->id];
1202 char ch2 = (*it)[leaf2->id];
1203 char chd = (*it)[del_leaf->id];
1204 if (ch0 >= aln->num_states || ch1 >= aln->num_states || ch2 >= aln->num_states || chd >= aln->num_states)
1205 continue;
1206 if (chd == ch0 && ch1 == ch2)
1207 score[0] += (*it).frequency;
1208 if (chd == ch1 && ch0 == ch2)
1209 score[1] += (*it).frequency;
1210 if (chd == ch2 && ch0 == ch1)
1211 score[2] += (*it).frequency;
1212 }
1213 if (score[0] == score[1] && score[0] == score[2]) {
1214 int id = random_int(3);
1215 return id;
1216 }
1217 if (score[0] > score[1] && score[0] > score[2])
1218 return 0;
1219 if (score[1] < score[2])
1220 return 2;
1221 return 1;
1222 }
1223
initializeBonus(PhyloNode * node,PhyloNode * dad)1224 void IQTree::initializeBonus(PhyloNode *node, PhyloNode *dad) {
1225 if (!node)
1226 node = (PhyloNode*) root;
1227 if (dad) {
1228 PhyloNeighbor *node_nei = (PhyloNeighbor*) node->findNeighbor(dad);
1229 PhyloNeighbor *dad_nei = (PhyloNeighbor*) dad->findNeighbor(node);
1230 node_nei->lh_scale_factor = 0.0;
1231 node_nei->partial_lh_computed = 0;
1232 dad_nei->lh_scale_factor = 0.0;
1233 dad_nei->partial_lh_computed = 0;
1234 }
1235
1236 FOR_NEIGHBOR_IT(node, dad, it){
1237 initializeBonus((PhyloNode*) ((*it)->node), node);
1238 }
1239 }
1240
raiseBonus(Neighbor * nei,Node * dad,double bonus)1241 void IQTree::raiseBonus(Neighbor *nei, Node *dad, double bonus) {
1242 ((PhyloNeighbor*) nei)->lh_scale_factor += bonus;
1243 if (verbose_mode >= VB_DEBUG)
1244 cout << dad->id << " - " << nei->node->id << " : " << bonus << endl;
1245
1246 // FOR_NEIGHBOR_IT(nei->node, dad, it)
1247 // raiseBonus((*it), nei->node, bonus);
1248 }
1249
computePartialBonus(Node * node,Node * dad)1250 double IQTree::computePartialBonus(Node *node, Node* dad) {
1251 PhyloNeighbor *node_nei = (PhyloNeighbor*) node->findNeighbor(dad);
1252 if (node_nei->partial_lh_computed)
1253 return node_nei->lh_scale_factor;
1254
1255 FOR_NEIGHBOR_IT(node, dad, it){
1256 node_nei->lh_scale_factor += computePartialBonus((*it)->node, node);
1257 }
1258 node_nei->partial_lh_computed = 1;
1259 return node_nei->lh_scale_factor;
1260 }
1261
findBestBonus(double & best_score,NodeVector & best_nodes,NodeVector & best_dads,Node * node,Node * dad)1262 void IQTree::findBestBonus(double &best_score, NodeVector &best_nodes, NodeVector &best_dads, Node *node, Node *dad) {
1263 double score;
1264 if (!node)
1265 node = root;
1266 if (!dad) {
1267 best_score = 0;
1268 } else {
1269 score = computePartialBonus(node, dad) + computePartialBonus(dad, node);
1270 if (score >= best_score) {
1271 if (score > best_score) {
1272 best_score = score;
1273 best_nodes.clear();
1274 best_dads.clear();
1275 }
1276 best_nodes.push_back(node);
1277 best_dads.push_back(dad);
1278 }
1279 //cout << node->id << " - " << dad->id << " : " << best_score << endl;
1280 }
1281
1282 FOR_NEIGHBOR_IT(node, dad, it){
1283 findBestBonus(best_score, best_nodes, best_dads, (*it)->node, node);
1284 }
1285 }
1286
assessQuartets(vector<RepresentLeafSet * > & leaves_vec,PhyloNode * cur_root,PhyloNode * del_leaf)1287 void IQTree::assessQuartets(vector<RepresentLeafSet*> &leaves_vec, PhyloNode *cur_root, PhyloNode *del_leaf) {
1288 const int MAX_DEGREE = 3;
1289 RepresentLeafSet * leaves[MAX_DEGREE];
1290 double bonus[MAX_DEGREE];
1291 memset(bonus, 0, MAX_DEGREE * sizeof(double));
1292 int cnt = 0;
1293
1294 // only work for birfucating tree
1295 ASSERT(cur_root->degree() == MAX_DEGREE);
1296
1297 // find the representative leaf set for three subtrees
1298
1299 FOR_NEIGHBOR_IT(cur_root, NULL, it){
1300 leaves[cnt] = findRepresentLeaves(leaves_vec, cnt, cur_root);
1301 cnt++;
1302 }
1303 for (RepresentLeafSet::iterator i0 = leaves[0]->begin(); i0 != leaves[0]->end(); i0++)
1304 for (RepresentLeafSet::iterator i1 = leaves[1]->begin(); i1 != leaves[1]->end(); i1++)
1305 for (RepresentLeafSet::iterator i2 = leaves[2]->begin(); i2 != leaves[2]->end(); i2++) {
1306 int best_id;
1307 if (iqp_assess_quartet == IQP_DISTANCE)
1308 best_id = assessQuartet((*i0)->leaf, (*i1)->leaf, (*i2)->leaf, del_leaf);
1309 else
1310 best_id = assessQuartetParsimony((*i0)->leaf, (*i1)->leaf, (*i2)->leaf, del_leaf);
1311 bonus[best_id] += 1.0;
1312 }
1313 for (cnt = 0; cnt < MAX_DEGREE; cnt++)
1314 if (bonus[cnt] > 0.0)
1315 raiseBonus(cur_root->neighbors[cnt], cur_root, bonus[cnt]);
1316
1317 }
1318
reinsertLeavesByParsimony(PhyloNodeVector & del_leaves)1319 void IQTree::reinsertLeavesByParsimony(PhyloNodeVector &del_leaves) {
1320 ASSERT(0 && "this function is obsolete");
1321 PhyloNodeVector::iterator it_leaf;
1322 ASSERT(root->isLeaf());
1323 for (it_leaf = del_leaves.begin(); it_leaf != del_leaves.end(); it_leaf++) {
1324 //cout << "Add leaf " << (*it_leaf)->id << " to the tree" << endl;
1325 initializeAllPartialPars();
1326 clearAllPartialLH();
1327 Node *target_node = NULL;
1328 Node *target_dad = NULL;
1329 Node *added_node = (*it_leaf)->neighbors[0]->node;
1330 Node *node1 = NULL;
1331 Node *node2 = NULL;
1332 //Node *leaf;
1333 for (int i = 0; i < 3; i++) {
1334 if (added_node->neighbors[i]->node->id == (*it_leaf)->id) {
1335 //leaf = added_node->neighbors[i]->node;
1336 } else if (!node1) {
1337 node1 = added_node->neighbors[i]->node;
1338 } else {
1339 node2 = added_node->neighbors[i]->node;
1340 }
1341 }
1342
1343 //cout << "(" << node1->id << ", " << node2->id << ")" << "----" << "(" << added_node->id << "," << leaf->id << ")" << endl;
1344 added_node->updateNeighbor(node1, (Node*) 1);
1345 added_node->updateNeighbor(node2, (Node*) 2);
1346
1347 best_pars_score = INT_MAX;
1348 // TODO: this needs to be adapted
1349 // addTaxonMPFast(added_node, target_node, target_dad, NULL, root->neighbors[0]->node, root);
1350 target_node->updateNeighbor(target_dad, added_node, -1.0);
1351 target_dad->updateNeighbor(target_node, added_node, -1.0);
1352 added_node->updateNeighbor((Node*) 1, target_node, -1.0);
1353 added_node->updateNeighbor((Node*) 2, target_dad, -1.0);
1354
1355 }
1356
1357 }
1358
reinsertLeaves(PhyloNodeVector & del_leaves)1359 void IQTree::reinsertLeaves(PhyloNodeVector &del_leaves) {
1360 PhyloNodeVector::iterator it_leaf;
1361
1362 //int num_del_leaves = del_leaves.size();
1363 ASSERT(root->isLeaf());
1364
1365 for (it_leaf = del_leaves.begin(); it_leaf != del_leaves.end(); it_leaf++) {
1366 if (verbose_mode >= VB_DEBUG)
1367 cout << "Reinserting " << (*it_leaf)->name << " (" << (*it_leaf)->id << ")" << endl;
1368 vector<RepresentLeafSet*> leaves_vec;
1369 leaves_vec.resize(nodeNum * 3, NULL);
1370 initializeBonus();
1371 NodeVector nodes;
1372 getInternalNodes(nodes);
1373 if (verbose_mode >= VB_DEBUG)
1374 drawTree(cout, WT_BR_SCALE | WT_INT_NODE | WT_TAXON_ID | WT_NEWLINE | WT_BR_ID);
1375 //printTree(cout, WT_BR_LEN | WT_INT_NODE | WT_TAXON_ID | WT_NEWLINE);
1376 for (NodeVector::iterator it = nodes.begin(); it != nodes.end(); it++) {
1377 assessQuartets(leaves_vec, (PhyloNode*) (*it), (*it_leaf));
1378 }
1379 NodeVector best_nodes, best_dads;
1380 double best_bonus;
1381 findBestBonus(best_bonus, best_nodes, best_dads);
1382 if (verbose_mode >= VB_DEBUG)
1383 cout << "Best bonus " << best_bonus << " " << best_nodes[0]->id << " " << best_dads[0]->id << endl;
1384 ASSERT(best_nodes.size() == best_dads.size());
1385 int node_id = random_int(best_nodes.size());
1386 if (best_nodes.size() > 1 && verbose_mode >= VB_DEBUG)
1387 cout << best_nodes.size() << " branches show the same best bonus, branch nr. " << node_id << " is chosen"
1388 << endl;
1389
1390 reinsertLeaf(*it_leaf, best_nodes[node_id], best_dads[node_id]);
1391 //clearRepresentLeaves(leaves_vec, *it_node, *it_leaf);
1392 /*if (verbose_mode >= VB_DEBUG) {
1393 printTree(cout);
1394 cout << endl;
1395 }*/
1396 for (vector<RepresentLeafSet*>::iterator rit = leaves_vec.begin(); rit != leaves_vec.end(); rit++)
1397 if ((*rit)) {
1398 RepresentLeafSet *tit = (*rit);
1399 for (RepresentLeafSet::iterator rlit = tit->begin(); rlit != tit->end(); rlit++)
1400 delete (*rlit);
1401 delete (*rit);
1402 }
1403 }
1404 initializeTree(); // BQM: re-index nodes and branches s.t. ping-pong neighbors have the same ID
1405
1406 if (verbose_mode >= VB_DEBUG)
1407 drawTree(cout, WT_BR_SCALE | WT_INT_NODE | WT_TAXON_ID | WT_NEWLINE | WT_BR_ID);
1408 }
1409
doParsimonyReinsertion()1410 void IQTree::doParsimonyReinsertion() {
1411 PhyloNodeVector del_leaves;
1412
1413 deleteLeaves(del_leaves);
1414
1415 reinsertLeavesByParsimony(del_leaves);
1416 fixNegativeBranch(false);
1417 }
1418
getNonTabuBranches(Branches & allBranches,SplitGraph & tabuSplits,Branches & nonTabuBranches,Branches * tabuBranches)1419 void IQTree::getNonTabuBranches(Branches& allBranches, SplitGraph& tabuSplits, Branches& nonTabuBranches, Branches* tabuBranches) {
1420 if (tabuSplits.size() == 0) {
1421 return;
1422 }
1423 for (Branches::iterator it = allBranches.begin(); it != allBranches.end(); it++) {
1424 if (isInnerBranch(it->second.first, it->second.second)) {
1425 int nodeID1 = it->second.first->id;
1426 int nodeID2 = it->second.second->id;
1427 Branch curBranch = it->second;
1428 Split* sp = getSplit(it->second.first, it->second.second);
1429 if (!tabuSplits.containSplit(*sp)) {
1430 nonTabuBranches.insert(pair<int,Branch>(pairInteger(nodeID1, nodeID2), curBranch));
1431 } else {
1432 if (tabuBranches != NULL) {
1433 tabuBranches->insert(pair<int,Branch>(pairInteger(nodeID1, nodeID2), curBranch));
1434 }
1435 }
1436 delete sp;
1437 }
1438
1439 }
1440 }
1441
getSplitBranches(Branches & branches,SplitIntMap & splits,Node * node,Node * dad)1442 void IQTree::getSplitBranches(Branches &branches, SplitIntMap &splits, Node *node, Node *dad) {
1443 if (!node) {
1444 node = root;
1445 }
1446 FOR_NEIGHBOR_IT(node, dad, it) {
1447 if (isInnerBranch((*it)->node, node)) {
1448 Branch curBranch;
1449 curBranch.first = (*it)->node;
1450 curBranch.second = node;
1451 Split* curSplit;
1452 Split *sp = (*it)->split;
1453 ASSERT(sp != NULL);
1454 curSplit = new Split(*sp);
1455 if (curSplit->shouldInvert())
1456 curSplit->invert();
1457 if (splits.findSplit(curSplit) != NULL) {
1458 //curSplit->report(cout);
1459 branches.insert(pair<int,Branch>(pairInteger(curBranch.first->id, curBranch.second->id), curBranch));
1460 }
1461 delete curSplit;
1462 }
1463 getSplitBranches(branches, splits, (*it)->node, node);
1464 }
1465 }
1466
shouldEvaluate(Split * curSplit,SplitIntMap & tabuSplits,SplitIntMap & candSplits)1467 bool IQTree::shouldEvaluate(Split *curSplit, SplitIntMap &tabuSplits, SplitIntMap &candSplits) {
1468 bool answer = true;
1469 /******************** CHECK TABU SPLIT **************************/
1470 if (tabuSplits.findSplit(curSplit) != NULL) {
1471 answer = false;
1472 } else if (!candSplits.empty()) {
1473 Split *_curSplit;
1474 /******************** CHECK STABLE SPLIT **************************/
1475 int value;
1476 _curSplit = candSplits.findSplit(curSplit, value);
1477 if (_curSplit == NULL || _curSplit->getWeight() <= params->stableSplitThreshold) {
1478 answer = true;
1479 } else { // add a stable branch with a certain probability
1480 double rndDbl = random_double();
1481 if (rndDbl > params->stableSplitThreshold) {
1482 answer = true;
1483 } else {
1484 answer = false;
1485 }
1486 }
1487 } else {
1488 answer = true;
1489 }
1490 return answer;
1491 }
1492
1493
getNNIBranches(SplitIntMap & tabuSplits,SplitIntMap & candSplits,Branches & nonNNIBranches,Branches & nniBranches,Node * node,Node * dad)1494 void IQTree::getNNIBranches(SplitIntMap &tabuSplits, SplitIntMap &candSplits,Branches &nonNNIBranches, Branches &nniBranches, Node *node, Node *dad) {
1495 if (!node) {
1496 node = root;
1497 }
1498 FOR_NEIGHBOR_IT(node, dad, it) {
1499 if (isInnerBranch((*it)->node, node)) {
1500 Branch curBranch;
1501 curBranch.first = (*it)->node;
1502 curBranch.second = node;
1503 int branchID = pairInteger(curBranch.first->id, curBranch.second->id);
1504
1505 if (params->fixStableSplits) {
1506 Split *curSplit;
1507 Split *sp = (*it)->split;
1508 ASSERT(sp != NULL);
1509 curSplit = new Split(*sp);
1510 if (curSplit->shouldInvert())
1511 curSplit->invert();
1512 if (shouldEvaluate(curSplit, tabuSplits, candSplits)) {
1513 nniBranches.insert(pair<int, Branch>(branchID, curBranch));
1514 } else {
1515 nonNNIBranches.insert(pair<int, Branch>(branchID, curBranch));
1516 }
1517 delete curSplit;
1518 } else {
1519 nniBranches.insert(pair<int, Branch>(branchID, curBranch));
1520 }
1521 }
1522 getNNIBranches(tabuSplits, candSplits, nonNNIBranches, nniBranches, (*it)->node, node);
1523 }
1524 }
1525
getStableBranches(SplitIntMap & candSplits,double supportValue,Branches & stableBranches,Node * node,Node * dad)1526 void IQTree::getStableBranches(SplitIntMap &candSplits, double supportValue, Branches &stableBranches, Node *node, Node *dad) {
1527 if (!node) {
1528 node = root;
1529 }
1530
1531 FOR_NEIGHBOR_IT(node, dad, it) {
1532 if (isInnerBranch((*it)->node, node)) {
1533 Branch curBranch;
1534 curBranch.first = (*it)->node;
1535 curBranch.second = node;
1536 Split *curSplit;
1537 Split *sp = (*it)->split;
1538 ASSERT(sp != NULL);
1539 curSplit = new Split(*sp);
1540 if (curSplit->shouldInvert())
1541 curSplit->invert();
1542 int occurences;
1543 sp = candSplits.findSplit(curSplit, occurences);
1544 if (sp != NULL) {
1545 if ( sp->getWeight() >= supportValue) {
1546 stableBranches.insert(
1547 pair<int, Branch>(pairInteger(curBranch.first->id, curBranch.second->id), curBranch));
1548 }
1549 }
1550 delete curSplit;
1551 }
1552 getStableBranches(candSplits, supportValue, stableBranches, (*it)->node, node);
1553 }
1554 }
1555
perturbStableSplits(double suppValue)1556 string IQTree::perturbStableSplits(double suppValue) {
1557 int numRandNNI = 0;
1558 Branches stableBranches;
1559 // initTabuSplits.clear();
1560 // stableBranches = getStableBranches(candidateTrees.getCandSplits(), suppValue);
1561 // int maxRandNNI = stableBranches.size() / 2;
1562 do {
1563 getStableBranches(candidateTrees.getCandSplits(), suppValue, stableBranches);
1564 vector<NNIMove> randomNNIs;
1565 vector<NNIMove> compatibleNNIs;
1566 for (map<int, Branch>::iterator it = stableBranches.begin(); it != stableBranches.end(); it++) {
1567 NNIMove randNNI = getRandomNNI(it->second);
1568 if (constraintTree.isCompatible(randNNI))
1569 randomNNIs.push_back(randNNI);
1570 }
1571 getCompatibleNNIs(randomNNIs, compatibleNNIs);
1572 for (vector<NNIMove>::iterator it = compatibleNNIs.begin(); it != compatibleNNIs.end(); it++) {
1573 doNNI(*it);
1574 numRandNNI++;
1575 // Split *sp = getSplit(it->node1, it->node2);
1576 // Split *tabuSplit = new Split(*sp);
1577 // if (tabuSplit->shouldInvert()) {
1578 // tabuSplit->invert();
1579 // }
1580 // initTabuSplits.insertSplit(tabuSplit, 1);
1581 }
1582 } while (stableBranches.size() > 0);
1583
1584 if (verbose_mode >= VB_MAX) {
1585 cout << "Tree perturbation: number of random NNI performed = " << numRandNNI << endl;
1586 }
1587 setAlignment(aln);
1588 setRootNode(params->root);
1589
1590 clearAllPartialLH();
1591
1592 if (isSuperTree()) {
1593 ((PhyloSuperTree*) this)->mapTrees();
1594 }
1595 if (params->pll) {
1596 pllReadNewick(getTreeString());
1597 }
1598
1599 resetCurScore();
1600 return getTreeString();
1601 }
1602
doRandomNNIs(bool storeTabu)1603 string IQTree::doRandomNNIs(bool storeTabu) {
1604 int cntNNI = 0;
1605 int numRandomNNI;
1606 Branches nniBranches;
1607 Branches nonNNIBranches;
1608 if (storeTabu) {
1609 Branches stableBranches;
1610 getStableBranches(candidateTrees.getCandSplits(), Params::getInstance().stableSplitThreshold, stableBranches);
1611 int numNonStableBranches = leafNum - 3 - stableBranches.size();
1612 numRandomNNI = numNonStableBranches;
1613 } else {
1614 numRandomNNI = floor((leafNum - 3) * Params::getInstance().initPS);
1615 if (leafNum >= 4 && numRandomNNI == 0)
1616 numRandomNNI = 1;
1617 }
1618
1619 initTabuSplits.clear();
1620 while (cntNNI < numRandomNNI) {
1621 nniBranches.clear();
1622 nonNNIBranches.clear();
1623 getNNIBranches(initTabuSplits, candidateTrees.getCandSplits(), nonNNIBranches, nniBranches);
1624 if (nniBranches.size() == 0) break;
1625 // Convert the map data structure Branches to vector of Branch
1626 vector<Branch> vectorNNIBranches;
1627 for (Branches::iterator it = nniBranches.begin(); it != nniBranches.end(); ++it) {
1628 vectorNNIBranches.push_back(it->second);
1629 }
1630 int randInt = random_int((int) vectorNNIBranches.size());
1631 NNIMove randNNI = getRandomNNI(vectorNNIBranches[randInt]);
1632 if (constraintTree.isCompatible(randNNI)) {
1633 // only if random NNI satisfies constraintTree
1634 doNNI(randNNI);
1635 if (storeTabu) {
1636 Split *sp = getSplit(randNNI.node1, randNNI.node2);
1637 Split *tabuSplit = new Split(*sp);
1638 if (tabuSplit->shouldInvert()) {
1639 tabuSplit->invert();
1640 }
1641 initTabuSplits.insertSplit(tabuSplit, 1);
1642 }
1643 }
1644 cntNNI++;
1645 }
1646 if (verbose_mode >= VB_MAX)
1647 cout << "Tree perturbation: number of random NNI performed = " << cntNNI << endl;
1648 setAlignment(aln);
1649 setRootNode(params->root);
1650
1651 if (isSuperTree()) {
1652 ((PhyloSuperTree*) this)->mapTrees();
1653 }
1654 if (params->pll) {
1655 pllReadNewick(getTreeString());
1656 }
1657
1658 clearAllPartialLH();
1659 resetCurScore();
1660 return getTreeString();
1661 }
1662
1663
doIQP()1664 void IQTree::doIQP() {
1665 if (verbose_mode >= VB_DEBUG)
1666 drawTree(cout, WT_BR_SCALE | WT_INT_NODE | WT_TAXON_ID | WT_NEWLINE | WT_BR_ID);
1667 //double time_begin = getCPUTime();
1668 PhyloNodeVector del_leaves;
1669 deleteLeaves(del_leaves);
1670 reinsertLeaves(del_leaves);
1671
1672 // just to make sure IQP does it right
1673 setAlignment(aln);
1674 if (params->pll) {
1675 pllReadNewick(getTreeString());
1676 }
1677
1678 resetCurScore();
1679 // lhComputed = false;
1680
1681 if (isSuperTree()) {
1682 ((PhyloSuperTree*) this)->mapTrees();
1683 }
1684
1685 // if (enable_parsimony) {
1686 // cur_pars_score = computeParsimony();
1687 // if (verbose_mode >= VB_MAX) {
1688 // cout << "IQP Likelihood = " << curScore << " Parsimony = " << cur_pars_score << endl;
1689 // }
1690 // }
1691 }
1692
inputTree2PLL(string treestring,bool computeLH)1693 double IQTree::inputTree2PLL(string treestring, bool computeLH) {
1694 double res = 0.0;
1695 // read in the tree string from IQTree kernel
1696 pllNewickTree *newick = pllNewickParseString(treestring.c_str());
1697 pllTreeInitTopologyNewick(pllInst, newick, PLL_FALSE);
1698 pllNewickParseDestroy(&newick);
1699 if (computeLH) {
1700 pllEvaluateLikelihood(pllInst, pllPartitions, pllInst->start, PLL_TRUE, PLL_FALSE);
1701 res = pllInst->likelihood;
1702 }
1703 return res;
1704 }
1705
getModelRatesFromPLL()1706 double* IQTree::getModelRatesFromPLL() {
1707 ASSERT(aln->num_states == 4);
1708 int numberOfRates = (pllPartitions->partitionData[0]->states * pllPartitions->partitionData[0]->states
1709 - pllPartitions->partitionData[0]->states) / 2;
1710 double* rate_params = new double[numberOfRates];
1711 for (int i = 0; i < numberOfRates; i++) {
1712 rate_params[i] = pllPartitions->partitionData[0]->substRates[i];
1713 }
1714 return rate_params;
1715 }
1716
pllPrintModelParams()1717 void IQTree::pllPrintModelParams() {
1718 cout.precision(6);
1719 cout << fixed;
1720 for (int part = 0; part < pllPartitions->numberOfPartitions; part++) {
1721 cout << "Alpha[" << part << "]" << ": " << pllPartitions->partitionData[part]->alpha << endl;
1722 if (aln->num_states == 4) {
1723 int states, rates;
1724 states = pllPartitions->partitionData[part]->states;
1725 rates = ((states * states - states) / 2);
1726 cout << "Rates[" << part << "]: " << " ac ag at cg ct gt: ";
1727 for (int i = 0; i < rates; i++) {
1728 cout << pllPartitions->partitionData[part]->substRates[i] << " ";
1729 }
1730 cout << endl;
1731 cout << "Frequencies: ";
1732 for (int i = 0; i < 4; i++) {
1733 cout << pllPartitions->partitionData[part]->empiricalFrequencies[i] << " ";
1734 }
1735 cout << endl;
1736 }
1737 }
1738 cout.precision(3);
1739 cout << fixed;
1740 }
1741
getAlphaFromPLL()1742 double IQTree::getAlphaFromPLL() {
1743 return pllPartitions->partitionData[0]->alpha;
1744 }
1745
inputModelPLL2IQTree()1746 void IQTree::inputModelPLL2IQTree() {
1747 // TODO add support for partitioned model
1748 getRate()->setGammaShape(pllPartitions->partitionData[0]->alpha);
1749 if (aln->num_states == 4) {
1750 ((ModelMarkov*) getModel())->setRateMatrix(pllPartitions->partitionData[0]->substRates);
1751 getModel()->decomposeRateMatrix();
1752 }
1753 ((ModelMarkov*) getModel())->setStateFrequency(pllPartitions->partitionData[0]->empiricalFrequencies);
1754 }
1755
inputModelIQTree2PLL()1756 void IQTree::inputModelIQTree2PLL() {
1757 // get the alpha parameter
1758 double alpha = getRate()->getGammaShape();
1759 if (alpha == 0.0)
1760 alpha = PLL_ALPHA_MAX;
1761 if (aln->num_states == 4) {
1762 // get the rate parameters
1763 double *rate_param = new double[6];
1764 getModel()->getRateMatrix(rate_param);
1765 // get the state frequencies
1766 double *state_freqs = new double[aln->num_states];
1767 getModel()->getStateFrequency(state_freqs);
1768
1769 /* put them into PLL */
1770 stringstream linkagePattern;
1771 int partNr;
1772 for (partNr = 0; partNr < pllPartitions->numberOfPartitions - 1; partNr++) {
1773 linkagePattern << partNr << ",";
1774 }
1775 linkagePattern << partNr;
1776 char *pattern = new char[linkagePattern.str().length() + 1];
1777 strcpy(pattern, linkagePattern.str().c_str());
1778 pllLinkAlphaParameters(pattern, pllPartitions);
1779 pllLinkFrequencies(pattern, pllPartitions);
1780 pllLinkRates(pattern, pllPartitions);
1781 delete[] pattern;
1782
1783 for (partNr = 0; partNr < pllPartitions->numberOfPartitions; partNr++) {
1784 pllSetFixedAlpha(alpha, partNr, pllPartitions, pllInst);
1785 pllSetFixedBaseFrequencies(state_freqs, 4, partNr, pllPartitions, pllInst);
1786 pllSetFixedSubstitutionMatrix(rate_param, 6, partNr, pllPartitions, pllInst);
1787 }
1788 delete[] rate_param;
1789 delete[] state_freqs;
1790 } else if (aln->num_states == 20) {
1791 double *state_freqs = new double[aln->num_states];
1792 getModel()->getStateFrequency(state_freqs);
1793 int partNr;
1794 for (partNr = 0; partNr < pllPartitions->numberOfPartitions; partNr++) {
1795 pllSetFixedAlpha(alpha, partNr, pllPartitions, pllInst);
1796 pllSetFixedBaseFrequencies(state_freqs, 20, partNr, pllPartitions, pllInst);
1797 }
1798 delete[] state_freqs;
1799 } else {
1800 if (params->pll) {
1801 outError("Phylogenetic likelihood library current does not support data type other than DNA or Protein");
1802 }
1803 }
1804 }
1805
pllBuildIQTreePatternIndex()1806 void IQTree::pllBuildIQTreePatternIndex(){
1807 pll2iqtree_pattern_index = new int[pllAlignment->sequenceLength];
1808 char ** pll_aln = new char *[pllAlignment->sequenceCount];
1809 for(int i = 0; i < pllAlignment->sequenceCount; i++)
1810 pll_aln[i] = new char[pllAlignment->sequenceLength];
1811
1812 int pos;
1813 for(int i = 0; i < pllAlignment->sequenceCount; i++){
1814 pos = 0;
1815 for(int model = 0; model < pllPartitions->numberOfPartitions; model++){
1816 memcpy(&pll_aln[i][pos],
1817 &pllAlignment->sequenceData[i + 1][pllPartitions->partitionData[model]->lower],
1818 pllPartitions->partitionData[model]->width);
1819 pos += pllPartitions->partitionData[model]->width;
1820 }
1821 }
1822
1823 char * pll_site = new char[pllAlignment->sequenceCount + 1];
1824 char * site = new char[pllAlignment->sequenceCount + 1];
1825 for(int i = 0; i < pllAlignment->sequenceLength; i++){
1826 for(int j = 0; j < pllAlignment->sequenceCount; j++)
1827 pll_site[j]= pll_aln[j][i];
1828 pll_site[pllAlignment->sequenceCount] = '\0';
1829
1830 site[pllAlignment->sequenceCount] = '\0';
1831 for(int k = 0; k < aln->size(); k++){
1832 for(int p = 0; p < pllAlignment->sequenceCount; p++)
1833 site[p] = aln->convertStateBack(aln->at(k)[p]);
1834 pllBaseSubstitute(site, pllPartitions->partitionData[0]->dataType);
1835 if(memcmp(pll_site,site, pllAlignment->sequenceCount) == 0){
1836 pll2iqtree_pattern_index[i] = k;
1837 }
1838 }
1839 }
1840
1841 delete [] pll_site;
1842 delete [] site;
1843 for(int i = 0; i < pllAlignment->sequenceCount; i++)
1844 delete [] pll_aln[i];
1845 delete [] pll_aln;
1846 }
1847
1848
1849 /**
1850 * DTH:
1851 * Substitute bases in seq according to PLL's rules
1852 * This function should be updated if PLL's rules change.
1853 * @param seq: data of some sequence to be substituted
1854 * @param dataType: PLL_DNA_DATA or PLL_AA_DATA
1855 */
pllBaseSubstitute(char * seq,int dataType)1856 void IQTree::pllBaseSubstitute (char *seq, int dataType)
1857 {
1858 char meaningDNA[256];
1859 char meaningAA[256];
1860 char * d;
1861
1862 for (int i = 0; i < 256; ++ i)
1863 {
1864 meaningDNA[i] = -1;
1865 meaningAA[i] = -1;
1866 }
1867
1868 /* DNA data */
1869
1870 meaningDNA[(int)'A'] = 1;
1871 meaningDNA[(int)'B'] = 14;
1872 meaningDNA[(int)'C'] = 2;
1873 meaningDNA[(int)'D'] = 13;
1874 meaningDNA[(int)'G'] = 4;
1875 meaningDNA[(int)'H'] = 11;
1876 meaningDNA[(int)'K'] = 12;
1877 meaningDNA[(int)'M'] = 3;
1878 meaningDNA[(int)'R'] = 5;
1879 meaningDNA[(int)'S'] = 6;
1880 meaningDNA[(int)'T'] = 8;
1881 meaningDNA[(int)'U'] = 8;
1882 meaningDNA[(int)'V'] = 7;
1883 meaningDNA[(int)'W'] = 9;
1884 meaningDNA[(int)'Y'] = 10;
1885 meaningDNA[(int)'a'] = 1;
1886 meaningDNA[(int)'b'] = 14;
1887 meaningDNA[(int)'c'] = 2;
1888 meaningDNA[(int)'d'] = 13;
1889 meaningDNA[(int)'g'] = 4;
1890 meaningDNA[(int)'h'] = 11;
1891 meaningDNA[(int)'k'] = 12;
1892 meaningDNA[(int)'m'] = 3;
1893 meaningDNA[(int)'r'] = 5;
1894 meaningDNA[(int)'s'] = 6;
1895 meaningDNA[(int)'t'] = 8;
1896 meaningDNA[(int)'u'] = 8;
1897 meaningDNA[(int)'v'] = 7;
1898 meaningDNA[(int)'w'] = 9;
1899 meaningDNA[(int)'y'] = 10;
1900
1901 meaningDNA[(int)'N'] =
1902 meaningDNA[(int)'n'] =
1903 meaningDNA[(int)'O'] =
1904 meaningDNA[(int)'o'] =
1905 meaningDNA[(int)'X'] =
1906 meaningDNA[(int)'x'] =
1907 meaningDNA[(int)'-'] =
1908 meaningDNA[(int)'?'] = 15;
1909
1910 /* AA data */
1911
1912 meaningAA[(int)'A'] = 0; /* alanine */
1913 meaningAA[(int)'R'] = 1; /* arginine */
1914 meaningAA[(int)'N'] = 2; /* asparagine*/
1915 meaningAA[(int)'D'] = 3; /* aspartic */
1916 meaningAA[(int)'C'] = 4; /* cysteine */
1917 meaningAA[(int)'Q'] = 5; /* glutamine */
1918 meaningAA[(int)'E'] = 6; /* glutamic */
1919 meaningAA[(int)'G'] = 7; /* glycine */
1920 meaningAA[(int)'H'] = 8; /* histidine */
1921 meaningAA[(int)'I'] = 9; /* isoleucine */
1922 meaningAA[(int)'L'] = 10; /* leucine */
1923 meaningAA[(int)'K'] = 11; /* lysine */
1924 meaningAA[(int)'M'] = 12; /* methionine */
1925 meaningAA[(int)'F'] = 13; /* phenylalanine */
1926 meaningAA[(int)'P'] = 14; /* proline */
1927 meaningAA[(int)'S'] = 15; /* serine */
1928 meaningAA[(int)'T'] = 16; /* threonine */
1929 meaningAA[(int)'W'] = 17; /* tryptophan */
1930 meaningAA[(int)'Y'] = 18; /* tyrosine */
1931 meaningAA[(int)'V'] = 19; /* valine */
1932 meaningAA[(int)'B'] = 20; /* asparagine, aspartic 2 and 3*/
1933 meaningAA[(int)'Z'] = 21; /*21 glutamine glutamic 5 and 6*/
1934 meaningAA[(int)'a'] = 0; /* alanine */
1935 meaningAA[(int)'r'] = 1; /* arginine */
1936 meaningAA[(int)'n'] = 2; /* asparagine*/
1937 meaningAA[(int)'d'] = 3; /* aspartic */
1938 meaningAA[(int)'c'] = 4; /* cysteine */
1939 meaningAA[(int)'q'] = 5; /* glutamine */
1940 meaningAA[(int)'e'] = 6; /* glutamic */
1941 meaningAA[(int)'g'] = 7; /* glycine */
1942 meaningAA[(int)'h'] = 8; /* histidine */
1943 meaningAA[(int)'i'] = 9; /* isoleucine */
1944 meaningAA[(int)'l'] = 10; /* leucine */
1945 meaningAA[(int)'k'] = 11; /* lysine */
1946 meaningAA[(int)'m'] = 12; /* methionine */
1947 meaningAA[(int)'f'] = 13; /* phenylalanine */
1948 meaningAA[(int)'p'] = 14; /* proline */
1949 meaningAA[(int)'s'] = 15; /* serine */
1950 meaningAA[(int)'t'] = 16; /* threonine */
1951 meaningAA[(int)'w'] = 17; /* tryptophan */
1952 meaningAA[(int)'y'] = 18; /* tyrosine */
1953 meaningAA[(int)'v'] = 19; /* valine */
1954 meaningAA[(int)'b'] = 20; /* asparagine, aspartic 2 and 3*/
1955 meaningAA[(int)'z'] = 21; /*21 glutamine glutamic 5 and 6*/
1956
1957 meaningAA[(int)'X'] =
1958 meaningAA[(int)'x'] =
1959 meaningAA[(int)'?'] =
1960 meaningAA[(int)'*'] =
1961 meaningAA[(int)'-'] = 22;
1962
1963 d = (dataType == PLL_DNA_DATA) ? meaningDNA : meaningAA;
1964 int seq_len = strlen(seq);
1965 for (int i = 0; i < seq_len; ++ i)
1966 {
1967 seq[i] = d[(int)seq[i]];
1968 }
1969 }
1970
swapTaxa(PhyloNode * node1,PhyloNode * node2)1971 double IQTree::swapTaxa(PhyloNode *node1, PhyloNode *node2) {
1972 ASSERT(node1->isLeaf());
1973 ASSERT(node2->isLeaf());
1974
1975 PhyloNeighbor *node1nei = (PhyloNeighbor*) *(node1->neighbors.begin());
1976 PhyloNeighbor *node2nei = (PhyloNeighbor*) *(node2->neighbors.begin());
1977
1978 node2nei->node->updateNeighbor(node2, node1);
1979 node1nei->node->updateNeighbor(node1, node2);
1980
1981 // Update the new neightbors of the 2 nodes
1982 node1->updateNeighbor(node1->neighbors.begin(), node2nei);
1983 node2->updateNeighbor(node2->neighbors.begin(), node1nei);
1984
1985 PhyloNeighbor *node1NewNei = (PhyloNeighbor*) *(node1->neighbors.begin());
1986 PhyloNeighbor *node2NewNei = (PhyloNeighbor*) *(node2->neighbors.begin());
1987
1988 // Reoptimize the branch lengths
1989 optimizeOneBranch(node1, (PhyloNode*) node1NewNei->node);
1990 // this->curScore = optimizeOneBranch(node2, (PhyloNode*) node2NewNei->node);
1991 optimizeOneBranch(node2, (PhyloNode*) node2NewNei->node);
1992 //drawTree(cout, WT_BR_SCALE | WT_INT_NODE | WT_TAXON_ID | WT_NEWLINE);
1993 this->curScore = computeLikelihoodFromBuffer();
1994 return this->curScore;
1995 }
1996
perturb(int times)1997 double IQTree::perturb(int times) {
1998 while (times > 0) {
1999 NodeVector taxa;
2000 // get the vector of taxa
2001 getTaxa(taxa);
2002 int taxonid1 = random_int(taxa.size());
2003 PhyloNode *taxon1 = (PhyloNode*) taxa[taxonid1];
2004 PhyloNode *taxon2;
2005 int *dists = new int[taxa.size()];
2006 int minDist = 1000000;
2007 for (int i = 0; i < taxa.size(); i++) {
2008 if (i == taxonid1)
2009 continue;
2010 taxon2 = (PhyloNode*) taxa[i];
2011 int dist = taxon1->calDist(taxon2);
2012 dists[i] = dist;
2013 if (dist >= 7 && dist < minDist)
2014 minDist = dist;
2015 }
2016
2017 int taxonid2 = -1;
2018 for (int i = 0; i < taxa.size(); i++) {
2019 if (dists[i] == minDist)
2020 taxonid2 = i;
2021 }
2022
2023 taxon2 = (PhyloNode*) taxa[taxonid2];
2024
2025 cout << "Swapping node " << taxon1->id << " and node " << taxon2->id << endl;
2026 cout << "Distance " << minDist << endl;
2027 curScore = swapTaxa(taxon1, taxon2);
2028 //taxa.erase( taxa.begin() + taxaID1 );
2029 //taxa.erase( taxa.begin() + taxaID2 -1 );
2030
2031 times--;
2032 delete[] dists;
2033 }
2034 curScore = optimizeAllBranches(1);
2035 return curScore;
2036 }
2037
2038 //extern "C" pllUFBootData * pllUFBootDataPtr;
2039 extern pllUFBootData * pllUFBootDataPtr;
2040
optimizeModelParameters(bool printInfo,double logl_epsilon)2041 string IQTree::optimizeModelParameters(bool printInfo, double logl_epsilon) {
2042 if (logl_epsilon == -1)
2043 logl_epsilon = params->modelEps;
2044 cout << "Estimate model parameters (epsilon = " << logl_epsilon << ")" << endl;
2045 double stime = getRealTime();
2046 string newTree;
2047 if (params->pll) {
2048 if (curScore == -DBL_MAX) {
2049 pllEvaluateLikelihood(pllInst, pllPartitions, pllInst->start, PLL_TRUE, PLL_FALSE);
2050 } else {
2051 pllEvaluateLikelihood(pllInst, pllPartitions, pllInst->start, PLL_FALSE, PLL_FALSE);
2052 }
2053 pllOptimizeModelParameters(pllInst, pllPartitions, logl_epsilon);
2054 curScore = pllInst->likelihood;
2055 pllTreeToNewick(pllInst->tree_string, pllInst, pllPartitions,
2056 pllInst->start->back, PLL_TRUE,
2057 PLL_TRUE, PLL_FALSE, PLL_FALSE, PLL_FALSE, PLL_SUMMARIZE_LH,
2058 PLL_FALSE, PLL_FALSE);
2059 if (printInfo) {
2060 pllPrintModelParams();
2061 }
2062 newTree = string(pllInst->tree_string);
2063 double etime = getRealTime();
2064 if (printInfo)
2065 cout << etime - stime << " seconds (logl: " << curScore << ")" << endl;
2066 } else {
2067 double modOptScore;
2068 if (params->opt_gammai) { // DO RESTART ON ALPHA AND P_INVAR
2069 modOptScore = getModelFactory()->optimizeParametersGammaInvar(params->fixed_branch_length, printInfo, logl_epsilon);
2070 params->opt_gammai = false;
2071 } else {
2072 modOptScore = getModelFactory()->optimizeParameters(params->fixed_branch_length, printInfo, logl_epsilon);
2073 }
2074
2075 if (isSuperTree()) {
2076 ((PhyloSuperTree*) this)->computeBranchLengths();
2077 }
2078 if (getModelFactory()->isUnstableParameters() && aln->seq_type != SEQ_CODON) {
2079 cout << endl;
2080 outWarning("Estimated model parameters are at boundary that can cause numerical instability!");
2081 cout << endl;
2082 }
2083
2084 if (modOptScore < curScore - 1.0 && params->partition_type != TOPO_UNLINKED) {
2085 cout << " BUG: Tree logl gets worse after model optimization!" << endl;
2086 cout << " Old logl: " << curScore << " / " << "new logl: " << modOptScore << endl;
2087 printTree("debug.tree");
2088 abort();
2089 } else {
2090 curScore = modOptScore;
2091 newTree = getTreeString();
2092 }
2093 if (params->print_trees_site_posterior)
2094 computePatternCategories();
2095 }
2096
2097 return newTree;
2098 }
2099
printBestScores()2100 void IQTree::printBestScores() {
2101 vector<double> bestScores = candidateTrees.getBestScores(params->popSize);
2102 for (vector<double>::iterator it = bestScores.begin(); it != bestScores.end(); it++)
2103 cout << (*it) << " ";
2104 cout << endl;
2105 }
2106
computeLogL()2107 double IQTree::computeLogL() {
2108 if (params->pll) {
2109 if (curScore == -DBL_MAX) {
2110 pllEvaluateLikelihood(pllInst, pllPartitions, pllInst->start, PLL_TRUE, PLL_FALSE);
2111 } else {
2112 pllEvaluateLikelihood(pllInst, pllPartitions, pllInst->start, PLL_FALSE, PLL_FALSE);
2113 }
2114 curScore = pllInst->likelihood;
2115 } else {
2116 // if (!lhComputed) {
2117 // initializeAllPartialLh();
2118 // clearAllPartialLH();
2119 // }
2120 curScore = computeLikelihood();
2121 }
2122 return curScore;
2123 }
2124
optimizeBranches(int maxTraversal)2125 string IQTree::optimizeBranches(int maxTraversal) {
2126 string tree;
2127 if (params->pll) {
2128 if (curScore == -DBL_MAX) {
2129 pllEvaluateLikelihood(pllInst, pllPartitions, pllInst->start, PLL_TRUE, PLL_FALSE);
2130 // lhComputed = true;
2131 }
2132 pllOptimizeBranchLengths(pllInst, pllPartitions, maxTraversal);
2133 curScore = pllInst->likelihood;
2134 pllTreeToNewick(pllInst->tree_string, pllInst, pllPartitions, pllInst->start->back, PLL_TRUE, PLL_TRUE, PLL_FALSE, PLL_FALSE, PLL_FALSE,
2135 PLL_SUMMARIZE_LH, PLL_FALSE, PLL_FALSE);
2136 tree = string(pllInst->tree_string);
2137 } else {
2138 // if (!lhComputed) {
2139 // initializeAllPartialLh();
2140 // clearAllPartialLH();
2141 // lhComputed = true;
2142 // }
2143 curScore = optimizeAllBranches(maxTraversal, params->loglh_epsilon, PLL_NEWZPERCYCLE);
2144 tree = getTreeString();
2145 }
2146 return tree;
2147 }
2148
doTreeSearch()2149 double IQTree::doTreeSearch() {
2150
2151 if (params->numInitTrees > 1) {
2152 cout << "--------------------------------------------------------------------" << endl;
2153 cout << "| INITIALIZING CANDIDATE TREE SET |" << endl;
2154 cout << "--------------------------------------------------------------------" << endl;
2155 }
2156
2157 double initCPUTime = getRealTime();
2158 int treesPerProc = (params->numInitTrees) / MPIHelper::getInstance().getNumProcesses() - candidateTrees.size();
2159 if (params->numInitTrees % MPIHelper::getInstance().getNumProcesses() != 0) {
2160 treesPerProc++;
2161 }
2162 if (treesPerProc < 0)
2163 treesPerProc = 0;
2164 // Master node does one tree less because it already created the BIONJ tree
2165 // if (MPIHelper::getInstance().isMaster()) {
2166 // treesPerProc--;
2167 // }
2168
2169 // Make sure to get at least 1 tree
2170 if (treesPerProc < 1 && params->numInitTrees > candidateTrees.size())
2171 treesPerProc = 1;
2172
2173 /* Initialize candidate tree set */
2174 if (!getCheckpoint()->getBool("finishedCandidateSet")) {
2175 initCandidateTreeSet(treesPerProc, params->numNNITrees);
2176 // write best tree to disk
2177 printBestCandidateTree();
2178 saveCheckpoint();
2179 getCheckpoint()->putBool("finishedCandidateSet", true);
2180 getCheckpoint()->dump();
2181 } else {
2182 cout << "CHECKPOINT: Candidate tree set restored, best LogL: " << candidateTrees.getBestScore() << endl;
2183 }
2184 ASSERT(candidateTrees.size() != 0);
2185 cout << "Finish initializing candidate tree set (" << candidateTrees.size() << ")" << endl;
2186
2187
2188 cout << "Current best tree score: " << candidateTrees.getBestScore() << " / CPU time: " <<
2189 getRealTime() - initCPUTime << endl;
2190 cout << "Number of iterations: " << stop_rule.getCurIt() << endl;
2191
2192 // string treels_name = params->out_prefix;
2193 // treels_name += ".treels";
2194 // string out_lh_file = params->out_prefix;
2195 // out_lh_file += ".treelh";
2196 // string site_lh_file = params->out_prefix;
2197 // site_lh_file += ".sitelh";
2198 //
2199 // if (params->print_tree_lh) {
2200 // out_treelh.open(out_lh_file.c_str());
2201 // out_sitelh.open(site_lh_file.c_str());
2202 // }
2203
2204 // if (params->write_intermediate_trees)
2205 // out_treels.open(treels_name.c_str());
2206
2207 // if (params->write_intermediate_trees && save_all_trees != 2) {
2208 // printIntermediateTree(WT_NEWLINE | WT_APPEND | WT_SORT_TAXA | WT_BR_LEN);
2209 // }
2210
2211 setRootNode(params->root);
2212
2213 if (!getCheckpoint()->getBool("finishedCandidateSet"))
2214 cout << "CHECKPOINT: " << stop_rule.getCurIt() << " search iterations restored" << endl;
2215
2216 searchinfo.curPerStrength = params->initPS;
2217 double cur_correlation = 0.0;
2218
2219
2220 if ((Params::getInstance().fixStableSplits || Params::getInstance().adaptPertubation) && candidateTrees.size() > 1) {
2221 candidateTrees.computeSplitOccurences(Params::getInstance().stableSplitThreshold);
2222 }
2223
2224 // tracking of worker candidate set is changed from master candidate set
2225 candidateset_changed.resize(MPIHelper::getInstance().getNumProcesses(), false);
2226 bestcandidate_changed = false;
2227
2228 /*==============================================================================================================
2229 MAIN LOOP OF THE IQ-TREE ALGORITHM
2230 *=============================================================================================================*/
2231
2232 bool early_stop = stop_rule.meetStopCondition(stop_rule.getCurIt(), cur_correlation);
2233 if (!early_stop) {
2234 cout << "--------------------------------------------------------------------" << endl;
2235 cout << "| OPTIMIZING CANDIDATE TREE SET |" << endl;
2236 cout << "--------------------------------------------------------------------" << endl;
2237 }
2238
2239 // count threshold for computing bootstrap correlation
2240 int ufboot_count, ufboot_count_check;
2241 stop_rule.getUFBootCountCheck(ufboot_count, ufboot_count_check);
2242
2243 while (!stop_rule.meetStopCondition(stop_rule.getCurIt(), cur_correlation)) {
2244
2245 searchinfo.curIter = stop_rule.getCurIt();
2246 // estimate logl_cutoff for bootstrap
2247 if (!boot_orig_logl.empty())
2248 logl_cutoff = *min_element(boot_orig_logl.begin(), boot_orig_logl.end());
2249
2250 if (estimate_nni_cutoff && nni_info.size() >= 500) {
2251 estimate_nni_cutoff = false;
2252 estimateNNICutoff(params);
2253 }
2254
2255 Alignment *saved_aln = aln;
2256
2257 string curTree;
2258 /*----------------------------------------
2259 * Perturb the tree
2260 *---------------------------------------*/
2261 doTreePerturbation();
2262
2263 /*----------------------------------------
2264 * Optimize tree with NNI
2265 *----------------------------------------*/
2266 pair<int, int> nniInfos; // <num_NNIs, num_steps>
2267 nniInfos = doNNISearch();
2268 curTree = getTreeString();
2269 int pos = addTreeToCandidateSet(curTree, curScore, true, MPIHelper::getInstance().getProcessID());
2270 if (pos != -2 && pos != -1 && (Params::getInstance().fixStableSplits || Params::getInstance().adaptPertubation))
2271 candidateTrees.computeSplitOccurences(Params::getInstance().stableSplitThreshold);
2272
2273 if (MPIHelper::getInstance().isWorker() || MPIHelper::getInstance().gotMessage())
2274 syncCurrentTree();
2275
2276
2277 // TODO: cannot check yet, need to somehow return treechanged
2278 // if (nni_count == 0 && params->snni && numPerturb > 0 && treechanged) {
2279 // assert(0 && "BUG: NNI could not improved perturbed tree");
2280 // }
2281 //
2282 if (iqp_assess_quartet == IQP_BOOTSTRAP) {
2283 // restore alignment
2284 delete aln;
2285 setAlignment(saved_aln);
2286 initializeAllPartialLh();
2287 clearAllPartialLH();
2288 }
2289
2290 if (isSuperTree()) {
2291 ((PhyloSuperTree *) this)->computeBranchLengths();
2292 }
2293
2294 /*----------------------------------------
2295 * Print information
2296 *---------------------------------------*/
2297 //printInterationInfo();
2298
2299 // if (params->write_intermediate_trees && save_all_trees != 2) {
2300 // printIntermediateTree(WT_NEWLINE | WT_APPEND | WT_SORT_TAXA | WT_BR_LEN);
2301 // }
2302
2303 if (params->snni && verbose_mode >= VB_DEBUG) {
2304 printBestScores();
2305 }
2306
2307 // DTH: make pllUFBootData usable in summarizeBootstrap
2308 if (params->pll && params->online_bootstrap && (params->gbo_replicates > 0))
2309 pllConvertUFBootData2IQTree();
2310 // DTH: Carefully watch the -pll case here
2311
2312 /*----------------------------------------
2313 * convergence criterion for ultrafast bootstrap
2314 *---------------------------------------*/
2315
2316
2317 // MASTER receives bootstrap trees and perform stop convergence test
2318 if ((stop_rule.getCurIt()) >= ufboot_count &&
2319 params->stop_condition == SC_BOOTSTRAP_CORRELATION && MPIHelper::getInstance().isMaster()) {
2320 ufboot_count += params->step_iterations/2;
2321 // compute split support every half step
2322 SplitGraph *sg = new SplitGraph;
2323 summarizeBootstrap(*sg);
2324 sg->removeTrivialSplits();
2325 sg->setCheckpoint(checkpoint);
2326 boot_splits.push_back(sg);
2327 cout << "Log-likelihood cutoff on original alignment: " << logl_cutoff << endl;
2328 // MPIHelper::getInstance().sendMsg(LOGL_CUTOFF_TAG, convertDoubleToString(logl_cutoff));
2329
2330 // check convergence every full step
2331 if (stop_rule.getCurIt() >= ufboot_count_check) {
2332 ufboot_count_check += params->step_iterations;
2333 cur_correlation = computeBootstrapCorrelation();
2334 cout << "NOTE: Bootstrap correlation coefficient of split occurrence frequencies: " <<
2335 cur_correlation << endl;
2336 if (!stop_rule.meetCorrelation(cur_correlation)) {
2337 cout << "NOTE: UFBoot does not converge, continue at least " << params->step_iterations << " more iterations" << endl;
2338 }
2339 }
2340 if (params->gbo_replicates && params->online_bootstrap && params->print_ufboot_trees)
2341 writeUFBootTrees(*params);
2342
2343 } // end of bootstrap convergence test
2344
2345 // print UFBoot trees every 10 iterations
2346
2347 saveCheckpoint();
2348 checkpoint->dump();
2349
2350 if (bestcandidate_changed) {
2351 printBestCandidateTree();
2352 bestcandidate_changed = false;
2353 }
2354
2355 //if (params->partition_type)
2356 // ((PhyloSuperTreePlen*)this)->printNNIcasesNUM();
2357
2358 }
2359
2360 // 2019-06-03: check convergence here to avoid effect of refineBootTrees
2361 if (boot_splits.size() >= 2 && MPIHelper::getInstance().isMaster()) {
2362 // check the stopping criterion for ultra-fast bootstrap
2363 if (computeBootstrapCorrelation() < params->min_correlation)
2364 cout << "WARNING: bootstrap analysis did not converge. You should rerun with higher number of iterations (-nm option)" << endl;
2365
2366 }
2367
2368 if(params->ufboot2corr) refineBootTrees();
2369
2370 if (!early_stop)
2371 sendStopMessage();
2372
2373 readTreeString(candidateTrees.getBestTreeStrings()[0]);
2374
2375 if (testNNI)
2376 outNNI.close();
2377 if (params->write_intermediate_trees)
2378 out_treels.close();
2379 if (params->print_tree_lh) {
2380 out_treelh.close();
2381 out_sitelh.close();
2382 }
2383
2384 // DTH: pllUFBoot deallocation
2385 if (params->pll) {
2386 pllDestroyUFBootData();
2387 }
2388
2389 #ifdef _IQTREE_MPI
2390 cout << "Total number of trees received: " << MPIHelper::getInstance().getNumTreeReceived() << endl;
2391 cout << "Total number of trees sent: " << MPIHelper::getInstance().getNumTreeSent() << endl;
2392 cout << "Total number of NNI searches done by myself: " << MPIHelper::getInstance().getNumNNISearch() << endl;
2393 MPIHelper::getInstance().resetNumbers();
2394 #endif
2395
2396 cout << "TREE SEARCH COMPLETED AFTER " << stop_rule.getCurIt() << " ITERATIONS"
2397 << " / Time: " << convert_time(getRealTime() - params->start_real_time) << endl << endl;
2398
2399 return candidateTrees.getBestScore();
2400
2401 }
2402
2403
2404 /*
2405 void IQTree::refineBootTrees(){
2406 on_refine_btree = true;
2407 int num_boot_rep = params->gbo_replicates;
2408 params->gbo_replicates = 0;
2409 save_all_trees = 0;
2410
2411 NNI_Type saved_nni_type = params->nni_type;
2412 if(params->u2c_nni5 == false){
2413 params->nni5 = false;
2414 params->nni_type = NNI1;
2415 }else{
2416 params->nni5 = true;
2417 params->nni_type = NNI5;
2418 }
2419
2420 string saved_tree = getTreeString();
2421 saved_aln_on_refine_btree = aln;
2422
2423 int nptn = getAlnNPattern();
2424 cout << "npn = " << nptn << endl;
2425
2426 string tree;
2427 Alignment * bootstrap_aln;
2428
2429 int *saved_randstream = randstream;
2430 init_random(params->ran_seed);
2431
2432 string file_name = params->out_prefix;
2433 file_name += ".binfo";
2434 ofstream binfo(file_name.c_str());
2435
2436 file_name = params->out_prefix;
2437 file_name += ".btree.pre";
2438 ofstream btreep(file_name.c_str(), ios::app);
2439
2440 file_name = params->out_prefix;
2441 file_name += ".btree.aft";
2442 ofstream btreea(file_name.c_str(), ios::app);
2443
2444 file_name = params->out_prefix;
2445 file_name += ".btree.pre.brlen";
2446 ofstream btreep_brlen(file_name.c_str(), ios::app);
2447
2448 file_name = params->out_prefix;
2449 file_name += ".btree.aft.brlen";
2450 ofstream btreea_brlen(file_name.c_str(), ios::app);
2451
2452 file_name = params->out_prefix;
2453 file_name += ".refine.aln";
2454 ofstream refine_aln(file_name.c_str(), ios::app);
2455
2456
2457 if(!isSuperTree()){
2458 getModelFactory()->model->writeInfo(binfo);
2459 getModelFactory()->site_rate->writeInfo(binfo);
2460 }else{
2461 ((PhyloSuperTree *)this)->at(0)->getModelFactory()->model->writeInfo(binfo);
2462 ((PhyloSuperTree *)this)->at(0)->getModelFactory()->site_rate->writeInfo(binfo);
2463 }
2464
2465 PhyloSuperTree *stree = (isSuperTree()) ? (PhyloSuperTree*)this : NULL;
2466
2467 for(int sample = 0; sample < num_boot_rep; sample++){
2468 if ((sample+1) % 100 == 0)
2469 cout << sample+1 << " replicates done" << endl;
2470
2471 if (saved_aln_on_refine_btree->isSuperAlignment())
2472 bootstrap_aln = new SuperAlignment;
2473 else
2474 bootstrap_aln = new Alignment;
2475
2476 // bootstrap_aln->buildFromPatternFreq(*saved_aln_on_refine_btree, boot_samples_int[sample]);
2477 IntVector this_sample;
2478 bootstrap_aln->createBootstrapAlignment(saved_aln_on_refine_btree, &this_sample, params->bootstrap_spec);
2479
2480 if (isSuperTree())
2481 stree->setSuperAlignment(bootstrap_aln);
2482 else
2483 setAlignment(bootstrap_aln);
2484
2485 for(int k = 0; k < nptn; k++){
2486 if(boot_samples_int[sample][k] != this_sample[k]){
2487 assert(0 && "Not identical");
2488 }
2489 }
2490
2491 // bootstrap_aln->createBootstrapAlignment(saved_aln_on_refine_btree, NULL, params->bootstrap_spec);
2492 ptn_freq_computed = false;
2493 computePtnFreq();
2494 if (isSuperTree()) {
2495 for (auto it = stree->begin(); it != stree->end(); it++) {
2496 (*it)->ptn_freq_computed = false;
2497 (*it)->computePtnFreq();
2498 }
2499 }
2500
2501
2502 tree = boot_trees[sample];
2503 // Read the bootstrap tree
2504 // cout << tree << endl;
2505
2506 readTreeString(tree);
2507
2508
2509 if(!isSuperTree())
2510 aln->printPhylip(refine_aln, true);
2511 else
2512 ((SuperAlignment *) aln)->printCombinedAlignment(refine_aln, true);
2513
2514 setRootNode(params->root);
2515
2516 if (isSuperTree()){
2517 wrapperFixNegativeBranch(true);
2518 ((PhyloSuperTree*) this)->mapTrees();
2519 }
2520 else{
2521 initializeAllPartialLh();
2522 clearAllPartialLH();
2523 wrapperFixNegativeBranch(false);
2524 }
2525
2526
2527 // printTree(cout);
2528 // cout << endl;
2529 optimizeBranches();
2530 // printTree(cout);
2531 // cout << endl;
2532
2533 curScore = computeLogL();
2534 binfo << sample << "\t" << curScore;
2535
2536 printTree(btreep, WT_NEWLINE | WT_SORT_TAXA);
2537 printTree(btreep_brlen, WT_NEWLINE | WT_SORT_TAXA | WT_BR_LEN);
2538
2539 pair<int, int> nniInfos; // <num_NNIs, num_steps>
2540 nniInfos = doNNISearch();
2541 curScore = computeLogL();
2542 binfo << "\t" << curScore << endl;
2543
2544 stringstream ostr;
2545 printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA);
2546 tree = ostr.str();
2547 boot_trees[sample] = getTreeString();
2548 boot_logl[sample] = curScore;
2549
2550 printTree(btreea, WT_NEWLINE | WT_SORT_TAXA);
2551 printTree(btreea_brlen, WT_NEWLINE | WT_SORT_TAXA | WT_BR_LEN);
2552 delete aln;
2553 }
2554
2555 binfo.close();
2556 btreep.close();
2557 btreea.close();
2558
2559 // delete aln;
2560
2561 // restore randstream
2562 finish_random();
2563 randstream = saved_randstream;
2564
2565 // Recover the last status of IQTREE
2566
2567 params->gbo_replicates = num_boot_rep;
2568
2569 if (isSuperTree())
2570 stree->setSuperAlignment(saved_aln_on_refine_btree);
2571 else
2572 setAlignment(saved_aln_on_refine_btree);
2573
2574 readTreeString(saved_tree);
2575
2576 ptn_freq_computed = false;
2577 computePtnFreq();
2578 if (isSuperTree()) {
2579 for (auto it = stree->begin(); it != stree->end(); it++) {
2580 (*it)->ptn_freq_computed = false;
2581 (*it)->computePtnFreq();
2582 }
2583 }
2584
2585 if(params->u2c_nni5 == false){
2586 params->nni_type = saved_nni_type;
2587 if(params->nni_type == NNI5)
2588 params->nni5 = true;
2589 }
2590
2591 initializeAllPartialLh();
2592 clearAllPartialLH();
2593 curScore = optimizeAllBranches();
2594
2595 save_all_trees = 2;
2596 on_refine_btree = false;
2597 }
2598 */
2599
2600 /**********************************************************
2601 * STANDARD NON-PARAMETRIC BOOTSTRAP
2602 ***********************************************************/
refineBootTrees()2603 void IQTree::refineBootTrees() {
2604
2605 int *saved_randstream = randstream;
2606 init_random(params->ran_seed);
2607
2608 params->gbo_replicates = 0;
2609
2610 NNI_Type saved_nni_type = params->nni_type;
2611 // TODO: A bug in PhyloSuperTreePlen::swapNNIBranch by nni1
2612 // Thus always turn on -nni5 by PhyloSuperTreePlen
2613 if(params->u2c_nni5 == false && (!isSuperTree() || params->partition_type == BRLEN_OPTIMIZE)) {
2614 params->nni5 = false;
2615 params->nni_type = NNI1;
2616 }else{
2617 params->nni5 = true;
2618 params->nni_type = NNI5;
2619 }
2620
2621 cout << "Refining ufboot trees with NNI ";
2622 if (params->nni5)
2623 cout << "5 branches..." << endl;
2624 else
2625 cout << "1 branch..." << endl;
2626
2627 int refined_trees = 0;
2628
2629 int refined_samples = 0;
2630
2631 checkpoint->startStruct("UFBoot");
2632 if (CKP_RESTORE(refined_samples))
2633 cout << "CHECKPOINT: " << refined_samples << " refined samples restored" << endl;
2634 checkpoint->endStruct();
2635
2636 // 2018-08-17: delete duplicated memory
2637 deleteAllPartialLh();
2638
2639 ModelsBlock *models_block = readModelsDefinition(*params);
2640
2641 // do bootstrap analysis
2642 for (int sample = refined_samples; sample < boot_trees.size(); sample++) {
2643 // create bootstrap alignment
2644 Alignment* bootstrap_alignment;
2645 if (aln->isSuperAlignment())
2646 bootstrap_alignment = new SuperAlignment;
2647 else
2648 bootstrap_alignment = new Alignment;
2649 bootstrap_alignment->createBootstrapAlignment(aln, NULL, params->bootstrap_spec);
2650
2651 // create bootstrap tree
2652 IQTree *boot_tree;
2653 if (aln->isSuperAlignment()){
2654 if(params->partition_type != BRLEN_OPTIMIZE){
2655 boot_tree = new PhyloSuperTreePlen((SuperAlignment*) bootstrap_alignment, (PhyloSuperTree*) this);
2656 } else {
2657 boot_tree = new PhyloSuperTree((SuperAlignment*) bootstrap_alignment, (PhyloSuperTree*) this);
2658 }
2659 } else {
2660 // allocate heterotachy tree if neccessary
2661 int pos = posRateHeterotachy(aln->model_name);
2662
2663 if (params->num_mixlen > 1) {
2664 boot_tree = new PhyloTreeMixlen(bootstrap_alignment, params->num_mixlen);
2665 } else if (pos != string::npos) {
2666 boot_tree = new PhyloTreeMixlen(bootstrap_alignment, 0);
2667 } else
2668 boot_tree = new IQTree(bootstrap_alignment);
2669 }
2670
2671 boot_tree->on_refine_btree = true;
2672 boot_tree->save_all_trees = 0;
2673
2674 // initialize constraint tree
2675 if (!constraintTree.empty()) {
2676 boot_tree->constraintTree.readConstraint(constraintTree);
2677 }
2678
2679 boot_tree->setParams(params);
2680
2681 // 2019-06-03: bug fix setting part_info properly
2682 if (boot_tree->isSuperTree())
2683 ((PhyloSuperTree*)boot_tree)->setPartInfo((PhyloSuperTree*)this);
2684
2685 // copy model
2686 // BQM 2019-05-31: bug fix with -bsam option
2687 boot_tree->initializeModel(*params, aln->model_name, models_block);
2688 boot_tree->getModelFactory()->setCheckpoint(getCheckpoint());
2689 if (isSuperTree())
2690 ((PartitionModel*)boot_tree->getModelFactory())->PartitionModel::restoreCheckpoint();
2691 else
2692 boot_tree->getModelFactory()->restoreCheckpoint();
2693
2694 // set likelihood kernel
2695 boot_tree->setParams(params);
2696 boot_tree->setLikelihoodKernel(sse);
2697 boot_tree->setNumThreads(num_threads);
2698
2699 // load the current ufboot tree
2700 // 2019-02-06: fix crash with -sp and -bnni
2701 if (isSuperTree())
2702 boot_tree->PhyloTree::readTreeString(boot_trees[sample]);
2703 else
2704 boot_tree->readTreeString(boot_trees[sample]);
2705
2706 if (boot_tree->isSuperTree() && params->partition_type == BRLEN_OPTIMIZE) {
2707 if (((PhyloSuperTree*)boot_tree)->size() > 1) {
2708 // re-initialize branch lengths for unlinked model
2709 boot_tree->wrapperFixNegativeBranch(true);
2710 }
2711 }
2712
2713 // TODO: check if this resolves the crash in reorientPartialLh()
2714 boot_tree->initializeAllPartialLh();
2715
2716 // just in case some branch lengths are negative
2717 if (int num_neg = boot_tree->wrapperFixNegativeBranch(false))
2718 outWarning("Bootstrap tree " + convertIntToString(sample+1) + " has " +
2719 convertIntToString(num_neg) + "non-positive branch lengths");
2720
2721 // REMARK: branch lengths were estimated from original alignments
2722 // for bootstrap_alignment, they still thus need to be reoptimized a bit
2723 boot_tree->optimizeBranches(2);
2724
2725
2726 auto num_nnis = boot_tree->doNNISearch();
2727 if (num_nnis.second != 0)
2728 refined_trees++;
2729
2730 if (verbose_mode >= VB_MED) {
2731 cout << "UFBoot tree " << sample+1 << ": " << boot_logl[sample] << " -> " << boot_tree->getCurScore() << endl;
2732 }
2733
2734 stringstream ostr;
2735 if (params->print_ufboot_trees == 2)
2736 boot_tree->printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA | WT_BR_LEN | WT_BR_LEN_SHORT);
2737 else
2738 boot_tree->printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA);
2739 boot_trees[sample] = ostr.str();
2740 boot_logl[sample] = boot_tree->curScore;
2741
2742
2743 // delete memory
2744 //boot_tree->setModelFactory(NULL);
2745 boot_tree->save_all_trees = 2;
2746
2747 bootstrap_alignment = boot_tree->aln;
2748 delete boot_tree;
2749 // fix bug: bootstrap_alignment might be changed
2750 delete bootstrap_alignment;
2751
2752
2753 if ((sample+1) % 100 == 0)
2754 cout << sample+1 << " samples done" << endl;
2755
2756 saveCheckpoint();
2757 checkpoint->startStruct("UFBoot");
2758 refined_samples = sample;
2759 CKP_SAVE(refined_samples);
2760 checkpoint->endStruct();
2761
2762 checkpoint->dump();
2763
2764 }
2765
2766 delete models_block;
2767
2768 cout << "Total " << refined_trees << " ufboot trees refined" << endl;
2769
2770 // restore randstream
2771 finish_random();
2772 randstream = saved_randstream;
2773
2774 SplitGraph *sg = new SplitGraph;
2775 summarizeBootstrap(*sg);
2776 sg->removeTrivialSplits();
2777 sg->setCheckpoint(checkpoint);
2778 boot_splits.push_back(sg);
2779
2780 saveCheckpoint();
2781 checkpoint->dump();
2782
2783 // restore
2784 params->gbo_replicates = boot_trees.size();
2785 params->nni_type = saved_nni_type;
2786 if(params->nni_type == NNI5) {
2787 params->nni5 = true;
2788 } else
2789 params->nni5 = false;
2790
2791 // 2018-08-17: recover memory
2792 initializeAllPartialLh();
2793
2794 }
2795
2796
printIterationInfo(int sourceProcID)2797 void IQTree::printIterationInfo(int sourceProcID) {
2798 double realtime_remaining = stop_rule.getRemainingTime(stop_rule.getCurIt());
2799 cout.setf(ios_base::fixed, ios_base::floatfield);
2800
2801 // only print every 10th iteration or high verbose mode
2802 if (stop_rule.getCurIt() % 10 == 0 || verbose_mode >= VB_MED) {
2803 cout << ((iqp_assess_quartet == IQP_BOOTSTRAP) ? "Bootstrap " : "Iteration ") << stop_rule.getCurIt() <<
2804 " / LogL: ";
2805 cout << curScore;
2806 cout << " / Time: " << convert_time(getRealTime() - params->start_real_time);
2807
2808 if (stop_rule.getCurIt() > 20) {
2809 cout << " (" << convert_time(realtime_remaining) << " left)";
2810 }
2811 if (MPIHelper::getInstance().getNumProcesses() > 1)
2812 cout << " / Process: " << sourceProcID;
2813 cout << endl;
2814 }
2815 }
2816
2817 //void IQTree::estimateLoglCutoffBS() {
2818 // if (params->avoid_duplicated_trees && max_candidate_trees > 0 && treels_logl.size() > 1000) {
2819 // int predicted_iteration;
2820 // predicted_iteration = ((stop_rule.getCurIt() + params->step_iterations - 1) / params->step_iterations)
2821 // * params->step_iterations;
2822 // int num_entries = (int) floor(max_candidate_trees * ((double) stop_rule.getCurIt() / predicted_iteration));
2823 // if (num_entries < treels_logl.size() * 0.9) {
2824 // DoubleVector logl = treels_logl;
2825 // nth_element(logl.begin(), logl.begin() + (treels_logl.size() - num_entries), logl.end());
2826 // logl_cutoff = logl[treels_logl.size() - num_entries] - 1.0;
2827 // } else
2828 // logl_cutoff = 0.0;
2829 // if (verbose_mode >= VB_MED) {
2830 // if (stop_rule.getCurIt() % 10 == 0) {
2831 // cout << treels.size() << " trees, " << treels_logl.size() << " logls, logl_cutoff= " << logl_cutoff;
2832 // if (params->store_candidate_trees)
2833 // cout << " duplicates= " << duplication_counter << " ("
2834 // << (int) round(100 * ((double) duplication_counter / treels_logl.size())) << "%)" << endl;
2835 // else
2836 // cout << endl;
2837 // }
2838 // }
2839 // }
2840 //}
2841
2842
doTreePerturbation()2843 double IQTree::doTreePerturbation() {
2844 if (iqp_assess_quartet == IQP_BOOTSTRAP) {
2845 // create bootstrap sample
2846 Alignment *bootstrap_alignment;
2847 if (aln->isSuperAlignment())
2848 bootstrap_alignment = new SuperAlignment;
2849 else
2850 bootstrap_alignment = new Alignment;
2851 bootstrap_alignment->createBootstrapAlignment(aln, NULL, params->bootstrap_spec);
2852 setAlignment(bootstrap_alignment);
2853 initializeAllPartialLh();
2854 clearAllPartialLH();
2855 curScore = optimizeAllBranches();
2856 } else {
2857 if (params->snni) {
2858 if (Params::getInstance().five_plus_five) {
2859 readTreeString(candidateTrees.getNextCandTree());
2860 } else {
2861 readTreeString(candidateTrees.getRandTopTree(Params::getInstance().popSize));
2862 }
2863 if (Params::getInstance().iqp) {
2864 doIQP();
2865 } else if (Params::getInstance().adaptPertubation) {
2866 perturbStableSplits(Params::getInstance().stableSplitThreshold);
2867 } else {
2868 doRandomNNIs(Params::getInstance().tabu);
2869 }
2870 } else {
2871 // Using the IQPNNI algorithm (best tree is selected)
2872 readTreeString(getBestTrees()[0]);
2873 doIQP();
2874 }
2875 if (params->count_trees) {
2876 string perturb_tree_topo = getTopologyString(false);
2877 if (pllTreeCounter.find(perturb_tree_topo) == pllTreeCounter.end()) {
2878 // not found in hash_map
2879 pllTreeCounter[perturb_tree_topo] = 1;
2880 } else {
2881 // found in hash_map
2882 pllTreeCounter[perturb_tree_topo]++;
2883 }
2884 }
2885 //optimizeBranches(1);
2886 curScore = computeLogL();
2887 }
2888 return curScore;
2889 }
2890
2891 /****************************************************************************
2892 Fast Nearest Neighbor Interchange by maximum likelihood
2893 ****************************************************************************/
doNNISearch(bool write_info)2894 pair<int, int> IQTree::doNNISearch(bool write_info) {
2895
2896 computeLogL();
2897 double curBestScore = getBestScore();
2898
2899 if (Params::getInstance().write_intermediate_trees && save_all_trees != 2) {
2900 printIntermediateTree(WT_NEWLINE | WT_APPEND | WT_SORT_TAXA | WT_BR_LEN);
2901 }
2902
2903 pair<int, int> nniInfos; // Number of NNIs and number of steps
2904 if (params->pll) {
2905 if (params->partition_file)
2906 outError("Unsupported -pll -sp combination!");
2907 curScore = pllOptimizeNNI(nniInfos.first, nniInfos.second, searchinfo);
2908 pllTreeToNewick(pllInst->tree_string, pllInst, pllPartitions, pllInst->start->back, PLL_TRUE,
2909 PLL_TRUE, 0, 0, 0, PLL_SUMMARIZE_LH, 0, 0);
2910 readTreeString(string(pllInst->tree_string));
2911 } else {
2912 nniInfos = optimizeNNI(Params::getInstance().speednni);
2913 if (isSuperTree()) {
2914 ((PhyloSuperTree*) this)->computeBranchLengths();
2915 }
2916 if (params->print_trees_site_posterior)
2917 computePatternCategories();
2918 }
2919
2920 if(!on_refine_btree){ // Diep add (IF in Refinement Step, do not optimize model parameters)
2921 // Better tree or score is found
2922 if (getCurScore() > curBestScore + params->modelEps) {
2923 // Re-optimize model parameters (the sNNI algorithm)
2924 optimizeModelParameters(write_info, params->modelEps * 10);
2925 getModelFactory()->saveCheckpoint();
2926
2927 // 2018-01-09: additional optimize root position
2928 // TODO: does not work with SuperTree yet
2929 if (rooted && !isSuperTree() && params->root_move_dist > 0)
2930 {
2931 optimizeRootPosition(params->root_move_dist, true, params->modelEps * 10);
2932 }
2933 }
2934 }
2935 MPIHelper::getInstance().setNumNNISearch(MPIHelper::getInstance().getNumNNISearch() + 1);
2936
2937 return nniInfos;
2938 }
2939
optimizeNNI(bool speedNNI)2940 pair<int, int> IQTree::optimizeNNI(bool speedNNI) {
2941 unsigned int totalNNIApplied = 0;
2942 unsigned int numSteps = 0;
2943 const int MAXSTEPS = leafNum;
2944 // unsigned int numInnerBranches = leafNum - 3;
2945 double curBestScore = candidateTrees.getBestScore();
2946
2947 // if (isMixlen())
2948 // optimizeBranches();
2949
2950 Branches nniBranches;
2951 Branches nonNNIBranches;
2952 vector<NNIMove> positiveNNIs;
2953 vector<NNIMove> appliedNNIs;
2954 SplitIntMap tabuSplits;
2955 if (!initTabuSplits.empty()) {
2956 tabuSplits = initTabuSplits;
2957 }
2958
2959 double originalScore = curScore;
2960 for (numSteps = 1; numSteps <= MAXSTEPS; numSteps++) {
2961
2962 // cout << "numSteps = " << numSteps << endl;
2963 double oldScore = curScore;
2964 if (save_all_trees == 2) {
2965 saveCurrentTree(curScore); // BQM: for new bootstrap
2966 }
2967 if (verbose_mode >= VB_DEBUG) {
2968 cout << "Doing NNI round " << numSteps << endl;
2969 if (isSuperTree()) {
2970 ((PhyloSuperTree*) this)->printMapInfo();
2971 }
2972 }
2973
2974 // save all current branch lengths
2975 DoubleVector lenvec;
2976 saveBranchLengths(lenvec);
2977
2978 // for super tree
2979 initPartitionInfo();
2980
2981 nniBranches.clear();
2982 nonNNIBranches.clear();
2983
2984 bool startSpeedNNI;
2985 // When tabu and speednni are combined, speednni is only start from third steps
2986 if (!initTabuSplits.empty() && numSteps < 3) {
2987 startSpeedNNI = false;
2988 } else if (speedNNI && !appliedNNIs.empty()) {
2989 startSpeedNNI = true;
2990 } else {
2991 startSpeedNNI = false;
2992 }
2993
2994 if (startSpeedNNI) {
2995 // speedNNI option: only evaluate NNIs that are 2 branches away from the previously applied NNI
2996 Branches filteredNNIBranches;
2997 filterNNIBranches(appliedNNIs, filteredNNIBranches);
2998 for (Branches::iterator it = filteredNNIBranches.begin(); it != filteredNNIBranches.end(); it++) {
2999 Branch curBranch = it->second;
3000 PhyloNeighbor* nei = (PhyloNeighbor*) curBranch.first->findNeighbor(curBranch.second);
3001 Split* curSplit = nei->split;
3002 bool tabu = false;
3003 bool stable = false;
3004 if (!tabuSplits.empty()) {
3005 int value;
3006 if (tabuSplits.findSplit(curSplit, value) != NULL)
3007 tabu = true;
3008 }
3009 if (!candidateTrees.getCandSplits().empty()) {
3010 int value;
3011 if (candidateTrees.getCandSplits().findSplit(curSplit, value) != NULL)
3012 stable = true;
3013
3014 }
3015 if (!tabu && !stable) {
3016 int branchID = pairInteger(curBranch.first->id, curBranch.second->id);
3017 nniBranches.insert(pair<int, Branch>(branchID, curBranch));
3018 }
3019 }
3020 } else {
3021 getNNIBranches(tabuSplits, candidateTrees.getCandSplits(), nonNNIBranches, nniBranches);
3022 }
3023
3024 if (!tabuSplits.empty()) {
3025 tabuSplits.clear();
3026 }
3027
3028 positiveNNIs.clear();
3029 evaluateNNIs(nniBranches, positiveNNIs);
3030
3031 if (positiveNNIs.size() == 0) {
3032 if (!nonNNIBranches.empty() && totalNNIApplied == 0) {
3033 evaluateNNIs(nonNNIBranches, positiveNNIs);
3034 if (positiveNNIs.size() == 0) {
3035 break;
3036 }
3037 } else {
3038 break;
3039 }
3040 }
3041
3042 /* sort all positive NNI moves (ASCENDING) */
3043 sort(positiveNNIs.begin(), positiveNNIs.end());
3044
3045 /* remove conflicting NNIs */
3046 appliedNNIs.clear();
3047 getCompatibleNNIs(positiveNNIs, appliedNNIs);
3048
3049 // do non-conflicting positive NNIs
3050 doNNIs(appliedNNIs);
3051 curScore = optimizeAllBranches(1, params->loglh_epsilon, PLL_NEWZPERCYCLE);
3052
3053 if (curScore < appliedNNIs.at(0).newloglh - params->loglh_epsilon) {
3054 //cout << "Tree getting worse: curScore = " << curScore << " / best score = " << appliedNNIs.at(0).newloglh << endl;
3055 // tree cannot be worse if only 1 NNI is applied
3056 if (appliedNNIs.size() > 1) {
3057 // revert all applied NNIs
3058 doNNIs(appliedNNIs);
3059 restoreBranchLengths(lenvec);
3060 clearAllPartialLH();
3061 // only do the best NNI
3062 appliedNNIs.resize(1);
3063 doNNIs(appliedNNIs);
3064 curScore = optimizeAllBranches(1, params->loglh_epsilon, PLL_NEWZPERCYCLE);
3065 ASSERT(curScore > appliedNNIs.at(0).newloglh - 0.1);
3066 } else
3067 ASSERT(curScore > appliedNNIs.at(0).newloglh - 0.1 && "Using one NNI reduces LogL");
3068 totalNNIApplied++;
3069 } else {
3070 totalNNIApplied += appliedNNIs.size();
3071 }
3072
3073 if(curScore < oldScore - params->loglh_epsilon){
3074 cout << "$$$$$$$$: " << curScore << "\t" << oldScore << "\t" << curScore - oldScore << endl;
3075
3076 }
3077
3078 if (curScore - oldScore < params->loglh_epsilon)
3079 break;
3080
3081 if (params->snni && (curScore > curBestScore + 0.1)) {
3082 curBestScore = curScore;
3083 }
3084
3085 if (Params::getInstance().write_intermediate_trees && save_all_trees != 2) {
3086 printIntermediateTree(WT_NEWLINE | WT_APPEND | WT_SORT_TAXA | WT_BR_LEN);
3087 }
3088
3089 if (Params::getInstance().writeDistImdTrees) {
3090 intermediateTrees.update(getTreeString(), curScore);
3091 }
3092 }
3093
3094 if (totalNNIApplied == 0 && verbose_mode >= VB_MED) {
3095 cout << "NOTE: Input tree is already NNI-optimal" << endl;
3096 }
3097
3098 if (numSteps == MAXSTEPS) {
3099 cout << "WARNING: NNI search needs unusual large number of steps (" << numSteps << ") to converge!" << endl;
3100 }
3101
3102 if(curScore < originalScore - params->loglh_epsilon){
3103 cout << "AAAAAAAAAAAAAAAAAAA: " << curScore << "\t" << originalScore << "\t" << curScore - originalScore << endl;
3104
3105 }
3106 return make_pair(numSteps, totalNNIApplied);
3107 }
3108
filterNNIBranches(vector<NNIMove> & appliedNNIs,Branches & nniBranches)3109 void IQTree::filterNNIBranches(vector<NNIMove> &appliedNNIs, Branches &nniBranches) {
3110 for (vector<NNIMove>::iterator it = appliedNNIs.begin(); it != appliedNNIs.end(); it++) {
3111 Branch curBranch;
3112 curBranch.first = it->node1;
3113 curBranch.second = it->node2;
3114 int branchID = pairInteger(it->node1->id, it->node2->id);
3115 if (nniBranches.find(branchID) == nniBranches.end())
3116 nniBranches.insert(pair<int,Branch>(branchID, curBranch));
3117 getSurroundingInnerBranches(it->node1, it->node2, 2, nniBranches);
3118 getSurroundingInnerBranches(it->node2, it->node1, 2, nniBranches);
3119 }
3120 }
3121
pllOptimizeNNI(int & totalNNICount,int & nniSteps,SearchInfo & searchinfo)3122 double IQTree::pllOptimizeNNI(int &totalNNICount, int &nniSteps, SearchInfo &searchinfo) {
3123 if((globalParams->online_bootstrap == PLL_TRUE) && (globalParams->gbo_replicates > 0)) {
3124 pllInitUFBootData();
3125 }
3126 searchinfo.numAppliedNNIs = 0;
3127 searchinfo.curLogl = curScore;
3128 //cout << "curLogl: " << searchinfo.curLogl << endl;
3129 const int MAX_NNI_STEPS = aln->getNSeq();
3130 totalNNICount = 0;
3131 for (nniSteps = 1; nniSteps <= MAX_NNI_STEPS; nniSteps++) {
3132 searchinfo.curNumNNISteps = nniSteps;
3133 searchinfo.posNNIList.clear();
3134 double newLH = pllDoNNISearch(pllInst, pllPartitions, searchinfo);
3135 if (searchinfo.curNumAppliedNNIs == 0) { // no positive NNI was found
3136 searchinfo.curLogl = newLH;
3137 break;
3138 } else {
3139 searchinfo.curLogl = newLH;
3140 searchinfo.numAppliedNNIs += searchinfo.curNumAppliedNNIs;
3141 }
3142 }
3143
3144 if (nniSteps == (MAX_NNI_STEPS + 1)) {
3145 cout << "WARNING: NNI search needs unusual large number of steps (" << MAX_NNI_STEPS << ") to converge!" << endl;
3146 }
3147
3148 if (searchinfo.numAppliedNNIs == 0) {
3149 cout << "NOTE: Tree is already NNI-optimized" << endl;
3150 }
3151
3152 totalNNICount = searchinfo.numAppliedNNIs;
3153 pllInst->likelihood = searchinfo.curLogl;
3154 return searchinfo.curLogl;
3155 }
3156
pllLogBootSamples(int ** pll_boot_samples,int nsamples,int npatterns)3157 void IQTree::pllLogBootSamples(int** pll_boot_samples, int nsamples, int npatterns){
3158 ofstream bfile("boot_samples.log");
3159 bfile << "Original freq:" << endl;
3160 int sum = 0;
3161 for(int i = 0; i < pllAlignment->sequenceLength; i++){
3162 bfile << setw(4) << pllInst->aliaswgt[i];
3163 sum += pllInst->aliaswgt[i];
3164 }
3165 bfile << endl << "sum = " << sum << endl;
3166
3167 bfile << "Bootstrap freq:" << endl;
3168
3169 for(int i = 0; i < nsamples; i++){
3170 sum = 0;
3171 for(int j = 0; j < npatterns; j++){
3172 bfile << setw(4) << pll_boot_samples[i][j];
3173 sum += pll_boot_samples[i][j];
3174 }
3175 bfile << endl << "sum = " << sum << endl;
3176 }
3177 bfile.close();
3178
3179 }
3180
pllInitUFBootData()3181 void IQTree::pllInitUFBootData(){
3182 if(pllUFBootDataPtr == NULL){
3183 pllUFBootDataPtr = (pllUFBootData *) malloc(sizeof(pllUFBootData));
3184 pllUFBootDataPtr->boot_samples = NULL;
3185 pllUFBootDataPtr->candidate_trees_count = 0;
3186
3187 if(params->online_bootstrap && params->gbo_replicates > 0){
3188 if(!pll2iqtree_pattern_index) pllBuildIQTreePatternIndex();
3189
3190 // pllUFBootDataPtr->treels = pllHashInit(max_candidate_trees);
3191 // pllUFBootDataPtr->treels_size = max_candidate_trees; // track size of treels_logl, treels_newick, treels_ptnlh
3192
3193 // pllUFBootDataPtr->treels_logl =
3194 // (double *) malloc(max_candidate_trees * (sizeof(double)));
3195 // if(!pllUFBootDataPtr->treels_logl) outError("Not enough dynamic memory!");
3196
3197
3198
3199 // pllUFBootDataPtr->treels_ptnlh =
3200 // (double **) malloc(max_candidate_trees * (sizeof(double *)));
3201 // if(!pllUFBootDataPtr->treels_ptnlh) outError("Not enough dynamic memory!");
3202 // memset(pllUFBootDataPtr->treels_ptnlh, 0, max_candidate_trees * (sizeof(double *)));
3203
3204 // aln->createBootstrapAlignment() must be called before this fragment
3205 pllUFBootDataPtr->boot_samples =
3206 (int **) malloc(params->gbo_replicates * sizeof(int *));
3207 if(!pllUFBootDataPtr->boot_samples) outError("Not enough dynamic memory!");
3208 for(int i = 0; i < params->gbo_replicates; i++){
3209 pllUFBootDataPtr->boot_samples[i] =
3210 (int *) malloc(pllAlignment->sequenceLength * sizeof(int));
3211 if(!pllUFBootDataPtr->boot_samples[i]) outError("Not enough dynamic memory!");
3212 for(int j = 0; j < pllAlignment->sequenceLength; j++){
3213 pllUFBootDataPtr->boot_samples[i][j] =
3214 boot_samples[i][pll2iqtree_pattern_index[j]];
3215 }
3216 }
3217
3218
3219 pllUFBootDataPtr->boot_logl =
3220 (double *) malloc(params->gbo_replicates * (sizeof(double)));
3221 if(!pllUFBootDataPtr->boot_logl) outError("Not enough dynamic memory!");
3222 for(int i = 0; i < params->gbo_replicates; i++)
3223 pllUFBootDataPtr->boot_logl[i] = -DBL_MAX;
3224
3225 pllUFBootDataPtr->boot_counts =
3226 (int *) malloc(params->gbo_replicates * (sizeof(int)));
3227 if(!pllUFBootDataPtr->boot_counts) outError("Not enough dynamic memory!");
3228 memset(pllUFBootDataPtr->boot_counts, 0, params->gbo_replicates * (sizeof(int)));
3229
3230 pllUFBootDataPtr->boot_trees.resize(params->gbo_replicates, "");
3231 pllUFBootDataPtr->duplication_counter = 0;
3232 }
3233 }
3234 // pllUFBootDataPtr->max_candidate_trees = max_candidate_trees;
3235 pllUFBootDataPtr->save_all_trees = save_all_trees;
3236 pllUFBootDataPtr->logl_cutoff = logl_cutoff;
3237 pllUFBootDataPtr->n_patterns = pllAlignment->sequenceLength;
3238 }
3239
pllDestroyUFBootData()3240 void IQTree::pllDestroyUFBootData(){
3241 if(pll2iqtree_pattern_index){
3242 delete [] pll2iqtree_pattern_index;
3243 pll2iqtree_pattern_index = NULL;
3244 }
3245
3246 if(params->online_bootstrap && params->gbo_replicates > 0){
3247 pllHashDestroy(&(pllUFBootDataPtr->treels), rax_free);
3248
3249 free(pllUFBootDataPtr->treels_logl);
3250
3251
3252 for(int i = 0; i < pllUFBootDataPtr->treels_size; i++)
3253 if(pllUFBootDataPtr->treels_ptnlh[i])
3254 free(pllUFBootDataPtr->treels_ptnlh[i]);
3255 free(pllUFBootDataPtr->treels_ptnlh);
3256
3257 for(int i = 0; i < params->gbo_replicates; i++)
3258 free(pllUFBootDataPtr->boot_samples[i]);
3259 free(pllUFBootDataPtr->boot_samples);
3260
3261 free(pllUFBootDataPtr->boot_logl);
3262
3263 free(pllUFBootDataPtr->boot_counts);
3264
3265 }
3266 free(pllUFBootDataPtr);
3267 pllUFBootDataPtr = NULL;
3268 }
3269
3270
doNNIs(vector<NNIMove> & compatibleNNIs,bool changeBran)3271 void IQTree::doNNIs(vector<NNIMove> &compatibleNNIs, bool changeBran) {
3272 for (vector<NNIMove>::iterator it = compatibleNNIs.begin(); it != compatibleNNIs.end(); it++) {
3273 doNNI(*it);
3274 if (!params->leastSquareNNI && changeBran) {
3275 // apply new branch lengths
3276 changeNNIBrans(*it);
3277 }
3278 }
3279 // 2015-10-14: has to reset this pointer when read in
3280 current_it = current_it_back = NULL;
3281
3282 }
3283
3284
getCompatibleNNIs(vector<NNIMove> & nniMoves,vector<NNIMove> & compatibleNNIs)3285 void IQTree::getCompatibleNNIs(vector<NNIMove> &nniMoves, vector<NNIMove> &compatibleNNIs) {
3286 compatibleNNIs.clear();
3287 for (vector<NNIMove>::iterator it1 = nniMoves.begin(); it1 != nniMoves.end(); it1++) {
3288 bool select = true;
3289 for (vector<NNIMove>::iterator it2 = compatibleNNIs.begin(); it2 != compatibleNNIs.end(); it2++) {
3290 if ((*it1).node1 == (*(it2)).node1
3291 || (*it1).node2 == (*(it2)).node1
3292 || (*it1).node1 == (*(it2)).node2
3293 || (*it1).node2 == (*(it2)).node2) {
3294 select = false;
3295 break;
3296 }
3297 }
3298 if (select) {
3299 compatibleNNIs.push_back(*it1);
3300 }
3301 }
3302 }
3303
3304 //double IQTree::estN95() {
3305 // if (vecNumNNI.size() == 0) {
3306 // return 0;
3307 // } else {
3308 // sort(vecNumNNI.begin(), vecNumNNI.end());
3309 // int index = floor(vecNumNNI.size() * speed_conf);
3310 // return vecNumNNI[index];
3311 // }
3312 //}
3313
getAvgNumNNI()3314 double IQTree::getAvgNumNNI() {
3315 if (vecNumNNI.size() == 0) {
3316 return 0;
3317 } else {
3318 double median;
3319 size_t size = vecNumNNI.size();
3320 sort(vecNumNNI.begin(), vecNumNNI.end());
3321 if (size % 2 == 0) {
3322 median = (vecNumNNI[size / 2 + 1] + vecNumNNI[size / 2]) / 2;
3323 } else {
3324 median = vecNumNNI[size / 2];
3325 }
3326 return median;
3327 }
3328 }
3329
estDeltaMedian()3330 double IQTree::estDeltaMedian() {
3331 if (vecImpProNNI.size() == 0) {
3332 return 0;
3333 } else {
3334 double median;
3335 size_t size = vecImpProNNI.size();
3336 sort(vecImpProNNI.begin(), vecImpProNNI.end());
3337 if (size % 2 == 0) {
3338 median = (vecImpProNNI[size / 2 + 1] + vecImpProNNI[size / 2]) / 2;
3339 } else {
3340 median = vecImpProNNI[size / 2];
3341 }
3342 return median;
3343 }
3344 }
3345
3346 //inline double IQTree::estDelta95() {
3347 // if (vecImpProNNI.size() == 0) {
3348 // return 0;
3349 // } else {
3350 // sort(vecImpProNNI.begin(), vecImpProNNI.end());
3351 // int index = floor(vecImpProNNI.size() * speed_conf);
3352 // return vecImpProNNI[index];
3353 // }
3354 //}
3355
getDelete() const3356 int IQTree::getDelete() const {
3357 return k_delete;
3358 }
3359
setDelete(int _delete)3360 void IQTree::setDelete(int _delete) {
3361 k_delete = _delete;
3362 }
3363
evaluateNNIs(Branches & nniBranches,vector<NNIMove> & positiveNNIs)3364 void IQTree::evaluateNNIs(Branches &nniBranches, vector<NNIMove> &positiveNNIs) {
3365 for (Branches::iterator it = nniBranches.begin(); it != nniBranches.end(); it++) {
3366 NNIMove nni = getBestNNIForBran((PhyloNode*) it->second.first, (PhyloNode*) it->second.second, NULL);
3367 if (nni.newloglh > curScore) {
3368 positiveNNIs.push_back(nni);
3369 }
3370
3371 // synchronize tree during optimization step
3372 if (MPIHelper::getInstance().isMaster() && candidateset_changed.size() > 0
3373 && MPIHelper::getInstance().gotMessage()) {
3374 syncCurrentTree();
3375 }
3376 }
3377 }
3378
3379 //Branches IQTree::getReducedListOfNNIBranches(Branches &previousNNIBranches) {
3380 // Branches resBranches;
3381 // for (Branches::iterator it = previousNNIBranches.begin(); it != previousNNIBranches.end(); it++) {
3382 // getSurroundingInnerBranches(it->second.first, it->second.second, 2, resBranches);
3383 // getSurroundingInnerBranches(it->second.second, it->second.first, 2, resBranches);
3384 // }
3385 //}
3386
optimizeNNIBranches(Branches & nniBranches)3387 double IQTree::optimizeNNIBranches(Branches &nniBranches) {
3388 for (Branches::iterator it = nniBranches.begin(); it != nniBranches.end(); it++) {
3389 optimizeOneBranch((PhyloNode*) it->second.first, (PhyloNode*) it->second.second, true, PLL_NEWZPERCYCLE);
3390 }
3391 curScore = computeLikelihoodFromBuffer();
3392 return curScore;
3393 }
3394
3395 /**
3396 * Currently not used, commented out to simplify the interface of getBestNNIForBran
3397 void IQTree::evalNNIsSort(bool approx_nni) {
3398 if (myMove.newloglh > curScore + params->loglh_epsilon) {
3399 if (myMove.newloglh > curScore + params->loglh_epsilon) {
3400 addPositiveNNIMove(myMove);
3401 }
3402 addPositiveNNIMove(myMove);
3403 }
3404 NodeVector nodes1, nodes2;
3405 int i;
3406 double cur_lh = curScore;
3407 vector<IntBranchInfo> int_branches;
3408
3409 getInternalBranches(nodes1, nodes2);
3410 assert(nodes1.size() == leafNum - 3 && nodes2.size() == leafNum - 3);
3411
3412 for (i = 0; i < leafNum - 3; i++) {
3413 IntBranchInfo int_branch;
3414 PhyloNeighbor *node12_it = (PhyloNeighbor*) nodes1[i]->findNeighbor(nodes2[i]);
3415 //PhyloNeighbor *node21_it = (PhyloNeighbor*) nodes2[i]->findNeighbor(nodes1[i]);
3416 int_branch.lh_contribution = cur_lh - computeLikelihoodZeroBranch(node12_it, (PhyloNode*) nodes1[i]);
3417 if (int_branch.lh_contribution < 0.0)
3418 int_branch.lh_contribution = 0.0;
3419 if (int_branch.lh_contribution < fabs(nni_cutoff)) {
3420 int_branch.node1 = (PhyloNode*) nodes1[i];
3421 int_branch.node2 = (PhyloNode*) nodes2[i];
3422 int_branches.push_back(int_branch);
3423 }
3424 }
3425 std::sort(int_branches.begin(), int_branches.end(), int_branch_cmp);
3426 for (vector<IntBranchInfo>::iterator it = int_branches.begin(); it != int_branches.end(); it++)
3427 if (it->lh_contribution >= 0.0) // evaluate NNI if branch contribution is big enough
3428 {
3429 NNIMove myMove = getBestNNIForBran(it->node1, it->node2, NULL, approx_nni, it->lh_contribution);
3430 if (myMove.newloglh > curScore) {
3431 addPositiveNNIMove(myMove);
3432 if (!estimate_nni_cutoff)
3433 for (vector<IntBranchInfo>::iterator it2 = it + 1; it2 != int_branches.end(); it2++) {
3434 if (it2->node1 == it->node1 || it2->node2 == it->node1 || it2->node1 == it->node2
3435 || it2->node2 == it->node2)
3436 it2->lh_contribution = -1.0; // do not evaluate this branch later on
3437 }
3438 }
3439 } else { // otherwise, only optimize the branch length
3440 PhyloNode *node1 = it->node1;
3441 PhyloNode *node2 = it->node2;
3442 PhyloNeighbor *node12_it = (PhyloNeighbor*) node1->findNeighbor(node2);
3443 PhyloNeighbor *node21_it = (PhyloNeighbor*) node2->findNeighbor(node1);
3444 double stored_len = node12_it->length;
3445 curScore = optimizeOneBranch(node1, node2, false);
3446 string key("");
3447 if (node1->id < node2->id) {
3448 key += convertIntToString(node1->id) + "->" + convertIntToString(node2->id);
3449 } else {
3450 key += convertIntToString(node2->id) + "->" + convertIntToString(node1->id);
3451 }
3452
3453 optBrans.insert(mapString2Double::value_type(key, node12_it->length));
3454 node12_it->length = stored_len;
3455 node21_it->length = stored_len;
3456 }
3457 }
3458 */
3459
estimateNNICutoff(Params * params)3460 void IQTree::estimateNNICutoff(Params* params) {
3461 double *delta = new double[nni_info.size()];
3462 int i;
3463 for (i = 0; i < nni_info.size(); i++) {
3464 double lh_score[4];
3465 memmove(lh_score, nni_info[i].lh_score, 4 * sizeof(double));
3466 std::sort(lh_score + 1, lh_score + 4); // sort in ascending order
3467 delta[i] = lh_score[0] - lh_score[2];
3468 if (verbose_mode >= VB_MED)
3469 cout << i << ": " << lh_score[0] << " " << lh_score[1] << " " << lh_score[2] << " " << lh_score[3] << endl;
3470 }
3471 std::sort(delta, delta + nni_info.size());
3472 nni_cutoff = delta[nni_info.size() / 20];
3473 cout << endl << "Estimated NNI cutoff: " << nni_cutoff << endl;
3474 string file_name = params->out_prefix;
3475 file_name += ".nnidelta";
3476 try {
3477 ofstream out;
3478 out.exceptions(ios::failbit | ios::badbit);
3479 out.open(file_name.c_str());
3480 for (i = 0; i < nni_info.size(); i++) {
3481 out << delta[i] << endl;
3482 }
3483 out.close();
3484 cout << "NNI delta printed to " << file_name << endl;
3485 } catch (ios::failure) {
3486 outError(ERR_WRITE_OUTPUT, file_name);
3487 }
3488 delete[] delta;
3489 }
3490
saveCurrentTree(double cur_logl)3491 void IQTree::saveCurrentTree(double cur_logl) {
3492
3493 if (logl_cutoff != 0.0 && cur_logl < logl_cutoff - 1.0)
3494 return;
3495 // treels_logl.push_back(cur_logl);
3496 // num_trees_for_rell++;
3497
3498 if (Params::getInstance().write_intermediate_trees)
3499 printTree(out_treels, WT_NEWLINE | WT_BR_LEN);
3500
3501 int nptn = getAlnNPattern();
3502
3503 #ifdef BOOT_VAL_FLOAT
3504 int maxnptn = get_safe_upper_limit_float(nptn);
3505 BootValType *pattern_lh = aligned_alloc<BootValType>(maxnptn);
3506 memset(pattern_lh, 0, maxnptn*sizeof(BootValType));
3507 double *pattern_lh_orig = aligned_alloc<double>(nptn);
3508 computePatternLikelihood(pattern_lh_orig, &cur_logl);
3509 for (int i = 0; i < nptn; i++)
3510 pattern_lh[i] = (float)pattern_lh_orig[i];
3511 #else
3512 int maxnptn = get_safe_upper_limit(nptn);
3513 BootValType *pattern_lh = aligned_alloc<BootValType>(maxnptn);
3514 memset(pattern_lh, 0, maxnptn*sizeof(BootValType));
3515 computePatternLikelihood(pattern_lh, &cur_logl);
3516 #endif
3517
3518
3519 if (boot_samples.empty()) {
3520 // for runGuidedBootstrap
3521 } else {
3522 // online bootstrap
3523 // int ptn;
3524 // int updated = 0;
3525 // int nsamples = boot_samples.size();
3526 ostringstream ostr;
3527 string tree_str;
3528 setRootNode(params->root);
3529 if (params->print_ufboot_trees == 2)
3530 printTree(ostr, WT_TAXON_ID + WT_SORT_TAXA + WT_BR_LEN + WT_BR_LEN_SHORT);
3531 else
3532 printTree(ostr, WT_TAXON_ID + WT_SORT_TAXA);
3533 tree_str = ostr.str();
3534
3535 #ifdef _OPENMP
3536 int rand_seed = random_int(1000);
3537 #pragma omp parallel
3538 {
3539 int *rstream;
3540 init_random(rand_seed + omp_get_thread_num(), false, &rstream);
3541 #pragma omp for
3542 #else
3543 int *rstream = randstream;
3544 #endif
3545 for (int sample = sample_start; sample < sample_end; sample++) {
3546 double rell = 0.0;
3547
3548 {
3549 // SSE optimized version of the above loop
3550 BootValType *boot_sample = boot_samples[sample];
3551
3552 BootValType res = (this->*dotProduct)(pattern_lh, boot_sample, nptn);
3553
3554 rell = res;
3555 }
3556
3557 bool better = rell > boot_logl[sample] + params->ufboot_epsilon;
3558 if (!better && rell > boot_logl[sample] - params->ufboot_epsilon) {
3559 better = (random_double(rstream) <= 1.0 / (boot_counts[sample] + 1));
3560 }
3561 if (better) {
3562 if (rell <= boot_logl[sample] + params->ufboot_epsilon) {
3563 boot_counts[sample]++;
3564 } else {
3565 boot_counts[sample] = 1;
3566 }
3567 boot_logl[sample] = max(boot_logl[sample], rell);
3568 boot_orig_logl[sample] = cur_logl;
3569 boot_trees[sample] = tree_str;
3570 }
3571 }
3572 #ifdef _OPENMP
3573 finish_random(rstream);
3574 }
3575 #endif
3576 }
3577 if (Params::getInstance().print_tree_lh) {
3578 out_treelh << cur_logl;
3579 double prob;
3580 #ifdef BOOT_VAL_FLOAT
3581 aln->multinomialProb(pattern_lh_orig, prob);
3582 #else
3583 aln->multinomialProb(pattern_lh, prob);
3584 #endif
3585 out_treelh << "\t" << prob << endl;
3586
3587 IntVector pattern_index;
3588 aln->getSitePatternIndex(pattern_index);
3589 out_sitelh << "Site_Lh ";
3590 for (int i = 0; i < getAlnNSite(); i++)
3591 out_sitelh << " " << pattern_lh[pattern_index[i]];
3592 out_sitelh << endl;
3593 }
3594
3595 if (!boot_samples.empty()) {
3596 #ifdef BOOT_VAL_FLOAT
3597 aligned_free(pattern_lh_orig);
3598 #endif
3599 aligned_free(pattern_lh);
3600 } else {
3601 #ifdef BOOT_VAL_FLOAT
3602 aligned_free(pattern_lh);
3603 #endif
3604 }
3605
3606 }
3607
saveNNITrees(PhyloNode * node,PhyloNode * dad)3608 void IQTree::saveNNITrees(PhyloNode *node, PhyloNode *dad) {
3609 if (!node) {
3610 node = (PhyloNode*) root;
3611 }
3612 if (dad && !node->isLeaf() && !dad->isLeaf()) {
3613 double *pat_lh1 = new double[aln->getNPattern()];
3614 double *pat_lh2 = new double[aln->getNPattern()];
3615 double lh1, lh2;
3616 computeNNIPatternLh(curScore, lh1, pat_lh1, lh2, pat_lh2, node, dad);
3617 delete[] pat_lh2;
3618 delete[] pat_lh1;
3619 }
3620 FOR_NEIGHBOR_IT(node, dad, it)saveNNITrees((PhyloNode*) (*it)->node, node);
3621 }
3622
summarizeBootstrap(Params & params,MTreeSet & trees)3623 void IQTree::summarizeBootstrap(Params ¶ms, MTreeSet &trees) {
3624 int sum_weights = trees.sumTreeWeights();
3625 int i;
3626 if (verbose_mode >= VB_MAX) {
3627 for (i = 0; i < trees.size(); i++)
3628 if (trees.tree_weights[i] > 0)
3629 cout << "Tree " << i + 1 << " weight= " << (double) trees.tree_weights[i] * 100 / sum_weights << endl;
3630 }
3631 int max_tree_id = max_element(trees.tree_weights.begin(), trees.tree_weights.end()) - trees.tree_weights.begin();
3632 if (verbose_mode >= VB_MED) {
3633 cout << "max_tree_id = " << max_tree_id + 1 << " max_weight = " << trees.tree_weights[max_tree_id];
3634 cout << " (" << (double) trees.tree_weights[max_tree_id] * 100 / sum_weights << "%)" << endl;
3635 }
3636 // assign bootstrap support
3637 SplitGraph sg;
3638 SplitIntMap hash_ss;
3639 // make the taxa name
3640 vector<string> taxname;
3641 taxname.resize(leafNum);
3642 if (boot_splits.empty()) {
3643 getTaxaName(taxname);
3644 } else {
3645 boot_splits.back()->getTaxaName(taxname);
3646 }
3647 /*if (!tree.save_all_trees)
3648 trees.convertSplits(taxname, sg, hash_ss, SW_COUNT, -1);
3649 else
3650 trees.convertSplits(taxname, sg, hash_ss, SW_COUNT, -1, false);
3651 */
3652 trees.convertSplits(taxname, sg, hash_ss, SW_COUNT, -1, NULL, false); // do not sort taxa
3653
3654 if (verbose_mode >= VB_MED)
3655 cout << sg.size() << " splits found" << endl;
3656
3657 sg.scaleWeight(1.0 / trees.sumTreeWeights(), false, 4);
3658 string out_file;
3659 out_file = params.out_prefix;
3660 out_file += ".splits";
3661 if (params.print_splits_file) {
3662 sg.saveFile(out_file.c_str(), IN_OTHER, true);
3663 cout << "Split supports printed to star-dot file " << out_file << endl;
3664 }
3665 // compute the percentage of appearance
3666 sg.scaleWeight(100.0, true);
3667 // printSplitSet(sg, hash_ss);
3668 //sg.report(cout);
3669 //cout << "Creating " << RESAMPLE_NAME << " support values..." << endl;
3670 // stringstream tree_stream;
3671 // printTree(tree_stream, WT_TAXON_ID | WT_BR_LEN);
3672 // MExtTree mytree;
3673 // mytree.readTree(tree_stream, rooted);
3674 // mytree.assignLeafID();
3675 assignLeafNameByID();
3676 createBootstrapSupport(taxname, trees, hash_ss, NULL);
3677
3678 // now write resulting tree with supports
3679 // tree_stream.seekp(0, ios::beg);
3680 // mytree.printTree(tree_stream);
3681 //
3682 // // now read resulting tree
3683 // tree_stream.seekg(0, ios::beg);
3684 // freeNode();
3685 // // RARE BUG FIX: to avoid cases that identical seqs were removed and leaf name happens to be IDs
3686 // MTree::readTree(tree_stream, rooted);
3687
3688 assignLeafNames();
3689
3690 // 2017-12-05: commented out, not sure why doing this, which disort branch lengths
3691 // if (isSuperTree()) {
3692 // ((PhyloSuperTree*) this)->mapTrees();
3693 // } else {
3694 // initializeAllPartialLh();
3695 // clearAllPartialLH();
3696 // }
3697
3698 if (!save_all_trees) {
3699 out_file = params.out_prefix;
3700 out_file += ".suptree";
3701
3702 printTree(out_file.c_str());
3703 cout << "Tree with assigned support written to " << out_file << endl;
3704 }
3705
3706 if (params.print_splits_nex_file) {
3707 out_file = params.out_prefix;
3708 out_file += ".splits.nex";
3709 sg.saveFile(out_file.c_str(), IN_NEXUS, false);
3710 cout << "Split supports printed to NEXUS file " << out_file << endl;
3711 }
3712
3713 /*
3714 out_file = params.out_prefix;
3715 out_file += ".supval";
3716 writeInternalNodeNames(out_file);
3717
3718 cout << "Support values written to " << out_file << endl;
3719 */
3720
3721
3722 }
3723
writeUFBootTrees(Params & params)3724 void IQTree::writeUFBootTrees(Params ¶ms) {
3725 MTreeSet trees;
3726 // IntVector tree_weights;
3727 int i, j;
3728 string filename = params.out_prefix;
3729 filename += ".ufboot";
3730 ofstream out(filename.c_str());
3731
3732 trees.init(boot_trees, rooted);
3733 for (i = 0; i < trees.size(); i++) {
3734 NodeVector taxa;
3735 // change the taxa name from ID to real name
3736 trees[i]->getOrderedTaxa(taxa);
3737 for (j = 0; j < taxa.size() - (int)rooted; j++)
3738 taxa[j]->name = aln->getSeqName(taxa[j]->id);
3739 if (removed_seqs.size() > 0) {
3740 // reinsert removed seqs into each tree
3741 trees[i]->insertTaxa(removed_seqs, twin_seqs);
3742 }
3743 // now print to file
3744 for (j = 0; j < trees.tree_weights[i]; j++)
3745 if (params.print_ufboot_trees == 1)
3746 trees[i]->printTree(out, WT_NEWLINE);
3747 else
3748 trees[i]->printTree(out, WT_NEWLINE + WT_BR_LEN);
3749 }
3750 cout << "UFBoot trees printed to " << filename << endl;
3751 out.close();
3752 }
3753
summarizeBootstrap(Params & params)3754 void IQTree::summarizeBootstrap(Params ¶ms) {
3755 setRootNode(params.root);
3756 MTreeSet trees;
3757 trees.init(boot_trees, rooted);
3758 summarizeBootstrap(params, trees);
3759 }
3760
summarizeBootstrap(SplitGraph & sg)3761 void IQTree::summarizeBootstrap(SplitGraph &sg) {
3762 MTreeSet trees;
3763 //SplitGraph sg;
3764 trees.init(boot_trees, rooted);
3765 SplitIntMap hash_ss;
3766 // make the taxa name
3767 vector<string> taxname;
3768 taxname.resize(leafNum);
3769 getTaxaName(taxname);
3770
3771 /*if (!tree.save_all_trees)
3772 trees.convertSplits(taxname, sg, hash_ss, SW_COUNT, -1);
3773 else
3774 trees.convertSplits(taxname, sg, hash_ss, SW_COUNT, -1, false);
3775 */
3776 trees.convertSplits(taxname, sg, hash_ss, SW_COUNT, -1, NULL, false); // do not sort taxa
3777 }
3778
pllConvertUFBootData2IQTree()3779 void IQTree::pllConvertUFBootData2IQTree(){
3780 // duplication_counter
3781 duplication_counter = pllUFBootDataPtr->duplication_counter;
3782 //treels_logl
3783 // treels_logl.clear();
3784 // for(int i = 0; i < pllUFBootDataPtr->candidate_trees_count; i++)
3785 // treels_logl.push_back(pllUFBootDataPtr->treels_logl[i]);
3786
3787 //boot_trees
3788 boot_trees.clear();
3789 for(int i = 0; i < params->gbo_replicates; i++)
3790 boot_trees.push_back(pllUFBootDataPtr->boot_trees[i]);
3791
3792 }
3793
computeCorrelation(IntVector & ix,IntVector & iy)3794 double computeCorrelation(IntVector &ix, IntVector &iy) {
3795
3796 ASSERT(ix.size() == iy.size());
3797 DoubleVector x;
3798 DoubleVector y;
3799
3800 double mx = 0.0, my = 0.0; // mean value
3801 int i;
3802 x.resize(ix.size());
3803 y.resize(iy.size());
3804 for (i = 0; i < x.size(); i++) {
3805 x[i] = ix[i];
3806 y[i] = iy[i];
3807 mx += x[i];
3808 my += y[i];
3809 }
3810 mx /= x.size();
3811 my /= y.size();
3812 for (i = 0; i < x.size(); i++) {
3813 x[i] = x[i] / mx - 1.0;
3814 y[i] = y[i] / my - 1.0;
3815 }
3816
3817 double f1 = 0.0, f2 = 0.0, f3 = 0.0;
3818 for (i = 0; i < x.size(); i++) {
3819 f1 += (x[i]) * (y[i]);
3820 f2 += (x[i]) * (x[i]);
3821 f3 += (y[i]) * (y[i]);
3822 }
3823 if (f2 == 0.0 || f3 == 0.0)
3824 return 1.0;
3825 return f1 / (sqrt(f2) * sqrt(f3));
3826 }
3827
computeBootstrapCorrelation()3828 double IQTree::computeBootstrapCorrelation() {
3829 if (boot_splits.size() < 2)
3830 return 0.0;
3831 IntVector split_supports;
3832 SplitIntMap split_map;
3833 int i;
3834 // collect split supports
3835 SplitGraph *sg = boot_splits.back();
3836 SplitGraph *half = boot_splits[(boot_splits.size() - 1) / 2];
3837 for (i = 0; i < half->size(); i++)
3838 if (half->at(i)->trivial() == -1) {
3839 split_map.insertSplit(half->at(i), split_supports.size());
3840 split_supports.push_back((int) (half->at(i)->getWeight()));
3841 }
3842
3843 // collect split supports for new tree collection
3844 IntVector split_supports_new;
3845 split_supports_new.resize(split_supports.size(), 0);
3846 for (i = 0; i < sg->size(); i++)
3847 if ((*sg)[i]->trivial() == -1) {
3848 int index;
3849 Split *sp = split_map.findSplit((*sg)[i], index);
3850 if (sp) {
3851 // split found
3852 split_supports_new[index] = (int) ((*sg)[i]->getWeight());
3853 } else {
3854 // new split
3855 split_supports_new.push_back((int) ((*sg)[i]->getWeight()));
3856 }
3857 }
3858 if (verbose_mode >= VB_MED)
3859 cout << split_supports_new.size() - split_supports.size() << " new splits compared to old boot_splits" << endl;
3860 if (split_supports_new.size() > split_supports.size())
3861 split_supports.resize(split_supports_new.size(), 0);
3862
3863 // now compute correlation coefficient
3864 double corr = computeCorrelation(split_supports, split_supports_new);
3865
3866 return corr;
3867 }
3868
3869 //void IQTree::addPositiveNNIMove(NNIMove myMove) {
3870 // plusNNIs.push_back(myMove);
3871 //}
3872
printResultTree(string suffix)3873 void IQTree::printResultTree(string suffix) {
3874 if (MPIHelper::getInstance().isWorker()) {
3875 return;
3876 // stringstream processTreeFile;
3877 // processTreeFile << tree_file_name << "." << MPIHelper::getInstance().getProcessID();
3878 // tree_file_name = processTreeFile.str();
3879 }
3880 if (params->suppress_output_flags & OUT_TREEFILE)
3881 return;
3882 setRootNode(params->root, true);
3883 string tree_file_name = params->out_prefix;
3884 tree_file_name += ".treefile";
3885 if (suffix.compare("") != 0) {
3886 tree_file_name += "." + suffix;
3887 }
3888 printTree(tree_file_name.c_str(), WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA | WT_NEWLINE);
3889 if (verbose_mode >= VB_MED)
3890 cout << "Best tree printed to " << tree_file_name << endl;
3891 setRootNode(params->root, false);
3892 }
3893
printResultTree(ostream & out)3894 void IQTree::printResultTree(ostream &out) {
3895 setRootNode(params->root);
3896 printTree(out, WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA | WT_NEWLINE);
3897 }
3898
printBestCandidateTree()3899 void IQTree::printBestCandidateTree() {
3900 if (MPIHelper::getInstance().isWorker())
3901 return;
3902 if (params->suppress_output_flags & OUT_TREEFILE)
3903 return;
3904 string tree_file_name = params->out_prefix;
3905 tree_file_name += ".treefile";
3906 readTreeString(candidateTrees.getBestTreeStrings(1)[0]);
3907 setRootNode(params->root);
3908 printTree(tree_file_name.c_str(), WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA | WT_NEWLINE);
3909 if (verbose_mode >= VB_MED)
3910 cout << "Best tree printed to " << tree_file_name << endl;
3911 }
3912
3913
printPhylolibTree(const char * suffix)3914 void IQTree::printPhylolibTree(const char* suffix) {
3915 pllTreeToNewick(pllInst->tree_string, pllInst, pllPartitions, pllInst->start->back, PLL_TRUE, 1, 0, 0, 0,
3916 PLL_SUMMARIZE_LH, 0, 0);
3917 char phylolibTree[1024];
3918 strcpy(phylolibTree, params->out_prefix);
3919 strcat(phylolibTree, suffix);
3920 FILE *phylolib_tree = fopen(phylolibTree, "w");
3921 fprintf(phylolib_tree, "%s", pllInst->tree_string);
3922 cout << "Tree optimized by Phylolib was written to " << phylolibTree << endl;
3923 }
3924
printIntermediateTree(int brtype)3925 void IQTree::printIntermediateTree(int brtype) {
3926 setRootNode(params->root);
3927 double *pattern_lh = NULL;
3928 double logl = curScore;
3929
3930 if (params->print_tree_lh) {
3931 pattern_lh = new double[getAlnNPattern()];
3932 computePatternLikelihood(pattern_lh, &logl);
3933 }
3934
3935 if (Params::getInstance().write_intermediate_trees)
3936 printTree(out_treels, brtype);
3937
3938 if (params->print_tree_lh) {
3939 out_treelh.precision(10);
3940 out_treelh << logl;
3941 double prob;
3942 aln->multinomialProb(pattern_lh, prob);
3943 out_treelh << "\t" << prob << endl;
3944 if (!(brtype & WT_APPEND))
3945 out_sitelh << aln->getNSite() << endl;
3946 out_sitelh << "Site_Lh ";
3947 for (int i = 0; i < aln->getNSite(); i++)
3948 out_sitelh << "\t" << pattern_lh[aln->getPatternID(i)];
3949 out_sitelh << endl;
3950 delete[] pattern_lh;
3951 }
3952 if (params->write_intermediate_trees == 1 && save_all_trees != 1) {
3953 return;
3954 }
3955 int x = save_all_trees;
3956 save_all_trees = 2;
3957 // TODO Why is evalNNI() is called in this function?
3958 //evalNNIs();
3959 Branches innerBranches;
3960 vector<NNIMove> positiveNNIs;
3961 getInnerBranches(innerBranches);
3962 evaluateNNIs(innerBranches, positiveNNIs);
3963 save_all_trees = x;
3964 }
3965
3966
convertNNI2Splits(SplitIntMap & nniSplits,int numNNIs,vector<NNIMove> & compatibleNNIs)3967 void IQTree::convertNNI2Splits(SplitIntMap &nniSplits, int numNNIs, vector<NNIMove> &compatibleNNIs) {
3968 for (int i = 0; i < numNNIs; i++) {
3969 Split *sp = new Split(*getSplit(compatibleNNIs[i].node1, compatibleNNIs[i].node2));
3970 if (sp->shouldInvert()) {
3971 sp->invert();
3972 }
3973 nniSplits.insertSplit(sp, 1);
3974 }
3975 }
3976
getBestScore()3977 double IQTree::getBestScore() {
3978 return candidateTrees.getBestScore();
3979 }
3980
getBestTrees(int numTrees)3981 vector<string> IQTree::getBestTrees(int numTrees) {
3982 return candidateTrees.getBestTreeStrings(numTrees);
3983 }
3984
3985
3986 /*******************************************
3987 MPI stuffs
3988 *******************************************/
3989
syncCandidateTrees(int nTrees,bool updateStopRule)3990 void IQTree::syncCandidateTrees(int nTrees, bool updateStopRule) {
3991 if (MPIHelper::getInstance().getNumProcesses() == 1)
3992 return;
3993
3994 #ifdef _IQTREE_MPI
3995 // gather trees to Master
3996
3997 Checkpoint *ckp = new Checkpoint;
3998
3999 if (MPIHelper::getInstance().isMaster()) {
4000 // update candidate set at master
4001 int trees = 0;
4002 for (int w = 1; w < MPIHelper::getInstance().getNumProcesses(); w++) {
4003 int worker = MPIHelper::getInstance().recvCheckpoint(ckp);
4004 CandidateSet cset;
4005 cset.setCheckpoint(ckp);
4006 cset.restoreCheckpoint();
4007 for (CandidateSet::iterator it = cset.begin(); it != cset.end(); it++)
4008 addTreeToCandidateSet(it->second.tree, it->second.score, updateStopRule, worker);
4009 trees += ckp->size();
4010 ckp->clear();
4011 }
4012 cout << "Master: " << trees << " candidate trees gathered from workers" << endl;
4013 // get the best candidate trees
4014 int numTrees = max(nTrees, MPIHelper::getInstance().getNumProcesses());
4015 CandidateSet bestCandidates = candidateTrees.getBestCandidateTrees(numTrees);
4016 int saved_numNNITrees = params->numNNITrees;
4017 params->numNNITrees = numTrees;
4018 bestCandidates.setCheckpoint(ckp);
4019 bestCandidates.saveCheckpoint();
4020 params->numNNITrees = saved_numNNITrees;
4021 } else {
4022 // send candidate set to master
4023 CandidateSet cset = candidateTrees.getBestCandidateTrees();
4024 cset.setCheckpoint(ckp);
4025 cset.saveCheckpoint();
4026 MPIHelper::getInstance().sendCheckpoint(ckp, PROC_MASTER);
4027 cout << "Worker " << MPIHelper::getInstance().getProcessID() << ": " << ckp->size() << " candidate trees sent to master" << endl;
4028 ckp->clear();
4029 }
4030
4031 if (updateStopRule && stop_rule.meetStopCondition(stop_rule.getCurIt(), 0.0)) {
4032 // 2020-04-30: send stop signal
4033 ckp->putBool("stop", true);
4034 }
4035
4036 // broadcast candidate trees from master to worker
4037 MPIHelper::getInstance().broadcastCheckpoint(ckp);
4038 cout << ckp->size() << " trees broadcasted to workers" << endl;
4039
4040 if (MPIHelper::getInstance().isWorker()) {
4041 // update candidate set at worker
4042 CandidateSet cset;
4043 cset.setCheckpoint(ckp);
4044 cset.restoreCheckpoint();
4045 for (CandidateSet::iterator it = cset.begin(); it != cset.end(); it++)
4046 addTreeToCandidateSet(it->second.tree, it->second.score, false, PROC_MASTER);
4047
4048 // 2020-04-40: check stop signal
4049 if (ckp->getBool("stop")) {
4050 cout << "Worker " << MPIHelper::getInstance().getProcessID() << " gets STOP message!" << endl;
4051 stop_rule.shouldStop();
4052 }
4053 }
4054
4055 delete ckp;
4056 #endif
4057 }
4058
syncCurrentTree()4059 void IQTree::syncCurrentTree() {
4060 if (MPIHelper::getInstance().getNumProcesses() == 1)
4061 return;
4062 #ifdef _IQTREE_MPI
4063 //------ BLOCKING COMMUNICATION ------//
4064 Checkpoint *checkpoint = new Checkpoint;
4065 string tree;
4066 double score;
4067
4068 if (MPIHelper::getInstance().isMaster()) {
4069 // master: receive tree from WORKERS
4070 int worker = MPIHelper::getInstance().recvCheckpoint(checkpoint);
4071 MPIHelper::getInstance().increaseTreeReceived();
4072 CKP_RESTORE(tree);
4073 CKP_RESTORE(score);
4074 int pos = addTreeToCandidateSet(tree, score, true, worker);
4075 if (pos >= 0 && pos < params->popSize) {
4076 // candidate set is changed, update for other workers
4077 for (int w = 0; w < candidateset_changed.size(); w++)
4078 if (w != worker)
4079 candidateset_changed[w] = true;
4080 }
4081
4082 if (boot_samples.size() > 0) {
4083 restoreUFBoot(checkpoint);
4084 }
4085
4086 // send candidate trees to worker
4087 checkpoint->clear();
4088 if (boot_samples.size() > 0)
4089 CKP_SAVE(logl_cutoff);
4090 if (candidateset_changed[worker]) {
4091 CandidateSet cset = candidateTrees.getBestCandidateTrees(Params::getInstance().popSize);
4092 cset.setCheckpoint(checkpoint);
4093 cset.saveCheckpoint();
4094 candidateset_changed[worker] = false;
4095 MPIHelper::getInstance().increaseTreeSent(Params::getInstance().popSize);
4096 }
4097 MPIHelper::getInstance().sendCheckpoint(checkpoint, worker);
4098 } else {
4099 // worker: always send tree to MASTER
4100 tree = getTreeString();
4101 score = curScore;
4102 CKP_SAVE(tree);
4103 CKP_SAVE(score);
4104 if (boot_samples.size() > 0) {
4105 saveUFBoot(checkpoint);
4106 }
4107 MPIHelper::getInstance().sendCheckpoint(checkpoint, PROC_MASTER);
4108 MPIHelper::getInstance().increaseTreeSent();
4109
4110 // now receive the candidate set
4111 MPIHelper::getInstance().recvCheckpoint(checkpoint, PROC_MASTER);
4112 if (checkpoint->getBool("stop")) {
4113 cout << "Worker " << MPIHelper::getInstance().getProcessID() << " gets STOP message!" << endl;
4114 stop_rule.shouldStop();
4115 } else {
4116 CandidateSet cset;
4117 cset.setCheckpoint(checkpoint);
4118 cset.restoreCheckpoint();
4119 for (CandidateSet::iterator it = cset.begin(); it != cset.end(); it++)
4120 addTreeToCandidateSet(it->second.tree, it->second.score, false, MPIHelper::getInstance().getProcessID());
4121 MPIHelper::getInstance().increaseTreeReceived(cset.size());
4122 if (boot_samples.size() > 0)
4123 CKP_RESTORE(logl_cutoff);
4124 }
4125 }
4126
4127 delete checkpoint;
4128
4129 #endif
4130 }
4131
sendStopMessage()4132 void IQTree::sendStopMessage() {
4133 if (MPIHelper::getInstance().getNumProcesses() == 1)
4134 return;
4135 #ifdef _IQTREE_MPI
4136
4137 Checkpoint *checkpoint = new Checkpoint;
4138 checkpoint->putBool("stop", true);
4139 stringstream ss;
4140 checkpoint->dump(ss);
4141 string str = ss.str();
4142 string tree;
4143 double score;
4144
4145 cout << "Sending STOP message to workers" << endl;
4146
4147 // send STOP message to all processes
4148 if (MPIHelper::getInstance().isMaster()) {
4149 // repeatedly send stop message to all workers
4150 for (int w = 1; w < MPIHelper::getInstance().getNumProcesses(); w++) {
4151 // string buf;
4152 // int worker = MPIHelper::getInstance().recvString(buf);
4153 checkpoint->clear();
4154 int worker = MPIHelper::getInstance().recvCheckpoint(checkpoint);
4155 MPIHelper::getInstance().increaseTreeReceived();
4156 CKP_RESTORE(tree);
4157 CKP_RESTORE(score);
4158 addTreeToCandidateSet(tree, score, true, worker);
4159 MPIHelper::getInstance().sendString(str, worker, TREE_TAG);
4160 }
4161 }
4162
4163 delete checkpoint;
4164
4165 MPI_Barrier(MPI_COMM_WORLD);
4166 #endif
4167 }
4168
warnNumThreads()4169 void PhyloTree::warnNumThreads() {
4170 if (num_threads <= 1)
4171 return;
4172 // return if -nt AUTO
4173 if (params->num_threads == 0)
4174 return;
4175 size_t nptn = getAlnNPattern();
4176 if (nptn < num_threads*vector_size)
4177 outError("Too many threads for short alignments, please reduce number of threads or use -T AUTO to determine it.");
4178 if (nptn < num_threads*400/aln->num_states)
4179 outWarning("Number of threads seems too high for short alignments. Use -T AUTO to determine best number of threads.");
4180 }
4181
testNumThreads()4182 int PhyloTree::testNumThreads() {
4183 #ifndef _OPENMP
4184 return 1;
4185 #else
4186 int max_procs = min(countPhysicalCPUCores()/MPIHelper::getInstance().countSameHost(), params->num_threads_max);
4187 cout << "Measuring multi-threading efficiency up to " << max_procs << " CPU cores" << endl;
4188 DoubleVector runTimes;
4189 int bestProc = 0;
4190 double saved_curScore = curScore;
4191 int num_iter = 1;
4192
4193 // generate different trees
4194 int tree;
4195 double min_time = max_procs; // minimum time in seconds
4196 StrVector trees;
4197 trees.push_back(getTreeString());
4198 setLikelihoodKernel(sse);
4199
4200 for (int proc = 1; proc <= max_procs; proc++) {
4201
4202 omp_set_num_threads(proc);
4203 setNumThreads(proc);
4204 initializeAllPartialLh();
4205
4206 double beginTime = getRealTime();
4207 double runTime, logl;
4208
4209 for (tree = 0; tree < trees.size(); tree++) {
4210 readTreeString(trees[tree]);
4211 logl = optimizeAllBranches(num_iter);
4212 runTime = getRealTime() - beginTime;
4213
4214 // too fast, increase number of iterations
4215 if (runTime*10 < min_time && proc == 1 && tree == 0) {
4216 int new_num_iter = 10;
4217 cout << "Increase to " << new_num_iter << " rounds for branch lengths" << endl;
4218 logl = optimizeAllBranches(new_num_iter - num_iter);
4219 num_iter = new_num_iter;
4220 runTime = getRealTime() - beginTime;
4221 }
4222
4223 // considering at least 2 trees
4224 if ((runTime < min_time && proc == 1) || trees.size() == 1) {
4225 // time not reached, add more tree
4226 // readTreeString(trees[0]);
4227 // doRandomNNIs();
4228 generateRandomTree(YULE_HARDING);
4229 wrapperFixNegativeBranch(true);
4230 trees.push_back(getTreeString());
4231 }
4232 curScore = saved_curScore;
4233 }
4234
4235 if (proc == 1)
4236 cout << trees.size() << " trees examined" << endl;
4237
4238 deleteAllPartialLh();
4239
4240 runTimes.push_back(runTime);
4241 double speedup = runTimes[0] / runTime;
4242
4243 cout << "Threads: " << proc << " / Time: " << runTime << " sec / Speedup: " << speedup
4244 << " / Efficiency: " << (int)round(speedup*100/proc) << "% / LogL: " << (int)logl << endl;
4245
4246 // break if too bad efficiency ( < 50%) or worse than than 10% of the best run time
4247 if (speedup*2 <= proc || (runTime > runTimes[bestProc]*1.1 && proc>1))
4248 break;
4249
4250 // update best threads if sufficient
4251 if (runTime <= runTimes[bestProc]*0.95)
4252 bestProc = proc-1;
4253
4254 }
4255
4256 readTreeString(trees[0]);
4257
4258 cout << "BEST NUMBER OF THREADS: " << bestProc+1 << endl << endl;
4259 setNumThreads(bestProc+1);
4260
4261 return bestProc+1;
4262 #endif
4263 }
4264