1 /***************************************************************************
2  *   Copyright (C) 2009-2015 by                                            *
3  *   BUI Quang Minh <minh.bui@univie.ac.at>                                *
4  *   Lam-Tung Nguyen <nltung@gmail.com>                                    *
5  *                                                                         *
6  *                                                                         *
7  *   This program is free software; you can redistribute it and/or modify  *
8  *   it under the terms of the GNU General Public License as published by  *
9  *   the Free Software Foundation; either version 2 of the License, or     *
10  *   (at your option) any later version.                                   *
11  *                                                                         *
12  *   This program is distributed in the hope that it will be useful,       *
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
15  *   GNU General Public License for more details.                          *
16  *                                                                         *
17  *   You should have received a copy of the GNU General Public License     *
18  *   along with this program; if not, write to the                         *
19  *   Free Software Foundation, Inc.,                                       *
20  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
21  ***************************************************************************/
22 #include "iqtree.h"
23 #include "phylotreemixlen.h"
24 #include "phylosupertree.h"
25 #include "phylosupertreeplen.h"
26 #include "model/partitionmodelplen.h"
27 #include "mexttree.h"
28 #include "utils/timeutil.h"
29 #include "model/modelmarkov.h"
30 #include "model/rategamma.h"
31 //#include "phylotreemixlen.h"
32 //#include "model/modelfactorymixlen.h"
33 #include <numeric>
34 #include "utils/tools.h"
35 #include "utils/MPIHelper.h"
36 #include "utils/pllnni.h"
37 
38 Params *globalParams;
39 Alignment *globalAlignment;
40 extern StringIntMap pllTreeCounter;
41 
IQTree()42 IQTree::IQTree() : PhyloTree() {
43     IQTree::init();
44 }
45 
init()46 void IQTree::init() {
47 //    PhyloTree::init();
48     k_represent = 0;
49     k_delete = k_delete_min = k_delete_max = k_delete_stay = 0;
50     dist_matrix = NULL;
51     var_matrix = NULL;
52 //    curScore = 0.0; // Current score of the tree
53     cur_pars_score = -1;
54 //    enable_parsimony = false;
55     estimate_nni_cutoff = false;
56     nni_cutoff = -1e6;
57     nni_sort = false;
58     testNNI = false;
59 //    print_tree_lh = false;
60 //    write_intermediate_trees = 0;
61 //    max_candidate_trees = 0;
62     logl_cutoff = 0.0;
63     len_scale = 10000;
64 //    save_all_br_lens = false;
65     duplication_counter = 0;
66     //boot_splits = new SplitGraph;
67     pll2iqtree_pattern_index = NULL;
68 
69     treels_name = Params::getInstance().out_prefix;
70     treels_name += ".treels";
71     out_lh_file = Params::getInstance().out_prefix;
72     out_lh_file += ".treelh";
73     site_lh_file = Params::getInstance().out_prefix;
74     site_lh_file += ".sitelh";
75 
76     if (Params::getInstance().print_tree_lh) {
77         out_treelh.open(out_lh_file.c_str());
78         out_sitelh.open(site_lh_file.c_str());
79     }
80 
81     if (Params::getInstance().write_intermediate_trees)
82         out_treels.open(treels_name.c_str());
83     on_refine_btree = false;
84     contree_rfdist = -1;
85     boot_consense_logl = 0.0;
86 
87 }
88 
IQTree(Alignment * aln)89 IQTree::IQTree(Alignment *aln) : PhyloTree(aln) {
90     IQTree::init();
91 }
92 
setCheckpoint(Checkpoint * checkpoint)93 void IQTree::setCheckpoint(Checkpoint *checkpoint) {
94     PhyloTree::setCheckpoint(checkpoint);
95     stop_rule.setCheckpoint(checkpoint);
96     candidateTrees.setCheckpoint(checkpoint);
97     for (auto it = boot_splits.begin(); it != boot_splits.end(); it++)
98         (*it)->setCheckpoint(checkpoint);
99 }
100 
saveUFBoot(Checkpoint * checkpoint)101 void IQTree::saveUFBoot(Checkpoint *checkpoint) {
102     checkpoint->startStruct("UFBoot");
103     if (MPIHelper::getInstance().isWorker()) {
104         CKP_SAVE(sample_start);
105         CKP_SAVE(sample_end);
106         checkpoint->startList(boot_samples.size());
107         checkpoint->setListElement(sample_start-1);
108         for (int id = sample_start; id != sample_end; id++) {
109             checkpoint->addListElement();
110             stringstream ss;
111             ss.precision(10);
112             ss << boot_counts[id] << " " << boot_logl[id] << " " << boot_orig_logl[id] << " " << boot_trees[id];
113             checkpoint->put("", ss.str());
114         }
115         checkpoint->endList();
116     } else {
117         CKP_SAVE(logl_cutoff);
118         int boot_splits_size = boot_splits.size();
119         CKP_SAVE(boot_splits_size);
120         checkpoint->startList(boot_samples.size());
121         for (int id = 0; id != boot_samples.size(); id++) {
122             checkpoint->addListElement();
123             stringstream ss;
124             ss.precision(10);
125             ss << boot_counts[id] << " " << boot_logl[id] << " " << boot_orig_logl[id] << " " << boot_trees[id];
126             checkpoint->put("", ss.str());
127         }
128         checkpoint->endList();
129     }
130     checkpoint->endStruct();
131 }
132 
saveCheckpoint()133 void IQTree::saveCheckpoint() {
134     stop_rule.saveCheckpoint();
135     candidateTrees.saveCheckpoint();
136 
137     if (boot_samples.size() > 0 && !boot_trees.front().empty()) {
138         saveUFBoot(checkpoint);
139         // boot_splits
140         int id = 0;
141         for (vector<SplitGraph*>::iterator sit = boot_splits.begin(); sit != boot_splits.end(); sit++, id++) {
142             checkpoint->startStruct("UFBootSplit" + convertIntToString(id));
143             (*sit)->saveCheckpoint();
144             checkpoint->endStruct();
145         }
146     }
147 
148     PhyloTree::saveCheckpoint();
149     CKP_SAVE(boot_consense_logl);
150     CKP_SAVE(contree_rfdist);
151 }
152 
restoreUFBoot(Checkpoint * checkpoint)153 void IQTree::restoreUFBoot(Checkpoint *checkpoint) {
154     checkpoint->startStruct("UFBoot");
155     // save boot_samples and boot_trees
156     int id;
157     checkpoint->startList(params->gbo_replicates);
158     int sample_start, sample_end;
159     CKP_RESTORE(sample_start);
160     CKP_RESTORE(sample_end);
161     checkpoint->setListElement(sample_start-1);
162     for (id = sample_start; id != sample_end; id++) {
163         checkpoint->addListElement();
164         string str;
165         checkpoint->getString("", str);
166         ASSERT(!str.empty());
167         stringstream ss(str);
168         ss >> boot_counts[id] >> boot_logl[id] >> boot_orig_logl[id] >> boot_trees[id];
169     }
170     checkpoint->endList();
171     checkpoint->endStruct();
172 }
173 
restoreCheckpoint()174 void IQTree::restoreCheckpoint() {
175     PhyloTree::restoreCheckpoint();
176     stop_rule.restoreCheckpoint();
177     candidateTrees.restoreCheckpoint();
178 
179     if (params->gbo_replicates > 0 && checkpoint->hasKeyPrefix("UFBoot")) {
180         checkpoint->startStruct("UFBoot");
181 //        CKP_RESTORE(max_candidate_trees);
182         CKP_RESTORE(logl_cutoff);
183         // save boot_samples and boot_trees
184         int id = 0;
185         checkpoint->startList(params->gbo_replicates);
186         boot_trees.resize(params->gbo_replicates);
187         boot_logl.resize(params->gbo_replicates);
188         boot_orig_logl.resize(params->gbo_replicates);
189         boot_counts.resize(params->gbo_replicates);
190         for (id = 0; id < params->gbo_replicates; id++) {
191             checkpoint->addListElement();
192             string str;
193             checkpoint->getString("", str);
194             stringstream ss(str);
195             ss >> boot_counts[id] >> boot_logl[id] >> boot_orig_logl[id] >> boot_trees[id];
196         }
197         checkpoint->endList();
198         int boot_splits_size = 0;
199         CKP_RESTORE(boot_splits_size);
200         checkpoint->endStruct();
201 
202         // boot_splits
203         for (id = 0; id < boot_splits_size; id++) {
204             checkpoint->startStruct("UFBootSplit" + convertIntToString(id));
205             SplitGraph *sg = new SplitGraph;
206             sg->createBlocks();
207             StrVector taxname;
208             getTaxaName(taxname);
209             for (auto its = taxname.begin(); its != taxname.end(); its++)
210                 sg->getTaxa()->AddTaxonLabel(NxsString(its->c_str()));
211             sg->setCheckpoint(checkpoint);
212             sg->restoreCheckpoint();
213             boot_splits.push_back(sg);
214             checkpoint->endStruct();
215         }
216     }
217 
218     CKP_RESTORE(boot_consense_logl);
219     CKP_RESTORE(contree_rfdist);
220 }
221 
initSettings(Params & params)222 void IQTree::initSettings(Params &params) {
223 
224     searchinfo.nni_type = params.nni_type;
225     optimize_by_newton = params.optimize_by_newton;
226     setLikelihoodKernel(params.SSE);
227     if (num_threads > 0)
228         setNumThreads(num_threads);
229     else
230         setNumThreads(params.num_threads);
231     candidateTrees.init(this->aln, 200);
232     intermediateTrees.init(this->aln, 200000);
233 
234     if (params.min_iterations == -1) {
235         if (!params.gbo_replicates) {
236             if (params.stop_condition == SC_UNSUCCESS_ITERATION) {
237                 params.min_iterations = aln->getNSeq() * 100;
238             } else if (aln->getNSeq() < 100) {
239                 params.min_iterations = 200;
240             } else {
241                 params.min_iterations = aln->getNSeq() * 2;
242             }
243             if (params.iteration_multiple > 1)
244                 params.min_iterations = aln->getNSeq() * params.iteration_multiple;
245         } else {
246             params.min_iterations = 100;
247         }
248     }
249     if (!params.treeset_file.empty() && params.min_iterations == -1) {
250         params.min_iterations = 1;
251         params.stop_condition = SC_FIXED_ITERATION;
252         params.numInitTrees = 1;
253     }
254     if (params.gbo_replicates)
255         params.max_iterations = max(params.max_iterations, max(params.min_iterations, params.step_iterations));
256 
257     k_represent = params.k_representative;
258 
259     if (params.p_delete == -1.0) {
260         if (aln->getNSeq() < 4)
261             params.p_delete = 0.0; // delete nothing
262         else if (aln->getNSeq() == 4)
263             params.p_delete = 0.25; // just delete 1 leaf
264         else if (aln->getNSeq() == 5)
265             params.p_delete = 0.4; // just delete 2 leaves
266         else if (aln->getNSeq() < 51)
267             params.p_delete = 0.5;
268         else if (aln->getNSeq() < 100)
269             params.p_delete = 0.3;
270         else if (aln->getNSeq() < 200)
271             params.p_delete = 0.2;
272         else if (aln->getNSeq() < 400)
273             params.p_delete = 0.1;
274         else
275             params.p_delete = 0.05;
276     }
277     //tree.setProbDelete(params.p_delete);
278     if (params.p_delete != -1.0) {
279         k_delete = k_delete_min = k_delete_max = ceil(params.p_delete * leafNum);
280     } else {
281         k_delete = k_delete_min = 10;
282         k_delete_max = leafNum / 2;
283         if (k_delete_max > 100)
284             k_delete_max = 100;
285         if (k_delete_max < 20)
286             k_delete_max = 20;
287         k_delete_stay = ceil(leafNum / k_delete);
288     }
289 
290     //tree.setIQPIterations(params.stop_condition, params.stop_confidence, params.min_iterations, params.max_iterations);
291 
292     stop_rule.initialize(params);
293 
294     //tree.setIQPAssessQuartet(params.iqp_assess_quartet);
295     iqp_assess_quartet = params.iqp_assess_quartet;
296     estimate_nni_cutoff = params.estimate_nni_cutoff;
297     nni_cutoff = params.nni_cutoff;
298     nni_sort = params.nni_sort;
299     testNNI = params.testNNI;
300 
301     globalParams = &params;
302     globalAlignment = aln;
303 
304     //write_intermediate_trees = params.write_intermediate_trees;
305 
306     if (Params::getInstance().write_intermediate_trees > 2 || params.gbo_replicates > 0) {
307         save_all_trees = 1;
308     }
309     if (params.gbo_replicates > 0) {
310         if (params.iqp_assess_quartet != IQP_BOOTSTRAP) {
311             save_all_trees = 2;
312         }
313     }
314 //    if (params.gbo_replicates > 0 && params.do_compression)
315 //        save_all_br_lens = true;
316 //    print_tree_lh = params.print_tree_lh;
317 //    max_candidate_trees = params.max_candidate_trees;
318 //    if (max_candidate_trees == 0)
319 //        max_candidate_trees = aln->getNSeq() * params.step_iterations;
320     setRootNode(params.root);
321 
322     size_t i;
323 
324     if (params.online_bootstrap && params.gbo_replicates > 0 && !isSuperTreeUnlinked()) {
325         if (aln->getNSeq() < 4)
326             outError("It makes no sense to perform bootstrap with less than 4 sequences.");
327 
328         string bootaln_name = params.out_prefix;
329         bootaln_name += ".bootaln";
330         if (params.print_bootaln) {
331             ofstream bootalnout;
332             bootalnout.open(bootaln_name.c_str());
333             bootalnout.close();
334         }
335 
336         // 2015-12-17: initialize random stream for creating bootstrap samples
337         // mainly so that checkpointing does not need to save bootstrap samples
338         int *saved_randstream = randstream;
339         init_random(params.ran_seed);
340 
341 //        cout << "Generating " << params.gbo_replicates << " samples for ultrafast "
342 //             << RESAMPLE_NAME << " (seed: " << params.ran_seed << ")..." << endl;
343         // allocate memory for boot_samples
344         boot_samples.resize(params.gbo_replicates);
345         sample_start = 0;
346         sample_end = boot_samples.size();
347 
348         // compute the sample_start and sample_end
349         if (MPIHelper::getInstance().getNumProcesses() > 1) {
350             int num_samples = boot_samples.size() / MPIHelper::getInstance().getNumProcesses();
351             if (boot_samples.size() % MPIHelper::getInstance().getNumProcesses() != 0)
352                 num_samples++;
353             sample_start = MPIHelper::getInstance().getProcessID() * num_samples;
354             sample_end = sample_start + num_samples;
355             if (sample_end > boot_samples.size())
356                 sample_end = boot_samples.size();
357         }
358 
359         size_t orig_nptn = getAlnNPattern();
360 #ifdef BOOT_VAL_FLOAT
361         size_t nptn = get_safe_upper_limit_float(orig_nptn);
362 #else
363         size_t nptn = get_safe_upper_limit(orig_nptn);
364 #endif
365         BootValType *mem = aligned_alloc<BootValType>(nptn * (size_t)(params.gbo_replicates));
366         memset(mem, 0, nptn * (size_t)(params.gbo_replicates) * sizeof(BootValType));
367         for (i = 0; i < params.gbo_replicates; i++)
368             boot_samples[i] = mem + i*nptn;
369 
370         if (boot_trees.empty()) {
371             boot_logl.resize(params.gbo_replicates, -DBL_MAX);
372             boot_orig_logl.resize(params.gbo_replicates, -DBL_MAX);
373             boot_trees.resize(params.gbo_replicates, "");
374             boot_counts.resize(params.gbo_replicates, 0);
375         } else {
376             cout << "CHECKPOINT: " << boot_trees.size() << " UFBoot trees and " << boot_splits.size() << " UFBootSplits restored" << endl;
377         }
378         VerboseMode saved_mode = verbose_mode;
379         verbose_mode = VB_QUIET;
380         for (i = 0; i < params.gbo_replicates; i++) {
381             if (params.print_bootaln) {
382                 Alignment* bootstrap_alignment;
383                 if (aln->isSuperAlignment())
384                     bootstrap_alignment = new SuperAlignment;
385                 else
386                     bootstrap_alignment = new Alignment;
387                 IntVector this_sample;
388                 bootstrap_alignment->createBootstrapAlignment(aln, &this_sample, params.bootstrap_spec);
389                 for (size_t j = 0; j < orig_nptn; j++)
390                     boot_samples[i][j] = this_sample[j];
391                 bootstrap_alignment->printAlignment(params.aln_output_format, bootaln_name.c_str(), true);
392                 delete bootstrap_alignment;
393             } else {
394                 IntVector this_sample;
395                 aln->createBootstrapAlignment(this_sample, params.bootstrap_spec);
396                 for (size_t j = 0; j < orig_nptn; j++)
397                     boot_samples[i][j] = this_sample[j];
398             }
399         }
400         verbose_mode = saved_mode;
401         if (params.print_bootaln) {
402             cout << "Bootstrap alignments printed to " << bootaln_name << endl;
403         }
404 
405 //        cout << "Max candidate trees (tau): " << max_candidate_trees << endl;
406 
407         // restore randstream
408         finish_random();
409         randstream = saved_randstream;
410 
411         // Diep: initialize data members to be used in the Refinement Step
412         on_refine_btree = false;
413         saved_aln_on_refine_btree = NULL;
414         if(params.ufboot2corr){
415             boot_samples_int.resize(params.gbo_replicates);
416             for (size_t i = 0; i < params.gbo_replicates; i++) {
417                 boot_samples_int[i].resize(nptn, 0);
418                 for (size_t j = 0; j < orig_nptn; j++)
419                     boot_samples_int[i][j] = boot_samples[i][j];
420                }
421         }
422 
423     }
424 
425     if (params.root_state) {
426         if (strlen(params.root_state) != 1)
427             outError("Root state must have exactly 1 character");
428         root_state = aln->convertState(params.root_state[0]);
429         if (root_state < 0 || root_state >= aln->num_states)
430             outError("Invalid root state");
431     }
432 }
433 
~IQTree()434 IQTree::~IQTree() {
435     //if (bonus_values)
436     //delete bonus_values;
437     //bonus_values = NULL;
438 
439     for (vector<SplitGraph*>::reverse_iterator it2 = boot_splits.rbegin(); it2 != boot_splits.rend(); it2++)
440         delete (*it2);
441     boot_splits.clear();
442     //if (boot_splits) delete boot_splits;
443 
444     if (!boot_samples.empty()) {
445         aligned_free(boot_samples[0]); // free memory
446         boot_samples.clear();
447     }
448 }
449 
450 extern const char *aa_model_names_rax[];
451 
createPLLPartition(Params & params,ostream & pllPartitionFileHandle)452 void IQTree::createPLLPartition(Params &params, ostream &pllPartitionFileHandle) {
453     if (isSuperTree()) {
454         PhyloSuperTree *siqtree = (PhyloSuperTree*) this;
455         // additional check for PLL hard limit
456         if (params.pll) {
457             if (siqtree->size() > PLL_NUM_BRANCHES)
458                 outError("Number of partitions exceeds PLL limit, please increase PLL_NUM_BRANCHES constant in pll.h");
459             int i = 0;
460             int startPos = 1;
461 
462             // prepare proper partition file
463             for (PhyloSuperTree::iterator it = siqtree->begin(); it != siqtree->end(); it++) {
464                 i++;
465                 int curLen = ((*it)->aln->seq_type == SEQ_CODON) ? (*it)->getAlnNSite()*3 : (*it)->getAlnNSite();
466                 if ((*it)->aln->seq_type == SEQ_DNA || (*it)->aln->seq_type == SEQ_CODON) {
467                     pllPartitionFileHandle << "DNA";
468                 } else if ((*it)->aln->seq_type == SEQ_PROTEIN) {
469                     if ((*it)->aln->model_name != "" && (*it)->aln->model_name.substr(0, 4) != "TEST" && (*it)->aln->model_name.substr(0, 2) != "MF") {
470                         string modelStr = (*it)->aln->model_name.
471                                 substr(0, (*it)->aln->model_name.find_first_of("+{"));
472                         if (modelStr == "LG4")
473                             modelStr = "LG4M";
474                         bool name_ok = false;
475                         for (int j = 0; j < 18; j++)
476                             if (modelStr == aa_model_names_rax[j]) {
477                                 name_ok = true;
478                                 break;
479                             }
480                         if (name_ok)
481                             pllPartitionFileHandle << modelStr;
482                         else
483                             pllPartitionFileHandle << "WAG";
484                     } else {
485                         pllPartitionFileHandle << "WAG";
486                     }
487                 } else
488                     outError("PLL only works with DNA/protein alignments");
489                 pllPartitionFileHandle << ", p" << i << " = " << startPos << "-" << startPos + curLen - 1 << endl;
490                 startPos = startPos + curLen;
491             }
492         } else {
493             // only prepare partition file for computing parsimony trees
494             SeqType datatype[] = {SEQ_DNA, SEQ_CODON, SEQ_PROTEIN};
495             PhyloSuperTree::iterator it;
496 
497             for (int i = 0; i < sizeof(datatype)/sizeof(SeqType); i++) {
498                 bool first = true;
499                 int startPos = 1;
500                 for (it = siqtree->begin(); it != siqtree->end(); it++)
501                     if ((*it)->aln->seq_type == datatype[i]) {
502                         if (first) {
503                         if (datatype[i] == SEQ_DNA || datatype[i] == SEQ_CODON)
504                             pllPartitionFileHandle << "DNA";
505                         else
506                             pllPartitionFileHandle << "WAG";
507                         }
508                         int curLen = (datatype[i] == SEQ_CODON) ? (*it)->getAlnNSite()*3 : (*it)->getAlnNSite();
509                         if (first)
510                             pllPartitionFileHandle << ", p" << i << " = ";
511                         else
512                             pllPartitionFileHandle << ", ";
513 
514                         pllPartitionFileHandle << startPos << "-" << startPos + curLen - 1;
515                         startPos = startPos + curLen;
516                         first = false;
517                     } else {
518                         startPos = startPos + (*it)->getAlnNSite();
519                     }
520                 if (!first) pllPartitionFileHandle << endl;
521             }
522         }
523     } else {
524         /* create a partition file */
525         string model;
526         if (aln->seq_type == SEQ_DNA || aln->seq_type == SEQ_CODON) {
527             model = "DNA";
528         } else if (aln->seq_type == SEQ_PROTEIN) {
529             if (params.pll && params.model_name != "" && params.model_name.substr(0, 4) != "TEST" && params.model_name.substr(0, 2) != "MF") {
530                 model = params.model_name.substr(0, params.model_name.find_first_of("+{"));
531             } else {
532                 model = "WAG";
533             }
534         } else {
535             model = "WAG";
536             //outError("PLL currently only supports DNA/protein alignments");
537         }
538         int nsite = (aln->seq_type == SEQ_CODON) ? getAlnNSite()*3 : getAlnNSite();
539         pllPartitionFileHandle << model << ", p1 = " << "1-" << nsite << endl;
540     }
541 }
542 
computeInitialTree(LikelihoodKernel kernel)543 void IQTree::computeInitialTree(LikelihoodKernel kernel) {
544     double start = getRealTime();
545     string initTree;
546     string out_file = params->out_prefix;
547     int score;
548     if (params->stop_condition == SC_FIXED_ITERATION && params->numNNITrees > params->min_iterations)
549         params->numNNITrees = max(params->min_iterations, 1);
550     int fixed_number = 0;
551 
552     if (params->sankoff_cost_file && !cost_matrix)
553         loadCostMatrixFile(params->sankoff_cost_file);
554     if (aln->ordered_pattern.empty())
555         aln->orderPatternByNumChars(PAT_VARIANT);
556 
557     setParsimonyKernel(kernel);
558 
559     if (params->user_file) {
560         // start the search with user-defined tree
561         cout << "Reading input tree file " << params->user_file << " ...";
562         bool myrooted = params->is_rooted;
563         readTree(params->user_file, myrooted);
564         if (myrooted && !isSuperTreeUnlinked()) {
565             cout << " rooted tree";
566         }
567         cout << endl;
568         setAlignment(aln);
569         if (isSuperTree())
570             wrapperFixNegativeBranch(params->fixed_branch_length == BRLEN_OPTIMIZE &&
571                                      params->partition_type == BRLEN_OPTIMIZE);
572         else
573             fixed_number = wrapperFixNegativeBranch(false);
574         params->numInitTrees = 1;
575         params->numNNITrees = 1;
576         if (params->pll)
577             pllReadNewick(getTreeString());
578         initTree = getTreeString();
579         CKP_SAVE(initTree);
580         saveCheckpoint();
581     } else if (CKP_RESTORE(initTree)) {
582         readTreeString(initTree);
583         cout << endl << "CHECKPOINT: Initial tree restored" << endl;
584     } else {
585         START_TREE_TYPE start_tree = params->start_tree;
586         // only own parsimony kernel supports constraint tree
587         if (!constraintTree.empty())
588             start_tree = STT_PARSIMONY;
589         switch (start_tree) {
590         case STT_PARSIMONY:
591             //initCostMatrix(CM_UNIFORM);
592             // Create parsimony tree using IQ-Tree kernel
593             cout << "Creating fast initial parsimony tree by random order stepwise addition..." << endl;
594 //            aln->orderPatternByNumChars();
595             start = getRealTime();
596             score = computeParsimonyTree(params->out_prefix, aln, randstream);
597             cout << getRealTime() - start << " seconds, parsimony score: " << score
598                 << " (based on " << aln->num_parsimony_sites << " sites)"<< endl;
599             // already fixed branch length
600             //wrapperFixNegativeBranch(false);
601 
602             break;
603         case STT_RANDOM_TREE:
604         case STT_PLL_PARSIMONY:
605             cout << endl;
606             cout << "Create initial parsimony tree by phylogenetic likelihood library (PLL)... ";
607             pllInst->randomNumberSeed = params->ran_seed + MPIHelper::getInstance().getProcessID();
608             pllComputeRandomizedStepwiseAdditionParsimonyTree(pllInst, pllPartitions, params->sprDist);
609             resetBranches(pllInst);
610             pllTreeToNewick(pllInst->tree_string, pllInst, pllPartitions, pllInst->start->back,
611                     PLL_FALSE, PLL_TRUE, PLL_FALSE, PLL_FALSE, PLL_FALSE, PLL_SUMMARIZE_LH, PLL_FALSE, PLL_FALSE);
612             PhyloTree::readTreeStringSeqName(string(pllInst->tree_string));
613             cout << getRealTime() - start << " seconds" << endl;
614             wrapperFixNegativeBranch(true);
615             break;
616         case STT_BIONJ:
617             // This is the old default option: using BIONJ as starting tree
618             computeBioNJ(*params, aln, dist_file);
619             cout << getRealTime() - start << " seconds" << endl;
620             params->numInitTrees = 1;
621             if (isSuperTree())
622                 wrapperFixNegativeBranch(true);
623             else
624                 fixed_number = wrapperFixNegativeBranch(false);
625             break;
626         case STT_USER_TREE:
627             ASSERT(0 && "User tree should be handled already");
628             break;
629         }
630         initTree = getTreeString();
631         CKP_SAVE(initTree);
632         saveCheckpoint();
633         checkpoint->dump(true);
634     }
635 
636     if (!constraintTree.isCompatible(this))
637         outError("Initial tree is not compatible with constraint tree");
638 
639     if (fixed_number) {
640         cout << "WARNING: " << fixed_number << " undefined/negative branch lengths are initialized with parsimony" << endl;
641     }
642 
643     if (params->root) {
644         StrVector outgroup_names;
645         convert_string_vec(params->root, outgroup_names);
646         for (auto it = outgroup_names.begin(); it != outgroup_names.end(); it++)
647             if (aln->getSeqID(*it) < 0)
648                 outError("Specified outgroup taxon " + *it + " not found");
649     }
650 
651     if (params->write_init_tree) {
652         out_file += ".init_tree";
653         printTree(out_file.c_str(), WT_NEWLINE);
654 //        printTree(getTreeString().c_str(), WT_NEWLINE);
655     }
656 }
657 
addTreeToCandidateSet(string treeString,double score,bool updateStopRule,int sourceProcID)658 int IQTree::addTreeToCandidateSet(string treeString, double score, bool updateStopRule, int sourceProcID) {
659     double curBestScore = candidateTrees.getBestScore();
660     int pos = candidateTrees.update(treeString, score);
661     if (updateStopRule) {
662         stop_rule.setCurIt(stop_rule.getCurIt() + 1);
663         if (score > curBestScore) {
664             if (pos != -1) {
665                 stop_rule.addImprovedIteration(stop_rule.getCurIt());
666                 cout << "BETTER TREE FOUND at iteration " << stop_rule.getCurIt() << ": " << score << endl;
667             } else {
668                 cout << "UPDATE BEST LOG-LIKELIHOOD: " << score << endl;
669             }
670             bestcandidate_changed = true;
671             // COMMENT OUT: not safe with MPI version
672 //            printResultTree();
673         }
674 
675         curScore = score;
676         printIterationInfo(sourceProcID);
677     }
678     return pos;
679 }
680 
initCandidateTreeSet(int nParTrees,int nNNITrees)681 void IQTree::initCandidateTreeSet(int nParTrees, int nNNITrees) {
682 
683     if (nParTrees > 0) {
684         if (params->start_tree == STT_RANDOM_TREE)
685             cout << "Generating " << nParTrees  << " random trees... ";
686         else
687             cout << "Generating " << nParTrees  << " parsimony trees... ";
688         cout.flush();
689     }
690     double startTime = getRealTime();
691 //    int numDupPars = 0;
692 //    bool orig_rooted = rooted;
693 //    rooted = false;
694     int processID = MPIHelper::getInstance().getProcessID();
695 
696 #ifdef _OPENMP
697     StrVector pars_trees;
698     if (params->start_tree == STT_PARSIMONY && nParTrees >= 1) {
699         pars_trees.resize(nParTrees);
700         #pragma omp parallel
701         {
702             int *rstream;
703             int ran_seed = params->ran_seed + processID * 1000 + omp_get_thread_num();
704             init_random(ran_seed, false, &rstream);
705             PhyloTree tree;
706             if (!constraintTree.empty()) {
707                 tree.constraintTree.readConstraint(constraintTree);
708             }
709             tree.setParams(params);
710             tree.setParsimonyKernel(params->SSE);
711             tree.rooted = rooted;
712             #pragma omp for schedule(dynamic)
713             for (int i = 0; i < nParTrees; i++) {
714                 tree.computeParsimonyTree(NULL, aln, rstream);
715                 pars_trees[i] = tree.getTreeString();
716             }
717             finish_random(rstream);
718         }
719     }
720 #endif
721 
722     int init_size = candidateTrees.size();
723 
724 //    unsigned long curNumTrees = candidateTrees.size();
725     for (int treeNr = 1; treeNr <= nParTrees; treeNr++) {
726         int parRandSeed = Params::getInstance().ran_seed + processID * nParTrees + treeNr;
727         string curParsTree;
728 
729         /********* Create parsimony tree using PLL *********/
730         if (params->start_tree == STT_PLL_PARSIMONY) {
731             pllInst->randomNumberSeed = parRandSeed;
732             pllComputeRandomizedStepwiseAdditionParsimonyTree(pllInst, pllPartitions, params->sprDist);
733             resetBranches(pllInst);
734             pllTreeToNewick(pllInst->tree_string, pllInst, pllPartitions,
735                     pllInst->start->back, PLL_FALSE, PLL_TRUE, PLL_FALSE,
736                     PLL_FALSE, PLL_FALSE, PLL_SUMMARIZE_LH, PLL_FALSE, PLL_FALSE);
737             curParsTree = string(pllInst->tree_string);
738             PhyloTree::readTreeStringSeqName(curParsTree);
739             wrapperFixNegativeBranch(true);
740             curParsTree = getTreeString();
741         } else if (params->start_tree == STT_RANDOM_TREE) {
742             generateRandomTree(YULE_HARDING);
743             wrapperFixNegativeBranch(true);
744             if (rooted) {
745                 rooted = false;
746                 convertToRooted();
747             }
748             curParsTree = getTreeString();
749         } else if (params->start_tree == STT_PARSIMONY) {
750             /********* Create parsimony tree using IQ-TREE *********/
751 #ifdef _OPENMP
752             PhyloTree::readTreeString(pars_trees[treeNr-1]);
753             curParsTree = getTreeString();
754 #else
755             computeParsimonyTree(NULL, aln, randstream);
756             curParsTree = getTreeString();
757 #endif
758         }
759 
760         int pos = addTreeToCandidateSet(curParsTree, -DBL_MAX, false, MPIHelper::getInstance().getProcessID());
761         // if a duplicated tree is generated, then randomize the tree
762         if (pos == -1) {
763             readTreeString(curParsTree);
764             doRandomNNIs();
765 //            generateRandomTree(YULE_HARDING);
766             wrapperFixNegativeBranch(true);
767             string randTree = getTreeString();
768 //            if (isMixlen()) {
769 //                randTree = optimizeBranches(1);
770 //            } else {
771 //                curScore = -DBL_MAX;
772 //            }
773             addTreeToCandidateSet(randTree, -DBL_MAX, false, MPIHelper::getInstance().getProcessID());
774         }
775     }
776 
777     if (nParTrees > 0)
778         cout << getRealTime() - startTime << " second" << endl;
779 
780     /****************************************************************************************
781                           Compute logl of all initial trees
782     *****************************************************************************************/
783 
784     vector<string> initTreeStrings = candidateTrees.getBestTreeStrings();
785     candidateTrees.clear();
786 
787     if (init_size < initTreeStrings.size())
788         cout << "Computing log-likelihood of " << initTreeStrings.size() - init_size << " initial trees ... ";
789     startTime = getRealTime();
790 
791     for (vector<string>::iterator it = initTreeStrings.begin(); it != initTreeStrings.end(); ++it) {
792         string treeString;
793         double score;
794         readTreeString(*it);
795         if (it-initTreeStrings.begin() >= init_size)
796             treeString = optimizeBranches(params->brlen_num_traversal);
797         else {
798             computeLogL();
799             treeString = getTreeString();
800         }
801         score = getCurScore();
802         candidateTrees.update(treeString,score);
803     }
804 
805     if (Params::getInstance().writeDistImdTrees)
806         intermediateTrees.initTrees(candidateTrees);
807 
808     if (init_size < initTreeStrings.size())
809         cout << getRealTime() - startTime << " seconds" << endl;
810 
811     if (nParTrees > 0) {
812         cout << "Current best score: " << candidateTrees.getBestScore() << endl;
813     }
814 
815     //---- BLOCKING COMMUNICATION
816     syncCandidateTrees(nNNITrees, false);
817 
818 
819     vector<string> bestInitTrees; // Set of best initial trees for doing NNIs
820 
821     bestInitTrees = candidateTrees.getBestTreeStringsForProcess(nNNITrees);
822 
823     cout << endl;
824     cout << "Do NNI search on " << bestInitTrees.size() << " best initial trees" << endl;
825     stop_rule.setCurIt(0);
826     if (candidateTrees.size() > Params::getInstance().numSupportTrees)
827         candidateTrees.clear();// TODO why clear here???
828 
829     candidateTrees.setMaxSize(Params::getInstance().numSupportTrees);
830     vector<string>::iterator it;
831 
832     for (it = bestInitTrees.begin(); it != bestInitTrees.end(); it++) {
833         readTreeString(*it);
834 //        optimizeBranches();
835 //        cout << "curScore: " << curScore << "  Tree before NNI: " << getTreeString() << endl;
836         doNNISearch();
837         string treeString = getTreeString();
838         addTreeToCandidateSet(treeString, curScore, true, MPIHelper::getInstance().getProcessID());
839         if (Params::getInstance().writeDistImdTrees)
840             intermediateTrees.update(treeString, curScore);
841     }
842 
843     // TODO turning this
844     if (isMixlen()) {
845         cout << "Optimizing model parameters for top " << min((int)candidateTrees.size(), params->popSize) << " candidate trees... " << endl;
846 
847         startTime = getRealTime();
848         bestInitTrees = candidateTrees.getBestTreeStrings(params->popSize);
849         for (it = bestInitTrees.begin(); it != bestInitTrees.end(); it++) {
850             string tree;
851             readTreeString(*it);
852             //tree = optimizeBranches();
853             tree = optimizeModelParameters();
854 //            cout << "Tree after brlen opt: " << tree << endl;
855             cout << "Tree " << distance(bestInitTrees.begin(), it)+1 << " / LogL: " << getCurScore() << endl;
856             candidateTrees.update(tree, getCurScore());
857         }
858         cout << getRealTime() - startTime << " seconds" << endl;
859     }
860 
861     //---- BLOCKING COMMUNICATION
862     syncCandidateTrees(Params::getInstance().numSupportTrees, true);
863 
864 }
865 
generateParsimonyTree(int randomSeed)866 string IQTree::generateParsimonyTree(int randomSeed) {
867     string parsimonyTreeString;
868     if (params->start_tree == STT_PLL_PARSIMONY) {
869         pllInst->randomNumberSeed = randomSeed;
870         pllComputeRandomizedStepwiseAdditionParsimonyTree(pllInst,
871                                                           pllPartitions, params->sprDist);
872         resetBranches(pllInst);
873         pllTreeToNewick(pllInst->tree_string, pllInst, pllPartitions,
874                         pllInst->start->back, PLL_FALSE, PLL_TRUE, PLL_FALSE,
875                         PLL_FALSE, PLL_FALSE, PLL_SUMMARIZE_LH, PLL_FALSE, PLL_FALSE);
876         parsimonyTreeString = string(pllInst->tree_string);
877         PhyloTree::readTreeString(parsimonyTreeString);
878         wrapperFixNegativeBranch(true);
879         parsimonyTreeString = getTreeString();
880     } else if (params->start_tree == STT_RANDOM_TREE) {
881         generateRandomTree(YULE_HARDING);
882         wrapperFixNegativeBranch(true);
883         parsimonyTreeString = getTreeString();
884     } else {
885         computeParsimonyTree(NULL, aln, randstream);
886         parsimonyTreeString = getTreeString();
887     }
888     return parsimonyTreeString;
889 
890 }
891 
initializePLL(Params & params)892 void IQTree::initializePLL(Params &params) {
893     pllAttr.rateHetModel = PLL_GAMMA;
894     pllAttr.fastScaling = PLL_FALSE;
895     pllAttr.saveMemory = PLL_FALSE;
896     pllAttr.useRecom = PLL_FALSE;
897     pllAttr.randomNumberSeed = params.ran_seed;
898     pllAttr.numberOfThreads = max(params.num_threads, 1); /* This only affects the pthreads version */
899     if (pllInst != NULL) {
900         pllDestroyInstance(pllInst);
901     }
902     /* Create a PLL getInstance */
903     pllInst = pllCreateInstance(&pllAttr);
904 
905     /* Read in the alignment file */
906     stringstream pllAln;
907     aln->printAlignment(IN_PHYLIP, pllAln);
908     string pllAlnStr = pllAln.str();
909     pllAlignment = pllParsePHYLIPString(pllAlnStr.c_str(), pllAlnStr.length());
910 
911     /* Read in the partition information */
912     // BQM: to avoid printing file
913     stringstream pllPartitionFileHandle;
914     createPLLPartition(params, pllPartitionFileHandle);
915     pllQueue *partitionInfo = pllPartitionParseString(pllPartitionFileHandle.str().c_str());
916 
917     /* Validate the partitions */
918     if (!pllPartitionsValidate(partitionInfo, pllAlignment)) {
919         outError("pllPartitionsValidate");
920     }
921 
922     /* Commit the partitions and build a partitions structure */
923     pllPartitions = pllPartitionsCommit(partitionInfo, pllAlignment);
924 
925     /* We don't need the the intermediate partition queue structure anymore */
926     pllQueuePartitionsDestroy(&partitionInfo);
927 
928     /* eliminate duplicate sites from the alignment and update weights vector */
929     pllAlignmentRemoveDups(pllAlignment, pllPartitions);
930 
931     pllTreeInitTopologyForAlignment(pllInst, pllAlignment);
932 
933     /* Connect the alignment and partition structure with the tree structure */
934     if (!pllLoadAlignment(pllInst, pllAlignment, pllPartitions)) {
935         outError("Incompatible tree/alignment combination");
936     }
937 }
938 
isInitializedPLL()939 bool IQTree::isInitializedPLL() {
940     return pllInst != NULL;
941 }
942 
initializeModel(Params & params,string model_name,ModelsBlock * models_block)943 void IQTree::initializeModel(Params &params, string model_name, ModelsBlock *models_block) {
944     try {
945         if (!getModelFactory()) {
946             if (isSuperTree()) {
947                 if (params.partition_type == BRLEN_OPTIMIZE || params.partition_type == TOPO_UNLINKED) {
948                     setModelFactory(new PartitionModel(params, (PhyloSuperTree*) this, models_block));
949                 } else
950                     setModelFactory(new PartitionModelPlen(params, (PhyloSuperTreePlen*) this, models_block));
951 
952                 // mapTrees again in case of different rooting
953                 if (root)
954                     ((PhyloSuperTree*)this)->mapTrees();
955 
956             } else {
957                 setModelFactory(new ModelFactory(params, model_name, this, models_block));
958             }
959         }
960     } catch (string & str) {
961         outError(str);
962     }
963     setModel(getModelFactory()->model);
964     setRate(getModelFactory()->site_rate);
965     getModelFactory()->setCheckpoint(checkpoint);
966     /*
967      * MDW: I don't understand why/how checkpointing is being used,
968      * however I'm having problems because a restoreCheckpoint
969      * happens before anything has been saved (in
970      * phyloanalysis.cpp:runTreeReconstruction). This fixes that
971      * problem, but as I don't understand what is going on, I am
972      * not at all confident this is the correct solution.
973      */
974 
975      //!!! BQM: Because iqtree restore the checkpoint from the previous run! (not this current run)
976      // That's why saveCheckpoint should NOT be called now! I comment this line out.
977 
978 //    model->saveCheckpoint();
979 
980     if (params.pll) {
981         if (getRate()->getNDiscreteRate() == 1) {
982             outError("Non-Gamma model is not yet supported by PLL.");
983             // TODO: change rateHetModel to PLL_CAT in case of non-Gamma model
984         }
985         if (getRate()->name.substr(0,2) == "+I")
986             outError("+Invar model is not yet supported by PLL.");
987         if (aln->seq_type == SEQ_DNA && getModel()->name != "GTR")
988             outError("non GTR model for DNA is not yet supported by PLL.");
989         pllInitModel(pllInst, pllPartitions);
990     }
991 
992     if (aln->ordered_pattern.empty())
993         aln->orderPatternByNumChars(PAT_VARIANT);
994 
995 }
getProbDelete()996 double IQTree::getProbDelete() {
997     return (double) k_delete / leafNum;
998 }
999 
resetKDelete()1000 void IQTree::resetKDelete() {
1001     k_delete = k_delete_min;
1002     k_delete_stay = ceil(leafNum / k_delete);
1003 }
1004 
increaseKDelete()1005 void IQTree::increaseKDelete() {
1006     if (k_delete >= k_delete_max)
1007         return;
1008     k_delete_stay--;
1009     if (k_delete_stay > 0)
1010         return;
1011     k_delete++;
1012     k_delete_stay = ceil(leafNum / k_delete);
1013     if (verbose_mode >= VB_MED)
1014         cout << "Increase k_delete to " << k_delete << endl;
1015 }
1016 
1017 //void IQTree::setIQPIterations(STOP_CONDITION stop_condition, double stop_confidence, int min_iterations,
1018 //        int max_iterations) {
1019 //    stop_rule.setStopCondition(stop_condition);
1020 //    stop_rule.setConfidenceValue(stop_confidence);
1021 //    stop_rule.setIterationNum(min_iterations, max_iterations);
1022 //}
1023 
findRepresentLeaves(vector<RepresentLeafSet * > & leaves_vec,int nei_id,PhyloNode * dad)1024 RepresentLeafSet* IQTree::findRepresentLeaves(vector<RepresentLeafSet*> &leaves_vec, int nei_id, PhyloNode *dad) {
1025     PhyloNode *node = (PhyloNode*) (dad->neighbors[nei_id]->node);
1026     int set_id = dad->id * 3 + nei_id;
1027     if (leaves_vec[set_id])
1028         return leaves_vec[set_id];
1029     RepresentLeafSet *leaves = new RepresentLeafSet;
1030     RepresentLeafSet * leaves_it[2] = { NULL, NULL };
1031     leaves_vec[set_id] = leaves;
1032     RepresentLeafSet::iterator last;
1033     RepresentLeafSet::iterator cur_it;
1034     int i, j;
1035     //double admit_height = 1000000;
1036 
1037     leaves->clear();
1038     if (node->isLeaf()) {
1039         // set the depth to zero
1040         //node->height = 0.0;
1041         leaves->insert(new RepLeaf(node, 0));
1042     } else {
1043         for (i = 0, j = 0; i < node->neighbors.size(); i++)
1044             if (node->neighbors[i]->node != dad) {
1045                 leaves_it[j++] = findRepresentLeaves(leaves_vec, i, node);
1046             }
1047         ASSERT(j == 2 && leaves_it[0] && leaves_it[1]);
1048         if (leaves_it[0]->empty() && leaves_it[1]->empty()) {
1049             cout << "wrong";
1050         }
1051         RepresentLeafSet::iterator lit[2] = { leaves_it[0]->begin(), leaves_it[1]->begin() };
1052         while (leaves->size() < k_represent) {
1053             int id = -1;
1054             if (lit[0] != leaves_it[0]->end() && lit[1] != leaves_it[1]->end()) {
1055                 if ((*lit[0])->height < (*lit[1])->height)
1056                     id = 0;
1057                 else if ((*lit[0])->height > (*lit[1])->height)
1058                     id = 1;
1059                 else { // tie, choose at random
1060                     id = random_int(2);
1061                 }
1062             } else if (lit[0] != leaves_it[0]->end())
1063                 id = 0;
1064             else if (lit[1] != leaves_it[1]->end())
1065                 id = 1;
1066             else
1067                 break;
1068             ASSERT(id < 2 && id >= 0);
1069             leaves->insert(new RepLeaf((*lit[id])->leaf, (*lit[id])->height + 1));
1070             lit[id]++;
1071         }
1072     }
1073     ASSERT(!leaves->empty());
1074     /*
1075      if (verbose_mode >= VB_MAX) {
1076      for (cur_it = leaves->begin(); cur_it != leaves->end(); cur_it++)
1077      cout << (*cur_it)->leaf->name << " ";
1078      cout << endl;
1079      }*/
1080     return leaves;
1081 }
1082 
1083 /*
1084  void IQPTree::clearRepresentLeaves(vector<RepresentLeafSet*> &leaves_vec, Node *node, Node *dad) {
1085  int nei_id;
1086  for (nei_id = 0; nei_id < node->neighbors.size(); nei_id++)
1087  if (node->neighbors[nei_id]->node == dad) break;
1088  assert(nei_id < node->neighbors.size());
1089  int set_id = node->id * 3 + nei_id;
1090  if (leaves_vec[set_id]) {
1091  for (RepresentLeafSet::iterator rlit = leaves_vec[set_id]->begin(); rlit != leaves_vec[set_id]->end(); rlit++)
1092  delete (*rlit);
1093  delete leaves_vec[set_id];
1094  leaves_vec[set_id] = NULL;
1095  }
1096  FOR_NEIGHBOR_IT(node, dad, it) {
1097  clearRepresentLeaves(leaves_vec, (*it)->node, node);
1098  }
1099  }*/
1100 
deleteNonCherryLeaves(PhyloNodeVector & del_leaves)1101 void IQTree::deleteNonCherryLeaves(PhyloNodeVector &del_leaves) {
1102     NodeVector cherry_taxa;
1103     NodeVector noncherry_taxa;
1104     // get the vector of non cherry taxa
1105     getNonCherryLeaves(noncherry_taxa, cherry_taxa);
1106     root = NULL;
1107     int num_taxa = aln->getNSeq();
1108     int num_delete = k_delete;
1109     if (num_delete > num_taxa - 4)
1110         num_delete = num_taxa - 4;
1111     if (verbose_mode >= VB_DEBUG) {
1112         cout << "Deleting " << num_delete << " leaves" << endl;
1113     }
1114     vector<unsigned int> indices_noncherry(noncherry_taxa.size());
1115     //iota(indices_noncherry.begin(), indices_noncherry.end(), 0);
1116     unsigned int startValue = 0;
1117     for (vector<unsigned int>::iterator it = indices_noncherry.begin(); it != indices_noncherry.end(); ++it) {
1118         (*it) = startValue;
1119         ++startValue;
1120     }
1121     my_random_shuffle(indices_noncherry.begin(), indices_noncherry.end());
1122     int i;
1123     for (i = 0; i < num_delete && i < noncherry_taxa.size(); i++) {
1124         PhyloNode *taxon = (PhyloNode*) noncherry_taxa[indices_noncherry[i]];
1125         del_leaves.push_back(taxon);
1126         deleteLeaf(taxon);
1127         //cout << taxon->id << ", ";
1128     }
1129     int j = 0;
1130     if (i < num_delete) {
1131         vector<unsigned int> indices_cherry(cherry_taxa.size());
1132         //iota(indices_cherry.begin(), indices_cherry.end(), 0);
1133         startValue = 0;
1134         for (vector<unsigned int>::iterator it = indices_cherry.begin(); it != indices_cherry.end(); ++it) {
1135             (*it) = startValue;
1136             ++startValue;
1137         }
1138         my_random_shuffle(indices_cherry.begin(), indices_cherry.end());
1139         while (i < num_delete) {
1140             PhyloNode *taxon = (PhyloNode*) cherry_taxa[indices_cherry[j]];
1141             del_leaves.push_back(taxon);
1142             deleteLeaf(taxon);
1143             i++;
1144             j++;
1145         }
1146     }
1147     root = cherry_taxa[j];
1148 }
1149 
deleteLeaves(PhyloNodeVector & del_leaves)1150 void IQTree::deleteLeaves(PhyloNodeVector &del_leaves) {
1151     NodeVector taxa;
1152     // get the vector of taxa
1153     getTaxa(taxa);
1154     root = NULL;
1155     //int num_delete = floor(p_delete * taxa.size());
1156     int num_delete = k_delete;
1157     int i;
1158     if (num_delete > taxa.size() - 4)
1159         num_delete = taxa.size() - 4;
1160     if (verbose_mode >= VB_DEBUG) {
1161         cout << "Deleting " << num_delete << " leaves" << endl;
1162     }
1163     // now try to randomly delete some taxa of the probability of p_delete
1164     for (i = 0; i < num_delete;) {
1165         int id = random_int(taxa.size());
1166         if (!taxa[id])
1167             continue;
1168         else
1169             i++;
1170         PhyloNode *taxon = (PhyloNode*) taxa[id];
1171         del_leaves.push_back(taxon);
1172         deleteLeaf(taxon);
1173         taxa[id] = NULL;
1174     }
1175     // set root to the first taxon which was not deleted
1176     for (i = 0; i < taxa.size(); i++)
1177         if (taxa[i]) {
1178             root = taxa[i];
1179             break;
1180         }
1181 }
1182 
assessQuartet(Node * leaf0,Node * leaf1,Node * leaf2,Node * del_leaf)1183 int IQTree::assessQuartet(Node *leaf0, Node *leaf1, Node *leaf2, Node *del_leaf) {
1184     ASSERT(dist_matrix);
1185     int nseq = aln->getNSeq();
1186     //int id0 = leaf0->id, id1 = leaf1->id, id2 = leaf2->id;
1187     double dist0 = dist_matrix[leaf0->id * nseq + del_leaf->id] + dist_matrix[leaf1->id * nseq + leaf2->id];
1188     double dist1 = dist_matrix[leaf1->id * nseq + del_leaf->id] + dist_matrix[leaf0->id * nseq + leaf2->id];
1189     double dist2 = dist_matrix[leaf2->id * nseq + del_leaf->id] + dist_matrix[leaf0->id * nseq + leaf1->id];
1190     if (dist0 < dist1 && dist0 < dist2)
1191         return 0;
1192     if (dist1 < dist2)
1193         return 1;
1194     return 2;
1195 }
1196 
assessQuartetParsimony(Node * leaf0,Node * leaf1,Node * leaf2,Node * del_leaf)1197 int IQTree::assessQuartetParsimony(Node *leaf0, Node *leaf1, Node *leaf2, Node *del_leaf) {
1198     int score[3] = { 0, 0, 0 };
1199     for (Alignment::iterator it = aln->begin(); it != aln->end(); it++) {
1200         char ch0 = (*it)[leaf0->id];
1201         char ch1 = (*it)[leaf1->id];
1202         char ch2 = (*it)[leaf2->id];
1203         char chd = (*it)[del_leaf->id];
1204         if (ch0 >= aln->num_states || ch1 >= aln->num_states || ch2 >= aln->num_states || chd >= aln->num_states)
1205             continue;
1206         if (chd == ch0 && ch1 == ch2)
1207             score[0] += (*it).frequency;
1208         if (chd == ch1 && ch0 == ch2)
1209             score[1] += (*it).frequency;
1210         if (chd == ch2 && ch0 == ch1)
1211             score[2] += (*it).frequency;
1212     }
1213     if (score[0] == score[1] && score[0] == score[2]) {
1214         int id = random_int(3);
1215         return id;
1216     }
1217     if (score[0] > score[1] && score[0] > score[2])
1218         return 0;
1219     if (score[1] < score[2])
1220         return 2;
1221     return 1;
1222 }
1223 
initializeBonus(PhyloNode * node,PhyloNode * dad)1224 void IQTree::initializeBonus(PhyloNode *node, PhyloNode *dad) {
1225     if (!node)
1226         node = (PhyloNode*) root;
1227     if (dad) {
1228         PhyloNeighbor *node_nei = (PhyloNeighbor*) node->findNeighbor(dad);
1229         PhyloNeighbor *dad_nei = (PhyloNeighbor*) dad->findNeighbor(node);
1230         node_nei->lh_scale_factor = 0.0;
1231         node_nei->partial_lh_computed = 0;
1232         dad_nei->lh_scale_factor = 0.0;
1233         dad_nei->partial_lh_computed = 0;
1234     }
1235 
1236     FOR_NEIGHBOR_IT(node, dad, it){
1237     initializeBonus((PhyloNode*) ((*it)->node), node);
1238 }
1239 }
1240 
raiseBonus(Neighbor * nei,Node * dad,double bonus)1241 void IQTree::raiseBonus(Neighbor *nei, Node *dad, double bonus) {
1242     ((PhyloNeighbor*) nei)->lh_scale_factor += bonus;
1243     if (verbose_mode >= VB_DEBUG)
1244         cout << dad->id << " - " << nei->node->id << " : " << bonus << endl;
1245 
1246     //  FOR_NEIGHBOR_IT(nei->node, dad, it)
1247     //    raiseBonus((*it), nei->node, bonus);
1248 }
1249 
computePartialBonus(Node * node,Node * dad)1250 double IQTree::computePartialBonus(Node *node, Node* dad) {
1251     PhyloNeighbor *node_nei = (PhyloNeighbor*) node->findNeighbor(dad);
1252     if (node_nei->partial_lh_computed)
1253         return node_nei->lh_scale_factor;
1254 
1255     FOR_NEIGHBOR_IT(node, dad, it){
1256     node_nei->lh_scale_factor += computePartialBonus((*it)->node, node);
1257 }
1258     node_nei->partial_lh_computed = 1;
1259     return node_nei->lh_scale_factor;
1260 }
1261 
findBestBonus(double & best_score,NodeVector & best_nodes,NodeVector & best_dads,Node * node,Node * dad)1262 void IQTree::findBestBonus(double &best_score, NodeVector &best_nodes, NodeVector &best_dads, Node *node, Node *dad) {
1263     double score;
1264     if (!node)
1265         node = root;
1266     if (!dad) {
1267         best_score = 0;
1268     } else {
1269         score = computePartialBonus(node, dad) + computePartialBonus(dad, node);
1270         if (score >= best_score) {
1271             if (score > best_score) {
1272                 best_score = score;
1273                 best_nodes.clear();
1274                 best_dads.clear();
1275             }
1276             best_nodes.push_back(node);
1277             best_dads.push_back(dad);
1278         }
1279         //cout << node->id << " - " << dad->id << " : " << best_score << endl;
1280     }
1281 
1282     FOR_NEIGHBOR_IT(node, dad, it){
1283     findBestBonus(best_score, best_nodes, best_dads, (*it)->node, node);
1284 }
1285 }
1286 
assessQuartets(vector<RepresentLeafSet * > & leaves_vec,PhyloNode * cur_root,PhyloNode * del_leaf)1287 void IQTree::assessQuartets(vector<RepresentLeafSet*> &leaves_vec, PhyloNode *cur_root, PhyloNode *del_leaf) {
1288     const int MAX_DEGREE = 3;
1289     RepresentLeafSet * leaves[MAX_DEGREE];
1290     double bonus[MAX_DEGREE];
1291     memset(bonus, 0, MAX_DEGREE * sizeof(double));
1292     int cnt = 0;
1293 
1294     // only work for birfucating tree
1295     ASSERT(cur_root->degree() == MAX_DEGREE);
1296 
1297     // find the representative leaf set for three subtrees
1298 
1299     FOR_NEIGHBOR_IT(cur_root, NULL, it){
1300     leaves[cnt] = findRepresentLeaves(leaves_vec, cnt, cur_root);
1301     cnt++;
1302 }
1303     for (RepresentLeafSet::iterator i0 = leaves[0]->begin(); i0 != leaves[0]->end(); i0++)
1304         for (RepresentLeafSet::iterator i1 = leaves[1]->begin(); i1 != leaves[1]->end(); i1++)
1305             for (RepresentLeafSet::iterator i2 = leaves[2]->begin(); i2 != leaves[2]->end(); i2++) {
1306                 int best_id;
1307                 if (iqp_assess_quartet == IQP_DISTANCE)
1308                     best_id = assessQuartet((*i0)->leaf, (*i1)->leaf, (*i2)->leaf, del_leaf);
1309                 else
1310                     best_id = assessQuartetParsimony((*i0)->leaf, (*i1)->leaf, (*i2)->leaf, del_leaf);
1311                 bonus[best_id] += 1.0;
1312             }
1313     for (cnt = 0; cnt < MAX_DEGREE; cnt++)
1314         if (bonus[cnt] > 0.0)
1315             raiseBonus(cur_root->neighbors[cnt], cur_root, bonus[cnt]);
1316 
1317 }
1318 
reinsertLeavesByParsimony(PhyloNodeVector & del_leaves)1319 void IQTree::reinsertLeavesByParsimony(PhyloNodeVector &del_leaves) {
1320     ASSERT(0 && "this function is obsolete");
1321     PhyloNodeVector::iterator it_leaf;
1322     ASSERT(root->isLeaf());
1323     for (it_leaf = del_leaves.begin(); it_leaf != del_leaves.end(); it_leaf++) {
1324         //cout << "Add leaf " << (*it_leaf)->id << " to the tree" << endl;
1325         initializeAllPartialPars();
1326         clearAllPartialLH();
1327         Node *target_node = NULL;
1328         Node *target_dad = NULL;
1329         Node *added_node = (*it_leaf)->neighbors[0]->node;
1330         Node *node1 = NULL;
1331         Node *node2 = NULL;
1332         //Node *leaf;
1333         for (int i = 0; i < 3; i++) {
1334             if (added_node->neighbors[i]->node->id == (*it_leaf)->id) {
1335                 //leaf = added_node->neighbors[i]->node;
1336             } else if (!node1) {
1337                 node1 = added_node->neighbors[i]->node;
1338             } else {
1339                 node2 = added_node->neighbors[i]->node;
1340             }
1341         }
1342 
1343         //cout << "(" << node1->id << ", " << node2->id << ")" << "----" << "(" << added_node->id << "," << leaf->id << ")" << endl;
1344         added_node->updateNeighbor(node1, (Node*) 1);
1345         added_node->updateNeighbor(node2, (Node*) 2);
1346 
1347         best_pars_score = INT_MAX;
1348         // TODO: this needs to be adapted
1349 //        addTaxonMPFast(added_node, target_node, target_dad, NULL, root->neighbors[0]->node, root);
1350         target_node->updateNeighbor(target_dad, added_node, -1.0);
1351         target_dad->updateNeighbor(target_node, added_node, -1.0);
1352         added_node->updateNeighbor((Node*) 1, target_node, -1.0);
1353         added_node->updateNeighbor((Node*) 2, target_dad, -1.0);
1354 
1355     }
1356 
1357 }
1358 
reinsertLeaves(PhyloNodeVector & del_leaves)1359 void IQTree::reinsertLeaves(PhyloNodeVector &del_leaves) {
1360     PhyloNodeVector::iterator it_leaf;
1361 
1362     //int num_del_leaves = del_leaves.size();
1363     ASSERT(root->isLeaf());
1364 
1365     for (it_leaf = del_leaves.begin(); it_leaf != del_leaves.end(); it_leaf++) {
1366         if (verbose_mode >= VB_DEBUG)
1367             cout << "Reinserting " << (*it_leaf)->name << " (" << (*it_leaf)->id << ")" << endl;
1368         vector<RepresentLeafSet*> leaves_vec;
1369         leaves_vec.resize(nodeNum * 3, NULL);
1370         initializeBonus();
1371         NodeVector nodes;
1372         getInternalNodes(nodes);
1373         if (verbose_mode >= VB_DEBUG)
1374             drawTree(cout, WT_BR_SCALE | WT_INT_NODE | WT_TAXON_ID | WT_NEWLINE | WT_BR_ID);
1375         //printTree(cout, WT_BR_LEN | WT_INT_NODE | WT_TAXON_ID | WT_NEWLINE);
1376         for (NodeVector::iterator it = nodes.begin(); it != nodes.end(); it++) {
1377             assessQuartets(leaves_vec, (PhyloNode*) (*it), (*it_leaf));
1378         }
1379         NodeVector best_nodes, best_dads;
1380         double best_bonus;
1381         findBestBonus(best_bonus, best_nodes, best_dads);
1382         if (verbose_mode >= VB_DEBUG)
1383             cout << "Best bonus " << best_bonus << " " << best_nodes[0]->id << " " << best_dads[0]->id << endl;
1384         ASSERT(best_nodes.size() == best_dads.size());
1385         int node_id = random_int(best_nodes.size());
1386         if (best_nodes.size() > 1 && verbose_mode >= VB_DEBUG)
1387             cout << best_nodes.size() << " branches show the same best bonus, branch nr. " << node_id << " is chosen"
1388                     << endl;
1389 
1390         reinsertLeaf(*it_leaf, best_nodes[node_id], best_dads[node_id]);
1391         //clearRepresentLeaves(leaves_vec, *it_node, *it_leaf);
1392         /*if (verbose_mode >= VB_DEBUG) {
1393          printTree(cout);
1394          cout << endl;
1395          }*/
1396         for (vector<RepresentLeafSet*>::iterator rit = leaves_vec.begin(); rit != leaves_vec.end(); rit++)
1397             if ((*rit)) {
1398                 RepresentLeafSet *tit = (*rit);
1399                 for (RepresentLeafSet::iterator rlit = tit->begin(); rlit != tit->end(); rlit++)
1400                     delete (*rlit);
1401                 delete (*rit);
1402             }
1403     }
1404     initializeTree(); // BQM: re-index nodes and branches s.t. ping-pong neighbors have the same ID
1405 
1406     if (verbose_mode >= VB_DEBUG)
1407         drawTree(cout, WT_BR_SCALE | WT_INT_NODE | WT_TAXON_ID | WT_NEWLINE | WT_BR_ID);
1408 }
1409 
doParsimonyReinsertion()1410 void IQTree::doParsimonyReinsertion() {
1411     PhyloNodeVector del_leaves;
1412 
1413     deleteLeaves(del_leaves);
1414 
1415     reinsertLeavesByParsimony(del_leaves);
1416     fixNegativeBranch(false);
1417 }
1418 
getNonTabuBranches(Branches & allBranches,SplitGraph & tabuSplits,Branches & nonTabuBranches,Branches * tabuBranches)1419 void IQTree::getNonTabuBranches(Branches& allBranches, SplitGraph& tabuSplits, Branches& nonTabuBranches, Branches* tabuBranches) {
1420     if (tabuSplits.size() == 0) {
1421         return;
1422     }
1423     for (Branches::iterator it = allBranches.begin(); it != allBranches.end(); it++) {
1424         if (isInnerBranch(it->second.first, it->second.second)) {
1425             int nodeID1 = it->second.first->id;
1426             int nodeID2 = it->second.second->id;
1427             Branch curBranch = it->second;
1428             Split* sp = getSplit(it->second.first, it->second.second);
1429             if (!tabuSplits.containSplit(*sp)) {
1430                 nonTabuBranches.insert(pair<int,Branch>(pairInteger(nodeID1, nodeID2), curBranch));
1431             } else {
1432                 if (tabuBranches != NULL) {
1433                     tabuBranches->insert(pair<int,Branch>(pairInteger(nodeID1, nodeID2), curBranch));
1434                 }
1435             }
1436             delete sp;
1437         }
1438 
1439     }
1440 }
1441 
getSplitBranches(Branches & branches,SplitIntMap & splits,Node * node,Node * dad)1442 void IQTree::getSplitBranches(Branches &branches, SplitIntMap &splits, Node *node, Node *dad) {
1443     if (!node) {
1444         node = root;
1445     }
1446     FOR_NEIGHBOR_IT(node, dad, it) {
1447             if (isInnerBranch((*it)->node, node)) {
1448                 Branch curBranch;
1449                 curBranch.first = (*it)->node;
1450                 curBranch.second = node;
1451                 Split* curSplit;
1452                 Split *sp = (*it)->split;
1453                 ASSERT(sp != NULL);
1454                 curSplit = new Split(*sp);
1455                 if (curSplit->shouldInvert())
1456                     curSplit->invert();
1457                 if (splits.findSplit(curSplit) != NULL) {
1458                     //curSplit->report(cout);
1459                     branches.insert(pair<int,Branch>(pairInteger(curBranch.first->id, curBranch.second->id), curBranch));
1460                 }
1461                 delete curSplit;
1462             }
1463             getSplitBranches(branches, splits, (*it)->node, node);
1464         }
1465 }
1466 
shouldEvaluate(Split * curSplit,SplitIntMap & tabuSplits,SplitIntMap & candSplits)1467 bool IQTree::shouldEvaluate(Split *curSplit, SplitIntMap &tabuSplits, SplitIntMap &candSplits) {
1468     bool answer = true;
1469     /******************** CHECK TABU SPLIT **************************/
1470     if (tabuSplits.findSplit(curSplit) != NULL) {
1471         answer = false;
1472     } else if (!candSplits.empty()) {
1473         Split *_curSplit;
1474         /******************** CHECK STABLE SPLIT **************************/
1475         int value;
1476         _curSplit = candSplits.findSplit(curSplit, value);
1477         if (_curSplit == NULL || _curSplit->getWeight() <= params->stableSplitThreshold) {
1478             answer = true;
1479         } else { // add a stable branch with a certain probability
1480             double rndDbl = random_double();
1481             if (rndDbl > params->stableSplitThreshold) {
1482                 answer = true;
1483             } else {
1484                 answer = false;
1485             }
1486         }
1487     } else {
1488         answer = true;
1489     }
1490     return answer;
1491 }
1492 
1493 
getNNIBranches(SplitIntMap & tabuSplits,SplitIntMap & candSplits,Branches & nonNNIBranches,Branches & nniBranches,Node * node,Node * dad)1494 void IQTree::getNNIBranches(SplitIntMap &tabuSplits, SplitIntMap &candSplits,Branches &nonNNIBranches, Branches &nniBranches, Node *node, Node *dad) {
1495     if (!node) {
1496         node = root;
1497     }
1498     FOR_NEIGHBOR_IT(node, dad, it) {
1499             if (isInnerBranch((*it)->node, node)) {
1500                 Branch curBranch;
1501                 curBranch.first = (*it)->node;
1502                 curBranch.second = node;
1503                 int branchID = pairInteger(curBranch.first->id, curBranch.second->id);
1504 
1505                 if (params->fixStableSplits) {
1506                     Split *curSplit;
1507                     Split *sp = (*it)->split;
1508                     ASSERT(sp != NULL);
1509                     curSplit = new Split(*sp);
1510                     if (curSplit->shouldInvert())
1511                         curSplit->invert();
1512                     if (shouldEvaluate(curSplit, tabuSplits, candSplits)) {
1513                         nniBranches.insert(pair<int, Branch>(branchID, curBranch));
1514                     } else {
1515                         nonNNIBranches.insert(pair<int, Branch>(branchID, curBranch));
1516                     }
1517                     delete curSplit;
1518                 } else {
1519                     nniBranches.insert(pair<int, Branch>(branchID, curBranch));
1520                 }
1521             }
1522             getNNIBranches(tabuSplits, candSplits, nonNNIBranches, nniBranches, (*it)->node, node);
1523         }
1524 }
1525 
getStableBranches(SplitIntMap & candSplits,double supportValue,Branches & stableBranches,Node * node,Node * dad)1526 void IQTree::getStableBranches(SplitIntMap &candSplits, double supportValue, Branches &stableBranches, Node *node, Node *dad) {
1527     if (!node) {
1528         node = root;
1529     }
1530 
1531     FOR_NEIGHBOR_IT(node, dad, it) {
1532             if (isInnerBranch((*it)->node, node)) {
1533                 Branch curBranch;
1534                 curBranch.first = (*it)->node;
1535                 curBranch.second = node;
1536                 Split *curSplit;
1537                 Split *sp = (*it)->split;
1538                 ASSERT(sp != NULL);
1539                 curSplit = new Split(*sp);
1540                 if (curSplit->shouldInvert())
1541                     curSplit->invert();
1542                 int occurences;
1543                 sp = candSplits.findSplit(curSplit, occurences);
1544                 if (sp != NULL) {
1545                     if ( sp->getWeight() >= supportValue) {
1546                         stableBranches.insert(
1547                                 pair<int, Branch>(pairInteger(curBranch.first->id, curBranch.second->id), curBranch));
1548                     }
1549                 }
1550                 delete curSplit;
1551             }
1552             getStableBranches(candSplits, supportValue, stableBranches, (*it)->node, node);
1553         }
1554 }
1555 
perturbStableSplits(double suppValue)1556 string IQTree::perturbStableSplits(double suppValue) {
1557     int numRandNNI = 0;
1558     Branches stableBranches;
1559 //    initTabuSplits.clear();
1560 //    stableBranches = getStableBranches(candidateTrees.getCandSplits(), suppValue);
1561 //    int maxRandNNI = stableBranches.size() / 2;
1562     do {
1563         getStableBranches(candidateTrees.getCandSplits(), suppValue, stableBranches);
1564         vector<NNIMove> randomNNIs;
1565         vector<NNIMove> compatibleNNIs;
1566         for (map<int, Branch>::iterator it = stableBranches.begin(); it != stableBranches.end(); it++) {
1567             NNIMove randNNI = getRandomNNI(it->second);
1568             if (constraintTree.isCompatible(randNNI))
1569                 randomNNIs.push_back(randNNI);
1570         }
1571         getCompatibleNNIs(randomNNIs, compatibleNNIs);
1572         for (vector<NNIMove>::iterator it = compatibleNNIs.begin(); it != compatibleNNIs.end(); it++) {
1573             doNNI(*it);
1574             numRandNNI++;
1575 //            Split *sp = getSplit(it->node1, it->node2);
1576 //            Split *tabuSplit = new Split(*sp);
1577 //            if (tabuSplit->shouldInvert()) {
1578 //                tabuSplit->invert();
1579 //            }
1580 //            initTabuSplits.insertSplit(tabuSplit, 1);
1581         }
1582     } while (stableBranches.size() > 0);
1583 
1584     if (verbose_mode >= VB_MAX) {
1585         cout << "Tree perturbation: number of random NNI performed = " << numRandNNI << endl;
1586     }
1587     setAlignment(aln);
1588     setRootNode(params->root);
1589 
1590     clearAllPartialLH();
1591 
1592     if (isSuperTree()) {
1593         ((PhyloSuperTree*) this)->mapTrees();
1594     }
1595     if (params->pll) {
1596         pllReadNewick(getTreeString());
1597     }
1598 
1599     resetCurScore();
1600     return getTreeString();
1601 }
1602 
doRandomNNIs(bool storeTabu)1603 string IQTree::doRandomNNIs(bool storeTabu) {
1604     int cntNNI = 0;
1605     int numRandomNNI;
1606     Branches nniBranches;
1607     Branches nonNNIBranches;
1608     if (storeTabu) {
1609         Branches stableBranches;
1610         getStableBranches(candidateTrees.getCandSplits(), Params::getInstance().stableSplitThreshold, stableBranches);
1611         int numNonStableBranches = leafNum - 3 - stableBranches.size();
1612         numRandomNNI = numNonStableBranches;
1613     } else {
1614         numRandomNNI = floor((leafNum - 3) * Params::getInstance().initPS);
1615         if (leafNum >= 4 && numRandomNNI == 0)
1616             numRandomNNI = 1;
1617     }
1618 
1619     initTabuSplits.clear();
1620     while (cntNNI < numRandomNNI) {
1621         nniBranches.clear();
1622         nonNNIBranches.clear();
1623         getNNIBranches(initTabuSplits, candidateTrees.getCandSplits(), nonNNIBranches, nniBranches);
1624         if (nniBranches.size() == 0) break;
1625         // Convert the map data structure Branches to vector of Branch
1626         vector<Branch> vectorNNIBranches;
1627         for (Branches::iterator it = nniBranches.begin(); it != nniBranches.end(); ++it) {
1628             vectorNNIBranches.push_back(it->second);
1629         }
1630         int randInt = random_int((int) vectorNNIBranches.size());
1631         NNIMove randNNI = getRandomNNI(vectorNNIBranches[randInt]);
1632         if (constraintTree.isCompatible(randNNI)) {
1633             // only if random NNI satisfies constraintTree
1634             doNNI(randNNI);
1635             if (storeTabu) {
1636                 Split *sp = getSplit(randNNI.node1, randNNI.node2);
1637                 Split *tabuSplit = new Split(*sp);
1638                 if (tabuSplit->shouldInvert()) {
1639                     tabuSplit->invert();
1640                 }
1641                 initTabuSplits.insertSplit(tabuSplit, 1);
1642             }
1643         }
1644         cntNNI++;
1645     }
1646     if (verbose_mode >= VB_MAX)
1647         cout << "Tree perturbation: number of random NNI performed = " << cntNNI << endl;
1648     setAlignment(aln);
1649     setRootNode(params->root);
1650 
1651     if (isSuperTree()) {
1652         ((PhyloSuperTree*) this)->mapTrees();
1653     }
1654     if (params->pll) {
1655         pllReadNewick(getTreeString());
1656     }
1657 
1658     clearAllPartialLH();
1659     resetCurScore();
1660     return getTreeString();
1661 }
1662 
1663 
doIQP()1664 void IQTree::doIQP() {
1665     if (verbose_mode >= VB_DEBUG)
1666         drawTree(cout, WT_BR_SCALE | WT_INT_NODE | WT_TAXON_ID | WT_NEWLINE | WT_BR_ID);
1667     //double time_begin = getCPUTime();
1668     PhyloNodeVector del_leaves;
1669     deleteLeaves(del_leaves);
1670     reinsertLeaves(del_leaves);
1671 
1672     // just to make sure IQP does it right
1673     setAlignment(aln);
1674     if (params->pll) {
1675         pllReadNewick(getTreeString());
1676     }
1677 
1678     resetCurScore();
1679 //    lhComputed = false;
1680 
1681     if (isSuperTree()) {
1682         ((PhyloSuperTree*) this)->mapTrees();
1683     }
1684 
1685 //    if (enable_parsimony) {
1686 //        cur_pars_score = computeParsimony();
1687 //        if (verbose_mode >= VB_MAX) {
1688 //            cout << "IQP Likelihood = " << curScore << "  Parsimony = " << cur_pars_score << endl;
1689 //        }
1690 //    }
1691 }
1692 
inputTree2PLL(string treestring,bool computeLH)1693 double IQTree::inputTree2PLL(string treestring, bool computeLH) {
1694     double res = 0.0;
1695     // read in the tree string from IQTree kernel
1696     pllNewickTree *newick = pllNewickParseString(treestring.c_str());
1697     pllTreeInitTopologyNewick(pllInst, newick, PLL_FALSE);
1698     pllNewickParseDestroy(&newick);
1699     if (computeLH) {
1700         pllEvaluateLikelihood(pllInst, pllPartitions, pllInst->start, PLL_TRUE, PLL_FALSE);
1701         res = pllInst->likelihood;
1702     }
1703     return res;
1704 }
1705 
getModelRatesFromPLL()1706 double* IQTree::getModelRatesFromPLL() {
1707     ASSERT(aln->num_states == 4);
1708     int numberOfRates = (pllPartitions->partitionData[0]->states * pllPartitions->partitionData[0]->states
1709             - pllPartitions->partitionData[0]->states) / 2;
1710     double* rate_params = new double[numberOfRates];
1711     for (int i = 0; i < numberOfRates; i++) {
1712         rate_params[i] = pllPartitions->partitionData[0]->substRates[i];
1713     }
1714     return rate_params;
1715 }
1716 
pllPrintModelParams()1717 void IQTree::pllPrintModelParams() {
1718     cout.precision(6);
1719     cout << fixed;
1720     for (int part = 0; part < pllPartitions->numberOfPartitions; part++) {
1721         cout << "Alpha[" << part << "]" << ": " << pllPartitions->partitionData[part]->alpha << endl;
1722         if (aln->num_states == 4) {
1723             int states, rates;
1724             states = pllPartitions->partitionData[part]->states;
1725             rates = ((states * states - states) / 2);
1726             cout << "Rates[" << part << "]: " << " ac ag at cg ct gt: ";
1727             for (int i = 0; i < rates; i++) {
1728                 cout << pllPartitions->partitionData[part]->substRates[i] << " ";
1729             }
1730             cout << endl;
1731             cout <<  "Frequencies: ";
1732             for (int i = 0; i < 4; i++) {
1733                 cout << pllPartitions->partitionData[part]->empiricalFrequencies[i] << " ";
1734             }
1735             cout << endl;
1736         }
1737     }
1738     cout.precision(3);
1739     cout << fixed;
1740 }
1741 
getAlphaFromPLL()1742 double IQTree::getAlphaFromPLL() {
1743     return pllPartitions->partitionData[0]->alpha;
1744 }
1745 
inputModelPLL2IQTree()1746 void IQTree::inputModelPLL2IQTree() {
1747     // TODO add support for partitioned model
1748     getRate()->setGammaShape(pllPartitions->partitionData[0]->alpha);
1749     if (aln->num_states == 4) {
1750         ((ModelMarkov*) getModel())->setRateMatrix(pllPartitions->partitionData[0]->substRates);
1751         getModel()->decomposeRateMatrix();
1752     }
1753     ((ModelMarkov*) getModel())->setStateFrequency(pllPartitions->partitionData[0]->empiricalFrequencies);
1754 }
1755 
inputModelIQTree2PLL()1756 void IQTree::inputModelIQTree2PLL() {
1757     // get the alpha parameter
1758     double alpha = getRate()->getGammaShape();
1759     if (alpha == 0.0)
1760         alpha = PLL_ALPHA_MAX;
1761     if (aln->num_states == 4) {
1762         // get the rate parameters
1763         double *rate_param = new double[6];
1764         getModel()->getRateMatrix(rate_param);
1765         // get the state frequencies
1766         double *state_freqs = new double[aln->num_states];
1767         getModel()->getStateFrequency(state_freqs);
1768 
1769         /* put them into PLL */
1770         stringstream linkagePattern;
1771         int partNr;
1772         for (partNr = 0; partNr < pllPartitions->numberOfPartitions - 1; partNr++) {
1773             linkagePattern << partNr << ",";
1774         }
1775         linkagePattern << partNr;
1776         char *pattern = new char[linkagePattern.str().length() + 1];
1777         strcpy(pattern, linkagePattern.str().c_str());
1778         pllLinkAlphaParameters(pattern, pllPartitions);
1779         pllLinkFrequencies(pattern, pllPartitions);
1780         pllLinkRates(pattern, pllPartitions);
1781         delete[] pattern;
1782 
1783         for (partNr = 0; partNr < pllPartitions->numberOfPartitions; partNr++) {
1784             pllSetFixedAlpha(alpha, partNr, pllPartitions, pllInst);
1785             pllSetFixedBaseFrequencies(state_freqs, 4, partNr, pllPartitions, pllInst);
1786             pllSetFixedSubstitutionMatrix(rate_param, 6, partNr, pllPartitions, pllInst);
1787         }
1788         delete[] rate_param;
1789         delete[] state_freqs;
1790     } else if (aln->num_states == 20) {
1791         double *state_freqs = new double[aln->num_states];
1792         getModel()->getStateFrequency(state_freqs);
1793         int partNr;
1794         for (partNr = 0; partNr < pllPartitions->numberOfPartitions; partNr++) {
1795             pllSetFixedAlpha(alpha, partNr, pllPartitions, pllInst);
1796             pllSetFixedBaseFrequencies(state_freqs, 20, partNr, pllPartitions, pllInst);
1797         }
1798         delete[] state_freqs;
1799     } else {
1800         if (params->pll) {
1801             outError("Phylogenetic likelihood library current does not support data type other than DNA or Protein");
1802         }
1803     }
1804 }
1805 
pllBuildIQTreePatternIndex()1806 void IQTree::pllBuildIQTreePatternIndex(){
1807     pll2iqtree_pattern_index = new int[pllAlignment->sequenceLength];
1808     char ** pll_aln = new char *[pllAlignment->sequenceCount];
1809     for(int i = 0; i < pllAlignment->sequenceCount; i++)
1810         pll_aln[i] = new char[pllAlignment->sequenceLength];
1811 
1812     int pos;
1813     for(int i = 0; i < pllAlignment->sequenceCount; i++){
1814         pos = 0;
1815         for(int model = 0; model < pllPartitions->numberOfPartitions; model++){
1816             memcpy(&pll_aln[i][pos],
1817                     &pllAlignment->sequenceData[i + 1][pllPartitions->partitionData[model]->lower],
1818                     pllPartitions->partitionData[model]->width);
1819             pos += pllPartitions->partitionData[model]->width;
1820         }
1821     }
1822 
1823     char * pll_site = new char[pllAlignment->sequenceCount + 1];
1824     char * site = new char[pllAlignment->sequenceCount + 1];
1825     for(int i = 0; i < pllAlignment->sequenceLength; i++){
1826         for(int j = 0; j < pllAlignment->sequenceCount; j++)
1827             pll_site[j]= pll_aln[j][i];
1828         pll_site[pllAlignment->sequenceCount] = '\0';
1829 
1830         site[pllAlignment->sequenceCount] = '\0';
1831         for(int k = 0; k < aln->size(); k++){
1832             for(int p = 0; p < pllAlignment->sequenceCount; p++)
1833                 site[p] = aln->convertStateBack(aln->at(k)[p]);
1834             pllBaseSubstitute(site, pllPartitions->partitionData[0]->dataType);
1835             if(memcmp(pll_site,site, pllAlignment->sequenceCount) == 0){
1836                 pll2iqtree_pattern_index[i] = k;
1837             }
1838         }
1839     }
1840 
1841     delete [] pll_site;
1842     delete [] site;
1843     for(int i = 0; i < pllAlignment->sequenceCount; i++)
1844         delete [] pll_aln[i];
1845     delete [] pll_aln;
1846 }
1847 
1848 
1849 /**
1850  * DTH:
1851  * Substitute bases in seq according to PLL's rules
1852  * This function should be updated if PLL's rules change.
1853  * @param seq: data of some sequence to be substituted
1854  * @param dataType: PLL_DNA_DATA or PLL_AA_DATA
1855  */
pllBaseSubstitute(char * seq,int dataType)1856 void IQTree::pllBaseSubstitute (char *seq, int dataType)
1857 {
1858     char meaningDNA[256];
1859     char  meaningAA[256];
1860     char * d;
1861 
1862     for (int i = 0; i < 256; ++ i)
1863     {
1864         meaningDNA[i] = -1;
1865         meaningAA[i]  = -1;
1866     }
1867 
1868     /* DNA data */
1869 
1870     meaningDNA[(int)'A'] =  1;
1871     meaningDNA[(int)'B'] = 14;
1872     meaningDNA[(int)'C'] =  2;
1873     meaningDNA[(int)'D'] = 13;
1874     meaningDNA[(int)'G'] =  4;
1875     meaningDNA[(int)'H'] = 11;
1876     meaningDNA[(int)'K'] = 12;
1877     meaningDNA[(int)'M'] =  3;
1878     meaningDNA[(int)'R'] =  5;
1879     meaningDNA[(int)'S'] =  6;
1880     meaningDNA[(int)'T'] =  8;
1881     meaningDNA[(int)'U'] =  8;
1882     meaningDNA[(int)'V'] =  7;
1883     meaningDNA[(int)'W'] =  9;
1884     meaningDNA[(int)'Y'] = 10;
1885     meaningDNA[(int)'a'] =  1;
1886     meaningDNA[(int)'b'] = 14;
1887     meaningDNA[(int)'c'] =  2;
1888     meaningDNA[(int)'d'] = 13;
1889     meaningDNA[(int)'g'] =  4;
1890     meaningDNA[(int)'h'] = 11;
1891     meaningDNA[(int)'k'] = 12;
1892     meaningDNA[(int)'m'] =  3;
1893     meaningDNA[(int)'r'] =  5;
1894     meaningDNA[(int)'s'] =  6;
1895     meaningDNA[(int)'t'] =  8;
1896     meaningDNA[(int)'u'] =  8;
1897     meaningDNA[(int)'v'] =  7;
1898     meaningDNA[(int)'w'] =  9;
1899     meaningDNA[(int)'y'] = 10;
1900 
1901     meaningDNA[(int)'N'] =
1902     meaningDNA[(int)'n'] =
1903     meaningDNA[(int)'O'] =
1904     meaningDNA[(int)'o'] =
1905     meaningDNA[(int)'X'] =
1906     meaningDNA[(int)'x'] =
1907     meaningDNA[(int)'-'] =
1908     meaningDNA[(int)'?'] = 15;
1909 
1910     /* AA data */
1911 
1912     meaningAA[(int)'A'] =  0;  /* alanine */
1913     meaningAA[(int)'R'] =  1;  /* arginine */
1914     meaningAA[(int)'N'] =  2;  /*  asparagine*/
1915     meaningAA[(int)'D'] =  3;  /* aspartic */
1916     meaningAA[(int)'C'] =  4;  /* cysteine */
1917     meaningAA[(int)'Q'] =  5;  /* glutamine */
1918     meaningAA[(int)'E'] =  6;  /* glutamic */
1919     meaningAA[(int)'G'] =  7;  /* glycine */
1920     meaningAA[(int)'H'] =  8;  /* histidine */
1921     meaningAA[(int)'I'] =  9;  /* isoleucine */
1922     meaningAA[(int)'L'] =  10; /* leucine */
1923     meaningAA[(int)'K'] =  11; /* lysine */
1924     meaningAA[(int)'M'] =  12; /* methionine */
1925     meaningAA[(int)'F'] =  13; /* phenylalanine */
1926     meaningAA[(int)'P'] =  14; /* proline */
1927     meaningAA[(int)'S'] =  15; /* serine */
1928     meaningAA[(int)'T'] =  16; /* threonine */
1929     meaningAA[(int)'W'] =  17; /* tryptophan */
1930     meaningAA[(int)'Y'] =  18; /* tyrosine */
1931     meaningAA[(int)'V'] =  19; /* valine */
1932     meaningAA[(int)'B'] =  20; /* asparagine, aspartic 2 and 3*/
1933     meaningAA[(int)'Z'] =  21; /*21 glutamine glutamic 5 and 6*/
1934     meaningAA[(int)'a'] =  0;  /* alanine */
1935     meaningAA[(int)'r'] =  1;  /* arginine */
1936     meaningAA[(int)'n'] =  2;  /*  asparagine*/
1937     meaningAA[(int)'d'] =  3;  /* aspartic */
1938     meaningAA[(int)'c'] =  4;  /* cysteine */
1939     meaningAA[(int)'q'] =  5;  /* glutamine */
1940     meaningAA[(int)'e'] =  6;  /* glutamic */
1941     meaningAA[(int)'g'] =  7;  /* glycine */
1942     meaningAA[(int)'h'] =  8;  /* histidine */
1943     meaningAA[(int)'i'] =  9;  /* isoleucine */
1944     meaningAA[(int)'l'] =  10; /* leucine */
1945     meaningAA[(int)'k'] =  11; /* lysine */
1946     meaningAA[(int)'m'] =  12; /* methionine */
1947     meaningAA[(int)'f'] =  13; /* phenylalanine */
1948     meaningAA[(int)'p'] =  14; /* proline */
1949     meaningAA[(int)'s'] =  15; /* serine */
1950     meaningAA[(int)'t'] =  16; /* threonine */
1951     meaningAA[(int)'w'] =  17; /* tryptophan */
1952     meaningAA[(int)'y'] =  18; /* tyrosine */
1953     meaningAA[(int)'v'] =  19; /* valine */
1954     meaningAA[(int)'b'] =  20; /* asparagine, aspartic 2 and 3*/
1955     meaningAA[(int)'z'] =  21; /*21 glutamine glutamic 5 and 6*/
1956 
1957     meaningAA[(int)'X'] =
1958     meaningAA[(int)'x'] =
1959     meaningAA[(int)'?'] =
1960     meaningAA[(int)'*'] =
1961     meaningAA[(int)'-'] = 22;
1962 
1963     d = (dataType == PLL_DNA_DATA) ? meaningDNA : meaningAA;
1964     int seq_len = strlen(seq);
1965     for (int i = 0; i < seq_len; ++ i)
1966     {
1967         seq[i] = d[(int)seq[i]];
1968     }
1969 }
1970 
swapTaxa(PhyloNode * node1,PhyloNode * node2)1971 double IQTree::swapTaxa(PhyloNode *node1, PhyloNode *node2) {
1972     ASSERT(node1->isLeaf());
1973     ASSERT(node2->isLeaf());
1974 
1975     PhyloNeighbor *node1nei = (PhyloNeighbor*) *(node1->neighbors.begin());
1976     PhyloNeighbor *node2nei = (PhyloNeighbor*) *(node2->neighbors.begin());
1977 
1978     node2nei->node->updateNeighbor(node2, node1);
1979     node1nei->node->updateNeighbor(node1, node2);
1980 
1981     // Update the new neightbors of the 2 nodes
1982     node1->updateNeighbor(node1->neighbors.begin(), node2nei);
1983     node2->updateNeighbor(node2->neighbors.begin(), node1nei);
1984 
1985     PhyloNeighbor *node1NewNei = (PhyloNeighbor*) *(node1->neighbors.begin());
1986     PhyloNeighbor *node2NewNei = (PhyloNeighbor*) *(node2->neighbors.begin());
1987 
1988     // Reoptimize the branch lengths
1989     optimizeOneBranch(node1, (PhyloNode*) node1NewNei->node);
1990 //    this->curScore = optimizeOneBranch(node2, (PhyloNode*) node2NewNei->node);
1991     optimizeOneBranch(node2, (PhyloNode*) node2NewNei->node);
1992     //drawTree(cout, WT_BR_SCALE | WT_INT_NODE | WT_TAXON_ID | WT_NEWLINE);
1993     this->curScore = computeLikelihoodFromBuffer();
1994     return this->curScore;
1995 }
1996 
perturb(int times)1997 double IQTree::perturb(int times) {
1998     while (times > 0) {
1999         NodeVector taxa;
2000         // get the vector of taxa
2001         getTaxa(taxa);
2002         int taxonid1 = random_int(taxa.size());
2003         PhyloNode *taxon1 = (PhyloNode*) taxa[taxonid1];
2004         PhyloNode *taxon2;
2005         int *dists = new int[taxa.size()];
2006         int minDist = 1000000;
2007         for (int i = 0; i < taxa.size(); i++) {
2008             if (i == taxonid1)
2009                 continue;
2010             taxon2 = (PhyloNode*) taxa[i];
2011             int dist = taxon1->calDist(taxon2);
2012             dists[i] = dist;
2013             if (dist >= 7 && dist < minDist)
2014                 minDist = dist;
2015         }
2016 
2017         int taxonid2 = -1;
2018         for (int i = 0; i < taxa.size(); i++) {
2019             if (dists[i] == minDist)
2020                 taxonid2 = i;
2021         }
2022 
2023         taxon2 = (PhyloNode*) taxa[taxonid2];
2024 
2025         cout << "Swapping node " << taxon1->id << " and node " << taxon2->id << endl;
2026         cout << "Distance " << minDist << endl;
2027         curScore = swapTaxa(taxon1, taxon2);
2028         //taxa.erase( taxa.begin() + taxaID1 );
2029         //taxa.erase( taxa.begin() + taxaID2 -1 );
2030 
2031         times--;
2032         delete[] dists;
2033     }
2034     curScore = optimizeAllBranches(1);
2035     return curScore;
2036 }
2037 
2038 //extern "C" pllUFBootData * pllUFBootDataPtr;
2039 extern pllUFBootData * pllUFBootDataPtr;
2040 
optimizeModelParameters(bool printInfo,double logl_epsilon)2041 string IQTree::optimizeModelParameters(bool printInfo, double logl_epsilon) {
2042     if (logl_epsilon == -1)
2043         logl_epsilon = params->modelEps;
2044     cout << "Estimate model parameters (epsilon = " << logl_epsilon << ")" << endl;
2045     double stime = getRealTime();
2046     string newTree;
2047     if (params->pll) {
2048         if (curScore == -DBL_MAX) {
2049             pllEvaluateLikelihood(pllInst, pllPartitions, pllInst->start, PLL_TRUE, PLL_FALSE);
2050         } else {
2051             pllEvaluateLikelihood(pllInst, pllPartitions, pllInst->start, PLL_FALSE, PLL_FALSE);
2052         }
2053         pllOptimizeModelParameters(pllInst, pllPartitions, logl_epsilon);
2054         curScore = pllInst->likelihood;
2055         pllTreeToNewick(pllInst->tree_string, pllInst, pllPartitions,
2056                 pllInst->start->back, PLL_TRUE,
2057                 PLL_TRUE, PLL_FALSE, PLL_FALSE, PLL_FALSE, PLL_SUMMARIZE_LH,
2058                 PLL_FALSE, PLL_FALSE);
2059         if (printInfo) {
2060             pllPrintModelParams();
2061         }
2062         newTree = string(pllInst->tree_string);
2063         double etime = getRealTime();
2064         if (printInfo)
2065             cout << etime - stime << " seconds (logl: " << curScore << ")" << endl;
2066     } else {
2067         double modOptScore;
2068         if (params->opt_gammai) { // DO RESTART ON ALPHA AND P_INVAR
2069             modOptScore = getModelFactory()->optimizeParametersGammaInvar(params->fixed_branch_length, printInfo, logl_epsilon);
2070             params->opt_gammai = false;
2071         } else {
2072             modOptScore = getModelFactory()->optimizeParameters(params->fixed_branch_length, printInfo, logl_epsilon);
2073         }
2074 
2075         if (isSuperTree()) {
2076             ((PhyloSuperTree*) this)->computeBranchLengths();
2077         }
2078         if (getModelFactory()->isUnstableParameters() && aln->seq_type != SEQ_CODON) {
2079             cout << endl;
2080             outWarning("Estimated model parameters are at boundary that can cause numerical instability!");
2081             cout << endl;
2082         }
2083 
2084         if (modOptScore < curScore - 1.0 && params->partition_type != TOPO_UNLINKED) {
2085             cout << "  BUG: Tree logl gets worse after model optimization!" << endl;
2086             cout << "  Old logl: " << curScore << " / " << "new logl: " << modOptScore << endl;
2087             printTree("debug.tree");
2088             abort();
2089         } else {
2090             curScore = modOptScore;
2091             newTree = getTreeString();
2092         }
2093         if (params->print_trees_site_posterior)
2094             computePatternCategories();
2095     }
2096 
2097     return newTree;
2098 }
2099 
printBestScores()2100 void IQTree::printBestScores() {
2101     vector<double> bestScores = candidateTrees.getBestScores(params->popSize);
2102     for (vector<double>::iterator it = bestScores.begin(); it != bestScores.end(); it++)
2103         cout << (*it) << " ";
2104     cout << endl;
2105 }
2106 
computeLogL()2107 double IQTree::computeLogL() {
2108     if (params->pll) {
2109         if (curScore == -DBL_MAX) {
2110             pllEvaluateLikelihood(pllInst, pllPartitions, pllInst->start, PLL_TRUE, PLL_FALSE);
2111         } else {
2112             pllEvaluateLikelihood(pllInst, pllPartitions, pllInst->start, PLL_FALSE, PLL_FALSE);
2113         }
2114         curScore = pllInst->likelihood;
2115     } else {
2116 //        if (!lhComputed) {
2117 //            initializeAllPartialLh();
2118 //            clearAllPartialLH();
2119 //        }
2120         curScore = computeLikelihood();
2121     }
2122     return curScore;
2123 }
2124 
optimizeBranches(int maxTraversal)2125 string IQTree::optimizeBranches(int maxTraversal) {
2126     string tree;
2127     if (params->pll) {
2128         if (curScore == -DBL_MAX) {
2129             pllEvaluateLikelihood(pllInst, pllPartitions, pllInst->start, PLL_TRUE, PLL_FALSE);
2130 //            lhComputed = true;
2131         }
2132         pllOptimizeBranchLengths(pllInst, pllPartitions, maxTraversal);
2133         curScore = pllInst->likelihood;
2134         pllTreeToNewick(pllInst->tree_string, pllInst, pllPartitions, pllInst->start->back, PLL_TRUE, PLL_TRUE, PLL_FALSE, PLL_FALSE, PLL_FALSE,
2135                 PLL_SUMMARIZE_LH, PLL_FALSE, PLL_FALSE);
2136         tree = string(pllInst->tree_string);
2137     } else {
2138 //        if (!lhComputed) {
2139 //            initializeAllPartialLh();
2140 //            clearAllPartialLH();
2141 //            lhComputed = true;
2142 //        }
2143         curScore = optimizeAllBranches(maxTraversal, params->loglh_epsilon, PLL_NEWZPERCYCLE);
2144         tree = getTreeString();
2145     }
2146     return tree;
2147 }
2148 
doTreeSearch()2149 double IQTree::doTreeSearch() {
2150 
2151     if (params->numInitTrees > 1) {
2152         cout << "--------------------------------------------------------------------" << endl;
2153         cout << "|             INITIALIZING CANDIDATE TREE SET                      |" << endl;
2154         cout << "--------------------------------------------------------------------" << endl;
2155     }
2156 
2157     double initCPUTime = getRealTime();
2158     int treesPerProc = (params->numInitTrees) / MPIHelper::getInstance().getNumProcesses() - candidateTrees.size();
2159     if (params->numInitTrees % MPIHelper::getInstance().getNumProcesses() != 0) {
2160         treesPerProc++;
2161     }
2162     if (treesPerProc < 0)
2163         treesPerProc = 0;
2164     // Master node does one tree less because it already created the BIONJ tree
2165 //    if (MPIHelper::getInstance().isMaster()) {
2166 //        treesPerProc--;
2167 //    }
2168 
2169     // Make sure to get at least 1 tree
2170     if (treesPerProc < 1 && params->numInitTrees > candidateTrees.size())
2171         treesPerProc = 1;
2172 
2173     /* Initialize candidate tree set */
2174     if (!getCheckpoint()->getBool("finishedCandidateSet")) {
2175         initCandidateTreeSet(treesPerProc, params->numNNITrees);
2176         // write best tree to disk
2177         printBestCandidateTree();
2178         saveCheckpoint();
2179         getCheckpoint()->putBool("finishedCandidateSet", true);
2180         getCheckpoint()->dump();
2181     } else {
2182         cout << "CHECKPOINT: Candidate tree set restored, best LogL: " << candidateTrees.getBestScore() << endl;
2183     }
2184     ASSERT(candidateTrees.size() != 0);
2185     cout << "Finish initializing candidate tree set (" << candidateTrees.size() << ")" << endl;
2186 
2187 
2188     cout << "Current best tree score: " << candidateTrees.getBestScore() << " / CPU time: " <<
2189     getRealTime() - initCPUTime << endl;
2190     cout << "Number of iterations: " << stop_rule.getCurIt() << endl;
2191 
2192 //    string treels_name = params->out_prefix;
2193 //    treels_name += ".treels";
2194 //    string out_lh_file = params->out_prefix;
2195 //    out_lh_file += ".treelh";
2196 //    string site_lh_file = params->out_prefix;
2197 //    site_lh_file += ".sitelh";
2198 //
2199 //    if (params->print_tree_lh) {
2200 //        out_treelh.open(out_lh_file.c_str());
2201 //        out_sitelh.open(site_lh_file.c_str());
2202 //    }
2203 
2204 //    if (params->write_intermediate_trees)
2205 //        out_treels.open(treels_name.c_str());
2206 
2207 //    if (params->write_intermediate_trees && save_all_trees != 2) {
2208 //        printIntermediateTree(WT_NEWLINE | WT_APPEND | WT_SORT_TAXA | WT_BR_LEN);
2209 //    }
2210 
2211     setRootNode(params->root);
2212 
2213     if (!getCheckpoint()->getBool("finishedCandidateSet"))
2214         cout << "CHECKPOINT: " << stop_rule.getCurIt() << " search iterations restored" << endl;
2215 
2216     searchinfo.curPerStrength = params->initPS;
2217     double cur_correlation = 0.0;
2218 
2219 
2220     if ((Params::getInstance().fixStableSplits || Params::getInstance().adaptPertubation) && candidateTrees.size() > 1) {
2221         candidateTrees.computeSplitOccurences(Params::getInstance().stableSplitThreshold);
2222     }
2223 
2224     // tracking of worker candidate set is changed from master candidate set
2225     candidateset_changed.resize(MPIHelper::getInstance().getNumProcesses(), false);
2226     bestcandidate_changed = false;
2227 
2228     /*==============================================================================================================
2229                                            MAIN LOOP OF THE IQ-TREE ALGORITHM
2230      *=============================================================================================================*/
2231 
2232     bool early_stop = stop_rule.meetStopCondition(stop_rule.getCurIt(), cur_correlation);
2233     if (!early_stop) {
2234         cout << "--------------------------------------------------------------------" << endl;
2235         cout << "|               OPTIMIZING CANDIDATE TREE SET                      |" << endl;
2236         cout << "--------------------------------------------------------------------" << endl;
2237     }
2238 
2239     // count threshold for computing bootstrap correlation
2240     int ufboot_count, ufboot_count_check;
2241     stop_rule.getUFBootCountCheck(ufboot_count, ufboot_count_check);
2242 
2243     while (!stop_rule.meetStopCondition(stop_rule.getCurIt(), cur_correlation)) {
2244 
2245         searchinfo.curIter = stop_rule.getCurIt();
2246         // estimate logl_cutoff for bootstrap
2247         if (!boot_orig_logl.empty())
2248             logl_cutoff = *min_element(boot_orig_logl.begin(), boot_orig_logl.end());
2249 
2250         if (estimate_nni_cutoff && nni_info.size() >= 500) {
2251             estimate_nni_cutoff = false;
2252             estimateNNICutoff(params);
2253         }
2254 
2255         Alignment *saved_aln = aln;
2256 
2257         string curTree;
2258         /*----------------------------------------
2259          * Perturb the tree
2260          *---------------------------------------*/
2261         doTreePerturbation();
2262 
2263         /*----------------------------------------
2264          * Optimize tree with NNI
2265          *----------------------------------------*/
2266         pair<int, int> nniInfos; // <num_NNIs, num_steps>
2267         nniInfos = doNNISearch();
2268         curTree = getTreeString();
2269         int pos = addTreeToCandidateSet(curTree, curScore, true, MPIHelper::getInstance().getProcessID());
2270         if (pos != -2 && pos != -1 && (Params::getInstance().fixStableSplits || Params::getInstance().adaptPertubation))
2271             candidateTrees.computeSplitOccurences(Params::getInstance().stableSplitThreshold);
2272 
2273         if (MPIHelper::getInstance().isWorker() || MPIHelper::getInstance().gotMessage())
2274             syncCurrentTree();
2275 
2276 
2277         // TODO: cannot check yet, need to somehow return treechanged
2278 //        if (nni_count == 0 && params->snni && numPerturb > 0 && treechanged) {
2279 //            assert(0 && "BUG: NNI could not improved perturbed tree");
2280 //        }
2281 //
2282         if (iqp_assess_quartet == IQP_BOOTSTRAP) {
2283             // restore alignment
2284             delete aln;
2285             setAlignment(saved_aln);
2286             initializeAllPartialLh();
2287             clearAllPartialLH();
2288         }
2289 
2290         if (isSuperTree()) {
2291             ((PhyloSuperTree *) this)->computeBranchLengths();
2292         }
2293 
2294         /*----------------------------------------
2295          * Print information
2296          *---------------------------------------*/
2297         //printInterationInfo();
2298 
2299 //        if (params->write_intermediate_trees && save_all_trees != 2) {
2300 //            printIntermediateTree(WT_NEWLINE | WT_APPEND | WT_SORT_TAXA | WT_BR_LEN);
2301 //        }
2302 
2303         if (params->snni && verbose_mode >= VB_DEBUG) {
2304             printBestScores();
2305         }
2306 
2307         // DTH: make pllUFBootData usable in summarizeBootstrap
2308         if (params->pll && params->online_bootstrap && (params->gbo_replicates > 0))
2309             pllConvertUFBootData2IQTree();
2310         // DTH: Carefully watch the -pll case here
2311 
2312         /*----------------------------------------
2313          * convergence criterion for ultrafast bootstrap
2314          *---------------------------------------*/
2315 
2316 
2317         // MASTER receives bootstrap trees and perform stop convergence test
2318         if ((stop_rule.getCurIt()) >= ufboot_count &&
2319             params->stop_condition == SC_BOOTSTRAP_CORRELATION && MPIHelper::getInstance().isMaster()) {
2320             ufboot_count += params->step_iterations/2;
2321             // compute split support every half step
2322             SplitGraph *sg = new SplitGraph;
2323             summarizeBootstrap(*sg);
2324             sg->removeTrivialSplits();
2325             sg->setCheckpoint(checkpoint);
2326             boot_splits.push_back(sg);
2327             cout << "Log-likelihood cutoff on original alignment: " << logl_cutoff << endl;
2328 //            MPIHelper::getInstance().sendMsg(LOGL_CUTOFF_TAG, convertDoubleToString(logl_cutoff));
2329 
2330             // check convergence every full step
2331             if (stop_rule.getCurIt() >= ufboot_count_check) {
2332                 ufboot_count_check += params->step_iterations;
2333                 cur_correlation = computeBootstrapCorrelation();
2334                 cout << "NOTE: Bootstrap correlation coefficient of split occurrence frequencies: " <<
2335                 cur_correlation << endl;
2336                 if (!stop_rule.meetCorrelation(cur_correlation)) {
2337                     cout << "NOTE: UFBoot does not converge, continue at least " << params->step_iterations << " more iterations" << endl;
2338                 }
2339             }
2340             if (params->gbo_replicates && params->online_bootstrap && params->print_ufboot_trees)
2341                 writeUFBootTrees(*params);
2342 
2343         } // end of bootstrap convergence test
2344 
2345         // print UFBoot trees every 10 iterations
2346 
2347         saveCheckpoint();
2348         checkpoint->dump();
2349 
2350         if (bestcandidate_changed) {
2351             printBestCandidateTree();
2352             bestcandidate_changed = false;
2353         }
2354 
2355         //if (params->partition_type)
2356         //     ((PhyloSuperTreePlen*)this)->printNNIcasesNUM();
2357 
2358     }
2359 
2360     // 2019-06-03: check convergence here to avoid effect of refineBootTrees
2361     if (boot_splits.size() >= 2 && MPIHelper::getInstance().isMaster()) {
2362         // check the stopping criterion for ultra-fast bootstrap
2363         if (computeBootstrapCorrelation() < params->min_correlation)
2364             cout << "WARNING: bootstrap analysis did not converge. You should rerun with higher number of iterations (-nm option)" << endl;
2365 
2366     }
2367 
2368     if(params->ufboot2corr) refineBootTrees();
2369 
2370     if (!early_stop)
2371         sendStopMessage();
2372 
2373     readTreeString(candidateTrees.getBestTreeStrings()[0]);
2374 
2375     if (testNNI)
2376         outNNI.close();
2377     if (params->write_intermediate_trees)
2378         out_treels.close();
2379     if (params->print_tree_lh) {
2380         out_treelh.close();
2381         out_sitelh.close();
2382     }
2383 
2384     // DTH: pllUFBoot deallocation
2385     if (params->pll) {
2386         pllDestroyUFBootData();
2387     }
2388 
2389 #ifdef _IQTREE_MPI
2390     cout << "Total number of trees received: " << MPIHelper::getInstance().getNumTreeReceived() << endl;
2391     cout << "Total number of trees sent: " << MPIHelper::getInstance().getNumTreeSent() << endl;
2392     cout << "Total number of NNI searches done by myself: " << MPIHelper::getInstance().getNumNNISearch() << endl;
2393     MPIHelper::getInstance().resetNumbers();
2394 #endif
2395 
2396     cout << "TREE SEARCH COMPLETED AFTER " << stop_rule.getCurIt() << " ITERATIONS"
2397     << " / Time: " << convert_time(getRealTime() - params->start_real_time) << endl << endl;
2398 
2399     return candidateTrees.getBestScore();
2400 
2401 }
2402 
2403 
2404 /*
2405 void IQTree::refineBootTrees(){
2406     on_refine_btree = true;
2407     int num_boot_rep = params->gbo_replicates;
2408     params->gbo_replicates = 0;
2409     save_all_trees = 0;
2410 
2411     NNI_Type saved_nni_type = params->nni_type;
2412     if(params->u2c_nni5 == false){
2413         params->nni5 = false;
2414         params->nni_type = NNI1;
2415     }else{
2416         params->nni5 = true;
2417         params->nni_type = NNI5;
2418     }
2419 
2420     string saved_tree = getTreeString();
2421     saved_aln_on_refine_btree = aln;
2422 
2423     int nptn = getAlnNPattern();
2424     cout << "npn = " << nptn << endl;
2425 
2426     string tree;
2427     Alignment * bootstrap_aln;
2428 
2429     int *saved_randstream = randstream;
2430     init_random(params->ran_seed);
2431 
2432     string file_name = params->out_prefix;
2433     file_name += ".binfo";
2434     ofstream binfo(file_name.c_str());
2435 
2436     file_name = params->out_prefix;
2437     file_name += ".btree.pre";
2438     ofstream btreep(file_name.c_str(), ios::app);
2439 
2440     file_name = params->out_prefix;
2441     file_name += ".btree.aft";
2442     ofstream btreea(file_name.c_str(), ios::app);
2443 
2444     file_name = params->out_prefix;
2445     file_name += ".btree.pre.brlen";
2446     ofstream btreep_brlen(file_name.c_str(), ios::app);
2447 
2448     file_name = params->out_prefix;
2449     file_name += ".btree.aft.brlen";
2450     ofstream btreea_brlen(file_name.c_str(), ios::app);
2451 
2452     file_name = params->out_prefix;
2453     file_name += ".refine.aln";
2454     ofstream refine_aln(file_name.c_str(), ios::app);
2455 
2456 
2457     if(!isSuperTree()){
2458         getModelFactory()->model->writeInfo(binfo);
2459         getModelFactory()->site_rate->writeInfo(binfo);
2460     }else{
2461         ((PhyloSuperTree *)this)->at(0)->getModelFactory()->model->writeInfo(binfo);
2462         ((PhyloSuperTree *)this)->at(0)->getModelFactory()->site_rate->writeInfo(binfo);
2463     }
2464 
2465     PhyloSuperTree *stree = (isSuperTree()) ? (PhyloSuperTree*)this : NULL;
2466 
2467     for(int sample = 0; sample < num_boot_rep; sample++){
2468         if ((sample+1) % 100 == 0)
2469             cout << sample+1 << " replicates done" << endl;
2470 
2471         if (saved_aln_on_refine_btree->isSuperAlignment())
2472             bootstrap_aln = new SuperAlignment;
2473         else
2474             bootstrap_aln = new Alignment;
2475 
2476 //        bootstrap_aln->buildFromPatternFreq(*saved_aln_on_refine_btree, boot_samples_int[sample]);
2477         IntVector this_sample;
2478         bootstrap_aln->createBootstrapAlignment(saved_aln_on_refine_btree, &this_sample, params->bootstrap_spec);
2479 
2480         if (isSuperTree())
2481             stree->setSuperAlignment(bootstrap_aln);
2482         else
2483             setAlignment(bootstrap_aln);
2484 
2485         for(int k = 0; k < nptn; k++){
2486             if(boot_samples_int[sample][k] != this_sample[k]){
2487                 assert(0 && "Not identical");
2488             }
2489         }
2490 
2491 //        bootstrap_aln->createBootstrapAlignment(saved_aln_on_refine_btree, NULL, params->bootstrap_spec);
2492         ptn_freq_computed = false;
2493         computePtnFreq();
2494         if (isSuperTree()) {
2495             for (auto it = stree->begin(); it != stree->end(); it++) {
2496                 (*it)->ptn_freq_computed = false;
2497                 (*it)->computePtnFreq();
2498             }
2499         }
2500 
2501 
2502         tree = boot_trees[sample];
2503         // Read the bootstrap tree
2504 //        cout << tree << endl;
2505 
2506         readTreeString(tree);
2507 
2508 
2509         if(!isSuperTree())
2510             aln->printPhylip(refine_aln, true);
2511         else
2512             ((SuperAlignment *) aln)->printCombinedAlignment(refine_aln, true);
2513 
2514         setRootNode(params->root);
2515 
2516         if (isSuperTree()){
2517             wrapperFixNegativeBranch(true);
2518             ((PhyloSuperTree*) this)->mapTrees();
2519         }
2520         else{
2521             initializeAllPartialLh();
2522             clearAllPartialLH();
2523             wrapperFixNegativeBranch(false);
2524         }
2525 
2526 
2527 //        printTree(cout);
2528 //        cout << endl;
2529         optimizeBranches();
2530 //        printTree(cout);
2531 //        cout << endl;
2532 
2533         curScore = computeLogL();
2534         binfo << sample << "\t" << curScore;
2535 
2536         printTree(btreep, WT_NEWLINE | WT_SORT_TAXA);
2537         printTree(btreep_brlen, WT_NEWLINE | WT_SORT_TAXA | WT_BR_LEN);
2538 
2539         pair<int, int> nniInfos; // <num_NNIs, num_steps>
2540         nniInfos = doNNISearch();
2541         curScore = computeLogL();
2542         binfo << "\t" << curScore << endl;
2543 
2544         stringstream ostr;
2545         printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA);
2546         tree = ostr.str();
2547         boot_trees[sample] = getTreeString();
2548         boot_logl[sample] = curScore;
2549 
2550         printTree(btreea, WT_NEWLINE | WT_SORT_TAXA);
2551         printTree(btreea_brlen, WT_NEWLINE | WT_SORT_TAXA | WT_BR_LEN);
2552         delete aln;
2553     }
2554 
2555     binfo.close();
2556     btreep.close();
2557     btreea.close();
2558 
2559 //    delete aln;
2560 
2561     // restore randstream
2562     finish_random();
2563     randstream = saved_randstream;
2564 
2565     // Recover the last status of IQTREE
2566 
2567     params->gbo_replicates = num_boot_rep;
2568 
2569     if (isSuperTree())
2570         stree->setSuperAlignment(saved_aln_on_refine_btree);
2571     else
2572         setAlignment(saved_aln_on_refine_btree);
2573 
2574     readTreeString(saved_tree);
2575 
2576     ptn_freq_computed = false;
2577     computePtnFreq();
2578     if (isSuperTree()) {
2579         for (auto it = stree->begin(); it != stree->end(); it++) {
2580             (*it)->ptn_freq_computed = false;
2581             (*it)->computePtnFreq();
2582         }
2583     }
2584 
2585     if(params->u2c_nni5 == false){
2586         params->nni_type = saved_nni_type;
2587         if(params->nni_type == NNI5)
2588             params->nni5 = true;
2589     }
2590 
2591     initializeAllPartialLh();
2592     clearAllPartialLH();
2593     curScore = optimizeAllBranches();
2594 
2595     save_all_trees = 2;
2596     on_refine_btree = false;
2597 }
2598 */
2599 
2600 /**********************************************************
2601  * STANDARD NON-PARAMETRIC BOOTSTRAP
2602  ***********************************************************/
refineBootTrees()2603 void IQTree::refineBootTrees() {
2604 
2605     int *saved_randstream = randstream;
2606     init_random(params->ran_seed);
2607 
2608     params->gbo_replicates = 0;
2609 
2610     NNI_Type saved_nni_type = params->nni_type;
2611     // TODO: A bug in PhyloSuperTreePlen::swapNNIBranch by nni1
2612     // Thus always turn on -nni5 by PhyloSuperTreePlen
2613     if(params->u2c_nni5 == false && (!isSuperTree() || params->partition_type == BRLEN_OPTIMIZE)) {
2614         params->nni5 = false;
2615         params->nni_type = NNI1;
2616     }else{
2617         params->nni5 = true;
2618         params->nni_type = NNI5;
2619     }
2620 
2621     cout << "Refining ufboot trees with NNI ";
2622     if (params->nni5)
2623         cout << "5 branches..." << endl;
2624     else
2625         cout << "1 branch..." << endl;
2626 
2627     int refined_trees = 0;
2628 
2629     int refined_samples = 0;
2630 
2631     checkpoint->startStruct("UFBoot");
2632     if (CKP_RESTORE(refined_samples))
2633         cout << "CHECKPOINT: " << refined_samples << " refined samples restored" << endl;
2634     checkpoint->endStruct();
2635 
2636     // 2018-08-17: delete duplicated memory
2637     deleteAllPartialLh();
2638 
2639     ModelsBlock *models_block = readModelsDefinition(*params);
2640 
2641 	// do bootstrap analysis
2642 	for (int sample = refined_samples; sample < boot_trees.size(); sample++) {
2643         // create bootstrap alignment
2644         Alignment* bootstrap_alignment;
2645         if (aln->isSuperAlignment())
2646             bootstrap_alignment = new SuperAlignment;
2647         else
2648             bootstrap_alignment = new Alignment;
2649         bootstrap_alignment->createBootstrapAlignment(aln, NULL, params->bootstrap_spec);
2650 
2651         // create bootstrap tree
2652         IQTree *boot_tree;
2653         if (aln->isSuperAlignment()){
2654             if(params->partition_type != BRLEN_OPTIMIZE){
2655                 boot_tree = new PhyloSuperTreePlen((SuperAlignment*) bootstrap_alignment, (PhyloSuperTree*) this);
2656             } else {
2657                 boot_tree = new PhyloSuperTree((SuperAlignment*) bootstrap_alignment, (PhyloSuperTree*) this);
2658             }
2659         } else {
2660             // allocate heterotachy tree if neccessary
2661             int pos = posRateHeterotachy(aln->model_name);
2662 
2663             if (params->num_mixlen > 1) {
2664                 boot_tree = new PhyloTreeMixlen(bootstrap_alignment, params->num_mixlen);
2665             } else if (pos != string::npos) {
2666                 boot_tree = new PhyloTreeMixlen(bootstrap_alignment, 0);
2667             } else
2668                 boot_tree = new IQTree(bootstrap_alignment);
2669         }
2670 
2671         boot_tree->on_refine_btree = true;
2672         boot_tree->save_all_trees = 0;
2673 
2674         // initialize constraint tree
2675         if (!constraintTree.empty()) {
2676             boot_tree->constraintTree.readConstraint(constraintTree);
2677         }
2678 
2679         boot_tree->setParams(params);
2680 
2681         // 2019-06-03: bug fix setting part_info properly
2682         if (boot_tree->isSuperTree())
2683             ((PhyloSuperTree*)boot_tree)->setPartInfo((PhyloSuperTree*)this);
2684 
2685         // copy model
2686         // BQM 2019-05-31: bug fix with -bsam option
2687         boot_tree->initializeModel(*params, aln->model_name, models_block);
2688         boot_tree->getModelFactory()->setCheckpoint(getCheckpoint());
2689         if (isSuperTree())
2690             ((PartitionModel*)boot_tree->getModelFactory())->PartitionModel::restoreCheckpoint();
2691         else
2692             boot_tree->getModelFactory()->restoreCheckpoint();
2693 
2694         // set likelihood kernel
2695         boot_tree->setParams(params);
2696         boot_tree->setLikelihoodKernel(sse);
2697         boot_tree->setNumThreads(num_threads);
2698 
2699         // load the current ufboot tree
2700         // 2019-02-06: fix crash with -sp and -bnni
2701         if (isSuperTree())
2702             boot_tree->PhyloTree::readTreeString(boot_trees[sample]);
2703         else
2704             boot_tree->readTreeString(boot_trees[sample]);
2705 
2706         if (boot_tree->isSuperTree() && params->partition_type == BRLEN_OPTIMIZE) {
2707             if (((PhyloSuperTree*)boot_tree)->size() > 1) {
2708                 // re-initialize branch lengths for unlinked model
2709                 boot_tree->wrapperFixNegativeBranch(true);
2710             }
2711         }
2712 
2713         // TODO: check if this resolves the crash in reorientPartialLh()
2714         boot_tree->initializeAllPartialLh();
2715 
2716         // just in case some branch lengths are negative
2717         if (int num_neg = boot_tree->wrapperFixNegativeBranch(false))
2718             outWarning("Bootstrap tree " + convertIntToString(sample+1) + " has " +
2719                 convertIntToString(num_neg) + "non-positive branch lengths");
2720 
2721         // REMARK: branch lengths were estimated from original alignments
2722         // for bootstrap_alignment, they still thus need to be reoptimized a bit
2723         boot_tree->optimizeBranches(2);
2724 
2725 
2726         auto num_nnis = boot_tree->doNNISearch();
2727         if (num_nnis.second != 0)
2728             refined_trees++;
2729 
2730         if (verbose_mode >= VB_MED) {
2731             cout << "UFBoot tree " << sample+1 << ": " << boot_logl[sample] << " -> " << boot_tree->getCurScore() << endl;
2732         }
2733 
2734         stringstream ostr;
2735         if (params->print_ufboot_trees == 2)
2736             boot_tree->printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA | WT_BR_LEN | WT_BR_LEN_SHORT);
2737         else
2738             boot_tree->printTree(ostr, WT_TAXON_ID | WT_SORT_TAXA);
2739         boot_trees[sample] = ostr.str();
2740         boot_logl[sample] = boot_tree->curScore;
2741 
2742 
2743         // delete memory
2744         //boot_tree->setModelFactory(NULL);
2745         boot_tree->save_all_trees = 2;
2746 
2747         bootstrap_alignment = boot_tree->aln;
2748         delete boot_tree;
2749         // fix bug: bootstrap_alignment might be changed
2750         delete bootstrap_alignment;
2751 
2752 
2753         if ((sample+1) % 100 == 0)
2754             cout << sample+1 << " samples done" << endl;
2755 
2756         saveCheckpoint();
2757         checkpoint->startStruct("UFBoot");
2758         refined_samples = sample;
2759         CKP_SAVE(refined_samples);
2760         checkpoint->endStruct();
2761 
2762         checkpoint->dump();
2763 
2764 	}
2765 
2766     delete models_block;
2767 
2768     cout << "Total " << refined_trees << " ufboot trees refined" << endl;
2769 
2770     // restore randstream
2771     finish_random();
2772     randstream = saved_randstream;
2773 
2774     SplitGraph *sg = new SplitGraph;
2775     summarizeBootstrap(*sg);
2776     sg->removeTrivialSplits();
2777     sg->setCheckpoint(checkpoint);
2778     boot_splits.push_back(sg);
2779 
2780     saveCheckpoint();
2781     checkpoint->dump();
2782 
2783     // restore
2784     params->gbo_replicates = boot_trees.size();
2785     params->nni_type = saved_nni_type;
2786     if(params->nni_type == NNI5) {
2787         params->nni5 = true;
2788     } else
2789         params->nni5 = false;
2790 
2791     // 2018-08-17: recover memory
2792     initializeAllPartialLh();
2793 
2794 }
2795 
2796 
printIterationInfo(int sourceProcID)2797 void IQTree::printIterationInfo(int sourceProcID) {
2798     double realtime_remaining = stop_rule.getRemainingTime(stop_rule.getCurIt());
2799     cout.setf(ios_base::fixed, ios_base::floatfield);
2800 
2801     // only print every 10th iteration or high verbose mode
2802     if (stop_rule.getCurIt() % 10 == 0 || verbose_mode >= VB_MED) {
2803             cout << ((iqp_assess_quartet == IQP_BOOTSTRAP) ? "Bootstrap " : "Iteration ") << stop_rule.getCurIt() <<
2804             " / LogL: ";
2805             cout << curScore;
2806             cout << " / Time: " << convert_time(getRealTime() - params->start_real_time);
2807 
2808             if (stop_rule.getCurIt() > 20) {
2809                 cout << " (" << convert_time(realtime_remaining) << " left)";
2810             }
2811             if (MPIHelper::getInstance().getNumProcesses() > 1)
2812                 cout << " / Process: " << sourceProcID;
2813             cout << endl;
2814         }
2815 }
2816 
2817 //void IQTree::estimateLoglCutoffBS() {
2818 //    if (params->avoid_duplicated_trees && max_candidate_trees > 0 && treels_logl.size() > 1000) {
2819 //        int predicted_iteration;
2820 //        predicted_iteration = ((stop_rule.getCurIt() + params->step_iterations - 1) / params->step_iterations)
2821 //                              * params->step_iterations;
2822 //        int num_entries = (int) floor(max_candidate_trees * ((double) stop_rule.getCurIt() / predicted_iteration));
2823 //        if (num_entries < treels_logl.size() * 0.9) {
2824 //            DoubleVector logl = treels_logl;
2825 //            nth_element(logl.begin(), logl.begin() + (treels_logl.size() - num_entries), logl.end());
2826 //            logl_cutoff = logl[treels_logl.size() - num_entries] - 1.0;
2827 //        } else
2828 //            logl_cutoff = 0.0;
2829 //        if (verbose_mode >= VB_MED) {
2830 //            if (stop_rule.getCurIt() % 10 == 0) {
2831 //                cout << treels.size() << " trees, " << treels_logl.size() << " logls, logl_cutoff= " << logl_cutoff;
2832 //                if (params->store_candidate_trees)
2833 //                    cout << " duplicates= " << duplication_counter << " ("
2834 //                    << (int) round(100 * ((double) duplication_counter / treels_logl.size())) << "%)" << endl;
2835 //                else
2836 //                    cout << endl;
2837 //            }
2838 //        }
2839 //    }
2840 //}
2841 
2842 
doTreePerturbation()2843 double IQTree::doTreePerturbation() {
2844     if (iqp_assess_quartet == IQP_BOOTSTRAP) {
2845         // create bootstrap sample
2846         Alignment *bootstrap_alignment;
2847         if (aln->isSuperAlignment())
2848             bootstrap_alignment = new SuperAlignment;
2849         else
2850             bootstrap_alignment = new Alignment;
2851         bootstrap_alignment->createBootstrapAlignment(aln, NULL, params->bootstrap_spec);
2852         setAlignment(bootstrap_alignment);
2853         initializeAllPartialLh();
2854         clearAllPartialLH();
2855         curScore = optimizeAllBranches();
2856     } else {
2857         if (params->snni) {
2858             if (Params::getInstance().five_plus_five) {
2859                 readTreeString(candidateTrees.getNextCandTree());
2860             } else {
2861                 readTreeString(candidateTrees.getRandTopTree(Params::getInstance().popSize));
2862             }
2863             if (Params::getInstance().iqp) {
2864                 doIQP();
2865             } else if (Params::getInstance().adaptPertubation) {
2866                 perturbStableSplits(Params::getInstance().stableSplitThreshold);
2867             } else {
2868                 doRandomNNIs(Params::getInstance().tabu);
2869             }
2870         } else {
2871             // Using the IQPNNI algorithm (best tree is selected)
2872             readTreeString(getBestTrees()[0]);
2873             doIQP();
2874         }
2875         if (params->count_trees) {
2876             string perturb_tree_topo = getTopologyString(false);
2877             if (pllTreeCounter.find(perturb_tree_topo) == pllTreeCounter.end()) {
2878                 // not found in hash_map
2879                 pllTreeCounter[perturb_tree_topo] = 1;
2880             } else {
2881                 // found in hash_map
2882                 pllTreeCounter[perturb_tree_topo]++;
2883             }
2884         }
2885         //optimizeBranches(1);
2886         curScore = computeLogL();
2887     }
2888     return curScore;
2889 }
2890 
2891 /****************************************************************************
2892  Fast Nearest Neighbor Interchange by maximum likelihood
2893  ****************************************************************************/
doNNISearch(bool write_info)2894 pair<int, int> IQTree::doNNISearch(bool write_info) {
2895 
2896     computeLogL();
2897     double curBestScore = getBestScore();
2898 
2899     if (Params::getInstance().write_intermediate_trees && save_all_trees != 2) {
2900         printIntermediateTree(WT_NEWLINE | WT_APPEND | WT_SORT_TAXA | WT_BR_LEN);
2901     }
2902 
2903     pair<int, int> nniInfos; // Number of NNIs and number of steps
2904     if (params->pll) {
2905         if (params->partition_file)
2906             outError("Unsupported -pll -sp combination!");
2907         curScore = pllOptimizeNNI(nniInfos.first, nniInfos.second, searchinfo);
2908         pllTreeToNewick(pllInst->tree_string, pllInst, pllPartitions, pllInst->start->back, PLL_TRUE,
2909                 PLL_TRUE, 0, 0, 0, PLL_SUMMARIZE_LH, 0, 0);
2910         readTreeString(string(pllInst->tree_string));
2911     } else {
2912         nniInfos = optimizeNNI(Params::getInstance().speednni);
2913         if (isSuperTree()) {
2914             ((PhyloSuperTree*) this)->computeBranchLengths();
2915         }
2916         if (params->print_trees_site_posterior)
2917             computePatternCategories();
2918     }
2919 
2920     if(!on_refine_btree){ // Diep add (IF in Refinement Step, do not optimize model parameters)
2921         // Better tree or score is found
2922         if (getCurScore() > curBestScore + params->modelEps) {
2923             // Re-optimize model parameters (the sNNI algorithm)
2924             optimizeModelParameters(write_info, params->modelEps * 10);
2925             getModelFactory()->saveCheckpoint();
2926 
2927             // 2018-01-09: additional optimize root position
2928             // TODO: does not work with SuperTree yet
2929             if (rooted && !isSuperTree() && params->root_move_dist > 0)
2930             {
2931                 optimizeRootPosition(params->root_move_dist, true, params->modelEps * 10);
2932             }
2933         }
2934     }
2935     MPIHelper::getInstance().setNumNNISearch(MPIHelper::getInstance().getNumNNISearch() + 1);
2936 
2937     return nniInfos;
2938 }
2939 
optimizeNNI(bool speedNNI)2940 pair<int, int> IQTree::optimizeNNI(bool speedNNI) {
2941     unsigned int totalNNIApplied = 0;
2942     unsigned int numSteps = 0;
2943     const int MAXSTEPS = leafNum;
2944 //    unsigned int numInnerBranches = leafNum - 3;
2945     double curBestScore = candidateTrees.getBestScore();
2946 
2947 //    if (isMixlen())
2948 //        optimizeBranches();
2949 
2950     Branches nniBranches;
2951     Branches nonNNIBranches;
2952     vector<NNIMove> positiveNNIs;
2953     vector<NNIMove> appliedNNIs;
2954     SplitIntMap tabuSplits;
2955     if (!initTabuSplits.empty()) {
2956         tabuSplits = initTabuSplits;
2957     }
2958 
2959     double originalScore = curScore;
2960     for (numSteps = 1; numSteps <= MAXSTEPS; numSteps++) {
2961 
2962 //        cout << "numSteps = " << numSteps << endl;
2963         double oldScore = curScore;
2964         if (save_all_trees == 2) {
2965             saveCurrentTree(curScore); // BQM: for new bootstrap
2966         }
2967         if (verbose_mode >= VB_DEBUG) {
2968             cout << "Doing NNI round " << numSteps << endl;
2969             if (isSuperTree()) {
2970                 ((PhyloSuperTree*) this)->printMapInfo();
2971             }
2972         }
2973 
2974         // save all current branch lengths
2975         DoubleVector lenvec;
2976         saveBranchLengths(lenvec);
2977 
2978         // for super tree
2979         initPartitionInfo();
2980 
2981         nniBranches.clear();
2982         nonNNIBranches.clear();
2983 
2984         bool startSpeedNNI;
2985         // When tabu and speednni are combined, speednni is only start from third steps
2986         if (!initTabuSplits.empty() && numSteps < 3) {
2987             startSpeedNNI = false;
2988         } else if (speedNNI && !appliedNNIs.empty()) {
2989             startSpeedNNI = true;
2990         } else {
2991             startSpeedNNI = false;
2992         }
2993 
2994         if (startSpeedNNI) {
2995             // speedNNI option: only evaluate NNIs that are 2 branches away from the previously applied NNI
2996             Branches filteredNNIBranches;
2997             filterNNIBranches(appliedNNIs, filteredNNIBranches);
2998             for (Branches::iterator it = filteredNNIBranches.begin(); it != filteredNNIBranches.end(); it++) {
2999                 Branch curBranch = it->second;
3000                 PhyloNeighbor* nei = (PhyloNeighbor*) curBranch.first->findNeighbor(curBranch.second);
3001                 Split* curSplit = nei->split;
3002                 bool tabu = false;
3003                 bool stable = false;
3004                 if (!tabuSplits.empty()) {
3005                     int value;
3006                     if (tabuSplits.findSplit(curSplit, value) != NULL)
3007                         tabu = true;
3008                 }
3009                 if (!candidateTrees.getCandSplits().empty()) {
3010                     int value;
3011                     if (candidateTrees.getCandSplits().findSplit(curSplit, value) != NULL)
3012                         stable = true;
3013 
3014                 }
3015                 if (!tabu && !stable) {
3016                     int branchID =  pairInteger(curBranch.first->id, curBranch.second->id);
3017                     nniBranches.insert(pair<int, Branch>(branchID, curBranch));
3018                 }
3019             }
3020         } else {
3021             getNNIBranches(tabuSplits, candidateTrees.getCandSplits(), nonNNIBranches, nniBranches);
3022         }
3023 
3024         if (!tabuSplits.empty()) {
3025             tabuSplits.clear();
3026         }
3027 
3028         positiveNNIs.clear();
3029         evaluateNNIs(nniBranches, positiveNNIs);
3030 
3031         if (positiveNNIs.size() == 0) {
3032             if (!nonNNIBranches.empty() && totalNNIApplied == 0) {
3033                 evaluateNNIs(nonNNIBranches, positiveNNIs);
3034                 if (positiveNNIs.size() == 0) {
3035                     break;
3036                 }
3037             } else {
3038                 break;
3039             }
3040         }
3041 
3042         /* sort all positive NNI moves (ASCENDING) */
3043         sort(positiveNNIs.begin(), positiveNNIs.end());
3044 
3045         /* remove conflicting NNIs */
3046         appliedNNIs.clear();
3047         getCompatibleNNIs(positiveNNIs, appliedNNIs);
3048 
3049         // do non-conflicting positive NNIs
3050         doNNIs(appliedNNIs);
3051         curScore = optimizeAllBranches(1, params->loglh_epsilon, PLL_NEWZPERCYCLE);
3052 
3053         if (curScore < appliedNNIs.at(0).newloglh - params->loglh_epsilon) {
3054             //cout << "Tree getting worse: curScore = " << curScore << " / best score = " <<  appliedNNIs.at(0).newloglh << endl;
3055             // tree cannot be worse if only 1 NNI is applied
3056             if (appliedNNIs.size() > 1) {
3057                 // revert all applied NNIs
3058                 doNNIs(appliedNNIs);
3059                 restoreBranchLengths(lenvec);
3060                 clearAllPartialLH();
3061                 // only do the best NNI
3062                 appliedNNIs.resize(1);
3063                 doNNIs(appliedNNIs);
3064                 curScore = optimizeAllBranches(1, params->loglh_epsilon, PLL_NEWZPERCYCLE);
3065                 ASSERT(curScore > appliedNNIs.at(0).newloglh - 0.1);
3066             } else
3067                 ASSERT(curScore > appliedNNIs.at(0).newloglh - 0.1 && "Using one NNI reduces LogL");
3068             totalNNIApplied++;
3069         } else {
3070             totalNNIApplied += appliedNNIs.size();
3071         }
3072 
3073         if(curScore < oldScore - params->loglh_epsilon){
3074             cout << "$$$$$$$$: " << curScore << "\t" << oldScore << "\t" << curScore - oldScore << endl;
3075 
3076         }
3077 
3078         if (curScore - oldScore <  params->loglh_epsilon)
3079             break;
3080 
3081         if (params->snni && (curScore > curBestScore + 0.1)) {
3082             curBestScore = curScore;
3083         }
3084 
3085         if (Params::getInstance().write_intermediate_trees && save_all_trees != 2) {
3086             printIntermediateTree(WT_NEWLINE | WT_APPEND | WT_SORT_TAXA | WT_BR_LEN);
3087         }
3088 
3089         if (Params::getInstance().writeDistImdTrees) {
3090             intermediateTrees.update(getTreeString(), curScore);
3091         }
3092     }
3093 
3094     if (totalNNIApplied == 0 && verbose_mode >= VB_MED) {
3095         cout << "NOTE: Input tree is already NNI-optimal" << endl;
3096     }
3097 
3098     if (numSteps == MAXSTEPS) {
3099         cout << "WARNING: NNI search needs unusual large number of steps (" << numSteps << ") to converge!" << endl;
3100     }
3101 
3102     if(curScore < originalScore - params->loglh_epsilon){
3103         cout << "AAAAAAAAAAAAAAAAAAA: " << curScore << "\t" << originalScore << "\t" << curScore - originalScore << endl;
3104 
3105     }
3106     return make_pair(numSteps, totalNNIApplied);
3107 }
3108 
filterNNIBranches(vector<NNIMove> & appliedNNIs,Branches & nniBranches)3109 void IQTree::filterNNIBranches(vector<NNIMove> &appliedNNIs, Branches &nniBranches) {
3110     for (vector<NNIMove>::iterator it = appliedNNIs.begin(); it != appliedNNIs.end(); it++) {
3111         Branch curBranch;
3112         curBranch.first = it->node1;
3113         curBranch.second = it->node2;
3114         int branchID = pairInteger(it->node1->id, it->node2->id);
3115         if (nniBranches.find(branchID) == nniBranches.end())
3116             nniBranches.insert(pair<int,Branch>(branchID, curBranch));
3117         getSurroundingInnerBranches(it->node1, it->node2, 2, nniBranches);
3118         getSurroundingInnerBranches(it->node2, it->node1, 2, nniBranches);
3119     }
3120 }
3121 
pllOptimizeNNI(int & totalNNICount,int & nniSteps,SearchInfo & searchinfo)3122 double IQTree::pllOptimizeNNI(int &totalNNICount, int &nniSteps, SearchInfo &searchinfo) {
3123     if((globalParams->online_bootstrap == PLL_TRUE) && (globalParams->gbo_replicates > 0)) {
3124         pllInitUFBootData();
3125     }
3126     searchinfo.numAppliedNNIs = 0;
3127     searchinfo.curLogl = curScore;
3128     //cout << "curLogl: " << searchinfo.curLogl << endl;
3129     const int MAX_NNI_STEPS = aln->getNSeq();
3130     totalNNICount = 0;
3131     for (nniSteps = 1; nniSteps <= MAX_NNI_STEPS; nniSteps++) {
3132         searchinfo.curNumNNISteps = nniSteps;
3133         searchinfo.posNNIList.clear();
3134         double newLH = pllDoNNISearch(pllInst, pllPartitions, searchinfo);
3135         if (searchinfo.curNumAppliedNNIs == 0) { // no positive NNI was found
3136             searchinfo.curLogl = newLH;
3137             break;
3138         } else {
3139             searchinfo.curLogl = newLH;
3140             searchinfo.numAppliedNNIs += searchinfo.curNumAppliedNNIs;
3141         }
3142     }
3143 
3144     if (nniSteps == (MAX_NNI_STEPS + 1)) {
3145         cout << "WARNING: NNI search needs unusual large number of steps (" << MAX_NNI_STEPS << ") to converge!" << endl;
3146     }
3147 
3148     if (searchinfo.numAppliedNNIs == 0) {
3149         cout << "NOTE: Tree is already NNI-optimized" << endl;
3150     }
3151 
3152     totalNNICount = searchinfo.numAppliedNNIs;
3153     pllInst->likelihood = searchinfo.curLogl;
3154     return searchinfo.curLogl;
3155 }
3156 
pllLogBootSamples(int ** pll_boot_samples,int nsamples,int npatterns)3157 void IQTree::pllLogBootSamples(int** pll_boot_samples, int nsamples, int npatterns){
3158     ofstream bfile("boot_samples.log");
3159     bfile << "Original freq:" << endl;
3160     int sum = 0;
3161     for(int i = 0; i < pllAlignment->sequenceLength; i++){
3162         bfile << setw(4) << pllInst->aliaswgt[i];
3163         sum += pllInst->aliaswgt[i];
3164     }
3165     bfile << endl << "sum = " << sum << endl;
3166 
3167     bfile << "Bootstrap freq:" << endl;
3168 
3169     for(int i = 0; i < nsamples; i++){
3170         sum = 0;
3171         for(int j = 0; j < npatterns; j++){
3172             bfile << setw(4) << pll_boot_samples[i][j];
3173             sum += pll_boot_samples[i][j];
3174         }
3175         bfile << endl << "sum = "  << sum << endl;
3176     }
3177     bfile.close();
3178 
3179 }
3180 
pllInitUFBootData()3181 void IQTree::pllInitUFBootData(){
3182     if(pllUFBootDataPtr == NULL){
3183         pllUFBootDataPtr = (pllUFBootData *) malloc(sizeof(pllUFBootData));
3184         pllUFBootDataPtr->boot_samples = NULL;
3185         pllUFBootDataPtr->candidate_trees_count = 0;
3186 
3187         if(params->online_bootstrap && params->gbo_replicates > 0){
3188             if(!pll2iqtree_pattern_index) pllBuildIQTreePatternIndex();
3189 
3190 //            pllUFBootDataPtr->treels = pllHashInit(max_candidate_trees);
3191 //            pllUFBootDataPtr->treels_size = max_candidate_trees; // track size of treels_logl, treels_newick, treels_ptnlh
3192 
3193 //            pllUFBootDataPtr->treels_logl =
3194 //                (double *) malloc(max_candidate_trees * (sizeof(double)));
3195 //            if(!pllUFBootDataPtr->treels_logl) outError("Not enough dynamic memory!");
3196 
3197 
3198 
3199 //            pllUFBootDataPtr->treels_ptnlh =
3200 //                (double **) malloc(max_candidate_trees * (sizeof(double *)));
3201 //            if(!pllUFBootDataPtr->treels_ptnlh) outError("Not enough dynamic memory!");
3202 //            memset(pllUFBootDataPtr->treels_ptnlh, 0, max_candidate_trees * (sizeof(double *)));
3203 
3204             // aln->createBootstrapAlignment() must be called before this fragment
3205             pllUFBootDataPtr->boot_samples =
3206                 (int **) malloc(params->gbo_replicates * sizeof(int *));
3207             if(!pllUFBootDataPtr->boot_samples) outError("Not enough dynamic memory!");
3208             for(int i = 0; i < params->gbo_replicates; i++){
3209                 pllUFBootDataPtr->boot_samples[i] =
3210                     (int *) malloc(pllAlignment->sequenceLength * sizeof(int));
3211                 if(!pllUFBootDataPtr->boot_samples[i]) outError("Not enough dynamic memory!");
3212                 for(int j = 0; j < pllAlignment->sequenceLength; j++){
3213                     pllUFBootDataPtr->boot_samples[i][j] =
3214                         boot_samples[i][pll2iqtree_pattern_index[j]];
3215                 }
3216             }
3217 
3218 
3219             pllUFBootDataPtr->boot_logl =
3220                 (double *) malloc(params->gbo_replicates * (sizeof(double)));
3221             if(!pllUFBootDataPtr->boot_logl) outError("Not enough dynamic memory!");
3222             for(int i = 0; i < params->gbo_replicates; i++)
3223                 pllUFBootDataPtr->boot_logl[i] = -DBL_MAX;
3224 
3225             pllUFBootDataPtr->boot_counts =
3226                 (int *) malloc(params->gbo_replicates * (sizeof(int)));
3227             if(!pllUFBootDataPtr->boot_counts) outError("Not enough dynamic memory!");
3228             memset(pllUFBootDataPtr->boot_counts, 0, params->gbo_replicates * (sizeof(int)));
3229 
3230             pllUFBootDataPtr->boot_trees.resize(params->gbo_replicates, "");
3231             pllUFBootDataPtr->duplication_counter = 0;
3232         }
3233     }
3234 //    pllUFBootDataPtr->max_candidate_trees = max_candidate_trees;
3235     pllUFBootDataPtr->save_all_trees = save_all_trees;
3236     pllUFBootDataPtr->logl_cutoff = logl_cutoff;
3237     pllUFBootDataPtr->n_patterns = pllAlignment->sequenceLength;
3238 }
3239 
pllDestroyUFBootData()3240 void IQTree::pllDestroyUFBootData(){
3241     if(pll2iqtree_pattern_index){
3242         delete [] pll2iqtree_pattern_index;
3243         pll2iqtree_pattern_index = NULL;
3244     }
3245 
3246     if(params->online_bootstrap && params->gbo_replicates > 0){
3247         pllHashDestroy(&(pllUFBootDataPtr->treels), rax_free);
3248 
3249         free(pllUFBootDataPtr->treels_logl);
3250 
3251 
3252         for(int i = 0; i < pllUFBootDataPtr->treels_size; i++)
3253             if(pllUFBootDataPtr->treels_ptnlh[i])
3254                 free(pllUFBootDataPtr->treels_ptnlh[i]);
3255         free(pllUFBootDataPtr->treels_ptnlh);
3256 
3257         for(int i = 0; i < params->gbo_replicates; i++)
3258             free(pllUFBootDataPtr->boot_samples[i]);
3259         free(pllUFBootDataPtr->boot_samples);
3260 
3261         free(pllUFBootDataPtr->boot_logl);
3262 
3263         free(pllUFBootDataPtr->boot_counts);
3264 
3265     }
3266     free(pllUFBootDataPtr);
3267     pllUFBootDataPtr = NULL;
3268 }
3269 
3270 
doNNIs(vector<NNIMove> & compatibleNNIs,bool changeBran)3271 void IQTree::doNNIs(vector<NNIMove> &compatibleNNIs, bool changeBran) {
3272     for (vector<NNIMove>::iterator it = compatibleNNIs.begin(); it != compatibleNNIs.end(); it++) {
3273         doNNI(*it);
3274         if (!params->leastSquareNNI && changeBran) {
3275             // apply new branch lengths
3276             changeNNIBrans(*it);
3277         }
3278     }
3279     // 2015-10-14: has to reset this pointer when read in
3280     current_it = current_it_back = NULL;
3281 
3282 }
3283 
3284 
getCompatibleNNIs(vector<NNIMove> & nniMoves,vector<NNIMove> & compatibleNNIs)3285 void IQTree::getCompatibleNNIs(vector<NNIMove> &nniMoves, vector<NNIMove> &compatibleNNIs) {
3286     compatibleNNIs.clear();
3287     for (vector<NNIMove>::iterator it1 = nniMoves.begin(); it1 != nniMoves.end(); it1++) {
3288         bool select = true;
3289         for (vector<NNIMove>::iterator it2 = compatibleNNIs.begin(); it2 != compatibleNNIs.end(); it2++) {
3290             if ((*it1).node1 == (*(it2)).node1
3291                     || (*it1).node2 == (*(it2)).node1
3292                     || (*it1).node1 == (*(it2)).node2
3293                     || (*it1).node2 == (*(it2)).node2) {
3294                 select = false;
3295                 break;
3296             }
3297         }
3298         if (select) {
3299             compatibleNNIs.push_back(*it1);
3300         }
3301     }
3302 }
3303 
3304 //double IQTree::estN95() {
3305 //    if (vecNumNNI.size() == 0) {
3306 //        return 0;
3307 //    } else {
3308 //        sort(vecNumNNI.begin(), vecNumNNI.end());
3309 //        int index = floor(vecNumNNI.size() * speed_conf);
3310 //        return vecNumNNI[index];
3311 //    }
3312 //}
3313 
getAvgNumNNI()3314 double IQTree::getAvgNumNNI() {
3315     if (vecNumNNI.size() == 0) {
3316         return 0;
3317     } else {
3318         double median;
3319         size_t size = vecNumNNI.size();
3320         sort(vecNumNNI.begin(), vecNumNNI.end());
3321         if (size % 2 == 0) {
3322             median = (vecNumNNI[size / 2 + 1] + vecNumNNI[size / 2]) / 2;
3323         } else {
3324             median = vecNumNNI[size / 2];
3325         }
3326         return median;
3327     }
3328 }
3329 
estDeltaMedian()3330 double IQTree::estDeltaMedian() {
3331     if (vecImpProNNI.size() == 0) {
3332         return 0;
3333     } else {
3334         double median;
3335         size_t size = vecImpProNNI.size();
3336         sort(vecImpProNNI.begin(), vecImpProNNI.end());
3337         if (size % 2 == 0) {
3338             median = (vecImpProNNI[size / 2 + 1] + vecImpProNNI[size / 2]) / 2;
3339         } else {
3340             median = vecImpProNNI[size / 2];
3341         }
3342         return median;
3343     }
3344 }
3345 
3346 //inline double IQTree::estDelta95() {
3347 //    if (vecImpProNNI.size() == 0) {
3348 //        return 0;
3349 //    } else {
3350 //        sort(vecImpProNNI.begin(), vecImpProNNI.end());
3351 //        int index = floor(vecImpProNNI.size() * speed_conf);
3352 //        return vecImpProNNI[index];
3353 //    }
3354 //}
3355 
getDelete() const3356 int IQTree::getDelete() const {
3357     return k_delete;
3358 }
3359 
setDelete(int _delete)3360 void IQTree::setDelete(int _delete) {
3361     k_delete = _delete;
3362 }
3363 
evaluateNNIs(Branches & nniBranches,vector<NNIMove> & positiveNNIs)3364 void IQTree::evaluateNNIs(Branches &nniBranches, vector<NNIMove>  &positiveNNIs) {
3365     for (Branches::iterator it = nniBranches.begin(); it != nniBranches.end(); it++) {
3366         NNIMove nni = getBestNNIForBran((PhyloNode*) it->second.first, (PhyloNode*) it->second.second, NULL);
3367         if (nni.newloglh > curScore) {
3368             positiveNNIs.push_back(nni);
3369         }
3370 
3371         // synchronize tree during optimization step
3372         if (MPIHelper::getInstance().isMaster() && candidateset_changed.size() > 0
3373             && MPIHelper::getInstance().gotMessage()) {
3374             syncCurrentTree();
3375         }
3376     }
3377 }
3378 
3379 //Branches IQTree::getReducedListOfNNIBranches(Branches &previousNNIBranches) {
3380 //    Branches resBranches;
3381 //    for (Branches::iterator it = previousNNIBranches.begin(); it != previousNNIBranches.end(); it++) {
3382 //        getSurroundingInnerBranches(it->second.first, it->second.second, 2, resBranches);
3383 //        getSurroundingInnerBranches(it->second.second, it->second.first, 2, resBranches);
3384 //    }
3385 //}
3386 
optimizeNNIBranches(Branches & nniBranches)3387 double IQTree::optimizeNNIBranches(Branches &nniBranches) {
3388     for (Branches::iterator it = nniBranches.begin(); it != nniBranches.end(); it++) {
3389         optimizeOneBranch((PhyloNode*) it->second.first, (PhyloNode*) it->second.second, true, PLL_NEWZPERCYCLE);
3390     }
3391     curScore = computeLikelihoodFromBuffer();
3392     return curScore;
3393 }
3394 
3395 /**
3396  *  Currently not used, commented out to simplify the interface of getBestNNIForBran
3397 void IQTree::evalNNIsSort(bool approx_nni) {
3398         if (myMove.newloglh > curScore + params->loglh_epsilon) {
3399         if (myMove.newloglh > curScore + params->loglh_epsilon) {
3400             addPositiveNNIMove(myMove);
3401         }
3402             addPositiveNNIMove(myMove);
3403         }
3404     NodeVector nodes1, nodes2;
3405     int i;
3406     double cur_lh = curScore;
3407     vector<IntBranchInfo> int_branches;
3408 
3409     getInternalBranches(nodes1, nodes2);
3410     assert(nodes1.size() == leafNum - 3 && nodes2.size() == leafNum - 3);
3411 
3412     for (i = 0; i < leafNum - 3; i++) {
3413         IntBranchInfo int_branch;
3414         PhyloNeighbor *node12_it = (PhyloNeighbor*) nodes1[i]->findNeighbor(nodes2[i]);
3415         //PhyloNeighbor *node21_it = (PhyloNeighbor*) nodes2[i]->findNeighbor(nodes1[i]);
3416         int_branch.lh_contribution = cur_lh - computeLikelihoodZeroBranch(node12_it, (PhyloNode*) nodes1[i]);
3417         if (int_branch.lh_contribution < 0.0)
3418             int_branch.lh_contribution = 0.0;
3419         if (int_branch.lh_contribution < fabs(nni_cutoff)) {
3420             int_branch.node1 = (PhyloNode*) nodes1[i];
3421             int_branch.node2 = (PhyloNode*) nodes2[i];
3422             int_branches.push_back(int_branch);
3423         }
3424     }
3425     std::sort(int_branches.begin(), int_branches.end(), int_branch_cmp);
3426     for (vector<IntBranchInfo>::iterator it = int_branches.begin(); it != int_branches.end(); it++)
3427         if (it->lh_contribution >= 0.0) // evaluate NNI if branch contribution is big enough
3428                 {
3429             NNIMove myMove = getBestNNIForBran(it->node1, it->node2, NULL, approx_nni, it->lh_contribution);
3430             if (myMove.newloglh > curScore) {
3431                 addPositiveNNIMove(myMove);
3432                 if (!estimate_nni_cutoff)
3433                     for (vector<IntBranchInfo>::iterator it2 = it + 1; it2 != int_branches.end(); it2++) {
3434                         if (it2->node1 == it->node1 || it2->node2 == it->node1 || it2->node1 == it->node2
3435                                 || it2->node2 == it->node2)
3436                             it2->lh_contribution = -1.0; // do not evaluate this branch later on
3437                     }
3438             }
3439         } else { // otherwise, only optimize the branch length
3440             PhyloNode *node1 = it->node1;
3441             PhyloNode *node2 = it->node2;
3442             PhyloNeighbor *node12_it = (PhyloNeighbor*) node1->findNeighbor(node2);
3443             PhyloNeighbor *node21_it = (PhyloNeighbor*) node2->findNeighbor(node1);
3444             double stored_len = node12_it->length;
3445             curScore = optimizeOneBranch(node1, node2, false);
3446             string key("");
3447             if (node1->id < node2->id) {
3448                 key += convertIntToString(node1->id) + "->" + convertIntToString(node2->id);
3449             } else {
3450                 key += convertIntToString(node2->id) + "->" + convertIntToString(node1->id);
3451             }
3452 
3453             optBrans.insert(mapString2Double::value_type(key, node12_it->length));
3454             node12_it->length = stored_len;
3455             node21_it->length = stored_len;
3456         }
3457 }
3458 */
3459 
estimateNNICutoff(Params * params)3460 void IQTree::estimateNNICutoff(Params* params) {
3461     double *delta = new double[nni_info.size()];
3462     int i;
3463     for (i = 0; i < nni_info.size(); i++) {
3464         double lh_score[4];
3465         memmove(lh_score, nni_info[i].lh_score, 4 * sizeof(double));
3466         std::sort(lh_score + 1, lh_score + 4); // sort in ascending order
3467         delta[i] = lh_score[0] - lh_score[2];
3468         if (verbose_mode >= VB_MED)
3469             cout << i << ": " << lh_score[0] << " " << lh_score[1] << " " << lh_score[2] << " " << lh_score[3] << endl;
3470     }
3471     std::sort(delta, delta + nni_info.size());
3472     nni_cutoff = delta[nni_info.size() / 20];
3473     cout << endl << "Estimated NNI cutoff: " << nni_cutoff << endl;
3474     string file_name = params->out_prefix;
3475     file_name += ".nnidelta";
3476     try {
3477         ofstream out;
3478         out.exceptions(ios::failbit | ios::badbit);
3479         out.open(file_name.c_str());
3480         for (i = 0; i < nni_info.size(); i++) {
3481             out << delta[i] << endl;
3482         }
3483         out.close();
3484         cout << "NNI delta printed to " << file_name << endl;
3485     } catch (ios::failure) {
3486         outError(ERR_WRITE_OUTPUT, file_name);
3487     }
3488     delete[] delta;
3489 }
3490 
saveCurrentTree(double cur_logl)3491 void IQTree::saveCurrentTree(double cur_logl) {
3492 
3493     if (logl_cutoff != 0.0 && cur_logl < logl_cutoff - 1.0)
3494         return;
3495 //    treels_logl.push_back(cur_logl);
3496 //    num_trees_for_rell++;
3497 
3498     if (Params::getInstance().write_intermediate_trees)
3499         printTree(out_treels, WT_NEWLINE | WT_BR_LEN);
3500 
3501     int nptn = getAlnNPattern();
3502 
3503 #ifdef BOOT_VAL_FLOAT
3504     int maxnptn = get_safe_upper_limit_float(nptn);
3505     BootValType *pattern_lh = aligned_alloc<BootValType>(maxnptn);
3506     memset(pattern_lh, 0, maxnptn*sizeof(BootValType));
3507     double *pattern_lh_orig = aligned_alloc<double>(nptn);
3508     computePatternLikelihood(pattern_lh_orig, &cur_logl);
3509     for (int i = 0; i < nptn; i++)
3510         pattern_lh[i] = (float)pattern_lh_orig[i];
3511 #else
3512     int maxnptn = get_safe_upper_limit(nptn);
3513     BootValType *pattern_lh = aligned_alloc<BootValType>(maxnptn);
3514     memset(pattern_lh, 0, maxnptn*sizeof(BootValType));
3515     computePatternLikelihood(pattern_lh, &cur_logl);
3516 #endif
3517 
3518 
3519     if (boot_samples.empty()) {
3520         // for runGuidedBootstrap
3521     } else {
3522         // online bootstrap
3523 //        int ptn;
3524 //        int updated = 0;
3525 //        int nsamples = boot_samples.size();
3526         ostringstream ostr;
3527         string tree_str;
3528         setRootNode(params->root);
3529         if (params->print_ufboot_trees == 2)
3530             printTree(ostr, WT_TAXON_ID + WT_SORT_TAXA + WT_BR_LEN + WT_BR_LEN_SHORT);
3531         else
3532             printTree(ostr, WT_TAXON_ID + WT_SORT_TAXA);
3533         tree_str = ostr.str();
3534 
3535     #ifdef _OPENMP
3536         int rand_seed = random_int(1000);
3537         #pragma omp parallel
3538         {
3539         int *rstream;
3540         init_random(rand_seed + omp_get_thread_num(), false, &rstream);
3541         #pragma omp for
3542     #else
3543         int *rstream = randstream;
3544     #endif
3545         for (int sample = sample_start; sample < sample_end; sample++) {
3546             double rell = 0.0;
3547 
3548             {
3549                 // SSE optimized version of the above loop
3550                 BootValType *boot_sample = boot_samples[sample];
3551 
3552                 BootValType res = (this->*dotProduct)(pattern_lh, boot_sample, nptn);
3553 
3554                 rell = res;
3555             }
3556 
3557             bool better = rell > boot_logl[sample] + params->ufboot_epsilon;
3558             if (!better && rell > boot_logl[sample] - params->ufboot_epsilon) {
3559                 better = (random_double(rstream) <= 1.0 / (boot_counts[sample] + 1));
3560             }
3561             if (better) {
3562                 if (rell <= boot_logl[sample] + params->ufboot_epsilon) {
3563                     boot_counts[sample]++;
3564                 } else {
3565                     boot_counts[sample] = 1;
3566                 }
3567                 boot_logl[sample] = max(boot_logl[sample], rell);
3568                 boot_orig_logl[sample] = cur_logl;
3569                 boot_trees[sample] = tree_str;
3570             }
3571         }
3572     #ifdef _OPENMP
3573         finish_random(rstream);
3574         }
3575     #endif
3576     }
3577     if (Params::getInstance().print_tree_lh) {
3578         out_treelh << cur_logl;
3579         double prob;
3580 #ifdef BOOT_VAL_FLOAT
3581         aln->multinomialProb(pattern_lh_orig, prob);
3582 #else
3583         aln->multinomialProb(pattern_lh, prob);
3584 #endif
3585         out_treelh << "\t" << prob << endl;
3586 
3587         IntVector pattern_index;
3588         aln->getSitePatternIndex(pattern_index);
3589         out_sitelh << "Site_Lh   ";
3590         for (int i = 0; i < getAlnNSite(); i++)
3591             out_sitelh << " " << pattern_lh[pattern_index[i]];
3592         out_sitelh << endl;
3593     }
3594 
3595     if (!boot_samples.empty()) {
3596 #ifdef BOOT_VAL_FLOAT
3597         aligned_free(pattern_lh_orig);
3598 #endif
3599         aligned_free(pattern_lh);
3600     } else {
3601 #ifdef BOOT_VAL_FLOAT
3602         aligned_free(pattern_lh);
3603 #endif
3604     }
3605 
3606 }
3607 
saveNNITrees(PhyloNode * node,PhyloNode * dad)3608 void IQTree::saveNNITrees(PhyloNode *node, PhyloNode *dad) {
3609     if (!node) {
3610         node = (PhyloNode*) root;
3611     }
3612     if (dad && !node->isLeaf() && !dad->isLeaf()) {
3613         double *pat_lh1 = new double[aln->getNPattern()];
3614         double *pat_lh2 = new double[aln->getNPattern()];
3615         double lh1, lh2;
3616         computeNNIPatternLh(curScore, lh1, pat_lh1, lh2, pat_lh2, node, dad);
3617         delete[] pat_lh2;
3618         delete[] pat_lh1;
3619     }
3620     FOR_NEIGHBOR_IT(node, dad, it)saveNNITrees((PhyloNode*) (*it)->node, node);
3621 }
3622 
summarizeBootstrap(Params & params,MTreeSet & trees)3623 void IQTree::summarizeBootstrap(Params &params, MTreeSet &trees) {
3624     int sum_weights = trees.sumTreeWeights();
3625     int i;
3626     if (verbose_mode >= VB_MAX) {
3627         for (i = 0; i < trees.size(); i++)
3628             if (trees.tree_weights[i] > 0)
3629                 cout << "Tree " << i + 1 << " weight= " << (double) trees.tree_weights[i] * 100 / sum_weights << endl;
3630     }
3631     int max_tree_id = max_element(trees.tree_weights.begin(), trees.tree_weights.end()) - trees.tree_weights.begin();
3632     if (verbose_mode >= VB_MED) {
3633         cout << "max_tree_id = " << max_tree_id + 1 << "   max_weight = " << trees.tree_weights[max_tree_id];
3634         cout << " (" << (double) trees.tree_weights[max_tree_id] * 100 / sum_weights << "%)" << endl;
3635     }
3636     // assign bootstrap support
3637     SplitGraph sg;
3638     SplitIntMap hash_ss;
3639     // make the taxa name
3640     vector<string> taxname;
3641     taxname.resize(leafNum);
3642     if (boot_splits.empty()) {
3643         getTaxaName(taxname);
3644     } else {
3645         boot_splits.back()->getTaxaName(taxname);
3646     }
3647     /*if (!tree.save_all_trees)
3648      trees.convertSplits(taxname, sg, hash_ss, SW_COUNT, -1);
3649      else
3650      trees.convertSplits(taxname, sg, hash_ss, SW_COUNT, -1, false);
3651      */
3652     trees.convertSplits(taxname, sg, hash_ss, SW_COUNT, -1, NULL, false); // do not sort taxa
3653 
3654     if (verbose_mode >= VB_MED)
3655     	cout << sg.size() << " splits found" << endl;
3656 
3657     sg.scaleWeight(1.0 / trees.sumTreeWeights(), false, 4);
3658     string out_file;
3659     out_file = params.out_prefix;
3660     out_file += ".splits";
3661     if (params.print_splits_file) {
3662         sg.saveFile(out_file.c_str(), IN_OTHER, true);
3663         cout << "Split supports printed to star-dot file " << out_file << endl;
3664     }
3665     // compute the percentage of appearance
3666     sg.scaleWeight(100.0, true);
3667     //    printSplitSet(sg, hash_ss);
3668     //sg.report(cout);
3669     //cout << "Creating " << RESAMPLE_NAME << " support values..." << endl;
3670 //    stringstream tree_stream;
3671 //    printTree(tree_stream, WT_TAXON_ID | WT_BR_LEN);
3672 //    MExtTree mytree;
3673 //    mytree.readTree(tree_stream, rooted);
3674 //    mytree.assignLeafID();
3675     assignLeafNameByID();
3676     createBootstrapSupport(taxname, trees, hash_ss, NULL);
3677 
3678     // now write resulting tree with supports
3679 //    tree_stream.seekp(0, ios::beg);
3680 //    mytree.printTree(tree_stream);
3681 //
3682 //    // now read resulting tree
3683 //    tree_stream.seekg(0, ios::beg);
3684 //    freeNode();
3685 //    // RARE BUG FIX: to avoid cases that identical seqs were removed and leaf name happens to be IDs
3686 //    MTree::readTree(tree_stream, rooted);
3687 
3688     assignLeafNames();
3689 
3690     // 2017-12-05: commented out, not sure why doing this, which disort branch lengths
3691 //    if (isSuperTree()) {
3692 //        ((PhyloSuperTree*) this)->mapTrees();
3693 //    } else {
3694 //        initializeAllPartialLh();
3695 //        clearAllPartialLH();
3696 //    }
3697 
3698     if (!save_all_trees) {
3699         out_file = params.out_prefix;
3700         out_file += ".suptree";
3701 
3702         printTree(out_file.c_str());
3703         cout << "Tree with assigned support written to " << out_file << endl;
3704     }
3705 
3706     if (params.print_splits_nex_file) {
3707         out_file = params.out_prefix;
3708         out_file += ".splits.nex";
3709         sg.saveFile(out_file.c_str(), IN_NEXUS, false);
3710         cout << "Split supports printed to NEXUS file " << out_file << endl;
3711     }
3712 
3713     /*
3714      out_file = params.out_prefix;
3715      out_file += ".supval";
3716      writeInternalNodeNames(out_file);
3717 
3718      cout << "Support values written to " << out_file << endl;
3719      */
3720 
3721 
3722 }
3723 
writeUFBootTrees(Params & params)3724 void IQTree::writeUFBootTrees(Params &params) {
3725     MTreeSet trees;
3726 //    IntVector tree_weights;
3727     int i, j;
3728     string filename = params.out_prefix;
3729     filename += ".ufboot";
3730     ofstream out(filename.c_str());
3731 
3732     trees.init(boot_trees, rooted);
3733     for (i = 0; i < trees.size(); i++) {
3734         NodeVector taxa;
3735         // change the taxa name from ID to real name
3736         trees[i]->getOrderedTaxa(taxa);
3737         for (j = 0; j < taxa.size() - (int)rooted; j++)
3738             taxa[j]->name = aln->getSeqName(taxa[j]->id);
3739         if (removed_seqs.size() > 0) {
3740             // reinsert removed seqs into each tree
3741             trees[i]->insertTaxa(removed_seqs, twin_seqs);
3742         }
3743         // now print to file
3744         for (j = 0; j < trees.tree_weights[i]; j++)
3745             if (params.print_ufboot_trees == 1)
3746                 trees[i]->printTree(out, WT_NEWLINE);
3747             else
3748                 trees[i]->printTree(out, WT_NEWLINE + WT_BR_LEN);
3749     }
3750     cout << "UFBoot trees printed to " << filename << endl;
3751     out.close();
3752 }
3753 
summarizeBootstrap(Params & params)3754 void IQTree::summarizeBootstrap(Params &params) {
3755     setRootNode(params.root);
3756     MTreeSet trees;
3757     trees.init(boot_trees, rooted);
3758     summarizeBootstrap(params, trees);
3759 }
3760 
summarizeBootstrap(SplitGraph & sg)3761 void IQTree::summarizeBootstrap(SplitGraph &sg) {
3762     MTreeSet trees;
3763     //SplitGraph sg;
3764     trees.init(boot_trees, rooted);
3765     SplitIntMap hash_ss;
3766     // make the taxa name
3767     vector<string> taxname;
3768     taxname.resize(leafNum);
3769     getTaxaName(taxname);
3770 
3771     /*if (!tree.save_all_trees)
3772      trees.convertSplits(taxname, sg, hash_ss, SW_COUNT, -1);
3773      else
3774      trees.convertSplits(taxname, sg, hash_ss, SW_COUNT, -1, false);
3775      */
3776     trees.convertSplits(taxname, sg, hash_ss, SW_COUNT, -1, NULL, false); // do not sort taxa
3777 }
3778 
pllConvertUFBootData2IQTree()3779 void IQTree::pllConvertUFBootData2IQTree(){
3780     // duplication_counter
3781     duplication_counter = pllUFBootDataPtr->duplication_counter;
3782     //treels_logl
3783 //    treels_logl.clear();
3784 //    for(int i = 0; i < pllUFBootDataPtr->candidate_trees_count; i++)
3785 //        treels_logl.push_back(pllUFBootDataPtr->treels_logl[i]);
3786 
3787     //boot_trees
3788     boot_trees.clear();
3789     for(int i = 0; i < params->gbo_replicates; i++)
3790         boot_trees.push_back(pllUFBootDataPtr->boot_trees[i]);
3791 
3792 }
3793 
computeCorrelation(IntVector & ix,IntVector & iy)3794 double computeCorrelation(IntVector &ix, IntVector &iy) {
3795 
3796     ASSERT(ix.size() == iy.size());
3797     DoubleVector x;
3798     DoubleVector y;
3799 
3800     double mx = 0.0, my = 0.0; // mean value
3801     int i;
3802     x.resize(ix.size());
3803     y.resize(iy.size());
3804     for (i = 0; i < x.size(); i++) {
3805         x[i] = ix[i];
3806         y[i] = iy[i];
3807         mx += x[i];
3808         my += y[i];
3809     }
3810     mx /= x.size();
3811     my /= y.size();
3812     for (i = 0; i < x.size(); i++) {
3813         x[i] = x[i] / mx - 1.0;
3814         y[i] = y[i] / my - 1.0;
3815     }
3816 
3817     double f1 = 0.0, f2 = 0.0, f3 = 0.0;
3818     for (i = 0; i < x.size(); i++) {
3819         f1 += (x[i]) * (y[i]);
3820         f2 += (x[i]) * (x[i]);
3821         f3 += (y[i]) * (y[i]);
3822     }
3823     if (f2 == 0.0 || f3 == 0.0)
3824         return 1.0;
3825     return f1 / (sqrt(f2) * sqrt(f3));
3826 }
3827 
computeBootstrapCorrelation()3828 double IQTree::computeBootstrapCorrelation() {
3829     if (boot_splits.size() < 2)
3830         return 0.0;
3831     IntVector split_supports;
3832     SplitIntMap split_map;
3833     int i;
3834     // collect split supports
3835     SplitGraph *sg = boot_splits.back();
3836     SplitGraph *half = boot_splits[(boot_splits.size() - 1) / 2];
3837     for (i = 0; i < half->size(); i++)
3838         if (half->at(i)->trivial() == -1) {
3839             split_map.insertSplit(half->at(i), split_supports.size());
3840             split_supports.push_back((int) (half->at(i)->getWeight()));
3841         }
3842 
3843     // collect split supports for new tree collection
3844     IntVector split_supports_new;
3845     split_supports_new.resize(split_supports.size(), 0);
3846     for (i = 0; i < sg->size(); i++)
3847         if ((*sg)[i]->trivial() == -1) {
3848             int index;
3849             Split *sp = split_map.findSplit((*sg)[i], index);
3850             if (sp) {
3851                 // split found
3852                 split_supports_new[index] = (int) ((*sg)[i]->getWeight());
3853             } else {
3854                 // new split
3855                 split_supports_new.push_back((int) ((*sg)[i]->getWeight()));
3856             }
3857         }
3858     if (verbose_mode >= VB_MED)
3859         cout << split_supports_new.size() - split_supports.size() << " new splits compared to old boot_splits" << endl;
3860     if (split_supports_new.size() > split_supports.size())
3861         split_supports.resize(split_supports_new.size(), 0);
3862 
3863     // now compute correlation coefficient
3864     double corr = computeCorrelation(split_supports, split_supports_new);
3865 
3866     return corr;
3867 }
3868 
3869 //void IQTree::addPositiveNNIMove(NNIMove myMove) {
3870 //    plusNNIs.push_back(myMove);
3871 //}
3872 
printResultTree(string suffix)3873 void IQTree::printResultTree(string suffix) {
3874     if (MPIHelper::getInstance().isWorker()) {
3875         return;
3876 //        stringstream processTreeFile;
3877 //        processTreeFile << tree_file_name << "." << MPIHelper::getInstance().getProcessID();
3878 //        tree_file_name = processTreeFile.str();
3879     }
3880     if (params->suppress_output_flags & OUT_TREEFILE)
3881         return;
3882     setRootNode(params->root, true);
3883     string tree_file_name = params->out_prefix;
3884     tree_file_name += ".treefile";
3885     if (suffix.compare("") != 0) {
3886         tree_file_name += "." + suffix;
3887     }
3888     printTree(tree_file_name.c_str(), WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA | WT_NEWLINE);
3889     if (verbose_mode >= VB_MED)
3890         cout << "Best tree printed to " << tree_file_name << endl;
3891     setRootNode(params->root, false);
3892 }
3893 
printResultTree(ostream & out)3894 void IQTree::printResultTree(ostream &out) {
3895     setRootNode(params->root);
3896     printTree(out, WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA | WT_NEWLINE);
3897 }
3898 
printBestCandidateTree()3899 void IQTree::printBestCandidateTree() {
3900     if (MPIHelper::getInstance().isWorker())
3901         return;
3902     if (params->suppress_output_flags & OUT_TREEFILE)
3903         return;
3904     string tree_file_name = params->out_prefix;
3905     tree_file_name += ".treefile";
3906     readTreeString(candidateTrees.getBestTreeStrings(1)[0]);
3907     setRootNode(params->root);
3908     printTree(tree_file_name.c_str(), WT_BR_LEN | WT_BR_LEN_FIXED_WIDTH | WT_SORT_TAXA | WT_NEWLINE);
3909     if (verbose_mode >= VB_MED)
3910         cout << "Best tree printed to " << tree_file_name << endl;
3911 }
3912 
3913 
printPhylolibTree(const char * suffix)3914 void IQTree::printPhylolibTree(const char* suffix) {
3915     pllTreeToNewick(pllInst->tree_string, pllInst, pllPartitions, pllInst->start->back, PLL_TRUE, 1, 0, 0, 0,
3916             PLL_SUMMARIZE_LH, 0, 0);
3917     char phylolibTree[1024];
3918     strcpy(phylolibTree, params->out_prefix);
3919     strcat(phylolibTree, suffix);
3920     FILE *phylolib_tree = fopen(phylolibTree, "w");
3921     fprintf(phylolib_tree, "%s", pllInst->tree_string);
3922     cout << "Tree optimized by Phylolib was written to " << phylolibTree << endl;
3923 }
3924 
printIntermediateTree(int brtype)3925 void IQTree::printIntermediateTree(int brtype) {
3926     setRootNode(params->root);
3927     double *pattern_lh = NULL;
3928     double logl = curScore;
3929 
3930     if (params->print_tree_lh) {
3931         pattern_lh = new double[getAlnNPattern()];
3932         computePatternLikelihood(pattern_lh, &logl);
3933     }
3934 
3935     if (Params::getInstance().write_intermediate_trees)
3936         printTree(out_treels, brtype);
3937 
3938     if (params->print_tree_lh) {
3939         out_treelh.precision(10);
3940         out_treelh << logl;
3941         double prob;
3942         aln->multinomialProb(pattern_lh, prob);
3943         out_treelh << "\t" << prob << endl;
3944         if (!(brtype & WT_APPEND))
3945             out_sitelh << aln->getNSite() << endl;
3946         out_sitelh << "Site_Lh   ";
3947         for (int i = 0; i < aln->getNSite(); i++)
3948             out_sitelh << "\t" << pattern_lh[aln->getPatternID(i)];
3949         out_sitelh << endl;
3950         delete[] pattern_lh;
3951     }
3952     if (params->write_intermediate_trees == 1 && save_all_trees != 1) {
3953         return;
3954     }
3955     int x = save_all_trees;
3956     save_all_trees = 2;
3957     // TODO Why is evalNNI() is called in this function?
3958     //evalNNIs();
3959     Branches innerBranches;
3960     vector<NNIMove> positiveNNIs;
3961     getInnerBranches(innerBranches);
3962     evaluateNNIs(innerBranches, positiveNNIs);
3963     save_all_trees = x;
3964 }
3965 
3966 
convertNNI2Splits(SplitIntMap & nniSplits,int numNNIs,vector<NNIMove> & compatibleNNIs)3967 void IQTree::convertNNI2Splits(SplitIntMap &nniSplits, int numNNIs, vector<NNIMove> &compatibleNNIs) {
3968     for (int i = 0; i < numNNIs; i++) {
3969         Split *sp = new Split(*getSplit(compatibleNNIs[i].node1, compatibleNNIs[i].node2));
3970         if (sp->shouldInvert()) {
3971             sp->invert();
3972         }
3973         nniSplits.insertSplit(sp, 1);
3974     }
3975 }
3976 
getBestScore()3977 double IQTree::getBestScore() {
3978     return candidateTrees.getBestScore();
3979 }
3980 
getBestTrees(int numTrees)3981 vector<string> IQTree::getBestTrees(int numTrees) {
3982     return candidateTrees.getBestTreeStrings(numTrees);
3983 }
3984 
3985 
3986 /*******************************************
3987     MPI stuffs
3988 *******************************************/
3989 
syncCandidateTrees(int nTrees,bool updateStopRule)3990 void IQTree::syncCandidateTrees(int nTrees, bool updateStopRule) {
3991     if (MPIHelper::getInstance().getNumProcesses() == 1)
3992         return;
3993 
3994 #ifdef _IQTREE_MPI
3995     // gather trees to Master
3996 
3997     Checkpoint *ckp = new Checkpoint;
3998 
3999     if (MPIHelper::getInstance().isMaster()) {
4000         // update candidate set at master
4001         int trees = 0;
4002         for (int w = 1; w < MPIHelper::getInstance().getNumProcesses(); w++) {
4003             int worker = MPIHelper::getInstance().recvCheckpoint(ckp);
4004             CandidateSet cset;
4005             cset.setCheckpoint(ckp);
4006             cset.restoreCheckpoint();
4007             for (CandidateSet::iterator it = cset.begin(); it != cset.end(); it++)
4008                 addTreeToCandidateSet(it->second.tree, it->second.score, updateStopRule, worker);
4009             trees += ckp->size();
4010             ckp->clear();
4011         }
4012         cout << "Master: " << trees << " candidate trees gathered from workers" << endl;
4013         // get the best candidate trees
4014         int numTrees = max(nTrees, MPIHelper::getInstance().getNumProcesses());
4015         CandidateSet bestCandidates = candidateTrees.getBestCandidateTrees(numTrees);
4016         int saved_numNNITrees = params->numNNITrees;
4017         params->numNNITrees = numTrees;
4018         bestCandidates.setCheckpoint(ckp);
4019         bestCandidates.saveCheckpoint();
4020         params->numNNITrees = saved_numNNITrees;
4021     } else {
4022         // send candidate set to master
4023         CandidateSet cset = candidateTrees.getBestCandidateTrees();
4024         cset.setCheckpoint(ckp);
4025         cset.saveCheckpoint();
4026         MPIHelper::getInstance().sendCheckpoint(ckp, PROC_MASTER);
4027         cout << "Worker " << MPIHelper::getInstance().getProcessID() << ": " << ckp->size() << " candidate trees sent to master" << endl;
4028         ckp->clear();
4029     }
4030 
4031     if (updateStopRule && stop_rule.meetStopCondition(stop_rule.getCurIt(), 0.0)) {
4032         // 2020-04-30: send stop signal
4033         ckp->putBool("stop", true);
4034     }
4035 
4036     // broadcast candidate trees from master to worker
4037     MPIHelper::getInstance().broadcastCheckpoint(ckp);
4038     cout << ckp->size() << " trees broadcasted to workers" << endl;
4039 
4040     if (MPIHelper::getInstance().isWorker()) {
4041         // update candidate set at worker
4042         CandidateSet cset;
4043         cset.setCheckpoint(ckp);
4044         cset.restoreCheckpoint();
4045         for (CandidateSet::iterator it = cset.begin(); it != cset.end(); it++)
4046             addTreeToCandidateSet(it->second.tree, it->second.score, false, PROC_MASTER);
4047 
4048         // 2020-04-40: check stop signal
4049         if (ckp->getBool("stop")) {
4050             cout << "Worker " << MPIHelper::getInstance().getProcessID() << " gets STOP message!" << endl;
4051             stop_rule.shouldStop();
4052         }
4053     }
4054 
4055     delete ckp;
4056 #endif
4057 }
4058 
syncCurrentTree()4059 void IQTree::syncCurrentTree() {
4060     if (MPIHelper::getInstance().getNumProcesses() == 1)
4061         return;
4062 #ifdef _IQTREE_MPI
4063     //------ BLOCKING COMMUNICATION ------//
4064     Checkpoint *checkpoint = new Checkpoint;
4065     string tree;
4066     double score;
4067 
4068     if (MPIHelper::getInstance().isMaster()) {
4069         // master: receive tree from WORKERS
4070         int worker = MPIHelper::getInstance().recvCheckpoint(checkpoint);
4071         MPIHelper::getInstance().increaseTreeReceived();
4072         CKP_RESTORE(tree);
4073         CKP_RESTORE(score);
4074         int pos = addTreeToCandidateSet(tree, score, true, worker);
4075         if (pos >= 0 && pos < params->popSize) {
4076             // candidate set is changed, update for other workers
4077             for (int w = 0; w < candidateset_changed.size(); w++)
4078                 if (w != worker)
4079                     candidateset_changed[w] = true;
4080         }
4081 
4082         if (boot_samples.size() > 0) {
4083             restoreUFBoot(checkpoint);
4084         }
4085 
4086         // send candidate trees to worker
4087         checkpoint->clear();
4088         if (boot_samples.size() > 0)
4089             CKP_SAVE(logl_cutoff);
4090         if (candidateset_changed[worker]) {
4091             CandidateSet cset = candidateTrees.getBestCandidateTrees(Params::getInstance().popSize);
4092             cset.setCheckpoint(checkpoint);
4093             cset.saveCheckpoint();
4094             candidateset_changed[worker] = false;
4095             MPIHelper::getInstance().increaseTreeSent(Params::getInstance().popSize);
4096         }
4097         MPIHelper::getInstance().sendCheckpoint(checkpoint, worker);
4098     } else {
4099         // worker: always send tree to MASTER
4100         tree = getTreeString();
4101         score = curScore;
4102         CKP_SAVE(tree);
4103         CKP_SAVE(score);
4104         if (boot_samples.size() > 0) {
4105             saveUFBoot(checkpoint);
4106         }
4107         MPIHelper::getInstance().sendCheckpoint(checkpoint, PROC_MASTER);
4108         MPIHelper::getInstance().increaseTreeSent();
4109 
4110         // now receive the candidate set
4111         MPIHelper::getInstance().recvCheckpoint(checkpoint, PROC_MASTER);
4112         if (checkpoint->getBool("stop")) {
4113             cout << "Worker " << MPIHelper::getInstance().getProcessID() << " gets STOP message!" << endl;
4114             stop_rule.shouldStop();
4115         } else {
4116             CandidateSet cset;
4117             cset.setCheckpoint(checkpoint);
4118             cset.restoreCheckpoint();
4119             for (CandidateSet::iterator it = cset.begin(); it != cset.end(); it++)
4120                 addTreeToCandidateSet(it->second.tree, it->second.score, false, MPIHelper::getInstance().getProcessID());
4121             MPIHelper::getInstance().increaseTreeReceived(cset.size());
4122             if (boot_samples.size() > 0)
4123                 CKP_RESTORE(logl_cutoff);
4124         }
4125     }
4126 
4127     delete checkpoint;
4128 
4129 #endif
4130 }
4131 
sendStopMessage()4132 void IQTree::sendStopMessage() {
4133     if (MPIHelper::getInstance().getNumProcesses() == 1)
4134         return;
4135 #ifdef _IQTREE_MPI
4136 
4137     Checkpoint *checkpoint = new Checkpoint;
4138     checkpoint->putBool("stop", true);
4139     stringstream ss;
4140     checkpoint->dump(ss);
4141     string str = ss.str();
4142     string tree;
4143     double score;
4144 
4145     cout << "Sending STOP message to workers" << endl;
4146 
4147     // send STOP message to all processes
4148     if (MPIHelper::getInstance().isMaster()) {
4149         // repeatedly send stop message to all workers
4150         for (int w = 1; w < MPIHelper::getInstance().getNumProcesses(); w++) {
4151 //            string buf;
4152 //            int worker = MPIHelper::getInstance().recvString(buf);
4153             checkpoint->clear();
4154             int worker = MPIHelper::getInstance().recvCheckpoint(checkpoint);
4155             MPIHelper::getInstance().increaseTreeReceived();
4156             CKP_RESTORE(tree);
4157             CKP_RESTORE(score);
4158             addTreeToCandidateSet(tree, score, true, worker);
4159             MPIHelper::getInstance().sendString(str, worker, TREE_TAG);
4160         }
4161     }
4162 
4163     delete checkpoint;
4164 
4165     MPI_Barrier(MPI_COMM_WORLD);
4166 #endif
4167 }
4168 
warnNumThreads()4169 void PhyloTree::warnNumThreads() {
4170     if (num_threads <= 1)
4171         return;
4172     // return if -nt AUTO
4173     if (params->num_threads == 0)
4174         return;
4175     size_t nptn = getAlnNPattern();
4176     if (nptn < num_threads*vector_size)
4177         outError("Too many threads for short alignments, please reduce number of threads or use -T AUTO to determine it.");
4178     if (nptn < num_threads*400/aln->num_states)
4179         outWarning("Number of threads seems too high for short alignments. Use -T AUTO to determine best number of threads.");
4180 }
4181 
testNumThreads()4182 int PhyloTree::testNumThreads() {
4183 #ifndef _OPENMP
4184     return 1;
4185 #else
4186 	int max_procs = min(countPhysicalCPUCores()/MPIHelper::getInstance().countSameHost(), params->num_threads_max);
4187     cout << "Measuring multi-threading efficiency up to " << max_procs << " CPU cores" << endl;
4188     DoubleVector runTimes;
4189     int bestProc = 0;
4190     double saved_curScore = curScore;
4191     int num_iter = 1;
4192 
4193     // generate different trees
4194     int tree;
4195     double min_time = max_procs; // minimum time in seconds
4196     StrVector trees;
4197     trees.push_back(getTreeString());
4198     setLikelihoodKernel(sse);
4199 
4200     for (int proc = 1; proc <= max_procs; proc++) {
4201 
4202         omp_set_num_threads(proc);
4203         setNumThreads(proc);
4204         initializeAllPartialLh();
4205 
4206         double beginTime = getRealTime();
4207         double runTime, logl;
4208 
4209         for (tree = 0; tree < trees.size(); tree++) {
4210             readTreeString(trees[tree]);
4211             logl = optimizeAllBranches(num_iter);
4212             runTime = getRealTime() - beginTime;
4213 
4214             // too fast, increase number of iterations
4215             if (runTime*10 < min_time && proc == 1 && tree == 0) {
4216                 int new_num_iter = 10;
4217                 cout << "Increase to " << new_num_iter << " rounds for branch lengths" << endl;
4218                 logl = optimizeAllBranches(new_num_iter - num_iter);
4219                 num_iter = new_num_iter;
4220                 runTime = getRealTime() - beginTime;
4221             }
4222 
4223             // considering at least 2 trees
4224             if ((runTime < min_time && proc == 1) || trees.size() == 1) {
4225                 // time not reached, add more tree
4226 //                readTreeString(trees[0]);
4227 //                doRandomNNIs();
4228                 generateRandomTree(YULE_HARDING);
4229                 wrapperFixNegativeBranch(true);
4230                 trees.push_back(getTreeString());
4231             }
4232             curScore = saved_curScore;
4233         }
4234 
4235         if (proc == 1)
4236             cout << trees.size() << " trees examined" << endl;
4237 
4238         deleteAllPartialLh();
4239 
4240         runTimes.push_back(runTime);
4241         double speedup = runTimes[0] / runTime;
4242 
4243         cout << "Threads: " << proc << " / Time: " << runTime << " sec / Speedup: " << speedup
4244             << " / Efficiency: " << (int)round(speedup*100/proc) << "% / LogL: " << (int)logl << endl;
4245 
4246         // break if too bad efficiency ( < 50%) or worse than than 10% of the best run time
4247         if (speedup*2 <= proc || (runTime > runTimes[bestProc]*1.1 && proc>1))
4248             break;
4249 
4250         // update best threads if sufficient
4251         if (runTime <= runTimes[bestProc]*0.95)
4252             bestProc = proc-1;
4253 
4254     }
4255 
4256     readTreeString(trees[0]);
4257 
4258     cout << "BEST NUMBER OF THREADS: " << bestProc+1 << endl << endl;
4259     setNumThreads(bestProc+1);
4260 
4261     return bestProc+1;
4262 #endif
4263 }
4264