1 /***************************************************************************
2 * Copyright (C) 2009-2015 by *
3 * BUI Quang Minh <minh.bui@univie.ac.at> *
4 * Lam-Tung Nguyen <nltung@gmail.com> *
5 * *
6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
21 ***************************************************************************/
22 #ifndef IQPTREE_H
23 #define IQPTREE_H
24
25 #include <set>
26 #include <map>
27 #include <stack>
28 #include <vector>
29 #include "phylotree.h"
30 #include "phylonode.h"
31 #include "utils/stoprule.h"
32 #include "mtreeset.h"
33 #include "node.h"
34 #include "candidateset.h"
35 #include "utils/pllnni.h"
36
37 typedef std::map< string, double > mapString2Double;
38 typedef std::multiset< double, std::less< double > > multiSetDB;
39 typedef std::multiset< int, std::less< int > > MultiSetInt;
40
41 class RepLeaf {
42 public:
43 Node *leaf;
44 int height;
45
46 RepLeaf(Node *aleaf, int aheight = 0) {
47 leaf = aleaf;
48 height = aheight;
49 }
50 };
51
52 /**
53 nodeheightcmp, for building k-representative leaf set
54 */
55 struct nodeheightcmp {
56
operatornodeheightcmp57 bool operator()(const RepLeaf* s1, const RepLeaf * s2) const {
58 return (s1->height) < (s2->height);
59 }
60 };
61
62 struct IntBranchInfo {
63 PhyloNode *node1;
64 PhyloNode *node2;
65 double lh_contribution; // log-likelihood contribution of this branch: L(T)-L(T|e=0)
66 };
67
int_branch_cmp(const IntBranchInfo a,const IntBranchInfo b)68 inline int int_branch_cmp(const IntBranchInfo a, const IntBranchInfo b) {
69 return (a.lh_contribution < b.lh_contribution);
70 }
71
72 /**
73 Representative Leaf Set, stored as a multiset template of STL,
74 sorted in ascending order of leaf's height
75 */
76 typedef multiset<RepLeaf*, nodeheightcmp> RepresentLeafSet;
77
78 /**
79 Main class for tree search
80 */
81 class IQTree : public PhyloTree {
82 public:
83 /**
84 default constructor
85 */
86 IQTree();
87
88 IQTree(Alignment *aln);
89
90 // EIGEN_MAKE_ALIGNED_OPERATOR_NEW
91
92 /**
93 destructor
94 */
95 virtual ~IQTree();
96
97 void init();
98
99 /**
100 set checkpoint object
101 @param checkpoint
102 */
103 virtual void setCheckpoint(Checkpoint *checkpoint);
104
105 /**
106 save object into the checkpoint
107 */
108 virtual void saveCheckpoint();
109
110 /**
111 restore object from the checkpoint
112 */
113 virtual void restoreCheckpoint();
114
115 /**
116 save UFBoot_trees.
117 For MPI workers only save from sample_start to sample_end
118 @param checkpoint Checkpoint object
119 */
120 void saveUFBoot(Checkpoint *checkpoint);
121
122 /**
123
124 restore UFBoot_trees from sample_start to sample_end (MPI)
125 @param checkpoint Checkpoint object
126 */
127 void restoreUFBoot(Checkpoint *checkpoint);
128
129 /**
130 * setup all necessary parameters (declared as virtual needed for phylosupertree)
131 */
132 virtual void initSettings(Params& params);
133
134 void createPLLPartition(Params ¶ms, ostream &pllPartitionFileHandle);
135
136 void initializePLL(Params ¶ms);
137
138 bool isInitializedPLL();
139
140 virtual void initializeModel(Params ¶ms, string model_name, ModelsBlock *models_block);
141
142 /**
143 print tree to .treefile
144 @param params program parameters, field root is taken
145 */
146 virtual void printResultTree(string suffix = "");
147 /**
148 print tree to out
149 @param params program parameters, field root is taken
150 @param out (OUT) output stream
151 */
152 void printResultTree(ostream &out);
153
154 void printBestCandidateTree();
155
156 /**
157 * print phylolib tree to a file.
158 * @param suffix suffix string for the tree file
159 */
160 void printPhylolibTree(const char* suffix);
161
162
163 /**
164 * print model parameters of Phylolib(rates, base frequencies, alpha) to stdout and
165 * to file
166 */
167 //void printPhylolibModelParams(const char* suffix);
168
169 /**
170 print intermediate tree
171 */
172 void printIntermediateTree(int brtype);
173
174 /**
175 set k-representative parameter
176 @param k_rep k-representative
177 */
178 // void setRepresentNum(int k_rep);
179
180 /**
181 set the probability of deleteing sequences for IQP algorithm
182 @param p_del probability of deleting sequences
183 */
184 //void setProbDelete(double p_del);
185
186 double getProbDelete();
187
188 void resetKDelete();
189 void increaseKDelete();
190
191 /**
192 set the number of iterations for the IQPNNI algorithm
193 @param stop_condition stop condition (SC_FIXED_ITERATION, SC_STOP_PREDICT)
194 @param min_iterations the min number of iterations
195 @param max_iterations the maximum number of iterations
196 */
197 // void setIQPIterations(STOP_CONDITION stop_condition, double stop_confidence, int min_iterations, int max_iterations);
198
199 /**
200 @param assess_quartet the quartet assessment, either IQP_DISTANCE or IQP_PARSIMONY
201 */
202 //void setIQPAssessQuartet(IQP_ASSESS_QUARTET assess_quartet);
203
204 /**
205 find the k-representative leaves under the node
206 @param node the node at which the subtree is rooted
207 @param dad the dad node of the considered subtree, to direct the search
208 @param leaves (OUT) the k-representative leaf set
209 */
210 RepresentLeafSet* findRepresentLeaves(vector<RepresentLeafSet*> &leaves, int nei_id,
211 PhyloNode *dad);
212
213 /**
214 clear representative leave sets iteratively, called once a leaf is re-inserted into the tree
215 @param node the node at which the subtree is rooted
216 @param dad the dad node of the considered subtree, to direct the search
217 @param leaves (OUT) the k-representative leaf set
218 */
219 void clearRepresentLeaves(vector<RepresentLeafSet*> &leaves_vec, Node *node, Node *dad);
220
221 /**
222 remove a portion of leaves and reinsert them using the IQP algorithm
223 */
224 void doIQP();
225
226 /**
227 * @brief get non-tabu branches from a set of branches
228 *
229 * @param
230 * allBranches[IN] the inital branches
231 * @param
232 * initTabuSplits[IN] the tabu splits
233 * @param
234 * nonTabuBranches[OUT] non-tabu branches from \a allBranches
235 * @param[OUT]
236 * tabuBranches branches that are tabu
237 */
238 void getNonTabuBranches(Branches& allBranches, SplitGraph& tabuSplits, Branches& nonTabuBranches, Branches* tabuBranches = NULL);
239
240 /**
241 * @brief remove all branches corresponding to nnis
242 * @param nodes1 node vector containing one end of the branches
243 * @param nodes2 node vector containing the other end of the branches
244 * @param nnis
245 * @return
246 */
247 int removeNNIBranches(NodeVector& nodes1, NodeVector& nodes2, unordered_map<string, NNIMove> nnis);
248
249 /**
250 * Perform a series of random NNI moves
251 * @return the perturbed newick string
252 */
253 string doRandomNNIs(bool storeTabu = false);
254
255 /**
256 * Do a random NNI on splits that are shared among all the candidate trees.
257 * @return the perturbed newick string
258 */
259 string perturbStableSplits(double supportValue);
260
261
262 /**
263 * input model parameters from IQ-TREE to PLL
264 */
265 void inputModelIQTree2PLL();
266
267 /**
268 * input model parameters from PLL to IQ-TREE
269 */
270 void inputModelPLL2IQTree();
271
272 /**
273 * get the rate parameters from PLL
274 * @return double array containing the 6 rates
275 */
276 double* getModelRatesFromPLL();
277
278 /**
279 * get the alpha parameter from PLL for the GAMMA distribution of rate heterogenity
280 * @return alpha parameter
281 */
282 double getAlphaFromPLL();
283
284 /**
285 * print model parameters from PLL
286 */
287 void pllPrintModelParams();
288
289 /**
290 * input the tree string from IQTree kernel to PLL kernel
291 * @return
292 */
293 double inputTree2PLL(string treestring, bool computeLH = true);
294
295 //bool containPosNNI(vector<NNIMove> posNNIs);
296
297 /**
298 * Perturb the tree for the next round of local search by swaping position of 2 random leaves
299 * @param nbDist The minimum distance between the 2 nodes that are swapped
300 * @param nbTimes Number of times that the swap operations are carried out
301 * @return The new loglikelihood of the tree
302 */
303 double perturb(int times);
304
305 /**
306 * TODO
307 * @param node1
308 * @param node2
309 * @return
310 */
311 double swapTaxa(PhyloNode *node1, PhyloNode *node2);
312
313
314 /**
315 perform tree search
316 @return best likelihood found
317 */
318 virtual double doTreeSearch();
319
320 /**
321 * Wrapper function that uses either PLL or IQ-TREE to optimize the branch length
322 * @param maxTraversal
323 * maximum number of tree traversal for branch length optimization
324 * @return NEWICK tree string
325 */
326 string optimizeBranches(int maxTraversal = 100);
327
328 /**
329 * Wrapper function to compute tree log-likelihood.
330 * This function with call either PLL or IQ-TREE to compute tree log-likelihood
331 * @return current score of tree
332 */
333 double computeLogL();
334
335 /**
336 * Print scores of tree used for generating offsprings
337 */
338 void printBestScores();
339
340 /****************************************************************************
341 Fast Nearest Neighbor Interchange by maximum likelihood
342 ****************************************************************************/
343
344
345 /**
346 * Optimize current tree using NNI
347 *
348 * @return
349 * <number of NNI steps, number of NNIs> done
350 */
351 virtual pair<int, int> optimizeNNI(bool speedNNI = true);
352
353 /**
354 * Return the current best score found
355 */
356 double getBestScore();
357
358 /**
359 * @brief Generate a list of internal branches on which NNI moves will be evaluated
360 * @param
361 * nonNNIBranches [OUT] Branches on which NNI evaluation will be skipped
362 * @param
363 * tabuSplits [IN] A list of splits that are considered tabu
364 * @param
365 * candidateSplitHash [IN] Lists that appear on the best 20 candidate trees
366 * @param
367 * dad [IN] for navigation
368 * @param
369 * node[IN] for navigation
370 * @return A list of branches for evaluating NNIs
371 */
372 void getNNIBranches(SplitIntMap &tabuSplits, SplitIntMap &candidateSplitHash, Branches &nonNNIBranches, Branches &outBranches, Node *dad = NULL, Node *node = NULL);
373
374 /**
375 * Return internal branches that appear in \a candidateSplitHash
376 * and has support value >= \a supportValue.
377 * @param
378 * candidateSplitHash [IN] A set of splits with the number of occurences.
379 * @param
380 * supportValue [IN] Only consider split whose support value is higher than this number
381 * @param
382 * dad [IN] for navigation
383 * @param
384 * node[IN] for navigation
385 * @return
386 * A list of branches fufilling the aforementioned conditions.
387 */
388 void getStableBranches(SplitIntMap &candSplits, double supportValue, Branches &outBranches, Node *dad = NULL, Node *node = NULL);
389
390
391 /**
392 *
393 * Determine whether to evaluate NNI moves on the branch corresponding to the current split
394 *
395 * @param curSplit [IN] the split that correspond to the current branch
396 * @param tabuSplits [IN] tabu splits
397 * @param candSplits [IN] splits contained in all candidate trees
398 * @param nonNNIBranches [OUT] branches that are not inserted to nniBranches are store here
399 * @param nniBranches [OUT] if the split is neither stable nor tabu it is inserted in this list
400 */
401 bool shouldEvaluate(Split* curSplit, SplitIntMap &tabuSplits, SplitIntMap &candSplits);
402
403
404 /**
405 * @brief Only select NNI branches that are 2 branches away from the previously
406 * appied NNIs
407 * @param
408 * appliedNNIs List of previously applied NNIs
409 * @return
410 * List of branches to be evaluated
411 */
412 void filterNNIBranches(vector<NNIMove> &appliedNNIs, Branches &outBranches);
413
414
415 /**
416 * @brief get branches that correspond to the splits in \a nniSplits
417 */
418 void getSplitBranches(Branches &branches, SplitIntMap &splits, Node *dad = NULL, Node *node = NULL);
419
420 /**
421 * Do fastNNI using PLL
422 *
423 * @param nniCount (OUT) number of NNIs applied
424 * @param nniSteps (OUT) number of NNI steps done
425 */
426 double pllOptimizeNNI(int &nniCount, int &nniSteps, SearchInfo &searchinfo);
427
428 /**
429 * @brief Perform NNI search on the current tree topology
430 * @return <number_of_NNIs, number_of_NNI_steps>
431 * This function will automatically use the selected kernel (either PLL or IQ-TREE)
432 */
433 virtual pair<int, int> doNNISearch(bool write_info = false);
434
435 /**
436 @brief evaluate all NNIs
437 @param node evaluate all NNIs of the subtree rooted at node
438 @param dad a neighbor of \p node which does not belong to the subtree
439 being considered (used for traverse direction)
440
441 */
442 //void evalNNIs(PhyloNode *node = NULL, PhyloNode *dad = NULL);
443
444 /**
445 * @brief Evaluate all NNIs on branch defined by \a branches
446 *
447 * @param nniBranches [IN] branches the branches on which NNIs will be evaluated
448 * @return list positive NNIs
449 */
450 void evaluateNNIs(Branches &nniBranches, vector<NNIMove> &outNNIMoves);
451
452 double optimizeNNIBranches(Branches &nniBranches);
453
454 /**
455 search all positive NNI move on the current tree and save them
456 on the possilbleNNIMoves list
457 */
458 void evalNNIsSort(bool approx_nni);
459
460 /**
461 apply NNIs from the non-conflicting NNI list
462 @param compatibleNNIs vector of all compatible NNIs
463 @param changeBran whether or not the computed branch lengths should be applied
464 */
465 virtual void doNNIs(vector<NNIMove> &compatibleNNIs, bool changeBran = true);
466
467 /**
468 * Restore the old 5 branch lengths stored in the NNI move.
469 * This is called after an NNI is reverted.
470 * @param nnimove the NNI move currently in consideration
471 */
472 //void restoreNNIBranches(NNIMove nnimove);
473
474
475 /**
476 * @brief get a list of compatible NNIs from a list of NNIs
477 * @param nniMoves [IN] list of NNIs
478 * @return list of compatible NNIs
479 */
480 void getCompatibleNNIs(vector<NNIMove> &nniMoves, vector<NNIMove> &compatibleNNIs);
481
482 /**
483 add a NNI move to the list of possible NNI moves;
484 */
485 void addPositiveNNIMove(NNIMove &myMove);
486
487 /**
488 * Estimate the 95% quantile of the distribution of N (see paper for more d
489 details)
490 * @return the estimated value
491 */
492 inline double estN95(void);
493
494 /**
495 * Estimate the median of the distribution of N (see paper for more d
496 details)
497 * @return the estimated value
498 */
499 double getAvgNumNNI(void);
500
501 /**
502 * Estimate the median of the distribution of N (see paper for more d
503 details)
504 * @return the estimated value
505 */
506 double estDeltaMedian(void);
507
508 /**
509 * Estimate the 95% quantile of the distribution of DELTA (see paper for
510 more detail)
511 * @return the estimated value
512 */
513 inline double estDelta95(void);
514
515 /**
516 current parsimony score of the tree
517 */
518 int cur_pars_score;
519
520 // bool enable_parsimony;
521 /**
522 stopping rule
523 */
524 StopRule stop_rule;
525
526 /**
527 * Parsimony scores, used for linear regression
528 */
529 double* pars_scores;
530
531 /**
532 Log-likelihood variastring IQPTree::bran2string(PhyloNode* node1, PhyloNode* node2)nce
533 */
534 double logl_variance;
535
536 /**
537 * The coressponding log-likelihood score from computed indendently from the parsimony
538 * scores
539 */
540 double* lh_scores;
541
542 Linear* linRegModel;
543
544
getNNICutoff()545 inline double getNNICutoff() {
546 return nni_cutoff;
547 }
548
549 /*
550 * Contains a sorted list of all NNIs (2n-6) evaluated for the current best tree
551 * The last element (nni_for_pertub.end()) is the best NNI
552 */
553 vector<pllNNIMove> nniListOfBestTree;
554
555
556 /**
557 * information and parameters for the tree search procedure
558 */
559 SearchInfo searchinfo;
560
561 /**
562 * Vector contains number of NNIs used at each iterations
563 */
564 vector<int> vecNumNNI;
565
566
567 /**
568 * Do memory allocation and initialize parameter for UFBoot to run with PLL
569 */
570 void pllInitUFBootData();
571
572 /**
573 * Do memory deallocation for UFBoot data (PLL mode)
574 */
575 void pllDestroyUFBootData();
576
577 /**
578 * DTH:
579 * Substitute bases in seq according to PLL's rules
580 * This function should be updated if PLL's rules change.
581 * @param seq: data of some sequence to be substituted
582 * @param dataType: PLL_DNA_DATA or PLL_AA_DATA
583 */
584 void pllBaseSubstitute (char *str, int dataType);
585
586 /*
587 * An array to map site index in pllAlignment into IQTree pattern index
588 * Born due to the order difference of these two
589 * Will be deallocated in pllDestroyUFBootData()
590 */
591 int * pll2iqtree_pattern_index;
592
593 /*
594 * Build pll2iqtree_pattern_index
595 * Must be called AFTER initializing PLL model
596 */
597 void pllBuildIQTreePatternIndex();
598
599 /**
600 * FOR TESTING:
601 * Write to log file the freq of pllAlignment sites, and
602 * freq of bootstrap site stored in pllUFBootDataPtr->boot_samples
603 */
604 void pllLogBootSamples(int** pll_boot_samples, int nsamples, int npatterns);
605
606 /**
607 * Convert certain arrays in pllUFBootDataPtr
608 * into IQTree data structures
609 * to be usable in IQTree::summarizeBootstrap()
610 */
611 void pllConvertUFBootData2IQTree();
612
613 protected:
614 /**
615 * Splits corresponding to random NNIs
616 */
617 SplitIntMap initTabuSplits;
618
619 /**
620 criterion to assess important quartet
621 */
622 IQP_ASSESS_QUARTET iqp_assess_quartet;
623
624
625 /**
626 * Taxa set
627 */
628 NodeVector taxaSet;
629
630 /**
631 * Vector contains approximated improvement pro NNI at each iterations
632 */
633 vector<double> vecImpProNNI;
634
635 /**
636 Optimal branch lengths
637 */
638 // mapString2Double optBrans;
639
640 /**
641 * @brief get branches, on which NNIs are evaluated for the next NNI step.
642 * @param[out] nodes1 one ends of the branches
643 * @param[out] nodes2 the other ends of the branches
644 * @param[in] nnis NNIs that have been previously applied
645 */
646 void generateNNIBranches(NodeVector& nodes1, NodeVector& nodes2, unordered_map<string, NNIMove>& nnis);
647
648 int k_delete, k_delete_min, k_delete_max, k_delete_stay;
649
650 /**
651 number of representative leaves for IQP step
652 */
653 int k_represent;
654
655 public:
656
657 /**
658 * Candidate tree set (the current best N (default N = 5)
659 * NNI-optimal trees
660 */
661 CandidateSet candidateTrees;
662
663 /**
664 * Set of all intermediate trees (initial trees, tree generated by NNI steps,
665 * NNI-optimal trees)
666 */
667 CandidateSet intermediateTrees;
668
669
670 /**
671 * Update the candidate set with a new NNI-optimal tree. The maximum size of the candidate set
672 * is fixed to the initial setting. Thus, if the size exceed the maximum number of trees, the worse
673 * tree will be removed.
674 *
675 * @param treeString
676 * the new tree
677 * @param score
678 * the score of the new tree
679 * @param updateStopRule
680 * Whether or not to update the stop rule
681 * @return relative position of the new tree to the current best.
682 * -1 if duplicated
683 * -2 if the candidate set is not updated
684 */
685 int addTreeToCandidateSet(string treeString, double score, bool updateStopRule, int sourceProcID);
686
687 /**
688 MPI: synchronize candidate trees between all processes
689 @param nTrees number of trees to broadcast
690 @param updateStopRule true to update stopping rule, false otherwise
691 */
692 void syncCandidateTrees(int nTrees, bool updateStopRule);
693
694 /**
695 MPI: synchronize tree of current iteration with master
696 will update candidateset_changed
697 @param curTree current tree
698
699 */
700 void syncCurrentTree();
701
702 /**
703 MPI: Master sends stop message to all workers
704 */
705 void sendStopMessage();
706
707 /**
708 * Generate the initial parsimony/random trees, called by initCandidateTreeSet
709 * @param nParTrees number of parsimony/random trees to generate
710 */
711 void createInitTrees(int nParTrees);
712
713 /**
714 * Generate the initial candidate tree set
715 * @param nParTrees number of parsimony trees to generate
716 * @param nNNITrees number of NNI locally optimal trees to generate
717 */
718 void initCandidateTreeSet(int nParTrees, int nNNITrees);
719
720 /**
721 * Generate the initial tree (usually used for model parameter estimation)
722 */
723 void computeInitialTree(LikelihoodKernel kernel);
724
725 /**
726 * @brief: optimize model parameters on the current tree
727 * either IQ-TREE or PLL
728 * @param printInfo to print model parameters to the screen or not
729 * @param epsilon likelihood epsilon for optimization
730 *
731 */
732 string optimizeModelParameters(bool printInfo = false, double epsilon = -1);
733
734 /**
735 * variable storing the current best tree topology
736 */
737 topol* pllBestTree;
738
739 /****** following variables are for ultra-fast bootstrap *******/
740
741 /** TRUE to save also branch lengths into treels_newick */
742 // bool save_all_br_lens;
743
744 /**
745 this keeps the list of intermediate trees.
746 it will be activated if params.avoid_duplicated_trees is TRUE.
747 */
748 // StringIntMap treels;
749
750 /** pattern log-likelihood vector for each treels */
751 // vector<double* > treels_ptnlh;
752
753 /** OBSOLETE: tree log-likelihood for each treels */
754 // DoubleVector treels_logl;
755
756 /** NEWICK string for each treels */
757 // StrVector treels_newick;
758
759 /** OBSOLETE: maximum number of distinct candidate trees (tau parameter) */
760 // int max_candidate_trees;
761
762 /** log-likelihood threshold (l_min) */
763 double logl_cutoff;
764
765 /** vector of bootstrap alignments generated */
766 vector<BootValType* > boot_samples;
767
768 /** starting sample for UFBoot, used for MPI */
769 int sample_start;
770
771 /** end sample for UFBoot, used for MPI */
772 int sample_end;
773
774 /** newick string of corresponding bootstrap trees */
775 StrVector boot_trees;
776
777 /** bootstrap tree strings with branch lengths, for -wbtl option */
778 // StrVector boot_trees_brlen;
779
780 /** number of multiple optimal trees per replicate */
781 IntVector boot_counts;
782
783 /** corresponding RELL log-likelihood */
784 DoubleVector boot_logl;
785
786 /** corresponding log-likelihood on original alignment */
787 DoubleVector boot_orig_logl;
788
789 /** Set of splits occurring in bootstrap trees */
790 vector<SplitGraph*> boot_splits;
791
792 /** log-likelihood of bootstrap consensus tree */
793 double boot_consense_logl;
794
795 /** Robinson-Foulds distance between contree and ML tree */
796 int contree_rfdist;
797
798 /** Corresponding map for set of splits occurring in bootstrap trees */
799 //SplitIntMap boot_splits_map;
800
801 /** summarize all bootstrap trees */
802 void summarizeBootstrap(Params ¶ms, MTreeSet &trees);
803
804 /** summarize bootstrap trees */
805 virtual void summarizeBootstrap(Params ¶ms);
806
807 /** summarize bootstrap trees into split set */
808 void summarizeBootstrap(SplitGraph &sg);
809
810 /**
811 write .ufboot trees file
812 */
813 virtual void writeUFBootTrees(Params ¶ms);
814
815 /** @return bootstrap correlation coefficient for assessing convergence */
816 double computeBootstrapCorrelation();
817
818 int getDelete() const;
819 void setDelete(int _delete);
820
821 protected:
822 /**** NNI cutoff heuristic *****/
823 /**
824 */
825 vector<NNIInfo> nni_info;
826
827 bool estimate_nni_cutoff;
828
829 double nni_cutoff;
830
831 bool nni_sort;
832
833 bool testNNI;
834
835 ofstream outNNI;
836 protected:
837
838 //bool print_tree_lh;
839
840 //int write_intermediate_trees;
841
842 ofstream out_treels, out_treelh, out_sitelh, out_treebetter;
843 string treels_name, out_lh_file, site_lh_file;
844
845 void estimateNNICutoff(Params* params);
846
847 virtual void saveCurrentTree(double logl); // save current tree
848
849
850 void saveNNITrees(PhyloNode *node = NULL, PhyloNode *dad = NULL);
851
852 int duplication_counter;
853
854 // MPI: vector of size = num processes, true if master should send candidate set to worker
855 BoolVector candidateset_changed;
856
857 // true if best candidate tree is changed
858 bool bestcandidate_changed;
859
860 /**
861 number of IQPNNI iterations
862 */
863 //int iqpnni_iterations;
864
865 /**
866 bonus values of all branches, used for IQP algorithm
867 */
868 //double *bonus_values;
869
870 /**
871 delete a set of leaves from tree (with the probability p_delete), assume tree is birfucating
872 @param del_leaves (OUT) the list of deleted leaves
873 */
874 void deleteLeaves(PhyloNodeVector &del_leaves);
875
876 void deleteNonTabuLeaves(PhyloNodeVector &del_leaves);
877
878 /**
879 * delete a set of leaves from tree
880 * non-cherry leaves are selected first
881 * @param del_leaves (OUT) the list of deleted leaves
882 */
883 void deleteNonCherryLeaves(PhyloNodeVector &del_leaves);
884
885 /**
886 reinsert the whole list of leaves back into the tree
887 @param del_leaves the list of deleted leaves, returned by deleteLeaves() function
888 */
889 virtual void reinsertLeaves(PhyloNodeVector &del_leaves);
890
891 void reinsertLeavesByParsimony(PhyloNodeVector &del_leaves);
892
893
894 void doParsimonyReinsertion();
895
896
897 /**
898 assess a quartet with four taxa. Current implementation uses the four-point condition
899 based on distance matrix for quick evaluation.
900 @param leaf0 one of the leaf in the existing sub-tree
901 @param leaf1 one of the leaf in the existing sub-tree
902 @param leaf2 one of the leaf in the existing sub-tree
903 @param del_leaf a leaf that was deleted (not in the existing sub-tree)
904 @return 0, 1, or 2 depending on del_leaf should be in subtree containing leaf0, leaf1, or leaf2, respectively
905 */
906 int assessQuartet(Node *leaf0, Node *leaf1, Node *leaf2, Node *del_leaf);
907
908 /**
909 assess a quartet with four taxa using parsimony
910 @param leaf0 one of the leaf in the existing sub-tree
911 @param leaf1 one of the leaf in the existing sub-tree
912 @param leaf2 one of the leaf in the existing sub-tree
913 @param del_leaf a leaf that was deleted (not in the existing sub-tree)
914 @return 0, 1, or 2 depending on del_leaf should be in subtree containing leaf0, leaf1, or leaf2, respectively
915 */
916 int assessQuartetParsimony(Node *leaf0, Node *leaf1, Node *leaf2,
917 Node *del_leaf);
918
919 /**
920 assess the important quartets around a virtual root of the tree.
921 This function will assign bonus points to branches by updating the variable 'bonus_values'
922 @param cur_root the current virtual root
923 @param del_leaf a leaf that was deleted (not in the existing sub-tree)
924 */
925 void assessQuartets(vector<RepresentLeafSet*> &leaves_vec, PhyloNode *cur_root, PhyloNode *del_leaf);
926
927 /**
928 initialize the bonus points to ZERO
929 @param node the root of the sub-tree
930 @param dad dad of 'node', used to direct the recursion
931 */
932 void initializeBonus(PhyloNode *node = NULL, PhyloNode *dad = NULL);
933
934 /**
935 raise the bonus points for all branches in the subtree rooted at a node
936 @param node the root of the sub-tree
937 @param dad dad of 'node', used to direct the recursion
938 */
939 void raiseBonus(Neighbor *nei, Node *dad, double bonus);
940
941 /**
942 Bonuses are stored in a partial fashion. This function will propagate the bonus at every branch
943 into the subtree at this branch.
944 @param node the root of the sub-tree
945 @param dad dad of 'node', used to direct the recursion
946 @return the partial bonus of the branch (node -> dad)
947 */
948 double computePartialBonus(Node *node, Node* dad);
949
950 /**
951 determine the list of branches with the same best bonus point
952 @param best_bonus the best bonus determined by findBestBonus()
953 @param best_nodes (OUT) vector of one ends of the branches with highest bonus point
954 @param best_dads (OUT) vector of the other ends of the branches with highest bonus point
955 @param node the root of the sub-tree
956 @param dad dad of 'node', used to direct the recursion
957 */
958 void findBestBonus(double &best_score, NodeVector &best_nodes, NodeVector &best_dads, Node *node = NULL, Node *dad = NULL);
959
960 void estDeltaMin();
961
962 void convertNNI2Splits(SplitIntMap &nniSplits, int numNNIs, vector<NNIMove> &compatibleNNIs);
963
964 string generateParsimonyTree(int randomSeed);
965
966 double doTreePerturbation();
967
968 void estimateLoglCutoffBS();
969
970 //void estimateNNICutoff(Params ¶ms);
971
972 public:
973 /**
974 * Return best tree string from the candidate set
975 *
976 * @param numTrees
977 * Number of best trees to return
978 * @return
979 * A string vector of trees
980 */
981 vector<string> getBestTrees(int numTrees = 0);
982
983 /**
984 * Print the iteration number and the tree score
985 */
986 void printIterationInfo(int sourceProcID);
987
988 /**
989 * Return branches that are 2 branches away from the branches, on which NNIs were applied
990 * in the previous NNI steps.
991 * @param
992 * previousNNIBranches[IN] a set of branches on which NNIs were performed in the previous NNI step.
993 * @return
994 * a set of branches, on which NNIs should be evaluated for the current NNI steps
995 */
996 Branches getReducedListOfNNIBranches(Branches &previousNNIBranches);
997
998
999 // Diep added for UFBoot2-Corr
1000 void refineBootTrees();
1001 bool on_refine_btree;
1002 Alignment* saved_aln_on_refine_btree;
1003 vector<IntVector> boot_samples_int;
1004 };
1005 #endif
1006