1 /***************************************************************************
2  *   Copyright (C) 2009-2015 by                                            *
3  *   BUI Quang Minh <minh.bui@univie.ac.at>                                *
4  *   Lam-Tung Nguyen <nltung@gmail.com>                                    *
5  *                                                                         *
6  *                                                                         *
7  *   This program is free software; you can redistribute it and/or modify  *
8  *   it under the terms of the GNU General Public License as published by  *
9  *   the Free Software Foundation; either version 2 of the License, or     *
10  *   (at your option) any later version.                                   *
11  *                                                                         *
12  *   This program is distributed in the hope that it will be useful,       *
13  *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
14  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
15  *   GNU General Public License for more details.                          *
16  *                                                                         *
17  *   You should have received a copy of the GNU General Public License     *
18  *   along with this program; if not, write to the                         *
19  *   Free Software Foundation, Inc.,                                       *
20  *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
21  ***************************************************************************/
22 #ifndef IQPTREE_H
23 #define IQPTREE_H
24 
25 #include <set>
26 #include <map>
27 #include <stack>
28 #include <vector>
29 #include "phylotree.h"
30 #include "phylonode.h"
31 #include "utils/stoprule.h"
32 #include "mtreeset.h"
33 #include "node.h"
34 #include "candidateset.h"
35 #include "utils/pllnni.h"
36 
37 typedef std::map< string, double > mapString2Double;
38 typedef std::multiset< double, std::less< double > > multiSetDB;
39 typedef std::multiset< int, std::less< int > > MultiSetInt;
40 
41 class RepLeaf {
42 public:
43     Node *leaf;
44     int height;
45 
46     RepLeaf(Node *aleaf, int aheight = 0) {
47         leaf = aleaf;
48         height = aheight;
49     }
50 };
51 
52 /**
53         nodeheightcmp, for building k-representative leaf set
54  */
55 struct nodeheightcmp {
56 
operatornodeheightcmp57     bool operator()(const RepLeaf* s1, const RepLeaf * s2) const {
58         return (s1->height) < (s2->height);
59     }
60 };
61 
62 struct IntBranchInfo {
63     PhyloNode *node1;
64     PhyloNode *node2;
65     double lh_contribution; // log-likelihood contribution of this branch: L(T)-L(T|e=0)
66 };
67 
int_branch_cmp(const IntBranchInfo a,const IntBranchInfo b)68 inline int int_branch_cmp(const IntBranchInfo a, const IntBranchInfo b) {
69     return (a.lh_contribution < b.lh_contribution);
70 }
71 
72 /**
73         Representative Leaf Set, stored as a multiset template of STL,
74         sorted in ascending order of leaf's height
75  */
76 typedef multiset<RepLeaf*, nodeheightcmp> RepresentLeafSet;
77 
78 /**
79     Main class for tree search
80  */
81 class IQTree : public PhyloTree {
82 public:
83     /**
84             default constructor
85      */
86     IQTree();
87 
88     IQTree(Alignment *aln);
89 
90 //    EIGEN_MAKE_ALIGNED_OPERATOR_NEW
91 
92     /**
93             destructor
94      */
95     virtual ~IQTree();
96 
97     void init();
98 
99     /**
100         set checkpoint object
101         @param checkpoint
102     */
103     virtual void setCheckpoint(Checkpoint *checkpoint);
104 
105     /**
106         save object into the checkpoint
107     */
108     virtual void saveCheckpoint();
109 
110     /**
111         restore object from the checkpoint
112     */
113     virtual void restoreCheckpoint();
114 
115     /**
116         save UFBoot_trees.
117         For MPI workers only save from sample_start to sample_end
118         @param checkpoint Checkpoint object
119     */
120     void saveUFBoot(Checkpoint *checkpoint);
121 
122     /**
123 
124         restore UFBoot_trees from sample_start to sample_end (MPI)
125         @param checkpoint Checkpoint object
126     */
127     void restoreUFBoot(Checkpoint *checkpoint);
128 
129     /**
130      * setup all necessary parameters  (declared as virtual needed for phylosupertree)
131      */
132     virtual void initSettings(Params& params);
133 
134     void createPLLPartition(Params &params, ostream &pllPartitionFileHandle);
135 
136     void initializePLL(Params &params);
137 
138     bool isInitializedPLL();
139 
140     virtual void initializeModel(Params &params, string model_name, ModelsBlock *models_block);
141 
142     /**
143             print tree to .treefile
144             @param params program parameters, field root is taken
145      */
146     virtual void printResultTree(string suffix = "");
147     /**
148             print tree to out
149             @param params program parameters, field root is taken
150             @param out (OUT) output stream
151      */
152     void printResultTree(ostream &out);
153 
154     void printBestCandidateTree();
155 
156     /**
157      * print phylolib tree to a file.
158      * @param suffix suffix string for the tree file
159      */
160     void printPhylolibTree(const char* suffix);
161 
162 
163     /**
164      *  print model parameters of Phylolib(rates, base frequencies, alpha) to stdout and
165      *  to file
166      */
167     //void printPhylolibModelParams(const char* suffix);
168 
169     /**
170         print intermediate tree
171      */
172     void printIntermediateTree(int brtype);
173 
174     /**
175             set k-representative parameter
176             @param k_rep k-representative
177      */
178     // void setRepresentNum(int k_rep);
179 
180     /**
181             set the probability of deleteing sequences for IQP algorithm
182             @param p_del probability of deleting sequences
183      */
184     //void setProbDelete(double p_del);
185 
186     double getProbDelete();
187 
188     void resetKDelete();
189     void increaseKDelete();
190 
191     /**
192             set the number of iterations for the IQPNNI algorithm
193             @param stop_condition stop condition (SC_FIXED_ITERATION, SC_STOP_PREDICT)
194             @param min_iterations the min number of iterations
195             @param max_iterations the maximum number of iterations
196      */
197 //    void setIQPIterations(STOP_CONDITION stop_condition, double stop_confidence, int min_iterations, int max_iterations);
198 
199     /**
200             @param assess_quartet the quartet assessment, either IQP_DISTANCE or IQP_PARSIMONY
201      */
202     //void setIQPAssessQuartet(IQP_ASSESS_QUARTET assess_quartet);
203 
204     /**
205             find the k-representative leaves under the node
206             @param node the node at which the subtree is rooted
207             @param dad the dad node of the considered subtree, to direct the search
208             @param leaves (OUT) the k-representative leaf set
209      */
210     RepresentLeafSet* findRepresentLeaves(vector<RepresentLeafSet*> &leaves, int nei_id,
211             PhyloNode *dad);
212 
213     /**
214             clear representative leave sets iteratively, called once a leaf is re-inserted into the tree
215             @param node the node at which the subtree is rooted
216             @param dad the dad node of the considered subtree, to direct the search
217             @param leaves (OUT) the k-representative leaf set
218      */
219     void clearRepresentLeaves(vector<RepresentLeafSet*> &leaves_vec, Node *node, Node *dad);
220 
221     /**
222             remove a portion of leaves and reinsert them using the IQP algorithm
223      */
224     void doIQP();
225 
226     /**
227      *  @brief get non-tabu branches from a set of branches
228      *
229      *  @param
230      *      allBranches[IN] the inital branches
231      *  @param
232      *      initTabuSplits[IN] the tabu splits
233      *  @param
234      *        nonTabuBranches[OUT] non-tabu branches from \a allBranches
235      *    @param[OUT]
236      *        tabuBranches branches that are tabu
237      */
238     void getNonTabuBranches(Branches& allBranches, SplitGraph& tabuSplits, Branches& nonTabuBranches, Branches* tabuBranches = NULL);
239 
240     /**
241      * @brief remove all branches corresponding to nnis
242      * @param nodes1 node vector containing one end of the branches
243      * @param nodes2 node vector containing the other end of the branches
244      * @param nnis
245      * @return
246      */
247     int removeNNIBranches(NodeVector& nodes1, NodeVector& nodes2, unordered_map<string, NNIMove> nnis);
248 
249     /**
250      *         Perform a series of random NNI moves
251      *         @return the perturbed newick string
252      */
253     string doRandomNNIs(bool storeTabu = false);
254 
255     /**
256      *  Do a random NNI on splits that are shared among all the candidate trees.
257      *  @return the perturbed newick string
258      */
259     string perturbStableSplits(double supportValue);
260 
261 
262     /**
263      *   input model parameters from IQ-TREE to PLL
264      */
265     void inputModelIQTree2PLL();
266 
267     /**
268      *  input model parameters from PLL to IQ-TREE
269      */
270     void inputModelPLL2IQTree();
271 
272     /**
273      *  get the rate parameters from PLL
274      *  @return double array containing the 6 rates
275      */
276     double* getModelRatesFromPLL();
277 
278     /**
279      *  get the alpha parameter from PLL for the GAMMA distribution of rate heterogenity
280      *  @return alpha parameter
281      */
282     double getAlphaFromPLL();
283 
284     /**
285      *  print model parameters from PLL
286      */
287     void pllPrintModelParams();
288 
289     /**
290      * input the tree string from IQTree kernel to PLL kernel
291      * @return
292      */
293     double inputTree2PLL(string treestring, bool computeLH = true);
294 
295     //bool containPosNNI(vector<NNIMove> posNNIs);
296 
297     /**
298      * Perturb the tree for the next round of local search by swaping position of 2 random leaves
299      * @param nbDist The minimum distance between the 2 nodes that are swapped
300      * @param nbTimes Number of times that the swap operations are carried out
301      * @return The new loglikelihood of the tree
302      */
303     double perturb(int times);
304 
305     /**
306      * TODO
307      * @param node1
308      * @param node2
309      * @return
310      */
311     double swapTaxa(PhyloNode *node1, PhyloNode *node2);
312 
313 
314     /**
315             perform tree search
316             @return best likelihood found
317      */
318     virtual double doTreeSearch();
319 
320     /**
321      *  Wrapper function that uses either PLL or IQ-TREE to optimize the branch length
322      *  @param maxTraversal
323      *      maximum number of tree traversal for branch length optimization
324      *  @return NEWICK tree string
325      */
326     string optimizeBranches(int maxTraversal = 100);
327 
328     /**
329      *  Wrapper function to compute tree log-likelihood.
330      *  This function with call either PLL or IQ-TREE to compute tree log-likelihood
331      *  @return current score of tree
332      */
333     double computeLogL();
334 
335     /**
336      *    Print scores of tree used for generating offsprings
337      */
338     void printBestScores();
339 
340     /****************************************************************************
341             Fast Nearest Neighbor Interchange by maximum likelihood
342      ****************************************************************************/
343 
344 
345     /**
346      *  Optimize current tree using NNI
347      *
348      *  @return
349      *      <number of NNI steps, number of NNIs> done
350      */
351     virtual pair<int, int> optimizeNNI(bool speedNNI = true);
352 
353     /**
354      *  Return the current best score found
355      */
356     double getBestScore();
357 
358     /**
359      * @brief Generate a list of internal branches on which NNI moves will be evaluated
360      * @param
361      *      nonNNIBranches [OUT] Branches on which NNI evaluation will be skipped
362      * @param
363      *      tabuSplits [IN] A list of splits that are considered tabu
364      * @param
365      *      candidateSplitHash [IN] Lists that appear on the best 20 candidate trees
366      * @param
367      *      dad [IN] for navigation
368      * @param
369      *      node[IN] for navigation
370      * @return A list of branches for evaluating NNIs
371      */
372     void getNNIBranches(SplitIntMap &tabuSplits, SplitIntMap &candidateSplitHash, Branches &nonNNIBranches, Branches &outBranches, Node *dad = NULL, Node *node = NULL);
373 
374     /**
375      *  Return internal branches that appear in \a candidateSplitHash
376      *  and has support value >= \a supportValue.
377      *  @param
378      *      candidateSplitHash [IN]   A set of splits with the number of occurences.
379      *  @param
380      *      supportValue [IN]  Only consider split whose support value is higher than this number
381      *  @param
382      *      dad [IN] for navigation
383      *  @param
384      *      node[IN] for navigation
385      *  @return
386      *      A list of branches fufilling the aforementioned conditions.
387      */
388     void getStableBranches(SplitIntMap &candSplits, double supportValue, Branches &outBranches, Node *dad = NULL, Node *node = NULL);
389 
390 
391     /**
392      *
393      *  Determine whether to evaluate NNI moves on the branch corresponding to the current split
394      *
395      *  @param curSplit [IN] the split that correspond to the current branch
396      *  @param tabuSplits [IN] tabu splits
397      *  @param candSplits [IN] splits contained in all candidate trees
398      *  @param nonNNIBranches [OUT] branches that are not inserted to nniBranches are store here
399      *  @param nniBranches [OUT] if the split is neither stable nor tabu it is inserted in this list
400      */
401     bool shouldEvaluate(Split* curSplit, SplitIntMap &tabuSplits, SplitIntMap &candSplits);
402 
403 
404     /**
405      *  @brief Only select NNI branches that are 2 branches away from the previously
406      *  appied NNIs
407      *  @param
408      *      appliedNNIs List of previously applied NNIs
409      *  @return
410      *      List of branches to be evaluated
411      */
412     void filterNNIBranches(vector<NNIMove> &appliedNNIs, Branches &outBranches);
413 
414 
415     /**
416      * @brief get branches that correspond to the splits in \a nniSplits
417      */
418     void getSplitBranches(Branches &branches, SplitIntMap &splits, Node *dad = NULL, Node *node = NULL);
419 
420     /**
421      *         Do fastNNI using PLL
422      *
423      *      @param nniCount (OUT) number of NNIs applied
424      *         @param nniSteps (OUT) number of NNI steps done
425      */
426     double pllOptimizeNNI(int &nniCount, int &nniSteps, SearchInfo &searchinfo);
427 
428     /**
429      *         @brief Perform NNI search on the current tree topology
430      *         @return <number_of_NNIs, number_of_NNI_steps>
431      *         This function will automatically use the selected kernel (either PLL or IQ-TREE)
432      */
433     virtual pair<int, int> doNNISearch(bool write_info = false);
434 
435     /**
436             @brief evaluate all NNIs
437             @param  node    evaluate all NNIs of the subtree rooted at node
438             @param  dad     a neighbor of \p node which does not belong to the subtree
439                             being considered (used for traverse direction)
440 
441      */
442     //void evalNNIs(PhyloNode *node = NULL, PhyloNode *dad = NULL);
443 
444     /**
445      * @brief Evaluate all NNIs on branch defined by \a branches
446      *
447      * @param nniBranches [IN] branches the branches on which NNIs will be evaluated
448      * @return list positive NNIs
449      */
450     void evaluateNNIs(Branches &nniBranches, vector<NNIMove> &outNNIMoves);
451 
452     double optimizeNNIBranches(Branches &nniBranches);
453 
454     /**
455             search all positive NNI move on the current tree and save them
456             on the possilbleNNIMoves list
457      */
458     void evalNNIsSort(bool approx_nni);
459 
460     /**
461             apply  NNIs from the non-conflicting NNI list
462             @param compatibleNNIs vector of all compatible NNIs
463             @param changeBran whether or not the computed branch lengths should be applied
464      */
465     virtual void doNNIs(vector<NNIMove> &compatibleNNIs, bool changeBran = true);
466 
467     /**
468      *  Restore the old 5 branch lengths stored in the NNI move.
469      *  This is called after an NNI is reverted.
470      *  @param nnimove the NNI move currently in consideration
471      */
472     //void restoreNNIBranches(NNIMove nnimove);
473 
474 
475     /**
476      *  @brief get a list of compatible NNIs from a list of NNIs
477      *  @param nniMoves [IN] list of NNIs
478      *  @return list of compatible NNIs
479      */
480     void getCompatibleNNIs(vector<NNIMove> &nniMoves, vector<NNIMove> &compatibleNNIs);
481 
482     /**
483             add a NNI move to the list of possible NNI moves;
484      */
485     void addPositiveNNIMove(NNIMove &myMove);
486 
487     /**
488      * Estimate the 95% quantile of the distribution of N (see paper for more d
489                                                            details)
490      * @return the estimated value
491      */
492     inline double estN95(void);
493 
494     /**
495      * Estimate the median of the distribution of N (see paper for more d
496                                                            details)
497      * @return the estimated value
498      */
499     double getAvgNumNNI(void);
500 
501     /**
502      * Estimate the median of the distribution of N (see paper for more d
503                                                           details)
504      * @return the estimated value
505      */
506     double estDeltaMedian(void);
507 
508     /**
509      * Estimate the 95% quantile of the distribution of DELTA (see paper for
510                                                                more detail)
511      * @return the estimated value
512      */
513     inline double estDelta95(void);
514 
515     /**
516             current parsimony score of the tree
517      */
518     int cur_pars_score;
519 
520 //    bool enable_parsimony;
521     /**
522             stopping rule
523      */
524     StopRule stop_rule;
525 
526     /**
527      *      Parsimony scores, used for linear regression
528      */
529     double* pars_scores;
530 
531     /**
532         Log-likelihood variastring IQPTree::bran2string(PhyloNode* node1, PhyloNode* node2)nce
533      */
534     double logl_variance;
535 
536     /**
537      *      The coressponding log-likelihood score from computed indendently from the parsimony
538      *      scores
539      */
540     double* lh_scores;
541 
542     Linear* linRegModel;
543 
544 
getNNICutoff()545     inline double getNNICutoff() {
546         return nni_cutoff;
547     }
548 
549     /*
550      *  Contains a sorted list of all NNIs (2n-6) evaluated for the current best tree
551      *  The last element (nni_for_pertub.end()) is the best NNI
552      */
553     vector<pllNNIMove> nniListOfBestTree;
554 
555 
556     /**
557      *  information and parameters for the tree search procedure
558      */
559     SearchInfo searchinfo;
560 
561     /**
562      *  Vector contains number of NNIs used at each iterations
563      */
564     vector<int> vecNumNNI;
565 
566 
567     /**
568      * Do memory allocation and initialize parameter for UFBoot to run with PLL
569      */
570     void pllInitUFBootData();
571 
572     /**
573      * Do memory deallocation for UFBoot data (PLL mode)
574      */
575     void pllDestroyUFBootData();
576 
577     /**
578      * DTH:
579      * Substitute bases in seq according to PLL's rules
580      * This function should be updated if PLL's rules change.
581      * @param seq: data of some sequence to be substituted
582      * @param dataType: PLL_DNA_DATA or PLL_AA_DATA
583      */
584    void pllBaseSubstitute (char *str, int dataType);
585 
586    /*
587     * An array to map site index in pllAlignment into IQTree pattern index
588     * Born due to the order difference of these two
589     * Will be deallocated in pllDestroyUFBootData()
590     */
591    int * pll2iqtree_pattern_index;
592 
593    /*
594     * Build pll2iqtree_pattern_index
595     * Must be called AFTER initializing PLL model
596     */
597    void pllBuildIQTreePatternIndex();
598 
599    /**
600     * FOR TESTING:
601     * Write to log file the freq of pllAlignment sites, and
602     * freq of bootstrap site stored in pllUFBootDataPtr->boot_samples
603     */
604    void pllLogBootSamples(int** pll_boot_samples, int nsamples, int npatterns);
605 
606    /**
607     * Convert certain arrays in pllUFBootDataPtr
608     * into IQTree data structures
609     * to be usable in IQTree::summarizeBootstrap()
610     */
611    void pllConvertUFBootData2IQTree();
612 
613 protected:
614     /**
615     *  Splits corresponding to random NNIs
616     */
617     SplitIntMap initTabuSplits;
618 
619     /**
620             criterion to assess important quartet
621      */
622     IQP_ASSESS_QUARTET iqp_assess_quartet;
623 
624 
625     /**
626      * Taxa set
627      */
628     NodeVector taxaSet;
629 
630     /**
631      *  Vector contains approximated improvement pro NNI at each iterations
632      */
633     vector<double> vecImpProNNI;
634 
635     /**
636         Optimal branch lengths
637      */
638 //    mapString2Double optBrans;
639 
640     /**
641      *  @brief get branches, on which NNIs are evaluated for the next NNI step.
642      *  @param[out] nodes1 one ends of the branches
643      *  @param[out] nodes2 the other ends of the branches
644      *  @param[in] nnis NNIs that have been previously applied
645      */
646     void generateNNIBranches(NodeVector& nodes1, NodeVector& nodes2, unordered_map<string, NNIMove>& nnis);
647 
648     int k_delete, k_delete_min, k_delete_max, k_delete_stay;
649 
650     /**
651             number of representative leaves for IQP step
652      */
653     int k_represent;
654 
655 public:
656 
657     /**
658      *  Candidate tree set (the current best N (default N = 5)
659      *  NNI-optimal trees
660      */
661     CandidateSet candidateTrees;
662 
663     /**
664      *  Set of all intermediate trees (initial trees, tree generated by NNI steps,
665      *  NNI-optimal trees)
666      */
667     CandidateSet intermediateTrees;
668 
669 
670     /**
671      *  Update the candidate set with a new NNI-optimal tree. The maximum size of the candidate set
672      *  is fixed to the initial setting. Thus, if the size exceed the maximum number of trees, the worse
673      *  tree will be removed.
674      *
675      *  @param treeString
676      *      the new tree
677      *  @param score
678      *      the score of the new tree
679      *  @param updateStopRule
680      *      Whether or not to update the stop rule
681      *  @return relative position of the new tree to the current best.
682      *      -1 if duplicated
683      *      -2 if the candidate set is not updated
684      */
685     int addTreeToCandidateSet(string treeString, double score, bool updateStopRule, int sourceProcID);
686 
687     /**
688         MPI: synchronize candidate trees between all processes
689         @param nTrees number of trees to broadcast
690         @param updateStopRule true to update stopping rule, false otherwise
691     */
692     void syncCandidateTrees(int nTrees, bool updateStopRule);
693 
694     /**
695         MPI: synchronize tree of current iteration with master
696         will update candidateset_changed
697         @param curTree current tree
698 
699     */
700     void syncCurrentTree();
701 
702     /**
703         MPI: Master sends stop message to all workers
704     */
705     void sendStopMessage();
706 
707     /**
708      *  Generate the initial parsimony/random trees, called by initCandidateTreeSet
709      *  @param nParTrees number of parsimony/random trees to generate
710      */
711     void createInitTrees(int nParTrees);
712 
713     /**
714      *  Generate the initial candidate tree set
715      *  @param nParTrees number of parsimony trees to generate
716      *  @param nNNITrees number of NNI locally optimal trees to generate
717      */
718     void initCandidateTreeSet(int nParTrees, int nNNITrees);
719 
720     /**
721      * Generate the initial tree (usually used for model parameter estimation)
722      */
723     void computeInitialTree(LikelihoodKernel kernel);
724 
725     /**
726      *  @brief: optimize model parameters on the current tree
727      *  either IQ-TREE or PLL
728      *  @param printInfo to print model parameters to the screen or not
729      *  @param epsilon likelihood epsilon for optimization
730      *
731      */
732     string optimizeModelParameters(bool printInfo = false, double epsilon = -1);
733 
734     /**
735      *  variable storing the current best tree topology
736      */
737     topol* pllBestTree;
738 
739     /****** following variables are for ultra-fast bootstrap *******/
740 
741     /** TRUE to save also branch lengths into treels_newick */
742 //    bool save_all_br_lens;
743 
744     /**
745         this keeps the list of intermediate trees.
746         it will be activated if params.avoid_duplicated_trees is TRUE.
747      */
748 //    StringIntMap treels;
749 
750     /** pattern log-likelihood vector for each treels */
751 //    vector<double* > treels_ptnlh;
752 
753     /** OBSOLETE: tree log-likelihood for each treels */
754 //    DoubleVector treels_logl;
755 
756     /** NEWICK string for each treels */
757 //    StrVector treels_newick;
758 
759     /** OBSOLETE: maximum number of distinct candidate trees (tau parameter) */
760 //    int max_candidate_trees;
761 
762     /** log-likelihood threshold (l_min) */
763     double logl_cutoff;
764 
765     /** vector of bootstrap alignments generated */
766     vector<BootValType* > boot_samples;
767 
768     /** starting sample for UFBoot, used for MPI */
769     int sample_start;
770 
771     /** end sample for UFBoot, used for MPI */
772     int sample_end;
773 
774     /** newick string of corresponding bootstrap trees */
775     StrVector boot_trees;
776 
777     /** bootstrap tree strings with branch lengths, for -wbtl option */
778 //    StrVector boot_trees_brlen;
779 
780     /** number of multiple optimal trees per replicate */
781     IntVector boot_counts;
782 
783     /** corresponding RELL log-likelihood */
784     DoubleVector boot_logl;
785 
786     /** corresponding log-likelihood on original alignment */
787     DoubleVector boot_orig_logl;
788 
789     /** Set of splits occurring in bootstrap trees */
790     vector<SplitGraph*> boot_splits;
791 
792     /** log-likelihood of bootstrap consensus tree */
793     double boot_consense_logl;
794 
795     /** Robinson-Foulds distance between contree and ML tree */
796     int contree_rfdist;
797 
798     /** Corresponding map for set of splits occurring in bootstrap trees */
799     //SplitIntMap boot_splits_map;
800 
801     /** summarize all bootstrap trees */
802     void summarizeBootstrap(Params &params, MTreeSet &trees);
803 
804     /** summarize bootstrap trees */
805     virtual void summarizeBootstrap(Params &params);
806 
807     /** summarize bootstrap trees into split set */
808     void summarizeBootstrap(SplitGraph &sg);
809 
810     /**
811      write .ufboot trees file
812      */
813     virtual void writeUFBootTrees(Params &params);
814 
815     /** @return bootstrap correlation coefficient for assessing convergence */
816     double computeBootstrapCorrelation();
817 
818     int getDelete() const;
819     void setDelete(int _delete);
820 
821 protected:
822     /**** NNI cutoff heuristic *****/
823     /**
824      */
825     vector<NNIInfo> nni_info;
826 
827     bool estimate_nni_cutoff;
828 
829     double nni_cutoff;
830 
831     bool nni_sort;
832 
833     bool testNNI;
834 
835     ofstream outNNI;
836 protected:
837 
838     //bool print_tree_lh;
839 
840     //int write_intermediate_trees;
841 
842     ofstream out_treels, out_treelh, out_sitelh, out_treebetter;
843     string treels_name, out_lh_file, site_lh_file;
844 
845     void estimateNNICutoff(Params* params);
846 
847     virtual void saveCurrentTree(double logl); // save current tree
848 
849 
850     void saveNNITrees(PhyloNode *node = NULL, PhyloNode *dad = NULL);
851 
852     int duplication_counter;
853 
854     // MPI: vector of size = num processes, true if master should send candidate set to worker
855     BoolVector candidateset_changed;
856 
857     // true if best candidate tree is changed
858     bool bestcandidate_changed;
859 
860     /**
861             number of IQPNNI iterations
862      */
863     //int iqpnni_iterations;
864 
865     /**
866             bonus values of all branches, used for IQP algorithm
867      */
868     //double *bonus_values;
869 
870     /**
871             delete a set of leaves from tree (with the probability p_delete), assume tree is birfucating
872             @param del_leaves (OUT) the list of deleted leaves
873      */
874     void deleteLeaves(PhyloNodeVector &del_leaves);
875 
876     void deleteNonTabuLeaves(PhyloNodeVector &del_leaves);
877 
878     /**
879      *         delete a set of leaves from tree
880      *         non-cherry leaves are selected first
881      *         @param del_leaves (OUT) the list of deleted leaves
882      */
883     void deleteNonCherryLeaves(PhyloNodeVector &del_leaves);
884 
885     /**
886             reinsert the whole list of leaves back into the tree
887             @param del_leaves the list of deleted leaves, returned by deleteLeaves() function
888      */
889     virtual void reinsertLeaves(PhyloNodeVector &del_leaves);
890 
891     void reinsertLeavesByParsimony(PhyloNodeVector &del_leaves);
892 
893 
894     void doParsimonyReinsertion();
895 
896 
897     /**
898             assess a quartet with four taxa. Current implementation uses the four-point condition
899             based on distance matrix for quick evaluation.
900             @param leaf0 one of the leaf in the existing sub-tree
901             @param leaf1 one of the leaf in the existing sub-tree
902             @param leaf2 one of the leaf in the existing sub-tree
903             @param del_leaf a leaf that was deleted (not in the existing sub-tree)
904             @return 0, 1, or 2 depending on del_leaf should be in subtree containing leaf0, leaf1, or leaf2, respectively
905      */
906     int assessQuartet(Node *leaf0, Node *leaf1, Node *leaf2, Node *del_leaf);
907 
908     /**
909             assess a quartet with four taxa using parsimony
910             @param leaf0 one of the leaf in the existing sub-tree
911             @param leaf1 one of the leaf in the existing sub-tree
912             @param leaf2 one of the leaf in the existing sub-tree
913             @param del_leaf a leaf that was deleted (not in the existing sub-tree)
914             @return 0, 1, or 2 depending on del_leaf should be in subtree containing leaf0, leaf1, or leaf2, respectively
915      */
916     int assessQuartetParsimony(Node *leaf0, Node *leaf1, Node *leaf2,
917             Node *del_leaf);
918 
919     /**
920             assess the important quartets around a virtual root of the tree.
921             This function will assign bonus points to branches by updating the variable 'bonus_values'
922             @param cur_root the current virtual root
923             @param del_leaf a leaf that was deleted (not in the existing sub-tree)
924      */
925     void assessQuartets(vector<RepresentLeafSet*> &leaves_vec, PhyloNode *cur_root, PhyloNode *del_leaf);
926 
927     /**
928             initialize the bonus points to ZERO
929             @param node the root of the sub-tree
930             @param dad dad of 'node', used to direct the recursion
931      */
932     void initializeBonus(PhyloNode *node = NULL, PhyloNode *dad = NULL);
933 
934     /**
935             raise the bonus points for all branches in the subtree rooted at a node
936             @param node the root of the sub-tree
937             @param dad dad of 'node', used to direct the recursion
938      */
939     void raiseBonus(Neighbor *nei, Node *dad, double bonus);
940 
941     /**
942             Bonuses are stored in a partial fashion. This function will propagate the bonus at every branch
943             into the subtree at this branch.
944             @param node the root of the sub-tree
945             @param dad dad of 'node', used to direct the recursion
946             @return the partial bonus of the branch (node -> dad)
947      */
948     double computePartialBonus(Node *node, Node* dad);
949 
950     /**
951             determine the list of branches with the same best bonus point
952             @param best_bonus the best bonus determined by findBestBonus()
953             @param best_nodes (OUT) vector of one ends of the branches with highest bonus point
954             @param best_dads (OUT) vector of the other ends of the branches with highest bonus point
955             @param node the root of the sub-tree
956             @param dad dad of 'node', used to direct the recursion
957      */
958     void findBestBonus(double &best_score, NodeVector &best_nodes, NodeVector &best_dads, Node *node = NULL, Node *dad = NULL);
959 
960     void estDeltaMin();
961 
962     void convertNNI2Splits(SplitIntMap &nniSplits, int numNNIs, vector<NNIMove> &compatibleNNIs);
963 
964     string generateParsimonyTree(int randomSeed);
965 
966     double doTreePerturbation();
967 
968     void estimateLoglCutoffBS();
969 
970     //void estimateNNICutoff(Params &params);
971 
972 public:
973     /**
974      *  Return best tree string from the candidate set
975      *
976      *  @param numTrees
977      *      Number of best trees to return
978      *  @return
979      *      A string vector of trees
980      */
981     vector<string> getBestTrees(int numTrees = 0);
982 
983     /**
984      *  Print the iteration number and the tree score
985      */
986     void printIterationInfo(int sourceProcID);
987 
988     /**
989      *  Return branches that are 2 branches away from the branches, on which NNIs were applied
990      *  in the previous NNI steps.
991      *  @param
992      *      previousNNIBranches[IN] a set of branches on which NNIs were performed in the previous NNI step.
993      *  @return
994      *      a set of branches, on which NNIs should be evaluated for the current NNI steps
995      */
996     Branches getReducedListOfNNIBranches(Branches &previousNNIBranches);
997 
998 
999     // Diep added for UFBoot2-Corr
1000     void refineBootTrees();
1001     bool on_refine_btree;
1002     Alignment* saved_aln_on_refine_btree;
1003     vector<IntVector> boot_samples_int;
1004 };
1005 #endif
1006