1 // 2 // phylotreemixlen.h 3 // iqtree 4 // 5 // Created by Minh Bui on 24/08/15. 6 // 7 // 8 9 #ifndef __iqtree__phylotreemixlen__ 10 #define __iqtree__phylotreemixlen__ 11 12 #include <stdio.h> 13 #ifdef USE_CPPOPTLIB 14 #include "cppoptlib/meta.h" 15 #include "cppoptlib/boundedproblem.h" 16 #endif 17 #include "iqtree.h" 18 19 20 21 /** 22 Phylogenetic tree with mixture of branch lengths 23 Started within joint project with Stephen Crotty 24 */ 25 #ifdef USE_CPPOPTLIB 26 class PhyloTreeMixlen : public IQTree, public cppoptlib::BoundedProblem<double> 27 #else 28 class PhyloTreeMixlen : public IQTree 29 #endif 30 { 31 32 friend class ModelFactoryMixlen; 33 34 public: 35 36 /** 37 default constructor 38 */ 39 PhyloTreeMixlen(); 40 41 PhyloTreeMixlen(Alignment *aln, int mixlen); 42 43 virtual ~PhyloTreeMixlen(); 44 45 /** 46 start structure for checkpointing 47 */ 48 virtual void startCheckpoint(); 49 50 /** 51 save object into the checkpoint 52 */ 53 virtual void saveCheckpoint(); 54 55 /** 56 restore object from the checkpoint 57 */ 58 virtual void restoreCheckpoint(); 59 60 /** 61 allocate a new node. Override this if you have an inherited Node class. 62 @param node_id node ID 63 @param node_name node name 64 @return a new node 65 */ 66 virtual Node* newNode(int node_id = -1, const char* node_name = NULL); 67 68 /** 69 allocate a new node. Override this if you have an inherited Node class. 70 @param node_id node ID 71 @param node_name node name issued by an interger 72 @return a new node 73 */ 74 virtual Node* newNode(int node_id, int node_name); 75 76 /** 77 refactored 2015-12-22: Taxon IDs instead of Taxon names to save space! 78 Read the tree saved with Taxon IDs and branch lengths. 79 @param tree_string tree string to read from 80 @param updatePLL if true, tree is read into PLL 81 */ 82 virtual void readTreeString(const string &tree_string); 83 84 virtual void initializeModel(Params ¶ms, string model_name, ModelsBlock *models_block); 85 86 /** 87 @return true if this is a tree with mixture branch lengths, default: false 88 */ isMixlen()89 virtual bool isMixlen() { return !initializing_mixlen; } 90 91 /** 92 @return number of mixture branch lengths, default: 1 93 */ getMixlen()94 virtual int getMixlen() { 95 if (initializing_mixlen) 96 return 1; 97 else 98 return mixlen; 99 } 100 101 /** 102 set number of mixture branch lengths 103 */ 104 void setMixlen(int mixlen); 105 106 /** 107 @param[out] lenvec tree lengths for each class in mixlen model 108 @param node the starting node, NULL to start from the root 109 @param dad dad of the node, used to direct the search 110 */ 111 virtual void treeLengths(DoubleVector &lenvec, Node *node = NULL, Node *dad = NULL); 112 113 /** 114 * assign branch length as mean over all branch lengths of categories 115 */ 116 void assignMeanMixBranches(Node *node = NULL, Node *dad = NULL); 117 118 119 /** 120 parse the string containing branch length(s) 121 by default, this will parse just one length 122 @param lenstr string containing branch length(s) 123 @param[out] branch_len output branch length(s) 124 */ 125 // virtual void parseBranchLength(string &lenstr, DoubleVector &branch_len); 126 127 /** 128 * internal function called by printTree to print branch length 129 * @param out output stream 130 * @param length_nei target Neighbor to print 131 */ 132 virtual void printBranchLength(ostream &out, int brtype, bool print_slash, Neighbor *length_nei); 133 134 /** 135 print tree to .treefile 136 @param params program parameters, field root is taken 137 */ 138 virtual void printResultTree(string suffix = ""); 139 140 /** 141 initialize mixture branch lengths 142 */ 143 void initializeMixBranches(PhyloNode *node = NULL, PhyloNode *dad = NULL); 144 145 /** initialize parameters if necessary */ 146 void initializeMixlen(double tolerance, bool write_info); 147 148 /** 149 called by fixNegativeBranch to fix one branch 150 @param branch_length new branch length 151 @param dad_branch dad branch 152 @param dad dad node 153 */ 154 virtual void fixOneNegativeBranch(double branch_length, Neighbor *dad_branch, Node *dad); 155 156 /** 157 * IMPORTANT: semantic change: this function does not return score anymore, for efficiency purpose 158 optimize one branch length by ML 159 @param node1 1st end node of the branch 160 @param node2 2nd end node of the branch 161 @param clearLH true to clear the partial likelihood, otherwise false 162 @param maxNRStep maximum number of Newton-Raphson steps 163 @return likelihood score 164 */ 165 virtual void optimizeOneBranch(PhyloNode *node1, PhyloNode *node2, bool clearLH = true, int maxNRStep = 100); 166 167 /** 168 optimize all branch lengths of the tree 169 @param iterations number of iterations to loop through all branches 170 @return the likelihood of the tree 171 */ 172 virtual double optimizeAllBranches(int my_iterations = 100, double tolerance = TOL_LIKELIHOOD, int maxNRStep = 100); 173 174 /** 175 This function calculate f(value), first derivative f'(value) and 2nd derivative f''(value). 176 used by Newton raphson method to minimize the function. 177 Please always override this function to adapt to likelihood or parsimony score. 178 The default is for function f(x) = x^2. 179 @param value x-value of the function 180 @param df (OUT) first derivative 181 @param ddf (OUT) second derivative 182 */ 183 virtual void computeFuncDervMulti(double *value, double *df, double *ddf); 184 185 /** 186 Inherited from Optimization class. 187 This function calculate f(value), first derivative f'(value) and 2nd derivative f''(value). 188 used by Newton raphson method to minimize the function. 189 @param value current branch length 190 @param df (OUT) first derivative 191 @param ddf (OUT) second derivative 192 @return negative of likelihood (for minimization) 193 */ 194 virtual void computeFuncDerv(double value, double &df, double &ddf); 195 196 /** 197 return the number of dimensions 198 */ 199 virtual int getNDim(); 200 201 202 /** 203 the target function which needs to be optimized 204 @param x the input vector x 205 @return the function value at x 206 */ 207 virtual double targetFunk(double x[]); 208 209 /** 210 the approximated derivative function 211 @param x the input vector x 212 @param dfx the derivative at x 213 @return the function value at x 214 */ 215 virtual double derivativeFunk(double x[], double dfx[]); 216 217 /** For Mixlen stuffs */ getCurMixture()218 virtual int getCurMixture() { return cur_mixture; } 219 220 /** 221 * Optimize current tree using NNI 222 * 223 * @return 224 * <number of NNI steps, number of NNIs> done 225 */ 226 virtual pair<int, int> optimizeNNI(bool speedNNI = true); 227 228 /** number of mixture categories */ 229 int mixlen; 230 231 /** current category, for optimizing branch length */ 232 int cur_mixture; 233 234 235 /*************** Using cppoptlib for branch length optimization ***********/ 236 237 #ifdef USE_CPPOPTLIB 238 239 // using typename BoundedProblem<double>::TVector; 240 // using typename BoundedProblem<double>::THessian; 241 242 /** 243 * @brief returns objective value in x 244 * @details [long description] 245 * 246 * @param x [description] 247 * @return [description] 248 */ 249 double value(const TVector &x); 250 251 /** 252 * @brief returns gradient in x as reference parameter 253 * @details should be overwritten by symbolic gradient 254 * 255 * @param grad [description] 256 */ 257 void gradient(const TVector &x, TVector &grad); 258 259 /** 260 * @brief This computes the hessian 261 * @details should be overwritten by symbolic hessian, if solver relies on hessian 262 */ 263 void hessian(const TVector &x, THessian &hessian); 264 265 #endif 266 267 protected: 268 269 /** relative rate, used to initialize branch lengths */ 270 DoubleVector relative_treelen; 271 272 /** true if during intialization phase */ 273 bool initializing_mixlen; 274 275 }; 276 277 #endif /* defined(__iqtree__phylotreemixlen__) */ 278