1 /* 2 * ml.h 3 * 4 * 5 * Part of TREE-PUZZLE 5.2 (July 2004) 6 * 7 * (c) 2003-2004 by Heiko A. Schmidt, Korbinian Strimmer, and Arndt von Haeseler 8 * (c) 1999-2003 by Heiko A. Schmidt, Korbinian Strimmer, 9 * M. Vingron, and Arndt von Haeseler 10 * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler 11 * 12 * All parts of the source except where indicated are distributed under 13 * the GNU public licence. See http://www.opensource.org for details. 14 * 15 * ($Id$) 16 * 17 */ 18 19 20 #ifndef _ML_ 21 #define _ML_ 22 23 /* values for mlmode */ 24 #define ML_QUART 1 25 #define ML_TREE 2 26 #define NJ_TREE 3 27 28 #if 0 29 /* values for rhettype */ 30 #define MLUNIFORMRATE 0 31 #define MLGAMMARATE 1 32 #define MLTWORATE 2 33 #define MLMIXEDRATE 3 34 #endif 35 36 /* values for rhetmode */ 37 #define UNIFORMRATE 0 38 #define GAMMARATE 1 39 #define TWORATE 2 40 #define MIXEDRATE 3 41 42 43 /* definitions */ 44 45 #define MINTS 0.20 /* Ts/Tv parameter */ 46 #define MAXTS 30.0 47 #define MINYR 0.10 /* Y/R Ts parameter */ 48 #define MAXYR 6.00 49 #define MINFI 0.00 /* fraction invariable sites */ 50 #define MAXFI 0.99 /* only for input */ 51 #define MINGE 0.01 /* rate heterogeneity parameter */ 52 #define MAXGE 0.99 53 #define MINCAT 4 /* discrete Gamma categories */ 54 #define MAXCAT 16 55 56 #define RMHROOT 5.0 /* upper relative bound for height of root */ 57 #define MAXARC 900.0 /* upper limit on branch length (PAM) = 6.0 */ 58 #define MINARC 0.001 /* lower limit on branch length (PAM) = 0.00001 */ 59 60 #ifdef USE_ADJUSTABLE_EPS 61 /* error in branch length (PAM) = 0.000001 */ 62 # define EPSILON_BRANCH_DEFAULT 0.0001 63 # define EPSILON_BRANCH epsilon_branch 64 EXTERN double epsilon_branch; 65 #else 66 # define EPSILON_BRANCH 0.0001 67 #endif 68 69 #ifdef USE_ADJUSTABLE_EPS 70 /* error in node and root heights */ 71 # define EPSILON_HEIGHTS_DEFAULT 0.0001 72 # define EPSILON_HEIGHTS epsilon_heights 73 EXTERN double epsilon_heights; 74 #else 75 # define EPSILON_HEIGHTS 0.0001 76 #endif 77 78 #ifdef USE_ADJUSTABLE_EPS 79 # if (EXTERN != extern) 80 double epsilon_branch = EPSILON_BRANCH_DEFAULT; 81 double epsilon_heights = EPSILON_HEIGHTS_DEFAULT; 82 # endif 83 #endif 84 85 #define MAXIT 100 /* maximum number of iterates of smoothing */ 86 #define MINFDIFF 0.00002 /* lower limit on base frequency differences */ 87 #define MINFREQ 0.0001 /* lower limit on base frequencies = 0.01% */ 88 #define NUMQBRNCH 5 /* number of branches in a quartet */ 89 #define NUMQIBRNCH 1 /* number of internal branches in a quartet */ 90 #define NUMQSPC 4 /* number of sequences in a quartet */ 91 92 /* 2D minimisation */ 93 94 #ifdef USE_ADJUSTABLE_EPS 95 /* epsilon substitution process estimation */ 96 # define EPSILON_SUBSTPARAM_DEFAULT 0.01 97 # define EPSILON_SUBSTPARAM epsilon_substparam 98 EXTERN double epsilon_substparam; 99 #else 100 # define EPSILON_SUBSTPARAM 0.01 101 #endif 102 103 #ifdef USE_ADJUSTABLE_EPS 104 /* epsilon rate heterogeneity estimation */ 105 # define EPSILON_RATEPARAM_DEFAULT 0.01 106 # define EPSILON_RATEPARAM epsilon_rateparam 107 EXTERN double epsilon_rateparam; 108 #else 109 # define EPSILON_RATEPARAM 0.01 110 #endif 111 112 #ifdef USE_ADJUSTABLE_EPS 113 # if (EXTERN != extern) 114 double epsilon_substparam = EPSILON_SUBSTPARAM_DEFAULT; 115 double epsilon_rateparam = EPSILON_RATEPARAM_DEFAULT; 116 # endif 117 #endif 118 119 /* quartet series */ 120 #define MINPERTAXUM 2 121 #define MAXPERTAXUM 6 122 #define TSDIFF 0.20 123 #define YRDIFF 0.10 124 125 /* type definitions */ 126 127 typedef struct node 128 { 129 struct node *isop; /* pointers within node (nodes) */ 130 struct node *kinp; /* pointers between nodes (branches) */ 131 int descen; 132 int number; 133 double length; /* branch length inferred (no clock) */ 134 double lengthc; /* branch length inferred (clock) */ 135 double lengthext; /* external length from usertree */ 136 int lengthset; /* length set by usertree */ 137 double varlen; 138 double height; 139 double varheight; 140 ivector paths; /* path directions for Korbi's p-step */ 141 cvector eprob; 142 dcube partials; /* partial likelihoods */ 143 char *label; /* internal labels */ 144 } Node; 145 146 typedef struct tree 147 { 148 Node *rootp; 149 Node **ebrnchp; /* list of pointers to external branches */ 150 Node **ibrnchp; /* list of pointers to internal branches */ 151 double lklhd; /* total log-likelihood */ 152 double lklhdc; /* total log-likelihood clock */ 153 dmatrix condlkl; /* lhs for each pattern and non-zero rate */ 154 double rssleast; 155 } Tree; 156 157 158 /* global variables */ 159 160 EXTERN Node *chep; /* pointer to current height node */ 161 EXTERN Node *rootbr; /* pointer to root branch */ 162 EXTERN Node **heights; /* pointer to height nodes in unrooted tree */ 163 EXTERN int Numhts; /* number of height nodes in unrooted tree */ 164 EXTERN double hroot; /* height of root */ 165 EXTERN double varhroot; /* variance of height of root */ 166 EXTERN double maxhroot; /* maximal height of root */ 167 EXTERN int locroot; /* location of root */ 168 EXTERN int numbestroot; /* number of best locations for root */ 169 EXTERN int clockmode; /* clocklike vs. nonclocklike computation */ 170 EXTERN cmatrix Identif; /* sequence names */ 171 EXTERN cmatrix Namestr; /* sequence names (delimited by '\0') */ 172 EXTERN cmatrix Seqchar; /* ML sequence data */ 173 EXTERN cmatrix Seqpat; /* ordered site patterns */ 174 EXTERN ivector constpat; /* indicates constant site patterns */ 175 EXTERN cvector seqchi; 176 EXTERN cvector seqchj; 177 EXTERN dcube partiali; 178 EXTERN dcube partialj; 179 EXTERN dcube ltprobr; /* transition probabilites (for all non-zero rates*/ 180 EXTERN dmatrix Distanmat; /* matrix with maximum likelihood distances */ 181 EXTERN dmatrix Evec; /* Eigenvectors */ 182 EXTERN dmatrix Ievc; /* Inverse eigenvectors */ 183 EXTERN double TSparam; /* Ts/Tv parameter */ 184 EXTERN double GTR_ACrate; /* A <-> G mutation rate for GTR */ 185 EXTERN double GTR_AGrate; /* A <-> C mutation rate for GTR */ 186 EXTERN double GTR_ATrate; /* A <-> T mutation rate for GTR */ 187 EXTERN double GTR_CGrate; /* C <-> G mutation rate for GTR */ 188 EXTERN double GTR_CTrate; /* C <-> T mutation rate for GTR */ 189 EXTERN double GTR_GTrate; /* G <-> T mutation rate for GTR */ 190 EXTERN double tsmean, yrmean; 191 EXTERN double YRparam; /* Y/R Ts parameter */ 192 EXTERN double geerr; /* estimated error of rate heterogeneity */ 193 EXTERN double Geta; /* rate heterogeneity parameter */ 194 EXTERN double fracconst; /* fraction of constant sites */ 195 EXTERN double fracconstpat;/* fraction of constant patterns */ 196 EXTERN double Proportion; /* for tree drawing */ 197 EXTERN double tserr; /* estimated error of TSparam */ 198 EXTERN double yrerr; /* estimated error of YRparam */ 199 EXTERN double fracinv; /* fraction of invariable sites */ 200 EXTERN double fierr; /* estimated error of fracinv */ 201 EXTERN dvector Brnlength; 202 EXTERN dvector Distanvec; 203 EXTERN dvector Eval; /* Eigenvalues of 1 PAM rate matrix */ 204 EXTERN dvector Freqtpm; /* base frequencies */ 205 EXTERN dvector Rates; /* rate of each of the categories */ 206 EXTERN dmatrix iexp; 207 EXTERN imatrix Basecomp; /* base composition of each taxon */ 208 EXTERN ivector usedtaxa; /* list needed in the input treefile procedure */ 209 EXTERN int numtc; /* auxiliary variable for printing rooted tree */ 210 EXTERN int qcalg_optn; /* use quartet subsampling algorithm */ 211 EXTERN int approxp_optn;/* approximate parameter estimation */ 212 EXTERN int chi2fail; /* flag for chi2 test */ 213 EXTERN int Converg; /* flag for ML convergence (no clock) */ 214 EXTERN int Convergc; /* flag for ML convergence (clock) */ 215 EXTERN int data_optn; /* type of sequence input data */ 216 EXTERN int Dayhf_optn; /* Dayhoff model */ 217 EXTERN int HKY_optn; /* use HKY model */ 218 EXTERN int Jtt_optn; /* JTT model */ 219 EXTERN int print_GTR_optn; /* print (restricted) GTR matrix */ 220 EXTERN int GTR_optn; /* GTR model */ 221 EXTERN int blosum62_optn; /* BLOSUM 62 model */ 222 EXTERN int mtrev_optn; /* mtREV model */ 223 EXTERN int cprev_optn; /* cpREV model */ 224 EXTERN int vtmv_optn; /* VT model */ 225 EXTERN int wag_optn; /* WAG model */ 226 EXTERN int Maxsite; /* number of ML characters per taxum */ 227 EXTERN int Maxspc; /* number of sequences */ 228 EXTERN int mlmode; /* quartet ML or user defined tree ML */ 229 EXTERN int nuc_optn; /* nucleotide (4x4) models */ 230 EXTERN int Numbrnch; /* number of branches of current tree */ 231 EXTERN int numcats; /* number of rate categories */ 232 EXTERN int Numconst; /* number of constant sites */ 233 EXTERN int Numconstpat; /* number of constant patterns */ 234 EXTERN int Numibrnch; /* number of internal branches of current tree */ 235 EXTERN int Numitc; /* number of ML iterations assumning clock */ 236 EXTERN int Numit; /* number of ML iterations if convergence */ 237 EXTERN int Numptrn; /* number of site patterns */ 238 EXTERN int Numspc; /* number of sequences of current tree */ 239 EXTERN int optim_optn; /* optimize model parameters */ 240 EXTERN int grate_optim; /* optimize Gamma rate heterogeneity parameter */ 241 EXTERN int SH_optn; /* SH nucleotide (16x16) model */ 242 EXTERN int TN_optn; /* use TN model */ 243 EXTERN int tpmradix; /* number of different states */ 244 EXTERN int fracinv_optim; /* optimize fraction of invariable sites */ 245 EXTERN int typ_optn; /* type of PUZZLE analysis */ 246 EXTERN ivector Weight; /* weight of each site pattern */ 247 EXTERN Tree *Ctree; /* pointer to current tree */ 248 EXTERN int qca, qcb, qcc, qcd; /* quartet currently optimized */ 249 EXTERN ivector Alias; /* link site -> corresponding site pattern */ 250 EXTERN ivector bestrate; /* optimal assignment of rates to sequence sites*/ 251 252 EXTERN int bestratefound; /* best rate per site already reconstructed ? */ 253 254 /* function prototypes of all ml function */ 255 256 void convfreq(dvector); 257 void tp_radixsort(cmatrix, ivector, int, int, int *); 258 void condenceseq(cmatrix, ivector, cmatrix, ivector, int, int, int); 259 void countconstantsites(cmatrix, ivector, int, int, int *, int*); 260 void evaluateseqs(void); 261 void elmhes(dmatrix, ivector, int); 262 void eltran(dmatrix, dmatrix, ivector, int); 263 void mcdiv(double, double, double, double, double *, double *); 264 void hqr2(int, int, int, dmatrix, dmatrix, dvector, dvector); 265 void onepamratematrix(dmatrix); 266 void eigensystem(dvector, dmatrix); 267 void luinverse(dmatrix, dmatrix, int); 268 void checkevector(dmatrix, dmatrix, int); 269 void tranprobmat(void); 270 void tprobmtrx(double, dmatrix); 271 double comptotloglkl(dmatrix); 272 void allsitelkl(dmatrix, dvector); 273 void writesitelklmatrixphy(FILE *outf, int numut, int numsite, dmatrix allslkl, dmatrix allslklc, int clock); 274 void writesitelklvectorphy(FILE *outf, char *name, int numsite, dvector aslkl); 275 /* void writesitelklbin(FILE *, dvector); */ /* TODO */ 276 double pairlkl(double); 277 double mldistance(int, int, double); /*epe*/ 278 /* double mldistance(int, int); */ 279 void initdistan(void); 280 void computedistan(void); 281 void productpartials(Node *); 282 void partialsinternal(Node *); 283 void partialsexternal(Node *); 284 void initpartials(Tree *); 285 double intlkl(double); 286 void optinternalbranch(Node *); 287 double extlkl(double); 288 void optexternalbranch(Node *); 289 void finishlkl(Node *); 290 double optlkl(Tree *); 291 double treelkl(Tree *); 292 void luequation(dmatrix, dvector, int); 293 void lslength(Tree *, dvector, int, int, dvector); 294 #if PARALLEL 295 void lslength_par(Tree *, dvector, int, int, dvector); 296 #endif 297 void setextlength(Tree *, int, int, dvector); 298 void getusertree(FILE *, cvector, int, int); 299 Node *internalnode(Tree *, char **, int *); 300 void constructtree(Tree *, cvector, int); 301 void removebasalbif(cvector); 302 void makeusertree(FILE *, int); 303 Tree *new_tree(int, int, cmatrix); 304 Tree *new_quartet(int, cmatrix); 305 void free_tree(Tree *, int); 306 void make_quartet(int, int, int, int); 307 void changedistan(dmatrix, dvector, int); 308 double quartet_lklhd(int, int, int, int); 309 double quartet_alklhd(int, int, int, int); 310 void readusertree(FILE *, int); 311 double usertree_lklhd(int, int); 312 double usertree_alklhd(int, int); 313 void mlstart(void); 314 void distupdate(int, int, int, int); 315 void mlfinish(void); 316 void prbranch(Node *, int, int, int, ivector, ivector, FILE *); 317 void getproportion(double *, dvector, int); 318 void prtopology(FILE *); 319 void fputphylogeny(FILE *); 320 void resulttree(FILE *outfp, int usebranch); 321 void njtree(FILE *); 322 void njdistantree(Tree *); 323 void findbestratecombination(void); 324 void printbestratecombination(FILE *); 325 void printbestratecombinationtofile(FILE *, int); 326 int checkedge(int); 327 void fputsubstree(FILE *, Node *); 328 void fputrooted(FILE *, int); 329 void findheights(Node *); 330 void initclock(int); 331 double clock_alklhd(int); 332 double heightlkl(double); 333 void optheight(void); 334 double rheightlkl(double); 335 void optrheight(void); 336 double clock_lklhd(int); 337 int findrootedge(void); 338 void resultheights(FILE *); 339 340 /* mlparam.c */ 341 double homogentest(int); 342 void YangDiscreteGamma(double, int, double *); 343 void updaterates(void); 344 void computestat(double *, int, double *, double *); 345 double quartetml(int, int, int, int); 346 double opttsq(double); 347 double optyrq(double); 348 void optimseqevolparamsquart(void); 349 double opttst(double); 350 double optyrt(double); 351 void optimseqevolparamstree(void); 352 double optfi(double); 353 double optge(double); 354 void optimrateparams(void); 355 356 void estimateparametersnotree(void); /* moved from puzzle1.c */ 357 void estimateparameterstree(void); /* moved from puzzle1.c */ 358 /* mlparam.c end */ 359 360 int gettpmradix(void); 361 void rtfdata(dmatrix, double *); 362 int code2int(cvector, int *, int *); 363 char *int2code(int); 364 365 void jttdata(dmatrix, double *); 366 void dyhfdata(dmatrix, double *); 367 void mtrevdata(dmatrix, double *); 368 void cprev45data(dmatrix, double *); 369 void blosum62data(dmatrix, double *); 370 void vtmvdata(dmatrix, double *); 371 void wagdata(dmatrix, double *); 372 373 #endif 374