1 /*
2
3 PhyML: a program that computes maximum likelihood phylogenies from
4 DNA or AA homologous sequences.
5
6 Copyright (C) Stephane Guindon. Oct 2003 onward.
7
8 All parts of the source except where indicated are distributed under
9 the GNU public licence. See http://www.opensource.org for details.
10
11 */
12
13 #include <config.h>
14
15 #ifndef UTILITIES_H
16 #define UTILITIES_H
17
18 #define _POSIX_C_SOURCE 200112L
19
20 #include <stdio.h>
21 #include <stdarg.h>
22 #include <stdlib.h>
23 #include <math.h>
24 #include <ctype.h>
25 #include <string.h>
26 #include <time.h>
27 #include <limits.h>
28 #include <errno.h>
29 #include <float.h>
30 #include <assert.h>
31 #include <stdbool.h>
32 #include <signal.h>
33 /* #include <malloc/stdlib.h> */
34 /* #include <stdlib.h> */
35
36 #if (defined(__AVX__))
37 #include <xmmintrin.h>
38 #include <pmmintrin.h>
39 #include <immintrin.h>
40 #elif (defined(__SSE3__))
41 #include <emmintrin.h>
42 #include <pmmintrin.h>
43 #endif
44
45
46 #if !defined(__FMA__) && defined(__AVX2__)
47 #define __FMA__ 1
48 #endif
49
50 extern int n_sec1;
51 extern int n_sec2;
52
53 #define __FUNCTION__ NULL
54
55 #define For(i,n) for(i=0; i<n; i++)
56 #define Fors(i,n,s) for(i=0; i<n; i+=s)
57 #define PointGamma(prob,alpha,beta) PointChi2(prob,2.0*(alpha))/(2.0*(beta))
58 #define SHFT2(a,b,c) (a)=(b);(b)=(c);
59 #define SHFT3(a,b,c,d) (a)=(b);(b)=(c);(c)=(d);
60 #define SIGN(a,b) ((b) > 0.0 ? fabs(a) : -fabs(a))
61 #define SHFT(a,b,c,d) (a)=(b);(b)=(c);(c)=(d);
62
63 #ifndef MAX
64 #define MAX(a,b) ((a)>(b)?(a):(b))
65 #endif
66
67 #ifndef MIN
68 #define MIN(a,b) ((a)<(b)?(a):(b))
69 #endif
70
71 #define READ 0
72 #define WRITE 1
73 #define APPEND 2
74 #define READWRITE 3
75
76 #ifndef isnan
77 # define isnan(x) \
78 (sizeof (x) == sizeof (long double) ? isnan_ld (x) \
79 : sizeof (x) == sizeof (double) ? isnan_d (x) \
80 : isnan_f (x))
isnan_f(float x)81 static inline int isnan_f (float x) { return x != x; }
isnan_d(double x)82 static inline int isnan_d (double x) { return x != x; }
isnan_ld(long double x)83 static inline int isnan_ld (long double x) { return x != x; }
84 #endif
85
86 #ifndef isinf
87 # define isinf(x) \
88 (sizeof (x) == sizeof (long double) ? isinf_ld (x) \
89 : sizeof (x) == sizeof (double) ? isinf_d (x) \
90 : isinf_f (x))
isinf_f(float x)91 static inline int isinf_f (float x) { return isnan (x - x); }
isinf_d(double x)92 static inline int isinf_d (double x) { return isnan (x - x); }
isinf_ld(long double x)93 static inline int isinf_ld (long double x) { return isnan (x - x); }
94 #endif
95
96
97 extern int CALL;
98 extern int TIME;
99
100 #define SLFV_GAUSSIAN 0 /* Spatial Lambda-Fleming-Viot model (Gaussian) */
101 #define SLFV_UNIFORM 1 /* Spatial Lambda-Fleming-Viot model (Uniform) */
102 #define RW 2 /* standard Brownian diffusion model in phylogeography */
103 #define RRW 3 /* Lemey's relaxed random walk */
104
105 #define AC 0
106 #define AG 1
107 #define AT 2
108 #define CG 3
109 #define CT 4
110 #define GT 5
111
112 #if (defined __AVX__)
113 #define BYTE_ALIGN 32
114 #elif (defined __SSE3__)
115 #define BYTE_ALIGN 16
116 #else
117 #define BYTE_ALIGN 1
118 #endif
119
120 #ifndef M_1_SQRT_2PI
121 #define M_1_SQRT_2PI 0.398942280401432677939946059934 /* 1/sqrt(2pi) */
122 #endif
123
124 // verbose levels
125 #define VL0 0
126 #define VL1 1
127 #define VL2 2
128 #define VL3 3
129 #define VL4 4
130
131 #define T_MAX_MCMC_MOVE_NAME 500
132
133 #define WINDOW_WIDTH 800
134 #define WINDOW_HEIGHT 800
135
136 #define ALPHA_MIN 0.01
137 #define ALPHA_MAX 1000.
138
139 #define E_FRQ_WEIGHT_MIN 0.01
140 #define E_FRQ_WEIGHT_MAX 100.
141
142 #define R_MAT_WEIGHT_MIN 0.01
143 #define R_MAT_WEIGHT_MAX 100.
144
145 #define E_FRQ_MIN 0.01
146 #define E_FRQ_MAX 0.99
147
148 #define UNSCALED_E_FRQ_MIN -1000.
149 #define UNSCALED_E_FRQ_MAX +10.
150
151
152 #define TSTV_MIN 0.05
153 #define TSTV_MAX 20.0
154
155 #define PINV_MIN 0.00001
156 #define PINV_MAX 0.99999
157
158 #define RR_MIN 0.01
159 #define RR_MAX 100.0
160
161 #define UNSCALED_RR_MIN log(RR_MIN)
162 #define UNSCALED_RR_MAX log(RR_MAX)
163
164
165 #define GAMMA_RR_UNSCALED_MIN 0.01
166 #define GAMMA_RR_UNSCALED_MAX 200.
167
168 #define GAMMA_R_PROBA_UNSCALED_MIN 0.01
169 #define GAMMA_R_PROBA_UNSCALED_MAX 200.
170
171 #define PHYREX_UNIFORM 0
172 #define PHYREX_NORMAL 1
173
174 #define MCMC_MOVE_RANDWALK_UNIFORM 0
175 #define MCMC_MOVE_LOG_RANDWALK_UNIFORM 1
176 #define MCMC_MOVE_RANDWALK_NORMAL 2
177 #define MCMC_MOVE_LOG_RANDWALK_NORMAL 3
178 #define MCMC_MOVE_SCALE_THORNE 4
179 #define MCMC_MOVE_SCALE_GAMMA 5
180
181 #define N_MAX_MOVES 50
182
183 #define N_MAX_NEX_COM 20
184 #define T_MAX_NEX_COM 100
185 #define N_MAX_NEX_PARM 50
186 #define T_MAX_TOKEN 200
187 #define T_MAX_ID_COORD 10
188 #define T_MAX_ID_DISK 10
189 #define T_MAX_KEY 200
190 #define T_MAX_VAL 1000
191
192
193 #define N_MAX_MIXT_CLASSES 1000
194
195 #define NEXUS_COM 0
196 #define NEXUS_PARM 1
197 #define NEXUS_EQUAL 2
198 #define NEXUS_VALUE 3
199 #define NEXUS_SPACE 4
200
201 #define NNI_MOVE 0
202 #define SPR_MOVE 1
203 #define BEST_OF_NNI_AND_SPR 2
204
205 #define M_1_SQRT_2_PI 0.398942280401432677939946059934
206 #define M_SQRT_32 5.656854249492380195206754896838
207 #define PI 3.14159265358979311600
208 #define SQRT2PI 2.50662827463100024161
209 #define LOG2PI 1.83787706640934533908
210 #define LOG2 0.69314718055994528623
211 #define LOG_SQRT_2_PI 0.918938533204672741780329736406
212
213 #define NORMAL 1
214 #define EXACT 2
215
216 #define PHYLIP 0
217 #define NEXUS 1
218 #define IBDSIM 2
219
220 #ifndef YES
221 #define YES 1
222 #endif
223
224 #ifndef NO
225 #define NO 0
226 #endif
227
228 #ifndef TRUE
229 #define TRUE 1
230 #endif
231
232 #ifndef FALSE
233 #define FALSE 0
234 #endif
235
236 #define ON 1
237 #define OFF 0
238
239 #define SMALL_DBL 1.E-20
240
241 #define NT 0 /*! nucleotides */
242 #define AA 1 /*! amino acids */
243 #define GENERIC 2 /*! custom alphabet */
244 #define UNDEFINED -1
245
246 #define ACGT 0 /*! A,G,G,T encoding */
247 #define RY 1 /*! R,Y encoding */
248
249 #define INTERFACE_DATA_TYPE 0
250 #define INTERFACE_MULTIGENE 1
251 #define INTERFACE_MODEL 2
252 #define INTERFACE_TOPO_SEARCH 3
253 #define INTERFACE_BRANCH_SUPPORT 4
254
255 #define LEFT 0
256 #define RGHT 1
257
258
259 #ifndef INFINITY
260 #define INFINITY HUGE
261 #endif
262
263 #define MAX_N_CAL 100
264 #define N_MAX_OPTIONS 100
265
266 #define NEXT_BLOCK_SIZE 50
267
268 #define T_MAX_FILE 200
269 #define T_MAX_LINE 2000000
270 #define T_MAX_NAME 1000
271 #define T_MAX_ID 20
272 #define T_MAX_SEQ 2000000
273 #define T_MAX_OPTION 100
274 #define T_MAX_STATE 5
275
276 #define NODE_DEG_MAX 2000
277 #define BRENT_IT_MAX 1000
278 #define BRENT_CGOLD 0.3819660
279 #define BRENT_ZEPS 1.e-10
280 #define MNBRAK_GOLD 1.618034
281 #define MNBRAK_GLIMIT 100.0
282 #define MNBRAK_TINY 1.e-20
283 #define BL_START 1.e-04
284 #define GOLDEN_R 0.61803399
285 #define GOLDEN_C (1.0-GOLDEN_R)
286 #define N_MAX_INSERT 20
287 #define N_MAX_OTU 4000
288 #define UNLIKELY -1.e20
289 #define NJ_SEUIL 0.1
290 #define ROUND_MAX 100
291 #define DIST_MAX 2.00
292 #define DIST_MIN 1.e-10
293 #define AROUND_LK 50.0
294 #define PROP_STEP 1.0
295 #define T_MAX_ALPHABET 22
296 #define MDBL_MIN FLT_MIN
297 #define MDBL_MAX FLT_MAX
298 #define POWELL_ITMAX 200
299 #define LINMIN_TOL 2.0E-04
300 #define SCALE_POW 10 /*! Scaling factor will be 2^SCALE_POW or 2^(-SCALE_POW) [[ WARNING: SCALE_POW < 31 ]]*/
301 #define DEFAULT_SIZE_SPR_LIST 20
302 #define NEWICK 0
303 #define NEXUS 1
304 #define OUTPUT_TREE_FORMAT NEWICK
305 #define MAX_PARS 1000000000
306
307 #define LIM_SCALE_VAL 1.E-50 /*! Scaling limit (deprecated) */
308
309 #define MIN_CLOCK_RATE 1.E-10
310
311 #define MIN_VAR_BL 1.E-8
312 #define MAX_VAR_BL 1.E+3
313
314 #define JC69 1
315 #define K80 2
316 #define F81 3
317 #define HKY85 4
318 #define F84 5
319 #define TN93 6
320 #define GTR 7
321 #define CUSTOM 8
322
323 #define WAG 11
324 #define DAYHOFF 12
325 #define JTT 13
326 #define BLOSUM62 14
327 #define MTREV 15
328 #define RTREV 16
329 #define CPREV 17
330 #define DCMUT 18
331 #define VT 19
332 #define MTMAM 20
333 #define MTART 21
334 #define HIVW 22
335 #define HIVB 23
336 #define FLU 24
337 #define CUSTOMAA 25
338 #define LG 26
339 #define AB 27
340 // Amino acid ordering:
341 // Ala Arg Asn Asp Cys Gln Glu Gly His Ile Leu Lys Met Phe Pro Ser Thr Trp Tyr Val
342
343 #define COALESCENT 0
344
345 #define COMPOUND_COR 0
346 #define COMPOUND_NOCOR 1
347 #define EXPONENTIAL 2
348 #define LOGNORMAL 3
349 #define THORNE 4
350 #define GUINDON 5
351 #define STRICTCLOCK 6
352 #define BIRTHDEATH 7
353 #define NONE -1
354
355 #define ALRTSTAT 1
356 #define ALRTCHI2 2
357 #define MINALRTCHI2SH 3
358 #define SH 4
359 #define ABAYES 5
360
361
362 /* /\* Uncomment the lines below to switch to single precision *\/ */
363 /* typedef float phydbl; */
364 /* #define LOG logf */
365 /* #define POW powf */
366 /* #define EXP expf */
367 /* #define FABS fabsf */
368 /* #define SQRT sqrtf */
369 /* #define CEIL ceilf */
370 /* #define FLOOR floorf */
371 /* #define RINT rintf */
372 /* #define ROUND roundf */
373 /* #define TRUNC truncf */
374 /* #define COS cosf */
375 /* #define SIN sinf */
376 /* #define TAN tanf */
377 /* #define SMALL FLT_MIN */
378 /* #define BIG FLT_MAX */
379 /* #define SMALL_PIJ 1.E-10 */
380 /* #define BL_MIN 1.E-5 */
381 /* #define P_LK_LIM_INF 2.168404e-19 /\* 2^-62 *\/ */
382 /* #define P_LK_LIM_SUP 4.611686e+18 /\* 2^62 *\/ */
383
384 /* Uncomment the line below to switch to double precision */
385 typedef double phydbl;
386 #define LOG log
387 #define POW pow
388 #define EXP exp
389 #define FABS fabs
390 #define SQRT sqrt
391 #define CEIL ceil
392 #define FLOOR floor
393 #define RINT rint
394 #define ROUND round
395 #define TRUNC trunc
396 #define COS cos
397 #define SIN sin
398 #define TAN tan
399 #define SMALL DBL_MIN
400 #define BIG DBL_MAX
401 #define SMALL_PIJ 1.E-100
402 #define LOGBIG 690.
403 #define LOGSMALL -690.
404
405
406 #if !(defined PHYTIME)
407 #define BL_MIN 1.E-8
408 #define BL_MAX 100.
409 #else
410 #define BL_MIN 1.E-6
411 #define BL_MAX 1.
412 #endif
413
414 // Do *not* change the values below and leave the lines with
415 // curr_scaler_pow = (int)(-XXX.-LOG(smallest_p_lk))/LOG2;
416 // as XXX depends on what the value of P_LK_LIM_INF is
417 #define P_LK_LIM_INF 3.054936e-151 /* 2^-500 */
418 #define P_LK_LIM_SUP 3.273391e+150 /* 2^500 */
419 /* #define P_LK_LIM_INF 1.499697e-241 /\* 2^-800 *\/ */
420 /* #define P_LK_LIM_SUP 6.668014e+240 /\* 2^800 *\/ */
421
422 /* #define LARGE 100 */
423 /* #define TWO_TO_THE_LARGE 1267650600228229401496703205376.0 /\* 2^100 In R : sprintf("%.100f", 2^100)*\/ */
424
425 /* #define LARGE 200 */
426 /* #define TWO_TO_THE_LARGE 1606938044258990275541962092341162602522202993782792835301376.0 /\* 2^200 In R : sprintf("%.100f", 2^256)*\/ */
427
428 #define LARGE 256
429 #define TWO_TO_THE_LARGE 115792089237316195423570985008687907853269984665640564039457584007913129639936.0 /* 2^256 In R : sprintf("%.100f", 2^256)*/
430
431 /* #define LARGE 128 */
432 /* #define TWO_TO_THE_LARGE 340282366920938463463374607431768211456.0 /\* 2^128 In R : sprintf("%.100f", 2^128)*\/ */
433
434 /* #define LARGE 320 */
435 /* #define TWO_TO_THE_LARGE 2135987035920910082395021706169552114602704522356652769947041607822219725780640550022962086936576.0 */
436
437 #define INV_TWO_TO_THE_LARGE (1./TWO_TO_THE_LARGE)
438
439 #define SCALE_RATE_SPECIFIC 1
440 #define SCALE_FAST 2
441
442 #define SCALENO 0
443 #define SCALEYES 1
444
445 #define T_MAX_XML_TAG 64
446
447 #define NARGS_SEQ(_1,_2,_3,_4,_5,_6,_7,_8,N,...) N
448 #define NARGS(...) NARGS_SEQ(__VA_ARGS__, 8, 7, 6, 5, 4, 3, 2, 1)
449 #define PRIMITIVE_CAT(x, y) x ## y
450 #define CAT(x, y) PRIMITIVE_CAT(x, y)
451 #define FOR_EACH(macro, ...) CAT(FOR_EACH_, NARGS(__VA_ARGS__))(macro, __VA_ARGS__)
452 #define FOR_EACH_1(m, x1) m(x1)
453 #define FOR_EACH_2(m, x1, x2) m(x1) m(x2)
454 #define FOR_EACH_3(m, x1, x2, x3) m(x1) m(x2) m(x3)
455 #define FOR_EACH_4(m, x1, x2, x3, x4) m(x1) m(x2) m(x3) m(x4)
456 #define FOR_EACH_5(m, x1, x2, x3, x4, x5) m(x1) m(x2) m(x3) m(x4) m(x5)
457 #define FOR_EACH_6(m, x1, x2, x3, x4, x5, x6) m(x1) m(x2) m(x3) m(x4) m(x5) m(x6)
458 #define FOR_EACH_7(m, x1, x2, x3, x4, x5, x6, x7) m(x1) m(x2) m(x3) m(x4) m(x5) m(x6) m(x7)
459 #define FOR_EACH_8(m, x1, x2, x3, x4, x5, x6, x7, x8) m(x1) m(x2) m(x3) m(x4) m(x5) m(x6) m(x7) m(x8)
460 #define DUMP_EACH_INT(v) fprintf(stderr,"\n\t\tDEBUG:%s:\t\t%s--->%i",__PRETTY_FUNCTION__,#v,(v));fflush(stderr);
461 #define DUMP_EACH_STRING(v) fprintf(stderr,"\n\t\tDEBUG:%s:\t\t%s--->%s",__PRETTY_FUNCTION__,#v,(v));fflush(stderr);
462 #define DUMP_EACH_DECIMAL(v) fprintf(stderr,"\n\t\tDEBUG:%s:\t\t%s--->%f",__PRETTY_FUNCTION__,#v,(v));fflush(stderr);
463 #define DUMP_EACH_SCI(v) fprintf(stderr,"\n\t\tDEBUG:%s:\t\t%s--->%e",__PRETTY_FUNCTION__,#v,(v));fflush(stderr);
464 #define DUMP_I(...) FOR_EACH(DUMP_EACH_INT, __VA_ARGS__)
465 #define DUMP_S(...) FOR_EACH(DUMP_EACH_STRING, __VA_ARGS__)
466 #define DUMP_D(...) FOR_EACH(DUMP_EACH_DECIMAL, __VA_ARGS__)
467 #define DUMP_E(...) FOR_EACH(DUMP_EACH_SCI, __VA_ARGS__)
468
469 /*!********************************************************/
470 // Generic linked list
471 typedef struct __Generic_LL {
472 void *v;
473 struct __Generic_LL *next;
474 struct __Generic_LL *prev;
475 struct __Generic_LL *tail;
476 struct __Generic_LL *head;
477 }t_ll;
478
479 /*!********************************************************/
480
481 typedef struct __Scalar_Int {
482 int v;
483 struct __Scalar_Int *next;
484 struct __Scalar_Int *prev;
485 }scalar_int;
486
487
488 /*!********************************************************/
489
490 typedef struct __Scalar_Dbl {
491 phydbl v;
492 bool onoff;
493 struct __Scalar_Dbl *next;
494 struct __Scalar_Dbl *prev;
495 }scalar_dbl;
496
497 /*!********************************************************/
498
499 typedef struct __Vect_Int {
500 int *v;
501 int len;
502 struct __Vect_Int *next;
503 struct __Vect_Int *prev;
504 }vect_int;
505
506
507 /*!********************************************************/
508
509 typedef struct __Vect_Dbl {
510 phydbl *v;
511 int len;
512 struct __Vect_Dbl *next;
513 struct __Vect_Dbl *prev;
514 }vect_dbl;
515
516 /*!********************************************************/
517
518 typedef struct __String {
519 char *s;
520 int len;
521 struct __String *next;
522 struct __String *prev;
523 }t_string;
524
525 /*!********************************************************/
526
527 typedef struct __Node {
528 struct __Node **v; /*! table of pointers to neighbor nodes. Dimension = 3 */
529 struct __Node ***bip_node; /*! three lists of pointer to tip nodes. One list for each direction */
530 struct __Edge **b; /*! table of pointers to neighbor branches */
531 struct __Node *anc; /*! direct ancestor t_node (for rooted tree only) */
532 struct __Node *ext_node;
533 struct __Node *match_node;
534 struct __Align *c_seq; /*! corresponding compressed sequence */
535 struct __Align *c_seq_anc; /*! corresponding compressed ancestral sequence */
536
537
538 struct __Node *next; /*! tree->a_nodes[i]->next <=> tree->next->a_nodes[i] */
539 struct __Node *prev; /*! See above */
540 struct __Node *next_mixt; /*! Next mixture tree*/
541 struct __Node *prev_mixt; /*! Parent mixture tree */
542 struct __Calibration **cal; /*! List of calibration constraints attached to this node */
543 struct __Lindisk_Node *ldsk; /*! Used in PhyREX. Lineage/Disk this node corresponds to */
544 struct __Node *rk_next; /*! Next node in the list of ranked nodes (from oldest to youngest) */
545 struct __Node *rk_prev; /*! Previous node in the list of ranked nodes (from oldest to youngest) */
546 struct __Label *label;
547
548
549 int *bip_size; /*! Size of each of the three lists from bip_node */
550 int num; /*! t_node number */
551 int tax; /*! tax = 1 -> external node, else -> internal t_node */
552
553 char *name; /*! taxon name (if exists) */
554 char *ori_name; /*! taxon name (if exists) */
555 int n_cal; /*! Number of calibration constraints */
556
557 phydbl *score; /*! score used in BioNJ to determine the best pair of nodes to agglomerate */
558 phydbl dist_to_root; /*! distance to the root t_node */
559
560 short int common;
561 phydbl y_rank;
562 phydbl y_rank_ori;
563 phydbl y_rank_min;
564 phydbl y_rank_max;
565
566 int *s_ingrp; /*! does the subtree beneath belong to the ingroup */
567 int *s_outgrp; /*! does the subtree beneath belong to the outgroup */
568
569 int id_rank; /*! order taxa alphabetically and use id_rank to store the ranks */
570 int rank;
571 int rank_max;
572
573 struct __Geo_Coord *coord;
574
575 }t_node;
576
577
578 /*!********************************************************/
579
580 typedef struct __Edge {
581 /*!
582 syntax : (node) [edge]
583 (left_1) . .(right_1)
584 \ (left) (right) /
585 \._____________./
586 / [b_fcus] \
587 / \
588 (left_2) . .(right_2)
589
590 */
591
592 struct __Node *left,*rght; /*! t_node on the left/right side of the t_edge */
593 short int l_r,r_l,l_v1,l_v2,r_v1,r_v2;
594 /*! these are directions (i.e., 0, 1 or 2): */
595 /*! l_r (left to right) -> left[b_fcus->l_r] = right */
596 /*! r_l (right to left) -> right[b_fcus->r_l] = left */
597 /*! l_v1 (left t_node to first t_node != from right) -> left[b_fcus->l_v1] = left_1 */
598 /*! l_v2 (left t_node to secnd t_node != from right) -> left[b_fcus->l_v2] = left_2 */
599 /*! r_v1 (right t_node to first t_node != from left) -> right[b_fcus->r_v1] = right_1 */
600 /*! r_v2 (right t_node to secnd t_node != from left) -> right[b_fcus->r_v2] = right_2 */
601
602 struct __NNI *nni;
603 struct __Edge *next;
604 struct __Edge *prev;
605 struct __Edge *next_mixt;
606 struct __Edge *prev_mixt;
607 struct __Label *label;
608
609 int num; /*! branch number */
610 scalar_dbl *l; /*! branch length */
611 scalar_dbl *best_l; /*! best branch length found so far */
612 scalar_dbl *l_old; /*! old branch length */
613 scalar_dbl *l_var; /*! variance of edge length */
614 scalar_dbl *l_var_old; /*! variance of edge length (previous value) */
615
616
617 int bip_score; /*! score of the bipartition generated by the corresponding edge
618 bip_score = 1 iif the branch is found in both trees to be compared,
619 bip_score = 0 otherwise. */
620 phydbl tdist_score; /*! average transfer distance over bootstrap trees */
621
622 int num_st_left; /*! number of the subtree on the left side */
623 int num_st_rght; /*! number of the subtree on the right side */
624
625 phydbl *Pij_rr; /*! matrix of change probabilities and its first and secnd derivates (rate*state*state) */
626 phydbl *tPij_rr; /*! transpose matrix of change probabilities and its first and secnd derivates (rate*state*state) */
627 #ifdef BEAGLE
628 int Pij_rr_idx;
629 #endif
630
631 short int *div_post_pred_left; /*! posterior prediction of nucleotide/aa diversity (left-hand subtree) */
632 short int *div_post_pred_rght; /*! posterior prediction of nucleotide/aa diversity (rght-hand subtree) */
633 short int does_exist;
634
635
636
637 phydbl *p_lk_left,*p_lk_rght; /*! likelihoods of the subtree on the left and right side (for each site and each relative rate category) */
638 phydbl *p_lk_tip_r, *p_lk_tip_l;
639
640
641 #ifdef BEAGLE
642 int p_lk_left_idx, p_lk_rght_idx;
643 int p_lk_tip_idx;
644 #endif
645
646 int *patt_id_left;
647 int *patt_id_rght;
648 int *p_lk_loc_left;
649 int *p_lk_loc_rght;
650
651 int *pars_l,*pars_r; /*! parsimony of the subtree on the left and right sides (for each site) */
652 int *ui_l, *ui_r; /*! union - intersection vectors used in Fitch's parsimony algorithm */
653 int *p_pars_l, *p_pars_r; /*! conditional parsimony vectors */
654
655 /*! Below are the likelihood scaling factors (used in functions
656 `Get_All_Partial_Lk_Scale' in lk.c. */
657 /*
658 For every site, every subtree and every rate class, PhyML maintains a`sum_scale_pow' value
659 where sum_scale_pow = sum_scale_pow_v1 + sum_scale_pow_v2 + curr_scale_pow'
660 sum_scale_pow_v1 and sum_scale_pow_v2 are sum_scale_pow of the left and
661 right subtrees. curr_scale_pow is an integer greater than one.
662 The smaller the partials, the larger curr_scale_pow.
663
664 Now the partials for this subtree are scaled by *multiplying* each of
665 them by 2^curr_scale_pow. The reason for doing the scaling this way is
666 that multiplications by 2^x (x an integer) can be done in an 'exact'
667 manner (i.e., there is no loss of numerical precision)
668
669 At the root edge, the log-likelihood is then
670 logL = logL' - (sum_scale_pow_left + sum_scale_pow_right)log(2),
671 where L' is the scaled likelihood.
672 */
673
674 int *sum_scale_left_cat;
675 int *sum_scale_rght_cat;
676 int *sum_scale_left;
677 int *sum_scale_rght;
678
679 phydbl bootval; /*! bootstrap value (if exists) */
680
681 short int is_alive; /*! is_alive = 1 if this t_edge is used in a tree */
682
683 phydbl dist_btw_edges;
684 int topo_dist_btw_edges;
685
686 int has_zero_br_len;
687
688 phydbl ratio_test; /*! approximate likelihood ratio test */
689 phydbl alrt_statistic; /*! aLRT statistic */
690 phydbl support_val;
691
692
693 int n_jumps; /*! number of jumps of substitution rates */
694
695 int *n_diff_states_l; /*! Number of different states found in the subtree on the left of this edge */
696 int *n_diff_states_r; /*! Number of different states found in the subtree on the right of this edge */
697
698 phydbl bin_cod_num;
699
700 short int update_partial_lk_left;
701 short int update_partial_lk_rght;
702
703
704
705 }t_edge;
706
707 /*!********************************************************/
708
709 typedef struct __Tree{
710
711 struct __Node *n_root; /*! root t_node */
712 struct __Edge *e_root; /*! t_edge on which lies the root */
713 struct __Node **a_nodes; /*! array of nodes that defines the tree topology */
714 struct __Edge **a_edges; /*! array of edges */
715 struct __Model *mod; /*! substitution model */
716 struct __Calign *data; /*! sequences */
717 struct __Tree *next; /*! set to NULL by default. Used for mixture models */
718 struct __Tree *prev; /*! set to NULL by default. Used for mixture models */
719 struct __Tree *next_mixt; /*! set to NULL by default. Used for mixture models */
720 struct __Tree *prev_mixt; /*! set to NULL by default. Used for mixture models */
721 struct __Tree *mixt_tree; /*! set to NULL by default. Used for mixture models */
722 struct __Tree *extra_tree; /*! set to NULL by default. Used a latent variable in molecular dating */
723 struct __Option *io; /*! input/output */
724 struct __Matrix *mat; /*! pairwise distance matrix */
725 struct __Node **curr_path; /*! list of nodes that form a path in the tree */
726 struct __SPR **spr_list_one_edge;
727 struct __SPR **spr_list_all_edge;
728 struct __SPR *best_spr;
729 struct __Tdraw *ps_tree; /*! structure for drawing trees in postscript format */
730 struct __T_Rate *rates; /*! structure for handling rates of evolution */
731 struct __T_Time *times; /*! structure for handling node ages */
732 struct __Tmcmc *mcmc;
733 struct __Phylogeo *geo;
734 struct __Migrep_Model *mmod;
735 struct __Disk_Event *young_disk; /*! Youngest disk (i.e., disk which age is the closest to present). Used in PhyREX */
736 struct __Disk_Event *old_samp_disk; /*! Oldest sampled disk. Used in PhyREX */
737 struct __XML_node *xml_root;
738 struct __Generic_LL *edge_list;
739 struct __Generic_LL *node_list;
740 struct __Independent_Contrasts *ctrst; /*! Pointer to data structure used for independent contrasts */
741
742
743 short int eval_alnL; /*! Evaluate likelihood for genetic data */
744 short int eval_rlnL; /*! Evaluate likelihood for rates along the tree */
745 short int eval_glnL; /*! Evaluate tree likelihood */
746 short int scaling_method;
747 short int fully_nni_opt;
748 short int numerical_warning;
749
750 short int use_eigen_lr;
751 int is_mixt_tree;
752 int tree_num; /*! tree number. Used for mixture models */
753 int ps_page_number; /*! when multiple trees are printed, this variable give the current page number */
754 int depth_curr_path; /*! depth of the t_node path defined by curr_path */
755 int has_bip; /*!if has_bip=1, then the structure to compare
756 tree topologies is allocated, has_bip=0 otherwise */
757 int n_otu; /*! number of taxa */
758 int curr_site; /*! current site of the alignment to be processed */
759 int curr_catg; /*! current class of the discrete gamma rate distribution */
760 int n_swap; /*! number of NNIs performed */
761 int n_pattern; /*! number of distinct site patterns */
762 int has_branch_lengths; /*! =1 iff input tree displays branch lengths */
763 short int both_sides; /*! both_sides=1 -> a pre-order and a post-order tree
764 traversals are required to compute the likelihood
765 of every subtree in the phylogeny*/
766 int num_curr_branch_available; /*!gives the number of the next cell in a_edges that is free to receive a pointer to a branch */
767 short int *t_dir;
768 int n_moves;
769 int verbose;
770
771 int dp; /*! Data partition */
772 int s_mod_num; /*! Substitution model number */
773 int lock_topo; /*! = 1 any subsequent topological modification will be banished */
774 int write_labels;
775 int write_br_lens;
776 int *mutmap; /*! Mutational map */
777 int json_num;
778 short int update_eigen_lr;
779 int tip_root; /*! Index of tip node used as the root */
780 phydbl *eigen_lr_left;
781 phydbl *eigen_lr_rght;
782 phydbl *dot_prod;
783
784 phydbl *expl;
785
786 phydbl init_lnL;
787 phydbl best_lnL; /*! highest value of the loglikelihood found so far */
788 int best_pars; /*! highest value of the parsimony found so far */
789 phydbl c_lnL; /*! loglikelihood */
790 phydbl old_lnL; /*! old loglikelihood */
791 phydbl sum_min_sum_scale; /*! common factor of scaling factors */
792 phydbl *c_lnL_sorted; /*! used to compute c_lnL by adding sorted terms to minimize CPU errors */
793 phydbl *cur_site_lk; /*! vector of loglikelihoods at individual sites */
794 phydbl *old_site_lk; /*! vector of likelihoods at individual sites */
795 phydbl annealing_temp; /*! annealing temperature in simulated annealing optimization algo */
796 phydbl c_dlnL; /*! First derivative of the log-likelihood with respect to the length of a branch */
797 phydbl c_d2lnL; /*! Second derivative of the log-likelihood with respect to the length of a branch */
798
799
800 phydbl *unscaled_site_lk_cat; /*! partially scaled site likelihood at individual sites */
801
802 phydbl *site_lk_cat; /*! loglikelihood at a single site and for each class of rate*/
803 phydbl unconstraint_lk; /*! unconstrained (or multinomial) log-likelihood */
804 phydbl composite_lk; /*! composite log-likelihood */
805 int *fact_sum_scale;
806 phydbl **log_lks_aLRT; /*! used to compute several branch supports */
807 phydbl n_root_pos; /*! position of the root on its t_edge */
808 phydbl size; /*! tree size */
809 int *site_pars;
810 int c_pars;
811 int *step_mat;
812
813
814 int size_spr_list_one_edge;
815 int size_spr_list_all_edge;
816 int perform_spr_right_away;
817
818 time_t t_beg;
819 time_t t_current;
820
821 int bl_from_node_stamps; /*! == 1 -> Branch lengths are determined by t_node times */
822 phydbl sum_y_dist_sq;
823 phydbl sum_y_dist;
824 phydbl tip_order_score;
825 int write_tax_names;
826 int update_alias_subpatt;
827
828 phydbl geo_mig_sd; /*! standard deviation of the migration step random variable */
829 phydbl geo_lnL; /*! log likelihood of the phylo-geography model */
830
831 int bl_ndigits;
832
833 phydbl *short_l; /*! Vector of short branch length values */
834 int n_short_l; /*! Length of short_l */
835 phydbl norm_scale;
836
837 short int br_len_recorded;
838
839 short int apply_lk_scaling; /*! Applying scaling of likelihoods. YES/NO */
840
841 phydbl *K; /*! a vector of the norm.constants for the node times prior. */
842
843 short int ignore_root;
844 short int ignore_mixt_info;
845 #ifdef BEAGLE
846 int b_inst; /*! The BEAGLE instance id associated with this tree. */
847 #endif
848
849 // Extra partial lk structure for bookkeeping
850 short int *div_post_pred_extra_0;
851 int *sum_scale_cat_extra_0;
852 int *sum_scale_extra_0;
853 phydbl *p_lk_extra_0;
854 phydbl *p_lk_tip_extra_0;
855 int *patt_id_extra_0;
856
857 short int *div_post_pred_extra_1;
858 int *sum_scale_cat_extra_1;
859 int *sum_scale_extra_1;
860 phydbl *p_lk_extra_1;
861 phydbl *p_lk_tip_extra_1;
862 int *patt_id_extra_1;
863
864 int n_edges_traversed;
865 int n_tot_bl_opt;
866 }t_tree;
867
868 /*!********************************************************/
869
870 typedef struct __Super_Tree {
871 struct __Tree *tree;
872 struct __List_Tree *treelist; /*! list of trees. One tree for each data set to be processed */
873 struct __Calign *curr_cdata;
874 struct __Option **optionlist; /*! list of pointers to input structures (used in supertrees) */
875
876 struct __Node ***match_st_node_in_gt;
877 /*! match_st_in_gt_node[subdataset number][supertree t_node number]
878 * gives the t_node in tree estimated from 'subdataset number' that corresponds
879 * to 'supertree t_node number' in the supertree
880 */
881
882 struct __Node *****map_st_node_in_gt;
883 /*! mat_st_gt_node[gt_num][st_node_num][direction] gives the
884 * t_node in gt gt_num that maps t_node st_node_num in st.
885 */
886
887 struct __Edge ***map_st_edge_in_gt;
888 /*! map_st_gt_br[gt_num][st_branch_num] gives the
889 * branch in gt gt_num that maps branch st_branch_num
890 * in st.
891 */
892
893 struct __Edge ****map_gt_edge_in_st;
894 /*! mat_gt_st_br[gt_num][gt_branch_num][] is the list of
895 * branches in st that map branch gt_branch_num
896 * in gt gt_num.
897 */
898
899 int **size_map_gt_edge_in_st;
900 /*! size_map_gt_st_br[gt_num][gt_branch_num] gives the
901 * size of the list map_gt_st_br[gt_num][gt_branch_num][]
902 */
903
904
905 struct __Edge ***match_st_edge_in_gt;
906 /*! match_st_edge_in_gt[gt_num][st_branch_num] gives the
907 * branch in gt gt_num that matches branch st_branch_num
908 */
909
910 struct __Edge ***match_gt_edge_in_st;
911 /*! match_gt_edge_in_st[gt_num][gt_branch_num] gives the
912 * branch in st that matches branch gt_branch_num
913 */
914
915 struct __Node ****closest_match;
916 /*! closest_match[gt_num][st_node_num][dir] gives the
917 * closest t_node in st that matches a t_node in gt gt_num
918 */
919
920 int ***closest_dist;
921 /*! closest_dist[gt_num][st_node_num][dir] gives the
922 * number of edges to traverse to get to node
923 * closest_match[gt_num][st_node_num][dir]
924 */
925
926 int n_part;
927 /*! number of trees */
928
929 phydbl **bl;
930 /*! bl[gt_num][gt_branch_num] gives the length of
931 * branch gt_branch_num
932 */
933
934 phydbl **bl_cpy;
935 /*! copy of bl */
936
937 phydbl **bl0;
938 /*! bl estimated during NNI (original topo)
939 * See Mg_NNI.
940 */
941
942 phydbl **bl1;
943 /*! bl estimated during NNI (topo conf 1)
944 * See Mg_NNI.
945 */
946
947 phydbl **bl2;
948 /*! bl estimated during NNI (topo conf 2)
949 * See Mg_NNI.
950 */
951
952 int *bl_partition;
953 /*! partition[gt_num] gives the t_edge partition number
954 * gt_num belongs to.
955 */
956 int n_bl_part;
957
958 struct __Model **s_mod; /*! substitution model */
959
960 int n_s_mod;
961 int lock_br_len;
962
963 }supert_tree;
964
965 /*!********************************************************/
966
967 typedef struct __List_Tree { /*! a list of trees */
968 struct __Tree **tree;
969 int list_size; /*! number of trees in the list */
970 }t_treelist;
971
972 /*!********************************************************/
973
974 typedef struct __Align {
975 char *name; /*! sequence name */
976 int len; /*! sequence length */
977 char *state; /*! sequence itself */
978 short int *d_state; /*! sequence itself (digits) */
979 short int *is_ambigu; /*! is_ambigu[site] = 1 if state[site] is an ambiguous character. 0 otherwise */
980 short int is_duplicate;
981 int num;
982 }align;
983
984 /*!********************************************************/
985
986 typedef struct __Calign {
987 struct __Align **c_seq; /*! compressed sequences */
988 struct __Align **c_seq_rm; /*! removed sequences */
989 struct __Option *io; /*! input/output */
990 phydbl *obs_state_frq; /*! observed state frequencies */
991 short int *invar; /*! < 0 -> polymorphism observed */
992 phydbl *wght; /*! # of each site in c_align */
993 short int *ambigu; /*! ambigu[i]=1 is one or more of the sequences at site
994 i display an ambiguous character */
995 phydbl obs_pinvar;
996 int n_otu; /*! number of taxa */
997 int n_rm; /*! number of taxa removed */
998 int clean_len; /*! uncrunched sequences lenghts without gaps */
999 int crunch_len; /*! crunched sequences lengths */
1000 int init_len; /*! length of the uncompressed sequences */
1001 int *sitepatt; /*! this array maps the position of the patterns in the
1002 compressed alignment to the positions in the uncompressed
1003 one */
1004 int format; /*! 0 (default): PHYLIP. 1: NEXUS. */
1005 scalar_dbl *io_wght; /*! weight of each *site* (not pattern) given as input */
1006 }calign;
1007
1008 /*!********************************************************/
1009
1010 typedef struct __Matrix { /*! mostly used in BIONJ */
1011 phydbl **P,**Q,**dist; /*! observed proportions of transition, transverion and distances
1012 between pairs of sequences */
1013
1014 t_tree *tree; /*! tree... */
1015 int *on_off; /*! on_off[i]=1 if column/line i corresponds to a t_node that has not
1016 been agglomerated yet */
1017 int n_otu; /*! number of taxa */
1018 char **name; /*! sequence names */
1019 int r; /*! number of nodes that have not been agglomerated yet */
1020 struct __Node **tip_node; /*! array of pointer to the leaves of the tree */
1021 int curr_int; /*! used in the NJ/BIONJ algorithms */
1022 int method; /*! if method=1->NJ method is used, BIONJ otherwise */
1023 }matrix;
1024
1025 /*!********************************************************/
1026
1027 typedef struct __RateMatrix {
1028 int n_diff_rr; /*! number of different relative substitution rates in the custom model */
1029 vect_dbl *rr; /*! relative rate parameters of the GTR or custom model (rescaled) */
1030 vect_dbl *rr_val; /*! log of relative rate parameters of the GTR or custom model (unscaled) */
1031 vect_int *rr_num;
1032 vect_int *n_rr_per_cat; /*! number of rate parameters in each category */
1033 vect_dbl *qmat;
1034 vect_dbl *qmat_buff;
1035
1036 struct __RateMatrix *next;
1037 struct __RateMatrix *prev;
1038 }t_rmat;
1039
1040 /*!********************************************************/
1041
1042 typedef struct __RAS {
1043 /*! Rate across sites */
1044 int n_catg; /*! number of categories in the discrete gamma distribution */
1045 int invar; /*! =1 iff the substitution model takes into account invariable sites */
1046 int gamma_median; /*! 1: use the median of each bin in the discrete gamma distribution. 0: the mean is used */
1047 vect_dbl *gamma_r_proba; /*! probabilities of the substitution rates defined by the discrete gamma distribution */
1048 vect_dbl *gamma_r_proba_unscaled;
1049 vect_dbl *gamma_rr; /*! substitution rates defined by the RAS distribution */
1050 vect_dbl *gamma_rr_unscaled; /*! substitution rates defined by the RAS distribution (unscaled) */
1051 scalar_dbl *alpha; /*! gamma shapa parameter */
1052 int free_mixt_rates;
1053 scalar_dbl *free_rate_mr; /*! mean relative rate as given by the FreeRate model */
1054
1055
1056 int parent_class_number;
1057 scalar_dbl *pinvar; /*! proportion of invariable sites */
1058
1059 short int init_rr;
1060 short int init_r_proba;
1061 short int normalise_rr;
1062
1063 short int *skip_rate_cat; /*! indicates whether of not the the likelihood for a given rate class shall be calculated.
1064 The default model in PhyML has four rate classes and the likelihood at a given site is
1065 then \sum_{i=1}^{4} \Pr(D|R=i) \Pr(R=i). Now, when optimising
1066 the rate for say the first rate class (i=1) one does not need to
1067 re-compute \Pr(D|R=2), \Pr(D|R=3) and \Pr(D|R=4) (which is the time
1068 consuming part of the likelihood calculation). This is where
1069 'skip_rate_category' comes in handy. */
1070
1071 short int sort_rate_classes; /*! When doing MCMC moves on rate classes, one needs to have the rate classes sorted */
1072
1073 struct __RAS *next;
1074 struct __RAS *prev;
1075
1076 }t_ras;
1077
1078 /*!********************************************************/
1079
1080 typedef struct __EquFreq {
1081 /*! Equilibrium frequencies */
1082 vect_dbl *pi; /*! states frequencies */
1083 vect_dbl *pi_unscaled; /*! states frequencies (unscaled) */
1084
1085 phydbl *b_frq; /*! vector of empirical state frequencies */
1086
1087 short int empirical_state_freq;
1088 short int user_state_freq;
1089 vect_dbl *user_b_freq; /*! user-defined nucleotide frequencies */
1090
1091 struct __EquFreq *next;
1092 struct __EquFreq *prev;
1093
1094 }t_efrq;
1095
1096 /*!********************************************************/
1097
1098 typedef struct __Model {
1099 struct __Optimiz *s_opt; /*! pointer to parameters to optimize */
1100 struct __Eigen *eigen;
1101 struct __M4 *m4mod;
1102 struct __Option *io;
1103 struct __Model *next;
1104 struct __Model *prev;
1105 struct __Model *next_mixt;
1106 struct __Model *prev_mixt;
1107 struct __RateMatrix *r_mat;
1108 struct __EquFreq *e_frq;
1109 struct __RAS *ras;
1110
1111 t_string *aa_rate_mat_file;
1112 FILE *fp_aa_rate_mat;
1113
1114 t_string *modelname;
1115 t_string *custom_mod_string; /*! string of characters used to define custom models of substitution */
1116
1117 int mod_num; /*! model number */
1118
1119 int update_eigen; /*! update_eigen=1-> eigen values/vectors need to be updated */
1120
1121 int whichmodel;
1122 int is_mixt_mod;
1123 int augmented;
1124 int ns; /*! number of states (4 for ADN, 20 for AA) */
1125
1126 int use_m4mod; /*! Use a Markov modulated Markov model ? */
1127
1128 scalar_dbl *kappa; /*! transition/transversion rate */
1129 scalar_dbl *lambda; /*! parameter used to define the ts/tv ratios in the F84 and TN93 models */
1130 scalar_dbl *br_len_mult; /*! when users want to fix the relative length of edges and simply estimate the total length of the tree. This multiplier does the trick */
1131 scalar_dbl *br_len_mult_unscaled;
1132
1133 vect_dbl *Pij_rr; /*! matrix of change probabilities */
1134 scalar_dbl *mr; /*! mean rate = branch length/time interval mr = -sum(i)(vct_pi[i].mat_Q[ii]) */
1135
1136 short int log_l; /*! Edge lengths are actually log(Edge lengths) if log_l == YES !*/
1137 phydbl l_min; /*! Minimum branch length !*/
1138 phydbl l_max; /*! Maximum branch length !*/
1139
1140 phydbl l_var_sigma; /*! For any edge b we have b->l_var->v = l_var_sigma * (b->l->v)^2 */
1141 phydbl l_var_min; /*! Min of variance of branch lengths (used in conjunction with gamma_mgf_bl == YES) */
1142 phydbl l_var_max; /*! Max of variance of branch lengths (used in conjunction with gamma_mgf_bl == YES) */
1143
1144 int gamma_mgf_bl; /*! P = \int_0^inf exp(QL) p(L) where L=\int_0^t R(s) ds and p(L) is the gamma density. Set to NO by default !*/
1145
1146 int n_mixt_classes; /* Number of classes in the mixture model. */
1147
1148 scalar_dbl *r_mat_weight;
1149 scalar_dbl *e_frq_weight;
1150 #ifdef BEAGLE
1151 int b_inst;
1152 bool optimizing_topology; /*! This is a flag that prevents the resetting of category weights. Why? Read */
1153 /* Recall that while optimizing the toplogy, PhyML temporarily only uses 2
1154 * rate categories. Recall also that a BEAGLE instance is created with all the
1155 * required categories, but we temporarily assign 0 weight to the other categories
1156 * thus effectively using only 2 categories. However, subsequent calls to
1157 * update the rates (i.e. update_beagle_ras()) will reset the weights. This flag
1158 * prevents this resetting from happening */
1159 #endif
1160
1161 }t_mod;
1162
1163 /*!********************************************************/
1164
1165 typedef struct __Eigen{
1166 int size; /*! matrix is size * size */
1167 phydbl *q; /*! matrix for which eigen values and vectors are computed */
1168 phydbl *space;
1169 int *space_int;
1170 phydbl *e_val; /*! eigen values (vector), real part. */
1171 phydbl *e_val_im; /*! eigen values (vector), imaginary part */
1172 phydbl *r_e_vect_im; /*! right eigen vector (matrix), imaginary part */
1173 phydbl *l_e_vect; /*! left eigen vector (matrix), real part */
1174 phydbl *r_e_vect; /*! right eigen vector (matrix), real part */
1175 phydbl *dum; /*! dummy ns*ns space */
1176
1177 struct __Eigen *prev;
1178 struct __Eigen *next;
1179 }eigen;
1180
1181 /*!********************************************************/
1182
1183 typedef struct __Option { /*! mostly used in 'help.c' */
1184 struct __Model *mod; /*! pointer to a substitution model */
1185 struct __Tree *tree; /*! pointer to the current tree */
1186 struct __Align **data; /*! pointer to the uncompressed sequences */
1187 struct __Tree *cstr_tree; /*! pointer to a constraint tree (can be a multifurcating one) */
1188 struct __Calign *cdata; /*! pointer to the compressed sequences */
1189 struct __Super_Tree *st; /*! pointer to supertree */
1190 struct __Tnexcom **nex_com_list;
1191 struct __List_Tree *treelist; /*! list of trees. */
1192 struct __Option *next;
1193 struct __Option *prev;
1194 struct __Tmcmc *mcmc;
1195 struct __T_Rate *rates;
1196 struct __T_Time *times;
1197
1198 int interleaved; /*! interleaved or sequential sequence file format ? */
1199 int in_tree; /*! =1 iff a user input tree is used as input */
1200
1201 char *in_align_file; /*! alignment file name */
1202 FILE *fp_in_align; /*! pointer to the alignment file */
1203
1204 char *in_tree_file; /*! input tree file name */
1205 FILE *fp_in_tree; /*! pointer to the input tree file */
1206
1207 char *in_constraint_tree_file; /*! input constraint tree file name */
1208 FILE *fp_in_constraint_tree; /*! pointer to the input constraint tree file */
1209
1210 char *out_tree_file; /*! name of the tree file */
1211 FILE *fp_out_tree;
1212
1213 char *weight_file; /*! name of the file containing site weights */
1214 FILE *fp_weight_file;
1215
1216 char *out_trees_file; /*! name of the tree file */
1217 FILE *fp_out_trees; /*! pointer to the tree file containing all the trees estimated using random starting trees */
1218
1219 char *out_boot_tree_file; /*! name of the tree file */
1220 FILE *fp_out_boot_tree; /*! pointer to the bootstrap tree file */
1221
1222 char *out_boot_stats_file; /*! name of the tree file */
1223 FILE *fp_out_boot_stats; /*! pointer to the statistics file */
1224
1225 char *out_stats_file; /*! name of the statistics file */
1226 FILE *fp_out_stats;
1227
1228 char *out_trace_file; /*! name of the file in which the trace is written */
1229 FILE *fp_out_trace;
1230
1231 char *out_json_trace_file; /*! name of the file in which json trace is written */
1232 FILE *fp_out_json_trace;
1233
1234 char *out_lk_file; /*! name of the file in which the likelihood of the model is written */
1235 FILE *fp_out_lk;
1236
1237 char *out_summary_file; /*! name of the file in which summary statistics are written */
1238 FILE *fp_out_summary;
1239
1240 char *out_ps_file; /*! name of the file in which tree(s) is(are) written */
1241 FILE *fp_out_ps;
1242
1243 char *out_ancestral_seq_file; /*! name of the file containing the ancestral sequences */
1244 char *out_ancestral_tree_file; /*! name of the file containing the tree with internal node labelled according to refs in ancestral_seq_file */
1245
1246 FILE *fp_out_ancestral_seq; /*! pointer to the file containing the ancestral sequences */
1247 FILE *fp_out_ancestral_tree; /*! pointer to the file containing the tree with labels on internal nodes */
1248
1249 char *in_coord_file; /*! name of input file containing coordinates */
1250 FILE *fp_in_coord; /*! pointer to the file containing coordinates */
1251
1252 char *out_file; /*! name of the output file */
1253
1254 char *clade_list_file;
1255
1256 int datatype; /*! 0->DNA, 1->AA */
1257 int print_boot_trees; /*! =1 if the bootstrapped trees are printed in output */
1258 int out_stats_file_open_mode; /*! opening file mode for statistics file */
1259 int out_tree_file_open_mode; /*! opening file mode for tree file */
1260 int n_data_sets; /*! number of data sets to be analysed */
1261 int n_trees; /*! number of trees */
1262 int init_len; /*! sequence length */
1263 int n_otu; /*! number of taxa */
1264 int n_data_set_asked; /*! number of bootstrap replicates */
1265 char *nt_or_cd; /*! nucleotide or codon data ? (not used) */
1266 int multigene; /*! if=1 -> analyse several partitions. */
1267 int config_multigene;
1268 int n_part; /*! number of data partitions */
1269 int curr_gt;
1270 int ratio_test; /*! from 1 to 4 for specific branch supports, 0 of not */
1271 int ready_to_go;
1272 int data_file_format; /*! Data format: Phylip or Nexus */
1273 int tree_file_format; /*! Tree format: Phylip or Nexus */
1274 int state_len;
1275
1276 int curr_interface;
1277 int r_seed; /*! random seed */
1278 int collapse_boot; /*! 0 -> branch length on bootstrap trees are not collapsed if too small */
1279 int random_boot_seq_order; /*! !0 -> sequence order in bootstrapped data set is random */
1280 int print_trace;
1281 int print_json_trace;
1282 int print_site_lnl;
1283 int m4_model;
1284 int rm_ambigu; /*! 0 is the default. 1: columns with ambiguous characters are discarded prior further analysis */
1285 int colalias;
1286 int append_run_ID;
1287 char *run_id_string;
1288 int quiet; /*! 0 is the default. 1: no interactive question (for batch mode) */
1289 int lk_approx; /* EXACT or NORMAL */
1290 char **alphabet;
1291 int codpos;
1292 int mutmap;
1293 int use_xml;
1294
1295 char **long_tax_names;
1296 char **short_tax_names;
1297 int size_tax_names;
1298
1299 phydbl *z_scores;
1300 phydbl *lat;
1301 phydbl *lon;
1302
1303 int boot_prog_every;
1304
1305 int mem_question;
1306 int do_alias_subpatt;
1307
1308 #ifdef BEAGLE
1309 int beagle_resource;
1310 #endif
1311
1312 int ancestral;
1313 int has_io_weights;
1314 int tbe_bootstrap;/* Replace standard bootstrap with tbe bootstrap (only when b>0) */
1315
1316 int leave_duplicates;/* Leave duplicated sequences */
1317 int precision;/* Decimal output precision for values in stats file */
1318 int n_boot_replicates;
1319
1320 short int print_node_num; /*! print node numbers if print_node_num=1 */
1321 short int print_support_val;
1322
1323 short int do_tbe;
1324 short int do_boot;
1325 short int do_alrt;
1326
1327 }option;
1328
1329 /*!********************************************************/
1330
1331 typedef struct __Optimiz { /*! parameters to be optimised (mostly used in 'optimiz.c') */
1332 short int opt_alpha; /*! =1 -> the gamma shape parameter is optimised */
1333 short int opt_kappa; /*! =1 -> the ts/tv ratio parameter is optimised */
1334 short int opt_lambda; /*! =1 -> the F84|TN93 model specific parameter is optimised */
1335 short int opt_pinvar; /*! =1 -> the proportion of invariants is optimised */
1336 short int opt_state_freq; /*! =1 -> the nucleotide frequencies are optimised */
1337 short int opt_rr; /*! =1 -> the relative rate parameters of the GTR or the customn model are optimised */
1338 short int opt_subst_param; /*! if opt_topo=0 and opt_subst_param=1 -> the numerical parameters of the
1339 model are optimised. if opt_topo=0 and opt_free_param=0 -> no parameter is
1340 optimised */
1341 short int opt_cov_delta;
1342 short int opt_cov_alpha;
1343 short int opt_cov_free_rates;
1344
1345 short int opt_clock_r;
1346
1347 short int opt_bl; /*! =1 -> the branch lengths are optimised */
1348 short int opt_topo; /*! =1 -> the tree topology is optimised */
1349 short int topo_search;
1350 phydbl init_lk; /*! initial loglikelihood value */
1351 int n_it_max; /*! maximum bnumber of iteration during an optimisation step */
1352 int last_opt; /*! =1 -> the numerical parameters are optimised further while the
1353 tree topology remains fixed */
1354 int random_input_tree; /*! boolean */
1355 int n_rand_starts; /*! number of random starting points */
1356 int brent_it_max;
1357 int steph_spr;
1358 int opt_five_branch;
1359 int pars_thresh;
1360 int hybrid_thresh;
1361 int opt_br_len_mult;
1362 int min_n_triple_moves;
1363 int max_rank_triple_move;
1364 int n_improvements;
1365 int max_spr_depth;
1366 int max_no_better_tree_found;
1367
1368 phydbl tree_size_mult; /*! tree size multiplier */
1369 phydbl min_diff_lk_local;
1370 phydbl min_diff_lk_global;
1371 phydbl min_diff_lk_move;
1372 phydbl p_moves_to_examine;
1373 int fast_nni;
1374 int greedy;
1375 int general_pars;
1376 int quickdirty;
1377 int spr_pars;
1378 int spr_lnL;
1379 int max_depth_path;
1380 int min_depth_path;
1381 int deepest_path;
1382 int eval_list_regraft;
1383 phydbl max_delta_lnL_spr;
1384 phydbl max_delta_lnL_spr_current;
1385 phydbl worst_lnL_spr;
1386 int br_len_in_spr;
1387 int opt_free_mixt_rates;
1388 int constrained_br_len;
1389 int opt_gamma_br_len;
1390 int first_opt_free_mixt_rates;
1391 int wim_n_rgrft;
1392 int wim_n_globl;
1393 int wim_max_dist;
1394 int wim_n_optim;
1395 int wim_n_best;
1396 int wim_inside_opt;
1397
1398 int opt_rmat_weight;
1399 int opt_efrq_weight;
1400
1401 int skip_tree_traversal;
1402 int serial_free_rates;
1403
1404 int curr_opt_free_rates;
1405
1406 int nni_br_len_opt;
1407
1408 int apply_spr_right_away;
1409 int apply_spr;
1410
1411 phydbl l_min_spr;
1412 }t_opt;
1413
1414 /*!********************************************************/
1415
1416 typedef struct __NNI{
1417
1418 struct __Node *left;
1419 struct __Node *rght;
1420 struct __Edge *b;
1421
1422 phydbl score;
1423 scalar_dbl *init_l;
1424 scalar_dbl *init_v;
1425 phydbl init_lk;
1426 scalar_dbl *best_l;
1427 scalar_dbl *best_v;
1428 phydbl lk0,lk1,lk2;
1429 scalar_dbl *l0,*l1,*l2;
1430 scalar_dbl *v0,*v1,*v2;
1431
1432 struct __Node *swap_node_v1;
1433 struct __Node *swap_node_v2;
1434 struct __Node *swap_node_v3;
1435 struct __Node *swap_node_v4;
1436
1437 int best_conf; /*! best topological configuration :
1438 ((left_1,left_2),right_1,right_2) or
1439 ((left_1,right_2),right_1,left_2) or
1440 ((left_1,right_1),right_1,left_2) */
1441 }t_nni;
1442
1443 /*!********************************************************/
1444
1445 typedef struct __SPR{
1446 struct __Node *n_link;
1447 struct __Node *n_opp_to_link;
1448 struct __Edge *b_opp_to_link;
1449 struct __Edge *b_target;
1450 struct __Edge *b_init_target;
1451 struct __Node **path;
1452
1453 scalar_dbl *init_target_l;
1454 scalar_dbl *init_target_v;
1455
1456 scalar_dbl *l0,*l1,*l2;
1457 scalar_dbl *v0,*v1,*v2;
1458
1459 phydbl lnL;
1460 int depth_path;
1461 int pars;
1462 int dist;
1463
1464 struct __SPR *next;
1465 struct __SPR *prev;
1466 struct __SPR *next_mixt;
1467 struct __SPR *prev_mixt;
1468 struct __SPR *path_prev;
1469 struct __SPR *path_next;
1470
1471 }t_spr;
1472
1473 /*!********************************************************/
1474
1475 typedef struct __Pnode{
1476 struct __Pnode **next;
1477 int weight;
1478 int num;
1479 }pnode;
1480
1481 /*!********************************************************/
1482
1483 typedef struct __M4 {
1484 int n_h; /*! number of hidden states */
1485 int n_o; /*! number of observable states */
1486 int use_cov_alpha;
1487 int use_cov_free;
1488
1489 phydbl **o_mats; /*! set of matrices of substitution rates across observable states */
1490 phydbl *multipl; /*! vector of values that multiply each o_mats matrix */
1491 phydbl *o_rr; /*! relative rates (symmetric) of substitution between observable states */
1492 phydbl *h_rr; /*! relative rates (symmetric) of substitution between hidden states */
1493 phydbl *h_mat; /*! matrix that describes the substitutions between hidden states (aka switches) */
1494 phydbl *o_fq; /*! equilibrium frequencies for the observable states */
1495 phydbl *h_fq; /*! equilibrium frequencies for the hidden states */
1496 phydbl *h_fq_unscaled; /*! unscaled equilibrium frequencies for the hidden states */
1497 phydbl *multipl_unscaled; /*! unscaled vector of values that multiply each o_mats matrix */
1498
1499 phydbl delta; /*! switching rate */
1500 phydbl alpha; /*! gamma shape parameter */
1501 }m4;
1502
1503 /*!********************************************************/
1504
1505 typedef struct __Tdraw {
1506 phydbl *xcoord; /*! t_node coordinates on the x axis */
1507 phydbl *ycoord; /*! t_node coordinates on the y axis */
1508 phydbl *xcoord_s; /*! t_node coordinates on the x axis (scaled) */
1509 phydbl *ycoord_s; /*! t_node coordinates on the y axis (scaled) */
1510 int page_width;
1511 int page_height;
1512 int tree_box_width;
1513
1514 int *cdf_mat;
1515 phydbl *cdf_mat_x;
1516 phydbl *cdf_mat_y;
1517
1518 phydbl max_dist_to_root;
1519 }tdraw;
1520
1521 /*!********************************************************/
1522
1523 typedef struct __T_Rate {
1524 phydbl lexp; /*! Parameter of the exponential distribution that governs the rate at which substitution between rate classes ocur */
1525 phydbl alpha;
1526 phydbl less_likely;
1527 phydbl c_lnL1;
1528 phydbl c_lnL2;
1529 phydbl c_lnL_rates; /*! Prob(Br len | time stamps, model of rate evolution) */
1530 phydbl clock_r; /*! Mean substitution rate, i.e., 'molecular clock' rate */
1531 phydbl min_clock;
1532 phydbl max_clock;
1533 phydbl lbda_nu;
1534 phydbl min_dt;
1535 phydbl step_rate;
1536 phydbl true_tree_size;
1537 phydbl p_max;
1538 phydbl norm_fact;
1539 phydbl nu; /*! Parameter of the Exponential distribution for the corresponding model */
1540 phydbl min_nu;
1541 phydbl max_nu;
1542 phydbl covdet;
1543 phydbl sum_invalid_areas;
1544
1545 phydbl min_rate;
1546 phydbl max_rate;
1547
1548 phydbl *nd_r; /*! Current rates at nodes */
1549 phydbl *br_r; /*! Current rates along edges */
1550 phydbl *triplet;
1551 phydbl *true_r; /*! true t_edge rates (on rooted tree) */
1552 phydbl *buff_t;
1553 phydbl *buff_br_r;
1554 phydbl *buff_nd_r;
1555 phydbl *dens; /*! Probability densities of mean substitution rates at the nodes */
1556 phydbl *ml_l; /*! ML t_edge lengths (rooted) */
1557 phydbl *cur_l; /*! Current t_edge lengths (rooted) */
1558 phydbl *u_ml_l; /*! ML t_edge lengths (unrooted) */
1559 phydbl *u_cur_l; /*! Current t_edge lengths (unrooted) */
1560 phydbl *invcov;
1561 phydbl *cov_r;
1562 phydbl *mean_r; /*! average values of br_r taken across the sampled values during the MCMC */
1563 phydbl *_2n_vect1;
1564 phydbl *_2n_vect2;
1565 phydbl *_2n_vect3;
1566 phydbl *_2n_vect4;
1567 short int *_2n_vect5;
1568 phydbl *_2n2n_vect1;
1569 phydbl *_2n2n_vect2;
1570 phydbl *trip_cond_cov;
1571 phydbl *trip_reg_coeff;
1572 phydbl *cond_var;
1573 phydbl *reg_coeff;
1574 phydbl *mean_l;
1575 phydbl *cov_l;
1576 phydbl *grad_l; /* gradient */
1577 phydbl inflate_var;
1578
1579 int adjust_rates; /*! if = 1, branch rates are adjusted such that a modification of a given t_node time
1580 does not modify any branch lengths */
1581 int use_rates; /*! if = 0, branch lengths are expressed as differences between t_node times */
1582 int bl_from_rt; /*! if =1, branch lengths are obtained as the product of cur_r and t */
1583 int approx;
1584 int model; /*! Model number */
1585 char *model_name;
1586 int is_allocated;
1587 int met_within_gibbs;
1588
1589 int update_mean_l;
1590 int update_cov_l;
1591
1592 int *n_tips_below;
1593
1594 struct __Node **lca; /*! 2-way table of common ancestral nodes for each pair of nodes */
1595
1596 short int *br_do_updt;
1597 phydbl *cur_gamma_prior_mean;
1598 phydbl *cur_gamma_prior_var;
1599
1600 short int br_r_recorded;
1601
1602 phydbl log_K_cur;
1603 int cur_comb_numb;
1604
1605 struct __T_Rate *next;
1606 struct __T_Rate *prev;
1607
1608 }t_rate;
1609
1610 /*!********************************************************/
1611
1612 typedef struct __T_Time {
1613 phydbl *nd_t; /*! Current t_node times */
1614 phydbl *buff_t; /*! Current t_node times */
1615 phydbl *true_t; /*! Current t_node times */
1616
1617 phydbl c_lnL_times; /*! Prob(time stamps) */
1618 phydbl c_lnL_jps; /*! Prob(# Jumps | time stamps, rates, model of rate evolution) */
1619
1620 phydbl scaled_pop_size; // Product of effective population size with length of a generation in calendar time unit
1621
1622 phydbl birth_rate;
1623 phydbl birth_rate_min;
1624 phydbl birth_rate_max;
1625 phydbl birth_rate_pivot;
1626
1627 phydbl death_rate;
1628 phydbl death_rate_min;
1629 phydbl death_rate_max;
1630 phydbl death_rate_pivot;
1631
1632 phydbl *t_prior;
1633 phydbl *t_prior_min;
1634 phydbl *t_prior_max;
1635 phydbl *t_floor;
1636 phydbl *t_mean;
1637 int *t_rank; /* rank of nodes, e.g., tree->nd_a[tree->rates->t_rank[0]] is the oldest node */
1638
1639 short int nd_t_recorded;
1640
1641 short int is_asynchronous;
1642
1643 phydbl *time_slice_lims;
1644 phydbl *calib_prob;
1645
1646 struct __Calibration **a_cal; /* array of calibration data */
1647 int n_cal; /* number of elements in a_cal */
1648
1649 phydbl *t_prior_min_ori;
1650 phydbl *t_prior_max_ori;
1651 phydbl *times_partial_proba;
1652
1653 int *has_survived;
1654
1655 int *n_jps;
1656 int *t_jps;
1657
1658 int *numb_calib_chosen;
1659
1660 int n_time_slices;
1661 int *n_time_slice_spans;
1662 int *curr_slice;
1663
1664 short int *t_has_prior;
1665
1666 phydbl *mean_t; /*! average values of nd_t taken across the sampled values during the MCMC */
1667
1668 int model;
1669
1670 int update_time_norm_const;
1671 }t_time;
1672
1673 /*!********************************************************/
1674
1675 typedef struct __Tmcmc {
1676 struct __Option *io;
1677
1678 phydbl *tune_move;
1679 phydbl *move_weight;
1680
1681 phydbl *acc_rate;
1682 int *acc_move;
1683 int *run_move;
1684 int *prev_acc_move;
1685 int *prev_run_move;
1686 int *num_move;
1687 int *move_type;
1688 char **move_name;
1689
1690 int num_move_nd_r;
1691 int num_move_br_r;
1692 int num_move_times;
1693 int num_move_times_and_rates;
1694 int num_move_times_and_rates_root;
1695 int num_move_root_time;
1696 int num_move_nu;
1697 int num_move_clock_r;
1698 int num_move_tree_height;
1699 int num_move_time_slice;
1700 int num_move_subtree_height;
1701 int num_move_kappa;
1702 int num_move_rr;
1703 int num_move_spr;
1704 int num_move_spr_weighted;
1705 int num_move_spr_local;
1706 int num_move_spr_root;
1707 int num_move_tree_rates;
1708 int num_move_subtree_rates;
1709 int num_move_updown_nu_cr;
1710 int num_move_updown_t_cr;
1711 int num_move_updown_t_br;
1712 int num_move_ras;
1713 int num_move_cov_rates;
1714 int num_move_cov_switch;
1715 int num_move_birth_rate;
1716 int num_move_death_rate;
1717 int num_move_birth_death_updown;
1718 int num_move_jump_calibration;
1719 int num_move_geo_lambda;
1720 int num_move_geo_sigma;
1721 int num_move_geo_tau;
1722 int num_move_geo_dum;
1723 int num_move_geo_updown_tau_lbda;
1724 int num_move_geo_updown_lbda_sigma;
1725 int num_move_phyrex_lbda;
1726 int num_move_phyrex_mu;
1727 int num_move_phyrex_rad;
1728 int num_move_phyrex_indel_disk;
1729 int num_move_phyrex_move_disk_ct;
1730 int num_move_phyrex_move_disk_ud;
1731 int num_move_phyrex_swap_disk;
1732 int num_move_phyrex_indel_hit;
1733 int num_move_phyrex_spr;
1734 int num_move_phyrex_spr_local;
1735 int num_move_phyrex_scale_times;
1736 int num_move_phyrex_ldscape_lim;
1737 int num_move_phyrex_sigsq;
1738 int num_move_phyrex_neff;
1739 int num_move_phyrex_sim;
1740 int num_move_phyrex_traj;
1741 int num_move_phyrex_indel_disk_serial;
1742 int num_move_phyrex_sim_plus;
1743 int num_move_phyrex_indel_hit_serial;
1744 int num_move_phyrex_ldsk_and_disk;
1745 int num_move_phyrex_ldsk_multi;
1746 int num_move_phyrex_disk_multi;
1747 int num_move_phyrex_ldsk_given_disk;
1748 int num_move_phyrex_disk_given_ldsk;
1749 int num_move_phyrex_add_remove_jump;
1750 int num_move_clade_change;
1751 int num_move_phyrex_ldsk_tip_to_root;
1752 int num_move_phyrex_sigsq_scale;
1753
1754 int nd_t_digits;
1755 int *monitor;
1756
1757 char *out_filename;
1758
1759 time_t t_beg;
1760 time_t t_cur;
1761 time_t t_last_print;
1762
1763 FILE *out_fp_stats;
1764 FILE *out_fp_trees;
1765 FILE *out_fp_means;
1766 FILE *out_fp_last;
1767 FILE *out_fp_constree;
1768 FILE *in_fp_par;
1769
1770 int *adjust_tuning;
1771 int n_moves;
1772 int move_idx;
1773 int randomize;
1774 int norm_freq;
1775 int run;
1776 int chain_len;
1777 int sample_interval;
1778 int chain_len_burnin;
1779 int print_every;
1780 int is_burnin;
1781 int max_lag;
1782
1783 phydbl max_tune;
1784 phydbl min_tune;
1785
1786 phydbl *sampled_val;
1787 int sample_size;
1788 int sample_num;
1789 phydbl *ess;
1790 int *ess_run;
1791 int *start_ess;
1792 phydbl *mode;
1793 int always_yes; /* Always accept proposed move (as long as log-likelihood > UNLIKELY) */
1794 int is; /* Importance sampling? Yes or NO */
1795 }t_mcmc;
1796
1797 /*!********************************************************/
1798
1799 typedef struct __Tpart {
1800 int *ns; /*! number of states for each partition (e.g., 2, 4, 3) */
1801 int *cum_ns; /*! cumulative number of states (e.g., 0, 2, 6) */
1802 int ns_max; /*! maximum number of states */
1803 int ns_min; /*! minimum number of states */
1804 int n_partitions; /*! number of partitions */
1805 struct __Eigen *eigen;
1806 }part;
1807
1808 /*!********************************************************/
1809
1810 typedef struct __Tnexcom {
1811 char *name;
1812 int nparm;
1813 int nxt_token_t;
1814 int cur_token_t;
1815 struct __Tnexparm **parm;
1816 }nexcom;
1817
1818 /*!********************************************************/
1819
1820 typedef struct __Tnexparm {
1821 char *name;
1822 char *value;
1823 int nxt_token_t;
1824 int cur_token_t;
1825 int (*fp)(char *, struct __Tnexparm *, struct __Option *);
1826 struct __Tnexcom *com;
1827 }nexparm;
1828
1829 /*!********************************************************/
1830
1831 typedef struct __ParamInt {
1832 int val;
1833 }t_param_int;
1834
1835 /*!********************************************************/
1836
1837 typedef struct __ParamDbl {
1838 phydbl val;
1839 }t_param_dbl;
1840
1841 /*!********************************************************/
1842
1843 typedef struct __XML_node {
1844
1845 struct __XML_attr *attr; // Pointer to the first element of a list of attributes
1846 int n_attr; // Number of attributes
1847 struct __XML_node *next; // Next sibling
1848 struct __XML_node *prev; // Previous sibling
1849 struct __XML_node *parent; // Parent of this node
1850 struct __XML_node *child; // Child of this node
1851 char *id;
1852 char *name;
1853 char *value;
1854 struct __Generic_Data_Structure *ds; // Pointer to a data strucuture. Can be a scalar, a vector, anything.
1855 }xml_node;
1856
1857 /*!********************************************************/
1858
1859 typedef struct __Generic_Data_Structure {
1860 void *obj;
1861 struct __Generic_Data_Structure *next;
1862 }t_ds;
1863
1864
1865 /*!********************************************************/
1866
1867 typedef struct __XML_attr {
1868 char *name;
1869 char *value;
1870 struct __XML_attr *next; // Next attribute
1871 struct __XML_attr *prev; // Previous attribute
1872 }xml_attr;
1873
1874 /*!********************************************************/
1875
1876 typedef struct __Calibration {
1877 struct __Calibration *next; // Next calibration
1878 struct __Calibration *prev; // Previous calibration
1879 struct __Clade **clade_list;
1880
1881 phydbl *alpha_proba_list; // list of alpha proba, one for each clade in clade_list
1882
1883 int current_clade_idx; // index of the clade the calibration time interval currently applies to
1884 int clade_list_size;
1885
1886 phydbl lower; // lower bound
1887 phydbl upper; // upper bound
1888
1889 short int is_primary; // Is it a primary or secondary calibration interval?
1890
1891 char *id; // calibration ID
1892 }t_cal;
1893
1894 /*!********************************************************/
1895
1896 typedef struct __Clade{
1897 char *id;
1898 struct __Node **tip_list; // list of tips defining the clade
1899 char **tax_list; // list of names of tips defining the clade
1900 int n_tax; // number of taxa in the clade
1901 struct __Node *target_nd; // The node this calibration applies to
1902 }t_clad;
1903
1904 /*!********************************************************/
1905
1906 typedef struct __Phylogeo{
1907 phydbl *cov; // Covariance of migrations (n_dim x n_dim)
1908 phydbl *r_mat; // R matrix. Gives the rates of migrations between locations. See article.
1909 phydbl *f_mat; // F matrix. See article.
1910 int *occup; // Vector giving the number of lineages that occupy each location
1911 int *idx_loc; // Index of location for each lineage
1912 int *idx_loc_beneath; // Gives the index of location occupied beneath each node in the tree
1913 int ldscape_sz; // Landscape size: number of locations
1914 int n_dim; // Dimension of the data (e.g., longitude + lattitude -> n_dim = 2)
1915 int update_fmat;
1916 struct __Geo_Coord **coord_loc; // Coordinates of the observed locations
1917
1918 phydbl sigma; // Dispersal parameter
1919 phydbl min_sigma;
1920 phydbl max_sigma;
1921 phydbl sigma_thresh; // beyond sigma_thresh, there is no dispersal bias.
1922
1923 phydbl lbda; // Competition parameter
1924 phydbl min_lbda;
1925 phydbl max_lbda;
1926
1927 phydbl c_lnL;
1928
1929 struct __Node **sorted_nd; // Table of nodes sorted wrt their heights.
1930
1931 phydbl tau; // overall migration rate parameter
1932 phydbl min_tau;
1933 phydbl max_tau;
1934
1935 phydbl dum; // dummy parameter use to assess non-identifiability issues
1936 phydbl min_dum;
1937 phydbl max_dum;
1938
1939 }t_geo;
1940
1941 /*!********************************************************/
1942 // Structure for the Etheridge-Barton migration/reproduction model
1943 typedef struct __Migrep_Model{
1944 struct __Geo_Coord *lim_up; // max longitude and lattitude
1945 struct __Geo_Coord *lim_do; // min longitude and lattitude
1946 phydbl *sigsq_scale; // Scaling factors for the variance parameter in RRW model
1947
1948 short int id;
1949 int n_dim;
1950 int safe_phyrex;
1951 int max_num_of_intervals;
1952 int update_rad;
1953
1954 phydbl lbda; // rate at which events occur
1955 phydbl min_lbda; // min of rate at which events occur
1956 phydbl max_lbda; // max of rate at which events occur
1957 phydbl prior_param_lbda; // parameter of the parameter for the prior on lbda
1958
1959 phydbl mu; // per-capita and per event death probability
1960 phydbl min_mu; // min of per-capita and per event death probability
1961 phydbl max_mu; // max of per-capita and per event death probability
1962 phydbl prior_param_mu; // parameter of the parameter for the prior on mu
1963
1964 phydbl rad; // radius of the migrep disk
1965 phydbl min_rad; // min of radius of the migrep disk
1966 phydbl max_rad; // max of radius of the migrep disk
1967 phydbl prior_param_rad; // parameter of the parameter for the prior on radius
1968
1969 phydbl sigsq; // parent to offspring distance variance (i.e., gene flow) parameter.
1970 phydbl min_sigsq; // min
1971 phydbl max_sigsq; // max
1972 phydbl prior_param_sigsq; // parameter of the parameter for the prior
1973
1974 phydbl rho; // intensity parameter of the Poisson point processs
1975 phydbl gen_cal_time; // duration of one generation in calendar time unit
1976 phydbl nu; // parameter of hyperprior on sigsq_scale (see Eq. (1) in Lemey et al., 2010).
1977
1978 phydbl c_lnL; // current value of log-likelihood
1979 phydbl c_ln_prior_rad; // current value of log prior for the prior on radius
1980 phydbl c_ln_prior_lbda; // current value of log prior for the prior on lbda
1981 phydbl c_ln_prior_mu; // current value of log prior for the prior on mu
1982 phydbl c_ln_prior_sigsq; // current value of log prior for the prior on sigsq=4.pi.lbda.mu.rad^4
1983
1984 phydbl soft_bound_area;
1985
1986
1987 }t_phyrex_mod;
1988
1989 /*!********************************************************/
1990
1991 typedef struct __Disk_Event{
1992 struct __Geo_Coord *centr;
1993 phydbl time;
1994 struct __Disk_Event *next;
1995 struct __Disk_Event *prev;
1996 struct __Lindisk_Node **ldsk_a; // array of lindisk nodes corresponding to this disk event.
1997 int n_ldsk_a; // size of ldsk_a.
1998 struct __Lindisk_Node *ldsk;
1999 struct __Migrep_Model *mmod;
2000 char *id;
2001
2002 phydbl c_lnL;
2003 short int age_fixed; // time is fixed for disks corresponding to samples.
2004 }t_dsk;
2005
2006 /*!********************************************************/
2007
2008 // Data structure for implementing Felsenstein's independent contrasts likelihood calculation
2009 // Use the same notation as that in the 1973 article (Am J Hum Genet 25:471-492)
2010 typedef struct __Independent_Contrasts{
2011 phydbl *x;
2012 phydbl *tprime;
2013 }t_ctrst;
2014
2015 /*!********************************************************/
2016
2017 typedef struct __Geo_Coord{
2018 phydbl *lonlat; /* longitude-latitude vector */
2019 int dim;
2020 char *id;
2021 struct __Geo_Coord *cpy; /* keep a copy of this coordinate */
2022
2023 }t_geo_coord;
2024
2025 /*!********************************************************/
2026
2027 typedef struct __Lindisk_Node{
2028 struct __Disk_Event *disk;
2029 struct __Lindisk_Node **next;
2030 struct __Lindisk_Node *prev;
2031 struct __Geo_Coord *coord;
2032 struct __Geo_Coord *cpy_coord;
2033 short int is_hit;
2034 int n_next;
2035 struct __Node *nd;
2036 }t_ldsk;
2037
2038 /*!********************************************************/
2039
2040 typedef struct __Polygon{
2041 struct __Geo_Coord **poly_vert; /* array of polygon vertex coordinates */
2042 int n_poly_vert; /* number of vertices */
2043 }t_poly;
2044
2045 /*!********************************************************/
2046
2047 typedef struct __SampArea {
2048 int n_poly; /* Number of polygons making the sampling area */
2049 t_poly **a_poly; /* Polygons making the sampling area */
2050 }t_sarea;
2051
2052
2053 /*!********************************************************/
2054
2055 typedef struct __JSON_KeyVal {
2056 char *key;
2057 char *value;
2058 struct __JSON_Object *object;
2059 struct __JSON_Array *array;
2060 struct __JSON_KeyVal *next;
2061 }json_kv;
2062
2063 /*!********************************************************/
2064
2065 typedef struct __JSON_Object {
2066 struct __JSON_KeyVal *kv;
2067 struct __JSON_Object *next;
2068 }json_o;
2069
2070 /*!********************************************************/
2071
2072 typedef struct __JSON_Array {
2073 struct __JSON_Object *object;
2074 }json_a;
2075
2076 /*!********************************************************/
2077
2078
2079 typedef struct __Label{
2080 char *key;
2081 char *val;
2082 char sep;
2083 struct __Label *next;
2084 }t_label;
2085
2086
2087
2088
2089 /*!********************************************************/
2090 /*!********************************************************/
2091 /*!********************************************************/
2092
2093 void Unroot_Tree(char **subtrees);
2094 void Set_Edge_Dirs(t_edge *b,t_node *a,t_node *d,t_tree *tree);
2095 void Init_Nexus_Format(nexcom **com);
2096 void Restrict_To_Coding_Position(align **data,option *io);
2097 void Uppercase(char *ch);
2098 void Lowercase(char *ch);
2099 calign *Compact_Data(align **data,option *io);
2100 calign *Compact_Cdata(calign *data,option *io);
2101 void Traverse_Prefix_Tree(int site,int seqnum,int *patt_num,int *n_patt,align **data,option *io,pnode *n);
2102 pnode *Create_Pnode(int size);
2103 void Get_Base_Freqs(calign *data);
2104 void Get_AA_Freqs(calign *data);
2105 void Swap_Nodes_On_Edges(t_edge *e1,t_edge *e2,int swap,t_tree *tree);
2106 void Connect_Edges_To_Nodes_Recur(t_node *a,t_node *d,t_tree *tree);
2107 void Connect_One_Edge_To_Two_Nodes(t_node *a,t_node *d,t_edge *b,t_tree *tree);
2108 void Update_Dirs(t_tree *tree);
2109 void Exit(char *message);
2110 void *mCalloc(int nb,size_t size);
2111 void *mRealloc(void *p,int nb,size_t size);
2112 int Sort_Phydbl_Decrease(const void *a,const void *b);
2113 void Qksort_Int(int *A,int *B,int ilo,int ihi);
2114 void Qksort(phydbl *A,phydbl *B,int ilo,int ihi);
2115 void Qksort_Matrix(phydbl **A,int col,int ilo,int ihi);
2116 void Order_Tree_Seq(t_tree *tree,align **data);
2117 char *Add_Taxa_To_Constraint_Tree(FILE *fp,calign *cdata);
2118 void Check_Constraint_Tree_Taxa_Names(t_tree *tree,calign *cdata);
2119 void Order_Tree_CSeq(t_tree *tree,calign *cdata);
2120 void Init_Mat(matrix *mat,calign *data);
2121 void Copy_Tax_Names_To_Tip_Labels(t_tree *tree,calign *data);
2122 void Share_Lk_Struct(t_tree *t_full,t_tree *t_empt);
2123 void Share_Spr_Struct(t_tree *t_full,t_tree *t_empt);
2124 void Share_Pars_Struct(t_tree *t_full,t_tree *t_empt);
2125 int Sort_Edges_NNI_Score(t_tree *tree,t_edge **sorted_edges,int n_elem);
2126 int Sort_Edges_Depth(t_tree *tree,t_edge **sorted_edges,int n_elem);
2127 void NNI(t_tree *tree,t_edge *b_fcus,int do_swap);
2128 void NNI_Pars(t_tree *tree,t_edge *b_fcus,int do_swap);
2129 void Swap(t_node *a,t_node *b,t_node *c,t_node *d,t_tree *tree);
2130 void Update_SubTree_Partial_Lk(t_edge *b_fcus,t_node *a,t_node *d,t_tree *tree);
2131 void Copy_Seq_Names_To_Tip_Labels(t_tree *tree,calign *data);
2132 calign *Copy_Cseq(calign *ori,option *io);
2133 int Filexists(char *filename);
2134 int Is_Invar(int patt_num,int stepsize,int datatype,calign *data);
2135 int Is_Ambigu(char *state,int datatype,int stepsize);
2136 void Check_Ambiguities(calign *data,int datatype,int stepsize);
2137 int Get_State_From_Ui(int ui,int datatype);
2138 int Assign_State(char *c,int datatype,int stepsize);
2139 char Reciproc_Assign_State(int i_state,int datatype);
2140 int Assign_State_With_Ambiguity(char *c,int datatype,int stepsize);
2141 void Clean_Tree_Connections(t_tree *tree);
2142 void Bootstrap(t_tree *tree);
2143 void Br_Len_Involving_Invar(t_tree *tree);
2144 void Br_Len_Not_Involving_Invar(t_tree *tree);
2145 void Getstring_Stdin(char *s);
2146 phydbl Num_Derivatives_One_Param(phydbl (*func)(t_tree *tree), t_tree *tree,
2147 phydbl f0, phydbl *param, int which, int n_param, phydbl stepsize, int logt,
2148 phydbl *err, int precise, int is_positive);
2149 phydbl Num_Derivatives_One_Param_Nonaligned(phydbl (*func)(t_tree *tree), t_tree *tree,
2150 phydbl f0, phydbl **param, int which, int n_param, phydbl stepsize, int logt,
2151 phydbl *err, int precise, int is_positive);
2152 int Num_Derivative_Several_Param(t_tree *tree,phydbl *param,int n_param,phydbl stepsize,int logt,phydbl(*func)(t_tree *tree),phydbl *derivatives, int is_positive);
2153 int Num_Derivative_Several_Param_Nonaligned(t_tree *tree, phydbl **param, int n_param, phydbl stepsize, int logt,
2154 phydbl (*func)(t_tree *tree), phydbl *derivatives, int is_positive);
2155 int Compare_Two_States(char *state1,char *state2,int state_size);
2156 void Copy_One_State(char *from,char *to,int state_size);
2157 void Copy_Dist(phydbl **cpy,phydbl **orig,int n);
2158 t_mod *Copy_Model(t_mod *ori);
2159 void Record_Model(t_mod *ori,t_mod *cpy);
2160 void Set_Defaults_Input(option *io);
2161 void Set_Defaults_Model(t_mod *mod);
2162 void Set_Defaults_Optimiz(t_opt *s_opt);
2163 void Test_Node_Table_Consistency(t_tree *tree);
2164 void Get_Bip(t_node *a,t_node *d,t_tree *tree);
2165 void Alloc_Bip(t_tree *tree);
2166 int Sort_Phydbl_Increase(const void *a,const void *b);
2167 int Sort_String(const void *a,const void *b);
2168 int Compare_Bip(t_tree *tree1,t_tree *tree2,int on_existing_edges_only);
2169 void Compare_Bip_Distance(t_tree *tree1,t_tree *tree2);
2170 void Match_Tip_Numbers(t_tree *tree1,t_tree *tree2);
2171 void Test_Multiple_Data_Set_Format(option *io);
2172 int Are_Compatible(char *statea,char *stateb,int stepsize,int datatype);
2173 void Hide_Ambiguities(calign *data);
2174 void Copy_Tree(t_tree *ori, t_tree *cpy);
2175 void Prune_Subtree(t_node *a,t_node *d,t_edge **target,t_edge **residual,t_tree *tree);
2176 void Graft_Subtree(t_edge *target, t_node *link, t_node *link_daughter, t_edge *residual, t_node *target_nd, t_tree *tree);
2177 void Reassign_Node_Nums(t_node *a,t_node *d, unsigned int *curr_ext_node, unsigned int *curr_int_node,t_tree *tree);
2178 void Reassign_Edge_Nums(t_node *a,t_node *d,int *curr_br,t_tree *tree);
2179 void Find_Mutual_Direction(t_node *n1,t_node *n2,short int *dir_n1_to_n2,short int *dir_n2_to_n1);
2180 void Update_Dir_To_Tips(t_node *a,t_node *d,t_tree *tree);
2181 void Fill_Dir_Table(t_tree *tree);
2182 int Get_Subtree_Size(t_node *a,t_node *d);
2183 void Init_Eigen_Struct(eigen *this);
2184 phydbl Triple_Dist(t_node *a,t_tree *tree);
2185 phydbl Triple_Dist_Approx(t_node *a, t_edge *b, t_tree *tree);
2186 void Make_Symmetric(phydbl **F,int size);
2187 void Divide_Mat_By_Vect(phydbl **F,phydbl *vect,int size);
2188 void Found_In_Subtree(t_node *a,t_node *d,t_node *target,int *match,t_tree *tree);
2189 void Get_List_Of_Target_Edges(t_node *a,t_node *d,t_edge **list,int *list_size,t_tree *tree);
2190 void Fix_All(t_tree *tree);
2191 void Record_Br_Len(t_tree *tree);
2192 void Restore_Br_Len(t_tree *tree);
2193 void Get_Dist_Btw_Edges(t_node *a,t_node *d,t_tree *tree);
2194 void Detect_Polytomies(t_edge *b,phydbl l_thresh,t_tree *tree);
2195 void Get_List_Of_Nodes_In_Polytomy(t_node *a,t_node *d,t_node ***list,int *size_list);
2196 void Check_Path(t_node *a,t_node *d,t_node *target,t_tree *tree);
2197 void Connect_Two_Nodes(t_node *a,t_node *d);
2198 void Get_List_Of_Adjacent_Targets(t_node *a, t_node *d, t_node ***node_list, t_edge ***edge_list, int *list_size, int curr_depth, int max_depth);
2199 void Sort_List_Of_Adjacent_Targets(t_edge ***list,int list_size);
2200 t_node *Common_Nodes_Btw_Two_Edges(t_edge *a,t_edge *b);
2201 int KH_Test(phydbl *site_lk_M1,phydbl *site_lk_M2,t_tree *tree);
2202 void Random_Tree(t_tree *tree);
2203 void Reorganize_Edges_Given_Lk_Struct(t_tree *tree);
2204 void Random_NNI(int n_moves,t_tree *tree);
2205 void Fill_Missing_Dist(matrix *mat);
2206 void Fill_Missing_Dist_XY(int x,int y,matrix *mat);
2207 phydbl Least_Square_Missing_Dist_XY(int x,int y,phydbl dxy,matrix *mat);
2208 void Check_Memory_Amount(t_tree *tree);
2209 int Get_State_From_P_Lk(phydbl *p_lk,int pos,t_tree *tree);
2210 int Get_State_From_P_Pars(short int *p_pars,int pos,t_tree *tree);
2211 void Check_Dirs(t_tree *tree);
2212 void Warn_And_Exit(const char *s);
2213 void Randomize_Sequence_Order(calign *cdata);
2214 void Update_Root_Pos(t_tree *tree);
2215 void Add_Root(t_edge *target,t_tree *tree);
2216 void Update_Ancestors(t_node *a,t_node *d,t_tree *tree);
2217 #if (defined PHYTIME || defined SERGEII)
2218 t_tree *Generate_Random_Tree_From_Scratch(int n_otu,int rooted);
2219 #endif
2220 void Random_Lineage_Rates(t_node *a,t_node *d,t_edge *b,phydbl stick_prob,phydbl *rates,int curr_rate,int n_rates,t_tree *tree);
2221 t_edge *Find_Edge_With_Label(char *label,t_tree *tree);
2222 void Evolve(calign *data, t_mod *mod, int first_site_pos, t_tree *tree);
2223 int Pick_State(int n,phydbl *prob);
2224 void Evolve_Recur(t_node *a,t_node *d,t_edge *b,int a_state,int r_class,int site_num,calign *gen_data,t_mod *mod,t_tree *tree);
2225 void Site_Diversity(t_tree *tree);
2226 void Site_Diversity_Post(t_node *a,t_node *d,t_edge *b,t_tree *tree);
2227 void Site_Diversity_Pre(t_node *a,t_node *d,t_edge *b,t_tree *tree);
2228 void Subtree_Union(t_node *n,t_edge *b_fcus,t_tree *tree);
2229 void Binary_Decomposition(int value,int *bit_vect,int size);
2230 void Print_Diversity_Header(FILE *fp,t_tree *tree);
2231 void Best_Of_NNI_And_SPR(t_tree *tree);
2232 int Polint(phydbl *xa,phydbl *ya,int n,phydbl x,phydbl *y,phydbl *dy);
2233 t_tree *Dist_And_BioNJ(calign *cdata,t_mod *mod,option *io);
2234 void Add_BioNJ_Branch_Lengths(t_tree *tree, calign *cdata, t_mod *mod, matrix *mat);
2235 char *Bootstrap_From_String(char *s_tree,calign *cdata,t_mod *mod,option *io);
2236 char *aLRT_From_String(char *s_tree,calign *cdata,t_mod *mod,option *io);
2237 void Find_Common_Tips(t_tree *tree1,t_tree *tree2);
2238 phydbl Get_Tree_Size(t_tree *tree);
2239 void Dist_To_Root_Pre(t_node *a, t_node *d, t_edge *b, t_tree *tree);
2240 void Dist_To_Root(t_tree *tree);
2241 char *Basename(char *path);
2242 t_node *Find_Lca_Pair_Of_Nodes(t_node *n1,t_node *n2,t_tree *tree);
2243 t_node *Find_Lca_Clade(t_node **node_list,int node_list_size,t_tree *tree);
2244 int Get_List_Of_Ancestors(t_node *ref_node,t_node **list,int *size,t_tree *tree);
2245 int Edge_Num_To_Node_Num(int edge_num,t_tree *tree);
2246 void Branch_Lengths_To_Rate_Lengths(t_tree *tree);
2247 void Branch_Lengths_To_Rate_Lengths_Pre(t_node *a,t_node *d,t_tree *tree);
2248 int Find_Clade(char **tax_name_list,int list_size,t_tree *tree);
2249 void Find_Clade_Pre(t_node *a,t_node *d,int *tax_num_list,int list_size,int *num,t_tree *tree);
2250 t_edge *Find_Root_Edge(FILE *fp_input_tree,t_tree *tree);
2251 void Copy_Tree_Topology_With_Labels(t_tree *ori,t_tree *cpy);
2252 void Set_Model_Name(t_mod *mod);
2253 void Adjust_Min_Diff_Lk(t_tree *tree);
2254 void Translate_Tax_Names(char **tax_names,t_tree *tree);
2255 void Skip_Comment(FILE *fp);
2256 void Get_Best_Root_Position(t_tree *tree);
2257 void Get_Best_Root_Position_Post(t_node *a,t_node *d,int *has_outgrp,t_tree *tree);
2258 void Get_Best_Root_Position_Pre(t_node *a,t_node *d,t_tree *tree);
2259 void Get_OutIn_Scores(t_node *a,t_node *d);
2260 int Check_Sequence_Name(char *s);
2261 int Scale_Subtree_Height(t_node *a,phydbl K,phydbl floor,int *n_nodes,t_tree *tree);
2262 void Scale_Node_Heights_Post(t_node *a,t_node *d,phydbl K,phydbl floor,int *n_nodes,t_tree *tree);
2263 int Scale_Subtree_Rates(t_node *a,phydbl mult,int *n_nodes,t_tree *tree);
2264 void Check_Br_Len_Bounds(t_tree *tree);
2265 int Scale_Subtree_Rates_Post(t_node *a,t_node *d,phydbl mult,int *n_nodes,t_tree *tree);
2266 void Get_Node_Ranks(t_tree *tree);
2267 void Get_Node_Ranks_Pre(t_node *a,t_node *d,t_tree *tree);
2268 void Log_Br_Len(t_tree *tree);
2269 phydbl Diff_Lk_Norm_At_Given_Edge(t_edge *b,t_tree *tree);
2270 void Adjust_Variances(t_tree *tree);
2271 phydbl Effective_Sample_Size(phydbl first_val,phydbl last_val,phydbl sum,phydbl sumsq,phydbl sumcurnext,int n);
2272 void Rescale_Free_Rate_Tree(t_tree *tree);
2273 phydbl Rescale_Br_Len_Multiplier_Tree(t_tree *tree);
2274 phydbl Unscale_Br_Len_Multiplier_Tree(t_tree *tree);
2275 phydbl Reflect(phydbl x,phydbl l,phydbl u);
2276 int Are_Equal(phydbl a,phydbl b,phydbl eps);
2277 int Check_Topo_Constraints(t_tree *big_tree,t_tree *small_tree);
2278 void Prune_Tree(t_tree *big_tree,t_tree *small_tree);
2279 void Match_Nodes_In_Small_Tree(t_tree *small_tree,t_tree *big_tree);
2280 void Find_Surviving_Edges_In_Small_Tree(t_tree *small_tree,t_tree *big_tree);
2281 void Find_Surviving_Edges_In_Small_Tree_Post(t_node *a,t_node *d,t_tree *small_tree,t_tree *big_tree);
2282 void Set_Taxa_Id_Ranking(t_tree *tree);
2283 void Get_Edge_Binary_Coding_Number(t_tree *tree);
2284 void Make_Ancestral_Seq(t_tree *tree);
2285 void Make_MutMap(t_tree *tree);
2286 int Get_Mutmap_Val(int edge,int site,int mut,t_tree *tree);
2287 void Get_Mutmap_Coord(int idx,int *edge,int *site,int *mut,t_tree *tree);
2288 void Copy_Edge_Lengths(t_tree *to,t_tree *from);
2289 void Init_Scalar_Dbl(scalar_dbl *p);
2290 void Init_Scalar_Int(scalar_int *p);
2291 void Init_Vect_Dbl(int len,vect_dbl *p);
2292 void Init_Vect_Int(int len,vect_int *p);
2293 char *To_Lower_String(char *in);
2294 phydbl String_To_Dbl(char *string);
2295 int String_To_Int(char *string);
2296 char *To_Upper_String(char *in);
2297 void Connect_CSeqs_To_Nodes(calign *cdata, option *io, t_tree *tree);
2298 void Joint_Proba_States_Left_Right(phydbl *Pij, phydbl *p_lk_left, phydbl *p_lk_rght,
2299 vect_dbl *pi, int scale_left, int scale_rght,
2300 phydbl *F, int n, int site, t_tree *tree);
2301 void Set_Both_Sides(int yesno, t_tree *tree);
2302 void Set_D_States(calign *data, int datatype, int stepsize);
2303 void Path_Length(t_node *dep, t_node *arr, phydbl *len, t_tree *tree);
2304 phydbl *Dist_Btw_Tips(t_tree *tree);
2305 void Best_Root_Position_IL_Model(t_tree *tree);
2306 void Set_Br_Len_Var(t_edge *b, t_tree *tree);
2307 void Check_Br_Lens(t_tree *tree);
2308 void Calculate_Number_Of_Diff_States_Post(t_node *a, t_node *d, t_edge *b, t_tree *tree);
2309 void Calculate_Number_Of_Diff_States_Pre(t_node *a, t_node *d, t_edge *b, t_tree *tree);
2310 void Calculate_Number_Of_Diff_States_Core(t_node *a, t_node *d, t_edge *b, t_tree *tree);
2311 void Calculate_Number_Of_Diff_States(t_tree *tree);
2312 void Build_Distrib_Number_Of_Diff_States_Under_Model(t_tree *tree);
2313 int Number_Of_Diff_States_One_Site(int site, t_tree *tree);
2314 void Number_Of_Diff_States_One_Site_Post(t_node *a, t_node *d, t_edge *b, int site, t_tree *tree);
2315 int Number_Of_Diff_States_One_Site_Core(t_node *a, t_node *d, t_edge *b, int site, t_tree *tree);
2316 phydbl Get_Lk(t_tree *tree);
2317 phydbl Get_d2Lk(t_tree *tree);
2318 phydbl Get_dLk(t_tree *tree);
2319 align **Make_Empty_Alignment(option *io);
2320 void Connect_Edges_To_Nodes_Serial(t_tree *tree);
2321 phydbl Mean_Identity(calign *data);
2322 phydbl Pairwise_Identity(int i, int j, calign *data);
2323 phydbl Fst(int i, int j, calign *data);
2324 phydbl Nucleotide_Diversity(calign *data);
2325 void Swap_Partial_Lk(t_edge *a, t_edge *b, int side_a, int side_b, t_tree *tree);
2326 scalar_dbl **Copy_Br_Len_Var(t_tree *mixt_tree);
2327 scalar_dbl **Copy_Br_Len(t_tree *mixt_tree);
2328 void Transfer_Br_Len_To_Tree(scalar_dbl **bl, t_tree *tree);
2329 void Copy_Scalar_Dbl(scalar_dbl *from, scalar_dbl *to);
2330 scalar_dbl *Duplicate_Scalar_Dbl(scalar_dbl *from);
2331 scalar_dbl *Read_Weights(option *io);
2332 phydbl Scalar_Elem(int pos, scalar_dbl *scl);
2333 int Scalar_Len(scalar_dbl *scl);
2334 void Set_Scalar_Dbl(phydbl val, scalar_dbl *from);
2335 void Set_Scalar_Dbl_Min_Thresh(phydbl thresh, scalar_dbl *from);
2336 void Set_Scalar_Dbl_Max_Thresh(phydbl thresh, scalar_dbl *from);
2337 void List_Of_Regraft_Nodes(t_node *a, t_node *d, phydbl time_thresh, t_ll *list, t_tree *tree);
2338 void Push_Bottom_Linked_List(void *what, t_ll **list, bool remove_duplicates);
2339 void Remove_From_Linked_List(t_ll *elem, void *val, t_ll **list);
2340 int Linked_List_Len(t_ll *list);
2341 void *Linked_List_Elem(int pos, t_ll *ll);
2342 void Randomize_Tree(t_tree *tree, int n_prune_regraft);
2343 t_ll *Get_List_Of_Reachable_Tips(t_node *a, t_node *d, t_tree *tree);
2344 void Get_List_Of_Reachable_Tips_Post(t_node *a, t_node *d, t_ll **list, t_tree *tree);
2345 phydbl Length_Of_Path_Between_List_Of_Tips(t_ll *tips0, t_ll *tips1, matrix *mat);
2346 void Set_Update_Eigen_Lr(int yn, t_tree *tree);
2347 void Set_Use_Eigen_Lr(int yn, t_tree *tree);
2348 void Random_Walk_Along_Tree_On_Radius(t_node *a, t_node *d, t_edge *b, phydbl *radius, t_edge **target_edge, t_node **target_nd, phydbl *target_time, t_tree *tree);
2349 void Table_Top(unsigned int width);
2350 void Table_Row(unsigned int width);
2351 void Table_Bottom(unsigned int width);
2352 t_cal *Duplicate_Calib(t_cal *from);
2353 t_clad *Duplicate_Clade(t_clad *from);
2354 void Swap_Partial_Lk_Extra(t_edge *b, t_node *d, int whichone, t_tree *tree);
2355 void Remove_Duplicates(calign *data, option *io, t_tree *tree);
2356 short int Are_Sequences_Identical(align *seq1, align *seq2);
2357 char *Mutation_Id(int mut_idx, t_tree *tree);
2358 void Random_Tax_Idx(t_node *a, t_node *d, int *idx, t_tree *tree);
2359 void List_Taxa_In_Clade(t_node *a, t_node *d, t_tree *tree);
2360 void Alias_Subpatt_Pre(t_node *a, t_node *d, t_tree *tree);
2361 void Alias_Subpatt_Post(t_node *a, t_node *d, t_tree *tree);
2362 void Alias_One_Subpatt(t_node *a, t_node *d, t_tree *tree);
2363 void Alias_Subpatt(t_tree *tree);
2364 void Map_Mutations(t_node *a, t_node *d, int sa, int sd, t_edge *b, int site, int rcat, int *muttype, phydbl *muttime, int *muttax, int *n_mut, t_tree *tree);
2365 void Set_Update_Eigen(int yesno, t_mod *mod);
2366 int *Order_Int(const int *u, const int n);
2367 int *Order_Dbl(const phydbl *u, const int n);
2368 char Integer_To_IUPAC_Code(int x);
2369 void Shuffle_Sites(const phydbl prop, align **data, const int n_otu);
2370 void Multiply_Scalar_Dbl(phydbl mult, scalar_dbl *x);
2371 void Insert_Duplicates(t_tree *tree);
2372 void Get_Node_Ranks_From_Dist_To_Root(t_tree *tree);
2373 void Get_Node_Ranks_From_Times(t_tree *tree);
2374 void Get_Node_Ranks_From_Tip_Times(t_tree *tree);
2375 phydbl Tree_Height(t_tree *tree);
2376 void Post_Inflate_Times_To_Get_Reasonnable_Edge_Lengths(t_node *a, t_node *d, t_edge *b, phydbl min_l, t_tree *tree);
2377 void Inflate_Times_To_Get_Reasonnable_Edge_Lengths(phydbl min_l, t_tree *tree);
2378 void Refactor_Tree(t_tree *tree);
2379 void Refactor_External(t_node *a, t_node *d, int *idx, t_tree *tree);
2380 void Refactor_Internal(t_node *a, t_node *d, t_edge *b, int *idx_nd, int *idx_br, t_tree *tree);
2381 int *Integer_To_Bit(int val, const int ns);
2382 char *Bit_To_Character_String(int *bit, int ns);
2383 t_tree *Duplicate_Tree(t_tree *ori);
2384 matrix *K80_dist(calign *data, phydbl g_shape);
2385 matrix *JC69_Dist(calign *data, t_mod *mod);
2386 matrix *Hamming_Dist(calign *data, t_mod *mod);
2387 phydbl Haversine_Distance(phydbl lon1, phydbl lat1, phydbl lon2, phydbl lat2);
2388 phydbl Tree_Length(t_tree *tree);
2389 void Remove_Duplicates_From_Tree(calign *data, t_tree *tree);
2390
2391
2392 #include "xml.h"
2393 #include "free.h"
2394 #include "spr.h"
2395 #include "lk.h"
2396 #include "optimiz.h"
2397 #include "models.h"
2398 #include "bionj.h"
2399 #include "simu.h"
2400 #include "eigen.h"
2401 #include "pars.h"
2402 #include "alrt.h"
2403 #include "stats.h"
2404 #include "help.h"
2405 #include "io.h"
2406 #include "make.h"
2407 #include "nexus.h"
2408 #include "init.h"
2409 #include "mcmc.h"
2410 #include "ancestral.h"
2411
2412 #ifdef GEO
2413 #include "geo.h"
2414 #endif
2415
2416 #ifdef PHYREX
2417 #include "phyrex.h"
2418 #include "rw.h"
2419 #include "rrw.h"
2420 #include "slfv.h"
2421 #endif
2422
2423 #ifdef MPI
2424 #include "mpi_boot.h"
2425 #endif
2426
2427 #ifdef MG
2428 #include "mg.h"
2429 #endif
2430
2431 #ifdef TIME
2432 #include "times.h"
2433 #include "rates.h"
2434 #endif
2435
2436 #ifdef _NOT_NEEDED_A_PRIORI
2437 #include "m4.h"
2438 #endif
2439
2440
2441 #endif
2442