1 /**
2 * PLL (version 1.0.0) a software library for phylogenetic inference
3 * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
4 *
5 * Derived from
6 * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
7 * trees by Alexandros Stamatakis
8 *
9 * This program is free software: you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the Free
11 * Software Foundation, either version 3 of the License, or (at your option)
12 * any later version.
13 *
14 * This program is distributed in the hope that it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 * more details.
18 *
19 * You should have received a copy of the GNU General Public License along with
20 * this program. If not, see <http://www.gnu.org/licenses/>.
21 *
22 * For any other enquiries send an Email to Tomas Flouri
23 * Tomas.Flouri@h-its.org
24 *
25 * When publishing work that uses PLL please cite PLL
26 *
27 * @file utils.c
28 *
29 * @brief Miscellaneous general utility and helper functions
30 */
31 #ifdef WIN32
32 #include <direct.h>
33 #endif
34
35 #ifndef WIN32
36 #include <sys/times.h>
37 #include <sys/types.h>
38 #include <sys/time.h>
39 #include <unistd.h>
40 #endif
41
42 #include <math.h>
43 #include <time.h>
44 #include <stdlib.h>
45 #include <stdio.h>
46 #include <ctype.h>
47 #include <string.h>
48 #include <stdarg.h>
49 #include <limits.h>
50 #include <assert.h>
51 #include <errno.h>
52 #include "cycle.h"
53
54
55 #if ! (defined(__ppc) || defined(__powerpc__) || defined(PPC))
56 #if (defined(__AVX) || defined(__SSE3))
57 #include <xmmintrin.h>
58 #endif
59 /*
60 special bug fix, enforces denormalized numbers to be flushed to zero,
61 without this program is a tiny bit faster though.
62 #include <emmintrin.h>
63 #define MM_DAZ_MASK 0x0040
64 #define MM_DAZ_ON 0x0040
65 #define MM_DAZ_OFF 0x0000
66 */
67 #endif
68
69 #include "pll.h"
70 #include "pllInternal.h"
71
72 #define GLOBAL_VARIABLES_DEFINITION
73
74 #include "globalVariables.h"
75
76 /* mappings of BIN/DNA/AA alphabet to numbers */
77
78 static const char PLL_MAP_BIN[256] =
79 {
80 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
81 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
82 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3, -1, -1,
83 1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3,
84 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
85 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
86 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
87 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
88 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
89 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
90 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
91 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
92 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
93 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
94 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
95 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
96 };
97
98 static const char PLL_MAP_NT[256] =
99 {
100 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
101 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
102 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15, -1, -1,
103 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 15,
104 -1, 1, 14, 2, 13, -1, -1, 4, 11, -1, -1, 12, -1, 3, 15, 15,
105 -1, -1, 5, 6, 8, 8, 7, 9, 15, 10, -1, -1, -1, -1, -1, -1,
106 -1, 1, 14, 2, 13, -1, -1, 4, 11, -1, -1, 12, -1, 3, 15, 15,
107 -1, -1, 5, 6, 8, 8, 7, 9, 15, 10, -1, -1, -1, -1, -1, -1,
108 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
109 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
110 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
111 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
112 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
113 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
114 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
115 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
116 };
117
118 static const char PLL_MAP_AA[256] =
119 {
120 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
121 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
122 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 22, -1, -1, 22, -1, -1,
123 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 22,
124 -1, 0, 20, 4, 3, 6, 13, 7, 8, 9, -1, 11, 10, 12, 2, -1,
125 14, 5, 1, 15, 16, -1, 19, 17, 22, 18, 21, -1, -1, -1, -1, -1,
126 -1, 0, 20, 4, 3, 6, 13, 7, 8, 9, -1, 11, 10, 12, 2, -1,
127 14, 5, 1, 15, 16, -1, 19, 17, 22, 18, 21, -1, -1, -1, -1, -1,
128 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
129 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
130 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
131 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
132 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
133 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
134 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
135 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
136 };
137
138
139
140
141
142 static void pllTreeInitDefaults (pllInstance * tr, int tips);
143 static void getInnerBranchEndPointsRecursive (nodeptr p, int tips, int * i, node **nodes);
144 #if (!defined(_FINE_GRAIN_MPI) && !defined(_USE_PTHREADS))
145 static void initializePartitionsSequential(pllInstance *tr, partitionList *pr);
146 #endif
147
148 /** @defgroup instanceLinkingGroup Linking topology, partition scheme and alignment to the PLL instance
149
150 This set of functions handles the linking of topology, partition scheme and multiple sequence alignment
151 with the PLL instance
152 */
153 /***************** UTILITY FUNCTIONS **************************/
154
my_strndup(const char * s,size_t n)155 char *my_strndup(const char *s, size_t n) {
156 char *ret = (char *) rax_malloc(n+1);
157 strncpy(ret, s, n);
158 ret[n] = 0;
159 return ret;
160 }
161
162 #ifndef HAVE_STRTOK_R
strtok_r(char * s,const char * delim,char ** save_ptr)163 char *strtok_r (char * s, const char * delim, char **save_ptr)
164 {
165 char *token;
166
167 /* Scan leading delimiters */
168 if (s == NULL)
169 s = *save_ptr;
170
171 s += strspn (s, delim);
172 if (*s == '\0')
173 {
174 *save_ptr = s;
175 return NULL;
176 }
177
178 /* Find the end of the token. */
179 token = s;
180 s = strpbrk (token, delim);
181 if (!s)
182 *save_ptr = strchr (token, '\0');
183 else
184 {
185 /* Terminate the token and make *SAVE_PTR point past it */
186 *s = '\0';
187 *save_ptr = s + 1;
188 }
189
190 return token;
191 }
192 #endif
193
194
storeExecuteMaskInTraversalDescriptor(pllInstance * tr,partitionList * pr)195 void storeExecuteMaskInTraversalDescriptor(pllInstance *tr, partitionList *pr)
196 {
197 int model;
198
199 for(model = 0; model < pr->numberOfPartitions; model++)
200 tr->td[0].executeModel[model] = pr->partitionData[model]->executeModel;
201
202 }
203
storeValuesInTraversalDescriptor(pllInstance * tr,partitionList * pr,double * value)204 void storeValuesInTraversalDescriptor(pllInstance *tr, partitionList *pr, double *value)
205 {
206 int model;
207
208 for(model = 0; model < pr->numberOfPartitions; model++)
209 tr->td[0].parameterValues[model] = value[model];
210 }
211
getBitVector(int dataType)212 const unsigned int *getBitVector(int dataType)
213 {
214 assert(PLL_MIN_MODEL < dataType && dataType < PLL_MAX_MODEL);
215
216 return pLengths[dataType].bitVector;
217 }
218
219 /*
220 int getStates(int dataType)
221 {
222 assert(PLL_MIN_MODEL < dataType && dataType < PLL_MAX_MODEL);
223
224 return pLengths[dataType].states;
225 }
226 */
227
getUndetermined(int dataType)228 int getUndetermined(int dataType)
229 {
230 assert(PLL_MIN_MODEL < dataType && dataType < PLL_MAX_MODEL);
231
232 return pLengths[dataType].undetermined;
233 }
234
getPartitionLengths(pInfo * p)235 const partitionLengths *getPartitionLengths(pInfo *p)
236 {
237 int
238 dataType = p->dataType,
239 states = p->states,
240 tipLength = p->maxTipStates;
241
242 assert(states != -1 && tipLength != -1);
243
244 assert(PLL_MIN_MODEL < dataType && dataType < PLL_MAX_MODEL);
245
246 /*pLength.leftLength = pLength.rightLength = states * states;
247 pLength.eignLength = states;
248 pLength.evLength = states * states;
249 pLength.eiLength = states * states;
250 pLength.substRatesLength = (states * states - states) / 2;
251 pLength.frequenciesLength = states;
252 pLength.tipVectorLength = tipLength * states;
253 pLength.symmetryVectorLength = (states * states - states) / 2;
254 pLength.frequencyGroupingLength = states;
255 pLength.nonGTR = PLL_FALSE;*/
256 return (&pLengths[dataType]);
257 }
258
discreteRateCategories(int rateHetModel)259 size_t discreteRateCategories(int rateHetModel)
260 {
261 size_t
262 result;
263
264 switch(rateHetModel)
265 {
266 case PLL_CAT:
267 result = 1;
268 break;
269 case PLL_GAMMA:
270 result = 4;
271 break;
272 default:
273 assert(0);
274 }
275
276 return result;
277 }
278
279
280
gettime(void)281 double gettime(void)
282 {
283 #ifdef WIN32
284 time_t tp;
285 struct tm localtm;
286 tp = time(NULL);
287 localtm = *localtime(&tp);
288 return 60.0*localtm.tm_min + localtm.tm_sec;
289 #else
290 struct timeval ttime;
291 gettimeofday(&ttime , NULL);
292 return ttime.tv_sec + ttime.tv_usec * 0.000001;
293 #endif
294 }
295
gettimeSrand(void)296 int gettimeSrand(void)
297 {
298 #ifdef WIN32
299 time_t tp;
300 struct tm localtm;
301 tp = time(NULL);
302 localtm = *localtime(&tp);
303 return 24*60*60*localtm.tm_yday + 60*60*localtm.tm_hour + 60*localtm.tm_min + localtm.tm_sec;
304 #else
305 struct timeval ttime;
306 gettimeofday(&ttime , NULL);
307 return ttime.tv_sec + ttime.tv_usec;
308 #endif
309 }
310
randum(long * seed)311 double randum (long *seed)
312 {
313 long sum, mult0, mult1, seed0, seed1, seed2, newseed0, newseed1, newseed2;
314 double res;
315
316 mult0 = 1549;
317 seed0 = *seed & 4095;
318 sum = mult0 * seed0;
319 newseed0 = sum & 4095;
320 sum >>= 12;
321 seed1 = (*seed >> 12) & 4095;
322 mult1 = 406;
323 sum += mult0 * seed1 + mult1 * seed0;
324 newseed1 = sum & 4095;
325 sum >>= 12;
326 seed2 = (*seed >> 24) & 255;
327 sum += mult0 * seed2 + mult1 * seed1;
328 newseed2 = sum & 255;
329
330 *seed = newseed2 << 24 | newseed1 << 12 | newseed0;
331 res = 0.00390625 * (newseed2 + 0.000244140625 * (newseed1 + 0.000244140625 * newseed0));
332
333 return res;
334 }
335
336
337 /********************* END UTILITY FUNCTIONS ********************/
338
339
340 /******************************some functions for the likelihood computation ****************************/
341
342
343 /** @brief Check whether a node is a tip.
344
345 Checks whether the node with number \a number is a tip.
346
347 @param number
348 Node number to be checked
349
350 @param maxTips
351 Number of tips in the tree
352
353 @return
354 \b PLL_TRUE if tip, \b PLL_FALSE otherwise
355 */
isTip(int number,int maxTips)356 pllBoolean isTip(int number, int maxTips)
357 {
358 assert(number > 0);
359
360 if(number <= maxTips)
361 return PLL_TRUE;
362 else
363 return PLL_FALSE;
364 }
365
366 /** @brief Set the orientation of a node
367
368 Sets the orientation of node \a p. That means, it will reset the orientation
369 \a p->next->x and \a p->next->next->x to 0 and of \a p->x to 1, meaning that
370 the conditional likelihood vector for that node is oriented on \a p, i.e.
371 the conditional likelihood vector represents the subtree rooted at \a p and
372 not any other of the two nodes.
373
374 @param p
375 Node which we want to orient
376 */
getxnode(nodeptr p)377 void getxnode (nodeptr p)
378 {
379 nodeptr s;
380
381 if ((s = p->next)->x || (s = s->next)->x)
382 {
383 p->x = s->x;
384 s->x = 0;
385 }
386
387 assert(p->x);
388 }
389
390
391 /** @brief Connect two nodes and assign branch lengths
392 *
393 * Connect the two nodes \a p and \a q in each partition \e i with a branch of
394 * length \a z[i]
395 *
396 * @param p
397 * Node \a p
398 *
399 * @param q
400 * Node \a q
401 *
402 * @param numBranches
403 * Number of partitions
404 */
hookup(nodeptr p,nodeptr q,double * z,int numBranches)405 void hookup (nodeptr p, nodeptr q, double *z, int numBranches)
406 {
407 int i;
408
409 p->back = q;
410 q->back = p;
411
412 for(i = 0; i < numBranches; i++)
413 p->z[i] = q->z[i] = z[i];
414 }
415
416 /* connects node p with q and assigns the branch lengths z for the whole vector*/
hookupFull(nodeptr p,nodeptr q,double * z)417 void hookupFull (nodeptr p, nodeptr q, double *z)
418 {
419 //int i;
420
421 p->back = q;
422 q->back = p;
423
424 memcpy(p->z, z, PLL_NUM_BRANCHES*sizeof(double) );
425 memcpy(q->z, z, PLL_NUM_BRANCHES*sizeof(double) );
426 //for(i = 0; i < numBranches; i++)
427 // p->z[i] = q->z[i] = z[i];
428
429 }
430
431 /* connect node p with q and assign the default branch lengths */
hookupDefault(nodeptr p,nodeptr q)432 void hookupDefault (nodeptr p, nodeptr q)
433 {
434 int i;
435
436 p->back = q;
437 q->back = p;
438
439 // TODO: fix: this make parsimony tree computation very slow with increasing PLL_NUM_BRANCHES
440 // for(i = 0; i < PLL_NUM_BRANCHES; i++)
441 // p->z[i] = q->z[i] = PLL_DEFAULTZ;
442 p->z[0] = q->z[0] = PLL_DEFAULTZ;
443 }
444
445
446 /***********************reading and initializing input ******************/
447
448
449
whitechar(int ch)450 pllBoolean whitechar (int ch)
451 {
452 return (ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r');
453 }
454 /*
455 static unsigned int KISS32(void)
456 {
457 static unsigned int
458 x = 123456789,
459 y = 362436069,
460 z = 21288629,
461 w = 14921776,
462 c = 0;
463
464 unsigned int t;
465
466 x += 545925293;
467 y ^= (y<<13);
468 y ^= (y>>17);
469 y ^= (y<<5);
470 t = z + w + c;
471 z = w;
472 c = (t>>31);
473 w = t & 2147483647;
474
475 return (x+y+w);
476 }
477 */
478
479 /** @brief Get a random subtree
480
481 Returns the root node of a randomly picked subtree of the tree in PLL
482 instance \a tr. The picked subtree is guaranteed to have height over
483 1, that is, the direct descendents of the returned (root) node are not tips.
484
485 @param tr
486 PLL instance
487
488 @return
489 The root node of the randomly picked subtree
490 */
pllGetRandomSubtree(pllInstance * tr)491 nodeptr pllGetRandomSubtree(pllInstance *tr)
492 {
493 nodeptr p;
494 do
495 {
496 int exitDirection = rand() % 3;
497 p = tr->nodep[(rand() % (tr->mxtips - 2)) + 1 + tr->mxtips];
498 switch(exitDirection)
499 {
500 case 0:
501 break;
502 case 1:
503 p = p->next;
504 break;
505 case 2:
506 p = p->next->next;
507 break;
508 default:
509 assert(0);
510 }
511 }
512 while(isTip(p->next->back->number, tr->mxtips) && isTip(p->next->next->back->number, tr->mxtips));
513 assert(!isTip(p->number, tr->mxtips));
514 return p;
515 }
516 /* small example program that executes ancestral state computations
517 on the entire subtree rooted at p.
518
519 Note that this is a post-order traversal.
520 */
521
522
computeAllAncestralVectors(nodeptr p,pllInstance * tr,partitionList * pr)523 void computeAllAncestralVectors(nodeptr p, pllInstance *tr, partitionList *pr)
524 {
525 /* if this is not a tip, for which evidently it does not make sense
526 to compute the ancestral sequence because we have the real one ....
527 */
528
529 if(!isTip(p->number, tr->mxtips))
530 {
531 /* descend recursively to compute the ancestral states in the left and right subtrees */
532
533 computeAllAncestralVectors(p->next->back, tr, pr);
534 computeAllAncestralVectors(p->next->next->back, tr, pr);
535
536 /* then compute the ancestral state at node p */
537
538 pllUpdatePartialsAncestral(tr, pr, p);
539
540 /* and print it to terminal, the two booleans that are set to PLL_TRUE here
541 tell the function to print the marginal probabilities as well as
542 a discrete inner sequence, that is, ACGT etc., always selecting and printing
543 the state that has the highest probability */
544
545 printAncestralState(p, PLL_TRUE, PLL_TRUE, tr, pr);
546 }
547 }
548
549
550
initializePartitionData(pllInstance * localTree,partitionList * localPartitions)551 void initializePartitionData(pllInstance *localTree, partitionList * localPartitions)
552 {
553 /* in ancestralVectorWidth we store the total length in bytes (!) of
554 one conditional likelihood array !
555 we need to know this length such that in the pthreads version the master thread can actually
556 gather the scattered ancestral probabilities from the threads such that they can be printed to screen!
557 */
558
559 size_t
560 maxCategories = (size_t)localTree->maxCategories;
561
562 size_t
563 ancestralVectorWidth = 0,
564 model;
565
566 int
567 tid = localTree->threadID,
568 innerNodes = localTree->mxtips - 2;
569
570 if(tid > 0)
571 localTree->rateCategory = (int *) rax_calloc((size_t)localTree->originalCrunchedLength, sizeof(int));
572
573 for(model = 0; model < (size_t)localPartitions->numberOfPartitions; model++)
574 {
575 size_t
576 width = localPartitions->partitionData[model]->width;
577
578 const partitionLengths
579 *pl = getPartitionLengths(localPartitions->partitionData[model]);
580
581 /*
582 globalScaler needs to be 2 * localTree->mxtips such that scalers of inner AND tip nodes can be added without a case switch
583 to this end, it must also be initialized with zeros -> calloc
584 */
585
586 localPartitions->partitionData[model]->globalScaler = (unsigned int *)rax_calloc(2 *(size_t)localTree->mxtips, sizeof(unsigned int));
587
588 rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->left), PLL_BYTE_ALIGNMENT, (size_t)pl->leftLength * (maxCategories + 1) * sizeof(double));
589 rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->right), PLL_BYTE_ALIGNMENT, (size_t)pl->rightLength * (maxCategories + 1) * sizeof(double));
590 localPartitions->partitionData[model]->EIGN = (double*)rax_malloc((size_t)pl->eignLength * sizeof(double));
591 rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->EV), PLL_BYTE_ALIGNMENT, (size_t)pl->evLength * sizeof(double));
592 localPartitions->partitionData[model]->EI = (double*)rax_malloc((size_t)pl->eiLength * sizeof(double));
593 localPartitions->partitionData[model]->substRates = (double *)rax_malloc((size_t)pl->substRatesLength * sizeof(double));
594 localPartitions->partitionData[model]->frequencies = (double*)rax_malloc((size_t)pl->frequenciesLength * sizeof(double));
595 localPartitions->partitionData[model]->freqExponents = (double*)rax_malloc(pl->frequenciesLength * sizeof(double));
596 localPartitions->partitionData[model]->empiricalFrequencies = (double*)rax_malloc((size_t)pl->frequenciesLength * sizeof(double));
597 rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->tipVector), PLL_BYTE_ALIGNMENT, (size_t)pl->tipVectorLength * sizeof(double));
598 //localPartitions->partitionData[model]->partitionName = NULL; // very imporatant since it is deallocated in pllPartitionDestroy
599
600 if(localPartitions->partitionData[model]->dataType == PLL_AA_DATA
601 && (localPartitions->partitionData[model]->protModels == PLL_LG4M || localPartitions->partitionData[model]->protModels == PLL_LG4X))
602 {
603 int
604 k;
605
606 for(k = 0; k < 4; k++)
607 {
608 localPartitions->partitionData[model]->EIGN_LG4[k] = (double*)rax_malloc(pl->eignLength * sizeof(double));
609 rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->EV_LG4[k]), PLL_BYTE_ALIGNMENT, pl->evLength * sizeof(double));
610 localPartitions->partitionData[model]->EI_LG4[k] = (double*)rax_malloc(pl->eiLength * sizeof(double));
611 localPartitions->partitionData[model]->substRates_LG4[k] = (double *)rax_malloc(pl->substRatesLength * sizeof(double));
612 localPartitions->partitionData[model]->frequencies_LG4[k] = (double*)rax_malloc(pl->frequenciesLength * sizeof(double));
613 rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->tipVector_LG4[k]), PLL_BYTE_ALIGNMENT, pl->tipVectorLength * sizeof(double));
614 }
615 }
616
617 localPartitions->partitionData[model]->symmetryVector = (int *)rax_malloc((size_t)pl->symmetryVectorLength * sizeof(int));
618 localPartitions->partitionData[model]->frequencyGrouping = (int *)rax_malloc((size_t)pl->frequencyGroupingLength * sizeof(int));
619
620 localPartitions->partitionData[model]->perSiteRates = (double *)rax_malloc(sizeof(double) * maxCategories);
621
622 localPartitions->partitionData[model]->nonGTR = PLL_FALSE;
623
624 localPartitions->partitionData[model]->gammaRates = (double*)rax_malloc(sizeof(double) * 4);
625 localPartitions->partitionData[model]->yVector = (unsigned char **)rax_malloc(sizeof(unsigned char*) * ((size_t)localTree->mxtips + 1));
626
627
628 localPartitions->partitionData[model]->xVector = (double **)rax_calloc(sizeof(double*), (size_t)localTree->mxtips);
629
630 if (localPartitions->partitionData[model]->ascBias)
631 {
632 localPartitions->partitionData[model]->ascOffset = 4 * localPartitions->partitionData[model]->states * localPartitions->partitionData[model]->states;
633 localPartitions->partitionData[model]->ascVector = (double *)rax_malloc(innerNodes *
634 localPartitions->partitionData[model]->ascOffset *
635 sizeof(double));
636 localPartitions->partitionData[model]->ascExpVector = (int *)rax_calloc(innerNodes *
637 localPartitions->partitionData[model]->states,
638 sizeof(int));
639 localPartitions->partitionData[model]->ascSumBuffer = (double *)rax_malloc(localPartitions->partitionData[model]->ascOffset * sizeof(double));
640 }
641
642
643 /*
644 Initializing the xVector array like this is absolutely required !!!!
645 I don't know which programming genious removed this, but it must absolutely stay in here!!!!
646 */
647
648 {
649 int k;
650
651 for(k = 0; k < localTree->mxtips; k++)
652 localPartitions->partitionData[model]->xVector[k] = (double*)NULL;
653 }
654
655
656 localPartitions->partitionData[model]->xSpaceVector = (size_t *)rax_calloc((size_t)localTree->mxtips, sizeof(size_t));
657
658 const size_t span = (size_t)(localPartitions->partitionData[model]->states) *
659 discreteRateCategories(localTree->rateHetModel);
660
661 #ifdef __MIC_NATIVE
662
663 // Alexey: sum buffer buffer padding for Xeon PHI
664 const int aligned_width = width % PLL_VECTOR_WIDTH == 0 ? width : width + (PLL_VECTOR_WIDTH - (width % PLL_VECTOR_WIDTH));
665
666 rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->sumBuffer), PLL_BYTE_ALIGNMENT, aligned_width *
667 span *
668 sizeof(double));
669
670 // Alexey: fill padding entries with 1. (will be corrected with site weights, s. below)
671 {
672 int k;
673 for (k = width*span; k < aligned_width*span; ++k)
674 localPartitions->partitionData[model]->sumBuffer[k] = 1.;
675 }
676
677 #else
678
679 rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->sumBuffer), PLL_BYTE_ALIGNMENT, width *
680 span *
681 sizeof(double));
682 #endif
683
684 /* Initialize buffers to store per-site log likelihoods */
685
686 rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->perSiteLikelihoods), PLL_BYTE_ALIGNMENT, width * sizeof(double));
687
688 /* initialize data structures for per-site likelihood scaling */
689
690 if(localTree->fastScaling)
691 {
692 localPartitions->partitionData[model]->expVector = (int **)NULL;
693 localPartitions->partitionData[model]->expSpaceVector = (size_t *)NULL;
694 }
695 else
696 {
697 localPartitions->partitionData[model]->expVector = (int **)rax_malloc(sizeof(int*) * innerNodes);
698
699 /*
700 Initializing the expVector array like this is absolutely required !!!!
701 Not doing this can (and did) cause segmentation faults !!!!
702 */
703
704 {
705 int k;
706
707 for(k = 0; k < innerNodes; k++)
708 localPartitions->partitionData[model]->expVector[k] = (int*)NULL;
709 }
710
711 localPartitions->partitionData[model]->expSpaceVector = (size_t *)rax_calloc(innerNodes, sizeof(size_t));
712 }
713
714 /* data structure to store the marginal ancestral probabilities in the sequential version or for each thread */
715
716 rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->ancestralBuffer), PLL_BYTE_ALIGNMENT, width *
717 (size_t)(localPartitions->partitionData[model]->states) *
718 sizeof(double));
719
720 /* count and accumulate how many bytes we will need for storing a full ancestral vector. for this we addf over the per-partition space requirements in bytes */
721 /* ancestralVectorWidth += ((size_t)(pr->partitionData[model]->upper - pr->partitionData[model]->lower) * (size_t)(localPartitions->partitionData[model]->states) * sizeof(double)); */
722 ancestralVectorWidth += ((size_t)(localPartitions->partitionData[model]->upper - localPartitions->partitionData[model]->lower) * (size_t)(localPartitions->partitionData[model]->states) * sizeof(double));
723 /* :TODO: do we have to use the original tree for that */
724
725 #ifdef __MIC_NATIVE
726
727 rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->wgt), PLL_BYTE_ALIGNMENT, aligned_width * sizeof(int));
728
729 // Alexey: fill padding entries with 0.
730 {
731 int k;
732 for (k = width; k < aligned_width; ++k)
733 localPartitions->partitionData[model]->wgt[k] = 0;
734 }
735 #else
736 rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->wgt), PLL_BYTE_ALIGNMENT, width * sizeof(int));
737 #endif
738
739 /* rateCategory must be assigned using rax_calloc() at start up there is only one rate category 0 for all sites */
740
741 localPartitions->partitionData[model]->rateCategory = (int *)rax_calloc(width, sizeof(int));
742
743 if(width > 0 && localTree->saveMemory)
744 {
745 localPartitions->partitionData[model]->gapVectorLength = ((int)width / 32) + 1;
746 assert(4 == sizeof(unsigned int));
747 localPartitions->partitionData[model]->gapVector = (unsigned int*)rax_calloc((size_t)localPartitions->partitionData[model]->gapVectorLength * 2 * (size_t)localTree->mxtips, sizeof(unsigned int));
748 rax_posix_memalign ((void **)&(localPartitions->partitionData[model]->gapColumn),PLL_BYTE_ALIGNMENT, ((size_t)localTree->mxtips) *
749 ((size_t)(localPartitions->partitionData[model]->states)) *
750 discreteRateCategories(localTree->rateHetModel) * sizeof(double));
751 }
752 else
753 {
754 localPartitions->partitionData[model]->gapVectorLength = 0;
755 localPartitions->partitionData[model]->gapVector = (unsigned int*)NULL;
756 localPartitions->partitionData[model]->gapColumn = (double*)NULL;
757 }
758 }
759 }
760
virtual_width(int n)761 int virtual_width( int n ) {
762 const int global_vw = 2;
763 return (n+1) / global_vw * global_vw;
764 }
765
766
initMemorySavingAndRecom(pllInstance * tr,partitionList * pr)767 void initMemorySavingAndRecom(pllInstance *tr, partitionList *pr)
768 {
769 pllInstance
770 *localTree = tr;
771 partitionList
772 *localPartitions = pr;
773 size_t model;
774
775 /* initialize gap bit vectors at tips when memory saving option is enabled */
776
777 if(localTree->saveMemory)
778 {
779 for(model = 0; model < (size_t)localPartitions->numberOfPartitions; model++)
780 {
781 int
782 undetermined = getUndetermined(localPartitions->partitionData[model]->dataType);
783
784 size_t
785 i,
786 j,
787 width = localPartitions->partitionData[model]->width;
788
789 if(width > 0)
790 {
791 for(j = 1; j <= (size_t)(localTree->mxtips); j++)
792 for(i = 0; i < width; i++)
793 if(localPartitions->partitionData[model]->yVector[j][i] == undetermined)
794 localPartitions->partitionData[model]->gapVector[localPartitions->partitionData[model]->gapVectorLength * j + i / 32] |= mask32[i % 32];
795 }
796 }
797 }
798 /* recom */
799 if(localTree->useRecom)
800 allocRecompVectorsInfo(localTree);
801 else
802 localTree->rvec = (recompVectors*)NULL;
803 /* E recom */
804 }
805
806 /** @brief Get the length of a specific branch
807
808 Get the length of the branch specified by node \a p and \a p->back
809 of partition \a partition_id.
810 The branch length is decoded from the PLL representation.
811
812 @param tr
813 PLL instance
814
815 @param p
816 Specifies one end-point of the branch. The other one is \a p->back
817
818 @param partition_id
819 Specifies the partition
820
821 @return
822 The branch length
823 */
pllGetBranchLength(pllInstance * tr,nodeptr p,int partition_id)824 double pllGetBranchLength (pllInstance *tr, nodeptr p, int partition_id)
825 {
826 //assert(partition_id < tr->numBranches);
827 assert(partition_id < PLL_NUM_BRANCHES);
828 assert(partition_id >= 0);
829 assert(tr->fracchange != -1.0);
830 double z = p->z[partition_id];
831 if(z < PLL_ZMIN) z = PLL_ZMIN;
832 if(z > PLL_ZMAX) z = PLL_ZMAX;
833 return (-log(z) * tr->fracchange);
834 }
835
836 /** @brief Set the length of a specific branch
837
838 Set the length of the branch specified by node \a p and \a p->back
839 of partition \a partition_id.
840 The function encodes the branch length to the PLL representation.
841
842 @param tr
843 PLL instance
844
845 @param p
846 Specifies one end-point of the branch. The other one is \a p->back
847
848 @param partition_id
849 Specifies the partition
850
851 @param bl
852 Branch length
853 */
pllSetBranchLength(pllInstance * tr,nodeptr p,int partition_id,double bl)854 void pllSetBranchLength (pllInstance *tr, nodeptr p, int partition_id, double bl)
855 {
856 //assert(partition_id < tr->numBranches);
857 assert(partition_id < PLL_NUM_BRANCHES);
858 assert(partition_id >= 0);
859 assert(tr->fracchange != -1.0);
860 double z;
861 z = exp((-1 * bl)/tr->fracchange);
862 if(z < PLL_ZMIN) z = PLL_ZMIN;
863 if(z > PLL_ZMAX) z = PLL_ZMAX;
864 p->z[partition_id] = z;
865 }
866
867 #if (!defined(_FINE_GRAIN_MPI) && !defined(_USE_PTHREADS))
initializePartitionsSequential(pllInstance * tr,partitionList * pr)868 static void initializePartitionsSequential(pllInstance *tr, partitionList *pr)
869 {
870 size_t
871 model;
872
873 for(model = 0; model < (size_t)pr->numberOfPartitions; model++)
874 assert(pr->partitionData[model]->width == pr->partitionData[model]->upper - pr->partitionData[model]->lower);
875
876 initializePartitionData(tr, pr);
877
878 /* figure in tip sequence data per-site pattern weights */
879 for(model = 0; model < (size_t)pr->numberOfPartitions; model++)
880 {
881 size_t
882 j;
883 size_t lower = pr->partitionData[model]->lower;
884 size_t width = pr->partitionData[model]->upper - lower;
885
886 for(j = 1; j <= (size_t)tr->mxtips; j++)
887 {
888 pr->partitionData[model]->yVector[j] = &(tr->yVector[j][pr->partitionData[model]->lower]);
889 }
890
891 memcpy((void*)(&(pr->partitionData[model]->wgt[0])), (void*)(&(tr->aliaswgt[lower])), sizeof(int) * width);
892 }
893
894 initMemorySavingAndRecom(tr, pr);
895 }
896 #endif
897
898
899 /* interface to outside */
900 //void initializePartitions(pllInstance *tr, pllInstance *localTree, partitionList *pr, partitionList *localPr, int tid, int n)
901 //{
902 //#if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
903 // initializePartitionsMaster(tr,localTree,pr,localPr,tid,n);
904 //#else
905 // initializePartitionsSequential(tr, pr);
906 //#endif
907 //}
908
freeLinkageList(linkageList * ll)909 static void freeLinkageList( linkageList* ll)
910 {
911 int i;
912
913 for(i = 0; i < ll->entries; i++)
914 rax_free(ll->ld[i].partitionList);
915
916 rax_free(ll->ld);
917 rax_free(ll);
918 }
919
920 /** @brief free all data structures associated to a partition
921
922 frees all data structures allocated for this partition
923
924 @param partitions
925 the pointer to the partition list
926
927 @param tips
928 number of tips in the tree
929 */
930 void
pllPartitionsDestroy(pllInstance * tr,partitionList ** partitions)931 pllPartitionsDestroy (pllInstance * tr, partitionList ** partitions)
932 {
933 int i, j, tips;
934 partitionList * pl = *partitions;
935
936 #ifdef _USE_PTHREADS
937 int tid = tr->threadID;
938 if (MASTER_P) {
939 pllMasterBarrier (tr, pl, PLL_THREAD_EXIT_GRACEFULLY);
940 pllStopPthreads (tr);
941 }
942 #endif
943
944 tips = tr->mxtips;
945
946 #ifdef _USE_PTHREADS
947 if (MASTER_P) {
948 #endif
949 #ifdef _FINE_GRAIN_MPI
950 if (MASTER_P) {
951 pllMasterBarrier (tr, pl, PLL_THREAD_EXIT_GRACEFULLY);
952 #endif
953 freeLinkageList(pl->alphaList);
954 freeLinkageList(pl->freqList);
955 freeLinkageList(pl->rateList);
956 #ifdef _FINE_GRAIN_MPI
957 }
958 #endif
959
960 #ifdef _USE_PTHREADS
961 }
962 #endif
963 for (i = 0; i < pl->numberOfPartitions; ++ i)
964 {
965 rax_free (pl->partitionData[i]->gammaRates);
966 rax_free (pl->partitionData[i]->perSiteRates);
967 rax_free (pl->partitionData[i]->globalScaler);
968 rax_free (pl->partitionData[i]->left);
969 rax_free (pl->partitionData[i]->right);
970 rax_free (pl->partitionData[i]->EIGN);
971 rax_free (pl->partitionData[i]->EV);
972 rax_free (pl->partitionData[i]->EI);
973 rax_free (pl->partitionData[i]->substRates);
974 rax_free (pl->partitionData[i]->frequencies);
975 rax_free (pl->partitionData[i]->freqExponents);
976 rax_free (pl->partitionData[i]->empiricalFrequencies);
977 rax_free (pl->partitionData[i]->tipVector);
978 rax_free (pl->partitionData[i]->symmetryVector);
979 rax_free (pl->partitionData[i]->frequencyGrouping);
980 for (j = 0; j < tips; ++ j)
981 rax_free (pl->partitionData[i]->xVector[j]);
982 rax_free (pl->partitionData[i]->xVector);
983 rax_free (pl->partitionData[i]->yVector);
984 rax_free (pl->partitionData[i]->xSpaceVector);
985 rax_free (pl->partitionData[i]->sumBuffer);
986 rax_free (pl->partitionData[i]->ancestralBuffer);
987 rax_free (pl->partitionData[i]->wgt);
988 rax_free (pl->partitionData[i]->rateCategory);
989 rax_free (pl->partitionData[i]->gapVector);
990 rax_free (pl->partitionData[i]->gapColumn);
991 rax_free (pl->partitionData[i]->perSiteLikelihoods);
992 rax_free (pl->partitionData[i]->partitionName);
993 rax_free (pl->partitionData[i]->expSpaceVector);
994 /*TODO: Deallocate all entries of expVector */
995 if (pl->partitionData[i]->expVector)
996 {
997 for (j = 0; j < tips - 2; ++ j)
998 rax_free (pl->partitionData[i]->expVector[j]);
999 }
1000 rax_free (pl->partitionData[i]->expVector);
1001 rax_free (pl->partitionData[i]);
1002 }
1003 rax_free (pl->partitionData);
1004 rax_free (pl);
1005
1006 *partitions = NULL;
1007
1008 #if (defined(_USE_PTHREADS) || defined(_FINE_GRAIN_MPI))
1009 rax_free (tr->y_ptr);
1010 #endif
1011 }
1012
1013 /** @ingroup instanceLinkingGroup
1014 @brief Correspondance check between partitions and alignment
1015
1016 This function checks whether the partitions to be created and the given
1017 alignment correspond, that is, whether each site of the alignment is
1018 assigned to exactly one partition.
1019
1020 @param parts
1021 A list of partitions suggested by the caller
1022
1023 @param alignmentData
1024 The multiple sequence alignment
1025
1026 @return
1027 Returns \a 1 in case of success, otherwise \a 0
1028 */
1029 int
pllPartitionsValidate(pllQueue * parts,pllAlignmentData * alignmentData)1030 pllPartitionsValidate (pllQueue * parts, pllAlignmentData * alignmentData)
1031 {
1032 int nparts;
1033 char * used;
1034 struct pllQueueItem * elm;
1035 struct pllQueueItem * regionItem;
1036 pllPartitionRegion * region;
1037 pllPartitionInfo * pi;
1038 int i;
1039
1040 /* check if the list contains at least one partition */
1041 nparts = pllQueueSize (parts);
1042 if (!nparts)
1043 return (0);
1044
1045 /* pllBoolean array for marking that a site was assigned a partition */
1046 used = (char *) rax_calloc (alignmentData->sequenceLength, sizeof (char));
1047
1048 /* traverse all partitions and their respective regions and mark sites */
1049 for (elm = parts->head; elm; elm = elm->next)
1050 {
1051 pi = (pllPartitionInfo *) elm->item;
1052
1053 for (regionItem = pi->regionList->head; regionItem; regionItem = regionItem->next)
1054 {
1055 region = (pllPartitionRegion *) regionItem->item;
1056
1057 if (region->start < 1 || region->end > alignmentData->sequenceLength)
1058 {
1059 rax_free (used);
1060 return (0);
1061 }
1062
1063 for (i = region->start - 1; i < region->end; i += region->stride)
1064 {
1065 if (used[i])
1066 {
1067 rax_free (used);
1068 return (0);
1069 }
1070 used[i] = 1;
1071 }
1072 }
1073 }
1074
1075 /* check whether all sites were assigned a partition */
1076 for (i = 0; i < alignmentData->sequenceLength; ++ i)
1077 if (used[i] != 1)
1078 {
1079 rax_free (used);
1080 return (0);
1081 }
1082
1083 rax_free (used);
1084 return (1);
1085 }
1086
1087 /** @brief Swap two sites in a buffer
1088
1089 Swaps sites \a s1 and \a s2 in buffer \a buf which consists of \a nTaxa + 1
1090 taxa (i.e. rows), and the first row contains no information, i.e. it is not
1091 accessed.
1092
1093 @param buffer
1094 Memory buffer
1095
1096 @param s1
1097 First site
1098
1099 @param s2
1100 Second site
1101
1102 @param nTaxa
1103 Number of taxa, i.e. size of site
1104 */
1105 static __inline void
swapSite(unsigned char ** buf,int s1,int s2,int nTaxa)1106 swapSite (unsigned char ** buf, int s1, int s2, int nTaxa)
1107 {
1108 int i;
1109 int x;
1110
1111 for (i = 1; i <= nTaxa; ++ i)
1112 {
1113 x = buf[i][s1];
1114 buf[i][s1] = buf[i][s2];
1115 buf[i][s2] = x;
1116 }
1117 }
1118
1119 /** @brief Constructs the list of partitions according to the proposed partition scheme
1120
1121 A static function that construcs the \a partitionList structure according to
1122 the partition scheme \b AFTER the sites have been repositioned in contiguous
1123 regions according to the partition scheme.
1124
1125 @param bounds An array of the new starting and ending posititons of sites
1126 in the alignment for each partition. This array is of size 2 * \a nparts.
1127 The elements are always couples (lower,upper). The upper bounds is a site
1128 that is not included in the partition
1129
1130 @param nparts The number of partitions to be created
1131
1132 @todo Fix the bug in PLL
1133 */
createPartitions(pllQueue * parts,int * bounds)1134 static partitionList * createPartitions (pllQueue * parts, int * bounds)
1135 {
1136 partitionList * pl;
1137 pllPartitionInfo * pi;
1138 struct pllQueueItem * elm;
1139 int i, j;
1140
1141 pl = (partitionList *) rax_malloc (sizeof (partitionList));
1142
1143 // TODO: fix this
1144 pl->perGeneBranchLengths = 0;
1145
1146 // TODO: change PLL_NUM_BRANCHES to number of partitions I guess
1147 pl->partitionData = (pInfo **) rax_calloc (PLL_NUM_BRANCHES, sizeof (pInfo *));
1148
1149 for (i = 0, elm = parts->head; elm; elm = elm->next, ++ i)
1150 {
1151 pi = (pllPartitionInfo *) elm->item;
1152
1153 /* check whether the data type is valid, and in case it's not, deallocate
1154 and return NULL */
1155 if (pi->dataType <= PLL_MIN_MODEL || pi->dataType >= PLL_MAX_MODEL)
1156 {
1157 for (j = 0; j < i; ++ j)
1158 {
1159 rax_free (pl->partitionData[j]->partitionName);
1160 rax_free (pl->partitionData[j]);
1161 }
1162 rax_free (pl->partitionData);
1163 rax_free (pl);
1164 return (NULL);
1165 }
1166
1167 pl->partitionData[i] = (pInfo *) rax_malloc (sizeof (pInfo));
1168
1169 pl->partitionData[i]->lower = bounds[i << 1];
1170 pl->partitionData[i]->upper = bounds[(i << 1) + 1];
1171 pl->partitionData[i]->width = bounds[(i << 1) + 1] - bounds[i << 1];
1172 pl->partitionData[i]->partitionWeight = 1.0 * (double) pl->partitionData[i]->width;
1173
1174 //the two flags below are required to allow users to set
1175 //alpha parameters and substitution rates in the Q matrix
1176 //to fixed values. These parameters will then not be optimized
1177 //in the model parameter optimization functions
1178 //by default we assume that all parameters are being optimized, i.e.,
1179 //this has to be explicitly set by the user
1180
1181 pl->partitionData[i]->optimizeAlphaParameter = PLL_TRUE;
1182 pl->partitionData[i]->optimizeSubstitutionRates = PLL_TRUE;
1183 pl->partitionData[i]->dataType = pi->dataType;
1184 pl->partitionData[i]->protModels = -1;
1185 pl->partitionData[i]->protUseEmpiricalFreqs = -1;
1186 pl->partitionData[i]->maxTipStates = pLengths[pi->dataType].undetermined + 1;
1187 pl->partitionData[i]->optimizeBaseFrequencies = pi->optimizeBaseFrequencies;
1188 pl->partitionData[i]->ascBias = pi->ascBias;
1189 pl->partitionData[i]->parsVect = NULL;
1190
1191
1192
1193 if (pi->dataType == PLL_AA_DATA)
1194 {
1195 if(pl->partitionData[i]->protModels != PLL_GTR)
1196 pl->partitionData[i]->optimizeSubstitutionRates = PLL_FALSE;
1197 pl->partitionData[i]->protUseEmpiricalFreqs = pi->protUseEmpiricalFreqs;
1198 pl->partitionData[i]->protModels = pi->protModels;
1199 }
1200
1201 pl->partitionData[i]->states = pLengths[pl->partitionData[i]->dataType].states;
1202 pl->partitionData[i]->numberOfCategories = 1;
1203 pl->partitionData[i]->autoProtModels = 0;
1204 pl->partitionData[i]->nonGTR = PLL_FALSE;
1205 pl->partitionData[i]->partitionContribution = -1.0;
1206 pl->partitionData[i]->partitionLH = 0.0;
1207 pl->partitionData[i]->fracchange = 1.0;
1208 pl->partitionData[i]->executeModel = PLL_TRUE;
1209
1210
1211 pl->partitionData[i]->partitionName = (char *) rax_malloc ((strlen (pi->partitionName) + 1) * sizeof (char));
1212 strcpy (pl->partitionData[i]->partitionName, pi->partitionName);
1213 }
1214
1215 return (pl);
1216 }
1217
1218
1219 /** @ingroup instanceLinkingGroup
1220 @brief Constructs the proposed partition scheme
1221
1222 This function constructs the proposed partition scheme. It assumes
1223 that the partition scheme is correct.
1224
1225 @note This function \b does \b not validate the partition scheme.
1226 The user must manually call the ::pllPartitionsValidate function
1227 for validation
1228
1229 @param parts
1230 A list of partitions suggested by the caller
1231
1232 @param alignmentData
1233 The multiple sequence alignment
1234
1235 @return
1236 Returns a pointer to \a partitionList structure of partitions in case of success, \b NULL otherwise
1237 */
pllPartitionsCommit(pllQueue * parts,pllAlignmentData * alignmentData)1238 partitionList * pllPartitionsCommit (pllQueue * parts, pllAlignmentData * alignmentData)
1239 {
1240 int * oi;
1241 int i, j, dst;
1242 struct pllQueueItem * elm;
1243 struct pllQueueItem * regionItem;
1244 pllPartitionRegion * region;
1245 pllPartitionInfo * pi;
1246 partitionList * pl;
1247 int * newBounds;
1248 int k, nparts;
1249 int tmpvar;
1250
1251
1252 dst = k = 0;
1253 oi = (int *) rax_malloc (alignmentData->sequenceLength * sizeof (int));
1254 for (i = 0; i < alignmentData->sequenceLength; ++ i) oi[i] = i;
1255
1256 nparts = pllQueueSize (parts);
1257 newBounds = (int *) rax_malloc (2 * nparts * sizeof (int));
1258
1259 /* reposition the sites in the alignment */
1260 for (elm = parts->head; elm; elm = elm->next, ++ k)
1261 {
1262 pi = (pllPartitionInfo *) elm->item;
1263
1264 newBounds[k << 1] = dst; /* set the lower column for this partition */
1265 for (regionItem = pi->regionList->head; regionItem; regionItem = regionItem->next)
1266 {
1267 region = (pllPartitionRegion *) regionItem->item;
1268
1269 for (i = region->start - 1; i < region->end && i < alignmentData->sequenceLength; i += region->stride)
1270 {
1271 if (oi[i] == i)
1272 {
1273 swapSite (alignmentData->sequenceData, dst, i, alignmentData->sequenceCount);
1274 tmpvar = oi[i];
1275 oi[i] = oi[dst];
1276 oi[dst++] = tmpvar;
1277 }
1278 else
1279 {
1280 j = i;
1281 while (oi[j] != i) j = oi[j];
1282
1283 swapSite (alignmentData->sequenceData, dst, j, alignmentData->sequenceCount);
1284 tmpvar = oi[j];
1285 oi[j] = oi[dst];
1286 oi[dst++] = tmpvar;
1287 }
1288 }
1289 }
1290 newBounds[(k << 1) + 1] = dst; /* set the uppwer limit for this partition */
1291 }
1292 if ((pl = createPartitions (parts, newBounds)))
1293 {
1294 pl->numberOfPartitions = nparts;
1295 pl->dirty = PLL_FALSE;
1296 }
1297
1298 rax_free (newBounds);
1299 rax_free (oi);
1300
1301 return (pl);
1302 }
1303
1304 /** @brief Copy a site to another buffer
1305
1306 Copies site \a from from buffer \a src to \a to in buffer \a dst. Both buffers
1307 must consist of \a nTaxa + 1 taxa and the first row contains no information, i.e.
1308 it is not accessed.
1309
1310 @param dst
1311 Destination buffer
1312
1313 @param src
1314 Source buffer
1315
1316 @param to
1317 At which position in \a dst to copy the site to
1318
1319 @param from
1320 Which site from \a src to copy
1321
1322 @param nTaxa
1323 Number of taxa, i.e. size of site
1324 */
1325 static __inline void
copySite(unsigned char ** dst,unsigned char ** src,int to,int from,int nTaxa)1326 copySite (unsigned char ** dst, unsigned char ** src, int to, int from, int nTaxa)
1327 {
1328 int i;
1329
1330 for (i = 1; i <= nTaxa; ++ i)
1331 {
1332 dst[i][to] = src[i][from];
1333 }
1334 }
1335
1336 /** @brief Remove duplicate sites from alignment and update weights vector
1337
1338 Removes duplicate sites from the alignment given the partitions list
1339 and updates the weight vector of the alignment and the boundaries
1340 (upper, lower, width) for each partition.
1341
1342 @param alignmentData
1343 The multiple sequence alignment
1344
1345 @param pl
1346 List of partitions
1347
1348 */
1349 void
pllAlignmentRemoveDups(pllAlignmentData * alignmentData,partitionList * pl)1350 pllAlignmentRemoveDups (pllAlignmentData * alignmentData, partitionList * pl)
1351 {
1352 int i, j, k, p;
1353 char *** sites;
1354 void ** memptr;
1355 int ** oi;
1356 int dups = 0;
1357 int lower;
1358
1359 /* allocate space for the transposed alignments (sites) for every partition */
1360 sites = (char ***) rax_malloc (pl->numberOfPartitions * sizeof (char **));
1361 memptr = (void **) rax_malloc (pl->numberOfPartitions * sizeof (void *));
1362 oi = (int **) rax_malloc (pl->numberOfPartitions * sizeof (int *));
1363
1364 /* transpose the sites by partition */
1365 for (p = 0; p < pl->numberOfPartitions; ++ p)
1366 {
1367 sites[p] = (char **) rax_malloc (sizeof (char *) * pl->partitionData[p]->width);
1368 memptr[p] = rax_malloc (sizeof (char) * (alignmentData->sequenceCount + 1) * pl->partitionData[p]->width);
1369
1370 for (i = 0; i < pl->partitionData[p]->width; ++ i)
1371 {
1372 sites[p][i] = (char *) ((char*)memptr[p] + sizeof (char) * i * (alignmentData->sequenceCount + 1));
1373 }
1374
1375 for (i = 0; i < pl->partitionData[p]->width; ++ i)
1376 {
1377 for (j = 0; j < alignmentData->sequenceCount; ++ j)
1378 {
1379 sites[p][i][j] = alignmentData->sequenceData[j + 1][pl->partitionData[p]->lower + i];
1380 }
1381 sites[p][i][j] = 0;
1382 }
1383
1384 oi[p] = pllssort1main (sites[p], pl->partitionData[p]->width);
1385
1386 for (i = 0; i < pl->partitionData[p]->width; ++ i) oi[p][i] = 1;
1387
1388 for (i = 1; i < pl->partitionData[p]->width; ++ i)
1389 {
1390 if (! strcmp (sites[p][i], sites[p][i - 1]))
1391 {
1392 ++ dups;
1393 oi[p][i] = 0;
1394 }
1395 }
1396 }
1397
1398 /* allocate memory for the alignment without duplicates*/
1399 rax_free (alignmentData->sequenceData[1]);
1400 rax_free (alignmentData->siteWeights);
1401
1402 alignmentData->sequenceLength = alignmentData->sequenceLength - dups;
1403 alignmentData->sequenceData[0] = (unsigned char *) rax_malloc ((alignmentData->sequenceLength + 1) * sizeof (unsigned char) * alignmentData->sequenceCount);
1404 for (i = 0; i < alignmentData->sequenceCount; ++ i)
1405 {
1406 alignmentData->sequenceData[i + 1] = (unsigned char *) (alignmentData->sequenceData[0] + sizeof (unsigned char) * i * (alignmentData->sequenceLength + 1));
1407 alignmentData->sequenceData[i + 1][alignmentData->sequenceLength] = 0;
1408 }
1409
1410 alignmentData->siteWeights = (int *) rax_malloc ((alignmentData->sequenceLength) * sizeof (int));
1411 alignmentData->siteWeights[0] = 1;
1412
1413 /* transpose sites back to alignment */
1414 for (p = 0, k = 0; p < pl->numberOfPartitions; ++ p)
1415 {
1416 lower = k;
1417 for (i = 0; i < pl->partitionData[p]->width; ++ i)
1418 {
1419 if (!oi[p][i])
1420 {
1421 ++ alignmentData->siteWeights[k - 1];
1422 }
1423 else
1424 {
1425 alignmentData->siteWeights[k] = 1;
1426 for (j = 0; j < alignmentData->sequenceCount; ++ j)
1427 {
1428 alignmentData->sequenceData[j + 1][k] = sites[p][i][j];
1429 }
1430 ++ k;
1431 }
1432 }
1433 pl->partitionData[p]->lower = lower;
1434 pl->partitionData[p]->upper = k;
1435 pl->partitionData[p]->width = k - lower;
1436 }
1437
1438 /* deallocate storage for transposed alignment (sites) */
1439 for (p = 0; p < pl->numberOfPartitions; ++ p)
1440 {
1441 rax_free (oi[p]);
1442 rax_free (memptr[p]);
1443 rax_free (sites[p]);
1444 }
1445 rax_free (oi);
1446 rax_free (sites);
1447 rax_free (memptr);
1448 }
1449
1450
1451 /** @brief Compute the empirical frequencies of a partition
1452
1453 Compute the empirical frequencies of partition \a partition and store them in
1454 \a pfreqs.
1455
1456 @param partition
1457 The partition for which to compute empirical frequencies
1458
1459 @param alignmentData
1460 The multiple sequence alignment
1461
1462 @param smoothFrequencies
1463 Not needed?
1464
1465 @param bitMask
1466 The bitmask
1467
1468 @param pfreqs
1469 Array of size \a partition->states where the empirical frequencies for this partition are stored
1470 */
genericBaseFrequenciesAlignment(pInfo * partition,pllAlignmentData * alignmentData,pllBoolean smoothFrequencies,const unsigned int * bitMask,double * pfreqs)1471 static int genericBaseFrequenciesAlignment (pInfo * partition,
1472 pllAlignmentData * alignmentData,
1473 pllBoolean smoothFrequencies,
1474 const unsigned int * bitMask,
1475 double * pfreqs)
1476 {
1477 double
1478 wj,
1479 acc,
1480 sumf[64],
1481 temp[64];
1482
1483 int
1484 i,
1485 j,
1486 k,
1487 l,
1488 numFreqs,
1489 lower,
1490 upper;
1491
1492 unsigned char *yptr;
1493 const char * map;
1494
1495 switch (partition->dataType)
1496 {
1497 case PLL_BINARY_DATA:
1498 map = PLL_MAP_BIN;
1499 case PLL_DNA_DATA:
1500 map = PLL_MAP_NT;
1501 break;
1502 case PLL_AA_DATA:
1503 map = PLL_MAP_AA;
1504 break;
1505 default:
1506 assert(0);
1507 }
1508
1509 numFreqs = partition->states;
1510 lower = partition->lower;
1511 upper = partition->upper;
1512
1513 for(l = 0; l < numFreqs; l++)
1514 pfreqs[l] = 1.0 / ((double)numFreqs);
1515
1516 for (k = 1; k <= 8; k++)
1517 {
1518 for(l = 0; l < numFreqs; l++)
1519 sumf[l] = 0.0;
1520
1521 for (i = 1; i <= alignmentData->sequenceCount; i++)
1522 {
1523 yptr = alignmentData->sequenceData[i];
1524
1525 for(j = lower; j < upper; j++)
1526 {
1527 if (map[yptr[j]] < 0) return (0);
1528 unsigned int code = bitMask[(unsigned char)map[yptr[j]]];
1529 assert(code >= 1);
1530
1531 for(l = 0; l < numFreqs; l++)
1532 {
1533 if((code >> l) & 1)
1534 temp[l] = pfreqs[l];
1535 else
1536 temp[l] = 0.0;
1537 }
1538
1539 for(l = 0, acc = 0.0; l < numFreqs; l++)
1540 {
1541 if(temp[l] != 0.0)
1542 acc += temp[l];
1543 }
1544
1545 wj = alignmentData->siteWeights[j] / acc;
1546
1547 for(l = 0; l < numFreqs; l++)
1548 {
1549 if(temp[l] != 0.0)
1550 sumf[l] += wj * temp[l];
1551 }
1552 }
1553 }
1554
1555 for(l = 0, acc = 0.0; l < numFreqs; l++)
1556 {
1557 if(sumf[l] != 0.0)
1558 acc += sumf[l];
1559 }
1560
1561 for(l = 0; l < numFreqs; l++)
1562 pfreqs[l] = sumf[l] / acc;
1563 }
1564
1565 /* TODO: What is that? */
1566 /*
1567 if(smoothFrequencies)
1568 {;
1569 smoothFreqs(numFreqs, pfreqs, tr->partitionData[model].frequencies, &(tr->partitionData[model]));
1570 }
1571 else
1572 {
1573 pllBoolean
1574 zeroFreq = PLL_FALSE;
1575
1576 char
1577 typeOfData[1024];
1578
1579 getDataTypeString(tr, model, typeOfData);
1580
1581 for(l = 0; l < numFreqs; l++)
1582 {
1583 if(pfreqs[l] == 0.0)
1584 {
1585 printBothOpen("Empirical base frequency for state number %d is equal to zero in %s data partition %s\n", l, typeOfData, tr->partitionData[model].partitionName);
1586 printBothOpen("Since this is probably not what you want to do, RAxML will soon exit.\n\n");
1587 zeroFreq = PLL_TRUE;
1588 }
1589 }
1590
1591 if(zeroFreq)
1592 exit(-1);
1593
1594 for(l = 0; l < numFreqs; l++)
1595 {
1596 assert(pfreqs[l] > 0.0);
1597 tr->partitionData[model].frequencies[l] = pfreqs[l];
1598 }
1599 }
1600 */
1601 return (1);
1602
1603 }
1604
genericBaseFrequenciesInstance(pInfo * partition,pllInstance * tr,pllBoolean smoothFrequencies,const unsigned int * bitMask,double * pfreqs)1605 static void genericBaseFrequenciesInstance (pInfo * partition,
1606 pllInstance * tr,
1607 pllBoolean smoothFrequencies,
1608 const unsigned int * bitMask,
1609 double * pfreqs)
1610 {
1611 double
1612 wj,
1613 acc,
1614 sumf[64],
1615 temp[64];
1616
1617 int
1618 i,
1619 j,
1620 k,
1621 l,
1622 numFreqs,
1623 lower,
1624 upper;
1625
1626 unsigned char *yptr;
1627
1628 numFreqs = partition->states;
1629 lower = partition->lower;
1630 upper = partition->upper;
1631
1632 for(l = 0; l < numFreqs; l++)
1633 pfreqs[l] = 1.0 / ((double)numFreqs);
1634
1635 for (k = 1; k <= 8; k++)
1636 {
1637 for(l = 0; l < numFreqs; l++)
1638 sumf[l] = 0.0;
1639
1640 for (i = 1; i <= tr->mxtips; i++)
1641 {
1642 yptr = tr->yVector[i];
1643
1644 for(j = lower; j < upper; j++)
1645 {
1646 unsigned int code = bitMask[yptr[j]];
1647 assert(code >= 1);
1648
1649 for(l = 0; l < numFreqs; l++)
1650 {
1651 if((code >> l) & 1)
1652 temp[l] = pfreqs[l];
1653 else
1654 temp[l] = 0.0;
1655 }
1656
1657 for(l = 0, acc = 0.0; l < numFreqs; l++)
1658 {
1659 if(temp[l] != 0.0)
1660 acc += temp[l];
1661 }
1662
1663 wj = tr->aliaswgt[j] / acc;
1664
1665 for(l = 0; l < numFreqs; l++)
1666 {
1667 if(temp[l] != 0.0)
1668 sumf[l] += wj * temp[l];
1669 }
1670 }
1671 }
1672
1673 for(l = 0, acc = 0.0; l < numFreqs; l++)
1674 {
1675 if(sumf[l] != 0.0)
1676 acc += sumf[l];
1677 }
1678
1679 for(l = 0; l < numFreqs; l++)
1680 pfreqs[l] = sumf[l] / acc;
1681 }
1682
1683 /* TODO: What is that? */
1684 /*
1685 if(smoothFrequencies)
1686 {;
1687 smoothFreqs(numFreqs, pfreqs, tr->partitionData[model].frequencies, &(tr->partitionData[model]));
1688 }
1689 else
1690 {
1691 pllBoolean
1692 zeroFreq = PLL_FALSE;
1693
1694 char
1695 typeOfData[1024];
1696
1697 getDataTypeString(tr, model, typeOfData);
1698
1699 for(l = 0; l < numFreqs; l++)
1700 {
1701 if(pfreqs[l] == 0.0)
1702 {
1703 printBothOpen("Empirical base frequency for state number %d is equal to zero in %s data partition %s\n", l, typeOfData, tr->partitionData[model].partitionName);
1704 printBothOpen("Since this is probably not what you want to do, RAxML will soon exit.\n\n");
1705 zeroFreq = PLL_TRUE;
1706 }
1707 }
1708
1709 if(zeroFreq)
1710 exit(-1);
1711
1712 for(l = 0; l < numFreqs; l++)
1713 {
1714 assert(pfreqs[l] > 0.0);
1715 tr->partitionData[model].frequencies[l] = pfreqs[l];
1716 }
1717 }
1718 */
1719
1720
1721 }
1722
1723 /** Compute the empirical base frequencies of an alignment
1724
1725 Computes the empirical base frequencies per partition of an alignment \a alignmentData
1726 given the partition structure \a pl.
1727
1728 @param alignmentData The alignment structure for which to compute the empirical base frequencies
1729 @param pl List of partitions
1730 @return Returns a list of frequencies for each partition
1731 */
pllBaseFrequenciesAlignment(pllAlignmentData * alignmentData,partitionList * pl)1732 double ** pllBaseFrequenciesAlignment (pllAlignmentData * alignmentData, partitionList * pl)
1733 {
1734 int
1735 i,
1736 model;
1737
1738 double
1739 **freqs = (double **) rax_malloc (pl->numberOfPartitions * sizeof (double *));
1740
1741 for (model = 0; model < pl->numberOfPartitions; ++ model)
1742 {
1743 freqs[model] = (double *) rax_malloc (pl->partitionData[model]->states * sizeof (double));
1744
1745 switch (pl->partitionData[model]->dataType)
1746 {
1747 case PLL_BINARY_DATA:
1748 case PLL_AA_DATA:
1749 case PLL_DNA_DATA:
1750 if (!genericBaseFrequenciesAlignment (pl->partitionData[model],
1751 alignmentData,
1752 pLengths[pl->partitionData[model]->dataType].smoothFrequencies,
1753 pLengths[pl->partitionData[model]->dataType].bitVector,
1754 freqs[model]
1755 ))
1756 return (NULL);
1757 break;
1758 default:
1759 {
1760 errno = PLL_UNKNOWN_MOLECULAR_DATA_TYPE;
1761 for (i = 0; i <= model; ++ i) rax_free (freqs[i]);
1762 rax_free (freqs);
1763 return (double **)NULL;
1764 }
1765 }
1766 }
1767
1768 return (freqs);
1769 }
1770
1771 /** Compute the empirical base frequencies of the alignment incorporated in the instance
1772
1773 Computes the empirical base frequencies per partition of the alignment
1774 incorporated in the instance \a tr given the partition structure \a pl.
1775
1776 @param tr The instance for which to compute the empirical base frequencies
1777 @param pl List of partitions
1778 @return Returns a list of frequencies for each partition
1779 */
pllBaseFrequenciesInstance(pllInstance * tr,partitionList * pl)1780 double ** pllBaseFrequenciesInstance (pllInstance * tr, partitionList * pl)
1781 {
1782 int
1783 i,
1784 model;
1785
1786 double
1787 **freqs = (double **) rax_malloc (pl->numberOfPartitions * sizeof (double *));
1788
1789 for (model = 0; model < pl->numberOfPartitions; ++ model)
1790 {
1791 freqs[model] = (double *) rax_malloc (pl->partitionData[model]->states * sizeof (double));
1792
1793 switch (pl->partitionData[model]->dataType)
1794 {
1795 case PLL_AA_DATA:
1796 case PLL_DNA_DATA:
1797 case PLL_BINARY_DATA:
1798 genericBaseFrequenciesInstance (pl->partitionData[model],
1799 tr,
1800 pLengths[pl->partitionData[model]->dataType].smoothFrequencies,
1801 pLengths[pl->partitionData[model]->dataType].bitVector,
1802 freqs[model]
1803 );
1804 break;
1805 default:
1806 {
1807 errno = PLL_UNKNOWN_MOLECULAR_DATA_TYPE;
1808 for (i = 0; i <= model; ++ i) rax_free (freqs[i]);
1809 rax_free (freqs);
1810 return (double **)NULL;
1811 }
1812 }
1813 }
1814
1815 return (freqs);
1816 }
1817
1818 void
pllEmpiricalFrequenciesDestroy(double *** empiricalFrequencies,int models)1819 pllEmpiricalFrequenciesDestroy (double *** empiricalFrequencies, int models)
1820 {
1821 int i;
1822
1823 for (i = 0; i < models; ++ i)
1824 {
1825 rax_free ((*empiricalFrequencies)[i]);
1826 }
1827 rax_free (*empiricalFrequencies);
1828
1829 *empiricalFrequencies = NULL;
1830 }
1831
pllLoadAlignment(pllInstance * tr,pllAlignmentData * alignmentData,partitionList * partitions)1832 int pllLoadAlignment (pllInstance * tr, pllAlignmentData * alignmentData, partitionList * partitions)
1833 {
1834 int i;
1835 nodeptr node;
1836 pllHashItem * hItem;
1837
1838 if (tr->mxtips != alignmentData->sequenceCount) return (0);
1839
1840 tr->aliaswgt = (int *) rax_malloc (alignmentData->sequenceLength * sizeof (int));
1841 memcpy (tr->aliaswgt, alignmentData->siteWeights, alignmentData->sequenceLength * sizeof (int));
1842
1843 tr->originalCrunchedLength = alignmentData->sequenceLength;
1844 tr->rateCategory = (int *) rax_calloc (tr->originalCrunchedLength, sizeof (int));
1845 tr->patrat = (double*) rax_malloc((size_t)tr->originalCrunchedLength * sizeof(double));
1846 tr->patratStored = (double*) rax_malloc((size_t)tr->originalCrunchedLength * sizeof(double));
1847 tr->lhs = (double*) rax_malloc((size_t)tr->originalCrunchedLength * sizeof(double));
1848
1849 /* allocate memory for the alignment */
1850 tr->yVector = (unsigned char **) rax_malloc ((alignmentData->sequenceCount + 1) * sizeof (unsigned char *));
1851
1852 tr->yVector[0] = (unsigned char *) rax_malloc (sizeof (unsigned char) * (alignmentData->sequenceLength + 1) * alignmentData->sequenceCount);
1853 for (i = 1; i <= alignmentData->sequenceCount; ++ i)
1854 {
1855 tr->yVector[i] = (unsigned char *) (tr->yVector[0] + (i - 1) * (alignmentData->sequenceLength + 1) * sizeof (unsigned char));
1856 tr->yVector[i][alignmentData->sequenceLength] = 0;
1857 }
1858
1859 /* place sequences to tips */
1860 for (i = 1; i <= alignmentData->sequenceCount; ++ i)
1861 {
1862 if (!pllHashSearch (tr->nameHash, alignmentData->sequenceLabels[i],(void **)&node))
1863 {
1864 //rax_free (tr->originalCrunchedLength);
1865 rax_free (tr->rateCategory);
1866 rax_free (tr->patrat);
1867 rax_free (tr->patratStored);
1868 rax_free (tr->lhs);
1869 rax_free (tr->yVector[0]);
1870 rax_free (tr->yVector);
1871 return (0);
1872 }
1873 memcpy (tr->yVector[node->number], alignmentData->sequenceData[i], alignmentData->sequenceLength);
1874 }
1875
1876 /* Do the base substitution (from A,C,G.... -> 0,1,2,3....)*/
1877 pllBaseSubstitute (tr, partitions);
1878
1879 /* Populate tipNames */
1880 tr->tipNames = (char **) rax_calloc(tr->mxtips + 1, sizeof (char *));
1881 for (i = 0; (unsigned int)i < tr->nameHash->size; ++ i)
1882 {
1883 hItem = tr->nameHash->Items[i];
1884
1885 for (; hItem; hItem = hItem->next)
1886 {
1887 tr->tipNames[((nodeptr)hItem->data)->number] = hItem->str;
1888 }
1889 }
1890
1891 return (1);
1892 }
1893
pllCreateInstance(pllInstanceAttr * attr)1894 pllInstance * pllCreateInstance (pllInstanceAttr * attr)
1895 {
1896 pllInstance * tr;
1897
1898 if (attr->rateHetModel != PLL_GAMMA && attr->rateHetModel != PLL_CAT) return NULL;
1899
1900 tr = (pllInstance *) rax_calloc (1, sizeof (pllInstance));
1901
1902 tr->threadID = 0;
1903 tr->rateHetModel = attr->rateHetModel;
1904 tr->fastScaling = attr->fastScaling;
1905 tr->saveMemory = attr->saveMemory;
1906 tr->useRecom = attr->useRecom;
1907 tr->likelihoodEpsilon = 0.01;
1908
1909 tr->randomNumberSeed = attr->randomNumberSeed;
1910 tr->parsimonyScore = NULL;
1911
1912 /* remove it from the library */
1913 tr->useMedian = PLL_FALSE;
1914
1915 tr->maxCategories = (attr->rateHetModel == PLL_GAMMA) ? 4 : 25;
1916
1917 tr->numberOfThreads = attr->numberOfThreads;
1918 tr->rearrangeHistory = NULL;
1919
1920 /* Lock the slave processors at this point */
1921 #ifdef _FINE_GRAIN_MPI
1922 pllLockMPI (tr);
1923 #endif
1924
1925 return (tr);
1926 }
1927
1928 /** @brief Initialize PLL tree structure with default values
1929
1930 Initialize PLL tree structure with default values and allocate
1931 memory for its elements.
1932
1933 @todo
1934 STILL NOT FINISHED
1935 */
pllTreeInitDefaults(pllInstance * tr,int tips)1936 static void pllTreeInitDefaults (pllInstance * tr, int tips)
1937 {
1938 nodeptr p0, p, q;
1939 int i, j;
1940 int inner;
1941
1942
1943
1944 /* TODO: make a proper static setupTree function */
1945
1946 inner = tips - 1;
1947
1948 tr->mxtips = tips;
1949
1950 tr->bigCutoff = PLL_FALSE;
1951 tr->treeStringLength = tr->mxtips * (PLL_NMLNGTH + 128) + 256 + tr->mxtips * 2;
1952 tr->tree_string = (char *) rax_calloc ( tr->treeStringLength, sizeof(char));
1953 tr->tree0 = (char*)rax_calloc((size_t)tr->treeStringLength, sizeof(char));
1954 tr->tree1 = (char*)rax_calloc((size_t)tr->treeStringLength, sizeof(char));
1955 tr->constraintVector = (int *)rax_malloc((2 * tr->mxtips) * sizeof(int));
1956
1957 p0 = (nodeptr) rax_malloc ((tips + 3 * inner) * sizeof (node));
1958 assert (p0);
1959
1960 tr->nodeBaseAddress = p0;
1961
1962 tr->nameList = (char **) rax_malloc ((tips + 1) * sizeof (char *));
1963 tr->nodep = (nodeptr *) rax_malloc ((2 * tips) * sizeof (nodeptr));
1964
1965 tr->autoProteinSelectionType = PLL_AUTO_ML;
1966
1967 assert (tr->nameList && tr->nodep);
1968
1969 tr->nodep[0] = NULL;
1970
1971
1972 /* TODO: The line below was commented... why? */
1973 tr->fracchange = -1;
1974 tr->rawFracchange = -1;
1975
1976 for (i = 1; i <= tips; ++ i)
1977 {
1978 p = p0++;
1979
1980 //p->hash = KISS32();
1981 p->x = 0;
1982 p->xBips = 0;
1983 p->number = i;
1984 p->next = p;
1985 p->back = NULL;
1986 p->bInf = NULL;
1987 tr->nodep[i] = p;
1988 }
1989
1990 for (i = tips + 1; i <= tips + inner; ++i)
1991 {
1992 q = NULL;
1993 for (j = 1; j <= 3; ++ j)
1994 {
1995 p = p0++;
1996 if (j == 1)
1997 {
1998 p->xBips = 1;
1999 p->x = 1; //p->x = 1;
2000 }
2001 else
2002 {
2003 p->xBips = 0;
2004 p->x = 0;
2005 }
2006 p->number = i;
2007 p->next = q;
2008 p->bInf = NULL;
2009 p->back = NULL;
2010 p->hash = 0;
2011 q = p;
2012 }
2013 p->next->next->next = p;
2014 tr->nodep[i] = p;
2015 }
2016
2017 tr->likelihood = PLL_UNLIKELY;
2018 tr->start = NULL;
2019 tr->ntips = 0;
2020 tr->nextnode = 0;
2021
2022 for (i = 0; i < PLL_NUM_BRANCHES; ++ i) tr->partitionSmoothed[i] = PLL_FALSE;
2023
2024 tr->bitVectors = NULL;
2025 tr->vLength = 0;
2026 //tr->h = NULL;
2027
2028 /* TODO: Fix hash type */
2029 tr->nameHash = pllHashInit (10 * tr->mxtips);
2030
2031 /* TODO: do these options really fit here or should they be put elsewhere? */
2032 tr->td[0].count = 0;
2033 tr->td[0].ti = (traversalInfo *) rax_malloc (sizeof(traversalInfo) * (size_t)tr->mxtips);
2034 tr->td[0].parameterValues = (double *) rax_malloc(sizeof(double) * (size_t)PLL_NUM_BRANCHES);
2035 tr->td[0].executeModel = (pllBoolean *) rax_malloc (sizeof(pllBoolean) * (size_t)PLL_NUM_BRANCHES);
2036 tr->td[0].executeModel[0] = PLL_TRUE;
2037 for (i = 0; i < PLL_NUM_BRANCHES; ++ i) tr->td[0].executeModel[i] = PLL_TRUE;
2038 }
2039
2040
2041 /* @brief Check a parsed tree for inclusion in the current tree
2042
2043 Check whether the set of leaves (taxa) of the parsed tree \a nTree is a
2044 subset of the leaves of the currently loaded tree.
2045
2046 @param pInst
2047 PLL instance
2048
2049 @param nTree
2050 Parsed newick tree structure
2051
2052 @return
2053 Returns \b PLL_TRUE in case it is a subset, otherwise \b PLL_FALSE
2054 */
2055 static int
checkTreeInclusion(pllInstance * pInst,pllNewickTree * nTree)2056 checkTreeInclusion (pllInstance * pInst, pllNewickTree * nTree)
2057 {
2058 pllStack * sList;
2059 pllNewickNodeInfo * sItem;
2060 void * dummy;
2061
2062 if (!pInst->nameHash) return (PLL_FALSE);
2063
2064 for (sList = nTree->tree; sList; sList = sList->next)
2065 {
2066 sItem = (pllNewickNodeInfo *) sList->item;
2067 if (!sItem->rank) /* leaf */
2068 {
2069 if (!pllHashSearch (pInst->nameHash, sItem->name, &dummy)) return (PLL_FALSE);
2070 }
2071 }
2072
2073 return (PLL_TRUE);
2074 }
2075
2076 static void
updateBranchLength(nodeptr p,double old_fracchange,double new_fracchange)2077 updateBranchLength (nodeptr p, double old_fracchange, double new_fracchange)
2078 {
2079 double z;
2080 int j;
2081
2082 for (j = 0; j < PLL_NUM_BRANCHES; ++ j)
2083 {
2084 z = exp ((log (p->z[j]) * old_fracchange) / new_fracchange);
2085 if (z < PLL_ZMIN) z = PLL_ZMIN;
2086 if (z > PLL_ZMAX) z = PLL_ZMAX;
2087 p->z[j] = p->back->z[j] = z;
2088 }
2089 }
2090
2091 static void
updateAllBranchLengthsRecursive(nodeptr p,int tips,double old_fracchange,double new_fracchange)2092 updateAllBranchLengthsRecursive (nodeptr p, int tips, double old_fracchange, double new_fracchange)
2093 {
2094 updateBranchLength (p, old_fracchange, new_fracchange);
2095
2096 if (!isTip (p->number, tips))
2097 {
2098 updateAllBranchLengthsRecursive (p->next->back, tips, old_fracchange, new_fracchange);
2099 updateAllBranchLengthsRecursive (p->next->next->back, tips, old_fracchange, new_fracchange);
2100 }
2101 }
2102
2103 static void
updateAllBranchLengths(pllInstance * tr,double old_fracchange,double new_fracchange)2104 updateAllBranchLengths (pllInstance * tr, double old_fracchange, double new_fracchange)
2105 {
2106 nodeptr p;
2107
2108 p = tr->start;
2109 assert (isTip(p->number, tr->mxtips));
2110
2111 updateAllBranchLengthsRecursive (p->back, tr->mxtips, old_fracchange, new_fracchange);
2112
2113 }
2114
2115
2116 /** @brief Relink the taxa
2117
2118 Relink the taxa by performing a preorder traversal of the unrooted binary tree.
2119 We assume that the tree is rooted such that the root is the only node of
2120 out-degree 3 and in-degree 0, while all the other inner nodes have in-degree
2121 1 and out-degree 2. Finally, the leaves have in-degree 1 and out-degree 0.
2122
2123 @param pInst
2124 PLL instance
2125
2126 @param nTree
2127 Parsed newick tree structure
2128
2129 @param taxaExist
2130 Is the set of taxa of \a nTree a subset of the taxa of the current tree
2131
2132 @return
2133 */
2134 static int
linkTaxa(pllInstance * pInst,pllNewickTree * nTree,int taxaExist)2135 linkTaxa (pllInstance * pInst, pllNewickTree * nTree, int taxaExist)
2136 {
2137 nodeptr
2138 parent,
2139 child;
2140 pllStack
2141 * nodeStack = NULL,
2142 * current;
2143 int
2144 i,
2145 j,
2146 inner = nTree->tips + 1,
2147 leaf = 1;
2148 double z;
2149 pllNewickNodeInfo * nodeInfo;
2150
2151 if (!taxaExist) pllTreeInitDefaults (pInst, nTree->tips);
2152
2153 /* Place the ternary root node 3 times on the stack such that later on
2154 three nodes use it as their parent */
2155 current = nTree->tree;
2156 for (parent = pInst->nodep[inner], i = 0; i < 3; ++ i, parent = parent->next)
2157 pllStackPush (&nodeStack, parent);
2158 ++ inner;
2159
2160 /* now traverse the rest of the nodes */
2161 for (current = current->next; current; current = current->next)
2162 {
2163 parent = (nodeptr) pllStackPop (&nodeStack);
2164 nodeInfo = (pllNewickNodeInfo *) current->item;
2165
2166 /* if inner node place it twice on the stack (out-degree 2) */
2167 if (nodeInfo->rank)
2168 {
2169 child = pInst->nodep[inner ++];
2170 pllStackPush (&nodeStack, child->next);
2171 pllStackPush (&nodeStack, child->next->next);
2172 }
2173 else /* check if taxon already exists, i.e. we loaded another tree topology */
2174 {
2175 if (taxaExist)
2176 {
2177 assert (pllHashSearch (pInst->nameHash, nodeInfo->name, (void **) &child));
2178 }
2179 else
2180 {
2181 child = pInst->nodep[leaf];
2182 pInst->nameList[leaf] = strdup (nodeInfo->name);
2183 pllHashAdd (pInst->nameHash, pllHashString(pInst->nameList[leaf], pInst->nameHash->size), pInst->nameList[leaf], (void *) (pInst->nodep[leaf]));
2184 ++ leaf;
2185 }
2186 }
2187 assert (parent);
2188 /* link parent and child */
2189 parent->back = child;
2190 child->back = parent;
2191
2192 if (!taxaExist) pInst->fracchange = 1;
2193
2194 /* set the branch length */
2195 z = exp ((-1 * atof (nodeInfo->branch)) / pInst->fracchange);
2196 if (z < PLL_ZMIN) z = PLL_ZMIN;
2197 if (z > PLL_ZMAX) z = PLL_ZMAX;
2198 for (j = 0; j < PLL_NUM_BRANCHES; ++ j)
2199 parent->z[j] = child->z[j] = z;
2200 }
2201 pllStackClear (&nodeStack);
2202
2203 return PLL_TRUE;
2204 }
2205
2206 /** @brief Get the instantaneous rate matrix
2207
2208 Obtain the instantaneous rate matrix (Q) for partitionm \a model
2209 of the partition list \a pr, and store it in an array \a outBuffer.
2210
2211 @param tr PLL instance
2212 @param pr List of partitions
2213 @param model Index of partition to use
2214 @param outBuffer Where to store the instantaneous rate matrix
2215
2216 @todo Currently, the Q matrix can be only obtained for DNA GTR data.
2217
2218 @return Returns \b PLL_TRUE in case of success, otherwise \b PLL_FALSE
2219 */
pllGetInstRateMatrix(partitionList * pr,int model,double * outBuffer)2220 int pllGetInstRateMatrix (partitionList * pr, int model, double * outBuffer)
2221 {
2222 if (pr->partitionData[model]->dataType != PLL_DNA_DATA) return (PLL_FALSE);
2223
2224 int i;
2225 double mean = 0;
2226 double * substRates = pr->partitionData[model]->substRates;
2227 double * freqs = pr->partitionData[model]->frequencies;
2228
2229 /* normalize substitution rates */
2230 for (i = 0; i < 6; ++ i) substRates[i] /= substRates[5];
2231
2232 outBuffer[0 * 4 + 1] = (substRates[0] * freqs[1]);
2233 outBuffer[0 * 4 + 2] = (substRates[1] * freqs[2]);
2234 outBuffer[0 * 4 + 3] = (substRates[2] * freqs[3]);
2235
2236 outBuffer[1 * 4 + 0] = (substRates[0] * freqs[0]);
2237 outBuffer[1 * 4 + 2] = (substRates[3] * freqs[2]);
2238 outBuffer[1 * 4 + 3] = (substRates[4] * freqs[3]);
2239
2240 outBuffer[2 * 4 + 0] = (substRates[1] * freqs[0]);
2241 outBuffer[2 * 4 + 1] = (substRates[3] * freqs[1]);
2242 outBuffer[2 * 4 + 3] = (substRates[5] * freqs[3]);
2243
2244 outBuffer[3 * 4 + 0] = (substRates[2] * freqs[0]);
2245 outBuffer[3 * 4 + 1] = (substRates[4] * freqs[1]);
2246 outBuffer[3 * 4 + 2] = (substRates[5] * freqs[2]);
2247
2248 outBuffer[0 * 4 + 0] = -(substRates[0] * freqs[1] + substRates[1] * freqs[2] + substRates[2] * freqs[3]);
2249 outBuffer[1 * 4 + 1] = -(substRates[0] * freqs[0] + substRates[3] * freqs[2] + substRates[4] * freqs[3]);
2250 outBuffer[2 * 4 + 2] = -(substRates[1] * freqs[0] + substRates[3] * freqs[1] + substRates[5] * freqs[3]);
2251 outBuffer[3 * 4 + 3] = -(substRates[2] * freqs[0] + substRates[4] * freqs[1] + substRates[5] * freqs[2]);
2252
2253 for (i = 0; i < 4; ++ i) mean += freqs[i] * (-outBuffer[i * 4 + i]);
2254 for (i = 0; i < 16; ++ i) outBuffer[i] /= mean;
2255
2256 return (PLL_TRUE);
2257 }
2258
2259 /** @ingroup instanceLinkingGroup
2260 @brief Initializes the PLL tree topology according to a parsed newick tree
2261
2262 Set the tree topology based on a parsed and validated newick tree
2263
2264 @param tree
2265 The PLL instance
2266
2267 @param nt
2268 The \a pllNewickTree wrapper structure that contains the parsed newick tree
2269
2270 @param useDefaultz
2271 If set to \b PLL_TRUE then the branch lengths will be reset to the default
2272 value.
2273 */
2274 void
pllTreeInitTopologyNewick(pllInstance * tr,pllNewickTree * newick,int useDefaultz)2275 pllTreeInitTopologyNewick (pllInstance * tr, pllNewickTree * newick, int useDefaultz)
2276 {
2277 linkTaxa (tr, newick, tr->nameHash && checkTreeInclusion (tr, newick));
2278
2279 tr->start = tr->nodep[1];
2280
2281 if (useDefaultz == PLL_TRUE)
2282 resetBranches (tr);
2283 }
2284
2285 /** @brief Get the node oriented pointer from a round-about node
2286
2287 Returns the pointer of the round-about node $p$ that has the orientation, i.e.
2288 has the \a x flag set to 1. In case a tip is passed, then the returned pointer
2289 is the same as the input.
2290
2291 @param pInst PLL instance
2292 @param p One of the three pointers of a round-about node
2293
2294 @return Returns the the pointer that has the orientation
2295 */
pllGetOrientedNodePointer(pllInstance * pInst,nodeptr p)2296 nodeptr pllGetOrientedNodePointer (pllInstance * pInst, nodeptr p)
2297 {
2298 if (p->number <= pInst->mxtips || p->x) return p;
2299
2300 if (p->next->x) return p->next;
2301
2302 return p->next->next;
2303 }
2304
2305
2306 //void
2307 //pllTreeInitTopologyNewick (pllInstance * tr, pllNewickTree * nt, int useDefaultz)
2308 //{
2309 // pllStack * nodeStack = NULL;
2310 // pllStack * head;
2311 // pllNewickNodeInfo * item;
2312 // int i, j, k;
2313 //
2314 ///*
2315 // for (i = 0; i < partitions->numberOfPartitions; ++ i)
2316 // {
2317 // partitions->partitionData[i] = (pInfo *) rax_malloc (sizeof (pInfo));
2318 // partitions->partitionData[i]->partitionContribution = -1.0;
2319 // partitions->partitionData[i]->partitionLH = 0.0;
2320 // partitions->partitionData[i]->fracchange = 1.0;
2321 // }
2322 //*/
2323 //
2324 //
2325 // if (tr->nameHash)
2326 // {
2327 // if (checkTreeInclusion (tr, nt))
2328 // {
2329 // printf ("It is a subset\n");
2330 // }
2331 // else
2332 // {
2333 // printf ("It is not a subset\n");
2334 // }
2335 // }
2336 //
2337 // pllTreeInitDefaults (tr, nt->tips);
2338 //
2339 // i = nt->tips + 1;
2340 // j = 1;
2341 // nodeptr v;
2342 //
2343 //
2344 // for (head = nt->tree; head; head = head->next)
2345 // {
2346 // item = (pllNewickNodeInfo *) head->item;
2347 // if (!nodeStack)
2348 // {
2349 // pllStackPush (&nodeStack, tr->nodep[i]);
2350 // pllStackPush (&nodeStack, tr->nodep[i]->next);
2351 // pllStackPush (&nodeStack, tr->nodep[i]->next->next);
2352 // ++i;
2353 // }
2354 // else
2355 // {
2356 // v = (nodeptr) pllStackPop (&nodeStack);
2357 // if (item->rank) /* internal node */
2358 // {
2359 // v->back = tr->nodep[i];
2360 // tr->nodep[i]->back = v; //t->nodep[v->number]
2361 // pllStackPush (&nodeStack, tr->nodep[i]->next);
2362 // pllStackPush (&nodeStack, tr->nodep[i]->next->next);
2363 // double z = exp((-1 * atof(item->branch))/tr->fracchange);
2364 // if(z < PLL_ZMIN) z = PLL_ZMIN;
2365 // if(z > PLL_ZMAX) z = PLL_ZMAX;
2366 // for (k = 0; k < PLL_NUM_BRANCHES; ++ k)
2367 // v->z[k] = tr->nodep[i]->z[k] = z;
2368 //
2369 // ++ i;
2370 // }
2371 // else /* leaf */
2372 // {
2373 // v->back = tr->nodep[j];
2374 // tr->nodep[j]->back = v; //t->nodep[v->number];
2375 //
2376 // double z = exp((-1 * atof(item->branch))/tr->fracchange);
2377 // if(z < PLL_ZMIN) z = PLL_ZMIN;
2378 // if(z > PLL_ZMAX) z = PLL_ZMAX;
2379 // for (k = 0; k < PLL_NUM_BRANCHES; ++ k)
2380 // v->z[k] = tr->nodep[j]->z[k] = z;
2381 //
2382 // //t->nameList[j] = strdup (item->name);
2383 // tr->nameList[j] = (char *) rax_malloc ((strlen (item->name) + 1) * sizeof (char));
2384 // strcpy (tr->nameList[j], item->name);
2385 //
2386 // pllHashAdd (tr->nameHash, tr->nameList[j], (void *) (tr->nodep[j]));
2387 // ++ j;
2388 // }
2389 // }
2390 // }
2391 //
2392 // tr->start = tr->nodep[1];
2393 //
2394 // pllStackClear (&nodeStack);
2395 //
2396 // if (useDefaultz == PLL_TRUE)
2397 // resetBranches (tr);
2398 //}
2399
2400 /** @brief Initialize PLL tree with a random topology
2401
2402 Initializes the PLL tree with a randomly created topology
2403
2404 @todo
2405 Perhaps pass a seed?
2406
2407 @param tr
2408 The PLL instance
2409
2410 @param tips
2411 Number of tips
2412
2413 @param nameList
2414 A set of \a tips names representing the taxa labels
2415 */
2416 void
pllTreeInitTopologyRandom(pllInstance * tr,int tips,char ** nameList)2417 pllTreeInitTopologyRandom (pllInstance * tr, int tips, char ** nameList)
2418 {
2419 int i;
2420 pllTreeInitDefaults (tr, tips);
2421
2422 for (i = 1; i <= tips; ++ i)
2423 {
2424 tr->nameList[i] = (char *) rax_malloc ((strlen (nameList[i]) + 1) * sizeof (char));
2425 strcpy (tr->nameList[i], nameList[i]);
2426 pllHashAdd (tr->nameHash, pllHashString(tr->nameList[i], tr->nameHash->size), tr->nameList[i], (void *) (tr->nodep[i]));
2427 }
2428
2429
2430 pllMakeRandomTree (tr);
2431 }
2432
2433
2434 /** @brief Initialize a tree that corresponds to a given (already parsed) alignment
2435
2436 Initializes the PLL tree such that it corresponds to the given alignment
2437
2438 @todo
2439 nothing
2440
2441 @param tr
2442 The PLL instance
2443
2444 @param alignmentData
2445 Parsed alignment
2446 */
2447 void
pllTreeInitTopologyForAlignment(pllInstance * tr,pllAlignmentData * alignmentData)2448 pllTreeInitTopologyForAlignment (pllInstance * tr, pllAlignmentData * alignmentData)
2449 {
2450 int
2451 tips = alignmentData->sequenceCount,
2452 i;
2453
2454 char
2455 **nameList = alignmentData->sequenceLabels;
2456
2457 pllTreeInitDefaults (tr, tips);
2458
2459 for (i = 1; i <= tips; ++ i)
2460 {
2461 tr->nameList[i] = (char *) rax_malloc ((strlen (nameList[i]) + 1) * sizeof (char));
2462 strcpy (tr->nameList[i], nameList[i]);
2463 pllHashAdd (tr->nameHash, pllHashString(tr->nameList[i], tr->nameHash->size), tr->nameList[i], (void *) (tr->nodep[i]));
2464 }
2465 }
2466
2467
2468 /** @brief Compute a randomized stepwise addition oder parsimony tree
2469
2470 Implements the RAxML randomized stepwise addition order algorithm
2471
2472 @todo
2473 check functions that are invoked for potential memory leaks!
2474
2475 @param tr
2476 The PLL instance
2477
2478 @param partitions
2479 The partitions
2480
2481 @param sprDist
2482 SPR distance for the SPR search in parsimony
2483 */
pllComputeRandomizedStepwiseAdditionParsimonyTree(pllInstance * tr,partitionList * partitions,int sprDist)2484 void pllComputeRandomizedStepwiseAdditionParsimonyTree(pllInstance * tr, partitionList * partitions, int sprDist)
2485 {
2486 allocateParsimonyDataStructures(tr, partitions);
2487 pllMakeParsimonyTreeFast(tr, partitions, sprDist);
2488 pllFreeParsimonyDataStructures(tr, partitions);
2489 }
2490
2491 /** @brief Encode the alignment data to the PLL numerical representation
2492
2493 Transforms the alignment to the PLL internal representation by substituting each base
2494 with a specific digit.
2495
2496 @param alignmentData Multiple sequence alignment
2497 @param partitions List of partitions
2498 */
pllBaseSubstitute(pllInstance * tr,partitionList * partitions)2499 void pllBaseSubstitute (pllInstance * tr, partitionList * partitions)
2500 {
2501 const char * d;
2502 int i, j, k;
2503
2504 for (i = 0; i < partitions->numberOfPartitions; ++ i)
2505 {
2506 switch (partitions->partitionData[i]->dataType)
2507 {
2508 case PLL_DNA_DATA:
2509 d = PLL_MAP_NT;
2510 break;
2511 case PLL_BINARY_DATA:
2512 d = PLL_MAP_BIN;
2513 break;
2514 case PLL_AA_DATA:
2515 d = PLL_MAP_AA;
2516 break;
2517 default:
2518 assert(0);
2519 }
2520
2521 for (j = 1; j <= tr->mxtips; ++ j)
2522 {
2523 for (k = partitions->partitionData[i]->lower; k < partitions->partitionData[i]->upper; ++ k)
2524 {
2525 tr->yVector[j][k] = d[tr->yVector[j][k]];
2526 }
2527 }
2528 }
2529 }
2530
2531 /** Clears the rearrangements history from PLL instance
2532
2533 Clears the rearrangements rollback information (history) from the PLL instance \a tr.
2534
2535 @param tr
2536 PLL instance
2537 */
pllClearRearrangeHistory(pllInstance * tr)2538 void pllClearRearrangeHistory (pllInstance * tr)
2539 {
2540 pllRollbackInfo * ri;
2541
2542 while ((ri = (pllRollbackInfo *)pllStackPop (&(tr->rearrangeHistory))))
2543 {
2544 rax_free (ri);
2545 }
2546 }
2547
2548 /** @brief Deallocate the PLL instance
2549
2550 Deallocates the library instance and all its elements.
2551
2552 @param tr
2553 The PLL instance
2554 */
2555 void
pllDestroyInstance(pllInstance * tr)2556 pllDestroyInstance (pllInstance * tr)
2557 {
2558 int i;
2559
2560 for (i = 1; i <= tr->mxtips; ++ i)
2561 rax_free (tr->nameList[i]);
2562
2563 pllHashDestroy (&(tr->nameHash), NULL);
2564 if (tr->yVector)
2565 {
2566 if (tr->yVector[0]) rax_free (tr->yVector[0]);
2567 rax_free (tr->yVector);
2568 }
2569 rax_free (tr->aliaswgt);
2570 rax_free (tr->rateCategory);
2571 rax_free (tr->patrat);
2572 rax_free (tr->patratStored);
2573 rax_free (tr->lhs);
2574 rax_free (tr->td[0].parameterValues);
2575 rax_free (tr->td[0].executeModel);
2576 rax_free (tr->td[0].ti);
2577 rax_free (tr->nameList);
2578 rax_free (tr->nodep);
2579 rax_free (tr->nodeBaseAddress);
2580 rax_free (tr->tree_string);
2581 rax_free (tr->tree0);
2582 rax_free (tr->tree1);
2583 rax_free (tr->tipNames);
2584 rax_free (tr->constraintVector);
2585 pllClearRearrangeHistory (tr);
2586
2587 rax_free (tr);
2588
2589 #ifdef _FINE_GRAIN_MPI
2590 pllFinalizeMPI ();
2591 #endif
2592
2593 }
2594
2595 /* initializwe a parameter linkage list for a certain parameter type (can be whatever).
2596 the input is an integer vector that contaions NumberOfModels (numberOfPartitions) elements.
2597
2598 if we want to have all alpha parameters unlinked and have say 4 partitions the input
2599 vector would look like this: {0, 1, 2, 3}, if we want to link partitions 0 and 3 the vector
2600 should look like this: {0, 1, 2, 0}
2601 */
2602
2603
2604
init_Q_MatrixSymmetries(char * linkageString,partitionList * pr,int model)2605 static int init_Q_MatrixSymmetries(char *linkageString, partitionList * pr, int model)
2606 {
2607 int
2608 states = pr->partitionData[model]->states,
2609 numberOfRates = ((states * states - states) / 2),
2610 *list = (int *)rax_malloc(sizeof(int) * numberOfRates),
2611 j,
2612 max = -1;
2613
2614 char
2615 *str1,
2616 *saveptr,
2617 *ch,
2618 *token;
2619
2620 ch = (char *) rax_malloc (strlen (linkageString) + 1);
2621 strcpy (ch, linkageString);
2622
2623
2624 for(j = 0, str1 = ch; ;j++, str1 = (char *)NULL)
2625 {
2626 token = strtok_r(str1, ",", &saveptr);
2627 if(token == (char *)NULL)
2628 break;
2629 if(!(j < numberOfRates))
2630 {
2631 errno = PLL_SUBSTITUTION_RATE_OUT_OF_BOUNDS;
2632 return PLL_FALSE;
2633 }
2634 list[j] = atoi(token);
2635 }
2636
2637 rax_free(ch);
2638
2639 for(j = 0; j < numberOfRates; j++)
2640 {
2641 if(!(list[j] <= j))
2642 {
2643 errno = PLL_INVALID_Q_MATRIX_SYMMETRY;
2644 return PLL_FALSE;
2645 }
2646
2647 if(!(list[j] <= max + 1))
2648 {
2649 errno = PLL_Q_MATRIX_SYMMETRY_OUT_OF_BOUNDS;
2650 return PLL_FALSE;
2651 }
2652
2653 if(list[j] > max)
2654 max = list[j];
2655 }
2656
2657 for(j = 0; j < numberOfRates; j++)
2658 pr->partitionData[model]->symmetryVector[j] = list[j];
2659
2660 //less than the maximum possible number of rate parameters
2661
2662 if(max < numberOfRates - 1)
2663 pr->partitionData[model]->nonGTR = PLL_TRUE;
2664
2665 pr->partitionData[model]->optimizeSubstitutionRates = PLL_TRUE;
2666
2667 rax_free(list);
2668
2669 return PLL_TRUE;
2670 }
2671
2672 /** @brief Check parameter linkage across partitions for consistency
2673 *
2674 * Checks that linked alpha, substitution rate and frequency model parameters
2675 * across several partitions are consistent. E.g., when two partitions are linked
2676 * via the alpha parameter, the alpha parameter should either be set to the same
2677 * fixed value or it should be estimated!
2678 *
2679 * @param pr
2680 * List of partitions
2681 *
2682 * @todo
2683 * Call this in more functions, right now it's only invoked in the wrapper
2684 * for modOpt()
2685 */
checkLinkageConsistency(partitionList * pr)2686 static int checkLinkageConsistency(partitionList *pr)
2687 {
2688 if(pr->dirty)
2689 {
2690 int
2691 i;
2692
2693 linkageList
2694 *ll;
2695
2696 /* first deal with rates */
2697
2698 ll = pr->rateList;
2699
2700 for(i = 0; i < ll->entries; i++)
2701 {
2702 int
2703 partitions = ll->ld[i].partitions,
2704 reference = ll->ld[i].partitionList[0];
2705
2706 if(pr->partitionData[reference]->dataType == PLL_AA_DATA)
2707 {
2708 if(pr->partitionData[reference]->protModels == PLL_GTR || pr->partitionData[reference]->nonGTR)
2709 {
2710 if(!(pr->partitionData[reference]->optimizeSubstitutionRates == PLL_TRUE))
2711 {
2712 errno = PLL_INCONSISTENT_SUBST_RATE_OPTIMIZATION_SETTING;
2713 return PLL_FALSE;
2714 }
2715 }
2716 else
2717 {
2718 if(!(pr->partitionData[reference]->optimizeSubstitutionRates == PLL_FALSE))
2719 {
2720 errno = PLL_INCONSISTENT_SUBST_RATE_OPTIMIZATION_SETTING;
2721 return PLL_FALSE;
2722 }
2723 }
2724 }
2725
2726 if(partitions > 1)
2727 {
2728 int
2729 j,
2730 k;
2731
2732 for(k = 1; k < partitions; k++)
2733 {
2734 int
2735 index = ll->ld[i].partitionList[k];
2736
2737 int
2738 states = pr->partitionData[index]->states,
2739 rates = ((states * states - states) / 2);
2740
2741 if(!(pr->partitionData[reference]->nonGTR == pr->partitionData[index]->nonGTR))
2742 {
2743 errno = PLL_INCONSISTENT_SUBST_RATE_OPTIMIZATION_SETTING;
2744 return PLL_FALSE;
2745 }
2746 if(!(pr->partitionData[reference]->optimizeSubstitutionRates == pr->partitionData[index]->optimizeSubstitutionRates))
2747 {
2748 errno = PLL_INCONSISTENT_SUBST_RATE_OPTIMIZATION_SETTING;
2749 return PLL_FALSE;
2750 }
2751
2752
2753 if(pr->partitionData[reference]->nonGTR)
2754 {
2755
2756 for(j = 0; j < rates; j++)
2757 {
2758 if(!(pr->partitionData[reference]->symmetryVector[j] == pr->partitionData[index]->symmetryVector[j]))
2759 {
2760 errno = PLL_INCONSISTENT_Q_MATRIX_SYMMETRIES_ACROSS_LINKED_PARTITIONS;
2761 return PLL_FALSE;
2762 }
2763 }
2764 }
2765
2766
2767 for(j = 0; j < rates; j++)
2768 {
2769 if(!(pr->partitionData[reference]->substRates[j] == pr->partitionData[index]->substRates[j]))
2770 {
2771 errno = PLL_INCONSISTENT_Q_MATRIX_ENTRIES_ACROSS_LINKED_PARTITIONS;
2772 return PLL_FALSE;
2773 }
2774 }
2775 }
2776 }
2777 }
2778
2779 /* then deal with alpha parameters */
2780
2781 ll = pr->alphaList;
2782
2783 for(i = 0; i < ll->entries; i++)
2784 {
2785 int
2786 partitions = ll->ld[i].partitions;
2787
2788 if(partitions > 1)
2789 {
2790 int
2791 k,
2792 reference = ll->ld[i].partitionList[0];
2793
2794 for(k = 1; k < partitions; k++)
2795 {
2796 int
2797 index = ll->ld[i].partitionList[k];
2798
2799 if(!(pr->partitionData[reference]->optimizeAlphaParameter == pr->partitionData[index]->optimizeAlphaParameter))
2800 {
2801 errno = PLL_INCONSISTENT_ALPHA_STATES_ACROSS_LINKED_PARTITIONS;
2802 return PLL_FALSE;
2803 }
2804 if(!(pr->partitionData[reference]->alpha == pr->partitionData[index]->alpha))
2805 {
2806 errno = PLL_INCONSISTENT_ALPHA_VALUES_ACROSS_LINKED_PARTITIONS;
2807 return PLL_FALSE;
2808 }
2809 }
2810 }
2811 }
2812
2813 /* and then deal with base frequencies */
2814
2815 ll = pr->freqList;
2816
2817 for(i = 0; i < ll->entries; i++)
2818 {
2819 int
2820 partitions = ll->ld[i].partitions;
2821
2822 if(partitions > 1)
2823 {
2824 int
2825 k,
2826 reference = ll->ld[i].partitionList[0];
2827
2828 for(k = 1; k < partitions; k++)
2829 {
2830 int
2831 j,
2832 index = ll->ld[i].partitionList[k],
2833 states = pr->partitionData[index]->states;
2834
2835 if(!(pr->partitionData[reference]->optimizeBaseFrequencies == pr->partitionData[index]->optimizeBaseFrequencies))
2836 {
2837 errno = PLL_INCONSISTENT_FREQUENCY_STATES_ACROSS_LINKED_PARTITIONS;
2838 return PLL_FALSE;
2839 }
2840
2841 for(j = 0; j < states; j++)
2842 {
2843 if(!(pr->partitionData[reference]->frequencies[j] == pr->partitionData[index]->frequencies[j]))
2844 {
2845 errno = PLL_INCONSISTENT_FREQUENCY_VALUES_ACROSS_LINKED_PARTITIONS;
2846 return PLL_FALSE;
2847 }
2848 }
2849 }
2850 }
2851 }
2852
2853 pr->dirty = PLL_FALSE;
2854 }
2855
2856 return PLL_TRUE;
2857 }
2858 /** @brief Set symmetries among parameters in the Q matrix
2859
2860 Allows to link some or all rate parameters in the Q-matrix
2861 for obtaining simpler models than GTR
2862
2863 @param string
2864 string describing the symmetry pattern among the rates in the Q matrix
2865
2866 @param pr
2867 List of partitions
2868
2869 @param model
2870 Index of the partition for which we want to set the Q matrix symmetries
2871
2872 @todo
2873 nothing
2874 */
pllSetSubstitutionRateMatrixSymmetries(char * string,partitionList * pr,int model)2875 int pllSetSubstitutionRateMatrixSymmetries(char *string, partitionList * pr, int model)
2876 {
2877 int
2878 result = init_Q_MatrixSymmetries(string, pr, model);
2879
2880 pr->dirty = PLL_TRUE;
2881
2882 return result;
2883 }
2884
2885 /** @defgroup modelParamsGroup Model parameters setup and retrieval
2886
2887 This set of functions is responsible for setting, retrieving, and optimizing
2888 model parameters. It also contains functions for linking model parameters
2889 across partitions.
2890 */
2891
2892 /** @ingroup modelParamsGroups
2893 @brief Set the alpha parameter of the Gamma model to a fixed value for a partition
2894
2895 Sets the alpha parameter of the gamma model of rate heterogeneity to a fixed value
2896 and disables the optimization of this parameter
2897
2898 @param alpha
2899 alpha value
2900
2901 @param model
2902 Index of the partition for which we want to set the alpha value
2903
2904 @param pr
2905 List of partitions
2906
2907 @param tr
2908 Library instance for which we want to fix alpha
2909
2910 @todo
2911 test if this works with the parallel versions
2912 */
pllSetFixedAlpha(double alpha,int model,partitionList * pr,pllInstance * tr)2913 void pllSetFixedAlpha(double alpha, int model, partitionList * pr, pllInstance *tr)
2914 {
2915 //make sure that we are swetting alpha for a partition within the current range
2916 //of partitions
2917 double old_fracchange = tr->fracchange;
2918
2919 assert(model >= 0 && model < pr->numberOfPartitions);
2920
2921 assert(alpha >= PLL_ALPHA_MIN && alpha <= PLL_ALPHA_MAX);
2922
2923 //set the alpha paremeter
2924
2925 pr->partitionData[model]->alpha = alpha;
2926
2927 //do the discretization of the gamma curve
2928
2929 pllMakeGammaCats(pr->partitionData[model]->alpha, pr->partitionData[model]->gammaRates, 4, tr->useMedian);
2930
2931 //broadcast the changed parameters to all threads/MPI processes
2932
2933 #if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
2934 pllMasterBarrier(tr, pr, PLL_THREAD_COPY_ALPHA);
2935 #endif
2936
2937 pr->partitionData[model]->optimizeAlphaParameter = PLL_FALSE;
2938
2939 pr->dirty = PLL_FALSE;
2940 updateAllBranchLengths (tr, old_fracchange, tr->fracchange);
2941 }
2942
2943 /** @ingroup modelParamsGroups
2944 @brief Get the rate categories of the Gamma model of a partition
2945
2946 Gets the gamma rate categories of the Gamma model of rate heterogeneity
2947 of partition \a pid from partition list \a pr.
2948
2949 @param pr List of partitions
2950 @param pid Index of partition to use
2951 @param outBuffer Output buffer where to store the rates
2952 */
pllGetGammaRates(partitionList * pr,int pid,double * outBuffer)2953 void pllGetGammaRates (partitionList * pr, int pid, double * outBuffer)
2954 {
2955 /* TODO: Change the hardcoded 4 and also add a check that this partition
2956 really uses gamma. Currently, instance is also not required */
2957 memcpy (outBuffer, pr->partitionData[pid]->gammaRates, 4 * sizeof (double));
2958 }
2959
2960 /** @ingroup modelParamsGroups
2961 @brief Get the alpha parameter of the Gamma model of a partition
2962
2963 Returns the alpha parameter of the gamma model of rate heterogeneity
2964 of partition \a pid from partition list \a pr.
2965
2966 @param pr List of partitions
2967 @param pid Index of partition to use
2968
2969 @return
2970 Alpha parameter
2971 */
pllGetAlpha(partitionList * pr,int pid)2972 double pllGetAlpha (partitionList * pr, int pid)
2973 {
2974 /* TODO: check if the partition uses gamma */
2975 return (pr->partitionData[pid]->alpha);
2976 }
2977
2978
2979 /** @ingroup modelParamsGroups
2980 @brief Get the base frequencies of a partition
2981
2982 Gets the base frequencies of partition \a model from partition list
2983 \a partitionList and stores them in \a outBuffer. Note that \outBuffer
2984 must be of size s, where s is the number of states.
2985
2986 @param tr PLL instance
2987 @param pr List of partitions
2988 @param model Index of the partition for which we want to get the base frequencies
2989 @param outBuffer Buffer where to store the base frequencies
2990 */
pllGetBaseFrequencies(partitionList * pr,int model,double * outBuffer)2991 void pllGetBaseFrequencies(partitionList * pr, int model, double * outBuffer)
2992 {
2993 memcpy (outBuffer, pr->partitionData[model]->frequencies, pr->partitionData[model]->states * sizeof (double));
2994 }
2995
2996
2997 /** @ingroup modelParamsGroups
2998 @brief Set all base frequencies to a fixed value for a partition
2999
3000 Sets all base freuqencies of a partition to fixed values and disables
3001 ML optimization of these parameters
3002
3003 @param f
3004 array containing the base frequencies
3005
3006 @param length
3007 length of array f, this needs to be as long as the number of
3008 states in the model, otherwise an assertion will fail!
3009
3010 @param model
3011 Index of the partition for which we want to set the frequencies
3012
3013 @param pr
3014 List of partitions
3015
3016 @param tr
3017 Library instance for which we want to fix the base frequencies
3018
3019 @todo
3020 test if this works with the parallel versions
3021 */
pllSetFixedBaseFrequencies(double * f,int length,int model,partitionList * pr,pllInstance * tr)3022 void pllSetFixedBaseFrequencies(double *f, int length, int model, partitionList * pr, pllInstance *tr)
3023 {
3024 int
3025 i;
3026
3027 double
3028 acc = 0.0,
3029 old_fracchange;
3030
3031 old_fracchange = tr->fracchange;
3032
3033 //make sure that we are setting the base frequencies for a partition within the current range
3034 //of partitions
3035 assert(model >= 0 && model < pr->numberOfPartitions);
3036
3037 //make sure that the length of the input array f containing the frequencies
3038 //is as long as the number of states in the model
3039 assert(length == pr->partitionData[model]->states);
3040
3041
3042 //make sure that the base frequencies sum approximately to 1.0
3043
3044 for(i = 0; i < length; i++)
3045 acc += f[i];
3046
3047 if(fabs(acc - 1.0) > 0.000001)
3048 assert(0);
3049
3050 //copy the base frequencies
3051 memcpy(pr->partitionData[model]->frequencies, f, sizeof(double) * length);
3052
3053 //re-calculate the Q matrix
3054 pllInitReversibleGTR(tr, pr, model);
3055
3056
3057 //broadcast the new Q matrix to all threads/processes
3058 #if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
3059 pllMasterBarrier (tr, pr, PLL_THREAD_COPY_RATES);
3060 #endif
3061
3062 pr->partitionData[model]->optimizeBaseFrequencies = PLL_FALSE;
3063
3064 pr->dirty = PLL_TRUE;
3065 updateAllBranchLengths (tr, old_fracchange, tr->fracchange);
3066 }
3067
3068 /** @ingroup modelParamsGroups
3069 @brief Set that the base freuqencies are optimized under ML
3070
3071 The base freuqencies for partition model will be optimized under ML
3072
3073 @param model
3074 Index of the partition for which we want to optimize base frequencies
3075
3076 @param pr
3077 List of partitions
3078
3079 @param tr
3080 Library instance for which we want to fix the base frequencies
3081
3082 @todo
3083 test if this works with the parallel versions
3084 */
pllSetOptimizeBaseFrequencies(int model,partitionList * pr,pllInstance * tr)3085 int pllSetOptimizeBaseFrequencies(int model, partitionList * pr, pllInstance *tr)
3086 {
3087 int
3088 states,
3089 i;
3090
3091 double
3092 initialFrequency,
3093 acc = 0.0;
3094
3095 //make sure that we are setting the base frequencies for a partition within the current range
3096 //of partitions
3097 if(!(model >= 0 && model < pr->numberOfPartitions))
3098 {
3099 errno = PLL_PARTITION_OUT_OF_BOUNDS;
3100 return PLL_FALSE;
3101 }
3102
3103 //set the number of states/ferquencies in this partition
3104 states = pr->partitionData[model]->states;
3105
3106 //set all frequencies to 1/states
3107
3108 initialFrequency = 1.0 / (double)states;
3109
3110 for(i = 0; i < states; i++)
3111 pr->partitionData[model]->frequencies[i] = initialFrequency;
3112
3113 //make sure that the base frequencies sum approximately to 1.0
3114
3115 for(i = 0; i < states; i++)
3116 acc += pr->partitionData[model]->frequencies[i];
3117
3118 if(fabs(acc - 1.0) > 0.000001)
3119 {
3120 errno = PLL_BASE_FREQUENCIES_DO_NOT_SUM_TO_1;
3121 return PLL_FALSE;
3122 }
3123
3124 //re-calculate the Q matrix
3125 pllInitReversibleGTR(tr, pr, model);
3126
3127 //broadcast the new Q matrix to all threads/processes
3128 #if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
3129 pllMasterBarrier (tr, pr, PLL_THREAD_COPY_RATES);
3130 #endif
3131
3132 pr->partitionData[model]->optimizeBaseFrequencies = PLL_TRUE;
3133
3134 pr->dirty = PLL_TRUE;
3135
3136 return PLL_TRUE;
3137 }
3138
3139
3140
3141
3142 /** @ingroup modelParamsGroups
3143 @brief Get the substitution rates for a specific partition
3144
3145 Gets the substitution rates of partition \a model from partition list
3146 \a partitionList and stores them in \a outBuffer. Note that \outBuffer
3147 must be of size (2 * s - s) / 2, where s is the number of states, i.e.
3148 the number of upper diagonal entries of the Q matrix.
3149
3150 @param tr PLL instance
3151 @param pr List of partitions
3152 @param model Index of partition for which we want to get the substitution rates
3153 @param outBuffer Buffer where to store the substitution rates.
3154 */
pllGetSubstitutionMatrix(partitionList * pr,int model,double * outBuffer)3155 void pllGetSubstitutionMatrix (partitionList * pr, int model, double * outBuffer)
3156 {
3157 int
3158 rates,
3159 states;
3160
3161 states = pr->partitionData[model]->states;
3162 rates = (states * states - states) / 2;
3163
3164 memcpy (outBuffer, pr->partitionData[model]->substRates, rates * sizeof (double));
3165 }
3166
3167 /** @ingroup modelParamsGroups
3168 @brief Set all substitution rates for a specific partition and disable ML optimization for them
3169
3170 Sets all substitution rates of a partition to fixed values and disables
3171 ML optimization of these parameters. It will automatically re-scale the relative rates
3172 such that the last rate is 1.0
3173
3174 @param f
3175 array containing the substitution rates
3176
3177 @param length
3178 length of array f, this needs to be as long as: (s * s - s) / 2,
3179 i.e., the number of upper diagonal entries of the Q matrix
3180
3181 @param model
3182 Index of the partition for which we want to set/fix the substitution rates
3183
3184 @param pr
3185 List of partitions
3186
3187 @param tr
3188 Library instance for which we want to fix the substitution rates
3189
3190 @todo
3191 test if this works with the parallel versions
3192 */
pllSetFixedSubstitutionMatrix(double * q,int length,int model,partitionList * pr,pllInstance * tr)3193 void pllSetFixedSubstitutionMatrix(double *q, int length, int model, partitionList * pr, pllInstance *tr)
3194 {
3195 pllSetSubstitutionMatrix(q, length, model, pr, tr);
3196 pr->partitionData[model]->optimizeSubstitutionRates = PLL_FALSE;
3197 }
3198
3199 /** @ingroup modelParamsGroups
3200 @brief Set all substitution rates for a specific partition
3201
3202 Sets all substitution rates of a partition to the given values.
3203 It will automatically re-scale the relative rates such that the last rate is 1.0
3204
3205 @param f
3206 array containing the substitution rates
3207
3208 @param length
3209 length of array f, this needs to be as long as: (s * s - s) / 2,
3210 i.e., the number of upper diagonal entries of the Q matrix
3211
3212 @param model
3213 Index of the partition for which we want to set/fix the substitution rates
3214
3215 @param pr
3216 List of partitions
3217
3218 @param tr
3219 Library instance for which we want to fix the substitution rates
3220
3221 @todo
3222 test if this works with the parallel versions
3223 */
pllSetSubstitutionMatrix(double * q,int length,int model,partitionList * pr,pllInstance * tr)3224 void pllSetSubstitutionMatrix(double *q, int length, int model, partitionList * pr, pllInstance *tr)
3225 {
3226 int
3227 i,
3228 numberOfRates;
3229
3230 double
3231 scaler,
3232 old_fracchange;
3233
3234 old_fracchange = tr->fracchange;
3235
3236 //make sure that we are setting the Q matrix for a partition within the current range
3237 //of partitions
3238 assert(model >= 0 && model < pr->numberOfPartitions);
3239
3240 numberOfRates = (pr->partitionData[model]->states * pr->partitionData[model]->states - pr->partitionData[model]->states) / 2;
3241
3242 // make sure that the length of the array containing the subsitution rates
3243 // corresponds to the number of states in the model
3244
3245 assert(length == numberOfRates);
3246
3247 //automatically scale the last rate to 1.0 if this is not already the case
3248
3249 if(q[length - 1] != 1.0)
3250 scaler = 1.0 / q[length - 1];
3251 else
3252 scaler = 1.0;
3253
3254 //set the rates for the partition and make sure that they are within the allowed bounds
3255
3256 for(i = 0; i < length; i++)
3257 {
3258 double
3259 r = q[i] * scaler;
3260
3261 assert(r >= PLL_RATE_MIN && r <= PLL_RATE_MAX);
3262
3263 pr->partitionData[model]->substRates[i] = r;
3264 }
3265
3266 //re-calculate the Q matrix
3267 pllInitReversibleGTR(tr, pr, model);
3268
3269 //broadcast the new Q matrix to all threads/processes
3270 #if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
3271 pllMasterBarrier (tr, pr, PLL_THREAD_COPY_RATES);
3272 #endif
3273
3274
3275 pr->dirty = PLL_TRUE;
3276 updateAllBranchLengths (tr, old_fracchange, tr->fracchange);
3277 }
3278
3279
3280
3281
3282 /* initialize a parameter linkage list for a certain parameter type (can be whatever).
3283 the input is an integer vector that contaions NumberOfModels (numberOfPartitions) elements.
3284
3285 if we want to have all alpha parameters unlinked and have say 4 partitions the input
3286 vector would look like this: {0, 1, 2, 3}, if we want to link partitions 0 and 3 the vector
3287 should look like this: {0, 1, 2, 0}
3288 */
3289
3290 /** @ingroup modelParamsGroups
3291 */
initLinkageList(int * linkList,partitionList * pr)3292 linkageList* initLinkageList(int *linkList, partitionList *pr)
3293 {
3294 int
3295 k,
3296 partitions,
3297 numberOfModels = 0,
3298 i,
3299 pos;
3300
3301 linkageList
3302 *ll = (linkageList*)rax_malloc(sizeof(linkageList));
3303
3304 /* figure out how many distinct parameters we need to estimate
3305 in total, if all parameters are linked the result will be 1 if all
3306 are unlinked the result will be pr->numberOfPartitions */
3307
3308 for(i = 0; i < pr->numberOfPartitions; i++)
3309 {
3310 if(!(linkList[i] >= 0 && linkList[i] < pr->numberOfPartitions))
3311 {
3312 errno = PLL_LINKAGE_LIST_OUT_OF_BOUNDS;
3313 return (linkageList*)NULL;
3314 }
3315
3316 if(!(linkList[i] <= i && linkList[i] <= numberOfModels + 1))
3317 {
3318 errno = PLL_LINKAGE_LIST_OUT_OF_BOUNDS;
3319 return (linkageList*)NULL;
3320 }
3321
3322 if(linkList[i] > numberOfModels)
3323 numberOfModels = linkList[i];
3324
3325 }
3326
3327 numberOfModels++;
3328
3329 /* allocate the linkage list data structure that containes information which parameters of which partition are
3330 linked with each other.
3331
3332 Note that we need a separate invocation of initLinkageList() and a separate linkage list
3333 for each parameter type */
3334
3335 ll->entries = numberOfModels;
3336 ll->ld = (linkageData*)rax_malloc(sizeof(linkageData) * numberOfModels);
3337
3338 /* noe loop over the number of free parameters and assign the corresponding partitions to each parameter */
3339
3340 for(i = 0; i < numberOfModels; i++)
3341 {
3342 /*
3343 the valid flag is used for distinguishing between DNA and protein data partitions.
3344 This can be used to enable/disable parameter optimization for the paremeter
3345 associated to the corresponding partitions. This deature is used in optRatesGeneric
3346 to first optimize all DNA GTR rate matrices and then all PROT GTR rate matrices */
3347
3348 ll->ld[i].valid = PLL_TRUE;
3349 partitions = 0;
3350
3351 /* now figure out how many partitions share this joint parameter */
3352
3353 for(k = 0; k < pr->numberOfPartitions; k++)
3354 if(linkList[k] == i)
3355 partitions++;
3356
3357 /* assign a list to store the partitions that share the parameter */
3358
3359 ll->ld[i].partitions = partitions;
3360 ll->ld[i].partitionList = (int*)rax_malloc(sizeof(int) * partitions);
3361
3362 /* now store the respective partition indices in this list */
3363
3364 for(k = 0, pos = 0; k < pr->numberOfPartitions; k++)
3365 if(linkList[k] == i)
3366 ll->ld[i].partitionList[pos++] = k;
3367 }
3368
3369 /* return the linkage list for the parameter */
3370
3371 return ll;
3372 }
3373
3374
3375
initLinkageListString(char * linkageString,partitionList * pr)3376 static linkageList* initLinkageListString(char *linkageString, partitionList * pr)
3377 {
3378 int
3379 *list = (int*)rax_malloc(sizeof(int) * pr->numberOfPartitions),
3380 j;
3381
3382 linkageList
3383 *l;
3384
3385 char
3386 *str1,
3387 *saveptr,
3388 // *ch = strdup(linkageString),
3389 *ch,
3390 *token;
3391
3392 ch = (char *) rax_malloc (strlen (linkageString) + 1);
3393 strcpy (ch, linkageString);
3394
3395 for(j = 0, str1 = ch; ;j++, str1 = (char *)NULL)
3396 {
3397 token = strtok_r(str1, ",", &saveptr);
3398 if(token == (char *)NULL)
3399 break;
3400 assert(j < pr->numberOfPartitions);
3401 list[j] = atoi(token);
3402 }
3403
3404 rax_free(ch);
3405
3406 l = initLinkageList(list, pr);
3407
3408 rax_free(list);
3409
3410 return l;
3411 }
3412
3413 /** @ingroup modelParamsGroups
3414 @brief Link alpha parameters across partitions
3415
3416 Links alpha paremeters across partitions (GAMMA model of rate heterogeneity)
3417
3418 @param string
3419 string describing the linkage pattern
3420
3421 @param pr
3422 List of partitions
3423
3424 @todo
3425 test behavior/impact/mem-leaks of this when PSR model is used
3426 it shouldn't do any harm, but it would be better to check!
3427 */
pllLinkAlphaParameters(char * string,partitionList * pr)3428 int pllLinkAlphaParameters(char *string, partitionList *pr)
3429 {
3430 //assumes that it has already been assigned once
3431 freeLinkageList(pr->alphaList);
3432
3433 pr->alphaList = initLinkageListString(string, pr);
3434
3435 pr->dirty = PLL_TRUE;
3436
3437 if(!pr->alphaList)
3438 return PLL_FALSE;
3439 else
3440 return PLL_TRUE;
3441 }
3442
3443 /** @ingroup modelParamsGroups
3444 @brief Link base frequency parameters across partitions
3445
3446 Links base frequency paremeters across partitions
3447
3448 @param string
3449 string describing the linkage pattern
3450
3451 @param pr
3452 List of partitions
3453
3454 @todo
3455 semantics of this function not clear yet: right now this only has an effect
3456 when we do a ML estimate of base frequencies
3457 when we use empirical or model-defined (protein data) base frequencies, one could
3458 maybe average over the per-partition frequencies, but the averages would need to be weighted
3459 accodring on the number of patterns per partition
3460 */
pllLinkFrequencies(char * string,partitionList * pr)3461 int pllLinkFrequencies(char *string, partitionList *pr)
3462 {
3463 //assumes that it has already been assigned once
3464 freeLinkageList(pr->freqList);
3465
3466 pr->freqList = initLinkageListString(string, pr);
3467
3468 pr->dirty = PLL_TRUE;
3469
3470 if(!pr->freqList)
3471 return PLL_FALSE;
3472 else
3473 return PLL_TRUE;
3474 }
3475
3476 /** @ingroup modelParamsGroups
3477 @brief Link Substitution matrices across partitions
3478
3479 Links substitution matrices (Q matrices) across partitions
3480
3481 @param string
3482 string describing the linkage pattern
3483
3484 @param pr
3485 List of partitions
3486
3487 @todo
3488 re-think/re-design how this is done for protein
3489 models
3490 */
pllLinkRates(char * string,partitionList * pr)3491 int pllLinkRates(char *string, partitionList *pr)
3492 {
3493 //assumes that it has already been assigned once
3494 freeLinkageList(pr->rateList);
3495
3496 pr->rateList = initLinkageListString(string, pr);
3497
3498 pr->dirty = PLL_TRUE;
3499
3500 if(!pr->dirty)
3501 return PLL_FALSE;
3502 else
3503 return PLL_TRUE;
3504 }
3505
3506
3507
3508
3509 /** @ingroup modelParamsGroups
3510 @brief Initialize partitions according to model parameters
3511
3512 Initializes partitions according to model parameters.
3513
3514 @param tr The PLL instance
3515 @param partitions List of partitions
3516 @param alignmentData The parsed alignment
3517 @return Returns \b PLL_TRUE in case of success, otherwise \b PLL_FALSE
3518 */
pllInitModel(pllInstance * tr,partitionList * partitions)3519 int pllInitModel (pllInstance * tr, partitionList * partitions)
3520 {
3521 double ** ef;
3522 int
3523 i,
3524 *unlinked = (int *)rax_malloc(sizeof(int) * partitions->numberOfPartitions);
3525 double old_fracchange = tr->fracchange;
3526
3527 ef = pllBaseFrequenciesInstance (tr, partitions);
3528
3529 if(!ef)
3530 return PLL_FALSE;
3531
3532
3533 #if ! (defined(__ppc) || defined(__powerpc__) || defined(PPC))
3534 #if (defined(__AVX) || defined(__SSE3))
3535 _mm_setcsr( _mm_getcsr() | _MM_FLUSH_ZERO_ON);
3536 #endif
3537 #endif
3538
3539 #ifdef _USE_PTHREADS
3540 tr->threadID = 0;
3541 #ifndef _PORTABLE_PTHREADS
3542 /* not very portable thread to core pinning if PORTABLE_PTHREADS is not defined
3543 by defualt the cod ebelow is deactivated */
3544 pinToCore(0);
3545 #endif
3546 #endif
3547
3548 #if (defined(_FINE_GRAIN_MPI) || defined(_USE_PTHREADS))
3549 /*
3550 this main function is the master thread, so if we want to run RAxML with n threads,
3551 we use pllStartPthreads to start the n-1 worker threads */
3552
3553 #ifdef _USE_PTHREADS
3554 pllStartPthreads (tr, partitions);
3555 #endif
3556
3557 /* via pllMasterBarrier() we invoke parallel regions in which all Pthreads work on computing something, mostly likelihood
3558 computations. Have a look at execFunction() in axml.c where we siwtch of the different types of parallel regions.
3559
3560 Although not necessary, below we copy the info stored on tr->partitionData to corresponding copies in each thread.
3561 While this is shared memory and we don't really need to copy stuff, it was implemented like this to allow for an easier
3562 transition to a distributed memory implementation (MPI).
3563 */
3564 #ifdef _FINE_GRAIN_MPI
3565 //MPI_Bcast (&(partitions->numberOfPartitions), 1, MPI_INT, MPI_ROOT, MPI_COMM_WORLD);
3566 MPI_Bcast (&(partitions->numberOfPartitions), 1, MPI_INT, 0, MPI_COMM_WORLD);
3567 #endif
3568
3569 /* mpi version now also uses the generic barrier */
3570 pllMasterBarrier (tr, partitions, PLL_THREAD_INIT_PARTITION);
3571 #else /* SEQUENTIAL */
3572 /*
3573 allocate the required data structures for storing likelihood vectors etc
3574 */
3575
3576 //initializePartitions(tr, tr, partitions, partitions, 0, 0);
3577 initializePartitionsSequential (tr, partitions);
3578 #endif
3579
3580 //initializePartitions (tr, tr, partitions, partitions, 0, 0);
3581
3582 initModel (tr, ef, partitions);
3583
3584 pllEmpiricalFrequenciesDestroy (&ef, partitions->numberOfPartitions);
3585
3586 for(i = 0; i < partitions->numberOfPartitions; i++)
3587 unlinked[i] = i;
3588
3589 //by default everything is unlinked initially
3590 partitions->alphaList = initLinkageList(unlinked, partitions);
3591 partitions->freqList = initLinkageList(unlinked, partitions);
3592 partitions->rateList = initLinkageList(unlinked, partitions);
3593
3594 rax_free(unlinked);
3595
3596 updateAllBranchLengths (tr, old_fracchange ? old_fracchange : 1, tr->fracchange);
3597 pllEvaluateLikelihood (tr, partitions, tr->start, PLL_TRUE, PLL_FALSE);
3598
3599 return PLL_TRUE;
3600 }
3601
3602 /** @ingroup modelParamsGroups
3603 @brief Optimize all free model parameters of the likelihood model
3604
3605 Initializes partitions according to model parameters.
3606
3607 @param tr
3608 The PLL instance
3609
3610 @param pr
3611 List of partitions
3612
3613 @param likelihoodEpsilon
3614 Specifies up to which epsilon in likelihood values the iterative routine will
3615 be optimizing the parameters
3616 */
pllOptimizeModelParameters(pllInstance * tr,partitionList * pr,double likelihoodEpsilon)3617 int pllOptimizeModelParameters(pllInstance *tr, partitionList *pr, double likelihoodEpsilon)
3618 {
3619 //force the consistency check
3620
3621 pr->dirty = PLL_TRUE;
3622
3623 if(!checkLinkageConsistency(pr))
3624 return PLL_FALSE;
3625
3626 modOpt(tr, pr, likelihoodEpsilon);
3627
3628 return PLL_TRUE;
3629 }
3630
3631 /** @brief Read the contents of a file
3632
3633 Reads the ile \a filename and return its content. In addition
3634 the size of the file is stored in the input variable \a filesize.
3635 The content of the variable \a filesize can be anything and will
3636 be overwritten.
3637
3638 @param filename
3639 Name of the input file
3640
3641 @param filesize
3642 Input parameter where the size of the file (in bytes) will be stored
3643
3644 @return
3645 Contents of the file
3646 */
3647 char *
pllReadFile(const char * filename,long * filesize)3648 pllReadFile (const char * filename, long * filesize)
3649 {
3650 FILE * fp;
3651 char * rawdata;
3652
3653 // FIX BUG: opening with "r" does not work on Windows
3654 // fp = fopen (filename, "r");
3655 printf("[PLL] Reading file %s...\n", filename);
3656 fp = fopen (filename, "rb");
3657 printf("[PLL] Success!\n");
3658 if (!fp) return (NULL);
3659
3660 /* obtain file size */
3661 if (fseek (fp, 0, SEEK_END) == -1)
3662 {
3663 fclose (fp);
3664 return (NULL);
3665 }
3666
3667 *filesize = ftell (fp);
3668
3669 if (*filesize == -1)
3670 {
3671 fclose (fp);
3672 return (NULL);
3673 }
3674 rewind (fp);
3675
3676 /* allocate buffer and read file contents */
3677 rawdata = (char *) rax_malloc (((*filesize) + 1) * sizeof (char));
3678 if (rawdata)
3679 {
3680 if (fread (rawdata, sizeof (char), *filesize, fp) != (size_t) *filesize)
3681 {
3682 rax_free (rawdata);
3683 rawdata = NULL;
3684 }
3685 else
3686 {
3687 rawdata[*filesize] = 0;
3688 }
3689 }
3690
3691 fclose (fp);
3692
3693 return (rawdata);
3694 }
3695
getInnerBranchEndPointsRecursive(nodeptr p,int tips,int * i,node ** nodes)3696 static void getInnerBranchEndPointsRecursive (nodeptr p, int tips, int * i, node **nodes)
3697 {
3698 if (!isTip (p->next->back->number, tips))
3699 {
3700 nodes[(*i)++] = p->next;
3701 getInnerBranchEndPointsRecursive(p->next->back, tips, i, nodes);
3702 }
3703 if (!isTip (p->next->next->back->number, tips))
3704 {
3705 nodes[(*i)++] = p->next->next;
3706 getInnerBranchEndPointsRecursive(p->next->next->back, tips, i, nodes);
3707 }
3708 }
3709
pllGetInnerBranchEndPoints(pllInstance * tr)3710 node ** pllGetInnerBranchEndPoints (pllInstance * tr)
3711 {
3712 node ** nodes;
3713 nodeptr p;
3714 int i = 0;
3715
3716 nodes = (node **) rax_calloc(tr->mxtips - 3, sizeof(node *));
3717
3718 p = tr->start;
3719 assert (isTip(p->number, tr->mxtips));
3720
3721 getInnerBranchEndPointsRecursive(p->back, tr->mxtips, &i, nodes);
3722
3723 return nodes;
3724 }
3725
3726 #if defined WIN32 || defined _WIN32 || defined __WIN32__
rax_calloc(size_t count,size_t size)3727 void* rax_calloc(size_t count, size_t size) {
3728 void* res = rax_malloc(size * count);
3729 memset(res,0,size * count);
3730 return res;
3731 }
3732 #endif
3733
3734