1 /**
2 * PLL (version 1.0.0) a software library for phylogenetic inference
3 * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
4 *
5 * Derived from
6 * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
7 * trees by Alexandros Stamatakis
8 *
9 * This program is free software: you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the Free
11 * Software Foundation, either version 3 of the License, or (at your option)
12 * any later version.
13 *
14 * This program is distributed in the hope that it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 * more details.
18 *
19 * You should have received a copy of the GNU General Public License along with
20 * this program. If not, see <http://www.gnu.org/licenses/>.
21 *
22 * For any other enquiries send an Email to Tomas Flouri
23 * Tomas.Flouri@h-its.org
24 *
25 * When publishing work that uses PLL please cite PLL
26 *
27 * @file recom.c
28 * @brief Functions used for recomputation of vectors (only a fraction of LH vectors stored in RAM)
29 */
30 #include "mem_alloc.h"
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <assert.h>
34 #include <string.h>
35 #include <limits.h>
36 #include <errno.h>
37 #include <time.h>
38 #include <math.h>
39 #ifndef WIN32
40 #include <sys/time.h>
41 #endif
42 #include "pll.h"
43 #include "pllInternal.h"
44
45 /** @brief Locks node \a nodenum to force it remains availably in memory
46 *
47 * @warning If a node is available we dont need to recompute it, but we neet to make sure it is not unpinned while buildding the rest of the traversal descriptor, i.e. unpinnable must be PLL_FALSE at this point, it will automatically be set to PLL_TRUE, after the counter post-order instructions have been executed
48 Omitting this call the traversal will likely still work as long as num_allocated_nodes >> log n, but wrong inner vectors will be used at the wrong moment of pllNewviewIterative, careful!
49 *
50 * @param rvec
51 * Recomputation info
52 *
53 * @param nodenum
54 * Node id that must remain available in memory
55 *
56 * @param mxtips
57 * Number of tips in the tree
58 *
59 */
protectNode(recompVectors * rvec,int nodenum,int mxtips)60 void protectNode(recompVectors *rvec, int nodenum, int mxtips)
61 {
62
63 int slot;
64 slot = rvec->iNode[nodenum - mxtips - 1];
65 assert(slot != PLL_NODE_UNPINNED);
66 assert(rvec->iVector[slot] == nodenum);
67
68 if(rvec->unpinnable[slot])
69 rvec->unpinnable[slot] = PLL_FALSE;
70 }
71
72 /** @brief Checks if \a nodenum is currently pinned (available in RAM)
73 *
74 * @note shall we document static functions?
75 *
76 * @param rvec
77 * Recomputation info
78 *
79 * @param nodenum
80 * Node id to be checked
81 *
82 * @param mxtips
83 * Number of tips in the tree
84 *
85 */
isNodePinned(recompVectors * rvec,int nodenum,int mxtips)86 static pllBoolean isNodePinned(recompVectors *rvec, int nodenum, int mxtips)
87 {
88 assert(nodenum > mxtips);
89
90 if(rvec->iNode[nodenum - mxtips - 1] == PLL_NODE_UNPINNED)
91 return PLL_FALSE;
92 else
93 return PLL_TRUE;
94 }
95
96 /** @brief Checks if the likelihood entries at node \a p should be updated
97 *
98 * A node needs update if one of the following holds:
99 * 1. It is not oriented (p->x == 0)
100 * 2. We are applying recomputations and node \a p is not currently available in RAM
101 *
102 * @param recompute
103 * PLL_TRUE if recomputation is currently applied
104 *
105 * @param p
106 * Node to check whether it is associated with the likelihood vector
107 *
108 * @param mxtips
109 * Number of tips in the tree
110 *
111 */
needsRecomp(pllBoolean recompute,recompVectors * rvec,nodeptr p,int mxtips)112 pllBoolean needsRecomp(pllBoolean recompute, recompVectors *rvec, nodeptr p, int mxtips)
113 {
114 if((!p->x) || (recompute && !isNodePinned(rvec, p->number, mxtips)))
115 return PLL_TRUE;
116 else
117 return PLL_FALSE;
118 }
119
120
121
122 /** @brief Allocates memory for recomputation structure
123 *
124 *
125 * @todo this should not depend on tr (\a vectorRecomFraction should be a parameter)
126 * PLL_TRUE if recomputation is currently applied
127 *
128 */
allocRecompVectorsInfo(pllInstance * tr)129 void allocRecompVectorsInfo(pllInstance *tr)
130 {
131 recompVectors
132 *v = (recompVectors *) rax_malloc(sizeof(recompVectors));
133
134 int
135 num_inner_nodes = tr->mxtips - 2,
136 num_vectors,
137 i;
138
139 assert(tr->vectorRecomFraction > PLL_MIN_RECOM_FRACTION);
140 assert(tr->vectorRecomFraction < PLL_MAX_RECOM_FRACTION);
141
142 num_vectors = (int) (1 + tr->vectorRecomFraction * (float)num_inner_nodes);
143
144 int theoretical_minimum_of_vectors = 3 + ((int)(log((double)tr->mxtips)/log(2.0)));
145 //printBothOpen("Try to use %d ancestral vectors, min required %d\n", num_vectors, theoretical_minimum_of_vectors);
146
147 assert(num_vectors >= theoretical_minimum_of_vectors);
148 assert(num_vectors < tr->mxtips);
149
150
151 v->numVectors = num_vectors; /* use minimum bound theoretical */
152
153 /* init vectors tracking */
154
155 v->iVector = (int *) rax_malloc((size_t)num_vectors * sizeof(int));
156 v->unpinnable = (pllBoolean *) rax_malloc((size_t)num_vectors * sizeof(pllBoolean));
157
158 for(i = 0; i < num_vectors; i++)
159 {
160 v->iVector[i] = PLL_SLOT_UNUSED;
161 v->unpinnable[i] = PLL_FALSE;
162 }
163
164 v->iNode = (int *) rax_malloc((size_t)num_inner_nodes * sizeof(int));
165 v->stlen = (int *) rax_malloc((size_t)num_inner_nodes * sizeof(int));
166
167 for(i = 0; i < num_inner_nodes; i++)
168 {
169 v->iNode[i] = PLL_NODE_UNPINNED;
170 v->stlen[i] = PLL_INNER_NODE_INIT_STLEN;
171 }
172
173 v->allSlotsBusy = PLL_FALSE;
174
175 /* init nodes tracking */
176
177 v->maxVectorsUsed = 0;
178 tr->rvec = v;
179 }
180
181 /** @brief Find the slot id with the minimum cost to be recomputed.
182 *
183 * The minum cost is defined as the minimum subtree size. In general, the closer a vector is to the tips,
184 * the less recomputations are required to re-establish its likelihood entries
185 *
186 * @todo remove _DEBUG_RECOMPUTATION code
187 *
188 * @param v
189 *
190 * @param mxtips
191 * Number of tips in the tree
192 *
193 */
findUnpinnableSlotByCost(recompVectors * v,int mxtips)194 static int findUnpinnableSlotByCost(recompVectors *v, int mxtips)
195 {
196 int
197 i,
198 slot,
199 cheapest_slot = -1,
200 min_cost = mxtips * 2; /* more expensive than the most expensive*/
201 #ifdef _DEBUG_RECOMPUTATION
202 double straTime = gettime();
203 #endif
204
205
206 for(i = 0; i < mxtips - 2; i++)
207 {
208 slot = v->iNode[i];
209 if(slot != PLL_NODE_UNPINNED)
210 {
211 assert(slot >= 0 && slot < v->numVectors);
212
213 if(v->unpinnable[slot])
214 {
215 assert(v->stlen[i] > 0);
216
217 if(v->stlen[i] < min_cost)
218 {
219 min_cost = v->stlen[i];
220 cheapest_slot = slot;
221 /* if the slot costs 2 you can break cause there is nothing cheaper to recompute */
222 if(min_cost == 2)
223 break;
224 }
225 }
226 }
227 }
228 assert(min_cost < mxtips * 2 && min_cost >= 2);
229 assert(cheapest_slot >= 0);
230 return cheapest_slot;
231 }
232
unpinAtomicSlot(recompVectors * v,int slot,int mxtips)233 static void unpinAtomicSlot(recompVectors *v, int slot, int mxtips)
234 {
235 int
236 nodenum = v->iVector[slot];
237
238 v->iVector[slot] = PLL_SLOT_UNUSED;
239
240 if(nodenum != PLL_SLOT_UNUSED)
241 v->iNode[nodenum - mxtips - 1] = PLL_NODE_UNPINNED;
242 }
243
244 /** @brief Finds the cheapest slot and unpins it
245 *
246 */
findUnpinnableSlot(recompVectors * v,int mxtips)247 static int findUnpinnableSlot(recompVectors *v, int mxtips)
248 {
249 int
250 slot_unpinned = findUnpinnableSlotByCost(v, mxtips);
251
252 assert(slot_unpinned >= 0);
253 assert(v->unpinnable[slot_unpinned]);
254
255 unpinAtomicSlot(v, slot_unpinned, mxtips);
256
257 return slot_unpinned;
258 }
259
260 /** @brief Finds a free slot
261 *
262 * If all slots are occupied, it will find the cheapest slot and unpin it
263 *
264 */
findFreeSlot(recompVectors * v,int mxtips)265 static int findFreeSlot(recompVectors *v, int mxtips)
266 {
267 int
268 slotno = -1,
269 i;
270
271 assert(v->allSlotsBusy == PLL_FALSE);
272
273 for(i = 0; i < v->numVectors; i++)
274 {
275 if(v->iVector[i] == PLL_SLOT_UNUSED)
276 {
277 slotno = i;
278 break;
279 }
280 }
281
282 if(slotno == -1)
283 {
284 v->allSlotsBusy = PLL_TRUE;
285 slotno = findUnpinnableSlot(v, mxtips);
286 }
287
288 return slotno;
289 }
290
291
292 /** @brief Pins node \a nodenum to slot \a slot
293 *
294 * The slot is initialized as non-unpinnable (ensures that the contents of the vector will not be overwritten)
295 *
296 * @param nodenum
297 * node id
298 *
299 * @param slot
300 * slot id
301 *
302 * @param mxtips
303 * Number of tips in the tree
304 *
305 */
pinAtomicNode(recompVectors * v,int nodenum,int slot,int mxtips)306 static void pinAtomicNode(recompVectors *v, int nodenum, int slot, int mxtips)
307 {
308 v->iVector[slot] = nodenum;
309 v->iNode[nodenum - mxtips - 1] = slot;
310 v->unpinnable[slot] = PLL_FALSE;
311 }
312
pinNode(recompVectors * rvec,int nodenum,int mxtips)313 static int pinNode(recompVectors *rvec, int nodenum, int mxtips)
314 {
315 int
316 slot;
317
318 assert(!isNodePinned(rvec, nodenum, mxtips));
319
320 if(rvec->allSlotsBusy)
321 slot = findUnpinnableSlot(rvec, mxtips);
322 else
323 slot = findFreeSlot(rvec, mxtips);
324
325 assert(slot >= 0);
326
327 pinAtomicNode(rvec, nodenum, slot, mxtips);
328
329 if(slot > rvec->maxVectorsUsed)
330 rvec->maxVectorsUsed = slot;
331
332 assert(slot == rvec->iNode[nodenum - mxtips - 1]);
333
334 return slot;
335 }
336
337 /** @brief Marks node \a nodenum as unpinnable
338 *
339 * The slot holding the node \a nodenum is added to the pool of slot candidates that can be overwritten.
340 *
341 * @param v
342 * Recomputation info
343 *
344 * @param nodenum
345 * node id
346 *
347 * @param mxtips
348 * Number of tips in the tree
349 *
350 */
unpinNode(recompVectors * v,int nodenum,int mxtips)351 void unpinNode(recompVectors *v, int nodenum, int mxtips)
352 {
353 if(nodenum <= mxtips)
354 return;
355 else
356 {
357 int
358 slot = -1;
359
360 assert(nodenum > mxtips);
361 slot = v->iNode[nodenum-mxtips-1];
362 assert(slot >= 0 && slot < v->numVectors);
363
364 if(slot >= 0 && slot < v->numVectors)
365 v->unpinnable[slot] = PLL_TRUE;
366 }
367 }
368
369
370 /** @brief Get a pinned slot \a slot that holds the likelihood vector for inner node \a nodenum
371 *
372 * If node \a node nodenum is not pinned to any slot yet, the minimum cost replacement strategy is used.
373 *
374 * @param v
375 * Recomputation info
376 *
377 * @param nodenum
378 * node id
379 *
380 * @param slot
381 * slot id
382 *
383 * @param mxtips
384 * Number of tips in the tree
385 *
386 */
getxVector(recompVectors * rvec,int nodenum,int * slot,int mxtips)387 pllBoolean getxVector(recompVectors *rvec, int nodenum, int *slot, int mxtips)
388 {
389 pllBoolean
390 slotNeedsRecomp = PLL_FALSE;
391
392 *slot = rvec->iNode[nodenum - mxtips - 1];
393
394 if(*slot == PLL_NODE_UNPINNED)
395 {
396 *slot = pinNode(rvec, nodenum, mxtips); /* now we will run the replacement strategy */
397 slotNeedsRecomp = PLL_TRUE;
398 }
399
400 assert(*slot >= 0 && *slot < rvec->numVectors);
401
402 rvec->unpinnable[*slot] = PLL_FALSE;
403
404 return slotNeedsRecomp;
405 }
406
407
408 #ifdef _DEBUG_RECOMPUTATION
409
subtreeSize(nodeptr p,int maxTips)410 static int subtreeSize(nodeptr p, int maxTips)
411 {
412 if(isTip(p->number, maxTips))
413 return 1;
414 else
415 return (subtreeSize(p->next->back, maxTips) + subtreeSize(p->next->next->back, maxTips));
416 }
417
418 #endif
419
420 /** @brief Annotes unoriented tree nodes \a tr with their subtree size
421 *
422 * This function recursively updates the subtree size of each inner node.
423 * @note The subtree size of node \a p->number is the number of nodes included in the subtree where node record \a p is the virtual root.
424 *
425 * @param p
426 * Pointer to node
427 *
428 * @param maxTips
429 * Number of tips in the tree
430 *
431 * @param rvec
432 * Recomputation info
433 *
434 * @param count
435 * Number of visited nodes
436 */
computeTraversalInfoStlen(nodeptr p,int maxTips,recompVectors * rvec,int * count)437 void computeTraversalInfoStlen(nodeptr p, int maxTips, recompVectors *rvec, int *count)
438 {
439 if(isTip(p->number, maxTips))
440 return;
441 else
442 {
443 nodeptr
444 q = p->next->back,
445 r = p->next->next->back;
446
447 *count += 1;
448 /* set xnode info at this point */
449
450 if(isTip(r->number, maxTips) && isTip(q->number, maxTips))
451 {
452 rvec->stlen[p->number - maxTips - 1] = 2;
453
454 #ifdef _DEBUG_RECOMPUTATION
455 assert(rvec->stlen[p->number - maxTips - 1] == subtreeSize(p, maxTips));
456 #endif
457 }
458 else
459 {
460 if(isTip(r->number, maxTips) || isTip(q->number, maxTips))
461 {
462 nodeptr
463 tmp;
464
465 if(isTip(r->number, maxTips))
466 {
467 tmp = r;
468 r = q;
469 q = tmp;
470 }
471
472 if(!r->x)
473 computeTraversalInfoStlen(r, maxTips, rvec, count);
474
475 rvec->stlen[p->number - maxTips - 1] = rvec->stlen[r->number - maxTips - 1] + 1;
476
477 #ifdef _DEBUG_RECOMPUTATION
478 assert(rvec->stlen[p->number - maxTips - 1] == subtreeSize(p, maxTips));
479 #endif
480 }
481 else
482 {
483 if(!r->x)
484 computeTraversalInfoStlen(r, maxTips, rvec, count);
485 if(!q->x)
486 computeTraversalInfoStlen(q, maxTips, rvec, count);
487
488 rvec->stlen[p->number - maxTips - 1] = rvec->stlen[q->number - maxTips - 1] + rvec->stlen[r->number - maxTips - 1];
489
490 #ifdef _DEBUG_RECOMPUTATION
491 assert(rvec->stlen[p->number - maxTips - 1] == subtreeSize(p, maxTips));
492 #endif
493 }
494 }
495 }
496 }
497
498
499
500
501 /* pre-compute the node stlens (this needs to be known prior to running the strategy) */
502 /** @brief Annotes all tree nodes \a tr with their subtree size
503 *
504 * Similar to \a computeTraversalInfoStlen, but does a full traversal ignoring orientation.
505 * The minum cost is defined as the minimum subtree size. In general, the closer a vector is to the tips,
506 * the less recomputations are required to re-establish its likelihood entries
507 *
508 * @param p
509 * Pointer to node
510 *
511 * @param maxTips
512 * Number of tips in the tree
513 *
514 * @param rvec
515 * Recomputation info
516 */
computeFullTraversalInfoStlen(nodeptr p,int maxTips,recompVectors * rvec)517 void computeFullTraversalInfoStlen(nodeptr p, int maxTips, recompVectors *rvec)
518 {
519 if(isTip(p->number, maxTips))
520 return;
521 else
522 {
523 nodeptr
524 q = p->next->back,
525 r = p->next->next->back;
526
527 if(isTip(r->number, maxTips) && isTip(q->number, maxTips))
528 {
529 rvec->stlen[p->number - maxTips - 1] = 2;
530
531 #ifdef _DEBUG_RECOMPUTATION
532 assert(rvec->stlen[p->number - maxTips - 1] == subtreeSize(p, maxTips));
533 #endif
534 }
535 else
536 {
537 if(isTip(r->number, maxTips) || isTip(q->number, maxTips))
538 {
539 nodeptr
540 tmp;
541
542 if(isTip(r->number, maxTips))
543 {
544 tmp = r;
545 r = q;
546 q = tmp;
547 }
548
549 computeFullTraversalInfoStlen(r, maxTips, rvec);
550
551 rvec->stlen[p->number - maxTips - 1] = rvec->stlen[r->number - maxTips - 1] + 1;
552
553 #ifdef _DEBUG_RECOMPUTATION
554 assert(rvec->stlen[p->number - maxTips - 1] == subtreeSize(p, maxTips));
555 #endif
556 }
557 else
558 {
559 computeFullTraversalInfoStlen(r, maxTips, rvec);
560 computeFullTraversalInfoStlen(q, maxTips, rvec);
561
562 rvec->stlen[p->number - maxTips - 1] = rvec->stlen[q->number - maxTips - 1] + rvec->stlen[r->number - maxTips - 1];
563 #ifdef _DEBUG_RECOMPUTATION
564 assert(rvec->stlen[p->number - maxTips - 1] == subtreeSize(p, maxTips));
565 #endif
566 }
567 }
568 }
569 }
570
571
572 #ifdef _DEBUG_RECOMPUTATION
573
allocTraversalCounter(pllInstance * tr)574 void allocTraversalCounter(pllInstance *tr)
575 {
576 traversalCounter
577 *tc;
578
579 int
580 k;
581
582 tc = (traversalCounter *)rax_malloc(sizeof(traversalCounter));
583
584 tc->travlenFreq = (unsigned int *)rax_malloc(tr->mxtips * sizeof(int));
585
586 for(k = 0; k < tr->mxtips; k++)
587 tc->travlenFreq[k] = 0;
588
589 tc->tt = 0;
590 tc->ti = 0;
591 tc->ii = 0;
592 tc->numTraversals = 0;
593 tr->travCounter = tc;
594 }
595
596 /* recomp */
597 /* code to track traversal descriptor stats */
598
countTraversal(pllInstance * tr)599 void countTraversal(pllInstance *tr)
600 {
601 traversalInfo
602 *ti = tr->td[0].ti;
603 int i;
604 traversalCounter *tc = tr->travCounter;
605 tc->numTraversals += 1;
606
607 /*
608 printBothOpen("trav #%d(%d):",tc->numTraversals, tr->td[0].count);
609 */
610
611 for(i = 1; i < tr->td[0].count; i++)
612 {
613 traversalInfo *tInfo = &ti[i];
614
615 /*
616 printBothOpen(" %d q%d r%d |", tInfo->pNumber, tInfo->qNumber, tInfo->rNumber);
617 printBothOpen("%d", tInfo->pNumber);
618 */
619 switch(tInfo->tipCase)
620 {
621 case PLL_TIP_TIP:
622 tc->tt++;
623 /* printBothOpen("T"); */
624 break;
625 case PLL_TIP_INNER:
626 tc->ti++;
627 /* printBothOpen("M"); */
628 break;
629
630 case PLL_INNER_INNER:
631 tc->ii++;
632 /* printBothOpen("I"); */
633 break;
634 default:
635 assert(0);
636 }
637 /* printBothOpen(" "); */
638 }
639 /* printBothOpen(" so far T %d, M %d, I %d \n", tc->tt, tc->ti,tc->ii); */
640 tc->travlenFreq[tr->td[0].count] += 1;
641 }
642
643
644 /*
645 void printTraversalInfo(pllInstance *tr)
646 {
647 int
648 k,
649 total_steps = 0;
650
651 printBothOpen("Traversals : %d \n", tr->travCounter->numTraversals);
652 printBothOpen("Traversals tt: %d \n", tr->travCounter->tt);
653 printBothOpen("Traversals ti: %d \n", tr->travCounter->ti);
654 printBothOpen("Traversals ii: %d \n", tr->travCounter->ii);
655 printBothOpen("all: %d \n", tr->travCounter->tt + tr->travCounter->ii + tr->travCounter->ti);
656 printBothOpen("Traversals len freq : \n");
657
658 for(k = 0; k < tr->mxtips; k++)
659 {
660 total_steps += tr->travCounter->travlenFreq[k] * (k - 1);
661 if(tr->travCounter->travlenFreq[k] > 0)
662 printBothOpen("len %d : %d\n", k, tr->travCounter->travlenFreq[k]);
663 }
664 printBothOpen("all steps: %d \n", total_steps);
665 }
666 */
667 /*end code to track traversal descriptor stats */
668 /* E recomp */
669
670 /*
671 void printVector(double *vector, int len, char *name)
672 {
673 int i;
674 printBothOpen("LHVECTOR %s :", name);
675 for(i=0; i < len; i++)
676 {
677 printBothOpen("%.2f ", vector[i]);
678 if(i>10)
679 {
680 printBothOpen("...");
681 break;
682 }
683 }
684 printBothOpen("\n");
685 }
686 */
687
688 #endif
689
690