1 /**
2  * PLL (version 1.0.0) a software library for phylogenetic inference
3  * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
4  *
5  * Derived from
6  * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
7  * trees by Alexandros Stamatakis
8  *
9  * This program is free software: you can redistribute it and/or modify it
10  * under the terms of the GNU General Public License as published by the Free
11  * Software Foundation, either version 3 of the License, or (at your option)
12  * any later version.
13  *
14  * This program is distributed in the hope that it will be useful, but WITHOUT
15  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
17  * more details.
18  *
19  * You should have received a copy of the GNU General Public License along with
20  * this program.  If not, see <http://www.gnu.org/licenses/>.
21  *
22  * For any other enquiries send an Email to Tomas Flouri
23  * Tomas.Flouri@h-its.org
24  *
25  * When publishing work that uses PLL please cite PLL
26  *
27  * @file recom.c
28  * @brief Functions used for recomputation of vectors (only a fraction of LH vectors stored in RAM)
29  */
30 #include "mem_alloc.h"
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <assert.h>
34 #include <string.h>
35 #include <limits.h>
36 #include <errno.h>
37 #include <time.h>
38 #include <math.h>
39 #ifndef WIN32
40 #include <sys/time.h>
41 #endif
42 #include "pll.h"
43 #include "pllInternal.h"
44 
45 /** @brief Locks node \a nodenum to force it remains availably in memory
46  *
47  * @warning If a node is available we dont need to recompute it, but we neet to make sure it is not unpinned while buildding the rest of the traversal descriptor, i.e. unpinnable must be PLL_FALSE at this point, it will automatically be set to PLL_TRUE, after the counter post-order instructions have been executed
48 Omitting this call the traversal will likely still work as long as num_allocated_nodes >> log n, but wrong inner vectors will be used at the wrong moment of pllNewviewIterative, careful!
49  *
50  *  @param rvec
51  *    Recomputation info
52  *
53  *  @param nodenum
54  *    Node id that must remain available in memory
55  *
56  *  @param mxtips
57  *    Number of tips in the tree
58  *
59  */
protectNode(recompVectors * rvec,int nodenum,int mxtips)60 void protectNode(recompVectors *rvec, int nodenum, int mxtips)
61 {
62 
63   int slot;
64   slot = rvec->iNode[nodenum - mxtips - 1];
65   assert(slot != PLL_NODE_UNPINNED);
66   assert(rvec->iVector[slot] == nodenum);
67 
68   if(rvec->unpinnable[slot])
69     rvec->unpinnable[slot] = PLL_FALSE;
70 }
71 
72 /** @brief Checks if \a nodenum  is currently pinned (available in RAM)
73  *
74  *  @note shall we document static functions?
75  *
76  *  @param rvec
77  *    Recomputation info
78  *
79  *  @param nodenum
80  *    Node id to be checked
81  *
82  *  @param mxtips
83  *    Number of tips in the tree
84  *
85  */
isNodePinned(recompVectors * rvec,int nodenum,int mxtips)86 static pllBoolean isNodePinned(recompVectors *rvec, int nodenum, int mxtips)
87 {
88   assert(nodenum > mxtips);
89 
90   if(rvec->iNode[nodenum - mxtips - 1] == PLL_NODE_UNPINNED)
91     return PLL_FALSE;
92   else
93     return PLL_TRUE;
94 }
95 
96 /** @brief Checks if the likelihood entries at node \a p should be updated
97  *
98  * A node needs update if one of the following holds:
99  *    1. It is not oriented (p->x == 0)
100  *    2. We are applying recomputations and node \a p is not currently available in RAM
101  *
102  *  @param recompute
103  *    PLL_TRUE if recomputation is currently applied
104  *
105  *  @param p
106  *    Node to check whether it is associated with the likelihood vector
107  *
108  *  @param mxtips
109  *    Number of tips in the tree
110  *
111  */
needsRecomp(pllBoolean recompute,recompVectors * rvec,nodeptr p,int mxtips)112 pllBoolean needsRecomp(pllBoolean recompute, recompVectors *rvec, nodeptr p, int mxtips)
113 {
114   if((!p->x) || (recompute && !isNodePinned(rvec, p->number, mxtips)))
115     return PLL_TRUE;
116   else
117     return PLL_FALSE;
118 }
119 
120 
121 
122 /** @brief Allocates memory for recomputation structure
123  *
124  *
125  *  @todo this should not depend on tr (\a vectorRecomFraction should be a parameter)
126  *    PLL_TRUE if recomputation is currently applied
127  *
128  */
allocRecompVectorsInfo(pllInstance * tr)129 void allocRecompVectorsInfo(pllInstance *tr)
130 {
131   recompVectors
132     *v = (recompVectors *) rax_malloc(sizeof(recompVectors));
133 
134   int
135     num_inner_nodes = tr->mxtips - 2,
136                     num_vectors,
137                     i;
138 
139   assert(tr->vectorRecomFraction > PLL_MIN_RECOM_FRACTION);
140   assert(tr->vectorRecomFraction < PLL_MAX_RECOM_FRACTION);
141 
142   num_vectors = (int) (1 + tr->vectorRecomFraction * (float)num_inner_nodes);
143 
144   int theoretical_minimum_of_vectors = 3 + ((int)(log((double)tr->mxtips)/log(2.0)));
145   //printBothOpen("Try to use %d ancestral vectors, min required %d\n", num_vectors, theoretical_minimum_of_vectors);
146 
147   assert(num_vectors >= theoretical_minimum_of_vectors);
148   assert(num_vectors < tr->mxtips);
149 
150 
151   v->numVectors = num_vectors; /* use minimum bound theoretical */
152 
153   /* init vectors tracking */
154 
155   v->iVector         = (int *) rax_malloc((size_t)num_vectors * sizeof(int));
156   v->unpinnable      = (pllBoolean *) rax_malloc((size_t)num_vectors * sizeof(pllBoolean));
157 
158   for(i = 0; i < num_vectors; i++)
159   {
160     v->iVector[i]         = PLL_SLOT_UNUSED;
161     v->unpinnable[i]      = PLL_FALSE;
162   }
163 
164   v->iNode      = (int *) rax_malloc((size_t)num_inner_nodes * sizeof(int));
165   v->stlen      = (int *) rax_malloc((size_t)num_inner_nodes * sizeof(int));
166 
167   for(i = 0; i < num_inner_nodes; i++)
168   {
169     v->iNode[i] = PLL_NODE_UNPINNED;
170     v->stlen[i] = PLL_INNER_NODE_INIT_STLEN;
171   }
172 
173   v->allSlotsBusy = PLL_FALSE;
174 
175   /* init nodes tracking */
176 
177   v->maxVectorsUsed = 0;
178   tr->rvec = v;
179 }
180 
181 /** @brief Find the slot id with the minimum cost to be recomputed.
182  *
183  *  The minum cost is defined as the minimum subtree size. In general, the closer a vector is to the tips,
184  *  the less recomputations are required to re-establish its likelihood entries
185  *
186  *  @todo remove _DEBUG_RECOMPUTATION code
187  *
188  *  @param v
189  *
190  *  @param mxtips
191  *    Number of tips in the tree
192  *
193  */
findUnpinnableSlotByCost(recompVectors * v,int mxtips)194 static int findUnpinnableSlotByCost(recompVectors *v, int mxtips)
195 {
196   int
197     i,
198     slot,
199     cheapest_slot = -1,
200     min_cost = mxtips * 2; /* more expensive than the most expensive*/
201 #ifdef _DEBUG_RECOMPUTATION
202   double straTime = gettime();
203 #endif
204 
205 
206   for(i = 0; i < mxtips - 2; i++)
207   {
208     slot = v->iNode[i];
209     if(slot != PLL_NODE_UNPINNED)
210     {
211       assert(slot >= 0 && slot < v->numVectors);
212 
213       if(v->unpinnable[slot])
214       {
215         assert(v->stlen[i] > 0);
216 
217         if(v->stlen[i] < min_cost)
218         {
219           min_cost = v->stlen[i];
220           cheapest_slot = slot;
221           /* if the slot costs 2 you can break cause there is nothing cheaper to recompute */
222           if(min_cost == 2)
223             break;
224         }
225       }
226     }
227   }
228   assert(min_cost < mxtips * 2 && min_cost >= 2);
229   assert(cheapest_slot >= 0);
230   return cheapest_slot;
231 }
232 
unpinAtomicSlot(recompVectors * v,int slot,int mxtips)233 static void unpinAtomicSlot(recompVectors *v, int slot, int mxtips)
234 {
235   int
236     nodenum = v->iVector[slot];
237 
238   v->iVector[slot] = PLL_SLOT_UNUSED;
239 
240   if(nodenum != PLL_SLOT_UNUSED)
241     v->iNode[nodenum - mxtips - 1] = PLL_NODE_UNPINNED;
242 }
243 
244 /** @brief Finds the cheapest slot and unpins it
245  *
246  */
findUnpinnableSlot(recompVectors * v,int mxtips)247 static int findUnpinnableSlot(recompVectors *v, int mxtips)
248 {
249   int
250     slot_unpinned = findUnpinnableSlotByCost(v, mxtips);
251 
252   assert(slot_unpinned >= 0);
253   assert(v->unpinnable[slot_unpinned]);
254 
255   unpinAtomicSlot(v, slot_unpinned, mxtips);
256 
257   return slot_unpinned;
258 }
259 
260 /** @brief Finds a free slot
261  *
262  *  If all slots are occupied, it will find the cheapest slot and unpin it
263  *
264  */
findFreeSlot(recompVectors * v,int mxtips)265 static int findFreeSlot(recompVectors *v, int mxtips)
266 {
267   int
268     slotno = -1,
269            i;
270 
271   assert(v->allSlotsBusy == PLL_FALSE);
272 
273   for(i = 0; i < v->numVectors; i++)
274   {
275     if(v->iVector[i] == PLL_SLOT_UNUSED)
276     {
277       slotno = i;
278       break;
279     }
280   }
281 
282   if(slotno == -1)
283   {
284     v->allSlotsBusy = PLL_TRUE;
285     slotno = findUnpinnableSlot(v, mxtips);
286   }
287 
288   return slotno;
289 }
290 
291 
292 /** @brief Pins node \a nodenum to slot \a slot
293  *
294  *  The slot is initialized as non-unpinnable (ensures that the contents of the vector will not be overwritten)
295  *
296  *  @param nodenum
297  *    node id
298  *
299  *  @param slot
300  *    slot id
301  *
302  *  @param mxtips
303  *    Number of tips in the tree
304  *
305  */
pinAtomicNode(recompVectors * v,int nodenum,int slot,int mxtips)306 static void pinAtomicNode(recompVectors *v, int nodenum, int slot, int mxtips)
307 {
308   v->iVector[slot] = nodenum;
309   v->iNode[nodenum - mxtips - 1] = slot;
310   v->unpinnable[slot] = PLL_FALSE;
311 }
312 
pinNode(recompVectors * rvec,int nodenum,int mxtips)313 static int pinNode(recompVectors *rvec, int nodenum, int mxtips)
314 {
315   int
316     slot;
317 
318   assert(!isNodePinned(rvec, nodenum, mxtips));
319 
320   if(rvec->allSlotsBusy)
321     slot = findUnpinnableSlot(rvec, mxtips);
322   else
323     slot = findFreeSlot(rvec, mxtips);
324 
325   assert(slot >= 0);
326 
327   pinAtomicNode(rvec, nodenum, slot, mxtips);
328 
329   if(slot > rvec->maxVectorsUsed)
330     rvec->maxVectorsUsed = slot;
331 
332   assert(slot == rvec->iNode[nodenum - mxtips - 1]);
333 
334   return slot;
335 }
336 
337 /** @brief Marks node \a nodenum as unpinnable
338  *
339  *  The slot holding the node \a nodenum is added to the pool of slot candidates that can be overwritten.
340  *
341  *  @param v
342  *    Recomputation info
343  *
344  *  @param nodenum
345  *    node id
346  *
347  *  @param mxtips
348  *    Number of tips in the tree
349  *
350  */
unpinNode(recompVectors * v,int nodenum,int mxtips)351 void unpinNode(recompVectors *v, int nodenum, int mxtips)
352 {
353   if(nodenum <= mxtips)
354     return;
355   else
356   {
357     int
358       slot = -1;
359 
360     assert(nodenum > mxtips);
361     slot = v->iNode[nodenum-mxtips-1];
362     assert(slot >= 0 && slot < v->numVectors);
363 
364     if(slot >= 0 && slot < v->numVectors)
365       v->unpinnable[slot] = PLL_TRUE;
366   }
367 }
368 
369 
370 /** @brief Get a pinned slot \a slot that holds the likelihood vector for inner node \a nodenum
371  *
372  *  If node \a node nodenum is not pinned to any slot yet, the minimum cost replacement strategy is used.
373  *
374  *  @param v
375  *    Recomputation info
376  *
377  *  @param nodenum
378  *    node id
379  *
380  *  @param slot
381  *    slot id
382  *
383  *  @param mxtips
384  *    Number of tips in the tree
385  *
386  */
getxVector(recompVectors * rvec,int nodenum,int * slot,int mxtips)387 pllBoolean getxVector(recompVectors *rvec, int nodenum, int *slot, int mxtips)
388 {
389   pllBoolean
390     slotNeedsRecomp = PLL_FALSE;
391 
392   *slot = rvec->iNode[nodenum - mxtips - 1];
393 
394   if(*slot == PLL_NODE_UNPINNED)
395   {
396     *slot = pinNode(rvec, nodenum, mxtips); /* now we will run the replacement strategy */
397     slotNeedsRecomp = PLL_TRUE;
398   }
399 
400   assert(*slot >= 0 && *slot < rvec->numVectors);
401 
402   rvec->unpinnable[*slot] = PLL_FALSE;
403 
404   return slotNeedsRecomp;
405 }
406 
407 
408 #ifdef _DEBUG_RECOMPUTATION
409 
subtreeSize(nodeptr p,int maxTips)410 static int subtreeSize(nodeptr p, int maxTips)
411 {
412   if(isTip(p->number, maxTips))
413     return 1;
414   else
415     return (subtreeSize(p->next->back, maxTips) + subtreeSize(p->next->next->back, maxTips));
416 }
417 
418 #endif
419 
420 /** @brief Annotes unoriented tree nodes \a tr with their subtree size
421  *
422  *  This function recursively updates the subtree size of each inner node.
423  *  @note The subtree size of node \a p->number is the number of nodes included in the subtree where node record \a p is the virtual root.
424  *
425  *  @param p
426  *    Pointer to node
427  *
428  *  @param maxTips
429  *    Number of tips in the tree
430  *
431  *  @param rvec
432  *    Recomputation info
433  *
434  *  @param count
435  *    Number of visited nodes
436  */
computeTraversalInfoStlen(nodeptr p,int maxTips,recompVectors * rvec,int * count)437 void computeTraversalInfoStlen(nodeptr p, int maxTips, recompVectors *rvec, int *count)
438 {
439   if(isTip(p->number, maxTips))
440     return;
441   else
442   {
443     nodeptr
444       q = p->next->back,
445         r = p->next->next->back;
446 
447     *count += 1;
448     /* set xnode info at this point */
449 
450     if(isTip(r->number, maxTips) && isTip(q->number, maxTips))
451     {
452       rvec->stlen[p->number - maxTips - 1] = 2;
453 
454 #ifdef _DEBUG_RECOMPUTATION
455       assert(rvec->stlen[p->number - maxTips - 1] == subtreeSize(p, maxTips));
456 #endif
457     }
458     else
459     {
460       if(isTip(r->number, maxTips) || isTip(q->number, maxTips))
461       {
462         nodeptr
463           tmp;
464 
465         if(isTip(r->number, maxTips))
466         {
467           tmp = r;
468           r = q;
469           q = tmp;
470         }
471 
472         if(!r->x)
473           computeTraversalInfoStlen(r, maxTips, rvec, count);
474 
475         rvec->stlen[p->number - maxTips - 1] = rvec->stlen[r->number - maxTips - 1] + 1;
476 
477 #ifdef _DEBUG_RECOMPUTATION
478         assert(rvec->stlen[p->number - maxTips - 1] == subtreeSize(p, maxTips));
479 #endif
480       }
481       else
482       {
483         if(!r->x)
484           computeTraversalInfoStlen(r, maxTips, rvec, count);
485         if(!q->x)
486           computeTraversalInfoStlen(q, maxTips, rvec, count);
487 
488         rvec->stlen[p->number - maxTips - 1] = rvec->stlen[q->number - maxTips - 1] + rvec->stlen[r->number - maxTips - 1];
489 
490 #ifdef _DEBUG_RECOMPUTATION
491         assert(rvec->stlen[p->number - maxTips - 1] == subtreeSize(p, maxTips));
492 #endif
493       }
494     }
495   }
496 }
497 
498 
499 
500 
501 /* pre-compute the node stlens (this needs to be known prior to running the strategy) */
502 /** @brief Annotes all tree nodes \a tr with their subtree size
503  *
504  *  Similar to \a computeTraversalInfoStlen, but does a full traversal ignoring orientation.
505  *  The minum cost is defined as the minimum subtree size. In general, the closer a vector is to the tips,
506  *  the less recomputations are required to re-establish its likelihood entries
507  *
508  *  @param p
509  *    Pointer to node
510  *
511  *  @param maxTips
512  *    Number of tips in the tree
513  *
514  *  @param rvec
515  *    Recomputation info
516  */
computeFullTraversalInfoStlen(nodeptr p,int maxTips,recompVectors * rvec)517 void computeFullTraversalInfoStlen(nodeptr p, int maxTips, recompVectors *rvec)
518 {
519   if(isTip(p->number, maxTips))
520     return;
521   else
522   {
523     nodeptr
524       q = p->next->back,
525         r = p->next->next->back;
526 
527     if(isTip(r->number, maxTips) && isTip(q->number, maxTips))
528     {
529       rvec->stlen[p->number - maxTips - 1] = 2;
530 
531 #ifdef _DEBUG_RECOMPUTATION
532       assert(rvec->stlen[p->number - maxTips - 1] == subtreeSize(p, maxTips));
533 #endif
534     }
535     else
536     {
537       if(isTip(r->number, maxTips) || isTip(q->number, maxTips))
538       {
539         nodeptr
540           tmp;
541 
542         if(isTip(r->number, maxTips))
543         {
544           tmp = r;
545           r = q;
546           q = tmp;
547         }
548 
549         computeFullTraversalInfoStlen(r, maxTips, rvec);
550 
551         rvec->stlen[p->number - maxTips - 1] = rvec->stlen[r->number - maxTips - 1] + 1;
552 
553 #ifdef _DEBUG_RECOMPUTATION
554         assert(rvec->stlen[p->number - maxTips - 1] == subtreeSize(p, maxTips));
555 #endif
556       }
557       else
558       {
559         computeFullTraversalInfoStlen(r, maxTips, rvec);
560         computeFullTraversalInfoStlen(q, maxTips, rvec);
561 
562         rvec->stlen[p->number - maxTips - 1] = rvec->stlen[q->number - maxTips - 1] + rvec->stlen[r->number - maxTips - 1];
563 #ifdef _DEBUG_RECOMPUTATION
564         assert(rvec->stlen[p->number - maxTips - 1] == subtreeSize(p, maxTips));
565 #endif
566       }
567     }
568   }
569 }
570 
571 
572 #ifdef _DEBUG_RECOMPUTATION
573 
allocTraversalCounter(pllInstance * tr)574 void allocTraversalCounter(pllInstance *tr)
575 {
576   traversalCounter
577     *tc;
578 
579   int
580     k;
581 
582   tc = (traversalCounter *)rax_malloc(sizeof(traversalCounter));
583 
584   tc->travlenFreq = (unsigned int *)rax_malloc(tr->mxtips * sizeof(int));
585 
586   for(k = 0; k < tr->mxtips; k++)
587     tc->travlenFreq[k] = 0;
588 
589   tc->tt = 0;
590   tc->ti = 0;
591   tc->ii = 0;
592   tc->numTraversals = 0;
593   tr->travCounter = tc;
594 }
595 
596 /* recomp */
597 /* code to track traversal descriptor stats */
598 
countTraversal(pllInstance * tr)599 void countTraversal(pllInstance *tr)
600 {
601   traversalInfo
602     *ti   = tr->td[0].ti;
603   int i;
604   traversalCounter *tc = tr->travCounter;
605   tc->numTraversals += 1;
606 
607   /*
608   printBothOpen("trav #%d(%d):",tc->numTraversals, tr->td[0].count);
609   */
610 
611   for(i = 1; i < tr->td[0].count; i++)
612   {
613     traversalInfo *tInfo = &ti[i];
614 
615     /*
616        printBothOpen(" %d q%d r%d |",  tInfo->pNumber, tInfo->qNumber, tInfo->rNumber);
617        printBothOpen("%d",  tInfo->pNumber);
618        */
619     switch(tInfo->tipCase)
620     {
621       case PLL_TIP_TIP:
622         tc->tt++;
623         /* printBothOpen("T"); */
624         break;
625       case PLL_TIP_INNER:
626         tc->ti++;
627         /* printBothOpen("M"); */
628         break;
629 
630       case PLL_INNER_INNER:
631         tc->ii++;
632         /* printBothOpen("I"); */
633         break;
634       default:
635         assert(0);
636     }
637     /* printBothOpen(" "); */
638   }
639   /* printBothOpen(" so far T %d, M %d, I %d \n", tc->tt, tc->ti,tc->ii); */
640   tc->travlenFreq[tr->td[0].count] += 1;
641 }
642 
643 
644 /*
645 void printTraversalInfo(pllInstance *tr)
646 {
647   int
648     k,
649     total_steps = 0;
650 
651   printBothOpen("Traversals : %d \n", tr->travCounter->numTraversals);
652   printBothOpen("Traversals tt: %d \n", tr->travCounter->tt);
653   printBothOpen("Traversals ti: %d \n", tr->travCounter->ti);
654   printBothOpen("Traversals ii: %d \n", tr->travCounter->ii);
655   printBothOpen("all: %d \n", tr->travCounter->tt + tr->travCounter->ii + tr->travCounter->ti);
656   printBothOpen("Traversals len freq  : \n");
657 
658   for(k = 0; k < tr->mxtips; k++)
659   {
660     total_steps += tr->travCounter->travlenFreq[k] * (k - 1);
661     if(tr->travCounter->travlenFreq[k] > 0)
662       printBothOpen("len %d : %d\n", k, tr->travCounter->travlenFreq[k]);
663   }
664   printBothOpen("all steps: %d \n", total_steps);
665 }
666 */
667 /*end code to track traversal descriptor stats */
668 /* E recomp */
669 
670 /*
671 void printVector(double *vector, int len, char *name)
672 {
673   int i;
674   printBothOpen("LHVECTOR %s :", name);
675   for(i=0; i < len; i++)
676   {
677     printBothOpen("%.2f ", vector[i]);
678     if(i>10)
679     {
680       printBothOpen("...");
681       break;
682     }
683   }
684   printBothOpen("\n");
685 }
686 */
687 
688 #endif
689 
690