1 /**
2  * PLL (version 1.0.0) a software library for phylogenetic inference
3  * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
4  *
5  * Derived from
6  * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
7  * trees by Alexandros Stamatakis
8  *
9  * This program is free software: you can redistribute it and/or modify it
10  * under the terms of the GNU General Public License as published by the Free
11  * Software Foundation, either version 3 of the License, or (at your option)
12  * any later version.
13  *
14  * This program is distributed in the hope that it will be useful, but WITHOUT
15  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
17  * more details.
18  *
19  * You should have received a copy of the GNU General Public License along with
20  * this program.  If not, see <http://www.gnu.org/licenses/>.
21  *
22  * For any other enquiries send an Email to Tomas Flouri
23  * Tomas.Flouri@h-its.org
24  *
25  * When publishing work that uses PLL please cite PLL
26  *
27  * @file fastDNAparsimony.c
28  */
29 #include "mem_alloc.h"
30 
31 #ifndef WIN32
32 #include <sys/times.h>
33 #include <sys/types.h>
34 #include <sys/time.h>
35 #include <unistd.h>
36 #endif
37 
38 #include <limits.h>
39 #include <math.h>
40 #include <time.h>
41 #include <stdlib.h>
42 #include <stdio.h>
43 #include <ctype.h>
44 #include <string.h>
45 #include <stdint.h>
46 #include <assert.h>
47 
48 #if defined(__MIC_NATIVE)
49 
50 #include <immintrin.h>
51 
52 #define INTS_PER_VECTOR 16
53 //#define LONG_INTS_PER_VECTOR 8
54 #define LONG_INTS_PER_VECTOR (64/sizeof(long))
55 #define INT_TYPE __m512i
56 #define CAST double*
57 #define SET_ALL_BITS_ONE _mm512_set1_epi32(0xFFFFFFFF)
58 #define SET_ALL_BITS_ZERO _mm512_setzero_epi32()
59 #define VECTOR_LOAD _mm512_load_epi32
60 #define VECTOR_STORE  _mm512_store_epi32
61 #define VECTOR_BIT_AND _mm512_and_epi32
62 #define VECTOR_BIT_OR  _mm512_or_epi32
63 #define VECTOR_AND_NOT _mm512_andnot_epi32
64 
65 #elif defined(__AVX)
66 
67 #include <xmmintrin.h>
68 #include <immintrin.h>
69 #include <pmmintrin.h>
70 
71 #define INTS_PER_VECTOR 8
72 //#define LONG_INTS_PER_VECTOR 4
73 #define LONG_INTS_PER_VECTOR (32/sizeof(long))
74 #define INT_TYPE __m256d
75 #define CAST double*
76 //#define SET_ALL_BITS_ONE (__m256d)_mm256_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)
77 //#define SET_ALL_BITS_ZERO (__m256d)_mm256_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000)
78 #define SET_ALL_BITS_ONE _mm256_castsi256_pd(_mm256_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF))
79 #define SET_ALL_BITS_ZERO _mm256_castsi256_pd(_mm256_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000))
80 #define VECTOR_LOAD _mm256_load_pd
81 #define VECTOR_BIT_AND _mm256_and_pd
82 #define VECTOR_BIT_OR  _mm256_or_pd
83 #define VECTOR_STORE  _mm256_store_pd
84 #define VECTOR_AND_NOT _mm256_andnot_pd
85 
86 #elif (defined(__SSE3))
87 
88 #include <xmmintrin.h>
89 #include <pmmintrin.h>
90 
91 #define INTS_PER_VECTOR 4
92 #ifdef __i386__
93 //#define LONG_INTS_PER_VECTOR 4
94 #define LONG_INTS_PER_VECTOR (16/sizeof(long))
95 #else
96 //#define LONG_INTS_PER_VECTOR 2
97 #define LONG_INTS_PER_VECTOR (16/sizeof(long))
98 #endif
99 #define INT_TYPE __m128i
100 #define CAST __m128i*
101 #define SET_ALL_BITS_ONE _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)
102 #define SET_ALL_BITS_ZERO _mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000000)
103 #define VECTOR_LOAD _mm_load_si128
104 #define VECTOR_BIT_AND _mm_and_si128
105 #define VECTOR_BIT_OR  _mm_or_si128
106 #define VECTOR_STORE  _mm_store_si128
107 #define VECTOR_AND_NOT _mm_andnot_si128
108 
109 #endif
110 
111 
112 #include "pll.h"
113 #include "pllInternal.h"
114 
115 #if defined (_MSC_VER)
116 #	if defined ( __SSE4_2__ ) || defined (__AVX__)
117 #		include <nmmintrin.h>
118 #		define __builtin_popcount _mm_popcnt_u32
119 #		define __builtin_popcountl _mm_popcnt_u64
120 #	else
121 #		include <intrin.h>
__builtin_popcount(uint32_t a)122 	static __inline uint32_t __builtin_popcount (uint32_t a) {
123 		// popcnt instruction not available
124 		uint32_t b = a - ((a >> 1) & 0x55555555);
125 		uint32_t c = (b & 0x33333333) + ((b >> 2) & 0x33333333);
126 		uint32_t d = (c + (c >> 4)) & 0x0F0F0F0F;
127 		uint32_t e = d * 0x01010101;
128 		return   e >> 24;
129 	}
130 //#		define __builtin_popcount __popcnt
131 #		define __builtin_popcountl __popcnt64
132 #	endif
133 #endif
134 
135 static pllBoolean tipHomogeneityCheckerPars(pllInstance *tr, nodeptr p, int grouping);
136 
137 extern const unsigned int mask32[32];
138 /* vector-specific stuff */
139 
140 
141 extern double masterTime;
142 
143 /************************************************ pop count stuff ***********************************************/
144 
bitcount_32_bit(unsigned int i)145  unsigned int bitcount_32_bit(unsigned int i)
146 {
147   return ((unsigned int) __builtin_popcount(i));
148 }
149 
150 /* bit count for 64 bit integers */
151 
152 //__inline unsigned int bitcount_64_bit(uint64_t i)
153 //{
154 //  return ((unsigned int) __builtin_popcountl(i));
155 //}
156 
157 /* bit count for 128 bit SSE3 and 256 bit AVX registers */
158 
159 #if (defined(__SSE3) || defined(__AVX))
160 
161 #ifdef _WIN32
162  /* emulate with 32-bit version */
vectorPopcount(INT_TYPE v)163 static __inline unsigned int vectorPopcount(INT_TYPE v)
164 {
165 PLL_ALIGN_BEGIN unsigned int counts[INTS_PER_VECTOR] PLL_ALIGN_END;
166 
167   int
168     i,
169     sum = 0;
170 
171   VECTOR_STORE((CAST)counts, v);
172 
173   for(i = 0; i < INTS_PER_VECTOR; i++)
174     sum += __builtin_popcount(counts[i]);
175 
176   return ((unsigned int)sum);
177 }
178 #else
179 
vectorPopcount(INT_TYPE v)180 static __inline unsigned int vectorPopcount(INT_TYPE v)
181 {
182   unsigned long
183     counts[LONG_INTS_PER_VECTOR] __attribute__ ((aligned (PLL_BYTE_ALIGNMENT)));
184 
185   int
186     i,
187     sum = 0;
188 
189   VECTOR_STORE((CAST)counts, v);
190 
191   for(i = 0; i < LONG_INTS_PER_VECTOR; i++)
192     sum += __builtin_popcountl(counts[i]);
193 
194   return ((unsigned int)sum);
195 }
196 #endif
197 
198 #endif
199 
200 
201 
202 /********************************DNA FUNCTIONS *****************************************************************/
203 
204 
checkerPars(pllInstance * tr,nodeptr p)205 static int checkerPars(pllInstance *tr, nodeptr p)
206 {
207   int group = tr->constraintVector[p->number];
208 
209   if(isTip(p->number, tr->mxtips))
210     {
211       group = tr->constraintVector[p->number];
212       return group;
213     }
214   else
215     {
216       if(group != -9)
217         return group;
218 
219       group = checkerPars(tr, p->next->back);
220       if(group != -9)
221         return group;
222 
223       group = checkerPars(tr, p->next->next->back);
224       if(group != -9)
225         return group;
226 
227       return -9;
228     }
229 }
230 
tipHomogeneityCheckerPars(pllInstance * tr,nodeptr p,int grouping)231 static pllBoolean tipHomogeneityCheckerPars(pllInstance *tr, nodeptr p, int grouping)
232 {
233   if(isTip(p->number, tr->mxtips))
234     {
235       if(tr->constraintVector[p->number] != grouping)
236         return PLL_FALSE;
237       else
238         return PLL_TRUE;
239     }
240   else
241     {
242       return  (tipHomogeneityCheckerPars(tr, p->next->back, grouping) && tipHomogeneityCheckerPars(tr, p->next->next->back,grouping));
243     }
244 }
245 
getxnodeLocal(nodeptr p)246 static void getxnodeLocal (nodeptr p)
247 {
248   nodeptr  s;
249 
250   if((s = p->next)->xPars || (s = s->next)->xPars)
251     {
252       p->xPars = s->xPars;
253       s->xPars = 0;
254     }
255 
256   assert(p->next->xPars || p->next->next->xPars || p->xPars);
257 
258 }
259 
computeTraversalInfoParsimony(nodeptr p,int * ti,int * counter,int maxTips,pllBoolean full)260 static void computeTraversalInfoParsimony(nodeptr p, int *ti, int *counter, int maxTips, pllBoolean full)
261 {
262   nodeptr
263     q = p->next->back,
264     r = p->next->next->back;
265 
266   if(! p->xPars)
267     getxnodeLocal(p);
268 
269   if(full)
270     {
271        if(q->number > maxTips)
272          computeTraversalInfoParsimony(q, ti, counter, maxTips, full);
273 
274       if(r->number > maxTips)
275         computeTraversalInfoParsimony(r, ti, counter, maxTips, full);
276     }
277   else
278     {
279       if(q->number > maxTips && !q->xPars)
280         computeTraversalInfoParsimony(q, ti, counter, maxTips, full);
281 
282       if(r->number > maxTips && !r->xPars)
283         computeTraversalInfoParsimony(r, ti, counter, maxTips, full);
284     }
285 
286 
287   ti[*counter]     = p->number;
288   ti[*counter + 1] = q->number;
289   ti[*counter + 2] = r->number;
290   *counter = *counter + 4;
291 }
292 
293 
294 
295 
296 
297 
298 
299 #if (defined(__SSE3) || defined(__AVX))
300 
newviewParsimonyIterativeFast(pllInstance * tr,partitionList * pr)301 static void newviewParsimonyIterativeFast(pllInstance *tr, partitionList *pr)
302 {
303   INT_TYPE
304     allOne = SET_ALL_BITS_ONE;
305 
306   int
307     model,
308     *ti = tr->ti,
309     count = ti[0],
310     index;
311 
312   for(index = 4; index < count; index += 4)
313     {
314       unsigned int
315         totalScore = 0;
316 
317       size_t
318         pNumber = (size_t)ti[index],
319         qNumber = (size_t)ti[index + 1],
320         rNumber = (size_t)ti[index + 2];
321 
322       for(model = 0; model < pr->numberOfPartitions; model++)
323         {
324           size_t
325             k,
326             states = pr->partitionData[model]->states,
327             width = pr->partitionData[model]->parsimonyLength;
328 
329           unsigned int
330             i;
331 
332           switch(states)
333             {
334             case 2:
335               {
336                 parsimonyNumber
337                   *left[2],
338                   *right[2],
339                   *this[2];
340 
341                 for(k = 0; k < 2; k++)
342                   {
343                     left[k]  = &(pr->partitionData[model]->parsVect[(width * 2 * qNumber) + width * k]);
344                     right[k] = &(pr->partitionData[model]->parsVect[(width * 2 * rNumber) + width * k]);
345                     this[k]  = &(pr->partitionData[model]->parsVect[(width * 2 * pNumber) + width * k]);
346                   }
347 
348                 for(i = 0; i < width; i += INTS_PER_VECTOR)
349                   {
350                     INT_TYPE
351                       s_r, s_l, v_N,
352                       l_A, l_C,
353                       v_A, v_C;
354 
355                     s_l = VECTOR_LOAD((CAST)(&left[0][i]));
356                     s_r = VECTOR_LOAD((CAST)(&right[0][i]));
357                     l_A = VECTOR_BIT_AND(s_l, s_r);
358                     v_A = VECTOR_BIT_OR(s_l, s_r);
359 
360                     s_l = VECTOR_LOAD((CAST)(&left[1][i]));
361                     s_r = VECTOR_LOAD((CAST)(&right[1][i]));
362                     l_C = VECTOR_BIT_AND(s_l, s_r);
363                     v_C = VECTOR_BIT_OR(s_l, s_r);
364 
365                     v_N = VECTOR_BIT_OR(l_A, l_C);
366 
367                     VECTOR_STORE((CAST)(&this[0][i]), VECTOR_BIT_OR(l_A, VECTOR_AND_NOT(v_N, v_A)));
368                     VECTOR_STORE((CAST)(&this[1][i]), VECTOR_BIT_OR(l_C, VECTOR_AND_NOT(v_N, v_C)));
369 
370                     v_N = VECTOR_AND_NOT(v_N, allOne);
371 
372                     totalScore += vectorPopcount(v_N);
373                   }
374               }
375               break;
376             case 4:
377               {
378                 parsimonyNumber
379                   *left[4],
380                   *right[4],
381                   *this[4];
382 
383                 for(k = 0; k < 4; k++)
384                   {
385                     left[k]  = &(pr->partitionData[model]->parsVect[(width * 4 * qNumber) + width * k]);
386                     right[k] = &(pr->partitionData[model]->parsVect[(width * 4 * rNumber) + width * k]);
387                     this[k]  = &(pr->partitionData[model]->parsVect[(width * 4 * pNumber) + width * k]);
388                   }
389 
390                 for(i = 0; i < width; i += INTS_PER_VECTOR)
391                   {
392                     INT_TYPE
393                       s_r, s_l, v_N,
394                       l_A, l_C, l_G, l_T,
395                       v_A, v_C, v_G, v_T;
396 
397                     s_l = VECTOR_LOAD((CAST)(&left[0][i]));
398                     s_r = VECTOR_LOAD((CAST)(&right[0][i]));
399                     l_A = VECTOR_BIT_AND(s_l, s_r);
400                     v_A = VECTOR_BIT_OR(s_l, s_r);
401 
402                     s_l = VECTOR_LOAD((CAST)(&left[1][i]));
403                     s_r = VECTOR_LOAD((CAST)(&right[1][i]));
404                     l_C = VECTOR_BIT_AND(s_l, s_r);
405                     v_C = VECTOR_BIT_OR(s_l, s_r);
406 
407                     s_l = VECTOR_LOAD((CAST)(&left[2][i]));
408                     s_r = VECTOR_LOAD((CAST)(&right[2][i]));
409                     l_G = VECTOR_BIT_AND(s_l, s_r);
410                     v_G = VECTOR_BIT_OR(s_l, s_r);
411 
412                     s_l = VECTOR_LOAD((CAST)(&left[3][i]));
413                     s_r = VECTOR_LOAD((CAST)(&right[3][i]));
414                     l_T = VECTOR_BIT_AND(s_l, s_r);
415                     v_T = VECTOR_BIT_OR(s_l, s_r);
416 
417                     v_N = VECTOR_BIT_OR(VECTOR_BIT_OR(l_A, l_C), VECTOR_BIT_OR(l_G, l_T));
418 
419                     VECTOR_STORE((CAST)(&this[0][i]), VECTOR_BIT_OR(l_A, VECTOR_AND_NOT(v_N, v_A)));
420                     VECTOR_STORE((CAST)(&this[1][i]), VECTOR_BIT_OR(l_C, VECTOR_AND_NOT(v_N, v_C)));
421                     VECTOR_STORE((CAST)(&this[2][i]), VECTOR_BIT_OR(l_G, VECTOR_AND_NOT(v_N, v_G)));
422                     VECTOR_STORE((CAST)(&this[3][i]), VECTOR_BIT_OR(l_T, VECTOR_AND_NOT(v_N, v_T)));
423 
424                     v_N = VECTOR_AND_NOT(v_N, allOne);
425 
426                     totalScore += vectorPopcount(v_N);
427                   }
428               }
429               break;
430             case 20:
431               {
432                 parsimonyNumber
433                   *left[20],
434                   *right[20],
435                   *this[20];
436 
437                 for(k = 0; k < 20; k++)
438                   {
439                     left[k]  = &(pr->partitionData[model]->parsVect[(width * 20 * qNumber) + width * k]);
440                     right[k] = &(pr->partitionData[model]->parsVect[(width * 20 * rNumber) + width * k]);
441                     this[k]  = &(pr->partitionData[model]->parsVect[(width * 20 * pNumber) + width * k]);
442                   }
443 
444                 for(i = 0; i < width; i += INTS_PER_VECTOR)
445                   {
446                     size_t j;
447 
448                     INT_TYPE
449                       s_r, s_l,
450                       v_N = SET_ALL_BITS_ZERO,
451                       l_A[20],
452                       v_A[20];
453 
454                     for(j = 0; j < 20; j++)
455                       {
456                         s_l = VECTOR_LOAD((CAST)(&left[j][i]));
457                         s_r = VECTOR_LOAD((CAST)(&right[j][i]));
458                         l_A[j] = VECTOR_BIT_AND(s_l, s_r);
459                         v_A[j] = VECTOR_BIT_OR(s_l, s_r);
460 
461                         v_N = VECTOR_BIT_OR(v_N, l_A[j]);
462                       }
463 
464                     for(j = 0; j < 20; j++)
465                       VECTOR_STORE((CAST)(&this[j][i]), VECTOR_BIT_OR(l_A[j], VECTOR_AND_NOT(v_N, v_A[j])));
466 
467                     v_N = VECTOR_AND_NOT(v_N, allOne);
468 
469                     totalScore += vectorPopcount(v_N);
470                   }
471               }
472               break;
473             default:
474               {
475                 parsimonyNumber
476                   *left[32],
477                   *right[32],
478                   *this[32];
479 
480                 assert(states <= 32);
481 
482                 for(k = 0; k < states; k++)
483                   {
484                     left[k]  = &(pr->partitionData[model]->parsVect[(width * states * qNumber) + width * k]);
485                     right[k] = &(pr->partitionData[model]->parsVect[(width * states * rNumber) + width * k]);
486                     this[k]  = &(pr->partitionData[model]->parsVect[(width * states * pNumber) + width * k]);
487                   }
488 
489                 for(i = 0; i < width; i += INTS_PER_VECTOR)
490                   {
491                     size_t j;
492 
493                     INT_TYPE
494                       s_r, s_l,
495                       v_N = SET_ALL_BITS_ZERO,
496                       l_A[32],
497                       v_A[32];
498 
499                     for(j = 0; j < states; j++)
500                       {
501                         s_l = VECTOR_LOAD((CAST)(&left[j][i]));
502                         s_r = VECTOR_LOAD((CAST)(&right[j][i]));
503                         l_A[j] = VECTOR_BIT_AND(s_l, s_r);
504                         v_A[j] = VECTOR_BIT_OR(s_l, s_r);
505 
506                         v_N = VECTOR_BIT_OR(v_N, l_A[j]);
507                       }
508 
509                     for(j = 0; j < states; j++)
510                       VECTOR_STORE((CAST)(&this[j][i]), VECTOR_BIT_OR(l_A[j], VECTOR_AND_NOT(v_N, v_A[j])));
511 
512                     v_N = VECTOR_AND_NOT(v_N, allOne);
513 
514                     totalScore += vectorPopcount(v_N);
515                   }
516               }
517             }
518         }
519 
520       tr->parsimonyScore[pNumber] = totalScore + tr->parsimonyScore[rNumber] + tr->parsimonyScore[qNumber];
521     }
522 }
523 
524 
525 
evaluateParsimonyIterativeFast(pllInstance * tr,partitionList * pr)526 static unsigned int evaluateParsimonyIterativeFast(pllInstance *tr, partitionList *pr)
527 {
528   INT_TYPE
529     allOne = SET_ALL_BITS_ONE;
530 
531   size_t
532     pNumber = (size_t)tr->ti[1],
533     qNumber = (size_t)tr->ti[2];
534 
535   int
536     model;
537 
538   unsigned int
539     bestScore = tr->bestParsimony,
540     sum;
541 
542   if(tr->ti[0] > 4)
543     newviewParsimonyIterativeFast(tr, pr);
544 
545   sum = tr->parsimonyScore[pNumber] + tr->parsimonyScore[qNumber];
546 
547   for(model = 0; model < pr->numberOfPartitions; model++)
548     {
549       size_t
550         k,
551         states = pr->partitionData[model]->states,
552         width  = pr->partitionData[model]->parsimonyLength,
553         i;
554 
555        switch(states)
556          {
557          case 2:
558            {
559              parsimonyNumber
560                *left[2],
561                *right[2];
562 
563              for(k = 0; k < 2; k++)
564                {
565                  left[k]  = &(pr->partitionData[model]->parsVect[(width * 2 * qNumber) + width * k]);
566                  right[k] = &(pr->partitionData[model]->parsVect[(width * 2 * pNumber) + width * k]);
567                }
568 
569              for(i = 0; i < width; i += INTS_PER_VECTOR)
570                {
571                  INT_TYPE
572                    l_A = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[0][i])), VECTOR_LOAD((CAST)(&right[0][i]))),
573                    l_C = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[1][i])), VECTOR_LOAD((CAST)(&right[1][i]))),
574                    v_N = VECTOR_BIT_OR(l_A, l_C);
575 
576                  v_N = VECTOR_AND_NOT(v_N, allOne);
577 
578                  sum += vectorPopcount(v_N);
579 
580                  if(sum >= bestScore)
581                    return sum;
582                }
583            }
584            break;
585          case 4:
586            {
587              parsimonyNumber
588                *left[4],
589                *right[4];
590 
591              for(k = 0; k < 4; k++)
592                {
593                  left[k]  = &(pr->partitionData[model]->parsVect[(width * 4 * qNumber) + width * k]);
594                  right[k] = &(pr->partitionData[model]->parsVect[(width * 4 * pNumber) + width * k]);
595                }
596 
597              for(i = 0; i < width; i += INTS_PER_VECTOR)
598                {
599                  INT_TYPE
600                    l_A = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[0][i])), VECTOR_LOAD((CAST)(&right[0][i]))),
601                    l_C = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[1][i])), VECTOR_LOAD((CAST)(&right[1][i]))),
602                    l_G = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[2][i])), VECTOR_LOAD((CAST)(&right[2][i]))),
603                    l_T = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[3][i])), VECTOR_LOAD((CAST)(&right[3][i]))),
604                    v_N = VECTOR_BIT_OR(VECTOR_BIT_OR(l_A, l_C), VECTOR_BIT_OR(l_G, l_T));
605 
606                  v_N = VECTOR_AND_NOT(v_N, allOne);
607 
608                  sum += vectorPopcount(v_N);
609 
610                  if(sum >= bestScore)
611                    return sum;
612                }
613            }
614            break;
615          case 20:
616            {
617              parsimonyNumber
618                *left[20],
619                *right[20];
620 
621               for(k = 0; k < 20; k++)
622                 {
623                   left[k]  = &(pr->partitionData[model]->parsVect[(width * 20 * qNumber) + width * k]);
624                   right[k] = &(pr->partitionData[model]->parsVect[(width * 20 * pNumber) + width * k]);
625                 }
626 
627               for(i = 0; i < width; i += INTS_PER_VECTOR)
628                 {
629                   int
630                     j;
631 
632                   INT_TYPE
633                     l_A,
634                     v_N = SET_ALL_BITS_ZERO;
635 
636                   for(j = 0; j < 20; j++)
637                     {
638                       l_A = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[j][i])), VECTOR_LOAD((CAST)(&right[j][i])));
639                       v_N = VECTOR_BIT_OR(l_A, v_N);
640                     }
641 
642                   v_N = VECTOR_AND_NOT(v_N, allOne);
643 
644                   sum += vectorPopcount(v_N);
645 
646                   if(sum >= bestScore)
647                     return sum;
648                 }
649            }
650            break;
651          default:
652            {
653              parsimonyNumber
654                *left[32],
655                *right[32];
656 
657              assert(states <= 32);
658 
659              for(k = 0; k < states; k++)
660                {
661                  left[k]  = &(pr->partitionData[model]->parsVect[(width * states * qNumber) + width * k]);
662                  right[k] = &(pr->partitionData[model]->parsVect[(width * states * pNumber) + width * k]);
663                }
664 
665              for(i = 0; i < width; i += INTS_PER_VECTOR)
666                {
667                  size_t
668                    j;
669 
670                  INT_TYPE
671                    l_A,
672                    v_N = SET_ALL_BITS_ZERO;
673 
674                  for(j = 0; j < states; j++)
675                    {
676                      l_A = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[j][i])), VECTOR_LOAD((CAST)(&right[j][i])));
677                      v_N = VECTOR_BIT_OR(l_A, v_N);
678                    }
679 
680                  v_N = VECTOR_AND_NOT(v_N, allOne);
681 
682                  sum += vectorPopcount(v_N);
683 
684                  if(sum >= bestScore)
685                    return sum;
686                }
687            }
688          }
689     }
690 
691   return sum;
692 }
693 
694 
695 #else
newviewParsimonyIterativeFast(pllInstance * tr,partitionList * pr)696 static void newviewParsimonyIterativeFast(pllInstance *tr, partitionList * pr)
697 {
698   int
699     model,
700     *ti = tr->ti,
701     count = ti[0],
702     index;
703 
704   for(index = 4; index < count; index += 4)
705     {
706       unsigned int
707         totalScore = 0;
708 
709       size_t
710         pNumber = (size_t)ti[index],
711         qNumber = (size_t)ti[index + 1],
712         rNumber = (size_t)ti[index + 2];
713 
714       for(model = 0; model < pr->numberOfPartitions; model++)
715         {
716           size_t
717             k,
718             states = pr->partitionData[model]->states,
719             width = pr->partitionData[model]->parsimonyLength;
720 
721           unsigned int
722             i;
723 
724           switch(states)
725             {
726             case 2:
727               {
728                 parsimonyNumber
729                   *left[2],
730                   *right[2],
731                   *this[2];
732 
733                 parsimonyNumber
734                    o_A,
735                    o_C,
736                    t_A,
737                    t_C,
738                    t_N;
739 
740                 for(k = 0; k < 2; k++)
741                   {
742                     left[k]  = &(pr->partitionData[model]->parsVect[(width * 2 * qNumber) + width * k]);
743                     right[k] = &(pr->partitionData[model]->parsVect[(width * 2 * rNumber) + width * k]);
744                     this[k]  = &(pr->partitionData[model]->parsVect[(width * 2 * pNumber) + width * k]);
745                   }
746 
747                 for(i = 0; i < width; i++)
748                   {
749                     t_A = left[0][i] & right[0][i];
750                     t_C = left[1][i] & right[1][i];
751 
752                     o_A = left[0][i] | right[0][i];
753                     o_C = left[1][i] | right[1][i];
754 
755                     t_N = ~(t_A | t_C);
756 
757                     this[0][i] = t_A | (t_N & o_A);
758                     this[1][i] = t_C | (t_N & o_C);
759 
760                     totalScore += ((unsigned int) __builtin_popcount(t_N));
761                   }
762               }
763               break;
764             case 4:
765               {
766                 parsimonyNumber
767                   *left[4],
768                   *right[4],
769                   *this[4];
770 
771                 for(k = 0; k < 4; k++)
772                   {
773                     left[k]  = &(pr->partitionData[model]->parsVect[(width * 4 * qNumber) + width * k]);
774                     right[k] = &(pr->partitionData[model]->parsVect[(width * 4 * rNumber) + width * k]);
775                     this[k]  = &(pr->partitionData[model]->parsVect[(width * 4 * pNumber) + width * k]);
776                   }
777 
778                 parsimonyNumber
779                    o_A,
780                    o_C,
781                    o_G,
782                    o_T,
783                    t_A,
784                    t_C,
785                    t_G,
786                    t_T,
787                    t_N;
788 
789                 for(i = 0; i < width; i++)
790                   {
791                     t_A = left[0][i] & right[0][i];
792                     t_C = left[1][i] & right[1][i];
793                     t_G = left[2][i] & right[2][i];
794                     t_T = left[3][i] & right[3][i];
795 
796                     o_A = left[0][i] | right[0][i];
797                     o_C = left[1][i] | right[1][i];
798                     o_G = left[2][i] | right[2][i];
799                     o_T = left[3][i] | right[3][i];
800 
801                     t_N = ~(t_A | t_C | t_G | t_T);
802 
803                     this[0][i] = t_A | (t_N & o_A);
804                     this[1][i] = t_C | (t_N & o_C);
805                     this[2][i] = t_G | (t_N & o_G);
806                     this[3][i] = t_T | (t_N & o_T);
807 
808                     totalScore += ((unsigned int) __builtin_popcount(t_N));
809                   }
810               }
811               break;
812             case 20:
813               {
814                 parsimonyNumber
815                   *left[20],
816                   *right[20],
817                   *this[20];
818 
819                 parsimonyNumber
820                   o_A[20],
821                   t_A[20],
822                   t_N;
823 
824                 for(k = 0; k < 20; k++)
825                   {
826                     left[k]  = &(pr->partitionData[model]->parsVect[(width * 20 * qNumber) + width * k]);
827                     right[k] = &(pr->partitionData[model]->parsVect[(width * 20 * rNumber) + width * k]);
828                     this[k]  = &(pr->partitionData[model]->parsVect[(width * 20 * pNumber) + width * k]);
829                   }
830 
831                 for(i = 0; i < width; i++)
832                   {
833                     size_t k;
834 
835                     t_N = 0;
836 
837                     for(k = 0; k < 20; k++)
838                       {
839                         t_A[k] = left[k][i] & right[k][i];
840                         o_A[k] = left[k][i] | right[k][i];
841                         t_N = t_N | t_A[k];
842                       }
843 
844                     t_N = ~t_N;
845 
846                     for(k = 0; k < 20; k++)
847                       this[k][i] = t_A[k] | (t_N & o_A[k]);
848 
849                     totalScore += ((unsigned int) __builtin_popcount(t_N));
850                   }
851               }
852               break;
853             default:
854               {
855                 parsimonyNumber
856                   *left[32],
857                   *right[32],
858                   *this[32];
859 
860                 parsimonyNumber
861                   o_A[32],
862                   t_A[32],
863                   t_N;
864 
865                 assert(states <= 32);
866 
867                 for(k = 0; k < states; k++)
868                   {
869                     left[k]  = &(pr->partitionData[model]->parsVect[(width * states * qNumber) + width * k]);
870                     right[k] = &(pr->partitionData[model]->parsVect[(width * states * rNumber) + width * k]);
871                     this[k]  = &(pr->partitionData[model]->parsVect[(width * states * pNumber) + width * k]);
872                   }
873 
874                 for(i = 0; i < width; i++)
875                   {
876                     t_N = 0;
877 
878                     for(k = 0; k < states; k++)
879                       {
880                         t_A[k] = left[k][i] & right[k][i];
881                         o_A[k] = left[k][i] | right[k][i];
882                         t_N = t_N | t_A[k];
883                       }
884 
885                     t_N = ~t_N;
886 
887                     for(k = 0; k < states; k++)
888                       this[k][i] = t_A[k] | (t_N & o_A[k]);
889 
890                     totalScore += ((unsigned int) __builtin_popcount(t_N));
891                   }
892               }
893             }
894         }
895 
896       tr->parsimonyScore[pNumber] = totalScore + tr->parsimonyScore[rNumber] + tr->parsimonyScore[qNumber];
897     }
898 }
899 
900 
evaluateParsimonyIterativeFast(pllInstance * tr,partitionList * pr)901 static unsigned int evaluateParsimonyIterativeFast(pllInstance *tr, partitionList * pr)
902 {
903   size_t
904     pNumber = (size_t)tr->ti[1],
905     qNumber = (size_t)tr->ti[2];
906 
907   int
908     model;
909 
910   unsigned int
911     bestScore = tr->bestParsimony,
912     sum;
913 
914   if(tr->ti[0] > 4)
915     newviewParsimonyIterativeFast(tr, pr);
916 
917   sum = tr->parsimonyScore[pNumber] + tr->parsimonyScore[qNumber];
918 
919   for(model = 0; model < pr->numberOfPartitions; model++)
920     {
921       size_t
922         k,
923         states = pr->partitionData[model]->states,
924         width  = pr->partitionData[model]->parsimonyLength,
925         i;
926 
927        switch(states)
928          {
929          case 2:
930            {
931              parsimonyNumber
932                t_A,
933                t_C,
934                t_N,
935                *left[2],
936                *right[2];
937 
938              for(k = 0; k < 2; k++)
939                {
940                  left[k]  = &(pr->partitionData[model]->parsVect[(width * 2 * qNumber) + width * k]);
941                  right[k] = &(pr->partitionData[model]->parsVect[(width * 2 * pNumber) + width * k]);
942                }
943 
944              for(i = 0; i < width; i++)
945                {
946                  t_A = left[0][i] & right[0][i];
947                  t_C = left[1][i] & right[1][i];
948 
949                   t_N = ~(t_A | t_C);
950 
951                   sum += ((unsigned int) __builtin_popcount(t_N));
952 
953                  if(sum >= bestScore)
954                    return sum;
955                }
956            }
957            break;
958          case 4:
959            {
960              parsimonyNumber
961                t_A,
962                t_C,
963                t_G,
964                t_T,
965                t_N,
966                *left[4],
967                *right[4];
968 
969              for(k = 0; k < 4; k++)
970                {
971                  left[k]  = &(pr->partitionData[model]->parsVect[(width * 4 * qNumber) + width * k]);
972                  right[k] = &(pr->partitionData[model]->parsVect[(width * 4 * pNumber) + width * k]);
973                }
974 
975              for(i = 0; i < width; i++)
976                {
977                   t_A = left[0][i] & right[0][i];
978                   t_C = left[1][i] & right[1][i];
979                   t_G = left[2][i] & right[2][i];
980                   t_T = left[3][i] & right[3][i];
981 
982                   t_N = ~(t_A | t_C | t_G | t_T);
983 
984                   sum += ((unsigned int) __builtin_popcount(t_N));
985 
986                  if(sum >= bestScore)
987                    return sum;
988                }
989            }
990            break;
991          case 20:
992            {
993              parsimonyNumber
994                t_A,
995                t_N,
996                *left[20],
997                *right[20];
998 
999               for(k = 0; k < 20; k++)
1000                 {
1001                   left[k]  = &(pr->partitionData[model]->parsVect[(width * 20 * qNumber) + width * k]);
1002                   right[k] = &(pr->partitionData[model]->parsVect[(width * 20 * pNumber) + width * k]);
1003                 }
1004 
1005               for(i = 0; i < width; i++)
1006                 {
1007                   t_N = 0;
1008 
1009                   for(k = 0; k < 20; k++)
1010                     {
1011                       t_A = left[k][i] & right[k][i];
1012                       t_N = t_N | t_A;
1013                     }
1014 
1015                   t_N = ~t_N;
1016 
1017                   sum += ((unsigned int) __builtin_popcount(t_N));
1018 
1019                   if(sum >= bestScore)
1020                     return sum;
1021                 }
1022            }
1023            break;
1024          default:
1025            {
1026              parsimonyNumber
1027                t_A,
1028                t_N,
1029                *left[32],
1030                *right[32];
1031 
1032              assert(states <= 32);
1033 
1034              for(k = 0; k < states; k++)
1035                {
1036                  left[k]  = &(pr->partitionData[model]->parsVect[(width * states * qNumber) + width * k]);
1037                  right[k] = &(pr->partitionData[model]->parsVect[(width * states * pNumber) + width * k]);
1038                }
1039 
1040              for(i = 0; i < width; i++)
1041                {
1042                  t_N = 0;
1043 
1044                  for(k = 0; k < states; k++)
1045                    {
1046                      t_A = left[k][i] & right[k][i];
1047                      t_N = t_N | t_A;
1048                    }
1049 
1050                   t_N = ~t_N;
1051 
1052                   sum += ((unsigned int) __builtin_popcount(t_N));
1053 
1054                  if(sum >= bestScore)
1055                    return sum;
1056                }
1057            }
1058          }
1059     }
1060 
1061   return sum;
1062 }
1063 
1064 #endif
1065 
1066 
1067 
1068 
1069 
1070 
evaluateParsimony(pllInstance * tr,partitionList * pr,nodeptr p,pllBoolean full)1071 static unsigned int evaluateParsimony(pllInstance *tr, partitionList *pr, nodeptr p, pllBoolean full)
1072 {
1073   volatile unsigned int result;
1074   nodeptr q = p->back;
1075   int
1076     *ti = tr->ti,
1077     counter = 4;
1078 
1079   ti[1] = p->number;
1080   ti[2] = q->number;
1081 
1082   if(full)
1083     {
1084       if(p->number > tr->mxtips)
1085         computeTraversalInfoParsimony(p, ti, &counter, tr->mxtips, full);
1086       if(q->number > tr->mxtips)
1087         computeTraversalInfoParsimony(q, ti, &counter, tr->mxtips, full);
1088     }
1089   else
1090     {
1091       if(p->number > tr->mxtips && !p->xPars)
1092         computeTraversalInfoParsimony(p, ti, &counter, tr->mxtips, full);
1093       if(q->number > tr->mxtips && !q->xPars)
1094         computeTraversalInfoParsimony(q, ti, &counter, tr->mxtips, full);
1095     }
1096 
1097   ti[0] = counter;
1098 
1099   result = evaluateParsimonyIterativeFast(tr, pr);
1100 
1101   return result;
1102 }
1103 
1104 
newviewParsimony(pllInstance * tr,partitionList * pr,nodeptr p)1105 static void newviewParsimony(pllInstance *tr, partitionList *pr, nodeptr  p)
1106 {
1107   if(p->number <= tr->mxtips)
1108     return;
1109 
1110   {
1111     int
1112       counter = 4;
1113 
1114     computeTraversalInfoParsimony(p, tr->ti, &counter, tr->mxtips, PLL_FALSE);
1115     tr->ti[0] = counter;
1116 
1117     newviewParsimonyIterativeFast(tr, pr);
1118   }
1119 }
1120 
1121 
1122 
1123 
1124 
1125 /****************************************************************************************************************************************/
1126 
insertParsimony(pllInstance * tr,partitionList * pr,nodeptr p,nodeptr q)1127 static void insertParsimony (pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q)
1128 {
1129   nodeptr  r;
1130 
1131   r = q->back;
1132 
1133   hookupDefault(p->next,       q);
1134   hookupDefault(p->next->next, r);
1135 
1136   newviewParsimony(tr, pr, p);
1137 }
1138 
1139 
1140 
buildNewTip(pllInstance * tr,nodeptr p)1141 static nodeptr buildNewTip (pllInstance *tr, nodeptr p)
1142 {
1143   nodeptr  q;
1144 
1145   q = tr->nodep[(tr->nextnode)++];
1146   hookupDefault(p, q);
1147   q->next->back = (nodeptr)NULL;
1148   q->next->next->back = (nodeptr)NULL;
1149 
1150   return  q;
1151 }
1152 
buildSimpleTree(pllInstance * tr,partitionList * pr,int ip,int iq,int ir)1153 static void buildSimpleTree (pllInstance *tr, partitionList *pr, int ip, int iq, int ir)
1154 {
1155   nodeptr  p, s;
1156   int  i;
1157 
1158   i = PLL_MIN(ip, iq);
1159   if (ir < i)  i = ir;
1160   tr->start = tr->nodep[i];
1161   tr->ntips = 3;
1162   p = tr->nodep[ip];
1163   hookupDefault(p, tr->nodep[iq]);
1164   s = buildNewTip(tr, tr->nodep[ir]);
1165   insertParsimony(tr, pr, s, p);
1166 }
1167 
1168 
testInsertParsimony(pllInstance * tr,partitionList * pr,nodeptr p,nodeptr q,pllBoolean saveBranches)1169 static void testInsertParsimony (pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q, pllBoolean saveBranches)
1170 {
1171   unsigned int
1172     mp;
1173 
1174   nodeptr
1175     r = q->back;
1176 
1177   pllBoolean
1178     doIt = PLL_TRUE;
1179 
1180   int numBranches = pr->perGeneBranchLengths?pr->numberOfPartitions:1;
1181 
1182   if(tr->grouped)
1183     {
1184       int
1185         rNumber = tr->constraintVector[r->number],
1186         qNumber = tr->constraintVector[q->number],
1187         pNumber = tr->constraintVector[p->number];
1188 
1189       doIt = PLL_FALSE;
1190 
1191       if(pNumber == -9)
1192         pNumber = checkerPars(tr, p->back);
1193       if(pNumber == -9)
1194         doIt = PLL_TRUE;
1195       else
1196         {
1197           if(qNumber == -9)
1198             qNumber = checkerPars(tr, q);
1199 
1200           if(rNumber == -9)
1201             rNumber = checkerPars(tr, r);
1202 
1203           if(pNumber == rNumber || pNumber == qNumber)
1204             doIt = PLL_TRUE;
1205         }
1206     }
1207 
1208   if(doIt)
1209     {
1210       double
1211         *z = rax_malloc(numBranches*sizeof(double));
1212 
1213       if(saveBranches)
1214         {
1215           int i;
1216 
1217           for(i = 0; i < numBranches; i++)
1218             z[i] = q->z[i];
1219         }
1220 
1221       insertParsimony(tr, pr, p, q);
1222 
1223       mp = evaluateParsimony(tr, pr, p->next->next, PLL_FALSE);
1224 
1225       if(mp < tr->bestParsimony)
1226         {
1227           tr->bestParsimony = mp;
1228           tr->insertNode = q;
1229           tr->removeNode = p;
1230         }
1231 
1232       if(saveBranches)
1233         hookup(q, r, z, numBranches);
1234       else
1235         hookupDefault(q, r);
1236 
1237       p->next->next->back = p->next->back = (nodeptr) NULL;
1238       rax_free(z);
1239     }
1240 
1241   return;
1242 }
1243 
1244 
restoreTreeParsimony(pllInstance * tr,partitionList * pr,nodeptr p,nodeptr q)1245 static void restoreTreeParsimony(pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q)
1246 {
1247   nodeptr
1248     r = q->back;
1249 
1250   int counter = 4;
1251 
1252   hookupDefault(p->next,       q);
1253   hookupDefault(p->next->next, r);
1254 
1255   computeTraversalInfoParsimony(p, tr->ti, &counter, tr->mxtips, PLL_FALSE);
1256   tr->ti[0] = counter;
1257 
1258   newviewParsimonyIterativeFast(tr, pr);
1259 }
1260 
1261 
addTraverseParsimony(pllInstance * tr,partitionList * pr,nodeptr p,nodeptr q,int mintrav,int maxtrav,pllBoolean doAll,pllBoolean saveBranches)1262 static void addTraverseParsimony (pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q, int mintrav, int maxtrav, pllBoolean doAll, pllBoolean saveBranches)
1263 {
1264   if (doAll || (--mintrav <= 0))
1265     testInsertParsimony(tr, pr, p, q, saveBranches);
1266 
1267   if (((q->number > tr->mxtips)) && ((--maxtrav > 0) || doAll))
1268     {
1269       addTraverseParsimony(tr, pr, p, q->next->back, mintrav, maxtrav, doAll, saveBranches);
1270       addTraverseParsimony(tr, pr, p, q->next->next->back, mintrav, maxtrav, doAll, saveBranches);
1271     }
1272 }
1273 
1274 
1275 
1276 
1277 
makePermutationFast(int * perm,int n,pllInstance * tr)1278 static void makePermutationFast(int *perm, int n, pllInstance *tr)
1279 {
1280   int
1281     i,
1282     j,
1283     k;
1284 
1285   for (i = 1; i <= n; i++)
1286     perm[i] = i;
1287 
1288   for (i = 1; i <= n; i++)
1289     {
1290       double d =  randum(&tr->randomNumberSeed);
1291 
1292       k =  (int)((double)(n + 1 - i) * d);
1293 
1294       j        = perm[i];
1295 
1296       perm[i]     = perm[i + k];
1297       perm[i + k] = j;
1298     }
1299 }
1300 
1301 //static nodeptr  removeNodeParsimony (nodeptr p, tree *tr)
removeNodeParsimony(nodeptr p)1302 static nodeptr  removeNodeParsimony (nodeptr p)
1303 {
1304   nodeptr  q, r;
1305 
1306   q = p->next->back;
1307   r = p->next->next->back;
1308 
1309   hookupDefault(q, r);
1310 
1311   p->next->next->back = p->next->back = (node *) NULL;
1312 
1313   return  q;
1314 }
1315 
rearrangeParsimony(pllInstance * tr,partitionList * pr,nodeptr p,int mintrav,int maxtrav,pllBoolean doAll)1316 static int rearrangeParsimony(pllInstance *tr, partitionList *pr, nodeptr p, int mintrav, int maxtrav, pllBoolean doAll)
1317 {
1318   nodeptr
1319     p1,
1320     p2,
1321     q,
1322     q1,
1323     q2;
1324 
1325   int
1326     mintrav2;
1327 
1328   pllBoolean
1329     doP = PLL_TRUE,
1330     doQ = PLL_TRUE;
1331 
1332   if (maxtrav > tr->ntips - 3)
1333     maxtrav = tr->ntips - 3;
1334 
1335   assert(mintrav == 1);
1336 
1337   if(maxtrav < mintrav)
1338     return 0;
1339 
1340   q = p->back;
1341 
1342   if(tr->constrained)
1343     {
1344       if(! tipHomogeneityCheckerPars(tr, p->back, 0))
1345         doP = PLL_FALSE;
1346 
1347       if(! tipHomogeneityCheckerPars(tr, q->back, 0))
1348         doQ = PLL_FALSE;
1349 
1350       if(doQ == PLL_FALSE && doP == PLL_FALSE)
1351         return 0;
1352     }
1353 
1354   if((p->number > tr->mxtips) && doP)
1355     {
1356       p1 = p->next->back;
1357       p2 = p->next->next->back;
1358 
1359       if ((p1->number > tr->mxtips) || (p2->number > tr->mxtips))
1360         {
1361           //removeNodeParsimony(p, tr);
1362           removeNodeParsimony(p);
1363 
1364           if ((p1->number > tr->mxtips))
1365             {
1366               addTraverseParsimony(tr, pr, p, p1->next->back, mintrav, maxtrav, doAll, PLL_FALSE);
1367               addTraverseParsimony(tr, pr, p, p1->next->next->back, mintrav, maxtrav, doAll, PLL_FALSE);
1368             }
1369 
1370           if ((p2->number > tr->mxtips))
1371             {
1372               addTraverseParsimony(tr, pr, p, p2->next->back, mintrav, maxtrav, doAll, PLL_FALSE);
1373               addTraverseParsimony(tr, pr, p, p2->next->next->back, mintrav, maxtrav, doAll, PLL_FALSE);
1374             }
1375 
1376 
1377           hookupDefault(p->next,       p1);
1378           hookupDefault(p->next->next, p2);
1379 
1380           newviewParsimony(tr, pr, p);
1381         }
1382     }
1383 
1384   if ((q->number > tr->mxtips) && (maxtrav > 0) && doQ)
1385     {
1386       q1 = q->next->back;
1387       q2 = q->next->next->back;
1388 
1389       if (
1390           (
1391            (q1->number > tr->mxtips) &&
1392            ((q1->next->back->number > tr->mxtips) || (q1->next->next->back->number > tr->mxtips))
1393            )
1394           ||
1395           (
1396            (q2->number > tr->mxtips) &&
1397            ((q2->next->back->number > tr->mxtips) || (q2->next->next->back->number > tr->mxtips))
1398            )
1399           )
1400         {
1401 
1402           //removeNodeParsimony(q, tr);
1403           removeNodeParsimony(q);
1404 
1405           mintrav2 = mintrav > 2 ? mintrav : 2;
1406 
1407           if ((q1->number > tr->mxtips))
1408             {
1409               addTraverseParsimony(tr, pr, q, q1->next->back, mintrav2 , maxtrav, doAll, PLL_FALSE);
1410               addTraverseParsimony(tr, pr, q, q1->next->next->back, mintrav2 , maxtrav, doAll, PLL_FALSE);
1411             }
1412 
1413           if ((q2->number > tr->mxtips))
1414             {
1415               addTraverseParsimony(tr, pr, q, q2->next->back, mintrav2 , maxtrav, doAll, PLL_FALSE);
1416               addTraverseParsimony(tr, pr, q, q2->next->next->back, mintrav2 , maxtrav, doAll, PLL_FALSE);
1417             }
1418 
1419           hookupDefault(q->next,       q1);
1420           hookupDefault(q->next->next, q2);
1421 
1422           newviewParsimony(tr, pr, q);
1423         }
1424     }
1425 
1426   return 1;
1427 }
1428 
1429 
restoreTreeRearrangeParsimony(pllInstance * tr,partitionList * pr)1430 static void restoreTreeRearrangeParsimony(pllInstance *tr, partitionList *pr)
1431 {
1432   removeNodeParsimony(tr->removeNode);
1433   //removeNodeParsimony(tr->removeNode, tr);
1434   restoreTreeParsimony(tr, pr, tr->removeNode, tr->insertNode);
1435 }
1436 
1437 /*
1438 static pllBoolean isInformative2(pllInstance *tr, int site)
1439 {
1440   int
1441     informativeCounter = 0,
1442     check[256],
1443     j,
1444     undetermined = 15;
1445 
1446   unsigned char
1447     nucleotide,
1448     target = 0;
1449 
1450   for(j = 0; j < 256; j++)
1451     check[j] = 0;
1452 
1453   for(j = 1; j <= tr->mxtips; j++)
1454     {
1455       nucleotide = tr->yVector[j][site];
1456       check[nucleotide] =  check[nucleotide] + 1;
1457     }
1458 
1459 
1460   if(check[1] > 1)
1461     {
1462       informativeCounter++;
1463       target = target | 1;
1464     }
1465   if(check[2] > 1)
1466     {
1467       informativeCounter++;
1468       target = target | 2;
1469     }
1470   if(check[4] > 1)
1471     {
1472       informativeCounter++;
1473       target = target | 4;
1474     }
1475   if(check[8] > 1)
1476     {
1477       informativeCounter++;
1478       target = target | 8;
1479     }
1480 
1481   if(informativeCounter >= 2)
1482     return PLL_TRUE;
1483   else
1484     {
1485       for(j = 0; j < undetermined; j++)
1486         {
1487           if(j == 3 || j == 5 || j == 6 || j == 7 || j == 9 || j == 10 || j == 11 ||
1488              j == 12 || j == 13 || j == 14)
1489             {
1490               if(check[j] > 1)
1491                 {
1492                   if(!(target & j))
1493                     return PLL_TRUE;
1494                 }
1495             }
1496         }
1497     }
1498 
1499   return PLL_FALSE;
1500 }
1501 */
1502 
isInformative(pllInstance * tr,int dataType,int site)1503 static pllBoolean isInformative(pllInstance *tr, int dataType, int site)
1504 {
1505   int
1506     informativeCounter = 0,
1507     check[256],
1508     j,
1509     undetermined = getUndetermined(dataType);
1510 
1511   const unsigned int
1512     *bitVector = getBitVector(dataType);
1513 
1514   unsigned char
1515     nucleotide;
1516 
1517 
1518   for(j = 0; j < 256; j++)
1519     check[j] = 0;
1520 
1521   for(j = 1; j <= tr->mxtips; j++)
1522     {
1523       nucleotide = tr->yVector[j][site];
1524       check[nucleotide] =  check[nucleotide] + 1;
1525       assert(bitVector[nucleotide] > 0);
1526     }
1527 
1528   for(j = 0; j < undetermined; j++)
1529     {
1530       if(check[j] > 0)
1531         informativeCounter++;
1532     }
1533 
1534   if(informativeCounter <= 1)
1535     return PLL_FALSE;
1536   else
1537     {
1538       for(j = 0; j < undetermined; j++)
1539         {
1540           if(check[j] > 1)
1541             return PLL_TRUE;
1542         }
1543     }
1544 
1545   return PLL_FALSE;
1546 }
1547 
1548 
determineUninformativeSites(pllInstance * tr,partitionList * pr,int * informative)1549 static void determineUninformativeSites(pllInstance *tr, partitionList *pr, int *informative)
1550 {
1551   int
1552     model,
1553     number = 0,
1554     i;
1555 
1556   /*
1557      Not all characters are useful in constructing a parsimony tree.
1558      Invariant characters, those that have the same state in all taxa,
1559      are obviously useless and are ignored by the method. Characters in
1560      which a state occurs in only one taxon are also ignored.
1561      All these characters are called parsimony uninformative.
1562 
1563      Alternative definition: informative columns contain at least two types
1564      of nucleotides, and each nucleotide must appear at least twice in each
1565      column. Kind of a pain if we intend to check for this when using, e.g.,
1566      amibiguous DNA encoding.
1567   */
1568 
1569 
1570   for(model = 0; model < pr->numberOfPartitions; model++)
1571     {
1572       for(i = pr->partitionData[model]->lower; i < pr->partitionData[model]->upper; i++)
1573         {
1574            if(isInformative(tr, pr->partitionData[model]->dataType, i))
1575              informative[i] = 1;
1576            else
1577              {
1578                informative[i] = 0;
1579                number++;
1580              }
1581         }
1582     }
1583 
1584 
1585   /* printf("Uninformative Patterns: %d\n", number); */
1586 }
1587 
1588 
reorderNodes(pllInstance * tr,nodeptr * np,nodeptr p,int * count)1589 static void reorderNodes(pllInstance *tr, nodeptr *np, nodeptr p, int *count)
1590 {
1591   int i, found = 0;
1592 
1593   if((p->number <= tr->mxtips))
1594     return;
1595   else
1596     {
1597       for(i = tr->mxtips + 1; (i <= (tr->mxtips + tr->mxtips - 1)) && (found == 0); i++)
1598         {
1599           if (p == np[i] || p == np[i]->next || p == np[i]->next->next)
1600             {
1601               if(p == np[i])
1602                 tr->nodep[*count + tr->mxtips + 1] = np[i];
1603               else
1604                 {
1605                   if(p == np[i]->next)
1606                     tr->nodep[*count + tr->mxtips + 1] = np[i]->next;
1607                   else
1608                     tr->nodep[*count + tr->mxtips + 1] = np[i]->next->next;
1609                 }
1610 
1611               found = 1;
1612               *count = *count + 1;
1613             }
1614         }
1615 
1616       assert(found != 0);
1617 
1618       reorderNodes(tr, np, p->next->back, count);
1619       reorderNodes(tr, np, p->next->next->back, count);
1620     }
1621 }
1622 
1623 
1624 
nodeRectifierPars(pllInstance * tr)1625 static void nodeRectifierPars(pllInstance *tr)
1626 {
1627   nodeptr *np = (nodeptr *)rax_malloc(2 * tr->mxtips * sizeof(nodeptr));
1628   int i;
1629   int count = 0;
1630 
1631   tr->start       = tr->nodep[1];
1632   tr->rooted      = PLL_FALSE;
1633 
1634   /* TODO why is tr->rooted set to PLL_FALSE here ?*/
1635 
1636   for(i = tr->mxtips + 1; i <= (tr->mxtips + tr->mxtips - 1); i++)
1637     np[i] = tr->nodep[i];
1638 
1639   reorderNodes(tr, np, tr->start->back, &count);
1640 
1641 
1642   rax_free(np);
1643 }
1644 
1645 
1646 
compressDNA(pllInstance * tr,partitionList * pr,int * informative)1647 static void compressDNA(pllInstance *tr, partitionList *pr, int *informative)
1648 {
1649   size_t
1650     totalNodes,
1651     i,
1652     model;
1653 
1654   totalNodes = 2 * (size_t)tr->mxtips;
1655 
1656 
1657 
1658   for(model = 0; model < (size_t) pr->numberOfPartitions; model++)
1659     {
1660       size_t
1661         k,
1662         states = (size_t)pr->partitionData[model]->states,
1663         compressedEntries,
1664         compressedEntriesPadded,
1665         entries = 0,
1666         lower = pr->partitionData[model]->lower,
1667         upper = pr->partitionData[model]->upper;
1668 
1669       parsimonyNumber
1670         **compressedTips = (parsimonyNumber **)rax_malloc(states * sizeof(parsimonyNumber*)),
1671         *compressedValues = (parsimonyNumber *)rax_malloc(states * sizeof(parsimonyNumber));
1672 
1673       for(i = lower; i < upper; i++)
1674         if(informative[i])
1675           entries += (size_t)tr->aliaswgt[i];
1676 
1677       compressedEntries = entries / PLL_PCF;
1678 
1679       if(entries % PLL_PCF != 0)
1680         compressedEntries++;
1681 
1682 #if (defined(__SSE3) || defined(__AVX))
1683       if(compressedEntries % INTS_PER_VECTOR != 0)
1684         compressedEntriesPadded = compressedEntries + (INTS_PER_VECTOR - (compressedEntries % INTS_PER_VECTOR));
1685       else
1686         compressedEntriesPadded = compressedEntries;
1687 #else
1688       compressedEntriesPadded = compressedEntries;
1689 #endif
1690 
1691 
1692       rax_posix_memalign ((void **) &(pr->partitionData[model]->parsVect), PLL_BYTE_ALIGNMENT, (size_t)compressedEntriesPadded * states * totalNodes * sizeof(parsimonyNumber));
1693 
1694       for(i = 0; i < compressedEntriesPadded * states * totalNodes; i++)
1695         pr->partitionData[model]->parsVect[i] = 0;
1696 
1697       for(i = 0; i < (size_t)tr->mxtips; i++)
1698         {
1699           size_t
1700             w = 0,
1701             compressedIndex = 0,
1702             compressedCounter = 0,
1703             index = 0;
1704 
1705           for(k = 0; k < states; k++)
1706             {
1707               compressedTips[k] = &(pr->partitionData[model]->parsVect[(compressedEntriesPadded * states * (i + 1)) + (compressedEntriesPadded * k)]);
1708               compressedValues[k] = 0;
1709             }
1710 
1711           for(index = lower; index < (size_t)upper; index++)
1712             {
1713               if(informative[index])
1714                 {
1715                   const unsigned int
1716                     *bitValue = getBitVector(pr->partitionData[model]->dataType);
1717 
1718                   parsimonyNumber
1719                     value = bitValue[tr->yVector[i + 1][index]];
1720 
1721                   for(w = 0; w < (size_t)tr->aliaswgt[index]; w++)
1722                     {
1723                       for(k = 0; k < states; k++)
1724                         {
1725                           if(value & mask32[k])
1726                             compressedValues[k] |= mask32[compressedCounter];
1727                         }
1728 
1729                       compressedCounter++;
1730 
1731                       if(compressedCounter == PLL_PCF)
1732                         {
1733                           for(k = 0; k < states; k++)
1734                             {
1735                               compressedTips[k][compressedIndex] = compressedValues[k];
1736                               compressedValues[k] = 0;
1737                             }
1738 
1739                           compressedCounter = 0;
1740                           compressedIndex++;
1741                         }
1742                     }
1743                 }
1744             }
1745 
1746           for(;compressedIndex < compressedEntriesPadded; compressedIndex++)
1747             {
1748               for(;compressedCounter < PLL_PCF; compressedCounter++)
1749                 for(k = 0; k < states; k++)
1750                   compressedValues[k] |= mask32[compressedCounter];
1751 
1752               for(k = 0; k < states; k++)
1753                 {
1754                   compressedTips[k][compressedIndex] = compressedValues[k];
1755                   compressedValues[k] = 0;
1756                 }
1757 
1758               compressedCounter = 0;
1759             }
1760         }
1761 
1762       pr->partitionData[model]->parsimonyLength = compressedEntriesPadded;
1763 
1764       rax_free(compressedTips);
1765       rax_free(compressedValues);
1766     }
1767 
1768   rax_posix_memalign ((void **) &(tr->parsimonyScore), PLL_BYTE_ALIGNMENT, sizeof(unsigned int) * totalNodes);
1769 
1770   for(i = 0; i < totalNodes; i++)
1771     tr->parsimonyScore[i] = 0;
1772 }
1773 
1774 
1775 
stepwiseAddition(pllInstance * tr,partitionList * pr,nodeptr p,nodeptr q)1776 static void stepwiseAddition(pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q)
1777 {
1778   nodeptr
1779     r = q->back;
1780 
1781   unsigned int
1782     mp;
1783 
1784   int
1785     counter = 4;
1786 
1787   p->next->back = q;
1788   q->back = p->next;
1789 
1790   p->next->next->back = r;
1791   r->back = p->next->next;
1792 
1793   computeTraversalInfoParsimony(p, tr->ti, &counter, tr->mxtips, PLL_FALSE);
1794   tr->ti[0] = counter;
1795   tr->ti[1] = p->number;
1796   tr->ti[2] = p->back->number;
1797 
1798   mp = evaluateParsimonyIterativeFast(tr, pr);
1799 
1800   if(mp < tr->bestParsimony)
1801     {
1802       tr->bestParsimony = mp;
1803       tr->insertNode = q;
1804     }
1805 
1806   q->back = r;
1807   r->back = q;
1808 
1809   if(q->number > tr->mxtips && tr->parsimonyScore[q->number] > 0)
1810     {
1811       stepwiseAddition(tr, pr, p, q->next->back);
1812       stepwiseAddition(tr, pr, p, q->next->next->back);
1813     }
1814 }
1815 
1816 
1817 
allocateParsimonyDataStructures(pllInstance * tr,partitionList * pr)1818 void allocateParsimonyDataStructures(pllInstance *tr, partitionList *pr)
1819 {
1820   int
1821     i,
1822     *informative = (int *)rax_malloc(sizeof(int) * (size_t)tr->originalCrunchedLength);
1823 
1824   determineUninformativeSites(tr, pr, informative);
1825 
1826   compressDNA(tr, pr, informative);
1827 
1828   for(i = tr->mxtips + 1; i <= tr->mxtips + tr->mxtips - 1; i++)
1829     {
1830       nodeptr
1831         p = tr->nodep[i];
1832 
1833       p->xPars = 1;
1834       p->next->xPars = 0;
1835       p->next->next->xPars = 0;
1836     }
1837 
1838   tr->ti = (int*)rax_malloc(sizeof(int) * 4 * (size_t)tr->mxtips);
1839 
1840   rax_free(informative);
1841 }
1842 
pllFreeParsimonyDataStructures(pllInstance * tr,partitionList * pr)1843 void pllFreeParsimonyDataStructures(pllInstance *tr, partitionList *pr)
1844 {
1845   size_t
1846     model;
1847 
1848   rax_free(tr->parsimonyScore);
1849 
1850   for(model = 0; model < (size_t) pr->numberOfPartitions; ++model)
1851     rax_free(pr->partitionData[model]->parsVect);
1852 
1853   rax_free(tr->ti);
1854 }
1855 
1856 
pllMakeParsimonyTreeFast(pllInstance * tr,partitionList * pr,int sprDist)1857 void pllMakeParsimonyTreeFast(pllInstance *tr, partitionList *pr, int sprDist)
1858 {
1859   nodeptr
1860     p,
1861     f;
1862 
1863   int
1864     i,
1865     nextsp,
1866     *perm        = (int *)rax_malloc((size_t)(tr->mxtips + 1) * sizeof(int));
1867 
1868   unsigned int
1869     randomMP,
1870     startMP;
1871 
1872   assert(!tr->constrained);
1873 
1874   makePermutationFast(perm, tr->mxtips, tr);
1875 
1876   tr->ntips = 0;
1877 
1878   tr->nextnode = tr->mxtips + 1;
1879 
1880   buildSimpleTree(tr, pr, perm[1], perm[2], perm[3]);
1881 
1882   f = tr->start;
1883 
1884   while(tr->ntips < tr->mxtips)
1885     {
1886       nodeptr q;
1887 
1888       tr->bestParsimony = INT_MAX;
1889       nextsp = ++(tr->ntips);
1890       p = tr->nodep[perm[nextsp]];
1891       q = tr->nodep[(tr->nextnode)++];
1892       p->back = q;
1893       q->back = p;
1894 
1895       if(tr->grouped)
1896         {
1897           int
1898             number = p->back->number;
1899 
1900           tr->constraintVector[number] = -9;
1901         }
1902 
1903       stepwiseAddition(tr, pr, q, f->back);
1904 
1905       {
1906         nodeptr
1907           r = tr->insertNode->back;
1908 
1909         int counter = 4;
1910 
1911         hookupDefault(q->next,       tr->insertNode);
1912         hookupDefault(q->next->next, r);
1913 
1914         computeTraversalInfoParsimony(q, tr->ti, &counter, tr->mxtips, PLL_FALSE);
1915         tr->ti[0] = counter;
1916 
1917         newviewParsimonyIterativeFast(tr, pr);
1918       }
1919     }
1920 
1921   nodeRectifierPars(tr);
1922 
1923   randomMP = tr->bestParsimony;
1924 
1925   do
1926     {
1927       startMP = randomMP;
1928       nodeRectifierPars(tr);
1929       for(i = 1; i <= tr->mxtips + tr->mxtips - 2; i++)
1930         {
1931           rearrangeParsimony(tr, pr, tr->nodep[i], 1, sprDist, PLL_FALSE);
1932           if(tr->bestParsimony < randomMP)
1933             {
1934               restoreTreeRearrangeParsimony(tr, pr);
1935               randomMP = tr->bestParsimony;
1936             }
1937         }
1938     }
1939   while(randomMP < startMP);
1940 
1941   rax_free(perm);
1942 }
1943