1 /**
2 * PLL (version 1.0.0) a software library for phylogenetic inference
3 * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
4 *
5 * Derived from
6 * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
7 * trees by Alexandros Stamatakis
8 *
9 * This program is free software: you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the Free
11 * Software Foundation, either version 3 of the License, or (at your option)
12 * any later version.
13 *
14 * This program is distributed in the hope that it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 * more details.
18 *
19 * You should have received a copy of the GNU General Public License along with
20 * this program. If not, see <http://www.gnu.org/licenses/>.
21 *
22 * For any other enquiries send an Email to Tomas Flouri
23 * Tomas.Flouri@h-its.org
24 *
25 * When publishing work that uses PLL please cite PLL
26 *
27 * @file fastDNAparsimony.c
28 */
29 #include "mem_alloc.h"
30
31 #ifndef WIN32
32 #include <sys/times.h>
33 #include <sys/types.h>
34 #include <sys/time.h>
35 #include <unistd.h>
36 #endif
37
38 #include <limits.h>
39 #include <math.h>
40 #include <time.h>
41 #include <stdlib.h>
42 #include <stdio.h>
43 #include <ctype.h>
44 #include <string.h>
45 #include <stdint.h>
46 #include <assert.h>
47
48 #if defined(__MIC_NATIVE)
49
50 #include <immintrin.h>
51
52 #define INTS_PER_VECTOR 16
53 //#define LONG_INTS_PER_VECTOR 8
54 #define LONG_INTS_PER_VECTOR (64/sizeof(long))
55 #define INT_TYPE __m512i
56 #define CAST double*
57 #define SET_ALL_BITS_ONE _mm512_set1_epi32(0xFFFFFFFF)
58 #define SET_ALL_BITS_ZERO _mm512_setzero_epi32()
59 #define VECTOR_LOAD _mm512_load_epi32
60 #define VECTOR_STORE _mm512_store_epi32
61 #define VECTOR_BIT_AND _mm512_and_epi32
62 #define VECTOR_BIT_OR _mm512_or_epi32
63 #define VECTOR_AND_NOT _mm512_andnot_epi32
64
65 #elif defined(__AVX)
66
67 #include <xmmintrin.h>
68 #include <immintrin.h>
69 #include <pmmintrin.h>
70
71 #define INTS_PER_VECTOR 8
72 //#define LONG_INTS_PER_VECTOR 4
73 #define LONG_INTS_PER_VECTOR (32/sizeof(long))
74 #define INT_TYPE __m256d
75 #define CAST double*
76 //#define SET_ALL_BITS_ONE (__m256d)_mm256_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)
77 //#define SET_ALL_BITS_ZERO (__m256d)_mm256_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000)
78 #define SET_ALL_BITS_ONE _mm256_castsi256_pd(_mm256_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF))
79 #define SET_ALL_BITS_ZERO _mm256_castsi256_pd(_mm256_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000))
80 #define VECTOR_LOAD _mm256_load_pd
81 #define VECTOR_BIT_AND _mm256_and_pd
82 #define VECTOR_BIT_OR _mm256_or_pd
83 #define VECTOR_STORE _mm256_store_pd
84 #define VECTOR_AND_NOT _mm256_andnot_pd
85
86 #elif (defined(__SSE3))
87
88 #include <xmmintrin.h>
89 #include <pmmintrin.h>
90
91 #define INTS_PER_VECTOR 4
92 #ifdef __i386__
93 //#define LONG_INTS_PER_VECTOR 4
94 #define LONG_INTS_PER_VECTOR (16/sizeof(long))
95 #else
96 //#define LONG_INTS_PER_VECTOR 2
97 #define LONG_INTS_PER_VECTOR (16/sizeof(long))
98 #endif
99 #define INT_TYPE __m128i
100 #define CAST __m128i*
101 #define SET_ALL_BITS_ONE _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)
102 #define SET_ALL_BITS_ZERO _mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000000)
103 #define VECTOR_LOAD _mm_load_si128
104 #define VECTOR_BIT_AND _mm_and_si128
105 #define VECTOR_BIT_OR _mm_or_si128
106 #define VECTOR_STORE _mm_store_si128
107 #define VECTOR_AND_NOT _mm_andnot_si128
108
109 #endif
110
111
112 #include "pll.h"
113 #include "pllInternal.h"
114
115 #if defined (_MSC_VER)
116 # if defined ( __SSE4_2__ ) || defined (__AVX__)
117 # include <nmmintrin.h>
118 # define __builtin_popcount _mm_popcnt_u32
119 # define __builtin_popcountl _mm_popcnt_u64
120 # else
121 # include <intrin.h>
__builtin_popcount(uint32_t a)122 static __inline uint32_t __builtin_popcount (uint32_t a) {
123 // popcnt instruction not available
124 uint32_t b = a - ((a >> 1) & 0x55555555);
125 uint32_t c = (b & 0x33333333) + ((b >> 2) & 0x33333333);
126 uint32_t d = (c + (c >> 4)) & 0x0F0F0F0F;
127 uint32_t e = d * 0x01010101;
128 return e >> 24;
129 }
130 //# define __builtin_popcount __popcnt
131 # define __builtin_popcountl __popcnt64
132 # endif
133 #endif
134
135 static pllBoolean tipHomogeneityCheckerPars(pllInstance *tr, nodeptr p, int grouping);
136
137 extern const unsigned int mask32[32];
138 /* vector-specific stuff */
139
140
141 extern double masterTime;
142
143 /************************************************ pop count stuff ***********************************************/
144
bitcount_32_bit(unsigned int i)145 unsigned int bitcount_32_bit(unsigned int i)
146 {
147 return ((unsigned int) __builtin_popcount(i));
148 }
149
150 /* bit count for 64 bit integers */
151
152 //__inline unsigned int bitcount_64_bit(uint64_t i)
153 //{
154 // return ((unsigned int) __builtin_popcountl(i));
155 //}
156
157 /* bit count for 128 bit SSE3 and 256 bit AVX registers */
158
159 #if (defined(__SSE3) || defined(__AVX))
160
161 #ifdef _WIN32
162 /* emulate with 32-bit version */
vectorPopcount(INT_TYPE v)163 static __inline unsigned int vectorPopcount(INT_TYPE v)
164 {
165 PLL_ALIGN_BEGIN unsigned int counts[INTS_PER_VECTOR] PLL_ALIGN_END;
166
167 int
168 i,
169 sum = 0;
170
171 VECTOR_STORE((CAST)counts, v);
172
173 for(i = 0; i < INTS_PER_VECTOR; i++)
174 sum += __builtin_popcount(counts[i]);
175
176 return ((unsigned int)sum);
177 }
178 #else
179
vectorPopcount(INT_TYPE v)180 static __inline unsigned int vectorPopcount(INT_TYPE v)
181 {
182 unsigned long
183 counts[LONG_INTS_PER_VECTOR] __attribute__ ((aligned (PLL_BYTE_ALIGNMENT)));
184
185 int
186 i,
187 sum = 0;
188
189 VECTOR_STORE((CAST)counts, v);
190
191 for(i = 0; i < LONG_INTS_PER_VECTOR; i++)
192 sum += __builtin_popcountl(counts[i]);
193
194 return ((unsigned int)sum);
195 }
196 #endif
197
198 #endif
199
200
201
202 /********************************DNA FUNCTIONS *****************************************************************/
203
204
checkerPars(pllInstance * tr,nodeptr p)205 static int checkerPars(pllInstance *tr, nodeptr p)
206 {
207 int group = tr->constraintVector[p->number];
208
209 if(isTip(p->number, tr->mxtips))
210 {
211 group = tr->constraintVector[p->number];
212 return group;
213 }
214 else
215 {
216 if(group != -9)
217 return group;
218
219 group = checkerPars(tr, p->next->back);
220 if(group != -9)
221 return group;
222
223 group = checkerPars(tr, p->next->next->back);
224 if(group != -9)
225 return group;
226
227 return -9;
228 }
229 }
230
tipHomogeneityCheckerPars(pllInstance * tr,nodeptr p,int grouping)231 static pllBoolean tipHomogeneityCheckerPars(pllInstance *tr, nodeptr p, int grouping)
232 {
233 if(isTip(p->number, tr->mxtips))
234 {
235 if(tr->constraintVector[p->number] != grouping)
236 return PLL_FALSE;
237 else
238 return PLL_TRUE;
239 }
240 else
241 {
242 return (tipHomogeneityCheckerPars(tr, p->next->back, grouping) && tipHomogeneityCheckerPars(tr, p->next->next->back,grouping));
243 }
244 }
245
getxnodeLocal(nodeptr p)246 static void getxnodeLocal (nodeptr p)
247 {
248 nodeptr s;
249
250 if((s = p->next)->xPars || (s = s->next)->xPars)
251 {
252 p->xPars = s->xPars;
253 s->xPars = 0;
254 }
255
256 assert(p->next->xPars || p->next->next->xPars || p->xPars);
257
258 }
259
computeTraversalInfoParsimony(nodeptr p,int * ti,int * counter,int maxTips,pllBoolean full)260 static void computeTraversalInfoParsimony(nodeptr p, int *ti, int *counter, int maxTips, pllBoolean full)
261 {
262 nodeptr
263 q = p->next->back,
264 r = p->next->next->back;
265
266 if(! p->xPars)
267 getxnodeLocal(p);
268
269 if(full)
270 {
271 if(q->number > maxTips)
272 computeTraversalInfoParsimony(q, ti, counter, maxTips, full);
273
274 if(r->number > maxTips)
275 computeTraversalInfoParsimony(r, ti, counter, maxTips, full);
276 }
277 else
278 {
279 if(q->number > maxTips && !q->xPars)
280 computeTraversalInfoParsimony(q, ti, counter, maxTips, full);
281
282 if(r->number > maxTips && !r->xPars)
283 computeTraversalInfoParsimony(r, ti, counter, maxTips, full);
284 }
285
286
287 ti[*counter] = p->number;
288 ti[*counter + 1] = q->number;
289 ti[*counter + 2] = r->number;
290 *counter = *counter + 4;
291 }
292
293
294
295
296
297
298
299 #if (defined(__SSE3) || defined(__AVX))
300
newviewParsimonyIterativeFast(pllInstance * tr,partitionList * pr)301 static void newviewParsimonyIterativeFast(pllInstance *tr, partitionList *pr)
302 {
303 INT_TYPE
304 allOne = SET_ALL_BITS_ONE;
305
306 int
307 model,
308 *ti = tr->ti,
309 count = ti[0],
310 index;
311
312 for(index = 4; index < count; index += 4)
313 {
314 unsigned int
315 totalScore = 0;
316
317 size_t
318 pNumber = (size_t)ti[index],
319 qNumber = (size_t)ti[index + 1],
320 rNumber = (size_t)ti[index + 2];
321
322 for(model = 0; model < pr->numberOfPartitions; model++)
323 {
324 size_t
325 k,
326 states = pr->partitionData[model]->states,
327 width = pr->partitionData[model]->parsimonyLength;
328
329 unsigned int
330 i;
331
332 switch(states)
333 {
334 case 2:
335 {
336 parsimonyNumber
337 *left[2],
338 *right[2],
339 *this[2];
340
341 for(k = 0; k < 2; k++)
342 {
343 left[k] = &(pr->partitionData[model]->parsVect[(width * 2 * qNumber) + width * k]);
344 right[k] = &(pr->partitionData[model]->parsVect[(width * 2 * rNumber) + width * k]);
345 this[k] = &(pr->partitionData[model]->parsVect[(width * 2 * pNumber) + width * k]);
346 }
347
348 for(i = 0; i < width; i += INTS_PER_VECTOR)
349 {
350 INT_TYPE
351 s_r, s_l, v_N,
352 l_A, l_C,
353 v_A, v_C;
354
355 s_l = VECTOR_LOAD((CAST)(&left[0][i]));
356 s_r = VECTOR_LOAD((CAST)(&right[0][i]));
357 l_A = VECTOR_BIT_AND(s_l, s_r);
358 v_A = VECTOR_BIT_OR(s_l, s_r);
359
360 s_l = VECTOR_LOAD((CAST)(&left[1][i]));
361 s_r = VECTOR_LOAD((CAST)(&right[1][i]));
362 l_C = VECTOR_BIT_AND(s_l, s_r);
363 v_C = VECTOR_BIT_OR(s_l, s_r);
364
365 v_N = VECTOR_BIT_OR(l_A, l_C);
366
367 VECTOR_STORE((CAST)(&this[0][i]), VECTOR_BIT_OR(l_A, VECTOR_AND_NOT(v_N, v_A)));
368 VECTOR_STORE((CAST)(&this[1][i]), VECTOR_BIT_OR(l_C, VECTOR_AND_NOT(v_N, v_C)));
369
370 v_N = VECTOR_AND_NOT(v_N, allOne);
371
372 totalScore += vectorPopcount(v_N);
373 }
374 }
375 break;
376 case 4:
377 {
378 parsimonyNumber
379 *left[4],
380 *right[4],
381 *this[4];
382
383 for(k = 0; k < 4; k++)
384 {
385 left[k] = &(pr->partitionData[model]->parsVect[(width * 4 * qNumber) + width * k]);
386 right[k] = &(pr->partitionData[model]->parsVect[(width * 4 * rNumber) + width * k]);
387 this[k] = &(pr->partitionData[model]->parsVect[(width * 4 * pNumber) + width * k]);
388 }
389
390 for(i = 0; i < width; i += INTS_PER_VECTOR)
391 {
392 INT_TYPE
393 s_r, s_l, v_N,
394 l_A, l_C, l_G, l_T,
395 v_A, v_C, v_G, v_T;
396
397 s_l = VECTOR_LOAD((CAST)(&left[0][i]));
398 s_r = VECTOR_LOAD((CAST)(&right[0][i]));
399 l_A = VECTOR_BIT_AND(s_l, s_r);
400 v_A = VECTOR_BIT_OR(s_l, s_r);
401
402 s_l = VECTOR_LOAD((CAST)(&left[1][i]));
403 s_r = VECTOR_LOAD((CAST)(&right[1][i]));
404 l_C = VECTOR_BIT_AND(s_l, s_r);
405 v_C = VECTOR_BIT_OR(s_l, s_r);
406
407 s_l = VECTOR_LOAD((CAST)(&left[2][i]));
408 s_r = VECTOR_LOAD((CAST)(&right[2][i]));
409 l_G = VECTOR_BIT_AND(s_l, s_r);
410 v_G = VECTOR_BIT_OR(s_l, s_r);
411
412 s_l = VECTOR_LOAD((CAST)(&left[3][i]));
413 s_r = VECTOR_LOAD((CAST)(&right[3][i]));
414 l_T = VECTOR_BIT_AND(s_l, s_r);
415 v_T = VECTOR_BIT_OR(s_l, s_r);
416
417 v_N = VECTOR_BIT_OR(VECTOR_BIT_OR(l_A, l_C), VECTOR_BIT_OR(l_G, l_T));
418
419 VECTOR_STORE((CAST)(&this[0][i]), VECTOR_BIT_OR(l_A, VECTOR_AND_NOT(v_N, v_A)));
420 VECTOR_STORE((CAST)(&this[1][i]), VECTOR_BIT_OR(l_C, VECTOR_AND_NOT(v_N, v_C)));
421 VECTOR_STORE((CAST)(&this[2][i]), VECTOR_BIT_OR(l_G, VECTOR_AND_NOT(v_N, v_G)));
422 VECTOR_STORE((CAST)(&this[3][i]), VECTOR_BIT_OR(l_T, VECTOR_AND_NOT(v_N, v_T)));
423
424 v_N = VECTOR_AND_NOT(v_N, allOne);
425
426 totalScore += vectorPopcount(v_N);
427 }
428 }
429 break;
430 case 20:
431 {
432 parsimonyNumber
433 *left[20],
434 *right[20],
435 *this[20];
436
437 for(k = 0; k < 20; k++)
438 {
439 left[k] = &(pr->partitionData[model]->parsVect[(width * 20 * qNumber) + width * k]);
440 right[k] = &(pr->partitionData[model]->parsVect[(width * 20 * rNumber) + width * k]);
441 this[k] = &(pr->partitionData[model]->parsVect[(width * 20 * pNumber) + width * k]);
442 }
443
444 for(i = 0; i < width; i += INTS_PER_VECTOR)
445 {
446 size_t j;
447
448 INT_TYPE
449 s_r, s_l,
450 v_N = SET_ALL_BITS_ZERO,
451 l_A[20],
452 v_A[20];
453
454 for(j = 0; j < 20; j++)
455 {
456 s_l = VECTOR_LOAD((CAST)(&left[j][i]));
457 s_r = VECTOR_LOAD((CAST)(&right[j][i]));
458 l_A[j] = VECTOR_BIT_AND(s_l, s_r);
459 v_A[j] = VECTOR_BIT_OR(s_l, s_r);
460
461 v_N = VECTOR_BIT_OR(v_N, l_A[j]);
462 }
463
464 for(j = 0; j < 20; j++)
465 VECTOR_STORE((CAST)(&this[j][i]), VECTOR_BIT_OR(l_A[j], VECTOR_AND_NOT(v_N, v_A[j])));
466
467 v_N = VECTOR_AND_NOT(v_N, allOne);
468
469 totalScore += vectorPopcount(v_N);
470 }
471 }
472 break;
473 default:
474 {
475 parsimonyNumber
476 *left[32],
477 *right[32],
478 *this[32];
479
480 assert(states <= 32);
481
482 for(k = 0; k < states; k++)
483 {
484 left[k] = &(pr->partitionData[model]->parsVect[(width * states * qNumber) + width * k]);
485 right[k] = &(pr->partitionData[model]->parsVect[(width * states * rNumber) + width * k]);
486 this[k] = &(pr->partitionData[model]->parsVect[(width * states * pNumber) + width * k]);
487 }
488
489 for(i = 0; i < width; i += INTS_PER_VECTOR)
490 {
491 size_t j;
492
493 INT_TYPE
494 s_r, s_l,
495 v_N = SET_ALL_BITS_ZERO,
496 l_A[32],
497 v_A[32];
498
499 for(j = 0; j < states; j++)
500 {
501 s_l = VECTOR_LOAD((CAST)(&left[j][i]));
502 s_r = VECTOR_LOAD((CAST)(&right[j][i]));
503 l_A[j] = VECTOR_BIT_AND(s_l, s_r);
504 v_A[j] = VECTOR_BIT_OR(s_l, s_r);
505
506 v_N = VECTOR_BIT_OR(v_N, l_A[j]);
507 }
508
509 for(j = 0; j < states; j++)
510 VECTOR_STORE((CAST)(&this[j][i]), VECTOR_BIT_OR(l_A[j], VECTOR_AND_NOT(v_N, v_A[j])));
511
512 v_N = VECTOR_AND_NOT(v_N, allOne);
513
514 totalScore += vectorPopcount(v_N);
515 }
516 }
517 }
518 }
519
520 tr->parsimonyScore[pNumber] = totalScore + tr->parsimonyScore[rNumber] + tr->parsimonyScore[qNumber];
521 }
522 }
523
524
525
evaluateParsimonyIterativeFast(pllInstance * tr,partitionList * pr)526 static unsigned int evaluateParsimonyIterativeFast(pllInstance *tr, partitionList *pr)
527 {
528 INT_TYPE
529 allOne = SET_ALL_BITS_ONE;
530
531 size_t
532 pNumber = (size_t)tr->ti[1],
533 qNumber = (size_t)tr->ti[2];
534
535 int
536 model;
537
538 unsigned int
539 bestScore = tr->bestParsimony,
540 sum;
541
542 if(tr->ti[0] > 4)
543 newviewParsimonyIterativeFast(tr, pr);
544
545 sum = tr->parsimonyScore[pNumber] + tr->parsimonyScore[qNumber];
546
547 for(model = 0; model < pr->numberOfPartitions; model++)
548 {
549 size_t
550 k,
551 states = pr->partitionData[model]->states,
552 width = pr->partitionData[model]->parsimonyLength,
553 i;
554
555 switch(states)
556 {
557 case 2:
558 {
559 parsimonyNumber
560 *left[2],
561 *right[2];
562
563 for(k = 0; k < 2; k++)
564 {
565 left[k] = &(pr->partitionData[model]->parsVect[(width * 2 * qNumber) + width * k]);
566 right[k] = &(pr->partitionData[model]->parsVect[(width * 2 * pNumber) + width * k]);
567 }
568
569 for(i = 0; i < width; i += INTS_PER_VECTOR)
570 {
571 INT_TYPE
572 l_A = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[0][i])), VECTOR_LOAD((CAST)(&right[0][i]))),
573 l_C = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[1][i])), VECTOR_LOAD((CAST)(&right[1][i]))),
574 v_N = VECTOR_BIT_OR(l_A, l_C);
575
576 v_N = VECTOR_AND_NOT(v_N, allOne);
577
578 sum += vectorPopcount(v_N);
579
580 if(sum >= bestScore)
581 return sum;
582 }
583 }
584 break;
585 case 4:
586 {
587 parsimonyNumber
588 *left[4],
589 *right[4];
590
591 for(k = 0; k < 4; k++)
592 {
593 left[k] = &(pr->partitionData[model]->parsVect[(width * 4 * qNumber) + width * k]);
594 right[k] = &(pr->partitionData[model]->parsVect[(width * 4 * pNumber) + width * k]);
595 }
596
597 for(i = 0; i < width; i += INTS_PER_VECTOR)
598 {
599 INT_TYPE
600 l_A = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[0][i])), VECTOR_LOAD((CAST)(&right[0][i]))),
601 l_C = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[1][i])), VECTOR_LOAD((CAST)(&right[1][i]))),
602 l_G = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[2][i])), VECTOR_LOAD((CAST)(&right[2][i]))),
603 l_T = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[3][i])), VECTOR_LOAD((CAST)(&right[3][i]))),
604 v_N = VECTOR_BIT_OR(VECTOR_BIT_OR(l_A, l_C), VECTOR_BIT_OR(l_G, l_T));
605
606 v_N = VECTOR_AND_NOT(v_N, allOne);
607
608 sum += vectorPopcount(v_N);
609
610 if(sum >= bestScore)
611 return sum;
612 }
613 }
614 break;
615 case 20:
616 {
617 parsimonyNumber
618 *left[20],
619 *right[20];
620
621 for(k = 0; k < 20; k++)
622 {
623 left[k] = &(pr->partitionData[model]->parsVect[(width * 20 * qNumber) + width * k]);
624 right[k] = &(pr->partitionData[model]->parsVect[(width * 20 * pNumber) + width * k]);
625 }
626
627 for(i = 0; i < width; i += INTS_PER_VECTOR)
628 {
629 int
630 j;
631
632 INT_TYPE
633 l_A,
634 v_N = SET_ALL_BITS_ZERO;
635
636 for(j = 0; j < 20; j++)
637 {
638 l_A = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[j][i])), VECTOR_LOAD((CAST)(&right[j][i])));
639 v_N = VECTOR_BIT_OR(l_A, v_N);
640 }
641
642 v_N = VECTOR_AND_NOT(v_N, allOne);
643
644 sum += vectorPopcount(v_N);
645
646 if(sum >= bestScore)
647 return sum;
648 }
649 }
650 break;
651 default:
652 {
653 parsimonyNumber
654 *left[32],
655 *right[32];
656
657 assert(states <= 32);
658
659 for(k = 0; k < states; k++)
660 {
661 left[k] = &(pr->partitionData[model]->parsVect[(width * states * qNumber) + width * k]);
662 right[k] = &(pr->partitionData[model]->parsVect[(width * states * pNumber) + width * k]);
663 }
664
665 for(i = 0; i < width; i += INTS_PER_VECTOR)
666 {
667 size_t
668 j;
669
670 INT_TYPE
671 l_A,
672 v_N = SET_ALL_BITS_ZERO;
673
674 for(j = 0; j < states; j++)
675 {
676 l_A = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[j][i])), VECTOR_LOAD((CAST)(&right[j][i])));
677 v_N = VECTOR_BIT_OR(l_A, v_N);
678 }
679
680 v_N = VECTOR_AND_NOT(v_N, allOne);
681
682 sum += vectorPopcount(v_N);
683
684 if(sum >= bestScore)
685 return sum;
686 }
687 }
688 }
689 }
690
691 return sum;
692 }
693
694
695 #else
newviewParsimonyIterativeFast(pllInstance * tr,partitionList * pr)696 static void newviewParsimonyIterativeFast(pllInstance *tr, partitionList * pr)
697 {
698 int
699 model,
700 *ti = tr->ti,
701 count = ti[0],
702 index;
703
704 for(index = 4; index < count; index += 4)
705 {
706 unsigned int
707 totalScore = 0;
708
709 size_t
710 pNumber = (size_t)ti[index],
711 qNumber = (size_t)ti[index + 1],
712 rNumber = (size_t)ti[index + 2];
713
714 for(model = 0; model < pr->numberOfPartitions; model++)
715 {
716 size_t
717 k,
718 states = pr->partitionData[model]->states,
719 width = pr->partitionData[model]->parsimonyLength;
720
721 unsigned int
722 i;
723
724 switch(states)
725 {
726 case 2:
727 {
728 parsimonyNumber
729 *left[2],
730 *right[2],
731 *this[2];
732
733 parsimonyNumber
734 o_A,
735 o_C,
736 t_A,
737 t_C,
738 t_N;
739
740 for(k = 0; k < 2; k++)
741 {
742 left[k] = &(pr->partitionData[model]->parsVect[(width * 2 * qNumber) + width * k]);
743 right[k] = &(pr->partitionData[model]->parsVect[(width * 2 * rNumber) + width * k]);
744 this[k] = &(pr->partitionData[model]->parsVect[(width * 2 * pNumber) + width * k]);
745 }
746
747 for(i = 0; i < width; i++)
748 {
749 t_A = left[0][i] & right[0][i];
750 t_C = left[1][i] & right[1][i];
751
752 o_A = left[0][i] | right[0][i];
753 o_C = left[1][i] | right[1][i];
754
755 t_N = ~(t_A | t_C);
756
757 this[0][i] = t_A | (t_N & o_A);
758 this[1][i] = t_C | (t_N & o_C);
759
760 totalScore += ((unsigned int) __builtin_popcount(t_N));
761 }
762 }
763 break;
764 case 4:
765 {
766 parsimonyNumber
767 *left[4],
768 *right[4],
769 *this[4];
770
771 for(k = 0; k < 4; k++)
772 {
773 left[k] = &(pr->partitionData[model]->parsVect[(width * 4 * qNumber) + width * k]);
774 right[k] = &(pr->partitionData[model]->parsVect[(width * 4 * rNumber) + width * k]);
775 this[k] = &(pr->partitionData[model]->parsVect[(width * 4 * pNumber) + width * k]);
776 }
777
778 parsimonyNumber
779 o_A,
780 o_C,
781 o_G,
782 o_T,
783 t_A,
784 t_C,
785 t_G,
786 t_T,
787 t_N;
788
789 for(i = 0; i < width; i++)
790 {
791 t_A = left[0][i] & right[0][i];
792 t_C = left[1][i] & right[1][i];
793 t_G = left[2][i] & right[2][i];
794 t_T = left[3][i] & right[3][i];
795
796 o_A = left[0][i] | right[0][i];
797 o_C = left[1][i] | right[1][i];
798 o_G = left[2][i] | right[2][i];
799 o_T = left[3][i] | right[3][i];
800
801 t_N = ~(t_A | t_C | t_G | t_T);
802
803 this[0][i] = t_A | (t_N & o_A);
804 this[1][i] = t_C | (t_N & o_C);
805 this[2][i] = t_G | (t_N & o_G);
806 this[3][i] = t_T | (t_N & o_T);
807
808 totalScore += ((unsigned int) __builtin_popcount(t_N));
809 }
810 }
811 break;
812 case 20:
813 {
814 parsimonyNumber
815 *left[20],
816 *right[20],
817 *this[20];
818
819 parsimonyNumber
820 o_A[20],
821 t_A[20],
822 t_N;
823
824 for(k = 0; k < 20; k++)
825 {
826 left[k] = &(pr->partitionData[model]->parsVect[(width * 20 * qNumber) + width * k]);
827 right[k] = &(pr->partitionData[model]->parsVect[(width * 20 * rNumber) + width * k]);
828 this[k] = &(pr->partitionData[model]->parsVect[(width * 20 * pNumber) + width * k]);
829 }
830
831 for(i = 0; i < width; i++)
832 {
833 size_t k;
834
835 t_N = 0;
836
837 for(k = 0; k < 20; k++)
838 {
839 t_A[k] = left[k][i] & right[k][i];
840 o_A[k] = left[k][i] | right[k][i];
841 t_N = t_N | t_A[k];
842 }
843
844 t_N = ~t_N;
845
846 for(k = 0; k < 20; k++)
847 this[k][i] = t_A[k] | (t_N & o_A[k]);
848
849 totalScore += ((unsigned int) __builtin_popcount(t_N));
850 }
851 }
852 break;
853 default:
854 {
855 parsimonyNumber
856 *left[32],
857 *right[32],
858 *this[32];
859
860 parsimonyNumber
861 o_A[32],
862 t_A[32],
863 t_N;
864
865 assert(states <= 32);
866
867 for(k = 0; k < states; k++)
868 {
869 left[k] = &(pr->partitionData[model]->parsVect[(width * states * qNumber) + width * k]);
870 right[k] = &(pr->partitionData[model]->parsVect[(width * states * rNumber) + width * k]);
871 this[k] = &(pr->partitionData[model]->parsVect[(width * states * pNumber) + width * k]);
872 }
873
874 for(i = 0; i < width; i++)
875 {
876 t_N = 0;
877
878 for(k = 0; k < states; k++)
879 {
880 t_A[k] = left[k][i] & right[k][i];
881 o_A[k] = left[k][i] | right[k][i];
882 t_N = t_N | t_A[k];
883 }
884
885 t_N = ~t_N;
886
887 for(k = 0; k < states; k++)
888 this[k][i] = t_A[k] | (t_N & o_A[k]);
889
890 totalScore += ((unsigned int) __builtin_popcount(t_N));
891 }
892 }
893 }
894 }
895
896 tr->parsimonyScore[pNumber] = totalScore + tr->parsimonyScore[rNumber] + tr->parsimonyScore[qNumber];
897 }
898 }
899
900
evaluateParsimonyIterativeFast(pllInstance * tr,partitionList * pr)901 static unsigned int evaluateParsimonyIterativeFast(pllInstance *tr, partitionList * pr)
902 {
903 size_t
904 pNumber = (size_t)tr->ti[1],
905 qNumber = (size_t)tr->ti[2];
906
907 int
908 model;
909
910 unsigned int
911 bestScore = tr->bestParsimony,
912 sum;
913
914 if(tr->ti[0] > 4)
915 newviewParsimonyIterativeFast(tr, pr);
916
917 sum = tr->parsimonyScore[pNumber] + tr->parsimonyScore[qNumber];
918
919 for(model = 0; model < pr->numberOfPartitions; model++)
920 {
921 size_t
922 k,
923 states = pr->partitionData[model]->states,
924 width = pr->partitionData[model]->parsimonyLength,
925 i;
926
927 switch(states)
928 {
929 case 2:
930 {
931 parsimonyNumber
932 t_A,
933 t_C,
934 t_N,
935 *left[2],
936 *right[2];
937
938 for(k = 0; k < 2; k++)
939 {
940 left[k] = &(pr->partitionData[model]->parsVect[(width * 2 * qNumber) + width * k]);
941 right[k] = &(pr->partitionData[model]->parsVect[(width * 2 * pNumber) + width * k]);
942 }
943
944 for(i = 0; i < width; i++)
945 {
946 t_A = left[0][i] & right[0][i];
947 t_C = left[1][i] & right[1][i];
948
949 t_N = ~(t_A | t_C);
950
951 sum += ((unsigned int) __builtin_popcount(t_N));
952
953 if(sum >= bestScore)
954 return sum;
955 }
956 }
957 break;
958 case 4:
959 {
960 parsimonyNumber
961 t_A,
962 t_C,
963 t_G,
964 t_T,
965 t_N,
966 *left[4],
967 *right[4];
968
969 for(k = 0; k < 4; k++)
970 {
971 left[k] = &(pr->partitionData[model]->parsVect[(width * 4 * qNumber) + width * k]);
972 right[k] = &(pr->partitionData[model]->parsVect[(width * 4 * pNumber) + width * k]);
973 }
974
975 for(i = 0; i < width; i++)
976 {
977 t_A = left[0][i] & right[0][i];
978 t_C = left[1][i] & right[1][i];
979 t_G = left[2][i] & right[2][i];
980 t_T = left[3][i] & right[3][i];
981
982 t_N = ~(t_A | t_C | t_G | t_T);
983
984 sum += ((unsigned int) __builtin_popcount(t_N));
985
986 if(sum >= bestScore)
987 return sum;
988 }
989 }
990 break;
991 case 20:
992 {
993 parsimonyNumber
994 t_A,
995 t_N,
996 *left[20],
997 *right[20];
998
999 for(k = 0; k < 20; k++)
1000 {
1001 left[k] = &(pr->partitionData[model]->parsVect[(width * 20 * qNumber) + width * k]);
1002 right[k] = &(pr->partitionData[model]->parsVect[(width * 20 * pNumber) + width * k]);
1003 }
1004
1005 for(i = 0; i < width; i++)
1006 {
1007 t_N = 0;
1008
1009 for(k = 0; k < 20; k++)
1010 {
1011 t_A = left[k][i] & right[k][i];
1012 t_N = t_N | t_A;
1013 }
1014
1015 t_N = ~t_N;
1016
1017 sum += ((unsigned int) __builtin_popcount(t_N));
1018
1019 if(sum >= bestScore)
1020 return sum;
1021 }
1022 }
1023 break;
1024 default:
1025 {
1026 parsimonyNumber
1027 t_A,
1028 t_N,
1029 *left[32],
1030 *right[32];
1031
1032 assert(states <= 32);
1033
1034 for(k = 0; k < states; k++)
1035 {
1036 left[k] = &(pr->partitionData[model]->parsVect[(width * states * qNumber) + width * k]);
1037 right[k] = &(pr->partitionData[model]->parsVect[(width * states * pNumber) + width * k]);
1038 }
1039
1040 for(i = 0; i < width; i++)
1041 {
1042 t_N = 0;
1043
1044 for(k = 0; k < states; k++)
1045 {
1046 t_A = left[k][i] & right[k][i];
1047 t_N = t_N | t_A;
1048 }
1049
1050 t_N = ~t_N;
1051
1052 sum += ((unsigned int) __builtin_popcount(t_N));
1053
1054 if(sum >= bestScore)
1055 return sum;
1056 }
1057 }
1058 }
1059 }
1060
1061 return sum;
1062 }
1063
1064 #endif
1065
1066
1067
1068
1069
1070
evaluateParsimony(pllInstance * tr,partitionList * pr,nodeptr p,pllBoolean full)1071 static unsigned int evaluateParsimony(pllInstance *tr, partitionList *pr, nodeptr p, pllBoolean full)
1072 {
1073 volatile unsigned int result;
1074 nodeptr q = p->back;
1075 int
1076 *ti = tr->ti,
1077 counter = 4;
1078
1079 ti[1] = p->number;
1080 ti[2] = q->number;
1081
1082 if(full)
1083 {
1084 if(p->number > tr->mxtips)
1085 computeTraversalInfoParsimony(p, ti, &counter, tr->mxtips, full);
1086 if(q->number > tr->mxtips)
1087 computeTraversalInfoParsimony(q, ti, &counter, tr->mxtips, full);
1088 }
1089 else
1090 {
1091 if(p->number > tr->mxtips && !p->xPars)
1092 computeTraversalInfoParsimony(p, ti, &counter, tr->mxtips, full);
1093 if(q->number > tr->mxtips && !q->xPars)
1094 computeTraversalInfoParsimony(q, ti, &counter, tr->mxtips, full);
1095 }
1096
1097 ti[0] = counter;
1098
1099 result = evaluateParsimonyIterativeFast(tr, pr);
1100
1101 return result;
1102 }
1103
1104
newviewParsimony(pllInstance * tr,partitionList * pr,nodeptr p)1105 static void newviewParsimony(pllInstance *tr, partitionList *pr, nodeptr p)
1106 {
1107 if(p->number <= tr->mxtips)
1108 return;
1109
1110 {
1111 int
1112 counter = 4;
1113
1114 computeTraversalInfoParsimony(p, tr->ti, &counter, tr->mxtips, PLL_FALSE);
1115 tr->ti[0] = counter;
1116
1117 newviewParsimonyIterativeFast(tr, pr);
1118 }
1119 }
1120
1121
1122
1123
1124
1125 /****************************************************************************************************************************************/
1126
insertParsimony(pllInstance * tr,partitionList * pr,nodeptr p,nodeptr q)1127 static void insertParsimony (pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q)
1128 {
1129 nodeptr r;
1130
1131 r = q->back;
1132
1133 hookupDefault(p->next, q);
1134 hookupDefault(p->next->next, r);
1135
1136 newviewParsimony(tr, pr, p);
1137 }
1138
1139
1140
buildNewTip(pllInstance * tr,nodeptr p)1141 static nodeptr buildNewTip (pllInstance *tr, nodeptr p)
1142 {
1143 nodeptr q;
1144
1145 q = tr->nodep[(tr->nextnode)++];
1146 hookupDefault(p, q);
1147 q->next->back = (nodeptr)NULL;
1148 q->next->next->back = (nodeptr)NULL;
1149
1150 return q;
1151 }
1152
buildSimpleTree(pllInstance * tr,partitionList * pr,int ip,int iq,int ir)1153 static void buildSimpleTree (pllInstance *tr, partitionList *pr, int ip, int iq, int ir)
1154 {
1155 nodeptr p, s;
1156 int i;
1157
1158 i = PLL_MIN(ip, iq);
1159 if (ir < i) i = ir;
1160 tr->start = tr->nodep[i];
1161 tr->ntips = 3;
1162 p = tr->nodep[ip];
1163 hookupDefault(p, tr->nodep[iq]);
1164 s = buildNewTip(tr, tr->nodep[ir]);
1165 insertParsimony(tr, pr, s, p);
1166 }
1167
1168
testInsertParsimony(pllInstance * tr,partitionList * pr,nodeptr p,nodeptr q,pllBoolean saveBranches)1169 static void testInsertParsimony (pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q, pllBoolean saveBranches)
1170 {
1171 unsigned int
1172 mp;
1173
1174 nodeptr
1175 r = q->back;
1176
1177 pllBoolean
1178 doIt = PLL_TRUE;
1179
1180 int numBranches = pr->perGeneBranchLengths?pr->numberOfPartitions:1;
1181
1182 if(tr->grouped)
1183 {
1184 int
1185 rNumber = tr->constraintVector[r->number],
1186 qNumber = tr->constraintVector[q->number],
1187 pNumber = tr->constraintVector[p->number];
1188
1189 doIt = PLL_FALSE;
1190
1191 if(pNumber == -9)
1192 pNumber = checkerPars(tr, p->back);
1193 if(pNumber == -9)
1194 doIt = PLL_TRUE;
1195 else
1196 {
1197 if(qNumber == -9)
1198 qNumber = checkerPars(tr, q);
1199
1200 if(rNumber == -9)
1201 rNumber = checkerPars(tr, r);
1202
1203 if(pNumber == rNumber || pNumber == qNumber)
1204 doIt = PLL_TRUE;
1205 }
1206 }
1207
1208 if(doIt)
1209 {
1210 double
1211 *z = rax_malloc(numBranches*sizeof(double));
1212
1213 if(saveBranches)
1214 {
1215 int i;
1216
1217 for(i = 0; i < numBranches; i++)
1218 z[i] = q->z[i];
1219 }
1220
1221 insertParsimony(tr, pr, p, q);
1222
1223 mp = evaluateParsimony(tr, pr, p->next->next, PLL_FALSE);
1224
1225 if(mp < tr->bestParsimony)
1226 {
1227 tr->bestParsimony = mp;
1228 tr->insertNode = q;
1229 tr->removeNode = p;
1230 }
1231
1232 if(saveBranches)
1233 hookup(q, r, z, numBranches);
1234 else
1235 hookupDefault(q, r);
1236
1237 p->next->next->back = p->next->back = (nodeptr) NULL;
1238 rax_free(z);
1239 }
1240
1241 return;
1242 }
1243
1244
restoreTreeParsimony(pllInstance * tr,partitionList * pr,nodeptr p,nodeptr q)1245 static void restoreTreeParsimony(pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q)
1246 {
1247 nodeptr
1248 r = q->back;
1249
1250 int counter = 4;
1251
1252 hookupDefault(p->next, q);
1253 hookupDefault(p->next->next, r);
1254
1255 computeTraversalInfoParsimony(p, tr->ti, &counter, tr->mxtips, PLL_FALSE);
1256 tr->ti[0] = counter;
1257
1258 newviewParsimonyIterativeFast(tr, pr);
1259 }
1260
1261
addTraverseParsimony(pllInstance * tr,partitionList * pr,nodeptr p,nodeptr q,int mintrav,int maxtrav,pllBoolean doAll,pllBoolean saveBranches)1262 static void addTraverseParsimony (pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q, int mintrav, int maxtrav, pllBoolean doAll, pllBoolean saveBranches)
1263 {
1264 if (doAll || (--mintrav <= 0))
1265 testInsertParsimony(tr, pr, p, q, saveBranches);
1266
1267 if (((q->number > tr->mxtips)) && ((--maxtrav > 0) || doAll))
1268 {
1269 addTraverseParsimony(tr, pr, p, q->next->back, mintrav, maxtrav, doAll, saveBranches);
1270 addTraverseParsimony(tr, pr, p, q->next->next->back, mintrav, maxtrav, doAll, saveBranches);
1271 }
1272 }
1273
1274
1275
1276
1277
makePermutationFast(int * perm,int n,pllInstance * tr)1278 static void makePermutationFast(int *perm, int n, pllInstance *tr)
1279 {
1280 int
1281 i,
1282 j,
1283 k;
1284
1285 for (i = 1; i <= n; i++)
1286 perm[i] = i;
1287
1288 for (i = 1; i <= n; i++)
1289 {
1290 double d = randum(&tr->randomNumberSeed);
1291
1292 k = (int)((double)(n + 1 - i) * d);
1293
1294 j = perm[i];
1295
1296 perm[i] = perm[i + k];
1297 perm[i + k] = j;
1298 }
1299 }
1300
1301 //static nodeptr removeNodeParsimony (nodeptr p, tree *tr)
removeNodeParsimony(nodeptr p)1302 static nodeptr removeNodeParsimony (nodeptr p)
1303 {
1304 nodeptr q, r;
1305
1306 q = p->next->back;
1307 r = p->next->next->back;
1308
1309 hookupDefault(q, r);
1310
1311 p->next->next->back = p->next->back = (node *) NULL;
1312
1313 return q;
1314 }
1315
rearrangeParsimony(pllInstance * tr,partitionList * pr,nodeptr p,int mintrav,int maxtrav,pllBoolean doAll)1316 static int rearrangeParsimony(pllInstance *tr, partitionList *pr, nodeptr p, int mintrav, int maxtrav, pllBoolean doAll)
1317 {
1318 nodeptr
1319 p1,
1320 p2,
1321 q,
1322 q1,
1323 q2;
1324
1325 int
1326 mintrav2;
1327
1328 pllBoolean
1329 doP = PLL_TRUE,
1330 doQ = PLL_TRUE;
1331
1332 if (maxtrav > tr->ntips - 3)
1333 maxtrav = tr->ntips - 3;
1334
1335 assert(mintrav == 1);
1336
1337 if(maxtrav < mintrav)
1338 return 0;
1339
1340 q = p->back;
1341
1342 if(tr->constrained)
1343 {
1344 if(! tipHomogeneityCheckerPars(tr, p->back, 0))
1345 doP = PLL_FALSE;
1346
1347 if(! tipHomogeneityCheckerPars(tr, q->back, 0))
1348 doQ = PLL_FALSE;
1349
1350 if(doQ == PLL_FALSE && doP == PLL_FALSE)
1351 return 0;
1352 }
1353
1354 if((p->number > tr->mxtips) && doP)
1355 {
1356 p1 = p->next->back;
1357 p2 = p->next->next->back;
1358
1359 if ((p1->number > tr->mxtips) || (p2->number > tr->mxtips))
1360 {
1361 //removeNodeParsimony(p, tr);
1362 removeNodeParsimony(p);
1363
1364 if ((p1->number > tr->mxtips))
1365 {
1366 addTraverseParsimony(tr, pr, p, p1->next->back, mintrav, maxtrav, doAll, PLL_FALSE);
1367 addTraverseParsimony(tr, pr, p, p1->next->next->back, mintrav, maxtrav, doAll, PLL_FALSE);
1368 }
1369
1370 if ((p2->number > tr->mxtips))
1371 {
1372 addTraverseParsimony(tr, pr, p, p2->next->back, mintrav, maxtrav, doAll, PLL_FALSE);
1373 addTraverseParsimony(tr, pr, p, p2->next->next->back, mintrav, maxtrav, doAll, PLL_FALSE);
1374 }
1375
1376
1377 hookupDefault(p->next, p1);
1378 hookupDefault(p->next->next, p2);
1379
1380 newviewParsimony(tr, pr, p);
1381 }
1382 }
1383
1384 if ((q->number > tr->mxtips) && (maxtrav > 0) && doQ)
1385 {
1386 q1 = q->next->back;
1387 q2 = q->next->next->back;
1388
1389 if (
1390 (
1391 (q1->number > tr->mxtips) &&
1392 ((q1->next->back->number > tr->mxtips) || (q1->next->next->back->number > tr->mxtips))
1393 )
1394 ||
1395 (
1396 (q2->number > tr->mxtips) &&
1397 ((q2->next->back->number > tr->mxtips) || (q2->next->next->back->number > tr->mxtips))
1398 )
1399 )
1400 {
1401
1402 //removeNodeParsimony(q, tr);
1403 removeNodeParsimony(q);
1404
1405 mintrav2 = mintrav > 2 ? mintrav : 2;
1406
1407 if ((q1->number > tr->mxtips))
1408 {
1409 addTraverseParsimony(tr, pr, q, q1->next->back, mintrav2 , maxtrav, doAll, PLL_FALSE);
1410 addTraverseParsimony(tr, pr, q, q1->next->next->back, mintrav2 , maxtrav, doAll, PLL_FALSE);
1411 }
1412
1413 if ((q2->number > tr->mxtips))
1414 {
1415 addTraverseParsimony(tr, pr, q, q2->next->back, mintrav2 , maxtrav, doAll, PLL_FALSE);
1416 addTraverseParsimony(tr, pr, q, q2->next->next->back, mintrav2 , maxtrav, doAll, PLL_FALSE);
1417 }
1418
1419 hookupDefault(q->next, q1);
1420 hookupDefault(q->next->next, q2);
1421
1422 newviewParsimony(tr, pr, q);
1423 }
1424 }
1425
1426 return 1;
1427 }
1428
1429
restoreTreeRearrangeParsimony(pllInstance * tr,partitionList * pr)1430 static void restoreTreeRearrangeParsimony(pllInstance *tr, partitionList *pr)
1431 {
1432 removeNodeParsimony(tr->removeNode);
1433 //removeNodeParsimony(tr->removeNode, tr);
1434 restoreTreeParsimony(tr, pr, tr->removeNode, tr->insertNode);
1435 }
1436
1437 /*
1438 static pllBoolean isInformative2(pllInstance *tr, int site)
1439 {
1440 int
1441 informativeCounter = 0,
1442 check[256],
1443 j,
1444 undetermined = 15;
1445
1446 unsigned char
1447 nucleotide,
1448 target = 0;
1449
1450 for(j = 0; j < 256; j++)
1451 check[j] = 0;
1452
1453 for(j = 1; j <= tr->mxtips; j++)
1454 {
1455 nucleotide = tr->yVector[j][site];
1456 check[nucleotide] = check[nucleotide] + 1;
1457 }
1458
1459
1460 if(check[1] > 1)
1461 {
1462 informativeCounter++;
1463 target = target | 1;
1464 }
1465 if(check[2] > 1)
1466 {
1467 informativeCounter++;
1468 target = target | 2;
1469 }
1470 if(check[4] > 1)
1471 {
1472 informativeCounter++;
1473 target = target | 4;
1474 }
1475 if(check[8] > 1)
1476 {
1477 informativeCounter++;
1478 target = target | 8;
1479 }
1480
1481 if(informativeCounter >= 2)
1482 return PLL_TRUE;
1483 else
1484 {
1485 for(j = 0; j < undetermined; j++)
1486 {
1487 if(j == 3 || j == 5 || j == 6 || j == 7 || j == 9 || j == 10 || j == 11 ||
1488 j == 12 || j == 13 || j == 14)
1489 {
1490 if(check[j] > 1)
1491 {
1492 if(!(target & j))
1493 return PLL_TRUE;
1494 }
1495 }
1496 }
1497 }
1498
1499 return PLL_FALSE;
1500 }
1501 */
1502
isInformative(pllInstance * tr,int dataType,int site)1503 static pllBoolean isInformative(pllInstance *tr, int dataType, int site)
1504 {
1505 int
1506 informativeCounter = 0,
1507 check[256],
1508 j,
1509 undetermined = getUndetermined(dataType);
1510
1511 const unsigned int
1512 *bitVector = getBitVector(dataType);
1513
1514 unsigned char
1515 nucleotide;
1516
1517
1518 for(j = 0; j < 256; j++)
1519 check[j] = 0;
1520
1521 for(j = 1; j <= tr->mxtips; j++)
1522 {
1523 nucleotide = tr->yVector[j][site];
1524 check[nucleotide] = check[nucleotide] + 1;
1525 assert(bitVector[nucleotide] > 0);
1526 }
1527
1528 for(j = 0; j < undetermined; j++)
1529 {
1530 if(check[j] > 0)
1531 informativeCounter++;
1532 }
1533
1534 if(informativeCounter <= 1)
1535 return PLL_FALSE;
1536 else
1537 {
1538 for(j = 0; j < undetermined; j++)
1539 {
1540 if(check[j] > 1)
1541 return PLL_TRUE;
1542 }
1543 }
1544
1545 return PLL_FALSE;
1546 }
1547
1548
determineUninformativeSites(pllInstance * tr,partitionList * pr,int * informative)1549 static void determineUninformativeSites(pllInstance *tr, partitionList *pr, int *informative)
1550 {
1551 int
1552 model,
1553 number = 0,
1554 i;
1555
1556 /*
1557 Not all characters are useful in constructing a parsimony tree.
1558 Invariant characters, those that have the same state in all taxa,
1559 are obviously useless and are ignored by the method. Characters in
1560 which a state occurs in only one taxon are also ignored.
1561 All these characters are called parsimony uninformative.
1562
1563 Alternative definition: informative columns contain at least two types
1564 of nucleotides, and each nucleotide must appear at least twice in each
1565 column. Kind of a pain if we intend to check for this when using, e.g.,
1566 amibiguous DNA encoding.
1567 */
1568
1569
1570 for(model = 0; model < pr->numberOfPartitions; model++)
1571 {
1572 for(i = pr->partitionData[model]->lower; i < pr->partitionData[model]->upper; i++)
1573 {
1574 if(isInformative(tr, pr->partitionData[model]->dataType, i))
1575 informative[i] = 1;
1576 else
1577 {
1578 informative[i] = 0;
1579 number++;
1580 }
1581 }
1582 }
1583
1584
1585 /* printf("Uninformative Patterns: %d\n", number); */
1586 }
1587
1588
reorderNodes(pllInstance * tr,nodeptr * np,nodeptr p,int * count)1589 static void reorderNodes(pllInstance *tr, nodeptr *np, nodeptr p, int *count)
1590 {
1591 int i, found = 0;
1592
1593 if((p->number <= tr->mxtips))
1594 return;
1595 else
1596 {
1597 for(i = tr->mxtips + 1; (i <= (tr->mxtips + tr->mxtips - 1)) && (found == 0); i++)
1598 {
1599 if (p == np[i] || p == np[i]->next || p == np[i]->next->next)
1600 {
1601 if(p == np[i])
1602 tr->nodep[*count + tr->mxtips + 1] = np[i];
1603 else
1604 {
1605 if(p == np[i]->next)
1606 tr->nodep[*count + tr->mxtips + 1] = np[i]->next;
1607 else
1608 tr->nodep[*count + tr->mxtips + 1] = np[i]->next->next;
1609 }
1610
1611 found = 1;
1612 *count = *count + 1;
1613 }
1614 }
1615
1616 assert(found != 0);
1617
1618 reorderNodes(tr, np, p->next->back, count);
1619 reorderNodes(tr, np, p->next->next->back, count);
1620 }
1621 }
1622
1623
1624
nodeRectifierPars(pllInstance * tr)1625 static void nodeRectifierPars(pllInstance *tr)
1626 {
1627 nodeptr *np = (nodeptr *)rax_malloc(2 * tr->mxtips * sizeof(nodeptr));
1628 int i;
1629 int count = 0;
1630
1631 tr->start = tr->nodep[1];
1632 tr->rooted = PLL_FALSE;
1633
1634 /* TODO why is tr->rooted set to PLL_FALSE here ?*/
1635
1636 for(i = tr->mxtips + 1; i <= (tr->mxtips + tr->mxtips - 1); i++)
1637 np[i] = tr->nodep[i];
1638
1639 reorderNodes(tr, np, tr->start->back, &count);
1640
1641
1642 rax_free(np);
1643 }
1644
1645
1646
compressDNA(pllInstance * tr,partitionList * pr,int * informative)1647 static void compressDNA(pllInstance *tr, partitionList *pr, int *informative)
1648 {
1649 size_t
1650 totalNodes,
1651 i,
1652 model;
1653
1654 totalNodes = 2 * (size_t)tr->mxtips;
1655
1656
1657
1658 for(model = 0; model < (size_t) pr->numberOfPartitions; model++)
1659 {
1660 size_t
1661 k,
1662 states = (size_t)pr->partitionData[model]->states,
1663 compressedEntries,
1664 compressedEntriesPadded,
1665 entries = 0,
1666 lower = pr->partitionData[model]->lower,
1667 upper = pr->partitionData[model]->upper;
1668
1669 parsimonyNumber
1670 **compressedTips = (parsimonyNumber **)rax_malloc(states * sizeof(parsimonyNumber*)),
1671 *compressedValues = (parsimonyNumber *)rax_malloc(states * sizeof(parsimonyNumber));
1672
1673 for(i = lower; i < upper; i++)
1674 if(informative[i])
1675 entries += (size_t)tr->aliaswgt[i];
1676
1677 compressedEntries = entries / PLL_PCF;
1678
1679 if(entries % PLL_PCF != 0)
1680 compressedEntries++;
1681
1682 #if (defined(__SSE3) || defined(__AVX))
1683 if(compressedEntries % INTS_PER_VECTOR != 0)
1684 compressedEntriesPadded = compressedEntries + (INTS_PER_VECTOR - (compressedEntries % INTS_PER_VECTOR));
1685 else
1686 compressedEntriesPadded = compressedEntries;
1687 #else
1688 compressedEntriesPadded = compressedEntries;
1689 #endif
1690
1691
1692 rax_posix_memalign ((void **) &(pr->partitionData[model]->parsVect), PLL_BYTE_ALIGNMENT, (size_t)compressedEntriesPadded * states * totalNodes * sizeof(parsimonyNumber));
1693
1694 for(i = 0; i < compressedEntriesPadded * states * totalNodes; i++)
1695 pr->partitionData[model]->parsVect[i] = 0;
1696
1697 for(i = 0; i < (size_t)tr->mxtips; i++)
1698 {
1699 size_t
1700 w = 0,
1701 compressedIndex = 0,
1702 compressedCounter = 0,
1703 index = 0;
1704
1705 for(k = 0; k < states; k++)
1706 {
1707 compressedTips[k] = &(pr->partitionData[model]->parsVect[(compressedEntriesPadded * states * (i + 1)) + (compressedEntriesPadded * k)]);
1708 compressedValues[k] = 0;
1709 }
1710
1711 for(index = lower; index < (size_t)upper; index++)
1712 {
1713 if(informative[index])
1714 {
1715 const unsigned int
1716 *bitValue = getBitVector(pr->partitionData[model]->dataType);
1717
1718 parsimonyNumber
1719 value = bitValue[tr->yVector[i + 1][index]];
1720
1721 for(w = 0; w < (size_t)tr->aliaswgt[index]; w++)
1722 {
1723 for(k = 0; k < states; k++)
1724 {
1725 if(value & mask32[k])
1726 compressedValues[k] |= mask32[compressedCounter];
1727 }
1728
1729 compressedCounter++;
1730
1731 if(compressedCounter == PLL_PCF)
1732 {
1733 for(k = 0; k < states; k++)
1734 {
1735 compressedTips[k][compressedIndex] = compressedValues[k];
1736 compressedValues[k] = 0;
1737 }
1738
1739 compressedCounter = 0;
1740 compressedIndex++;
1741 }
1742 }
1743 }
1744 }
1745
1746 for(;compressedIndex < compressedEntriesPadded; compressedIndex++)
1747 {
1748 for(;compressedCounter < PLL_PCF; compressedCounter++)
1749 for(k = 0; k < states; k++)
1750 compressedValues[k] |= mask32[compressedCounter];
1751
1752 for(k = 0; k < states; k++)
1753 {
1754 compressedTips[k][compressedIndex] = compressedValues[k];
1755 compressedValues[k] = 0;
1756 }
1757
1758 compressedCounter = 0;
1759 }
1760 }
1761
1762 pr->partitionData[model]->parsimonyLength = compressedEntriesPadded;
1763
1764 rax_free(compressedTips);
1765 rax_free(compressedValues);
1766 }
1767
1768 rax_posix_memalign ((void **) &(tr->parsimonyScore), PLL_BYTE_ALIGNMENT, sizeof(unsigned int) * totalNodes);
1769
1770 for(i = 0; i < totalNodes; i++)
1771 tr->parsimonyScore[i] = 0;
1772 }
1773
1774
1775
stepwiseAddition(pllInstance * tr,partitionList * pr,nodeptr p,nodeptr q)1776 static void stepwiseAddition(pllInstance *tr, partitionList *pr, nodeptr p, nodeptr q)
1777 {
1778 nodeptr
1779 r = q->back;
1780
1781 unsigned int
1782 mp;
1783
1784 int
1785 counter = 4;
1786
1787 p->next->back = q;
1788 q->back = p->next;
1789
1790 p->next->next->back = r;
1791 r->back = p->next->next;
1792
1793 computeTraversalInfoParsimony(p, tr->ti, &counter, tr->mxtips, PLL_FALSE);
1794 tr->ti[0] = counter;
1795 tr->ti[1] = p->number;
1796 tr->ti[2] = p->back->number;
1797
1798 mp = evaluateParsimonyIterativeFast(tr, pr);
1799
1800 if(mp < tr->bestParsimony)
1801 {
1802 tr->bestParsimony = mp;
1803 tr->insertNode = q;
1804 }
1805
1806 q->back = r;
1807 r->back = q;
1808
1809 if(q->number > tr->mxtips && tr->parsimonyScore[q->number] > 0)
1810 {
1811 stepwiseAddition(tr, pr, p, q->next->back);
1812 stepwiseAddition(tr, pr, p, q->next->next->back);
1813 }
1814 }
1815
1816
1817
allocateParsimonyDataStructures(pllInstance * tr,partitionList * pr)1818 void allocateParsimonyDataStructures(pllInstance *tr, partitionList *pr)
1819 {
1820 int
1821 i,
1822 *informative = (int *)rax_malloc(sizeof(int) * (size_t)tr->originalCrunchedLength);
1823
1824 determineUninformativeSites(tr, pr, informative);
1825
1826 compressDNA(tr, pr, informative);
1827
1828 for(i = tr->mxtips + 1; i <= tr->mxtips + tr->mxtips - 1; i++)
1829 {
1830 nodeptr
1831 p = tr->nodep[i];
1832
1833 p->xPars = 1;
1834 p->next->xPars = 0;
1835 p->next->next->xPars = 0;
1836 }
1837
1838 tr->ti = (int*)rax_malloc(sizeof(int) * 4 * (size_t)tr->mxtips);
1839
1840 rax_free(informative);
1841 }
1842
pllFreeParsimonyDataStructures(pllInstance * tr,partitionList * pr)1843 void pllFreeParsimonyDataStructures(pllInstance *tr, partitionList *pr)
1844 {
1845 size_t
1846 model;
1847
1848 rax_free(tr->parsimonyScore);
1849
1850 for(model = 0; model < (size_t) pr->numberOfPartitions; ++model)
1851 rax_free(pr->partitionData[model]->parsVect);
1852
1853 rax_free(tr->ti);
1854 }
1855
1856
pllMakeParsimonyTreeFast(pllInstance * tr,partitionList * pr,int sprDist)1857 void pllMakeParsimonyTreeFast(pllInstance *tr, partitionList *pr, int sprDist)
1858 {
1859 nodeptr
1860 p,
1861 f;
1862
1863 int
1864 i,
1865 nextsp,
1866 *perm = (int *)rax_malloc((size_t)(tr->mxtips + 1) * sizeof(int));
1867
1868 unsigned int
1869 randomMP,
1870 startMP;
1871
1872 assert(!tr->constrained);
1873
1874 makePermutationFast(perm, tr->mxtips, tr);
1875
1876 tr->ntips = 0;
1877
1878 tr->nextnode = tr->mxtips + 1;
1879
1880 buildSimpleTree(tr, pr, perm[1], perm[2], perm[3]);
1881
1882 f = tr->start;
1883
1884 while(tr->ntips < tr->mxtips)
1885 {
1886 nodeptr q;
1887
1888 tr->bestParsimony = INT_MAX;
1889 nextsp = ++(tr->ntips);
1890 p = tr->nodep[perm[nextsp]];
1891 q = tr->nodep[(tr->nextnode)++];
1892 p->back = q;
1893 q->back = p;
1894
1895 if(tr->grouped)
1896 {
1897 int
1898 number = p->back->number;
1899
1900 tr->constraintVector[number] = -9;
1901 }
1902
1903 stepwiseAddition(tr, pr, q, f->back);
1904
1905 {
1906 nodeptr
1907 r = tr->insertNode->back;
1908
1909 int counter = 4;
1910
1911 hookupDefault(q->next, tr->insertNode);
1912 hookupDefault(q->next->next, r);
1913
1914 computeTraversalInfoParsimony(q, tr->ti, &counter, tr->mxtips, PLL_FALSE);
1915 tr->ti[0] = counter;
1916
1917 newviewParsimonyIterativeFast(tr, pr);
1918 }
1919 }
1920
1921 nodeRectifierPars(tr);
1922
1923 randomMP = tr->bestParsimony;
1924
1925 do
1926 {
1927 startMP = randomMP;
1928 nodeRectifierPars(tr);
1929 for(i = 1; i <= tr->mxtips + tr->mxtips - 2; i++)
1930 {
1931 rearrangeParsimony(tr, pr, tr->nodep[i], 1, sprDist, PLL_FALSE);
1932 if(tr->bestParsimony < randomMP)
1933 {
1934 restoreTreeRearrangeParsimony(tr, pr);
1935 randomMP = tr->bestParsimony;
1936 }
1937 }
1938 }
1939 while(randomMP < startMP);
1940
1941 rax_free(perm);
1942 }
1943