1 /**
2 * PLL (version 1.0.0) a software library for phylogenetic inference
3 * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
4 *
5 * Derived from
6 * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
7 * trees by Alexandros Stamatakis
8 *
9 * This program is free software: you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the Free
11 * Software Foundation, either version 3 of the License, or (at your option)
12 * any later version.
13 *
14 * This program is distributed in the hope that it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 * more details.
18 *
19 * You should have received a copy of the GNU General Public License along with
20 * this program. If not, see <http://www.gnu.org/licenses/>.
21 *
22 * For any other enquiries send an Email to Tomas Flouri
23 * Tomas.Flouri@h-its.org
24 *
25 * When publishing work that uses PLL please cite PLL
26 *
27 * @file parsimony.c
28 */
29 #include "mem_alloc.h"
30
31 #ifndef WIN32
32 #include <sys/times.h>
33 #include <sys/types.h>
34 #include <sys/time.h>
35 #include <unistd.h>
36 #endif
37
38 #include <limits.h>
39 #include <math.h>
40 #include <time.h>
41 #include <stdlib.h>
42 #include <stdio.h>
43 #include <ctype.h>
44 #include <string.h>
45 #include <stdint.h>
46 #include <assert.h>
47
48 #if defined(__MIC_NATIVE)
49
50 #include <immintrin.h>
51
52 #define INTS_PER_VECTOR 16
53 #define LONG_INTS_PER_VECTOR 8
54 #define INT_TYPE __m512i
55 #define CAST double*
56 #define SET_ALL_BITS_ONE _mm512_set1_epi32(0xFFFFFFFF)
57 #define SET_ALL_BITS_ZERO _mm512_setzero_epi32()
58 #define VECTOR_LOAD _mm512_load_epi32
59 #define VECTOR_STORE _mm512_store_epi32
60 #define VECTOR_BIT_AND _mm512_and_epi32
61 #define VECTOR_BIT_OR _mm512_or_epi32
62 #define VECTOR_AND_NOT _mm512_andnot_epi32
63
64 #elif defined(__AVX)
65
66 #include <xmmintrin.h>
67 #include <immintrin.h>
68 #include <pmmintrin.h>
69
70 #define ULINT_SIZE 64
71 #define INTS_PER_VECTOR 8
72 #define LONG_INTS_PER_VECTOR 4
73 #define INT_TYPE __m256d
74 #define CAST double*
75 #define SET_ALL_BITS_ONE (__m256d)_mm256_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)
76 #define SET_ALL_BITS_ZERO (__m256d)_mm256_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000)
77 #define VECTOR_LOAD _mm256_load_pd
78 #define VECTOR_BIT_AND _mm256_and_pd
79 #define VECTOR_BIT_OR _mm256_or_pd
80 #define VECTOR_STORE _mm256_store_pd
81 #define VECTOR_AND_NOT _mm256_andnot_pd
82
83 #elif (defined(__SSE3))
84
85 #include <xmmintrin.h>
86 #include <pmmintrin.h>
87
88 #define INTS_PER_VECTOR 4
89 #ifdef __i386__
90 #define ULINT_SIZE 32
91 #define LONG_INTS_PER_VECTOR 4
92 #else
93 #define ULINT_SIZE 64
94 #define LONG_INTS_PER_VECTOR 2
95 #endif
96 #define INT_TYPE __m128i
97 #define CAST __m128i*
98 #define SET_ALL_BITS_ONE _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF)
99 #define SET_ALL_BITS_ZERO _mm_set_epi32(0x00000000, 0x00000000, 0x00000000, 0x00000000)
100 #define VECTOR_LOAD _mm_load_si128
101 #define VECTOR_BIT_AND _mm_and_si128
102 #define VECTOR_BIT_OR _mm_or_si128
103 #define VECTOR_STORE _mm_store_si128
104 #define VECTOR_AND_NOT _mm_andnot_si128
105
106 #endif
107
108 #include "pll.h"
109 #include "pllInternal.h"
110
111 extern const unsigned int mask32[32];
112
vectorPopcount(INT_TYPE v)113 static __inline unsigned int vectorPopcount(INT_TYPE v)
114 {
115 unsigned long
116 counts[LONG_INTS_PER_VECTOR] __attribute__ ((aligned (PLL_BYTE_ALIGNMENT)));
117
118 int
119 i,
120 sum = 0;
121
122 VECTOR_STORE((CAST)counts, v);
123
124 for(i = 0; i < LONG_INTS_PER_VECTOR; i++)
125 sum += __builtin_popcountl(counts[i]);
126
127 return ((unsigned int)sum);
128 }
129
storePerSiteScores(partitionList * pr,int model,INT_TYPE v,unsigned int offset)130 static __inline void storePerSiteScores (partitionList * pr, int model, INT_TYPE v, unsigned int offset)
131 {
132 unsigned long
133 counts[LONG_INTS_PER_VECTOR] __attribute__ ((aligned (PLL_BYTE_ALIGNMENT)));
134 parsimonyNumber * buf;
135
136 int
137 i,
138 j;
139
140 VECTOR_STORE((CAST)counts, v);
141
142 for (i = 0; i < LONG_INTS_PER_VECTOR; ++i)
143 {
144 buf = &(pr->partitionData[model]->perSiteParsScores[offset * PLL_PCF + i * ULINT_SIZE]);
145 for (j = 0; j < ULINT_SIZE; ++ j)
146 buf[j] += ((counts[i] >> j) & 1);
147 }
148
149 }
150
getxnodeLocal(nodeptr p)151 static void getxnodeLocal (nodeptr p)
152 {
153 nodeptr s;
154
155 if((s = p->next)->xPars || (s = s->next)->xPars)
156 {
157 p->xPars = s->xPars;
158 s->xPars = 0;
159 }
160
161 assert(p->next->xPars || p->next->next->xPars || p->xPars);
162
163 }
164
computeTraversalInfoParsimony(nodeptr p,int * ti,int * counter,int maxTips,pllBoolean full)165 static void computeTraversalInfoParsimony(nodeptr p, int *ti, int *counter, int maxTips, pllBoolean full)
166 {
167 nodeptr
168 q = p->next->back,
169 r = p->next->next->back;
170
171 if(! p->xPars)
172 getxnodeLocal(p);
173
174 if(full)
175 {
176 if(q->number > maxTips)
177 computeTraversalInfoParsimony(q, ti, counter, maxTips, full);
178
179 if(r->number > maxTips)
180 computeTraversalInfoParsimony(r, ti, counter, maxTips, full);
181 }
182 else
183 {
184 if(q->number > maxTips && !q->xPars)
185 computeTraversalInfoParsimony(q, ti, counter, maxTips, full);
186
187 if(r->number > maxTips && !r->xPars)
188 computeTraversalInfoParsimony(r, ti, counter, maxTips, full);
189 }
190
191
192 ti[*counter] = p->number;
193 ti[*counter + 1] = q->number;
194 ti[*counter + 2] = r->number;
195 *counter = *counter + 4;
196 }
197
198 /* check whether site contains at least 2 different letters, i.e.
199 whether it will generate a score */
isInformative(pllInstance * tr,int dataType,int site)200 static pllBoolean isInformative(pllInstance *tr, int dataType, int site)
201 {
202 int
203 informativeCounter = 0,
204 check[256],
205 j,
206 undetermined = getUndetermined(dataType);
207
208 const unsigned int
209 *bitVector = getBitVector(dataType);
210
211 unsigned char
212 nucleotide;
213
214
215 for(j = 0; j < 256; j++)
216 check[j] = 0;
217
218 for(j = 1; j <= tr->mxtips; j++)
219 {
220 nucleotide = tr->yVector[j][site];
221 check[nucleotide] = 1;
222 assert(bitVector[nucleotide] > 0);
223 }
224
225 for(j = 0; j < undetermined; j++)
226 {
227 if(check[j] > 0)
228 informativeCounter++;
229 }
230
231 if(informativeCounter > 1)
232 return PLL_TRUE;
233
234 return PLL_FALSE;
235 }
236
compressDNA(pllInstance * tr,partitionList * pr,int * informative,int perSiteScores)237 static void compressDNA(pllInstance *tr, partitionList *pr, int *informative, int perSiteScores)
238 {
239 size_t
240 totalNodes,
241 i,
242 model;
243
244 totalNodes = 2 * (size_t)tr->mxtips;
245
246
247
248 for(model = 0; model < (size_t) pr->numberOfPartitions; model++)
249 {
250 size_t
251 k,
252 states = (size_t)pr->partitionData[model]->states,
253 compressedEntries,
254 compressedEntriesPadded,
255 entries = 0,
256 lower = pr->partitionData[model]->lower,
257 upper = pr->partitionData[model]->upper;
258
259 parsimonyNumber
260 **compressedTips = (parsimonyNumber **)rax_malloc(states * sizeof(parsimonyNumber*)),
261 *compressedValues = (parsimonyNumber *)rax_malloc(states * sizeof(parsimonyNumber));
262
263 for(i = lower; i < upper; i++)
264 if(informative[i])
265 entries += (size_t)tr->aliaswgt[i];
266
267 compressedEntries = entries / PLL_PCF;
268
269 if(entries % PLL_PCF != 0)
270 compressedEntries++;
271
272 #if (defined(__SSE3) || defined(__AVX))
273 if(compressedEntries % INTS_PER_VECTOR != 0)
274 compressedEntriesPadded = compressedEntries + (INTS_PER_VECTOR - (compressedEntries % INTS_PER_VECTOR));
275 else
276 compressedEntriesPadded = compressedEntries;
277 #else
278 compressedEntriesPadded = compressedEntries;
279 #endif
280
281
282 rax_posix_memalign ((void **) &(pr->partitionData[model]->parsVect), PLL_BYTE_ALIGNMENT, (size_t)compressedEntriesPadded * states * totalNodes * sizeof(parsimonyNumber));
283 if (perSiteScores)
284 {
285 rax_posix_memalign ((void **) &(pr->partitionData[model]->perSiteParsScores), PLL_BYTE_ALIGNMENT, (size_t)pr->partitionData[model]->width* sizeof (parsimonyNumber));
286 for (i = 0; i < (size_t)pr->partitionData[model]->width; ++i) pr->partitionData[model]->perSiteParsScores[i] = 0;
287 }
288
289
290 for(i = 0; i < compressedEntriesPadded * states * totalNodes; i++)
291 pr->partitionData[model]->parsVect[i] = 0;
292
293 for(i = 0; i < (size_t)tr->mxtips; i++)
294 {
295 size_t
296 w = 0,
297 compressedIndex = 0,
298 compressedCounter = 0,
299 index = 0;
300
301 for(k = 0; k < states; k++)
302 {
303 compressedTips[k] = &(pr->partitionData[model]->parsVect[(compressedEntriesPadded * states * (i + 1)) + (compressedEntriesPadded * k)]);
304 compressedValues[k] = 0;
305 }
306
307 for(index = lower; index < (size_t)upper; index++)
308 {
309 if(informative[index])
310 {
311 const unsigned int
312 *bitValue = getBitVector(pr->partitionData[model]->dataType);
313
314 parsimonyNumber
315 value = bitValue[tr->yVector[i + 1][index]];
316
317 for(w = 0; w < (size_t)tr->aliaswgt[index]; w++)
318 {
319 for(k = 0; k < states; k++)
320 {
321 if(value & mask32[k])
322 compressedValues[k] |= mask32[compressedCounter];
323 }
324
325 compressedCounter++;
326
327 if(compressedCounter == PLL_PCF)
328 {
329 for(k = 0; k < states; k++)
330 {
331 compressedTips[k][compressedIndex] = compressedValues[k];
332 compressedValues[k] = 0;
333 }
334
335 compressedCounter = 0;
336 compressedIndex++;
337 }
338 }
339 }
340 }
341
342 for(;compressedIndex < compressedEntriesPadded; compressedIndex++)
343 {
344 for(;compressedCounter < PLL_PCF; compressedCounter++)
345 for(k = 0; k < states; k++)
346 compressedValues[k] |= mask32[compressedCounter];
347
348 for(k = 0; k < states; k++)
349 {
350 compressedTips[k][compressedIndex] = compressedValues[k];
351 compressedValues[k] = 0;
352 }
353
354 compressedCounter = 0;
355 }
356 }
357
358 pr->partitionData[model]->parsimonyLength = compressedEntriesPadded;
359
360 rax_free(compressedTips);
361 rax_free(compressedValues);
362 }
363
364 rax_posix_memalign ((void **) &(tr->parsimonyScore), PLL_BYTE_ALIGNMENT, sizeof(unsigned int) * totalNodes);
365
366 for(i = 0; i < totalNodes; i++)
367 tr->parsimonyScore[i] = 0;
368 }
369
determineUninformativeSites(pllInstance * tr,partitionList * pr,int * informative)370 static void determineUninformativeSites(pllInstance *tr, partitionList *pr, int *informative)
371 {
372 int
373 model,
374 number = 0,
375 i;
376
377 /*
378 Not all characters are useful in constructing a parsimony tree.
379 Invariant characters, those that have the same state in all taxa,
380 are obviously useless and are ignored by the method. Characters in
381 which a state occurs in only one taxon are also ignored.
382 All these characters are called parsimony uninformative.
383
384 Alternative definition: informative columns contain at least two types
385 of nucleotides, and each nucleotide must appear at least twice in each
386 column. Kind of a pain if we intend to check for this when using, e.g.,
387 amibiguous DNA encoding.
388 */
389
390
391 for(model = 0; model < pr->numberOfPartitions; model++)
392 {
393 for(i = pr->partitionData[model]->lower; i < pr->partitionData[model]->upper; i++)
394 {
395 if(isInformative(tr, pr->partitionData[model]->dataType, i))
396 informative[i] = 1;
397 else
398 {
399 informative[i] = 0;
400 number++;
401 }
402 }
403 }
404
405 /* printf("Uninformative Patterns: %d\n", number); */
406 }
407
pllInitParsimonyStructures(pllInstance * tr,partitionList * pr,pllBoolean perSiteScores)408 void pllInitParsimonyStructures(pllInstance *tr, partitionList *pr, pllBoolean perSiteScores)
409 {
410 int
411 i,
412 *informative = (int *)rax_malloc(sizeof(int) * (size_t)tr->originalCrunchedLength);
413
414 for (i = 0; i < pr->numberOfPartitions; ++ i)
415 rax_free (pr->partitionData[i]->parsVect);
416
417 rax_free (tr->parsimonyScore);
418
419 determineUninformativeSites(tr, pr, informative);
420
421 compressDNA(tr, pr, informative, perSiteScores);
422
423 for(i = tr->mxtips + 1; i <= tr->mxtips + tr->mxtips - 1; i++)
424 {
425 nodeptr
426 p = tr->nodep[i];
427
428 p->xPars = 1;
429 p->next->xPars = 0;
430 p->next->next->xPars = 0;
431 }
432
433 tr->ti = (int*)rax_malloc(sizeof(int) * 4 * (size_t)tr->mxtips);
434
435 rax_free(informative);
436 }
437
newviewParsimonyIterativeFast(pllInstance * tr,partitionList * pr,pllBoolean perSiteScores)438 static void newviewParsimonyIterativeFast(pllInstance *tr, partitionList *pr, pllBoolean perSiteScores)
439 {
440 INT_TYPE
441 allOne = SET_ALL_BITS_ONE;
442
443 int
444 model,
445 *ti = tr->ti,
446 count = ti[0],
447 index;
448
449 for(index = 4; index < count; index += 4)
450 {
451 unsigned int
452 totalScore = 0;
453
454 size_t
455 pNumber = (size_t)ti[index],
456 qNumber = (size_t)ti[index + 1],
457 rNumber = (size_t)ti[index + 2];
458
459 for(model = 0; model < pr->numberOfPartitions; model++)
460 {
461 size_t
462 k,
463 states = pr->partitionData[model]->states,
464 width = pr->partitionData[model]->parsimonyLength;
465
466 unsigned int
467 i;
468
469 switch(states)
470 {
471 case 2:
472 {
473 parsimonyNumber
474 *left[2],
475 *right[2],
476 *this[2];
477
478 for(k = 0; k < 2; k++)
479 {
480 left[k] = &(pr->partitionData[model]->parsVect[(width * 2 * qNumber) + width * k]);
481 right[k] = &(pr->partitionData[model]->parsVect[(width * 2 * rNumber) + width * k]);
482 this[k] = &(pr->partitionData[model]->parsVect[(width * 2 * pNumber) + width * k]);
483 }
484
485 for(i = 0; i < width; i += INTS_PER_VECTOR)
486 {
487 INT_TYPE
488 s_r, s_l, v_N,
489 l_A, l_C,
490 v_A, v_C;
491
492 s_l = VECTOR_LOAD((CAST)(&left[0][i]));
493 s_r = VECTOR_LOAD((CAST)(&right[0][i]));
494 l_A = VECTOR_BIT_AND(s_l, s_r);
495 v_A = VECTOR_BIT_OR(s_l, s_r);
496
497 s_l = VECTOR_LOAD((CAST)(&left[1][i]));
498 s_r = VECTOR_LOAD((CAST)(&right[1][i]));
499 l_C = VECTOR_BIT_AND(s_l, s_r);
500 v_C = VECTOR_BIT_OR(s_l, s_r);
501
502 v_N = VECTOR_BIT_OR(l_A, l_C);
503
504 VECTOR_STORE((CAST)(&this[0][i]), VECTOR_BIT_OR(l_A, VECTOR_AND_NOT(v_N, v_A)));
505 VECTOR_STORE((CAST)(&this[1][i]), VECTOR_BIT_OR(l_C, VECTOR_AND_NOT(v_N, v_C)));
506
507 v_N = VECTOR_AND_NOT(v_N, allOne);
508
509 totalScore += vectorPopcount(v_N);
510 if (perSiteScores)
511 storePerSiteScores (pr, model, v_N, i);
512 }
513 }
514 break;
515 case 4:
516 {
517 parsimonyNumber
518 *left[4],
519 *right[4],
520 *this[4];
521
522 for(k = 0; k < 4; k++)
523 {
524 left[k] = &(pr->partitionData[model]->parsVect[(width * 4 * qNumber) + width * k]);
525 right[k] = &(pr->partitionData[model]->parsVect[(width * 4 * rNumber) + width * k]);
526 this[k] = &(pr->partitionData[model]->parsVect[(width * 4 * pNumber) + width * k]);
527 }
528 for(i = 0; i < width; i += INTS_PER_VECTOR)
529 {
530 INT_TYPE
531 s_r, s_l, v_N,
532 l_A, l_C, l_G, l_T,
533 v_A, v_C, v_G, v_T;
534
535 s_l = VECTOR_LOAD((CAST)(&left[0][i]));
536 s_r = VECTOR_LOAD((CAST)(&right[0][i]));
537 l_A = VECTOR_BIT_AND(s_l, s_r);
538 v_A = VECTOR_BIT_OR(s_l, s_r);
539
540 s_l = VECTOR_LOAD((CAST)(&left[1][i]));
541 s_r = VECTOR_LOAD((CAST)(&right[1][i]));
542 l_C = VECTOR_BIT_AND(s_l, s_r);
543 v_C = VECTOR_BIT_OR(s_l, s_r);
544
545 s_l = VECTOR_LOAD((CAST)(&left[2][i]));
546 s_r = VECTOR_LOAD((CAST)(&right[2][i]));
547 l_G = VECTOR_BIT_AND(s_l, s_r);
548 v_G = VECTOR_BIT_OR(s_l, s_r);
549
550 s_l = VECTOR_LOAD((CAST)(&left[3][i]));
551 s_r = VECTOR_LOAD((CAST)(&right[3][i]));
552 l_T = VECTOR_BIT_AND(s_l, s_r);
553 v_T = VECTOR_BIT_OR(s_l, s_r);
554
555 v_N = VECTOR_BIT_OR(VECTOR_BIT_OR(l_A, l_C), VECTOR_BIT_OR(l_G, l_T));
556
557 VECTOR_STORE((CAST)(&this[0][i]), VECTOR_BIT_OR(l_A, VECTOR_AND_NOT(v_N, v_A)));
558 VECTOR_STORE((CAST)(&this[1][i]), VECTOR_BIT_OR(l_C, VECTOR_AND_NOT(v_N, v_C)));
559 VECTOR_STORE((CAST)(&this[2][i]), VECTOR_BIT_OR(l_G, VECTOR_AND_NOT(v_N, v_G)));
560 VECTOR_STORE((CAST)(&this[3][i]), VECTOR_BIT_OR(l_T, VECTOR_AND_NOT(v_N, v_T)));
561
562 v_N = VECTOR_AND_NOT(v_N, allOne);
563
564 totalScore += vectorPopcount(v_N);
565
566 if (perSiteScores)
567 storePerSiteScores (pr, model, v_N, i);
568 }
569 }
570 break;
571 case 20:
572 {
573 parsimonyNumber
574 *left[20],
575 *right[20],
576 *this[20];
577
578 for(k = 0; k < 20; k++)
579 {
580 left[k] = &(pr->partitionData[model]->parsVect[(width * 20 * qNumber) + width * k]);
581 right[k] = &(pr->partitionData[model]->parsVect[(width * 20 * rNumber) + width * k]);
582 this[k] = &(pr->partitionData[model]->parsVect[(width * 20 * pNumber) + width * k]);
583 }
584
585 for(i = 0; i < width; i += INTS_PER_VECTOR)
586 {
587 size_t j;
588
589 INT_TYPE
590 s_r, s_l,
591 v_N = SET_ALL_BITS_ZERO,
592 l_A[20],
593 v_A[20];
594
595 for(j = 0; j < 20; j++)
596 {
597 s_l = VECTOR_LOAD((CAST)(&left[j][i]));
598 s_r = VECTOR_LOAD((CAST)(&right[j][i]));
599 l_A[j] = VECTOR_BIT_AND(s_l, s_r);
600 v_A[j] = VECTOR_BIT_OR(s_l, s_r);
601
602 v_N = VECTOR_BIT_OR(v_N, l_A[j]);
603 }
604
605 for(j = 0; j < 20; j++)
606 VECTOR_STORE((CAST)(&this[j][i]), VECTOR_BIT_OR(l_A[j], VECTOR_AND_NOT(v_N, v_A[j])));
607
608 v_N = VECTOR_AND_NOT(v_N, allOne);
609
610 totalScore += vectorPopcount(v_N);
611
612 if (perSiteScores)
613 storePerSiteScores (pr, model, v_N, i);
614 }
615 }
616 break;
617 default:
618 {
619 parsimonyNumber
620 *left[32],
621 *right[32],
622 *this[32];
623
624 assert(states <= 32);
625
626 for(k = 0; k < states; k++)
627 {
628 left[k] = &(pr->partitionData[model]->parsVect[(width * states * qNumber) + width * k]);
629 right[k] = &(pr->partitionData[model]->parsVect[(width * states * rNumber) + width * k]);
630 this[k] = &(pr->partitionData[model]->parsVect[(width * states * pNumber) + width * k]);
631 }
632
633 for(i = 0; i < width; i += INTS_PER_VECTOR)
634 {
635 size_t j;
636
637 INT_TYPE
638 s_r, s_l,
639 v_N = SET_ALL_BITS_ZERO,
640 l_A[32],
641 v_A[32];
642
643 for(j = 0; j < states; j++)
644 {
645 s_l = VECTOR_LOAD((CAST)(&left[j][i]));
646 s_r = VECTOR_LOAD((CAST)(&right[j][i]));
647 l_A[j] = VECTOR_BIT_AND(s_l, s_r);
648 v_A[j] = VECTOR_BIT_OR(s_l, s_r);
649
650 v_N = VECTOR_BIT_OR(v_N, l_A[j]);
651 }
652
653 for(j = 0; j < states; j++)
654 VECTOR_STORE((CAST)(&this[j][i]), VECTOR_BIT_OR(l_A[j], VECTOR_AND_NOT(v_N, v_A[j])));
655
656 v_N = VECTOR_AND_NOT(v_N, allOne);
657
658 totalScore += vectorPopcount(v_N);
659
660 if (perSiteScores)
661 storePerSiteScores (pr, model, v_N, i);
662 }
663 }
664 }
665 }
666 tr->parsimonyScore[pNumber] = totalScore + tr->parsimonyScore[rNumber] + tr->parsimonyScore[qNumber];
667 }
668 }
669
evaluateParsimonyIterativeFast(pllInstance * tr,partitionList * pr,pllBoolean perSiteScores)670 static unsigned int evaluateParsimonyIterativeFast(pllInstance *tr, partitionList *pr, pllBoolean perSiteScores)
671 {
672 INT_TYPE
673 allOne = SET_ALL_BITS_ONE;
674
675 size_t
676 pNumber = (size_t)tr->ti[1],
677 qNumber = (size_t)tr->ti[2];
678
679 int
680 model;
681
682 unsigned int
683 bestScore = tr->bestParsimony,
684 sum;
685
686 if(tr->ti[0] > 4)
687 newviewParsimonyIterativeFast(tr, pr, perSiteScores);
688
689 sum = tr->parsimonyScore[pNumber] + tr->parsimonyScore[qNumber];
690
691 for(model = 0; model < pr->numberOfPartitions; model++)
692 {
693 size_t
694 k,
695 states = pr->partitionData[model]->states,
696 width = pr->partitionData[model]->parsimonyLength,
697 i;
698
699 switch(states)
700 {
701 case 2:
702 {
703 parsimonyNumber
704 *left[2],
705 *right[2];
706
707 for(k = 0; k < 2; k++)
708 {
709 left[k] = &(pr->partitionData[model]->parsVect[(width * 2 * qNumber) + width * k]);
710 right[k] = &(pr->partitionData[model]->parsVect[(width * 2 * pNumber) + width * k]);
711 }
712
713 for(i = 0; i < width; i += INTS_PER_VECTOR)
714 {
715 INT_TYPE
716 l_A = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[0][i])), VECTOR_LOAD((CAST)(&right[0][i]))),
717 l_C = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[1][i])), VECTOR_LOAD((CAST)(&right[1][i]))),
718 v_N = VECTOR_BIT_OR(l_A, l_C);
719
720 v_N = VECTOR_AND_NOT(v_N, allOne);
721
722 sum += vectorPopcount(v_N);
723 if (perSiteScores)
724 storePerSiteScores (pr, model, v_N, i);
725 }
726 }
727 break;
728 case 4:
729 {
730 parsimonyNumber
731 *left[4],
732 *right[4];
733
734 for(k = 0; k < 4; k++)
735 {
736 left[k] = &(pr->partitionData[model]->parsVect[(width * 4 * qNumber) + width * k]);
737 right[k] = &(pr->partitionData[model]->parsVect[(width * 4 * pNumber) + width * k]);
738 }
739
740 for(i = 0; i < width; i += INTS_PER_VECTOR)
741 {
742 INT_TYPE
743 l_A = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[0][i])), VECTOR_LOAD((CAST)(&right[0][i]))),
744 l_C = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[1][i])), VECTOR_LOAD((CAST)(&right[1][i]))),
745 l_G = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[2][i])), VECTOR_LOAD((CAST)(&right[2][i]))),
746 l_T = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[3][i])), VECTOR_LOAD((CAST)(&right[3][i]))),
747 v_N = VECTOR_BIT_OR(VECTOR_BIT_OR(l_A, l_C), VECTOR_BIT_OR(l_G, l_T));
748
749 v_N = VECTOR_AND_NOT(v_N, allOne);
750
751 sum += vectorPopcount(v_N);
752 if (perSiteScores)
753 storePerSiteScores (pr, model, v_N, i);
754 }
755 }
756 break;
757 case 20:
758 {
759 parsimonyNumber
760 *left[20],
761 *right[20];
762
763 for(k = 0; k < 20; k++)
764 {
765 left[k] = &(pr->partitionData[model]->parsVect[(width * 20 * qNumber) + width * k]);
766 right[k] = &(pr->partitionData[model]->parsVect[(width * 20 * pNumber) + width * k]);
767 }
768
769 for(i = 0; i < width; i += INTS_PER_VECTOR)
770 {
771 int
772 j;
773
774 INT_TYPE
775 l_A,
776 v_N = SET_ALL_BITS_ZERO;
777
778 for(j = 0; j < 20; j++)
779 {
780 l_A = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[j][i])), VECTOR_LOAD((CAST)(&right[j][i])));
781 v_N = VECTOR_BIT_OR(l_A, v_N);
782 }
783
784 v_N = VECTOR_AND_NOT(v_N, allOne);
785
786 sum += vectorPopcount(v_N);
787 if (perSiteScores)
788 storePerSiteScores (pr, model, v_N, i);
789 }
790 }
791 break;
792 default:
793 {
794 parsimonyNumber
795 *left[32],
796 *right[32];
797
798 assert(states <= 32);
799
800 for(k = 0; k < states; k++)
801 {
802 left[k] = &(pr->partitionData[model]->parsVect[(width * states * qNumber) + width * k]);
803 right[k] = &(pr->partitionData[model]->parsVect[(width * states * pNumber) + width * k]);
804 }
805
806 for(i = 0; i < width; i += INTS_PER_VECTOR)
807 {
808 size_t
809 j;
810
811 INT_TYPE
812 l_A,
813 v_N = SET_ALL_BITS_ZERO;
814
815 for(j = 0; j < states; j++)
816 {
817 l_A = VECTOR_BIT_AND(VECTOR_LOAD((CAST)(&left[j][i])), VECTOR_LOAD((CAST)(&right[j][i])));
818 v_N = VECTOR_BIT_OR(l_A, v_N);
819 }
820
821 v_N = VECTOR_AND_NOT(v_N, allOne);
822
823 sum += vectorPopcount(v_N);
824 if (perSiteScores)
825 storePerSiteScores (pr, model, v_N, i);
826 }
827 }
828 }
829 }
830
831 return sum;
832 }
833
pllEvaluateParsimony(pllInstance * tr,partitionList * pr,nodeptr p,pllBoolean full,pllBoolean perSiteScores)834 unsigned int pllEvaluateParsimony(pllInstance *tr, partitionList *pr, nodeptr p, pllBoolean full, pllBoolean perSiteScores)
835 {
836 volatile unsigned int result;
837 nodeptr q = p->back;
838 int
839 *ti = tr->ti,
840 counter = 4;
841
842 ti[1] = p->number;
843 ti[2] = q->number;
844
845 if(full)
846 {
847 if(p->number > tr->mxtips)
848 computeTraversalInfoParsimony(p, ti, &counter, tr->mxtips, full);
849 if(q->number > tr->mxtips)
850 computeTraversalInfoParsimony(q, ti, &counter, tr->mxtips, full);
851 }
852 else
853 {
854 if(p->number > tr->mxtips && !p->xPars)
855 computeTraversalInfoParsimony(p, ti, &counter, tr->mxtips, full);
856 if(q->number > tr->mxtips && !q->xPars)
857 computeTraversalInfoParsimony(q, ti, &counter, tr->mxtips, full);
858 }
859
860 ti[0] = counter;
861
862 result = evaluateParsimonyIterativeFast(tr, pr, perSiteScores);
863
864 return result;
865 }
866