1 /*
2 * MrBayes 3
3 *
4 * (c) 2002-2013
5 *
6 * John P. Huelsenbeck
7 * Dept. Integrative Biology
8 * University of California, Berkeley
9 * Berkeley, CA 94720-3140
10 * johnh@berkeley.edu
11 *
12 * Fredrik Ronquist
13 * Swedish Museum of Natural History
14 * Box 50007
15 * SE-10405 Stockholm, SWEDEN
16 * fredrik.ronquist@nrm.se
17 *
18 * With important contributions by
19 *
20 * Paul van der Mark (paulvdm@sc.fsu.edu)
21 * Maxim Teslenko (maxkth@gmail.com)
22 * Chi Zhang (zhangchicool@gmail.com)
23 *
24 * and by many users (run 'acknowledgments' to see more info)
25 *
26 * This program is free software; you can redistribute it and/or
27 * modify it under the terms of the GNU General Public License
28 * as published by the Free Software Foundation; either version 2
29 * of the License, or (at your option) any later version.
30 *
31 * This program is distributed in the hope that it will be useful,
32 * but WITHOUT ANY WARRANTY; without even the implied warranty of
33 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34 * GNU General Public License for more details (www.gnu.org).
35 *
36 */
37
38 #include "bayes.h"
39 #include "likelihood.h"
40 #include "mbbeagle.h"
41 #include "model.h"
42 #include "utils.h"
43
#define LIKE_EPSILON                1.0e-300    /* NOTE(review): not referenced in the part of the file reviewed here */

/* global variables declared here */
/* preLikeL and preLikeR are scratch tables written by the CondLikeDown_* routines
   whenever the left / right descendant is a terminal that is not partly ambiguous.
   For every rate category they receive the transition probabilities into each
   observed state, followed by a run of 1.0 entries ("for ambiguous"); the routines
   then look values up with the termState offsets of the terminal.  The general
   routines write numStates*numModelStates + numModelStates entries per rate
   category, the 4-state nucleotide routines write 20.  The buffers are not
   allocated in the code reviewed here. */
CLFlt           *preLikeL;                  /* precalculated cond likes for left descendant */
CLFlt           *preLikeR;                  /* precalculated cond likes for right descendant*/
CLFlt           *preLikeA;                  /* precalculated cond likes for ancestor */

/* global variables used here but declared elsewhere */
extern int      *chainId;
extern int      numLocalChains;
extern int      rateProbRowSize;            /* size of rate probs for one chain one state */
extern MrBFlt   **rateProbs;                /* pointers to rate probs used by adgamma model */
56
/* local prototypes */
/* NOTE(review): none of these are declared static, so they have external linkage
   despite the "local" label.  Of the functions below, only FlipCondLikeSpace is
   called in the part of the file reviewed here: every CondLikeDown_* routine calls
   it for the node being updated before looking up and writing that node's
   conditional likelihoods.  The other helpers are presumably defined later in
   this file -- confirm before relying on their names alone. */
void      CopySiteScalers (ModelInfo *m, int chain);
void      FlipCondLikeSpace (ModelInfo *m, int chain, int nodeIndex);
void      FlipCijkSpace (ModelInfo *m, int chain);
void      FlipNodeScalerSpace (ModelInfo *m, int chain, int nodeIndex);
void      FlipSiteScalerSpace (ModelInfo *m, int chain);
void      FlipTiProbsSpace (ModelInfo *m, int chain, int nodeIndex);
MrBFlt    GetRate (int division, int chain);
int       RemoveNodeScalers(TreeNode *p, int division, int chain);
#if defined (SSE_ENABLED)
/* SSE variant, only declared when SSE support is compiled in */
int       RemoveNodeScalers_SSE(TreeNode *p, int division, int chain);
#endif
#if defined (AVX_ENABLED)
/* AVX variant, only declared when AVX support is compiled in */
int       RemoveNodeScalers_AVX(TreeNode *p, int division, int chain);
#endif
void      ResetSiteScalers (ModelInfo *m, int chain);
int       SetBinaryQMatrix (MrBFlt **a, int whichChain, int division);
int       SetNucQMatrix (MrBFlt **a, int n, int whichChain, int division, MrBFlt rateMult, MrBFlt *rA, MrBFlt *rS);
int       SetStdQMatrix (MrBFlt **a, int nStates, MrBFlt *bs, int cType);
int       SetProteinQMatrix (MrBFlt **a, int n, int whichChain, int division, MrBFlt rateMult);
int       UpDateCijk (int whichPart, int whichChain);
78
79
80 #if !defined (SSE_ENABLED) || 1
81 /*----------------------------------------------------------------
82 |
83 | CondLikeDown_Bin: binary model with or without rate
84 | variation
85 |
86 -----------------------------------------------------------------*/
CondLikeDown_Bin(TreeNode * p,int division,int chain)87 int CondLikeDown_Bin (TreeNode *p, int division, int chain)
88 {
89 int c, k;
90 CLFlt *clL, *clR, *clP, *pL, *pR, *tiPL, *tiPR;
91 ModelInfo *m;
92
93 /* find model settings for this division */
94 m = &modelSettings[division];
95
96 /* Flip conditional likelihood space */
97 FlipCondLikeSpace (m, chain, p->index);
98
99 /* find conditional likelihood pointers */
100 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
101 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
102 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
103
104 /* find transition probabilities */
105 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
106 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
107
108 tiPL = pL;
109 tiPR = pR;
110 for (k=0; k<m->numRateCats; k++)
111 {
112 for (c=0; c<m->numChars; c++)
113 {
114 *(clP++) = (tiPL[0]*clL[0] + tiPL[1]*clL[1])
115 *(tiPR[0]*clR[0] + tiPR[1]*clR[1]);
116 *(clP++) = (tiPL[2]*clL[0] + tiPL[3]*clL[1])
117 *(tiPR[2]*clR[0] + tiPR[3]*clR[1]);
118
119 clL += 2;
120 clR += 2;
121 }
122 tiPL += 4;
123 tiPR += 4;
124 }
125
126 return NO_ERROR;
127
128 }
129 #endif
130
131
132 #if defined (SSE_ENABLED)
133 /*----------------------------------------------------------------
134 |
135 | CondLikeDown_Bin_SSE: binary model with or without rate
136 | variation
137 |
138 -----------------------------------------------------------------*/
CondLikeDown_Bin_SSE(TreeNode * p,int division,int chain)139 int CondLikeDown_Bin_SSE (TreeNode *p, int division, int chain)
140 {
141 int c, k;
142 CLFlt *pL, *pR, *tiPL, *tiPR;
143 __m128 *clL, *clR, *clP;
144 __m128 m1, m2, m3, m4, m5, m6;
145 ModelInfo *m;
146
147 m = &modelSettings[division];
148
149 /* flip state of node so that we are not overwriting old cond likes */
150 FlipCondLikeSpace (m, chain, p->index);
151
152 /* find conditional likelihood pointers */
153 clL = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->left->index ]];
154 clR = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->right->index]];
155 clP = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->index ]];
156
157 /* find transition probabilities */
158 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
159 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
160
161 tiPL = pL;
162 tiPR = pR;
163 for (k=0; k<m->numRateCats; k++)
164 {
165 for (c=0; c<m->numVecChars; c++)
166 {
167 m1 = _mm_load1_ps (&tiPL[0]);
168 m2 = _mm_load1_ps (&tiPR[0]);
169 m5 = _mm_mul_ps (m1, clL[0]);
170 m6 = _mm_mul_ps (m2, clR[0]);
171
172 m1 = _mm_load1_ps (&tiPL[1]);
173 m2 = _mm_load1_ps (&tiPR[1]);
174 m3 = _mm_mul_ps (m1, clL[1]);
175 m4 = _mm_mul_ps (m2, clR[1]);
176
177 m5 = _mm_add_ps (m3, m5);
178 m6 = _mm_add_ps (m4, m6);
179
180 *clP++ = _mm_mul_ps (m5, m6);
181
182 m1 = _mm_load1_ps (&tiPL[2]);
183 m2 = _mm_load1_ps (&tiPR[2]);
184 m5 = _mm_mul_ps (m1, clL[0]);
185 m6 = _mm_mul_ps (m2, clR[0]);
186
187 m1 = _mm_load1_ps (&tiPL[3]);
188 m2 = _mm_load1_ps (&tiPR[3]);
189 m3 = _mm_mul_ps (m1, clL[1]);
190 m4 = _mm_mul_ps (m2, clR[1]);
191
192 m5 = _mm_add_ps (m3, m5);
193 m6 = _mm_add_ps (m4, m6);
194
195 *clP++ = _mm_mul_ps (m5, m6);
196 clL += 2;
197 clR += 2;
198 }
199 tiPL += 4;
200 tiPR += 4;
201 }
202
203 return NO_ERROR;
204 }
205 #endif
206
207
208 /*----------------------------------------------------------------
209 |
210 | CondLikeDown_Gen: general n-state model with or without rate
211 | variation
212 |
213 -----------------------------------------------------------------*/
CondLikeDown_Gen(TreeNode * p,int division,int chain)214 int CondLikeDown_Gen (TreeNode *p, int division, int chain)
215 {
216 int a, b, c, h, i, k, j, shortCut, *lState=NULL, *rState=NULL,
217 nObsStates, nStates, nStatesSquared, preLikeJump;
218 CLFlt likeL, likeR, *pL, *pR, *tiPL, *tiPR, *clL, *clR, *clP;
219 ModelInfo *m;
220 # if !defined (DEBUG_NOSHORTCUTS)
221 int catStart;
222 # endif
223
224 /* find model settings for this division and nStates, nStatesSquared */
225 m = &modelSettings[division];
226 nObsStates = m->numStates;
227 nStates = m->numModelStates;
228 nStatesSquared = nStates * nStates;
229 preLikeJump = nObsStates * nStates;
230
231 /* flip conditional likelihood space */
232 FlipCondLikeSpace (m, chain, p->index);
233
234 /* find conditional likelihood pointers */
235 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
236 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
237 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
238
239 /* find transition probabilities */
240 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
241 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
242
243 /* find likelihoods of site patterns for left branch if terminal */
244 shortCut = 0;
245 # if !defined (DEBUG_NOSHORTCUTS)
246 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
247 {
248 shortCut |= 1;
249 lState = m->termState[p->left->index];
250 tiPL = pL;
251 for (k=a=0; k<m->numRateCats; k++)
252 {
253 catStart = a;
254 for (i=0; i<nObsStates; i++)
255 for (j=i; j<nStatesSquared; j+=nStates)
256 preLikeL[a++] = tiPL[j];
257 for (b=1; b<nStates/nObsStates; b++)
258 {
259 a = catStart;
260 for (i=0; i<nObsStates; i++)
261 {
262 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
263 preLikeL[a++] += tiPL[j];
264 }
265 }
266 /* for ambiguous */
267 for (i=0; i<nStates; i++)
268 preLikeL[a++] = 1.0;
269 tiPL += nStatesSquared;
270 }
271 }
272
273 /* find likelihoods of site patterns for right branch if terminal */
274 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
275 {
276 shortCut |= 2;
277 rState = m->termState[p->right->index];
278 tiPR = pR;
279 for (k=a=0; k<m->numRateCats; k++)
280 {
281 catStart = a;
282 for (i=0; i<nObsStates; i++)
283 for (j=i; j<nStatesSquared; j+=nStates)
284 preLikeR[a++] = tiPR[j];
285 for (b=1; b<nStates/nObsStates; b++)
286 {
287 a = catStart;
288 for (i=0; i<nObsStates; i++)
289 {
290 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
291 preLikeR[a++] += tiPR[j];
292 }
293 }
294 /* for ambiguous */
295 for (i=0; i<nStates; i++)
296 preLikeR[a++] = 1.0;
297 tiPR += nStatesSquared;
298 }
299 }
300 # endif
301 switch (shortCut)
302 {
303 case 0:
304 tiPL = pL;
305 tiPR = pR;
306 for (k=0; k<m->numRateCats; k++)
307 {
308 for (c=0; c<m->numChars; c++)
309 {
310 for (i=h=0; i<nStates; i++)
311 {
312 likeL = likeR = 0.0;
313 for (j=0; j<nStates; j++)
314 {
315 likeL += tiPL[h]*clL[j];
316 likeR += tiPR[h++]*clR[j];
317 }
318 *(clP++) = likeL * likeR;
319 }
320 clL += nStates;
321 clR += nStates;
322 }
323 tiPL += nStatesSquared;
324 tiPR += nStatesSquared;
325 }
326 break;
327 case 1:
328 tiPR = pR;
329 for (k=0; k<m->numRateCats; k++)
330 {
331 for (c=0; c<m->numChars; c++)
332 {
333 a = lState[c] + k*(preLikeJump+nStates);
334 for (i=h=0; i<nStates; i++)
335 {
336 likeR = 0.0;
337 for (j=0; j<nStates; j++)
338 {
339 likeR += tiPR[h++]*clR[j];
340 }
341 *(clP++) = preLikeL[a++] * likeR;
342 }
343 clR += nStates;
344 }
345 tiPR += nStatesSquared;
346 }
347 break;
348 case 2:
349 tiPL = pL;
350 for (k=0; k<m->numRateCats; k++)
351 {
352 for (c=0; c<m->numChars; c++)
353 {
354 a = rState[c] + k*(preLikeJump+nStates);
355 for (i=h=0; i<nStates; i++)
356 {
357 likeL = 0.0;
358 for (j=0; j<nStates; j++)
359 {
360 likeL += tiPL[h++]*clL[j];
361 }
362 *(clP++) = preLikeR[a++] * likeL;
363 }
364 clL += nStates;
365 }
366 tiPL += nStatesSquared;
367 }
368 break;
369 case 3:
370 for (k=0; k<m->numRateCats; k++)
371 {
372 for (c=0; c<m->numChars; c++)
373 {
374 a = rState[c] + k*(preLikeJump+nStates);
375 b = lState[c] + k*(preLikeJump+nStates);
376 for (i=0; i<nStates; i++)
377 {
378 *(clP++) = preLikeR[a++] * preLikeL[b++];
379 }
380 }
381 }
382 break;
383 }
384
385 return NO_ERROR;
386 }
387
388
389 #if defined (SSE_ENABLED)
390 /*----------------------------------------------------------------
391 |
392 | CondLikeDown_Gen_SSE: general n-state model with or without rate
393 | variation
394 |
395 -----------------------------------------------------------------*/
CondLikeDown_Gen_SSE(TreeNode * p,int division,int chain)396 int CondLikeDown_Gen_SSE (TreeNode *p, int division, int chain)
397 {
398 int c, c1, h, i, j, k, t, shortCut, *lState=NULL, *rState=NULL, nStates, nStatesSquared, nObsStates, preLikeJump;
399 CLFlt *pL, *pR, *tiPL, *tiPR;
400 __m128 *clL, *clR, *clP;
401 __m128 mTiPL, mTiPR, mL, mR, mAcumL, mAcumR;
402 ModelInfo *m;
403 CLFlt *preLikeRV[4] = {0};
404 CLFlt *preLikeLV[4] = {0};
405
406 # if !defined (DEBUG_NOSHORTCUTS)
407 int a, b, catStart;
408 # endif
409
410 /* find model settings for this division and nStates, nStatesSquared */
411 m = &modelSettings[division];
412 nObsStates = m->numStates;
413 nStates = m->numModelStates;
414 nStatesSquared = nStates * nStates;
415 preLikeJump = nObsStates * nStates;
416
417 /* Flip conditional likelihood space */
418 FlipCondLikeSpace (m, chain, p->index);
419
420 /* find conditional likelihood pointers */
421 clL = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->left->index ]];
422 clR = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->right->index]];
423 clP = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->index ]];
424
425 /* find transition probabilities */
426 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
427 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
428
429 /* find likelihoods of site patterns for left branch if terminal */
430 shortCut = 0;
431 # if !defined (DEBUG_NOSHORTCUTS)
432 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
433 {
434 shortCut |= 1;
435 lState = m->termState[p->left->index];
436 tiPL = pL;
437 for (k=a=0; k<m->numRateCats; k++)
438 {
439 catStart = a;
440 for (i=0; i<nObsStates; i++)
441 for (j=i; j<nStatesSquared; j+=nStates)
442 preLikeL[a++] = tiPL[j];
443 for (b=1; b<nStates/nObsStates; b++)
444 {
445 a = catStart;
446 for (i=0; i<nObsStates; i++)
447 {
448 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
449 preLikeL[a++] += tiPL[j];
450 }
451 }
452 /* for ambiguous */
453 for (i=0; i<nStates; i++)
454 preLikeL[a++] = 1.0;
455 tiPL += nStatesSquared;
456 }
457 }
458
459 /* find likelihoods of site patterns for right branch if terminal */
460 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
461 {
462 shortCut |= 2;
463 rState = m->termState[p->right->index];
464 tiPR = pR;
465 for (k=a=0; k<m->numRateCats; k++)
466 {
467 catStart = a;
468 for (i=0; i<nObsStates; i++)
469 for (j=i; j<nStatesSquared; j+=nStates)
470 preLikeR[a++] = tiPR[j];
471 for (b=1; b<nStates/nObsStates; b++)
472 {
473 a = catStart;
474 for (i=0; i<nObsStates; i++)
475 {
476 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
477 preLikeR[a++] += tiPR[j];
478 }
479 }
480 /* for ambiguous */
481 for (i=0; i<nStates; i++)
482 preLikeR[a++] = 1.0;
483 tiPR += nStatesSquared;
484 }
485 }
486 # endif
487
488 switch (shortCut)
489 {
490 case 0:
491 tiPL = pL;
492 tiPR = pR;
493 for (k=0; k<m->numRateCats; k++)
494 {
495 for (c=0; c<m->numVecChars; c++)
496 {
497 for (i=h=0; i<nStates; i++)
498 {
499 mAcumL = _mm_setzero_ps();
500 mAcumR = _mm_setzero_ps();
501 for (j=0; j<nStates; j++)
502 {
503 mTiPL = _mm_load1_ps (&tiPL[h]);
504 mTiPR = _mm_load1_ps (&tiPR[h++]);
505 mL = _mm_mul_ps (mTiPL, clL[j]);
506 mR = _mm_mul_ps (mTiPR, clR[j]);
507 mAcumL = _mm_add_ps (mL, mAcumL);
508 mAcumR = _mm_add_ps (mR, mAcumR);
509 }
510 *(clP++) = _mm_mul_ps (mAcumL, mAcumR);
511 }
512 clL += nStates;
513 clR += nStates;
514 }
515 tiPL += nStatesSquared;
516 tiPR += nStatesSquared;
517 }
518 break;
519 case 1:
520 tiPR = pR;
521 for (k=0; k<m->numRateCats; k++)
522 {
523 for (c=t=0; c<m->numVecChars; c++)
524 {
525 for (c1=0; c1<m->numFloatsPerVec; c1++,t++)
526 {
527 preLikeLV[c1] = &preLikeL[lState[t] + k*(preLikeJump+nStates)];
528 }
529 for (i=h=0; i<nStates; i++)
530 {
531 mAcumL = _mm_set_ps (*(preLikeLV[3]++), *(preLikeLV[2]++), *(preLikeLV[1]++), *(preLikeLV[0]++));
532 mAcumR = _mm_setzero_ps();
533 for (j=0; j<nStates; j++)
534 {
535 mTiPR = _mm_load1_ps (&tiPR[h++]);
536 mR = _mm_mul_ps (mTiPR, clR[j]);
537 mAcumR = _mm_add_ps (mR, mAcumR);
538 }
539 *(clP++) = _mm_mul_ps (mAcumL,mAcumR);
540 }
541 clR += nStates;
542 }
543 tiPR += nStatesSquared;
544 }
545 break;
546 case 2:
547 tiPL = pL;
548 for (k=0; k<m->numRateCats; k++)
549 {
550 for (c=t=0; c<m->numVecChars; c++)
551 {
552 for (c1=0; c1<m->numFloatsPerVec; c1++,t++)
553 {
554 preLikeRV[c1] = &preLikeR[rState[t] + k*(preLikeJump+nStates)];
555 }
556 for (i=h=0; i<nStates; i++)
557 {
558 mAcumR = _mm_set_ps (*(preLikeRV[3]++), *(preLikeRV[2]++), *(preLikeRV[1]++), *(preLikeRV[0]++));
559 mAcumL = _mm_setzero_ps();
560 for (j=0; j<nStates; j++)
561 {
562 mTiPL = _mm_load1_ps (&tiPL[h++]);
563 mL = _mm_mul_ps (mTiPL, clL[j]);
564 mAcumL = _mm_add_ps (mL, mAcumL);
565 }
566 *(clP++) = _mm_mul_ps (mAcumL,mAcumR);
567 }
568 clL += nStates;
569 }
570 tiPL += nStatesSquared;
571 }
572 break;
573 case 3:
574 for (k=0; k<m->numRateCats; k++)
575 {
576 for (c=t=0; c<m->numVecChars; c++)
577 {
578 for (c1=0; c1<m->numFloatsPerVec; c1++,t++)
579 {
580 preLikeRV[c1] = &preLikeR[rState[t] + k*(preLikeJump+nStates)];
581 preLikeLV[c1] = &preLikeL[lState[t] + k*(preLikeJump+nStates)];
582 }
583 for (i=0; i<nStates; i++)
584 {
585 assert (m->numFloatsPerVec == 4); /* In the following 2 statments we assume that SSE register can hold exactly 4 ClFlts. */
586 mL = _mm_set_ps (*(preLikeLV[3]++), *(preLikeLV[2]++), *(preLikeLV[1]++), *(preLikeLV[0]++));
587 mR = _mm_set_ps (*(preLikeRV[3]++), *(preLikeRV[2]++), *(preLikeRV[1]++), *(preLikeRV[0]++));
588 *(clP++) = _mm_mul_ps (mL,mR);
589 }
590 }
591 }
592 break;
593 }
594 return NO_ERROR;
595 }
596 #endif
597
598
599 /*----------------------------------------------------------------
600 |
601 | CondLikeDown_Gen_GibbsGamma: general n-state model with rate
602 | variation modeled using discrete gamma with Gibbs resampling
603 |
604 -----------------------------------------------------------------*/
CondLikeDown_Gen_GibbsGamma(TreeNode * p,int division,int chain)605 int CondLikeDown_Gen_GibbsGamma (TreeNode *p, int division, int chain)
606 {
607 int a, b, c, i, j, r, *rateCat, shortCut, *lState=NULL, *rState=NULL,
608 nObsStates, nStates, nStatesSquared, nGammaCats;
609 CLFlt likeL, likeR, *pL, *pR, *tiPL, *tiPR, *clL, *clR, *clP;
610 ModelInfo *m;
611 # if !defined (DEBUG_NOSHORTCUTS)
612 int k, catStart;
613 # endif
614
615 /* find model settings for this division and nStates, nStatesSquared */
616 m = &modelSettings[division];
617 nObsStates = m->numStates;
618 nStates = m->numModelStates;
619 nStatesSquared = nStates * nStates;
620
621 /* flip conditional likelihood space */
622 FlipCondLikeSpace (m, chain, p->index);
623
624 /* find conditional likelihood pointers */
625 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
626 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
627 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
628
629 /* find transition probabilities */
630 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
631 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
632
633 /* find rate category index and number of gamma categories */
634 rateCat = m->tiIndex + chain * m->numChars;
635 nGammaCats = m->numRateCats;
636
637 /* find likelihoods of site patterns for left branch if terminal */
638 shortCut = 0;
639 # if !defined (DEBUG_NOSHORTCUTS)
640 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
641 {
642 shortCut |= 1;
643 lState = m->termState[p->left->index];
644 tiPL = pL;
645 for (k=a=0; k<nGammaCats; k++)
646 {
647 catStart = a;
648 for (i=0; i<nObsStates; i++)
649 for (j=i; j<nStatesSquared; j+=nStates)
650 preLikeL[a++] = tiPL[j];
651 for (b=1; b<nStates/nObsStates; b++)
652 {
653 a = catStart;
654 for (i=0; i<nObsStates; i++)
655 {
656 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
657 preLikeL[a++] += tiPL[j];
658 }
659 }
660 /* for ambiguous */
661 for (i=0; i<nStates; i++)
662 preLikeL[a++] = 1.0;
663 tiPL += nStatesSquared;
664 }
665 }
666
667 /* find likelihoods of site patterns for right branch if terminal */
668 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
669 {
670 shortCut |= 2;
671 rState = m->termState[p->right->index];
672 tiPR = pR;
673 for (k=a=0; k<nGammaCats; k++)
674 {
675 catStart = a;
676 for (i=0; i<nObsStates; i++)
677 for (j=i; j<nStatesSquared; j+=nStates)
678 preLikeR[a++] = tiPR[j];
679 for (b=1; b<nStates/nObsStates; b++)
680 {
681 a = catStart;
682 for (i=0; i<nObsStates; i++)
683 {
684 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
685 preLikeR[a++] += tiPR[j];
686 }
687 }
688 /* for ambiguous */
689 for (i=0; i<nStates; i++)
690 preLikeR[a++] = 1.0;
691 tiPR += nStatesSquared;
692 }
693 }
694 # endif
695
696 switch (shortCut)
697 {
698 case 0:
699 for (c=0; c<m->numChars; c++)
700 {
701 r = (*rateCat++);
702 if (r < nGammaCats)
703 {
704 tiPL = pL + r*nStatesSquared;
705 tiPR = pR + r*nStatesSquared;
706 for (i=0; i<nStates; i++)
707 {
708 likeL = likeR = 0.0;
709 for (j=0; j<nStates; j++)
710 {
711 likeL += (*tiPL++) * clL[j];
712 likeR += (*tiPR++) * clR[j];
713 }
714 *(clP++) = likeL * likeR;
715 }
716 }
717 else
718 clP += nStates;
719 clL += nStates;
720 clR += nStates;
721 }
722 break;
723 case 1:
724 for (c=0; c<m->numChars; c++)
725 {
726 r = (*rateCat++);
727 if (r < nGammaCats)
728 {
729 tiPR = pR + r*nStatesSquared;
730 a = lState[c] + r*(nStatesSquared+nStates);
731 for (i=0; i<nStates; i++)
732 {
733 likeR = 0.0;
734 for (j=0; j<nStates; j++)
735 {
736 likeR += (*tiPR++)*clR[j];
737 }
738 *(clP++) = preLikeL[a++] * likeR;
739 }
740 }
741 else
742 clP += nStates;
743 clR += nStates;
744 }
745 break;
746 case 2:
747 for (c=0; c<m->numChars; c++)
748 {
749 r = (*rateCat++);
750 if (r < nGammaCats)
751 {
752 tiPL = pL + r*nStatesSquared;
753 a = rState[c] + r*(nStatesSquared+nStates);
754 for (i=0; i<nStates; i++)
755 {
756 likeL = 0.0;
757 for (j=0; j<nStates; j++)
758 {
759 likeL += (*tiPL++)*clL[j];
760 }
761 *(clP++) = preLikeR[a++] * likeL;
762 }
763 }
764 else
765 clP += nStates;
766 clL += nStates;
767 }
768 break;
769 case 3:
770 for (c=0; c<m->numChars; c++)
771 {
772 r = (*rateCat++);
773 if (r < nGammaCats)
774 {
775 a = lState[c] + r*(nStatesSquared+nStates);
776 b = rState[c] + r*(nStatesSquared+nStates);
777 for (i=0; i<nStates; i++)
778 *(clP++) = preLikeL[a++]*preLikeR[b++];
779 }
780 else
781 clP += nStates;
782 }
783 break;
784 }
785
786 return NO_ERROR;
787 }
788
789
790 /*----------------------------------------------------------------
791 |
792 | CondLikeDown_NUC4: 4by4 nucleotide model with or without rate
793 | variation
794 |
795 -----------------------------------------------------------------*/
CondLikeDown_NUC4(TreeNode * p,int division,int chain)796 int CondLikeDown_NUC4 (TreeNode *p, int division, int chain)
797 {
798 int c, h, i, j, k, shortCut, *lState=NULL, *rState=NULL;
799 CLFlt *clL, *clR, *clP, *pL, *pR, *tiPL, *tiPR;
800 ModelInfo *m;
801
802 m = &modelSettings[division];
803
804 /* flip space so that we do not overwrite old cond likes */
805 FlipCondLikeSpace (m, chain, p->index);
806
807 /* find conditional likelihood pointers */
808 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
809 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
810 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
811
812 /* find transition probabilities */
813 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
814 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
815
816 /* find likelihoods of site patterns for left branch if terminal */
817 shortCut = 0;
818 # if !defined (DEBUG_NOSHORTCUTS)
819 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
820 {
821 shortCut |= 1;
822 lState = m->termState[p->left->index];
823 tiPL = pL;
824 for (k=j=0; k<m->numRateCats; k++)
825 {
826 for (i=0; i<4; i++)
827 {
828 preLikeL[j++] = tiPL[0];
829 preLikeL[j++] = tiPL[4];
830 preLikeL[j++] = tiPL[8];
831 preLikeL[j++] = tiPL[12];
832 tiPL++;
833 }
834 /* for ambiguous */
835 for (i=0; i<4; i++)
836 preLikeL[j++] = 1.0;
837 tiPL += 12;
838 }
839 }
840
841 /* find likelihoods of site patterns for right branch if terminal */
842 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
843 {
844 shortCut |= 2;
845 rState = m->termState[p->right->index];
846 tiPR = pR;
847 for (k=j=0; k<m->numRateCats; k++)
848 {
849 for (i=0; i<4; i++)
850 {
851 preLikeR[j++] = tiPR[0];
852 preLikeR[j++] = tiPR[4];
853 preLikeR[j++] = tiPR[8];
854 preLikeR[j++] = tiPR[12];
855 tiPR++;
856 }
857 /* for ambiguous */
858 for (i=0; i<4; i++)
859 preLikeR[j++] = 1.0;
860 tiPR += 12;
861 }
862 }
863 # endif
864
865 switch (shortCut)
866 {
867 case 0:
868 tiPL = pL;
869 tiPR = pR;
870 for (k=h=0; k<m->numRateCats; k++)
871 {
872 for (c=0; c<m->numChars; c++)
873 {
874 clP[h++] = (tiPL[AA]*clL[A] + tiPL[AC]*clL[C] + tiPL[AG]*clL[G] + tiPL[AT]*clL[T])
875 *(tiPR[AA]*clR[A] + tiPR[AC]*clR[C] + tiPR[AG]*clR[G] + tiPR[AT]*clR[T]);
876 clP[h++] = (tiPL[CA]*clL[A] + tiPL[CC]*clL[C] + tiPL[CG]*clL[G] + tiPL[CT]*clL[T])
877 *(tiPR[CA]*clR[A] + tiPR[CC]*clR[C] + tiPR[CG]*clR[G] + tiPR[CT]*clR[T]);
878 clP[h++] = (tiPL[GA]*clL[A] + tiPL[GC]*clL[C] + tiPL[GG]*clL[G] + tiPL[GT]*clL[T])
879 *(tiPR[GA]*clR[A] + tiPR[GC]*clR[C] + tiPR[GG]*clR[G] + tiPR[GT]*clR[T]);
880 clP[h++] = (tiPL[TA]*clL[A] + tiPL[TC]*clL[C] + tiPL[TG]*clL[G] + tiPL[TT]*clL[T])
881 *(tiPR[TA]*clR[A] + tiPR[TC]*clR[C] + tiPR[TG]*clR[G] + tiPR[TT]*clR[T]);
882 clL += 4;
883 clR += 4;
884 }
885 tiPL += 16;
886 tiPR += 16;
887 }
888 break;
889 case 1:
890 tiPR = pR;
891 for (k=h=0; k<m->numRateCats; k++)
892 {
893 for (c=0; c<m->numChars; c++)
894 {
895 i = lState[c] + k*20;
896 clP[h++] = preLikeL[i++]
897 *(tiPR[AA]*clR[A] + tiPR[AC]*clR[C] + tiPR[AG]*clR[G] + tiPR[AT]*clR[T]);
898 clP[h++] = preLikeL[i++]
899 *(tiPR[CA]*clR[A] + tiPR[CC]*clR[C] + tiPR[CG]*clR[G] + tiPR[CT]*clR[T]);
900 clP[h++] = preLikeL[i++]
901 *(tiPR[GA]*clR[A] + tiPR[GC]*clR[C] + tiPR[GG]*clR[G] + tiPR[GT]*clR[T]);
902 clP[h++] = preLikeL[i++]
903 *(tiPR[TA]*clR[A] + tiPR[TC]*clR[C] + tiPR[TG]*clR[G] + tiPR[TT]*clR[T]);
904 clR += 4;
905 }
906 tiPR += 16;
907 }
908 break;
909 case 2:
910 tiPL = pL;
911 for (k=h=0; k<m->numRateCats; k++)
912 {
913 for (c=0; c<m->numChars; c++)
914 {
915 i = rState[c] + k*20;
916 clP[h++] = (tiPL[AA]*clL[A] + tiPL[AC]*clL[C] + tiPL[AG]*clL[G] + tiPL[AT]*clL[T])
917 *preLikeR[i++];
918 clP[h++] = (tiPL[CA]*clL[A] + tiPL[CC]*clL[C] + tiPL[CG]*clL[G] + tiPL[CT]*clL[T])
919 *preLikeR[i++];
920 clP[h++] = (tiPL[GA]*clL[A] + tiPL[GC]*clL[C] + tiPL[GG]*clL[G] + tiPL[GT]*clL[T])
921 *preLikeR[i++];
922 clP[h++] = (tiPL[TA]*clL[A] + tiPL[TC]*clL[C] + tiPL[TG]*clL[G] + tiPL[TT]*clL[T])
923 *preLikeR[i++];
924 clL += 4;
925 }
926 tiPL += 16;
927 }
928 break;
929 case 3:
930 for (k=h=0; k<m->numRateCats; k++)
931 {
932 for (c=0; c<m->numChars; c++)
933 {
934 i = j = k*20;
935 i += lState[c];
936 j += rState[c];
937 clP[h++] = preLikeL[i++]*preLikeR[j++];
938 clP[h++] = preLikeL[i++]*preLikeR[j++];
939 clP[h++] = preLikeL[i++]*preLikeR[j++];
940 clP[h++] = preLikeL[i++]*preLikeR[j++];
941 }
942 }
943 }
944
945 return NO_ERROR;
946 }
947
948
949 /*----------------------------------------------------------------
950 |
951 | CondLikeDown_NUC4_GibbsGamma: 4by4 nucleotide model with rate
952 | variation approximated using Gibbs sampling of gamma
953 |
954 -----------------------------------------------------------------*/
CondLikeDown_NUC4_GibbsGamma(TreeNode * p,int division,int chain)955 int CondLikeDown_NUC4_GibbsGamma (TreeNode *p, int division, int chain)
956 {
957 int c, h, i, j, r, *rateCat, shortCut, *lState=NULL, *rState=NULL,
958 nGammaCats;
959 CLFlt *clL, *clR, *clP, *pL, *pR, *tiPL, *tiPR;
960 ModelInfo *m;
961 # if !defined (DEBUG_NOSHORTCUTS)
962 int k;
963 # endif
964
965 m = &modelSettings[division];
966
967 /* flip conditional likelihood space */
968 FlipCondLikeSpace (m, chain, p->index);
969
970 /* find conditional likelihood pointers */
971 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
972 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
973 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
974
975 /* find transition probabilities */
976 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
977 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
978
979 /* find rate category index and number of gamma categories */
980 rateCat = m->tiIndex + chain * m->numChars;
981 nGammaCats = m->numRateCats;
982
983 /* find likelihoods of site patterns for left branch if terminal */
984 shortCut = 0;
985 # if !defined (DEBUG_NOSHORTCUTS)
986 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
987 {
988 shortCut |= 1;
989 lState = m->termState[p->left->index];
990 tiPL = pL;
991 for (k=j=0; k<nGammaCats; k++)
992 {
993 for (i=0; i<4; i++)
994 {
995 preLikeL[j++] = tiPL[0];
996 preLikeL[j++] = tiPL[4];
997 preLikeL[j++] = tiPL[8];
998 preLikeL[j++] = tiPL[12];
999 tiPL++;
1000 }
1001 /* for ambiguous */
1002 for (i=0; i<4; i++)
1003 preLikeL[j++] = 1.0;
1004 tiPL += 12;
1005 }
1006 }
1007
1008 /* find likelihoods of site patterns for right branch if terminal */
1009 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
1010 {
1011 shortCut |= 2;
1012 rState = m->termState[p->right->index];
1013 tiPR = pR;
1014 for (k=j=0; k<nGammaCats; k++)
1015 {
1016 for (i=0; i<4; i++)
1017 {
1018 preLikeR[j++] = tiPR[0];
1019 preLikeR[j++] = tiPR[4];
1020 preLikeR[j++] = tiPR[8];
1021 preLikeR[j++] = tiPR[12];
1022 tiPR++;
1023 }
1024 /* for ambiguous */
1025 for (i=0; i<4; i++)
1026 preLikeR[j++] = 1.0;
1027 tiPR += 12;
1028 }
1029 }
1030 # endif
1031
1032 switch (shortCut)
1033 {
1034 case 0:
1035 for (c=h=0; c<m->numChars; c++)
1036 {
1037 r = rateCat[c];
1038 if (r < nGammaCats)
1039 {
1040 tiPL = pL + r * 16;
1041 tiPR = pR + r * 16;
1042 clP[h++] = (tiPL[AA]*clL[A] + tiPL[AC]*clL[C] + tiPL[AG]*clL[G] + tiPL[AT]*clL[T])
1043 *(tiPR[AA]*clR[A] + tiPR[AC]*clR[C] + tiPR[AG]*clR[G] + tiPR[AT]*clR[T]);
1044 clP[h++] = (tiPL[CA]*clL[A] + tiPL[CC]*clL[C] + tiPL[CG]*clL[G] + tiPL[CT]*clL[T])
1045 *(tiPR[CA]*clR[A] + tiPR[CC]*clR[C] + tiPR[CG]*clR[G] + tiPR[CT]*clR[T]);
1046 clP[h++] = (tiPL[GA]*clL[A] + tiPL[GC]*clL[C] + tiPL[GG]*clL[G] + tiPL[GT]*clL[T])
1047 *(tiPR[GA]*clR[A] + tiPR[GC]*clR[C] + tiPR[GG]*clR[G] + tiPR[GT]*clR[T]);
1048 clP[h++] = (tiPL[TA]*clL[A] + tiPL[TC]*clL[C] + tiPL[TG]*clL[G] + tiPL[TT]*clL[T])
1049 *(tiPR[TA]*clR[A] + tiPR[TC]*clR[C] + tiPR[TG]*clR[G] + tiPR[TT]*clR[T]);
1050 }
1051 else
1052 h += 4;
1053 clL += 4;
1054 clR += 4;
1055 }
1056 break;
1057 case 1:
1058 for (c=h=0; c<m->numChars; c++)
1059 {
1060 r = rateCat[c];
1061 if (r < nGammaCats)
1062 {
1063 tiPR = pR + r * 16;
1064 i = lState[c] + r * 20;
1065 clP[h++] = preLikeL[i++]
1066 *(tiPR[AA]*clR[A] + tiPR[AC]*clR[C] + tiPR[AG]*clR[G] + tiPR[AT]*clR[T]);
1067 clP[h++] = preLikeL[i++]
1068 *(tiPR[CA]*clR[A] + tiPR[CC]*clR[C] + tiPR[CG]*clR[G] + tiPR[CT]*clR[T]);
1069 clP[h++] = preLikeL[i++]
1070 *(tiPR[GA]*clR[A] + tiPR[GC]*clR[C] + tiPR[GG]*clR[G] + tiPR[GT]*clR[T]);
1071 clP[h++] = preLikeL[i++]
1072 *(tiPR[TA]*clR[A] + tiPR[TC]*clR[C] + tiPR[TG]*clR[G] + tiPR[TT]*clR[T]);
1073 }
1074 else
1075 h += 4;
1076 clR += 4;
1077 }
1078 break;
1079 case 2:
1080 for (c=h=0; c<m->numChars; c++)
1081 {
1082 r = rateCat[c];
1083 if (r < nGammaCats)
1084 {
1085 tiPL = pL + r * 16;
1086 i = rState[c] + r * 20;
1087 clP[h++] = (tiPL[AA]*clL[A] + tiPL[AC]*clL[C] + tiPL[AG]*clL[G] + tiPL[AT]*clL[T])
1088 *preLikeR[i++];
1089 clP[h++] = (tiPL[CA]*clL[A] + tiPL[CC]*clL[C] + tiPL[CG]*clL[G] + tiPL[CT]*clL[T])
1090 *preLikeR[i++];
1091 clP[h++] = (tiPL[GA]*clL[A] + tiPL[GC]*clL[C] + tiPL[GG]*clL[G] + tiPL[GT]*clL[T])
1092 *preLikeR[i++];
1093 clP[h++] = (tiPL[TA]*clL[A] + tiPL[TC]*clL[C] + tiPL[TG]*clL[G] + tiPL[TT]*clL[T])
1094 *preLikeR[i++];
1095 }
1096 else
1097 h += 4;
1098 clL += 4;
1099 }
1100 break;
1101 case 3:
1102 for (c=h=0; c<m->numChars; c++)
1103 {
1104 r = rateCat[c];
1105 if (r < nGammaCats)
1106 {
1107 i = lState[c] + r * 20;
1108 j = rState[c] + r * 20;
1109 clP[h++] = preLikeL[i++]*preLikeR[j++];
1110 clP[h++] = preLikeL[i++]*preLikeR[j++];
1111 clP[h++] = preLikeL[i++]*preLikeR[j++];
1112 clP[h++] = preLikeL[i++]*preLikeR[j++];
1113 }
1114 else
1115 h += 4;
1116 }
1117 break;
1118 }
1119
1120 return NO_ERROR;
1121 }
1122
1123
#if defined (FMA_ENABLED)
/*----------------------------------------------------------------
|
|   CondLikeDown_NUC4_FMA: 4by4 nucleotide model with or without rate
|       variation, using AVX + FMA instructions
|
|   The conditional likelihood space of node p is flipped first, so
|   that the values written here do not overwrite the old ones.
|   Then, for every rate category and every vector of characters,
|   each of the four entries of p is set to the product of a left
|   descendant sum and a right descendant sum. Each sum combines
|   four transition probabilities with the four conditional
|   likelihood vectors of that descendant. Always returns NO_ERROR.
|
-----------------------------------------------------------------*/
int CondLikeDown_NUC4_FMA (TreeNode *p, int division, int chain)
{
    /* transition probability entries feeding each of the four results */
    static const int    tiEntry[4][4] =
        {
        { AA, AC, AG, AT },
        { CA, CC, CG, CT },
        { GA, GC, GG, GT },
        { TA, TC, TG, TT }
        };
    int                 cat, vec, row;
    const int           *e;
    CLFlt               *tiLeft, *tiRight;
    __m256              *left, *right, *parent;
    __m256              sumL, sumR;
    ModelInfo           *m;

    m = &modelSettings[division];

    /* flip before looking up the pointer for p, so we write into the new space */
    FlipCondLikeSpace (m, chain, p->index);

    /* conditional likelihoods of the two descendants and of p itself */
    left   = (__m256 *) m->condLikes[m->condLikeIndex[chain][p->left->index ]];
    right  = (__m256 *) m->condLikes[m->condLikeIndex[chain][p->right->index]];
    parent = (__m256 *) m->condLikes[m->condLikeIndex[chain][p->index       ]];

    /* transition probabilities of the two descendant branches */
    tiLeft  = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
    tiRight = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];

    for (cat=0; cat<m->numRateCats; cat++)
        {
        for (vec=0; vec<m->numVecChars; vec++)
            {
            for (row=0; row<4; row++)
                {
                e = tiEntry[row];

                /* first term is a plain product, the other three are fused multiply-adds */
                sumL = _mm256_mul_ps (_mm256_broadcast_ss (&tiLeft[e[0]]),  left[A]);
                sumR = _mm256_mul_ps (_mm256_broadcast_ss (&tiRight[e[0]]), right[A]);

                sumL = _mm256_fmadd_ps (_mm256_broadcast_ss (&tiLeft[e[1]]),  left[C],  sumL);
                sumR = _mm256_fmadd_ps (_mm256_broadcast_ss (&tiRight[e[1]]), right[C], sumR);

                sumL = _mm256_fmadd_ps (_mm256_broadcast_ss (&tiLeft[e[2]]),  left[G],  sumL);
                sumR = _mm256_fmadd_ps (_mm256_broadcast_ss (&tiRight[e[2]]), right[G], sumR);

                sumL = _mm256_fmadd_ps (_mm256_broadcast_ss (&tiLeft[e[3]]),  left[T],  sumL);
                sumR = _mm256_fmadd_ps (_mm256_broadcast_ss (&tiRight[e[3]]), right[T], sumR);

                *parent++ = _mm256_mul_ps (sumL, sumR);
                }

            /* next vector of characters: four vectors per descendant */
            left  += 4;
            right += 4;
            }

        /* next rate category: sixteen transition probabilities per branch */
        tiLeft  += 16;
        tiRight += 16;
        }

    return NO_ERROR;
}
#endif
1258
1259
#if defined (AVX_ENABLED)
/*----------------------------------------------------------------
|
|   CondLikeDown_NUC4_AVX: 4by4 nucleotide model with or without rate
|       variation, using AVX instructions
|
|   The conditional likelihood space of node p is flipped first, so
|   that the values written here do not overwrite the old ones.
|   Then, for every rate category and every vector of characters,
|   each of the four entries of p is set to the product of a left
|   descendant sum and a right descendant sum. Each sum adds up four
|   products of a transition probability and a conditional
|   likelihood vector of that descendant. Always returns NO_ERROR.
|
-----------------------------------------------------------------*/
int CondLikeDown_NUC4_AVX (TreeNode *p, int division, int chain)
{
    /* transition probability entries feeding each of the four results */
    static const int    tiEntry[4][4] =
        {
        { AA, AC, AG, AT },
        { CA, CC, CG, CT },
        { GA, GC, GG, GT },
        { TA, TC, TG, TT }
        };
    int                 cat, vec, row;
    const int           *e;
    CLFlt               *tiLeft, *tiRight;
    __m256              *left, *right, *parent;
    __m256              sumL, sumR;
    ModelInfo           *m;

    m = &modelSettings[division];

    /* flip before looking up the pointer for p, so we write into the new space */
    FlipCondLikeSpace (m, chain, p->index);

    /* conditional likelihoods of the two descendants and of p itself */
    left   = (__m256 *) m->condLikes[m->condLikeIndex[chain][p->left->index ]];
    right  = (__m256 *) m->condLikes[m->condLikeIndex[chain][p->right->index]];
    parent = (__m256 *) m->condLikes[m->condLikeIndex[chain][p->index       ]];

    /* transition probabilities of the two descendant branches */
    tiLeft  = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
    tiRight = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];

    for (cat=0; cat<m->numRateCats; cat++)
        {
        for (vec=0; vec<m->numVecChars; vec++)
            {
            for (row=0; row<4; row++)
                {
                e = tiEntry[row];

                /* start each sum with the first product, then add the other three */
                sumL = _mm256_mul_ps (_mm256_broadcast_ss (&tiLeft[e[0]]),  left[A]);
                sumR = _mm256_mul_ps (_mm256_broadcast_ss (&tiRight[e[0]]), right[A]);

                sumL = _mm256_add_ps (_mm256_mul_ps (_mm256_broadcast_ss (&tiLeft[e[1]]),  left[C]),  sumL);
                sumR = _mm256_add_ps (_mm256_mul_ps (_mm256_broadcast_ss (&tiRight[e[1]]), right[C]), sumR);

                sumL = _mm256_add_ps (_mm256_mul_ps (_mm256_broadcast_ss (&tiLeft[e[2]]),  left[G]),  sumL);
                sumR = _mm256_add_ps (_mm256_mul_ps (_mm256_broadcast_ss (&tiRight[e[2]]), right[G]), sumR);

                sumL = _mm256_add_ps (_mm256_mul_ps (_mm256_broadcast_ss (&tiLeft[e[3]]),  left[T]),  sumL);
                sumR = _mm256_add_ps (_mm256_mul_ps (_mm256_broadcast_ss (&tiRight[e[3]]), right[T]), sumR);

                *parent++ = _mm256_mul_ps (sumL, sumR);
                }

            /* next vector of characters: four vectors per descendant */
            left  += 4;
            right += 4;
            }

        /* next rate category: sixteen transition probabilities per branch */
        tiLeft  += 16;
        tiRight += 16;
        }

    return NO_ERROR;
}
#endif
1418
1419
#if defined (SSE_ENABLED)
/*----------------------------------------------------------------
|
|   CondLikeDown_NUC4_SSE: 4by4 nucleotide model with or without rate
|       variation, using SSE instructions
|
|   The conditional likelihood space of node p is flipped first, so
|   that the values written here do not overwrite the old ones.
|   Then, for every rate category and every vector of characters,
|   each of the four entries of p is set to the product of a left
|   descendant sum and a right descendant sum. Each sum adds up four
|   products of a transition probability and a conditional
|   likelihood vector of that descendant. Always returns NO_ERROR.
|
-----------------------------------------------------------------*/
int CondLikeDown_NUC4_SSE (TreeNode *p, int division, int chain)
{
    /* transition probability entries feeding each of the four results */
    static const int    tiEntry[4][4] =
        {
        { AA, AC, AG, AT },
        { CA, CC, CG, CT },
        { GA, GC, GG, GT },
        { TA, TC, TG, TT }
        };
    int                 cat, vec, row;
    const int           *e;
    CLFlt               *tiLeft, *tiRight;
    __m128              *left, *right, *parent;
    __m128              sumL, sumR;
    ModelInfo           *m;

    m = &modelSettings[division];

    /* flip before looking up the pointer for p, so we write into the new space */
    FlipCondLikeSpace (m, chain, p->index);

    /* conditional likelihoods of the two descendants and of p itself */
    left   = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->left->index ]];
    right  = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->right->index]];
    parent = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->index       ]];

    /* transition probabilities of the two descendant branches */
    tiLeft  = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
    tiRight = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];

    for (cat=0; cat<m->numRateCats; cat++)
        {
        for (vec=0; vec<m->numVecChars; vec++)
            {
            for (row=0; row<4; row++)
                {
                e = tiEntry[row];

                /* start each sum with the first product, then add the other three */
                sumL = _mm_mul_ps (_mm_load1_ps (&tiLeft[e[0]]),  left[A]);
                sumR = _mm_mul_ps (_mm_load1_ps (&tiRight[e[0]]), right[A]);

                sumL = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiLeft[e[1]]),  left[C]),  sumL);
                sumR = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiRight[e[1]]), right[C]), sumR);

                sumL = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiLeft[e[2]]),  left[G]),  sumL);
                sumR = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiRight[e[2]]), right[G]), sumR);

                sumL = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiLeft[e[3]]),  left[T]),  sumL);
                sumR = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiRight[e[3]]), right[T]), sumR);

                *parent++ = _mm_mul_ps (sumL, sumR);
                }

            /* next vector of characters: four vectors per descendant */
            left  += 4;
            right += 4;
            }

        /* next rate category: sixteen transition probabilities per branch */
        tiLeft  += 16;
        tiRight += 16;
        }

    return NO_ERROR;
}
#endif
1578
1579
1580 #if !defined (SSE_ENABLED) || 1
1581 /*----------------------------------------------------------------
1582 |
1583 | CondLikeDown_NY98: codon model with omega variation
1584 |
1585 -----------------------------------------------------------------*/
CondLikeDown_NY98(TreeNode * p,int division,int chain)1586 int CondLikeDown_NY98 (TreeNode *p, int division, int chain)
1587 {
1588 int a, b, c, h, i, j, k, shortCut, *lState=NULL, *rState=NULL, nStates, nStatesSquared;
1589 CLFlt likeL, likeR, *pL, *pR, *tiPL, *tiPR, *clL, *clR, *clP;
1590 ModelInfo *m;
1591
1592 /* find model settings for this division and nStates, nStatesSquared */
1593 m = &modelSettings[division];
1594 nStates = m->numModelStates;
1595 nStatesSquared = nStates * nStates;
1596
1597 /* Flip conditional likelihood space */
1598 FlipCondLikeSpace (m, chain, p->index);
1599
1600 /* find conditional likelihood pointers */
1601 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
1602 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
1603 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
1604
1605 /* find transition probabilities */
1606 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
1607 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
1608
1609 /* find likelihoods of site patterns for left branch if terminal */
1610 shortCut = 0;
1611 # if !defined (DEBUG_NOSHORTCUTS)
1612 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
1613 {
1614 shortCut |= 1;
1615 lState = m->termState[p->left->index];
1616 tiPL = pL;
1617 for (k=a=0; k<m->numOmegaCats; k++)
1618 {
1619 for (i=0; i<nStates; i++)
1620 for (j=i; j<nStatesSquared; j+=nStates)
1621 preLikeL[a++] = tiPL[j];
1622 /* for ambiguous */
1623 for (i=0; i<nStates; i++)
1624 preLikeL[a++] = 1.0;
1625 tiPL += nStatesSquared;
1626 }
1627 }
1628
1629 /* find likelihoods of site patterns for right branch if terminal */
1630 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
1631 {
1632 shortCut |= 2;
1633 rState = m->termState[p->right->index];
1634 tiPR = pR;
1635 for (k=a=0; k<m->numOmegaCats; k++)
1636 {
1637 for (i=0; i<nStates; i++)
1638 for (j=i; j<nStatesSquared; j+=nStates)
1639 preLikeR[a++] = tiPR[j];
1640 /* for ambiguous */
1641 for (i=0; i<nStates; i++)
1642 preLikeR[a++] = 1.0;
1643 tiPR += nStatesSquared;
1644 }
1645 }
1646 # endif
1647
1648 switch (shortCut)
1649 {
1650 case 0:
1651 tiPL = pL;
1652 tiPR = pR;
1653 for (k=0; k<m->numOmegaCats; k++)
1654 {
1655 for (c=0; c<m->numChars; c++)
1656 {
1657 for (i=h=0; i<nStates; i++)
1658 {
1659 likeL = likeR = 0.0;
1660 for (j=0; j<nStates; j++)
1661 {
1662 likeL += tiPL[h]*clL[j];
1663 likeR += tiPR[h++]*clR[j];
1664 }
1665 *(clP++) = likeL * likeR;
1666 }
1667 clL += nStates;
1668 clR += nStates;
1669 }
1670 tiPL += nStatesSquared;
1671 tiPR += nStatesSquared;
1672 }
1673 break;
1674 case 1:
1675 tiPR = pR;
1676 for (k=0; k<m->numOmegaCats; k++)
1677 {
1678 for (c=0; c<m->numChars; c++)
1679 {
1680 a = lState[c] + k*(nStatesSquared+nStates);
1681 for (i=h=0; i<nStates; i++)
1682 {
1683 likeR = 0.0;
1684 for (j=0; j<nStates; j++)
1685 {
1686 likeR += tiPR[h++]*clR[j];
1687 }
1688 *(clP++) = preLikeL[a++] * likeR;
1689 }
1690 clR += nStates;
1691 }
1692 tiPR += nStatesSquared;
1693 }
1694 break;
1695 case 2:
1696 tiPL = pL;
1697 for (k=0; k<m->numOmegaCats; k++)
1698 {
1699 for (c=0; c<m->numChars; c++)
1700 {
1701 a = rState[c] + k*(nStatesSquared+nStates);
1702 for (i=h=0; i<nStates; i++)
1703 {
1704 likeL = 0.0;
1705 for (j=0; j<nStates; j++)
1706 {
1707 likeL += tiPL[h++]*clL[j];
1708 }
1709 *(clP++) = preLikeR[a++] * likeL;
1710 }
1711 clL += nStates;
1712 }
1713 tiPL += nStatesSquared;
1714 }
1715 break;
1716 case 3:
1717 for (k=0; k<m->numOmegaCats; k++)
1718 {
1719 for (c=0; c<m->numChars; c++)
1720 {
1721 a = rState[c] + k*(nStatesSquared+nStates);
1722 b = lState[c] + k*(nStatesSquared+nStates);
1723 for (i=0; i<nStates; i++)
1724 {
1725 *(clP++) = preLikeR[a++] * preLikeL[b++];
1726 }
1727 }
1728 }
1729 break;
1730 }
1731
1732 return NO_ERROR;
1733 }
1734 #endif
1735
1736
#if defined (SSE_ENABLED)
/*----------------------------------------------------------------
|
|   CondLikeDown_NY98_SSE: codon model with omega variation
|
|   SSE version: every __m128 holds the values of one state for
|   numFloatsPerVec consecutive characters. The conditional
|   likelihood space of node p is flipped and then filled, one omega
|   category at a time, with the product of the left and right
|   descendant contributions. Unless DEBUG_NOSHORTCUTS is defined, a
|   descendant that has no left child and whose isPartAmbig entry is
|   NO is handled through a lookup table (preLikeL / preLikeR)
|   addressed by its termState entries. Always returns NO_ERROR.
|
|   The table-based cases gather exactly four scalars per vector
|   through four-element pointer arrays, so they require
|   numFloatsPerVec == 4. This is asserted once on entry to each of
|   those cases, before the pointer arrays are written.
|
-----------------------------------------------------------------*/
int CondLikeDown_NY98_SSE (TreeNode *p, int division, int chain)
{
    int             c, c1, h, i, j, k, t, shortCut, *lState=NULL, *rState=NULL, nStates, nStatesSquared;
    CLFlt           *pL, *pR, *tiPL, *tiPR;
    __m128          *clL, *clR, *clP;
    __m128          mTiPL, mTiPR, mL, mR, mAcumL, mAcumR;
    ModelInfo       *m;
    CLFlt           *preLikeRV[4] = {0};    /* one table cursor per vector lane, right descendant */
    CLFlt           *preLikeLV[4] = {0};    /* one table cursor per vector lane, left descendant */
#   if !defined (DEBUG_NOSHORTCUTS)
    int             a;
#   endif

    /* find model settings for this division and nStates, nStatesSquared */
    m = &modelSettings[division];
    nStates = m->numModelStates;
    nStatesSquared = nStates * nStates;

    /* Flip conditional likelihood space */
    FlipCondLikeSpace (m, chain, p->index);

    /* find conditional likelihood pointers */
    clL = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->left->index ]];
    clR = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->right->index]];
    clP = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->index       ]];

    /* find transition probabilities */
    pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
    pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];

    /* find likelihoods of site patterns for left branch if terminal */
    shortCut = 0;
#   if !defined (DEBUG_NOSHORTCUTS)
    if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
        {
        shortCut |= 1;
        lState = m->termState[p->left->index];
        tiPL = pL;
        for (k=a=0; k<m->numOmegaCats; k++)
            {
            /* store the matrix transposed: walk down column i in steps of nStates */
            for (i=0; i<nStates; i++)
                for (j=i; j<nStatesSquared; j+=nStates)
                    preLikeL[a++] = tiPL[j];
            /* for ambiguous */
            for (i=0; i<nStates; i++)
                preLikeL[a++] = 1.0;
            tiPL += nStatesSquared;
            }
        }

    /* find likelihoods of site patterns for right branch if terminal */
    if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
        {
        shortCut |= 2;
        rState = m->termState[p->right->index];
        tiPR = pR;
        for (k=a=0; k<m->numOmegaCats; k++)
            {
            /* store the matrix transposed: walk down column i in steps of nStates */
            for (i=0; i<nStates; i++)
                for (j=i; j<nStatesSquared; j+=nStates)
                    preLikeR[a++] = tiPR[j];
            /* for ambiguous */
            for (i=0; i<nStates; i++)
                preLikeR[a++] = 1.0;
            tiPR += nStatesSquared;
            }
        }
#   endif

    switch (shortCut)
        {
        case 0:
            /* both descendants need the full matrix-vector product */
            tiPL = pL;
            tiPR = pR;
            for (k=0; k<m->numOmegaCats; k++)
                {
                for (c=0; c<m->numVecChars; c++)
                    {
                    for (i=h=0; i<nStates; i++)
                        {
                        mAcumL = _mm_setzero_ps();
                        mAcumR = _mm_setzero_ps();
                        for (j=0; j<nStates; j++)
                            {
                            mTiPL  = _mm_load1_ps (&tiPL[h]);
                            mTiPR  = _mm_load1_ps (&tiPR[h++]);
                            mL     = _mm_mul_ps (mTiPL, clL[j]);
                            mR     = _mm_mul_ps (mTiPR, clR[j]);
                            mAcumL = _mm_add_ps (mL, mAcumL);
                            mAcumR = _mm_add_ps (mR, mAcumR);
                            }
                        *(clP++) = _mm_mul_ps (mAcumL, mAcumR);
                        }
                    clL += nStates;
                    clR += nStates;
                    }
                tiPL += nStatesSquared;
                tiPR += nStatesSquared;
                }
            break;
        case 1:
            /* left from the table, right computed */
            /* preLikeLV has four slots and _mm_set_ps takes four scalars, so an SSE
               register must hold exactly 4 CLFlts; check before the slots are written */
            assert (m->numFloatsPerVec == 4);
            tiPR = pR;
            for (k=0; k<m->numOmegaCats; k++)
                {
                for (c=t=0; c<m->numVecChars; c++)
                    {
                    /* one table cursor per character packed into this vector */
                    for (c1=0; c1<m->numFloatsPerVec; c1++,t++)
                        {
                        preLikeLV[c1] = &preLikeL[lState[t] + k*(nStatesSquared+nStates)];
                        }
                    for (i=h=0; i<nStates; i++)
                        {
                        mAcumL = _mm_set_ps (*(preLikeLV[3]++), *(preLikeLV[2]++), *(preLikeLV[1]++), *(preLikeLV[0]++));
                        mAcumR = _mm_setzero_ps();
                        for (j=0; j<nStates; j++)
                            {
                            mTiPR  = _mm_load1_ps (&tiPR[h++]);
                            mR     = _mm_mul_ps (mTiPR, clR[j]);
                            mAcumR = _mm_add_ps (mR, mAcumR);
                            }
                        *(clP++) = _mm_mul_ps (mAcumL,mAcumR);
                        }
                    clR += nStates;
                    }
                tiPR += nStatesSquared;
                }
            break;
        case 2:
            /* right from the table, left computed */
            /* preLikeRV has four slots and _mm_set_ps takes four scalars, so an SSE
               register must hold exactly 4 CLFlts; check before the slots are written */
            assert (m->numFloatsPerVec == 4);
            tiPL = pL;
            for (k=0; k<m->numOmegaCats; k++)
                {
                for (c=t=0; c<m->numVecChars; c++)
                    {
                    /* one table cursor per character packed into this vector */
                    for (c1=0; c1<m->numFloatsPerVec; c1++,t++)
                        {
                        preLikeRV[c1] = &preLikeR[rState[t] + k*(nStatesSquared+nStates)];
                        }
                    for (i=h=0; i<nStates; i++)
                        {
                        mAcumR = _mm_set_ps (*(preLikeRV[3]++), *(preLikeRV[2]++), *(preLikeRV[1]++), *(preLikeRV[0]++));
                        mAcumL = _mm_setzero_ps();
                        for (j=0; j<nStates; j++)
                            {
                            mTiPL  = _mm_load1_ps (&tiPL[h++]);
                            mL     = _mm_mul_ps (mTiPL, clL[j]);
                            mAcumL = _mm_add_ps (mL, mAcumL);
                            }
                        *(clP++) = _mm_mul_ps (mAcumL,mAcumR);
                        }
                    clL += nStates;
                    }
                tiPL += nStatesSquared;
                }
            break;
        case 3:
            /* both descendants come straight from the tables */
            /* preLikeLV / preLikeRV have four slots and _mm_set_ps takes four scalars, so an
               SSE register must hold exactly 4 CLFlts; check before the slots are written */
            assert (m->numFloatsPerVec == 4);
            for (k=0; k<m->numOmegaCats; k++)
                {
                for (c=t=0; c<m->numVecChars; c++)
                    {
                    /* one table cursor per character packed into this vector */
                    for (c1=0; c1<m->numFloatsPerVec; c1++,t++)
                        {
                        preLikeRV[c1] = &preLikeR[rState[t] + k*(nStatesSquared+nStates)];
                        preLikeLV[c1] = &preLikeL[lState[t] + k*(nStatesSquared+nStates)];
                        }
                    for (i=0; i<nStates; i++)
                        {
                        mL = _mm_set_ps (*(preLikeLV[3]++), *(preLikeLV[2]++), *(preLikeLV[1]++), *(preLikeLV[0]++));
                        mR = _mm_set_ps (*(preLikeRV[3]++), *(preLikeRV[2]++), *(preLikeRV[1]++), *(preLikeRV[0]++));
                        *(clP++) = _mm_mul_ps (mL,mR);
                        }
                    }
                }
            break;
        }

    return NO_ERROR;
}
#endif
1924
1925
1926 /*----------------------------------------------------------------
1927 |
1928 | CondLikeDown_Std: variable number of states model
1929 | with or without rate variation
1930 |
1931 -----------------------------------------------------------------*/
CondLikeDown_Std(TreeNode * p,int division,int chain)1932 int CondLikeDown_Std (TreeNode *p, int division, int chain)
1933 {
1934 int a, c, h, i, j, k, nStates, nCats, tmp;
1935 CLFlt *clL, *clR, *clP, *pL, *pR, *tiPL, *tiPR, likeL, likeR;
1936 ModelInfo *m;
1937
1938 m = &modelSettings[division];
1939
1940 /* Flip conditional likelihood space */
1941 FlipCondLikeSpace (m, chain, p->index);
1942
1943 /* find conditional likelihood pointers */
1944 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
1945 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
1946 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
1947
1948 /* find transition probabilities */
1949 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
1950 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
1951
1952 /* Conditional likelihood space is assumed to be arranged in numGammaCats blocks of data. Each block contains all data for one gamma category.
1953 Each gamma cat block consist of numChars sequences of data, each of this sequences corresponds to a character of data matrix.
1954 A sequence consists of nStates for all non-binary data, otherwise length of sequence is nStates*numBetaCats (i.e. 2*numBetaCats) */
1955
1956 /* calculate ancestral probabilities */
1957 for (k=h=0; k<m->numRateCats; k++)
1958 {
1959 /* calculate ancestral probabilities */
1960 for (c=0; c<m->numChars; c++)
1961 {
1962 nStates = m->nStates[c];
1963
1964 /* the following lines ensure that nCats is 1 unless */
1965 /* the character is binary and beta categories are used */
1966 if (nStates == 2)
1967 nCats = m->numBetaCats;
1968 else
1969 nCats = 1;
1970
1971 tmp = k*nStates*nStates; /* tmp contains offset to skip rate cats that already processed*/
1972 tiPL = pL + m->tiIndex[c] + tmp;
1973 tiPR = pR + m->tiIndex[c] + tmp;
1974 tmp = (m->numRateCats-1)*2*2; /* tmp contains size of block of tpi matrices across all rate cats (minus one) for single beta category. Further used only if character is binary to jump to next beta category */
1975
1976 for (j=0; j<nCats;j++)
1977 {
1978 for (a=0; a<nStates; a++)
1979 {
1980 likeL = likeR = 0.0;
1981 for (i=0; i<nStates; i++)
1982 {
1983 likeL += *(tiPL++) * clL[i];
1984 likeR += *(tiPR++) * clR[i];
1985 }
1986 clP[h++] = likeL * likeR;
1987 }
1988 clL += nStates;
1989 clR += nStates;
1990
1991 tiPL += tmp;
1992 tiPR += tmp;
1993 }
1994 }
1995 }
1996
1997 return NO_ERROR;
1998 }
1999
2000
2001 #if !defined (SSE_ENABLED) || 1
2002 /*----------------------------------------------------------------
2003 |
2004 | CondLikeRoot_Bin: binary model with or without rate
2005 | variation
2006 |
2007 -----------------------------------------------------------------*/
CondLikeRoot_Bin(TreeNode * p,int division,int chain)2008 int CondLikeRoot_Bin (TreeNode *p, int division, int chain)
2009 {
2010 int c, k;
2011 CLFlt *clL, *clR, *clP, *clA, *pL, *pR, *pA, *tiPL, *tiPR, *tiPA;
2012 ModelInfo *m;
2013
2014 /* find model settings for this division */
2015 m = &modelSettings[division];
2016
2017 /* flip state of node so that we are not overwriting old cond likes */
2018 FlipCondLikeSpace (m, chain, p->index);
2019
2020 /* find conditional likelihood pointers */
2021 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
2022 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
2023 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
2024 clA = m->condLikes[m->condLikeIndex[chain][p->anc->index ]];
2025
2026 /* find transition probabilities (or calculate instead) */
2027 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
2028 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
2029 pA = m->tiProbs[m->tiProbsIndex[chain][p->index ]];
2030
2031 tiPL = pL;
2032 tiPR = pR;
2033 tiPA = pA;
2034 for (k=0; k<m->numRateCats; k++)
2035 {
2036 for (c=0; c<m->numChars; c++)
2037 {
2038 *(clP++) = (tiPL[0]*clL[0] + tiPL[1]*clL[1])
2039 *(tiPR[0]*clR[0] + tiPR[1]*clR[1])
2040 *(tiPA[0]*clA[0] + tiPA[1]*clA[1]);
2041 *(clP++) = (tiPL[2]*clL[0] + tiPL[3]*clL[1])
2042 *(tiPR[2]*clR[0] + tiPR[3]*clR[1])
2043 *(tiPA[2]*clA[0] + tiPA[3]*clA[1]);
2044
2045 clA += 2;
2046 clL += 2;
2047 clR += 2;
2048 }
2049 tiPA += 4;
2050 tiPL += 4;
2051 tiPR += 4;
2052 }
2053
2054 return NO_ERROR;
2055 }
2056 #endif
2057
2058
#if defined (SSE_ENABLED)
/*----------------------------------------------------------------
|
|   CondLikeRoot_Bin_SSE: binary model with or without rate
|       variation
|
|   SSE version. Flips the conditional likelihood space of node p
|   and fills it, for every rate category and vector of characters,
|   with the product of three contributions: left descendant, right
|   descendant and ancestor (p->anc). The ancestor contribution uses
|   the transition probabilities stored under p's own index. Always
|   returns NO_ERROR.
|
-----------------------------------------------------------------*/
int CondLikeRoot_Bin_SSE (TreeNode *p, int division, int chain)
{
    int             cat, vec;
    CLFlt           *tiL, *tiR, *tiA;
    __m128          *clL, *clR, *clP, *clA;
    __m128          l0, l1, r0, r1, a0, a1;
    __m128          left0, left1, right0, right1, anc0, anc1;
    ModelInfo       *m;

    m = &modelSettings[division];

    /* flip before looking up the pointer for p, so we write into the new space */
    FlipCondLikeSpace (m, chain, p->index);

    /* conditional likelihoods of the three neighbours and of p itself */
    clL = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->left->index ]];
    clR = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->right->index]];
    clP = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->index       ]];
    clA = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->anc->index  ]];

    /* transition probabilities of the three branches meeting at p */
    tiL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
    tiR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
    tiA = m->tiProbs[m->tiProbsIndex[chain][p->index       ]];

    for (cat=0; cat<m->numRateCats; cat++)
        {
        for (vec=0; vec<m->numVecChars; vec++)
            {
            /* the two state vectors of each neighbour for this vector of characters */
            l0 = clL[0];
            l1 = clL[1];
            r0 = clR[0];
            r1 = clR[1];
            a0 = clA[0];
            a1 = clA[1];

            /* left0 = tiL[0]*clL[0] + tiL[1]*clL[1],  left1 = tiL[3]*clL[1] + tiL[2]*clL[0] */
            left0  = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiL[0]), l0), _mm_mul_ps (_mm_load1_ps (&tiL[1]), l1));
            left1  = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiL[3]), l1), _mm_mul_ps (_mm_load1_ps (&tiL[2]), l0));

            /* right0 = tiR[0]*clR[0] + tiR[1]*clR[1],  right1 = tiR[3]*clR[1] + tiR[2]*clR[0] */
            right0 = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiR[0]), r0), _mm_mul_ps (_mm_load1_ps (&tiR[1]), r1));
            right1 = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiR[3]), r1), _mm_mul_ps (_mm_load1_ps (&tiR[2]), r0));

            /* anc0 = tiA[0]*clA[0] + tiA[1]*clA[1],  anc1 = tiA[3]*clA[1] + tiA[2]*clA[0] */
            anc0   = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiA[0]), a0), _mm_mul_ps (_mm_load1_ps (&tiA[1]), a1));
            anc1   = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiA[3]), a1), _mm_mul_ps (_mm_load1_ps (&tiA[2]), a0));

            /* descendants are combined first, the ancestor is multiplied in last */
            clP[0] = _mm_mul_ps (anc0, _mm_mul_ps (right0, left0));
            clP[1] = _mm_mul_ps (anc1, _mm_mul_ps (left1, right1));

            /* two vectors per vector of characters in every buffer */
            clL += 2;
            clR += 2;
            clA += 2;
            clP += 2;
            }

        /* next rate category: four transition probabilities per branch */
        tiL += 4;
        tiR += 4;
        tiA += 4;
        }

    return NO_ERROR;
}
#endif
2158
2159
2160 /*----------------------------------------------------------------
2161 |
2162 | CondLikeRoot_Gen: general n-state model with or without rate
2163 | variation
2164 |
2165 -----------------------------------------------------------------*/
CondLikeRoot_Gen(TreeNode * p,int division,int chain)2166 int CondLikeRoot_Gen (TreeNode *p, int division, int chain)
2167 {
2168 int a, b, c, d, h, i, j, k, shortCut, *lState=NULL, *rState=NULL, *aState=NULL,
2169 nObsStates, nStates, nStatesSquared, preLikeJump;
2170 CLFlt likeL, likeR, likeA, *clL, *clR, *clP, *clA, *pL, *pR, *pA,
2171 *tiPL, *tiPR, *tiPA;
2172 ModelInfo *m;
2173 # if !defined (DEBUG_NOSHORTCUTS)
2174 int catStart;
2175 # endif
2176
2177 /* find model settings for this division and nStates, nStatesSquared */
2178 m = &modelSettings[division];
2179 nObsStates = m->numStates;
2180 nStates = m->numModelStates;
2181 nStatesSquared = nStates * nStates;
2182 preLikeJump = nObsStates * nStates;
2183
2184 /* flip state of node so that we are not overwriting old cond likes */
2185 FlipCondLikeSpace (m, chain, p->index);
2186
2187 /* find conditional likelihood pointers */
2188 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
2189 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
2190 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
2191 clA = m->condLikes[m->condLikeIndex[chain][p->anc->index ]];
2192
2193 /* find transition probabilities (or calculate instead) */
2194 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
2195 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
2196 pA = m->tiProbs[m->tiProbsIndex[chain][p->index ]];
2197
2198 /* find likelihoods of site patterns for left branch if terminal */
2199 shortCut = 0;
2200 # if !defined (DEBUG_NOSHORTCUTS)
2201 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
2202 {
2203 shortCut |= 1;
2204 lState = m->termState[p->left->index];
2205 tiPL = pL;
2206 for (k=a=0; k<m->numRateCats; k++)
2207 {
2208 catStart = a;
2209 for (i=0; i<nObsStates; i++)
2210 for (j=i; j<nStatesSquared; j+=nStates)
2211 preLikeL[a++] = tiPL[j];
2212 for (b=1; b<nStates/nObsStates; b++)
2213 {
2214 a = catStart;
2215 for (i=0; i<nObsStates; i++)
2216 {
2217 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
2218 preLikeL[a++] += tiPL[j];
2219 }
2220 }
2221 /* for ambiguous */
2222 for (i=0; i<nStates; i++)
2223 preLikeL[a++] = 1.0;
2224 tiPL += nStatesSquared;
2225 }
2226 }
2227
2228 /* find likelihoods of site patterns for right branch if terminal */
2229 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
2230 {
2231 shortCut |= 2;
2232 rState = m->termState[p->right->index];
2233 tiPR = pR;
2234 for (k=a=0; k<m->numRateCats; k++)
2235 {
2236 catStart = a;
2237 for (i=0; i<nObsStates; i++)
2238 for (j=i; j<nStatesSquared; j+=nStates)
2239 preLikeR[a++] = tiPR[j];
2240 for (b=1; b<nStates/nObsStates; b++)
2241 {
2242 a = catStart;
2243 for (i=0; i<nObsStates; i++)
2244 {
2245 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
2246 preLikeR[a++] += tiPR[j];
2247 }
2248 }
2249 /* for ambiguous */
2250 for (i=0; i<nStates; i++)
2251 preLikeR[a++] = 1.0;
2252 tiPR += nStatesSquared;
2253 }
2254 }
2255
2256 /* find likelihoods of site patterns for anc branch, always terminal */
2257 if (m->isPartAmbig[p->anc->index] == YES)
2258 {
2259 shortCut = 4;
2260 }
2261 else
2262 {
2263 aState = m->termState[p->anc->index];
2264 tiPA = pA;
2265 for (k=a=0; k<m->numRateCats; k++)
2266 {
2267 catStart = a;
2268 for (i=0; i<nObsStates; i++)
2269 for (j=i; j<nStatesSquared; j+=nStates)
2270 preLikeA[a++] = tiPA[j];
2271 for (b=1; b<nStates/nObsStates; b++)
2272 {
2273 a = catStart;
2274 for (i=0; i<nObsStates; i++)
2275 {
2276 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
2277 preLikeA[a++] += tiPA[j];
2278 }
2279 }
2280 /* for ambiguous */
2281 for (i=0; i<nStates; i++)
2282 preLikeA[a++] = 1.0;
2283 tiPA += nStatesSquared;
2284 }
2285 }
2286 # else
2287 shortCut = 4;
2288 # endif
2289
2290 //shortCut = 4;
2291 switch (shortCut)
2292 {
2293 case 4:
2294 tiPL = pL;
2295 tiPR = pR;
2296 tiPA = pA;
2297 for (k=0; k<m->numRateCats; k++)
2298 {
2299 for (c=0; c<m->numChars; c++)
2300 {
2301 for (i=h=0; i<nStates; i++)
2302 {
2303 likeL = likeR = likeA = 0.0;
2304 for (j=0; j<nStates; j++)
2305 {
2306 likeL += tiPL[h]*clL[j];
2307 likeR += tiPR[h]*clR[j];
2308 likeA += tiPA[h++]*clA[j];
2309 }
2310 *(clP++) = likeL * likeR * likeA;
2311 }
2312 clL += nStates;
2313 clR += nStates;
2314 clA += nStates;
2315 }
2316 tiPL += nStatesSquared;
2317 tiPR += nStatesSquared;
2318 tiPA += nStatesSquared;
2319 }
2320 break;
2321 case 0:
2322 tiPR = pR;
2323 tiPL = pL;
2324 for (k=0; k<m->numRateCats; k++)
2325 {
2326 for (c=0; c<m->numChars; c++)
2327 {
2328 a = aState[c] + k*(preLikeJump+nStates);
2329 for (i=h=0; i<nStates; i++)
2330 {
2331 likeR = likeL = 0.0;
2332 for (j=0; j<nStates; j++)
2333 {
2334 likeR += tiPR[h]*clR[j];
2335 likeL += tiPL[h++]*clL[j];
2336 }
2337 *(clP++) = preLikeA[a++] * likeR * likeL;
2338 }
2339 clR += nStates;
2340 clL += nStates;
2341 }
2342 tiPR += nStatesSquared;
2343 tiPL += nStatesSquared;
2344 }
2345 break;
2346 case 1:
2347 tiPR = pR;
2348 for (k=0; k<m->numRateCats; k++)
2349 {
2350 for (c=0; c<m->numChars; c++)
2351 {
2352 a = lState[c] + k*(preLikeJump+nStates);
2353 b = aState[c] + k*(preLikeJump+nStates);
2354 for (i=h=0; i<nStates; i++)
2355 {
2356 likeR = 0.0;
2357 for (j=0; j<nStates; j++)
2358 {
2359 likeR += tiPR[h++]*clR[j];
2360 }
2361 *(clP++) = preLikeL[a++] * preLikeA[b++] * likeR;
2362 }
2363 clR += nStates;
2364 }
2365 tiPR += nStatesSquared;
2366 }
2367 break;
2368 case 2:
2369 tiPL = pL;
2370 for (k=0; k<m->numRateCats; k++)
2371 {
2372 for (c=0; c<m->numChars; c++)
2373 {
2374 a = rState[c] + k*(preLikeJump+nStates);
2375 b = aState[c] + k*(preLikeJump+nStates);
2376 for (i=h=0; i<nStates; i++)
2377 {
2378 likeL = 0.0;
2379 for (j=0; j<nStates; j++)
2380 {
2381 likeL += tiPL[h++]*clL[j];
2382 }
2383 *(clP++) = preLikeR[a++] * preLikeA[b++] * likeL;
2384 }
2385 clL += nStates;
2386 }
2387 tiPL += nStatesSquared;
2388 }
2389 break;
2390 case 3:
2391 for (k=0; k<m->numRateCats; k++)
2392 {
2393 for (c=0; c<m->numChars; c++)
2394 {
2395 a = rState[c] + k*(preLikeJump+nStates);
2396 b = lState[c] + k*(preLikeJump+nStates);
2397 d = aState[c] + k*(preLikeJump+nStates);
2398 for (i=0; i<nStates; i++)
2399 {
2400 *(clP++) = preLikeR[a++] * preLikeL[b++] * preLikeA[d++];
2401 }
2402 }
2403 }
2404 break;
2405 }
2406
2407 return NO_ERROR;
2408 }
2409
2410
2411 #if defined (SSE_ENABLED)
2412 /*----------------------------------------------------------------
2413 |
2414 | CondLikeRoot_Gen_SSE:general n-state model with or without rate
2415 | variation
2416 |
2417 -----------------------------------------------------------------*/
CondLikeRoot_Gen_SSE(TreeNode * p,int division,int chain)2418 int CondLikeRoot_Gen_SSE (TreeNode *p, int division, int chain)
2419 {
2420 int c, c1, t, h, i, j, k, shortCut, *lState=NULL, *rState=NULL, *aState=NULL, nObsStates, preLikeJump,
2421 nStates, nStatesSquared;
2422 CLFlt *pL, *pR, *pA,
2423 *tiPL, *tiPR, *tiPA;
2424 __m128 *clL, *clR, *clP, *clA;
2425 __m128 mTiPL, mTiPR, mTiPA, mL, mR, mA, mAcumL, mAcumR, mAcumA;
2426 ModelInfo *m;
2427 CLFlt *preLikeRV[4] = {0};
2428 CLFlt *preLikeLV[4] = {0};
2429 CLFlt *preLikeAV[4] = {0};
2430
2431 # if !defined (DEBUG_NOSHORTCUTS)
2432 int a, b, catStart;
2433 # endif
2434
2435 /* find model settings for this division and nStates, nStatesSquared */
2436 m = &modelSettings[division];
2437 nObsStates = m->numStates;
2438 nStates = m->numModelStates;
2439 nStatesSquared = nStates * nStates;
2440 preLikeJump = nObsStates * nStates;
2441
2442 /* flip state of node so that we are not overwriting old cond likes */
2443 FlipCondLikeSpace (m, chain, p->index);
2444
2445 /* find conditional likelihood pointers */
2446 clL = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->left->index ]];
2447 clR = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->right->index]];
2448 clP = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->index ]];
2449 clA = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->anc->index ]];
2450
2451 /* find transition probabilities (or calculate instead) */
2452 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
2453 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
2454 pA = m->tiProbs[m->tiProbsIndex[chain][p->index ]];
2455
2456 /* find likelihoods of site patterns for left branch if terminal */
2457 shortCut = 0;
2458 # if !defined (DEBUG_NOSHORTCUTS)
2459 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
2460 {
2461 shortCut |= 1;
2462 lState = m->termState[p->left->index];
2463 tiPL = pL;
2464 for (k=a=0; k<m->numRateCats; k++)
2465 {
2466 catStart = a;
2467 for (i=0; i<nObsStates; i++)
2468 for (j=i; j<nStatesSquared; j+=nStates)
2469 preLikeL[a++] = tiPL[j];
2470 for (b=1; b<nStates/nObsStates; b++)
2471 {
2472 a = catStart;
2473 for (i=0; i<nObsStates; i++)
2474 {
2475 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
2476 preLikeL[a++] += tiPL[j];
2477 }
2478 }
2479 /* for ambiguous */
2480 for (i=0; i<nStates; i++)
2481 preLikeL[a++] = 1.0;
2482 tiPL += nStatesSquared;
2483 }
2484 }
2485
2486 /* find likelihoods of site patterns for right branch if terminal */
2487 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
2488 {
2489 shortCut |= 2;
2490 rState = m->termState[p->right->index];
2491 tiPR = pR;
2492 for (k=a=0; k<m->numRateCats; k++)
2493 {
2494 catStart = a;
2495 for (i=0; i<nObsStates; i++)
2496 for (j=i; j<nStatesSquared; j+=nStates)
2497 preLikeR[a++] = tiPR[j];
2498 for (b=1; b<nStates/nObsStates; b++)
2499 {
2500 a = catStart;
2501 for (i=0; i<nObsStates; i++)
2502 {
2503 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
2504 preLikeR[a++] += tiPR[j];
2505 }
2506 }
2507 /* for ambiguous */
2508 for (i=0; i<nStates; i++)
2509 preLikeR[a++] = 1.0;
2510 tiPR += nStatesSquared;
2511 }
2512 }
2513
2514 /* find likelihoods of site patterns for anc branch, always terminal */
2515 if (m->isPartAmbig[p->anc->index] == YES)
2516 {
2517 shortCut = 4;
2518 }
2519 else
2520 {
2521 aState = m->termState[p->anc->index];
2522 tiPA = pA;
2523 for (k=a=0; k<m->numRateCats; k++)
2524 {
2525 catStart = a;
2526 for (i=0; i<nObsStates; i++)
2527 for (j=i; j<nStatesSquared; j+=nStates)
2528 preLikeA[a++] = tiPA[j];
2529 for (b=1; b<nStates/nObsStates; b++)
2530 {
2531 a = catStart;
2532 for (i=0; i<nObsStates; i++)
2533 {
2534 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
2535 preLikeA[a++] += tiPA[j];
2536 }
2537 }
2538 /* for ambiguous */
2539 for (i=0; i<nStates; i++)
2540 preLikeA[a++] = 1.0;
2541 tiPA += nStatesSquared;
2542 }
2543 }
2544 # else
2545 shortCut = 4;
2546 # endif
2547
2548 switch (shortCut)
2549 {
2550 case 4:
2551 tiPL = pL;
2552 tiPR = pR;
2553 tiPA = pA;
2554 for (k=0; k<m->numRateCats; k++)
2555 {
2556 for (c=0; c<m->numVecChars; c++)
2557 {
2558 for (i=h=0; i<nStates; i++)
2559 {
2560 mAcumL = _mm_setzero_ps();
2561 mAcumR = _mm_setzero_ps();
2562 mAcumA = _mm_setzero_ps();
2563 for (j=0; j<nStates; j++)
2564 {
2565 mTiPL = _mm_load1_ps (&tiPL[h]);
2566 mTiPR = _mm_load1_ps (&tiPR[h]);
2567 mTiPA = _mm_load1_ps (&tiPA[h++]);
2568 mL = _mm_mul_ps (mTiPL, clL[j]);
2569 mR = _mm_mul_ps (mTiPR, clR[j]);
2570 mA = _mm_mul_ps (mTiPA, clA[j]);
2571 mAcumL = _mm_add_ps (mL, mAcumL);
2572 mAcumR = _mm_add_ps (mR, mAcumR);
2573 mAcumA = _mm_add_ps (mA, mAcumA);
2574 }
2575 mAcumL = _mm_mul_ps (mAcumL, mAcumR);
2576 *(clP++) = _mm_mul_ps (mAcumL, mAcumA);
2577 }
2578 clL += nStates;
2579 clR += nStates;
2580 clA += nStates;
2581 }
2582 tiPL += nStatesSquared;
2583 tiPR += nStatesSquared;
2584 tiPA += nStatesSquared;
2585 }
2586 break;
2587 case 0:
2588 tiPL =pL;
2589 tiPR =pR;
2590 for (k=0; k<m->numRateCats; k++)
2591 {
2592 for (c=t=0; c<m->numVecChars; c++)
2593 {
2594 for (c1=0; c1<m->numFloatsPerVec; c1++,t++)
2595 {
2596 preLikeAV[c1] = &preLikeA[aState[t] + k*(preLikeJump+nStates)];
2597 }
2598 for (i=h=0; i<nStates; i++)
2599 {
2600 assert (m->numFloatsPerVec == 4); /* In the following statment we assume that SSE register can hold exactly 4 ClFlts. */
2601 mAcumA = _mm_set_ps (*(preLikeAV[3]++), *(preLikeAV[2]++), *(preLikeAV[1]++), *(preLikeAV[0]++));
2602 mAcumL = _mm_setzero_ps();
2603 mAcumR = _mm_setzero_ps();
2604 for (j=0; j<nStates; j++)
2605 {
2606 mTiPL = _mm_load1_ps (&tiPL[h]);
2607 mL = _mm_mul_ps (mTiPL, clL[j]);
2608 mAcumL = _mm_add_ps (mL, mAcumL);
2609 mTiPR = _mm_load1_ps (&tiPR[h++]);
2610 mR = _mm_mul_ps (mTiPR, clR[j]);
2611 mAcumR = _mm_add_ps (mR, mAcumR);
2612 }
2613 mAcumL = _mm_mul_ps (mAcumL, mAcumR);
2614 *(clP++) = _mm_mul_ps (mAcumL, mAcumA);
2615 }
2616 clR += nStates;
2617 clL += nStates;
2618 }
2619 tiPL += nStatesSquared;
2620 tiPR += nStatesSquared;
2621 }
2622 break;
2623 case 1:
2624 tiPR = pR;
2625 for (k=0; k<m->numRateCats; k++)
2626 {
2627 for (c=t=0; c<m->numVecChars; c++)
2628 {
2629 for (c1=0; c1<m->numFloatsPerVec; c1++,t++)
2630 {
2631 preLikeLV[c1] = &preLikeL[lState[t] + k*(preLikeJump+nStates)];
2632 preLikeAV[c1] = &preLikeA[aState[t] + k*(preLikeJump+nStates)];
2633 }
2634 for (i=h=0; i<nStates; i++)
2635 {
2636 assert (m->numFloatsPerVec == 4); /* In the following statment we assume that SSE register can hold exactly 4 ClFlts. */
2637 mAcumL = _mm_set_ps (*(preLikeLV[3]++), *(preLikeLV[2]++), *(preLikeLV[1]++), *(preLikeLV[0]++));
2638 mAcumA = _mm_set_ps (*(preLikeAV[3]++), *(preLikeAV[2]++), *(preLikeAV[1]++), *(preLikeAV[0]++));
2639 mAcumR = _mm_setzero_ps();
2640 for (j=0; j<nStates; j++)
2641 {
2642 mTiPR = _mm_load1_ps (&tiPR[h++]);
2643 mR = _mm_mul_ps (mTiPR, clR[j]);
2644 mAcumR = _mm_add_ps (mR, mAcumR);
2645 }
2646 mAcumL = _mm_mul_ps (mAcumL, mAcumR);
2647 *(clP++) = _mm_mul_ps (mAcumL, mAcumA);
2648 }
2649 clR += nStates;
2650 }
2651 tiPR += nStatesSquared;
2652 }
2653 break;
2654 case 2:
2655 tiPL = pL;
2656 for (k=0; k<m->numRateCats; k++)
2657 {
2658 for (c=t=0; c<m->numVecChars; c++)
2659 {
2660 for (c1=0; c1<m->numFloatsPerVec; c1++,t++)
2661 {
2662 preLikeRV[c1] = &preLikeR[rState[t] + k*(preLikeJump+nStates)];
2663 preLikeAV[c1] = &preLikeA[aState[t] + k*(preLikeJump+nStates)];
2664 }
2665 for (i=h=0; i<nStates; i++)
2666 {
2667 assert (m->numFloatsPerVec == 4); /* In the following statment we assume that SSE register can hold exactly 4 ClFlts. */
2668 mAcumR = _mm_set_ps (*(preLikeRV[3]++), *(preLikeRV[2]++), *(preLikeRV[1]++), *(preLikeRV[0]++));
2669 mAcumA = _mm_set_ps (*(preLikeAV[3]++), *(preLikeAV[2]++), *(preLikeAV[1]++), *(preLikeAV[0]++));
2670 mAcumL = _mm_setzero_ps();
2671 for (j=0; j<nStates; j++)
2672 {
2673 mTiPL = _mm_load1_ps (&tiPL[h++]);
2674 mL = _mm_mul_ps (mTiPL, clL[j]);
2675 mAcumL = _mm_add_ps (mL, mAcumL);
2676 }
2677 mAcumL = _mm_mul_ps (mAcumL, mAcumR);
2678 *(clP++) = _mm_mul_ps (mAcumL,mAcumA);
2679 }
2680 clL += nStates;
2681 }
2682 tiPL += nStatesSquared;
2683 }
2684 break;
2685 case 3:
2686 for (k=0; k<m->numRateCats; k++)
2687 {
2688 for (c=t=0; c<m->numVecChars; c++)
2689 {
2690 for (c1=0; c1<m->numFloatsPerVec; c1++,t++)
2691 {
2692 preLikeRV[c1] = &preLikeR[rState[t] + k*(preLikeJump+nStates)];
2693 preLikeLV[c1] = &preLikeL[lState[t] + k*(preLikeJump+nStates)];
2694 preLikeAV[c1] = &preLikeA[aState[t] + k*(preLikeJump+nStates)];
2695 }
2696 for (i=0; i<nStates; i++)
2697 {
2698 assert (m->numFloatsPerVec == 4); /* In the following 2 statments we assume that SSE register can hold exactly 4 ClFlts. */
2699 mL = _mm_set_ps (*(preLikeLV[3]++), *(preLikeLV[2]++), *(preLikeLV[1]++), *(preLikeLV[0]++));
2700 mR = _mm_set_ps (*(preLikeRV[3]++), *(preLikeRV[2]++), *(preLikeRV[1]++), *(preLikeRV[0]++));
2701 mA = _mm_set_ps (*(preLikeAV[3]++), *(preLikeAV[2]++), *(preLikeAV[1]++), *(preLikeAV[0]++));
2702 mL = _mm_mul_ps (mL,mR);
2703 *(clP++) = _mm_mul_ps (mL,mA);
2704 }
2705 }
2706 }
2707 break;
2708 }
2709
2710 return NO_ERROR;
2711 }
2712 #endif
2713
2714
2715 /*----------------------------------------------------------------
2716 |
2717 | CondLikeRoot_Gen_GibbsGamma: general n-state model with rate
2718 | variation modeled using a discrete gamma distribution with
2719 | Gibbs resampling of rate categories
2720 |
2721 -----------------------------------------------------------------*/
CondLikeRoot_Gen_GibbsGamma(TreeNode * p,int division,int chain)2722 int CondLikeRoot_Gen_GibbsGamma (TreeNode *p, int division, int chain)
2723 {
2724 int a, b, c, i, j, r, *rateCat, shortCut, *lState=NULL,
2725 *rState=NULL, *aState=NULL, nObsStates, nStates,
2726 nStatesSquared, nRateCats;
2727 CLFlt likeL, likeR, likeA, *clL, *clR, *clP, *clA, *pL, *pR, *pA,
2728 *tiPL, *tiPR, *tiPA;
2729 ModelInfo *m;
2730 # if !defined (DEBUG_NOSHORTCUTS)
2731 int k, catStart;
2732 #endif
2733
2734 /* find model settings for this division and nStates, nStatesSquared */
2735 m = &modelSettings[division];
2736 nObsStates = m->numStates;
2737 nStates = m->numModelStates;
2738 nStatesSquared = nStates * nStates;
2739
2740 /* flip conditional likelihood space */
2741 FlipCondLikeSpace (m, chain, p->index);
2742
2743 /* find conditional likelihood pointers */
2744 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
2745 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
2746 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
2747 clA = m->condLikes[m->condLikeIndex[chain][p->anc->index ]];
2748
2749 /* find transition probabilities (or calculate instead) */
2750 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
2751 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
2752 pA = m->tiProbs[m->tiProbsIndex[chain][p->index ]];
2753
2754 /* find rate category index and number of rate categories */
2755 rateCat = m->tiIndex + chain * m->numChars;
2756 nRateCats = m->numRateCats;
2757
2758 /* find likelihoods of site patterns for left branch if terminal */
2759 shortCut = 0;
2760 # if !defined (DEBUG_NOSHORTCUTS)
2761 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
2762 {
2763 shortCut |= 1;
2764 lState = m->termState[p->left->index];
2765 tiPL = pL;
2766 for (k=a=0; k<nRateCats; k++)
2767 {
2768 catStart = a;
2769 for (i=0; i<nObsStates; i++)
2770 for (j=i; j<nStatesSquared; j+=nStates)
2771 preLikeL[a++] = tiPL[j];
2772 for (b=1; b<nStates/nObsStates; b++)
2773 {
2774 a = catStart;
2775 for (i=0; i<nObsStates; i++)
2776 {
2777 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
2778 preLikeL[a++] += tiPL[j];
2779 }
2780 }
2781 /* for ambiguous */
2782 for (i=0; i<nStates; i++)
2783 preLikeL[a++] = 1.0;
2784 tiPL += nStatesSquared;
2785 }
2786 }
2787
2788 /* find likelihoods of site patterns for right branch if terminal */
2789 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
2790 {
2791 shortCut |= 2;
2792 rState = m->termState[p->right->index];
2793 tiPR = pR;
2794 for (k=a=0; k<nRateCats; k++)
2795 {
2796 catStart = a;
2797 for (i=0; i<nObsStates; i++)
2798 for (j=i; j<nStatesSquared; j+=nStates)
2799 preLikeR[a++] = tiPR[j];
2800 for (b=1; b<nStates/nObsStates; b++)
2801 {
2802 a = catStart;
2803 for (i=0; i<nObsStates; i++)
2804 {
2805 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
2806 preLikeR[a++] += tiPR[j];
2807 }
2808 }
2809 /* for ambiguous */
2810 for (i=0; i<nStates; i++)
2811 preLikeR[a++] = 1.0;
2812 tiPR += nStatesSquared;
2813 }
2814 }
2815
2816 /* find likelihoods of site patterns for anc branch, always terminal */
2817 if (m->isPartAmbig[p->anc->index] == YES)
2818 {
2819 shortCut = 4;
2820 }
2821 else
2822 {
2823 aState = m->termState[p->anc->index];
2824 tiPA = pA;
2825 for (k=a=0; k<nRateCats; k++)
2826 {
2827 catStart = a;
2828 for (i=0; i<nObsStates; i++)
2829 for (j=i; j<nStatesSquared; j+=nStates)
2830 preLikeA[a++] = tiPA[j];
2831 for (b=1; b<nStates/nObsStates; b++)
2832 {
2833 a = catStart;
2834 for (i=0; i<nObsStates; i++)
2835 {
2836 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
2837 preLikeA[a++] += tiPA[j];
2838 }
2839 }
2840 /* for ambiguous */
2841 for (i=0; i<nStates; i++)
2842 preLikeA[a++] = 1.0;
2843 tiPA += nStatesSquared;
2844 }
2845 }
2846 # else
2847 shortCut = 4;
2848 # endif
2849
2850 switch (shortCut)
2851 {
2852 case 4:
2853 for (c=0; c<m->numChars; c++)
2854 {
2855 r = (*rateCat++);
2856 if (r < nRateCats)
2857 {
2858 tiPL = pL + r*nStatesSquared;
2859 tiPR = pR + r*nStatesSquared;
2860 tiPA = pA + r*nStatesSquared;
2861 for (i=0; i<nStates; i++)
2862 {
2863 likeL = likeR = likeA = 0.0;
2864 for (j=0; j<nStates; j++)
2865 {
2866 likeL += (*tiPL++) * clL[j];
2867 likeR += (*tiPR++) * clR[j];
2868 likeA += (*tiPA++) * clA[j];
2869 }
2870 *(clP++) = likeL * likeR * likeA;
2871 }
2872 }
2873 else
2874 clP += nStates;
2875 clL += nStates;
2876 clR += nStates;
2877 clA += nStates;
2878 }
2879 break;
2880 case 0:
2881 case 3:
2882 for (c=0; c<m->numChars; c++)
2883 {
2884 r = (*rateCat++);
2885 if (r < nRateCats)
2886 {
2887 tiPL = pL + r*nStatesSquared;
2888 tiPR = pR + r*nStatesSquared;
2889 a = aState[c] + r*(nStatesSquared+nStates);
2890 for (i=0; i<nStates; i++)
2891 {
2892 likeL = likeR = 0.0;
2893 for (j=0; j<nStates; j++)
2894 {
2895 likeL += (*tiPL++) * clL[j];
2896 likeR += (*tiPR++) * clR[j];
2897 }
2898 *(clP++) = likeL * likeR * preLikeA[a++];
2899 }
2900 }
2901 else
2902 clP += nStates;
2903 clL += nStates;
2904 clR += nStates;
2905 }
2906 break;
2907 case 1:
2908 for (c=0; c<m->numChars; c++)
2909 {
2910 r = (*rateCat++);
2911 if (r < nRateCats)
2912 {
2913 tiPR = pR + r*nStatesSquared;
2914 a = lState[c] + r*(nStatesSquared+nStates);
2915 b = aState[c] + r*(nStatesSquared+nStates);
2916 for (i=0; i<nStates; i++)
2917 {
2918 likeR = 0.0;
2919 for (j=0; j<nStates; j++)
2920 {
2921 likeR += (*tiPR++) * clR[j];
2922 }
2923 *(clP++) = preLikeL[a++] * likeR * preLikeA[b++];
2924 }
2925 }
2926 else
2927 clP += nStates;
2928 clR += nStates;
2929 }
2930 break;
2931 case 2:
2932 for (c=0; c<m->numChars; c++)
2933 {
2934 r = (*rateCat++);
2935 if (r < nRateCats)
2936 {
2937 tiPL = pL + r*nStatesSquared;
2938 a = rState[c] + r*(nStatesSquared+nStates);
2939 b = aState[c] + r*(nStatesSquared+nStates);
2940 for (i=0; i<nStates; i++)
2941 {
2942 likeL = 0.0;
2943 for (j=0; j<nStates; j++)
2944 {
2945 likeL += (*tiPL++) * clL[j];
2946 }
2947 *(clP++) = likeL * preLikeR[a++] * preLikeA[b++];
2948 }
2949 }
2950 else
2951 clP += nStates;
2952 clL += nStates;
2953 }
2954 break;
2955 }
2956
2957 return NO_ERROR;
2958 }
2959
2960
2961 /*----------------------------------------------------------------
2962 |
2963 | CondLikeRoot_NUC4: 4by4 nucleotide model with or without rate
2964 | variation
2965 |
2966 -----------------------------------------------------------------*/
CondLikeRoot_NUC4(TreeNode * p,int division,int chain)2967 int CondLikeRoot_NUC4 (TreeNode *p, int division, int chain)
2968 {
2969 int a, c, h, i, j, k, shortCut, *lState=NULL, *rState=NULL, *aState=NULL;
2970 CLFlt *clL, *clR, *clP, *clA, *pL, *pR, *pA, *tiPL, *tiPR, *tiPA;
2971 ModelInfo *m;
2972
2973 m = &modelSettings[division];
2974
2975 /* flip state of node so that we are not overwriting old cond likes */
2976 FlipCondLikeSpace (m, chain, p->index);
2977
2978 /* find conditional likelihood pointers */
2979 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
2980 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
2981 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
2982 clA = m->condLikes[m->condLikeIndex[chain][p->anc->index ]];
2983
2984 /* find transition probabilities (or calculate instead) */
2985 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
2986 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
2987 pA = m->tiProbs[m->tiProbsIndex[chain][p->index ]];
2988
2989 /* find likelihoods of site patterns for left branch if terminal */
2990 shortCut = 0;
2991 # if !defined (DEBUG_NOSHORTCUTS)
2992 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
2993 {
2994 shortCut |= 1;
2995 lState = m->termState[p->left->index];
2996 tiPL = pL;
2997 for (k=j=0; k<m->numRateCats; k++)
2998 {
2999 for (i=0; i<4; i++)
3000 {
3001 preLikeL[j++] = tiPL[0];
3002 preLikeL[j++] = tiPL[4];
3003 preLikeL[j++] = tiPL[8];
3004 preLikeL[j++] = tiPL[12];
3005 tiPL++;
3006 }
3007 /* for ambiguous */
3008 for (i=0; i<4; i++)
3009 preLikeL[j++] = 1.0;
3010 tiPL += 12;
3011 }
3012 }
3013
3014 /* find likelihoods of site patterns for right branch if terminal */
3015 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
3016 {
3017 shortCut |= 2;
3018 rState = m->termState[p->right->index];
3019 tiPR = pR;
3020 for (k=j=0; k<m->numRateCats; k++)
3021 {
3022 for (i=0; i<4; i++)
3023 {
3024 preLikeR[j++] = tiPR[0];
3025 preLikeR[j++] = tiPR[4];
3026 preLikeR[j++] = tiPR[8];
3027 preLikeR[j++] = tiPR[12];
3028 tiPR++;
3029 }
3030 /* for ambiguous */
3031 for (i=0; i<4; i++)
3032 preLikeR[j++] = 1.0;
3033 tiPR += 12;
3034 }
3035 }
3036
3037 /* find likelihoods of site patterns for anc branch, always terminal */
3038 if (m->isPartAmbig[p->anc->index] == YES)
3039 {
3040 shortCut = 4;
3041 }
3042 else
3043 {
3044 aState = m->termState[p->anc->index];
3045 tiPA = pA;
3046 for (k=j=0; k<m->numRateCats; k++)
3047 {
3048 for (i=0; i<4; i++)
3049 {
3050 preLikeA[j++] = tiPA[0];
3051 preLikeA[j++] = tiPA[4];
3052 preLikeA[j++] = tiPA[8];
3053 preLikeA[j++] = tiPA[12];
3054 tiPA++;
3055 }
3056 /* for ambiguous */
3057 for (i=0; i<4; i++)
3058 preLikeA[j++] = 1.0;
3059 tiPA += 12;
3060 }
3061 }
3062 # else
3063 shortCut = 4;
3064 # endif
3065
3066 switch (shortCut)
3067 {
3068 case 4:
3069 tiPL = pL;
3070 tiPR = pR;
3071 tiPA = pA;
3072 for (k=h=0; k<m->numRateCats; k++)
3073 {
3074 for (c=0; c<m->numChars; c++)
3075 {
3076 clP[h++] = (tiPL[AA]*clL[A] + tiPL[AC]*clL[C] + tiPL[AG]*clL[G] + tiPL[AT]*clL[T])
3077 *(tiPR[AA]*clR[A] + tiPR[AC]*clR[C] + tiPR[AG]*clR[G] + tiPR[AT]*clR[T])
3078 *(tiPA[AA]*clA[A] + tiPA[AC]*clA[C] + tiPA[AG]*clA[G] + tiPA[AT]*clA[T]);
3079 clP[h++] = (tiPL[CA]*clL[A] + tiPL[CC]*clL[C] + tiPL[CG]*clL[G] + tiPL[CT]*clL[T])
3080 *(tiPR[CA]*clR[A] + tiPR[CC]*clR[C] + tiPR[CG]*clR[G] + tiPR[CT]*clR[T])
3081 *(tiPA[CA]*clA[A] + tiPA[CC]*clA[C] + tiPA[CG]*clA[G] + tiPA[CT]*clA[T]);
3082 clP[h++] = (tiPL[GA]*clL[A] + tiPL[GC]*clL[C] + tiPL[GG]*clL[G] + tiPL[GT]*clL[T])
3083 *(tiPR[GA]*clR[A] + tiPR[GC]*clR[C] + tiPR[GG]*clR[G] + tiPR[GT]*clR[T])
3084 *(tiPA[GA]*clA[A] + tiPA[GC]*clA[C] + tiPA[GG]*clA[G] + tiPA[GT]*clA[T]);
3085 clP[h++] = (tiPL[TA]*clL[A] + tiPL[TC]*clL[C] + tiPL[TG]*clL[G] + tiPL[TT]*clL[T])
3086 *(tiPR[TA]*clR[A] + tiPR[TC]*clR[C] + tiPR[TG]*clR[G] + tiPR[TT]*clR[T])
3087 *(tiPA[TA]*clA[A] + tiPA[TC]*clA[C] + tiPA[TG]*clA[G] + tiPA[TT]*clA[T]);
3088 clL += 4;
3089 clR += 4;
3090 clA += 4;
3091 }
3092 tiPL += 16;
3093 tiPR += 16;
3094 tiPA += 16;
3095 }
3096 break;
3097
3098 case 0:
3099 tiPL = pL;
3100 tiPR = pR;
3101 for (k=h=0; k<m->numRateCats; k++)
3102 {
3103 for (c=0; c<m->numChars; c++)
3104 {
3105 i = aState[c] + k*20;
3106 clP[h++] = (tiPL[AA]*clL[A] + tiPL[AC]*clL[C] + tiPL[AG]*clL[G] + tiPL[AT]*clL[T])
3107 *(tiPR[AA]*clR[A] + tiPR[AC]*clR[C] + tiPR[AG]*clR[G] + tiPR[AT]*clR[T])
3108 *preLikeA[i++];
3109 clP[h++] = (tiPL[CA]*clL[A] + tiPL[CC]*clL[C] + tiPL[CG]*clL[G] + tiPL[CT]*clL[T])
3110 *(tiPR[CA]*clR[A] + tiPR[CC]*clR[C] + tiPR[CG]*clR[G] + tiPR[CT]*clR[T])
3111 *preLikeA[i++];
3112 clP[h++] = (tiPL[GA]*clL[A] + tiPL[GC]*clL[C] + tiPL[GG]*clL[G] + tiPL[GT]*clL[T])
3113 *(tiPR[GA]*clR[A] + tiPR[GC]*clR[C] + tiPR[GG]*clR[G] + tiPR[GT]*clR[T])
3114 *preLikeA[i++];
3115 clP[h++] = (tiPL[TA]*clL[A] + tiPL[TC]*clL[C] + tiPL[TG]*clL[G] + tiPL[TT]*clL[T])
3116 *(tiPR[TA]*clR[A] + tiPR[TC]*clR[C] + tiPR[TG]*clR[G] + tiPR[TT]*clR[T])
3117 *preLikeA[i++];
3118 clL += 4;
3119 clR += 4;
3120 }
3121 tiPL += 16;
3122 tiPR += 16;
3123 }
3124 break;
3125
3126 case 1:
3127 tiPR = pR;
3128 for (k=h=0; k<m->numRateCats; k++)
3129 {
3130 for (c=0; c<m->numChars; c++)
3131 {
3132 i = lState[c] + k*20;
3133 j = aState[c] + k*20;
3134 clP[h++] = (tiPR[AA]*clR[A] + tiPR[AC]*clR[C] + tiPR[AG]*clR[G] + tiPR[AT]*clR[T])
3135 *preLikeL[i++]*preLikeA[j++];
3136 clP[h++] = (tiPR[CA]*clR[A] + tiPR[CC]*clR[C] + tiPR[CG]*clR[G] + tiPR[CT]*clR[T])
3137 *preLikeL[i++]*preLikeA[j++];
3138 clP[h++] = (tiPR[GA]*clR[A] + tiPR[GC]*clR[C] + tiPR[GG]*clR[G] + tiPR[GT]*clR[T])
3139 *preLikeL[i++]*preLikeA[j++];
3140 clP[h++] = (tiPR[TA]*clR[A] + tiPR[TC]*clR[C] + tiPR[TG]*clR[G] + tiPR[TT]*clR[T])
3141 *preLikeL[i++]*preLikeA[j++];
3142 clR += 4;
3143 }
3144 tiPR += 16;
3145 }
3146 break;
3147
3148 case 2:
3149 tiPL = pL;
3150 for (k=h=0; k<m->numRateCats; k++)
3151 {
3152 for (c=0; c<m->numChars; c++)
3153 {
3154 i = rState[c] + k*20;
3155 j = aState[c] + k*20;
3156 clP[h++] = (tiPL[AA]*clL[A] + tiPL[AC]*clL[C] + tiPL[AG]*clL[G] + tiPL[AT]*clL[T])
3157 *preLikeR[i++]*preLikeA[j++];
3158 clP[h++] = (tiPL[CA]*clL[A] + tiPL[CC]*clL[C] + tiPL[CG]*clL[G] + tiPL[CT]*clL[T])
3159 *preLikeR[i++]*preLikeA[j++];
3160 clP[h++] = (tiPL[GA]*clL[A] + tiPL[GC]*clL[C] + tiPL[GG]*clL[G] + tiPL[GT]*clL[T])
3161 *preLikeR[i++]*preLikeA[j++];
3162 clP[h++] = (tiPL[TA]*clL[A] + tiPL[TC]*clL[C] + tiPL[TG]*clL[G] + tiPL[TT]*clL[T])
3163 *preLikeR[i++]*preLikeA[j++];
3164 clL += 4;
3165 }
3166 tiPL += 16;
3167 }
3168 break;
3169
3170 case 3:
3171 for (k=h=0; k<m->numRateCats; k++)
3172 {
3173 for (c=0; c<m->numChars; c++)
3174 {
3175 a = lState[c] + k*20;
3176 i = rState[c] + k*20;
3177 j = aState[c] + k*20;
3178 clP[h++] = preLikeL[a++]*preLikeR[i++]*preLikeA[j++];
3179 clP[h++] = preLikeL[a++]*preLikeR[i++]*preLikeA[j++];
3180 clP[h++] = preLikeL[a++]*preLikeR[i++]*preLikeA[j++];
3181 clP[h++] = preLikeL[a++]*preLikeR[i++]*preLikeA[j++];
3182 }
3183 }
3184 break;
3185 }
3186
3187 return NO_ERROR;
3188 }
3189
3190
3191 /*----------------------------------------------------------------
3192 |
3193 | CondLikeRoot_NUC4_GibbsGamma: 4by4 nucleotide model with rate
|       variation approximated by Gibbs sampling from gamma
3195 |
3196 -----------------------------------------------------------------*/
int CondLikeRoot_NUC4_GibbsGamma (TreeNode *p, int division, int chain)
{
    int         site, cat, nCats, mode = 0;
    int         *siteCat, *statesL = NULL, *statesR = NULL, *statesA = NULL;
    CLFlt       *clL, *clR, *clP, *clA, *pL, *pR, *pA;
    CLFlt       *inL, *inR, *inA, *out, *tiL, *tiR, *tiA, *preL, *preR, *preA;
    ModelInfo   *m = &modelSettings[division];
#if !defined (DEBUG_NOSHORTCUTS)
    int         row, col;
#endif

    /* flip the node's conditional likelihood space before looking up its buffer */
    FlipCondLikeSpace (m, chain, p->index);

    /* conditional likelihoods of the two descendants, this node and the ancestor */
    clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
    clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
    clP = m->condLikes[m->condLikeIndex[chain][p->index       ]];
    clA = m->condLikes[m->condLikeIndex[chain][p->anc->index  ]];

    /* transition probabilities; the ancestor branch uses this node's own entry */
    pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
    pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
    pA = m->tiProbs[m->tiProbsIndex[chain][p->index       ]];

    /* per-site rate category for this chain, and the number of categories */
    siteCat = m->tiIndex + chain * m->numChars;
    nCats   = m->numRateCats;

    /* mode bits: 1 = left precalculated, 2 = right precalculated;
       the value 4 means "no shortcuts at all" and replaces the bits */
#if !defined (DEBUG_NOSHORTCUTS)
    /* Each preLike array holds 20 entries per rate category: the 4x4
       transition matrix transposed (16 entries) followed by four 1.0
       entries ("for ambiguous"). termState values are later used as
       offsets into such a 20-entry group. */
    if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
    {
        mode |= 1;
        statesL = m->termState[p->left->index];
        for (cat = 0; cat < nCats; cat++)
        {
            for (col = 0; col < 4; col++)
                for (row = 0; row < 4; row++)
                    preLikeL[20*cat + 4*col + row] = pL[16*cat + 4*row + col];
            for (col = 0; col < 4; col++)
                preLikeL[20*cat + 16 + col] = 1.0;
        }
    }

    if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
    {
        mode |= 2;
        statesR = m->termState[p->right->index];
        for (cat = 0; cat < nCats; cat++)
        {
            for (col = 0; col < 4; col++)
                for (row = 0; row < 4; row++)
                    preLikeR[20*cat + 4*col + row] = pR[16*cat + 4*row + col];
            for (col = 0; col < 4; col++)
                preLikeR[20*cat + 16 + col] = 1.0;
        }
    }

    /* the ancestor is always terminal; only partial ambiguity rules out its shortcut */
    if (m->isPartAmbig[p->anc->index] == YES)
    {
        mode = 4;
    }
    else
    {
        statesA = m->termState[p->anc->index];
        for (cat = 0; cat < nCats; cat++)
        {
            for (col = 0; col < 4; col++)
                for (row = 0; row < 4; row++)
                    preLikeA[20*cat + 4*col + row] = pA[16*cat + 4*row + col];
            for (col = 0; col < 4; col++)
                preLikeA[20*cat + 16 + col] = 1.0;
        }
    }
#else
    mode = 4;
#endif

/* one matrix row (given by its four entry indices) times a site's four cond likes */
#define GIBBS_ROOT_DOT(ti, cl, iA, iC, iG, iT) \
    ((ti)[iA]*(cl)[A] + (ti)[iC]*(cl)[C] + (ti)[iG]*(cl)[G] + (ti)[iT]*(cl)[T])

    /* In every mode a site whose category index is >= nCats is skipped and
       its four output entries are left untouched. */
    if (mode == 4)
    {
        /* full matrix products on all three branches */
        for (site = 0; site < m->numChars; site++)
        {
            cat = siteCat[site];
            if (cat >= nCats)
                continue;
            tiL = pL + 16*cat;
            tiR = pR + 16*cat;
            tiA = pA + 16*cat;
            inL = clL + 4*site;
            inR = clR + 4*site;
            inA = clA + 4*site;
            out = clP + 4*site;
            out[0] = GIBBS_ROOT_DOT (tiL, inL, AA, AC, AG, AT)
                    *GIBBS_ROOT_DOT (tiR, inR, AA, AC, AG, AT)
                    *GIBBS_ROOT_DOT (tiA, inA, AA, AC, AG, AT);
            out[1] = GIBBS_ROOT_DOT (tiL, inL, CA, CC, CG, CT)
                    *GIBBS_ROOT_DOT (tiR, inR, CA, CC, CG, CT)
                    *GIBBS_ROOT_DOT (tiA, inA, CA, CC, CG, CT);
            out[2] = GIBBS_ROOT_DOT (tiL, inL, GA, GC, GG, GT)
                    *GIBBS_ROOT_DOT (tiR, inR, GA, GC, GG, GT)
                    *GIBBS_ROOT_DOT (tiA, inA, GA, GC, GG, GT);
            out[3] = GIBBS_ROOT_DOT (tiL, inL, TA, TC, TG, TT)
                    *GIBBS_ROOT_DOT (tiR, inR, TA, TC, TG, TT)
                    *GIBBS_ROOT_DOT (tiA, inA, TA, TC, TG, TT);
        }
    }
    else if (mode == 0 || mode == 3)
    {
        /* full products for both descendants, precalculated ancestor term
           (mode 3 deliberately shares this path and ignores preLikeL/preLikeR) */
        for (site = 0; site < m->numChars; site++)
        {
            cat = siteCat[site];
            if (cat >= nCats)
                continue;
            tiL  = pL + 16*cat;
            tiR  = pR + 16*cat;
            preA = preLikeA + statesA[site] + 20*cat;
            inL  = clL + 4*site;
            inR  = clR + 4*site;
            out  = clP + 4*site;
            out[0] = GIBBS_ROOT_DOT (tiL, inL, AA, AC, AG, AT)
                    *GIBBS_ROOT_DOT (tiR, inR, AA, AC, AG, AT)
                    *preA[0];
            out[1] = GIBBS_ROOT_DOT (tiL, inL, CA, CC, CG, CT)
                    *GIBBS_ROOT_DOT (tiR, inR, CA, CC, CG, CT)
                    *preA[1];
            out[2] = GIBBS_ROOT_DOT (tiL, inL, GA, GC, GG, GT)
                    *GIBBS_ROOT_DOT (tiR, inR, GA, GC, GG, GT)
                    *preA[2];
            out[3] = GIBBS_ROOT_DOT (tiL, inL, TA, TC, TG, TT)
                    *GIBBS_ROOT_DOT (tiR, inR, TA, TC, TG, TT)
                    *preA[3];
        }
    }
    else if (mode == 1)
    {
        /* precalculated left and ancestor terms, full product for the right descendant */
        for (site = 0; site < m->numChars; site++)
        {
            cat = siteCat[site];
            if (cat >= nCats)
                continue;
            tiR  = pR + 16*cat;
            preL = preLikeL + statesL[site] + 20*cat;
            preA = preLikeA + statesA[site] + 20*cat;
            inR  = clR + 4*site;
            out  = clP + 4*site;
            out[0] = GIBBS_ROOT_DOT (tiR, inR, AA, AC, AG, AT) * preL[0] * preA[0];
            out[1] = GIBBS_ROOT_DOT (tiR, inR, CA, CC, CG, CT) * preL[1] * preA[1];
            out[2] = GIBBS_ROOT_DOT (tiR, inR, GA, GC, GG, GT) * preL[2] * preA[2];
            out[3] = GIBBS_ROOT_DOT (tiR, inR, TA, TC, TG, TT) * preL[3] * preA[3];
        }
    }
    else if (mode == 2)
    {
        /* precalculated right and ancestor terms, full product for the left descendant */
        for (site = 0; site < m->numChars; site++)
        {
            cat = siteCat[site];
            if (cat >= nCats)
                continue;
            tiL  = pL + 16*cat;
            preR = preLikeR + statesR[site] + 20*cat;
            preA = preLikeA + statesA[site] + 20*cat;
            inL  = clL + 4*site;
            out  = clP + 4*site;
            out[0] = GIBBS_ROOT_DOT (tiL, inL, AA, AC, AG, AT) * preR[0] * preA[0];
            out[1] = GIBBS_ROOT_DOT (tiL, inL, CA, CC, CG, CT) * preR[1] * preA[1];
            out[2] = GIBBS_ROOT_DOT (tiL, inL, GA, GC, GG, GT) * preR[2] * preA[2];
            out[3] = GIBBS_ROOT_DOT (tiL, inL, TA, TC, TG, TT) * preR[3] * preA[3];
        }
    }

#undef GIBBS_ROOT_DOT

    return NO_ERROR;
}
3417
3418
#if defined (FMA_ENABLED)
/* Emits one output vector of CondLikeRoot_NUC4_FMA. The arguments are the
   four transition-matrix entry indices of one row (e.g. AA, AC, AG, AT).
   Uses the locals tiPL/tiPR/tiPA, clL/clR/clA, sumL/sumR/sumA of that
   function and advances clP by one vector. */
#define NUC4_ROOT_FMA_ROW(iA, iC, iG, iT)                                          \
    do                                                                              \
    {                                                                               \
        sumL = _mm256_mul_ps (_mm256_broadcast_ss (&tiPL[iA]), clL[A]);             \
        sumR = _mm256_mul_ps (_mm256_broadcast_ss (&tiPR[iA]), clR[A]);             \
        sumA = _mm256_mul_ps (_mm256_broadcast_ss (&tiPA[iA]), clA[A]);             \
        sumL = _mm256_fmadd_ps (_mm256_broadcast_ss (&tiPL[iC]), clL[C], sumL);     \
        sumR = _mm256_fmadd_ps (_mm256_broadcast_ss (&tiPR[iC]), clR[C], sumR);     \
        sumA = _mm256_fmadd_ps (_mm256_broadcast_ss (&tiPA[iC]), clA[C], sumA);     \
        sumL = _mm256_fmadd_ps (_mm256_broadcast_ss (&tiPL[iG]), clL[G], sumL);     \
        sumR = _mm256_fmadd_ps (_mm256_broadcast_ss (&tiPR[iG]), clR[G], sumR);     \
        sumA = _mm256_fmadd_ps (_mm256_broadcast_ss (&tiPA[iG]), clA[G], sumA);     \
        sumL = _mm256_fmadd_ps (_mm256_broadcast_ss (&tiPL[iT]), clL[T], sumL);     \
        sumR = _mm256_fmadd_ps (_mm256_broadcast_ss (&tiPR[iT]), clR[T], sumR);     \
        sumA = _mm256_fmadd_ps (_mm256_broadcast_ss (&tiPA[iT]), clA[T], sumA);     \
        *clP++ = _mm256_mul_ps (_mm256_mul_ps (sumL, sumR), sumA);                  \
    }                                                                               \
    while (0)

/*----------------------------------------------------------------
|
|   CondLikeRoot_NUC4_FMA: 4by4 nucleotide model with or without rate
|       variation using AVX + FMA instructions
|
|   For every rate category and every vector group of characters,
|   each of the four output vectors is the product of three
|   matrix-row sums: left descendant, right descendant and ancestor.
|   Always returns NO_ERROR.
|
-----------------------------------------------------------------*/
int CondLikeRoot_NUC4_FMA (TreeNode *p, int division, int chain)
{
    int         cat, vec;
    CLFlt       *tiPL, *tiPR, *tiPA;
    __m256      *clL, *clR, *clP, *clA;
    __m256      sumL, sumR, sumA;
    ModelInfo   *m = &modelSettings[division];

    /* flip state of node so that we are not overwriting old cond likes */
    FlipCondLikeSpace (m, chain, p->index);

    /* conditional likelihoods, viewed as 256-bit vectors */
    clL = (__m256 *) m->condLikes[m->condLikeIndex[chain][p->left->index ]];
    clR = (__m256 *) m->condLikes[m->condLikeIndex[chain][p->right->index]];
    clP = (__m256 *) m->condLikes[m->condLikeIndex[chain][p->index       ]];
    clA = (__m256 *) m->condLikes[m->condLikeIndex[chain][p->anc->index  ]];

    /* transition probabilities; the ancestor branch uses this node's own entry */
    tiPL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
    tiPR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
    tiPA = m->tiProbs[m->tiProbsIndex[chain][p->index       ]];

    /* 16 matrix entries per rate category, 4 vectors per character group */
    for (cat = 0; cat < m->numRateCats; cat++, tiPL += 16, tiPR += 16, tiPA += 16)
    {
        for (vec = 0; vec < m->numVecChars; vec++, clL += 4, clR += 4, clA += 4)
        {
            NUC4_ROOT_FMA_ROW (AA, AC, AG, AT);
            NUC4_ROOT_FMA_ROW (CA, CC, CG, CT);
            NUC4_ROOT_FMA_ROW (GA, GC, GG, GT);
            NUC4_ROOT_FMA_ROW (TA, TC, TG, TT);
        }
    }

    return NO_ERROR;
}
#undef NUC4_ROOT_FMA_ROW
#endif
3593
3594
#if defined (AVX_ENABLED)
/* Emits one output vector of CondLikeRoot_NUC4_AVX. The arguments are the
   four transition-matrix entry indices of one row (e.g. AA, AC, AG, AT).
   Uses the locals tiPL/tiPR/tiPA, clL/clR/clA, sumL/sumR/sumA of that
   function and advances clP by one vector. */
#define NUC4_ROOT_AVX_ROW(iA, iC, iG, iT)                                                        \
    do                                                                                            \
    {                                                                                             \
        sumL = _mm256_mul_ps (_mm256_broadcast_ss (&tiPL[iA]), clL[A]);                           \
        sumR = _mm256_mul_ps (_mm256_broadcast_ss (&tiPR[iA]), clR[A]);                           \
        sumA = _mm256_mul_ps (_mm256_broadcast_ss (&tiPA[iA]), clA[A]);                           \
        sumL = _mm256_add_ps (_mm256_mul_ps (_mm256_broadcast_ss (&tiPL[iC]), clL[C]), sumL);     \
        sumR = _mm256_add_ps (_mm256_mul_ps (_mm256_broadcast_ss (&tiPR[iC]), clR[C]), sumR);     \
        sumA = _mm256_add_ps (_mm256_mul_ps (_mm256_broadcast_ss (&tiPA[iC]), clA[C]), sumA);     \
        sumL = _mm256_add_ps (_mm256_mul_ps (_mm256_broadcast_ss (&tiPL[iG]), clL[G]), sumL);     \
        sumR = _mm256_add_ps (_mm256_mul_ps (_mm256_broadcast_ss (&tiPR[iG]), clR[G]), sumR);     \
        sumA = _mm256_add_ps (_mm256_mul_ps (_mm256_broadcast_ss (&tiPA[iG]), clA[G]), sumA);     \
        sumL = _mm256_add_ps (_mm256_mul_ps (_mm256_broadcast_ss (&tiPL[iT]), clL[T]), sumL);     \
        sumR = _mm256_add_ps (_mm256_mul_ps (_mm256_broadcast_ss (&tiPR[iT]), clR[T]), sumR);     \
        sumA = _mm256_add_ps (_mm256_mul_ps (_mm256_broadcast_ss (&tiPA[iT]), clA[T]), sumA);     \
        *clP++ = _mm256_mul_ps (_mm256_mul_ps (sumL, sumR), sumA);                                \
    }                                                                                             \
    while (0)

/*----------------------------------------------------------------
|
|   CondLikeRoot_NUC4_AVX: 4by4 nucleotide model with or without rate
|       variation using AVX instructions
|
|   For every rate category and every vector group of characters,
|   each of the four output vectors is the product of three
|   matrix-row sums: left descendant, right descendant and ancestor.
|   Always returns NO_ERROR.
|
-----------------------------------------------------------------*/
int CondLikeRoot_NUC4_AVX (TreeNode *p, int division, int chain)
{
    int         cat, vec;
    CLFlt       *tiPL, *tiPR, *tiPA;
    __m256      *clL, *clR, *clP, *clA;
    __m256      sumL, sumR, sumA;
    ModelInfo   *m = &modelSettings[division];

    /* flip state of node so that we are not overwriting old cond likes */
    FlipCondLikeSpace (m, chain, p->index);

    /* conditional likelihoods, viewed as 256-bit vectors */
    clL = (__m256 *) m->condLikes[m->condLikeIndex[chain][p->left->index ]];
    clR = (__m256 *) m->condLikes[m->condLikeIndex[chain][p->right->index]];
    clP = (__m256 *) m->condLikes[m->condLikeIndex[chain][p->index       ]];
    clA = (__m256 *) m->condLikes[m->condLikeIndex[chain][p->anc->index  ]];

    /* transition probabilities; the ancestor branch uses this node's own entry */
    tiPL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
    tiPR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
    tiPA = m->tiProbs[m->tiProbsIndex[chain][p->index       ]];

    /* 16 matrix entries per rate category, 4 vectors per character group */
    for (cat = 0; cat < m->numRateCats; cat++, tiPL += 16, tiPR += 16, tiPA += 16)
    {
        for (vec = 0; vec < m->numVecChars; vec++, clL += 4, clR += 4, clA += 4)
        {
            NUC4_ROOT_AVX_ROW (AA, AC, AG, AT);
            NUC4_ROOT_AVX_ROW (CA, CC, CG, CT);
            NUC4_ROOT_AVX_ROW (GA, GC, GG, GT);
            NUC4_ROOT_AVX_ROW (TA, TC, TG, TT);
        }
    }

    return NO_ERROR;
}
#undef NUC4_ROOT_AVX_ROW
#endif
3805
3806
#if defined (SSE_ENABLED)
/* Emits one output vector of CondLikeRoot_NUC4_SSE. The arguments are the
   four transition-matrix entry indices of one row (e.g. AA, AC, AG, AT).
   Uses the locals tiPL/tiPR/tiPA, clL/clR/clA, sumL/sumR/sumA of that
   function and advances clP by one vector. */
#define NUC4_ROOT_SSE_ROW(iA, iC, iG, iT)                                          \
    do                                                                              \
    {                                                                               \
        sumL = _mm_mul_ps (_mm_load1_ps (&tiPL[iA]), clL[A]);                       \
        sumR = _mm_mul_ps (_mm_load1_ps (&tiPR[iA]), clR[A]);                       \
        sumA = _mm_mul_ps (_mm_load1_ps (&tiPA[iA]), clA[A]);                       \
        sumL = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiPL[iC]), clL[C]), sumL);    \
        sumR = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiPR[iC]), clR[C]), sumR);    \
        sumA = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiPA[iC]), clA[C]), sumA);    \
        sumL = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiPL[iG]), clL[G]), sumL);    \
        sumR = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiPR[iG]), clR[G]), sumR);    \
        sumA = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiPA[iG]), clA[G]), sumA);    \
        sumL = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiPL[iT]), clL[T]), sumL);    \
        sumR = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiPR[iT]), clR[T]), sumR);    \
        sumA = _mm_add_ps (_mm_mul_ps (_mm_load1_ps (&tiPA[iT]), clA[T]), sumA);    \
        *clP++ = _mm_mul_ps (_mm_mul_ps (sumL, sumR), sumA);                        \
    }                                                                               \
    while (0)

/*----------------------------------------------------------------
|
|   CondLikeRoot_NUC4_SSE: 4by4 nucleotide model with or without rate
|       variation using SSE instructions
|
|   For every rate category and every vector group of characters,
|   each of the four output vectors is the product of three
|   matrix-row sums: left descendant, right descendant and ancestor.
|   Always returns NO_ERROR.
|
-----------------------------------------------------------------*/
int CondLikeRoot_NUC4_SSE (TreeNode *p, int division, int chain)
{
    int         cat, vec;
    CLFlt       *tiPL, *tiPR, *tiPA;
    __m128      *clL, *clR, *clP, *clA;
    __m128      sumL, sumR, sumA;
    ModelInfo   *m = &modelSettings[division];

    /* flip state of node so that we are not overwriting old cond likes */
    FlipCondLikeSpace (m, chain, p->index);

    /* conditional likelihoods, viewed as 128-bit vectors */
    clL = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->left->index ]];
    clR = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->right->index]];
    clP = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->index       ]];
    clA = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->anc->index  ]];

    /* transition probabilities; the ancestor branch uses this node's own entry */
    tiPL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
    tiPR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
    tiPA = m->tiProbs[m->tiProbsIndex[chain][p->index       ]];

    /* 16 matrix entries per rate category, 4 vectors per character group */
    for (cat = 0; cat < m->numRateCats; cat++, tiPL += 16, tiPR += 16, tiPA += 16)
    {
        for (vec = 0; vec < m->numVecChars; vec++, clL += 4, clR += 4, clA += 4)
        {
            NUC4_ROOT_SSE_ROW (AA, AC, AG, AT);
            NUC4_ROOT_SSE_ROW (CA, CC, CG, CT);
            NUC4_ROOT_SSE_ROW (GA, GC, GG, GT);
            NUC4_ROOT_SSE_ROW (TA, TC, TG, TT);
        }
    }

    return NO_ERROR;
}
#undef NUC4_ROOT_SSE_ROW
#endif
4017
4018
4019 #if !defined (SSE_ENABLED) || 1
4020 /*----------------------------------------------------------------
4021 |
4022 | CondLikeRoot_NY98: codon model with omega variation
4023 |
4024 -----------------------------------------------------------------*/
CondLikeRoot_NY98(TreeNode * p,int division,int chain)4025 int CondLikeRoot_NY98 (TreeNode *p, int division, int chain)
4026 {
4027 int a, b, c, d, h, i, j, k, shortCut, *lState=NULL, *rState=NULL, *aState=NULL,
4028 nStates, nStatesSquared;
4029 CLFlt likeL, likeR, likeA, *clL, *clR, *clP, *clA, *pL, *pR, *pA,
4030 *tiPL, *tiPR, *tiPA;
4031 ModelInfo *m;
4032
4033 /* find model settings for this division and nStates, nStatesSquared */
4034 m = &modelSettings[division];
4035 nStates = m->numModelStates;
4036 nStatesSquared = nStates * nStates;
4037
4038 /* flip state of node so that we are not overwriting old cond likes */
4039 FlipCondLikeSpace (m, chain, p->index);
4040
4041 /* find conditional likelihood pointers */
4042 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
4043 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
4044 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
4045 clA = m->condLikes[m->condLikeIndex[chain][p->anc->index ]];
4046
4047 /* find transition probabilities (or calculate instead) */
4048 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
4049 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
4050 pA = m->tiProbs[m->tiProbsIndex[chain][p->index ]];
4051
4052 /* find likelihoods of site patterns for left branch if terminal */
4053 shortCut = 0;
4054 # if !defined (DEBUG_NOSHORTCUTS)
4055 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
4056 {
4057 shortCut |= 1;
4058 lState = m->termState[p->left->index];
4059 tiPL = pL;
4060 for (k=a=0; k<m->numOmegaCats; k++)
4061 {
4062 for (i=0; i<nStates; i++)
4063 for (j=i; j<nStatesSquared; j+=nStates)
4064 preLikeL[a++] = tiPL[j];
4065 /* for ambiguous */
4066 for (i=0; i<nStates; i++)
4067 preLikeL[a++] = 1.0;
4068 tiPL += nStatesSquared;
4069 }
4070 }
4071
4072 /* find likelihoods of site patterns for right branch if terminal */
4073 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
4074 {
4075 shortCut |= 2;
4076 rState = m->termState[p->right->index];
4077 tiPR = pR;
4078 for (k=a=0; k<m->numOmegaCats; k++)
4079 {
4080 for (i=0; i<nStates; i++)
4081 for (j=i; j<nStatesSquared; j+=nStates)
4082 preLikeR[a++] = tiPR[j];
4083 /* for ambiguous */
4084 for (i=0; i<nStates; i++)
4085 preLikeR[a++] = 1.0;
4086 tiPR += nStatesSquared;
4087 }
4088 }
4089
4090 /* find likelihoods of site patterns for anc branch, always terminal */
4091 if (m->isPartAmbig[p->anc->index] == YES)
4092 {
4093 shortCut = 4;
4094 }
4095 else
4096 {
4097 aState = m->termState[p->anc->index];
4098 tiPA = pA;
4099 for (k=a=0; k<m->numOmegaCats; k++)
4100 {
4101 for (i=0; i<nStates; i++)
4102 for (j=i; j<nStatesSquared; j+=nStates)
4103 preLikeA[a++] = tiPA[j];
4104 /* for ambiguous */
4105 for (i=0; i<nStates; i++)
4106 preLikeA[a++] = 1.0;
4107 tiPA += nStatesSquared;
4108 }
4109 }
4110 # else
4111 shortCut = 4;
4112 # endif
4113
4114 switch (shortCut)
4115 {
4116 case 4:
4117 tiPL = pL;
4118 tiPR = pR;
4119 tiPA = pA;
4120 for (k=0; k<m->numOmegaCats; k++)
4121 {
4122 for (c=0; c<m->numChars; c++)
4123 {
4124 for (i=h=0; i<nStates; i++)
4125 {
4126 likeL = likeR = likeA = 0.0;
4127 for (j=0; j<nStates; j++)
4128 {
4129 likeA += tiPA[h]*clA[j];
4130 likeL += tiPL[h]*clL[j];
4131 likeR += tiPR[h++]*clR[j];
4132 }
4133 *(clP++) = likeL * likeR * likeA;
4134 }
4135 clL += nStates;
4136 clR += nStates;
4137 clA += nStates;
4138 }
4139 tiPL += nStatesSquared;
4140 tiPR += nStatesSquared;
4141 tiPA += nStatesSquared;
4142 }
4143 break;
4144 case 0:
4145 tiPR = pR;
4146 tiPL = pL;
4147 for (k=0; k<m->numOmegaCats; k++)
4148 {
4149 for (c=0; c<m->numChars; c++)
4150 {
4151 b = aState[c] + k*(nStatesSquared+nStates);
4152 for (i=h=0; i<nStates; i++)
4153 {
4154 likeR = likeL = 0.0;
4155 for (j=0; j<nStates; j++)
4156 {
4157 likeR += tiPR[h]*clR[j];
4158 likeL += tiPL[h++]*clL[j];
4159 }
4160 *(clP++) = preLikeA[b++] * likeL * likeR;
4161 }
4162 clR += nStates;
4163 clL += nStates;
4164 }
4165 tiPR += nStatesSquared;
4166 tiPL += nStatesSquared;
4167 }
4168 break;
4169 case 1:
4170 tiPR = pR;
4171 for (k=0; k<m->numOmegaCats; k++)
4172 {
4173 for (c=0; c<m->numChars; c++)
4174 {
4175 a = lState[c] + k*(nStatesSquared+nStates);
4176 b = aState[c] + k*(nStatesSquared+nStates);
4177 for (i=h=0; i<nStates; i++)
4178 {
4179 likeR = 0.0;
4180 for (j=0; j<nStates; j++)
4181 {
4182 likeR += tiPR[h++]*clR[j];
4183 }
4184 *(clP++) = preLikeL[a++] * preLikeA[b++] * likeR;
4185 }
4186 clR += nStates;
4187 }
4188 tiPR += nStatesSquared;
4189 }
4190 break;
4191 case 2:
4192 tiPL = pL;
4193 for (k=0; k<m->numOmegaCats; k++)
4194 {
4195 for (c=0; c<m->numChars; c++)
4196 {
4197 a = rState[c] + k*(nStatesSquared+nStates);
4198 b = aState[c] + k*(nStatesSquared+nStates);
4199 for (i=h=0; i<nStates; i++)
4200 {
4201 likeL = 0.0;
4202 for (j=0; j<nStates; j++)
4203 {
4204 likeL += tiPL[h++]*clL[j];
4205 }
4206 *(clP++) = preLikeR[a++] * preLikeA[b++] * likeL;
4207 }
4208 clL += nStates;
4209 }
4210 tiPL += nStatesSquared;
4211 }
4212 break;
4213 case 3:
4214 for (k=0; k<m->numOmegaCats; k++)
4215 {
4216 for (c=0; c<m->numChars; c++)
4217 {
4218 a = rState[c] + k*(nStatesSquared+nStates);
4219 b = lState[c] + k*(nStatesSquared+nStates);
4220 d = aState[c] + k*(nStatesSquared+nStates);
4221 for (i=0; i<nStates; i++)
4222 {
4223 *(clP++) = preLikeR[a++] * preLikeL[b++] * preLikeA[d++];
4224 }
4225 }
4226 }
4227 break;
4228 }
4229
4230 return NO_ERROR;
4231 }
4232 #endif
4233
4234
4235 #if defined (SSE_ENABLED)
4236 /*----------------------------------------------------------------
4237 |
4238 | CondLikeRoot_NY98_SSE: codon model with omega variation
4239 |
4240 -----------------------------------------------------------------*/
CondLikeRoot_NY98_SSE(TreeNode * p,int division,int chain)4241 int CondLikeRoot_NY98_SSE (TreeNode *p, int division, int chain)
4242 {
4243 int c, c1, t, h, i, j, k, shortCut, *lState=NULL, *rState=NULL, *aState=NULL,
4244 nStates, nStatesSquared;
4245 CLFlt *pL, *pR, *pA,
4246 *tiPL, *tiPR, *tiPA;
4247 __m128 *clL, *clR, *clP, *clA;
4248 __m128 mTiPL, mTiPR, mTiPA, mL, mR, mA, mAcumL, mAcumR, mAcumA;
4249 ModelInfo *m;
4250 CLFlt *preLikeRV[4] = {0};
4251 CLFlt *preLikeLV[4] = {0};
4252 CLFlt *preLikeAV[4] = {0};
4253
4254 # if !defined (DEBUG_NOSHORTCUTS)
4255 int a;
4256
4257 # endif
4258
4259 /* find model settings for this division and nStates, nStatesSquared */
4260 m = &modelSettings[division];
4261 nStates = m->numModelStates;
4262 nStatesSquared = nStates * nStates;
4263
4264 /* flip state of node so that we are not overwriting old cond likes */
4265 FlipCondLikeSpace (m, chain, p->index);
4266
4267 /* find conditional likelihood pointers */
4268 clL = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->left->index ]];
4269 clR = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->right->index]];
4270 clP = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->index ]];
4271 clA = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->anc->index ]];
4272
4273 /* find transition probabilities (or calculate instead) */
4274 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
4275 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
4276 pA = m->tiProbs[m->tiProbsIndex[chain][p->index ]];
4277
4278 /* find likelihoods of site patterns for left branch if terminal */
4279 shortCut = 0;
4280 # if !defined (DEBUG_NOSHORTCUTS)
4281 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
4282 {
4283 shortCut |= 1;
4284 lState = m->termState[p->left->index];
4285 tiPL = pL;
4286 for (k=a=0; k<m->numOmegaCats; k++)
4287 {
4288 for (i=0; i<nStates; i++)
4289 for (j=i; j<nStatesSquared; j+=nStates)
4290 preLikeL[a++] = tiPL[j];
4291 /* for ambiguous */
4292 for (i=0; i<nStates; i++)
4293 preLikeL[a++] = 1.0;
4294 tiPL += nStatesSquared;
4295 }
4296 }
4297
4298 /* find likelihoods of site patterns for right branch if terminal */
4299 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
4300 {
4301 shortCut |= 2;
4302 rState = m->termState[p->right->index];
4303 tiPR = pR;
4304 for (k=a=0; k<m->numOmegaCats; k++)
4305 {
4306 for (i=0; i<nStates; i++)
4307 for (j=i; j<nStatesSquared; j+=nStates)
4308 preLikeR[a++] = tiPR[j];
4309 /* for ambiguous */
4310 for (i=0; i<nStates; i++)
4311 preLikeR[a++] = 1.0;
4312 tiPR += nStatesSquared;
4313 }
4314 }
4315
4316 /* find likelihoods of site patterns for anc branch, always terminal */
4317 if (m->isPartAmbig[p->anc->index] == YES)
4318 {
4319 shortCut = 4;
4320 }
4321 else
4322 {
4323 aState = m->termState[p->anc->index];
4324 tiPA = pA;
4325 for (k=a=0; k<m->numOmegaCats; k++)
4326 {
4327 for (i=0; i<nStates; i++)
4328 for (j=i; j<nStatesSquared; j+=nStates)
4329 preLikeA[a++] = tiPA[j];
4330 /* for ambiguous */
4331 for (i=0; i<nStates; i++)
4332 preLikeA[a++] = 1.0;
4333 tiPA += nStatesSquared;
4334 }
4335 }
4336 # else
4337 shortCut = 4;
4338 # endif
4339 switch (shortCut)
4340 {
4341 case 4:
4342 tiPL = pL;
4343 tiPR = pR;
4344 tiPA = pA;
4345 for (k=0; k<m->numOmegaCats; k++)
4346 {
4347 for (c=0; c<m->numVecChars; c++)
4348 {
4349 for (i=h=0; i<nStates; i++)
4350 {
4351 mAcumL = _mm_setzero_ps();
4352 mAcumR = _mm_setzero_ps();
4353 mAcumA = _mm_setzero_ps();
4354 for (j=0; j<nStates; j++)
4355 {
4356 mTiPL = _mm_load1_ps (&tiPL[h]);
4357 mTiPR = _mm_load1_ps (&tiPR[h]);
4358 mTiPA = _mm_load1_ps (&tiPA[h++]);
4359 mL = _mm_mul_ps (mTiPL, clL[j]);
4360 mR = _mm_mul_ps (mTiPR, clR[j]);
4361 mA = _mm_mul_ps (mTiPA, clA[j]);
4362 mAcumL = _mm_add_ps (mL, mAcumL);
4363 mAcumR = _mm_add_ps (mR, mAcumR);
4364 mAcumA = _mm_add_ps (mA, mAcumA);
4365 }
4366 mAcumL = _mm_mul_ps (mAcumL, mAcumR);
4367 *(clP++) = _mm_mul_ps (mAcumL, mAcumA);
4368 }
4369 clL += nStates;
4370 clR += nStates;
4371 clA += nStates;
4372 }
4373 tiPL += nStatesSquared;
4374 tiPR += nStatesSquared;
4375 tiPA += nStatesSquared;
4376 }
4377 break;
4378 case 0:
4379 tiPL =pL;
4380 tiPR =pR;
4381 for (k=0; k<m->numOmegaCats; k++)
4382 {
4383 for (c=t=0; c<m->numVecChars; c++)
4384 {
4385 for (c1=0; c1<m->numFloatsPerVec; c1++,t++)
4386 {
4387 preLikeAV[c1] = &preLikeA[aState[t] + k*(nStatesSquared+nStates)];
4388 }
4389 for (i=h=0; i<nStates; i++)
4390 {
4391 assert (m->numFloatsPerVec == 4); /* In the following statment we assume that SSE register can hold exactly 4 ClFlts. */
4392 mAcumA = _mm_set_ps (*(preLikeAV[3]++), *(preLikeAV[2]++), *(preLikeAV[1]++), *(preLikeAV[0]++));
4393 mAcumL = _mm_setzero_ps();
4394 mAcumR = _mm_setzero_ps();
4395 for (j=0; j<nStates; j++)
4396 {
4397 mTiPL = _mm_load1_ps (&tiPL[h]);
4398 mL = _mm_mul_ps (mTiPL, clL[j]);
4399 mAcumL = _mm_add_ps (mL, mAcumL);
4400 mTiPR = _mm_load1_ps (&tiPR[h++]);
4401 mR = _mm_mul_ps (mTiPR, clR[j]);
4402 mAcumR = _mm_add_ps (mR, mAcumR);
4403 }
4404 mAcumL = _mm_mul_ps (mAcumL, mAcumR);
4405 *(clP++) = _mm_mul_ps (mAcumL, mAcumA);
4406 }
4407 clR += nStates;
4408 clL += nStates;
4409 }
4410 tiPL += nStatesSquared;
4411 tiPR += nStatesSquared;
4412 }
4413 break;
4414 case 1:
4415 tiPR = pR;
4416 for (k=0; k<m->numOmegaCats; k++)
4417 {
4418 for (c=t=0; c<m->numVecChars; c++)
4419 {
4420 for (c1=0; c1<m->numFloatsPerVec; c1++,t++)
4421 {
4422 preLikeLV[c1] = &preLikeL[lState[t] + k*(nStatesSquared+nStates)];
4423 preLikeAV[c1] = &preLikeA[aState[t] + k*(nStatesSquared+nStates)];
4424 }
4425 for (i=h=0; i<nStates; i++)
4426 {
4427 assert (m->numFloatsPerVec == 4); /* In the following statment we assume that SSE register can hold exactly 4 ClFlts. */
4428 mAcumL = _mm_set_ps (*(preLikeLV[3]++), *(preLikeLV[2]++), *(preLikeLV[1]++), *(preLikeLV[0]++));
4429 mAcumA = _mm_set_ps (*(preLikeAV[3]++), *(preLikeAV[2]++), *(preLikeAV[1]++), *(preLikeAV[0]++));
4430 mAcumR = _mm_setzero_ps();
4431 for (j=0; j<nStates; j++)
4432 {
4433 mTiPR = _mm_load1_ps (&tiPR[h++]);
4434 mR = _mm_mul_ps (mTiPR, clR[j]);
4435 mAcumR = _mm_add_ps (mR, mAcumR);
4436 }
4437 mAcumL = _mm_mul_ps (mAcumL, mAcumR);
4438 *(clP++) = _mm_mul_ps (mAcumL, mAcumA);
4439 }
4440 clR += nStates;
4441 }
4442 tiPR += nStatesSquared;
4443 }
4444 break;
4445 case 2:
4446 tiPL = pL;
4447 for (k=0; k<m->numOmegaCats; k++)
4448 {
4449 for (c=t=0; c<m->numVecChars; c++)
4450 {
4451 for (c1=0; c1<m->numFloatsPerVec; c1++,t++)
4452 {
4453 preLikeRV[c1] = &preLikeR[rState[t] + k*(nStatesSquared+nStates)];
4454 preLikeAV[c1] = &preLikeA[aState[t] + k*(nStatesSquared+nStates)];
4455 }
4456 for (i=h=0; i<nStates; i++)
4457 {
4458 assert (m->numFloatsPerVec == 4); /* In the following statment we assume that SSE register can hold exactly 4 ClFlts. */
4459 mAcumR = _mm_set_ps (*(preLikeRV[3]++), *(preLikeRV[2]++), *(preLikeRV[1]++), *(preLikeRV[0]++));
4460 mAcumA = _mm_set_ps (*(preLikeAV[3]++), *(preLikeAV[2]++), *(preLikeAV[1]++), *(preLikeAV[0]++));
4461 mAcumL = _mm_setzero_ps();
4462 for (j=0; j<nStates; j++)
4463 {
4464 mTiPL = _mm_load1_ps (&tiPL[h++]);
4465 mL = _mm_mul_ps (mTiPL, clL[j]);
4466 mAcumL = _mm_add_ps (mL, mAcumL);
4467 }
4468 mAcumL = _mm_mul_ps (mAcumL, mAcumR);
4469 *(clP++) = _mm_mul_ps (mAcumL,mAcumA);
4470 }
4471 clL += nStates;
4472 }
4473 tiPL += nStatesSquared;
4474 }
4475 break;
4476 case 3:
4477 for (k=0; k<m->numOmegaCats; k++)
4478 {
4479 for (c=t=0; c<m->numVecChars; c++)
4480 {
4481 for (c1=0; c1<m->numFloatsPerVec; c1++,t++)
4482 {
4483 preLikeRV[c1] = &preLikeR[rState[t] + k*(nStatesSquared+nStates)];
4484 preLikeLV[c1] = &preLikeL[lState[t] + k*(nStatesSquared+nStates)];
4485 preLikeAV[c1] = &preLikeA[aState[t] + k*(nStatesSquared+nStates)];
4486 }
4487 for (i=0; i<nStates; i++)
4488 {
4489 assert (m->numFloatsPerVec == 4); /* In the following 2 statments we assume that SSE register can hold exactly 4 ClFlts. */
4490 mL = _mm_set_ps (*(preLikeLV[3]++), *(preLikeLV[2]++), *(preLikeLV[1]++), *(preLikeLV[0]++));
4491 mR = _mm_set_ps (*(preLikeRV[3]++), *(preLikeRV[2]++), *(preLikeRV[1]++), *(preLikeRV[0]++));
4492 mA = _mm_set_ps (*(preLikeAV[3]++), *(preLikeAV[2]++), *(preLikeAV[1]++), *(preLikeAV[0]++));
4493 mL = _mm_mul_ps (mL,mR);
4494 *(clP++) = _mm_mul_ps (mL,mA);
4495 }
4496 }
4497 }
4498 break;
4499 }
4500
4501 return NO_ERROR;
4502 }
4503 #endif
4504
4505
4506 /*----------------------------------------------------------------
4507 |
4508 | CondLikeRoot_Std: variable number of states model
4509 | with or without rate variation
4510 |
4511 -----------------------------------------------------------------*/
CondLikeRoot_Std(TreeNode * p,int division,int chain)4512 int CondLikeRoot_Std (TreeNode *p, int division, int chain)
4513 {
4514 int a, c, h, i, j, k, nStates=0, nCats=0, tmp;
4515 CLFlt *clL, *clR, *clP, *clA, *pL, *pR, *pA, *tiPL, *tiPR, *tiPA,
4516 likeL, likeR, likeA;
4517 ModelInfo *m;
4518
4519 m = &modelSettings[division];
4520
4521 /* flip state of node so that we are not overwriting old cond likes */
4522 FlipCondLikeSpace (m, chain, p->index);
4523
4524 /* find conditional likelihood pointers */
4525 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
4526 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
4527 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
4528 clA = m->condLikes[m->condLikeIndex[chain][p->anc->index ]];
4529
4530 /* find transition probabilities (or calculate instead) */
4531 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
4532 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
4533 pA = m->tiProbs[m->tiProbsIndex[chain][p->index ]];
4534
4535 /* calculate ancestral probabilities */
4536 for (k=h=0; k<m->numRateCats; k++)
4537 {
4538 /* calculate ancestral probabilities */
4539 for (c=0; c<m->numChars; c++)
4540 {
4541 nStates = m->nStates[c];
4542
4543 /* the following lines ensure that nCats is 1 unless */
4544 /* the character is binary and beta categories are used */
4545 if (nStates == 2)
4546 nCats = m->numBetaCats;
4547 else
4548 nCats = 1;
4549
4550 tmp = k*nStates*nStates; /* tmp contains offset to skip gamma cats that already processed*/
4551 tiPL = pL + m->tiIndex[c] + tmp;
4552 tiPR = pR + m->tiIndex[c] + tmp;
4553 tiPA = pA + m->tiIndex[c] + tmp;
4554 tmp = (m->numRateCats-1)*2*2; /* tmp contains size of block of tpi matrices across all rate cats (minus one) for single beta category. Further used only if character is binary to jump to next beta category */
4555
4556 for (j=0; j<nCats;j++)
4557 {
4558 for (a=0; a<nStates; a++)
4559 {
4560 likeL = likeR = likeA = 0.0;
4561 for (i=0; i<nStates; i++)
4562 {
4563 likeL += *(tiPL++) * clL[i];
4564 likeR += *(tiPR++) * clR[i];
4565 likeA += *(tiPA++) * clA[i];
4566 }
4567 clP[h++] = likeL * likeR * likeA;
4568 }
4569 clL += nStates;
4570 clR += nStates;
4571 clA += nStates;
4572
4573 tiPL += tmp;
4574 tiPR += tmp;
4575 tiPA += tmp;
4576 }
4577 }
4578 }
4579
4580 return NO_ERROR;
4581 }
4582
4583
4584 /*----------------------------------------------------------------
4585 |
4586 | CondLikeUp_Bin: pull likelihoods up and calculate scaled
4587 | finals, binary model with or without rate variation
4588 |
4589 -----------------------------------------------------------------*/
CondLikeUp_Bin(TreeNode * p,int division,int chain)4590 int CondLikeUp_Bin (TreeNode *p, int division, int chain)
4591 {
4592 int c, k;
4593 CLFlt *clFA, *clFP, *clDP, *tiP, condLikeUp[2], sum[2];
4594 ModelInfo *m;
4595
4596 /* find model settings for this division */
4597 m = &modelSettings[division];
4598
4599 if (p->anc->anc == NULL)
4600 {
4601 /* this is the root node */
4602 /* find conditional likelihood pointers = down cond likes */
4603 /* use conditional likelihood scratch space for final cond likes */
4604 clDP = m->condLikes[m->condLikeIndex[chain][p->index]];
4605 clFP = m->condLikes[m->condLikeScratchIndex[p->index]];
4606
4607 for (k=0; k<m->numRateCats; k++)
4608 {
4609 for (c=0; c<m->numChars; c++)
4610 {
4611 *(clFP++) = *(clDP++);
4612 *(clFP++) = *(clDP++);
4613 }
4614 }
4615 }
4616 else
4617 {
4618 /* find conditional likelihood pointers */
4619 /* use conditional likelihood scratch space for final cond likes */
4620 clFA = m->condLikes[m->condLikeScratchIndex[p->anc->index]];
4621 clFP = m->condLikes[m->condLikeScratchIndex[p->index ]];
4622 clDP = m->condLikes[m->condLikeIndex[chain][p->index ]];
4623
4624 /* find transition probabilities */
4625 tiP = m->tiProbs[m->tiProbsIndex[chain][p->index]];
4626
4627 for (k=0; k<m->numRateCats; k++)
4628 {
4629 for (c=0; c<m->numChars; c++)
4630 {
4631 condLikeUp[0] = condLikeUp[1] = 0.0;
4632
4633 sum[0] = tiP[0]*clDP[0] + tiP[1]*clDP[1];
4634 sum[1] = tiP[2]*clDP[0] + tiP[3]*clDP[1];
4635
4636 if (sum[0] != 0.0) condLikeUp[0] = clFA[0] / sum[0];
4637 if (sum[1] != 0.0) condLikeUp[1] = clFA[1] / sum[1];
4638
4639 *(clFP++) = (condLikeUp[0]*tiP[0] + condLikeUp[1]*tiP[1])*clDP[0];
4640 *(clFP++) = (condLikeUp[0]*tiP[2] + condLikeUp[1]*tiP[3])*clDP[1];
4641
4642 clFA += 2;
4643 clDP += 2;
4644 }
4645 tiP += 4;
4646 }
4647 }
4648
4649 return NO_ERROR;
4650 }
4651
4652
4653 /*----------------------------------------------------------------
4654 |
4655 | CondLikeUp_Gen: pull likelihoods up and calculate scaled
4656 | finals for an interior node
4657 |
4658 -----------------------------------------------------------------*/
CondLikeUp_Gen(TreeNode * p,int division,int chain)4659 int CondLikeUp_Gen (TreeNode *p, int division, int chain)
4660 {
4661 int a, c, i, j, k, nStates, nStatesSquared, nRateCats;
4662 CLFlt *clFA, *clFP, *clDP, *tiP, *condLikeUp, sum;
4663 ModelInfo *m;
4664
4665 /* find model settings for this division */
4666 m = &modelSettings[division];
4667
4668 /* find number of states in the model */
4669 nStates = m->numModelStates;
4670 nStatesSquared = nStates * nStates;
4671
4672 /* find number of gamma cats */
4673 nRateCats = m->numRateCats;
4674 if (m->gibbsGamma == YES)
4675 nRateCats = 1;
4676
4677 /* use preallocated scratch space */
4678 condLikeUp = m->ancStateCondLikes;
4679
4680 /* calculate final states */
4681 if (p->anc->anc == NULL)
4682 {
4683 /* this is the root node */
4684 /* find conditional likelihood pointers = down cond likes */
4685 /* use conditional likelihood scratch space for final cond likes */
4686 clDP = m->condLikes[m->condLikeIndex[chain][p->index]];
4687 clFP = m->condLikes[m->condLikeScratchIndex[p->index]];
4688
4689 /* final cond likes = downpass cond likes */
4690 for (k=0; k<nRateCats; k++)
4691 {
4692 /* copy cond likes */
4693 for (c=0; c<m->numChars*nStates; c++)
4694 *(clFP++) = *(clDP++);
4695 }
4696 }
4697 else
4698 {
4699 /* find conditional likelihood pointers */
4700 /* use conditional likelihood scratch space for final cond likes */
4701 clFA = m->condLikes[m->condLikeScratchIndex[p->anc->index]];
4702 clFP = m->condLikes[m->condLikeScratchIndex[p->index ]];
4703 clDP = m->condLikes[m->condLikeIndex[chain][p->index ]];
4704
4705 /* find transition probabilities */
4706 tiP = m->tiProbs[m->tiProbsIndex[chain][p->index]];
4707
4708 for (k=0; k<nRateCats; k++)
4709 {
4710 for (c=0; c<m->numChars; c++)
4711 {
4712 for (a=j=0; a<nStates; a++)
4713 {
4714 sum = 0.0;
4715 for (i=0; i<nStates; i++)
4716 sum += tiP[j++]*clDP[i];
4717 if (sum != 0.0) condLikeUp[a] = clFA[a] / sum;
4718 }
4719
4720 for (a=j=0; a<nStates; a++)
4721 {
4722 sum = 0.0;
4723 for (i=0; i<nStates; i++)
4724 {
4725 sum += condLikeUp[i] * tiP[j++];
4726 }
4727 *(clFP++) = sum * clDP[a];
4728 }
4729
4730 clFA += nStates;
4731 clDP += nStates;
4732 }
4733 tiP += nStatesSquared;
4734 }
4735 }
4736
4737 return NO_ERROR;
4738 }
4739
4740
4741 /*----------------------------------------------------------------
4742 |
4743 | CondLikeUp_NUC4: pull likelihoods up and calculate scaled
4744 | finals for an interior node
4745 |
4746 -----------------------------------------------------------------*/
CondLikeUp_NUC4(TreeNode * p,int division,int chain)4747 int CondLikeUp_NUC4 (TreeNode *p, int division, int chain)
4748 {
4749 int c, k, nRateCats;
4750 CLFlt *clFA, *clFP, *clDP, *tiP, condLikeUp[4], sum[4];
4751 ModelInfo *m;
4752
4753 /* find model settings for this division */
4754 m = &modelSettings[division];
4755
4756 /* find number of rate cats */
4757 nRateCats = m->numRateCats;
4758 if (m->gibbsGamma == YES)
4759 nRateCats = 1;
4760
4761 /* calculate final states */
4762 if (p->anc->anc == NULL)
4763 {
4764 /* this is the root node */
4765 /* find conditional likelihood pointers = down cond likes */
4766 /* use conditional likelihood scratch space for final cond likes */
4767 clDP = m->condLikes[m->condLikeIndex[chain][p->index]];
4768 clFP = m->condLikes[m->condLikeScratchIndex[p->index]];
4769
4770 /* final cond likes = downpass cond likes */
4771 for (k=0; k<nRateCats; k++)
4772 {
4773 /* copy cond likes */
4774 for (c=0; c<m->numChars; c++)
4775 {
4776 *(clFP++) = *(clDP++);
4777 *(clFP++) = *(clDP++);
4778 *(clFP++) = *(clDP++);
4779 *(clFP++) = *(clDP++);
4780 }
4781 }
4782 }
4783 else
4784 {
4785 /* find conditional likelihood pointers */
4786 /* use conditional likelihood scratch space for final cond likes */
4787 clFA = m->condLikes[m->condLikeScratchIndex[p->anc->index]];
4788 clFP = m->condLikes[m->condLikeScratchIndex[p->index ]];
4789 clDP = m->condLikes[m->condLikeIndex[chain][p->index ]];
4790
4791 /* find transition probabilities */
4792 tiP = m->tiProbs[m->tiProbsIndex[chain][p->index]];
4793
4794 for (k=0; k<nRateCats; k++)
4795 {
4796 for (c=0; c<m->numChars; c++)
4797 {
4798 condLikeUp[A] = condLikeUp[C] = condLikeUp[G] = condLikeUp[T] = 0.0;
4799
4800 sum[A] = (tiP[AA]*clDP[A] + tiP[AC]*clDP[C] + tiP[AG]*clDP[G] + tiP[AT]*clDP[T]);
4801 sum[C] = (tiP[CA]*clDP[A] + tiP[CC]*clDP[C] + tiP[CG]*clDP[G] + tiP[CT]*clDP[T]);
4802 sum[G] = (tiP[GA]*clDP[A] + tiP[GC]*clDP[C] + tiP[GG]*clDP[G] + tiP[GT]*clDP[T]);
4803 sum[T] = (tiP[TA]*clDP[A] + tiP[TC]*clDP[C] + tiP[TG]*clDP[G] + tiP[TT]*clDP[T]);
4804
4805 if (sum[A] != 0.0) condLikeUp[A] = clFA[A] / sum[A];
4806 if (sum[C] != 0.0) condLikeUp[C] = clFA[C] / sum[C];
4807 if (sum[G] != 0.0) condLikeUp[G] = clFA[G] / sum[G];
4808 if (sum[T] != 0.0) condLikeUp[T] = clFA[T] / sum[T];
4809
4810 /*
4811 clFP[A] = (condLikeUp[A]*tiP[AA] + condLikeUp[C]*tiP[CA] + condLikeUp[G]*tiP[GA] + condLikeUp[T]*tiP[TA])*clDP[A];
4812 clFP[C] = (condLikeUp[A]*tiP[AC] + condLikeUp[C]*tiP[CC] + condLikeUp[G]*tiP[GC] + condLikeUp[T]*tiP[TC])*clDP[C];
4813 clFP[G] = (condLikeUp[A]*tiP[AG] + condLikeUp[C]*tiP[CG] + condLikeUp[G]*tiP[GG] + condLikeUp[T]*tiP[TG])*clDP[G];
4814 clFP[T] = (condLikeUp[A]*tiP[AT] + condLikeUp[C]*tiP[CT] + condLikeUp[G]*tiP[GT] + condLikeUp[T]*tiP[TT])*clDP[T];
4815 */
4816
4817 clFP[A] = (condLikeUp[A]*tiP[AA] + condLikeUp[C]*tiP[AC] + condLikeUp[G]*tiP[AG] + condLikeUp[T]*tiP[AT])*clDP[A];
4818 clFP[C] = (condLikeUp[A]*tiP[CA] + condLikeUp[C]*tiP[CC] + condLikeUp[G]*tiP[CG] + condLikeUp[T]*tiP[CT])*clDP[C];
4819 clFP[G] = (condLikeUp[A]*tiP[GA] + condLikeUp[C]*tiP[GC] + condLikeUp[G]*tiP[GG] + condLikeUp[T]*tiP[GT])*clDP[G];
4820 clFP[T] = (condLikeUp[A]*tiP[TA] + condLikeUp[C]*tiP[TC] + condLikeUp[G]*tiP[TG] + condLikeUp[T]*tiP[TT])*clDP[T];
4821
4822 clFA += 4;
4823 clFP += 4;
4824 clDP += 4;
4825 }
4826 tiP += 16;
4827 }
4828 }
4829
4830 return NO_ERROR;
4831 }
4832
4833
4834 /*----------------------------------------------------------------
4835 |
4836 | CondLikeUp_Std: pull likelihoods up and calculate scaled
4837 | finals for an interior node
4838 |
4839 -----------------------------------------------------------------*/
CondLikeUp_Std(TreeNode * p,int division,int chain)4840 int CondLikeUp_Std (TreeNode *p, int division, int chain)
4841 {
4842 int a, c, i, j, k, t, nStates, nCats, coppySize,tmp;
4843 CLFlt *clFA, *clFP, *clDP, *pA, *tiP, condLikeUp[10], sum;
4844 ModelInfo *m;
4845
4846 /* find model settings for this division */
4847 m = &modelSettings[division];
4848
4849 /* calculate final states */
4850 if (p->anc->anc == NULL)
4851 {
4852 /* this is the root node */
4853 /* find conditional likelihood pointers = down cond likes */
4854 /* use conditional likelihood scratch space for final cond likes */
4855 clDP = m->condLikes[m->condLikeIndex[chain][p->index]];
4856 clFP = m->condLikes[m->condLikeScratchIndex[p->index]];
4857
4858 coppySize=0;
4859 /* final cond likes = downpass cond likes */
4860 for (c=0; c<m->numChars; c++)
4861 {
4862 /* calculate nStates and nCats */
4863 nStates = m->nStates[c];
4864
4865 /* the following lines ensure that nCats is 1 unless */
4866 /* the character is binary and beta categories are used */
4867 if (nStates == 2)
4868 nCats = m->numBetaCats;
4869 else
4870 nCats = 1;
4871
4872 coppySize+=nCats*nStates;
4873 }
4874
4875 /* finally multiply with the rate cats */
4876 coppySize *= m->numRateCats;
4877
4878 /* copy cond likes */
4879 for (k=0; k<coppySize; k++)
4880 *(clFP++) = *(clDP++);
4881 }
4882 else
4883 {
4884 /* find conditional likelihood pointers */
4885 /* use conditional likelihood scratch space for final cond likes */
4886 clFA = m->condLikes[m->condLikeScratchIndex[p->anc->index]];
4887 clFP = m->condLikes[m->condLikeScratchIndex[p->index ]];
4888 clDP = m->condLikes[m->condLikeIndex[chain][p->index ]];
4889
4890 /* find transition probabilities */
4891 pA = m->tiProbs[m->tiProbsIndex[chain][p->index]];
4892
4893 for (k=0; k<m->numRateCats; k++)
4894 {
4895 for (c=0; c<m->numChars; c++)
4896 {
4897
4898 /* calculate nStates and nCats */
4899 nStates = m->nStates[c];
4900
4901 /* the following lines ensure that nCats is 1 unless */
4902 /* the character is binary and beta categories are used */
4903 if (nStates == 2)
4904 nCats = m->numBetaCats;
4905 else
4906 nCats = 1;
4907
4908 tmp = k*nStates*nStates; /* tmp contains offset to skip rate cats that already processed*/
4909 tiP = pA + m->tiIndex[c] + tmp;
4910 tmp = (m->numRateCats-1)*2*2; /* tmp contains size of block of tpi matrices across all rate cats (minus one) for single beta category. Further used only if character is binary to jump to next beta category */
4911
4912 /* now calculate the final cond likes */
4913 for (t=0; t<nCats; t++)
4914 {
4915 for (a=j=0; a<nStates; a++)
4916 {
4917 sum = 0.0;
4918 for (i=0; i<nStates; i++)
4919 sum += tiP[j++]*clDP[i];
4920 if (sum == 0.0)
4921 condLikeUp[a] = 0.0; /* we lost the conditional likelihood in the downpass (can occur in gamma model) */
4922 else
4923 condLikeUp[a] = clFA[a] / sum;
4924 }
4925
4926 for (a=j=0; a<nStates; a++)
4927 {
4928 sum = 0.0;
4929 for (i=0; i<nStates; i++)
4930 {
4931 sum += condLikeUp[i] * tiP[j++];
4932 }
4933 clFP[a] = sum * clDP[a];
4934 }
4935
4936 clFP += nStates;
4937 clFA += nStates;
4938 clDP += nStates;
4939 tiP += tmp;
4940 }
4941 }
4942 }
4943 }
4944
4945 return NO_ERROR;
4946 }
4947
4948
4949 /*----------------------------------------------------------------
4950 |
4951 | CondLikeScaler_Gen: general n-state model with or without rate
4952 | variation
4953 |
4954 -----------------------------------------------------------------*/
CondLikeScaler_Gen(TreeNode * p,int division,int chain)4955 int CondLikeScaler_Gen (TreeNode *p, int division, int chain)
4956 {
4957 int c, k, n, nStates;
4958 CLFlt scaler, **clP, *clPtr, *scP, *lnScaler;
4959 ModelInfo *m;
4960
4961 m = &modelSettings[division];
4962 nStates = m->numModelStates;
4963
4964 /* find conditional likelihood pointers */
4965 clPtr = m->condLikes[m->condLikeIndex[chain][p->index]];
4966 clP = m->clP;
4967 for (k=0; k<m->numRateCats; k++)
4968 {
4969 clP[k] = clPtr;
4970 clPtr += m->numChars * m->numModelStates;
4971 }
4972
4973 /* find node scalers */
4974 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
4975
4976 /* find site scalers */
4977 lnScaler = m->scalers[m->siteScalerIndex[chain]];
4978
4979 /* rescale */
4980 for (c=0; c<m->numChars; c++)
4981 {
4982 scaler = 0.0;
4983 for (k=0; k<m->numRateCats; k++)
4984 {
4985 for (n=0; n<nStates; n++)
4986 {
4987 if (clP[k][n] > scaler)
4988 scaler = clP[k][n];
4989 }
4990 }
4991
4992 for (k=0; k<m->numRateCats; k++)
4993 {
4994 for (n=0; n<nStates; n++)
4995 clP[k][n] /= scaler;
4996 clP[k] += n;
4997 }
4998
4999 scP[c] = (CLFlt) log (scaler); /* store node scaler */
5000 lnScaler[c] += scP[c]; /* add into tree scaler */
5001 }
5002
5003 m->unscaledNodes[chain][p->index] = 0;
5004
5005 return (NO_ERROR);
5006 }
5007
5008
5009 #if defined (SSE_ENABLED)
5010 /*----------------------------------------------------------------
5011 |
5012 | CondLikeScaler_Gen_SSE: general n-state model with or without rate
5013 | variation
5014 |
5015 -----------------------------------------------------------------*/
CondLikeScaler_Gen_SSE(TreeNode * p,int division,int chain)5016 int CondLikeScaler_Gen_SSE (TreeNode *p, int division, int chain)
5017 {
5018 int c, k, n, nStates;
5019 CLFlt *scP, *lnScaler;
5020 __m128 *clPtr, **clP, m1;
5021 ModelInfo *m;
5022
5023 m = &modelSettings[division];
5024 nStates = m->numModelStates;
5025
5026 /* find conditional likelihood pointers */
5027 clPtr = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->index]];
5028 clP = m->clP_SSE;
5029 for (k=0; k<m->numRateCats; k++)
5030 {
5031 clP[k] = clPtr;
5032 clPtr += m->numVecChars * m->numModelStates;
5033 }
5034
5035 /* find node scalers */
5036 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
5037 //scP_SSE = (__m128 *) scP;
5038
5039 /* find site scalers */
5040 lnScaler = m->scalers[m->siteScalerIndex[chain]];
5041
5042 /* rescale */
5043 for (c=0; c<m->numVecChars; c++)
5044 {
5045 //scaler = 0.0;
5046 m1 = _mm_setzero_ps ();
5047 for (k=0; k<m->numRateCats; k++)
5048 {
5049 for (n=0; n<nStates; n++)
5050 {
5051 m1 = _mm_max_ps (m1, clP[k][n]);
5052 }
5053 }
5054 _mm_store_ps (scP, m1);
5055 scP += m->numFloatsPerVec;
5056
5057 for (k=0; k<m->numRateCats; k++)
5058 {
5059 for (n=0; n<nStates; n++)
5060 {
5061 *clP[k] = _mm_div_ps (*clP[k], m1);
5062 clP[k]++;
5063 }
5064 }
5065 }
5066
5067 /* Reset scP to original position*/
5068 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
5069 for (c=0; c<m->numChars; c++)
5070 {
5071 scP[c] = (CLFlt) log (scP[c]); /* store node scaler */
5072 lnScaler[c] += scP[c]; /* add into tree scaler */
5073 }
5074
5075 m->unscaledNodes[chain][p->index] = 0;
5076
5077 return (NO_ERROR);
5078 }
5079 #endif
5080
5081
5082 /*----------------------------------------------------------------
5083 |
5084 | CondLikeScaler_Gen_GibbsGamma: general n-state model with Gibbs
5085 | sampling of rate categories in discrete gamma
5086 |
5087 -----------------------------------------------------------------*/
CondLikeScaler_Gen_GibbsGamma(TreeNode * p,int division,int chain)5088 int CondLikeScaler_Gen_GibbsGamma (TreeNode *p, int division, int chain)
5089 {
5090 int c, i, j, n, nStates, *rateCat, nRateCats;
5091 CLFlt scaler, *clP, *scP, *lnScaler;
5092 ModelInfo *m;
5093
5094 m = &modelSettings[division];
5095 nStates = m->numModelStates;
5096
5097 /* find conditional likelihood pointer */
5098 clP = m->condLikes[m->condLikeIndex[chain][p->index]];
5099
5100 /* flip node scalers */
5101 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
5102
5103 /* find site scalers */
5104 lnScaler = m->scalers[m->siteScalerIndex[chain]];
5105
5106 /* find rate category index and number of rate categories */
5107 rateCat = m->tiIndex + chain * m->numChars;
5108 nRateCats = m->numRateCats;
5109
5110 /* scale */
5111 i = j = 0;
5112 for (c=0; c<m->numChars; c++)
5113 {
5114 if (rateCat[c] < nRateCats)
5115 {
5116 scaler = 0.0;
5117 for (n=0; n<nStates; n++)
5118 {
5119 if (clP[i] > scaler)
5120 scaler = clP[i];
5121 i++;
5122 }
5123
5124
5125 for (n=0; n<nStates; n++)
5126 clP[j++] /= scaler;
5127
5128 scP[c] = (CLFlt) log (scaler); /* store node scaler */
5129 lnScaler[c] += scP[c]; /* add into tree scaler */
5130
5131 }
5132 else
5133 {
5134 scP[c] = 0.0;
5135 /* no need to add it to the lnScaler */
5136 i += nStates;
5137 j += nStates;
5138 }
5139 }
5140
5141 m->unscaledNodes[chain][p->index] = 0;
5142
5143 return (NO_ERROR);
5144 }
5145
5146
5147 /*----------------------------------------------------------------
5148 |
5149 | CondLikeScaler_NUC4: 4by4 nucleotide model with or without rate
5150 | variation
5151 |
5152 -----------------------------------------------------------------*/
CondLikeScaler_NUC4(TreeNode * p,int division,int chain)5153 int CondLikeScaler_NUC4 (TreeNode *p, int division, int chain)
5154 {
5155 int c, k;
5156 CLFlt scaler, *scP, *lnScaler, *clPtr, **clP;
5157 ModelInfo *m;
5158
5159 m = &modelSettings[division];
5160
5161 /* find conditional likelihood pointers */
5162 clPtr = m->condLikes[m->condLikeIndex[chain][p->index]];
5163 clP = m->clP;
5164 for (k=0; k<m->numRateCats; k++)
5165 {
5166 clP[k] = clPtr;
5167 clPtr += m->numChars * m->numModelStates;
5168 }
5169
5170 /* find node scalers */
5171 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
5172
5173 /* find site scalers */
5174 lnScaler = m->scalers[m->siteScalerIndex[chain]];
5175
5176 /* rescale values */
5177 for (c=0; c<m->numChars; c++)
5178 {
5179 scaler = 0.0;
5180 for (k=0; k<m->numRateCats; k++)
5181 {
5182 if (clP[k][A] > scaler)
5183 scaler = clP[k][A];
5184 if (clP[k][C] > scaler)
5185 scaler = clP[k][C];
5186 if (clP[k][G] > scaler)
5187 scaler = clP[k][G];
5188 if (clP[k][T] > scaler)
5189 scaler = clP[k][T];
5190 }
5191
5192 for (k=0; k<m->numRateCats; k++)
5193 {
5194 clP[k][A] /= scaler;
5195 clP[k][C] /= scaler;
5196 clP[k][G] /= scaler;
5197 clP[k][T] /= scaler;
5198 clP[k] += 4;
5199 }
5200
5201 scP[c] = (CLFlt) log(scaler); /* store node scaler */
5202 lnScaler[c] += scP[c]; /* add into tree scaler */
5203 }
5204
5205 m->unscaledNodes[chain][p->index] = 0; /* set unscaled nodes to 0 */
5206
5207 return NO_ERROR;
5208 }
5209
5210
5211 #if defined (AVX_ENABLED)
5212 /*----------------------------------------------------------------
5213 |
5214 | CondLikeScaler_NUC4_AVX: 4by4 nucleotide model with or without rate
5215 | variation using AVX (or AVX + FMA) code
5216 |
5217 -----------------------------------------------------------------*/
CondLikeScaler_NUC4_AVX(TreeNode * p,int division,int chain)5218 int CondLikeScaler_NUC4_AVX (TreeNode *p, int division, int chain)
5219 {
5220 int c, k;
5221 CLFlt *scP, *lnScaler;
5222 __m256 *clPtr, **clP, *scP_AVX, m1;
5223 ModelInfo *m;
5224
5225 m = &modelSettings[division];
5226
5227 /* find conditional likelihood pointers */
5228 clPtr = (__m256 *) m->condLikes[m->condLikeIndex[chain][p->index]];
5229 clP = m->clP_AVX;
5230 for (k=0; k<m->numRateCats; k++)
5231 {
5232 clP[k] = clPtr;
5233 clPtr += m->numVecChars * m->numModelStates;
5234 }
5235
5236 /* find node scalers */
5237 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
5238 scP_AVX = (__m256 *) scP;
5239
5240 /* find site scalers */
5241 lnScaler = m->scalers[m->siteScalerIndex[chain]];
5242
5243 /* rescale */
5244 for (c=0; c<m->numVecChars; c++)
5245 {
5246 m1 = _mm256_setzero_ps ();
5247
5248 for (k=0; k<m->numRateCats; k++)
5249 {
5250 m1 = _mm256_max_ps (m1, clP[k][A]);
5251 m1 = _mm256_max_ps (m1, clP[k][C]);
5252 m1 = _mm256_max_ps (m1, clP[k][G]);
5253 m1 = _mm256_max_ps (m1, clP[k][T]);
5254 }
5255
5256 for (k=0; k<m->numRateCats; k++)
5257 {
5258 *clP[k] = _mm256_div_ps (*clP[k], m1);
5259 clP[k]++;
5260 *clP[k] = _mm256_div_ps (*clP[k], m1);
5261 clP[k]++;
5262 *clP[k] = _mm256_div_ps (*clP[k], m1);
5263 clP[k]++;
5264 *clP[k] = _mm256_div_ps (*clP[k], m1);
5265 clP[k]++;
5266 }
5267
5268 (*scP_AVX++) = m1;
5269 }
5270
5271 /* update site scalers */
5272 for (c=0; c<m->numChars; c++)
5273 lnScaler[c] += (scP[c] = logf (scP[c])); /* add log of new scaler into tree scaler */
5274
5275 m->unscaledNodes[chain][p->index] = 0; /* set unscaled nodes to 0 */
5276
5277 return NO_ERROR;
5278
5279 }
5280 #endif
5281
5282
5283 #if defined (SSE_ENABLED)
5284 /*----------------------------------------------------------------
5285 |
5286 | CondLikeScaler_NUC4_SSE: 4by4 nucleotide model with or without rate
5287 | variation using SSE code
5288 |
5289 -----------------------------------------------------------------*/
CondLikeScaler_NUC4_SSE(TreeNode * p,int division,int chain)5290 int CondLikeScaler_NUC4_SSE (TreeNode *p, int division, int chain)
5291 {
5292 int c, k;
5293 CLFlt *scP, *lnScaler;
5294 __m128 *clPtr, **clP, *scP_SSE, m1;
5295 ModelInfo *m;
5296
5297 m = &modelSettings[division];
5298
5299 /* find conditional likelihood pointers */
5300 clPtr = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->index]];
5301 clP = m->clP_SSE;
5302 for (k=0; k<m->numRateCats; k++)
5303 {
5304 clP[k] = clPtr;
5305 clPtr += m->numVecChars * m->numModelStates;
5306 }
5307
5308 /* find node scalers */
5309 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
5310 scP_SSE = (__m128 *) scP;
5311
5312 /* find site scalers */
5313 lnScaler = m->scalers[m->siteScalerIndex[chain]];
5314
5315 /* rescale */
5316 for (c=0; c<m->numVecChars; c++)
5317 {
5318 m1 = _mm_setzero_ps ();
5319 for (k=0; k<m->numRateCats; k++)
5320 {
5321 m1 = _mm_max_ps (m1, clP[k][A]);
5322 m1 = _mm_max_ps (m1, clP[k][C]);
5323 m1 = _mm_max_ps (m1, clP[k][G]);
5324 m1 = _mm_max_ps (m1, clP[k][T]);
5325 }
5326
5327 for (k=0; k<m->numRateCats; k++)
5328 {
5329 *clP[k] = _mm_div_ps (*clP[k], m1);
5330 clP[k]++;
5331 *clP[k] = _mm_div_ps (*clP[k], m1);
5332 clP[k]++;
5333 *clP[k] = _mm_div_ps (*clP[k], m1);
5334 clP[k]++;
5335 *clP[k] = _mm_div_ps (*clP[k], m1);
5336 clP[k]++;
5337 }
5338
5339 (*scP_SSE++) = m1;
5340 }
5341
5342 /* update site scalers */
5343 for (c=0; c<m->numChars; c++)
5344 lnScaler[c] += (scP[c] = (CLFlt)(log (scP[c]))); /* add log of new scaler into tree scaler */
5345
5346 m->unscaledNodes[chain][p->index] = 0; /* number of unscaled nodes is 0 */
5347
5348 return NO_ERROR;
5349
5350 }
5351 #endif
5352
5353
5354 /*----------------------------------------------------------------
5355 |
5356 | CondLikeScaler_NUC4_GibbsGamma: 4by4 nucleotide model with rate
5357 | variation approximated by Gibbs sampling from gamma
5358 |
5359 -----------------------------------------------------------------*/
CondLikeScaler_NUC4_GibbsGamma(TreeNode * p,int division,int chain)5360 int CondLikeScaler_NUC4_GibbsGamma (TreeNode *p, int division, int chain)
5361 {
5362 int c, i, j, nRateCats, *rateCat;
5363 CLFlt scaler, *clP, *scP, *lnScaler;
5364 ModelInfo *m;
5365
5366 m = &modelSettings[division];
5367
5368 /* find conditional likelihood pointer */
5369 clP = m->condLikes[m->condLikeIndex[chain][p->index]];
5370
5371 /* find node scalers */
5372 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
5373
5374 /* find site scalers */
5375 lnScaler = m->scalers[m->siteScalerIndex[chain]];
5376
5377 /* find rate category index and number of gamma categories */
5378 rateCat = m->tiIndex + chain * m->numChars;
5379 nRateCats = m->numRateCats;
5380
5381 /* scale */
5382 i = j = 0;
5383 for (c=0; c<m->numChars; c++)
5384 {
5385 if (rateCat[c] < nRateCats)
5386 {
5387 scaler = 0.0;
5388 if (clP[i] > scaler)
5389 scaler = clP[i];
5390 i++;
5391 if (clP[i] > scaler)
5392 scaler = clP[i];
5393 i++;
5394 if (clP[i] > scaler)
5395 scaler = clP[i];
5396 i++;
5397 if (clP[i] > scaler)
5398 scaler = clP[i];
5399 i++;
5400
5401 clP[j++] /= scaler;
5402 clP[j++] /= scaler;
5403 clP[j++] /= scaler;
5404 clP[j++] /= scaler;
5405
5406 scP[c] = (CLFlt) log (scaler); /* store node scaler */
5407 lnScaler[c] += scP[c]; /* add into tree scaler */
5408 }
5409 else
5410 {
5411 scP[c] = 0.0; /* store node scaler */
5412 /* no need to add it to the lnScaler */
5413 i += 4;
5414 j += 4;
5415 }
5416 }
5417
5418 m->unscaledNodes[chain][p->index] = 0;
5419
5420 return NO_ERROR;
5421 }
5422
5423
5424 #if !defined (SSE_ENABLED) || 1
5425 /*----------------------------------------------------------------
5426 |
5427 | CondLikeScaler_NY98: codon model with omega variation
5428 |
5429 -----------------------------------------------------------------*/
CondLikeScaler_NY98(TreeNode * p,int division,int chain)5430 int CondLikeScaler_NY98 (TreeNode *p, int division, int chain)
5431 {
5432 int c, k, n, nStates;
5433 CLFlt scaler, **clP, *clPtr, *scP, *lnScaler;
5434 ModelInfo *m;
5435
5436 m = &modelSettings[division];
5437 nStates = m->numModelStates;
5438
5439 /* find conditional likelihood pointers */
5440 clPtr = m->condLikes[m->condLikeIndex[chain][p->index]];
5441 clP = m->clP;
5442 for (k=0; k<m->numOmegaCats; k++)
5443 {
5444 clP[k] = clPtr;
5445 clPtr += m->numChars * m->numModelStates;
5446 }
5447
5448 /* find node scalers */
5449 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
5450
5451 /* find site scalers */
5452 lnScaler = m->scalers[m->siteScalerIndex[chain]];
5453
5454 /* rescale */
5455 for (c=0; c<m->numChars; c++)
5456 {
5457 scaler = 0.0;
5458 for (k=0; k<m->numOmegaCats; k++)
5459 {
5460 for (n=0; n<nStates; n++)
5461 {
5462 if (clP[k][n] > scaler)
5463 scaler = clP[k][n];
5464 }
5465 }
5466
5467 for (k=0; k<m->numOmegaCats; k++)
5468 {
5469 for (n=0; n<nStates; n++)
5470 {
5471 clP[k][n] /= scaler;
5472 }
5473 clP[k] += n;
5474 }
5475
5476 scP[c] = (CLFlt) log (scaler); /* store node scaler */
5477 lnScaler[c] += scP[c]; /* add into tree scaler */
5478 }
5479
5480 m->unscaledNodes[chain][p->index] = 0;
5481
5482 return (NO_ERROR);
5483 }
5484 #endif
5485
5486
5487 #if defined (SSE_ENABLED)
5488 /*----------------------------------------------------------------
5489 |
5490 | CondLikeScaler_NY98_SSE: codon model with omega variation
5491 |
5492 -----------------------------------------------------------------*/
CondLikeScaler_NY98_SSE(TreeNode * p,int division,int chain)5493 int CondLikeScaler_NY98_SSE (TreeNode *p, int division, int chain)
5494 {
5495 int c, k, n, nStates;
5496 CLFlt *scP, *lnScaler;
5497 __m128 *clPtr, **clP, m1;
5498 ModelInfo *m;
5499
5500 m = &modelSettings[division];
5501 nStates = m->numModelStates;
5502
5503 /* find conditional likelihood pointers */
5504 clPtr = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->index]];
5505 clP = m->clP_SSE;
5506 for (k=0; k<m->numOmegaCats; k++)
5507 {
5508 clP[k] = clPtr;
5509 clPtr += m->numVecChars * m->numModelStates;
5510 }
5511
5512 /* find node scalers */
5513 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
5514 //scP_SSE = (__m128 *) scP;
5515
5516 /* find site scalers */
5517 lnScaler = m->scalers[m->siteScalerIndex[chain]];
5518
5519 /* rescale */
5520 for (c=0; c<m->numVecChars; c++)
5521 {
5522 //scaler = 0.0;
5523 m1 = _mm_setzero_ps ();
5524 for (k=0; k<m->numOmegaCats; k++)
5525 {
5526 for (n=0; n<nStates; n++)
5527 {
5528 m1 = _mm_max_ps (m1, clP[k][n]);
5529 }
5530 }
5531 _mm_store_ps (scP, m1);
5532 scP += m->numFloatsPerVec;
5533
5534 for (k=0; k<m->numOmegaCats; k++)
5535 {
5536 for (n=0; n<nStates; n++)
5537 {
5538 *clP[k] = _mm_div_ps (*clP[k], m1);
5539 clP[k]++;
5540 }
5541 }
5542 }
5543
5544 /* Reset scP to original position*/
5545 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
5546 for (c=0; c<m->numChars; c++)
5547 {
5548 scP[c] = (CLFlt) log (scP[c]); /* store node scaler */
5549 lnScaler[c] += scP[c]; /* add into tree scaler */
5550 }
5551
5552 m->unscaledNodes[chain][p->index] = 0;
5553
5554 return (NO_ERROR);
5555 }
5556 #endif
5557
5558
5559 /*----------------------------------------------------------------
5560 |
5561 | CondLikeScaler_Std: variable states model with or without
5562 | rate variation
5563 |
5564 -----------------------------------------------------------------*/
CondLikeScaler_Std(TreeNode * p,int division,int chain)5565 int CondLikeScaler_Std (TreeNode *p, int division, int chain)
5566 {
5567 int c, n, k, nStates, numReps;
5568 CLFlt scaler, *clPtr, **clP, *scP, *lnScaler;
5569 ModelInfo *m;
5570
5571 m = &modelSettings[division];
5572
5573 numReps=0;
5574 for (c=0; c<m->numChars; c++)
5575 {
5576 if (m->nStates[c] == 2)
5577 numReps += m->numBetaCats * 2;
5578 else
5579 numReps += m->nStates[c];
5580 }
5581
5582 /* find conditional likelihood pointers */
5583 clPtr = m->condLikes[m->condLikeIndex[chain][p->index]];
5584 clP = m->clP;
5585 for (k=0; k<m->numRateCats; k++)
5586 {
5587 clP[k] = clPtr;
5588 clPtr += numReps;
5589 }
5590
5591 /* find node scalers */
5592 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
5593
5594 /* find site scalers */
5595 lnScaler = m->scalers[m->siteScalerIndex[chain]];
5596
5597 /* rescale */
5598 for (c=0; c<m->numChars; c++)
5599 {
5600 scaler = 0.0;
5601 nStates = m->nStates[c];
5602 if (nStates == 2)
5603 nStates = m->numBetaCats * 2;
5604
5605 for (k=0; k<m->numRateCats; k++)
5606 {
5607 for (n=0; n<nStates; n++)
5608 {
5609 if (clP[k][n] > scaler)
5610 scaler = clP[k][n];
5611 }
5612 }
5613
5614 for (k=0; k<m->numRateCats; k++)
5615 {
5616 for (n=0; n<nStates; n++)
5617 clP[k][n] /= scaler;
5618 clP[k] += nStates;
5619 }
5620
5621 scP[c] = (CLFlt) log (scaler); /* store node scaler */
5622 lnScaler[c] += scP[c]; /* add into tree scaler */
5623 }
5624
5625 m->unscaledNodes[chain][p->index] = 0;
5626
5627 return NO_ERROR;
5628 }
5629
5630
5631 /*------------------------------------------------------------------
5632 |
5633 | Likelihood_Adgamma: all n-state models with autocorrelated
5634 | discrete gamma rate variation, NOT morph, restriction,
5635 | codon or doublet models; just fill in rateProbs
5636 |
5637 -------------------------------------------------------------------*/
Likelihood_Adgamma(TreeNode * p,int division,int chain,MrBFlt * lnL,int whichSitePats)5638 int Likelihood_Adgamma (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
5639 {
5640 int c, j, k, i, nStates, nStatesDiv2;
5641 MrBFlt *bs, *swr, s01, s10, probOn, probOff, covBF[40];
5642 MrBFlt like, *rP;
5643 CLFlt *clP;
5644 ModelInfo *m;
5645
5646 /* NOTE: whichSitePats offsets numSitesOfPat by whichSitePats X numCompressedChars.
5647 This is done so we can use the character reweighting scheme for "heating" chains. This was easy to
5648 accomplish for all of the models except this one, which doesn't use numSitesOfPat when calculating
5649 likelihoods. Either we disallow autocorrelated rates when using MCMC with character reweighting, or
5650 we properly calculate likelihoods when some site patterns have increased or decreased weight. For
5651 now, we do not allow MCMCMC with character reweighting with this HMM; we bail out in the function
5652 FillNumSitesOfPat if we have Adgamma rate variation and reweighting. */
5653 k = whichSitePats; /* FIXME: Not used (from clang static analyzer) */
5654
5655 /* find model settings */
5656 m = &modelSettings[division];
5657
5658 /* get the number of states */
5659 nStates = m->numModelStates;
5660 nStatesDiv2 = nStates / 2;
5661
5662 /* find base frequencies */
5663 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
5664
5665 /* find conditional likelihood pointer */
5666 clP = m->condLikes[m->condLikeIndex[chain][p->index]];
5667
5668 /* find pointer to rate probabilities */
5669 rP = rateProbs[chain] + state[chain] * rateProbRowSize + m->rateProbStart;
5670
5671 /* loop over characters and calculate rate probs */
5672 if (m->switchRates != NULL)
5673 {
5674 swr = GetParamVals (m->switchRates, chain, state[chain]);
5675 s01 = swr[0];
5676 s10 = swr[1];
5677 probOn = s01 / (s01 + s10);
5678 probOff = 1.0 - probOn;
5679 for (j=0; j<nStatesDiv2; j++)
5680 {
5681 covBF[j] = bs[j] * probOn;
5682 covBF[j+nStatesDiv2] = bs[j] * probOff;
5683 }
5684 bs = covBF;
5685 }
5686
5687 for (c=i=0; c<m->numChars; c++)
5688 {
5689 for (k=0; k<m->numRateCats; k++)
5690 {
5691 like = 0.0;
5692 for (j=0; j<nStates; j++)
5693 like += (*(clP++)) * bs[j];
5694 rP[i++] = like;
5695 }
5696 }
5697
5698 /* reset lnL, likelihood calculated later for this model */
5699 *lnL = 0.0;
5700
5701 return (NO_ERROR);
5702 }
5703
5704
5705 /*------------------------------------------------------------------
5706 |
5707 | Likelihood_Gen: general n-state models with or without rate
5708 | variation
5709 |
5710 -------------------------------------------------------------------*/
Likelihood_Gen(TreeNode * p,int division,int chain,MrBFlt * lnL,int whichSitePats)5711 int Likelihood_Gen (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
5712 {
5713 int c, j, k, nStates, hasPInvar;
5714 MrBFlt s01, s10, probOn, probOff, *swr;
5715 MrBFlt covBF[40], freq, *bs, like, likeI, pInvar=0.0, lnLike;
5716 CLFlt *clPtr, **clP, *lnScaler, *nSitesOfPat, *clInvar=NULL;
5717 ModelInfo *m;
5718
5719 /* find model settings and nStates, pInvar, invar cond likes */
5720 m = &modelSettings[division];
5721 nStates = m->numModelStates;
5722 if (m->pInvar == NULL)
5723 {
5724 hasPInvar = NO;
5725 }
5726 else
5727 {
5728 hasPInvar = YES;
5729 pInvar = *(GetParamVals (m->pInvar, chain, state[chain]));
5730 clInvar = m->invCondLikes;
5731 }
5732
5733 /* find conditional likelihood pointers */
5734 clPtr = m->condLikes[m->condLikeIndex[chain][p->index]];
5735 clP = m->clP;
5736 for (k=0; k<m->numRateCats; k++)
5737 {
5738 clP[k] = clPtr;
5739 clPtr += m->numChars * m->numModelStates;
5740 }
5741
5742
5743 /* find base frequencies */
5744 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
5745
5746 /* if covarion model, adjust base frequencies */
5747 if (m->switchRates != NULL)
5748 {
5749 /* find the stationary frequencies */
5750 swr = GetParamVals(m->switchRates, chain, state[chain]);
5751 s01 = swr[0];
5752 s10 = swr[1];
5753 probOn = s01 / (s01 + s10);
5754 probOff = 1.0 - probOn;
5755
5756 /* now adjust the base frequencies; on-state stored first in cond likes */
5757 for (j=0; j<nStates/2; j++)
5758 {
5759 covBF[j] = bs[j] * probOn;
5760 covBF[j+nStates/2] = bs[j] * probOff;
5761 }
5762
5763 /* finally set bs pointer to adjusted values */
5764 bs = covBF;
5765 }
5766
5767 /* find category frequencies */
5768 if (hasPInvar == NO)
5769 freq = 1.0 / m->numRateCats;
5770 else
5771 freq = (1.0 - pInvar) / m->numRateCats;
5772
5773 /* find site scaler */
5774 lnScaler = m->scalers[m->siteScalerIndex[chain]];
5775
5776 /* find nSitesOfPat */
5777 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
5778
5779 /* reset lnL */
5780 *lnL = 0.0;
5781
5782 /* loop over characters */
5783 if (hasPInvar == NO)
5784 {
5785 for (c=0; c<m->numChars; c++)
5786 {
5787 like = 0.0;
5788 for (k=0; k<m->numRateCats; k++)
5789 for (j=0; j<nStates; j++)
5790 {
5791 like += (*(clP[k]++)) * bs[j];
5792 # ifdef DEBUG_LIKELIHOOD
5793 // printf ("char=%d cat=%d j=%d like %E\n",c, k,j,like);
5794 # endif
5795 }
5796 like *= freq;
5797
5798 /* check against LIKE_EPSILON (values close to zero are problematic) */
5799 if (like < LIKE_EPSILON)
5800 {
5801 # ifdef DEBUG_LIKELIHOOD
5802 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
5803 # endif
5804 (*lnL) = MRBFLT_NEG_MAX;
5805 abortMove = YES;
5806 return ERROR;
5807 }
5808 else
5809 {
5810 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
5811 }
5812 }
5813 }
5814 else
5815 {
5816 /* has invariable category */
5817 for (c=0; c<m->numChars; c++)
5818 {
5819 likeI = like = 0.0;
5820 for (k=0; k<m->numRateCats; k++)
5821 for (j=0; j<nStates; j++)
5822 {
5823 like += (*(clP[k]++)) * bs[j];
5824 }
5825 like *= freq;
5826 for (j=0; j<nStates; j++)
5827 likeI += (*(clInvar++)) * bs[j] * pInvar;
5828 if (lnScaler[c] < -200.0)
5829 {
5830 /* we are not going to be able to exponentiate the scaling factor */
5831 if (likeI > 1E-70)
5832 {
5833 /* forget about like; it is going to be insignificant compared to likeI */
5834 lnLike = log(likeI);
5835 }
5836 else
5837 {
5838 /* treat likeI as if 0.0, that is, ignore it completely */
5839 lnLike = log(like) + lnScaler[c];
5840 }
5841 }
5842 else
5843 lnLike = log (like + (likeI / exp (lnScaler[c]))) + lnScaler[c];
5844
5845 /* check against LIKE_EPSILON (values close to zero are problematic) */
5846 if (like < LIKE_EPSILON)
5847 {
5848 # ifdef DEBUG_LIKELIHOOD
5849 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
5850 # endif
5851 (*lnL) = MRBFLT_NEG_MAX;
5852 abortMove = YES;
5853 return ERROR;
5854 }
5855 else
5856 {
5857 (*lnL) += lnLike * nSitesOfPat[c];
5858 }
5859 }
5860 }
5861
5862 return NO_ERROR;
5863 }
5864
5865
5866 #if defined (SSE_ENABLED)
5867 //# if 0
5868 //CLFlt DeleteME[1000];
5869 //int PrintOld_SSE (TreeNode *p, int division, int chain){
5870 //
5871 // int c, c1, j, k, nStates;
5872 // //MrBFlt *swr, likeI, pInvar=0.0, lnLike;
5873 // CLFlt *temp_vector;
5874 // __m128 *clPtr, **clP;
5875 // ModelInfo *m;
5876 //
5877 // m = &modelSettings[division];
5878 // nStates = m->numModelStates;
5879 // /* find conditional likelihood pointers */
5880 //
5881 // temp_vector = DeleteME;
5882 //
5883 // clPtr = (__m128 *) (m->condLikes[m->condLikeIndex[chain][p->index]]);
5884 // clP = m->clP_SSE;
5885 // for (k=0; k<m->numRateCats; k++)
5886 // {
5887 // clP[k] = clPtr;
5888 // clPtr += m->numVecChars * m->numModelStates;
5889 // }
5890 //
5891 // for (c=0; c<m->numChars; c++)
5892 // {
5893 // c1 = c / FLOATS_PER_VEC;
5894 // for (k=0; k<m->numRateCats; k++)
5895 // {
5896 // for (j=0; j<nStates; j++)
5897 // {
5898 // *temp_vector++ = *(((CLFlt*)&clP[k][c1*nStates+j])+c % FLOATS_PER_VEC);
5899 // }
5900 // }
5901 // }
5902 // temp_vector=DeleteME;
5903 //
5904 // return 1;
5905 //}
5906 //# endif
5907
5908
5909 /*------------------------------------------------------------------
5910 |
5911 | Likelihood_Gen_SSE: general n-state model with or without rate
5912 | variation
5913 |
5914 -------------------------------------------------------------------*/
Likelihood_Gen_SSE(TreeNode * p,int division,int chain,MrBFlt * lnL,int whichSitePats)5915 int Likelihood_Gen_SSE (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
5916 {
5917 int c, j, k, nStates, hasPInvar;
5918 MrBFlt like, *bs;
5919 MrBFlt s01, s10, probOn, probOff, *swr, covBF[40], freq, likeI, pInvar=0.0, lnLike;
5920 CLFlt *lnScaler, *nSitesOfPat, *lnL_SSE, *lnLI_SSE;
5921 __m128 *clPtr, **clP, *clInvar=NULL;
5922 __m128 m1, mCatLike, mLike, mFreq;
5923 ModelInfo *m;
5924
5925 /* find model settings and nStates, pInvar, invar cond likes */
5926 m = &modelSettings[division];
5927 nStates = m->numModelStates;
5928 if (m->pInvar == NULL)
5929 {
5930 hasPInvar = NO;
5931 }
5932 else
5933 {
5934 hasPInvar = YES;
5935 pInvar = *(GetParamVals (m->pInvar, chain, state[chain]));
5936 clInvar = (__m128 *) (m->invCondLikes);
5937 }
5938
5939 /* find conditional likelihood pointers */
5940 clPtr = (__m128 *) (m->condLikes[m->condLikeIndex[chain][p->index]]);
5941 clP = m->clP_SSE;
5942 for (k=0; k<m->numRateCats; k++)
5943 {
5944 clP[k] = clPtr;
5945 clPtr += m->numVecChars * m->numModelStates;
5946 }
5947 lnL_SSE = m->lnL_Vec;
5948 lnLI_SSE = m->lnLI_Vec;
5949
5950 /* find base frequencies */
5951 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
5952
5953 /* if covarion model, adjust base frequencies */
5954 if (m->switchRates != NULL)
5955 {
5956 /* find the stationary frequencies */
5957 swr = GetParamVals(m->switchRates, chain, state[chain]);
5958 s01 = swr[0];
5959 s10 = swr[1];
5960 probOn = s01 / (s01 + s10);
5961 probOff = 1.0 - probOn;
5962
5963 /* now adjust the base frequencies; on-state stored first in cond likes */
5964 for (j=0; j<nStates/2; j++)
5965 {
5966 covBF[j] = bs[j] * probOn;
5967 covBF[j+nStates/2] = bs[j] * probOff;
5968 }
5969
5970 /* finally set bs pointer to adjusted values */
5971 bs = covBF;
5972 }
5973
5974 /* find category frequencies */
5975 if (hasPInvar == NO)
5976 freq = 1.0 / m->numRateCats;
5977 else
5978 freq = (1.0 - pInvar) / m->numRateCats;
5979
5980 mFreq = _mm_set1_ps ((CLFlt)(freq));
5981
5982 /* find site scaler */
5983 lnScaler = m->scalers[m->siteScalerIndex[chain]];
5984
5985 /* find nSitesOfPat */
5986 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
5987
5988 /* reset lnL */
5989 *lnL = 0.0;
5990
5991 for (c=0; c<m->numVecChars; c++)
5992 {
5993 mLike = _mm_setzero_ps ();
5994 for (k=0; k<m->numRateCats; k++)
5995 {
5996 mCatLike = _mm_setzero_ps ();
5997 for (j=0; j<nStates; j++)
5998 {
5999 m1 = _mm_mul_ps (clP[k][j], _mm_set1_ps ((CLFlt)bs[j]));
6000 mCatLike = _mm_add_ps (mCatLike, m1);
6001 }
6002 m1 = _mm_mul_ps (mCatLike, mFreq);
6003 mLike = _mm_add_ps (mLike, m1);
6004 clP[k] += nStates;
6005 }
6006 _mm_store_ps (lnL_SSE, mLike);
6007 lnL_SSE += m->numFloatsPerVec;
6008 }
6009
6010 /* loop over characters */
6011 if (hasPInvar == NO)
6012 {
6013 for (c=0; c<m->numChars; c++)
6014 {
6015 like = m->lnL_Vec[c];
6016 /* check against LIKE_EPSILON (values close to zero are problematic) */
6017 if (like < LIKE_EPSILON)
6018 {
6019 # ifdef DEBUG_LIKELIHOOD
6020 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
6021 # endif
6022 (*lnL) = MRBFLT_NEG_MAX;
6023 abortMove = YES;
6024 return ERROR;
6025 }
6026 else
6027 {
6028 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
6029 }
6030 }
6031 }
6032 else
6033 {
6034 /* has invariable category */
6035 for (c=0; c<m->numVecChars; c++)
6036 {
6037 mCatLike = _mm_setzero_ps ();
6038 for (j=0; j<nStates; j++)
6039 {
6040 m1 = _mm_mul_ps (clInvar[j], _mm_set1_ps ((CLFlt)bs[j]));
6041 mCatLike = _mm_add_ps (mCatLike, m1);
6042 }
6043 clInvar += nStates;
6044 _mm_store_ps (lnL_SSE, mCatLike);
6045 lnLI_SSE += m->numFloatsPerVec;
6046 }
6047
6048 for (c=0; c<m->numChars; c++)
6049 {
6050 like = m->lnL_Vec[c];
6051 likeI = m->lnLI_Vec[c];
6052 if (lnScaler[c] < -200.0)
6053 {
6054 /* we are not going to be able to exponentiate the scaling factor */
6055 if (likeI > 1E-70)
6056 {
6057 /* forget about like; it is going to be insignificant compared to likeI */
6058 lnLike = log(likeI);
6059 }
6060 else
6061 {
6062 /* treat likeI as if 0.0, that is, ignore it completely */
6063 lnLike = log(like) + lnScaler[c];
6064 }
6065 }
6066 else
6067 lnLike = log (like + (likeI / exp (lnScaler[c]))) + lnScaler[c];
6068
6069 /* check against LIKE_EPSILON (values close to zero are problematic) */
6070 if (like < LIKE_EPSILON)
6071 {
6072 # ifdef DEBUG_LIKELIHOOD
6073 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
6074 # endif
6075 (*lnL) = MRBFLT_NEG_MAX;
6076 abortMove = YES;
6077 return ERROR;
6078 }
6079 else
6080 {
6081 (*lnL) += lnLike * nSitesOfPat[c];
6082 }
6083 }
6084 }
6085
6086 return NO_ERROR;
6087
6088 }
6089 #endif
6090
6091
6092 /*------------------------------------------------------------------
6093 |
6094 | Likelihood_Gen_GibbsGamma: general n-state models using
6095 | Gibbs resampling of discrete gamma rate categories
6096 |
6097 -------------------------------------------------------------------*/
Likelihood_Gen_GibbsGamma(TreeNode * p,int division,int chain,MrBFlt * lnL,int whichSitePats)6098 int Likelihood_Gen_GibbsGamma (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
6099 {
6100 int c, j, nStates, nGammaCats, *rateCat;
6101 MrBFlt s01, s10, probOn, probOff, *swr;
6102 MrBFlt covBF[40], *bs, like;
6103 CLFlt *clP, *lnScaler, *nSitesOfPat, *clInvar=NULL;
6104 ModelInfo *m;
6105
6106 /* find model settings, nStates and invar cond likes */
6107 m = &modelSettings[division];
6108 nStates = m->numModelStates;
6109 clInvar = m->invCondLikes;
6110
6111 /* find conditional likelihood pointer */
6112 clP = m->condLikes[m->condLikeIndex[chain][p->index]];
6113
6114 /* find base frequencies */
6115 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
6116
6117 /* if covarion model, adjust base frequencies */
6118 if (m->switchRates != NULL)
6119 {
6120 /* find the stationary frequencies */
6121 swr = GetParamVals(m->switchRates, chain, state[chain]);
6122 s01 = swr[0];
6123 s10 = swr[1];
6124 probOn = s01 / (s01 + s10);
6125 probOff = 1.0 - probOn;
6126
6127 /* now adjust the base frequencies; on-state stored first in cond likes */
6128 for (j=0; j<nStates/2; j++)
6129 {
6130 covBF[j] = bs[j] * probOn;
6131 covBF[j+nStates/2] = bs[j] * probOff;
6132 }
6133
6134 /* finally set bs pointer to adjusted values */
6135 bs = covBF;
6136 }
6137
6138 /* find site scaler */
6139 lnScaler = m->scalers[m->siteScalerIndex[chain]];
6140
6141 /* find nSitesOfPat */
6142 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
6143
6144 /* find rate category index and number of gamma categories */
6145 rateCat = m->tiIndex + chain * m->numChars;
6146 nGammaCats = m->numRateCats;
6147
6148 /* reset lnL */
6149 *lnL = 0.0;
6150
6151 /* loop over characters */
6152 if (m->pInvar == NULL)
6153 {
6154 for (c=0; c<m->numChars; c++)
6155 {
6156 like = 0.0;
6157 for (j=0; j<nStates; j++)
6158 {
6159 like += (*(clP++)) * bs[j];
6160 # ifdef DEBUG_LIKELIHOOD
6161 // printf ("char=%d cat=%d j=%d like %E\n",c, k,j,like);
6162 # endif
6163 }
6164
6165 /* check against LIKE_EPSILON (values close to zero are problematic) */
6166 if (like < LIKE_EPSILON)
6167 {
6168 # ifdef DEBUG_LIKELIHOOD
6169 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
6170 # endif
6171 (*lnL) = MRBFLT_NEG_MAX;
6172 abortMove = YES;
6173 return ERROR;
6174 }
6175 else
6176 {
6177 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
6178 }
6179 }
6180 }
6181 else
6182 {
6183 /* has invariable category */
6184 for (c=0; c<m->numChars; c++)
6185 {
6186 like = 0.0;
6187 if (rateCat[c] < nGammaCats)
6188 {
6189 for (j=0; j<nStates; j++)
6190 like += (*(clP++)) * bs[j];
6191 clInvar += nStates;
6192 }
6193 else
6194 {
6195 for (j=0; j<nStates; j++)
6196 like += (*(clInvar++)) * bs[j];
6197 clP += nStates;
6198 }
6199
6200 /* check against LIKE_EPSILON (values close to zero are problematic) */
6201 if (like < LIKE_EPSILON)
6202 {
6203 # ifdef DEBUG_LIKELIHOOD
6204 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
6205 # endif
6206 (*lnL) = MRBFLT_NEG_MAX;
6207 abortMove = YES;
6208 return ERROR;
6209 }
6210 else
6211 {
6212 (*lnL) += (log(like) + lnScaler[c]) * nSitesOfPat[c];
6213 }
6214 }
6215 }
6216
6217 return NO_ERROR;
6218 }
6219
6220
6221 /*------------------------------------------------------------------
6222 |
6223 | Likelihood_NUC4: 4by4 nucleotide models with or without rate
6224 | variation
6225 |
6226 -------------------------------------------------------------------*/
Likelihood_NUC4(TreeNode * p,int division,int chain,MrBFlt * lnL,int whichSitePats)6227 int Likelihood_NUC4 (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
6228 {
6229 int c, k, hasPInvar;
6230 MrBFlt freq, likeI, *bs, like, pInvar=0.0;
6231 CLFlt *clPtr, **clP, *lnScaler, *nSitesOfPat, *clInvar=NULL;
6232 ModelInfo *m;
6233
6234 /* find model settings and pInvar, invar cond likes */
6235 m = &modelSettings[division];
6236 if (m->pInvar == NULL)
6237 {
6238 hasPInvar = NO;
6239 }
6240 else
6241 {
6242 hasPInvar = YES;
6243 pInvar = *(GetParamVals (m->pInvar, chain, state[chain]));
6244 clInvar = m->invCondLikes;
6245 }
6246
6247 /* find conditional likelihood pointers */
6248 clPtr = m->condLikes[m->condLikeIndex[chain][p->index]];
6249 clP = m->clP;
6250 for (k=0; k<m->numRateCats; k++)
6251 {
6252 clP[k] = clPtr;
6253 clPtr += m->numChars * m->numModelStates;
6254 }
6255
6256 /* find base frequencies */
6257 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
6258
6259 /* find category frequencies */
6260 if (hasPInvar == NO)
6261 freq = 1.0 / m->numRateCats;
6262 else
6263 freq = (1.0 - pInvar) / m->numRateCats;
6264
6265 /* find tree scaler */
6266 lnScaler = m->scalers[m->siteScalerIndex[chain]];
6267
6268 /* find nSitesOfPat */
6269 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
6270
6271 /* reset lnL */
6272 *lnL = 0.0;
6273
6274 /* loop over characters */
6275 if (hasPInvar == NO)
6276 {
6277 for (c=0; c<m->numChars; c++)
6278 {
6279 like = 0.0;
6280 for (k=0; k<m->numRateCats; k++)
6281 {
6282 like += (clP[k][A] * bs[A] + clP[k][C] * bs[C] + clP[k][G] * bs[G] + clP[k][T] * bs[T]);
6283 clP[k] += 4;
6284 }
6285 like *= freq;
6286
6287 /* check against LIKE_EPSILON (values close to zero are problematic) */
6288 if (like < LIKE_EPSILON)
6289 {
6290 # ifdef DEBUG_LIKELIHOOD
6291 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
6292 # endif
6293 (*lnL) = MRBFLT_NEG_MAX;
6294 abortMove = YES;
6295 return ERROR;
6296 }
6297 else
6298 {
6299 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
6300 }
6301 }
6302 }
6303 else
6304 {
6305 /* has invariable category */
6306 for (c=0; c<m->numChars; c++)
6307 {
6308 like = 0.0;
6309 for (k=0; k<m->numRateCats; k++)
6310 {
6311 like += (clP[k][A] * bs[A] + clP[k][C] * bs[C] + clP[k][G] * bs[G] + clP[k][T] * bs[T]);
6312 clP[k] += 4;
6313 }
6314 like *= freq;
6315 likeI = (clInvar[A] * bs[A] + clInvar[C] * bs[C] + clInvar[G] * bs[G] + clInvar[T] * bs[T]) * pInvar;
6316 if (lnScaler[c] < -200)
6317 {
6318 /* we are not going to be able to exponentiate the scaling factor */
6319 if (likeI > 1E-70)
6320 {
6321 /* forget about like; it is going to be insignificant compared to likeI */
6322 like = likeI;
6323 }
6324 else
6325 {
6326 /* treat likeI as if 0.0, that is, ignore it completely */
6327 }
6328 }
6329 else
6330 like = like + (likeI / exp (lnScaler[c]));
6331
6332 clInvar += 4;
6333
6334 /* check against LIKE_EPSILON (values close to zero are problematic) */
6335 if (like < LIKE_EPSILON)
6336 {
6337 # ifdef DEBUG_LIKELIHOOD
6338 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
6339 # endif
6340 (*lnL) = MRBFLT_NEG_MAX;
6341 abortMove = YES;
6342 return ERROR;
6343 }
6344 else
6345 {
6346 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
6347 }
6348 }
6349 }
6350
6351
6352 return NO_ERROR;
6353 }
6354
6355
6356 /*------------------------------------------------------------------
6357 |
6358 | Likelihood_NUC4_GibbsGamma: 4by4 nucleotide models with rate
6359 | variation using Gibbs sampling from gamma rate categories
6360 |
6361 -------------------------------------------------------------------*/
Likelihood_NUC4_GibbsGamma(TreeNode * p,int division,int chain,MrBFlt * lnL,int whichSitePats)6362 int Likelihood_NUC4_GibbsGamma (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
6363 {
6364 int c, i, r, nGammaCats, *rateCat;
6365 MrBFlt *bs, like;
6366 CLFlt *clP, *lnScaler, *nSitesOfPat, *clInvar;
6367 ModelInfo *m;
6368
6369 /* find model settings and invar cond likes */
6370 m = &modelSettings[division];
6371 clInvar = m->invCondLikes;
6372
6373 /* find conditional likelihood pointer */
6374 clP = m->condLikes[m->condLikeIndex[chain][p->index]];
6375
6376 /* find base frequencies */
6377 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
6378
6379 /* find tree scaler */
6380 lnScaler = m->scalers[m->siteScalerIndex[chain]];
6381
6382 /* find nSitesOfPat */
6383 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
6384
6385 /* find rate category index and number of gamma categories */
6386 rateCat = m->tiIndex + chain * m->numChars;
6387 nGammaCats = m->numRateCats;
6388
6389 /* reset lnL */
6390 *lnL = 0.0;
6391
6392 /* loop over characters */
6393 if (m->pInvar == NULL)
6394 {
6395 for (c=i=0; c<m->numChars; c++)
6396 {
6397 like = (clP[A] * bs[A] + clP[C] * bs[C] + clP[G] * bs[G] + clP[T] * bs[T]);
6398 clP += 4;
6399
6400 /* check against LIKE_EPSILON (values close to zero are problematic) */
6401 if (like < LIKE_EPSILON)
6402 {
6403 # ifdef DEBUG_LIKELIHOOD
6404 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
6405 # endif
6406 (*lnL) = MRBFLT_NEG_MAX;
6407 abortMove = YES;
6408 return ERROR;
6409 }
6410 else
6411 {
6412 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
6413 }
6414 }
6415 }
6416 else
6417 {
6418 /* has invariable category */
6419 for (c=i=0; c<m->numChars; c++)
6420 {
6421 r = rateCat[c];
6422 if (r < nGammaCats)
6423 like = (clP[A] * bs[A] + clP[C] * bs[C] + clP[G] * bs[G] + clP[T] * bs[T]);
6424 else
6425 like = (clInvar[A] * bs[A] + clInvar[C] * bs[C] + clInvar[G] * bs[G] + clInvar[T] * bs[T]);
6426 clInvar += 4;
6427 clP += 4;
6428
6429 /* check against LIKE_EPSILON (values close to zero are problematic) */
6430 if (like < LIKE_EPSILON)
6431 {
6432 # ifdef DEBUG_LIKELIHOOD
6433 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
6434 # endif
6435 (*lnL) = MRBFLT_NEG_MAX;
6436 abortMove = YES;
6437 return ERROR;
6438 }
6439 else
6440 {
6441 (*lnL) += (log (like) + lnScaler[c]) * nSitesOfPat[c];
6442 }
6443 }
6444 }
6445
6446 return NO_ERROR;
6447 }
6448
6449
6450 //#if defined (SSE_ENABLED)
6451 ///*------------------------------------------------------------------
6452 // |
6453 // | Likelihood_NUC4_GibbsGamma: 4by4 nucleotide models with rate
6454 // | variation using Gibbs sampling from gamma rate categories
6455 // |
6456 // -------------------------------------------------------------------*/
6457 //int Likelihood_NUC4_GibbsGamma_SSE (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
6458 //{
6459 // int c, i, r, nRateCats, *rateCat;
6460 // MrBFlt *bs, like;
6461 // CLFlt *lnScaler, *nSitesOfPat, *lnL_SSE, *lnLI_SSE;
6462 // __m128 *clP, *clInvar=NULL;
6463 // __m128 m1, mA, mC, mG, mT, mFreq, mPInvar, mLike;
6464 // ModelInfo *m;
6465 //
6466 //#if defined (FAST_LOG)
6467 // int k, index;
6468 // MrBFlt likeAdjust = 1.0, f;
6469 //#endif
6470 //
6471 // /* find model settings and invar cond likes */
6472 // m = &modelSettings[division];
6473 // clInvar = (__m128 *)m->invCondLikes;
6474 // /* find conditional likelihood pointer */
6475 // clP = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->index]];
6476 //
6477 // lnL_SSE = m->lnL_SSE;
6478 // lnLI_SSE = m->lnLI_SSE;
6479 //
6480 // /* find base frequencies */
6481 // bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
6482 //
6483 // /* find tree scaler */
6484 // lnScaler = m->scalers[m->siteScalerIndex[chain]];
6485 //
6486 // /* find nSitesOfPat */
6487 // nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
6488 //
6489 // /* find rate category index and number of rate categories */
6490 // rateCat = m->tiIndex + chain * m->numChars;
6491 // nRateCats = m->numRateCats;
6492 //
6493 // /* reset lnL */
6494 // *lnL = 0.0;
6495 //
6496 // /* calculate variable likelihood */
6497 // for (c=0; c<m->numVecChars; c++)
6498 // {
6499 // mLike = _mm_mul_ps (clP[A], mA);
6500 // m1 = _mm_mul_ps (clP[C], mC);
6501 // mLike = _mm_add_ps (mLike, m1);
6502 // m1 = _mm_mul_ps (clP[G], mG);
6503 // mLike = _mm_add_ps (mLike, m1);
6504 // m1 = _mm_mul_ps (clP[T], mT);
6505 // mLike = _mm_add_ps (mLike, m1);
6506 //
6507 // clP += 4;
6508 // _mm_store_ps (lnL_SSE, mLike);
6509 // lnL_SSE += FLOATS_PER_VEC;
6510 // }
6511 //
6512 // /* calculate invariable likelihood */
6513 // if (hasPInvar == YES)
6514 // {
6515 // for (c=0; c<m->numVecChars; c++)
6516 // {
6517 // mLike = _mm_mul_ps (clInvar[A], mA);
6518 // m1 = _mm_mul_ps (clInvar[C], mC);
6519 // mLike = _mm_add_ps (mLike, m1);
6520 // m1 = _mm_mul_ps (clInvar[G], mG);
6521 // mLike = _mm_add_ps (mLike, m1);
6522 // m1 = _mm_mul_ps (clInvar[T], mT);
6523 // mLike = _mm_add_ps (mLike, m1);
6524 // mLike = _mm_mul_ps (mLike, mPInvar);
6525 //
6526 // _mm_store_ps (lnLI_SSE, mLike);
6527 // clInvar += 4;
6528 // lnLI_SSE += FLOATS_PER_VEC;
6529 // }
6530 // }
6531 //
6532 //
6533 // /* loop over characters */
6534 // if (m->pInvar == NULL)
6535 // {
6536 // for (c=i=0; c<m->numChars; c++)
6537 // {
6538 // like = m->lnL_SSE[c];
6539 // /* check against LIKE_EPSILON (values close to zero are problematic) */
6540 // if (like < LIKE_EPSILON)
6541 // {
6542 // MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30lf\n", spacer, division, c, like);
6543 // (*lnL) = MRBFLT_NEG_MAX;
6544 // return ERROR;
6545 // }
6546 // else
6547 // {
6548 //#if defined (FAST_LOG)
6549 // f = frexp (like, &index);
6550 // index = 1-index;
6551 // (*lnL) += (lnScaler[c] + logValue[index]) * nSitesOfPat[c];
6552 // for (k=0; k<(int)nSitesOfPat[c]; k++)
6553 // likeAdjust *= f;
6554 //#else
6555 // (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
6556 //#endif
6557 // }
6558 // }
6559 // }
6560 // else
6561 // {
6562 // /* has invariable category */
6563 // for (c=i=0; c<m->numChars; c++)
6564 // {
6565 // r = rateCat[c];
6566 // if (r < nRateCats)
6567 // like = m->lnL_SSE[c];
6568 // else
6569 // like = m->lnLI_SSE[c];
6570 //
6571 // /* check against LIKE_EPSILON (values close to zero are problematic) */
6572 // if (like < LIKE_EPSILON)
6573 // {
6574 // MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30lf\n", spacer, division, c, like);
6575 // (*lnL) = MRBFLT_NEG_MAX;
6576 // return ERROR;
6577 // }
6578 // else
6579 // {
6580 // (*lnL) += (log (like) + lnScaler[c]) * nSitesOfPat[c];
6581 // }
6582 // }
6583 // }
6584 //
6585 //#if defined (FAST_LOG)
6586 // (*lnL) += log (likeAdjust);
6587 //#endif
6588 //
6589 // return NO_ERROR;
6590 //}
6591 //#endif
6592
6593
6594 #if defined (FMA_ENABLED)
6595 /*------------------------------------------------------------------
6596 |
6597 | Likelihood_NUC4_FMA: 4by4 nucleotide models with or without rate
6598 | variation using AVX + FMA code
6599 |
6600 -------------------------------------------------------------------*/
Likelihood_NUC4_FMA(TreeNode * p,int division,int chain,MrBFlt * lnL,int whichSitePats)6601 int Likelihood_NUC4_FMA (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
6602 {
6603 int c, k, hasPInvar;
6604 MrBFlt freq, *bs, pInvar=0.0, like, likeI;
6605 CLFlt *lnScaler, *nSitesOfPat, *lnL_Vec, *lnLI_Vec;
6606 __m256 *clPtr, **clP, *clInvar=NULL;
6607 __m256 mA, mC, mG, mT, mFreq, mPInvar=_mm256_set1_ps(0.0f), mLike;
6608 ModelInfo *m;
6609
6610 /* find model settings and pInvar, invar cond likes */
6611 m = &modelSettings[division];
6612 if (m->pInvar == NULL)
6613 {
6614 hasPInvar = NO;
6615 }
6616 else
6617 {
6618 hasPInvar = YES;
6619 pInvar = *(GetParamVals (m->pInvar, chain, state[chain]));
6620 mPInvar = _mm256_set1_ps ((CLFlt)(pInvar));
6621 clInvar = (__m256 *) (m->invCondLikes);
6622 }
6623
6624 /* find conditional likelihood pointers */
6625 clPtr = (__m256 *) (m->condLikes[m->condLikeIndex[chain][p->index]]);
6626 clP = m->clP_AVX;
6627 for (k=0; k<m->numRateCats; k++)
6628 {
6629 clP[k] = clPtr;
6630 clPtr += m->numVecChars * m->numModelStates;
6631 }
6632 lnL_Vec = m->lnL_Vec;
6633 lnLI_Vec = m->lnLI_Vec;
6634
6635 /* find base frequencies */
6636 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
6637 mA = _mm256_set1_ps ((CLFlt)(bs[A]));
6638 mC = _mm256_set1_ps ((CLFlt)(bs[C]));
6639 mG = _mm256_set1_ps ((CLFlt)(bs[G]));
6640 mT = _mm256_set1_ps ((CLFlt)(bs[T]));
6641
6642 /* find category frequencies */
6643 if (hasPInvar == NO)
6644 freq = 1.0 / m->numRateCats;
6645 else
6646 freq = (1.0 - pInvar) / m->numRateCats;
6647 mFreq = _mm256_set1_ps ((CLFlt)(freq));
6648
6649 /* find tree scaler */
6650 lnScaler = m->scalers[m->siteScalerIndex[chain]];
6651
6652 /* find nSitesOfPat */
6653 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
6654
6655 /* reset lnL */
6656 *lnL = 0.0;
6657
6658 /* calculate variable likelihood */
6659 for (c=0; c<m->numVecChars; c++)
6660 {
6661 mLike = _mm256_setzero_ps ();
6662 for (k=0; k<m->numRateCats; k++)
6663 {
6664 mLike = _mm256_fmadd_ps (clP[k][A], mA, mLike);
6665 mLike = _mm256_fmadd_ps (clP[k][C], mC, mLike);
6666 mLike = _mm256_fmadd_ps (clP[k][G], mG, mLike);
6667 mLike = _mm256_fmadd_ps (clP[k][T], mT, mLike);
6668 clP[k] += 4;
6669 }
6670 mLike = _mm256_mul_ps (mLike, mFreq);
6671 _mm256_store_ps (lnL_Vec, mLike);
6672 lnL_Vec += m->numFloatsPerVec;
6673 }
6674
6675 /* calculate invariable likelihood */
6676 if (hasPInvar == YES)
6677 {
6678 for (c=0; c<m->numVecChars; c++)
6679 {
6680 mLike = _mm256_mul_ps (clInvar[A], mA);
6681 mLike = _mm256_fmadd_ps (clInvar[C], mC, mLike);
6682 mLike = _mm256_fmadd_ps (clInvar[G], mG, mLike);
6683 mLike = _mm256_fmadd_ps (clInvar[T], mT, mLike);
6684 mLike = _mm256_mul_ps (mLike, mPInvar);
6685 _mm256_store_ps (lnLI_Vec, mLike);
6686 clInvar += 4;
6687 lnLI_Vec += m->numFloatsPerVec;
6688 }
6689 }
6690
6691 /* accumulate results */
6692 if (hasPInvar == NO)
6693 {
6694 for (c=0; c<m->numChars; c++)
6695 {
6696 like = m->lnL_Vec[c];
6697 /* check against LIKE_EPSILON (values close to zero are problematic) */
6698 if (like < LIKE_EPSILON)
6699 {
6700 # ifdef DEBUG_LIKELIHOOD
6701 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
6702 # endif
6703 (*lnL) = MRBFLT_NEG_MAX;
6704 abortMove = YES;
6705 return ERROR;
6706 }
6707 else
6708 {
6709 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
6710 }
6711 }
6712 }
6713 else
6714 {
6715 /* has invariable category */
6716 for (c=0; c<m->numChars; c++)
6717 {
6718 like = m->lnL_Vec[c];
6719 likeI = m->lnLI_Vec[c];
6720 if (lnScaler[c] < -200)
6721 {
6722 /* we are not going to be able to exponentiate the scaling factor */
6723 if (likeI > 1E-70)
6724 {
6725 /* forget about like; it is going to be insignificant compared to likeI */
6726 like = likeI;
6727 }
6728 else
6729 {
6730 /* treat likeI as if 0.0, that is, ignore it completely */
6731 }
6732 }
6733 else
6734 like = like + (likeI / exp (lnScaler[c]));
6735
6736 /* check against LIKE_EPSILON (values close to zero are problematic) */
6737 if (like < LIKE_EPSILON)
6738 {
6739 # ifdef DEBUG_LIKELIHOOD
6740 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
6741 # endif
6742 (*lnL) = MRBFLT_NEG_MAX;
6743 abortMove = YES;
6744 return ERROR;
6745 }
6746 else
6747 {
6748 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
6749 }
6750 }
6751 }
6752
6753 return NO_ERROR;
6754 }
6755 #endif
6756
6757
6758 #if defined (AVX_ENABLED)
6759 /*------------------------------------------------------------------
6760 |
6761 | Likelihood_NUC4_AVX: 4by4 nucleotide models with or without rate
6762 | variation using AVX code
6763 |
6764 -------------------------------------------------------------------*/
Likelihood_NUC4_AVX(TreeNode * p,int division,int chain,MrBFlt * lnL,int whichSitePats)6765 int Likelihood_NUC4_AVX (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
6766 {
6767 int c, k, hasPInvar;
6768 MrBFlt freq, *bs, pInvar=0.0, like, likeI;
6769 CLFlt *lnScaler, *nSitesOfPat, *lnL_Vec, *lnLI_Vec;
6770 __m256 *clPtr, **clP, *clInvar=NULL;
6771 __m256 m1, mA, mC, mG, mT, mFreq, mPInvar=_mm256_set1_ps(0.0f), mLike;
6772 ModelInfo *m;
6773
6774 /* find model settings and pInvar, invar cond likes */
6775 m = &modelSettings[division];
6776 if (m->pInvar == NULL)
6777 {
6778 hasPInvar = NO;
6779 }
6780 else
6781 {
6782 hasPInvar = YES;
6783 pInvar = *(GetParamVals (m->pInvar, chain, state[chain]));
6784 mPInvar = _mm256_set1_ps ((CLFlt)(pInvar));
6785 clInvar = (__m256 *) (m->invCondLikes);
6786 }
6787
6788 /* find conditional likelihood pointers */
6789 clPtr = (__m256 *) (m->condLikes[m->condLikeIndex[chain][p->index]]);
6790 clP = m->clP_AVX;
6791 for (k=0; k<m->numRateCats; k++)
6792 {
6793 clP[k] = clPtr;
6794 clPtr += m->numVecChars * m->numModelStates;
6795 }
6796 lnL_Vec = m->lnL_Vec;
6797 lnLI_Vec = m->lnLI_Vec;
6798
6799 /* find base frequencies */
6800 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
6801 mA = _mm256_set1_ps ((CLFlt)(bs[A]));
6802 mC = _mm256_set1_ps ((CLFlt)(bs[C]));
6803 mG = _mm256_set1_ps ((CLFlt)(bs[G]));
6804 mT = _mm256_set1_ps ((CLFlt)(bs[T]));
6805
6806 /* find category frequencies */
6807 if (hasPInvar == NO)
6808 freq = 1.0 / m->numRateCats;
6809 else
6810 freq = (1.0 - pInvar) / m->numRateCats;
6811 mFreq = _mm256_set1_ps ((CLFlt)(freq));
6812
6813 /* find tree scaler */
6814 lnScaler = m->scalers[m->siteScalerIndex[chain]];
6815
6816 /* find nSitesOfPat */
6817 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
6818
6819 /* reset lnL */
6820 *lnL = 0.0;
6821
6822 /* calculate variable likelihood */
6823 for (c=0; c<m->numVecChars; c++)
6824 {
6825 mLike = _mm256_setzero_ps ();
6826 for (k=0; k<m->numRateCats; k++)
6827 {
6828 m1 = _mm256_mul_ps (clP[k][A], mA);
6829 mLike = _mm256_add_ps (mLike, m1);
6830 m1 = _mm256_mul_ps (clP[k][C], mC);
6831 mLike = _mm256_add_ps (mLike, m1);
6832 m1 = _mm256_mul_ps (clP[k][G], mG);
6833 mLike = _mm256_add_ps (mLike, m1);
6834 m1 = _mm256_mul_ps (clP[k][T], mT);
6835 mLike = _mm256_add_ps (mLike, m1);
6836 clP[k] += 4;
6837 }
6838 mLike = _mm256_mul_ps (mLike, mFreq);
6839 _mm256_store_ps (lnL_Vec, mLike);
6840 lnL_Vec += m->numFloatsPerVec;
6841 }
6842
6843 /* calculate invariable likelihood */
6844 if (hasPInvar == YES)
6845 {
6846 for (c=0; c<m->numVecChars; c++)
6847 {
6848 mLike = _mm256_mul_ps (clInvar[A], mA);
6849 m1 = _mm256_mul_ps (clInvar[C], mC);
6850 mLike = _mm256_add_ps (mLike, m1);
6851 m1 = _mm256_mul_ps (clInvar[G], mG);
6852 mLike = _mm256_add_ps (mLike, m1);
6853 m1 = _mm256_mul_ps (clInvar[T], mT);
6854 mLike = _mm256_add_ps (mLike, m1);
6855 mLike = _mm256_mul_ps (mLike, mPInvar);
6856
6857 _mm256_store_ps (lnLI_Vec, mLike);
6858 clInvar += 4;
6859 lnLI_Vec += m->numFloatsPerVec;
6860 }
6861 }
6862
6863 /* accumulate results */
6864 if (hasPInvar == NO)
6865 {
6866 for (c=0; c<m->numChars; c++)
6867 {
6868 like = m->lnL_Vec[c];
6869 /* check against LIKE_EPSILON (values close to zero are problematic) */
6870 if (like < LIKE_EPSILON)
6871 {
6872 # ifdef DEBUG_LIKELIHOOD
6873 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
6874 # endif
6875 (*lnL) = MRBFLT_NEG_MAX;
6876 abortMove = YES;
6877 return ERROR;
6878 }
6879 else
6880 {
6881 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
6882 }
6883 }
6884 }
6885 else
6886 {
6887 /* has invariable category */
6888 for (c=0; c<m->numChars; c++)
6889 {
6890 like = m->lnL_Vec[c];
6891 likeI = m->lnLI_Vec[c];
6892 if (lnScaler[c] < -200)
6893 {
6894 /* we are not going to be able to exponentiate the scaling factor */
6895 if (likeI > 1E-70)
6896 {
6897 /* forget about like; it is going to be insignificant compared to likeI */
6898 like = likeI;
6899 }
6900 else
6901 {
6902 /* treat likeI as if 0.0, that is, ignore it completely */
6903 }
6904 }
6905 else
6906 like = like + (likeI / exp (lnScaler[c]));
6907
6908 /* check against LIKE_EPSILON (values close to zero are problematic) */
6909 if (like < LIKE_EPSILON)
6910 {
6911 # ifdef DEBUG_LIKELIHOOD
6912 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
6913 # endif
6914 (*lnL) = MRBFLT_NEG_MAX;
6915 abortMove = YES;
6916 return ERROR;
6917 }
6918 else
6919 {
6920 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
6921 }
6922 }
6923 }
6924
6925 return NO_ERROR;
6926 }
6927 #endif
6928
6929
6930 #if defined (SSE_ENABLED)
6931 /*------------------------------------------------------------------
6932 |
6933 | Likelihood_NUC4_SSE: 4by4 nucleotide models with or without rate
6934 | variation
6935 |
6936 -------------------------------------------------------------------*/
Likelihood_NUC4_SSE(TreeNode * p,int division,int chain,MrBFlt * lnL,int whichSitePats)6937 int Likelihood_NUC4_SSE (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
6938 {
6939 int c, k, hasPInvar;
6940 MrBFlt freq, *bs, pInvar=0.0, like, likeI;
6941 CLFlt *lnScaler, *nSitesOfPat, *lnL_Vec, *lnLI_Vec;
6942 __m128 *clPtr, **clP, *clInvar=NULL;
6943 __m128 m1, mA, mC, mG, mT, mFreq, mPInvar=_mm_set1_ps(0.0f), mLike;
6944 ModelInfo *m;
6945
6946 /* find model settings and pInvar, invar cond likes */
6947 m = &modelSettings[division];
6948 if (m->pInvar == NULL)
6949 {
6950 hasPInvar = NO;
6951 }
6952 else
6953 {
6954 hasPInvar = YES;
6955 pInvar = *(GetParamVals (m->pInvar, chain, state[chain]));
6956 mPInvar = _mm_set1_ps ((CLFlt)(pInvar));
6957 clInvar = (__m128 *) (m->invCondLikes);
6958 }
6959
6960 /* find conditional likelihood pointers */
6961 clPtr = (__m128 *) (m->condLikes[m->condLikeIndex[chain][p->index]]);
6962 clP = m->clP_SSE;
6963 for (k=0; k<m->numRateCats; k++)
6964 {
6965 clP[k] = clPtr;
6966 clPtr += m->numVecChars * m->numModelStates;
6967 }
6968 lnL_Vec = m->lnL_Vec;
6969 lnLI_Vec = m->lnLI_Vec;
6970
6971 /* find base frequencies */
6972 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
6973 mA = _mm_set1_ps ((CLFlt)(bs[A]));
6974 mC = _mm_set1_ps ((CLFlt)(bs[C]));
6975 mG = _mm_set1_ps ((CLFlt)(bs[G]));
6976 mT = _mm_set1_ps ((CLFlt)(bs[T]));
6977
6978 /* find category frequencies */
6979 if (hasPInvar == NO)
6980 freq = 1.0 / m->numRateCats;
6981 else
6982 freq = (1.0 - pInvar) / m->numRateCats;
6983 mFreq = _mm_set1_ps ((CLFlt)(freq));
6984
6985 /* find tree scaler */
6986 lnScaler = m->scalers[m->siteScalerIndex[chain]];
6987
6988 /* find nSitesOfPat */
6989 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
6990
6991 /* reset lnL */
6992 *lnL = 0.0;
6993
6994 /* calculate variable likelihood */
6995 for (c=0; c<m->numVecChars; c++)
6996 {
6997 mLike = _mm_setzero_ps ();
6998 for (k=0; k<m->numRateCats; k++)
6999 {
7000 m1 = _mm_mul_ps (clP[k][A], mA);
7001 mLike = _mm_add_ps (mLike, m1);
7002 m1 = _mm_mul_ps (clP[k][C], mC);
7003 mLike = _mm_add_ps (mLike, m1);
7004 m1 = _mm_mul_ps (clP[k][G], mG);
7005 mLike = _mm_add_ps (mLike, m1);
7006 m1 = _mm_mul_ps (clP[k][T], mT);
7007 mLike = _mm_add_ps (mLike, m1);
7008 clP[k] += 4;
7009 }
7010 mLike = _mm_mul_ps (mLike, mFreq);
7011 _mm_store_ps (lnL_Vec, mLike);
7012 lnL_Vec += m->numFloatsPerVec;
7013 }
7014
7015 /* calculate invariable likelihood */
7016 if (hasPInvar == YES)
7017 {
7018 for (c=0; c<m->numVecChars; c++)
7019 {
7020 mLike = _mm_mul_ps (clInvar[A], mA);
7021 m1 = _mm_mul_ps (clInvar[C], mC);
7022 mLike = _mm_add_ps (mLike, m1);
7023 m1 = _mm_mul_ps (clInvar[G], mG);
7024 mLike = _mm_add_ps (mLike, m1);
7025 m1 = _mm_mul_ps (clInvar[T], mT);
7026 mLike = _mm_add_ps (mLike, m1);
7027 mLike = _mm_mul_ps (mLike, mPInvar);
7028
7029 _mm_store_ps (lnLI_Vec, mLike);
7030 clInvar += 4;
7031 lnLI_Vec += m->numFloatsPerVec;
7032 }
7033 }
7034
7035 /* accumulate results */
7036 if (hasPInvar == NO)
7037 {
7038 for (c=0; c<m->numChars; c++)
7039 {
7040 like = m->lnL_Vec[c];
7041 /* check against LIKE_EPSILON (values close to zero are problematic) */
7042 if (like < LIKE_EPSILON)
7043 {
7044 # ifdef DEBUG_LIKELIHOOD
7045 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
7046 # endif
7047 (*lnL) = MRBFLT_NEG_MAX;
7048 abortMove = YES;
7049 return ERROR;
7050 }
7051 else
7052 {
7053 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
7054 }
7055 }
7056 }
7057 else
7058 {
7059 /* has invariable category */
7060 for (c=0; c<m->numChars; c++)
7061 {
7062 like = m->lnL_Vec[c];
7063 likeI = m->lnLI_Vec[c];
7064 if (lnScaler[c] < -200)
7065 {
7066 /* we are not going to be able to exponentiate the scaling factor */
7067 if (likeI > 1E-70)
7068 {
7069 /* forget about like; it is going to be insignificant compared to likeI */
7070 like = likeI;
7071 }
7072 else
7073 {
7074 /* treat likeI as if 0.0, that is, ignore it completely */
7075 }
7076 }
7077 else
7078 like = like + (likeI / exp (lnScaler[c]));
7079
7080 /* check against LIKE_EPSILON (values close to zero are problematic) */
7081 if (like < LIKE_EPSILON)
7082 {
7083 # ifdef DEBUG_LIKELIHOOD
7084 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
7085 # endif
7086 (*lnL) = MRBFLT_NEG_MAX;
7087 abortMove = YES;
7088 return ERROR;
7089 }
7090 else
7091 {
7092 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
7093 }
7094 }
7095 }
7096
7097 return NO_ERROR;
7098 }
7099 #endif
7100
7101
7102 /*------------------------------------------------------------------
7103 |
7104 | Likelihood_NY98: Codon model with three selection categories,
7105 | after Nielsen and Yang (1998).
7106 |
7107 -------------------------------------------------------------------*/
Likelihood_NY98(TreeNode * p,int division,int chain,MrBFlt * lnL,int whichSitePats)7108 int Likelihood_NY98 (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
7109 {
7110 int c, j, k, nStates;
7111 MrBFlt catLike, like, *bs, *omegaCatFreq;
7112 CLFlt **clP,*clPtr, *lnScaler, *nSitesOfPat;
7113 ModelInfo *m;
7114
7115 m = &modelSettings[division];
7116
7117 /* number of states */
7118 nStates = m->numModelStates;
7119
7120 /* find conditional likelihood pointers */
7121 clPtr = m->condLikes[m->condLikeIndex[chain][p->index]];
7122 clP = m->clP;
7123 for (k=0; k<m->numOmegaCats; k++)
7124 {
7125 clP[k] = clPtr;
7126 clPtr += m->numChars * m->numModelStates;
7127 }
7128
7129 /* find codon frequencies */
7130 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
7131
7132 /* find category frequencies */
7133 omegaCatFreq = GetParamSubVals (m->omega, chain, state[chain]);
7134
7135 /* find site scaler */
7136 lnScaler = m->scalers[m->siteScalerIndex[chain]];
7137
7138 /* find nSitesOfPat */
7139 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
7140
7141 *lnL = 0.0; /* reset lnL */
7142
7143 for (c=m->numDummyChars; c<m->numChars; c++)
7144 {
7145 like = 0.0;
7146 for (k=0; k<m->numOmegaCats; k++)
7147 {
7148 catLike = 0.0;
7149 for (j=0; j<nStates; j++)
7150 catLike += clP[k][j] * bs[j];
7151 like += catLike * omegaCatFreq[k];
7152 clP[k] += nStates;
7153 }
7154 /* check against LIKE_EPSILON (values close to zero are problematic) */
7155 if (like < LIKE_EPSILON)
7156 {
7157 # ifdef DEBUG_LIKELIHOOD
7158 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
7159 # endif
7160 (*lnL) = MRBFLT_NEG_MAX;
7161 abortMove = YES;
7162 return ERROR;
7163 }
7164 else
7165 {
7166 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
7167 }
7168 }
7169
7170 return NO_ERROR;
7171 }
7172
7173
7174 #if defined (SSE_ENABLED)
7175 /*------------------------------------------------------------------
7176 |
7177 | Likelihood_NY98_SSE: Codon model with three selection categories,
7178 | after Nielsen and Yang (1998).
7179 |
7180 -------------------------------------------------------------------*/
Likelihood_NY98_SSE(TreeNode * p,int division,int chain,MrBFlt * lnL,int whichSitePats)7181 int Likelihood_NY98_SSE (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
7182 {
7183 int c, j, k, nStates;
7184 MrBFlt like, *bs, *omegaCatFreq;
7185 CLFlt *lnScaler, *nSitesOfPat, *lnL_Vec;
7186 __m128 *clPtr, **clP;
7187 __m128 m1, mCatLike, mLike;
7188 ModelInfo *m;
7189
7190 m = &modelSettings[division];
7191
7192 /* number of states */
7193 nStates = m->numModelStates;
7194
7195 /* find conditional likelihood pointers */
7196 clPtr = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->index]];
7197 clP = m->clP_SSE;
7198 for (k=0; k<m->numOmegaCats; k++)
7199 {
7200 clP[k] = clPtr;
7201 clPtr += m->numVecChars * nStates;
7202 }
7203
7204 /* find codon frequencies */
7205 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
7206
7207 /* find category frequencies */
7208 omegaCatFreq = GetParamSubVals (m->omega, chain, state[chain]);
7209
7210 /* find site scaler */
7211 lnScaler = m->scalers[m->siteScalerIndex[chain]];
7212
7213 /* find nSitesOfPat */
7214 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
7215
7216 *lnL = 0.0; /* reset lnL */
7217
7218 lnL_Vec = m->lnL_Vec;
7219 for (c=0; c<m->numVecChars; c++)
7220 {
7221 mLike = _mm_setzero_ps ();
7222 for (k=0; k<m->numOmegaCats; k++)
7223 {
7224 mCatLike = _mm_setzero_ps ();
7225 for (j=0; j<nStates; j++)
7226 {
7227 m1 = _mm_mul_ps (clP[k][j], _mm_set1_ps ((CLFlt)bs[j]));
7228 mCatLike = _mm_add_ps (mCatLike, m1);
7229 }
7230 m1 = _mm_mul_ps (mCatLike, _mm_set1_ps ((CLFlt)omegaCatFreq[k]));
7231 mLike = _mm_add_ps (mLike, m1);
7232 clP[k] += nStates;
7233 }
7234 _mm_store_ps (lnL_Vec, mLike);
7235 lnL_Vec += m->numFloatsPerVec;
7236 }
7237 for (c=m->numDummyChars; c<m->numChars; c++)
7238 {
7239 like = m->lnL_Vec[c];
7240 /* check against LIKE_EPSILON (values close to zero are problematic) */
7241 if (like < LIKE_EPSILON)
7242 {
7243 # ifdef DEBUG_LIKELIHOOD
7244 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
7245 # endif
7246 (*lnL) = MRBFLT_NEG_MAX;
7247 abortMove = YES;
7248 return ERROR;
7249 }
7250 else
7251 {
7252 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
7253 }
7254 }
7255
7256 return NO_ERROR;
7257 }
7258 #endif
7259
7260
7261 /*------------------------------------------------------------------
7262 |
7263 | Likelihood_Res: restriction site model with or without rate
7264 | variation
7265 |
7266 -------------------------------------------------------------------*/
Likelihood_Res(TreeNode * p,int division,int chain,MrBFlt * lnL,int whichSitePats)7267 int Likelihood_Res (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
7268 {
7269 int c, k;
7270 MrBFlt *bs, freq, like, pUnobserved, pObserved;
7271 CLFlt *clPtr, **clP, *lnScaler, *nSitesOfPat;
7272 ModelInfo *m;
7273
7274
7275 m = &modelSettings[division];
7276
7277 /* find conditional likelihood pointer */
7278 clPtr = m->condLikes[m->condLikeIndex[chain][p->index]];
7279 clP = m->clP;
7280 for (k=0; k<m->numRateCats; k++)
7281 {
7282 clP[k] = clPtr;
7283 clPtr += m->numChars * m->numModelStates;
7284 }
7285
7286 /* find base frequencies */
7287 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
7288
7289 /* find category frequencies */
7290 freq = 1.0 / m->numRateCats;
7291
7292 /* find site scaler */
7293 lnScaler = m->scalers[m->siteScalerIndex[chain]];
7294
7295 /* find nSitesOfPat */
7296 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
7297
7298 *lnL = 0.0; /* reset lnL */
7299
7300 pUnobserved = 0.0;
7301 for (c=0; c<m->numDummyChars; c++)
7302 {
7303 like = 0.0;
7304 for (k=0; k<m->numRateCats; k++)
7305 {
7306 like += (clP[k][0]*bs[0] + clP[k][1]*bs[1]) * freq;
7307 clP[k] += 2;
7308 }
7309 pUnobserved += like * exp(lnScaler[c]);
7310 }
7311
7312 pObserved = 1.0 - pUnobserved;
7313 if (pObserved < LIKE_EPSILON)
7314 {
7315 # ifdef DEBUG_LIKELIHOOD
7316 MrBayesPrint ("%s WARNING: p(Observed) < LIKE_EPSILON - for division %d p(Observed) = %1.30le\n", spacer, division+1, pObserved);
7317 # endif
7318 (*lnL) = MRBFLT_NEG_MAX;
7319 abortMove = YES;
7320 return ERROR;
7321 }
7322
7323 for (c=m->numDummyChars; c<m->numChars; c++)
7324 {
7325 like = 0.0;
7326 for (k=0; k<m->numRateCats; k++)
7327 {
7328 like += (clP[k][0]*bs[0] + clP[k][1]*bs[1]) * freq;
7329 clP[k] += 2;
7330 }
7331 /* check against LIKE_EPSILON (values close to zero are problematic) */
7332 if (like < LIKE_EPSILON)
7333 {
7334 # ifdef DEBUG_LIKELIHOOD
7335 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
7336 # endif
7337 (*lnL) = MRBFLT_NEG_MAX;
7338 abortMove = YES;
7339 return ERROR;
7340 }
7341 else
7342 {
7343 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
7344 }
7345 }
7346
7347 /* correct for absent characters */
7348 (*lnL) -= log(pObserved) * (m->numUncompressedChars);
7349
7350 return NO_ERROR;
7351 }
7352
7353
7354 #if defined (SSE_ENABLED)
7355 /*------------------------------------------------------------------
7356 |
7357 | Likelihood_Res_SSE: restriction site model with or without rate
7358 | variation
7359 |
7360 -------------------------------------------------------------------*/
Likelihood_Res_SSE(TreeNode * p,int division,int chain,MrBFlt * lnL,int whichSitePats)7361 int Likelihood_Res_SSE (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
7362 {
7363 int c, k;
7364 MrBFlt freq, *bs, like, pUnobserved, pObserved;
7365 CLFlt *lnScaler, *nSitesOfPat, *lnL_Vec;
7366 __m128 *clPtr, **clP;
7367 __m128 m1, mA, mB, mFreq, mLike;
7368 ModelInfo *m;
7369
7370 /* find model settings and pInvar, invar cond likes */
7371 m = &modelSettings[division];
7372
7373 /* find conditional likelihood pointers */
7374 clPtr = (__m128 *) (m->condLikes[m->condLikeIndex[chain][p->index]]);
7375 clP = m->clP_SSE;
7376 for (k=0; k<m->numRateCats; k++)
7377 {
7378 clP[k] = clPtr;
7379 clPtr += m->numVecChars * m->numModelStates;
7380 }
7381 lnL_Vec = m->lnL_Vec;
7382
7383 /* find base frequencies */
7384 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
7385 mA = _mm_set1_ps ((CLFlt)(bs[0]));
7386 mB = _mm_set1_ps ((CLFlt)(bs[1]));
7387
7388 freq = 1.0 / m->numRateCats;
7389 mFreq = _mm_set1_ps ((CLFlt)(freq));
7390
7391 /* find tree scaler */
7392 lnScaler = m->scalers[m->siteScalerIndex[chain]];
7393
7394 /* find nSitesOfPat */
7395 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
7396
7397 /* reset lnL */
7398 *lnL = 0.0;
7399
7400 /* calculate variable likelihood */
7401 for (c=0; c<m->numVecChars; c++)
7402 {
7403 mLike = _mm_setzero_ps ();
7404 for (k=0; k<m->numRateCats; k++)
7405 {
7406 m1 = _mm_mul_ps (clP[k][0], mA);
7407 mLike = _mm_add_ps (mLike, m1);
7408 m1 = _mm_mul_ps (clP[k][1], mB);
7409 mLike = _mm_add_ps (mLike, m1);
7410 clP[k] += 2;
7411 }
7412 mLike = _mm_mul_ps (mLike, mFreq);
7413 _mm_store_ps (lnL_Vec, mLike);
7414 lnL_Vec += m->numFloatsPerVec;
7415 }
7416
7417 pUnobserved = 0.0;
7418 for (c=0; c<m->numDummyChars; c++)
7419 {
7420 like = m->lnL_Vec[c];
7421 pUnobserved += like * exp(lnScaler[c]);
7422 }
7423
7424 pObserved = 1.0 - pUnobserved;
7425 if (pObserved < LIKE_EPSILON)
7426 {
7427 # ifdef DEBUG_LIKELIHOOD
7428 MrBayesPrint ("%s WARNING: p(Observed) < LIKE_EPSILON - for division %d p(Observed) = %1.30le\n", spacer, division+1, pObserved);
7429 # endif
7430 (*lnL) = MRBFLT_NEG_MAX;
7431 abortMove = YES;
7432 return ERROR;
7433 }
7434
7435 for (c=m->numDummyChars; c<m->numChars; c++)
7436 {
7437 like = m->lnL_Vec[c];
7438 /* check against LIKE_EPSILON (values close to zero are problematic) */
7439 if (like < LIKE_EPSILON)
7440 {
7441 # ifdef DEBUG_LIKELIHOOD
7442 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
7443 # endif
7444 (*lnL) = MRBFLT_NEG_MAX;
7445 abortMove = YES;
7446 return ERROR;
7447 }
7448 else
7449 {
7450 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
7451 }
7452 }
7453
7454 /* correct for absent characters */
7455 (*lnL) -= log(pObserved) * (m->numUncompressedChars);
7456
7457 return NO_ERROR;
7458 }
7459 #endif
7460
7461
7462 /*------------------------------------------------------------------
7463 |
7464 | Likelihood_Std: variable states model with or without rate
7465 | variation
7466 |
7467 -------------------------------------------------------------------*/
Likelihood_Std(TreeNode * p,int division,int chain,MrBFlt * lnL,int whichSitePats)7468 int Likelihood_Std (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
7469 {
7470 int b, c, j, k, nBetaCats, nRateCats, nStates, numReps;
7471 MrBFlt catLike, catFreq, rateFreq, like, *bs, *bsBase,
7472 pUnobserved, pObserved;
7473 CLFlt *clPtr, **clP, *lnScaler, *nSitesOfPat;
7474 ModelInfo *m;
7475
7476 m = &modelSettings[division];
7477
7478 numReps=0;
7479 for (c=0; c<m->numChars; c++)
7480 {
7481 if (m->nStates[c] == 2)
7482 numReps += m->numBetaCats * 2;
7483 else
7484 numReps += m->nStates[c];
7485 }
7486 /* find conditional likelihood pointers */
7487 clPtr = m->condLikes[m->condLikeIndex[chain][p->index]];
7488 clP = m->clP;
7489 for (k=0; k<m->numRateCats; k++)
7490 {
7491 clP[k] = clPtr;
7492 clPtr += numReps;
7493 }
7494
7495 /* find base frequencies */
7496 bsBase = GetParamStdStateFreqs (m->stateFreq, chain, state[chain]);
7497
7498 /* find rate category number and frequencies */
7499 nRateCats = m->numRateCats;
7500 rateFreq = 1.0 / nRateCats;
7501
7502 /* find site scaler */
7503 lnScaler = m->scalers[m->siteScalerIndex[chain]];
7504
7505 /* find nSitesOfPat */
7506 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
7507
7508 *lnL = 0.0; /* reset lnL */
7509
7510 if (m->numBetaCats == 1)
7511 {
7512 pUnobserved = 0.0;
7513 catFreq = rateFreq;
7514 for (c=j=0; c<m->numDummyChars; c++)
7515 {
7516 like = 0.0;
7517 nStates = m->nStates[c];
7518 bs = bsBase + m->bsIndex[c];
7519 for (k=0; k<nRateCats; k++)
7520 {
7521 catLike = 0.0;
7522 for (j=0; j<nStates; j++)
7523 catLike += clP[k][j] * bs[j];
7524 like += catLike * catFreq;
7525 clP[k] += nStates;
7526 }
7527 pUnobserved += like * exp(lnScaler[c]);
7528 }
7529
7530 pObserved = 1.0 - pUnobserved;
7531 if (pObserved < LIKE_EPSILON)
7532 pObserved = LIKE_EPSILON;
7533
7534 for (c=m->numDummyChars; c<m->numChars; c++)
7535 {
7536 like = 0.0;
7537 nStates = m->nStates[c];
7538 bs = bsBase + m->bsIndex[c];
7539
7540 for (k=0; k<nRateCats; k++)
7541 {
7542 catLike = 0.0;
7543 for (j=0; j<nStates; j++)
7544 catLike += clP[k][j] * bs[j];
7545 like += catLike * catFreq;
7546 clP[k] += nStates;
7547 }
7548 /* check against LIKE_EPSILON (values close to zero are problematic) */
7549 if (like < LIKE_EPSILON)
7550 {
7551 # ifdef DEBUG_LIKELIHOOD
7552 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
7553 # endif
7554 (*lnL) = MRBFLT_NEG_MAX;
7555 abortMove = YES;
7556 return ERROR;
7557 }
7558 else
7559 {
7560 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
7561 }
7562 }
7563 }
7564 else
7565 {
7566 pUnobserved = 0.0;
7567 for (c=j=0; c<m->numDummyChars; c++)
7568 {
7569 like = 0.0;
7570 nStates = m->nStates[c];
7571 bs = bsBase + m->bsIndex[c];
7572 if (nStates == 2)
7573 {
7574 nBetaCats = m->numBetaCats;
7575 catFreq = rateFreq / nBetaCats;
7576 }
7577 else
7578 {
7579 nBetaCats = 1;
7580 catFreq = rateFreq;
7581 }
7582 for (b=0; b<nBetaCats; b++)
7583 {
7584 for (k=0; k<nRateCats; k++)
7585 {
7586 catLike = 0.0;
7587 for (j=0; j<nStates; j++)
7588 catLike += clP[k][j] * bs[j];
7589 like += catLike * catFreq;
7590 clP[k] += nStates;
7591 }
7592 bs += nStates;
7593 }
7594 pUnobserved += like * exp(lnScaler[c]);
7595 }
7596
7597 pObserved = 1.0 - pUnobserved;
7598 if (pObserved < LIKE_EPSILON)
7599 pObserved = LIKE_EPSILON;
7600
7601 for (c=m->numDummyChars; c<m->numChars; c++)
7602 {
7603 like = 0.0;
7604 nStates = m->nStates[c];
7605 bs = bsBase + m->bsIndex[c];
7606 if (nStates == 2)
7607 {
7608 nBetaCats = m->numBetaCats;
7609 catFreq = rateFreq / nBetaCats;
7610 }
7611 else
7612 {
7613 nBetaCats = 1;
7614 catFreq = rateFreq;
7615 }
7616 for (b=0; b<nBetaCats; b++)
7617 {
7618 for (k=0; k<nRateCats; k++)
7619 {
7620 catLike = 0.0;
7621 for (j=0; j<nStates; j++)
7622 catLike += clP[k][j] * bs[j];
7623 like += catLike * catFreq;
7624 clP[k] += nStates;
7625 }
7626 bs += nStates;
7627 }
7628 /* check against LIKE_EPSILON (values close to zero are problematic) */
7629 if (like < LIKE_EPSILON)
7630 {
7631 # ifdef DEBUG_LIKELIHOOD
7632 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
7633 # endif
7634 (*lnL) = MRBFLT_NEG_MAX;
7635 abortMove = YES;
7636 return ERROR;
7637 }
7638 else
7639 {
7640 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
7641 }
7642 }
7643 }
7644
7645 /* correct for absent characters */
7646 (*lnL) -= log(pObserved) * (m->numUncompressedChars);
7647
7648 return NO_ERROR;
7649 }
7650
7651
7652 /*------------------------------------------------------------------
7653 |
7654 | Likelihood_Pars: likelihood under the Tuffley and Steel (1997)
7655 | model for characters with constant number of states. The idea
7656 | is described in:
7657 |
7658 | Tuffley, C., and M. Steel. 1997. Links between maximum likelihood
7659 | and maximum parsimony under a simple model of site substitution.
7660 | Bull. Math. Bio. 59:581-607.
7661 |
7662 | The likelihood under the Tuffley and Steel (1997) model is:
7663 |
7664 | L = k^[-(T + n)]
7665 |
7666 | where L is the likelihood
7667 | k is the number of character states
7668 | T is the parsimony tree length
7669 | n is the number of characters
7670 |
7671 | The parsimony calculator does not use character packing; this is
7672 | to enable reweighting of characters
7673 |
7674 | Note that this is an empirical Bayes approach in that it uses the
7675 | maximum likelihood branch length.
7676 |
7677 -------------------------------------------------------------------*/
int Likelihood_Pars (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
{
    int         c, i, nStates, atBasalNode;
    BitsLong    changed, *pL, *pR, *pP, *pA, *oldpP, x;
    CLFlt       nParsChars, treeLength;
    CLFlt       length, *nSitesOfPat, *newNodeLength, oldNodeLength;
    Tree        *t;
    ModelInfo   *m;

    /* model settings and tree for this division */
    m = &modelSettings[division];
    t = GetTree(m->brlens,chain,state[chain]);

    /* start from the stored parsimony tree length; it is adjusted
       below by the length change of every node that is revisited */
    treeLength = (CLFlt) m->parsTreeLength[2 * chain + state[chain]];

    nStates = m->numStates;

    /* pattern counts (weights) for this division */
    nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;

    /* Mark the nodes that can be stop nodes: a node that needs
       updating, whose ancestor is marked, and that has at least one
       descendant that does not need updating */
    t->root->marked = YES;
    for (i=t->nIntNodes-1; i>=0; i--)
        {
        p = t->intDownPass[i];
        if (p->upDateCl == YES && p->anc->marked == YES
            && (p->left->upDateCl == NO || p->right->upDateCl == NO))
            p->marked = YES;
        else
            p->marked = NO;
        }

    /* Downpass, visiting only the nodes that need updating */
    for (i=0; i<t->nIntNodes; i++)
        {
        p = t->intDownPass[i];
        if (p->upDateCl == NO)
            continue;

        /* flip space */
        FlipCondLikeSpace(m, chain, p->index);

        /* parsimony sets of the descendants, and the old and new sets of the node */
        pL    = m->parsSets[m->condLikeIndex[chain][p->left->index ]];
        pR    = m->parsSets[m->condLikeIndex[chain][p->right->index]];
        oldpP = m->parsSets[m->condLikeScratchIndex[p->index       ]];
        pP    = m->parsSets[m->condLikeIndex[chain][p->index       ]];

        /* old node length, and the slot receiving the new one */
        oldNodeLength = m->parsNodeLens[m->condLikeScratchIndex[p->index]];
        newNodeLength = &m->parsNodeLens[m->condLikeIndex[chain][p->index]];

        /* in an unrooted tree the node just above the root also has
           to be compared with the set of its ancestor */
        atBasalNode = (t->isRooted == NO && p->anc->anc == NULL);
        pA = atBasalNode ? m->parsSets[m->condLikeIndex[chain][p->anc->index]] : NULL;

        length = 0.0;
        changed = 0;
        for (c=0; c<m->numChars; c++)
            {
            x = pL[c] & pR[c];
            if (x == 0)
                {
                /* no state in common: take the union and count a step */
                x = pL[c] | pR[c];
                length += nSitesOfPat[c];
                }
            if (atBasalNode)
                {
                if ((x & pA[c]) == 0)
                    length += nSitesOfPat[c];
                pP[c] = x;
                }
            else
                {
                pP[c] = x;
                /* collect any bit that differs from the old set */
                changed |= (x ^ oldpP[c]);
                }
            }

        treeLength += (length - oldNodeLength);
        *newNodeLength = length;

        /* stop early at a marked node whose state sets did not change */
        if (!atBasalNode && p->marked == YES && changed == 0)
            break;
        }

    /* Count number of characters in the partition. It is calculated
       on the fly because this number is going to differ for
       different chains if character reweighting is used. */
    nParsChars = 0.0;
    for (c=0; c<m->numChars; c++)
        nParsChars += nSitesOfPat[c];

    /* lnL = -(T + n) * ln(k) */
    *lnL = - ((treeLength + nParsChars) * log (nStates));

    /* remember the parsimony tree length for the next call */
    m->parsTreeLength[2 * chain + state[chain]] = treeLength;

    return (NO_ERROR);
}
7796
7797
7798 #if 0
7799 int Likelihood_ParsCodon (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
7800 {
7801 int x, y;
7802 TreeNode *q;
7803
7804 /* no warnings */
7805 q = p;
7806 x = division;
7807 y = chain;
7808 *lnL = 0.0;
7809 x = whichSitePats;
7810
7811 MrBayesPrint ("%s Parsimony calculator for codons not yet implemented\n", spacer);
7812
7813 return ERROR;
7814 }
7815 # endif
7816
7817
7818 /*------------------------------------------------------------------
7819 |
|   Likelihood_ParsStd: likelihood under the Tuffley and Steel (1997)
7821 | model for characters with constant number of states. The idea
7822 | is described in:
7823 |
7824 | Tuffley, C., and M. Steel. 1997. Links between maximum likelihood
7825 | and maximum parsimony under a simple model of site substitution.
7826 | Bull. Math. Bio. 59:581-607.
7827 |
7828 | The likelihood under the Tuffley and Steel (1997) model is:
7829 |
7830 | L = k^[-(T + n)]
7831 |
7832 | where L is the likelihood
7833 | k is the number of character states
7834 | T is the parsimony tree length
7835 | n is the number of characters
7836 |
7837 | The parsimony calculator does not use character packing; this is
7838 | to enable reweighting of characters
7839 |
7840 | Note that this is an empirical Bayes approach in that it uses the
7841 | maximum likelihood branch length.
7842 |
7843 | This variant of the calculator assumes that the number of states
7844 | is variable. It does not take state order into account.
7845 |
7846 -------------------------------------------------------------------*/
int Likelihood_ParsStd (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
{
    int         c, i, *nStates, atBasalNode;
    BitsLong    *pL, *pR, *pP, *pA, x;
    CLFlt       *treeLength, *nSitesOfPat;
    Tree        *t;
    ModelInfo   *m;

    /* model settings and tree for this division */
    m = &modelSettings[division];
    t = GetTree(m->brlens,chain,state[chain]);

    /* one (weighted) parsimony length per character, starting at zero */
    treeLength = (CLFlt *) SafeCalloc (m->numChars, sizeof (CLFlt));

    /* per-character number of states */
    nStates = m->nStates;

    /* pattern counts (weights) for this division */
    nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;

    /* Full downpass: every interior node is recomputed */
    for (i=0; i<t->nIntNodes; i++)
        {
        p = t->intDownPass[i];

        /* flip space */
        FlipCondLikeSpace(m, chain, p->index);

        /* parsimony sets of the two descendants and of the node itself */
        pL = m->parsSets[m->condLikeIndex[chain][p->left->index ]];
        pR = m->parsSets[m->condLikeIndex[chain][p->right->index]];
        pP = m->parsSets[m->condLikeIndex[chain][p->index       ]];

        /* in an unrooted tree the node just above the root also has
           to be compared with the set of its ancestor */
        atBasalNode = (t->isRooted == NO && p->anc->anc == NULL);
        pA = atBasalNode ? m->parsSets[m->condLikeIndex[chain][p->anc->index]] : NULL;

        for (c=0; c<m->numChars; c++)
            {
            x = pL[c] & pR[c];
            if (x == 0)
                {
                /* no state in common: take the union and count a step */
                x = pL[c] | pR[c];
                treeLength[c] += nSitesOfPat[c];
                }
            if (atBasalNode && (x & pA[c]) == 0)
                treeLength[c] += nSitesOfPat[c];
            pP[c] = x;
            }
        }

    /* lnL = - sum over characters of (T_c + n_c) * ln(k_c) */
    *lnL = 0.0;
    for (c=0; c<m->numChars; c++)
        *lnL -= ((treeLength[c] + nSitesOfPat[c]) * log (nStates[c]));

    /* release the per-character lengths */
    free (treeLength);

    return (NO_ERROR);
}
7927
#if defined(BEAGLE_V3_ENABLED)
/*-----------------------------------------------------------------
|
|   LaunchLogLikeForBeagleMultiPartition: calculate the log likelihood of the
|      new state of the chain for all divisions with Beagle
|
|   Divisions flagged with upDateCl == YES are collected and handed to
|   LaunchBEAGLELogLikeMultiPartition in one call; the stored lnLike of
|   every division with upDateCl == NO is then added to *lnL.
|
-----------------------------------------------------------------*/
void LaunchLogLikeForBeagleMultiPartition(int chain, MrBFlt* lnL)
{
    int         d, divisionCount = 0;
    int         *divisions;
    ModelInfo   *m;

    divisions = (int *) SafeCalloc (numCurrentDivisions, sizeof(int));

    /* Cycle through divisions and recalculate tis and cond likes as necessary. */
    /* Code below does not try to avoid recalculating ti probs for divisions    */
    /* that could share ti probs with other divisions.                          */
    for (d=0; d<numCurrentDivisions; d++)
        {
#       if defined (BEST_MPI_ENABLED)
        if (isDivisionActive[d] == NO)
            continue;
#       endif
        m = &modelSettings[d];
        if (m->upDateCl != YES)
            continue;

        if (m->upDateCijk == YES)
            {
            if (UpDateCijk(d, chain) == ERROR)
                {
                /* NOTE(review): the loop carries on after this assignment;
                   whether the value survives the call to
                   LaunchBEAGLELogLikeMultiPartition below cannot be told
                   from this function - confirm */
                (*lnL) = MRBFLT_NEG_MAX;  /* effectively abort the move */
                continue;
                }
            m->upDateAll = YES;
            }

        /* queue the division for the BEAGLE call */
        divisions[divisionCount] = d;
#       if defined (DEBUG_MB_BEAGLE_MULTIPART)
        printf("divisions[%d] = %d\n", divisionCount, d);
#       endif
        divisionCount++;
        }

    LaunchBEAGLELogLikeMultiPartition(divisions, divisionCount, chain, lnL);

    if (divisionCount != numCurrentDivisions)
        {
        /* add log likelihood of divisions that were not updated */
        for (d=0; d<numCurrentDivisions; d++)
            {
            m = &modelSettings[d];
            if (m->upDateCl == NO)
                (*lnL) += m->lnLike[2*chain + state[chain]];
            }
        }

    free(divisions);
}
#endif  /* BEAGLE_V3_ENABLED */
7986
7987 /*-----------------------------------------------------------------
7988 |
7989 | LaunchLogLikeForDivision: calculate the log likelihood of the
7990 | new state of the chain for a single division
7991 |
7992 -----------------------------------------------------------------*/
void LaunchLogLikeForDivision(int chain, int d, MrBFlt* lnL)
{
    int         nodeIdx;
    TreeNode    *node;
    ModelInfo   *m;
    Tree        *tree;
#   if defined (TIMING_ANALIZ)
    clock_t     CPUTimeStart;
#   endif

    m    = &modelSettings[d];
    tree = GetTree(m->brlens, chain, state[chain]);

    /* refresh the cijk information first; a failure aborts the move */
    if (m->upDateCijk == YES)
        {
        if (UpDateCijk(d, chain) == ERROR)
            {
            (*lnL) = MRBFLT_NEG_MAX; /* effectively abort the move */
            return;
            }
        m->upDateAll = YES;
        }

#   if defined (BEAGLE_ENABLED)
    /* divisions handled by BEAGLE take a separate path */
    if (m->useBeagle == YES)
        {
        LaunchBEAGLELogLikeForDivision(chain, d, m, tree, lnL);
        return;
        }
#   endif

    /* Flip and copy or reset site scalers */
    FlipSiteScalerSpace(m, chain);
    if (m->upDateAll == YES)
        ResetSiteScalers(m, chain);
    else
        CopySiteScalers(m, chain);

    /* ti probs, conditional likelihoods and scalers are only
       maintained here when parsModelId is NO */
    if (m->parsModelId == NO)
        {
        for (nodeIdx=0; nodeIdx<tree->nIntNodes; nodeIdx++)
            {
            node = tree->intDownPass[nodeIdx];

            /* shift state of ti probs for the left descendant and recompute */
            if (node->left->upDateTi == YES)
                {
                FlipTiProbsSpace (m, chain, node->left->index);
                m->TiProbs (node->left, d, chain);
                }

            /* same for the right descendant */
            if (node->right->upDateTi == YES)
                {
                FlipTiProbsSpace (m, chain, node->right->index);
                m->TiProbs (node->right, d, chain);
                }

            /* unrooted tree: the node just above the root always gets
               fresh ti probs (its upDateTi flag is not consulted) */
            if (tree->isRooted == NO && node->anc->anc == NULL)
                {
                FlipTiProbsSpace (m, chain, node->index);
                m->TiProbs (node, d, chain);
                }

            if (node->upDateCl != YES)
                continue;

            /* conditional likelihoods of the node */
            if (tree->isRooted == NO && node->anc->anc == NULL)
                {
                TIME(m->CondLikeRoot (node, d, chain),CPUCondLikeRoot);
                }
            else
                {
                TIME(m->CondLikeDown (node, d, chain),CPUCondLikeDown);
                }

            /* an unscaledNodes count of 0 together with a partial
               update: take the node's scalers out of the site scalers */
            if (m->unscaledNodes[chain][node->index] == 0 && m->upDateAll == NO)
                {
#               if defined (SSE_ENABLED)
                if (m->useVec == VEC_SSE)
                    {
                    TIME(RemoveNodeScalers_SSE (node, d, chain),CPUScalersRemove);
                    }
#               if defined (AVX_ENABLED)
                else if (m->useVec == VEC_AVX)
                    {
                    TIME(RemoveNodeScalers_AVX (node, d, chain),CPUScalersRemove);
                    }
#               endif
                else
                    {
                    TIME(RemoveNodeScalers (node, d, chain),CPUScalersRemove);
                    }
#               else
                TIME(RemoveNodeScalers (node, d, chain),CPUScalersRemove);
#               endif
                }

            /* the node now counts as unscaled, together with everything
               unscaled in its two subtrees */
            FlipNodeScalerSpace (m, chain, node->index);
            m->unscaledNodes[chain][node->index] = 1 + m->unscaledNodes[chain][node->left->index] + m->unscaledNodes[chain][node->right->index];

            /* rescale once the count reaches the rescaling frequency,
               but never at the node just above the root */
            if (m->unscaledNodes[chain][node->index] >= m->rescaleFreq[chain] && node->anc->anc != NULL)
                {
                TIME(m->CondLikeScaler (node, d, chain),CPUScalers);
                }
            }
        }

    /* finally the division's likelihood function itself */
    TIME(m->Likelihood (tree->root->left, d, chain, lnL, (chainId[chain] % chainParams.numChains)),CPULilklihood);
    return;
}
8113
8114
8115 /*----------------------------------------------------------------
8116 |
8117 | RemoveNodeScalers: Remove node scalers
8118 |
8119 -----------------------------------------------------------------*/
int RemoveNodeScalers (TreeNode *p, int division, int chain)
{
    int         site;
    CLFlt       *nodeScaler, *siteScaler;
    ModelInfo   *m = &modelSettings[division];

    /* only valid for a node whose unscaledNodes count is 0 */
    assert (m->unscaledNodes[chain][p->index] == 0);

    /* scalers of the node, and the site scalers of the chain */
    nodeScaler = m->scalers[m->nodeScalerIndex[chain][p->index]];
    siteScaler = m->scalers[m->siteScalerIndex[chain]];

    /* subtract the node's contribution character by character */
    site = 0;
    while (site < m->numChars)
        {
        siteScaler[site] -= nodeScaler[site];
        site++;
        }

    return NO_ERROR;
}
8141
8142
8143 #if defined (AVX_ENABLED)
8144 /*----------------------------------------------------------------
8145 |
8146 | RemoveNodeScalers_AVX: Remove node scalers, AVX code
8147 |
8148 -----------------------------------------------------------------*/
int RemoveNodeScalers_AVX (TreeNode *p, int division, int chain)
{
    int         v;
    __m256      *nodeScalerVec, *siteScalerVec;
    ModelInfo   *m = &modelSettings[division];

    /* only valid for a node whose unscaledNodes count is 0 */
    assert (m->unscaledNodes[chain][p->index] == 0);

    /* view the node scalers and the site scalers as __m256 vectors */
    nodeScalerVec = (__m256*)(m->scalers[m->nodeScalerIndex[chain][p->index]]);
    siteScalerVec = (__m256*)(m->scalers[m->siteScalerIndex[chain]]);

    /* subtract the node's contribution, one vector at a time */
    v = 0;
    while (v < m->numVecChars)
        {
        siteScalerVec[v] = _mm256_sub_ps(siteScalerVec[v], nodeScalerVec[v]);
        v++;
        }

    return NO_ERROR;
}
8173 #endif
8174
8175
8176 #if defined (SSE_ENABLED)
8177 /*----------------------------------------------------------------
8178 |
8179 | RemoveNodeScalers_SSE: Remove node scalers, SSE code
8180 |
8181 -----------------------------------------------------------------*/
int RemoveNodeScalers_SSE (TreeNode *p, int division, int chain)
{
    int         v;
    __m128      *nodeScalerVec, *siteScalerVec;
    ModelInfo   *m = &modelSettings[division];

    /* only valid for a node whose unscaledNodes count is 0 */
    assert (m->unscaledNodes[chain][p->index] == 0);

    /* view the node scalers and the site scalers as __m128 vectors */
    nodeScalerVec = (__m128*)(m->scalers[m->nodeScalerIndex[chain][p->index]]);
    siteScalerVec = (__m128*)(m->scalers[m->siteScalerIndex[chain]]);

    /* subtract the node's contribution, one vector at a time */
    v = 0;
    while (v < m->numVecChars)
        {
        siteScalerVec[v] = _mm_sub_ps(siteScalerVec[v], nodeScalerVec[v]);
        v++;
        }

    return NO_ERROR;
}
8206 #endif
8207
8208
int SetBinaryQMatrix (MrBFlt **a, int whichChain, int division)
{
    MrBFlt      *bs, scaler, rateTo0, rateTo1;
    ModelInfo   *m = &modelSettings[division];

    /* this rate matrix is 2 X 2 only */
    assert (m->numModelStates == 2);

    /* state frequencies of the chain's current state */
    bs = GetParamSubVals (m->stateFreq, whichChain, state[whichChain]);

    /* off-diagonal rates are the target state's frequency times
       the scaler 1 / (2 * bs[0] * bs[1]) */
    scaler  = 1.0 / (2*bs[0]*bs[1]);
    rateTo1 = bs[1]*scaler;
    rateTo0 = bs[0]*scaler;

    /* each row sums to zero */
    a[0][0] = -rateTo1;
    a[0][1] =  rateTo1;
    a[1][0] =  rateTo0;
    a[1][1] = -rateTo0;

    return (NO_ERROR);
}
8227
8228
SetNucQMatrix(MrBFlt ** a,int n,int whichChain,int division,MrBFlt rateMult,MrBFlt * rA,MrBFlt * rS)8229 int SetNucQMatrix (MrBFlt **a, int n, int whichChain, int division, MrBFlt rateMult, MrBFlt *rA, MrBFlt *rS)
8230 {
8231 register int i, j, k;
8232 int isTransition=0, nDiff, rtNum=0;
8233 MrBFlt scaler, mult=0.0, probOn, sum, *swr, s01, s10, s[4][4], nonsyn, *rateValues=NULL, *bs, dN, dS;
8234 ModelInfo *m;
8235 ModelParams *mp;
8236 # if defined BEAGLE_ENABLED
8237 MrBFlt trans;
8238 # endif
8239
8240 /* set up pointers to the appropriate model information */
8241 mp = &modelParams[division];
8242 m = &modelSettings[division];
8243 assert (m->numModelStates == n);
8244
8245 /* All of the models that are set up in this function require the frequencies
8246 of the nucleotides (or doublets or codons). They will also require either
8247 a transition/transversion rate ratio or the GTR rate parameters. The
8248 "rateValues" will either be
8249
          rateValues[0] = transition/transversion rate (kappa)
8251
8252 for nst=2 models or
8253
8254 rateValues[0] = A <-> C rate
8255 rateValues[1] = A <-> G rate
8256 rateValues[2] = A <-> T rate
8257 rateValues[3] = C <-> G rate
8258 rateValues[4] = C <-> T rate
8259 rateValues[5] = G <-> T rate
8260
8261 for nst=6 models. */
8262 bs = GetParamSubVals (m->stateFreq, whichChain, state[whichChain]);
8263 if (m->nst == 2)
8264 {
8265 rateValues = GetParamVals(m->tRatio, whichChain, state[whichChain]);
8266 # if defined (BEAGLE_ENABLED)
8267 /* transversions assumed to have rate 1.0; */
8268 trans = rateValues[0];
8269 if (m->numModelStates == 4) /* code to satisfy Beagle */
8270 {
8271 rateValues = (MrBFlt *) SafeCalloc (6, sizeof(MrBFlt));
8272 rateValues[0] = rateValues[2] = rateValues[3] = rateValues[5] =1.0; /* Setting transversions */
8273 rateValues[1] = rateValues[4] = trans; /* Setting transitions */
8274 }
8275 # endif
8276 }
8277
8278 else if (m->nst == 6 || m->nst == NST_MIXED)
8279 rateValues = GetParamVals(m->revMat, whichChain, state[whichChain]);
8280 # if defined (BEAGLE_ENABLED)
8281 else if (m->nst == 1 && m->numModelStates == 4) /* code to satisfy Beagle */
8282 {
8283 rateValues = (MrBFlt *) SafeCalloc (6, sizeof(MrBFlt));
8284 for (i=0; i<6; i++)
8285 rateValues[i] = 1.0;
8286 }
8287 # endif
8288
8289 if (n == 4)
8290 {
8291 /* 4 X 4 model:
8292
8293 Here, we set the rate matrix for the GTR model (Tavare, 1986). We
8294 need not only the 6 rates for this model (rateValues), but also the
8295 base frequencies (bs). */
8296
8297 /* set diagonal of Q matrix to 0 */
8298 for (i=0; i<4; i++)
8299 a[i][i] = 0.0;
8300
8301 /* initialize Q matrix */
8302 scaler = 0.0;
8303 for (i=0; i<4; i++)
8304 {
8305 for (j=i+1; j<4; j++)
8306 {
8307 if (i == 0 && j == 1)
8308 mult = rateValues[0];
8309 else if (i == 0 && j == 2)
8310 mult = rateValues[1];
8311 else if (i == 0 && j == 3)
8312 mult = rateValues[2];
8313 else if (i == 1 && j == 2)
8314 mult = rateValues[3];
8315 else if (i == 1 && j == 3)
8316 mult = rateValues[4];
8317 else if (i == 2 && j == 3)
8318 mult = rateValues[5];
8319 a[i][i] -= (a[i][j] = bs[j] * mult);
8320 a[j][j] -= (a[j][i] = bs[i] * mult);
8321 scaler += bs[i] * a[i][j];
8322 scaler += bs[j] * a[j][i];
8323 }
8324 }
8325
8326 /* rescale Q matrix */
8327 scaler = 1.0 / scaler;
8328 for (i=0; i<4; i++)
8329 for (j=0; j<4; j++)
8330 a[i][j] *= scaler;
8331 }
8332 else if (n == 8) /* we have a 4 X 4 covarion model */
8333 {
8334 /* 8 X 8 covarion model:
8335
8336 Here, we set the rate matrix for the covarion model (Tuffley and
8337 Steel, 1997). We need the rate parameters of the model
8338 (contained in rateValues), the frequencies of the four nucleotides,
8339 and the switching rates to completely specify the rate matrix. We
8340 first set up the 4 X 4 submatrix that represents changes (the upper
8341 left portion of the 8 X 8 matrix). Note that if we have rate
8342 variation across sites, that we need to deal with the multiplication
8343 in the rate matrix (i.e., we cannot simply deal with rate variation
8344 by multiplying the branch length by a rate multiplier as we can
8345 with other models). Instead, we multiply the scaled rate matrix
8346 by the rate multiplier. */
8347
8348 /* Get the switching rates. The rate of off->on is s01 and the rate
8349 of on->off is s10. The stationary probability of the switch process
8350 is prob1 = s01/(s01+s10) and prob0 = s10/(s01+s10). */
8351 swr = GetParamVals (m->switchRates, whichChain, state[whichChain]);
8352 s01 = swr[0];
8353 s10 = swr[1];
8354 probOn = s01 / (s01 + s10);
8355
8356 /* set matrix a to 0 */
8357 for (i=0; i<8; i++)
8358 for (j=0; j<8; j++)
8359 a[i][j] = 0.0;
8360
8361 /* set up the 4 X 4 matrix representing substitutions (s[][]; upper left) */
8362 if (m->nst == 1)
8363 {
8364 scaler = 0.0;
8365 for (i=0; i<4; i++)
8366 {
8367 for (j=i+1; j<4; j++)
8368 {
8369 s[i][j] = bs[j];
8370 s[j][i] = bs[i];
8371 scaler += bs[i] * s[i][j] * probOn;
8372 scaler += bs[j] * s[j][i] * probOn;
8373 }
8374 }
8375 }
8376 else if (m->nst == 2)
8377 {
8378 scaler = 0.0;
8379 for (i=0; i<4; i++)
8380 {
8381 for (j=i+1; j<4; j++)
8382 {
8383 if ((i == 0 && j == 2) || (i == 2 && j == 0) || (i == 1 && j == 3) || (i == 3 && j == 1))
8384 mult = rateValues[0];
8385 else
8386 mult = 1.0;
8387 s[i][j] = bs[j] * mult;
8388 s[j][i] = bs[i] * mult;
8389 scaler += bs[i] * s[i][j] * probOn;
8390 scaler += bs[j] * s[j][i] * probOn;
8391 }
8392 }
8393 }
8394 else
8395 {
8396 scaler = 0.0;
8397 for (i=0; i<4; i++)
8398 {
8399 for (j=i+1; j<4; j++)
8400 {
8401 if (i == 0 && j == 1)
8402 mult = rateValues[0];
8403 else if (i == 0 && j == 2)
8404 mult = rateValues[1];
8405 else if (i == 0 && j == 3)
8406 mult = rateValues[2];
8407 else if (i == 1 && j == 2)
8408 mult = rateValues[3];
8409 else if (i == 1 && j == 3)
8410 mult = rateValues[4];
8411 else if (i == 2 && j == 3)
8412 mult = rateValues[5];
8413
8414 s[i][j] = bs[j] * mult;
8415 s[j][i] = bs[i] * mult;
8416 scaler += bs[i] * s[i][j] * probOn;
8417 scaler += bs[j] * s[j][i] * probOn;
8418 }
8419 }
8420 }
8421
8422 /* rescale off diagonal elements of s[][] matrix */
8423 scaler = 1.0 / scaler;
8424 for (i=0; i<4; i++)
8425 {
8426 for (j=0; j<4; j++)
8427 {
8428 if (i != j)
8429 s[i][j] *= scaler;
8430 }
8431 }
8432
8433 /* now, scale s[][] by rate factor */
8434 for (i=0; i<4; i++)
8435 {
8436 for (j=0; j<4; j++)
8437 {
8438 if (i != j)
8439 s[i][j] *= rateMult;
8440 }
8441 }
8442
8443 /* put in diagonal elements of s[][] */
8444 for (i=0; i<4; i++)
8445 {
8446 sum = 0.0;
8447 for (j=0; j<4; j++)
8448 {
8449 if (i != j)
8450 sum += s[i][j];
8451 }
8452 s[i][i] = -(sum + s10);
8453 }
8454
8455 /* Now, put s[][] into top left portion of a matrix and fill in the
8456 other parts of the matrix with the appropriate switching rates. */
8457 for (i=0; i<4; i++)
8458 for (j=0; j<4; j++)
8459 a[i][j] = s[i][j];
8460 for (i=4; i<8; i++)
8461 a[i][i] = -s01;
8462 a[0][4] = s10;
8463 a[1][5] = s10;
8464 a[2][6] = s10;
8465 a[3][7] = s10;
8466 a[4][0] = s01;
8467 a[5][1] = s01;
8468 a[6][2] = s01;
8469 a[7][3] = s01;
8470
8471 # if 0
8472 for (i=0; i<8; i++)
8473 {
8474 for (j=0; j<8; j++)
8475 printf ("%1.10lf ", a[i][j]);
8476 printf ("\n");
8477 }
8478 for (i=0; i<4; i++)
8479 printf ("%lf ", bs[i]);
8480 printf ("\n");
8481 printf ("s01 = %lf s10 = %lf pi1 = %lf pi0 = %lf\n", s01, s10, probOn, 1-probOn);
8482 # endif
8483 }
8484 else if (n == 16)
8485 {
8486 /* 16 X 16 doublet model:
8487
8488 We have a doublet model. The states are in the order AA, AC, AG, AT, CA, CC
8489 CG, CT, GA, GC, GG, GT, TA, TC, TG, TT. The rate matrix is straight-forward
8490 to set up. We simply multiply the rate parameter (e.g., the ti/tv rate
8491 ratio) by the doublet frequencies. */
8492
8493 /* set diagonal of Q matrix to 0 */
8494 for (i=0; i<16; i++)
8495 a[i][i] = 0.0;
8496
8497 if (m->nst == 1) /* F81-like doublet model */
8498 {
8499 scaler = 0.0;
8500 for (i=0; i<16; i++)
8501 {
8502 for (j=i+1; j<16; j++)
8503 {
8504 if (((doublet[i].first & doublet[j].first) == 0) && ((doublet[i].second & doublet[j].second) == 0))
8505 mult = 0.0;
8506 else
8507 mult = 1.0;
8508 a[i][i] -= (a[i][j] = bs[j] * mult);
8509 a[j][j] -= (a[j][i] = bs[i] * mult);
8510 scaler += bs[i] * a[i][j];
8511 scaler += bs[j] * a[j][i];
8512 }
8513 }
8514 }
8515 else if (m->nst == 2) /* HKY-like doublet model */
8516 {
8517 scaler = 0.0;
8518 for (i=0; i<16; i++)
8519 {
8520 for (j=i+1; j<16; j++)
8521 {
8522 if (((doublet[i].first & doublet[j].first) == 0) && ((doublet[i].second & doublet[j].second) == 0))
8523 mult = 0.0;
8524 else
8525 {
8526 if ((doublet[i].first & doublet[j].first) == 0)
8527 {
8528 if ((doublet[i].first + doublet[j].first) == 5 || (doublet[i].first + doublet[j].first) == 10)
8529 mult = rateValues[0];
8530 else
8531 mult = 1.0;
8532 }
8533 else
8534 {
8535 if ((doublet[i].second + doublet[j].second) == 5 || (doublet[i].second + doublet[j].second) == 10)
8536 mult = rateValues[0];
8537 else
8538 mult = 1.0;
8539 }
8540 }
8541 a[i][i] -= (a[i][j] = bs[j] * mult);
8542 a[j][j] -= (a[j][i] = bs[i] * mult);
8543 scaler += bs[i] * a[i][j];
8544 scaler += bs[j] * a[j][i];
8545 }
8546 }
8547 }
8548 else /* GTR-like doublet model */
8549 {
8550 scaler = 0.0;
8551 for (i=0; i<16; i++)
8552 {
8553 for (j=i+1; j<16; j++)
8554 {
8555 if (((doublet[i].first & doublet[j].first) == 0) && ((doublet[i].second & doublet[j].second) == 0))
8556 mult = 0.0;
8557 else
8558 {
8559 if ((doublet[i].first & doublet[j].first) == 0)
8560 {
8561 if ((doublet[i].first + doublet[j].first) == 3)
8562 mult = rateValues[0];
8563 else if ((doublet[i].first + doublet[j].first) == 5)
8564 mult = rateValues[1];
8565 else if ((doublet[i].first + doublet[j].first) == 9)
8566 mult = rateValues[2];
8567 else if ((doublet[i].first + doublet[j].first) == 6)
8568 mult = rateValues[3];
8569 else if ((doublet[i].first + doublet[j].first) == 10)
8570 mult = rateValues[4];
8571 else
8572 mult = rateValues[5];
8573 }
8574 else
8575 {
8576 if ((doublet[i].second + doublet[j].second) == 3)
8577 mult = rateValues[0];
8578 else if ((doublet[i].second + doublet[j].second) == 5)
8579 mult = rateValues[1];
8580 else if ((doublet[i].second + doublet[j].second) == 9)
8581 mult = rateValues[2];
8582 else if ((doublet[i].second + doublet[j].second) == 6)
8583 mult = rateValues[3];
8584 else if ((doublet[i].second + doublet[j].second) == 10)
8585 mult = rateValues[4];
8586 else
8587 mult = rateValues[5];
8588 }
8589 }
8590 a[i][i] -= (a[i][j] = bs[j] * mult);
8591 a[j][j] -= (a[j][i] = bs[i] * mult);
8592 scaler += bs[i] * a[i][j];
8593 scaler += bs[j] * a[j][i];
8594 }
8595 }
8596 }
8597
8598
8599 /* rescale Q matrix */
8600 scaler = 1.0 / scaler;
8601 for (i=0; i<16; i++)
8602 for (j=0; j<16; j++)
8603 a[i][j] *= scaler;
8604 }
8605 else
8606 {
8607 /* 64(ish) X 64(ish) codon model:
8608
8609 Here, we set the rate matrix for the codon model (see Goldman and
           Yang, 1994). Note that we can specify any general type of codon
8611 model, with these constraints:
8612
8613 a[i][j] = 0 -> if i and j differ at 2 or 3 nucleotides
8614 a[i][j] = rateValues[0] * bs[j] -> if synonymous A <-> C change
8615 a[i][j] = rateValues[1] * bs[j] -> if synonymous A <-> G change
8616 a[i][j] = rateValues[2] * bs[j] -> if synonymous A <-> T change
8617 a[i][j] = rateValues[3] * bs[j] -> if synonymous C <-> G change
8618 a[i][j] = rateValues[4] * bs[j] -> if synonymous C <-> T change
8619 a[i][j] = rateValues[5] * bs[j] -> if synonymous G <-> T change
8620
8621 a[i][j] = rateValues[0] * nonsyn * bs[j] -> if nonsynonymous A <-> C change
8622 a[i][j] = rateValues[1] * nonsyn * bs[j] -> if nonsynonymous A <-> G change
8623 a[i][j] = rateValues[2] * nonsyn * bs[j] -> if nonsynonymous A <-> T change
8624 a[i][j] = rateValues[3] * nonsyn * bs[j] -> if nonsynonymous C <-> G change
8625 a[i][j] = rateValues[4] * nonsyn * bs[j] -> if nonsynonymous C <-> T change
8626 a[i][j] = rateValues[5] * nonsyn * bs[j] -> if nonsynonymous G <-> T change
8627
8628 Other models, such as the one used by Nielsen & Yang (1998) can be obtained
           from this model by restricting transitions and transversions to have the same rate.
8630 nonsyn is the nonsynonymous/synonymous rate ratio (often called the
8631 dN/dS ratio). If we are in this part of the function, then we rely on it
8632 being called with the "rateMult" parameter specifying the dN/dS ratio. Note
8633 that the size of the matrix will never be 64 X 64 as we only consider changes
8634 among coding triplets (i.e., we exclude the stop codons). */
8635
8636 /* get the nonsynonymous/synonymous rate ratio */
8637 nonsyn = rateMult;
8638
8639 /* set diagonal of Q matrix to 0 */
8640 for (i=0; i<n; i++)
8641 a[i][i] = 0.0;
8642
8643 /* set dN and dS rates to zero */
8644 dN = dS = 0.0;
8645
8646 if (m->nst == 1) /* F81-like codon model */
8647 {
8648 scaler = 0.0;
8649 for (i=0; i<n; i++)
8650 {
8651 for (j=i+1; j<n; j++)
8652 {
8653 nDiff = 0;
8654 for (k=0; k<3; k++)
8655 {
8656 if (mp->codonNucs[i][k] != mp->codonNucs[j][k])
8657 nDiff++;
8658 }
8659 if (nDiff > 1)
8660 {
8661 mult = 0.0;
8662 }
8663 else
8664 {
8665 if (mp->codonAAs[i] == mp->codonAAs[j])
8666 mult = 1.0;
8667 else
8668 mult = nonsyn;
8669 }
8670
8671 a[i][i] -= (a[i][j] = bs[j] * mult);
8672 a[j][j] -= (a[j][i] = bs[i] * mult);
8673 if (mp->codonAAs[i] == mp->codonAAs[j])
8674 dS += (bs[i] * a[i][j] + bs[j] * a[j][i]);
8675 else
8676 dN += (bs[i] * a[i][j] + bs[j] * a[j][i]);
8677 scaler += bs[i] * a[i][j];
8678 scaler += bs[j] * a[j][i];
8679 }
8680 }
8681 }
8682 else if (m->nst == 2) /* HKY-like codon model */
8683 {
8684 scaler = 0.0;
8685 for (i=0; i<n; i++)
8686 {
8687 for (j=i+1; j<n; j++)
8688 {
8689 nDiff = 0;
8690 for (k=0; k<3; k++)
8691 {
8692 if (mp->codonNucs[i][k] != mp->codonNucs[j][k])
8693 {
8694 nDiff++;
8695 if ((mp->codonNucs[i][k] == 0 && mp->codonNucs[j][k] == 2) || (mp->codonNucs[i][k] == 2 && mp->codonNucs[j][k] == 0) ||
8696 (mp->codonNucs[i][k] == 1 && mp->codonNucs[j][k] == 3) || (mp->codonNucs[i][k] == 3 && mp->codonNucs[j][k] == 1))
8697 isTransition = YES;
8698 else
8699 isTransition = NO;
8700 }
8701 }
8702 if (nDiff > 1)
8703 {
8704 mult = 0.0;
8705 }
8706 else
8707 {
8708 if (mp->codonAAs[i] == mp->codonAAs[j])
8709 mult = 1.0;
8710 else
8711 mult = nonsyn;
8712 if (isTransition == YES)
8713 mult *= rateValues[0];
8714 }
8715
8716 a[i][i] -= (a[i][j] = bs[j] * mult);
8717 a[j][j] -= (a[j][i] = bs[i] * mult);
8718 if (mp->codonAAs[i] == mp->codonAAs[j])
8719 dS += (bs[i] * a[i][j] + bs[j] * a[j][i]);
8720 else
8721 dN += (bs[i] * a[i][j] + bs[j] * a[j][i]);
8722 scaler += bs[i] * a[i][j];
8723 scaler += bs[j] * a[j][i];
8724 }
8725 }
8726 }
8727 else /* GTR-like codon model */
8728 {
8729 scaler = 0.0;
8730 for (i=0; i<n; i++)
8731 {
8732 for (j=i+1; j<n; j++)
8733 {
8734 nDiff = 0;
8735 for (k=0; k<3; k++)
8736 {
8737 if (mp->codonNucs[i][k] != mp->codonNucs[j][k])
8738 {
8739 nDiff++;
8740 if ((mp->codonNucs[i][k] == 0 && mp->codonNucs[j][k] == 1) || (mp->codonNucs[i][k] == 1 && mp->codonNucs[j][k] == 0))
8741 rtNum = 0;
8742 else if ((mp->codonNucs[i][k] == 0 && mp->codonNucs[j][k] == 2) || (mp->codonNucs[i][k] == 2 && mp->codonNucs[j][k] == 0))
8743 rtNum = 1;
8744 else if ((mp->codonNucs[i][k] == 0 && mp->codonNucs[j][k] == 3) || (mp->codonNucs[i][k] == 3 && mp->codonNucs[j][k] == 0))
8745 rtNum = 2;
8746 else if ((mp->codonNucs[i][k] == 1 && mp->codonNucs[j][k] == 2) || (mp->codonNucs[i][k] == 2 && mp->codonNucs[j][k] == 1))
8747 rtNum = 3;
8748 else if ((mp->codonNucs[i][k] == 1 && mp->codonNucs[j][k] == 3) || (mp->codonNucs[i][k] == 3 && mp->codonNucs[j][k] == 1))
8749 rtNum = 4;
8750 else
8751 rtNum = 5;
8752 }
8753 }
8754 if (nDiff > 1)
8755 {
8756 mult = 0.0;
8757 }
8758 else
8759 {
8760 if (mp->codonAAs[i] == mp->codonAAs[j])
8761 mult = 1.0;
8762 else
8763 mult = nonsyn;
8764 if (rtNum == 0)
8765 mult *= rateValues[0];
8766 else if (rtNum == 1)
8767 mult *= rateValues[1];
8768 else if (rtNum == 2)
8769 mult *= rateValues[2];
8770 else if (rtNum == 3)
8771 mult *= rateValues[3];
8772 else if (rtNum == 4)
8773 mult *= rateValues[4];
8774 else
8775 mult *= rateValues[5];
8776 }
8777
8778 a[i][i] -= (a[i][j] = bs[j] * mult);
8779 a[j][j] -= (a[j][i] = bs[i] * mult);
8780 if (mp->codonAAs[i] == mp->codonAAs[j])
8781 dS += (bs[i] * a[i][j] + bs[j] * a[j][i]);
8782 else
8783 dN += (bs[i] * a[i][j] + bs[j] * a[j][i]);
8784 scaler += bs[i] * a[i][j];
8785 scaler += bs[j] * a[j][i];
8786 }
8787 }
8788 }
8789
8790 /* rescale Q matrix */
8791 if (m->nucModelId == NUCMODEL_CODON && m->numOmegaCats > 1)
8792 {
8793 /* If we have a positive selection model with multiple categories, then
8794 we do not rescale the rate matrix until we have finished generating
               all of the necessary rate matrices. The rescaling occurs in
8796 UpDateCijk. */
8797 (*rA) = dN;
8798 (*rS) = dS;
8799 }
8800 else
8801 {
8802 scaler = 1.0 / scaler;
8803 for (i=0; i<n; i++)
8804 for (j=0; j<n; j++)
8805 a[i][j] *= scaler;
8806 (*rA) = (*rS) = 1.0;
8807 }
8808 }
8809
8810 # if 0
8811 for (i=0; i<n; i++)
8812 {
8813 for (j=0; j<n; j++)
8814 printf ("%0.5lf ", a[i][j]);
8815 printf ("\n");
8816 }
8817 # endif
8818
8819 # if defined (BEAGLE_ENABLED)
8820 if ((m->nst == 1 || m->nst == 2) && m->numModelStates == 4)
8821 free (rateValues);
8822 # endif
8823
8824 return (NO_ERROR);
8825 }
8826
8827
/* Helper macros private to SetProteinQMatrix. They are macros rather than
   static functions because the element types of the fixed amino acid rate
   tables and frequency vectors are declared outside this part of the file.
   Both are #undef'd immediately after the function. */

/* copy a complete 20 X 20 rate table src into the upper-left corner of dst */
#define SPQM_COPY_FIXED(dst, src)                                       \
    do  {                                                               \
        int r_, c_;                                                     \
        for (r_=0; r_<20; r_++)                                         \
            for (c_=0; c_<20; c_++)                                     \
                (dst)[r_][c_] = (src)[r_][c_];                          \
        } while (0)

/* copy the off-diagonal elements of the 20 X 20 rate table src into dst and
   accumulate in sc the sum over all ordered pairs (i,j), i != j, of
   pi[i] * rate(i,j) * pOn */
#define SPQM_FILL_COVARION(dst, src, pi, pOn, sc)                       \
    do  {                                                               \
        int r_, c_;                                                     \
        (sc) = 0.0;                                                     \
        for (r_=0; r_<20; r_++)                                         \
            {                                                           \
            for (c_=r_+1; c_<20; c_++)                                  \
                {                                                       \
                (dst)[r_][c_] = (src)[r_][c_];                          \
                (dst)[c_][r_] = (src)[c_][r_];                          \
                (sc) += (pi)[r_] * (dst)[r_][c_] * (pOn);               \
                (sc) += (pi)[c_] * (dst)[c_][r_] * (pOn);               \
                }                                                       \
            }                                                           \
        } while (0)

/*-----------------------------------------------------------------
|
|   SetProteinQMatrix: fill the instantaneous rate matrix a[][] for
|       an amino acid model.
|
|   a          n X n matrix (a[row][col]) that receives the rates
|   n          number of states; must be 20 (ordinary amino acid
|              model) or 40 (covarion model)
|   whichChain chain whose current parameter values are used
|   division   index of the data partition in modelSettings[]
|   rateMult   rate multiplier; only used when n == 40, where the
|              substitution part of the matrix is multiplied by it
|
|   n == 20:
|       Poisson      off-diagonals 1/19, diagonals -1
|       equal-in     a[i][j] = bs[j], rescaled by 1 / (sum over
|                    i != j of bs[i] * a[i][j])
|       GTR          a[i][j] = bs[j] * rt[k], where k numbers the
|                    pairs i < j row by row, rescaled in the same way
|       fixed models the stored 20 X 20 table is copied unchanged
|
|   n == 40:
|       The upper-left 20 X 20 quadrant holds the substitution
|       rates, normalized by the sum over i != j of
|       pi[i] * a[i][j] * probOn and then multiplied by rateMult.
|       The quadrants linking state i and state 20+i hold the
|       switching rates s10 (i -> 20+i) and s01 (20+i -> i); the
|       lower-right quadrant has only the diagonal -s01.
|
|   Returns NO_ERROR on success; ERROR if n is neither 20 nor 40 or
|       if the amino acid model ID is not one of those handled here.
|
------------------------------------------------------------------*/
int SetProteinQMatrix (MrBFlt **a, int n, int whichChain, int division, MrBFlt rateMult)
{
    register int    i, j, k;
    int             aaModelID;
    MrBFlt          scaler, probOn, sum, *swr, s01, s10, *bs, *rt;
    ModelInfo       *m;

    /* set up pointers to the appropriate model information */
    m = &modelSettings[division];

    /* get amino acid model ID
        AAMODEL_POISSON         0
        AAMODEL_JONES           1
        AAMODEL_DAY             2
        AAMODEL_MTREV           3
        AAMODEL_MTMAM           4
        AAMODEL_WAG             5
        AAMODEL_RTREV           6
        AAMODEL_CPREV           7
        AAMODEL_VT              8
        AAMODEL_BLOSUM          9
        AAMODEL_LG             10
        AAMODEL_EQ             11
        AAMODEL_GTR            12
       A negative m->aaModelId means the ID is itself a model parameter and
       has to be read from the current state of the chain. */
    if (m->aaModelId >= 0)
        aaModelID = m->aaModelId;
    else
        aaModelID = (int)*GetParamVals(m->aaModel, whichChain, state[whichChain]);

    /* Make certain that we have either 20 or 40 states. Anything
       else means we have a real problem. */
    if (n != 20 && n != 40)
        {
        /* The format contains a %s conversion, so a matching argument must be
           supplied (calling a printf-style function without it is undefined).
           NOTE(review): if the project has a standard indentation string for
           messages, pass that instead of the empty string. */
        MrBayesPrint ("%s ERROR: There should be 20 or 40 states for the aa model\n", "");
        return (ERROR);
        }

    if (n == 20)
        {
        /* We have a run-of-the-mill amino acid model (i.e., 20 X 20). */
        if (aaModelID == AAMODEL_POISSON)
            {
            /* all changes equally likely; each row leaves at total rate 1 */
            scaler = 1.0 / 19.0;
            for (i=0; i<20; i++)
                {
                for (j=i+1; j<20; j++)
                    {
                    a[i][j] = scaler;
                    a[j][i] = scaler;
                    }
                }
            for (i=0; i<20; i++)
                a[i][i] = -1.0;
            }
        else if (aaModelID == AAMODEL_EQ)
            {
            bs = GetParamSubVals (m->stateFreq, whichChain, state[whichChain]);
            for (i=0; i<20; i++)
                for (j=0; j<20; j++)
                    a[i][j] = 0.0;
            scaler = 0.0;
            for (i=0; i<20; i++)
                {
                for (j=i+1; j<20; j++)
                    {
                    a[i][i] -= (a[i][j] = bs[j]);
                    a[j][j] -= (a[j][i] = bs[i]);
                    scaler += bs[i] * a[i][j];
                    scaler += bs[j] * a[j][i];
                    }
                }
            scaler = 1.0 / scaler;
            for (i=0; i<20; i++)
                for (j=0; j<20; j++)
                    a[i][j] *= scaler;
            }
        else if (aaModelID == AAMODEL_GTR)
            {
            bs = GetParamSubVals (m->stateFreq, whichChain, state[whichChain]);
            rt = GetParamVals (m->revMat, whichChain, state[whichChain]);
            for (i=0; i<20; i++)
                for (j=0; j<20; j++)
                    a[i][j] = 0.0;
            scaler = 0.0;
            for (i=k=0; i<20; i++)
                {
                for (j=i+1; j<20; j++)
                    {
                    a[i][i] -= (a[i][j] = bs[j] * rt[k]);
                    a[j][j] -= (a[j][i] = bs[i] * rt[k]);
                    k++;
                    }
                }
            /* the diagonal is minus the row sum, so this is the total rate */
            for (i=0; i<20; i++)
                scaler += -(bs[i] * a[i][i]);
            for (i=0; i<20; i++)
                for (j=0; j<20; j++)
                    a[i][j] /= scaler;
            }
        else if (aaModelID == AAMODEL_JONES)
            SPQM_COPY_FIXED (a, aaJones);
        else if (aaModelID == AAMODEL_DAY)
            SPQM_COPY_FIXED (a, aaDayhoff);
        else if (aaModelID == AAMODEL_MTREV)
            SPQM_COPY_FIXED (a, aaMtrev24);
        else if (aaModelID == AAMODEL_MTMAM)
            SPQM_COPY_FIXED (a, aaMtmam);
        else if (aaModelID == AAMODEL_RTREV)
            SPQM_COPY_FIXED (a, aartREV);
        else if (aaModelID == AAMODEL_WAG)
            SPQM_COPY_FIXED (a, aaWAG);
        else if (aaModelID == AAMODEL_CPREV)
            SPQM_COPY_FIXED (a, aacpREV);
        else if (aaModelID == AAMODEL_VT)
            SPQM_COPY_FIXED (a, aaVt);
        else if (aaModelID == AAMODEL_BLOSUM)
            SPQM_COPY_FIXED (a, aaBlosum);
        else if (aaModelID == AAMODEL_LG)
            SPQM_COPY_FIXED (a, aaLG);
        else
            {
            MrBayesPrint ("%s ERROR: Don't understand which amino acid model is needed\n", "");
            return (ERROR);
            }
#       if 0
        for (i=0; i<20; i++)
            {
            for (j=0; j<20; j++)
                printf ("%1.3lf ", a[i][j]);
            printf ("\n");
            }
#       endif
        }
    else
        {
        /* 40 X 40 covarion model:

           All four quadrants of the matrix are set up here. With rate
           variation across sites we cannot simply multiply the branch
           length by a rate multiplier as we can with other models, because
           the switching rates must not be affected. Instead, we multiply
           the scaled substitution rates by the rate multiplier. */

        /* Get the switching rates. The rate of off->on is s01 and the rate
           of on->off is s10. The stationary probability of the switch process
           is prob1 = s01/(s01+s10) and prob0 = s10/(s01+s10). */
        swr = GetParamVals (m->switchRates, whichChain, state[whichChain]);
        s01 = swr[0];
        s10 = swr[1];
        probOn = s01 / (s01 + s10);

        /* set matrix a[][] to 0 */
        for (i=0; i<40; i++)
            for (j=0; j<40; j++)
                a[i][j] = 0.0;

        /* fill in the off-diagonals of the upper-left sub matrix (where
           substitutions occur) and accumulate the normalizing constant */
        if (aaModelID == AAMODEL_POISSON)
            {
            scaler = 0.0;
            for (i=0; i<20; i++)
                {
                for (j=i+1; j<20; j++)
                    {
                    a[i][j] = 0.05;
                    a[j][i] = 0.05;
                    scaler += 0.05 * a[i][j] * probOn;
                    scaler += 0.05 * a[j][i] * probOn;
                    }
                }
            }
        else if (aaModelID == AAMODEL_EQ)
            {
            bs = GetParamSubVals (m->stateFreq, whichChain, state[whichChain]);
            scaler = 0.0;
            for (i=0; i<20; i++)
                {
                for (j=i+1; j<20; j++)
                    {
                    a[i][j] = bs[j];
                    a[j][i] = bs[i];
                    scaler += bs[i] * a[i][j] * probOn;
                    scaler += bs[j] * a[j][i] * probOn;
                    }
                }
            }
        else if (aaModelID == AAMODEL_GTR)
            {
            /* Same pattern as every other branch, with the exchangeability
               rt[k] of the pair (i,j) applied. An earlier version built the
               GTR rates, then overwrote them with bs[j] and kept adding to
               an already-used scaler, so the rt values never reached the
               matrix and the normalization was wrong. */
            bs = GetParamSubVals (m->stateFreq, whichChain, state[whichChain]);
            rt = GetParamVals (m->revMat, whichChain, state[whichChain]);
            scaler = 0.0;
            for (i=k=0; i<20; i++)
                {
                for (j=i+1; j<20; j++)
                    {
                    a[i][j] = bs[j] * rt[k];
                    a[j][i] = bs[i] * rt[k];
                    scaler += bs[i] * a[i][j] * probOn;
                    scaler += bs[j] * a[j][i] * probOn;
                    k++;
                    }
                }
            }
        else if (aaModelID == AAMODEL_JONES)
            SPQM_FILL_COVARION (a, aaJones, jonesPi, probOn, scaler);
        else if (aaModelID == AAMODEL_DAY)
            SPQM_FILL_COVARION (a, aaDayhoff, dayhoffPi, probOn, scaler);
        else if (aaModelID == AAMODEL_MTREV)
            SPQM_FILL_COVARION (a, aaMtrev24, mtrev24Pi, probOn, scaler);
        else if (aaModelID == AAMODEL_MTMAM)
            SPQM_FILL_COVARION (a, aaMtmam, mtmamPi, probOn, scaler);
        else if (aaModelID == AAMODEL_RTREV)
            SPQM_FILL_COVARION (a, aartREV, rtrevPi, probOn, scaler);
        else if (aaModelID == AAMODEL_WAG)
            SPQM_FILL_COVARION (a, aaWAG, wagPi, probOn, scaler);
        else if (aaModelID == AAMODEL_CPREV)
            SPQM_FILL_COVARION (a, aacpREV, cprevPi, probOn, scaler);
        else if (aaModelID == AAMODEL_VT)
            SPQM_FILL_COVARION (a, aaVt, vtPi, probOn, scaler);
        else if (aaModelID == AAMODEL_BLOSUM)
            SPQM_FILL_COVARION (a, aaBlosum, blosPi, probOn, scaler);
        else if (aaModelID == AAMODEL_LG)
            SPQM_FILL_COVARION (a, aaLG, lgPi, probOn, scaler);
        else
            {
            MrBayesPrint ("%s ERROR: Don't understand which amino acid model is needed\n", "");
            return (ERROR);
            }

        /* rescale off diagonal elements of Q matrix, then scale them by the
           rate factor (kept as two separate multiplications) */
        scaler = 1.0 / scaler;
        for (i=0; i<20; i++)
            {
            for (j=0; j<20; j++)
                {
                if (i != j)
                    {
                    a[i][j] *= scaler;
                    a[i][j] *= rateMult;
                    }
                }
            }

        /* put in diagonal elements: minus (substitution rate out of i plus
           the rate of switching off) */
        for (i=0; i<20; i++)
            {
            sum = 0.0;
            for (j=0; j<20; j++)
                {
                if (i != j)
                    sum += a[i][j];
                }
            a[i][i] = -(sum + s10);
            }

        /* fill in the other three submatrices */
        for (i=20; i<40; i++)
            a[i][i] = -s01;
        for (i=0; i<20; i++)
            {
            a[i][20+i] = s10;
            a[20+i][i] = s01;
            }
        }

    return (NO_ERROR);
}

#undef SPQM_COPY_FIXED
#undef SPQM_FILL_COVARION
9283
9284
/*-----------------------------------------------------------------
|
|   SetStdQMatrix: set up the rate matrix of an ordered or unordered
|       standard character with unequal stationary state frequencies
|
|   a        nStates X nStates matrix (a[row][col]) that is filled in
|   nStates  number of states of the character
|   bs       stationary frequencies of the states
|   cType    UNORD for an unordered character; any other value is
|            treated as ordered, i.e. only changes between states
|            whose indices differ by exactly one are allowed
|
|   Every allowed change row -> col gets rate bs[col]; the diagonal
|       is minus the row sum. The whole matrix is then divided by
|       the sum of bs[row] * a[row][col] over all allowed changes.
|       Always returns NO_ERROR.
|
------------------------------------------------------------------*/
int SetStdQMatrix (MrBFlt **a, int nStates, MrBFlt *bs, int cType)
{
    int         row, col, allowed;
    MrBFlt      totalRate;

    /* start from an all-zero matrix */
    for (row=0; row<nStates; row++)
        {
        for (col=0; col<nStates; col++)
            a[row][col] = 0.0;
        }

    /* fill in the allowed changes and accumulate the total rate */
    totalRate = 0.0;
    for (row=0; row<nStates; row++)
        {
        for (col=0; col<nStates; col++)
            {
            if (cType == UNORD)
                allowed = (row != col);                             /* any change */
            else
                allowed = (row - col == 1 || col - row == 1);       /* neighbours only */

            if (!allowed)
                continue;

            a[row][col] = bs[col];
            a[row][row] -= a[row][col];
            totalRate += bs[row] * a[row][col];
            }
        }

    /* rescale Q matrix */
    for (row=0; row<nStates; row++)
        {
        for (col=0; col<nStates; col++)
            a[row][col] /= totalRate;
        }

#   if defined DEBUG_SETSTDQMATRIX
    for (row=0; row<nStates; row++)
        {
        for (col=0; col<nStates; col++)
            printf ("%0.5lf ", a[row][col]);
        printf ("\n");
        }
#   endif

    return (NO_ERROR);
}
9350
9351
/*-----------------------------------------------------------------
|
|   TiProbs_Fels: update transition probabilities for a 4by4
|       nucleotide model that has unequal state frequencies but no
|       separate transition/transversion rate (presumably the F81
|       model, judging by the name), with or without rate variation
|
|   One 4 X 4 matrix per rate category is written, row by row, to
|       the transition probability space of node p for this chain.
|       Always returns NO_ERROR.
|
------------------------------------------------------------------*/
int TiProbs_Fels (TreeNode *p, int division, int chain)
{
    int         from, to, cat, pos;
    MrBFlt      elapsed, decay, beta, groupFreq[4], freqTo, groupTo, u, z,
                *freqs, *rateOfCat, rateScale, unitRate, brlen;
    CLFlt       *probs;
    ModelInfo   *m;

    m = &modelSettings[division];

    /* where the transition probabilities of this node are stored */
    probs = m->tiProbs[m->tiProbsIndex[chain][p->index]];

    /* stationary state frequencies */
    freqs = GetParamSubVals (m->stateFreq, chain, state[chain]);

    /* rate of the division, inflated when a proportion of sites is invariable */
    rateScale = GetRate (division, chain);
    if (m->pInvar != NULL)
        rateScale /= (1.0 - (*GetParamVals(m->pInvar, chain, state[chain])));

    /* rate of each category; a single category of rate 1 unless the model
       has a shape or mixture-rates parameter */
    unitRate = 1.0;
    rateOfCat = &unitRate;
    if (m->shape != NULL)
        rateOfCat = GetParamSubVals (m->shape, chain, state[chain]);
    else if (m->mixtureRates != NULL)
        rateOfCat = GetParamSubVals (m->mixtureRates, chain, state[chain]);

    /* rescale beta */
    beta = (0.5 / ((freqs[0] + freqs[2])*(freqs[1] + freqs[3]) + ((freqs[0]*freqs[2]) + (freqs[1]*freqs[3]))));

    /* summed frequency of the pair of states {0,2} or {1,3} that each
       state belongs to */
    groupFreq[0] = groupFreq[2] = (freqs[0] + freqs[2]);
    groupFreq[1] = groupFreq[3] = (freqs[1] + freqs[3]);

    /* find length: a per-node value stored with one of the branch-rate
       parameters takes precedence over the length stored in the node */
    if (m->cppEvents != NULL)
        brlen = GetParamSubVals (m->cppEvents, chain, state[chain])[p->index];
    else if (m->tk02BranchRates != NULL)
        brlen = GetParamSubVals (m->tk02BranchRates, chain, state[chain])[p->index];
    else if (m->igrBranchRates != NULL)
        brlen = GetParamSubVals (m->igrBranchRates, chain, state[chain])[p->index];
    else if (m->mixedBrchRates != NULL)
        brlen = GetParamSubVals (m->mixedBrchRates, chain, state[chain])[p->index];
    else
        brlen = p->length;

    /* Numerical errors will ensue if we allow very large or very small
       branch lengths, which might occur in relaxed clock models, so those
       cases are replaced by their limits below. */
    pos = 0;
    for (cat=0; cat<m->numRateCats; cat++)
        {
        elapsed = brlen * rateScale * rateOfCat[cat];

        if (elapsed < TIME_MIN)
            {
            /* no time to change: identity matrix */
            for (from=0; from<4; from++)
                for (to=0; to<4; to++)
                    probs[pos++] = (from == to) ? 1.0 : 0.0;
            }
        else if (elapsed > TIME_MAX)
            {
            /* saturation: every row is the stationary distribution */
            for (from=0; from<4; from++)
                for (to=0; to<4; to++)
                    probs[pos++] = (CLFlt) freqs[to];
            }
        else
            {
            /* closed-form probabilities; the exponential is the same for
               all 16 cells of this category */
            decay = exp(-beta * elapsed);
            for (from=0; from<4; from++)
                {
                for (to=0; to<4; to++)
                    {
                    groupTo = groupFreq[to];
                    freqTo  = freqs[to];
                    u = 1.0/groupTo - 1.0;
                    z = (groupTo - freqTo) / groupTo;

                    if (from == to)
                        probs[pos++] = (CLFlt) (freqTo + freqTo * u * decay + z * decay);
                    else
                        probs[pos++] = (CLFlt) (freqTo + freqTo * u * decay - (freqTo/groupTo) * decay);
                    }
                }
            }
        }

    return NO_ERROR;
}
9465
9466
9467 /*----------------------------------------------------------------
9468 |
9469 | TiProbs_Gen: Calculates transition probabilities for general
9470 | models with or without rate variation. This function does
9471 | not work with:
9472 |
9473 | 1. codon models with omega variation or
9474 | 2. covarion models with rate variation
9475 |
9476 | In either of these cases, TiProbs_GenCov is used
9477 |
9478 -----------------------------------------------------------------*/
TiProbs_Gen(TreeNode * p,int division,int chain)9479 int TiProbs_Gen (TreeNode *p, int division, int chain)
9480 {
9481 register int i, j, k, n, s, index;
9482 MrBFlt t, *catRate, baseRate, *eigenValues, *cijk, *bs,
9483 EigValexp[64], sum, *ptr, theRate, correctionFactor,
9484 length;
9485 CLFlt *tiP;
9486 ModelInfo *m;
9487
9488 m = &modelSettings[division];
9489 n = m->numModelStates;
9490
9491 /* find the correction factor to make branch lengths
9492 in terms of expected number of substitutions per character */
9493 correctionFactor = 1.0;
9494 if (m->dataType == DNA || m->dataType == RNA)
9495 {
9496 if (m->nucModelId == NUCMODEL_DOUBLET)
9497 correctionFactor = 2.0;
9498 else if (m->nucModelId == NUCMODEL_CODON)
9499 correctionFactor = 3.0;
9500 }
9501
9502 /* find transition probabilities */
9503 tiP = m->tiProbs[m->tiProbsIndex[chain][p->index]];
9504
9505 /* get base rate */
9506 baseRate = GetRate (division, chain);
9507
9508 /* compensate for invariable sites if appropriate */
9509 if (m->pInvar != NULL)
9510 baseRate /= (1.0 - (*GetParamVals(m->pInvar, chain, state[chain])));
9511
9512 /* get category rates */
9513 theRate = 1.0;
9514 if (m->shape != NULL)
9515 catRate = GetParamSubVals (m->shape, chain, state[chain]);
9516 else if (m->mixtureRates != NULL)
9517 catRate = GetParamSubVals (m->mixtureRates, chain, state[chain]);
9518 else
9519 catRate = &theRate;
9520
9521 /* get eigenvalues and cijk pointers */
9522 eigenValues = m->cijks[m->cijkIndex[chain]];
9523 cijk = eigenValues + (2 * n);
9524
9525 /* find length */
9526 if (m->cppEvents != NULL)
9527 {
9528 length = GetParamSubVals (m->cppEvents, chain, state[chain])[p->index];
9529 }
9530 else if (m->tk02BranchRates != NULL)
9531 {
9532 length = GetParamSubVals (m->tk02BranchRates, chain, state[chain])[p->index];
9533 }
9534 else if (m->igrBranchRates != NULL)
9535 {
9536 length = GetParamSubVals (m->igrBranchRates, chain, state[chain])[p->index];
9537 }
9538 else if (m->mixedBrchRates != NULL)
9539 {
9540 length = GetParamSubVals (m->mixedBrchRates, chain, state[chain])[p->index];
9541 }
9542 else
9543 length = p->length;
9544
9545 /* fill in values */
9546 for (k=index=0; k<m->numRateCats; k++)
9547 {
9548 t = length * baseRate * catRate[k] * correctionFactor;
9549
9550 if (t < TIME_MIN)
9551 {
9552 /* Fill in identity matrix */
9553 for (i=0; i<n; i++)
9554 {
9555 for (j=0; j<n; j++)
9556 {
9557 if (i == j)
9558 tiP[index++] = 1.0;
9559 else
9560 tiP[index++] = 0.0;
9561 }
9562 }
9563 }
9564 else if (t > TIME_MAX)
9565 {
9566 /* Get base freq */
9567 bs = GetParamSubVals(m->stateFreq, chain, state[chain]);
9568 /* Fill in stationary matrix */
9569 for (i=0; i<n; i++)
9570 for (j=0; j<n; j++)
9571 tiP[index++] = (CLFlt) bs[j];
9572 }
9573 else
9574 {
9575 /* We actually need to do some work... */
9576 for (s=0; s<n; s++)
9577 EigValexp[s] = exp(eigenValues[s] * t);
9578
9579 ptr = cijk;
9580 for (i=0; i<n; i++)
9581 {
9582 for (j=0; j<n; j++)
9583 {
9584 sum = 0.0;
9585 for (s=0; s<n; s++)
9586 sum += (*ptr++) * EigValexp[s];
9587 tiP[index++] = (CLFlt) ((sum < 0.0) ? 0.0 : sum);
9588 }
9589 }
9590 }
9591 }
9592
9593 # if 0
9594 printf ("v = %lf (%d)\n", t, p->index);
9595 for (i=index=0; i<n; i++)
9596 {
9597 for (j=0; j<n; j++)
9598 printf ("%1.4lf ", tiP[index++]);
9599 printf ("\n");
9600 }
9601 printf ("\n");
9602 # endif
9603
9604 return NO_ERROR;
9605 }
9606
9607
9608 /*----------------------------------------------------------------
9609 |
9610 | TiProbs_GenCov: Calculates transition probabilities for codon
9611 | models with omega variation or covarion models with
9612 | rate variation.
9613 |
9614 -----------------------------------------------------------------*/
TiProbs_GenCov(TreeNode * p,int division,int chain)9615 int TiProbs_GenCov (TreeNode *p, int division, int chain)
9616 {
9617 register int i, j, k, n, s, index;
9618 int sizeOfSingleCijk;
9619 MrBFlt t, *eigenValues, *cijk, EigValexp[64], sum, *ptr, correctionFactor,
9620 length, *bs;
9621 CLFlt *tiP;
9622 ModelInfo *m;
9623
9624 m = &modelSettings[division];
9625 n = m->numModelStates;
9626
9627 /* find the correction factor to make branch lengths
9628 in terms of expected number of substitutions per character */
9629 correctionFactor = 1.0;
9630 if (m->dataType == DNA || m->dataType == RNA)
9631 {
9632 if (m->nucModelId == NUCMODEL_DOUBLET)
9633 correctionFactor = 2.0;
9634 else if (m->nucModelId == NUCMODEL_CODON)
9635 correctionFactor = 3.0;
9636 }
9637
9638 /* find transition probabilities */
9639 tiP = m->tiProbs[m->tiProbsIndex[chain][p->index]];
9640
9641 /* get eigenvalues and cijk pointers */
9642 eigenValues = m->cijks[m->cijkIndex[chain]];
9643 cijk = eigenValues + (2 * n);
9644
9645 /* get offset size (we need to move the pointers to the appropriate
9646 cijk information for these models) */
9647 sizeOfSingleCijk = m->cijkLength / m->nCijkParts;
9648
9649 /* find length */
9650 if (m->cppEvents != NULL)
9651 {
9652 length = GetParamSubVals (m->cppEvents, chain, state[chain])[p->index];
9653 }
9654 else if (m->tk02BranchRates != NULL)
9655 {
9656 length = GetParamSubVals (m->tk02BranchRates, chain, state[chain])[p->index];
9657 }
9658 else if (m->igrBranchRates != NULL)
9659 {
9660 length = GetParamSubVals (m->igrBranchRates, chain, state[chain])[p->index];
9661 }
9662 else if (m->mixedBrchRates != NULL)
9663 {
9664 length = GetParamSubVals (m->mixedBrchRates, chain, state[chain])[p->index];
9665 }
9666 else
9667 length = p->length;
9668
9669 /* numerical errors will ensue if we allow very large or very small branch lengths,
9670 which might occur in relaxed clock models */
9671
9672 /* fill in values */
9673 for (k=index=0; k<m->nCijkParts; k++)
9674 {
9675 t = length * correctionFactor;
9676
9677 if (t < TIME_MIN)
9678 {
9679 /* Fill in identity matrix */
9680 for (i=0; i<n; i++)
9681 {
9682 for (j=0; j<n; j++)
9683 {
9684 if (i == j)
9685 tiP[index++] = 1.0;
9686 else
9687 tiP[index++] = 0.0;
9688 }
9689 }
9690 }
9691 else if (t > TIME_MAX)
9692 {
9693 /* Get base freq */
9694 bs = GetParamSubVals(m->stateFreq, chain, state[chain]);
9695 /* Fill in stationary matrix */
9696 for (i=0; i<n; i++)
9697 for (j=0; j<n; j++)
9698 tiP[index++] = (CLFlt) bs[j];
9699 }
9700 else
9701 {
9702 /* We actually need to do some work... */
9703 for (s=0; s<n; s++)
9704 EigValexp[s] = exp(eigenValues[s] * t);
9705
9706 ptr = cijk;
9707 for (i=0; i<n; i++)
9708 {
9709 for (j=0; j<n; j++)
9710 {
9711 sum = 0.0;
9712 for (s=0; s<n; s++)
9713 sum += (*ptr++) * EigValexp[s];
9714 tiP[index++] = (CLFlt) ((sum < 0.0) ? 0.0 : sum);
9715 }
9716 }
9717
9718 /* increment pointers by m->cijkLength */
9719 if (k+1 < m->nCijkParts)
9720 {
9721 /* shift pointers */
9722 eigenValues += sizeOfSingleCijk;
9723 cijk += sizeOfSingleCijk;
9724 }
9725 }
9726 }
9727
9728 # if 0
9729 for (i=index=0; i<n; i++)
9730 {
9731 for (j=0; j<n; j++)
9732 printf ("%1.4lf ", tiP[index++]);
9733 printf ("\n");
9734 }
9735 # endif
9736
9737 return NO_ERROR;
9738 }
9739
9740
9741 /*-----------------------------------------------------------------
9742 |
9743 | TiProbs_Hky: update transition probabilities for 4by4
9744 | nucleotide model with nst == 2 (K80/HKY85)
9745 | with or without rate variation
9746 |
9747 ------------------------------------------------------------------*/
TiProbs_Hky(TreeNode * p,int division,int chain)9748 int TiProbs_Hky (TreeNode *p, int division, int chain)
9749 {
9750 int i, j, k, index;
9751 MrBFlt t, kap, u, w, x, y, z, beta, bigPi_j[4], pij, bigPij, *pis,
9752 *catRate, baseRate, theRate, length;
9753 CLFlt *tiP;
9754 ModelInfo *m;
9755
9756 m = &modelSettings[division];
9757
9758 /* find transition probabilities */
9759 tiP = m->tiProbs[m->tiProbsIndex[chain][p->index]];
9760
9761 /* get kappa */
9762 kap = *GetParamVals (m->tRatio, chain, state[chain]);
9763
9764 /* get base frequencies */
9765 pis = GetParamSubVals (m->stateFreq, chain, state[chain]);
9766
9767 /* get base rate */
9768 baseRate = GetRate (division, chain);
9769
9770 /* compensate for invariable sites if appropriate */
9771 if (m->pInvar != NULL)
9772 baseRate /= (1.0 - (*GetParamVals(m->pInvar, chain, state[chain])));
9773
9774 /* get category rates */
9775 theRate = 1.0;
9776 if (m->shape != NULL)
9777 catRate = GetParamSubVals (m->shape, chain, state[chain]);
9778 else if (m->mixtureRates != NULL)
9779 catRate = GetParamSubVals (m->mixtureRates, chain, state[chain]);
9780 else
9781 catRate = &theRate;
9782
9783 /* rescale beta */
9784 beta = 0.5 / ((pis[0] + pis[2])*(pis[1] + pis[3]) + kap*((pis[0]*pis[2]) + (pis[1]*pis[3])));
9785
9786 bigPi_j[0] = pis[0] + pis[2];
9787 bigPi_j[1] = pis[1] + pis[3];
9788 bigPi_j[2] = pis[0] + pis[2];
9789 bigPi_j[3] = pis[1] + pis[3];
9790
9791 /* find length */
9792 if (m->cppEvents != NULL)
9793 {
9794 length = GetParamSubVals (m->cppEvents, chain, state[chain])[p->index];
9795 }
9796 else if (m->tk02BranchRates != NULL)
9797 {
9798 length = GetParamSubVals (m->tk02BranchRates, chain, state[chain])[p->index];
9799 }
9800 else if (m->igrBranchRates != NULL)
9801 {
9802 length = GetParamSubVals (m->igrBranchRates, chain, state[chain])[p->index];
9803 }
9804 else if (m->mixedBrchRates != NULL)
9805 {
9806 length = GetParamSubVals (m->mixedBrchRates, chain, state[chain])[p->index];
9807 }
9808 else
9809 length = p->length;
9810
9811 /* numerical errors will ensue if we allow very large or very small branch lengths,
9812 which might occur in relaxed clock models */
9813
9814 /* fill in values */
9815 for (k=index=0; k<m->numRateCats; k++)
9816 {
9817 t = length * baseRate * catRate[k];
9818
9819 if (t < TIME_MIN)
9820 {
9821 /* Fill in identity matrix */
9822 for (i=0; i<4; i++)
9823 {
9824 for (j=0; j<4; j++)
9825 {
9826 if (i == j)
9827 tiP[index++] = 1.0;
9828 else
9829 tiP[index++] = 0.0;
9830 }
9831 }
9832 }
9833 else if (t > TIME_MAX)
9834 {
9835 /* Fill in stationary matrix */
9836 for (i=0; i<4; i++)
9837 for (j=0; j<4; j++)
9838 tiP[index++] = (CLFlt) pis[j];
9839 }
9840 else
9841 {
9842 /* calculate probabilities */
9843 for (i=0; i<4; i++)
9844 {
9845 for (j=0; j<4; j++)
9846 {
9847 bigPij = bigPi_j[j];
9848 pij = pis[j];
9849 u = 1.0/bigPij - 1.0;
9850 w = -beta * (1.0 + bigPij * (kap - 1.0));
9851 x = exp(-beta * t);
9852 y = exp(w * t);
9853 z = (bigPij - pij) / bigPij;
9854
9855 if (i == j)
9856 tiP[index++] = (CLFlt) (pij + pij * u * x + z * y);
9857 else if ((i == 0 && j == 2) || (i == 2 && j == 0) || (i == 1 && j == 3) || (i == 3 && j == 1))
9858 tiP[index++] = (CLFlt) (pij + pij * u * x - (pij/bigPij) * y);
9859 else
9860 tiP[index++] = (CLFlt) (pij * (1.0 - x));
9861 }
9862 }
9863 }
9864 }
9865
9866 return NO_ERROR;
9867 }
9868
9869
9870 /*-----------------------------------------------------------------
9871 |
9872 | TiProbs_JukesCantor: update transition probabilities for 4by4
9873 | nucleotide model with nst == 1 (Jukes-Cantor)
9874 | with or without rate variation
9875 |
9876 ------------------------------------------------------------------*/
TiProbs_JukesCantor(TreeNode * p,int division,int chain)9877 int TiProbs_JukesCantor (TreeNode *p, int division, int chain)
9878 {
9879 /* calculate Jukes Cantor transition probabilities */
9880
9881 int i, j, k, index;
9882 MrBFlt t, *catRate, baseRate, theRate, length;
9883 CLFlt pNoChange, pChange, *tiP;
9884 ModelInfo *m;
9885
9886 m = &modelSettings[division];
9887
9888 /* find transition probabilities */
9889 tiP = m->tiProbs[m->tiProbsIndex[chain][p->index]];
9890
9891 /* get base rate */
9892 baseRate = GetRate (division, chain);
9893
9894 /* compensate for invariable sites if appropriate */
9895 if (m->pInvar != NULL)
9896 baseRate /= (1.0 - (*GetParamVals(m->pInvar, chain, state[chain])));
9897
9898 /* get category rates */
9899 theRate = 1.0;
9900 if (m->shape != NULL)
9901 catRate = GetParamSubVals (m->shape, chain, state[chain]);
9902 else if (m->mixtureRates != NULL)
9903 catRate = GetParamSubVals (m->mixtureRates, chain, state[chain]);
9904 else
9905 catRate = &theRate;
9906
9907 /* find length */
9908 if (m->cppEvents != NULL)
9909 {
9910 length = GetParamSubVals (m->cppEvents, chain, state[chain])[p->index];
9911 }
9912 else if (m->tk02BranchRates != NULL)
9913 {
9914 length = GetParamSubVals (m->tk02BranchRates, chain, state[chain])[p->index];
9915 }
9916 else if (m->igrBranchRates != NULL)
9917 {
9918 length = GetParamSubVals (m->igrBranchRates, chain, state[chain])[p->index];
9919 }
9920 else if (m->mixedBrchRates != NULL)
9921 {
9922 length = GetParamSubVals (m->mixedBrchRates, chain, state[chain])[p->index];
9923 }
9924 else
9925 length = p->length;
9926
9927 /* numerical errors will ensue if we allow very large or very small branch lengths,
9928 which might occur in relaxed clock models */
9929
9930 /* fill in values */
9931 for (k=index=0; k<m->numRateCats; k++)
9932 {
9933 t = length * baseRate * catRate[k];
9934
9935 if (t < TIME_MIN)
9936 {
9937 /* Fill in identity matrix */
9938 for (i=0; i<4; i++)
9939 {
9940 for (j=0; j<4; j++)
9941 {
9942 if (i == j)
9943 tiP[index++] = 1.0;
9944 else
9945 tiP[index++] = 0.0;
9946 }
9947 }
9948 }
9949 else if (t > TIME_MAX)
9950 {
9951 /* Fill in stationary matrix */
9952 for (i=0; i<4; i++)
9953 for (j=0; j<4; j++)
9954 tiP[index++] = 0.25;
9955 }
9956 else
9957 {
9958 /* calculate probabilities */
9959 pChange = (CLFlt) (0.25 - 0.25 * exp(-(4.0/3.0)*t));
9960 pNoChange = (CLFlt) (0.25 + 0.75 * exp(-(4.0/3.0)*t));
9961 for (i=0; i<4; i++)
9962 {
9963 for (j=0; j<4; j++)
9964 {
9965 if (i == j)
9966 tiP[index++] = pNoChange;
9967 else
9968 tiP[index++] = pChange;
9969 }
9970 }
9971 }
9972 }
9973
9974 return NO_ERROR;
9975 }
9976
9977
9978 /*-----------------------------------------------------------------
9979 |
9980 | TiProbs_Res: update transition probabilities for binary
9981 | restriction site model with or without rate variation
9982 |
9983 ------------------------------------------------------------------*/
TiProbs_Res(TreeNode * p,int division,int chain)9984 int TiProbs_Res (TreeNode *p, int division, int chain)
9985 {
9986 int k, index;
9987 MrBFlt baseRate, eV, mu, theRate, v,
9988 *bs, *catRate, length;
9989 CLFlt *tiP;
9990 ModelInfo *m;
9991
9992 /* find model settings for the division */
9993 m = &modelSettings[division];
9994
9995 /* find transition probabilities */
9996 tiP = m->tiProbs[m->tiProbsIndex[chain][p->index]];
9997
9998 /* get base rate */
9999 baseRate = GetRate (division, chain);
10000
10001 /* get category rates */
10002 theRate = 1.0;
10003 if (m->shape != NULL)
10004 catRate = GetParamSubVals (m->shape, chain, state[chain]);
10005 else if (m->mixtureRates != NULL)
10006 catRate = GetParamSubVals (m->mixtureRates, chain, state[chain]);
10007 else
10008 catRate = &theRate;
10009
10010 /* find base frequencies */
10011 bs = GetParamSubVals(m->stateFreq, chain, state[chain]);
10012
10013 /* calculate scaling factor */
10014 mu = 1.0 / (2.0 * bs[0] * bs[1]);
10015
10016 /* find length */
10017 if (m->cppEvents != NULL)
10018 {
10019 length = GetParamSubVals (m->cppEvents, chain, state[chain])[p->index];
10020 }
10021 else if (m->tk02BranchRates != NULL)
10022 {
10023 length = GetParamSubVals (m->tk02BranchRates, chain, state[chain])[p->index];
10024 }
10025 else if (m->igrBranchRates != NULL)
10026 {
10027 length = GetParamSubVals (m->igrBranchRates, chain, state[chain])[p->index];
10028 }
10029 else if (m->mixedBrchRates != NULL)
10030 {
10031 length = GetParamSubVals (m->mixedBrchRates, chain, state[chain])[p->index];
10032 }
10033 else
10034 length = p->length;
10035
10036 /* numerical errors will ensue if we allow very large or very small branch lengths,
10037 which might occur in relaxed clock models */
10038
10039 /* fill in values */
10040 for (k=index=0; k<m->numRateCats; k++)
10041 {
10042 v = length * baseRate * catRate[k];
10043
10044 if (v < TIME_MIN)
10045 {
10046 /* Fill in identity matrix */
10047 tiP[index++] = (CLFlt) (bs[0] + bs[1]);
10048 tiP[index++] = (CLFlt) (bs[1] - bs[1]);
10049 tiP[index++] = (CLFlt) (bs[0] - bs[0]);
10050 tiP[index++] = (CLFlt) (bs[1] + bs[0]);
10051 }
10052 else if (v > TIME_MAX)
10053 {
10054 /* Fill in stationary matrix */
10055 tiP[index++] = (CLFlt) bs[0];
10056 tiP[index++] = (CLFlt) bs[1];
10057 tiP[index++] = (CLFlt) bs[0];
10058 tiP[index++] = (CLFlt) bs[1];
10059 }
10060 else
10061 {
10062 /* calculate probabilities */
10063 eV = exp(-mu * v);
10064 tiP[index++] = (CLFlt) (bs[0] + bs[1] * eV);
10065 tiP[index++] = (CLFlt) (bs[1] - bs[1] * eV);
10066 tiP[index++] = (CLFlt) (bs[0] - bs[0] * eV);
10067 tiP[index++] = (CLFlt) (bs[1] + bs[0] * eV);
10068 }
10069 }
10070
10071 return NO_ERROR;
10072 }
10073
10074
10075 /*-----------------------------------------------------------------
10076 |
10077 | TiProbs_Std: update transition probabilities for
10078 | variable states model with or without rate variation
10079 |
10080 ------------------------------------------------------------------*/
TiProbs_Std(TreeNode * p,int division,int chain)10081 int TiProbs_Std (TreeNode *p, int division, int chain)
10082 {
10083 int b, c, i, j, k, n, s, nStates, index=0, index2;
10084 MrBFlt v, eV1, eV2, eV3, eV4, eV5, *catRate,
10085 baseRate, theRate, pi, f1, f2, f3, f4, f5, f6, f7, root,
10086 *eigenValues, *cijk, sum, *bs, mu, length;
10087 CLFlt pNoChange, pChange, *tiP;
10088 ModelInfo *m;
10089 # if defined (DEBUG_TIPROBS_STD)
10090 int index3;
10091 # endif
10092
10093 m = &modelSettings[division];
10094
10095 /* find transition probabilities */
10096 tiP = m->tiProbs[m->tiProbsIndex[chain][p->index]];
10097
10098 /* get base rate */
10099 baseRate = GetRate (division, chain);
10100
10101 /* get category rates */
10102 theRate = 1.0;
10103 if (m->shape != NULL)
10104 catRate = GetParamSubVals (m->shape, chain, state[chain]);
10105 else if (m->mixtureRates != NULL)
10106 catRate = GetParamSubVals (m->mixtureRates, chain, state[chain]);
10107 else
10108 catRate = &theRate;
10109
10110 # if defined (DEBUG_TIPROBS_STD)
10111 /* find base frequencies */
10112 bs = GetParamStdStateFreqs (m->stateFreq, chain, state[chain]);
10113 # endif
10114
10115 /* find length */
10116 if (m->cppEvents != NULL)
10117 {
10118 length = GetParamSubVals (m->cppEvents, chain, state[chain])[p->index];
10119 }
10120 else if (m->tk02BranchRates != NULL)
10121 {
10122 length = GetParamSubVals (m->tk02BranchRates, chain, state[chain])[p->index];
10123 }
10124 else if (m->igrBranchRates != NULL)
10125 {
10126 length = GetParamSubVals (m->igrBranchRates, chain, state[chain])[p->index];
10127 }
10128 else if (m->mixedBrchRates != NULL)
10129 {
10130 length = GetParamSubVals (m->mixedBrchRates, chain, state[chain])[p->index];
10131 }
10132 else
10133 length = p->length;
10134
10135 /* numerical errors will ensue if we allow very large or very small branch lengths, which might
10136 occur in relaxed clock models; an elegant solution would be to substitute the stationary
10137 probs and initial probs but for now we truncate lengths at small or large values TODO */
10138 if (length > BRLENS_MAX)
10139 length = BRLENS_MAX;
10140 else if (length < BRLENS_MIN)
10141 length = BRLENS_MIN;
10142
10143 /* fill in values; this has to be done differently if state freqs are not equal */
10144 if (m->stateFreq->paramId == SYMPI_EQUAL)
10145 {
10146 /* equal state frequencies */
10147 /* fill in values for unordered characters */
10148 index = 0;
10149 # if defined (DEBUG_TIPROBS_STD)
10150 index3 = 0;
10151 # endif
10152 for (nStates=2; nStates<=10; nStates++)
10153 {
10154 if (m->isTiNeeded[nStates-2] == NO)
10155 continue;
10156 for (k=0; k<m->numRateCats; k++)
10157 {
10158 /* calculate probabilities */
10159 v = length * catRate[k] * baseRate;
10160 eV1 = exp(-(nStates / (nStates - 1.0)) * v);
10161 pChange = (CLFlt) ((1.0 / nStates) - ((1.0 / nStates) * eV1));
10162 pNoChange = (CLFlt) ((1.0 / nStates) + ((nStates - 1.0) / nStates) * eV1);
10163 if (pChange<0.0)
10164 pChange = (CLFlt) 0.0;
10165 for (i=0; i<nStates; i++)
10166 {
10167 for (j=0; j<nStates; j++)
10168 {
10169 if (i == j)
10170 tiP[index++] = pNoChange;
10171 else
10172 tiP[index++] = pChange;
10173 }
10174 }
10175 # if defined (DEBUG_TIPROBS_STD)
10176 PrintTiProbs (tiP+index-(nStates*nStates), bs+index3, nStates);
10177 # endif
10178 }
10179 # if defined (DEBUG_TIPROBS_STD)
10180 index3 += nStates;
10181 # endif
10182 }
10183
10184 /* fill in values for 3-state ordered character */
10185 if (m->isTiNeeded[9] == YES)
10186 {
10187 nStates = 3;
10188 for (k=0; k<m->numRateCats; k++)
10189 {
10190 /* calculate probabilities */
10191 v = length * catRate[k] * baseRate;
10192 eV1 = exp (-(3.0 / 4.0) * v);
10193 eV2 = exp (-(9.0 / 4.0) * v);
10194
10195 /* pij(0,0) */
10196 tiP[index] = (CLFlt) ((1.0 / 3.0) + (eV1 / 2.0) + (eV2 / 6.0));
10197 /* pij(0,1) = pij(1,0) */
10198 tiP[index+1] = tiP[index+3] = (CLFlt) ((1.0 / 3.0) - (eV2 / 3.0));
10199 /* pij(0,2) */
10200 tiP[index+2] = (CLFlt) ((1.0 / 3.0) - (eV1 / 2.0) + (eV2 / 6.0));
10201 /* pij(1,1) */
10202 tiP[index+4] = (CLFlt) ((1.0 / 3.0) + (2.0 * eV2 / 3.0));
10203
10204 /* fill in mirror part of matrix */
10205 index += 5;
10206 index2 = index - 2;
10207 for (i=0; i<4; i++)
10208 tiP[index++] = tiP[index2--];
10209
10210 /* make sure no value is negative */
10211 for (i=index-(nStates*nStates); i<index; i++) {
10212 if (tiP[i] < 0.0)
10213 tiP[i] = (CLFlt) 0.0;
10214 }
10215 # if defined (DEBUG_TIPROBS_STD)
10216 PrintTiProbs (tiP+index-(nStates*nStates), bs+index3, nStates);
10217 # endif
10218 }
10219
10220 # if defined (DEBUG_TIPROBS_STD)
10221 index3 += nStates;
10222 # endif
10223 }
10224
10225 /* 4-state ordered character */
10226 if (m->isTiNeeded[10] == YES)
10227 {
10228 nStates = 4;
10229 pi = 1.0 / 4.0;
10230 root = sqrt (2.0);
10231 f1 = root + 1.0;
10232 f2 = root - 1.0;
10233
10234 for (k=0; k<m->numRateCats; k++)
10235 {
10236 /* calculate probabilities */
10237 v = length * catRate[k] * baseRate;
10238 eV1 = 1.0 / (exp ((4.0 * v) / 3.0));
10239 eV2 = exp ((2.0 * (root - 2.0) * v) / 3.0) / root;
10240 eV3 = 1.0 / (root * exp ((2.0 * (root + 2.0) * v) / 3.0));
10241
10242 /* pij(0,0) */
10243 tiP[index] = (CLFlt) (pi * (1.0 + eV1 + (f1*eV2) + (f2*eV3)));
10244 /* pij(0,1) = pij(1,0) */
10245 tiP[index+1] = tiP[index+4] = (CLFlt) (pi * (1.0 - eV1 + eV2 - eV3));
10246 /* pij(0,2) = tiP(1,3) */
10247 tiP[index+2] = tiP[index+7] = (CLFlt) (pi * (1.0 - eV1 - eV2 + eV3));
10248 /* pij(0,3) */
10249 tiP[index+3] = (CLFlt) (pi * (1.0 + eV1 - (f1*eV2) - (f2*eV3)));
10250 /* pij(1,1) */
10251 tiP[index+5] = (CLFlt) (pi * (1.0 + eV1 + (f2*eV2) + (f1*eV3)));
10252 /* pij(1,2) */
10253 tiP[index+6] = (CLFlt) (pi * (1.0 + eV1 - (f2*eV2) - (f1*eV3)));
10254
10255 /* fill in mirror part of matrix */
10256 index += 8;
10257 index2 = index - 1;
10258 for (i=0; i<8; i++)
10259 tiP[index++] = tiP[index2--];
10260
10261 /* make sure no value is negative */
10262 for (i=index-(nStates*nStates); i<index; i++) {
10263 if (tiP[i] < 0.0)
10264 tiP[i] = (CLFlt) 0.0;
10265 }
10266 # if defined (DEBUG_TIPROBS_STD)
10267 PrintTiProbs (tiP+index-(nStates*nStates), bs+index3, nStates);
10268 # endif
10269 }
10270 # if defined (DEBUG_TIPROBS_STD)
10271 index3 += nStates;
10272 # endif
10273 }
10274
10275 /* 5-state ordered character */
10276 if (m->isTiNeeded[11] == YES)
10277 {
10278 nStates = 5;
10279 pi = 1.0 / 5.0;
10280 root = sqrt (5.0);
10281
10282 f5 = root / 4.0;
10283 f1 = 0.75 + f5;;
10284 f2 = 1.25 + f5;
10285 f3 = 1.25 - f5;
10286 f4 = 0.75 - f5;
10287 f5 = f5 * 2.0;
10288 f6 = f5 + 0.5;
10289 f7 = f5 - 0.5;
10290
10291 for (k=0; k<m->numRateCats; k++)
10292 {
10293 /* calculate probabilities */
10294 v = length * catRate[k] * baseRate;
10295 v *= 5.0 / 16.0;
10296
10297 eV1 = exp ((root - 3.0) * v);
10298 eV2 = exp (-(root + 3.0) * v);
10299 eV3 = exp ((root - 5.0) * v);
10300 eV4 = exp (-(root + 5.0) * v);
10301
10302 /* pij(0,0) */
10303 tiP[index] = (CLFlt) (pi* (1.0 + (f1*eV3) + (f2*eV1) + (f3*eV2) + (f4*eV4)));
10304 /* pij(0,1) = pij(1,0) */
10305 tiP[index+1] = tiP[index+5] =
10306 (CLFlt) (pi*(1.0 - (eV3/2.0) + (f5*eV1) - (f5*eV2) - (eV4/2.0)));
10307 /* pij(0,2) = pij(2,0) */
10308 tiP[index+2] = tiP[index+10] = (CLFlt) (pi*(1.0 - (f6*eV3) + (f7*eV4)));
10309 /* pij(0,3) = pij(1,4) */
10310 tiP[index+3] = tiP[index+9] =
10311 (CLFlt) (pi*(1.0 - (eV3/2.0) - (f5*eV1) + (f5*eV2) - (eV4/2.0)));
10312 /* pij(0,4) */
10313 tiP[index+4] = (CLFlt) (pi*(1.0 + (f1*eV3) - (f2*eV1) - (f3*eV2) + (f4*eV4)));
10314 /* pij(1,1) */
10315 tiP[index+6] = (CLFlt) (pi*(1.0 + (f4*eV3) + (f3*eV1) + (f2*eV2) + (f1*eV4)));
10316 /* pij(1,2) = pij(2,1) */
10317 tiP[index+7] = tiP[index+11] = (CLFlt) (pi*(1.0 + (f7*eV3) - (f6*eV4)));
10318 /* pij(1,3) */
10319 tiP[index+8] = (CLFlt) (pi*(1.0 + (f4*eV3) - (f3*eV1) - (f2*eV2) + (f1*eV4)));
10320 /* pij(2,2) */
10321 tiP[index+12] = (CLFlt) (pi*(1.0 + (2.0*eV3) + (2.0*eV4)));
10322
10323 /* fill in mirror part of matrix */
10324 index += 13;
10325 index2 = index - 2;
10326 for (i=0; i<12; i++)
10327 tiP[index++] = tiP[index2--];
10328
10329 /* make sure no value is negative */
10330 for (i=index-(nStates*nStates); i<index; i++) {
10331 if (tiP[i] < 0.0)
10332 tiP[i] = (CLFlt) 0.0;
10333 }
10334 # if defined (DEBUG_TIPROBS_STD)
10335 PrintTiProbs (tiP+index-(nStates*nStates), bs+index3, nStates);
10336 # endif
10337 }
10338 # if defined (DEBUG_TIPROBS_STD)
10339 index3 += nStates;
10340 # endif
10341 }
10342
10343 /* 6-state ordered character */
10344 if (m->isTiNeeded[12] == YES)
10345 {
10346 nStates = 6;
10347 pi = 1.0 / 6.0;
10348 root = sqrt (3.0);
10349
10350 f4 = (3.0 / (2.0 * root));
10351 f1 = 1.0 + f4;
10352 f2 = 1.0 - f4;
10353 f3 = 0.5 + f4;
10354 f4 = 0.5 - f4;
10355
10356 for (k=0; k<m->numRateCats; k++)
10357 {
10358 /* calculate probabilities */
10359 v = length * catRate[k] * baseRate;
10360 v /= 5.0;
10361
10362 eV1 = exp (-9 * v);
10363 eV2 = exp (-6 * v);
10364 eV3 = exp (-3 * v);
10365 eV4 = exp (3.0 * (root - 2.0) * v);
10366 eV5 = exp (-3.0 * (root + 2.0) * v);
10367
10368 /* pij(0,0) */
10369 tiP[index] = (CLFlt) (pi* (1.0 + (0.5*eV1) + eV2 + (1.5*eV3) + (f1*eV4) + (f2*eV5)));
10370 /* pij(0,1) = pij(1,0) */
10371 tiP[index+1] = tiP[index+6] = (CLFlt) (pi*(1.0 - eV1 - eV2 + (f3*eV4) + (f4*eV5)));
10372 /* pij(0,2) = pij(2,0) */
10373 tiP[index+2] = tiP[index+12] =
10374 (CLFlt) (pi*(1.0 + (0.5*eV1) - eV2 - (1.5*eV3) + (0.5*eV4) + (0.5*eV5)));
10375 /* pij(0,3) = pij(2,5) */
10376 tiP[index+3] = tiP[index+17] =
10377 (CLFlt) (pi*(1.0 + (0.5*eV1) + eV2 - (1.5*eV3) - (0.5*eV4) - (0.5*eV5)));
10378 /* pij(0,4) = pij(1,5) */
10379 tiP[index+4] = tiP[index+11] = (CLFlt) (pi*(1.0 - eV1 + eV2 - (f3*eV4) - (f4*eV5)));
10380 /* pij(0,5) */
10381 tiP[index+5] = (CLFlt) (pi*(1.0 + (0.5*eV1) - eV2 + (1.5*eV3) - (f1*eV4) - (f2*eV5)));
10382 /* pij(1,1) */
10383 tiP[index+7] = (CLFlt) (pi*(1.0 + (2.0*eV1) + eV2 + eV4 + eV5));
10384 /* pij(1,2) = pij(2,1) */
10385 tiP[index+8] = tiP[index+13] = (CLFlt) (pi*(1.0 - eV1 + eV2 - (f4*eV4) - (f3*eV5)));
10386 /* pij(1,3) = pij(2,4) */
10387 tiP[index+9] = tiP[index+16] = (CLFlt) (pi*(1.0 - eV1 - eV2 + (f4*eV4) + (f3*eV5)));
10388 /* pij(1,4) */
10389 tiP[index+10] = (CLFlt) (pi*(1.0 + (2.0*eV1) - eV2 - eV4 - eV5));
10390 /* pij(2,2) */
10391 tiP[index+14] = (CLFlt) (pi*(1.0 + (0.5*eV1) + eV2 + (1.5*eV3) + (f2*eV4) + (f1*eV5)));
10392 /* pij(2,3) */
10393 tiP[index+15] = (CLFlt) (pi*(1.0 + (0.5*eV1) - eV2 + (1.5*eV3) - (f2*eV4) - (f1*eV5)));
10394
10395 /* fill in mirror part of matrix */
10396 index += 18;
10397 index2 = index - 1;
10398 for (i=0; i<18; i++)
10399 tiP[index++] = tiP[index2--];
10400
10401 /* make sure no value is negative */
10402 for (i=index-(nStates*nStates); i<index; i++) {
10403 if (tiP[i] < 0.0)
10404 tiP[i] = (CLFlt) 0.0;
10405 }
10406 # if defined (DEBUG_TIPROBS_STD)
10407 PrintTiProbs (tiP+index-(nStates*nStates), bs+index3, nStates);
10408 # endif
10409 }
10410 # if defined (DEBUG_TIPROBS_STD)
10411 index3 += nStates;
10412 # endif
10413 }
10414 }
10415 else
10416 {
10417 /* unequal state frequencies */
10418 index = 0;
10419
10420 /* first fill in for binary characters using beta categories if needed */
10421 if (m->isTiNeeded[0] == YES)
10422 {
10423 /* find base frequencies */
10424 bs = GetParamStdStateFreqs (m->stateFreq, chain, state[chain]);
10425
10426 /* cycle through beta and gamma cats */
10427 for (b=0; b<m->numBetaCats; b++)
10428 {
10429 mu = 1.0 / (2.0 * bs[0] * bs[1]);
10430 for (k=0; k<m->numRateCats; k++)
10431 {
10432 /* calculate probabilities */
10433 v = length * catRate[k] * baseRate;
10434 eV1 = exp(- mu * v);
10435 tiP[index++] = (CLFlt) (bs[0] + (bs[1] * eV1));
10436 tiP[index++] = (CLFlt) (bs[1] - (bs[1] * eV1));
10437 tiP[index++] = (CLFlt) (bs[0] - (bs[0] * eV1));
10438 tiP[index++] = (CLFlt) (bs[1] + (bs[0] * eV1));
10439 }
10440 /* update stationary state frequency pointer */
10441 bs += 2;
10442 }
10443 }
10444
10445 /* now use general algorithm for the other cases */
10446 if (m->cijkLength > 0)
10447 {
10448 /* first update cijk if necessary */
10449 if (m->cijkLength > 0 && m->upDateCijk == YES)
10450 {
10451 if (UpDateCijk (division, chain) == ERROR)
10452 return (ERROR);
10453 }
10454
10455 /* then get first set of eigenvalues */
10456 eigenValues = m->cijks[m->cijkIndex[chain]];
10457
10458 /* and cycle through the relevant characters */
10459 for (c=0; c<m->stateFreq->nSympi; c++)
10460 {
10461 n = m->stateFreq->sympinStates[c];
10462
10463 /* fill in values */
10464 for (k=0; k<m->numRateCats; k++)
10465 {
10466 v = length * baseRate * catRate[k];
10467 cijk = eigenValues + (2 * n);
10468
10469 for (i=0; i<n; i++)
10470 {
10471 for (j=0; j<n; j++)
10472 {
10473 sum = 0.0;
10474 for (s=0; s<n; s++)
10475 sum += (*cijk++) * exp(eigenValues[s] * v);
10476 tiP[index++] = (CLFlt) ((sum < 0.0) ? 0.0 : sum);
10477 }
10478 }
10479 }
10480
10481 /* update eigenValues pointer */
10482 eigenValues += (n * n * n) + (2 * n);
10483 }
10484 }
10485 }
10486
10487 return NO_ERROR;
10488 }
10489
10490
UpDateCijk(int whichPart,int whichChain)10491 int UpDateCijk (int whichPart, int whichChain)
10492 {
10493 int c, i, j, k, n, n3, isComplex, sizeOfSingleCijk, cType, numQAllocated;
10494 MrBFlt **q[100], **eigvecs, **inverseEigvecs;
10495 MrBFlt *eigenValues, *eigvalsImag, *cijk;
10496 MrBFlt *bs, *bsBase, *rateOmegaValues=NULL, rA=0.0, rS=0.0, posScaler, *omegaCatFreq=NULL;
10497 MrBComplex **Ceigvecs, **CinverseEigvecs;
10498 ModelInfo *m;
10499 Param *p;
10500 # if defined (BEAGLE_ENABLED)
10501 int u, divisionOffset;
10502 double *beagleEigvecs=NULL, *beagleInverseEigvecs=NULL;
10503 # endif
10504
10505 /* get a pointer to the model settings for this partition */
10506 m = &modelSettings[whichPart];
10507 assert (m->upDateCijk == YES);
10508
10509 /* we should only go through here if we have cijk information available for the partition */
10510 if (m->cijkLength > 0)
10511 {
10512 /* flip cijk space */
10513 FlipCijkSpace(m, whichChain);
10514
10515 /* figure out information on either omega values or rate values, if necessary */
10516 if (m->dataType == DNA || m->dataType == RNA)
10517 {
10518 if (m->nucModelId == NUCMODEL_CODON) /* we have a NY98 model */
10519 {
10520 rateOmegaValues = GetParamVals(m->omega, whichChain, state[whichChain]);
10521 if (m->numOmegaCats > 1)
10522 omegaCatFreq = GetParamSubVals (m->omega, whichChain, state[whichChain]);
10523 }
10524 else if (m->nCijkParts > 1 && m->nucModelId == NUCMODEL_4BY4 && m->numModelStates == 8)
10525 {
10526 /* we have a covarion (covariotide) model with rate variation */
10527 if (m->shape != NULL)
10528 rateOmegaValues = GetParamSubVals (m->shape, whichChain, state[whichChain]);
10529 else if (m->mixtureRates != NULL)
10530 rateOmegaValues = GetParamSubVals (m->mixtureRates, whichChain, state[whichChain]);
10531 }
10532 }
10533 else if (m->dataType == PROTEIN)
10534 {
10535 if (m->nCijkParts > 1)
10536 {
10537 /* we have a covarion model with rate variation */
10538 if (m->shape != NULL)
10539 rateOmegaValues = GetParamSubVals (m->shape, whichChain, state[whichChain]);
10540 else if (m->mixtureRates != NULL)
10541 rateOmegaValues = GetParamSubVals (m->mixtureRates, whichChain, state[whichChain]);
10542 }
10543 }
10544 # if defined (BEAGLE_ENABLED)
10545 else if (m->dataType == RESTRICTION){}
10546 # endif
10547 else if (m->dataType != STANDARD)
10548 {
10549 MrBayesPrint ("%s ERROR: Should not be updating cijks!\n", spacer);
10550 return (ERROR);
10551 }
10552
10553 if (m->dataType == STANDARD)
10554 {
10555 /* set pointers and other stuff needed */
10556 numQAllocated = 1;
10557 p = m->stateFreq;
10558 eigenValues = m->cijks[m->cijkIndex[whichChain]];
10559 q[0] = AllocateSquareDoubleMatrix (10);
10560 eigvecs = AllocateSquareDoubleMatrix (10);
10561 inverseEigvecs = AllocateSquareDoubleMatrix (10);
10562 Ceigvecs = AllocateSquareComplexMatrix (10);
10563 CinverseEigvecs = AllocateSquareComplexMatrix (10);
10564 bsBase = GetParamStdStateFreqs (m->stateFreq, whichChain, state[whichChain]);
10565
10566 /* cycle over characters needing cijks */
10567 for (c=0; c<p->nSympi; c++)
10568 {
10569 n = p->sympinStates[c];
10570 bs = bsBase + p->sympiBsIndex[c];
10571 cType = p->sympiCType[c];
10572 n3 = n * n * n;
10573 eigvalsImag = eigenValues + n;
10574 cijk = eigenValues + (2 * n);
10575 if (SetStdQMatrix (q[0], n, bs, cType) == ERROR)
10576 return (ERROR);
10577 isComplex = GetEigens (n, q[0], eigenValues, eigvalsImag, eigvecs, inverseEigvecs, Ceigvecs, CinverseEigvecs);
10578 if (isComplex == NO)
10579 {
10580 CalcCijk (n, cijk, eigvecs, inverseEigvecs);
10581 }
10582 else
10583 {
10584 if (isComplex == YES)
10585 MrBayesPrint ("%s ERROR: Complex eigenvalues found!\n", spacer);
10586 else
10587 MrBayesPrint ("%s ERROR: Computing eigenvalues problem!\n", spacer);
10588 goto errorExit;
10589 }
10590 eigenValues += (n3 + (2 * n));
10591 }
10592 }
10593 else
10594 {
10595 /* all other data types */
10596 numQAllocated = m->nCijkParts;
10597 sizeOfSingleCijk = m->cijkLength / m->nCijkParts;
10598 n = m->numModelStates;
10599 # if defined (BEAGLE_ENABLED)
10600 if (m->useBeagle == YES)
10601 eigenValues = m->cijks[m->cijkIndex[whichChain]/m->nCijkParts];
10602 else
10603 eigenValues = m->cijks[m->cijkIndex[whichChain]];
10604 # else
10605 eigenValues = m->cijks[m->cijkIndex[whichChain]];
10606 # endif
10607 eigvalsImag = eigenValues + n;
10608 cijk = eigenValues + (2 * n);
10609 for (k=0; k<numQAllocated; k++)
10610 q[k] = AllocateSquareDoubleMatrix (n);
10611 eigvecs = AllocateSquareDoubleMatrix (n);
10612 inverseEigvecs = AllocateSquareDoubleMatrix (n);
10613 Ceigvecs = AllocateSquareComplexMatrix (n);
10614 CinverseEigvecs = AllocateSquareComplexMatrix (n);
10615
10616 if (m->nCijkParts == 1)
10617 {
10618 if (m->dataType == DNA || m->dataType == RNA)
10619 {
10620 if (m->nucModelId == NUCMODEL_CODON)
10621 {
10622 if (SetNucQMatrix (q[0], n, whichChain, whichPart, rateOmegaValues[0], &rA, &rS) == ERROR)
10623 goto errorExit;
10624 }
10625 else
10626 {
10627 if (SetNucQMatrix (q[0], n, whichChain, whichPart, 1.0, &rA, &rS) == ERROR)
10628 goto errorExit;
10629 }
10630 }
10631 # if defined (BEAGLE_ENABLED)
10632 else if (m->dataType == RESTRICTION)
10633 {
10634 SetBinaryQMatrix (q[0], whichChain, whichPart);
10635 }
10636 # endif
10637 else
10638 {
10639 if (SetProteinQMatrix (q[0], n, whichChain, whichPart, 1.0) == ERROR)
10640 goto errorExit;
10641 }
10642 isComplex = GetEigens (n, q[0], eigenValues, eigvalsImag, eigvecs, inverseEigvecs, Ceigvecs, CinverseEigvecs);
10643 # if defined (BEAGLE_ENABLED)
10644 if (isComplex == YES)
10645 {
10646 if (isComplex == YES)
10647 MrBayesPrint ("%s ERROR: Complex eigenvalues found!\n", spacer);
10648 else
10649 MrBayesPrint ("%s ERROR: Computing eigenvalues problem!\n", spacer);
10650 goto errorExit;
10651 }
10652 if (m->useBeagle == YES)
10653 {
10654 /* TODO: only allocate this space once at initialization */
10655 beagleEigvecs = (double*) SafeCalloc (2*n*n, sizeof(double));
10656 beagleInverseEigvecs = beagleEigvecs + n*n;
10657 for (i=k=0; i<n; i++)
10658 {
10659 // eigenValues[i] = 0.1;
10660 for (j=0; j<n; j++)
10661 {
10662 beagleEigvecs[k] = eigvecs[i][j];
10663 beagleInverseEigvecs[k] = inverseEigvecs[i][j];
10664 k++;
10665 }
10666 }
10667 divisionOffset = 0;
10668 if (m->useBeagleMultiPartitions == YES)
10669 divisionOffset = (numLocalChains + 1) * m->nCijkParts * m->divisionIndex;
10670 beagleSetEigenDecomposition(m->beagleInstance,
10671 m->cijkIndex[whichChain] + divisionOffset,
10672 beagleEigvecs,
10673 beagleInverseEigvecs,
10674 eigenValues);
10675 free(beagleEigvecs);
10676 }
10677 else
10678 {
10679 CalcCijk (n, cijk, eigvecs, inverseEigvecs);
10680 }
10681 # else
10682 if (isComplex == NO)
10683 {
10684 CalcCijk (n, cijk, eigvecs, inverseEigvecs);
10685 }
10686 else
10687 {
10688 MrBayesPrint ("%s ERROR: Complex eigenvalues found!\n", spacer);
10689 goto errorExit;
10690 }
10691 # endif
10692 }
10693 else
10694 {
10695 /* Here, we calculate the rate matrices (Q) for various nucleotide and amino acid
10696 data models. Usually, when the rate matrix is set in SetNucQMatrix, it is scaled
10697 such that the average substitution rate is one. However, there is a complication
10698 for positive selection models using codon rate matrices. First, we have more than
10699 one matrix; in fact, we have as many rate matrices as there are omega values. Second,
10700 the mean substitution rate still has to be one. And third, we want the synonymous
10701 rate to be the same across the rate matrices. For positive selection models, the Q
10702 matrix comes out of SetNucQMatrix unscaled. Once we have all m->nCijkParts rate
10703 matrices, we then scale again, this time to ensure that the mean substitution rate is one. */
10704
10705 /* First, calculate rate matrices for each category: */
10706 posScaler = 0.0;
10707 for (k=0; k<m->nCijkParts; k++)
10708 {
10709 if (m->dataType == DNA || m->dataType == RNA)
10710 {
10711 if (SetNucQMatrix (q[k], n, whichChain, whichPart, rateOmegaValues[k], &rA, &rS) == ERROR)
10712 goto errorExit;
10713 }
10714 else
10715 {
10716 if (SetProteinQMatrix (q[k], n, whichChain, whichPart, rateOmegaValues[k]) == ERROR)
10717 goto errorExit;
10718 }
10719 if (m->nucModelId == NUCMODEL_CODON && m->numOmegaCats > 1)
10720 posScaler += omegaCatFreq[k] * (rS + rA);
10721 }
10722
10723 /* Then rescale the rate matrices, if this is a positive selection model: */
10724 if (m->nucModelId == NUCMODEL_CODON && m->numOmegaCats > 1)
10725 {
10726 posScaler = 1.0 / posScaler;
10727 for (k=0; k<m->nCijkParts; k++)
10728 {
10729 for (i=0; i<n; i++)
10730 for (j=0; j<n; j++)
10731 q[k][i][j] *= posScaler;
10732 }
10733 }
10734
10735 /* Finally, calculate eigenvalues, etc.: */
10736 # if defined (BEAGLE_ENABLED)
10737 if (m->useBeagle == YES)
10738 {
10739 /* TODO: only allocate this space once at initialization */
10740 beagleEigvecs = (double*) SafeCalloc (2*n*n, sizeof(double));
10741 beagleInverseEigvecs = beagleEigvecs + n*n;
10742 }
10743 # endif
10744 for (k=0; k<m->nCijkParts; k++)
10745 {
10746 isComplex = GetEigens (n, q[k], eigenValues, eigvalsImag, eigvecs, inverseEigvecs, Ceigvecs, CinverseEigvecs);
10747 # if defined (BEAGLE_ENABLED)
10748 if (isComplex == YES)
10749 {
10750 if (isComplex == YES)
10751 MrBayesPrint ("%s ERROR: Complex eigenvalues found!\n", spacer);
10752 else
10753 MrBayesPrint ("%s ERROR: Computing eigenvalues problem!\n", spacer);
10754 goto errorExit;
10755 }
10756 if (m->useBeagle == YES)
10757 {
10758 for (i=u=0; i<n; i++)
10759 {
10760 for (j=0; j<n; j++)
10761 {
10762 beagleEigvecs[u] = eigvecs[i][j];
10763 beagleInverseEigvecs[u] = inverseEigvecs[i][j];
10764 u++;
10765 }
10766 }
10767 divisionOffset = 0;
10768 if (m->useBeagleMultiPartitions == YES)
10769 divisionOffset = (numLocalChains + 1) * m->nCijkParts * m->divisionIndex;
10770 beagleSetEigenDecomposition(m->beagleInstance,
10771 m->cijkIndex[whichChain] + k + divisionOffset,
10772 beagleEigvecs,
10773 beagleInverseEigvecs,
10774 eigenValues);
10775 }
10776 else
10777 {
10778 CalcCijk (n, cijk, eigvecs, inverseEigvecs);
10779 }
10780 # else
10781 if (isComplex == NO)
10782 {
10783 CalcCijk (n, cijk, eigvecs, inverseEigvecs);
10784 }
10785 else
10786 {
10787 MrBayesPrint ("%s ERROR: Complex eigenvalues found!\n", spacer);
10788 goto errorExit;
10789 }
10790 # endif
10791 /* shift pointers */
10792 eigenValues += sizeOfSingleCijk;
10793 eigvalsImag += sizeOfSingleCijk;
10794 cijk += sizeOfSingleCijk;
10795 }
10796 # if defined (BEAGLE_ENABLED)
10797 free(beagleEigvecs);
10798 # endif
10799 }
10800 }
10801
10802 for (k=0; k<numQAllocated; k++)
10803 FreeSquareDoubleMatrix (q[k]);
10804 FreeSquareDoubleMatrix (eigvecs);
10805 FreeSquareDoubleMatrix (inverseEigvecs);
10806 FreeSquareComplexMatrix (Ceigvecs);
10807 FreeSquareComplexMatrix (CinverseEigvecs);
10808 }
10809
10810 return (NO_ERROR);
10811
10812 errorExit:
10813 for (k=0; k<numQAllocated; k++)
10814 FreeSquareDoubleMatrix (q[k]);
10815 FreeSquareDoubleMatrix (eigvecs);
10816 FreeSquareDoubleMatrix (inverseEigvecs);
10817 FreeSquareComplexMatrix (Ceigvecs);
10818 FreeSquareComplexMatrix (CinverseEigvecs);
10819
10820 return ERROR;
10821 }
10822
10823