1#include "binomial.ibf"; 2 3/*___________________________________________________________________________________________________________*/ 4 5function PadString (padLength,padChar) 6{ 7 padString = ""; 8 d=padString*padLength; 9 for (padCounter=0;padCounter<padLength;padCounter=padCounter+1) 10 { 11 d=padString*padChar; 12 } 13 d=padString*0; 14 fprintf (stdout,padString); 15 return padString; 16} 17 18/*___________________________________________________________________________________________________________*/ 19 20function PrintASCIITable (dataMatrix, titleMatrix) 21{ 22 columnWidths = {1,Columns(titleMatrix)}; 23 for (counter1=0; counter1<Columns(titleMatrix); counter1 = counter1+1) 24 { 25 counter2 = Abs (titleMatrix[0][counter1])+2; 26 if (counter2<12) 27 { 28 counter2 = 12; 29 } 30 columnWidths[0][counter1] = counter2; 31 } 32 fprintf (stdout, "\n"); 33 for (counter2 = 0; counter2 < Columns (titleMatrix); counter2 = counter2+1) 34 { 35 fprintf (stdout,"+-"); 36 dummy = PadString (columnWidths[0][counter2],"-"); 37 fprintf (stdout,"-"); 38 } 39 fprintf (stdout,"+\n| "); 40 41 for (counter1=0; counter1<Columns(titleMatrix); counter1 = counter1+1) 42 { 43 fprintf (stdout, titleMatrix[counter1]); 44 dummy = PadString (columnWidths[0][counter1]-Abs(titleMatrix[counter1])," "); 45 fprintf (stdout, " | "); 46 } 47 48 fprintf (stdout, "\n"); 49 50 for (counter1=-1; counter1<Rows(dataMatrix); counter1 = counter1 + 1) 51 { 52 if (counter1>=0) 53 { 54 fprintf (stdout,"| "); 55 fprintf (stdout,Format(counter1+1,columnWidths[0][0],0)); 56 for (counter2 = 1; counter2 < Columns (titleMatrix); counter2 = counter2+1) 57 { 58 fprintf (stdout," | "); 59 fprintf (stdout,Format(dataMatrix[counter1][counter2-1],columnWidths[0][counter2],-1)); 60 } 61 fprintf (stdout," "); 62 fprintf (stdout, "|\n"); 63 } 64 for (counter2 = 0; counter2 < Columns (titleMatrix); counter2 = counter2+1) 65 { 66 fprintf (stdout,"+-"); 67 dummy = PadString (columnWidths[0][counter2],"-"); 68 fprintf (stdout,"-"); 69 } 70 fprintf (stdout, "+\n"); 71 } 72 73 return 1; 74} 75 76/*___________________________________________________________________________________________________________*/ 77 78function mergeSimulatedNulls (nullAVL1, nullAVL2) 79{ 80 nullAVLJoint = {}; 81 82 ccode = Abs(nullAVL1); 83 84 for (counter1 = 0; counter1 < ccode; counter1 = counter1 + 1) 85 { 86 av1 = nullAVL1[counter1]; 87 av2 = nullAVL2[counter1]; 88 avJ = {}; 89 doneKeys = {}; 90 keys1 = Rows(av1); 91 keys2 = Rows(av2); 92 keyC = Columns(keys1); 93 94 for (counter2 = 0; counter2 < keyC; counter2 = counter2 + 1) 95 { 96 key1Value = keys1[counter2]; 97 mx1 = av1[key1Value]; 98 mx2 = av2[key1Value]; 99 if (Abs(mx2)) 100 { 101 /* merge the two matrices */ 102 103 d = Rows(mx1); 104 mx3 = {d,2}; 105 mx3 [0][0] = mx1[0][0] + mx2[0][0]; 106 w1 = mx1[0][0] / mx3 [0][0]; 107 w2 = mx2[0][0] / mx3 [0][0]; 108 109 for (counter3 = 1; counter3 < d; counter3 = counter3 + 1) 110 { 111 mx3[counter3][0] = mx1[counter3][0]; 112 mx3[counter3][1] = mx1[counter3][1]*w1 + mx2[counter3][1]*w2; 113 } 114 avJ [key1Value] = mx3; 115 } 116 else 117 { 118 avJ [key1Value] = mx1; 119 } 120 doneKeys[key1Value] = 1; 121 } 122 123 keyC = Columns(keys2); 124 for (counter2 = 0; counter2 < keyC; counter2 = counter2 + 1) 125 { 126 key1Value = keys2[counter2]; 127 if (doneKeys[key1Value] < 0.5) 128 { 129 avJ [key1Value] = av2[key1Value]; 130 } 131 } 132 133 nullAVLJoint[counter1] = avJ; 134 } 135 136 return nullAVLJoint; 137} 138 139/*___________________________________________________________________________________________________________*/ 140 141nucCharacters="ACGT"; 142function codonString (ccode) 143{ 144 return nucCharacters[ccode$16]+nucCharacters[(ccode%16)$4]+nucCharacters[ccode%4]; 145} 146 147/*___________________________________________________________________________________________________________*/ 148 149function PrintTableToFile (dataMatrix, titleMatrix, promptOrNot) 150{ 151 SetDialogPrompt ("Export tab separated data to:"); 152 153 if (promptOrNot) 154 { 155 fprintf (PROMPT_FOR_FILE, CLEAR_FILE); 156 } 157 158 bufferString = ""; 159 bufferString * 8192; 160 bufferString * titleMatrix[0][0]; 161 162 for (counter1=1; counter1<Columns(titleMatrix); counter1 = counter1+1) 163 { 164 bufferString * ("\t"+titleMatrix[0][counter1]); 165 } 166 167 for (counter1=0; counter1<Rows(dataMatrix); counter1 = counter1 + 1) 168 { 169 bufferString * ("\n"+dataMatrix[counter1][0]); 170 for (counter2 = 1; counter2 < Columns (dataMatrix); counter2 = counter2+1) 171 { 172 bufferString * ("\t"+dataMatrix[counter1][counter2]); 173 } 174 } 175 176 bufferString * 0; 177 fprintf (LAST_FILE_PATH,bufferString); 178 bufferString = 0; 179 180 return 1; 181} 182 183/*----------------------------------------------------------------------------*/ 184 185if (pipeThroughFlag == 0) 186{ 187 ChoiceList (ambChoice, "Treatment of Ambiguities",1,SKIP_NONE, 188 "Averaged","All possible resolutions are considered and averaged.", 189 "Resolved","The most frequent (for that site) resolution is chosen."); 190 191 if (ambChoice<0) 192 { 193 return; 194 } 195 196 if (useCustomCountingBias) 197 { 198 incFileName = "Distances/CodonToolsMain.def"; 199 } 200 else 201 { 202 incFileName = "Distances/CodonTools.def"; 203 } 204 205 ExecuteCommands ("#include \""+incFileName+"\";"); 206 207 ChoiceList (distribChoice, "Test Statistic",1,SKIP_NODE, 208 "Approximate", "Use the approximate extended binomial distribution (fast)", 209 "Simulated Null", "Dynamically generate the null distribution from the data (very slow)."); 210 211 if (distribChoice < 0) 212 { 213 return; 214 } 215} 216 217 218if (distribChoice > 0) 219{ 220 STORE_ROOT_SUPPORT = 1; 221} 222 223DataSet dsA = ReconstructAncestors (lf); 224 225if (distribChoice > 0) 226{ 227 STORE_ROOT_SUPPORT = 0; 228 SUPPORT_MATRIX_LIST = Transpose (SUPPORT_MATRIX_LIST[0]); 229} 230DataSetFilter filteredDataA = CreateFilter (dsA,3,"","",GeneticCodeExclusions); 231 232HarvestFrequencies (observedCEFV,filteredData,3,3,0); 233seqToBranchMap = {stateCharCount,1}; 234 235DataSet dsJoint = Combine(dsA,ds); 236DataSetFilter filteredDataJ = CreateFilter (dsJoint,3,"","",GeneticCodeExclusions); 237 238hShift = 0; 239 240for (k=0; k<64; k=k+1) 241{ 242 if (_Genetic_Code[k]==10) 243 { 244 hShift = hShift+1; 245 } 246 else 247 { 248 seqToBranchMap[k-hShift] = observedCEFV[k]; 249 } 250} 251 252observedCEFV = seqToBranchMap; 253 254 255branchNames = BranchName (givenTree,-1); 256h = Columns (branchNames); 257 258seqToBranchMap = {h, 2}; 259/* maps sequence names to branch order in column 1 260 and the other way around in column 2 */ 261 262for (k=0; k<filteredData.species; k=k+1) 263{ 264 GetString (seqName, filteredData, k); 265 seqToBranchMap[k][0] = -1; 266 for (v=0; v<h; v=v+1) 267 { 268 if (branchNames[v] % seqName) 269 { 270 seqToBranchMap[k][0] = v; 271 seqToBranchMap[v][1] = k; 272 break; 273 } 274 } 275} 276 277seqToBranchMap[filteredData.species][0] = h-1; 278seqToBranchMap[h-1][1] = filteredData.species; 279 280for (k=1; k<filteredDataA.species; k=k+1) 281{ 282 GetString (seqName, filteredDataA, k); 283 seqToBranchMap[filteredData.species+k][0] = -1; 284 for (v=0; v<h; v=v+1) 285 { 286 if (branchNames[v] % seqName) 287 { 288 seqToBranchMap[k+filteredData.species][0] = v; 289 seqToBranchMap[v][1] = k+filteredData.species; 290 break; 291 } 292 } 293} 294 295/*for (k=0; k<Rows(seqToBranchMap); k=k+1) 296{ 297 v = seqToBranchMap[k][0]; 298 fprintf (stdout, "\n", branchNames[v], "=>"); 299 if (k<filteredData.species) 300 { 301 GetString (seqName, filteredData, k); 302 } 303 else 304 { 305 GetString (seqName, filteredDataA, k-filteredData.species); 306 } 307 fprintf (stdout, seqName); 308} 309 310fprintf (stdout, "\n\n"); 311 312for (k=0; k<Rows(seqToBranchMap); k=k+1) 313{ 314 fprintf (stdout, "\n", branchNames[k], "=>"); 315 v = seqToBranchMap[k][1]; 316 if (v<filteredData.species) 317 { 318 GetString (seqName, filteredData, v); 319 } 320 else 321 { 322 GetString (seqName, filteredDataA, v-filteredData.species); 323 } 324 fprintf (stdout, seqName); 325} 326 327fprintf (stdout, "\n\n"); 328 329return 0;*/ 330 331/* total tree length */ 332 333totalTreeLength = 0; 334branchLengths = BranchLength(givenTree,-1); 335 336for (v=Columns(branchLengths)-1; v>=0; v=v-1) 337{ 338 totalTreeLength = totalTreeLength + branchLengths[v]; 339} 340 341/* get codon matrix */ 342 343filteredData.unique_sites = Rows(filteredData.site_freqs) * Columns (filteredData.site_freqs); 344 345codonInfo = {filteredData.species, filteredData.unique_sites}; 346codonInfo2 = {filteredDataA.species, filteredDataA.unique_sites}; 347 348GetDataInfo (dupInfo, filteredData); 349GetDataInfo (dupInfoA, filteredDataA); 350 351matrixTrick = {1,stateCharCount}; 352matrixTrick2 = {1,stateCharCount}; 353matrixTrick = matrixTrick["_MATRIX_ELEMENT_COLUMN_"]; 354matrixTrick2 = matrixTrick2["1"]; 355 356for (v=0; v<filteredData.unique_sites;v=v+1) 357{ 358 for (h=0; h<filteredData.species;h=h+1) 359 { 360 GetDataInfo (siteInfo, filteredData, h, v); 361 _SITE_ES_COUNT = matrixTrick2 * siteInfo; 362 if (_SITE_ES_COUNT[0] == 1) 363 { 364 siteInfo = matrixTrick * siteInfo; 365 codonInfo[h][v] = siteInfo[0]; 366 } 367 else 368 { 369 codonInfo[h][v] = -1; 370 } 371 } 372} 373 374for (v=0; v<filteredDataA.unique_sites;v=v+1) 375{ 376 for (h=0; h<filteredDataA.species;h=h+1) 377 { 378 GetDataInfo (siteInfo, filteredDataA, h, v); 379 if ((matrixTrick2*siteInfo)[0] > 1) 380 { 381 codonInfo2[h][v] = -1; 382 } 383 else 384 { 385 codonInfo2[h][v] = (matrixTrick * siteInfo)[0]; 386 } 387 } 388} 389 390_SITE_RESULTS = {4,filteredData.sites}; 391flatTreeRep = Abs (givenTree); 392 393resultMatrix = {filteredData.sites,12}; 394perBranchMatrix = {Columns(branchNames),2}; 395 396 397 398if (distribChoice) 399{ 400 dNdS = 1; 401 branchLengthsNeutral = BranchLength(codonTree,-1); 402 branchNamesList = BranchName (codonTree,-1); 403 404 ClearConstraints (codonTree); 405 /*fprintf (stdout, "\nTree before: ", Format (codonTree,1,1),"\n");*/ 406 407 for (v=Columns(branchLengthsNeutral)-1; v>=0; v=v-1) 408 { 409 ExecuteCommands ("codonTree."+branchNamesList[v]+".synRate = codonTree."+branchNamesList[v]+".synRate*branchLengths[v]/branchLengthsNeutral[v];"); 410 } 411 412 /*fprintf (stdout, "\nTree after: ", Format (codonTree,1,1),"\n");*/ 413 414 fprintf (stdout, "Simulating the null distribution for average branch lengths...\n"); 415 416 GetNeutralNull (simulatedNullAVL1, lf, _OBSERVED_S_, _OBSERVED_NS_, 33333); 417 418 for (v=Columns(branchLengthsNeutral)-1; v>=0; v=v-1) 419 { 420 ExecuteCommands ("codonTree."+branchNamesList[v]+".synRate = codonTree."+branchNamesList[v]+".synRate*0.125"); 421 } 422 423 fprintf (stdout, "Simulating the null distribution for short branch lengths...\n"); 424 GetNeutralNull (simulatedNullAVL2, lf, _OBSERVED_S_, _OBSERVED_NS_, 33333); 425 426 simulatedNullAVL = mergeSimulatedNulls(simulatedNullAVL1,simulatedNullAVL2); 427 428 for (v=Columns(branchLengthsNeutral)-1; v>=0; v=v-1) 429 { 430 ExecuteCommands ("codonTree."+branchNamesList[v]+".synRate = codonTree."+branchNamesList[v]+".synRate*32"); 431 } 432 433 fprintf (stdout, "Simulating the null distribution for long branch lengths...\n"); 434 GetNeutralNull (simulatedNullAVL3, lf, _OBSERVED_S_, _OBSERVED_NS_, 33333); 435 simulatedNullAVL = mergeSimulatedNulls(simulatedNullAVL,simulatedNullAVL3); 436} 437 438for (v=0; v<filteredData.sites;v=v+1) 439{ 440 _SITE_ES_COUNT = {stateCharCount,stateCharCount}; 441 _SITE_EN_COUNT = {stateCharCount,stateCharCount}; 442 _SITE_OS_COUNT = {stateCharCount,stateCharCount}; 443 _SITE_ON_COUNT = {stateCharCount,stateCharCount}; 444 445 /* do internal nodes first */ 446 447 k = filteredData.species+1; 448 449 /* first sequence is always the root */ 450 c1 = dupInfoA[v]; 451 if (codonInfo2[1][c1] >= stateCharCount) 452 { 453 continue; 454 } 455 for (h=1; h<filteredDataA.species; h=h+1) 456 { 457 p1 = seqToBranchMap[k][0]; 458 p2 = flatTreeRep[p1]; 459 p2 = seqToBranchMap[p2][1]-filteredData.species; 460 461 branchFactor = branchLengths[p1]/totalTreeLength; 462 463 cd1 = codonInfo2[h] [c1]; 464 cd2 = codonInfo2[p2][c1]; 465 466 if (cd1 >= 0 && cd2 >= 0) 467 { 468 _SITE_OS_COUNT[cd1][cd2] = _SITE_OS_COUNT[cd1][cd2] + 1; 469 _SITE_ON_COUNT[cd1][cd2] = _SITE_ON_COUNT[cd1][cd2] + 1; 470 _SITE_ES_COUNT[cd1][cd2] = _SITE_ES_COUNT[cd1][cd2] + branchFactor; 471 _SITE_EN_COUNT[cd1][cd2] = _SITE_EN_COUNT[cd1][cd2] + branchFactor; 472 473 if (_PAIRWISE_S_[cd1][cd2]) 474 { 475 perBranchMatrix[p1][0] = perBranchMatrix[p1][0]+_OBSERVED_S_[cd1][cd2]/_PAIRWISE_S_[cd1][cd2]; 476 } 477 perBranchMatrix[p1][1] = perBranchMatrix[p1][1]+_OBSERVED_NS_[cd1][cd2]/_PAIRWISE_NS_[cd1][cd2]; 478 } 479 k=k+1; 480 } 481 482 /* now do the leaves */ 483 484 observedCEFV = {{0}}; 485 486 c2 = dupInfo[v]; 487 for (h=0; h<filteredData.species; h=h+1) 488 { 489 p1 = seqToBranchMap[h][0]; 490 p2 = flatTreeRep[p1]; 491 p2 = seqToBranchMap[p2][1]-filteredData.species; 492 493 cd2 = codonInfo2[p2][c1]; 494 cd1 = codonInfo[h] [c2]; 495 496 branchFactor = branchLengths[p1]/totalTreeLength; 497 498 if (cd1>=0) 499 /* no ambiguities */ 500 { 501 /*p2 = flatTreeRep[p1]; 502 fprintf (stdout, "Change from ", codonString (cd1), " to ", codonString (cd2), " along ", branchNames[p1], " and ", branchNames[p2], "(",branchFactor,")\n"); 503 */ 504 _SITE_OS_COUNT[cd1][cd2] = _SITE_OS_COUNT[cd1][cd2] + 1; 505 _SITE_ON_COUNT[cd1][cd2] = _SITE_ON_COUNT[cd1][cd2] + 1; 506 _SITE_ES_COUNT[cd1][cd2] = _SITE_ES_COUNT[cd1][cd2] + branchFactor; 507 _SITE_EN_COUNT[cd1][cd2] = _SITE_EN_COUNT[cd1][cd2] + branchFactor; 508 509 if (_PAIRWISE_S_[cd1][cd2]) 510 { 511 perBranchMatrix[p1][0] = perBranchMatrix[p1][0]+_OBSERVED_S_[cd1][cd2]/_PAIRWISE_S_[cd1][cd2]; 512 } 513 perBranchMatrix[p1][1] = perBranchMatrix[p1][1]+_OBSERVED_NS_[cd1][cd2]/_PAIRWISE_NS_[cd1][cd2]; 514 } 515 else 516 /* ambiguities here */ 517 { 518 GetDataInfo (ambInfo, filteredData, h, c2); 519 if (Rows(observedCEFV) == 1) 520 { 521 siteFilter = ""+(v*3)+"-"+(v*3+2); 522 DataSetFilter filteredDataSite = CreateFilter (dsJoint,3,siteFilter,"",GeneticCodeExclusions); 523 HarvestFrequencies (observedCEFV,filteredDataSite,3,3,0); 524 tempMx = {stateCharCount,1}; 525 526 hShift = 0; 527 528 for (k=0; k<64; k=k+1) 529 { 530 if (_Genetic_Code[k]==10) 531 { 532 hShift = hShift+1; 533 } 534 else 535 { 536 tempMx[k-hShift] = observedCEFV[k]; 537 } 538 } 539 observedCEFV = tempMx; 540 } 541 542 weightFactor = matrixTrick2*ambInfo; 543 if (weightFactor[0]<stateCharCount) 544 { 545 ambInfo = ambInfo$observedCEFV; 546 547 if (ambChoice) 548 { 549 weightFactor = 0; 550 tempMx = -1; 551 for (k=0; k<stateCharCount; k=k+1) 552 { 553 if (ambInfo[k]>weightFactor) 554 { 555 weightFactor = ambInfo[k]; 556 tempMx = k; 557 } 558 } 559 if (tempMx>=0) 560 { 561 _SITE_OS_COUNT[tempMx][cd2] = _SITE_OS_COUNT[tempMx][cd2] + 1; 562 _SITE_ON_COUNT[tempMx][cd2] = _SITE_ON_COUNT[tempMx][cd2] + 1 ; 563 _SITE_ES_COUNT[tempMx][cd2] = _SITE_ES_COUNT[tempMx][cd2] + branchFactor; 564 _SITE_EN_COUNT[tempMx][cd2] = _SITE_EN_COUNT[tempMx][cd2] + branchFactor; 565 if (_PAIRWISE_S_[tempMx][cd2]) 566 { 567 perBranchMatrix[p1][0] = perBranchMatrix[p1][0]+_OBSERVED_S_[tempMx][cd2]/_PAIRWISE_S_[tempMx][cd2]; 568 } 569 perBranchMatrix[p1][1] = perBranchMatrix[p1][1]+_OBSERVED_NS_[tempMx][cd2]/_PAIRWISE_NS_[tempMx][cd2]; 570 } 571 } 572 else 573 { 574 weightFactor = matrixTrick2*ambInfo; 575 weightFactor = weightFactor[0]; 576 if (weightFactor) 577 { 578 ambInfo = ambInfo * (1/weightFactor); 579 for (k=0; k<stateCharCount; k=k+1) 580 { 581 weightFactor = ambInfo[k]; 582 if (weightFactor>0) 583 { 584 _SITE_OS_COUNT[k][cd2] = _SITE_OS_COUNT[k][cd2] + weightFactor; 585 _SITE_ON_COUNT[k][cd2] = _SITE_ON_COUNT[k][cd2] + weightFactor; 586 _SITE_ES_COUNT[k][cd2] = _SITE_ES_COUNT[k][cd2] + weightFactor*branchFactor; 587 _SITE_EN_COUNT[k][cd2] = _SITE_EN_COUNT[k][cd2] + weightFactor*branchFactor; 588 589 if (_PAIRWISE_S_[k][cd2]) 590 { 591 perBranchMatrix[p1][0] = perBranchMatrix[p1][0]+weightFactor*_OBSERVED_S_[k][cd2]/_PAIRWISE_S_[k][cd2]; 592 } 593 perBranchMatrix[p1][1] = perBranchMatrix[p1][1]+weightFactor*_OBSERVED_NS_[k][cd2]/_PAIRWISE_NS_[k][cd2]; 594 } 595 } 596 } 597 } 598 } 599 } 600 } 601 602 _SITE_OS_COUNT = matrixTrick2*(_OBSERVED_S_$_SITE_OS_COUNT)*Transpose(matrixTrick2); 603 _SITE_ON_COUNT = matrixTrick2*(_OBSERVED_NS_$_SITE_ON_COUNT)*Transpose(matrixTrick2); 604 _SITE_ES_COUNT = matrixTrick2*(_PAIRWISE_S_$_SITE_ES_COUNT)*Transpose(matrixTrick2); 605 _SITE_EN_COUNT = matrixTrick2*(_PAIRWISE_NS_$_SITE_EN_COUNT)*Transpose(matrixTrick2); 606 607 resultMatrix[v][0] = _SITE_OS_COUNT[0]; 608 resultMatrix[v][1] = _SITE_ON_COUNT[0]; 609 resultMatrix[v][2] = _SITE_ES_COUNT[0]; 610 resultMatrix[v][3] = _SITE_EN_COUNT[0]; 611 612 weight = _SITE_EN_COUNT[0]+_SITE_ES_COUNT[0]; 613 614 p = _SITE_ES_COUNT[0]/weight; 615 616 resultMatrix[v][5] = p; 617 618 p2 = resultMatrix[v][0]+resultMatrix[v][1]; 619 620 resultMatrix[v][7] = resultMatrix[v][1]/resultMatrix[v][3]; 621 622 if (resultMatrix[v][2]) 623 { 624 resultMatrix[v][6] = resultMatrix[v][0]/resultMatrix[v][2]; 625 resultMatrix[v][8] = resultMatrix[v][7]-resultMatrix[v][6]; 626 resultMatrix[v][11] = resultMatrix[v][8]/totalTreeLength; 627 } 628 629 if (p2) 630 { 631 resultMatrix[v][4] = resultMatrix[v][0]/p2; 632 if (distribChoice) 633 { 634 p3 = p2$1; 635 p4 = ((resultMatrix[v][0]*6)+0.5)$1; 636 637 waP = 0; 638 waN = 0; 639 640 if (p3 != p2) 641 { 642 /* use averaging */ 643 fprintf (stdout, "Interpolating at site ", v, ": ", p3, " ", p3+1, " ", p2, "\n"); 644 645 w1 = p2-p3; 646 w2 = 1-w1; 647 648 for (kk2 = 0; kk2 < stateCharCount; kk2 = kk2+1) 649 { 650 ambInfo = SUPPORT_MATRIX_LIST[kk2][v]; 651 if (ambInfo>0.000001) 652 { 653 subMatrixAVL = simulatedNullAVL[kk2]; 654 subMatrixAVL1 = subMatrixAVL[p3]; 655 subMatrixAVL2 = subMatrixAVL[p3+1]; 656 if (Abs(subMatrixAVL1) && Abs(subMatrixAVL2)) 657 { 658 pv1 = subMatrixAVL1[p4+1][1]; 659 pv2 = subMatrixAVL2[p4+1][1]; 660 if (subMatrixAVL1[0][0] >= 100 && subMatrixAVL2[0][0] >= 100) 661 { 662 waP = waP + (pv1*w1+pv2*w2)*ambInfo; 663 if (p4 == 0) 664 { 665 waN = waN + ambInfo; 666 } 667 else 668 { 669 waN = waN + (1 + w1*(pv1 - 2*subMatrixAVL1[p4][1]) + w2*(pv2 - 2*subMatrixAVL2[p4][1])) * ambInfo; 670 } 671 } 672 else 673 { 674 fprintf (stdout, "Bailing at site ", v, " codon ", kk2, " sim count ", subMatrixAVL[0][0], "\n"); 675 break; 676 } 677 } 678 else 679 { 680 fprintf (stdout, "Bailing at site ", v, " codon ", kk2 , " sim count 0 \n"); 681 break; 682 } 683 } 684 } 685 } 686 else 687 { 688 for (kk2 = 0; kk2 < stateCharCount; kk2 = kk2+1) 689 { 690 ambInfo = SUPPORT_MATRIX_LIST[kk2][v]; 691 if (ambInfo>0.000001) 692 { 693 subMatrixAVL = simulatedNullAVL[kk2]; 694 subMatrixAVL = subMatrixAVL[p3]; 695 if (Abs(subMatrixAVL)) 696 { 697 if (subMatrixAVL[0][0] >= 100) 698 { 699 waP = waP + subMatrixAVL[p4+1][1]*ambInfo; 700 if (p4 == 0) 701 { 702 waN = waN + ambInfo; 703 } 704 else 705 { 706 waN = waN + (1 + subMatrixAVL[p4+1][1] - 2*subMatrixAVL[p4][1]) * ambInfo; 707 } 708 } 709 else 710 { 711 fprintf (stdout, "Bailing at site ", v, " codon ", kk2, " sim count ", subMatrixAVL[0][0], "\n"); 712 break; 713 } 714 } 715 else 716 { 717 fprintf (stdout, "Bailing at site ", v, " codon ", kk2 , " sim count 0 \n"); 718 break; 719 } 720 } 721 } 722 } 723 724 725 726 if (kk2 == stateCharCount) 727 { 728 resultMatrix[v][9] = waP; 729 resultMatrix[v][10] = waN; 730 continue; 731 } 732 } 733 resultMatrix[v][9] = extendedBinTail (p2,p,resultMatrix[v][0]); 734 if (resultMatrix[v][0]>=1) 735 { 736 resultMatrix[v][10] = 1-extendedBinTail(p2,p,resultMatrix[v][0]-1); 737 } 738 else 739 { 740 resultMatrix[v][10] = 1-extendedBinTail (p2,p,0); 741 } 742 } 743} 744 745perBranchMatrix = perBranchMatrix * (1/filteredData.sites); 746if (pipeThroughFlag == 0) 747{ 748 749 labelMatrix = {1,12}; 750 labelMatrix[0] = "Observed S Changes"; 751 labelMatrix[1] = "Observed NS Changes"; 752 labelMatrix[2] = "E[S Sites]"; 753 labelMatrix[3] = "E[NS Sites]"; 754 labelMatrix[4] = "Observed S. Prop."; 755 labelMatrix[5] = "P{S}"; 756 labelMatrix[6] = "dS"; 757 labelMatrix[7] = "dN"; 758 labelMatrix[8] = "dN-dS"; 759 labelMatrix[9] = "P{S leq. observed}"; 760 labelMatrix[10] = "P{S geq. observed}"; 761 labelMatrix[11] = "Scaled dN-dS"; 762 763 sigLevel = -1; 764 765 fprintf (stdout, "\nTotal codon tree length (subs/nuc/unit time): ", Format (totalTreeLength,10,5), "\n"); 766 767 while ((sigLevel<=0)||(sigLevel>=1)) 768 { 769 fprintf (stdout, "\nSignificance level for a site to be classified as positively/negatively selected?"); 770 fscanf (stdin, "Number", sigLevel); 771 } 772 773 posSelected = 0; 774 negSelected = 0; 775 776 p = Rows(resultMatrix); 777 778 for (p2=0; p2<p; p2=p2+1) 779 { 780 v = resultMatrix [p2][8]; 781 if (v>0) 782 { 783 if (resultMatrix [p2][9] < sigLevel) 784 { 785 posSelected = posSelected+1; 786 } 787 } 788 else 789 { 790 if (v<0) 791 { 792 if (resultMatrix [p2][10] < sigLevel) 793 { 794 negSelected = negSelected+1; 795 } 796 } 797 } 798 } 799 800 selLabelMatrix = {{"Index","Site Index","dN-dS","p-value"}}; 801 802 if (posSelected) 803 { 804 psMatrix = {posSelected, 3}; 805 h = 0; 806 for (p2=0; p2<p; p2=p2+1) 807 { 808 v = resultMatrix [p2][8]; 809 if (v>0) 810 { 811 if (resultMatrix [p2][9] < sigLevel) 812 { 813 psMatrix[h][0] = p2+1; 814 psMatrix[h][1] = v; 815 psMatrix[h][2] = resultMatrix [p2][9]; 816 h = h+1; 817 } 818 } 819 } 820 821 fprintf (stdout,"\n******* FOUND ", posSelected, " POSITIVELY SELECTED SITES ********\n\n"); 822 dummy = PrintASCIITable (psMatrix, selLabelMatrix); 823 } 824 else 825 { 826 fprintf (stdout,"\n******* FOUND NO POSITIVELY SELECTED SITES ********\n\n"); 827 } 828 829 if (negSelected) 830 { 831 psMatrix = {negSelected, 3}; 832 h = 0; 833 for (p2=0; p2<p; p2=p2+1) 834 { 835 v = resultMatrix [p2][8]; 836 if (v<0) 837 { 838 if (resultMatrix [p2][10] < sigLevel) 839 { 840 psMatrix[h][0] = p2+1; 841 psMatrix[h][1] = v; 842 psMatrix[h][2] = resultMatrix [p2][10]; 843 h = h+1; 844 } 845 } 846 } 847 848 fprintf (stdout,"\n******* FOUND ", negSelected, " NEGATIVELY SELECTED SITES ********\n\n"); 849 dummy = PrintASCIITable (psMatrix, selLabelMatrix); 850 } 851 else 852 { 853 fprintf (stdout,"\n******* FOUND NO NEGATIVELY SELECTED SITES ********\n\n"); 854 } 855 856 ChoiceList (outputChoice, "Output Options",1,SKIP_NONE, 857 "ASCII Table", "Output is printed to the console as an ASCII table.", 858 "Export to File","Output is spooled to a tab separated file.", 859 "Chart","A HYPHY chart window is displayed (Mac/Doze Only)."); 860 if (outputChoice<0) 861 { 862 return; 863 } 864 865 if (outputChoice==0) 866 { 867 dummy = PrintASCIITable (resultMatrix, labelMatrix); 868 } 869 else 870 { 871 if (outputChoice == 1) 872 { 873 SetDialogPrompt ("Save result table to"); 874 dummy = PrintTableToFile (resultMatrix, labelMatrix, !pipeThroughFlag); 875 } 876 else 877 { 878 OpenWindow (CHARTWINDOW,{{"Rates by sites"} 879 {"labelMatrix"} 880 {"resultMatrix"} 881 {"Contrast Bars"} 882 {"Index"} 883 {labelMatrix[6]+";"+labelMatrix[7]} 884 {"Site Index"} 885 {"dN"} 886 {"dS"} 887 {"0"} 888 {""} 889 {"-1;-1"} 890 {"10;1.309;0.785398"} 891 {"Times:12:0;Times:10:0;Times:12:2"} 892 {"0;0;16777215;1644825;0;0;6579300;11842740;13158600;14474460;0;3947580;16777215;5000268;6845928;16771158;2984993;9199669;7018159;1460610;16748822;11184810;14173291"} 893 {"16,0,0"} 894 }, 895 "(SCREEN_WIDTH-60)/2;(SCREEN_HEIGHT-50)/2;50;50"); 896 } 897 } 898 899 defString = ""; 900 defString * 8192; 901 for (h=Rows(perBranchMatrix)-2; h>=0; h=h-1) 902 { 903 defString * ("_branchScaledTree."+branchNames[h]+".dS = "+perBranchMatrix[h][0]); 904 defString * (";_branchScaledTree."+branchNames[h]+".dN = "+perBranchMatrix[h][1]+";"); 905 if (perBranchMatrix[h][1]+perBranchMatrix[h][0]) 906 { 907 defString * ("_branchScaledTree."+branchNames[h]+".dNdS = "+Min(perBranchMatrix[h][1]/perBranchMatrix[h][0],5)+";"); 908 } 909 else 910 { 911 defString * ("_branchScaledTree."+branchNames[h]+".dNdS = 0;"); 912 } 913 } 914 defString * 0; 915 ExecuteCommands (defString); 916 UseModel (USE_NO_MODEL); 917 REPLACE_TREE_STRUCTURE = 1; 918 Tree _branchScaledTree = ""+givenTree; 919 REPLACE_TREE_STRUCTURE = 0; 920 OpenWindow (TREEWINDOW,{{"_branchScaledTree"}{"8211"}{""}{"dNdS"}},"SCREEN_WIDTH/2-30;SCREEN_HEIGHT-120;100;100"); 921} 922