1ModelMatrixDimension = 0; 2 3baselineOutput = ""; 4 5/*---------------------------------------------------------------------------------------------------------------------------------------------------*/ 6 7function computeExpSubWeights (dum) 8{ 9 rateTypeWeights = {6,2}; 10 rateTypeWeightsGY94 = {6,2}; 11 12 hshift = 0; 13 14 for (h=0; h<64; h=h+1) 15 { 16 if (_Genetic_Code[h]==10) 17 { 18 continue; 19 } 20 for (h=0; h<64; h=h+1) 21 { 22 if (_Genetic_Code[h]==10) 23 { 24 hshift = hshift+1; 25 continue; 26 } 27 vshift = hshift; 28 for (v = h+1; v<64; v=v+1) 29 { 30 diff = v-h; 31 if (_Genetic_Code[v]==10) 32 { 33 vshift = vshift+1; 34 continue; 35 } 36 nucPosInCodon = 2; 37 if ((h$4==v$4)||((diff%4==0)&&(h$16==v$16))||(diff%16==0)) 38 { 39 if (h$4==v$4) 40 { 41 transition = v%4; 42 transition2= h%4; 43 } 44 else 45 { 46 if(diff%16==0) 47 { 48 transition = v$16; 49 transition2= h$16; 50 nucPosInCodon = 0; 51 } 52 else 53 { 54 transition = v%16$4; 55 transition2= h%16$4; 56 nucPosInCodon = 1; 57 } 58 } 59 60 mxIndex = 0; 61 if (transition<transition2) 62 { 63 t1 = transition; 64 t2 = transition2; 65 } 66 else 67 { 68 t1 = transition2; 69 t2 = transition; 70 } 71 72 if (t1 == 0) 73 { 74 mxIndex = t2-1; 75 } 76 else 77 { 78 if (t1 == 1) 79 { 80 mxIndex = 1+t2; 81 } 82 else 83 { 84 mxIndex = 5; 85 } 86 } 87 88 t1 = (_Genetic_Code[0][h]!=_Genetic_Code[0][v]); 89 rateTypeWeights[mxIndex][t1] = rateTypeWeights[mxIndex][t1]+ 90 vectorOfFrequencies[h-hshift]*observedFreq[transition][nucPosInCodon]+ 91 vectorOfFrequencies[v-vshift]*observedFreq[transition2][nucPosInCodon]; 92 rateTypeWeightsGY94[mxIndex][t1] = rateTypeWeightsGY94[mxIndex][t1]+ 93 2*vectorOfFrequencies[h-hshift]*vectorOfFrequencies[v-vshift]; 94 } 95 } 96 } 97 } 98 return 0; 99} 100 101/*---------------------------------------------------------------------------------------------------------------------------------------------------*/ 102 103/* echoCatVar: calculate category mean and variance; output distribution info */ 104function echoCatVar (distrInfo) 105{ 106 DD = Columns(distrInfo); 107 EE = 0.0; 108 sampleVar = 0.0; 109 cumulative_weight = 0.0; 110 median = -1; 111 for (k=0; k<DD; k=k+1) 112 { 113 T = distrInfo[0][k]*distrInfo[1][k]; 114 EE = EE+T; 115 sampleVar = T*distrInfo[0][k]+sampleVar; 116 cumulative_weight = cumulative_weight + distrInfo[1][k]; 117 if (cumulative_weight >= 0.5 && median < 0) 118 { 119 median = distrInfo[0][k]; 120 } 121 } 122 123 sampleVar = sampleVar-EE*EE; 124 125 fprintf (distribOutput,"\n------------------------------------------------\n\nSample mean = ",EE, " (sample variance = ",sampleVar,", median = ",median,")\n"); 126 127 for (k=0; k<DD; k=k+1) 128 { 129 fprintf (distribOutput,"\nRate[",Format(k,0,0),"]=",Format(distrInfo[0][k],12,8), " (weight=", 130 Format(distrInfo[1][k],9,7),")"); 131 } 132 fprintf (distribOutput,"\n------------------------------------------------\n\n"); 133 return EE; 134} 135 136/*---------------------------------------------------------------------------------------------------------------------------------------------------*/ 137 138function echoRatio (distrInfo, distrInfo2) 139{ 140 D1 = Columns(distrInfo); 141 D2 = Columns(distrInfo2); 142 143 ratioInfo = {3,D1*D2}; 144 145 for (k=0; k<D1; k=k+1) 146 { 147 EE = k*D2; 148 for (k2 = 0; k2<D2; k2=k2+1) 149 { 150 ratioInfo [0][EE+k2] = distrInfo[0][k]/distrInfo2[0][k2]; 151 ratioInfo [1][EE+k2] = distrInfo[1][k]*distrInfo2[1][k2]; 152 ratioInfo [2][EE+k2] = k2*D1+k; 153 } 154 } 155 156 done = 0; 157 EE = D1*D2; 158 while (!done) 159 { 160 done = 1; 161 for (k=1; k<EE; k=k+1) 162 { 163 if (ratioInfo [0][k] < ratioInfo[0][k-1]) 164 { 165 DD = ratioInfo [0][k]; 166 ratioInfo [0][k] = ratioInfo [0][k-1]; 167 ratioInfo [0][k-1] = DD; 168 DD = ratioInfo [1][k]; 169 ratioInfo [1][k] = ratioInfo [1][k-1]; 170 ratioInfo [1][k-1] = DD; 171 DD = ratioInfo [2][k]; 172 ratioInfo [2][k] = ratioInfo [2][k-1]; 173 ratioInfo [2][k-1] = DD; 174 done = 0; 175 } 176 } 177 } 178 return echoCatVar (ratioInfo); 179} 180 181/*---------------------------------------------------------------------------------------------------------------------------------------------------*/ 182 183/* F3x4 empirical frequency estimation; obsF is a 4x3 matrix containing position-specific nucleotide frequencies: */ 184function BuildCodonFrequencies (obsF) 185{ 186 PIStop = 1.0; 187 result = {ModelMatrixDimension,1}; 188 hshift = 0; 189 190 for (h=0; h<64; h=h+1) 191 { 192 first = h$16; 193 second = h%16$4; 194 third = h%4; 195 if (_Genetic_Code[h]==10) 196 { 197 hshift = hshift+1; 198 PIStop = PIStop-obsF[first][0]*obsF[second][1]*obsF[third][2]; 199 continue; 200 } 201 result[h-hshift][0]=obsF[first][0]*obsF[second][1]*obsF[third][2]; 202 } 203 return result*(1.0/PIStop); 204} 205 206/*---------------------------------------------------------------------------------------------------------------------------------------------------*/ 207fprintf (stdout, "\n+--------------------------------+\n", 208 "| PARRIS SELECTION TEST |\n", 209 "| written by |\n", 210 "| Konrad Scheffler |\n", 211 "| and |\n", 212 "| Cathal Seoighe |\n", 213 "| |\n", 214 "| If you use this analysis |\n", 215 "| in a publication, please cite |\n", 216 "| Bioinformatics, 22:2493�2499 |\n", 217 "+--------------------------------+\n"); 218 219 220#include "TemplateModels/chooseGeneticCode.def"; 221#include "_MFReader_.ibf"; 222 223/* _MFReader_ stores position-specific nucleotide frequencies in positionFrequencies (4x3) 224 and overall nucleotide frequencies in overallFrequencies (4x1). */ 225observedFreq = positionFrequencies; 226 227/* Input whether to optimise branch lengths in codon model or use pre-optimised nucleotide model branch lengths: */ 228ChoiceList (branchLengths,"Branch Lengths",1,SKIP_NONE, 229 "Codon Model","Jointly optimize rate parameters and branch lengths (slow and thorough)", 230 "Nucleotide Model","Estimate branch lengths once, using an appropriate nucleotide model (quick and dirty)." 231 ); 232 233if (branchLengths<0) 234{ 235 return 0; 236} 237 238/* Input type of underlying nucleotide model (optionally with "multi") and set up constraints: */ 239/* (In earlier versions these options were input as modelChoice using a single ChoiceList.) */ 240modelConstraintString = ""; 241 242ChoiceList (MGGYChoice, "Options for handling equilibrium frequencies",1,SKIP_NONE, 243 "MG", "Muse-Gaut 94: rates are proportional to position-specific target nucleotide frequency.", 244 "GY", "Goldman-Yang 94: rates are proportional to target codon frequency (F3x4 estimate)." 245 ); 246 247if (MGGYChoice<0) 248{ 249 return 0; 250} 251 252ChoiceList (nucModelChoice, "Nucleotide Rate Matrix Options",1,SKIP_NONE, 253 "Jukes-Cantor", "Single rate type (all substitutions equally likely).", 254 "HKY85","Two rate types (transition/transverion ratio parameter kappa).", 255 "REV","Fully reversible model (6 rate types - recommended rate matrix)", 256 "Custom","Arbitrary nucleotide reversible model, except F81." 257 ); 258 259if (nucModelChoice<0) 260{ 261 return 0; 262} 263 264ChoiceList (AAModelChoice, "Options for multiple classes of non-synonymous substitutions",1,SKIP_NONE, 265 "Single","Only one class of non-synonymous substitutions (standard model).", 266 "Multi","Multiple classes of non-synonymous substitutions.", 267 "NMulti","Multi with numerical bias corrections for various amino-acid substitutions." 268 ); 269 270if (AAModelChoice<0) 271{ 272 return 0; 273} 274 275modelInFile = "2RatesAnalyses/MG94GY94xREV_PARRIS_syn3.mdl"; 276if (MGGYChoice == 0) 277{ 278 ModelTitle = "MG94"; 279} 280else 281{ 282 ModelTitle = "GY94"; 283} 284 285if (nucModelChoice == 0) 286{ 287 modelConstraintString = "AC:=1;AT:=1;CG:=1;CT:=1;GT:=1"; 288} 289else 290{ 291 if (nucModelChoice == 1) 292 { 293 modelConstraintString = "CT:=1;AT:=AC;CG:=AC;GT:=AC"; 294 ModelTitle = ModelTitle+"xHKY85"; 295 } 296 else 297 { 298 if (nucModelChoice == 3) 299 { 300 done = 0; 301 while (!done) 302 { 303 fprintf (stdout,"\nPlease enter a 6 character model designation (e.g:010010 defines HKY85):"); 304 fscanf (stdin,"String", modelDesc); 305 if (Abs(modelDesc)==6) 306 { 307 done = 1; 308 } 309 } 310 ModelTitle = ModelTitle+modelDesc[0]; 311 312 rateBiasTerms = {{"AC","1","AT","CG","CT","GT"}}; 313 paramCount = 0; 314 315 modelConstraintString = ""; 316 317 for (customLoopCounter2=1; customLoopCounter2<6; customLoopCounter2=customLoopCounter2+1) 318 { 319 for (customLoopCounter=0; customLoopCounter<customLoopCounter2; customLoopCounter=customLoopCounter+1) 320 { 321 if (modelDesc[customLoopCounter2]==modelDesc[customLoopCounter]) 322 { 323 ModelTitle = ModelTitle+modelDesc[customLoopCounter2]; 324 if (rateBiasTerms[customLoopCounter2] == "1") 325 { 326 modelConstraintString = modelConstraintString + rateBiasTerms[customLoopCounter]+":="+rateBiasTerms[customLoopCounter2]+";"; 327 } 328 else 329 { 330 modelConstraintString = modelConstraintString + rateBiasTerms[customLoopCounter2]+":="+rateBiasTerms[customLoopCounter]+";"; 331 } 332 break; 333 } 334 } 335 if (customLoopCounter==customLoopCounter2) 336 { 337 ModelTitle = ModelTitle+modelDesc[customLoopCounter2]; 338 } 339 } 340 } 341 else 342 { 343 ModelTitle = ModelTitle+"xREV"; 344 } 345 } 346 347 if (AAModelChoice > 0) 348 { 349 fprintf(stdout, "Warning: running untested multi code!"); 350 if (AAModelChoice == 2) 351 { 352 ModelTitle = ModelTitle+"xNMulti"; 353 _AA_RM_NUMERIC = 2; 354 } 355 else 356 { 357 ModelTitle = ModelTitle+"xMulti"; 358 } 359 #include "TemplateModels/MGwAA.ibf"; 360 rateKeys = Rows (aaRateClassIDs); 361 userAARateMultipliers = {21,21}; 362 if (AAModelChoice == 2) 363 { 364 _AA_RM_NUMERIC = 0; 365 for (h=0; h<21;h=h+1) 366 { 367 for (v=0; v<21; v=v+1) 368 { 369 userAARateMultipliers[h][v] = "R*" + aaRateMultipliers[h][v] + "*"; 370 userAARateMultipliers[v][h] = "R*" + aaRateMultipliers[v][h] + "*"; 371 } 372 } 373 } 374 else 375 { 376 for (aaIndex = 0; aaIndex < Abs(aaRateClassIDs); aaIndex = aaIndex+1) 377 { 378 ExecuteCommands ("global DN_"+rateKeys[aaIndex]+" = 1;"); 379 } 380 for (h=0; h<21;h=h+1) 381 { 382 for (v=0; v<21; v=v+1) 383 { 384 userAARateMultipliers[h][v] = "DN_" + aaRateMultipliers[h][v] + "*"; 385 userAARateMultipliers[v][h] = "DN_" + aaRateMultipliers[v][h] + "*"; 386 } 387 } 388 } 389 } 390} 391 392ChoiceList (rateVarModelChoice, "Rate Variation Models",1,SKIP_NONE, 393 "Constant","Constant Rate Model: no rate variation across sites.", /* index 0 */ 394 "Proportional","Proportional Variable Rates Model: dS and dN vary along the sequence, but dN = R*dS for every site. Recommended model.", 395 "Nonsynonymous","Non-synonymous Variable Rates Model: dS = 1 for every site, while dN is drawn from a given distribution.", 396 "Dual","Dual Variable Rates Model: dS and dN are drawn from a bivariate distribution (independent or correlated components).", 397 "Lineage Dual","Lineage Dual Variable Rates Model: dS and dN are drawn from a bivariate distribution (independent or correlated components), plus each lineage has an adjustment factor for the E[dN]/E[dS]." 398 ); 399 400if (rateVarModelChoice<0) 401{ 402 return 0; 403} 404 405/* For models with dual rate variation, input whether NonSynRate should be independent or a multiplicative funtion of synRate, and whether to model synonymous rate variation at the codon level (syn1) or the nucleotide level (syn3): */ 406multiplicativeNonSynRate = 0; 407nucSynVar = 0; 408if (rateVarModelChoice >= 3) 409{ 410 ChoiceList (multiplicativeNonSynRate, "Independent or multiplicative nonsynonymous rate",1,SKIP_NONE, 411 "Independent","Nonsynonymous rate is independent of synonymous rate.", 412 "Multiplicative","Nonsynonymous rate is omega multiplied by synonymous rate." 413 ); 414 if (multiplicativeNonSynRate<0) 415 { 416 return 0; 417 } 418 419 ChoiceList (nucSynVar, "Codon or nucleotide level synonymous rate variation",1,SKIP_NONE, 420 "Codon (syn1)","1 synonymous rate per codon.", 421 "Nucleotide (syn3)","3 synonymous rates per codon.", 422 /*"syn2", "2 independent synonymous rates per codon (2nd pos is avg of 1st and 3rd)"*/ 423 ); 424 if (nucSynVar<0) 425 { 426 return 0; 427 } 428} 429 430/* Input list of models to be run (out of various rate variation options): */ 431/* (chosenModelList will contain nonzero entries for selected models) */ 432num_models = 8; 433chosenModelList = {num_models,1}; 434 435ChoiceList (modelChoice,"Distribution Options",1,SKIP_NONE, 436 "Run All","Run all available models.", 437 "Run Custom","Choose some of the available models.", 438 "PARRIS","Run PARRIS model comparison."); 439 440if (modelChoice<0) 441{ 442 return 0; 443} 444 445if (modelChoice==2) 446{ 447 for (mi = 0; mi<5; mi=mi+1) 448 { 449 chosenModelList[mi][0] = 0; 450 } 451 chosenModelList[5][0] = 1; 452 chosenModelList[6][0] = 1; 453 resp = 3; /* nr of synonymous rate classes (not used for PARRIS models except in output and postprocessing) */ 454 resp2 = 3; /* nr of nonsynonymous rate classes (in alternative model) (not used for PARRIS models except in output and postprocessing) */ 455 USE_LAST_RESULTS = 1; 456} 457else 458{ 459 460/* Input nrs of rate classes: (presumably unused when they don't apply) */ 461 resp = 0; 462 resp2 = 0; 463 464 while (resp<2) 465 { 466 fprintf (stdout,"Number of synonymous (and single variable rate models) rate classes (>=2):"); 467 fscanf (stdin, "Number", resp); 468 } 469 470 while (resp2<2) 471 { 472 fprintf (stdout,"Number of non-synonymous rate classes (>=2):"); 473 fscanf (stdin, "Number", resp2); 474 } 475 476 if (modelChoice==0) 477 { 478 for (mi = 0; mi<num_models; mi=mi+1) 479 { 480 chosenModelList[mi][0] = 1; 481 } 482 } 483 else 484 { 485 486/* Input distribution options: */ 487/* M3 is similar to Independent Discrete, differences being that it fixes the nr of rate categories and that it 488 allow use of syn3 files. Independent Discrete is the older (KP) implementation and does not support syn3. */ 489 490 ChoiceList (distribChoices, "Distribution Option",0,SKIP_NONE, 491 "Syn:Gamma, Non-syn:Gamma", "Both syn and non-syn rates are drawn from the gamma distributions for all models.", 492 "Syn:Gamma, Non-syn:Inv+Gamma","Rates are drawn from the gamma distributions for Proportional and Nonsynonymous. For Dual and Local Dual, syn rates are drawn from the gamma distribution, and non-syn rates - from Inv+Gamma.", 493 "Independent Discrete", "Independent General Discrete Distributions (Recommended setting)", 494 "Correlated Discrete", "Correlated General Discrete Distributions", 495 "Non-positive Discrete", "General Discrete Distribution for dS, and dN, but constrained so that dN<=dS. Useful to perform a LRT for presence of selection in an alignment", 496 "M1a", "General Discrete Distribution for dS (3 cat), M1a omega distribution", 497 "M2a", "General Discrete Distribution for dS (3 cat), M2a omega distribution", 498 "M3", "General Discrete Distribution for dS (3 cat), M3 omega distribution (3 cat)"); 499 500 501 if (distribChoices[0] < 0) 502 { 503 return 0; 504 } 505 506 for (mi = 0; mi < Rows(distribChoices)*Columns(distribChoices); mi = mi + 1) 507 { 508 modelChoice = distribChoices[mi]; 509 chosenModelList[modelChoice] = 1; 510 } 511 } 512} 513modelNamesShort = {{"GamGam","GamIGam","Discrete","CorrDiscrete","NPDiscrete","M1a","M2a", "M3"}}; 514 515 516/* Input whether to use default or randomised initial values for rate distribution parameters: */ 517ChoiceList (randomizeInitValues, "Initial Value Options",1,SKIP_NONE, 518 "Default", "Use default inital values for rate distribution parameters.", 519 "Randomized", "Select initial values for rate distribution parameters at random."); 520 521 522if (randomizeInitValues < 0) 523{ 524 return 0; 525} 526 527/* This variable is dereferenced in the unused function SetCodonNorm in the MG and GY model definition files, where it is used to scale codonFactor; keep it set to 1 and it will have no effect even if SetCodonNorm is used: */ 528fudgeFactor = 1.0; 529 530 531SetDialogPrompt ("Save summary result file to:"); 532fprintf (PROMPT_FOR_FILE,CLEAR_FILE); 533baselineOutput = LAST_FILE_PATH; 534 535distribOutput = baselineOutput + ".distributions"; 536fprintf (distribOutput,CLEAR_FILE); 537 538 539/* Don't know why this is negated: */ 540if (branchLengths) 541{ 542 branchLengths = -branchLengths; 543} 544 545 546/* Output stuff: */ 547separator = "+--------------------+----------------+---------------+-----------------+------------------+-----------+-----+-----------+\n"; 548 549fprintf (stdout, "\n\nRUNNING ",ModelTitle," MODEL COMPARISONS on ",fileCount, " files\n\n", 550 "\n\n########### ",resp,"x",resp2," CLASSES ###########\n\n", separator, 551 "| Model | Log Likelihood | Synonymous CV | NS Exp and CV | N/S Exp and CV | P-Value | Prm | AIC |\n", separator); 552 553fprintf (baselineOutput, "\n\nRUNNING ",ModelTitle," MODEL COMPARISONS on ",dataFilePath, "\n\n", 554 "\n\n########### ",resp,"x",resp2," CLASSES ###########\n\n", separator, 555 "| Model | Log Likelihood | Synonymous CV | NS Exp and CV | N/S Exp and CV | P-Value | Prm | AIC |\n", separator); 556 557 558modelNames = {{"| Gamma, Gamma |", 559 "| Gamma, Inv+Gamma |", 560 "| Independent Discr |", 561 "| Correlated Discr |", 562 "| Non-positive Discr |", 563 "| discr(3), M1a |", 564 "| discr(3), M2a |", 565 "| discr(3), discr(3) |"}}; 566 567 568if (MPI_NODE_COUNT>1) 569/* Check to see if we are running with more than one MPI node. 570 If not - bail to single CPU execution */ 571{ 572 MPINodeState = {MPI_NODE_COUNT-1,2}; 573 574 /* One of the nodes (0) will be the dispatcher, and 575 for every other node, we need to store two values: 576 whether it is busy or not (0 or 1), and 577 an integer (from 0 to 5) to indicate which model 578 (M0-M5) the node is working on */ 579 580 OPTIMIZE_SUMMATION_ORDER = 0; 581 582 /* the master node will be creating likelihood functions, 583 but not evaluating them - thus there is no need to perform 584 the extra step of optimizing data column ordering for faster 585 likelihood evaluations */ 586} 587 588doNucFit = 1; 589 590for (midx = 0; midx<num_models; midx=midx+1) 591{ 592 if (chosenModelList[midx]) 593 { 594 595/* Depending on distribution option, read category definitions from file: */ 596/* for gamma models, read from file into categDef1 (for syn distribution) and categDef2 597 (for nonsyn distribution) variables; 598 for discrete models, execute generator file which will create these variables (randomizeInitValues is used). 599 The category definitions are string variables to be executed in the mdl files. */ 600 if (midx<2) /* gamma rate models */ 601 { 602 fscanf ("2RatesAnalyses/gamma1.def","Raw",categDef1); 603 604 if (midx == 0) 605 { 606 fscanf ("2RatesAnalyses/gamma2.def","Raw",categDef2); 607 } 608 else 609 { 610 fscanf ("2RatesAnalyses/gamma2+Inv.def","Raw",categDef2); 611 } 612 } 613 else 614 { 615 if (midx < 5) /* general discrete rate models */ 616 { 617 if (midx < 4) 618 { 619 correlationOn = (distribChoice>2); 620 fscanf ("2RatesAnalyses/discreteGenerator.bf","Raw",tempstr); 621 } 622 else 623 { 624 fscanf ("2RatesAnalyses/discreteGeneratorNoPS.bf","Raw",tempstr); 625 } 626 ExecuteCommands (tempstr); 627 } 628 else /* PARRIS rate models */ 629 { 630 if (nucSynVar) 631 { 632 fscanf ("2RatesAnalyses/PARRIS_syn3.def","Raw",categDef1); 633 } 634 else 635 { 636 fscanf ("2RatesAnalyses/PARRIS_synvar.def","Raw",categDef1); 637 } 638 639 if (midx == 5) /* null (M1a omega distrib) */ 640 { 641 fscanf ("2RatesAnalyses/PARRIS_M1.def","Raw",categDef2); 642 } 643 if (midx == 6) /* alternative (M2a omega distrib) */ 644 { 645 fscanf ("2RatesAnalyses/PARRIS_M2.def","Raw",categDef2); 646 } 647 if (midx == 7) /* PARRIS discrete (M3 omega distrib) */ 648 { 649 fscanf ("2RatesAnalyses/PARRIS_M3.def","Raw",categDef2); 650 } 651 } 652 } 653 654/* execute model file: */ 655 ExecuteAFile (modelInFile); 656 657 if (Abs(modelConstraintString)) 658 { 659 ExecuteCommands (modelConstraintString); 660 } 661 662 663 theRateMatrix = 0; 664 665 MULTIPLY_BY_FREQS = PopulateModelMatrix ("theRateMatrix", observedFreq, rateVarModelChoice); 666 vectorOfFrequencies = BuildCodonFrequencies (observedFreq); 667 668 if (doNucFit) /* Fit nucleotide model to optimise branch lengths if this hasn't been done already: */ 669 { 670 ExecuteCommands (nucModelString+"\nModel nucModel = (nucModelMatrix,overallFrequencies);"); 671 672 populateTrees ("nucTree", fileCount); 673 ExecuteCommands (constructLF ("nuc_lf", "nucData", "nucTree", fileCount)); 674 Optimize (nuc_res, nuc_lf); 675 676 computeExpSubWeights (0); 677 global codonFactor = 0.33; 678 679 /* Count nr of branch lengths optimised: */ 680 totNumBranchLengths = 0; 681 for (fileID = 1; fileID <= fileCount; fileID = fileID + 1) 682 { 683 ExecuteCommands ("totNumBranchLengths = totNumBranchLengths + TipCount (nucTree_"+fileID+") + BranchCount (nucTree_"+fileID+");"); 684 } 685 686 doNucFit = 0; 687 } 688 689 Model theModel = (theRateMatrix,vectorOfFrequencies,MULTIPLY_BY_FREQS); 690 populateTrees ("givenTree", fileCount); 691 692 693 for (fileID = 1; fileID <= fileCount; fileID = fileID + 1) 694 { 695 if (branchLengths) 696 { 697 ExecuteCommands ("ClearConstraints (givenTree_" + fileID + ");"); 698 ExecuteCommands ("ReplicateConstraint (\"this1.?.synRate:=this2.?.t__/codonFactor\",givenTree_" + fileID + ",nucTree_" + fileID + ");"); 699 } 700 else 701 { 702 bnames = Eval ("BranchName (givenTree_"+fileID+",-1)"); 703 for (lc = 0; lc < Columns (bnames) - 1; lc = lc+1) 704 { 705 ExecuteCommands ("givenTree_" + fileID + "." + bnames[lc] + ".synRate=nucTree_" + fileID + "." + bnames[lc] + ".t/codonFactor;"); 706 } 707 } 708 } 709 710 ExecuteCommands (constructLF ("lf", "filteredData", "givenTree", fileCount)); 711 712 if (midx >= 4) /* nonpositive discrete distribution and PARRIS models */ 713 { 714 R := 1; 715 } 716 717 if (MPI_NODE_COUNT>1) 718 { 719 /* look for an idle node */ 720 for (mpiNode = 0; mpiNode < MPI_NODE_COUNT-1; mpiNode = mpiNode+1) 721 { 722 if (MPINodeState[mpiNode][0]==0) 723 { 724 break; 725 } 726 } 727 728 if (mpiNode==MPI_NODE_COUNT-1) 729 /* all nodes busy */ 730 { 731 /* wait for some node to complete and send out current job */ 732 mpiNode = ReceiveJobs (1); 733 } 734 else 735 { 736 /* send the job to an idle node; update node state */ 737 MPISend (mpiNode+1,lf); 738 MPINodeState[mpiNode][0] = 1; 739 MPINodeState[mpiNode][1] = midx; 740 } 741 } 742 else 743 { 744 /* Non-MPI execution */ 745 Optimize (res,lf); 746 modelIndex = midx; 747 ReceiveJobs (0); 748 } 749 } 750} 751 752if (MPI_NODE_COUNT>1) 753/* wait for all the jobs to finish, process their results */ 754{ 755 while (1) 756 { 757 for (nodeCounter = 0; nodeCounter < MPI_NODE_COUNT-1; nodeCounter = nodeCounter+1) 758 { 759 if (MPINodeState[nodeCounter][0]==1) 760 { 761 fromNode = ReceiveJobs (0); 762 break; 763 } 764 } 765 if (nodeCounter == MPI_NODE_COUNT-1) 766 { 767 break; 768 } 769 } 770 OPTIMIZE_SUMMATION_ORDER = 1; 771} 772 773/* ____________________________________________________________________________________________________________________*/ 774 775function ReceiveJobs (sendOrNot) 776 777/* This function receieves and processes 778 model results. The parameter is a boolean, 779 set to 1 if there are jobs waiting to be sent to 780 an MPI node */ 781{ 782 if (MPI_NODE_COUNT>1) 783 { 784 785 /* Fetch completed job (waiting if necessary); send new job if one is waiting; update node status; 786 reconstruct likelihood function and result matrix from completed job as if it had run locally. */ 787 788 MPIReceive (-1, fromNode, result_String); 789 modelIndex = MPINodeState[fromNode-1][1]; 790 791 if (sendOrNot) 792 { 793 /* send the likelihood function to the node which just finished */ 794 MPISend (fromNode,lf); 795 MPINodeState[fromNode-1][1] = midx; 796 } 797 else 798 { 799 /* mark the node as idle */ 800 MPINodeState[fromNode-1][0] = 0; 801 MPINodeState[fromNode-1][1] = 0; 802 } 803 804 /* reset category variables */ 805 /* (we are about to build the likelihood function of the completed job, 806 which may be incompatible with constraints still in place from the previous lf) */ 807 808 if (rateVarModelChoice) 809 { 810 ClearConstraints (c); 811 } 812 if (rateVarModelChoice>2) 813 { 814 ClearConstraints (d); 815 if (nucSynVar) 816 { 817 ClearConstraints (c1,c2,c3); 818 } 819 } 820 821 /* build the likelihood function lf with MLE parameter values 822 just returned from an MPI node. MLE matrix is returned 823 in lf_MLES. */ 824 825 ExecuteCommands (result_String); 826 827 /* store the result matrix */ 828 res = lf_MLES; 829 } 830 831 832 /* res[1][1] contains the number of independent parameters optimised. If branchlengths were fixed but 833 previously optimised on a nucleotide model, we need to add these, minus 1 for the codonFactor 834 (tree length scaling) parameter: */ 835 if (branchLengths) 836 { 837 res[1][1] = res[1][1] + totNumBranchLengths - 1; 838 } 839 840 /* Initialise output files, output whole-sequence results: */ 841 LIKELIHOOD_FUNCTION_OUTPUT = 6; 842 843 fprintf (stdout, modelNames[modelIndex], " ", Format (res[1][0],14,5)); 844 fprintf (baselineOutput, modelNames[modelIndex], " ", Format (res[1][0],14,5)); 845 846 fitOutFile = baselineOutput + "." + modelNamesShort[modelIndex] + ".fit"; 847 fprintf (fitOutFile,CLEAR_FILE,lf); 848 849 if (rateVarModelChoice) /* Create output file for marginals if a category variable exists */ 850 { 851 marginalOutFile = baselineOutput + "." + modelNamesShort[modelIndex] + ".marginals"; 852 fprintf (marginalOutFile,CLEAR_FILE); 853 } 854 855 fprintf (distribOutput, modelNames[modelIndex]); 856 if (rateVarModelChoice==0) /* No site-to-site rate variation: just output mean omega (variance of category variable is zero when only a single category exists */ 857 { 858 EE = 0; 859 sampleVar = 0; 860 fprintf (stdout, " | N/A | ", Format (R,7,5) , ",", Format (sampleVar,7,5), " | ", Format (R,7,5) , ",", Format (sampleVar,8,5), " | "); 861 fprintf (baselineOutput, " | N/A | ", Format (R,7,5) , ",", Format (sampleVar,7,5), " | ", Format (R,7,5) , ",", Format (sampleVar,8,5), " | "); 862 } 863 else 864 { 865 /* For models with category variables, calculate and output category mean and variance information: */ 866 if (rateVarModelChoice==2) /* Nonsynonymous rate variation only */ 867 { 868 GetInformation(NSdistrInfo,d); 869 DD = Columns (NSdistrInfo); 870 for (EE=0; EE<DD; EE=EE+1) 871 { 872 NSdistrInfo[0][EE] = R*NSdistrInfo[0][EE]; 873 } 874 875 EE = echoCatVar (NSdistrInfo); 876 fprintf (marginalOutFile, NSdistrInfo); 877 878 fprintf (stdout, " | N/A | ", Format (EE,7,5) , ",", Format (Sqrt(sampleVar)/EE,7,5), " | ", Format (EE,7,5) , ",", Format (Sqrt(sampleVar)/EE,8,5), " | "); 879 fprintf (baselineOutput, " | N/A | ", Format (EE,7,5) , ",", Format (Sqrt(sampleVar)/EE,7,5), " | ", Format (EE,7,5) , ",", Format (Sqrt(sampleVar)/EE,8,5), " | "); 880 } 881 else /* Model has synonymous rate variation */ 882 { 883 if (nucSynVar) /*syn3 */ 884 { 885 GetInformation(synDistrInfo,c1); /* for syn3 all 3 category variables are identical, so still only need to call this for one of them */ 886 } 887 else /* syn1 */ 888 { 889 GetInformation(synDistrInfo,c); 890 } 891 fprintf (marginalOutFile, synDistrInfo); 892 EE = echoCatVar (synDistrInfo); 893 synmedian = median; 894 fprintf (stdout, " | ",Format (Sqrt(sampleVar),13,8)," | "); 895 fprintf (baselineOutput, " | ",Format (Sqrt(sampleVar),13,8)," | "); 896 897 if (rateVarModelChoice>=3) /* Synonymous and nonsynonymous rate variation */ 898 { 899 GetInformation(NSdistrInfo,d); 900 DD = Columns (NSdistrInfo); 901 for (EE=0; EE<DD; EE=EE+1) 902 { 903 NSdistrInfo[0][EE] = R*NSdistrInfo[0][EE]; 904 } 905 fprintf (marginalOutFile, NSdistrInfo); 906 EEN = echoCatVar (NSdistrInfo); 907 varN = sampleVar; 908 EER = echoRatio (NSdistrInfo,synDistrInfo); 909 varR = sampleVar; 910 911 fprintf (stdout, Format (EEN,7,5) , ",", Format (Sqrt(varN)/EEN,7,5), " | ",Format (EER,7,5) , ",", Format (Sqrt(varR)/EER,8,5), " | "); 912 fprintf (baselineOutput, Format (EEN,7,5) , ",", Format (Sqrt(varN)/EEN,7,5), " | ",Format (EER,7,5) , ",", Format (Sqrt(varR)/EER,8,5), " | "); 913 914 } 915 else /* Proportional model: Nonsynonymous rate variation is just a scaled version of synonymous rate variation. */ 916 { 917 fprintf (stdout, Format (EE*R,7,5) , ",", Format (Sqrt(sampleVar)/EE,7,5), " | ", Format (R,7,5) , ",", Format (0,8,5), " | "); 918 fprintf (baselineOutput, Format (EE*R,7,5) , ",", Format (Sqrt(sampleVar)/EE,7,5), " | ", Format (R,7,5) , ",", Format (0,8,5), " | "); 919 } 920 } 921 } 922 923 /* Likelihood ratio tests: */ 924 if (modelIndex == 5) 925 { 926 nullLL = res[1][0]; 927 nullPC = res[1][1]; 928 } 929 /* we don't know that alternative returned before null, so we won't print the p-value in the MPI mode: */ 930 if ((modelIndex == 6)&&(MPI_NODE_COUNT<=1)) 931 { 932 fprintf (stdout, Format (1-CChi2(2*(res[1][0]-nullLL),res[1][1]-nullPC),9,7), " |"); 933 fprintf (baselineOutput, Format (1-CChi2(2*(res[1][0]-nullLL),res[1][1]-nullPC),9,7), " |"); 934 } 935 else 936 { 937 fprintf (stdout," N/A |"); 938 fprintf (baselineOutput, " N/A |"); 939 } 940 941 /* Output parameter count and AIC scores: */ 942 fprintf (stdout, Format (res[1][1],5,0), "|", Format (2*(res[1][1]-res[1][0]),11,2),"|"); 943 fprintf (baselineOutput, Format (res[1][1],5,0), "|", Format (2*(res[1][1]-res[1][0]),11,2),"|"); 944 fprintf (stdout, "\n",separator); 945 fprintf (baselineOutput, "\n",separator); 946 947 /* Site-specific posterior analysis: */ 948 if (rateVarModelChoice > 1 && nucSynVar < 2) /* Models with independent nonsynonymous rate variation */ 949 { 950 sigLevel = 0.95; 951 D = Columns(NSdistrInfo); 952 for (k=0; k<D; k=k+1) 953 { 954 if (NSdistrInfo[0][k]>1) break; 955 } 956 ConstructCategoryMatrix(site_likelihoods,lf,COMPLETE); 957 CC = Columns (site_likelihoods); 958 DD = Rows (site_likelihoods); 959 /* set up variable order information: 960 (ordernum[n] is the number of the nth variable in site_likelihoods, 961 where omega (d) is variable 0 and c (synRate, for syn1) or 962 c1, c2, c3 (for syn3) are variables 1 or 1-3): */ 963 GetInformation (order_ID, lf); 964 if (rateVarModelChoice < 3) 965 { 966 ordernum = {{0}}; 967 order0 = 0; 968 } else { 969 if (!nucSynVar) /* syn1 */ 970 { 971 if (order_ID[0] == "d") 972 { 973 ordernum = {{0,1}}; /* first variable is nonsyn (0), second is syn (1) */ 974 } else { 975 ordernum = {{1,0}}; /* first variable is syn (1), second is nonsyn (0) */ 976 } 977 order0 = ordernum[0]; 978 order1 = ordernum[1]; 979 } 980 else /* syn3 */ 981 { 982 if (multiplicativeNonSynRate) 983 { 984 ordernum = {1,4}; 985 for (posnum = 0; posnum < 4; posnum = posnum+1) 986 { 987 if (order_ID[posnum] == "d") 988 { 989 ordernum[posnum] = 0; 990 } 991 if (order_ID[posnum] == "c1") 992 { 993 ordernum[posnum] = 1; 994 } 995 if (order_ID[posnum] == "c2") 996 { 997 ordernum[posnum] = 2; 998 } 999 if (order_ID[posnum] == "c3") 1000 { 1001 ordernum[posnum] = 3; 1002 } 1003 } 1004 order0 = ordernum[0]; 1005 order1 = ordernum[1]; 1006 order2 = ordernum[2]; 1007 order3 = ordernum[3]; 1008 } 1009 else /* Independent syn3: c2 is not used. */ 1010 { 1011 ordernum = {1,3}; 1012 for (posnum = 0; posnum < 3; posnum = posnum+1) 1013 { 1014 if (order_ID[posnum] == "d") 1015 { 1016 ordernum[posnum] = 0; 1017 } 1018 if (order_ID[posnum] == "c1") 1019 { 1020 ordernum[posnum] = 1; 1021 } 1022 if (order_ID[posnum] == "c3") 1023 { 1024 ordernum[posnum] = 2; 1025 } 1026 } 1027 order0 = ordernum[0]; 1028 order1 = ordernum[1]; 1029 order2 = ordernum[2]; 1030 } 1031 } 1032 } 1033 1034 if (k<D) 1035 /* have rates > 1 */ 1036 { 1037 fprintf (marginalOutFile,"\n\n------------------------------------------------\n\n Sites with dN/dS > 1 (Posterior cutoff = ",sigLevel,")\n\n"); 1038 } 1039 else 1040 { 1041 fprintf (marginalOutFile,"\n\n------------------------------------------------\n\n No rate classes with dN/dS>1."); 1042 } 1043 1044 /* allocate memory for data structures: */ 1045 site_posteriors = {DD,CC}; 1046 negprobs = {1,CC}; /* vector for storing site-specific negative selection probs */ 1047 negsynprobs = {1,CC}; /* vector for storing site-specific negative synonymous selection probs */ 1048 negsynprobs_1 = {1,CC}; 1049 negsynprobs_2 = {1,CC}; 1050 negsynprobs_3 = {1,CC}; 1051 omega_marginals = {D,CC}; 1052 synrate_marginals = {resp,CC}; 1053 synrate_1_marginals = {resp,CC}; 1054 synrate_2_marginals = {resp,CC}; 1055 synrate_3_marginals = {resp,CC}; 1056 1057 for (v=0; v<CC; v=v+1) /* loop over sites */ 1058 { 1059 totProb = 0; 1060 positiveProb = 0; 1061 negativeProb = 0; 1062 negSynProb = 0; 1063 negSynProb_1 = 0; 1064 negSynProb_2 = 0; 1065 negSynProb_3 = 0; 1066 1067 /* initialise index and dimensions variables: */ 1068 indexes = {{0, 0, 0, 0}}; 1069 if (rateVarModelChoice < 3) 1070 { 1071 dimensions = {{D}}; 1072 } 1073 else 1074 { 1075 if (!nucSynVar) /* syn1 */ 1076 { 1077 dimensions = {{D, resp}}; 1078 } 1079 else /* syn3 */ 1080 { 1081 dimensions = {{D, resp, resp, resp}}; 1082 } 1083 } 1084 1085 /* loop through all category combinations */ 1086 for (h=0; h<DD; h=h+1) 1087 { 1088 idx0 = indexes[0]; /* index for nonsynonymous rate */ 1089 idx1 = indexes[1]; /* index for synonymous rate 1, etc */ 1090 idx2 = indexes[2]; 1091 idx3 = indexes[3]; 1092 1093 if (rateVarModelChoice < 3) 1094 { 1095 site_posteriors[h][v] = NSdistrInfo[1][h]*site_likelihoods[h][v]; 1096 } 1097 else 1098 { 1099 if (!nucSynVar) /* syn1 */ 1100 { 1101 site_posteriors[h][v] = NSdistrInfo[1][idx0]*synDistrInfo[1][idx1]*site_likelihoods[h][v]; 1102 } 1103 else /* syn3 */ 1104 { 1105 if (multiplicativeNonSynRate) 1106 { 1107 site_posteriors[h][v] = NSdistrInfo[1][idx0]*synDistrInfo[1][idx1]*synDistrInfo[1][idx2]*synDistrInfo[1][idx3]*site_likelihoods[h][v]; 1108 } 1109 else 1110 { 1111 site_posteriors[h][v] = NSdistrInfo[1][idx0]*synDistrInfo[1][idx1]*synDistrInfo[1][idx2]*site_likelihoods[h][v]; 1112 } 1113 } 1114 } 1115 totProb = totProb + site_posteriors[h][v]; /* accumulate conditional posteriors, summing to probability of data at this site */ 1116 1117 /* Accumulate all marginals of interest: */ 1118 if (NSdistrInfo[0][idx0]>1) /* positive AA selection category */ 1119 { 1120 positiveProb = positiveProb + site_posteriors[h][v]; 1121 } 1122 if (NSdistrInfo[0][idx0]<1) /* purifying AA selection category */ 1123 { 1124 negativeProb = negativeProb + site_posteriors[h][v]; 1125 } 1126 if (rateVarModelChoice >= 3) 1127 { 1128 if (!nucSynVar) /* syn1 */ 1129 { 1130 if (synDistrInfo[0][idx1]<synmedian) /* purifying nuc-level (synonymous) selection category (whole codon) */ 1131 { 1132 negSynProb = negSynProb + site_posteriors[h][v]; 1133 } 1134 omega_marginals[idx0][v] = omega_marginals[idx0][v] + site_posteriors[h][v]; 1135 synrate_marginals[idx1][v] = synrate_marginals[idx1][v] + site_posteriors[h][v]; 1136 } 1137 else /* syn3 */ 1138 { 1139 if (synDistrInfo[0][idx1]<synmedian) /* purifying nuc-level (synonymous) selection category (position 1) */ 1140 { 1141 negSynProb_1 = negSynProb_1 + site_posteriors[h][v]; 1142 } 1143 if (synDistrInfo[0][idx2]<synmedian) /* purifying nuc-level (synonymous) selection category (position 2) */ 1144 { 1145 negSynProb_2 = negSynProb_2 + site_posteriors[h][v]; 1146 } 1147 if (multiplicativeNonSynRate) 1148 { 1149 if (synDistrInfo[0][idx3]<synmedian) /* purifying nuc-level (synonymous) selection category (position 3) */ 1150 { 1151 negSynProb_3 = negSynProb_3 + site_posteriors[h][v]; 1152 } 1153 } 1154 omega_marginals[idx0][v] = omega_marginals[idx0][v] + site_posteriors[h][v]; 1155 synrate_1_marginals[idx1][v] = synrate_1_marginals[idx1][v] + site_posteriors[h][v]; 1156 synrate_2_marginals[idx2][v] = synrate_2_marginals[idx2][v] + site_posteriors[h][v]; 1157 if (multiplicativeNonSynRate) 1158 { 1159 synrate_3_marginals[idx3][v] = synrate_3_marginals[idx3][v] + site_posteriors[h][v]; 1160 } 1161 } 1162 } 1163 1164 /* keep track of individual category variable indices: */ 1165 if (rateVarModelChoice < 3) 1166 { 1167 indexes[0] = indexes[0]+1; 1168 } else { 1169 if (!nucSynVar) /* syn1 */ 1170 { 1171 indexes[order1] = indexes[order1]+1; /* increment index for second variable in category matrix */ 1172 if (indexes[order1] == dimensions[order1]) 1173 { 1174 indexes[order1] = 0; 1175 indexes[order0] = indexes[order0]+1; 1176 } 1177 } 1178 else /* syn3 */ 1179 { 1180 if (multiplicativeNonSynRate) 1181 { 1182 indexes[order3] = indexes[order3]+1; 1183 if (indexes[order3] == dimensions[order3]) 1184 { 1185 indexes[order3] = 0; 1186 indexes[order2] = indexes[order2]+1; 1187 if (indexes[order2] == dimensions[order2]) 1188 { 1189 indexes[order2] = 0; 1190 indexes[order1] = indexes[order1]+1; 1191 if (indexes[order1] == dimensions[order1]) 1192 { 1193 indexes[order1] = 0; 1194 indexes[order0] = indexes[order0]+1; 1195 } 1196 } 1197 } 1198 } 1199 else /* independent syn3: only 3 category variables */ 1200 { 1201 indexes[order2] = indexes[order2]+1; 1202 if (indexes[order2] == dimensions[order2]) 1203 { 1204 indexes[order2] = 0; 1205 indexes[order1] = indexes[order1]+1; 1206 if (indexes[order1] == dimensions[order1]) 1207 { 1208 indexes[order1] = 0; 1209 indexes[order0] = indexes[order0]+1; 1210 } 1211 } 1212 } 1213 } 1214 } 1215 } /* for h */ 1216 1217 positiveProb = positiveProb/totProb; 1218 negprobs[0][v] = negativeProb/totProb; 1219 if (rateVarModelChoice >= 3) 1220 { 1221 if (!nucSynVar) /* syn1 */ 1222 { 1223 negsynprobs[0][v] = negSynProb/totProb; 1224 for (h = 0; h < D; h=h+1) 1225 { 1226 omega_marginals[h][v] = omega_marginals[h][v]/totProb; 1227 } 1228 for (h = 0; h < resp; h=h+1) 1229 { 1230 synrate_marginals[h][v] = synrate_marginals[h][v]/totProb; 1231 } 1232 } 1233 else /* syn3 */ 1234 { 1235 negsynprobs_1[0][v] = negSynProb_1/totProb; 1236 negsynprobs_2[0][v] = negSynProb_2/totProb; 1237 if (multiplicativeNonSynRate) 1238 { 1239 negsynprobs_3[0][v] = negSynProb_3/totProb; 1240 } 1241 for (h = 0; h < D; h=h+1) 1242 { 1243 omega_marginals[h][v] = omega_marginals[h][v]/totProb; 1244 } 1245 for (h = 0; h < resp; h=h+1) 1246 { 1247 synrate_1_marginals[h][v] = synrate_1_marginals[h][v]/totProb; 1248 synrate_2_marginals[h][v] = synrate_2_marginals[h][v]/totProb; 1249 if (multiplicativeNonSynRate) 1250 { 1251 synrate_3_marginals[h][v] = synrate_3_marginals[h][v]/totProb; 1252 } 1253 } 1254 } 1255 } 1256 1257 for (h=0; h<DD; h=h+1) 1258 { 1259 site_posteriors[h][v] = site_posteriors[h][v]/totProb; 1260 } 1261 if (positiveProb>=sigLevel) 1262 { 1263 fprintf (marginalOutFile,Format (v+1,0,0)," (",positiveProb,")\n"); 1264 } 1265 } /* for v */ 1266 1267 fprintf (marginalOutFile,"\n\n------------------------------------------------\n\n Sites with dN/dS < 1 (Posterior cutoff = ",sigLevel,")\n\n"); 1268 for (v=0; v<CC; v=v+1) 1269 { 1270 if (negprobs[0][v]>=sigLevel) 1271 { 1272 fprintf (marginalOutFile,Format (v+1,0,0)," (",negprobs[0][v],")\n"); 1273 } 1274 } 1275 1276 if (rateVarModelChoice >= 3) 1277 { 1278 if (!nucSynVar) /* syn1 */ 1279 { 1280 fprintf (marginalOutFile,"\n\n------------------------------------------------\n\n Codon sites under purifying synonymous selection (Posterior cutoff = ",sigLevel,")\n\n"); 1281 for (v=0; v<CC; v=v+1) 1282 { 1283 if (negsynprobs[0][v]>=sigLevel) 1284 { 1285 fprintf (marginalOutFile,Format (v+1,0,0)," (",negsynprobs[0][v],")\n"); 1286 } 1287 } 1288 } 1289 else /* syn3 */ 1290 { 1291 fprintf (marginalOutFile,"\n\n------------------------------------------------\n\n Nucleotide sites under purifying synonymous selection (Posterior cutoff = ",sigLevel,")\n\n"); 1292 fprintf (marginalOutFile,"Position 1:\n\n"); 1293 for (v=0; v<CC; v=v+1) 1294 { 1295 if (negsynprobs_1[0][v]>=sigLevel) 1296 { 1297 fprintf (marginalOutFile,Format (v+1,0,0)," (",negsynprobs_1[0][v],")\n"); 1298 } 1299 } 1300 if (multiplicativeNonSynRate) 1301 { 1302 fprintf (marginalOutFile,"\nPosition 2:\n\n"); 1303 } 1304 else 1305 { 1306 fprintf (marginalOutFile,"\nPosition 3:\n\n"); 1307 } 1308 for (v=0; v<CC; v=v+1) 1309 { 1310 if (negsynprobs_2[0][v]>=sigLevel) 1311 { 1312 fprintf (marginalOutFile,Format (v+1,0,0)," (",negsynprobs_2[0][v],")\n"); 1313 } 1314 } 1315 if (multiplicativeNonSynRate) 1316 { 1317 fprintf (marginalOutFile,"\nPosition 3:\n\n"); 1318 for (v=0; v<CC; v=v+1) 1319 { 1320 if (negsynprobs_3[0][v]>=sigLevel) 1321 { 1322 fprintf (marginalOutFile,Format (v+1,0,0)," (",negsynprobs_3[0][v],")\n"); 1323 } 1324 } 1325 } 1326 } 1327 } 1328 1329 1330/* tabulate posteriors for omega and synRate rate classes */ 1331 1332 fprintf (marginalOutFile,"\n\n------------------------------------------------\n\nTabulated Posteriors for Each Site\nSites in Columns, Rate Classes in Rows\nImport the following part into a data processing program\nfor further analysis\n\n"); 1333 if (rateVarModelChoice < 3) 1334 { 1335 outString = "Omega/Site"; 1336 for (v=0; v<D; v=v+1) 1337 { 1338 outString = outString + "\tOmega=" + NSdistrInfo[0][v]; 1339 } 1340 1341 for (v=0; v<CC; v=v+1) 1342 { 1343 outString = outString + "\n" + (v+1); 1344 for (h=0; h<D; h=h+1) 1345 { 1346 outString = outString + "\t" + site_posteriors[h][v]; 1347 } 1348 } 1349 } 1350 1351 if (rateVarModelChoice >= 3) 1352 { 1353 outString = "Rates/Site"; 1354 for (v=0; v<D; v=v+1) 1355 { 1356 outString = outString + "\tOmega=" + NSdistrInfo[0][v]; 1357 } 1358 if (!nucSynVar) /* syn1 */ 1359 { 1360 for (v=0; v<resp; v=v+1) 1361 { 1362 outString = outString + "\tS=" + synDistrInfo[0][v]; 1363 } 1364 1365 for (v=0; v<CC; v=v+1) 1366 { 1367 outString = outString + "\n" + (v+1); 1368 for (h=0; h<D; h=h+1) 1369 { 1370 outString = outString + "\t" + omega_marginals[h][v]; 1371 } 1372 for (h=0; h<resp; h=h+1) 1373 { 1374 outString = outString + "\t" + synrate_marginals[h][v]; 1375 } 1376 } 1377 } 1378 else /* syn3 */ 1379 { 1380 for (v=0; v<resp; v=v+1) 1381 { 1382 outString = outString + "\tS_pos1=" + synDistrInfo[0][v]; 1383 } 1384 if (multiplicativeNonSynRate) 1385 { 1386 for (v=0; v<resp; v=v+1) 1387 { 1388 outString = outString + "\tS_pos2=" + synDistrInfo[0][v]; 1389 } 1390 } 1391 for (v=0; v<resp; v=v+1) 1392 { 1393 outString = outString + "\tS_pos3=" + synDistrInfo[0][v]; 1394 } 1395 1396 for (v=0; v<CC; v=v+1) 1397 { 1398 outString = outString + "\n" + (v+1); 1399 for (h=0; h<D; h=h+1) 1400 { 1401 outString = outString + "\t" + omega_marginals[h][v]; 1402 } 1403 for (h=0; h<resp; h=h+1) 1404 { 1405 outString = outString + "\t" + synrate_1_marginals[h][v]; 1406 } 1407 for (h=0; h<resp; h=h+1) 1408 { 1409 outString = outString + "\t" + synrate_2_marginals[h][v]; 1410 } 1411 if (multiplicativeNonSynRate) 1412 { 1413 for (h=0; h<resp; h=h+1) 1414 { 1415 outString = outString + "\t" + synrate_3_marginals[h][v]; 1416 } 1417 } 1418 } 1419 } 1420 } 1421 fprintf (marginalOutFile,"\n", outString,"\n"); 1422 1423 site_posteriors = 0; 1424 } /* if rateVarModelChoice */ 1425 1426 return fromNode-1; 1427} 1428