1#include "binomial.ibf"; 2 3/*----------------------------------------------------------------------------*/ 4 5 6ChoiceList (ambChoice, "Treatment of Ambiguities",1,SKIP_NONE, 7 "Averaged","All possible resolutions are considered and averaged.", 8 "Resolved","The most frequent (for that site) resolution is chosen."); 9 10if (ambChoice<0) 11{ 12 return; 13} 14 15if (useCustomCountingBias) 16{ 17 incFileName = "Distances/CodonToolsMain.def"; 18} 19else 20{ 21 incFileName = "Distances/CodonTools.def"; 22} 23 24ExecuteCommands ("#include \""+incFileName+"\";"); 25 26SetDialogPrompt ("Please specify the first simulated file:"); 27 28DataSet dsA = ReadDataFile (PROMPT_FOR_FILE); 29FILE_BASE = LAST_FILE_PATH; 30 31iterates = 0; 32 33while (iterates<1) 34{ 35 fprintf (stdout, "\nHow many data files should I process?"); 36 fscanf (stdin, "Number", iterates); 37} 38 39it = 2; 40itidx = 0; 41 42tailEnds = -1; 43 44while ((tailEnds<=0) || (tailEnds >=.5)) 45{ 46 fprintf (stdout, "\nDistribution tails to tabulate (from 0 to 0.5)?"); 47 fscanf (stdin, "Number", tailEnds); 48} 49 50tailEnds = tailEnds * 100; 51 52ESResultMatrix = {iterates,filteredData.sites}; 53ENResultMatrix = {iterates,filteredData.sites}; 54OSResultMatrix = {iterates,filteredData.sites}; 55ONResultMatrix = {iterates,filteredData.sites}; 56dNmdS = {iterates,filteredData.sites}; 57pValuesPS = {iterates,filteredData.sites}; 58pValuesNS = {iterates,filteredData.sites}; 59 60while (1) 61{ 62 DataSetFilter filteredDataA = CreateFilter (dsA,3,"","",GeneticCodeExclusions); 63 64 HarvestFrequencies (observedCEFV,filteredData,3,3,0); 65 seqToBranchMap = {stateCharCount,1}; 66 67 DataSet dsJoint = Combine(dsA,ds); 68 DataSetFilter filteredDataJ = CreateFilter (dsJoint,3,"","",GeneticCodeExclusions); 69 70 hShift = 0; 71 72 for (k=0; k<64; k=k+1) 73 { 74 if (_Genetic_Code[k]==10) 75 { 76 hShift = hShift+1; 77 } 78 else 79 { 80 seqToBranchMap[k-hShift] = observedCEFV[k]; 81 } 82 } 83 84 observedCEFV = seqToBranchMap; 85 86 87 branchNames = BranchName (givenTree,-1); 88 h = Columns (branchNames); 89 90 seqToBranchMap = {h, 2}; 91 /* maps sequence names to branch order in column 1 92 and the other way around in column 2 */ 93 94 for (k=0; k<filteredData.species; k=k+1) 95 { 96 GetString (seqName, filteredData, k); 97 seqToBranchMap[k][0] = -1; 98 for (v=0; v<h; v=v+1) 99 { 100 if (branchNames[v] % seqName) 101 { 102 seqToBranchMap[k][0] = v; 103 seqToBranchMap[v][1] = k; 104 break; 105 } 106 } 107 } 108 109 110 seqToBranchMap[filteredData.species][0] = h-1; 111 seqToBranchMap[h-1][1] = filteredData.species; 112 113 for (k=1; k<filteredDataA.species; k=k+1) 114 { 115 GetString (seqName, filteredDataA, k); 116 seqToBranchMap[filteredData.species+k][0] = -1; 117 for (v=0; v<h; v=v+1) 118 { 119 if (branchNames[v] % seqName) 120 { 121 seqToBranchMap[k+filteredData.species][0] = v; 122 seqToBranchMap[v][1] = k+filteredData.species; 123 break; 124 } 125 } 126 } 127 128 /* total tree length */ 129 130 totalTreeLength = 0; 131 branchLengths = BranchLength(givenTree,-1); 132 133 for (v=Columns(branchLengths)-1; v>=0; v=v-1) 134 { 135 totalTreeLength = totalTreeLength + branchLengths[v]; 136 } 137 138 /* get codon matrix */ 139 140 filteredData.unique_sites = Rows(filteredData.site_freqs) * Columns(filteredData.site_freqs); 141 142 codonInfo = {filteredData.species, filteredData.unique_sites}; 143 codonInfo2 = {filteredDataA.species, filteredDataA.unique_sites}; 144 145 GetDataInfo (dupInfo, filteredData); 146 GetDataInfo (dupInfoA, filteredDataA); 147 148 matrixTrick = {1,stateCharCount}; 149 matrixTrick2 = {1,stateCharCount}; 150 151 for (h=Columns(matrixTrick)-1; h>=0; h=h-1) 152 { 153 matrixTrick [h] = h; 154 matrixTrick2 [h] = 1; 155 } 156 157 for (v=0; v<filteredData.unique_sites;v=v+1) 158 { 159 for (h=0; h<filteredData.species;h=h+1) 160 { 161 GetDataInfo (siteInfo, filteredData, h, v); 162 _SITE_ES_COUNT = matrixTrick2 * siteInfo; 163 if (_SITE_ES_COUNT[0] == 1) 164 { 165 siteInfo = matrixTrick * siteInfo; 166 codonInfo[h][v] = siteInfo[0]; 167 } 168 else 169 { 170 codonInfo[h][v] = -1; 171 } 172 } 173 } 174 175 for (v=0; v<filteredDataA.unique_sites;v=v+1) 176 { 177 for (h=0; h<filteredDataA.species;h=h+1) 178 { 179 GetDataInfo (siteInfo, filteredDataA, h, v); 180 siteInfo = matrixTrick * siteInfo; 181 codonInfo2[h][v] = siteInfo[0]; 182 } 183 } 184 185 _SITE_RESULTS = {4,filteredData.sites}; 186 flatTreeRep = Abs (givenTree); 187 188 resultMatrix = {filteredData.sites,4}; 189 190 for (v=0; v<filteredData.sites;v=v+1) 191 /*for (v=0; v<1;v=v+1)*/ 192 { 193 _SITE_ES_COUNT = {stateCharCount,stateCharCount}; 194 _SITE_EN_COUNT = {stateCharCount,stateCharCount}; 195 _SITE_OS_COUNT = {stateCharCount,stateCharCount}; 196 _SITE_ON_COUNT = {stateCharCount,stateCharCount}; 197 198 /* do internal nodes first */ 199 200 k = filteredData.species+1; 201 202 /* first sequence is always the root */ 203 c1 = dupInfoA[v]; 204 if (codonInfo2[1][c1] >= stateCharCount) 205 { 206 continue; 207 } 208 for (h=1; h<filteredDataA.species; h=h+1) 209 { 210 p1 = seqToBranchMap[k][0]; 211 p2 = flatTreeRep[p1]; 212 p2 = seqToBranchMap[p2][1]-filteredData.species; 213 214 branchFactor = branchLengths[p1]/totalTreeLength; 215 216 cd1 = codonInfo2[h] [c1]; 217 cd2 = codonInfo2[p2][c1]; 218 219 _SITE_OS_COUNT[cd1][cd2] = _SITE_OS_COUNT[cd1][cd2] + 1; 220 _SITE_ON_COUNT[cd1][cd2] = _SITE_ON_COUNT[cd1][cd2] + 1; 221 _SITE_ES_COUNT[cd1][cd2] = _SITE_ES_COUNT[cd1][cd2] + branchFactor; 222 _SITE_EN_COUNT[cd1][cd2] = _SITE_EN_COUNT[cd1][cd2] + branchFactor; 223 224 k=k+1; 225 } 226 227 /* now do the leaves */ 228 229 observedCEFV = {{0}}; 230 231 c2 = dupInfo[v]; 232 for (h=0; h<filteredData.species; h=h+1) 233 { 234 p1 = seqToBranchMap[h][0]; 235 p2 = flatTreeRep[p1]; 236 p2 = seqToBranchMap[p2][1]-filteredData.species; 237 238 cd2 = codonInfo2[p2][c1]; 239 cd1 = codonInfo[h] [c2]; 240 241 branchFactor = branchLengths[p1]/totalTreeLength; 242 243 if (cd1>=0) 244 /* no ambiguities */ 245 { 246 _SITE_OS_COUNT[cd1][cd2] = _SITE_OS_COUNT[cd1][cd2] + 1; 247 _SITE_ON_COUNT[cd1][cd2] = _SITE_ON_COUNT[cd1][cd2] + 1; 248 _SITE_ES_COUNT[cd1][cd2] = _SITE_ES_COUNT[cd1][cd2] + branchFactor; 249 _SITE_EN_COUNT[cd1][cd2] = _SITE_EN_COUNT[cd1][cd2] + branchFactor; 250 } 251 else 252 /* ambiguities here */ 253 { 254 GetDataInfo (ambInfo, filteredData, h, c2); 255 if (Rows(observedCEFV) == 1) 256 { 257 siteFilter = ""+(v*3)+"-"+(v*3+2); 258 DataSetFilter filteredDataSite = CreateFilter (dsJoint,3,siteFilter,"",GeneticCodeExclusions); 259 HarvestFrequencies (observedCEFV,filteredDataSite,3,3,0); 260 tempMx = {stateCharCount,1}; 261 262 hShift = 0; 263 264 for (k=0; k<64; k=k+1) 265 { 266 if (_Genetic_Code[k]==10) 267 { 268 hShift = hShift+1; 269 } 270 else 271 { 272 tempMx[k-hShift] = observedCEFV[k]; 273 } 274 } 275 observedCEFV = tempMx; 276 } 277 278 ambInfo = ambInfo$observedCEFV; 279 if (ambChoice) 280 { 281 weightFactor = 0; 282 tempMx = -1; 283 for (k=0; k<stateCharCount; k=k+1) 284 { 285 if (ambInfo[k]>weightFactor) 286 { 287 weightFactor = ambInfo[k]; 288 tempMx = k; 289 } 290 } 291 if (tempMx>=0) 292 { 293 _SITE_OS_COUNT[tempMx][cd2] = _SITE_OS_COUNT[tempMx][cd2] + 1; 294 _SITE_ON_COUNT[tempMx][cd2] = _SITE_ON_COUNT[tempMx][cd2] + 1 ; 295 _SITE_ES_COUNT[tempMx][cd2] = _SITE_ES_COUNT[tempMx][cd2] + branchFactor; 296 _SITE_EN_COUNT[tempMx][cd2] = _SITE_EN_COUNT[tempMx][cd2] + branchFactor; 297 } 298 } 299 else 300 { 301 weightFactor = matrixTrick2*ambInfo; 302 weightFactor = weightFactor[0]; 303 if (weightFactor) 304 { 305 ambInfo = ambInfo * (1/weightFactor); 306 for (k=0; k<stateCharCount; k=k+1) 307 { 308 weightFactor = ambInfo[k]; 309 if (weightFactor>0) 310 { 311 _SITE_OS_COUNT[k][cd2] = _SITE_OS_COUNT[k][cd2] + weightFactor; 312 _SITE_ON_COUNT[k][cd2] = _SITE_ON_COUNT[k][cd2] + weightFactor; 313 _SITE_ES_COUNT[k][cd2] = _SITE_ES_COUNT[k][cd2] + weightFactor*branchFactor; 314 _SITE_EN_COUNT[k][cd2] = _SITE_EN_COUNT[k][cd2] + weightFactor*branchFactor; 315 } 316 } 317 } 318 } 319 } 320 } 321 322 _SITE_OS_COUNT = matrixTrick2*(_OBSERVED_S_$_SITE_OS_COUNT)*Transpose(matrixTrick2); 323 _SITE_ON_COUNT = matrixTrick2*(_OBSERVED_NS_$_SITE_ON_COUNT)*Transpose(matrixTrick2); 324 _SITE_ES_COUNT = matrixTrick2*(_PAIRWISE_S_$_SITE_ES_COUNT)*Transpose(matrixTrick2); 325 _SITE_EN_COUNT = matrixTrick2*(_PAIRWISE_NS_$_SITE_EN_COUNT)*Transpose(matrixTrick2); 326 327 OSResultMatrix[itidx][v] = _SITE_OS_COUNT[0]; 328 ONResultMatrix[itidx][v] = _SITE_ON_COUNT[0]; 329 ESResultMatrix[itidx][v] = _SITE_ES_COUNT[0]; 330 ENResultMatrix[itidx][v] = _SITE_EN_COUNT[0]; 331 332 weight = _SITE_EN_COUNT[0]+_SITE_ES_COUNT[0]; 333 334 p = _SITE_ES_COUNT[0]/weight; 335 p2 = OSResultMatrix[itidx][v]+ONResultMatrix[itidx][v]; 336 ds = OSResultMatrix[itidx][v]/ESResultMatrix[itidx][v]; 337 dn = ONResultMatrix[itidx][v]/ENResultMatrix[itidx][v]; 338 339 if (ESResultMatrix[itidx][v]) 340 { 341 dNmdS[itidx][v] = dn-ds; 342 } 343 344 if (p2) 345 { 346 pv = extendedBinTail (p2,p,OSResultMatrix[itidx][v]); 347 if (OSResultMatrix[itidx][v]>=1) 348 { 349 pv2 = 1-pv+(pv-extendedBinTail(p2,p,OSResultMatrix[itidx][v]-1)); 350 } 351 else 352 { 353 pv2 = 1-pv+(pv-extendedBinTail (p2,p,0)); 354 } 355 pValuesPS[itidx][v] = pv; 356 pValuesNS[itidx][v] = pv2; 357 } 358 } 359 if (it <= iterates) 360 { 361 fprintf (stdout, "\nIterate ", it, "/", iterates); 362 fName = FILE_BASE+"_"+it; 363 DataSet dsA = ReadDataFile (fName); 364 it = it+1; 365 itidx = itidx + 1; 366 } 367 else 368 { 369 break; 370 } 371} 372 373fprintf (stdout, "\n"); 374 375/* generate medians and tailEnd% and (100-tailEnd)% profiles */ 376 377statisticalOverview = {filteredData.sites,12}; 378 379tailEnd1 = " "+tailEnds+"%"; 380tailEnd2 = " "+(100-tailEnds)+"%"; 381 382sovLabels = {{"","ES Median","","","EN Median","","","OS Median","","","ON Median",""}}; 383 384sovLabels[0] = "ES"+tailEnd1; 385sovLabels[2] = "ES"+tailEnd2; 386sovLabels[3] = "EN"+tailEnd1; 387sovLabels[5] = "EN"+tailEnd2; 388sovLabels[6] = "OS"+tailEnd1; 389sovLabels[8] = "OS"+tailEnd2; 390sovLabels[9] = "ON"+tailEnd1; 391sovLabels[11] = "ON"+tailEnd2; 392 393ColumnToSort = {iterates,1}; 394 395lowBar = (iterates*tailEnds*0.01)$1; 396highBar = (iterates*(100-tailEnds)*0.01)$1; 397 398/*if ((iterates*9)%10==0) 399{ 400 highBar = highBar-1; 401}*/ 402 403median = iterates$2; 404doAv = (iterates%2==0); 405 406for (v=0;v<filteredData.sites;v=v+1) 407{ 408 for (h=0;h<iterates;h=h+1) 409 { 410 ColumnToSort[h]=ESResultMatrix[h][v]; 411 } 412 ColumnToSort = ColumnToSort%0; 413 statisticalOverview[v][0] = ColumnToSort[lowBar]; 414 statisticalOverview[v][2] = ColumnToSort[highBar]; 415 if (doAv) 416 { 417 statisticalOverview[v][1] = (ColumnToSort[median]+ColumnToSort[median+1])/2; 418 } 419 else 420 { 421 statisticalOverview[v][1] = ColumnToSort[median]; 422 } 423} 424 425for (v=0;v<filteredData.sites;v=v+1) 426{ 427 for (h=0;h<iterates;h=h+1) 428 { 429 ColumnToSort[h]=ENResultMatrix[h][v]; 430 } 431 /*dummy = doTheSort (iterates);*/ 432 ColumnToSort = ColumnToSort%0; 433 statisticalOverview[v][3] = ColumnToSort[lowBar]; 434 statisticalOverview[v][5] = ColumnToSort[highBar]; 435 if (doAv) 436 { 437 statisticalOverview[v][4] = (ColumnToSort[median]+ColumnToSort[median+1])/2; 438 } 439 else 440 { 441 statisticalOverview[v][4] = ColumnToSort[median]; 442 } 443} 444 445for (v=0;v<filteredData.sites;v=v+1) 446{ 447 for (h=0;h<iterates;h=h+1) 448 { 449 ColumnToSort[h]=OSResultMatrix[h][v]; 450 } 451 ColumnToSort = ColumnToSort%0; 452 statisticalOverview[v][6] = ColumnToSort[lowBar]; 453 statisticalOverview[v][8] = ColumnToSort[highBar]; 454 if (doAv) 455 { 456 statisticalOverview[v][7] = (ColumnToSort[median]+ColumnToSort[median+1])/2; 457 } 458 else 459 { 460 statisticalOverview[v][7] = ColumnToSort[median]; 461 } 462} 463 464for (v=0;v<filteredData.sites;v=v+1) 465{ 466 for (h=0;h<iterates;h=h+1) 467 { 468 ColumnToSort[h]=ONResultMatrix[h][v]; 469 } 470 ColumnToSort = ColumnToSort%0; 471 statisticalOverview[v][9] = ColumnToSort[lowBar]; 472 statisticalOverview[v][11] = ColumnToSort[highBar]; 473 if (doAv) 474 { 475 statisticalOverview[v][10] = (ColumnToSort[median]+ColumnToSort[median+1])/2; 476 } 477 else 478 { 479 statisticalOverview[v][10] = ColumnToSort[median]; 480 } 481} 482 483selLabelMatrix = {1,filteredData.sites}; 484 485for (h=1; h<=filteredData.sites;h=h+1) 486{ 487 selLabelMatrix[h-1] = "Site " + h; 488} 489 490OpenWindow (CHARTWINDOW,{{"Simulated ES"} 491 {"selLabelMatrix"}, 492 {"ESResultMatrix"}, 493 {"None"}, 494 {"Index"}, 495 {""}, 496 {""}, 497 {""}, 498 {""}, 499 {"0"}}, 500 "(SCREEN_WIDTH-60)/2;(SCREEN_HEIGHT-60)/2;15;30"); 501 502OpenWindow (CHARTWINDOW,{{"Simulated EN"} 503 {"selLabelMatrix"}, 504 {"ENResultMatrix"}, 505 {"None"}, 506 {"Index"}, 507 {""}, 508 {""}, 509 {""}, 510 {""}, 511 {"0"}}, 512 "(SCREEN_WIDTH-60)/2;(SCREEN_HEIGHT-60)/2;30+(SCREEN_WIDTH-60)/2;30"); 513 514OpenWindow (CHARTWINDOW,{{"Simulated OS"} 515 {"selLabelMatrix"}, 516 {"OSResultMatrix"}, 517 {"None"}, 518 {"Index"}, 519 {""}, 520 {""}, 521 {""}, 522 {""}, 523 {"0"}}, 524 "(SCREEN_WIDTH-60)/2;(SCREEN_HEIGHT-60)/2;15;45+(SCREEN_HEIGHT-60)/2"); 525 526OpenWindow (CHARTWINDOW,{{"Simulated ON"} 527 {"selLabelMatrix"}, 528 {"ONResultMatrix"}, 529 {"None"}, 530 {"Index"}, 531 {""}, 532 {""}, 533 {""}, 534 {""}, 535 {"0"}}, 536 "(SCREEN_WIDTH-60)/2;(SCREEN_HEIGHT-60)/2;30+(SCREEN_WIDTH-60)/2;45+(SCREEN_HEIGHT-60)/2"); 537 538OpenWindow (CHARTWINDOW,{{"Summary"} 539 {"sovLabels"}, 540 {"statisticalOverview"}, 541 {"Line Plot"}, 542 {"Index"}, 543 {""}, 544 {""}, 545 {""}, 546 {""}, 547 {"0"}}, 548 "(SCREEN_WIDTH-200);(SCREEN_HEIGHT-200);100;100"); 549 550