1/*---------------------------------------------------------------------------------------------------------------------------------------------------*/ 2 3function echoCatVar (ps,values) 4{ 5 DD = Rows(ps); 6 EE = 0.0; 7 sampleVar = 0.0; 8 9 for (k=0; k<DD; k=k+1) 10 { 11 EE = ps[k]*values[k]+EE; 12 sampleVar = sampleVar+ps[k]*values[k]*values[k]; 13 } 14 15 sampleVar = sampleVar-EE*EE; 16 17 fprintf (stdout, "Mean = ",EE, 18 "\nVariance = ",sampleVar, 19 "\nCOV = ", Sqrt(sampleVar)/EE,"\n"); 20 21 for (k=0; k<DD; k=k+1) 22 { 23 fprintf (stdout,"\nRate[",Format(k,0,0),"]=",Format(values[k],12,8), " (weight=", 24 Format(ps[k],9,7),")"); 25 } 26 return EE; 27} 28 29/*---------------------------------------------------------------------------------------------------------------------------------------------------*/ 30 31function echoCovariance (ps,values1,values2) 32{ 33 DD = Rows(ps); 34 EE = 0.0; 35 EE2 = 0.0; 36 sampleVar = 0.0; 37 sampleVar2 = 0.0; 38 39 for (k=0; k<DD; k=k+1) 40 { 41 EE = ps[k]*values1[k]+EE; 42 EE2 = ps[k]*values2[k]+EE2; 43 sampleVar = sampleVar+ps[k]*values1[k]*values1[k]; 44 sampleVar2 = sampleVar2+ps[k]*values2[k]*values2[k]; 45 } 46 47 sampleVar = sampleVar-EE*EE; 48 sampleVar2 = sampleVar2-EE2*EE2; 49 50 cov = 0; 51 cov2 = 0; 52 for (k=0; k<DD; k=k+1) 53 { 54 cov = cov + ps[k]*(values1[k]-EE); 55 cov2 = cov2 + ps[k]*(values2[k]-EE2); 56 } 57 58 return cov*cov2/Sqrt(sampleVar*sampleVar2); 59} 60 61/*---------------------------------------------------------------------------------------------------------------------------------------------------*/ 62 63//ExecuteAFile(HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def"); 64//ExecuteAFile(HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"PS_Plotters.bf"); 65#include "TemplateModels/chooseGeneticCode.def"; 66#include "Utility/PS_Plotters.bf"; 67 68 69SetDialogPrompt ("Choose a model fit:"); 70 71ExecuteAFile (PROMPT_FOR_FILE); 72 73 74 75basePath = LAST_FILE_PATH; 76 77 78 79GetInformation (vars,"^P_[0-9]+"); 80 81 82rateCount = Columns (vars)+1; 83 84 85GetString (lfInfo, lf, -1); 86fileCount = Columns(lfInfo["Trees"])/rateCount; 87 88fprintf (stdout, "\nLoaded a fit on ", fileCount, " data sets with ", rateCount, " rates\n"); 89 90ps = {rateCount,1}; 91rateInfo = {rateCount,4}; 92rateInfo = rateInfo["1"]; 93 94for (mi = 0; mi < rateCount-1; mi=mi+1) 95{ 96 ExecuteCommands ("ps["+mi+"]="+"P_"+(mi+1)+";"); 97} 98 99for (mi = 0; mi < rateCount; mi=mi+1) 100{ 101 ExecuteCommands ("rateInfo[mi][1]="+"S_"+mi+"/c_scale;"); 102 ExecuteCommands ("rateInfo[mi][2]="+"NS_"+mi+"/c_scale;"); 103 rateInfo[mi][3] = rateInfo[mi][2]-rateInfo[mi][1]; 104} 105 106 107for (mi=0; mi<rateCount-1; mi=mi+1) 108{ 109 for (mi2 = 0; mi2 < mi; mi2=mi2+1) 110 { 111 rateInfo[mi][0] = rateInfo[mi][0] * (1-ps[mi2]); 112 } 113 rateInfo[mi][0] = rateInfo[mi][0] * ps[mi]; 114} 115 116 117for (mi2 = 0; mi2 < mi; mi2=mi2+1) 118{ 119 rateInfo[mi][0] = rateInfo[mi][0] * (1-ps[mi2]); 120} 121 122fprintf (stdout, "\n\n1).Synonymous rates:\n\n"); 123echoCatVar (rateInfo[-1][0],rateInfo[-1][1]); 124fprintf (stdout, "\n\n2).Non-synonymous rates:\n\n"); 125echoCatVar (rateInfo[-1][0],rateInfo[-1][2]); 126 127columnHeaders = {{"P","dS","dN","dN-dS"}}; 128 129 130 131 132 133ConstructCategoryMatrix (cm, lf, COMPLETE); 134 135site_count = Columns (cm)/rateCount; 136posteriorProbs = {site_count, rateCount}; 137columnHeaders = {1,rateCount}; 138 139for (rate_enumerator = 0; rate_enumerator < rateCount; rate_enumerator = rate_enumerator + 1) 140{ 141 columnHeaders[rate_enumerator] = "Rate " + (rate_enumerator+1); 142} 143 144for (site_enumerator = 0; site_enumerator < site_count; site_enumerator = site_enumerator + 1) 145{ 146 sum = 0; 147 148 smallestScaler = 1e100; 149 150 for (rate_enumerator = 0; rate_enumerator < rateCount; rate_enumerator = rate_enumerator + 1) 151 { 152 smallestScaler = Min(smallestScaler,cm.site_scalers[rate_enumerator*site_count+site_enumerator]); 153 } 154 155 for (rate_enumerator = 0; rate_enumerator < rateCount; rate_enumerator = rate_enumerator + 1) 156 { 157 v = cm[rate_enumerator*site_count+site_enumerator] * rateInfo[rate_enumerator][0] * Exp(cm.log_scale_multiplier*(smallestScaler-cm.site_scalers[rate_enumerator*site_count+site_enumerator])); 158 posteriorProbs[site_enumerator] [rate_enumerator]= v; 159 sum = sum + v; 160 } 161 162 for (rate_enumerator = 0; rate_enumerator < rateCount; rate_enumerator = rate_enumerator + 1) 163 { 164 posteriorProbs[site_enumerator] [rate_enumerator]= posteriorProbs[site_enumerator] [rate_enumerator]/sum; 165 } 166} 167 168distrInfo = ""; 169distrInfo * 128; 170distrInfo * "dN_minus_dS"; 171for (k = 0; k <= mi; k = k+1) 172{ 173 distrInfo * (":" + rateInfo[k][0]); 174} 175for (k = 0; k <= mi; k = k+1) 176{ 177 distrInfo * (":" + rateInfo[k][3]); 178} 179distrInfo * 0; 180 181{"SmallCodon_part_Categ:0.25:0.25:0.25:0.25:0.00116849:0.0498505:0.447572:3.50141"} 182 183 184 185 186nonStopCount = 0; 187 188for (h1 = 0; h1<64; h1=h1+1) 189{ 190 if (_Genetic_Code[h1]!=10) 191 { 192 nonStopCount = nonStopCount + 1; 193 } 194} 195 196/* make syn and non-syn template matrices */ 197 198ExecuteAFile(HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Distances"+DIRECTORY_SEPARATOR+"CodonTools.def"); 199 200for (fileID = 1; fileID <= fileCount; fileID = fileID+1) 201{ 202 fprintf (stdout, "\n\nWorking on file ", fileID, "\n\n"); 203 ExecuteCommands ("codonCount=filteredData_"+fileID+".sites;"); 204 205 sSites = 0; 206 nsSites = 0; 207 208 synM = {nonStopCount,nonStopCount}; 209 nonSynM = {nonStopCount,nonStopCount}; 210 211 vertOnes = {nonStopCount,1}; 212 horOnes = {1,nonStopCount}; 213 214 for (h1 = 0; h1<nonStopCount; h1=h1+1) 215 { 216 vertOnes [h1] = 1; 217 horOnes [h1] = 1; 218 } 219 220 hShift = 0; 221 for (h1 = 0; h1 < 64; h1=h1+1) 222 { 223 gc1 = _Genetic_Code[h1]; 224 if (gc1 == 10) 225 { 226 hShift = hShift+1; 227 } 228 else 229 { 230 sSites = sSites + codonCount * _S_NS_POSITIONS_[0][h1] * vectorOfFrequencies[h1-hShift]; 231 nsSites = nsSites + codonCount * _S_NS_POSITIONS_[1][h1] * vectorOfFrequencies[h1-hShift]; 232 233 vShift = hShift; 234 for (v1 = h1+1; v1 < 64; v1=v1+1) 235 { 236 gc2 = _Genetic_Code[v1]; 237 if (gc2 == 10) 238 { 239 vShift = vShift + 1; 240 } 241 else 242 { 243 if (gc1 == gc2) 244 { 245 synM [h1-hShift][v1-vShift] = vectorOfFrequencies[h1-hShift]; 246 synM [v1-vShift][h1-hShift] = vectorOfFrequencies[v1-vShift]; 247 } 248 else 249 { 250 nonSynM [h1-hShift][v1-vShift] = vectorOfFrequencies[h1-hShift]; 251 nonSynM [v1-vShift][h1-hShift] = vectorOfFrequencies[v1-vShift]; 252 } 253 } 254 } 255 } 256 } 257 258 259 ExecuteAFile(HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TreeTools.ibf"); 260 261 fprintf (stdout, "\nTotal nucleotide sites :", codonCount*3, 262 "\nSynonymous sites :", sSites, 263 "\nNonsynonymous sites :", nsSites, "\n"); 264 265 sSites = codonCount/sSites; 266 nsSites = codonCount/nsSites; 267 268 ExecuteCommands ("branchNames = BranchName (tree_"+fileID+"_0,-1);"); 269 T = Columns (branchNames); 270 271 synSubsAVL = {}; 272 dSAVL = {}; 273 nsSubsAVL = {}; 274 dNAVL = {}; 275 276 for (treeCounter = 0; treeCounter < rateCount; treeCounter = treeCounter + 1) 277 { 278 for (h1=0; h1 < T-1; h1=h1+1) 279 { 280 abn = branchNames[h1]; 281 ExecuteCommands("GetInformation (aRateMx, tree_"+fileID+"_"+treeCounter+"."+abn+");"); 282 synSubs = (horOnes*(aRateMx$synM))*vertOnes; 283 nsynSubs = (horOnes*(aRateMx$nonSynM))*vertOnes; 284 synSubs = synSubs[0]/3; 285 nsynSubs = nsynSubs[0]/3; 286 287 synSubsAVL[abn] = synSubsAVL[abn] + synSubs*rateInfo[treeCounter][0]; 288 nsSubsAVL [abn] = nsSubsAVL [abn] + nsynSubs*rateInfo[treeCounter][0]; 289 dSAVL[abn] = dSAVL[abn] + synSubs *sSites*rateInfo[treeCounter][0]; 290 dNAVL[abn] = dNAVL[abn] + nsynSubs*nsSites*rateInfo[treeCounter][0]; 291 } 292 } 293 294 ExecuteCommands ("treeAVL = tree_"+fileID+"_0^0"); 295 296 synTreeString = PostOrderAVL2StringDistances (treeAVL, synSubsAVL); 297 nonSynTreeString = PostOrderAVL2StringDistances (treeAVL, nsSubsAVL); 298 dSTreeString = PostOrderAVL2StringDistances (treeAVL, dSAVL); 299 dNTreeString = PostOrderAVL2StringDistances (treeAVL, dNAVL); 300 301 302 /*fprintf (stdout, "\nE[Syn subs/nucleotide site] tree: \n\t", synTreeString, "\n"); 303 fprintf (stdout, "\nE[Non-syn subs/nucleotide site] tree: \n\t", nonSynTreeString, "\n"); 304 fprintf (stdout, "\ndS tree: \n\t", dSTreeString, "\n"); 305 fprintf (stdout, "\ndN tree: \n\t", dNTreeString, "\n");*/ 306 307 UseModel (USE_NO_MODEL); 308 309 ExecuteCommands ("Tree synSubsTree_"+fileID+"= synTreeString;Tree nonsynSubsTree_"+fileID+" = nonSynTreeString;Tree dSTree_"+fileID+" = dSTreeString;Tree dNTree_"+fileID+" = dNTreeString;"); 310 311 ProcessATree ("synSubsTree_"+fileID); 312 ProcessATree ("nonsynSubsTree_"+fileID); 313 ProcessATree ("dSTree_"+fileID); 314 ProcessATree ("dNTree_"+fileID); 315 316 mxTreeSpec = {5,1}; 317 318 mxTreeSpec [0] = "nonsynSubsTree_"+fileID; 319 mxTreeSpec [3] = ""; 320 mxTreeSpec [4] = "Inferred_Tree."+nodeName; 321 mxTreeSpec [1] = "8211"; 322 mxTreeSpec [2] = ""; 323 324 325 326 327 mxTreeSpec [0] = "synSubsTree_"+fileID; 328 329 330 mxTreeSpec [0] = "dSTree_"+fileID; 331 332 333 mxTreeSpec [0] = "dNTree_"+fileID; 334 335} 336 337logRates = {rateCount,3}; 338epsilon = Exp(-4); 339for (mi = 0; mi < rateCount; mi=mi+1) 340{ 341 logRates[mi][0] = Min(Log(rateInfo[mi][1]+epsilon),4); 342 logRates[mi][1] = Min(Log(rateInfo[mi][2]+epsilon),4); 343 logRates[mi][2] = rateInfo[mi][0]; 344} 345 346psCode = ScaledDensityPlot ("logRates", {{-4,4}{-4,4}}, "Courier", {{400,400,14,40}}, 347 "_dNdSDensityPlot", 348 {{"","log (alpha)", "log (beta)"}}, 1, 1); 349 350psFile = basePath + ".ps"; 351fprintf (psFile, CLEAR_FILE, psCode); 352 353 354/*---------------------------------------------------------*/ 355 356function ProcessATree (treeName) 357{ 358 ExecuteCommands ("treeAVL2 = "+treeName + " ^ 0;leafCount=TipCount("+treeName+");"); 359 360 multFactors = {}; 361 for (k=1; k<Abs(treeAVL2); k=k+1) 362 { 363 aNode = treeAVL2[k]; 364 aNodeName = aNode["Name"]; 365 parentIndex = aNode["Parent"]; 366 k2 = Abs(aNode["Children"]); 367 if (k2) 368 { 369 currentDepth = aNode["Below"]; 370 multFactors[aNodeName] = currentDepth; 371 if (parentIndex > 0) 372 { 373 pInfo = treeAVL2[parentIndex]; 374 pInfo ["Below"] = pInfo ["Below"] + currentDepth; 375 treeAVL2[parentIndex] = pInfo; 376 } 377 } 378 else 379 { 380 multFactors[aNodeName] = 1; 381 pInfo = treeAVL2[parentIndex]; 382 pInfo ["Below"] = pInfo ["Below"] + 1; 383 treeAVL2[parentIndex] = pInfo; 384 } 385 386 } 387 388 pKeys = Rows(multFactors); 389 390 for (k=0; k<Columns(pKeys); k=k+1) 391 { 392 aNodeName = pKeys[k]; 393 multFactors[aNodeName] = multFactors[aNodeName] * (leafCount-multFactors[aNodeName]); 394 } 395 396 divInfo = computeTotalDivergence (treeName); 397 pInfo = 2*divInfo[0]/leafCount/(leafCount-1); 398 currentDepth = divInfo[1]/(Abs(treeAVL2)-2); 399 400 fprintf (stdout, "Mean pairwise divergence for ",treeName, " is ", pInfo, "\n"); 401 fprintf (stdout, "Mean branch length for ", treeName, " is ", currentDepth, "\n"); 402 return 0; 403} 404 405/*---------------------------------------------------------*/ 406 407function computeTotalDivergence (treeID) 408{ 409 ExecuteCommands ("bNames = BranchName ("+treeID+",-1);"); 410 ExecuteCommands ("bLen = BranchLength ("+treeID+",-1);"); 411 412 sum = 0; 413 sum2 = 0; 414 415 for (k=0; k<Columns(bNames); k=k+1) 416 { 417 aNodeName = bNames[k]; 418 sum = sum + bLen[k]*multFactors[aNodeName]; 419 sum2 = sum2 + bLen[k]; 420 } 421 return {{sum,sum2}}; 422} 423