1RequireVersion ("2.00.20110101"); 2 3ExecuteAFile (HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def"); 4ExecuteAFile (HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"2RatesAnalyses"+DIRECTORY_SEPARATOR+"MG94xREVxBivariate.mdl"); 5 6s_rateMin = 0.01; 7s_rateMax = 1.00; 8 9ns_rateMin = 0.01; 10ns_rateMax = 5.00; 11 12SetDialogPrompt ("Choose a model fit:"); 13ExecuteAFile (PROMPT_FOR_FILE); 14outFile = LAST_FILE_PATH; 15GetInformation(vars,"^P_[0-9]+"); 16rateCount = Columns (vars)+2; 17 18GetString (lfInfo, lf, -1); 19fileCount = Columns(lfInfo["Trees"])/(rateCount-1); 20 21LF_NEXUS_EXPORT_EXTRA = "bivariateFitHasMultipleCladeRates = " + bivariateFitHasMultipleCladeRates + ";"; 22 23fprintf (stdout, "\nLoaded a fit on ", fileCount, " data sets with ", rateCount-1, " rates\n"); 24 25timerCap = 0; 26while (timerCap < 1) 27{ 28 fprintf (stdout, "\nSeconds (x3) to spend looking for directional improvement in fit (>=1, or -1 to search a fixed grid)?"); 29 fscanf (stdin, "Number",timerCap); 30 if (timerCap == (-1)) 31 { 32 break; 33 } 34} 35 36if (timerCap < 0) 37{ 38 fprintf (stdout, "Sampling 355 directions...\n"); 39} 40else 41{ 42 fprintf (stdout, "Allocated ", timerCap*3, " seconds for approximate search...\n"); 43} 44 45LFCompute (lf,LF_START_COMPUTE); 46LFCompute (lf,res); 47LFCompute (lf,LF_DONE_COMPUTE); 48 49fprintf (stdout, "\nBaseline likelihood:", res); 50 51totalCodonCount = 0; 52for (fileID = 1; fileID <= fileCount; fileID = fileID + 1) 53{ 54 ExecuteCommands ("totalCodonCount = totalCodonCount + filteredData_"+fileID+".sites;"); 55} 56observedFreq = {4,3}; 57for (fileID = 1; fileID <= fileCount; fileID = fileID + 1) 58{ 59 ExecuteCommands ("HarvestFrequencies (tp, filteredData_"+fileID+",3,1,1);cfs=filteredData_"+fileID+".sites;"); 60 observedFreq = observedFreq + tp*(cfs/totalCodonCount); 61} 62 63ExecuteCommands ("global S_"+(rateCount-1)+"=0.5; global NS_"+(rateCount-1)+"=0.5;global P_"+(rateCount-1)+"=0.5;P_"+(rateCount-1)+":<1;"); 64ExecuteCommands("PopulateModelMatrix(\"rate_matrix_"+(rateCount-1)+"\",observedFreq,\"S_\"+(rateCount-1)+\"/c_scale\",\"NS_\"+(rateCount-1)+\"/c_scale\");"); 65ExecuteCommands("Model MG94MODEL_"+(rateCount-1)+"= (rate_matrix_"+(rateCount-1)+",vectorOfFrequencies,0);global P_"+(rateCount-1)+"=1/"+totalCodonCount+";"); 66 67for (_modelID = 1; _modelID < bivariateFitHasMultipleCladeRates; _modelID += 1) 68{ 69 _cladeRate = "clade_" + _modelID + "_NS_" + (rateCount-1); 70 ExecuteCommands ("global " + _cladeRate + "= 1;"); 71 ExecuteCommands("PopulateModelMatrix(\"rate_matrix_clade_"+_modelID + "_" + (rateCount-1)+"\",observedFreq,\"S_\"+(rateCount-1)+\"/c_scale\",\"" + _cladeRate + "*NS_\"+(rateCount-1)+\"/c_scale\");"); 72 ExecuteCommands("Model MG94MODEL_CLADE"+_modelID + "_"+ (rateCount-1)+"= (rate_matrix_clade_"+_modelID + "_" + (rateCount-1)+",vectorOfFrequencies,0);"); 73} 74 75for (fileID = 1; fileID <= fileCount; fileID = fileID + 1) 76{ 77 ExecuteCommands ("treeString = \"\"+ tree_"+ fileID+ "_0;"); 78 ExecuteCommands ("Tree tree_" + fileID + "_" + (rateCount-1) + "= treeString;"); 79 ExecuteCommands ("ReplicateConstraint (\"this1.?.synRate:=this2.?.synRate\",tree_"+fileID+"_"+(rateCount-1)+",tree_"+fileID+"_0);"); 80} 81 82lfDef = {}; 83 84for (fileID = 0; fileID < fileCount; fileID = fileID + 1) 85{ 86 lfDef[fileID] = ""; 87 lfDef[fileID] * 1024; 88 lfDef[fileID] * "Log("; 89} 90 91ps = {rateCount,1}; 92 93for (mi = 0; mi < rateCount -2; mi=mi+1) 94{ 95 ExecuteCommands ("ps["+mi+"]="+"P_"+(mi+1)+";"); 96} 97 98scaler = 1-1/totalCodonCount; 99freqValueMatrix = {rateCount,1}; 100freqStrMatrix = {rateCount,1}; 101freqValueMatrix = freqValueMatrix["scaler"]; 102 103for (mi=0; mi<rateCount-2; mi=mi+1) 104{ 105 freqStrMatrix[mi] = ""; 106 for (mi2 = 0; mi2 < mi; mi2=mi2+1) 107 { 108 freqValueMatrix[mi] = freqValueMatrix[mi] * (1-ps[mi2]); 109 freqStrMatrix[mi] = freqStrMatrix[mi]+"(1-P_"+(mi2+1)+")*"; 110 } 111 freqValueMatrix[mi] = freqValueMatrix[mi] * ps[mi]; 112 freqStrMatrix[mi] = freqStrMatrix[mi]+"P_"+(mi+1)+""; 113} 114 115freqStrMatrix[rateCount-2] = ""; 116freqStrMatrix[rateCount-1] = ""; 117 118 119for (mi2 = 0; mi2 < mi; mi2=mi2+1) 120{ 121 freqValueMatrix[mi] = freqValueMatrix[mi] * (1-ps[mi2]); 122 freqStrMatrix[mi] = freqStrMatrix[mi]+"(1-P_"+(mi2+1)+")*"; 123} 124 125 126freqStrMatrix[rateCount-1] = freqStrMatrix[rateCount-2]+"(1-P_"+(rateCount-1)+")"; 127freqStrMatrix[rateCount-2] = freqStrMatrix[rateCount-2]+"P_"+(rateCount-1); 128freqValueMatrix[rateCount-1] = 1-scaler; 129 130/* now solve for P_.. */ 131 132P_1 = freqValueMatrix[0]; 133runningTerm = 1-P_1; 134for (mi2 = 2; mi2 < rateCount-1; mi2=mi2+1) 135{ 136 ExecuteCommands ("P_"+mi2+"=freqValueMatrix[mi2-1]/runningTerm;runningTerm=runningTerm*(1-P_"+mi2+");"); 137} 138 139if (rateCount>2) 140{ 141 ExecuteCommands ("P_"+(rateCount-1)+"=1-freqValueMatrix[rateCount-1]/runningTerm;"); 142} 143 144lfParts = ""; 145lfParts * 128; 146lfParts * "LikelihoodFunction lf = ("; 147c_scalerS = ""; 148 149for (mi=0; mi<rateCount; mi=mi+1) 150{ 151 if (mi) 152 { 153 for (fileID = 0; fileID < fileCount; fileID = fileID + 1) 154 { 155 lfDef[fileID] * "+"; 156 } 157 c_scalerS = c_scalerS+"+"; 158 } 159 160 for (fileID = 0; fileID < fileCount; fileID = fileID + 1) 161 { 162 lfDef[fileID]*(""+freqStrMatrix[mi]+"*SITE_LIKELIHOOD["+(rateCount*fileID+mi)+"]"); 163 } 164 165 c_scalerS = c_scalerS + freqStrMatrix[mi] + "*S_"+ mi; 166} 167 168for (fileID = 0; fileID < fileCount; fileID = fileID + 1) 169{ 170 for (mi=0; mi<rateCount; mi=mi+1) 171 { 172 if (mi || fileID) 173 { 174 lfParts * ","; 175 } 176 lfParts * ("filteredData_"+(fileID+1)+",tree_"+(fileID+1)+"_"+mi); 177 } 178} 179 180lfParts * ",\""; 181 182for (fileID = 0; fileID < fileCount; fileID = fileID + 1) 183{ 184 lfDef[fileID] * 0; 185 if (fileID) 186 { 187 lfParts * "+"; 188 } 189 lfParts * (lfDef[fileID] + ")"); 190} 191 192lfParts * "\");"; 193lfParts * 0; 194 195ExecuteCommands ("c_scale:="+c_scalerS+";"); 196ExecuteCommands (lfParts); 197 198timer = Time(0); 199 200LFCompute (lf,LF_START_COMPUTE); 201bestDiff = 0; 202bestS = 0; 203bestNS = 0; 204bestAlpha = 0; 205bestBeta = 0; 206 207sampleCounter = 0; 208 209fprintf (stdout, "\nSampling negatively selected directions\n"); 210 211if (timerCap > 0) 212{ 213 while (Time(0)-timer <= timerCap) 214 { 215 v1 = Random(s_rateMin,s_rateMax); 216 v2 = Random(ns_rateMin,v1); 217 218 checkASample (0); 219 } 220} 221else 222{ 223 step = 1/16; 224 v1 = 0; 225 for (v1c = 0; v1c < 15; v1c = v1c + 1) 226 { 227 v1 = v1+step; 228 v2 = step/2; 229 for (v2c = 0; v2c < v1c; v2c = v2c+1) 230 { 231 checkASample (0); 232 v2 = v2 + step; 233 } 234 } 235} 236 237timer = Time(0); 238fprintf (stdout, "\nSampling neutral directions\n"); 239if (timerCap > 0) 240{ 241 while (Time(0)-timer <= timerCap) 242 { 243 v1 = Random(s_rateMin,s_rateMax); 244 v2 = v1; 245 246 checkASample (0); 247 } 248} 249else 250{ 251 step = 0.02; 252 v1 = 0; 253 for (v1c = 0; v1c < 50; v1c = v1c + 1) 254 { 255 v1 = v1 + step; 256 v2 = v1; 257 258 checkASample (0); 259 } 260} 261 262timer = Time(0); 263fprintf (stdout, "\nSampling positively selected directions\n"); 264if (timerCap > 0) 265{ 266 while (Time(0)-timer <= timerCap) 267 { 268 v1 = Random(s_rateMin,s_rateMax); 269 v2 = Random(v1,ns_rateMax); 270 checkASample (0); 271 } 272} 273else 274{ 275 step = 0.1; 276 step2 = 0.25; 277 278 v1 = 0.05; 279 for (v1c = 0; v1c < 10; v1c = v1c + 1) 280 { 281 v2 = v1+step; 282 for (v2c = 0; v2c < 20; v2c = v2c+1) 283 { 284 checkASample (0); 285 v2 = v2 + step2; 286 } 287 v1 = v1+step; 288 } 289} 290 291LFCompute (lf,LF_DONE_COMPUTE); 292 293fprintf (stdout, "\nExamined ", sampleCounter, " directions...\n"); 294 295if (bestDiff > 0) 296{ 297 fprintf (stdout, "\nFound a likelihood improvement in the direction (", bestS, ",", bestNS, ")\n"); 298 slfo = LIKELIHOOD_FUNCTION_OUTPUT; 299 LIKELIHOOD_FUNCTION_OUTPUT = 7; 300 outFile = outFile+"."+rateCount; 301 ExecuteCommands ("S_"+(rateCount-1)+"=bestAlpha;NS_"+(rateCount-1)+"=bestBeta;"); 302 fprintf(outFile,CLEAR_FILE,lf); 303 LIKELIHOOD_FUNCTION_OUTPUT = slfo; 304} 305else 306{ 307 fprintf (stdout, "\nFailed to find an improvement...\n"); 308} 309 310bivariateReturnAVL = {}; 311bivariateReturnAVL ["DIFF"] = bestDiff; 312 313return bivariateReturnAVL; 314 315/*------------------------------------------------------------------------------------------------------------*/ 316 317function checkASample (dummy) 318{ 319 ExecuteCommands ("S_"+(rateCount-1)+"=v1;NS_"+(rateCount-1)+"=v2;"); 320 LFCompute (lf,res_n); 321 if (res_n-res > bestDiff) 322 { 323 fprintf (stdout, "synRate = ", Format (v1,10,5), " nonSynRate = ", Format (v2,10,5), " Log Likelihood Diff: ", Format (res_n-res,10,5), "\n"); 324 bestDiff = res_n-res; 325 bestS = v1/c_scale; 326 bestNS = v2/c_scale; 327 bestAlpha = v1; 328 bestBeta = v2; 329 } 330 sampleCounter = sampleCounter + 1; 331 return 0; 332} 333