1RequireVersion ("0.99.20060817"); 2 3 4s_rateMin = 0.01; 5s_rateMax = 1.00; 6 7ns_rateMin = 0.01; 8ns_rateMax = 5.00; 9 10SetDialogPrompt ("Choose a model fit:"); 11ExecuteAFile (PROMPT_FOR_FILE); 12 13if (bivariateFitHasMultipleCladeRates>1) 14{ 15 LF_NEXUS_EXPORT_EXTRA = "bivariateFitHasMultipleCladeRates = " + bivariateFitHasMultipleCladeRates + ";"; 16 INCLUDE_MODEL_SPECS = 1; 17} 18 19outFile = LAST_FILE_PATH; 20GetInformation(vars,"^P_[0-9]+"); 21rateCount = Columns (vars)+2; 22 23GetString (lfInfo, lf, -1); 24fileCount = Columns(lfInfo["Trees"])/(rateCount-1); 25 26fprintf (stdout, "\nLoaded a fit on ", fileCount, " data sets with ", rateCount-1, " rates\n"); 27 28timerCap = 0; 29while (timerCap < 1) 30{ 31 fprintf (stdout, "\nSeconds (x3) to spend looking for directional improvement in fit (>=1, or -1 to search a fixed grid)?"); 32 fscanf (stdin, "Number",timerCap); 33 if (timerCap == (-1)) 34 { 35 break; 36 } 37} 38 39if (timerCap < 0) 40{ 41 fprintf (stdout, "Sampling 355 directions...\n"); 42} 43else 44{ 45 fprintf (stdout, "Allocated ", timerCap*3, " seconds for approximate search...\n"); 46} 47 48LFCompute (lf,LF_START_COMPUTE); 49LFCompute (lf,res); 50LFCompute (lf,LF_DONE_COMPUTE); 51 52fprintf (stdout, "\nBaseline likelihood:", res); 53 54 55ExecuteCommands ("global S_"+(rateCount-1)+"=0.5; global NS_"+(rateCount-1)+"=0.5;global P_"+(rateCount-1)+"=0.5;P_"+(rateCount-1)+":<1;"); 56ExecuteCommands("PopulateModelMatrix(\"rate_matrix_"+(rateCount-1)+"\",paramFreqs,\"S_\"+(rateCount-1)+\"/c_scale\",\"NS_\"+(rateCount-1)+\"/c_scale\",aaRateMultipliers);"); 57ExecuteCommands("Model MG94MODEL_"+(rateCount-1)+"= (rate_matrix_"+(rateCount-1)+",vectorOfFrequencies,0);global P_"+(rateCount-1)+"=1/"+totalCodonCount+";"); 58 59for (_modelID = 1; _modelID < bivariateFitHasMultipleCladeRates; _modelID += 1) 60{ 61 _cladeRate = "clade_" + _modelID + "_NS_" + (rateCount-1); 62 ExecuteCommands ("global " + _cladeRate + " = 1"); 63 ExecuteCommands("PopulateModelMatrix(\"rate_matrix_clade_"+_modelID + "_" + (rateCount-1)+"\",observedFreq,\"S_\"+(rateCount-1)+\"/c_scale\",\"" + _cladeRate + "*NS_\"+(rateCount-1)+\"/c_scale\",aaRateMultipliers);"); 64 ExecuteCommands("Model MG94MODEL_ "+ (rateCount-1)+"_CLADE_"+_modelID + "= (rate_matrix_clade_"+_modelID + "_" + (rateCount-1)+",vectorOfFrequencies,0);"); 65} 66 67for (fileID = 1; fileID <= fileCount; fileID = fileID + 1) 68{ 69 ExecuteCommands ("treeString = \"\"+tree_"+ fileID+ "_0;"); 70 if (bivariateFitHasMultipleCladeRates>1) 71 { 72 treeString = treeString ^ {{"MG94MODEL_0","MG94MODEL_" + (rateCount-1)}}; 73 } 74 ExecuteCommands ("Tree tree_" + fileID + "_" + (rateCount-1) + "= treeString;"); 75 ExecuteCommands ("ReplicateConstraint (\"this1.?.synRate:=this2.?.synRate\",tree_"+fileID+"_"+(rateCount-1)+",tree_"+fileID+"_0);"); 76} 77 78lfDef = {}; 79 80for (fileID = 0; fileID < fileCount; fileID = fileID + 1) 81{ 82 lfDef[fileID] = ""; 83 lfDef[fileID] * 1024; 84 lfDef[fileID] * "Log("; 85} 86 87ps = {rateCount,1}; 88 89for (mi = 0; mi < rateCount -2; mi=mi+1) 90{ 91 ExecuteCommands ("ps["+mi+"]="+"P_"+(mi+1)+";"); 92} 93 94scaler = 1-1/totalCodonCount; 95freqValueMatrix = {rateCount,1}; 96freqStrMatrix = {rateCount,1}; 97freqValueMatrix = freqValueMatrix["scaler"]; 98 99for (mi=0; mi<rateCount-2; mi=mi+1) 100{ 101 freqStrMatrix[mi] = ""; 102 for (mi2 = 0; mi2 < mi; mi2=mi2+1) 103 { 104 freqValueMatrix[mi] = freqValueMatrix[mi] * (1-ps[mi2]); 105 freqStrMatrix[mi] = freqStrMatrix[mi]+"(1-P_"+(mi2+1)+")*"; 106 } 107 freqValueMatrix[mi] = freqValueMatrix[mi] * ps[mi]; 108 freqStrMatrix[mi] = freqStrMatrix[mi]+"P_"+(mi+1)+""; 109} 110 111freqStrMatrix[rateCount-2] = ""; 112freqStrMatrix[rateCount-1] = ""; 113 114 115for (mi2 = 0; mi2 < mi; mi2=mi2+1) 116{ 117 freqValueMatrix[mi] = freqValueMatrix[mi] * (1-ps[mi2]); 118 freqStrMatrix[mi] = freqStrMatrix[mi]+"(1-P_"+(mi2+1)+")*"; 119} 120 121 122freqStrMatrix[rateCount-1] = freqStrMatrix[rateCount-2]+"(1-P_"+(rateCount-1)+")"; 123freqStrMatrix[rateCount-2] = freqStrMatrix[rateCount-2]+"P_"+(rateCount-1); 124freqValueMatrix[rateCount-1] = 1-scaler; 125 126/* now solve for P_.. */ 127 128P_1 = freqValueMatrix[0]; 129runningTerm = 1-P_1; 130for (mi2 = 2; mi2 < rateCount-1; mi2=mi2+1) 131{ 132 ExecuteCommands ("P_"+mi2+"=freqValueMatrix[mi2-1]/runningTerm;runningTerm=runningTerm*(1-P_"+mi2+");"); 133} 134 135if (rateCount>2) 136{ 137 ExecuteCommands ("P_"+(rateCount-1)+"=1-freqValueMatrix[rateCount-1]/runningTerm;"); 138} 139 140lfParts = ""; 141lfParts * 128; 142lfParts * "LikelihoodFunction lf = ("; 143c_scalerS = ""; 144 145for (mi=0; mi<rateCount; mi=mi+1) 146{ 147 if (mi) 148 { 149 for (fileID = 0; fileID < fileCount; fileID = fileID + 1) 150 { 151 lfDef[fileID] * "+"; 152 } 153 c_scalerS = c_scalerS+"+"; 154 } 155 156 for (fileID = 0; fileID < fileCount; fileID = fileID + 1) 157 { 158 lfDef[fileID]*(""+freqStrMatrix[mi]+"*SITE_LIKELIHOOD["+(rateCount*fileID+mi)+"]"); 159 } 160 161 c_scalerS = c_scalerS + freqStrMatrix[mi] + "*S_"+ mi; 162} 163 164for (fileID = 0; fileID < fileCount; fileID = fileID + 1) 165{ 166 for (mi=0; mi<rateCount; mi=mi+1) 167 { 168 if (mi || fileID) 169 { 170 lfParts * ","; 171 } 172 lfParts * ("filteredData_"+(fileID+1)+",tree_"+(fileID+1)+"_"+mi); 173 } 174} 175 176lfParts * ",\""; 177 178for (fileID = 0; fileID < fileCount; fileID = fileID + 1) 179{ 180 lfDef[fileID] * 0; 181 if (fileID) 182 { 183 lfParts * "+"; 184 } 185 lfParts * (lfDef[fileID] + ")"); 186} 187 188lfParts * "\");"; 189lfParts * 0; 190 191ExecuteCommands ("c_scale:="+c_scalerS+";"); 192ExecuteCommands (lfParts); 193 194timer = Time(0); 195 196LFCompute (lf,LF_START_COMPUTE); 197bestDiff = 0; 198bestS = 0; 199bestNS = 0; 200bestAlpha = 0; 201bestBeta = 0; 202 203sampleCounter = 0; 204 205fprintf (stdout, "\nSampling negatively selected directions\n"); 206 207if (timerCap > 0) 208{ 209 while (Time(0)-timer <= timerCap) 210 { 211 v1 = Random(s_rateMin,s_rateMax); 212 v2 = Random(ns_rateMin,v1); 213 214 checkASample (); 215 } 216} 217else 218{ 219 step = 1/16; 220 v1 = 0; 221 for (v1c = 0; v1c < 15; v1c = v1c + 1) 222 { 223 v1 = v1+step; 224 v2 = step/2; 225 for (v2c = 0; v2c < v1c; v2c = v2c+1) 226 { 227 checkASample (); 228 v2 = v2 + step; 229 } 230 } 231} 232 233timer = Time(0); 234fprintf (stdout, "\nSampling neutral directions\n"); 235if (timerCap > 0) 236{ 237 while (Time(0)-timer <= timerCap) 238 { 239 v1 = Random(s_rateMin,s_rateMax); 240 v2 = v1; 241 242 checkASample (); 243 } 244} 245else 246{ 247 step = 0.02; 248 v1 = 0; 249 for (v1c = 0; v1c < 50; v1c = v1c + 1) 250 { 251 v1 = v1 + step; 252 v2 = v1; 253 254 checkASample (); 255 } 256} 257 258timer = Time(0); 259fprintf (stdout, "\nSampling positively selected directions\n"); 260if (timerCap > 0) 261{ 262 while (Time(0)-timer <= timerCap) 263 { 264 v1 = Random(s_rateMin,s_rateMax); 265 v2 = Random(v1,ns_rateMax); 266 checkASample (); 267 } 268} 269else 270{ 271 step = 0.1; 272 step2 = 0.25; 273 274 v1 = 0.05; 275 for (v1c = 0; v1c < 10; v1c = v1c + 1) 276 { 277 v2 = v1+step; 278 for (v2c = 0; v2c < 20; v2c = v2c+1) 279 { 280 checkASample (); 281 v2 = v2 + step2; 282 } 283 v1 = v1+step; 284 } 285} 286 287LFCompute (lf,LF_DONE_COMPUTE); 288 289fprintf (stdout, "\nExamined ", sampleCounter, " directions...\n"); 290 291if (bestDiff > 0) 292{ 293 fprintf (stdout, "\nFound a likelihood improvement in the direction (", bestS, ",", bestNS, ")\n"); 294 slfo = LIKELIHOOD_FUNCTION_OUTPUT; 295 LIKELIHOOD_FUNCTION_OUTPUT = 7; 296 outFile = outFile+"."+rateCount; 297 ExecuteCommands ("S_"+(rateCount-1)+"=bestAlpha;NS_"+(rateCount-1)+"=bestBeta;"); 298 fprintf(outFile,CLEAR_FILE,lf); 299 LIKELIHOOD_FUNCTION_OUTPUT = slfo; 300} 301else 302{ 303 fprintf (stdout, "\nFailed to find an improvement...\n"); 304} 305 306bivariateReturnAVL = {}; 307bivariateReturnAVL ["DIFF"] = bestDiff; 308 309return bivariateReturnAVL; 310 311/*------------------------------------------------------------------------------------------------------------*/ 312 313function checkASample () 314{ 315 ExecuteCommands ("S_"+(rateCount-1)+"=v1;NS_"+(rateCount-1)+"=v2;"); 316 LFCompute (lf,res_n); 317 if (res_n-res > bestDiff) 318 { 319 fprintf (stdout, "synRate = ", Format (v1,10,5), " nonSynRate = ", Format (v2,10,5), " Log Likelihood Diff: ", Format (res_n-res,10,5), "\n"); 320 bestDiff = res_n-res; 321 bestS = v1/c_scale; 322 bestNS = v2/c_scale; 323 bestAlpha = v1; 324 bestBeta = v2; 325 } 326 sampleCounter = sampleCounter + 1; 327 return 0; 328} 329