1RequireVersion ("0.99.20060817");
2
3
4s_rateMin    = 0.01;
5s_rateMax    = 1.00;
6
7ns_rateMin    = 0.01;
8ns_rateMax    = 5.00;
9
10SetDialogPrompt ("Choose a model fit:");
11ExecuteAFile (PROMPT_FOR_FILE);
12
13if (bivariateFitHasMultipleCladeRates>1)
14{
15    LF_NEXUS_EXPORT_EXTRA = "bivariateFitHasMultipleCladeRates = " + bivariateFitHasMultipleCladeRates + ";";
16    INCLUDE_MODEL_SPECS = 1;
17}
18
19outFile = LAST_FILE_PATH;
20GetInformation(vars,"^P_[0-9]+");
21rateCount = Columns (vars)+2;
22
23GetString 		(lfInfo, lf, -1);
24fileCount = 	Columns(lfInfo["Trees"])/(rateCount-1);
25
26fprintf (stdout, "\nLoaded a fit on ", fileCount, " data sets with ", rateCount-1, " rates\n");
27
28timerCap = 0;
29while (timerCap < 1)
30{
31	fprintf (stdout, "\nSeconds (x3) to spend looking for directional improvement in fit (>=1, or -1 to search a fixed grid)?");
32	fscanf	(stdin, "Number",timerCap);
33	if (timerCap == (-1))
34	{
35		break;
36	}
37}
38
39if (timerCap < 0)
40{
41	fprintf (stdout, "Sampling 355 directions...\n");
42}
43else
44{
45	fprintf (stdout, "Allocated ", timerCap*3, " seconds for approximate search...\n");
46}
47
48LFCompute (lf,LF_START_COMPUTE);
49LFCompute (lf,res);
50LFCompute (lf,LF_DONE_COMPUTE);
51
52fprintf (stdout, "\nBaseline likelihood:", res);
53
54
55ExecuteCommands ("global S_"+(rateCount-1)+"=0.5; global NS_"+(rateCount-1)+"=0.5;global P_"+(rateCount-1)+"=0.5;P_"+(rateCount-1)+":<1;");
56ExecuteCommands("PopulateModelMatrix(\"rate_matrix_"+(rateCount-1)+"\",paramFreqs,\"S_\"+(rateCount-1)+\"/c_scale\",\"NS_\"+(rateCount-1)+\"/c_scale\",aaRateMultipliers);");
57ExecuteCommands("Model MG94MODEL_"+(rateCount-1)+"= (rate_matrix_"+(rateCount-1)+",vectorOfFrequencies,0);global P_"+(rateCount-1)+"=1/"+totalCodonCount+";");
58
59for (_modelID = 1; _modelID <  bivariateFitHasMultipleCladeRates; _modelID += 1)
60{
61    _cladeRate = "clade_" + _modelID + "_NS_" + (rateCount-1);
62    ExecuteCommands ("global " + _cladeRate + " = 1");
63    ExecuteCommands("PopulateModelMatrix(\"rate_matrix_clade_"+_modelID + "_" + (rateCount-1)+"\",observedFreq,\"S_\"+(rateCount-1)+\"/c_scale\",\"" + _cladeRate + "*NS_\"+(rateCount-1)+\"/c_scale\",aaRateMultipliers);");
64    ExecuteCommands("Model MG94MODEL_ "+ (rateCount-1)+"_CLADE_"+_modelID + "= (rate_matrix_clade_"+_modelID + "_" + (rateCount-1)+",vectorOfFrequencies,0);");
65}
66
67for (fileID = 1; fileID <= fileCount; fileID = fileID + 1)
68{
69	ExecuteCommands ("treeString = \"\"+tree_"+ fileID+ "_0;");
70    if (bivariateFitHasMultipleCladeRates>1)
71    {
72        treeString = treeString ^ {{"MG94MODEL_0","MG94MODEL_" + (rateCount-1)}};
73    }
74	ExecuteCommands ("Tree tree_" + fileID + "_" + (rateCount-1) + "= treeString;");
75	ExecuteCommands ("ReplicateConstraint (\"this1.?.synRate:=this2.?.synRate\",tree_"+fileID+"_"+(rateCount-1)+",tree_"+fileID+"_0);");
76}
77
78lfDef	  = {};
79
80for (fileID = 0; fileID < fileCount; fileID = fileID + 1)
81{
82	lfDef[fileID] = "";
83	lfDef[fileID] * 1024;
84	lfDef[fileID] * "Log(";
85}
86
87ps 			 = {rateCount,1};
88
89for (mi = 0; mi < rateCount -2; mi=mi+1)
90{
91	ExecuteCommands ("ps["+mi+"]="+"P_"+(mi+1)+";");
92}
93
94scaler		 = 1-1/totalCodonCount;
95freqValueMatrix    = {rateCount,1};
96freqStrMatrix      = {rateCount,1};
97freqValueMatrix    = freqValueMatrix["scaler"];
98
99for (mi=0; mi<rateCount-2; mi=mi+1)
100{
101	freqStrMatrix[mi]   = "";
102	for (mi2 = 0; mi2 < mi; mi2=mi2+1)
103	{
104		freqValueMatrix[mi] = freqValueMatrix[mi] * (1-ps[mi2]);
105		freqStrMatrix[mi]   = freqStrMatrix[mi]+"(1-P_"+(mi2+1)+")*";
106	}
107	freqValueMatrix[mi] = freqValueMatrix[mi] * ps[mi];
108	freqStrMatrix[mi]   = freqStrMatrix[mi]+"P_"+(mi+1)+"";
109}
110
111freqStrMatrix[rateCount-2]   = "";
112freqStrMatrix[rateCount-1]   = "";
113
114
115for (mi2 = 0; mi2 < mi; mi2=mi2+1)
116{
117	freqValueMatrix[mi] = freqValueMatrix[mi] * (1-ps[mi2]);
118	freqStrMatrix[mi]   = freqStrMatrix[mi]+"(1-P_"+(mi2+1)+")*";
119}
120
121
122freqStrMatrix[rateCount-1] = freqStrMatrix[rateCount-2]+"(1-P_"+(rateCount-1)+")";
123freqStrMatrix[rateCount-2] = freqStrMatrix[rateCount-2]+"P_"+(rateCount-1);
124freqValueMatrix[rateCount-1] = 1-scaler;
125
126/* now solve for P_.. */
127
128P_1 = freqValueMatrix[0];
129runningTerm = 1-P_1;
130for (mi2 = 2; mi2 < rateCount-1; mi2=mi2+1)
131{
132	ExecuteCommands ("P_"+mi2+"=freqValueMatrix[mi2-1]/runningTerm;runningTerm=runningTerm*(1-P_"+mi2+");");
133}
134
135if (rateCount>2)
136{
137	ExecuteCommands ("P_"+(rateCount-1)+"=1-freqValueMatrix[rateCount-1]/runningTerm;");
138}
139
140lfParts = "";
141lfParts * 128;
142lfParts * "LikelihoodFunction lf = (";
143c_scalerS = "";
144
145for (mi=0; mi<rateCount; mi=mi+1)
146{
147	if (mi)
148	{
149		for (fileID = 0; fileID < fileCount; fileID = fileID + 1)
150		{
151			lfDef[fileID] * "+";
152		}
153		c_scalerS = c_scalerS+"+";
154	}
155
156	for (fileID = 0; fileID < fileCount; fileID = fileID + 1)
157	{
158		lfDef[fileID]*(""+freqStrMatrix[mi]+"*SITE_LIKELIHOOD["+(rateCount*fileID+mi)+"]");
159	}
160
161	c_scalerS = c_scalerS + freqStrMatrix[mi] + "*S_"+ mi;
162}
163
164for (fileID = 0; fileID < fileCount; fileID = fileID + 1)
165{
166	for (mi=0; mi<rateCount; mi=mi+1)
167	{
168		if (mi || fileID)
169		{
170			lfParts * ",";
171		}
172		lfParts * ("filteredData_"+(fileID+1)+",tree_"+(fileID+1)+"_"+mi);
173	}
174}
175
176lfParts * ",\"";
177
178for (fileID = 0; fileID < fileCount; fileID = fileID + 1)
179{
180	lfDef[fileID] * 0;
181	if (fileID)
182	{
183		lfParts * "+";
184	}
185	lfParts * (lfDef[fileID] + ")");
186}
187
188lfParts * "\");";
189lfParts * 0;
190
191ExecuteCommands ("c_scale:="+c_scalerS+";");
192ExecuteCommands (lfParts);
193
194timer = Time(0);
195
196LFCompute (lf,LF_START_COMPUTE);
197bestDiff = 0;
198bestS    = 0;
199bestNS   = 0;
200bestAlpha = 0;
201bestBeta  = 0;
202
203sampleCounter = 0;
204
205fprintf (stdout, "\nSampling negatively selected directions\n");
206
207if (timerCap > 0)
208{
209	while (Time(0)-timer <= timerCap)
210	{
211		v1 = Random(s_rateMin,s_rateMax);
212		v2 = Random(ns_rateMin,v1);
213
214		checkASample ();
215	}
216}
217else
218{
219	step = 1/16;
220	v1 = 0;
221	for (v1c = 0; v1c < 15; v1c = v1c + 1)
222	{
223		v1 = v1+step;
224		v2 = step/2;
225		for (v2c = 0; v2c < v1c; v2c = v2c+1)
226		{
227			checkASample ();
228			v2 = v2 + step;
229		}
230	}
231}
232
233timer = Time(0);
234fprintf (stdout, "\nSampling neutral directions\n");
235if (timerCap > 0)
236{
237	while (Time(0)-timer <= timerCap)
238	{
239		v1 = Random(s_rateMin,s_rateMax);
240		v2 = v1;
241
242		checkASample ();
243	}
244}
245else
246{
247	step = 0.02;
248	v1 = 0;
249	for (v1c = 0; v1c < 50; v1c = v1c + 1)
250	{
251		v1 = v1 + step;
252		v2 = v1;
253
254		checkASample ();
255	}
256}
257
258timer = Time(0);
259fprintf (stdout, "\nSampling positively selected directions\n");
260if (timerCap > 0)
261{
262	while (Time(0)-timer <= timerCap)
263	{
264		v1 = Random(s_rateMin,s_rateMax);
265		v2 = Random(v1,ns_rateMax);
266		checkASample ();
267	}
268}
269else
270{
271	step  = 0.1;
272	step2 = 0.25;
273
274	v1 = 0.05;
275	for (v1c = 0; v1c < 10; v1c = v1c + 1)
276	{
277		v2 = v1+step;
278		for (v2c = 0; v2c < 20; v2c = v2c+1)
279		{
280			checkASample ();
281			v2 = v2 + step2;
282		}
283		v1 = v1+step;
284	}
285}
286
287LFCompute (lf,LF_DONE_COMPUTE);
288
289fprintf (stdout, "\nExamined ", sampleCounter, " directions...\n");
290
291if (bestDiff > 0)
292{
293	fprintf (stdout, "\nFound a likelihood improvement in the direction (", bestS, ",", bestNS, ")\n");
294	slfo = LIKELIHOOD_FUNCTION_OUTPUT;
295	LIKELIHOOD_FUNCTION_OUTPUT = 7;
296	outFile = outFile+"."+rateCount;
297	ExecuteCommands ("S_"+(rateCount-1)+"=bestAlpha;NS_"+(rateCount-1)+"=bestBeta;");
298	fprintf(outFile,CLEAR_FILE,lf);
299	LIKELIHOOD_FUNCTION_OUTPUT = slfo;
300}
301else
302{
303	fprintf (stdout, "\nFailed to find an improvement...\n");
304}
305
306bivariateReturnAVL 			= {};
307bivariateReturnAVL ["DIFF"] = bestDiff;
308
309return bivariateReturnAVL;
310
311/*------------------------------------------------------------------------------------------------------------*/
312
313function checkASample ()
314{
315	ExecuteCommands ("S_"+(rateCount-1)+"=v1;NS_"+(rateCount-1)+"=v2;");
316	LFCompute (lf,res_n);
317	if (res_n-res > bestDiff)
318	{
319		fprintf (stdout, "synRate = ", Format (v1,10,5), " nonSynRate = ", Format (v2,10,5), " Log Likelihood Diff: ", Format (res_n-res,10,5), "\n");
320		bestDiff 	= res_n-res;
321		bestS 		= v1/c_scale;
322		bestNS 		= v2/c_scale;
323		bestAlpha 	= v1;
324		bestBeta  	= v2;
325	}
326	sampleCounter = sampleCounter + 1;
327	return 0;
328}
329