1RequireVersion ("2.00.20110101");
2
3ExecuteAFile  (HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def");
4ExecuteAFile  (HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"2RatesAnalyses"+DIRECTORY_SEPARATOR+"MG94xREVxBivariate.mdl");
5
6s_rateMin    = 0.01;
7s_rateMax    = 1.00;
8
9ns_rateMin    = 0.01;
10ns_rateMax    = 5.00;
11
12SetDialogPrompt ("Choose a model fit:");
13ExecuteAFile (PROMPT_FOR_FILE);
14outFile = LAST_FILE_PATH;
15GetInformation(vars,"^P_[0-9]+");
16rateCount = Columns (vars)+2;
17
18GetString 		(lfInfo, lf, -1);
19fileCount = 	Columns(lfInfo["Trees"])/(rateCount-1);
20
21LF_NEXUS_EXPORT_EXTRA = "bivariateFitHasMultipleCladeRates = " + bivariateFitHasMultipleCladeRates + ";";
22
23fprintf (stdout, "\nLoaded a fit on ", fileCount, " data sets with ", rateCount-1, " rates\n");
24
25timerCap = 0;
26while (timerCap < 1)
27{
28	fprintf (stdout, "\nSeconds (x3) to spend looking for directional improvement in fit (>=1, or -1 to search a fixed grid)?");
29	fscanf	(stdin, "Number",timerCap);
30	if (timerCap == (-1))
31	{
32		break;
33	}
34}
35
36if (timerCap < 0)
37{
38	fprintf (stdout, "Sampling 355 directions...\n");
39}
40else
41{
42	fprintf (stdout, "Allocated ", timerCap*3, " seconds for approximate search...\n");
43}
44
45LFCompute (lf,LF_START_COMPUTE);
46LFCompute (lf,res);
47LFCompute (lf,LF_DONE_COMPUTE);
48
49fprintf (stdout, "\nBaseline likelihood:", res);
50
51totalCodonCount = 0;
52for (fileID = 1; fileID <= fileCount; fileID = fileID + 1)
53{
54	ExecuteCommands		("totalCodonCount = totalCodonCount + filteredData_"+fileID+".sites;");
55}
56observedFreq       = {4,3};
57for (fileID = 1; fileID <= fileCount; fileID = fileID + 1)
58{
59	ExecuteCommands 	  ("HarvestFrequencies (tp, filteredData_"+fileID+",3,1,1);cfs=filteredData_"+fileID+".sites;");
60	observedFreq 		= observedFreq 		 + tp*(cfs/totalCodonCount);
61}
62
63ExecuteCommands ("global S_"+(rateCount-1)+"=0.5; global NS_"+(rateCount-1)+"=0.5;global P_"+(rateCount-1)+"=0.5;P_"+(rateCount-1)+":<1;");
64ExecuteCommands("PopulateModelMatrix(\"rate_matrix_"+(rateCount-1)+"\",observedFreq,\"S_\"+(rateCount-1)+\"/c_scale\",\"NS_\"+(rateCount-1)+\"/c_scale\");");
65ExecuteCommands("Model MG94MODEL_"+(rateCount-1)+"= (rate_matrix_"+(rateCount-1)+",vectorOfFrequencies,0);global P_"+(rateCount-1)+"=1/"+totalCodonCount+";");
66
67for (_modelID = 1; _modelID <  bivariateFitHasMultipleCladeRates; _modelID += 1)
68{
69    _cladeRate = "clade_" + _modelID + "_NS_" + (rateCount-1);
70    ExecuteCommands ("global " + _cladeRate + "= 1;");
71    ExecuteCommands("PopulateModelMatrix(\"rate_matrix_clade_"+_modelID + "_" + (rateCount-1)+"\",observedFreq,\"S_\"+(rateCount-1)+\"/c_scale\",\"" + _cladeRate + "*NS_\"+(rateCount-1)+\"/c_scale\");");
72    ExecuteCommands("Model MG94MODEL_CLADE"+_modelID + "_"+ (rateCount-1)+"= (rate_matrix_clade_"+_modelID + "_" + (rateCount-1)+",vectorOfFrequencies,0);");
73}
74
75for (fileID = 1; fileID <= fileCount; fileID = fileID + 1)
76{
77	ExecuteCommands ("treeString = \"\"+ tree_"+ fileID+ "_0;");
78	ExecuteCommands ("Tree tree_" + fileID + "_" + (rateCount-1) + "= treeString;");
79	ExecuteCommands ("ReplicateConstraint (\"this1.?.synRate:=this2.?.synRate\",tree_"+fileID+"_"+(rateCount-1)+",tree_"+fileID+"_0);");
80}
81
82lfDef	  = {};
83
84for (fileID = 0; fileID < fileCount; fileID = fileID + 1)
85{
86	lfDef[fileID] = "";
87	lfDef[fileID] * 1024;
88	lfDef[fileID] * "Log(";
89}
90
91ps 			 = {rateCount,1};
92
93for (mi = 0; mi < rateCount -2; mi=mi+1)
94{
95	ExecuteCommands ("ps["+mi+"]="+"P_"+(mi+1)+";");
96}
97
98scaler		 = 1-1/totalCodonCount;
99freqValueMatrix    = {rateCount,1};
100freqStrMatrix      = {rateCount,1};
101freqValueMatrix    = freqValueMatrix["scaler"];
102
103for (mi=0; mi<rateCount-2; mi=mi+1)
104{
105	freqStrMatrix[mi]   = "";
106	for (mi2 = 0; mi2 < mi; mi2=mi2+1)
107	{
108		freqValueMatrix[mi] = freqValueMatrix[mi] * (1-ps[mi2]);
109		freqStrMatrix[mi]   = freqStrMatrix[mi]+"(1-P_"+(mi2+1)+")*";
110	}
111	freqValueMatrix[mi] = freqValueMatrix[mi] * ps[mi];
112	freqStrMatrix[mi]   = freqStrMatrix[mi]+"P_"+(mi+1)+"";
113}
114
115freqStrMatrix[rateCount-2]   = "";
116freqStrMatrix[rateCount-1]   = "";
117
118
119for (mi2 = 0; mi2 < mi; mi2=mi2+1)
120{
121	freqValueMatrix[mi] = freqValueMatrix[mi] * (1-ps[mi2]);
122	freqStrMatrix[mi]   = freqStrMatrix[mi]+"(1-P_"+(mi2+1)+")*";
123}
124
125
126freqStrMatrix[rateCount-1] = freqStrMatrix[rateCount-2]+"(1-P_"+(rateCount-1)+")";
127freqStrMatrix[rateCount-2] = freqStrMatrix[rateCount-2]+"P_"+(rateCount-1);
128freqValueMatrix[rateCount-1] = 1-scaler;
129
130/* now solve for P_.. */
131
132P_1 = freqValueMatrix[0];
133runningTerm = 1-P_1;
134for (mi2 = 2; mi2 < rateCount-1; mi2=mi2+1)
135{
136	ExecuteCommands ("P_"+mi2+"=freqValueMatrix[mi2-1]/runningTerm;runningTerm=runningTerm*(1-P_"+mi2+");");
137}
138
139if (rateCount>2)
140{
141	ExecuteCommands ("P_"+(rateCount-1)+"=1-freqValueMatrix[rateCount-1]/runningTerm;");
142}
143
144lfParts = "";
145lfParts * 128;
146lfParts * "LikelihoodFunction lf = (";
147c_scalerS = "";
148
149for (mi=0; mi<rateCount; mi=mi+1)
150{
151	if (mi)
152	{
153		for (fileID = 0; fileID < fileCount; fileID = fileID + 1)
154		{
155			lfDef[fileID] * "+";
156		}
157		c_scalerS = c_scalerS+"+";
158	}
159
160	for (fileID = 0; fileID < fileCount; fileID = fileID + 1)
161	{
162		lfDef[fileID]*(""+freqStrMatrix[mi]+"*SITE_LIKELIHOOD["+(rateCount*fileID+mi)+"]");
163	}
164
165	c_scalerS = c_scalerS + freqStrMatrix[mi] + "*S_"+ mi;
166}
167
168for (fileID = 0; fileID < fileCount; fileID = fileID + 1)
169{
170	for (mi=0; mi<rateCount; mi=mi+1)
171	{
172		if (mi || fileID)
173		{
174			lfParts * ",";
175		}
176		lfParts * ("filteredData_"+(fileID+1)+",tree_"+(fileID+1)+"_"+mi);
177	}
178}
179
180lfParts * ",\"";
181
182for (fileID = 0; fileID < fileCount; fileID = fileID + 1)
183{
184	lfDef[fileID] * 0;
185	if (fileID)
186	{
187		lfParts * "+";
188	}
189	lfParts * (lfDef[fileID] + ")");
190}
191
192lfParts * "\");";
193lfParts * 0;
194
195ExecuteCommands ("c_scale:="+c_scalerS+";");
196ExecuteCommands (lfParts);
197
198timer = Time(0);
199
200LFCompute (lf,LF_START_COMPUTE);
201bestDiff = 0;
202bestS    = 0;
203bestNS   = 0;
204bestAlpha = 0;
205bestBeta  = 0;
206
207sampleCounter = 0;
208
209fprintf (stdout, "\nSampling negatively selected directions\n");
210
211if (timerCap > 0)
212{
213	while (Time(0)-timer <= timerCap)
214	{
215		v1 = Random(s_rateMin,s_rateMax);
216		v2 = Random(ns_rateMin,v1);
217
218		checkASample (0);
219	}
220}
221else
222{
223	step = 1/16;
224	v1 = 0;
225	for (v1c = 0; v1c < 15; v1c = v1c + 1)
226	{
227		v1 = v1+step;
228		v2 = step/2;
229		for (v2c = 0; v2c < v1c; v2c = v2c+1)
230		{
231			checkASample (0);
232			v2 = v2 + step;
233		}
234	}
235}
236
237timer = Time(0);
238fprintf (stdout, "\nSampling neutral directions\n");
239if (timerCap > 0)
240{
241	while (Time(0)-timer <= timerCap)
242	{
243		v1 = Random(s_rateMin,s_rateMax);
244		v2 = v1;
245
246		checkASample (0);
247	}
248}
249else
250{
251	step = 0.02;
252	v1 = 0;
253	for (v1c = 0; v1c < 50; v1c = v1c + 1)
254	{
255		v1 = v1 + step;
256		v2 = v1;
257
258		checkASample (0);
259	}
260}
261
262timer = Time(0);
263fprintf (stdout, "\nSampling positively selected directions\n");
264if (timerCap > 0)
265{
266	while (Time(0)-timer <= timerCap)
267	{
268		v1 = Random(s_rateMin,s_rateMax);
269		v2 = Random(v1,ns_rateMax);
270		checkASample (0);
271	}
272}
273else
274{
275	step  = 0.1;
276	step2 = 0.25;
277
278	v1 = 0.05;
279	for (v1c = 0; v1c < 10; v1c = v1c + 1)
280	{
281		v2 = v1+step;
282		for (v2c = 0; v2c < 20; v2c = v2c+1)
283		{
284			checkASample (0);
285			v2 = v2 + step2;
286		}
287		v1 = v1+step;
288	}
289}
290
291LFCompute (lf,LF_DONE_COMPUTE);
292
293fprintf (stdout, "\nExamined ", sampleCounter, " directions...\n");
294
295if (bestDiff > 0)
296{
297	fprintf (stdout, "\nFound a likelihood improvement in the direction (", bestS, ",", bestNS, ")\n");
298	slfo = LIKELIHOOD_FUNCTION_OUTPUT;
299	LIKELIHOOD_FUNCTION_OUTPUT = 7;
300	outFile = outFile+"."+rateCount;
301	ExecuteCommands ("S_"+(rateCount-1)+"=bestAlpha;NS_"+(rateCount-1)+"=bestBeta;");
302	fprintf(outFile,CLEAR_FILE,lf);
303	LIKELIHOOD_FUNCTION_OUTPUT = slfo;
304}
305else
306{
307	fprintf (stdout, "\nFailed to find an improvement...\n");
308}
309
310bivariateReturnAVL 			= {};
311bivariateReturnAVL ["DIFF"] = bestDiff;
312
313return bivariateReturnAVL;
314
315/*------------------------------------------------------------------------------------------------------------*/
316
317function checkASample (dummy)
318{
319	ExecuteCommands ("S_"+(rateCount-1)+"=v1;NS_"+(rateCount-1)+"=v2;");
320	LFCompute (lf,res_n);
321	if (res_n-res > bestDiff)
322	{
323		fprintf (stdout, "synRate = ", Format (v1,10,5), " nonSynRate = ", Format (v2,10,5), " Log Likelihood Diff: ", Format (res_n-res,10,5), "\n");
324		bestDiff 	= res_n-res;
325		bestS 		= v1/c_scale;
326		bestNS 		= v2/c_scale;
327		bestAlpha 	= v1;
328		bestBeta  	= v2;
329	}
330	sampleCounter = sampleCounter + 1;
331	return 0;
332}
333