1/*---------------------------------------------------------------------------------------------------------------------------------------------------*/
2
3function echoCatVar (ps,values)
4{
5	DD 			= Rows(ps);
6	EE 			= 0.0;
7	sampleVar 	= 0.0;
8
9	for (k=0; k<DD; k=k+1)
10	{
11		EE = ps[k]*values[k]+EE;
12		sampleVar = sampleVar+ps[k]*values[k]*values[k];
13	}
14
15	sampleVar = sampleVar-EE*EE;
16
17	fprintf  (stdout,  "Mean     = ",EE,
18					 "\nVariance = ",sampleVar,
19					 "\nCOV      = ", Sqrt(sampleVar)/EE,"\n");
20
21	for (k=0; k<DD; k=k+1)
22	{
23		fprintf (stdout,"\nRate[",Format(k,0,0),"]=",Format(values[k],12,8), " (weight=",
24						  Format(ps[k],9,7),")");
25	}
26	return EE;
27}
28
29/*---------------------------------------------------------------------------------------------------------------------------------------------------*/
30
31function echoCovariance (ps,values1,values2)
32{
33	DD 			= Rows(ps);
34	EE 			= 0.0;
35	EE2			= 0.0;
36	sampleVar 	= 0.0;
37	sampleVar2 	= 0.0;
38
39	for (k=0; k<DD; k=k+1)
40	{
41		EE  = ps[k]*values1[k]+EE;
42		EE2 = ps[k]*values2[k]+EE2;
43		sampleVar = sampleVar+ps[k]*values1[k]*values1[k];
44		sampleVar2 = sampleVar2+ps[k]*values2[k]*values2[k];
45	}
46
47	sampleVar = sampleVar-EE*EE;
48	sampleVar2 = sampleVar2-EE2*EE2;
49
50	cov  = 0;
51	cov2 = 0;
52	for (k=0; k<DD; k=k+1)
53	{
54		cov  = cov  + ps[k]*(values1[k]-EE);
55		cov2 = cov2 + ps[k]*(values2[k]-EE2);
56	}
57
58	return cov*cov2/Sqrt(sampleVar*sampleVar2);
59}
60
61/*---------------------------------------------------------------------------------------------------------------------------------------------------*/
62
63//ExecuteAFile(HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def");
64//ExecuteAFile(HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility"+DIRECTORY_SEPARATOR+"PS_Plotters.bf");
65#include "TemplateModels/chooseGeneticCode.def";
66#include "Utility/PS_Plotters.bf";
67
68
69SetDialogPrompt		("Choose a model fit:");
70
71ExecuteAFile (PROMPT_FOR_FILE);
72
73
74
75basePath		    = LAST_FILE_PATH;
76
77
78
79GetInformation		(vars,"^P_[0-9]+");
80
81
82rateCount			= Columns (vars)+1;
83
84
85GetString			(lfInfo, lf, -1);
86fileCount			= Columns(lfInfo["Trees"])/rateCount;
87
88fprintf (stdout, "\nLoaded a fit on ", fileCount, " data sets with ", rateCount, " rates\n");
89
90ps 			 = {rateCount,1};
91rateInfo	 = {rateCount,4};
92rateInfo	 = rateInfo["1"];
93
94for (mi = 0; mi < rateCount-1; mi=mi+1)
95{
96	ExecuteCommands ("ps["+mi+"]="+"P_"+(mi+1)+";");
97}
98
99for (mi = 0; mi < rateCount; mi=mi+1)
100{
101	ExecuteCommands ("rateInfo[mi][1]="+"S_"+mi+"/c_scale;");
102	ExecuteCommands ("rateInfo[mi][2]="+"NS_"+mi+"/c_scale;");
103	rateInfo[mi][3] = rateInfo[mi][2]-rateInfo[mi][1];
104}
105
106
107for (mi=0; mi<rateCount-1; mi=mi+1)
108{
109	for (mi2 = 0; mi2 < mi; mi2=mi2+1)
110	{
111		rateInfo[mi][0] = rateInfo[mi][0] * (1-ps[mi2]);
112	}
113	rateInfo[mi][0] = rateInfo[mi][0] * ps[mi];
114}
115
116
117for (mi2 = 0; mi2 < mi; mi2=mi2+1)
118{
119	rateInfo[mi][0] = rateInfo[mi][0] * (1-ps[mi2]);
120}
121
122fprintf (stdout, "\n\n1).Synonymous rates:\n\n");
123echoCatVar (rateInfo[-1][0],rateInfo[-1][1]);
124fprintf (stdout, "\n\n2).Non-synonymous rates:\n\n");
125echoCatVar (rateInfo[-1][0],rateInfo[-1][2]);
126
127columnHeaders = {{"P","dS","dN","dN-dS"}};
128
129
130
131
132
133ConstructCategoryMatrix (cm, lf, COMPLETE);
134
135site_count 		= Columns (cm)/rateCount;
136posteriorProbs  = {site_count, rateCount};
137columnHeaders   = {1,rateCount};
138
139for (rate_enumerator = 0; rate_enumerator < rateCount; rate_enumerator = rate_enumerator + 1)
140{
141	columnHeaders[rate_enumerator] = "Rate " + (rate_enumerator+1);
142}
143
144for (site_enumerator = 0; site_enumerator < site_count; site_enumerator = site_enumerator + 1)
145{
146	sum = 0;
147
148	smallestScaler = 1e100;
149
150	for (rate_enumerator = 0; rate_enumerator < rateCount; rate_enumerator = rate_enumerator + 1)
151	{
152		smallestScaler = Min(smallestScaler,cm.site_scalers[rate_enumerator*site_count+site_enumerator]);
153	}
154
155	for (rate_enumerator = 0; rate_enumerator < rateCount; rate_enumerator = rate_enumerator + 1)
156	{
157		v = cm[rate_enumerator*site_count+site_enumerator] * rateInfo[rate_enumerator][0] * Exp(cm.log_scale_multiplier*(smallestScaler-cm.site_scalers[rate_enumerator*site_count+site_enumerator]));
158		posteriorProbs[site_enumerator] [rate_enumerator]= v;
159		sum = sum + v;
160	}
161
162	for (rate_enumerator = 0; rate_enumerator < rateCount; rate_enumerator = rate_enumerator + 1)
163	{
164		posteriorProbs[site_enumerator] [rate_enumerator]= posteriorProbs[site_enumerator] [rate_enumerator]/sum;
165	}
166}
167
168distrInfo = "";
169distrInfo * 128;
170distrInfo * "dN_minus_dS";
171for (k = 0; k <= mi; k = k+1)
172{
173	distrInfo * (":" + rateInfo[k][0]);
174}
175for (k = 0; k <= mi; k = k+1)
176{
177	distrInfo * (":" + rateInfo[k][3]);
178}
179distrInfo * 0;
180
181{"SmallCodon_part_Categ:0.25:0.25:0.25:0.25:0.00116849:0.0498505:0.447572:3.50141"}
182
183
184
185
186nonStopCount = 0;
187
188for (h1 = 0; h1<64; h1=h1+1)
189{
190	if (_Genetic_Code[h1]!=10)
191	{
192		nonStopCount = nonStopCount + 1;
193	}
194}
195
196/* make syn and non-syn template matrices */
197
198ExecuteAFile(HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Distances"+DIRECTORY_SEPARATOR+"CodonTools.def");
199
200for (fileID = 1; fileID <= fileCount; fileID = fileID+1)
201{
202	fprintf (stdout, "\n\nWorking on file ", fileID, "\n\n");
203	ExecuteCommands ("codonCount=filteredData_"+fileID+".sites;");
204
205	sSites  = 0;
206	nsSites = 0;
207
208	synM    = {nonStopCount,nonStopCount};
209	nonSynM = {nonStopCount,nonStopCount};
210
211	vertOnes = {nonStopCount,1};
212	horOnes  = {1,nonStopCount};
213
214	for (h1 = 0; h1<nonStopCount; h1=h1+1)
215	{
216		vertOnes [h1] = 1;
217		horOnes  [h1] = 1;
218	}
219
220	hShift = 0;
221	for (h1 = 0; h1 < 64; h1=h1+1)
222	{
223		gc1 = _Genetic_Code[h1];
224		if (gc1 == 10)
225		{
226			hShift = hShift+1;
227		}
228		else
229		{
230			sSites = sSites   + codonCount * _S_NS_POSITIONS_[0][h1] * vectorOfFrequencies[h1-hShift];
231			nsSites = nsSites + codonCount * _S_NS_POSITIONS_[1][h1] * vectorOfFrequencies[h1-hShift];
232
233			vShift = hShift;
234			for (v1 = h1+1; v1 < 64; v1=v1+1)
235			{
236				gc2 = _Genetic_Code[v1];
237				if (gc2 == 10)
238				{
239					vShift = vShift + 1;
240				}
241				else
242				{
243					if (gc1 == gc2)
244					{
245						synM [h1-hShift][v1-vShift] = vectorOfFrequencies[h1-hShift];
246						synM [v1-vShift][h1-hShift] = vectorOfFrequencies[v1-vShift];
247					}
248					else
249					{
250						nonSynM [h1-hShift][v1-vShift] = vectorOfFrequencies[h1-hShift];
251						nonSynM [v1-vShift][h1-hShift] = vectorOfFrequencies[v1-vShift];
252					}
253				}
254			}
255		}
256	}
257
258
259	ExecuteAFile(HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TreeTools.ibf");
260
261	fprintf (stdout, "\nTotal nucleotide sites :", codonCount*3,
262					 "\nSynonymous  sites      :", sSites,
263					 "\nNonsynonymous  sites   :", nsSites, "\n");
264
265	sSites  = codonCount/sSites;
266	nsSites = codonCount/nsSites;
267
268	ExecuteCommands ("branchNames = BranchName (tree_"+fileID+"_0,-1);");
269	T 			= Columns    (branchNames);
270
271	synSubsAVL = {};
272	dSAVL	   = {};
273	nsSubsAVL  = {};
274	dNAVL	   = {};
275
276	for (treeCounter = 0; treeCounter < rateCount; treeCounter = treeCounter + 1)
277	{
278		for (h1=0; h1 < T-1; h1=h1+1)
279		{
280			abn = branchNames[h1];
281			ExecuteCommands("GetInformation (aRateMx, tree_"+fileID+"_"+treeCounter+"."+abn+");");
282			synSubs  = (horOnes*(aRateMx$synM))*vertOnes;
283			nsynSubs = (horOnes*(aRateMx$nonSynM))*vertOnes;
284			synSubs = synSubs[0]/3;
285			nsynSubs = nsynSubs[0]/3;
286
287			synSubsAVL[abn] = synSubsAVL[abn] + synSubs*rateInfo[treeCounter][0];
288			nsSubsAVL [abn] = nsSubsAVL [abn] + nsynSubs*rateInfo[treeCounter][0];
289			dSAVL[abn]	    = dSAVL[abn] + synSubs *sSites*rateInfo[treeCounter][0];
290			dNAVL[abn]	    = dNAVL[abn] + nsynSubs*nsSites*rateInfo[treeCounter][0];
291		}
292	}
293
294	ExecuteCommands ("treeAVL = tree_"+fileID+"_0^0");
295
296	synTreeString 		= PostOrderAVL2StringDistances (treeAVL, synSubsAVL);
297	nonSynTreeString	= PostOrderAVL2StringDistances (treeAVL, nsSubsAVL);
298	dSTreeString 		= PostOrderAVL2StringDistances (treeAVL, dSAVL);
299	dNTreeString	    = PostOrderAVL2StringDistances (treeAVL, dNAVL);
300
301
302	/*fprintf (stdout, "\nE[Syn subs/nucleotide site] tree: \n\t",     synTreeString, 	   "\n");
303	fprintf (stdout, "\nE[Non-syn subs/nucleotide site] tree: \n\t", nonSynTreeString, "\n");
304	fprintf (stdout, "\ndS tree: \n\t", dSTreeString, "\n");
305	fprintf (stdout, "\ndN tree: \n\t", dNTreeString, "\n");*/
306
307	UseModel (USE_NO_MODEL);
308
309	ExecuteCommands ("Tree 	synSubsTree_"+fileID+"= synTreeString;Tree	nonsynSubsTree_"+fileID+" 	= nonSynTreeString;Tree 	dSTree_"+fileID+" 			= dSTreeString;Tree	dNTree_"+fileID+" = dNTreeString;");
310
311	ProcessATree ("synSubsTree_"+fileID);
312	ProcessATree ("nonsynSubsTree_"+fileID);
313	ProcessATree ("dSTree_"+fileID);
314	ProcessATree ("dNTree_"+fileID);
315
316	mxTreeSpec  = {5,1};
317
318	mxTreeSpec [0] = "nonsynSubsTree_"+fileID;
319	mxTreeSpec [3] = "";
320	mxTreeSpec [4] = "Inferred_Tree."+nodeName;
321	mxTreeSpec [1] = "8211";
322	mxTreeSpec [2] = "";
323
324
325
326
327	mxTreeSpec [0] = "synSubsTree_"+fileID;
328
329
330	mxTreeSpec [0] = "dSTree_"+fileID;
331
332
333	mxTreeSpec [0] = "dNTree_"+fileID;
334
335}
336
337logRates = {rateCount,3};
338epsilon  = Exp(-4);
339for (mi = 0; mi < rateCount; mi=mi+1)
340{
341	logRates[mi][0] = Min(Log(rateInfo[mi][1]+epsilon),4);
342	logRates[mi][1] = Min(Log(rateInfo[mi][2]+epsilon),4);
343	logRates[mi][2] = rateInfo[mi][0];
344}
345
346psCode = ScaledDensityPlot ("logRates", {{-4,4}{-4,4}}, "Courier", {{400,400,14,40}},
347								"_dNdSDensityPlot",
348								{{"","log (alpha)", "log (beta)"}}, 1, 1);
349
350psFile = basePath + ".ps";
351fprintf (psFile, CLEAR_FILE, psCode);
352
353
354/*---------------------------------------------------------*/
355
356function ProcessATree (treeName)
357{
358	ExecuteCommands ("treeAVL2 = "+treeName + " ^ 0;leafCount=TipCount("+treeName+");");
359
360	multFactors = {};
361	for (k=1; k<Abs(treeAVL2); k=k+1)
362	{
363		aNode = treeAVL2[k];
364		aNodeName = aNode["Name"];
365		parentIndex = aNode["Parent"];
366		k2 = Abs(aNode["Children"]);
367		if (k2)
368		{
369			currentDepth = aNode["Below"];
370			multFactors[aNodeName] = currentDepth;
371			if (parentIndex > 0)
372			{
373				pInfo = treeAVL2[parentIndex];
374				pInfo ["Below"] = pInfo ["Below"] + currentDepth;
375				treeAVL2[parentIndex] = pInfo;
376			}
377		}
378		else
379		{
380			multFactors[aNodeName] = 1;
381			pInfo = treeAVL2[parentIndex];
382			pInfo ["Below"] = pInfo ["Below"] + 1;
383			treeAVL2[parentIndex] = pInfo;
384		}
385
386	}
387
388	pKeys 			= Rows(multFactors);
389
390	for (k=0; k<Columns(pKeys); k=k+1)
391	{
392		aNodeName = pKeys[k];
393		multFactors[aNodeName] = multFactors[aNodeName] * (leafCount-multFactors[aNodeName]);
394	}
395
396	divInfo 		=	 computeTotalDivergence (treeName);
397	pInfo 			= 	2*divInfo[0]/leafCount/(leafCount-1);
398	currentDepth	= 	divInfo[1]/(Abs(treeAVL2)-2);
399
400	fprintf (stdout, "Mean pairwise divergence for ",treeName, " is ", pInfo, 	   "\n");
401	fprintf (stdout, "Mean branch length for ",      treeName, " is ", currentDepth, "\n");
402	return 0;
403}
404
405/*---------------------------------------------------------*/
406
407function computeTotalDivergence (treeID)
408{
409	ExecuteCommands ("bNames = BranchName   ("+treeID+",-1);");
410	ExecuteCommands ("bLen   = BranchLength ("+treeID+",-1);");
411
412	sum  = 0;
413	sum2 = 0;
414
415	for (k=0; k<Columns(bNames); k=k+1)
416	{
417		aNodeName = bNames[k];
418		sum  = sum + bLen[k]*multFactors[aNodeName];
419		sum2 = sum2 + bLen[k];
420	}
421	return {{sum,sum2}};
422}
423