1/* This file defines the transition matrix for the Muse-Gaut 94 model x an arbitrary 4x4 rate matrix
2   for nucleotide substituions.
3
4   The file should be used as follows:
5
6   1) Read Data File and create datafilter filteredData
7   2) #include this file (or use SelectTemplateModel(filteredData);)
8   3) Define the tree
9   4) Proceed with the likelihood function using 'vectorOfFrequencies' as the vector to pass to the constructor.
10
11   This model has the following signature:
12   	#Short:MG94custom#
13   	#Desc:Muse-Gaut 94 x an arbitrary 4x4 rate matrix and 9 (3x4) frequency parameters. Possible Gamma Variation.#
14   	#Dimension:*#
15    #DataType:codon#
16   	#FileName:MG94custom.mdl#
17
18   04/18/2002  by Sergei L. Kosakovsky Pond
19*/
20
21ModelMatrixDimension = 0;
22
23global		R;
24global     AC;
25global 	   AT;
26global     CG;
27global	   CT;
28global     GT;
29
30
31/*---------------------------------------------------------------------------------------------------------------------------------------------*/
32
33function PopulateModelMatrix (ModelMatrixName&, EFV)
34{
35	if (!ModelMatrixDimension)
36	{
37		ModelMatrixDimension = 64;
38		for (h = 0 ;h<64; h=h+1)
39		{
40			if (_Genetic_Code[h]==10)
41			{
42				ModelMatrixDimension = ModelMatrixDimension-1;
43			}
44		}
45	}
46
47	R  = R;
48	AT = AT;
49	CG = CG;
50	CT = CT;
51	GT = GT;
52	AC = AC;
53
54	ModelMatrixName = {ModelMatrixDimension,ModelMatrixDimension};
55	ExecuteCommands (categDef1);
56	ExecuteCommands (categDef2);
57
58 	hshift = 0;
59
60	for (h=0; h<64; h=h+1)
61	{
62		if (_Genetic_Code[h]==10)
63		{
64			hshift = hshift+1;
65			continue;
66		}
67		vshift = hshift;
68		for (v = h+1; v<64; v=v+1)
69		{
70			diff = v-h;
71			if (_Genetic_Code[v]==10)
72			{
73				vshift = vshift+1;
74				continue;
75			}
76			nucPosInCodon = 2;
77			if ((h$4==v$4)||((diff%4==0)&&(h$16==v$16))||(diff%16==0))
78			{
79				if (h$4==v$4)
80				{
81					transition = v%4;
82					transition2= h%4;
83				}
84				else
85				{
86					if(diff%16==0)
87					{
88						transition = v$16;
89						transition2= h$16;
90						nucPosInCodon = 0;
91					}
92					else
93					{
94						transition = v%16$4;
95						transition2= h%16$4;
96						nucPosInCodon = 1;
97					}
98				}
99
100				if (transition<transition2)
101				{
102					trSM = transition;
103					trLG = transition2;
104				}
105				else
106				{
107					trSM = transition2;
108					trLG = transition;
109				}
110
111				if (trSM==0)
112				{
113					if (trLG==1)
114					{
115						if (_Genetic_Code[0][h]==_Genetic_Code[0][v])
116						{
117							ModelMatrixName[h-hshift][v-vshift] := AC*c*synRate*EFV__[transition__][nucPosInCodon__];
118							ModelMatrixName[v-vshift][h-hshift] := AC*c*synRate*EFV__[transition2__][nucPosInCodon__];
119						}
120						else
121						{
122				  			if (modelType!=4)
123				  			{
124					  			ModelMatrixName[h-hshift][v-vshift] := AC*R*synRate*d*EFV__[transition__][nucPosInCodon__];
125				  				ModelMatrixName[v-vshift][h-hshift] := AC*R*synRate*d*EFV__[transition2__][nucPosInCodon__];
126				  			}
127				  			else
128				  			{
129							  	ModelMatrixName[h-hshift][v-vshift] := AC*r*synRate*d*EFV__[transition__][nucPosInCodon__];
130					  			ModelMatrixName[v-vshift][h-hshift] := AC*r*synRate*d*EFV__[transition2__][nucPosInCodon__];
131				  			}
132						}
133					}
134					else
135					{
136						if (trLG==2)
137						{
138							if (_Genetic_Code[0][h]==_Genetic_Code[0][v])
139							{
140								ModelMatrixName[h-hshift][v-vshift] := c*synRate*EFV__[transition__][nucPosInCodon__];
141								ModelMatrixName[v-vshift][h-hshift] := c*synRate*EFV__[transition2__][nucPosInCodon__];
142							}
143							else
144							{
145					  			if (modelType!=4)
146					  			{
147						  			ModelMatrixName[h-hshift][v-vshift] := R*synRate*d*EFV__[transition__][nucPosInCodon__];
148					  				ModelMatrixName[v-vshift][h-hshift] := R*synRate*d*EFV__[transition2__][nucPosInCodon__];
149					  			}
150					  			else
151					  			{
152								  	ModelMatrixName[h-hshift][v-vshift] := r*synRate*d*EFV__[transition__][nucPosInCodon__];
153						  			ModelMatrixName[v-vshift][h-hshift] := r*synRate*d*EFV__[transition2__][nucPosInCodon__];
154					  			}
155							}
156						}
157						else
158						{
159							if (_Genetic_Code[0][h]==_Genetic_Code[0][v])
160							{
161								ModelMatrixName[h-hshift][v-vshift] := AT*c*synRate*EFV__[transition__][nucPosInCodon__];
162								ModelMatrixName[v-vshift][h-hshift] := AT*c*synRate*EFV__[transition2__][nucPosInCodon__];
163							}
164							else
165							{
166					  			if (modelType!=4)
167					  			{
168						  			ModelMatrixName[h-hshift][v-vshift] := AT*R*synRate*d*EFV__[transition__][nucPosInCodon__];
169					  				ModelMatrixName[v-vshift][h-hshift] := AT*R*synRate*d*EFV__[transition2__][nucPosInCodon__];
170					  			}
171					  			else
172					  			{
173								  	ModelMatrixName[h-hshift][v-vshift] := AT*r*synRate*d*EFV__[transition__][nucPosInCodon__];
174						  			ModelMatrixName[v-vshift][h-hshift] := AT*r*synRate*d*EFV__[transition2__][nucPosInCodon__];
175					  			}
176							}
177						}
178					}
179				}
180				else
181				{
182					if (trSM==1)
183					{
184						if (trLG==2)
185						{
186							if (_Genetic_Code[0][h]==_Genetic_Code[0][v])
187							{
188								ModelMatrixName[h-hshift][v-vshift] := CG*c*synRate*EFV__[transition__][nucPosInCodon__];
189								ModelMatrixName[v-vshift][h-hshift] := CG*c*synRate*EFV__[transition2__][nucPosInCodon__];
190							}
191							else
192							{
193					  			if (modelType!=4)
194					  			{
195						  			ModelMatrixName[h-hshift][v-vshift] := CG*R*synRate*d*EFV__[transition__][nucPosInCodon__];
196					  				ModelMatrixName[v-vshift][h-hshift] := CG*R*synRate*d*EFV__[transition2__][nucPosInCodon__];
197					  			}
198					  			else
199					  			{
200								  	ModelMatrixName[h-hshift][v-vshift] := CG*r*synRate*d*EFV__[transition__][nucPosInCodon__];
201						  			ModelMatrixName[v-vshift][h-hshift] := CG*r*synRate*d*EFV__[transition2__][nucPosInCodon__];
202					  			}
203							}
204						}
205						else
206						{
207							if (_Genetic_Code[0][h]==_Genetic_Code[0][v])
208							{
209								ModelMatrixName[h-hshift][v-vshift] := CT*c*synRate*EFV__[transition__][nucPosInCodon__];
210								ModelMatrixName[v-vshift][h-hshift] := CT*c*synRate*EFV__[transition2__][nucPosInCodon__];
211							}
212							else
213							{
214					  			if (modelType!=4)
215					  			{
216						  			ModelMatrixName[h-hshift][v-vshift] := CT*R*synRate*d*EFV__[transition__][nucPosInCodon__];
217					  				ModelMatrixName[v-vshift][h-hshift] := CT*R*synRate*d*EFV__[transition2__][nucPosInCodon__];
218					  			}
219					  			else
220					  			{
221								  	ModelMatrixName[h-hshift][v-vshift] := CT*r*synRate*d*EFV__[transition__][nucPosInCodon__];
222						  			ModelMatrixName[v-vshift][h-hshift] := CT*r*synRate*d*EFV__[transition2__][nucPosInCodon__];
223					  			}
224							}
225						}
226					}
227					else
228					{
229						if (_Genetic_Code[0][h]==_Genetic_Code[0][v])
230						{
231							ModelMatrixName[h-hshift][v-vshift] := GT*c*synRate*EFV__[transition__][nucPosInCodon__];
232							ModelMatrixName[v-vshift][h-hshift] := GT*c*synRate*EFV__[transition2__][nucPosInCodon__];
233						}
234						else
235						{
236				  			if (modelType!=4)
237				  			{
238					  			ModelMatrixName[h-hshift][v-vshift] := GT*R*synRate*d*EFV__[transition__][nucPosInCodon__];
239				  				ModelMatrixName[v-vshift][h-hshift] := GT*R*synRate*d*EFV__[transition2__][nucPosInCodon__];
240				  			}
241				  			else
242				  			{
243							  	ModelMatrixName[h-hshift][v-vshift] := GT*r*synRate*d*EFV__[transition__][nucPosInCodon__];
244					  			ModelMatrixName[v-vshift][h-hshift] := GT*r*synRate*d*EFV__[transition2__][nucPosInCodon__];
245				  			}
246						}
247					}
248				}
249			}
250	   }
251	}
252
253	if (Abs(MGCustomModelConstraintString))
254	{
255		ExecuteCommands (MGCustomModelConstraintString);
256	}
257	return 0;
258}
259
260
261/*---------------------------------------------------------------------------------------------------------------------------------------------*/
262
263function BuildCodonFrequencies (obsF)
264{
265	PIStop = 1.0;
266	result = {ModelMatrixDimension,1};
267	hshift = 0;
268
269	for (h=0; h<64; h=h+1)
270	{
271		first = h$16;
272		second = h%16$4;
273		third = h%4;
274		if (_Genetic_Code[h]==10)
275		{
276			hshift = hshift+1;
277			PIStop = PIStop-obsF[first][0]*obsF[second][1]*obsF[third][2];
278			continue;
279		}
280		result[h-hshift][0]=obsF[first][0]*obsF[second][1]*obsF[third][2];
281	}
282	return result*(1.0/PIStop);
283}
284
285/*---------------------------------------------------------------------------------------------------------------------------------------------*/
286
287categoriesUsed = 2;
288sharedFlag 	   = 1;
289
290
291
292if (!SKIP_MODEL_PARAMETER_LIST)
293{
294	ChoiceList (modelChoice, "Distribution Option",1,SKIP_NONE,
295				"Syn:Gamma, Non-syn:Gamma",	 "Both syn and non-syn rates are drawn from the gamma distributions for all models.",
296				"Syn:Gamma, Non-syn:Inv+Gamma","Syn and non-syn rates are drawn from the gamma distributions for all models for PVRM and NSRV. For DVRM and LDVRM, syn rates are drawn from the gamma distribution, and non-syn rates - from Inv+Gamma.",
297				"Independent Discrete", "Independent General Discrete Distributions (Recommended setting)",
298				"Correlated Discrete", "Correlated General Discrete Distributions");
299
300
301	if (modelChoice < 0)
302	{
303		return;
304	}
305
306	resp  = 0;
307	resp2 = 0;
308
309	while (resp<2)
310	{
311		fprintf (stdout,"\nNumber of synonymous rate classes (>=2):");
312		fscanf  (stdin, "Number", resp);
313	}
314
315	while (resp2<2)
316	{
317		fprintf (stdout,"\nNumber of non-synonymous rate classes (>=2):");
318		fscanf  (stdin, "Number", resp2);
319	}
320
321	if (modelChoice<2)
322	{
323		fscanf ("../2RatesAnalyses/gamma1.def","Raw",categDef1);
324
325		if (modelChoice == 0)
326		{
327			fscanf ("../2RatesAnalyses/gamma2.def","Raw",categDef2);
328		}
329		else
330		{
331			fscanf ("../2RatesAnalyses/gamma2+Inv.def","Raw",categDef2);
332		}
333	}
334	else
335	{
336		correlationOn = (modelChoice>3);
337		fscanf ("2RatesAnalyses/discreteGenerator.bf","Raw",mi);
338		ExecuteCommands (mi);
339	}
340
341	done = 0;
342	while (!done)
343	{
344		fprintf (stdout,"\nPlease enter a 6 character model designation (e.g:010010 defines HKY85):");
345		fscanf  (stdin,"String", modelDesc);
346		if (Abs(modelDesc)==6)
347		{
348			done = 1;
349		}
350	}
351}
352
353MGCustomRateBiasTerms = {{"AC","1","AT","CG","CT","GT"}};
354paramCount	  = 0;
355
356MGCustomModelConstraintString = "";
357
358for (customLoopCounter2=1; customLoopCounter2<6; customLoopCounter2=customLoopCounter2+1)
359{
360	for (customLoopCounter=0; customLoopCounter<customLoopCounter2; customLoopCounter=customLoopCounter+1)
361	{
362		if (modelDesc[customLoopCounter2]==modelDesc[customLoopCounter])
363		{
364			if (MGCustomRateBiasTerms[customLoopCounter2] == "1")
365			{
366				MGCustomModelConstraintString = MGCustomModelConstraintString + MGCustomRateBiasTerms[customLoopCounter]+":="+MGCustomRateBiasTerms[customLoopCounter2]+";";
367			}
368			else
369			{
370				MGCustomModelConstraintString = MGCustomModelConstraintString + MGCustomRateBiasTerms[customLoopCounter2]+":="+MGCustomRateBiasTerms[customLoopCounter]+";";
371			}
372			break;
373		}
374	}
375}
376
377if (!SKIP_HARVEST_FREQ)
378{
379	HarvestFrequencies (observedFreq,filteredData,3,1,1);
380}
381
382NICETY_LEVEL = 3;
383
384MG94custom = 0;
385
386MULTIPLY_BY_FREQS = PopulateModelMatrix ("MG94custom", observedFreq);
387
388FREQUENCY_SENSITIVE = 1;
389
390vectorOfFrequencies = BuildCodonFrequencies (observedFreq);
391
392Model MG94customModel = (MG94custom,vectorOfFrequencies,0);
393
394USE_POSITION_SPECIFIC_FREQS = 1;
395
396_rateDescriptors = {{"Synonymous rates","Non-synonymous rates"}};
397