1/* This file defines the transition matrix for the General Reversible model
2   The file should be used as follows:
3
4   1) Read Data File and create datafilter filteredData
5   2) #include this file (or use SelectTemplateModel(filteredData);)
6   3) Define the tree
7   4) Proceed with the likelihood function using 'vectorOfFrequencies' as the vector to pass to the constructor.
8
9   This model has the following signature:
10    #Short:RNA16
11   	#Desc:Model allowing for secondary structure constraints in the RNA evolution.#
12   	#Dimension:16#
13    #DataType:di-nucleotide#
14   	#FileName:RNA16.mdl#
15
16   05/19/2005  by Sergei L. Kosakovsky Pond
17*/
18
19
20/*----------------------------------------------------------------------------------------------------*/
21
22modelType = 0;
23
24#include "modelParameters2.mdl";
25
26/* ADD NEW RATE CLASS MATRICES HERE */
27
28DiNucClasses = {{"RNAEqualInput","A 16x16 matrix with a single substitution rate. Not expected to fit the data well, and should only be used as a 'bad' model to compare other models to."}
29				{"RNAF81","A 16x16 matrix with a single substitution rate for single nucleotide substitutions and a zero rate for double substitutions. Not expected to fit the data well, and should only be used as a 'bad' model to compare other models to."}
30				{"RNA16A","5 rate parameters. Defined in Savill, Hoyle and Higgs. Genetics 157: 399-411."}
31				{"RNAREV_1","A subset of the general reversible model with 47 rate parameters, which disallows double instantaneous substitutions."}
32				{"RNAREV","A general reversible model with 119 rate parameters."}
33				{"Custom Esimated","Load a custom rate matrix from file. Rates estimated from the data."}
34				{"Custom Fixed","Load a custom rate matrix from file. Rates are fixed at the values read from the numeric matrix."}};
35
36
37/* END ADD NEW RATE CLASS MATRICES HERE */
38
39ChoiceList	(dinucModelType,"Dinucleotide Rate Class Model",1,SKIP_NONE,DiNucClasses);
40
41if (dinucModelType < 0)
42{
43	return 0;
44}
45
46if (dinucModelType >= Rows(DiNucClasses)-2)
47{
48	SetDialogPrompt ("Locate an di-nucleotide rate profile matrix file:");
49	fscanf (PROMPT_FOR_FILE,"Matrix",diNucRateMatrix);
50	W_MATRIX_FILE = LAST_FILE_PATH;
51}
52else
53{
54	W_MATRIX_FILE = HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+SELECTION_STRINGS;
55	fscanf (W_MATRIX_FILE,"Matrix",diNucRateMatrix);
56}
57
58if (Rows (diNucRateMatrix) != 16 || Columns (diNucRateMatrix) != 16)
59{
60	fprintf (stdout, "ERROR: A 16x16 matrix was expected, but not found in rate definition file\n");
61	return 0;
62}
63
64if (modelType == 1)
65{
66	#include "defineGamma.mdl";
67}
68if (modelType == 2)
69{
70	#include "defineHM.mdl";
71}
72
73
74ChoiceList  (freqResp,"Equilibrium Frequencies",1,NO_SKIP,
75			 "Dinucleotide counts","State frequencies are collected from observed dinucleotide counts.",
76			 "Nucleotide counts","State frequencies are estimates from products of respective nucleotide counts.",
77			 "Estimated","Frequencies are estimated by ML.");
78
79if (freqResp<0)
80{
81	return 1;
82}
83else
84{
85	 if (freqResp == 0)
86	 {
87		HarvestFrequencies (vectorOfFrequencies, filteredData,2,2,0);
88		FREQUENCY_SENSITIVE = 1;
89	 }
90	 else
91	 {
92		vectorOfFrequencies = {16,1};
93	 	if (freqResp == 1)
94	 	{
95			HarvestFrequencies (obsFreqs,filteredData,2,1,1);
96			for (h=0; h<4; h=h+1)
97			{
98				for (v=0; v<4; v=v+1)
99				{
100					vectorOfFrequencies[h*4+v] = obsFreqs[h][0]*obsFreqs[v][1];
101				}
102			}
103			FREQUENCY_SENSITIVE = 1;
104	 	}
105	 	else
106	 	{
107			HarvestFrequencies (obsFreqs,filteredData,2,2,1);
108			v = "";
109			global f_weight = 1;
110
111			nucChar = "ACGT";
112
113			for (h=0;h<4;h=h+1)
114			{
115				for (h2=0;h2<4;h2=h2+1)
116				{
117					varName = "f_"+nucChar[h]+nucChar[h2];
118					idx = h*4+h2;
119
120					ExecuteCommands ("global "+varName+"=obsFreqs[idx];"+varName+":<1;vectorOfFrequencies["+idx+"]:="+varName+"/f_weight;");
121					v = v + "+" + varName;
122				}
123			}
124			ExecuteCommands ("f_weight:="+v[1][Abs(v)-1]+";");
125	 	}
126	 }
127}
128
129/*----------------------------------------------------------------------------------*/
130
131function PopulateModelMatrix (ModelMatrixName&, EFV)
132{
133	ModelMatrixName = {16,16};
134	global rate_Normalizer = 1;
135
136	if (dinucModelType == Rows(DiNucClasses)-1)
137	{
138		rate_Norm = 0;
139		for (h=0; h<16; h=h+1)
140		{
141			for (v=h+1; v<16; v=v+1)
142			{
143				if (v!=h)
144				{
145					if (Abs("" + diNucRateMatrix[h][v]))
146					{
147						rateEntry = ""+diNucRateMatrix[h][v];
148						rate_Norm = rate_Norm + rateEntry;
149
150						if (modelType>=1)
151						{
152							rateEntry = rateEntry + "*c";
153						}
154
155						ExecuteCommands ("ModelMatrixName["+h+"]["+v+"]:="+rateEntry  +"*rate_Normalizer*t");
156						ExecuteCommands ("ModelMatrixName["+v+"]["+h+"]:="+rateEntry  +"*rate_Normalizer*t");
157					}
158				}
159			}
160		}
161		ExecuteCommands ("rate_Normalizer:=100./rate_Norm__;");
162	}
163	else
164	{
165		rate_Norm = "";
166		alreadyDefined = {};
167		for (h=0; h<16; h=h+1)
168		{
169			for (v=h+1; v<16; v=v+1)
170			{
171				if (v!=h)
172				{
173					if (Abs("" + diNucRateMatrix[h][v]))
174					{
175						rateEntry = "R_"+diNucRateMatrix[h][v];
176						if (alreadyDefined[rateEntry] == 0)
177						{
178							ExecuteCommands ("global "+rateEntry + "=1;");
179							rate_Norm = rate_Norm + "+" + rateEntry;
180							alreadyDefined[rateEntry] = 1;
181						}
182
183						if (modelType>=1)
184						{
185							rateEntry = rateEntry + "*c";
186						}
187
188						ExecuteCommands ("ModelMatrixName["+h+"]["+v+"]:="+rateEntry  +"*rate_Normalizer*t");
189						ExecuteCommands ("ModelMatrixName["+v+"]["+h+"]:="+rateEntry  +"*rate_Normalizer*t");
190					}
191				}
192			}
193		}
194		ExecuteCommands ("rate_Normalizer:=100./("+rate_Norm[1][Abs(rate_Norm)-1]+");");
195	}
196
197	return 1;
198}
199
200/*----------------------------------------------------------------------------------*/
201
202MULTIPLY_BY_FREQS = PopulateModelMatrix ("RNA16",observedFreq);
203Model RNA16Model = (RNA16, vectorOfFrequencies, MULTIPLY_BY_FREQS);
204