1/* This file defines the transition matrix for the General Reversible model 2 The file should be used as follows: 3 4 1) Read Data File and create datafilter filteredData 5 2) #include this file (or use SelectTemplateModel(filteredData);) 6 3) Define the tree 7 4) Proceed with the likelihood function using 'vectorOfFrequencies' as the vector to pass to the constructor. 8 9 This model has the following signature: 10 #Short:RNA16 11 #Desc:Model allowing for secondary structure constraints in the RNA evolution.# 12 #Dimension:16# 13 #DataType:di-nucleotide# 14 #FileName:RNA16.mdl# 15 16 05/19/2005 by Sergei L. Kosakovsky Pond 17*/ 18 19 20/*----------------------------------------------------------------------------------------------------*/ 21 22modelType = 0; 23 24#include "modelParameters2.mdl"; 25 26/* ADD NEW RATE CLASS MATRICES HERE */ 27 28DiNucClasses = {{"RNAEqualInput","A 16x16 matrix with a single substitution rate. Not expected to fit the data well, and should only be used as a 'bad' model to compare other models to."} 29 {"RNAF81","A 16x16 matrix with a single substitution rate for single nucleotide substitutions and a zero rate for double substitutions. Not expected to fit the data well, and should only be used as a 'bad' model to compare other models to."} 30 {"RNA16A","5 rate parameters. Defined in Savill, Hoyle and Higgs. Genetics 157: 399-411."} 31 {"RNAREV_1","A subset of the general reversible model with 47 rate parameters, which disallows double instantaneous substitutions."} 32 {"RNAREV","A general reversible model with 119 rate parameters."} 33 {"Custom Esimated","Load a custom rate matrix from file. Rates estimated from the data."} 34 {"Custom Fixed","Load a custom rate matrix from file. Rates are fixed at the values read from the numeric matrix."}}; 35 36 37/* END ADD NEW RATE CLASS MATRICES HERE */ 38 39ChoiceList (dinucModelType,"Dinucleotide Rate Class Model",1,SKIP_NONE,DiNucClasses); 40 41if (dinucModelType < 0) 42{ 43 return 0; 44} 45 46if (dinucModelType >= Rows(DiNucClasses)-2) 47{ 48 SetDialogPrompt ("Locate an di-nucleotide rate profile matrix file:"); 49 fscanf (PROMPT_FOR_FILE,"Matrix",diNucRateMatrix); 50 W_MATRIX_FILE = LAST_FILE_PATH; 51} 52else 53{ 54 W_MATRIX_FILE = HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+SELECTION_STRINGS; 55 fscanf (W_MATRIX_FILE,"Matrix",diNucRateMatrix); 56} 57 58if (Rows (diNucRateMatrix) != 16 || Columns (diNucRateMatrix) != 16) 59{ 60 fprintf (stdout, "ERROR: A 16x16 matrix was expected, but not found in rate definition file\n"); 61 return 0; 62} 63 64if (modelType == 1) 65{ 66 #include "defineGamma.mdl"; 67} 68if (modelType == 2) 69{ 70 #include "defineHM.mdl"; 71} 72 73 74ChoiceList (freqResp,"Equilibrium Frequencies",1,NO_SKIP, 75 "Dinucleotide counts","State frequencies are collected from observed dinucleotide counts.", 76 "Nucleotide counts","State frequencies are estimates from products of respective nucleotide counts.", 77 "Estimated","Frequencies are estimated by ML."); 78 79if (freqResp<0) 80{ 81 return 1; 82} 83else 84{ 85 if (freqResp == 0) 86 { 87 HarvestFrequencies (vectorOfFrequencies, filteredData,2,2,0); 88 FREQUENCY_SENSITIVE = 1; 89 } 90 else 91 { 92 vectorOfFrequencies = {16,1}; 93 if (freqResp == 1) 94 { 95 HarvestFrequencies (obsFreqs,filteredData,2,1,1); 96 for (h=0; h<4; h=h+1) 97 { 98 for (v=0; v<4; v=v+1) 99 { 100 vectorOfFrequencies[h*4+v] = obsFreqs[h][0]*obsFreqs[v][1]; 101 } 102 } 103 FREQUENCY_SENSITIVE = 1; 104 } 105 else 106 { 107 HarvestFrequencies (obsFreqs,filteredData,2,2,1); 108 v = ""; 109 global f_weight = 1; 110 111 nucChar = "ACGT"; 112 113 for (h=0;h<4;h=h+1) 114 { 115 for (h2=0;h2<4;h2=h2+1) 116 { 117 varName = "f_"+nucChar[h]+nucChar[h2]; 118 idx = h*4+h2; 119 120 ExecuteCommands ("global "+varName+"=obsFreqs[idx];"+varName+":<1;vectorOfFrequencies["+idx+"]:="+varName+"/f_weight;"); 121 v = v + "+" + varName; 122 } 123 } 124 ExecuteCommands ("f_weight:="+v[1][Abs(v)-1]+";"); 125 } 126 } 127} 128 129/*----------------------------------------------------------------------------------*/ 130 131function PopulateModelMatrix (ModelMatrixName&, EFV) 132{ 133 ModelMatrixName = {16,16}; 134 global rate_Normalizer = 1; 135 136 if (dinucModelType == Rows(DiNucClasses)-1) 137 { 138 rate_Norm = 0; 139 for (h=0; h<16; h=h+1) 140 { 141 for (v=h+1; v<16; v=v+1) 142 { 143 if (v!=h) 144 { 145 if (Abs("" + diNucRateMatrix[h][v])) 146 { 147 rateEntry = ""+diNucRateMatrix[h][v]; 148 rate_Norm = rate_Norm + rateEntry; 149 150 if (modelType>=1) 151 { 152 rateEntry = rateEntry + "*c"; 153 } 154 155 ExecuteCommands ("ModelMatrixName["+h+"]["+v+"]:="+rateEntry +"*rate_Normalizer*t"); 156 ExecuteCommands ("ModelMatrixName["+v+"]["+h+"]:="+rateEntry +"*rate_Normalizer*t"); 157 } 158 } 159 } 160 } 161 ExecuteCommands ("rate_Normalizer:=100./rate_Norm__;"); 162 } 163 else 164 { 165 rate_Norm = ""; 166 alreadyDefined = {}; 167 for (h=0; h<16; h=h+1) 168 { 169 for (v=h+1; v<16; v=v+1) 170 { 171 if (v!=h) 172 { 173 if (Abs("" + diNucRateMatrix[h][v])) 174 { 175 rateEntry = "R_"+diNucRateMatrix[h][v]; 176 if (alreadyDefined[rateEntry] == 0) 177 { 178 ExecuteCommands ("global "+rateEntry + "=1;"); 179 rate_Norm = rate_Norm + "+" + rateEntry; 180 alreadyDefined[rateEntry] = 1; 181 } 182 183 if (modelType>=1) 184 { 185 rateEntry = rateEntry + "*c"; 186 } 187 188 ExecuteCommands ("ModelMatrixName["+h+"]["+v+"]:="+rateEntry +"*rate_Normalizer*t"); 189 ExecuteCommands ("ModelMatrixName["+v+"]["+h+"]:="+rateEntry +"*rate_Normalizer*t"); 190 } 191 } 192 } 193 } 194 ExecuteCommands ("rate_Normalizer:=100./("+rate_Norm[1][Abs(rate_Norm)-1]+");"); 195 } 196 197 return 1; 198} 199 200/*----------------------------------------------------------------------------------*/ 201 202MULTIPLY_BY_FREQS = PopulateModelMatrix ("RNA16",observedFreq); 203Model RNA16Model = (RNA16, vectorOfFrequencies, MULTIPLY_BY_FREQS); 204