1/* This file defines the transition matrix for the Muse-Gaut 94 model x an arbitrary 4x4 rate matrix 2 for nucleotide substituions. 3 4 The file should be used as follows: 5 6 1) Read Data File and create datafilter filteredData 7 2) #include this file (or use SelectTemplateModel(filteredData);) 8 3) Define the tree 9 4) Proceed with the likelihood function using 'vectorOfFrequencies' as the vector to pass to the constructor. 10 11 This model has the following signature: 12 #Short:MG94custom# 13 #Desc:Muse-Gaut 94 x an arbitrary 4x4 rate matrix and 9 (3x4) frequency parameters. Possible Gamma Variation.# 14 #Dimension:*# 15 #DataType:codon# 16 #FileName:MG94custom.mdl# 17 18 04/18/2002 by Sergei L. Kosakovsky Pond 19*/ 20 21ModelMatrixDimension = 0; 22 23global R; 24global AC; 25global AT; 26global CG; 27global CT; 28global GT; 29 30 31/*---------------------------------------------------------------------------------------------------------------------------------------------*/ 32 33function PopulateModelMatrix (ModelMatrixName&, EFV) 34{ 35 if (!ModelMatrixDimension) 36 { 37 ModelMatrixDimension = 64; 38 for (h = 0 ;h<64; h=h+1) 39 { 40 if (_Genetic_Code[h]==10) 41 { 42 ModelMatrixDimension = ModelMatrixDimension-1; 43 } 44 } 45 } 46 47 R = R; 48 AT = AT; 49 CG = CG; 50 CT = CT; 51 GT = GT; 52 AC = AC; 53 54 ModelMatrixName = {ModelMatrixDimension,ModelMatrixDimension}; 55 ExecuteCommands (categDef1); 56 ExecuteCommands (categDef2); 57 58 hshift = 0; 59 60 for (h=0; h<64; h=h+1) 61 { 62 if (_Genetic_Code[h]==10) 63 { 64 hshift = hshift+1; 65 continue; 66 } 67 vshift = hshift; 68 for (v = h+1; v<64; v=v+1) 69 { 70 diff = v-h; 71 if (_Genetic_Code[v]==10) 72 { 73 vshift = vshift+1; 74 continue; 75 } 76 nucPosInCodon = 2; 77 if ((h$4==v$4)||((diff%4==0)&&(h$16==v$16))||(diff%16==0)) 78 { 79 if (h$4==v$4) 80 { 81 transition = v%4; 82 transition2= h%4; 83 } 84 else 85 { 86 if(diff%16==0) 87 { 88 transition = v$16; 89 transition2= h$16; 90 nucPosInCodon = 0; 91 } 92 else 93 { 94 transition = v%16$4; 95 transition2= h%16$4; 96 nucPosInCodon = 1; 97 } 98 } 99 100 if (transition<transition2) 101 { 102 trSM = transition; 103 trLG = transition2; 104 } 105 else 106 { 107 trSM = transition2; 108 trLG = transition; 109 } 110 111 if (trSM==0) 112 { 113 if (trLG==1) 114 { 115 if (_Genetic_Code[0][h]==_Genetic_Code[0][v]) 116 { 117 ModelMatrixName[h-hshift][v-vshift] := AC*c*synRate*EFV__[transition__][nucPosInCodon__]; 118 ModelMatrixName[v-vshift][h-hshift] := AC*c*synRate*EFV__[transition2__][nucPosInCodon__]; 119 } 120 else 121 { 122 if (modelType!=4) 123 { 124 ModelMatrixName[h-hshift][v-vshift] := AC*R*synRate*d*EFV__[transition__][nucPosInCodon__]; 125 ModelMatrixName[v-vshift][h-hshift] := AC*R*synRate*d*EFV__[transition2__][nucPosInCodon__]; 126 } 127 else 128 { 129 ModelMatrixName[h-hshift][v-vshift] := AC*r*synRate*d*EFV__[transition__][nucPosInCodon__]; 130 ModelMatrixName[v-vshift][h-hshift] := AC*r*synRate*d*EFV__[transition2__][nucPosInCodon__]; 131 } 132 } 133 } 134 else 135 { 136 if (trLG==2) 137 { 138 if (_Genetic_Code[0][h]==_Genetic_Code[0][v]) 139 { 140 ModelMatrixName[h-hshift][v-vshift] := c*synRate*EFV__[transition__][nucPosInCodon__]; 141 ModelMatrixName[v-vshift][h-hshift] := c*synRate*EFV__[transition2__][nucPosInCodon__]; 142 } 143 else 144 { 145 if (modelType!=4) 146 { 147 ModelMatrixName[h-hshift][v-vshift] := R*synRate*d*EFV__[transition__][nucPosInCodon__]; 148 ModelMatrixName[v-vshift][h-hshift] := R*synRate*d*EFV__[transition2__][nucPosInCodon__]; 149 } 150 else 151 { 152 ModelMatrixName[h-hshift][v-vshift] := r*synRate*d*EFV__[transition__][nucPosInCodon__]; 153 ModelMatrixName[v-vshift][h-hshift] := r*synRate*d*EFV__[transition2__][nucPosInCodon__]; 154 } 155 } 156 } 157 else 158 { 159 if (_Genetic_Code[0][h]==_Genetic_Code[0][v]) 160 { 161 ModelMatrixName[h-hshift][v-vshift] := AT*c*synRate*EFV__[transition__][nucPosInCodon__]; 162 ModelMatrixName[v-vshift][h-hshift] := AT*c*synRate*EFV__[transition2__][nucPosInCodon__]; 163 } 164 else 165 { 166 if (modelType!=4) 167 { 168 ModelMatrixName[h-hshift][v-vshift] := AT*R*synRate*d*EFV__[transition__][nucPosInCodon__]; 169 ModelMatrixName[v-vshift][h-hshift] := AT*R*synRate*d*EFV__[transition2__][nucPosInCodon__]; 170 } 171 else 172 { 173 ModelMatrixName[h-hshift][v-vshift] := AT*r*synRate*d*EFV__[transition__][nucPosInCodon__]; 174 ModelMatrixName[v-vshift][h-hshift] := AT*r*synRate*d*EFV__[transition2__][nucPosInCodon__]; 175 } 176 } 177 } 178 } 179 } 180 else 181 { 182 if (trSM==1) 183 { 184 if (trLG==2) 185 { 186 if (_Genetic_Code[0][h]==_Genetic_Code[0][v]) 187 { 188 ModelMatrixName[h-hshift][v-vshift] := CG*c*synRate*EFV__[transition__][nucPosInCodon__]; 189 ModelMatrixName[v-vshift][h-hshift] := CG*c*synRate*EFV__[transition2__][nucPosInCodon__]; 190 } 191 else 192 { 193 if (modelType!=4) 194 { 195 ModelMatrixName[h-hshift][v-vshift] := CG*R*synRate*d*EFV__[transition__][nucPosInCodon__]; 196 ModelMatrixName[v-vshift][h-hshift] := CG*R*synRate*d*EFV__[transition2__][nucPosInCodon__]; 197 } 198 else 199 { 200 ModelMatrixName[h-hshift][v-vshift] := CG*r*synRate*d*EFV__[transition__][nucPosInCodon__]; 201 ModelMatrixName[v-vshift][h-hshift] := CG*r*synRate*d*EFV__[transition2__][nucPosInCodon__]; 202 } 203 } 204 } 205 else 206 { 207 if (_Genetic_Code[0][h]==_Genetic_Code[0][v]) 208 { 209 ModelMatrixName[h-hshift][v-vshift] := CT*c*synRate*EFV__[transition__][nucPosInCodon__]; 210 ModelMatrixName[v-vshift][h-hshift] := CT*c*synRate*EFV__[transition2__][nucPosInCodon__]; 211 } 212 else 213 { 214 if (modelType!=4) 215 { 216 ModelMatrixName[h-hshift][v-vshift] := CT*R*synRate*d*EFV__[transition__][nucPosInCodon__]; 217 ModelMatrixName[v-vshift][h-hshift] := CT*R*synRate*d*EFV__[transition2__][nucPosInCodon__]; 218 } 219 else 220 { 221 ModelMatrixName[h-hshift][v-vshift] := CT*r*synRate*d*EFV__[transition__][nucPosInCodon__]; 222 ModelMatrixName[v-vshift][h-hshift] := CT*r*synRate*d*EFV__[transition2__][nucPosInCodon__]; 223 } 224 } 225 } 226 } 227 else 228 { 229 if (_Genetic_Code[0][h]==_Genetic_Code[0][v]) 230 { 231 ModelMatrixName[h-hshift][v-vshift] := GT*c*synRate*EFV__[transition__][nucPosInCodon__]; 232 ModelMatrixName[v-vshift][h-hshift] := GT*c*synRate*EFV__[transition2__][nucPosInCodon__]; 233 } 234 else 235 { 236 if (modelType!=4) 237 { 238 ModelMatrixName[h-hshift][v-vshift] := GT*R*synRate*d*EFV__[transition__][nucPosInCodon__]; 239 ModelMatrixName[v-vshift][h-hshift] := GT*R*synRate*d*EFV__[transition2__][nucPosInCodon__]; 240 } 241 else 242 { 243 ModelMatrixName[h-hshift][v-vshift] := GT*r*synRate*d*EFV__[transition__][nucPosInCodon__]; 244 ModelMatrixName[v-vshift][h-hshift] := GT*r*synRate*d*EFV__[transition2__][nucPosInCodon__]; 245 } 246 } 247 } 248 } 249 } 250 } 251 } 252 253 if (Abs(MGCustomModelConstraintString)) 254 { 255 ExecuteCommands (MGCustomModelConstraintString); 256 } 257 return 0; 258} 259 260 261/*---------------------------------------------------------------------------------------------------------------------------------------------*/ 262 263function BuildCodonFrequencies (obsF) 264{ 265 PIStop = 1.0; 266 result = {ModelMatrixDimension,1}; 267 hshift = 0; 268 269 for (h=0; h<64; h=h+1) 270 { 271 first = h$16; 272 second = h%16$4; 273 third = h%4; 274 if (_Genetic_Code[h]==10) 275 { 276 hshift = hshift+1; 277 PIStop = PIStop-obsF[first][0]*obsF[second][1]*obsF[third][2]; 278 continue; 279 } 280 result[h-hshift][0]=obsF[first][0]*obsF[second][1]*obsF[third][2]; 281 } 282 return result*(1.0/PIStop); 283} 284 285/*---------------------------------------------------------------------------------------------------------------------------------------------*/ 286 287categoriesUsed = 2; 288sharedFlag = 1; 289 290 291 292if (!SKIP_MODEL_PARAMETER_LIST) 293{ 294 ChoiceList (modelChoice, "Distribution Option",1,SKIP_NONE, 295 "Syn:Gamma, Non-syn:Gamma", "Both syn and non-syn rates are drawn from the gamma distributions for all models.", 296 "Syn:Gamma, Non-syn:Inv+Gamma","Syn and non-syn rates are drawn from the gamma distributions for all models for PVRM and NSRV. For DVRM and LDVRM, syn rates are drawn from the gamma distribution, and non-syn rates - from Inv+Gamma.", 297 "Independent Discrete", "Independent General Discrete Distributions (Recommended setting)", 298 "Correlated Discrete", "Correlated General Discrete Distributions"); 299 300 301 if (modelChoice < 0) 302 { 303 return; 304 } 305 306 resp = 0; 307 resp2 = 0; 308 309 while (resp<2) 310 { 311 fprintf (stdout,"\nNumber of synonymous rate classes (>=2):"); 312 fscanf (stdin, "Number", resp); 313 } 314 315 while (resp2<2) 316 { 317 fprintf (stdout,"\nNumber of non-synonymous rate classes (>=2):"); 318 fscanf (stdin, "Number", resp2); 319 } 320 321 if (modelChoice<2) 322 { 323 fscanf ("../2RatesAnalyses/gamma1.def","Raw",categDef1); 324 325 if (modelChoice == 0) 326 { 327 fscanf ("../2RatesAnalyses/gamma2.def","Raw",categDef2); 328 } 329 else 330 { 331 fscanf ("../2RatesAnalyses/gamma2+Inv.def","Raw",categDef2); 332 } 333 } 334 else 335 { 336 correlationOn = (modelChoice>3); 337 fscanf ("2RatesAnalyses/discreteGenerator.bf","Raw",mi); 338 ExecuteCommands (mi); 339 } 340 341 done = 0; 342 while (!done) 343 { 344 fprintf (stdout,"\nPlease enter a 6 character model designation (e.g:010010 defines HKY85):"); 345 fscanf (stdin,"String", modelDesc); 346 if (Abs(modelDesc)==6) 347 { 348 done = 1; 349 } 350 } 351} 352 353MGCustomRateBiasTerms = {{"AC","1","AT","CG","CT","GT"}}; 354paramCount = 0; 355 356MGCustomModelConstraintString = ""; 357 358for (customLoopCounter2=1; customLoopCounter2<6; customLoopCounter2=customLoopCounter2+1) 359{ 360 for (customLoopCounter=0; customLoopCounter<customLoopCounter2; customLoopCounter=customLoopCounter+1) 361 { 362 if (modelDesc[customLoopCounter2]==modelDesc[customLoopCounter]) 363 { 364 if (MGCustomRateBiasTerms[customLoopCounter2] == "1") 365 { 366 MGCustomModelConstraintString = MGCustomModelConstraintString + MGCustomRateBiasTerms[customLoopCounter]+":="+MGCustomRateBiasTerms[customLoopCounter2]+";"; 367 } 368 else 369 { 370 MGCustomModelConstraintString = MGCustomModelConstraintString + MGCustomRateBiasTerms[customLoopCounter2]+":="+MGCustomRateBiasTerms[customLoopCounter]+";"; 371 } 372 break; 373 } 374 } 375} 376 377if (!SKIP_HARVEST_FREQ) 378{ 379 HarvestFrequencies (observedFreq,filteredData,3,1,1); 380} 381 382NICETY_LEVEL = 3; 383 384MG94custom = 0; 385 386MULTIPLY_BY_FREQS = PopulateModelMatrix ("MG94custom", observedFreq); 387 388FREQUENCY_SENSITIVE = 1; 389 390vectorOfFrequencies = BuildCodonFrequencies (observedFreq); 391 392Model MG94customModel = (MG94custom,vectorOfFrequencies,0); 393 394USE_POSITION_SPECIFIC_FREQS = 1; 395 396_rateDescriptors = {{"Synonymous rates","Non-synonymous rates"}}; 397