1fprintf (stdout, "\n\nPhase 1:Nucleotide Model (",ModelTitle,") Model Fit\n\n"); 2 3global dNdS = 1; 4 5if (nrChoice == 0) 6{ 7 for (fileID = 1; fileID <= fileCount; fileID = fileID + 1) 8 { 9 ExecuteCommands ("Tree nucTree_" + fileID + "=" + treeStrings[fileID] + ";"); 10 } 11 ExecuteCommands (constructLF("nucLF", "nucData", "nucTree", fileCount)); 12 Optimize (res,nucLF); 13 stashLOF = LIKELIHOOD_FUNCTION_OUTPUT ; 14 LIKELIHOOD_FUNCTION_OUTPUT = 6; 15 if (Abs(NUC_FILE_PATH)) 16 { 17 fprintf (NUC_FILE_PATH,CLEAR_FILE,nucLF); 18 } 19 LIKELIHOOD_FUNCTION_OUTPUT = stashLOF; 20} 21 22fprintf (stdout, "\n",nucLF); 23fprintf (stdout, "\n\nPhase 2:MG94x(",ModelTitle,") Model Fit\n\n"); 24 25CodonMatrix = {ModelMatrixDimension,ModelMatrixDimension}; 26 27hshift = 0; 28 29for (h=0; h<64; h=h+1) 30{ 31 if (_Genetic_Code[h]==10) 32 { 33 hshift = hshift+1; 34 continue; 35 } 36 vshift = hshift; 37 for (v = h+1; v<64; v=v+1) 38 { 39 diff = v-h; 40 if (_Genetic_Code[v]==10) 41 { 42 vshift = vshift+1; 43 continue; 44 } 45 nucPosInCodon = 2; 46 if ((h$4==v$4)||((diff%4==0)&&(h$16==v$16))||(diff%16==0)) 47 { 48 if (h$4==v$4) 49 { 50 transition = v%4; 51 transition2= h%4; 52 } 53 else 54 { 55 if(diff%16==0) 56 { 57 transition = v$16; 58 transition2= h$16; 59 nucPosInCodon = 0; 60 } 61 else 62 { 63 transition = v%16$4; 64 transition2= h%16$4; 65 nucPosInCodon = 1; 66 } 67 } 68 if (transition<transition2) 69 { 70 trSM = transition; 71 trLG = transition2; 72 } 73 else 74 { 75 trSM = transition2; 76 trLG = transition; 77 } 78 79 if (trSM==0) 80 { 81 if (trLG==1) 82 { 83 if (_Genetic_Code[0][h]==_Genetic_Code[0][v]) 84 { 85 CodonMatrix[h-hshift][v-vshift] := AC__*synRate*positionFrequencies__[transition__][nucPosInCodon__]; 86 CodonMatrix[v-vshift][h-hshift] := AC__*synRate*positionFrequencies__[transition2__][nucPosInCodon__]; 87 } 88 else 89 { 90 CodonMatrix[h-hshift][v-vshift] := AC__*dNdS*synRate*positionFrequencies__[transition__][nucPosInCodon__]; 91 CodonMatrix[v-vshift][h-hshift] := AC__*dNdS*synRate*positionFrequencies__[transition2__][nucPosInCodon__]; 92 } 93 } 94 else 95 { 96 if (trLG==2) 97 { 98 if (_Genetic_Code[0][h]==_Genetic_Code[0][v]) 99 { 100 CodonMatrix[h-hshift][v-vshift] := synRate*positionFrequencies__[transition__][nucPosInCodon__]; 101 CodonMatrix[v-vshift][h-hshift] := synRate*positionFrequencies__[transition2__][nucPosInCodon__]; 102 } 103 else 104 { 105 CodonMatrix[h-hshift][v-vshift] := dNdS*synRate*positionFrequencies__[transition__][nucPosInCodon__]; 106 CodonMatrix[v-vshift][h-hshift] := dNdS*synRate*positionFrequencies__[transition2__][nucPosInCodon__]; 107 } 108 } 109 else 110 { 111 if (_Genetic_Code[0][h]==_Genetic_Code[0][v]) 112 { 113 CodonMatrix[h-hshift][v-vshift] := AT__*synRate*positionFrequencies__[transition__][nucPosInCodon__]; 114 CodonMatrix[v-vshift][h-hshift] := AT__*synRate*positionFrequencies__[transition2__][nucPosInCodon__]; 115 } 116 else 117 { 118 CodonMatrix[h-hshift][v-vshift] := AT__*dNdS*synRate*positionFrequencies__[transition__][nucPosInCodon__]; 119 CodonMatrix[v-vshift][h-hshift] := AT__*dNdS*synRate*positionFrequencies__[transition2__][nucPosInCodon__]; 120 } 121 } 122 } 123 } 124 else 125 { 126 if (trSM==1) 127 { 128 if (trLG==2) 129 { 130 if (_Genetic_Code[0][h]==_Genetic_Code[0][v]) 131 { 132 CodonMatrix[h-hshift][v-vshift] := CG__*synRate*positionFrequencies__[transition__][nucPosInCodon__]; 133 CodonMatrix[v-vshift][h-hshift] := CG__*synRate*positionFrequencies__[transition2__][nucPosInCodon__]; 134 } 135 else 136 { 137 CodonMatrix[h-hshift][v-vshift] := CG__*dNdS*synRate*positionFrequencies__[transition__][nucPosInCodon__]; 138 CodonMatrix[v-vshift][h-hshift] := CG__*dNdS*synRate*positionFrequencies__[transition2__][nucPosInCodon__]; 139 } 140 } 141 else 142 { 143 if (_Genetic_Code[0][h]==_Genetic_Code[0][v]) 144 { 145 CodonMatrix[h-hshift][v-vshift] := CT__*synRate*positionFrequencies__[transition__][nucPosInCodon__]; 146 CodonMatrix[v-vshift][h-hshift] := CT__*synRate*positionFrequencies__[transition2__][nucPosInCodon__]; 147 } 148 else 149 { 150 CodonMatrix[h-hshift][v-vshift] := CT__*dNdS*synRate*positionFrequencies__[transition__][nucPosInCodon__]; 151 CodonMatrix[v-vshift][h-hshift] := CT__*dNdS*synRate*positionFrequencies__[transition2__][nucPosInCodon__]; 152 } 153 } 154 } 155 else 156 { 157 if (_Genetic_Code[0][h]==_Genetic_Code[0][v]) 158 { 159 CodonMatrix[h-hshift][v-vshift] := GT__*synRate*positionFrequencies__[transition__][nucPosInCodon__]; 160 CodonMatrix[v-vshift][h-hshift] := GT__*synRate*positionFrequencies__[transition2__][nucPosInCodon__]; 161 } 162 else 163 { 164 CodonMatrix[h-hshift][v-vshift] := GT__*dNdS*synRate*positionFrequencies__[transition__][nucPosInCodon__]; 165 CodonMatrix[v-vshift][h-hshift] := GT__*dNdS*synRate*positionFrequencies__[transition2__][nucPosInCodon__]; 166 } 167 } 168 } 169 } 170 } 171} 172 173Model MGModel = (CodonMatrix,codonFrequencies,0); 174for (fileID = 1; fileID <= fileCount; fileID = fileID + 1) 175{ 176 ExecuteCommands ("Tree codonTree_" + fileID + "=" + treeStrings[fileID] + ";"); 177} 178 179/* compute the branch conversion factor */ 180 181synRate = 1; 182blCodon = 0; 183 184for (h=0; h<ModelMatrixDimension; h=h+1) 185{ 186 blCodon = blCodon - CodonMatrix[h][h]*codonFrequencies[h]; 187} 188 189blNuc = 0; 190 191t = 1; 192 193for (h=0; h<4; h=h+1) 194{ 195 blCodon2 = 0; 196 for (v=0; v<4; v=v+1) 197 { 198 if (h==v) 199 { 200 continue; 201 } 202 blCodon2 = blCodon2 + NucleotideMatrix[h][v]*overallFrequencies[v]; 203 } 204 blNuc = blNuc + overallFrequencies[h]*blCodon2; 205} 206 207ExecuteCommands (constructLF("lf", "filteredData", "codonTree", fileCount)); 208 209if (rOptions>=2) 210{ 211 global rConstr = 1; 212 blCodon2 = 0; 213 dNdS = 2; 214 215 for (h=0; h<ModelMatrixDimension; h=h+1) 216 { 217 blCodon2 = blCodon2 - CodonMatrix[h][h]*codonFrequencies[h]; 218 } 219 220 blCodon2 = blCodon2-blCodon; 221 blCodon = blCodon-blCodon2; 222 223 dNdS = 0.5; 224 225 fprintf (stdout, "\n\nPhase 3:Estimating dN/dS\n\n"); 226 if (rOptions == 4) 227 { 228 global rConstr := 3*blNuc__/(blCodon__+dNdS*blCodon2__); 229 for (fileID = 1; fileID <= fileCount; fileID = fileID + 1) 230 { 231 ExecuteCommands ("ReplicateConstraint(\"this1.?.synRate:=rConstr*this2.?.t__\",codonTree_"+fileID+",nucTree_"+fileID+")"); 232 } 233 } 234 else 235 { 236 global rConstr = 1; /*3*blNuc/(blCodon+dNdS*blCodon2)*/; 237 rConstr :> 0; 238 for (fileID = 1; fileID <= fileCount; fileID = fileID + 1) 239 { 240 ExecuteCommands ("ReplicateConstraint(\"this1.?.synRate:=this2.?.t__*rConstr\",codonTree_"+fileID+",nucTree_"+fileID+")"); 241 } 242 } 243 244 saveOM = SKIP_CONJUGATE_GRADIENT; 245 SKIP_CONJUGATE_GRADIENT = 1; 246 247 Optimize (resC,lf); 248 SKIP_CONJUGATE_GRADIENT = saveOM; 249 250 fprintf (stdout, "\nNuc->codon scaling factor:", (3*blNuc/(blCodon+dNdS*blCodon2)),"\nRaw scaling factor:", rConstr, "\nTree scaling factor(S): ", rConstr/(3*blNuc/(blCodon+dNdS*blCodon2))); 251 252 if (rOptions == 3 || rOptions == 5) 253 { 254 COVARIANCE_PRECISION = 0.95; 255 ClearConstraints(codonTree); 256 for (fileID = 1; fileID <= fileCount; fileID = fileID + 1) 257 { 258 ExecuteCommands("ReplicateConstraint(\"this1.?.synRate:=rConstr__*this2.?.t__\",codonTree_"+fileID+",nucTree_"+fileID+")"); 259 } 260 savedNdS = dNdS; 261 COVARIANCE_PARAMETER = "dNdS"; 262 CovarianceMatrix (covMx, lf); 263 fprintf (stdout,"\n\nUsing dN/dS=", dNdS, "(Estimated 95% CI = [", covMx[0][0], "," , covMx[0][2], "])\nCodon model:", lf); 264 dNdS = savedNdS; 265 COVARIANCE_PARAMETER = 0; 266 } 267 else 268 { 269 fprintf (stdout,"\n\nUsing dN/dS=", dNdS, "\nCodon model:", lf); 270 } 271} 272else 273{ 274 blNuc = 3*blNuc/blCodon; 275 276 if (pipeThroughFlag == 0) 277 { 278 global rConstr = 1; 279 rConstr :> 0; 280 fprintf (stdout, "Branch Corrections Factor (<0 to estimate):"); 281 fscanf (stdin, "Number", rConstr); 282 } 283 284 if (rConstr<=0.0) 285 { 286 rConstr = blNuc; 287 for (fileID = 1; fileID <= fileCount; fileID = fileID + 1) 288 { 289 ExecuteCommands("ReplicateConstraint(\"this1.?.synRate:=this2.?.t__*rConstr\",codonTree_"+fileID+",nucTree_"+fileID+")"); 290 } 291 dNdS := dNdS__; 292 saveOM = SKIP_CONJUGATE_GRADIENT; 293 SKIP_CONJUGATE_GRADIENT = 1; 294 Optimize (resC,lf); 295 SKIP_CONJUGATE_GRADIENT = saveOM; 296 dNdS = dNdS; 297 } 298 else 299 { 300 for (fileID = 1; fileID <= fileCount; fileID = fileID + 1) 301 { 302 ExecuteCommands("ReplicateConstraint(\"this1.?.synRate:=this2.?.t__*rConstr__\",codonTree_"+fileID+",nucTree_"+fileID+")"); 303 } 304 } 305 306 fprintf (stdout, "\nNuc->Codon branch correction factor: ", blNuc); 307 fprintf (stdout, "\nRaw scaling factor: ", rConstr); 308 fprintf (stdout, "\nTree scaling factor(S): ", rConstr/blNuc); 309 fprintf (stdout,"\n\nUsing dN/dS=", dNdS, "\nCodon model:", lf); 310} 311 312// ------ MEME helper function -------- 313 314function obtainBranchWiseEBEstimates (_sFactor,_nsFactor1,_nsFactor2, _mixingP,filterString) { 315 316 ClearConstraints (perBranchTree); 317 ReplicateConstraint ("this1.?.alpha:=_sFactor*this2.?.synRate__",perBranchTree,codonTree); 318 ReplicateConstraint ("this1.?.beta1:=_nsFactor1*sFactor*this2.?.synRate__",perBranchTree,codonTree); 319 ReplicateConstraint ("this1.?.beta2:=_nsFactor2*this2.?.synRate__",perBranchTree,codonTree); 320 ReplicateConstraint ("this1.?.lmp:=_mixingP", perBranchTree); 321 322 323 LoadFunctionLibrary ("AncestralMapper"); 324 ancID = _buildAncestralCache ("siteLikelihood",0); 325 subMap = _tabulateSubstitutionsAtSiteByBranch (ancID,0); 326 _destroyAncestralCache (ancID); 327 328 _bn = BranchName (perBranchTree, -1); 329 330 DataSetFilter locSiteFilter = CreateFilter (ds,3,filterString,"",GeneticCodeExclusions); 331 332 LikelihoodFunction siteLikelihoodLoc = (locSiteFilter, perBranchTree); 333 LFCompute (siteLikelihoodLoc,LF_START_COMPUTE); 334 LFCompute (siteLikelihoodLoc,baseline); 335 336 _totalBranchCount = Columns (_bn) - 1; 337 posteriorEstimates = {}; 338 339 if (_mixingP != 1 && _mixingP != 0) { 340 _priorOdds = (1-_mixingP)/_mixingP; 341 } else { 342 _priorOdds = 0; 343 } 344 345 for (k = 0; k < _totalBranchCount; k+=1) 346 { 347 _pname = "perBranchTree." + _bn[k] + ".lmp"; 348 ExecuteCommands ("`_pname`=1"); 349 LFCompute (siteLikelihoodLoc,LOGL0); 350 351 MaxL = -Max (LOGL0,baseline); 352 353 baseline += MaxL; 354 LOGL0 = Exp(MaxL+LOGL0); 355 LOGL1 = (Exp(baseline) - _mixingP * LOGL0) / (1-_mixingP); 356 357 ExecuteCommands ("`_pname`=_mixingP"); 358 _posteriorProb = {{LOGL0 * _mixingP, LOGL1 * (1-_mixingP)}}; 359 _posteriorProb = _posteriorProb * (1/(+_posteriorProb)); 360 if ( _priorOdds != 0) { 361 eBF = _posteriorProb[1] / (1 - _posteriorProb[1]) / _priorOdds; 362 } else { 363 eBF = 1; 364 } 365 posteriorEstimates [_bn[k]] = {1,4}; 366 (posteriorEstimates [_bn[k]])[0] = _posteriorProb[1]; 367 (posteriorEstimates [_bn[k]])[1] = eBF; 368 (posteriorEstimates [_bn[k]])[2] = (subMap[_bn[k]])[0]; 369 (posteriorEstimates [_bn[k]])[3] = (subMap[_bn[k]])[1]; 370 baseline += -MaxL; 371 } 372 LFCompute (siteLikelihoodLoc,LF_DONE_COMPUTE); 373 374 return posteriorEstimates; 375} 376 377// ------ MEME helper function -------- 378 379function obtainBranchWiseEBEstimatesMPI (_sFactor,_nsFactor1,_nsFactor2,_mixingP) { 380 if (_nsFactor2 <= _sFactor || _mixingP == 1 || _mixingP == 0) 381 { 382 return {}; 383 } 384 385 sFactor = _sFactor; 386 nsFactor1 = _nsFactor1; 387 nsFactor2 = _nsFactor2; 388 mixingP = _mixingP; 389 390 treeString = Format (siteTree,1,1); 391 392 LoadFunctionLibrary ("AncestralMapper"); 393 ancID = _buildAncestralCache ("siteLikelihood",0); 394 subMap = _tabulateSubstitutionsAtSiteByBranch (ancID,0); 395 _destroyAncestralCache (ancID); 396 397 Model MGLocalMix = ("Exp(MGMatrix1)*lmp+Exp(MGMatrix2)*(1-lmp)",codonFrequencies,EXPLICIT_FORM_MATRIX_EXPONENTIAL); 398 Tree perBranchTree = treeString; 399 ClearConstraints (perBranchTree); 400 ReplicateConstraint ("this1.?.alpha:=this2.?.alpha__",perBranchTree,siteTree); 401 ReplicateConstraint ("this1.?.beta1:=this2.?.beta1__",perBranchTree,siteTree); 402 ReplicateConstraint ("this1.?.beta2:=this2.?.beta2__",perBranchTree,siteTree); 403 ReplicateConstraint ("this1.?.lmp:=_mixingP", perBranchTree); 404 405 _bn = BranchName (perBranchTree, -1); 406 407 LikelihoodFunction siteLikelihoodLoc = (siteFilter, perBranchTree); 408 LFCompute (siteLikelihoodLoc,LF_START_COMPUTE); 409 LFCompute (siteLikelihoodLoc,baseline); 410 411 _totalBranchCount = Columns (_bn) - 1; 412 posteriorEstimates = {}; 413 414 _priorOdds = (1-_mixingP)/_mixingP; 415 416 for (k = 0; k < _totalBranchCount; k+=1) 417 { 418 _pname = "perBranchTree." + _bn[k] + ".lmp"; 419 ExecuteCommands ("`_pname`=1"); 420 LFCompute (siteLikelihoodLoc,LOGL0); 421 422 MaxL = -Max (LOGL0,baseline); 423 424 baseline += MaxL; 425 LOGL0 = Exp(MaxL+LOGL0); 426 LOGL1 = (Exp(baseline) - _mixingP * LOGL0) / (1-_mixingP); 427 428 ExecuteCommands ("`_pname`=_mixingP"); 429 _posteriorProb = {{LOGL0 * _mixingP, LOGL1 * (1-_mixingP)}}; 430 _posteriorProb = _posteriorProb * (1/(+_posteriorProb)); 431 if ( _priorOdds != 0) { 432 eBF = _posteriorProb[1] / (1 - _posteriorProb[1]) / _priorOdds; 433 } else { 434 eBF = 1; 435 } 436 posteriorEstimates [_bn[k]] = {1,4}; 437 (posteriorEstimates [_bn[k]])[0] = _posteriorProb[1]; 438 (posteriorEstimates [_bn[k]])[1] = eBF; 439 (posteriorEstimates [_bn[k]])[2] = (subMap[_bn[k]])[0]; 440 (posteriorEstimates [_bn[k]])[3] = (subMap[_bn[k]])[1]; 441 baseline += -MaxL; 442 } 443 444 LFCompute (siteLikelihoodLoc,LF_DONE_COMPUTE); 445 446 447 return posteriorEstimates; 448} 449