1fprintf (stdout, "\n\nPhase 1:Nucleotide Model (",ModelTitle,") Model Fit\n\n");
2
3global dNdS    = 1;
4
5if (nrChoice == 0)
6{
7	for (fileID = 1; fileID <= fileCount; fileID = fileID + 1)
8	{
9		ExecuteCommands ("Tree nucTree_" + fileID + "=" + treeStrings[fileID] + ";");
10	}
11	ExecuteCommands (constructLF("nucLF", "nucData", "nucTree", fileCount));
12	Optimize (res,nucLF);
13	stashLOF = LIKELIHOOD_FUNCTION_OUTPUT ;
14	LIKELIHOOD_FUNCTION_OUTPUT  = 6;
15	if (Abs(NUC_FILE_PATH))
16	{
17		fprintf (NUC_FILE_PATH,CLEAR_FILE,nucLF);
18	}
19	LIKELIHOOD_FUNCTION_OUTPUT  = stashLOF;
20}
21
22fprintf (stdout, "\n",nucLF);
23fprintf (stdout, "\n\nPhase 2:MG94x(",ModelTitle,") Model Fit\n\n");
24
25CodonMatrix = {ModelMatrixDimension,ModelMatrixDimension};
26
27hshift = 0;
28
29for (h=0; h<64; h=h+1)
30{
31	if (_Genetic_Code[h]==10)
32	{
33		hshift = hshift+1;
34		continue;
35	}
36	vshift = hshift;
37	for (v = h+1; v<64; v=v+1)
38	{
39		diff = v-h;
40		if (_Genetic_Code[v]==10)
41		{
42			vshift = vshift+1;
43			continue;
44		}
45		nucPosInCodon = 2;
46		if ((h$4==v$4)||((diff%4==0)&&(h$16==v$16))||(diff%16==0))
47		{
48			if (h$4==v$4)
49			{
50				transition = v%4;
51				transition2= h%4;
52			}
53			else
54			{
55				if(diff%16==0)
56				{
57					transition = v$16;
58					transition2= h$16;
59					nucPosInCodon = 0;
60				}
61				else
62				{
63					transition = v%16$4;
64					transition2= h%16$4;
65					nucPosInCodon = 1;
66				}
67			}
68			if (transition<transition2)
69			{
70				trSM = transition;
71				trLG = transition2;
72			}
73			else
74			{
75				trSM = transition2;
76				trLG = transition;
77			}
78
79			if (trSM==0)
80			{
81				if (trLG==1)
82				{
83					if (_Genetic_Code[0][h]==_Genetic_Code[0][v])
84					{
85						CodonMatrix[h-hshift][v-vshift] := AC__*synRate*positionFrequencies__[transition__][nucPosInCodon__];
86						CodonMatrix[v-vshift][h-hshift] := AC__*synRate*positionFrequencies__[transition2__][nucPosInCodon__];
87					}
88					else
89					{
90						CodonMatrix[h-hshift][v-vshift] := AC__*dNdS*synRate*positionFrequencies__[transition__][nucPosInCodon__];
91						CodonMatrix[v-vshift][h-hshift] := AC__*dNdS*synRate*positionFrequencies__[transition2__][nucPosInCodon__];
92					}
93				}
94				else
95				{
96					if (trLG==2)
97					{
98						if (_Genetic_Code[0][h]==_Genetic_Code[0][v])
99						{
100							CodonMatrix[h-hshift][v-vshift] := synRate*positionFrequencies__[transition__][nucPosInCodon__];
101							CodonMatrix[v-vshift][h-hshift] := synRate*positionFrequencies__[transition2__][nucPosInCodon__];
102						}
103						else
104						{
105							CodonMatrix[h-hshift][v-vshift] := dNdS*synRate*positionFrequencies__[transition__][nucPosInCodon__];
106							CodonMatrix[v-vshift][h-hshift] := dNdS*synRate*positionFrequencies__[transition2__][nucPosInCodon__];
107						}
108					}
109					else
110					{
111						if (_Genetic_Code[0][h]==_Genetic_Code[0][v])
112						{
113							CodonMatrix[h-hshift][v-vshift] := AT__*synRate*positionFrequencies__[transition__][nucPosInCodon__];
114							CodonMatrix[v-vshift][h-hshift] := AT__*synRate*positionFrequencies__[transition2__][nucPosInCodon__];
115						}
116						else
117						{
118							CodonMatrix[h-hshift][v-vshift] := AT__*dNdS*synRate*positionFrequencies__[transition__][nucPosInCodon__];
119							CodonMatrix[v-vshift][h-hshift] := AT__*dNdS*synRate*positionFrequencies__[transition2__][nucPosInCodon__];
120						}
121					}
122				}
123			}
124			else
125			{
126				if (trSM==1)
127				{
128					if (trLG==2)
129					{
130						if (_Genetic_Code[0][h]==_Genetic_Code[0][v])
131						{
132							CodonMatrix[h-hshift][v-vshift] := CG__*synRate*positionFrequencies__[transition__][nucPosInCodon__];
133							CodonMatrix[v-vshift][h-hshift] := CG__*synRate*positionFrequencies__[transition2__][nucPosInCodon__];
134						}
135						else
136						{
137							CodonMatrix[h-hshift][v-vshift] := CG__*dNdS*synRate*positionFrequencies__[transition__][nucPosInCodon__];
138							CodonMatrix[v-vshift][h-hshift] := CG__*dNdS*synRate*positionFrequencies__[transition2__][nucPosInCodon__];
139						}
140					}
141					else
142					{
143						if (_Genetic_Code[0][h]==_Genetic_Code[0][v])
144						{
145							CodonMatrix[h-hshift][v-vshift] := CT__*synRate*positionFrequencies__[transition__][nucPosInCodon__];
146							CodonMatrix[v-vshift][h-hshift] := CT__*synRate*positionFrequencies__[transition2__][nucPosInCodon__];
147						}
148						else
149						{
150							CodonMatrix[h-hshift][v-vshift] := CT__*dNdS*synRate*positionFrequencies__[transition__][nucPosInCodon__];
151							CodonMatrix[v-vshift][h-hshift] := CT__*dNdS*synRate*positionFrequencies__[transition2__][nucPosInCodon__];
152						}
153					}
154				}
155				else
156				{
157					if (_Genetic_Code[0][h]==_Genetic_Code[0][v])
158					{
159						CodonMatrix[h-hshift][v-vshift] := GT__*synRate*positionFrequencies__[transition__][nucPosInCodon__];
160						CodonMatrix[v-vshift][h-hshift] := GT__*synRate*positionFrequencies__[transition2__][nucPosInCodon__];
161					}
162					else
163					{
164						CodonMatrix[h-hshift][v-vshift] := GT__*dNdS*synRate*positionFrequencies__[transition__][nucPosInCodon__];
165						CodonMatrix[v-vshift][h-hshift] := GT__*dNdS*synRate*positionFrequencies__[transition2__][nucPosInCodon__];
166					}
167				}
168			}
169		}
170   }
171}
172
173Model MGModel   = (CodonMatrix,codonFrequencies,0);
174for (fileID = 1; fileID <= fileCount; fileID = fileID + 1)
175{
176	ExecuteCommands ("Tree codonTree_" + fileID + "=" + treeStrings[fileID] + ";");
177}
178
179/* compute the branch conversion factor */
180
181synRate = 1;
182blCodon = 0;
183
184for (h=0; h<ModelMatrixDimension; h=h+1)
185{
186	blCodon = blCodon - CodonMatrix[h][h]*codonFrequencies[h];
187}
188
189blNuc = 0;
190
191t = 1;
192
193for  (h=0; h<4; h=h+1)
194{
195	blCodon2 = 0;
196	for (v=0; v<4; v=v+1)
197	{
198		if (h==v)
199		{
200			continue;
201		}
202		blCodon2 = blCodon2 + NucleotideMatrix[h][v]*overallFrequencies[v];
203	}
204	blNuc = blNuc + overallFrequencies[h]*blCodon2;
205}
206
207ExecuteCommands (constructLF("lf", "filteredData", "codonTree", fileCount));
208
209if (rOptions>=2)
210{
211	global rConstr = 1;
212	blCodon2 	   = 0;
213	dNdS 		   = 2;
214
215	for (h=0; h<ModelMatrixDimension; h=h+1)
216	{
217		blCodon2 = blCodon2 - CodonMatrix[h][h]*codonFrequencies[h];
218	}
219
220	blCodon2 = blCodon2-blCodon;
221	blCodon  = blCodon-blCodon2;
222
223	dNdS = 0.5;
224
225	fprintf (stdout, "\n\nPhase 3:Estimating dN/dS\n\n");
226	if (rOptions == 4)
227	{
228		global 	   rConstr := 3*blNuc__/(blCodon__+dNdS*blCodon2__);
229		for (fileID = 1; fileID <= fileCount; fileID = fileID + 1)
230		{
231			ExecuteCommands ("ReplicateConstraint(\"this1.?.synRate:=rConstr*this2.?.t__\",codonTree_"+fileID+",nucTree_"+fileID+")");
232		}
233	}
234	else
235	{
236		global 		   rConstr = 1; /*3*blNuc/(blCodon+dNdS*blCodon2)*/;
237		rConstr :> 0;
238		for (fileID = 1; fileID <= fileCount; fileID = fileID + 1)
239		{
240			ExecuteCommands ("ReplicateConstraint(\"this1.?.synRate:=this2.?.t__*rConstr\",codonTree_"+fileID+",nucTree_"+fileID+")");
241		}
242	}
243
244	saveOM = SKIP_CONJUGATE_GRADIENT;
245	SKIP_CONJUGATE_GRADIENT = 1;
246
247	Optimize (resC,lf);
248	SKIP_CONJUGATE_GRADIENT = saveOM;
249
250	fprintf (stdout, "\nNuc->codon scaling factor:", (3*blNuc/(blCodon+dNdS*blCodon2)),"\nRaw scaling factor:", rConstr, "\nTree scaling factor(S): ", rConstr/(3*blNuc/(blCodon+dNdS*blCodon2)));
251
252	if (rOptions == 3 || rOptions == 5)
253	{
254		COVARIANCE_PRECISION = 0.95;
255		ClearConstraints(codonTree);
256		for (fileID = 1; fileID <= fileCount; fileID = fileID + 1)
257		{
258			ExecuteCommands("ReplicateConstraint(\"this1.?.synRate:=rConstr__*this2.?.t__\",codonTree_"+fileID+",nucTree_"+fileID+")");
259		}
260		savedNdS = dNdS;
261		COVARIANCE_PARAMETER = "dNdS";
262		CovarianceMatrix (covMx, lf);
263		fprintf (stdout,"\n\nUsing dN/dS=", dNdS, "(Estimated 95% CI = [", covMx[0][0], "," , covMx[0][2], "])\nCodon model:", lf);
264		dNdS = savedNdS;
265		COVARIANCE_PARAMETER = 0;
266	}
267	else
268	{
269		fprintf (stdout,"\n\nUsing dN/dS=", dNdS, "\nCodon model:", lf);
270	}
271}
272else
273{
274	blNuc = 3*blNuc/blCodon;
275
276	if (pipeThroughFlag == 0)
277	{
278		global 		   rConstr = 1;
279		rConstr :> 0;
280		fprintf (stdout, "Branch Corrections Factor (<0 to estimate):");
281		fscanf  (stdin,  "Number", rConstr);
282	}
283
284	if (rConstr<=0.0)
285	{
286		rConstr = blNuc;
287		for (fileID = 1; fileID <= fileCount; fileID = fileID + 1)
288		{
289			ExecuteCommands("ReplicateConstraint(\"this1.?.synRate:=this2.?.t__*rConstr\",codonTree_"+fileID+",nucTree_"+fileID+")");
290		}
291		dNdS := dNdS__;
292		saveOM = SKIP_CONJUGATE_GRADIENT;
293		SKIP_CONJUGATE_GRADIENT = 1;
294		Optimize (resC,lf);
295		SKIP_CONJUGATE_GRADIENT = saveOM;
296		dNdS = dNdS;
297	}
298	else
299	{
300		for (fileID = 1; fileID <= fileCount; fileID = fileID + 1)
301		{
302			ExecuteCommands("ReplicateConstraint(\"this1.?.synRate:=this2.?.t__*rConstr__\",codonTree_"+fileID+",nucTree_"+fileID+")");
303		}
304	}
305
306	fprintf (stdout, "\nNuc->Codon branch correction factor: ", blNuc);
307	fprintf (stdout, "\nRaw scaling factor: ", rConstr);
308	fprintf (stdout, "\nTree scaling factor(S): ", rConstr/blNuc);
309	fprintf (stdout,"\n\nUsing dN/dS=", dNdS, "\nCodon model:", lf);
310}
311
312// ------ MEME helper function --------
313
314function obtainBranchWiseEBEstimates (_sFactor,_nsFactor1,_nsFactor2, _mixingP,filterString) {
315
316        ClearConstraints    (perBranchTree);
317        ReplicateConstraint ("this1.?.alpha:=_sFactor*this2.?.synRate__",perBranchTree,codonTree);
318        ReplicateConstraint ("this1.?.beta1:=_nsFactor1*sFactor*this2.?.synRate__",perBranchTree,codonTree);
319        ReplicateConstraint ("this1.?.beta2:=_nsFactor2*this2.?.synRate__",perBranchTree,codonTree);
320        ReplicateConstraint ("this1.?.lmp:=_mixingP", perBranchTree);
321
322
323        LoadFunctionLibrary ("AncestralMapper");
324        ancID = _buildAncestralCache ("siteLikelihood",0);
325        subMap = _tabulateSubstitutionsAtSiteByBranch (ancID,0);
326        _destroyAncestralCache (ancID);
327
328       _bn = BranchName (perBranchTree, -1);
329
330        DataSetFilter locSiteFilter = CreateFilter (ds,3,filterString,"",GeneticCodeExclusions);
331
332		LikelihoodFunction siteLikelihoodLoc = (locSiteFilter, perBranchTree);
333        LFCompute (siteLikelihoodLoc,LF_START_COMPUTE);
334        LFCompute (siteLikelihoodLoc,baseline);
335
336        _totalBranchCount  = Columns (_bn) - 1;
337        posteriorEstimates = {};
338
339        if (_mixingP != 1 && _mixingP != 0) {
340            _priorOdds = (1-_mixingP)/_mixingP;
341        } else {
342            _priorOdds = 0;
343        }
344
345        for (k = 0; k < _totalBranchCount; k+=1)
346        {
347             _pname = "perBranchTree." + _bn[k] + ".lmp";
348            ExecuteCommands ("`_pname`=1");
349            LFCompute (siteLikelihoodLoc,LOGL0);
350
351            MaxL     = -Max (LOGL0,baseline);
352
353            baseline += MaxL;
354            LOGL0 = Exp(MaxL+LOGL0);
355            LOGL1 = (Exp(baseline) - _mixingP * LOGL0) / (1-_mixingP);
356
357            ExecuteCommands ("`_pname`=_mixingP");
358            _posteriorProb = {{LOGL0 * _mixingP, LOGL1 * (1-_mixingP)}};
359            _posteriorProb = _posteriorProb * (1/(+_posteriorProb));
360            if ( _priorOdds != 0) {
361                eBF = _posteriorProb[1] / (1 - _posteriorProb[1]) / _priorOdds;
362            } else {
363                eBF = 1;
364            }
365            posteriorEstimates [_bn[k]] = {1,4};
366            (posteriorEstimates [_bn[k]])[0] = _posteriorProb[1];
367            (posteriorEstimates [_bn[k]])[1] = eBF;
368            (posteriorEstimates [_bn[k]])[2] = (subMap[_bn[k]])[0];
369            (posteriorEstimates [_bn[k]])[3] = (subMap[_bn[k]])[1];
370            baseline += -MaxL;
371        }
372        LFCompute (siteLikelihoodLoc,LF_DONE_COMPUTE);
373
374        return posteriorEstimates;
375}
376
377// ------ MEME helper function --------
378
379function obtainBranchWiseEBEstimatesMPI (_sFactor,_nsFactor1,_nsFactor2,_mixingP) {
380        if (_nsFactor2 <= _sFactor || _mixingP == 1 || _mixingP == 0)
381        {
382            return {};
383        }
384
385        sFactor   = _sFactor;
386        nsFactor1 = _nsFactor1;
387        nsFactor2 = _nsFactor2;
388        mixingP   = _mixingP;
389
390        treeString = Format (siteTree,1,1);
391
392        LoadFunctionLibrary ("AncestralMapper");
393        ancID = _buildAncestralCache ("siteLikelihood",0);
394        subMap = _tabulateSubstitutionsAtSiteByBranch (ancID,0);
395        _destroyAncestralCache (ancID);
396
397        Model 		MGLocalMix		=		  ("Exp(MGMatrix1)*lmp+Exp(MGMatrix2)*(1-lmp)",codonFrequencies,EXPLICIT_FORM_MATRIX_EXPONENTIAL);
398        Tree        perBranchTree 			= treeString;
399        ClearConstraints    (perBranchTree);
400        ReplicateConstraint ("this1.?.alpha:=this2.?.alpha__",perBranchTree,siteTree);
401        ReplicateConstraint ("this1.?.beta1:=this2.?.beta1__",perBranchTree,siteTree);
402        ReplicateConstraint ("this1.?.beta2:=this2.?.beta2__",perBranchTree,siteTree);
403        ReplicateConstraint ("this1.?.lmp:=_mixingP", perBranchTree);
404
405        _bn = BranchName (perBranchTree, -1);
406
407		LikelihoodFunction siteLikelihoodLoc = (siteFilter, perBranchTree);
408        LFCompute (siteLikelihoodLoc,LF_START_COMPUTE);
409        LFCompute (siteLikelihoodLoc,baseline);
410
411        _totalBranchCount  = Columns (_bn) - 1;
412        posteriorEstimates = {};
413
414        _priorOdds = (1-_mixingP)/_mixingP;
415
416        for (k = 0; k < _totalBranchCount; k+=1)
417        {
418             _pname = "perBranchTree." + _bn[k] + ".lmp";
419            ExecuteCommands ("`_pname`=1");
420            LFCompute (siteLikelihoodLoc,LOGL0);
421
422            MaxL     = -Max (LOGL0,baseline);
423
424            baseline += MaxL;
425            LOGL0 = Exp(MaxL+LOGL0);
426            LOGL1 = (Exp(baseline) - _mixingP * LOGL0) / (1-_mixingP);
427
428            ExecuteCommands ("`_pname`=_mixingP");
429            _posteriorProb = {{LOGL0 * _mixingP, LOGL1 * (1-_mixingP)}};
430            _posteriorProb = _posteriorProb * (1/(+_posteriorProb));
431            if ( _priorOdds != 0) {
432                eBF = _posteriorProb[1] / (1 - _posteriorProb[1]) / _priorOdds;
433            } else {
434                eBF = 1;
435            }
436            posteriorEstimates [_bn[k]] = {1,4};
437            (posteriorEstimates [_bn[k]])[0] = _posteriorProb[1];
438            (posteriorEstimates [_bn[k]])[1] = eBF;
439            (posteriorEstimates [_bn[k]])[2] = (subMap[_bn[k]])[0];
440            (posteriorEstimates [_bn[k]])[3] = (subMap[_bn[k]])[1];
441            baseline += -MaxL;
442        }
443
444        LFCompute (siteLikelihoodLoc,LF_DONE_COMPUTE);
445
446
447        return posteriorEstimates;
448}
449