VERBOSITY_LEVEL = -1;
incFileName = HYPHY_LIB_DIRECTORY + "TemplateBatchFiles" + DIRECTORY_SEPARATOR + "Utility" + DIRECTORY_SEPARATOR + "NJ.bf";
ExecuteCommands ("#include \""+incFileName + "\";");

/* ________________________________________________________________________________________________*/

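/* processAPartition: fit a single GTR model with rate variation across sites to the alignment
   in 'dataFilePath' (if the path is empty, an existing DataSet 'ds' is assumed to be in scope),
   then refit the model with a separate NJ tree for each fragment delimited by the 0-based end
   coordinates listed in 'partitionMatrix', and compare the two fits. Returns an associative
   array with the keys VS, AC, AT, CG, CT, GT, alpha, Divergence, DAIC, DLogL, SIG_CPL, SPLITS
   and Fragments.

   Illustrative usage sketch (hypothetical file name and coordinates, not part of this file):

	bpMatrix = {{299,599}};
	result	 = processAPartition ("alignment.fas", bpMatrix);
	fprintf	 (stdout, "Delta c-AIC = ", result["DAIC"], "\n");
*/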
function processAPartition (dataFilePath, partitionMatrix)
{
	if (Abs (dataFilePath))
	{
		DataSet 	ds = ReadDataFile (dataFilePath);
	}
	readPCount 	   = Columns (partitionMatrix);
	/*fprintf (stdout, "\nLoaded ", readPCount , " partitions\n");*/

	outAVL = {};

	partitionStrings = {};
	treeStrings		= {};

	lastP = 0;
	for (h=0; h<readPCount; h=h+1)
	{
		pString 					= ""+lastP+"-"+partitionMatrix[h];
		lastP 						= partitionMatrix[h]+1;
		partitionStrings [h] 		= pString;
		DataSetFilter filteredData 	= CreateFilter (ds,1,pString);
		treeStrings[h] 				= InferTreeTopology(filteredData);
	}

	pString = ""+lastP+"-"+(ds.sites-1);
	partitionStrings [h] = pString;
	DataSetFilter filteredData = CreateFilter (ds,1,pString);
	treeStrings[h] = InferTreeTopology(filteredData);

	resp = 4;

	global betaP = 1;
	global betaQ = 1;
	betaP:>0.05;betaP:<85;
	betaQ:>0.05;betaQ:<85;

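	/* Rate heterogeneity: 'c' is a gamma-distributed site rate (shape 'alpha') discretized into
	   'resp' classes; the auxiliary beta-distributed category 'pc' supplies the class breakpoints,
	   so the discretization itself is adjusted during optimization (the beta-gamma construction). */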
	category pc = (resp-1, EQUAL, MEAN,
					_x_^(betaP-1)*(1-_x_)^(betaQ-1)/Beta(betaP,betaQ), /* density */
					IBeta(_x_,betaP,betaQ), /*CDF*/
					0, 				   /*left bound*/
					1, 			   /*right bound*/
					IBeta(_x_,betaP+1,betaQ)*betaP/(betaP+betaQ)
				   );


	global alpha = .5;
	alpha:>0.01;alpha:<100;
	category c = (resp, pc, MEAN,
					GammaDist(_x_,alpha,alpha),
					CGammaDist(_x_,alpha,alpha),
					0 ,
			  	    1e25,
			  	    CGammaDist(_x_,alpha+1,alpha)
			  	 );

	global		 AC = 1;
	global		 AT = 1;
	global		 CG = 1;
	global		 CT = 1;
	global		 GT = 1;

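	/* General time-reversible (GTR) rate matrix: 't' is the branch-length parameter, 'c' the
	   site-specific rate category, and the A<->G exchangeability is fixed at 1, so the remaining
	   five exchangeabilities (AC, AT, CG, CT, GT) are estimated relative to it. */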
	GTR_Matrix = {{*,AC*t*c,t*c,AT*t*c}
				 {AC*t*c,*,CG*t*c,CT*t*c}
				 {t*c,CG*t*c,*,GT*t*c}
				 {AT*t*c,CT*t*c,GT*t*c,*}};

	DataSetFilter	filteredData = CreateFilter (ds,1);

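	/* Count variable (polymorphic) sites: a site index is recorded in bppMap whenever more than
	   one nucleotide is observed in that column. */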
	bppMap = {};
	for (h=0; h<filteredData.sites; h=h+1)
	{
		filterString = "" + h;
		DataSetFilter siteFilter = CreateFilter (filteredData,1,filterString);

		HarvestFrequencies (f1, siteFilter, 1, 1, 0);
		m1 = 0;
		for (mpiNode=0; (mpiNode < 4) && (m1<=1) ; mpiNode=mpiNode+1)
		{
			if (f1[mpiNode]>0)
			{
				m1=m1+1;
			}
		}
		if (m1>1)
		{
			bppMap[Abs(bppMap)] = h;
		}
	}

	/*fprintf (stdout, "\nSequences :", filteredData.species,
					 "\nSites     :", filteredData.sites,
					 "\nVariable  :", Abs(bppMap), "\n"); */

	outAVL ["VS"] = Abs(bppMap);

	HarvestFrequencies (baseFreqs,ds,1,1,1);

	/*fprintf (stdout, "\n\nf(A) = ", baseFreqs[0],
					 "\nf(C) = ", baseFreqs[1],
					 "\nf(G) = ", baseFreqs[2],
					 "\nf(T) = ", baseFreqs[3],"\n");*/

	Model GTR_Model =  (GTR_Matrix, baseFreqs, 1);

	treeString 	= InferTreeTopology   (0);
	Tree givenTree = treeString;

	LikelihoodFunction singlePart = (filteredData, givenTree);
	Optimize (res, singlePart);

	baseParams = res[1][2];

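	/* If the baseline fit estimated any global parameters, pin them at their MLEs so that the
	   per-partition refits below reuse the same substitution-model parameters. */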
	if (baseParams>0)
	{

		ConstraintString = "";
		ConstraintString*256;
		for (h=0; h<baseParams; h=h+1)
		{
			GetString (v,singlePart,h);
			ConstraintString * (v+":="+v+"__;\n");
		}
		ConstraintString*0;
		ExecuteCommands (ConstraintString);
	}

	nullCAIC = -2*(res[1][0]-res[1][1]*filteredData.sites/(filteredData.sites-res[1][1]-1));

	/*fprintf  (stdout, "\n\nc-AIC = ", nullCAIC, "\n", singlePart);*/

	outAVL["AC"] = AC;
	outAVL["AT"] = AT;
	outAVL["CG"] = CG;
	outAVL["CT"] = CT;
	outAVL["GT"] = GT;
	outAVL["alpha"] = alpha;

	m1 = computeMeanDivergence ("givenTree");

	/*fprintf  (stdout, "\nMean divergence : ", m1*100, "%\n");*/
	outAVL ["Divergence"] = m1*100;

	USE_DISTANCES = 0;

	if (readPCount == 0)
	{
		return outAVL;
	}

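	/* Build a joint likelihood function in which each fragment gets its own data filter and its
	   own NJ tree (inferred above), while the global model parameters remain shared. */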
	lfDef = "";
	lfDef * 128;
	lfDef  * "LikelihoodFunction multiPart  = (";

	for (pccounter = 0; pccounter <= readPCount; pccounter = pccounter + 1)
	{
		ExecuteCommands ("DataSetFilter part_" + pccounter + " = CreateFilter (ds, 1, \"" + partitionStrings[pccounter] + "\");");
		ExecuteCommands ("Tree tree_" + pccounter + " = " + treeStrings[pccounter] + ";");
		if (pccounter)
		{
			lfDef * ",";
		}
		lfDef  * ("part_"+pccounter+",tree_"+pccounter);
	}
	lfDef  * ");";
	lfDef  * 0;
	ExecuteCommands (lfDef);
	Optimize (res2, multiPart);

	/*fprintf (stdout, multiPart,"\n",res,"\n", res2);*/

	myDF = baseParams + res2[1][1];

	fullCAIC = -2*(res2[1][0]-myDF*filteredData.sites/(filteredData.sites-myDF-1));
	/*fprintf  (stdout, "\n\nc-AIC = ", fullCAIC, "\nDelta AIC = ", nullCAIC-fullCAIC,"\n\n");*/
	outAVL ["DAIC"]  = nullCAIC-fullCAIC;
	outAVL ["DLogL"] = res2[1][0]-res[1][0];

	if (outAVL ["DAIC"] < 0)
	{
		return outAVL;
	}

	fragMatrix = {2,readPCount+1};

	for (pccounter = 0; pccounter <=  readPCount; pccounter = pccounter + 1)
	{
		ExecuteCommands ("siteCount = part_" + pccounter + ".sites;");
		/*fprintf (stdout, "Partition ", pccounter+1, " : ", siteCount, " sites\n");*/
		fragMatrix [0][pccounter] = siteCount;
	}

	pairWiseSplitMatch = {readPCount+1,readPCount+1};

	for (pccounter = 0; pccounter <=  readPCount; pccounter = pccounter + 1)
	{
		for (pc2 = pccounter+1; pc2 <=  readPCount; pc2 = pc2 + 1)
		{
			ExecuteCommands ("siteCount = compareTreesForSplits(\"tree_" +pccounter + "\",\"tree_"+pc2+"\");");
			pairWiseSplitMatch[pccounter][pc2] = siteCount[2]/Min(siteCount[0],siteCount[1]);
		}
	}

	/* now do SH testing */

	conditionals 	 = {};
	likelihoodScores = {readPCount+1,readPCount+1};
	pairwiseP		 = {readPCount+1,readPCount+1};

	treeSplitMatches 	 = 0;
	khIterations		 = 1000;

	for (pccounter = 0; pccounter <=  readPCount; pccounter = pccounter + 1)
	{
		for (pc2 = 0; pc2 <=  readPCount; pc2 = pc2 + 1)
		{
			if (Abs(pc2-pccounter) <= 1)
			{
				DataSetFilter 		aPart = CreateFilter (ds,1,partitionStrings[pccounter]);
				Tree		  		aTree = treeStrings[pc2];
				LikelihoodFunction	aLF	= (aPart,aTree);

				/*fprintf 		(stdout, "\n\nFitting tree ", pc2+1, " to partition ", pccounter+1, "\n");*/
				Optimize 		(aRes, aLF);
				/*fprintf			(stdout, aLF);*/
				GetInformation  (cI,c);
				cI 				= cI[1][-1];
				ConstructCategoryMatrix (conds, aLF);
				conds 			= Log(cI*conds);
				conditionals [""+pccounter+","+pc2] = conds;
				likelihoodScores[pccounter][pc2] = aRes[1][0];
				treeSplitMatches = treeSplitMatches + pairWiseSplitMatch[pccounter][pc2];
			}
		}

		/*fprintf (stdout, "\nKH Testing partition ", pccounter+1, "\n");*/
		partTreeConds = conditionals[""+pccounter+","+pccounter];

		for (pc2 = 0; pc2 <=  readPCount; pc2 = pc2 + 1)
		{
			if (Abs(pc2-pccounter) == 1)
			{
				otherPartTree = conditionals[""+pccounter+","+pc2];
				baseLRT = 2*(likelihoodScores[pccounter][pccounter]-likelihoodScores[pccounter][pc2]);
				/*fprintf (stdout, "Tree ", pc2+1, " base LRT = ", baseLRT, ". p-value = ");*/
				textMx = testLRT(partTreeConds,otherPartTree,khIterations) % 0;
				for (kk=0; kk<khIterations; kk=kk+1)
				{
					if (textMx[kk]>=0)
					{
						break;
					}
				}
				pval = Max(1,kk)/khIterations;
				/*
				fprintf (stdout, pval , "\n");
				*/

				pairwiseP[pccounter][pc2] = pval;
			}
		}
	}

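	/* Summarize the KH tests: a breakpoint is counted as a significant coupling when the trees
	   on both sides of it reject each other at a Bonferroni-corrected 0.01 level (0.01 divided
	   by the total number of pairwise comparisons). */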
	treeSplitMatches = treeSplitMatches*2/readPCount/(readPCount+1);
	totalComparisons = readPCount*2;
	threshP			 = 0.01/totalComparisons;

	significantCouplings = 0;

	for (pccounter = 1; pccounter <=  readPCount; pccounter = pccounter + 1)
	{
		if (pairwiseP[pccounter][pccounter-1] <= threshP && pairwiseP[pccounter-1][pccounter] <= threshP)
		{
			significantCouplings = significantCouplings + 1;
		}
		fragMatrix [1][pccounter] = totalComparisons*Max (pairwiseP[pccounter][pccounter-1],pairwiseP[pccounter-1][pccounter]);
	}

	/*fprintf (stdout, "A total of ", significantCouplings, "/", readPCount, " significant couplings\n");
	fprintf (stdout, "Mean splits identity: ",treeSplitMatches, "\n" );*/
	outAVL["SIG_CPL"] = significantCouplings;
	outAVL["SPLITS"] = treeSplitMatches;
	outAVL["Fragments"] = fragMatrix;

	return outAVL;
}

/* ________________________________________________________________________________________________*/

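/* computeMeanDivergence: return the mean pairwise (patristic) distance between the leaves of
   the tree named 'treeID'. Each branch length is weighted by the number of leaf pairs whose
   path crosses that branch, and the weighted sum is divided by the number of leaf pairs. */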
function computeMeanDivergence (treeID)
{
	ExecuteCommands ("treeAVL2="+treeID+"^0;leafCount=TipCount("+treeID+");");
	multFactors = {};

	for (k=1; k<Abs(treeAVL2); k=k+1)
	{
		aNode = treeAVL2[k];
		aNodeName = aNode["Name"];
		parentIndex = aNode["Parent"];
		k2 = Abs(aNode["Children"]);
		if (k2)
		{
			currentDepth = aNode["Below"];
			multFactors[aNodeName] = currentDepth;
			if (parentIndex > 0)
			{
				pInfo = treeAVL2[parentIndex];
				pInfo ["Below"] = pInfo ["Below"] + currentDepth;
				treeAVL2[parentIndex] = pInfo;
			}
		}
		else
		{
			multFactors[aNodeName] = 1;
			pInfo = treeAVL2[parentIndex];
			pInfo ["Below"] = pInfo ["Below"] + 1;
			treeAVL2[parentIndex] = pInfo;
		}
	}

	pKeys 			= Rows(multFactors);

	for (k=0; k<Columns(pKeys); k=k+1)
	{
		aNodeName = pKeys[k];
		multFactors[aNodeName] = multFactors[aNodeName] * (leafCount-multFactors[aNodeName]);
	}

	ExecuteCommands ("bNames = BranchName   ("+treeID+",-1);");
	ExecuteCommands ("bLen   = BranchLength ("+treeID+",-1);");

	sum = 0;
	bc  = Columns(bNames)-1;

	for (k=0; k<bc; k=k+1)
	{
		aNodeName = bNames[k];
		sum = sum + bLen[k]*multFactors[aNodeName];
	}
	return sum/(leafCount*(leafCount-1)*0.5);
}

/* ________________________________________________________________________________________________*/

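/* compareTreesForSplits: compare the internal-branch splits (leaf bipartitions) of the trees
   named 'tName' and 'tName2'; returns a 1x3 matrix
   {{splits in tree 1, splits in tree 2, splits shared by both}}. */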
function compareTreesForSplits (tName, tName2)
{
	/* obtain an AVL data structure of the tree, post-order layout */

	ExecuteCommands   ("_astavl_="+tName+"^1;");
	_tree_size_ = Abs (_astavl_);


	nodeMapAVL = {};

	for (_a_node = 2; _a_node < _tree_size_; _a_node = _a_node + 1)
	{
		_node_info = _astavl_[_a_node];
		myDegree = Abs(_node_info["Children"]);

		if (myDegree == 0)
		{
			nodeName = _node_info["Name"];
			nodeMapAVL [nodeName] = Abs(nodeMapAVL)+1;
		}
	}

	split1 = getSplits(0);
	ExecuteCommands   ("_astavl_="+tName2+"^1;");
	_tree_size_ = Abs (_astavl_);
	split2 = getSplits(0);

	s1c = Abs(split1);
	s2c = Abs(split2);

	matches = {};
	match1  = {};
	match2  = {};
	for (r1 = 0; r1 < s1c; r1 = r1 + 1)
	{
		if (match1[r1] == 0)
		{
			for (r2 = 0; r2 < s2c; r2 = r2 + 1)
			{
				if (match2[r2] == 0)
				{
					splitCheck = compareSplits (split1[r1],split2[r2]);
					if (splitCheck)
					{
						mr = {{r1,r2}};
						matches [Abs(matches)] = mr;
						match1[r1] = 1;
						match2[r2] = 1;
					}
				}
			}
		}
	}


	return {{s1c,s2c,Abs(matches)}};
}

/* ________________________________________________________________________________________________*/

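/* getSplits: for the tree currently loaded into _astavl_, build one 0/1 leaf-membership vector
   (indexed via nodeMapAVL) for every internal branch with positive length, and return the
   collection of these split vectors; the 'dummy' argument is unused. */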
function getSplits (dummy)
{
	treeSplits = {};
	for (_a_node = 2; _a_node < _tree_size_; _a_node = _a_node + 1)
	{
		_node_info = _astavl_[_a_node];
		myDegree = Abs(_node_info["Children"]);
		myDepth  = _node_info["Depth"];


		if (myDegree && _node_info["Length"]>0)
		{
			nodeName = _node_info["Name"];
			aSplit = {Abs(nodeMapAVL),1};
			for (_b_node = _a_node + 1; _b_node < _tree_size_; _b_node = _b_node + 1)
			{
				_bnode_info = _astavl_[_b_node];
				if (_bnode_info["Depth"] <= myDepth)
				{
					break;
				}
				if (Abs(_bnode_info["Children"])==0)
				{
					_bnode_info = _bnode_info["Name"];
					_bnode_info = nodeMapAVL[_bnode_info];
					if (_bnode_info == 0)
					{
						fprintf (stdout, "Error: extraneous node name\n");
						return 0;
					}
					aSplit [_bnode_info-1] = 1;
				}
			}
			treeSplits [Abs(treeSplits)] = aSplit;
		}
	}
	return treeSplits;
}

/* ________________________________________________________________________________________________*/

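/* compareSplits: return a non-zero value if the two 0/1 split vectors describe the same leaf
   bipartition (1 if they are identical, -1 if one is the exact complement of the other), and
   0 otherwise. */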
function compareSplits (s1, s2)
{
	sl            = Rows(s1);
	positiveCheck = (s1[0] == s2[0]);
	for (k=1; k<sl; k=k+1)
	{
		if (s1[k] != s2[k])
		{
			if (positiveCheck)
			{
				return 0;
			}
		}
		else
		{
			if (!positiveCheck)
			{
				return 0;
			}
		}
	}
	return -1+2*positiveCheck;
}

/*--------------------------------------------------------------------------------------*/

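/* testLRT: given per-site log-likelihood vectors for the same partition under two trees,
   resample the paired columns itCount times (via Random) and return, for each replicate,
   twice the difference between the resampled log-likelihood sums; the caller sorts this
   vector to obtain a KH-style bootstrap p-value. */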
function testLRT (vec1, vec2, itCount)
{
	size1 = Columns(vec1);

	sumVec1 = {size1,1};
	jvec	= {2,size1};
	resMx1	= {itCount,1};
	resMx2	= {itCount,1};

	for (k=0; k<size1; k=k+1)
	{
		sumVec1 [k]	   = 1;
		jvec	[0][k] = vec1[k];
		jvec	[1][k] = vec2[k];
	}


	for (k=0; k<itCount; k=k+1)
	{
		resampled = Random(jvec,1);
		resampled = resampled*sumVec1;
		resMx1[k] = resampled[0];
		resMx2[k] = resampled[1];
	}

	return (resMx1-resMx2)*2;
}