1ChoiceList (testType,"Test for",1,SKIP_NONE,"Mean branch length","Compare mean branch lengths between two or more non-nested clades.",
2				     "Mean pairwise divergence","Compare mean within-clade pairwise divergence between two or more non-nested clades.");
3
4if (testType < 0)
5{
6	return 0;
7}
8
9if (testType)
10{
11	echoString = "mean within-clade divergence";
12}
13else
14{
15	echoString = "mean within-clade branch length (or component branch length)";
16}
17ChoiceList (dataType,"Data type",1,SKIP_NONE,"Nucleotide/Protein","Nucleotide or amino-acid (protein).",
18				     "Codon","Codon (several available genetic codes).");
19
20if (dataType<0)
21{
22	return;
23}
24
25if (dataType)
26{
27	NICETY_LEVEL = 3;
28	SetDialogPrompt ("Please choose a codon data file:");
29	incFileName = HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TemplateModels"+DIRECTORY_SEPARATOR+"chooseGeneticCode.def";
30	ExecuteCommands  ("#include \""+incFileName+"\";");
31}
32else
33{
34	SetDialogPrompt ("Please choose a nucleotide or amino-acid data file:");
35}
36
37DataSet ds = ReadDataFile (PROMPT_FOR_FILE);
38
39if (dataType)
40{
41	DataSetFilter filteredData = CreateFilter (ds,3,"","",GeneticCodeExclusions);
42}
43else
44{
45	DataSetFilter filteredData = CreateFilter (ds,1);
46}
47
48fprintf (stdout,"\n______________READ THE FOLLOWING DATA______________\n",ds);
49
50
51_DO_TREE_REBALANCE_ = 0;
52
53SelectTemplateModel(filteredData);
54
55if (Rows("LAST_MODEL_PARAMETER_LIST")>1)
56{
57	ChoiceList (parameter2Constrain, "Parameter(s) to constrain:",1,SKIP_NONE,LAST_MODEL_PARAMETER_LIST);
58
59	if (parameter2Constrain<0)
60	{
61		return 0;
62	}
63	if (parameter2Constrain==0)
64	{
65		fprintf (stdout, "ERROR: Multiple parameter constraints are not supported by this analysis; sorry!\n");
66		return 0;
67	}
68}
69else
70{
71	parameter2Constrain = 1;
72}
73
74GetString (funnyString,LAST_MODEL_PARAMETER_LIST,parameter2Constrain-1);
75
76_DO_TREE_REBALANCE_ = 0;
77incFileName = HYPHY_LIB_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"queryTree.bf";
78ExecuteCommands  ("#include \""+incFileName+"\";");
79
80treeAVL  = givenTree ^ 1; /* pre-order traversal */
81
82if (testType)
83{
84	treeAVL2 = givenTree ^ 0; /* post-order traversal */
85
86	multFactors = {};
87	for (k=1; k<Abs(treeAVL2); k=k+1)
88	{
89		aNode = treeAVL2[k];
90		aNodeName = aNode["Name"];
91		parentIndex = aNode["Parent"];
92		k2 = Abs(aNode["Children"]);
93		if (k2)
94		{
95			currentDepth = aNode["Below"];
96			multFactors[aNodeName] = currentDepth;
97			if (parentIndex > 0)
98			{
99				pInfo = treeAVL2[parentIndex];
100				pInfo ["Below"] = pInfo ["Below"] + currentDepth;
101				treeAVL2[parentIndex] = pInfo;
102			}
103		}
104		else
105		{
106			multFactors[aNodeName] = 1;
107			pInfo = treeAVL2[parentIndex];
108			pInfo ["Below"] = pInfo ["Below"] + 1;
109			treeAVL2[parentIndex] = pInfo;
110		}
111
112	}
113	treeAVL2 		= 0;
114}
115
116branchNameList 	= BranchName (givenTree,-1);
117mapToBL		    = {};
118
119for (k=0; k<Columns(branchNameList); k=k+1)
120{
121	aNode = branchNameList[k];
122	mapToBL [aNode] = k;
123}
124
125cladeNodeLists 	= {};
126cladeIDLists    = {};
127cladeSumVar		= {};
128nameToAVLIdx	= {};
129cladeLeafCount  = {};
130
131for (k=1; k<Abs(treeAVL); k=k+1)
132{
133	aNode 	  = treeAVL[k];
134	aNodeName = aNode["Name"]; /*upcase*/
135	nameToAVLIdx [aNodeName] = k;
136	match = (aNodeName&&1)$"^CLADE[0-9]+$";
137	if (match[0]>=0 && Abs (aNode["Children"])>0)
138	/* valid clade */
139	{
140		currentDepth = aNode["Depth"];
141		nodeList = {};
142
143		leafCount = 0;
144
145		for (k2=k+1; k2<Abs(treeAVL); k2=k2+1)
146		{
147			aNode2 = treeAVL[k2];
148			if (aNode2["Depth"] <= currentDepth)
149			{
150				break;
151			}
152			if (Abs(aNode2["Children"]) == 0)
153			{
154				leafCount = leafCount + 1;
155			}
156			minCladeIndex = aNode2["Name"];
157			nodeList[Abs(nodeList)] = minCladeIndex;
158			nameToAVLIdx [minCladeIndex] = k2;
159		}
160
161		k = k2-1;
162		cladesFound = Abs(nodeList);
163		cladeNodeLists[aNodeName] = nodeList;
164		indexMatrix = {cladesFound,1};
165		for (k2 = 0; k2 < cladesFound; k2=k2+1)
166		{
167			aNode2 = nodeList[k2];
168			indexMatrix [k2] = mapToBL[aNode2];
169		}
170
171		cladeIDLists[aNodeName] = indexMatrix;
172		cladeLeafCount[aNodeName] = leafCount;
173	}
174}
175
176
177cladesFound = Abs (cladeNodeLists);
178if (cladesFound < 2)
179{
180	fprintf (stdout, "ERROR: Couldn't find 2 or more (non-nested) clades with roots labeled by CladeN, where N is a number. Found ", cladesFound, " labeled clades\n");
181	return 0;
182}
183
184cladeRoots = Rows (cladeNodeLists);
185
186minCladeSize  = 1e100;
187minCladeIndex = 0;
188
189mFactors  = {};
190bmFactors = {};
191
192fprintf (stdout, "\n\nFound ", cladesFound, " labeled clades\n");
193for (k=0; k<cladesFound; k=k+1)
194{
195	cladeName = cladeRoots[k];
196	cladeSize =  Abs(cladeNodeLists[cladeName]);
197	leafCount =  Abs(cladeLeafCount[cladeName]);
198
199	fprintf (stdout, "Clade rooted at ", cladeName, " with ", cladeSize , " branches and ", leafCount, " leaves.\n");
200	nodeList = cladeNodeLists[cladeName];
201
202	if (testType)
203	{
204		if (minCladeSize > leafCount)
205		{
206			minCladeSize = leafCount;
207			minCladeIndex = k;
208		}
209	}
210	else
211	{
212		if (minCladeSize > cladeSize)
213		{
214			minCladeSize = cladeSize;
215			minCladeIndex = k;
216		}
217	}
218
219	bmFactor = {Columns(branchNameList),1};
220	cString = "";
221	cString * 256;
222	for (k2 = 0; k2<Abs(nodeList); k2=k2+1)
223	{
224		aNodeName = nodeList[k2];
225		aNode = nameToAVLIdx[aNodeName];
226		aNode = treeAVL[aNode];
227		if (testType)
228		{
229			cCount = Abs(aNode["Children"]);
230			if (cCount)
231			{
232				cCount = multFactors[aNodeName];
233				mFactors[aNodeName] = (leafCount-cCount)*cCount
234			}
235			else
236			{
237				mFactors[aNodeName] = leafCount-1;
238			}
239		}
240		else
241		{
242			mFactors[aNodeName] = 1;
243		}
244		k3 = mapToBL[aNodeName];
245		bmFactor [k3] = mFactors[aNodeName];
246		if (k2)
247		{
248			cString * "+";
249		}
250		cString * ("TREE_PLACE_HOLDER."+aNodeName + "." + funnyString + "*" + mFactors[aNodeName]);
251	}
252	cString * 0;
253	cladeSumVar[cladeName] = cString;
254	bmFactors[cladeName] = bmFactor;
255
256}
257
258
259fprintf (stdout, "1). Running an unconstrained fit\n");
260
261Tree aTree = treeString;
262
263USE_LAST_RESULTS = 0;
264
265LikelihoodFunction lf = (filteredData,aTree);
266
267Optimize (res_free,lf);
268fprintf  (stdout, lf, "\n");
269
270blVector = BranchLength (aTree,-1);
271ReportMeans (blVector);
272
273USE_LAST_RESULTS = 1;
274
275fprintf (stdout, "2). Running a completely constrained fit\n");
276
277Tree mirrorTree = treeString;
278
279
280ReplicateConstraint ("this1.?.?:=this2.?.?",mirrorTree,aTree);
281
282ClearConstraints (mirrorTree);
283
284for (k=0; k<cladesFound; k=k+1)
285{
286	if (k==minCladeIndex)
287	{
288		smallCladeName = cladeRoots[minCladeIndex];
289		cladeConstraint = cladeSumVar[smallCladeName] ^ {{"TREE_PLACE_HOLDER","aTree"}};
290		ExecuteCommands ("global SUM_"+smallCladeName+":="+cladeConstraint+";");
291	}
292	else
293	{
294		cladeName = cladeRoots[k];
295		cladeConstraint = cladeSumVar[cladeName] ^ {{"TREE_PLACE_HOLDER","mirrorTree"}};
296		ExecuteCommands ("global SUM_"+cladeName+":="+cladeConstraint+";");
297	}
298}
299
300
301for (k=0; k<cladesFound; k=k+1)
302{
303	if (k!=minCladeIndex)
304	{
305		cladeName = cladeRoots[k];
306		if (testType)
307		{
308			cladeSize =  cladeLeafCount[cladeName];
309			ExecuteCommands ("ReplicateConstraint (\"this1.?."+funnyString+":= SUM_"+smallCladeName+"*"+(cladeSize*(cladeSize-1)/minCladeSize/(minCladeSize-1))+"/SUM_"+cladeName+"*this2.?."+funnyString+"\",aTree."+cladeName+",mirrorTree."+cladeName+");");
310		}
311		else
312		{
313			cladeSize =  Abs(cladeNodeLists[cladeName]);
314			ExecuteCommands ("ReplicateConstraint (\"this1.?."+funnyString+":= SUM_"+smallCladeName+"*"+(cladeSize/minCladeSize)+"/SUM_"+cladeName+"*this2.?."+funnyString+"\",aTree."+cladeName+",mirrorTree."+cladeName+");");
315		}
316		ExecuteCommands ("aTree."+cladeName+"."+funnyString+"=0.1;");
317	}
318}
319
320LikelihoodFunction lf = (filteredData,aTree);
321Optimize (res_contsr,lf);
322
323fprintf  (stdout, "\n\nLRT for difference in mean divergences\n",
324				  "\nLR = ", 2*(res_free[1][0]-res_contsr[1][0]),
325				  "\np-value = ", 1-CChi2(2*(res_free[1][0]-res_contsr[1][0]),cladesFound-1),"\n");
326
327fprintf  (stdout, lf, "\n");
328
329blVector = BranchLength (aTree,-1);
330ReportMeans (blVector);
331
332comparisonIndex = 3;
333
334ClearConstraints (aTree);
335
336
337if (cladesFound > 2)
338{
339	for (c1 = 0; c1<cladesFound-1; c1 = c1+1)
340	{
341		smallCladeName = cladeRoots[c1];
342		if (testType)
343		{
344			minCladeSize =  cladeLeafCount[smallCladeName];
345		}
346		else
347		{
348			minCladeSize =  Abs(cladeNodeLists[smallCladeName]);
349		}
350
351		cladeConstraint = cladeSumVar[smallCladeName] ^ {{"TREE_PLACE_HOLDER","aTree"}};
352		ExecuteCommands ("global SUM_"+smallCladeName+":="+cladeConstraint+";");
353
354		for (c2 = c1+1; c2 < cladesFound; c2=c2+1)
355		{
356			cladeName = cladeRoots[c2];
357
358			cladeConstraint = cladeSumVar[cladeName] ^ {{"TREE_PLACE_HOLDER","mirrorTree"}};
359			ExecuteCommands ("global SUM_"+cladeName+":="+cladeConstraint+";");
360
361			if (testType)
362			{
363				cladeSize =  cladeLeafCount[cladeName];
364				ExecuteCommands ("ReplicateConstraint (\"this1.?."+funnyString+":= SUM_"+smallCladeName+"*"+(cladeSize*(cladeSize-1)/minCladeSize/(minCladeSize-1))+"/SUM_"+cladeName+"*this2.?."+funnyString+"\",aTree."+cladeName+",mirrorTree."+cladeName+");");
365			}
366			else
367			{
368				cladeSize =  Abs(cladeNodeLists[cladeName]);
369				ExecuteCommands ("ReplicateConstraint (\"this1.?."+funnyString+":= SUM_"+smallCladeName+"*"+(cladeSize/minCladeSize)+"/SUM_"+cladeName+"*this2.?."+funnyString+"\",aTree."+cladeName+",mirrorTree."+cladeName+");");
370			}
371
372			ExecuteCommands ("aTree."+cladeName+".t=0.1;");
373			fprintf (stdout, comparisonIndex, "). Running a constrained fit on clades ", smallCladeName, " and ", cladeName, "\n");
374
375			LikelihoodFunction lf = (filteredData,aTree);
376			Optimize (res_2c,lf);
377
378fprintf  (stdout, "\n\nLRT for difference in mean divergences\n",
379				  "\nLR = ", 2*(res_free[1][0]-res_2c[1][0]),
380				  "\np-value = ", 1-CChi2(2*(res_free[1][0]-res_2c[1][0]),1),"\n");
381
382			fprintf  (stdout, lf, "\n");
383
384			blVector = BranchLength (aTree,-1);
385			ReportMeans (blVector);
386			comparisonIndex = comparisonIndex + 1;
387
388			ClearConstraints (aTree);
389		}
390
391	}
392}
393
394USE_LAST_RESULTS = 0;
395
396/*--------------------------------------------------------------------------------------------------------------*/
397
398function ReportMeans (branches)
399{
400	for (k=0; k<cladesFound; k=k+1)
401	{
402		cladeName = cladeRoots[k];
403		sum = branches*bmFactors[cladeName];
404
405		if (testType)
406		{
407			leafCount = cladeLeafCount[cladeName];
408			sum = sum[0]*2/leafCount/(leafCount-1);
409		}
410		else
411		{
412			cladeSize =  Abs(cladeNodeLists[cladeName]);
413			sum = sum[0]/cladeSize;
414		}
415
416		fprintf (stdout, "Clade rooted at ", cladeName, " has ", echoString, " of ", sum, "\n");
417
418	}
419	return 0;
420}
421