1#include "binomial.ibf";
2
3/*----------------------------------------------------------------------------*/
4
5
6ChoiceList (ambChoice, "Treatment of Ambiguities",1,SKIP_NONE,
7			"Averaged","All possible resolutions are considered and averaged.",
8			"Resolved","The most frequent (for that site) resolution is chosen.");
9
10if (ambChoice<0)
11{
12	return;
13}
14
15if (useCustomCountingBias)
16{
17	incFileName = "Distances/CodonToolsMain.def";
18}
19else
20{
21	incFileName = "Distances/CodonTools.def";
22}
23
24ExecuteCommands  ("#include \""+incFileName+"\";");
25
26SetDialogPrompt ("Please specify the first simulated file:");
27
28DataSet 	  dsA 		   = ReadDataFile (PROMPT_FOR_FILE);
29FILE_BASE 				   = LAST_FILE_PATH;
30
31iterates = 0;
32
33while (iterates<1)
34{
35	fprintf (stdout, "\nHow many data files should I process?");
36	fscanf  (stdin, "Number", iterates);
37}
38
39it 	  = 2;
40itidx = 0;
41
42tailEnds = -1;
43
44while ((tailEnds<=0) || (tailEnds >=.5))
45{
46	fprintf (stdout, "\nDistribution tails to tabulate (from 0 to 0.5)?");
47	fscanf  (stdin, "Number", tailEnds);
48}
49
50tailEnds = tailEnds * 100;
51
52ESResultMatrix = {iterates,filteredData.sites};
53ENResultMatrix = {iterates,filteredData.sites};
54OSResultMatrix = {iterates,filteredData.sites};
55ONResultMatrix = {iterates,filteredData.sites};
56dNmdS		   = {iterates,filteredData.sites};
57pValuesPS	   = {iterates,filteredData.sites};
58pValuesNS	   = {iterates,filteredData.sites};
59
60while (1)
61{
62	DataSetFilter filteredDataA = CreateFilter (dsA,3,"","",GeneticCodeExclusions);
63
64	HarvestFrequencies			  (observedCEFV,filteredData,3,3,0);
65	seqToBranchMap 				= {stateCharCount,1};
66
67	DataSet		   dsJoint = Combine(dsA,ds);
68	DataSetFilter filteredDataJ = CreateFilter (dsJoint,3,"","",GeneticCodeExclusions);
69
70	hShift = 0;
71
72	for (k=0; k<64; k=k+1)
73	{
74		if (_Genetic_Code[k]==10)
75		{
76			hShift = hShift+1;
77		}
78		else
79		{
80			seqToBranchMap[k-hShift] = observedCEFV[k];
81		}
82	}
83
84	observedCEFV = seqToBranchMap;
85
86
87	branchNames = BranchName (givenTree,-1);
88	h = Columns (branchNames);
89
90	seqToBranchMap 	= {h, 2};
91	/* maps sequence names to branch order in column 1
92	   and the other way around in column 2 */
93
94	for (k=0; k<filteredData.species; k=k+1)
95	{
96		GetString (seqName, filteredData, k);
97		seqToBranchMap[k][0] = -1;
98		for (v=0; v<h; v=v+1)
99		{
100			if (branchNames[v] % seqName)
101			{
102				seqToBranchMap[k][0] = v;
103				seqToBranchMap[v][1] = k;
104				break;
105			}
106		}
107	}
108
109
110	seqToBranchMap[filteredData.species][0] = h-1;
111	seqToBranchMap[h-1][1] = filteredData.species;
112
113	for (k=1; k<filteredDataA.species; k=k+1)
114	{
115		GetString (seqName, filteredDataA, k);
116		seqToBranchMap[filteredData.species+k][0] = -1;
117		for (v=0; v<h; v=v+1)
118		{
119			if (branchNames[v] % seqName)
120			{
121				seqToBranchMap[k+filteredData.species][0] = v;
122				seqToBranchMap[v][1] = k+filteredData.species;
123				break;
124			}
125		}
126	}
127
128	/* total tree length */
129
130	totalTreeLength = 0;
131	branchLengths   = BranchLength(givenTree,-1);
132
133	for (v=Columns(branchLengths)-1; v>=0; v=v-1)
134	{
135		totalTreeLength = totalTreeLength + branchLengths[v];
136	}
137
138	/* get codon matrix */
139
140	filteredData.unique_sites = Rows(filteredData.site_freqs) * Columns(filteredData.site_freqs);
141
142	codonInfo  = {filteredData.species, filteredData.unique_sites};
143	codonInfo2 = {filteredDataA.species, filteredDataA.unique_sites};
144
145	GetDataInfo    (dupInfo, filteredData);
146	GetDataInfo	   (dupInfoA, filteredDataA);
147
148	matrixTrick  = {1,stateCharCount};
149	matrixTrick2 = {1,stateCharCount};
150
151	for (h=Columns(matrixTrick)-1; h>=0; h=h-1)
152	{
153		matrixTrick  [h] = h;
154		matrixTrick2 [h] = 1;
155	}
156
157	for (v=0; v<filteredData.unique_sites;v=v+1)
158	{
159		for (h=0; h<filteredData.species;h=h+1)
160		{
161			GetDataInfo (siteInfo, filteredData, h, v);
162			_SITE_ES_COUNT = matrixTrick2 * siteInfo;
163			if (_SITE_ES_COUNT[0] == 1)
164			{
165				siteInfo = matrixTrick * siteInfo;
166				codonInfo[h][v] = siteInfo[0];
167			}
168			else
169			{
170				codonInfo[h][v] = -1;
171			}
172		}
173	}
174
175	for (v=0; v<filteredDataA.unique_sites;v=v+1)
176	{
177		for (h=0; h<filteredDataA.species;h=h+1)
178		{
179			GetDataInfo (siteInfo, filteredDataA, h, v);
180			siteInfo = matrixTrick * siteInfo;
181			codonInfo2[h][v] = siteInfo[0];
182		}
183	}
184
185	_SITE_RESULTS = {4,filteredData.sites};
186	flatTreeRep	  = Abs (givenTree);
187
188	resultMatrix = {filteredData.sites,4};
189
190	for (v=0; v<filteredData.sites;v=v+1)
191	/*for (v=0; v<1;v=v+1)*/
192	{
193		_SITE_ES_COUNT = {stateCharCount,stateCharCount};
194		_SITE_EN_COUNT = {stateCharCount,stateCharCount};
195		_SITE_OS_COUNT = {stateCharCount,stateCharCount};
196		_SITE_ON_COUNT = {stateCharCount,stateCharCount};
197
198		/* do internal nodes first */
199
200		k = filteredData.species+1;
201
202		/* first sequence is always the root */
203		c1 = dupInfoA[v];
204		if (codonInfo2[1][c1] >= stateCharCount)
205		{
206			continue;
207		}
208		for (h=1; h<filteredDataA.species; h=h+1)
209		{
210			p1 = seqToBranchMap[k][0];
211			p2 = flatTreeRep[p1];
212			p2 = seqToBranchMap[p2][1]-filteredData.species;
213
214			branchFactor = branchLengths[p1]/totalTreeLength;
215
216			cd1 = codonInfo2[h] [c1];
217			cd2 = codonInfo2[p2][c1];
218
219			_SITE_OS_COUNT[cd1][cd2] = _SITE_OS_COUNT[cd1][cd2] + 1;
220			_SITE_ON_COUNT[cd1][cd2] = _SITE_ON_COUNT[cd1][cd2] + 1;
221			_SITE_ES_COUNT[cd1][cd2] = _SITE_ES_COUNT[cd1][cd2] + branchFactor;
222			_SITE_EN_COUNT[cd1][cd2] = _SITE_EN_COUNT[cd1][cd2] + branchFactor;
223
224			k=k+1;
225		}
226
227		/* now do the leaves */
228
229		observedCEFV = {{0}};
230
231		c2 = dupInfo[v];
232		for (h=0; h<filteredData.species; h=h+1)
233		{
234			p1 = seqToBranchMap[h][0];
235			p2 = flatTreeRep[p1];
236			p2 = seqToBranchMap[p2][1]-filteredData.species;
237
238			cd2 = codonInfo2[p2][c1];
239			cd1 = codonInfo[h] [c2];
240
241			branchFactor = branchLengths[p1]/totalTreeLength;
242
243			if (cd1>=0)
244			/* no ambiguities */
245			{
246				_SITE_OS_COUNT[cd1][cd2] = _SITE_OS_COUNT[cd1][cd2] + 1;
247				_SITE_ON_COUNT[cd1][cd2] = _SITE_ON_COUNT[cd1][cd2] + 1;
248				_SITE_ES_COUNT[cd1][cd2] = _SITE_ES_COUNT[cd1][cd2] + branchFactor;
249				_SITE_EN_COUNT[cd1][cd2] = _SITE_EN_COUNT[cd1][cd2] + branchFactor;
250			}
251			else
252			/* ambiguities here */
253			{
254				GetDataInfo    (ambInfo, filteredData, h, c2);
255				if (Rows(observedCEFV) == 1)
256				{
257					siteFilter = ""+(v*3)+"-"+(v*3+2);
258					DataSetFilter filteredDataSite = CreateFilter (dsJoint,3,siteFilter,"",GeneticCodeExclusions);
259					HarvestFrequencies			  (observedCEFV,filteredDataSite,3,3,0);
260					tempMx = {stateCharCount,1};
261
262					hShift = 0;
263
264					for (k=0; k<64; k=k+1)
265					{
266						if (_Genetic_Code[k]==10)
267						{
268							hShift = hShift+1;
269						}
270						else
271						{
272							tempMx[k-hShift] = observedCEFV[k];
273						}
274					}
275					observedCEFV = tempMx;
276				}
277
278				ambInfo  	 = ambInfo$observedCEFV;
279				if (ambChoice)
280				{
281					weightFactor = 0;
282					tempMx = -1;
283					for (k=0; k<stateCharCount; k=k+1)
284					{
285						if (ambInfo[k]>weightFactor)
286						{
287							weightFactor = ambInfo[k];
288							tempMx = k;
289						}
290					}
291					if (tempMx>=0)
292					{
293						_SITE_OS_COUNT[tempMx][cd2] = _SITE_OS_COUNT[tempMx][cd2] + 1;
294						_SITE_ON_COUNT[tempMx][cd2] = _SITE_ON_COUNT[tempMx][cd2] + 1 ;
295						_SITE_ES_COUNT[tempMx][cd2] = _SITE_ES_COUNT[tempMx][cd2] + branchFactor;
296						_SITE_EN_COUNT[tempMx][cd2] = _SITE_EN_COUNT[tempMx][cd2] + branchFactor;
297					}
298				}
299				else
300				{
301					weightFactor = matrixTrick2*ambInfo;
302					weightFactor = weightFactor[0];
303					if (weightFactor)
304					{
305						ambInfo		 = ambInfo * (1/weightFactor);
306						for (k=0; k<stateCharCount; k=k+1)
307						{
308							weightFactor = ambInfo[k];
309							if (weightFactor>0)
310							{
311								_SITE_OS_COUNT[k][cd2] = _SITE_OS_COUNT[k][cd2] + weightFactor;
312								_SITE_ON_COUNT[k][cd2] = _SITE_ON_COUNT[k][cd2] + weightFactor;
313								_SITE_ES_COUNT[k][cd2] = _SITE_ES_COUNT[k][cd2] + weightFactor*branchFactor;
314								_SITE_EN_COUNT[k][cd2] = _SITE_EN_COUNT[k][cd2] + weightFactor*branchFactor;
315							}
316						}
317					}
318				}
319			}
320		}
321
322		_SITE_OS_COUNT = matrixTrick2*(_OBSERVED_S_$_SITE_OS_COUNT)*Transpose(matrixTrick2);
323		_SITE_ON_COUNT = matrixTrick2*(_OBSERVED_NS_$_SITE_ON_COUNT)*Transpose(matrixTrick2);
324		_SITE_ES_COUNT = matrixTrick2*(_PAIRWISE_S_$_SITE_ES_COUNT)*Transpose(matrixTrick2);
325		_SITE_EN_COUNT = matrixTrick2*(_PAIRWISE_NS_$_SITE_EN_COUNT)*Transpose(matrixTrick2);
326
327		OSResultMatrix[itidx][v] = _SITE_OS_COUNT[0];
328		ONResultMatrix[itidx][v] = _SITE_ON_COUNT[0];
329		ESResultMatrix[itidx][v] = _SITE_ES_COUNT[0];
330		ENResultMatrix[itidx][v] = _SITE_EN_COUNT[0];
331
332		weight = _SITE_EN_COUNT[0]+_SITE_ES_COUNT[0];
333
334		p = _SITE_ES_COUNT[0]/weight;
335		p2 = OSResultMatrix[itidx][v]+ONResultMatrix[itidx][v];
336		ds = OSResultMatrix[itidx][v]/ESResultMatrix[itidx][v];
337		dn = ONResultMatrix[itidx][v]/ENResultMatrix[itidx][v];
338
339		if (ESResultMatrix[itidx][v])
340		{
341			dNmdS[itidx][v] = dn-ds;
342		}
343
344		if (p2)
345		{
346			pv  = extendedBinTail (p2,p,OSResultMatrix[itidx][v]);
347			if (OSResultMatrix[itidx][v]>=1)
348			{
349				pv2 = 1-pv+(pv-extendedBinTail(p2,p,OSResultMatrix[itidx][v]-1));
350			}
351			else
352			{
353				pv2 = 1-pv+(pv-extendedBinTail (p2,p,0));
354			}
355			pValuesPS[itidx][v] = pv;
356			pValuesNS[itidx][v] = pv2;
357		}
358	}
359	if (it <= iterates)
360	{
361		fprintf (stdout, "\nIterate ", it, "/", iterates);
362		fName = FILE_BASE+"_"+it;
363		DataSet 	  dsA 		   = ReadDataFile (fName);
364		it = it+1;
365		itidx = itidx + 1;
366	}
367	else
368	{
369		break;
370	}
371}
372
373fprintf (stdout, "\n");
374
375/* generate medians and tailEnd% and (100-tailEnd)% profiles */
376
377statisticalOverview = {filteredData.sites,12};
378
379tailEnd1 = " "+tailEnds+"%";
380tailEnd2 = " "+(100-tailEnds)+"%";
381
382sovLabels = {{"","ES Median","","","EN Median","","","OS Median","","","ON Median",""}};
383
384sovLabels[0]  = "ES"+tailEnd1;
385sovLabels[2]  = "ES"+tailEnd2;
386sovLabels[3]  = "EN"+tailEnd1;
387sovLabels[5]  = "EN"+tailEnd2;
388sovLabels[6]  = "OS"+tailEnd1;
389sovLabels[8]  = "OS"+tailEnd2;
390sovLabels[9]  = "ON"+tailEnd1;
391sovLabels[11] = "ON"+tailEnd2;
392
393ColumnToSort = {iterates,1};
394
395lowBar  = (iterates*tailEnds*0.01)$1;
396highBar = (iterates*(100-tailEnds)*0.01)$1;
397
398/*if ((iterates*9)%10==0)
399{
400	highBar = highBar-1;
401}*/
402
403median	= iterates$2;
404doAv	= (iterates%2==0);
405
406for (v=0;v<filteredData.sites;v=v+1)
407{
408	for (h=0;h<iterates;h=h+1)
409	{
410		ColumnToSort[h]=ESResultMatrix[h][v];
411	}
412	ColumnToSort = ColumnToSort%0;
413	statisticalOverview[v][0] = ColumnToSort[lowBar];
414	statisticalOverview[v][2] = ColumnToSort[highBar];
415	if (doAv)
416	{
417		statisticalOverview[v][1] = (ColumnToSort[median]+ColumnToSort[median+1])/2;
418	}
419	else
420	{
421		statisticalOverview[v][1] = ColumnToSort[median];
422	}
423}
424
425for (v=0;v<filteredData.sites;v=v+1)
426{
427	for (h=0;h<iterates;h=h+1)
428	{
429		ColumnToSort[h]=ENResultMatrix[h][v];
430	}
431	/*dummy = doTheSort (iterates);*/
432	ColumnToSort = ColumnToSort%0;
433	statisticalOverview[v][3] = ColumnToSort[lowBar];
434	statisticalOverview[v][5] = ColumnToSort[highBar];
435	if (doAv)
436	{
437		statisticalOverview[v][4] = (ColumnToSort[median]+ColumnToSort[median+1])/2;
438	}
439	else
440	{
441		statisticalOverview[v][4] = ColumnToSort[median];
442	}
443}
444
445for (v=0;v<filteredData.sites;v=v+1)
446{
447	for (h=0;h<iterates;h=h+1)
448	{
449		ColumnToSort[h]=OSResultMatrix[h][v];
450	}
451	ColumnToSort = ColumnToSort%0;
452	statisticalOverview[v][6] = ColumnToSort[lowBar];
453	statisticalOverview[v][8] = ColumnToSort[highBar];
454	if (doAv)
455	{
456		statisticalOverview[v][7] = (ColumnToSort[median]+ColumnToSort[median+1])/2;
457	}
458	else
459	{
460		statisticalOverview[v][7] = ColumnToSort[median];
461	}
462}
463
464for (v=0;v<filteredData.sites;v=v+1)
465{
466	for (h=0;h<iterates;h=h+1)
467	{
468		ColumnToSort[h]=ONResultMatrix[h][v];
469	}
470	ColumnToSort = ColumnToSort%0;
471	statisticalOverview[v][9] = ColumnToSort[lowBar];
472	statisticalOverview[v][11] = ColumnToSort[highBar];
473	if (doAv)
474	{
475		statisticalOverview[v][10] = (ColumnToSort[median]+ColumnToSort[median+1])/2;
476	}
477	else
478	{
479		statisticalOverview[v][10] = ColumnToSort[median];
480	}
481}
482
483selLabelMatrix = {1,filteredData.sites};
484
485for (h=1; h<=filteredData.sites;h=h+1)
486{
487	selLabelMatrix[h-1] = "Site " + h;
488}
489
490OpenWindow (CHARTWINDOW,{{"Simulated ES"}
491						   {"selLabelMatrix"},
492						   {"ESResultMatrix"},
493						   {"None"},
494						   {"Index"},
495						   {""},
496						   {""},
497						   {""},
498						   {""},
499						   {"0"}},
500						   "(SCREEN_WIDTH-60)/2;(SCREEN_HEIGHT-60)/2;15;30");
501
502OpenWindow (CHARTWINDOW,{{"Simulated EN"}
503						   {"selLabelMatrix"},
504						   {"ENResultMatrix"},
505						   {"None"},
506						   {"Index"},
507						   {""},
508						   {""},
509						   {""},
510						   {""},
511						   {"0"}},
512						   "(SCREEN_WIDTH-60)/2;(SCREEN_HEIGHT-60)/2;30+(SCREEN_WIDTH-60)/2;30");
513
514OpenWindow (CHARTWINDOW,{{"Simulated OS"}
515						   {"selLabelMatrix"},
516						   {"OSResultMatrix"},
517						   {"None"},
518						   {"Index"},
519						   {""},
520						   {""},
521						   {""},
522						   {""},
523						   {"0"}},
524						   "(SCREEN_WIDTH-60)/2;(SCREEN_HEIGHT-60)/2;15;45+(SCREEN_HEIGHT-60)/2");
525
526OpenWindow (CHARTWINDOW,{{"Simulated ON"}
527						   {"selLabelMatrix"},
528						   {"ONResultMatrix"},
529						   {"None"},
530						   {"Index"},
531						   {""},
532						   {""},
533						   {""},
534						   {""},
535						   {"0"}},
536						   "(SCREEN_WIDTH-60)/2;(SCREEN_HEIGHT-60)/2;30+(SCREEN_WIDTH-60)/2;45+(SCREEN_HEIGHT-60)/2");
537
538OpenWindow (CHARTWINDOW,{{"Summary"}
539						   {"sovLabels"},
540						   {"statisticalOverview"},
541						   {"Line Plot"},
542						   {"Index"},
543						   {""},
544						   {""},
545						   {""},
546						   {""},
547						   {"0"}},
548						   "(SCREEN_WIDTH-200);(SCREEN_HEIGHT-200);100;100");
549
550