1 #include "muscle.h"
2 #include "tree.h"
3 #include "edgelist.h"
4 
5 #define TRACE	0
6 
7 struct EdgeInfo
8 	{
EdgeInfoEdgeInfo9 	EdgeInfo()
10 		{
11 		m_bSet = false;
12 		}
13 // Is data in this structure valid (i.e, has been set)?
14 	bool m_bSet;
15 
16 // Node at start of this edge
17 	unsigned m_uNode1;
18 
19 // Node at end of this edge
20 	unsigned m_uNode2;
21 
22 // Maximum distance from Node2 to a leaf
23 	double m_dMaxDistToLeaf;
24 
25 // Sum of distances from Node2 to all leaves under Node2
26 	double m_dTotalDistToLeaves;
27 
28 // Next node on path from Node2 to most distant leaf
29 	unsigned m_uMaxStep;
30 
31 // Most distant leaf from Node2 (used for debugging only)
32 	unsigned m_uMostDistantLeaf;
33 
34 // Number of leaves under Node2
35 	unsigned m_uLeafCount;
36 	};
37 
38 static void RootByMidLongestSpan(const Tree &tree, EdgeInfo **EIs,
39   unsigned *ptruNode1, unsigned *ptruNode2,
40   double *ptrdLength1, double *ptrdLength2);
41 static void RootByMinAvgLeafDist(const Tree &tree, EdgeInfo **EIs,
42   unsigned *ptruNode1, unsigned *ptruNode2,
43   double *ptrdLength1, double *ptrdLength2);
44 
ListEIs(EdgeInfo ** EIs,unsigned uNodeCount)45 static void ListEIs(EdgeInfo **EIs, unsigned uNodeCount)
46 	{
47 	Log("Node1  Node2  MaxDist  TotDist  MostDist  LeafCount  Step\n");
48 	Log("-----  -----  -------  -------  --------  ---------  ----\n");
49 	//    12345  12345  1234567  1234567  12345678  123456789
50 
51 	for (unsigned uNode = 0; uNode < uNodeCount; ++uNode)
52 		for (unsigned uNeighbor = 0; uNeighbor < 3; ++uNeighbor)
53 			{
54 			const EdgeInfo &EI = EIs[uNode][uNeighbor];
55 			if (!EI.m_bSet)
56 				continue;
57 			Log("%5u  %5u  %7.3g  %7.3g  %8u  %9u",
58 			  EI.m_uNode1,
59 			  EI.m_uNode2,
60 			  EI.m_dMaxDistToLeaf,
61 			  EI.m_dTotalDistToLeaves,
62 			  EI.m_uMostDistantLeaf,
63 			  EI.m_uLeafCount);
64 			if (NULL_NEIGHBOR != EI.m_uMaxStep)
65 				Log("  %4u", EI.m_uMaxStep);
66 			Log("\n");
67 			}
68 	}
69 
CalcInfo(const Tree & tree,unsigned uNode1,unsigned uNode2,EdgeInfo ** EIs)70 static void CalcInfo(const Tree &tree, unsigned uNode1, unsigned uNode2, EdgeInfo **EIs)
71 	{
72 	const unsigned uNeighborIndex = tree.GetNeighborSubscript(uNode1, uNode2);
73 	EdgeInfo &EI = EIs[uNode1][uNeighborIndex];
74 	EI.m_uNode1 = uNode1;
75 	EI.m_uNode2 = uNode2;
76 
77 	if (tree.IsLeaf(uNode2))
78 		{
79 		EI.m_dMaxDistToLeaf = 0;
80 		EI.m_dTotalDistToLeaves = 0;
81 		EI.m_uMaxStep = NULL_NEIGHBOR;
82 		EI.m_uMostDistantLeaf = uNode2;
83 		EI.m_uLeafCount = 1;
84 		EI.m_bSet = true;
85 		return;
86 		}
87 
88 	double dMaxDistToLeaf = -1e29;
89 	double dTotalDistToLeaves = 0.0;
90 	unsigned uLeafCount = 0;
91 	unsigned uMostDistantLeaf = NULL_NEIGHBOR;
92 	unsigned uMaxStep = NULL_NEIGHBOR;
93 
94 	const unsigned uNeighborCount = tree.GetNeighborCount(uNode2);
95 	for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)
96 		{
97 		const unsigned uNode3 = tree.GetNeighbor(uNode2, uSub);
98 		if (uNode3 == uNode1)
99 			continue;
100 		const EdgeInfo &EINext = EIs[uNode2][uSub];
101 		if (!EINext.m_bSet)
102 			Quit("CalcInfo: internal error, dist %u->%u not known",
103 				uNode2, uNode3);
104 
105 
106 		uLeafCount += EINext.m_uLeafCount;
107 
108 		const double dEdgeLength = tree.GetEdgeLength(uNode2, uNode3);
109 		const double dTotalDist = EINext.m_dTotalDistToLeaves +
110 		  EINext.m_uLeafCount*dEdgeLength;
111 		dTotalDistToLeaves += dTotalDist;
112 
113 		const double dDist = EINext.m_dMaxDistToLeaf + dEdgeLength;
114 		if (dDist > dMaxDistToLeaf)
115 			{
116 			dMaxDistToLeaf = dDist;
117 			uMostDistantLeaf = EINext.m_uMostDistantLeaf;
118 			uMaxStep = uNode3;
119 			}
120 		}
121 	if (NULL_NEIGHBOR == uMaxStep || NULL_NEIGHBOR == uMostDistantLeaf ||
122 	  0 == uLeafCount)
123 		Quit("CalcInfo: internal error 2");
124 
125 	const double dThisDist = tree.GetEdgeLength(uNode1, uNode2);
126 	EI.m_dMaxDistToLeaf = dMaxDistToLeaf;
127 	EI.m_dTotalDistToLeaves = dTotalDistToLeaves;
128 	EI.m_uMaxStep = uMaxStep;
129 	EI.m_uMostDistantLeaf = uMostDistantLeaf;
130 	EI.m_uLeafCount = uLeafCount;
131 	EI.m_bSet = true;
132 	}
133 
Known(const Tree & tree,EdgeInfo ** EIs,unsigned uNodeFrom,unsigned uNodeTo)134 static bool Known(const Tree &tree, EdgeInfo **EIs, unsigned uNodeFrom,
135   unsigned uNodeTo)
136 	{
137 	const unsigned uSub = tree.GetNeighborSubscript(uNodeFrom, uNodeTo);
138 	return EIs[uNodeFrom][uSub].m_bSet;
139 	}
140 
AllKnownOut(const Tree & tree,EdgeInfo ** EIs,unsigned uNodeFrom,unsigned uNodeTo)141 static bool AllKnownOut(const Tree &tree, EdgeInfo **EIs, unsigned uNodeFrom,
142   unsigned uNodeTo)
143 	{
144 	const unsigned uNeighborCount = tree.GetNeighborCount(uNodeTo);
145 	for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)
146 		{
147 		unsigned uNeighborIndex = tree.GetNeighbor(uNodeTo, uSub);
148 		if (uNeighborIndex == uNodeFrom)
149 			continue;
150 		if (!EIs[uNodeTo][uSub].m_bSet)
151 			return false;
152 		}
153 	return true;
154 	}
155 
FindRoot(const Tree & tree,unsigned * ptruNode1,unsigned * ptruNode2,double * ptrdLength1,double * ptrdLength2,ROOT RootMethod)156 void FindRoot(const Tree &tree, unsigned *ptruNode1, unsigned *ptruNode2,
157   double *ptrdLength1, double *ptrdLength2,
158   ROOT RootMethod)
159 	{
160 #if	TRACE
161 	tree.LogMe();
162 #endif
163 	if (tree.IsRooted())
164 		Quit("FindRoot: tree already rooted");
165 
166 	const unsigned uNodeCount = tree.GetNodeCount();
167 	const unsigned uLeafCount = tree.GetLeafCount();
168 
169 	if (uNodeCount < 2)
170 		Quit("Root: don't support trees with < 2 edges");
171 
172 	EdgeInfo **EIs = new EdgeInfo *[uNodeCount];
173 	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
174 		EIs[uNodeIndex] = new EdgeInfo[3];
175 
176 	EdgeList Edges;
177 	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
178 		if (tree.IsLeaf(uNodeIndex))
179 			{
180 			unsigned uParent = tree.GetNeighbor1(uNodeIndex);
181 			Edges.Add(uParent, uNodeIndex);
182 			}
183 
184 #if	TRACE
185 	Log("Edges: ");
186 	Edges.LogMe();
187 #endif
188 
189 // Main loop: iterate until all distances known
190 	double dAllMaxDist = -1e20;
191 	unsigned uMaxFrom = NULL_NEIGHBOR;
192 	unsigned uMaxTo = NULL_NEIGHBOR;
193 	for (;;)
194 		{
195 		EdgeList NextEdges;
196 
197 #if	TRACE
198 		Log("\nTop of main loop\n");
199 		Log("Edges: ");
200 		Edges.LogMe();
201 		Log("MDs:\n");
202 		ListEIs(EIs, uNodeCount);
203 #endif
204 
205 	// For all edges
206 		const unsigned uEdgeCount = Edges.GetCount();
207 		if (0 == uEdgeCount)
208 			break;
209 		for (unsigned n = 0; n < uEdgeCount; ++n)
210 			{
211 			unsigned uNodeFrom;
212 			unsigned uNodeTo;
213 			Edges.GetEdge(n, &uNodeFrom, &uNodeTo);
214 
215 			CalcInfo(tree, uNodeFrom, uNodeTo, EIs);
216 #if	TRACE
217 			Log("Edge %u -> %u\n", uNodeFrom, uNodeTo);
218 #endif
219 			const unsigned uNeighborCount = tree.GetNeighborCount(uNodeFrom);
220 			for (unsigned i = 0; i < uNeighborCount; ++i)
221 				{
222 				const unsigned uNeighborIndex = tree.GetNeighbor(uNodeFrom, i);
223 				if (!Known(tree, EIs, uNeighborIndex, uNodeFrom) &&
224 				  AllKnownOut(tree, EIs, uNeighborIndex, uNodeFrom))
225 					NextEdges.Add(uNeighborIndex, uNodeFrom);
226 				}
227 			}
228 		Edges.Copy(NextEdges);
229 		}
230 
231 #if	TRACE
232 	ListEIs(EIs, uNodeCount);
233 #endif
234 
235 	switch (RootMethod)
236 		{
237 	case ROOT_MidLongestSpan:
238 		RootByMidLongestSpan(tree, EIs, ptruNode1, ptruNode2,
239 		  ptrdLength1, ptrdLength2);
240 		break;
241 
242 	case ROOT_MinAvgLeafDist:
243 		RootByMinAvgLeafDist(tree, EIs, ptruNode1, ptruNode2,
244 		  ptrdLength1, ptrdLength2);
245 		break;
246 
247 	default:
248 		Quit("Invalid RootMethod=%d", RootMethod);
249 		}
250 
251 	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
252 		delete[] EIs[uNodeIndex];
253 	delete[] EIs;
254 	}
255 
RootByMidLongestSpan(const Tree & tree,EdgeInfo ** EIs,unsigned * ptruNode1,unsigned * ptruNode2,double * ptrdLength1,double * ptrdLength2)256 static void RootByMidLongestSpan(const Tree &tree, EdgeInfo **EIs,
257   unsigned *ptruNode1, unsigned *ptruNode2,
258   double *ptrdLength1, double *ptrdLength2)
259 	{
260 	const unsigned uNodeCount = tree.GetNodeCount();
261 
262 	unsigned uLeaf1 = NULL_NEIGHBOR;
263 	unsigned uMostDistantLeaf = NULL_NEIGHBOR;
264 	double dMaxDist = -VERY_LARGE_DOUBLE;
265 	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
266 		{
267 		if (!tree.IsLeaf(uNodeIndex))
268 			continue;
269 
270 		const unsigned uNode2 = tree.GetNeighbor1(uNodeIndex);
271 		if (NULL_NEIGHBOR == uNode2)
272 			Quit("RootByMidLongestSpan: internal error 0");
273 		const double dEdgeLength = tree.GetEdgeLength(uNodeIndex, uNode2);
274 		const EdgeInfo &EI = EIs[uNodeIndex][0];
275 		if (!EI.m_bSet)
276 			Quit("RootByMidLongestSpan: internal error 1");
277 		if (EI.m_uNode1 != uNodeIndex || EI.m_uNode2 != uNode2)
278 			Quit("RootByMidLongestSpan: internal error 2");
279 		const double dSpanLength = dEdgeLength + EI.m_dMaxDistToLeaf;
280 		if (dSpanLength > dMaxDist)
281 			{
282 			dMaxDist = dSpanLength;
283 			uLeaf1 = uNodeIndex;
284 			uMostDistantLeaf = EI.m_uMostDistantLeaf;
285 			}
286 		}
287 
288 	if (NULL_NEIGHBOR == uLeaf1)
289 		Quit("RootByMidLongestSpan: internal error 3");
290 
291 	const double dTreeHeight = dMaxDist/2.0;
292 	unsigned uNode1 = uLeaf1;
293 	unsigned uNode2 = tree.GetNeighbor1(uLeaf1);
294 	double dAccumSpanLength = 0;
295 
296 #if	TRACE
297 	Log("RootByMidLongestSpan: span=%u", uLeaf1);
298 #endif
299 
300 	for (;;)
301 		{
302 		const double dEdgeLength = tree.GetEdgeLength(uNode1, uNode2);
303 #if	TRACE
304 		Log("->%u(%g;%g)", uNode2, dEdgeLength, dAccumSpanLength);
305 #endif
306 		if (dAccumSpanLength + dEdgeLength >= dTreeHeight)
307 			{
308 			*ptruNode1 = uNode1;
309 			*ptruNode2 = uNode2;
310 			*ptrdLength1 = dTreeHeight - dAccumSpanLength;
311 			*ptrdLength2 = dEdgeLength - *ptrdLength1;
312 #if	TRACE
313 			{
314 			const EdgeInfo &EI = EIs[uLeaf1][0];
315 			Log("...\n");
316 			Log("Midpoint: Leaf1=%u Leaf2=%u Node1=%u Node2=%u Length1=%g Length2=%g\n",
317 			  uLeaf1, EI.m_uMostDistantLeaf, *ptruNode1, *ptruNode2, *ptrdLength1, *ptrdLength2);
318 			}
319 #endif
320 			return;
321 			}
322 
323 		if (tree.IsLeaf(uNode2))
324 			Quit("RootByMidLongestSpan: internal error 4");
325 
326 		dAccumSpanLength += dEdgeLength;
327 		const unsigned uSub = tree.GetNeighborSubscript(uNode1, uNode2);
328 		const EdgeInfo &EI = EIs[uNode1][uSub];
329 		if (!EI.m_bSet)
330 			Quit("RootByMidLongestSpan: internal error 5");
331 
332 		uNode1 = uNode2;
333 		uNode2 = EI.m_uMaxStep;
334 		}
335 	}
336 
337 /***
338 Root by balancing average distance to leaves.
339 The root is a point p such that the average
340 distance to leaves to the left of p is the
341 same as the to the right.
342 
343 This is the method used by CLUSTALW, which
344 was originally used in PROFILEWEIGHT:
345 
346 	Thompson et al. (1994) CABIOS (10) 1, 19-29.
347 ***/
348 
RootByMinAvgLeafDist(const Tree & tree,EdgeInfo ** EIs,unsigned * ptruNode1,unsigned * ptruNode2,double * ptrdLength1,double * ptrdLength2)349 static void RootByMinAvgLeafDist(const Tree &tree, EdgeInfo **EIs,
350   unsigned *ptruNode1, unsigned *ptruNode2,
351   double *ptrdLength1, double *ptrdLength2)
352 	{
353 	const unsigned uNodeCount = tree.GetNodeCount();
354 	const unsigned uLeafCount = tree.GetLeafCount();
355 	unsigned uNode1 = NULL_NEIGHBOR;
356 	unsigned uNode2 = NULL_NEIGHBOR;
357 	double dMinHeight = VERY_LARGE_DOUBLE;
358 	double dBestLength1 = VERY_LARGE_DOUBLE;
359 	double dBestLength2 = VERY_LARGE_DOUBLE;
360 
361 	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
362 		{
363 		const unsigned uNeighborCount = tree.GetNeighborCount(uNodeIndex);
364 		for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)
365 			{
366 			const unsigned uNeighborIndex = tree.GetNeighbor(uNodeIndex, uSub);
367 
368 		// Avoid visiting same edge a second time in reversed order.
369 			if (uNeighborIndex < uNodeIndex)
370 				continue;
371 
372 			const unsigned uSubRev = tree.GetNeighborSubscript(uNeighborIndex, uNodeIndex);
373 			if (NULL_NEIGHBOR == uSubRev)
374 				Quit("RootByMinAvgLeafDist, internal error 1");
375 
376 		// Get info for edges Node1->Node2 and Node2->Node1 (reversed)
377 			const EdgeInfo &EI = EIs[uNodeIndex][uSub];
378 			const EdgeInfo &EIRev = EIs[uNeighborIndex][uSubRev];
379 
380 			if (EI.m_uNode1 != uNodeIndex || EI.m_uNode2 != uNeighborIndex ||
381 			  EIRev.m_uNode1 != uNeighborIndex || EIRev.m_uNode2 != uNodeIndex)
382 				Quit("RootByMinAvgLeafDist, internal error 2");
383 			if (!EI.m_bSet)
384 				Quit("RootByMinAvgLeafDist, internal error 3");
385 			if (uLeafCount != EI.m_uLeafCount + EIRev.m_uLeafCount)
386 				Quit("RootByMinAvgLeafDist, internal error 4");
387 
388 			const double dEdgeLength = tree.GetEdgeLength(uNodeIndex, uNeighborIndex);
389 			if (dEdgeLength != tree.GetEdgeLength(uNeighborIndex, uNodeIndex))
390 				Quit("RootByMinAvgLeafDist, internal error 5");
391 
392 		// Consider point p on edge 12 in tree (1=Node, 2=Neighbor).
393 		//
394         //	-----         ----
395         //	     |       |
396         //	     1----p--2
397         //	     |       |
398         //	-----         ----
399 		//
400 		// Define:
401 		//    ADLp = average distance to leaves to left of point p.
402 		//	  ADRp = average distance to leaves to right of point p.
403 		//	  L = edge length = distance 12
404 		//    x = distance 1p
405 		// So distance p2 = L - x.
406 		// Average distance from p to leaves on left of p is:
407 		//		ADLp = ADL1 + x
408 		// Average distance from p to leaves on right of p is:
409 		//		ADRp = ADR2 + (L - x)
410 		// To be a root, we require these two distances to be equal,
411 		//		ADLp = ADRp
412 		//		ADL1 + x = ADR2 + (L - x)
413 		// Solving for x,
414 		//		x = (ADR2 - ADL1 + L)/2
415 		// If 0 <= x <= L, we can place the root on edge 12.
416 
417 			const double ADL1 = EI.m_dTotalDistToLeaves / EI.m_uLeafCount;
418 			const double ADR2 = EIRev.m_dTotalDistToLeaves / EIRev.m_uLeafCount;
419 
420 			const double x = (ADR2 - ADL1 + dEdgeLength)/2.0;
421 			if (x >= 0 && x <= dEdgeLength)
422 				{
423 				const double dLength1 = x;
424 				const double dLength2 = dEdgeLength - x;
425 				const double dHeight1 = EI.m_dMaxDistToLeaf + dLength1;
426 				const double dHeight2 = EIRev.m_dMaxDistToLeaf + dLength2;
427 				const double dHeight = dHeight1 >= dHeight2 ? dHeight1 : dHeight2;
428 #if	TRACE
429 				Log("Candidate root Node1=%u Node2=%u Height=%g\n",
430 				  uNodeIndex, uNeighborIndex, dHeight);
431 #endif
432 				if (dHeight < dMinHeight)
433 					{
434 					uNode1 = uNodeIndex;
435 					uNode2 = uNeighborIndex;
436 					dBestLength1 = dLength1;
437 					dBestLength2 = dLength2;
438 					dMinHeight = dHeight;
439 					}
440 				}
441 			}
442 		}
443 
444 	if (NULL_NEIGHBOR == uNode1 || NULL_NEIGHBOR == uNode2)
445 		Quit("RootByMinAvgLeafDist, internal error 6");
446 
447 #if	TRACE
448 	Log("Best root Node1=%u Node2=%u Length1=%g Length2=%g Height=%g\n",
449 	  uNode1, uNode2, dBestLength1, dBestLength2, dMinHeight);
450 #endif
451 
452 	*ptruNode1 = uNode1;
453 	*ptruNode2 = uNode2;
454 	*ptrdLength1 = dBestLength1;
455 	*ptrdLength2 = dBestLength2;
456 	}
457 
FixRoot(Tree & tree,ROOT Method)458 void FixRoot(Tree &tree, ROOT Method)
459 	{
460 	if (!tree.IsRooted())
461 		Quit("FixRoot: expecting rooted tree");
462 
463 	// Pseudo-root: keep root assigned by clustering
464 	if (ROOT_Pseudo == Method)
465 		return;
466 
467 	tree.UnrootByDeletingRoot();
468 	tree.RootUnrootedTree(Method);
469 	}
470