1 #include "muscle.h"
2 #include "tree.h"
3 #include "edgelist.h"
4
5 #define TRACE 0
6
7 struct EdgeInfo
8 {
EdgeInfoEdgeInfo9 EdgeInfo()
10 {
11 m_bSet = false;
12 }
13 // Is data in this structure valid (i.e, has been set)?
14 bool m_bSet;
15
16 // Node at start of this edge
17 unsigned m_uNode1;
18
19 // Node at end of this edge
20 unsigned m_uNode2;
21
22 // Maximum distance from Node2 to a leaf
23 double m_dMaxDistToLeaf;
24
25 // Sum of distances from Node2 to all leaves under Node2
26 double m_dTotalDistToLeaves;
27
28 // Next node on path from Node2 to most distant leaf
29 unsigned m_uMaxStep;
30
31 // Most distant leaf from Node2 (used for debugging only)
32 unsigned m_uMostDistantLeaf;
33
34 // Number of leaves under Node2
35 unsigned m_uLeafCount;
36 };
37
38 static void RootByMidLongestSpan(const Tree &tree, EdgeInfo **EIs,
39 unsigned *ptruNode1, unsigned *ptruNode2,
40 double *ptrdLength1, double *ptrdLength2);
41 static void RootByMinAvgLeafDist(const Tree &tree, EdgeInfo **EIs,
42 unsigned *ptruNode1, unsigned *ptruNode2,
43 double *ptrdLength1, double *ptrdLength2);
44
ListEIs(EdgeInfo ** EIs,unsigned uNodeCount)45 static void ListEIs(EdgeInfo **EIs, unsigned uNodeCount)
46 {
47 Log("Node1 Node2 MaxDist TotDist MostDist LeafCount Step\n");
48 Log("----- ----- ------- ------- -------- --------- ----\n");
49 // 12345 12345 1234567 1234567 12345678 123456789
50
51 for (unsigned uNode = 0; uNode < uNodeCount; ++uNode)
52 for (unsigned uNeighbor = 0; uNeighbor < 3; ++uNeighbor)
53 {
54 const EdgeInfo &EI = EIs[uNode][uNeighbor];
55 if (!EI.m_bSet)
56 continue;
57 Log("%5u %5u %7.3g %7.3g %8u %9u",
58 EI.m_uNode1,
59 EI.m_uNode2,
60 EI.m_dMaxDistToLeaf,
61 EI.m_dTotalDistToLeaves,
62 EI.m_uMostDistantLeaf,
63 EI.m_uLeafCount);
64 if (NULL_NEIGHBOR != EI.m_uMaxStep)
65 Log(" %4u", EI.m_uMaxStep);
66 Log("\n");
67 }
68 }
69
CalcInfo(const Tree & tree,unsigned uNode1,unsigned uNode2,EdgeInfo ** EIs)70 static void CalcInfo(const Tree &tree, unsigned uNode1, unsigned uNode2, EdgeInfo **EIs)
71 {
72 const unsigned uNeighborIndex = tree.GetNeighborSubscript(uNode1, uNode2);
73 EdgeInfo &EI = EIs[uNode1][uNeighborIndex];
74 EI.m_uNode1 = uNode1;
75 EI.m_uNode2 = uNode2;
76
77 if (tree.IsLeaf(uNode2))
78 {
79 EI.m_dMaxDistToLeaf = 0;
80 EI.m_dTotalDistToLeaves = 0;
81 EI.m_uMaxStep = NULL_NEIGHBOR;
82 EI.m_uMostDistantLeaf = uNode2;
83 EI.m_uLeafCount = 1;
84 EI.m_bSet = true;
85 return;
86 }
87
88 double dMaxDistToLeaf = -1e29;
89 double dTotalDistToLeaves = 0.0;
90 unsigned uLeafCount = 0;
91 unsigned uMostDistantLeaf = NULL_NEIGHBOR;
92 unsigned uMaxStep = NULL_NEIGHBOR;
93
94 const unsigned uNeighborCount = tree.GetNeighborCount(uNode2);
95 for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)
96 {
97 const unsigned uNode3 = tree.GetNeighbor(uNode2, uSub);
98 if (uNode3 == uNode1)
99 continue;
100 const EdgeInfo &EINext = EIs[uNode2][uSub];
101 if (!EINext.m_bSet)
102 Quit("CalcInfo: internal error, dist %u->%u not known",
103 uNode2, uNode3);
104
105
106 uLeafCount += EINext.m_uLeafCount;
107
108 const double dEdgeLength = tree.GetEdgeLength(uNode2, uNode3);
109 const double dTotalDist = EINext.m_dTotalDistToLeaves +
110 EINext.m_uLeafCount*dEdgeLength;
111 dTotalDistToLeaves += dTotalDist;
112
113 const double dDist = EINext.m_dMaxDistToLeaf + dEdgeLength;
114 if (dDist > dMaxDistToLeaf)
115 {
116 dMaxDistToLeaf = dDist;
117 uMostDistantLeaf = EINext.m_uMostDistantLeaf;
118 uMaxStep = uNode3;
119 }
120 }
121 if (NULL_NEIGHBOR == uMaxStep || NULL_NEIGHBOR == uMostDistantLeaf ||
122 0 == uLeafCount)
123 Quit("CalcInfo: internal error 2");
124
125 const double dThisDist = tree.GetEdgeLength(uNode1, uNode2);
126 EI.m_dMaxDistToLeaf = dMaxDistToLeaf;
127 EI.m_dTotalDistToLeaves = dTotalDistToLeaves;
128 EI.m_uMaxStep = uMaxStep;
129 EI.m_uMostDistantLeaf = uMostDistantLeaf;
130 EI.m_uLeafCount = uLeafCount;
131 EI.m_bSet = true;
132 }
133
Known(const Tree & tree,EdgeInfo ** EIs,unsigned uNodeFrom,unsigned uNodeTo)134 static bool Known(const Tree &tree, EdgeInfo **EIs, unsigned uNodeFrom,
135 unsigned uNodeTo)
136 {
137 const unsigned uSub = tree.GetNeighborSubscript(uNodeFrom, uNodeTo);
138 return EIs[uNodeFrom][uSub].m_bSet;
139 }
140
AllKnownOut(const Tree & tree,EdgeInfo ** EIs,unsigned uNodeFrom,unsigned uNodeTo)141 static bool AllKnownOut(const Tree &tree, EdgeInfo **EIs, unsigned uNodeFrom,
142 unsigned uNodeTo)
143 {
144 const unsigned uNeighborCount = tree.GetNeighborCount(uNodeTo);
145 for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)
146 {
147 unsigned uNeighborIndex = tree.GetNeighbor(uNodeTo, uSub);
148 if (uNeighborIndex == uNodeFrom)
149 continue;
150 if (!EIs[uNodeTo][uSub].m_bSet)
151 return false;
152 }
153 return true;
154 }
155
FindRoot(const Tree & tree,unsigned * ptruNode1,unsigned * ptruNode2,double * ptrdLength1,double * ptrdLength2,ROOT RootMethod)156 void FindRoot(const Tree &tree, unsigned *ptruNode1, unsigned *ptruNode2,
157 double *ptrdLength1, double *ptrdLength2,
158 ROOT RootMethod)
159 {
160 #if TRACE
161 tree.LogMe();
162 #endif
163 if (tree.IsRooted())
164 Quit("FindRoot: tree already rooted");
165
166 const unsigned uNodeCount = tree.GetNodeCount();
167 const unsigned uLeafCount = tree.GetLeafCount();
168
169 if (uNodeCount < 2)
170 Quit("Root: don't support trees with < 2 edges");
171
172 EdgeInfo **EIs = new EdgeInfo *[uNodeCount];
173 for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
174 EIs[uNodeIndex] = new EdgeInfo[3];
175
176 EdgeList Edges;
177 for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
178 if (tree.IsLeaf(uNodeIndex))
179 {
180 unsigned uParent = tree.GetNeighbor1(uNodeIndex);
181 Edges.Add(uParent, uNodeIndex);
182 }
183
184 #if TRACE
185 Log("Edges: ");
186 Edges.LogMe();
187 #endif
188
189 // Main loop: iterate until all distances known
190 double dAllMaxDist = -1e20;
191 unsigned uMaxFrom = NULL_NEIGHBOR;
192 unsigned uMaxTo = NULL_NEIGHBOR;
193 for (;;)
194 {
195 EdgeList NextEdges;
196
197 #if TRACE
198 Log("\nTop of main loop\n");
199 Log("Edges: ");
200 Edges.LogMe();
201 Log("MDs:\n");
202 ListEIs(EIs, uNodeCount);
203 #endif
204
205 // For all edges
206 const unsigned uEdgeCount = Edges.GetCount();
207 if (0 == uEdgeCount)
208 break;
209 for (unsigned n = 0; n < uEdgeCount; ++n)
210 {
211 unsigned uNodeFrom;
212 unsigned uNodeTo;
213 Edges.GetEdge(n, &uNodeFrom, &uNodeTo);
214
215 CalcInfo(tree, uNodeFrom, uNodeTo, EIs);
216 #if TRACE
217 Log("Edge %u -> %u\n", uNodeFrom, uNodeTo);
218 #endif
219 const unsigned uNeighborCount = tree.GetNeighborCount(uNodeFrom);
220 for (unsigned i = 0; i < uNeighborCount; ++i)
221 {
222 const unsigned uNeighborIndex = tree.GetNeighbor(uNodeFrom, i);
223 if (!Known(tree, EIs, uNeighborIndex, uNodeFrom) &&
224 AllKnownOut(tree, EIs, uNeighborIndex, uNodeFrom))
225 NextEdges.Add(uNeighborIndex, uNodeFrom);
226 }
227 }
228 Edges.Copy(NextEdges);
229 }
230
231 #if TRACE
232 ListEIs(EIs, uNodeCount);
233 #endif
234
235 switch (RootMethod)
236 {
237 case ROOT_MidLongestSpan:
238 RootByMidLongestSpan(tree, EIs, ptruNode1, ptruNode2,
239 ptrdLength1, ptrdLength2);
240 break;
241
242 case ROOT_MinAvgLeafDist:
243 RootByMinAvgLeafDist(tree, EIs, ptruNode1, ptruNode2,
244 ptrdLength1, ptrdLength2);
245 break;
246
247 default:
248 Quit("Invalid RootMethod=%d", RootMethod);
249 }
250
251 for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
252 delete[] EIs[uNodeIndex];
253 delete[] EIs;
254 }
255
RootByMidLongestSpan(const Tree & tree,EdgeInfo ** EIs,unsigned * ptruNode1,unsigned * ptruNode2,double * ptrdLength1,double * ptrdLength2)256 static void RootByMidLongestSpan(const Tree &tree, EdgeInfo **EIs,
257 unsigned *ptruNode1, unsigned *ptruNode2,
258 double *ptrdLength1, double *ptrdLength2)
259 {
260 const unsigned uNodeCount = tree.GetNodeCount();
261
262 unsigned uLeaf1 = NULL_NEIGHBOR;
263 unsigned uMostDistantLeaf = NULL_NEIGHBOR;
264 double dMaxDist = -VERY_LARGE_DOUBLE;
265 for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
266 {
267 if (!tree.IsLeaf(uNodeIndex))
268 continue;
269
270 const unsigned uNode2 = tree.GetNeighbor1(uNodeIndex);
271 if (NULL_NEIGHBOR == uNode2)
272 Quit("RootByMidLongestSpan: internal error 0");
273 const double dEdgeLength = tree.GetEdgeLength(uNodeIndex, uNode2);
274 const EdgeInfo &EI = EIs[uNodeIndex][0];
275 if (!EI.m_bSet)
276 Quit("RootByMidLongestSpan: internal error 1");
277 if (EI.m_uNode1 != uNodeIndex || EI.m_uNode2 != uNode2)
278 Quit("RootByMidLongestSpan: internal error 2");
279 const double dSpanLength = dEdgeLength + EI.m_dMaxDistToLeaf;
280 if (dSpanLength > dMaxDist)
281 {
282 dMaxDist = dSpanLength;
283 uLeaf1 = uNodeIndex;
284 uMostDistantLeaf = EI.m_uMostDistantLeaf;
285 }
286 }
287
288 if (NULL_NEIGHBOR == uLeaf1)
289 Quit("RootByMidLongestSpan: internal error 3");
290
291 const double dTreeHeight = dMaxDist/2.0;
292 unsigned uNode1 = uLeaf1;
293 unsigned uNode2 = tree.GetNeighbor1(uLeaf1);
294 double dAccumSpanLength = 0;
295
296 #if TRACE
297 Log("RootByMidLongestSpan: span=%u", uLeaf1);
298 #endif
299
300 for (;;)
301 {
302 const double dEdgeLength = tree.GetEdgeLength(uNode1, uNode2);
303 #if TRACE
304 Log("->%u(%g;%g)", uNode2, dEdgeLength, dAccumSpanLength);
305 #endif
306 if (dAccumSpanLength + dEdgeLength >= dTreeHeight)
307 {
308 *ptruNode1 = uNode1;
309 *ptruNode2 = uNode2;
310 *ptrdLength1 = dTreeHeight - dAccumSpanLength;
311 *ptrdLength2 = dEdgeLength - *ptrdLength1;
312 #if TRACE
313 {
314 const EdgeInfo &EI = EIs[uLeaf1][0];
315 Log("...\n");
316 Log("Midpoint: Leaf1=%u Leaf2=%u Node1=%u Node2=%u Length1=%g Length2=%g\n",
317 uLeaf1, EI.m_uMostDistantLeaf, *ptruNode1, *ptruNode2, *ptrdLength1, *ptrdLength2);
318 }
319 #endif
320 return;
321 }
322
323 if (tree.IsLeaf(uNode2))
324 Quit("RootByMidLongestSpan: internal error 4");
325
326 dAccumSpanLength += dEdgeLength;
327 const unsigned uSub = tree.GetNeighborSubscript(uNode1, uNode2);
328 const EdgeInfo &EI = EIs[uNode1][uSub];
329 if (!EI.m_bSet)
330 Quit("RootByMidLongestSpan: internal error 5");
331
332 uNode1 = uNode2;
333 uNode2 = EI.m_uMaxStep;
334 }
335 }
336
337 /***
338 Root by balancing average distance to leaves.
339 The root is a point p such that the average
340 distance to leaves to the left of p is the
341 same as the to the right.
342
343 This is the method used by CLUSTALW, which
344 was originally used in PROFILEWEIGHT:
345
346 Thompson et al. (1994) CABIOS (10) 1, 19-29.
347 ***/
348
RootByMinAvgLeafDist(const Tree & tree,EdgeInfo ** EIs,unsigned * ptruNode1,unsigned * ptruNode2,double * ptrdLength1,double * ptrdLength2)349 static void RootByMinAvgLeafDist(const Tree &tree, EdgeInfo **EIs,
350 unsigned *ptruNode1, unsigned *ptruNode2,
351 double *ptrdLength1, double *ptrdLength2)
352 {
353 const unsigned uNodeCount = tree.GetNodeCount();
354 const unsigned uLeafCount = tree.GetLeafCount();
355 unsigned uNode1 = NULL_NEIGHBOR;
356 unsigned uNode2 = NULL_NEIGHBOR;
357 double dMinHeight = VERY_LARGE_DOUBLE;
358 double dBestLength1 = VERY_LARGE_DOUBLE;
359 double dBestLength2 = VERY_LARGE_DOUBLE;
360
361 for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
362 {
363 const unsigned uNeighborCount = tree.GetNeighborCount(uNodeIndex);
364 for (unsigned uSub = 0; uSub < uNeighborCount; ++uSub)
365 {
366 const unsigned uNeighborIndex = tree.GetNeighbor(uNodeIndex, uSub);
367
368 // Avoid visiting same edge a second time in reversed order.
369 if (uNeighborIndex < uNodeIndex)
370 continue;
371
372 const unsigned uSubRev = tree.GetNeighborSubscript(uNeighborIndex, uNodeIndex);
373 if (NULL_NEIGHBOR == uSubRev)
374 Quit("RootByMinAvgLeafDist, internal error 1");
375
376 // Get info for edges Node1->Node2 and Node2->Node1 (reversed)
377 const EdgeInfo &EI = EIs[uNodeIndex][uSub];
378 const EdgeInfo &EIRev = EIs[uNeighborIndex][uSubRev];
379
380 if (EI.m_uNode1 != uNodeIndex || EI.m_uNode2 != uNeighborIndex ||
381 EIRev.m_uNode1 != uNeighborIndex || EIRev.m_uNode2 != uNodeIndex)
382 Quit("RootByMinAvgLeafDist, internal error 2");
383 if (!EI.m_bSet)
384 Quit("RootByMinAvgLeafDist, internal error 3");
385 if (uLeafCount != EI.m_uLeafCount + EIRev.m_uLeafCount)
386 Quit("RootByMinAvgLeafDist, internal error 4");
387
388 const double dEdgeLength = tree.GetEdgeLength(uNodeIndex, uNeighborIndex);
389 if (dEdgeLength != tree.GetEdgeLength(uNeighborIndex, uNodeIndex))
390 Quit("RootByMinAvgLeafDist, internal error 5");
391
392 // Consider point p on edge 12 in tree (1=Node, 2=Neighbor).
393 //
394 // ----- ----
395 // | |
396 // 1----p--2
397 // | |
398 // ----- ----
399 //
400 // Define:
401 // ADLp = average distance to leaves to left of point p.
402 // ADRp = average distance to leaves to right of point p.
403 // L = edge length = distance 12
404 // x = distance 1p
405 // So distance p2 = L - x.
406 // Average distance from p to leaves on left of p is:
407 // ADLp = ADL1 + x
408 // Average distance from p to leaves on right of p is:
409 // ADRp = ADR2 + (L - x)
410 // To be a root, we require these two distances to be equal,
411 // ADLp = ADRp
412 // ADL1 + x = ADR2 + (L - x)
413 // Solving for x,
414 // x = (ADR2 - ADL1 + L)/2
415 // If 0 <= x <= L, we can place the root on edge 12.
416
417 const double ADL1 = EI.m_dTotalDistToLeaves / EI.m_uLeafCount;
418 const double ADR2 = EIRev.m_dTotalDistToLeaves / EIRev.m_uLeafCount;
419
420 const double x = (ADR2 - ADL1 + dEdgeLength)/2.0;
421 if (x >= 0 && x <= dEdgeLength)
422 {
423 const double dLength1 = x;
424 const double dLength2 = dEdgeLength - x;
425 const double dHeight1 = EI.m_dMaxDistToLeaf + dLength1;
426 const double dHeight2 = EIRev.m_dMaxDistToLeaf + dLength2;
427 const double dHeight = dHeight1 >= dHeight2 ? dHeight1 : dHeight2;
428 #if TRACE
429 Log("Candidate root Node1=%u Node2=%u Height=%g\n",
430 uNodeIndex, uNeighborIndex, dHeight);
431 #endif
432 if (dHeight < dMinHeight)
433 {
434 uNode1 = uNodeIndex;
435 uNode2 = uNeighborIndex;
436 dBestLength1 = dLength1;
437 dBestLength2 = dLength2;
438 dMinHeight = dHeight;
439 }
440 }
441 }
442 }
443
444 if (NULL_NEIGHBOR == uNode1 || NULL_NEIGHBOR == uNode2)
445 Quit("RootByMinAvgLeafDist, internal error 6");
446
447 #if TRACE
448 Log("Best root Node1=%u Node2=%u Length1=%g Length2=%g Height=%g\n",
449 uNode1, uNode2, dBestLength1, dBestLength2, dMinHeight);
450 #endif
451
452 *ptruNode1 = uNode1;
453 *ptruNode2 = uNode2;
454 *ptrdLength1 = dBestLength1;
455 *ptrdLength2 = dBestLength2;
456 }
457
FixRoot(Tree & tree,ROOT Method)458 void FixRoot(Tree &tree, ROOT Method)
459 {
460 if (!tree.IsRooted())
461 Quit("FixRoot: expecting rooted tree");
462
463 // Pseudo-root: keep root assigned by clustering
464 if (ROOT_Pseudo == Method)
465 return;
466
467 tree.UnrootByDeletingRoot();
468 tree.RootUnrootedTree(Method);
469 }
470