1 #include "muscle.h"
2 #include "tree.h"
3 #include "msa.h"
4
5 /***
6 Compute weights by the CLUSTALW method.
7 Thompson, Higgins and Gibson (1994), CABIOS (10) 19-29;
8 see also CLUSTALW paper.
9
10 Weights are computed from the edge lengths of a rooted tree.
11
12 Define the strength of an edge to be its length divided by the number
13 of leaves under that edge. The weight of a sequence is then the sum
14 of edge strengths on the path from the root to the leaf.
15
16 Example.
17
18 0.2
19 -----A 0.1
20 -x ------- B 0.7
21 --------y ----------- C
22 0.3 ----------z
23 0.4 -------------- D
24 0.8
25
26 Edge Length Leaves Strength
27 ---- ----- ------ --------
28 xy 0.3 3 0.1
29 xA 0.2 1 0.2
30 yz 0.4 2 0.2
31 yB 0.1 1 0.1
32 zC 0.7 1 0.7
33 zD 0.8 1 0.8
34
35 Leaf Path Strengths Weight
36 ---- ---- --------- ------
37 A xA 0.2 0.2
38 B xy-yB 0.1 + 0.1 0.2
39 C xy-yz-zC 0.1 + 0.2 + 0.7 1.0
40 D xy-yz-zD 0.1 + 0.2 + 0.8 1.1
41
42 ***/
43
44 #define TRACE 0
45
CountLeaves(const Tree & tree,unsigned uNodeIndex,unsigned LeavesUnderNode[])46 static unsigned CountLeaves(const Tree &tree, unsigned uNodeIndex,
47 unsigned LeavesUnderNode[])
48 {
49 if (tree.IsLeaf(uNodeIndex))
50 {
51 LeavesUnderNode[uNodeIndex] = 1;
52 return 1;
53 }
54
55 const unsigned uLeft = tree.GetLeft(uNodeIndex);
56 const unsigned uRight = tree.GetRight(uNodeIndex);
57 const unsigned uRightCount = CountLeaves(tree, uRight, LeavesUnderNode);
58 const unsigned uLeftCount = CountLeaves(tree, uLeft, LeavesUnderNode);
59 const unsigned uCount = uRightCount + uLeftCount;
60 LeavesUnderNode[uNodeIndex] = uCount;
61 return uCount;
62 }
63
CalcClustalWWeights(const Tree & tree,WEIGHT Weights[])64 void CalcClustalWWeights(const Tree &tree, WEIGHT Weights[])
65 {
66 #if TRACE
67 Log("CalcClustalWWeights\n");
68 tree.LogMe();
69 #endif
70
71 const unsigned uLeafCount = tree.GetLeafCount();
72 if (0 == uLeafCount)
73 return;
74 else if (1 == uLeafCount)
75 {
76 Weights[0] = (WEIGHT) 1.0;
77 return;
78 }
79 else if (2 == uLeafCount)
80 {
81 Weights[0] = (WEIGHT) 0.5;
82 Weights[1] = (WEIGHT) 0.5;
83 return;
84 }
85
86 if (!tree.IsRooted())
87 Quit("CalcClustalWWeights requires rooted tree");
88
89 const unsigned uNodeCount = tree.GetNodeCount();
90 unsigned *LeavesUnderNode = new unsigned[uNodeCount];
91 memset(LeavesUnderNode, 0, uNodeCount*sizeof(unsigned));
92
93 const unsigned uRootNodeIndex = tree.GetRootNodeIndex();
94 unsigned uLeavesUnderRoot = CountLeaves(tree, uRootNodeIndex, LeavesUnderNode);
95 if (uLeavesUnderRoot != uLeafCount)
96 Quit("WeightsFromTreee: Internal error, root count %u %u",
97 uLeavesUnderRoot, uLeafCount);
98
99 #if TRACE
100 Log("Node Leaves Length Strength\n");
101 Log("---- ------ -------- --------\n");
102 // 1234 123456 12345678 12345678
103 #endif
104
105 double *Strengths = new double[uNodeCount];
106 for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
107 {
108 if (tree.IsRoot(uNodeIndex))
109 {
110 Strengths[uNodeIndex] = 0.0;
111 continue;
112 }
113 const unsigned uParent = tree.GetParent(uNodeIndex);
114 const double dLength = tree.GetEdgeLength(uNodeIndex, uParent);
115 const unsigned uLeaves = LeavesUnderNode[uNodeIndex];
116 const double dStrength = dLength / (double) uLeaves;
117 Strengths[uNodeIndex] = dStrength;
118 #if TRACE
119 Log("%4u %6u %8g %8g\n", uNodeIndex, uLeaves, dLength, dStrength);
120 #endif
121 }
122
123 #if TRACE
124 Log("\n");
125 Log(" Seq Path..Weight\n");
126 Log("-------------------- ------------\n");
127 #endif
128 for (unsigned n = 0; n < uLeafCount; ++n)
129 {
130 const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
131 #if TRACE
132 Log("%20.20s %4u ", tree.GetLeafName(uLeafNodeIndex), uLeafNodeIndex);
133 #endif
134 if (!tree.IsLeaf(uLeafNodeIndex))
135 Quit("CalcClustalWWeights: leaf");
136
137 double dWeight = 0;
138 unsigned uNode = uLeafNodeIndex;
139 while (!tree.IsRoot(uNode))
140 {
141 dWeight += Strengths[uNode];
142 uNode = tree.GetParent(uNode);
143 #if TRACE
144 Log("->%u(%g)", uNode, Strengths[uNode]);
145 #endif
146 }
147 if (dWeight < 0.0001)
148 {
149 #if TRACE
150 Log("zero->one");
151 #endif
152 dWeight = 1.0;
153 }
154 Weights[n] = (WEIGHT) dWeight;
155 #if TRACE
156 Log(" = %g\n", dWeight);
157 #endif
158 }
159
160 delete[] Strengths;
161 delete[] LeavesUnderNode;
162
163 Normalize(Weights, uLeafCount);
164 }
165
SetClustalWWeights(const Tree & tree)166 void MSA::SetClustalWWeights(const Tree &tree)
167 {
168 const unsigned uLeafCount = tree.GetLeafCount();
169
170 WEIGHT *Weights = new WEIGHT[uLeafCount];
171
172 CalcClustalWWeights(tree, Weights);
173
174 for (unsigned n = 0; n < uLeafCount; ++n)
175 {
176 const WEIGHT w = Weights[n];
177 const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
178 const unsigned uId = tree.GetLeafId(uLeafNodeIndex);
179 const unsigned uSeqIndex = GetSeqIndex(uId);
180 #if DEBUG
181 if (GetSeqName(uSeqIndex) != tree.GetLeafName(uLeafNodeIndex))
182 Quit("MSA::SetClustalWWeights: names don't match");
183 #endif
184 SetSeqWeight(uSeqIndex, w);
185 }
186 NormalizeWeights((WEIGHT) 1.0);
187
188 delete[] Weights;
189 }
190