1 #include "muscle.h"
2 #include "tree.h"
3 #include "msa.h"
4 
5 /***
6 Compute weights by the CLUSTALW method.
7 Thompson, Higgins and Gibson (1994), CABIOS (10) 19-29;
8 see also CLUSTALW paper.
9 
10 Weights are computed from the edge lengths of a rooted tree.
11 
12 Define the strength of an edge to be its length divided by the number
13 of leaves under that edge. The weight of a sequence is then the sum
14 of edge strengths on the path from the root to the leaf.
15 
16 Example.
17 
18         0.2
19        -----A     0.1
20 	 -x         ------- B     0.7
21 	   --------y           ----------- C
22 	    0.3     ----------z
23                     0.4    -------------- D
24                                  0.8
25 
26 Edge	Length	Leaves	Strength
27 ----	-----	------	--------
28 xy		0.3		3		0.1
29 xA		0.2		1		0.2
30 yz		0.4		2		0.2
31 yB		0.1		1		0.1
32 zC		0.7		1		0.7
33 zD		0.8		1		0.8
34 
35 Leaf	Path		Strengths			Weight
36 ----	----		---------			------
37 A		xA			0.2					0.2
38 B		xy-yB		0.1 + 0.1			0.2
39 C		xy-yz-zC	0.1 + 0.2 + 0.7		1.0
40 D		xy-yz-zD	0.1 + 0.2 + 0.8		1.1
41 
42 ***/
43 
44 #define TRACE 0
45 
CountLeaves(const Tree & tree,unsigned uNodeIndex,unsigned LeavesUnderNode[])46 static unsigned CountLeaves(const Tree &tree, unsigned uNodeIndex,
47   unsigned LeavesUnderNode[])
48 	{
49 	if (tree.IsLeaf(uNodeIndex))
50 		{
51 		LeavesUnderNode[uNodeIndex] = 1;
52 		return 1;
53 		}
54 
55 	const unsigned uLeft = tree.GetLeft(uNodeIndex);
56 	const unsigned uRight = tree.GetRight(uNodeIndex);
57 	const unsigned uRightCount = CountLeaves(tree, uRight, LeavesUnderNode);
58 	const unsigned uLeftCount = CountLeaves(tree, uLeft, LeavesUnderNode);
59 	const unsigned uCount = uRightCount + uLeftCount;
60 	LeavesUnderNode[uNodeIndex] = uCount;
61 	return uCount;
62 	}
63 
CalcClustalWWeights(const Tree & tree,WEIGHT Weights[])64 void CalcClustalWWeights(const Tree &tree, WEIGHT Weights[])
65 	{
66 #if	TRACE
67 	Log("CalcClustalWWeights\n");
68 	tree.LogMe();
69 #endif
70 
71 	const unsigned uLeafCount = tree.GetLeafCount();
72 	if (0 == uLeafCount)
73 		return;
74 	else if (1 == uLeafCount)
75 		{
76 		Weights[0] = (WEIGHT) 1.0;
77 		return;
78 		}
79 	else if (2 == uLeafCount)
80 		{
81 		Weights[0] = (WEIGHT) 0.5;
82 		Weights[1] = (WEIGHT) 0.5;
83 		return;
84 		}
85 
86 	if (!tree.IsRooted())
87 		Quit("CalcClustalWWeights requires rooted tree");
88 
89 	const unsigned uNodeCount = tree.GetNodeCount();
90 	unsigned *LeavesUnderNode = new unsigned[uNodeCount];
91 	memset(LeavesUnderNode, 0, uNodeCount*sizeof(unsigned));
92 
93 	const unsigned uRootNodeIndex = tree.GetRootNodeIndex();
94 	unsigned uLeavesUnderRoot = CountLeaves(tree, uRootNodeIndex, LeavesUnderNode);
95 	if (uLeavesUnderRoot != uLeafCount)
96 		Quit("WeightsFromTreee: Internal error, root count %u %u",
97 		  uLeavesUnderRoot, uLeafCount);
98 
99 #if	TRACE
100 	Log("Node  Leaves    Length  Strength\n");
101 	Log("----  ------  --------  --------\n");
102 	//    1234  123456  12345678  12345678
103 #endif
104 
105 	double *Strengths = new double[uNodeCount];
106 	for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)
107 		{
108 		if (tree.IsRoot(uNodeIndex))
109 			{
110 			Strengths[uNodeIndex] = 0.0;
111 			continue;
112 			}
113 		const unsigned uParent = tree.GetParent(uNodeIndex);
114 		const double dLength = tree.GetEdgeLength(uNodeIndex, uParent);
115 		const unsigned uLeaves = LeavesUnderNode[uNodeIndex];
116 		const double dStrength = dLength / (double) uLeaves;
117 		Strengths[uNodeIndex] = dStrength;
118 #if	TRACE
119 		Log("%4u  %6u  %8g  %8g\n", uNodeIndex, uLeaves, dLength, dStrength);
120 #endif
121 		}
122 
123 #if	TRACE
124 	Log("\n");
125 	Log("                 Seq  Path..Weight\n");
126 	Log("--------------------  ------------\n");
127 #endif
128 	for (unsigned n = 0; n < uLeafCount; ++n)
129 		{
130 		const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
131 #if	TRACE
132 		Log("%20.20s  %4u ", tree.GetLeafName(uLeafNodeIndex), uLeafNodeIndex);
133 #endif
134 		if (!tree.IsLeaf(uLeafNodeIndex))
135 			Quit("CalcClustalWWeights: leaf");
136 
137 		double dWeight = 0;
138 		unsigned uNode = uLeafNodeIndex;
139 		while (!tree.IsRoot(uNode))
140 			{
141 			dWeight += Strengths[uNode];
142 			uNode = tree.GetParent(uNode);
143 #if	TRACE
144 			Log("->%u(%g)", uNode, Strengths[uNode]);
145 #endif
146 			}
147 		if (dWeight < 0.0001)
148 			{
149 #if	TRACE
150 			Log("zero->one");
151 #endif
152 			dWeight = 1.0;
153 			}
154 		Weights[n] = (WEIGHT) dWeight;
155 #if	TRACE
156 		Log(" = %g\n", dWeight);
157 #endif
158 		}
159 
160 	delete[] Strengths;
161 	delete[] LeavesUnderNode;
162 
163 	Normalize(Weights, uLeafCount);
164 	}
165 
SetClustalWWeights(const Tree & tree)166 void MSA::SetClustalWWeights(const Tree &tree)
167 	{
168 	const unsigned uLeafCount = tree.GetLeafCount();
169 
170     WEIGHT *Weights = new WEIGHT[uLeafCount];
171 
172 	CalcClustalWWeights(tree, Weights);
173 
174 	for (unsigned n = 0; n < uLeafCount; ++n)
175 		{
176 		const WEIGHT w = Weights[n];
177 		const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);
178 		const unsigned uId = tree.GetLeafId(uLeafNodeIndex);
179 		const unsigned uSeqIndex = GetSeqIndex(uId);
180 #if	DEBUG
181 		if (GetSeqName(uSeqIndex) != tree.GetLeafName(uLeafNodeIndex))
182 			Quit("MSA::SetClustalWWeights: names don't match");
183 #endif
184 		SetSeqWeight(uSeqIndex, w);
185 		}
186 	NormalizeWeights((WEIGHT) 1.0);
187 
188 	delete[] Weights;
189 	}
190