1 /**
2 * Author: Mark Larkin
3 *
4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
5 */
6 #ifdef HAVE_CONFIG_H
7 #include "config.h"
8 #endif
9 #include "ClusterTreeOutput.h"
10
11 namespace clustalw
12 {
13
ClusterTreeOutput(SeqInfo * seqInfo,int boot)14 ClusterTreeOutput::ClusterTreeOutput(SeqInfo* seqInfo, int boot)
15 : bootstrap(boot)
16 {
17 firstSeq = seqInfo->firstSeq;
18 lastSeq = seqInfo->lastSeq;
19 numSeqs = seqInfo->numSeqs;
20 }
21
printTreeDesc(PhyloTree * phyloTree)22 void ClusterTreeOutput::printTreeDesc(PhyloTree* phyloTree)
23 {
24 for(int i = 1; i <= numSeqs; i++)
25 {
26 for(int j = 1; j <= numSeqs; j++)
27 {
28 cout << " " << phyloTree->treeDesc[i][j];
29 }
30 cout << "\n";
31 }
32 }
33
34
35 /**
36 * The function printPhylipTree is used to print out the unrooted clustered tree in
37 * phylip format.
38 * @param phyloTree A pointer to the PhyloTree struct that contains the description of the
39 * tree.
40 * @param tree The file to print the phylip tree to. Must be open!
41 * @param alignPtr The alignment object. Needed for the names.
42 * @param distMat The distance matrix that was used for the generation of the cluster.
43 * @param bootTotals Holds the bootstrap values. Only used if the tree has been bootstrapped
44 */
printPhylipTree(PhyloTree * phyloTree,ofstream * tree,Alignment * alignPtr,DistMatrix * distMat,vector<int> * bootTotals)45 void ClusterTreeOutput::printPhylipTree(PhyloTree* phyloTree, ofstream* tree,
46 Alignment *alignPtr, DistMatrix* distMat, vector<int>* bootTotals)
47 {
48 int oldRow;
49 if (lastSeq - firstSeq + 1 == 2)
50 {
51 (*tree) << "(" << alignPtr->getName(firstSeq) << ":" << fixed <<setprecision(5)
52 << (*distMat)(firstSeq, firstSeq + 1) << "," << alignPtr->getName(firstSeq + 1)
53 << ":" << fixed << setprecision(5) << (*distMat)(firstSeq, firstSeq + 1)
54 << ");";
55 return ;
56 }
57
58 (*tree) << "(\n";
59
60 oldRow = twoWaySplit(phyloTree, tree, lastSeq - firstSeq + 1 - 2,
61 1, alignPtr, bootTotals);
62
63 (*tree) << ":" << fixed << setprecision(5)
64 << phyloTree->leftBranch[lastSeq - firstSeq + 1 - 2];
65
66 if ((bootstrap == BS_BRANCH_LABELS) && (oldRow > 0) &&
67 ((*bootTotals)[oldRow] > 0))
68 {
69 (*tree) << "[" << (*bootTotals)[oldRow] << "]";
70 }
71 (*tree) << ",\n";
72
73 oldRow = twoWaySplit(phyloTree, tree, lastSeq - firstSeq + 1 - 2,
74 2, alignPtr, bootTotals);
75
76 (*tree) << ":" << fixed << setprecision(5)
77 << phyloTree->leftBranch[lastSeq - firstSeq + 1 - 1];
78
79 if ((bootstrap == BS_BRANCH_LABELS) && (oldRow > 0) &&
80 ((*bootTotals)[oldRow] > 0))
81 {
82 (*tree) << "[" << (*bootTotals)[oldRow] << "]";
83 }
84 (*tree) << ",\n";
85
86 oldRow = twoWaySplit(phyloTree, tree, lastSeq - firstSeq + 1 - 2,
87 3, alignPtr, bootTotals);
88
89 (*tree) << ":" << fixed << setprecision(5)
90 << phyloTree->leftBranch[lastSeq - firstSeq + 1];
91
92 if ((bootstrap == BS_BRANCH_LABELS) && (oldRow > 0) &&
93 ((*bootTotals)[oldRow] > 0))
94 {
95 (*tree) << "[" << (*bootTotals)[oldRow] << "]";
96 }
97 (*tree) << ")";
98
99 if (bootstrap == BS_NODE_LABELS)
100 {
101 (*tree) << "TRICHOTOMY";
102 }
103 (*tree) << ";\n";
104 }
105
twoWaySplit(PhyloTree * phyloTree,ofstream * tree,int startRow,int flag,Alignment * alignPtr,vector<int> * bootTotals)106 int ClusterTreeOutput::twoWaySplit(PhyloTree* phyloTree, ofstream* tree,
107 int startRow, int flag, Alignment *alignPtr, vector<int>* bootTotals)
108 {
109 int row, newRow = 0, oldRow, col, testCol = 0;
110 bool singleSeq;
111
112 if (startRow != lastSeq - firstSeq + 1-2)
113 {
114 (*tree) << "(\n";
115 }
116
117 for (col = 1; col <= lastSeq - firstSeq + 1; col++)
118 {
119 if (phyloTree->treeDesc[startRow][col] == flag)
120 {
121 testCol = col;
122 break;
123 }
124 }
125
126 singleSeq = true;
127 for (row = startRow - 1; row >= 1; row--)
128 if (phyloTree->treeDesc[row][testCol] == 1)
129 {
130 singleSeq = false;
131 newRow = row;
132 break;
133 }
134
135 if (singleSeq)
136 {
137 phyloTree->treeDesc[startRow][testCol] = 0;
138 (*tree) << alignPtr->getName(testCol + firstSeq - 1);
139 if (startRow == lastSeq - firstSeq + 1 - 2)
140 {
141 return (0);
142 }
143
144 (*tree) << ":" << fixed << setprecision(5) << phyloTree->leftBranch[startRow]
145 << ",\n";
146 }
147 else
148 {
149 for (col = 1; col <= lastSeq - firstSeq + 1; col++)
150 {
151 if ((phyloTree->treeDesc[startRow][col] == 1) &&
152 (phyloTree->treeDesc[newRow][col] == 1))
153 {
154 phyloTree->treeDesc[startRow][col] = 0;
155 }
156 }
157 oldRow = twoWaySplit(phyloTree, tree, newRow, 1, alignPtr, bootTotals);
158
159 if (startRow == lastSeq - firstSeq + 1-2)
160 {
161 return (newRow);
162 }
163
164 (*tree) << ":" << fixed << setprecision(5) << phyloTree->leftBranch[startRow];
165
166 if ((bootstrap == BS_BRANCH_LABELS) && ((*bootTotals)[oldRow] > 0))
167 {
168 (*tree) << "[" << (*bootTotals)[oldRow] << "]";
169 }
170
171 (*tree) << ",\n";
172 }
173
174
175 for (col = 1; col <= lastSeq - firstSeq + 1; col++)
176 if (phyloTree->treeDesc[startRow][col] == flag)
177 {
178 testCol = col;
179 break;
180 }
181
182 singleSeq = true;
183 newRow = 0;
184 for (row = startRow - 1; row >= 1; row--)
185 if (phyloTree->treeDesc[row][testCol] == 1)
186 {
187 singleSeq = false;
188 newRow = row;
189 break;
190 }
191
192 if (singleSeq)
193 {
194 phyloTree->treeDesc[startRow][testCol] = 0;
195 (*tree) << alignPtr->getName(testCol + firstSeq - 1);
196 (*tree) << ":" << fixed << setprecision(5) << phyloTree->rightBranch[startRow]
197 <<")\n";
198 }
199 else
200 {
201 for (col = 1; col <= lastSeq - firstSeq + 1; col++)
202 {
203 if ((phyloTree->treeDesc[startRow][col] == 1) &&
204 (phyloTree->treeDesc[newRow][col] == 1))
205 {
206 phyloTree->treeDesc[startRow][col] = 0;
207 }
208 }
209 oldRow = twoWaySplit(phyloTree, tree, newRow, 1, alignPtr, bootTotals);
210 (*tree) << ":" << fixed << setprecision(5) << phyloTree->rightBranch[startRow];
211
212 if ((bootstrap == BS_BRANCH_LABELS) && ((*bootTotals)[oldRow] > 0))
213 {
214 (*tree) << "[" << (*bootTotals)[oldRow] << "]";
215 }
216
217 (*tree) << ")\n";
218 }
219 if ((bootstrap == BS_NODE_LABELS) && ((*bootTotals)[startRow] > 0))
220 {
221 (*tree) << (*bootTotals)[startRow];
222 }
223
224 return (startRow);
225 }
226
printNexusTree(PhyloTree * phyloTree,ofstream * tree,Alignment * alignPtr,DistMatrix * distMat,vector<int> * bootTotals)227 void ClusterTreeOutput::printNexusTree(PhyloTree* phyloTree, ofstream* tree,
228 Alignment *alignPtr, DistMatrix* distMat, vector<int>* bootTotals)
229 {
230 int i;
231 int oldRow;
232
233 (*tree) << "#NEXUS\n\n";
234
235 (*tree) << "BEGIN TREES;\n\n";
236 (*tree) << "\tTRANSLATE\n";
237
238 for(i = 1; i < numSeqs; i++)
239 {
240 (*tree) << "\t\t" << i << "\t" << alignPtr->getName(i) <<",\n";
241 }
242 (*tree) << "\t\t" << numSeqs << "\t" << alignPtr->getName(numSeqs) <<"\n";
243 (*tree) << "\t\t;\n";
244
245 (*tree) << "\tUTREE PAUP_1= ";
246
247 if(lastSeq - firstSeq + 1 == 2)
248 {
249 (*tree) << "(" << firstSeq << ":" << fixed << setprecision(5)
250 << (*distMat)(firstSeq, firstSeq + 1) << "," << firstSeq + 1 << ":"
251 << fixed << setprecision(5) << (*distMat)(firstSeq, firstSeq + 1) << ")";
252 }
253 else
254 {
255
256 (*tree) << "(";
257
258 oldRow = twoWaySplitNexus(phyloTree, tree,
259 lastSeq - firstSeq + 1 - 2, 1, alignPtr, bootTotals);
260
261 (*tree) << ":" << fixed << setprecision(5)
262 << phyloTree->leftBranch[lastSeq-firstSeq+1-2];
263
264 if ((bootstrap == BS_BRANCH_LABELS) && (oldRow > 0) && ((*bootTotals)[oldRow]>0))
265 {
266 (*tree) << "[" << (*bootTotals)[oldRow] << "]";
267 }
268
269 (*tree) << ",";
270
271 oldRow = twoWaySplitNexus(phyloTree, tree, lastSeq - firstSeq + 1 - 2, 2,
272 alignPtr, bootTotals);
273 (*tree) << ":" << fixed << setprecision(5)
274 << phyloTree->leftBranch[lastSeq-firstSeq+1-1];
275
276 if ((bootstrap==BS_BRANCH_LABELS) && (oldRow>0) && ((*bootTotals)[oldRow]>0))
277 {
278 (*tree) << "[" << (*bootTotals)[oldRow] << "]";
279 }
280
281 (*tree) << ",";
282
283 oldRow = twoWaySplitNexus(phyloTree, tree,
284 lastSeq-firstSeq+1-2, 3, alignPtr, bootTotals);
285
286 (*tree) << ":" << fixed << setprecision(5)
287 << phyloTree->leftBranch[lastSeq-firstSeq+1];
288
289 if ((bootstrap==BS_BRANCH_LABELS) && (oldRow>0) && ((*bootTotals)[oldRow]>0))
290 {
291 (*tree) << "[" << (*bootTotals)[oldRow] << "]";
292 }
293
294 (*tree) << ")";
295
296 if (bootstrap == BS_NODE_LABELS)
297 {
298 (*tree) << "TRICHOTOMY";
299 }
300 (*tree) << ";";
301 }
302 (*tree) << "\nENDBLOCK;\n";
303 }
304
twoWaySplitNexus(PhyloTree * phyloTree,ofstream * tree,int startRow,int flag,Alignment * alignPtr,vector<int> * bootTotals)305 int ClusterTreeOutput::twoWaySplitNexus(PhyloTree* phyloTree, ofstream* tree,
306 int startRow, int flag, Alignment *alignPtr, vector<int>* bootTotals)
307 {
308 int row, newRow = 0, oldRow, col, testCol = 0;
309 bool singleSeq;
310
311 if (startRow != lastSeq - firstSeq + 1 - 2)
312 {
313 (*tree) << "(";
314 }
315
316 for (col = 1; col <= lastSeq - firstSeq + 1; col++)
317 {
318 if (phyloTree->treeDesc[startRow][col] == flag)
319 {
320 testCol = col;
321 break;
322 }
323 }
324
325 singleSeq = true;
326 for (row = startRow - 1; row >= 1; row--)
327 if (phyloTree->treeDesc[row][testCol] == 1)
328 {
329 singleSeq = false;
330 newRow = row;
331 break;
332 }
333
334 if (singleSeq)
335 {
336 phyloTree->treeDesc[startRow][testCol] = 0;
337 (*tree) << testCol + firstSeq - 1;;
338 if (startRow == lastSeq - firstSeq + 1-2)
339 {
340 return (0);
341 }
342
343 (*tree) << ":" << fixed << setprecision(5) << phyloTree->leftBranch[startRow] << ",";
344 }
345 else
346 {
347 for (col = 1; col <= lastSeq - firstSeq + 1; col++)
348 {
349 if ((phyloTree->treeDesc[startRow][col] == 1) &&
350 (phyloTree->treeDesc[newRow][col] == 1))
351 {
352 phyloTree->treeDesc[startRow][col] = 0;
353 }
354 }
355 oldRow = twoWaySplitNexus(phyloTree, tree, newRow, 1, alignPtr, bootTotals);
356
357 if (startRow == lastSeq - firstSeq + 1-2)
358 {
359 return (newRow);
360 }
361
362 (*tree) << ":" << fixed << setprecision(5) << phyloTree->leftBranch[startRow];
363 if ((bootstrap == BS_BRANCH_LABELS) && ((*bootTotals)[oldRow] > 0))
364 {
365 (*tree) << "[" << (*bootTotals)[oldRow] << "]";
366 }
367
368 (*tree) << ",";
369 }
370
371
372 for (col = 1; col <= lastSeq - firstSeq + 1; col++)
373 if (phyloTree->treeDesc[startRow][col] == flag)
374 {
375 testCol = col;
376 break;
377 }
378
379 singleSeq = true;
380 newRow = 0;
381 for (row = startRow - 1; row >= 1; row--)
382 if (phyloTree->treeDesc[row][testCol] == 1)
383 {
384 singleSeq = false;
385 newRow = row;
386 break;
387 }
388
389 if (singleSeq)
390 {
391 phyloTree->treeDesc[startRow][testCol] = 0;
392 (*tree) << testCol + firstSeq - 1;
393 (*tree) << ":" << fixed << setprecision(5) << phyloTree->rightBranch[startRow] << ")";
394 }
395 else
396 {
397 for (col = 1; col <= lastSeq - firstSeq + 1; col++)
398 {
399 if ((phyloTree->treeDesc[startRow][col] == 1) &&
400 (phyloTree->treeDesc[newRow][col] == 1))
401 {
402 phyloTree->treeDesc[startRow][col] = 0;
403 }
404 }
405 oldRow = twoWaySplitNexus(phyloTree, tree, newRow, 1, alignPtr, bootTotals);
406
407 (*tree) << ":" << fixed << setprecision(5) << phyloTree->rightBranch[startRow];
408 if ((bootstrap == BS_BRANCH_LABELS) && ((*bootTotals)[oldRow] > 0))
409 {
410 (*tree) << "[" << (*bootTotals)[oldRow] << "]";
411 }
412
413 (*tree) << ")";
414 }
415 if ((bootstrap == BS_NODE_LABELS) && ((*bootTotals)[startRow] > 0))
416 {
417 (*tree) << (*bootTotals)[startRow];
418 }
419
420 return (startRow);
421 }
422
printTree(PhyloTree * phyloTree,ofstream * tree,vector<int> * totals)423 void ClusterTreeOutput::printTree(PhyloTree* phyloTree, ofstream* tree,
424 vector<int>* totals)
425 {
426 int row, col;
427
428 (*tree) << "\n";
429
430 for (row = 1; row <= lastSeq - firstSeq + 1 - 3; row++)
431 {
432 (*tree) << " \n";
433 for (col = 1; col <= lastSeq - firstSeq + 1; col++)
434 {
435 if (phyloTree->treeDesc[row][col] == 0)
436 {
437 (*tree) << "*";
438 }
439 else
440 {
441 (*tree) << ".";
442 }
443 }
444 if ((*totals)[row] > 0)
445 {
446 (*tree) << setw(7) << (*totals)[row];
447 }
448 }
449 (*tree) << " \n";
450 for (col = 1; col <= lastSeq - firstSeq + 1; col++)
451 {
452 (*tree) << setw(1) << phyloTree->treeDesc[lastSeq - firstSeq + 1 -2][col];
453 }
454 (*tree) << "\n";
455 }
456
457 }
458