1 /**
2  * Author: Mark Larkin
3  *
4  * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
5  */
6 #ifdef HAVE_CONFIG_H
7     #include "config.h"
8 #endif
9 #include "ClusterTreeOutput.h"
10 
11 namespace clustalw
12 {
13 
ClusterTreeOutput(SeqInfo * seqInfo,int boot)14 ClusterTreeOutput::ClusterTreeOutput(SeqInfo* seqInfo, int boot)
15 : bootstrap(boot)
16 {
17     firstSeq = seqInfo->firstSeq;
18     lastSeq = seqInfo->lastSeq;
19     numSeqs = seqInfo->numSeqs;
20 }
21 
printTreeDesc(PhyloTree * phyloTree)22 void ClusterTreeOutput::printTreeDesc(PhyloTree* phyloTree)
23 {
24     for(int i = 1; i <= numSeqs; i++)
25     {
26         for(int j = 1; j <= numSeqs; j++)
27         {
28             cout << " " << phyloTree->treeDesc[i][j];
29         }
30         cout << "\n";
31     }
32 }
33 
34 
35 /**
36  * The function printPhylipTree is used to print out the unrooted clustered tree in
37  * phylip format.
38  * @param phyloTree A pointer to the PhyloTree struct that contains the description of the
39  *                  tree.
40  * @param tree The file to print the phylip tree to. Must be open!
41  * @param alignPtr The alignment object. Needed for the names.
42  * @param distMat The distance matrix that was used for the generation of the cluster.
43  * @param bootTotals Holds the bootstrap values. Only used if the tree has been bootstrapped
44  */
printPhylipTree(PhyloTree * phyloTree,ofstream * tree,Alignment * alignPtr,DistMatrix * distMat,vector<int> * bootTotals)45 void ClusterTreeOutput::printPhylipTree(PhyloTree* phyloTree, ofstream* tree,
46       Alignment *alignPtr, DistMatrix* distMat, vector<int>* bootTotals)
47 {
48     int oldRow;
49     if (lastSeq - firstSeq + 1 == 2)
50     {
51         (*tree) << "(" << alignPtr->getName(firstSeq) << ":" << fixed <<setprecision(5)
52                 << (*distMat)(firstSeq, firstSeq + 1) << "," << alignPtr->getName(firstSeq + 1)
53                 << ":" << fixed << setprecision(5)  << (*distMat)(firstSeq, firstSeq + 1)
54                 << ");";
55         return ;
56     }
57 
58     (*tree) << "(\n";
59 
60     oldRow = twoWaySplit(phyloTree, tree, lastSeq - firstSeq + 1 - 2,
61         1, alignPtr, bootTotals);
62 
63     (*tree) << ":" << fixed << setprecision(5)
64             << phyloTree->leftBranch[lastSeq - firstSeq + 1 - 2];
65 
66     if ((bootstrap == BS_BRANCH_LABELS) && (oldRow > 0) &&
67         ((*bootTotals)[oldRow] > 0))
68     {
69         (*tree) << "[" << (*bootTotals)[oldRow] << "]";
70     }
71     (*tree) << ",\n";
72 
73     oldRow = twoWaySplit(phyloTree, tree, lastSeq - firstSeq + 1 - 2,
74                          2, alignPtr, bootTotals);
75 
76     (*tree) << ":" << fixed << setprecision(5)
77             << phyloTree->leftBranch[lastSeq - firstSeq + 1 - 1];
78 
79     if ((bootstrap == BS_BRANCH_LABELS) && (oldRow > 0) &&
80         ((*bootTotals)[oldRow] > 0))
81     {
82         (*tree) << "[" << (*bootTotals)[oldRow] << "]";
83     }
84     (*tree) << ",\n";
85 
86     oldRow = twoWaySplit(phyloTree, tree, lastSeq - firstSeq + 1 - 2,
87                          3, alignPtr, bootTotals);
88 
89     (*tree) << ":" << fixed << setprecision(5)
90             << phyloTree->leftBranch[lastSeq - firstSeq + 1];
91 
92     if ((bootstrap == BS_BRANCH_LABELS) && (oldRow > 0) &&
93         ((*bootTotals)[oldRow] > 0))
94     {
95         (*tree) << "[" << (*bootTotals)[oldRow] << "]";
96     }
97     (*tree) << ")";
98 
99     if (bootstrap == BS_NODE_LABELS)
100     {
101         (*tree) << "TRICHOTOMY";
102     }
103     (*tree) << ";\n";
104 }
105 
twoWaySplit(PhyloTree * phyloTree,ofstream * tree,int startRow,int flag,Alignment * alignPtr,vector<int> * bootTotals)106 int ClusterTreeOutput::twoWaySplit(PhyloTree* phyloTree, ofstream* tree,
107         int startRow, int flag, Alignment *alignPtr, vector<int>* bootTotals)
108 {
109     int row, newRow = 0, oldRow, col, testCol = 0;
110     bool singleSeq;
111 
112     if (startRow != lastSeq - firstSeq + 1-2)
113     {
114         (*tree) <<  "(\n";
115     }
116 
117     for (col = 1; col <= lastSeq - firstSeq + 1; col++)
118     {
119         if (phyloTree->treeDesc[startRow][col] == flag)
120         {
121             testCol = col;
122             break;
123         }
124     }
125 
126     singleSeq = true;
127     for (row = startRow - 1; row >= 1; row--)
128     if (phyloTree->treeDesc[row][testCol] == 1)
129     {
130         singleSeq = false;
131         newRow = row;
132         break;
133     }
134 
135     if (singleSeq)
136     {
137         phyloTree->treeDesc[startRow][testCol] = 0;
138         (*tree) << alignPtr->getName(testCol + firstSeq - 1);
139         if (startRow == lastSeq - firstSeq + 1 - 2)
140         {
141             return (0);
142         }
143 
144         (*tree) << ":" << fixed << setprecision(5) << phyloTree->leftBranch[startRow]
145                 << ",\n";
146     }
147     else
148     {
149         for (col = 1; col <= lastSeq - firstSeq + 1; col++)
150         {
151             if ((phyloTree->treeDesc[startRow][col] == 1) &&
152                 (phyloTree->treeDesc[newRow][col] == 1))
153             {
154                 phyloTree->treeDesc[startRow][col] = 0;
155             }
156         }
157         oldRow = twoWaySplit(phyloTree, tree, newRow, 1, alignPtr, bootTotals);
158 
159         if (startRow == lastSeq - firstSeq + 1-2)
160         {
161             return (newRow);
162         }
163 
164         (*tree) << ":" << fixed << setprecision(5) << phyloTree->leftBranch[startRow];
165 
166         if ((bootstrap == BS_BRANCH_LABELS) && ((*bootTotals)[oldRow] > 0))
167         {
168             (*tree) << "[" << (*bootTotals)[oldRow] << "]";
169         }
170 
171         (*tree) << ",\n";
172     }
173 
174 
175     for (col = 1; col <= lastSeq - firstSeq + 1; col++)
176     if (phyloTree->treeDesc[startRow][col] == flag)
177     {
178         testCol = col;
179         break;
180     }
181 
182     singleSeq = true;
183     newRow = 0;
184     for (row = startRow - 1; row >= 1; row--)
185     if (phyloTree->treeDesc[row][testCol] == 1)
186     {
187         singleSeq = false;
188         newRow = row;
189         break;
190     }
191 
192     if (singleSeq)
193     {
194         phyloTree->treeDesc[startRow][testCol] = 0;
195         (*tree) << alignPtr->getName(testCol + firstSeq - 1);
196         (*tree) << ":" << fixed << setprecision(5)  << phyloTree->rightBranch[startRow]
197                 <<")\n";
198     }
199     else
200     {
201         for (col = 1; col <= lastSeq - firstSeq + 1; col++)
202         {
203             if ((phyloTree->treeDesc[startRow][col] == 1) &&
204                 (phyloTree->treeDesc[newRow][col] == 1))
205             {
206                 phyloTree->treeDesc[startRow][col] = 0;
207             }
208         }
209         oldRow = twoWaySplit(phyloTree, tree, newRow, 1, alignPtr, bootTotals);
210         (*tree) << ":" << fixed << setprecision(5)  << phyloTree->rightBranch[startRow];
211 
212         if ((bootstrap == BS_BRANCH_LABELS) && ((*bootTotals)[oldRow] > 0))
213         {
214             (*tree) << "[" << (*bootTotals)[oldRow] << "]";
215         }
216 
217         (*tree) << ")\n";
218     }
219     if ((bootstrap == BS_NODE_LABELS) && ((*bootTotals)[startRow] > 0))
220     {
221         (*tree) << (*bootTotals)[startRow];
222     }
223 
224     return (startRow);
225 }
226 
printNexusTree(PhyloTree * phyloTree,ofstream * tree,Alignment * alignPtr,DistMatrix * distMat,vector<int> * bootTotals)227 void ClusterTreeOutput::printNexusTree(PhyloTree* phyloTree, ofstream* tree,
228                    Alignment *alignPtr, DistMatrix* distMat, vector<int>* bootTotals)
229 {
230     int i;
231     int oldRow;
232 
233     (*tree) << "#NEXUS\n\n";
234 
235     (*tree) << "BEGIN TREES;\n\n";
236     (*tree) << "\tTRANSLATE\n";
237 
238     for(i = 1; i < numSeqs; i++)
239     {
240         (*tree) << "\t\t" << i << "\t" << alignPtr->getName(i) <<",\n";
241     }
242     (*tree) << "\t\t" << numSeqs << "\t" << alignPtr->getName(numSeqs) <<"\n";
243     (*tree) << "\t\t;\n";
244 
245     (*tree) << "\tUTREE PAUP_1= ";
246 
247     if(lastSeq - firstSeq + 1 == 2)
248     {
249         (*tree) << "(" << firstSeq << ":" << fixed << setprecision(5)
250         << (*distMat)(firstSeq, firstSeq + 1) << "," << firstSeq + 1 << ":"
251         << fixed << setprecision(5) << (*distMat)(firstSeq, firstSeq + 1) << ")";
252     }
253     else
254     {
255 
256         (*tree) << "(";
257 
258         oldRow = twoWaySplitNexus(phyloTree, tree,
259                        lastSeq - firstSeq + 1 - 2, 1, alignPtr, bootTotals);
260 
261         (*tree) << ":" << fixed << setprecision(5)
262                 << phyloTree->leftBranch[lastSeq-firstSeq+1-2];
263 
264         if ((bootstrap == BS_BRANCH_LABELS) && (oldRow > 0) && ((*bootTotals)[oldRow]>0))
265         {
266             (*tree) << "[" << (*bootTotals)[oldRow] << "]";
267         }
268 
269         (*tree) << ",";
270 
271         oldRow = twoWaySplitNexus(phyloTree, tree, lastSeq - firstSeq + 1 - 2, 2,
272                                   alignPtr, bootTotals);
273         (*tree) << ":" << fixed << setprecision(5)
274                        << phyloTree->leftBranch[lastSeq-firstSeq+1-1];
275 
276         if ((bootstrap==BS_BRANCH_LABELS) && (oldRow>0) && ((*bootTotals)[oldRow]>0))
277         {
278             (*tree) << "[" << (*bootTotals)[oldRow] << "]";
279         }
280 
281         (*tree) << ",";
282 
283         oldRow = twoWaySplitNexus(phyloTree, tree,
284                  lastSeq-firstSeq+1-2, 3, alignPtr, bootTotals);
285 
286         (*tree) << ":" << fixed << setprecision(5)
287                 << phyloTree->leftBranch[lastSeq-firstSeq+1];
288 
289         if ((bootstrap==BS_BRANCH_LABELS) && (oldRow>0) && ((*bootTotals)[oldRow]>0))
290         {
291             (*tree) << "[" << (*bootTotals)[oldRow] << "]";
292         }
293 
294         (*tree) << ")";
295 
296         if (bootstrap == BS_NODE_LABELS)
297         {
298             (*tree) << "TRICHOTOMY";
299         }
300         (*tree) << ";";
301     }
302     (*tree) << "\nENDBLOCK;\n";
303 }
304 
twoWaySplitNexus(PhyloTree * phyloTree,ofstream * tree,int startRow,int flag,Alignment * alignPtr,vector<int> * bootTotals)305 int ClusterTreeOutput::twoWaySplitNexus(PhyloTree* phyloTree, ofstream* tree,
306                 int startRow, int flag, Alignment *alignPtr, vector<int>* bootTotals)
307 {
308     int row, newRow = 0, oldRow, col, testCol = 0;
309     bool singleSeq;
310 
311     if (startRow != lastSeq - firstSeq + 1 - 2)
312     {
313         (*tree) <<  "(";
314     }
315 
316     for (col = 1; col <= lastSeq - firstSeq + 1; col++)
317     {
318         if (phyloTree->treeDesc[startRow][col] == flag)
319         {
320             testCol = col;
321             break;
322         }
323     }
324 
325     singleSeq = true;
326     for (row = startRow - 1; row >= 1; row--)
327     if (phyloTree->treeDesc[row][testCol] == 1)
328     {
329         singleSeq = false;
330         newRow = row;
331         break;
332     }
333 
334     if (singleSeq)
335     {
336         phyloTree->treeDesc[startRow][testCol] = 0;
337         (*tree) << testCol + firstSeq - 1;;
338         if (startRow == lastSeq - firstSeq + 1-2)
339         {
340             return (0);
341         }
342 
343         (*tree) << ":" << fixed << setprecision(5) << phyloTree->leftBranch[startRow] << ",";
344     }
345     else
346     {
347         for (col = 1; col <= lastSeq - firstSeq + 1; col++)
348         {
349             if ((phyloTree->treeDesc[startRow][col] == 1) &&
350                 (phyloTree->treeDesc[newRow][col] == 1))
351             {
352                 phyloTree->treeDesc[startRow][col] = 0;
353             }
354         }
355         oldRow = twoWaySplitNexus(phyloTree, tree, newRow, 1, alignPtr, bootTotals);
356 
357         if (startRow == lastSeq - firstSeq + 1-2)
358         {
359             return (newRow);
360         }
361 
362         (*tree) << ":" << fixed << setprecision(5) << phyloTree->leftBranch[startRow];
363         if ((bootstrap == BS_BRANCH_LABELS) && ((*bootTotals)[oldRow] > 0))
364         {
365             (*tree) << "[" << (*bootTotals)[oldRow] << "]";
366         }
367 
368         (*tree) << ",";
369     }
370 
371 
372     for (col = 1; col <= lastSeq - firstSeq + 1; col++)
373     if (phyloTree->treeDesc[startRow][col] == flag)
374     {
375         testCol = col;
376         break;
377     }
378 
379     singleSeq = true;
380     newRow = 0;
381     for (row = startRow - 1; row >= 1; row--)
382     if (phyloTree->treeDesc[row][testCol] == 1)
383     {
384         singleSeq = false;
385         newRow = row;
386         break;
387     }
388 
389     if (singleSeq)
390     {
391         phyloTree->treeDesc[startRow][testCol] = 0;
392         (*tree) << testCol + firstSeq - 1;
393         (*tree) << ":" << fixed << setprecision(5) << phyloTree->rightBranch[startRow] << ")";
394     }
395     else
396     {
397         for (col = 1; col <= lastSeq - firstSeq + 1; col++)
398         {
399             if ((phyloTree->treeDesc[startRow][col] == 1) &&
400                 (phyloTree->treeDesc[newRow][col] == 1))
401             {
402                 phyloTree->treeDesc[startRow][col] = 0;
403             }
404         }
405         oldRow = twoWaySplitNexus(phyloTree, tree, newRow, 1, alignPtr, bootTotals);
406 
407         (*tree) << ":" << fixed << setprecision(5) << phyloTree->rightBranch[startRow];
408         if ((bootstrap == BS_BRANCH_LABELS) && ((*bootTotals)[oldRow] > 0))
409         {
410             (*tree) << "[" << (*bootTotals)[oldRow] << "]";
411         }
412 
413         (*tree) << ")";
414     }
415     if ((bootstrap == BS_NODE_LABELS) && ((*bootTotals)[startRow] > 0))
416     {
417         (*tree) << (*bootTotals)[startRow];
418     }
419 
420     return (startRow);
421 }
422 
printTree(PhyloTree * phyloTree,ofstream * tree,vector<int> * totals)423 void ClusterTreeOutput::printTree(PhyloTree* phyloTree, ofstream* tree,
424                                   vector<int>* totals)
425 {
426     int row, col;
427 
428     (*tree) << "\n";
429 
430     for (row = 1; row <= lastSeq - firstSeq + 1 - 3; row++)
431     {
432         (*tree) << " \n";
433         for (col = 1; col <= lastSeq - firstSeq + 1; col++)
434         {
435             if (phyloTree->treeDesc[row][col] == 0)
436             {
437                 (*tree) << "*";
438             }
439             else
440             {
441                 (*tree) << ".";
442             }
443         }
444         if ((*totals)[row] > 0)
445         {
446             (*tree) << setw(7) << (*totals)[row];
447         }
448     }
449     (*tree) << " \n";
450     for (col = 1; col <= lastSeq - firstSeq + 1; col++)
451     {
452         (*tree) << setw(1) << phyloTree->treeDesc[lastSeq - firstSeq + 1 -2][col];
453     }
454     (*tree) << "\n";
455 }
456 
457 }
458