1 #include "muscle.h"
2 #include "profile.h"
3 #include "msa.h"
4 #include "pwpath.h"
5 #include "seqvect.h"
6 #include "clust.h"
7 #include "tree.h"
8
9 #define TRACE 0
10
11 struct Range
12 {
13 unsigned m_uBestColLeft;
14 unsigned m_uBestColRight;
15 };
16
ListVertSavings(unsigned uColCount,unsigned uAnchorColCount,const Range * Ranges,unsigned uRangeCount)17 static void ListVertSavings(unsigned uColCount, unsigned uAnchorColCount,
18 const Range *Ranges, unsigned uRangeCount)
19 {
20 if (!g_bVerbose || !g_bAnchors)
21 return;
22 double dTotalArea = uColCount*uColCount;
23 double dArea = 0.0;
24 for (unsigned i = 0; i < uRangeCount; ++i)
25 {
26 unsigned uLength = Ranges[i].m_uBestColRight - Ranges[i].m_uBestColLeft;
27 dArea += uLength*uLength;
28 }
29 double dPct = (dTotalArea - dArea)*100.0/dTotalArea;
30 Log("Anchor columns found %u\n", uAnchorColCount);
31 Log("DP area saved by anchors %-4.1f%%\n", dPct);
32 }
33
ColsToRanges(const unsigned BestCols[],unsigned uBestColCount,unsigned uColCount,Range Ranges[])34 static void ColsToRanges(const unsigned BestCols[], unsigned uBestColCount,
35 unsigned uColCount, Range Ranges[])
36 {
37 // N best columns produces N+1 vertical blocks.
38 const unsigned uRangeCount = uBestColCount + 1;
39 for (unsigned uIndex = 0; uIndex < uRangeCount ; ++uIndex)
40 {
41 unsigned uBestColLeft = 0;
42 if (uIndex > 0)
43 uBestColLeft = BestCols[uIndex-1];
44
45 unsigned uBestColRight = uColCount;
46 if (uIndex < uBestColCount)
47 uBestColRight = BestCols[uIndex];
48
49 Ranges[uIndex].m_uBestColLeft = uBestColLeft;
50 Ranges[uIndex].m_uBestColRight = uBestColRight;
51 }
52 }
53
54 // Return true if any changes made
RefineVert(MSA & msaIn,const Tree & tree,unsigned uIters)55 bool RefineVert(MSA &msaIn, const Tree &tree, unsigned uIters)
56 {
57 bool bAnyChanges = false;
58
59 const unsigned uColCountIn = msaIn.GetColCount();
60 const unsigned uSeqCountIn = msaIn.GetSeqCount();
61
62 if (uColCountIn < 3 || uSeqCountIn < 3)
63 return false;
64
65 unsigned *AnchorCols = new unsigned[uColCountIn];
66 unsigned uAnchorColCount;
67 SetMSAWeightsMuscle(msaIn);
68 FindAnchorCols(msaIn, AnchorCols, &uAnchorColCount);
69
70 const unsigned uRangeCount = uAnchorColCount + 1;
71 Range *Ranges = new Range[uRangeCount];
72
73 #if TRACE
74 Log("%u ranges\n", uRangeCount);
75 #endif
76
77 ColsToRanges(AnchorCols, uAnchorColCount, uColCountIn, Ranges);
78 ListVertSavings(uColCountIn, uAnchorColCount, Ranges, uRangeCount);
79
80 #if TRACE
81 {
82 Log("Anchor cols: ");
83 for (unsigned i = 0; i < uAnchorColCount; ++i)
84 Log(" %u", AnchorCols[i]);
85 Log("\n");
86
87 Log("Ranges:\n");
88 for (unsigned i = 0; i < uRangeCount; ++i)
89 Log("%4u - %4u\n", Ranges[i].m_uBestColLeft, Ranges[i].m_uBestColRight);
90 }
91 #endif
92
93 delete[] AnchorCols;
94
95 MSA msaOut;
96 msaOut.SetSize(uSeqCountIn, 0);
97
98 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCountIn; ++uSeqIndex)
99 {
100 const char *ptrName = msaIn.GetSeqName(uSeqIndex);
101 unsigned uId = msaIn.GetSeqId(uSeqIndex);
102 msaOut.SetSeqName(uSeqIndex, ptrName);
103 msaOut.SetSeqId(uSeqIndex, uId);
104 }
105
106 for (unsigned uRangeIndex = 0; uRangeIndex < uRangeCount; ++uRangeIndex)
107 {
108 MSA msaRange;
109
110 const Range &r = Ranges[uRangeIndex];
111
112 const unsigned uFromColIndex = r.m_uBestColLeft;
113 const unsigned uRangeColCount = r.m_uBestColRight - uFromColIndex;
114
115 if (0 == uRangeColCount)
116 continue;
117 else if (1 == uRangeColCount)
118 {
119 MSAFromColRange(msaIn, uFromColIndex, 1, msaRange);
120 MSAAppend(msaOut, msaRange);
121 continue;
122 }
123 MSAFromColRange(msaIn, uFromColIndex, uRangeColCount, msaRange);
124
125 #if TRACE
126 Log("\n-------------\n");
127 Log("Range %u - %u count=%u\n", r.m_uBestColLeft, r.m_uBestColRight, uRangeColCount);
128 Log("Before:\n");
129 msaRange.LogMe();
130 #endif
131
132 bool bLockLeft = (0 != uRangeIndex);
133 bool bLockRight = (uRangeCount - 1 != uRangeIndex);
134 bool bAnyChangesThisBlock = RefineHoriz(msaRange, tree, uIters, bLockLeft, bLockRight);
135 bAnyChanges = (bAnyChanges || bAnyChangesThisBlock);
136
137 #if TRACE
138 Log("After:\n");
139 msaRange.LogMe();
140 #endif
141
142 MSAAppend(msaOut, msaRange);
143
144 #if TRACE
145 Log("msaOut after Cat:\n");
146 msaOut.LogMe();
147 #endif
148 }
149
150 #if DEBUG
151 // Sanity check
152 AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);
153 #endif
154
155 delete[] Ranges;
156 if (bAnyChanges)
157 msaIn.Copy(msaOut);
158 return bAnyChanges;
159 }
160