1 #include "muscle.h"
2 #include "profile.h"
3 #include "msa.h"
4 #include "pwpath.h"
5 #include "seqvect.h"
6 #include "clust.h"
7 #include "tree.h"
8 
9 #define TRACE 0
10 
11 struct Range
12 	{
13 	unsigned m_uBestColLeft;
14 	unsigned m_uBestColRight;
15 	};
16 
ListVertSavings(unsigned uColCount,unsigned uAnchorColCount,const Range * Ranges,unsigned uRangeCount)17 static void ListVertSavings(unsigned uColCount, unsigned uAnchorColCount,
18   const Range *Ranges, unsigned uRangeCount)
19 	{
20 	if (!g_bVerbose || !g_bAnchors)
21 		return;
22 	double dTotalArea = uColCount*uColCount;
23 	double dArea = 0.0;
24 	for (unsigned i = 0; i < uRangeCount; ++i)
25 		{
26 		unsigned uLength = Ranges[i].m_uBestColRight - Ranges[i].m_uBestColLeft;
27 		dArea += uLength*uLength;
28 		}
29 	double dPct = (dTotalArea - dArea)*100.0/dTotalArea;
30 	Log("Anchor columns found       %u\n", uAnchorColCount);
31 	Log("DP area saved by anchors   %-4.1f%%\n", dPct);
32 	}
33 
ColsToRanges(const unsigned BestCols[],unsigned uBestColCount,unsigned uColCount,Range Ranges[])34 static void ColsToRanges(const unsigned BestCols[], unsigned uBestColCount,
35   unsigned uColCount, Range Ranges[])
36 	{
37 // N best columns produces N+1 vertical blocks.
38 	const unsigned uRangeCount = uBestColCount + 1;
39 	for (unsigned uIndex = 0; uIndex < uRangeCount ; ++uIndex)
40 		{
41 		unsigned uBestColLeft = 0;
42 		if (uIndex > 0)
43 			uBestColLeft = BestCols[uIndex-1];
44 
45 		unsigned uBestColRight = uColCount;
46 		if (uIndex < uBestColCount)
47 			uBestColRight = BestCols[uIndex];
48 
49 		Ranges[uIndex].m_uBestColLeft = uBestColLeft;
50 		Ranges[uIndex].m_uBestColRight = uBestColRight;
51 		}
52 	}
53 
54 // Return true if any changes made
RefineVert(MSA & msaIn,const Tree & tree,unsigned uIters)55 bool RefineVert(MSA &msaIn, const Tree &tree, unsigned uIters)
56 	{
57 	bool bAnyChanges = false;
58 
59 	const unsigned uColCountIn = msaIn.GetColCount();
60 	const unsigned uSeqCountIn = msaIn.GetSeqCount();
61 
62 	if (uColCountIn < 3 || uSeqCountIn < 3)
63 		return false;
64 
65 	unsigned *AnchorCols = new unsigned[uColCountIn];
66 	unsigned uAnchorColCount;
67 	SetMSAWeightsMuscle(msaIn);
68 	FindAnchorCols(msaIn, AnchorCols, &uAnchorColCount);
69 
70 	const unsigned uRangeCount = uAnchorColCount + 1;
71 	Range *Ranges = new Range[uRangeCount];
72 
73 #if	TRACE
74 	Log("%u ranges\n", uRangeCount);
75 #endif
76 
77 	ColsToRanges(AnchorCols, uAnchorColCount, uColCountIn, Ranges);
78 	ListVertSavings(uColCountIn, uAnchorColCount, Ranges, uRangeCount);
79 
80 #if	TRACE
81 	{
82 	Log("Anchor cols: ");
83 	for (unsigned i = 0; i < uAnchorColCount; ++i)
84 		Log(" %u", AnchorCols[i]);
85 	Log("\n");
86 
87 	Log("Ranges:\n");
88 	for (unsigned i = 0; i < uRangeCount; ++i)
89 		Log("%4u - %4u\n", Ranges[i].m_uBestColLeft, Ranges[i].m_uBestColRight);
90 	}
91 #endif
92 
93 	delete[] AnchorCols;
94 
95 	MSA msaOut;
96 	msaOut.SetSize(uSeqCountIn, 0);
97 
98 	for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCountIn; ++uSeqIndex)
99 		{
100 		const char *ptrName = msaIn.GetSeqName(uSeqIndex);
101 		unsigned uId = msaIn.GetSeqId(uSeqIndex);
102 		msaOut.SetSeqName(uSeqIndex, ptrName);
103 		msaOut.SetSeqId(uSeqIndex, uId);
104 		}
105 
106 	for (unsigned uRangeIndex = 0; uRangeIndex < uRangeCount; ++uRangeIndex)
107 		{
108 		MSA msaRange;
109 
110 		const Range &r = Ranges[uRangeIndex];
111 
112 		const unsigned uFromColIndex = r.m_uBestColLeft;
113 		const unsigned uRangeColCount = r.m_uBestColRight - uFromColIndex;
114 
115 		if (0 == uRangeColCount)
116 			continue;
117 		else if (1 == uRangeColCount)
118 			{
119 			MSAFromColRange(msaIn, uFromColIndex, 1, msaRange);
120 			MSAAppend(msaOut, msaRange);
121 			continue;
122 			}
123 		MSAFromColRange(msaIn, uFromColIndex, uRangeColCount, msaRange);
124 
125 #if	TRACE
126 		Log("\n-------------\n");
127 		Log("Range %u - %u count=%u\n", r.m_uBestColLeft, r.m_uBestColRight, uRangeColCount);
128 		Log("Before:\n");
129 		msaRange.LogMe();
130 #endif
131 
132 		bool bLockLeft = (0 != uRangeIndex);
133 		bool bLockRight = (uRangeCount - 1 != uRangeIndex);
134 		bool bAnyChangesThisBlock = RefineHoriz(msaRange, tree, uIters, bLockLeft, bLockRight);
135 		bAnyChanges = (bAnyChanges || bAnyChangesThisBlock);
136 
137 #if	TRACE
138 		Log("After:\n");
139 		msaRange.LogMe();
140 #endif
141 
142 		MSAAppend(msaOut, msaRange);
143 
144 #if	TRACE
145 		Log("msaOut after Cat:\n");
146 		msaOut.LogMe();
147 #endif
148 		}
149 
150 #if	DEBUG
151 // Sanity check
152 	AssertMSAEqIgnoreCaseAndGaps(msaIn, msaOut);
153 #endif
154 
155 	delete[] Ranges;
156 	if (bAnyChanges)
157 		msaIn.Copy(msaOut);
158 	return bAnyChanges;
159 	}
160