1 // ==========================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2010, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above copyright
13 // notice, this list of conditions and the following disclaimer in the
14 // documentation and/or other materials provided with the distribution.
15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 // its contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32
33 #ifndef SEQAN_HEADER_GRAPH_ALIGN_BANDED_SMITH_WATERMAN_CLUMP_H
34 #define SEQAN_HEADER_GRAPH_ALIGN_BANDED_SMITH_WATERMAN_CLUMP_H
35
36 namespace SEQAN_NAMESPACE_MAIN
37 {
38
39 //////////////////////////////////////////////////////////////////////////////
40
41 template<typename TString>
42 inline void
_initAlign(Graph<Alignment<StringSet<TString,Dependent<>>>> & align,StringSet<TString,Dependent<>> const & str)43 _initAlign(Graph<Alignment<StringSet<TString, Dependent<> > > >& align,
44 StringSet<TString, Dependent<> > const& str) {
45 SEQAN_CHECKPOINT
46 assignStringSet(align, str);
47 }
48
49 template<typename TString, typename TPos>
50 inline void
_finishAlign(Graph<Alignment<StringSet<TString,Dependent<>>>> &,TPos,TPos,TPos,TPos)51 _finishAlign(Graph<Alignment<StringSet<TString, Dependent<> > > >&,
52 TPos,
53 TPos,
54 TPos,
55 TPos) {
56 SEQAN_CHECKPOINT
57 // Nothing to be done?
58 //stringSet(g)[0] = infix(stringSet(g)[0], begin1, end1);
59 //stringSet(g)[1] = infix(stringSet(g)[1], begin2, end2);
60 }
61
62 //////////////////////////////////////////////////////////////////////////////
63
64 template <typename TStringSet, typename TAlign, typename TTrace, typename TVal, typename TIndexPair, typename TDiagonal, typename TForbidden>
65 inline void
_alignBandedSmithWatermanTrace(TStringSet const & str,TAlign & align,TTrace const & trace,TVal const initialDir,TIndexPair const & indexPair,TDiagonal diagL,TDiagonal diagU,TForbidden & forbidden)66 _alignBandedSmithWatermanTrace(TStringSet const& str,
67 TAlign& align,
68 TTrace const& trace,
69 TVal const initialDir,
70 TIndexPair const& indexPair,
71 TDiagonal diagL,
72 TDiagonal diagU,
73 TForbidden& forbidden) {
74 SEQAN_CHECKPOINT
75 typedef typename Value<TStringSet>::Type TString;
76 typedef typename Id<TStringSet>::Type TId;
77 typedef typename Size<TTrace>::Type TSize;
78 typedef typename Value<TTrace>::Type TTraceValue;
79
80 // Traceback values
81 TTraceValue Diagonal = 0; TTraceValue Horizontal = 1; TTraceValue Vertical = 2; TTraceValue Stop = 3;
82
83 // Initialization
84 _initAlign(align, str);
85 TString const& str1 = str[0];
86 TString const& str2 = str[1];
87 TId id1 = positionToId(const_cast<TStringSet&>(str), 0);
88 TId id2 = positionToId(const_cast<TStringSet&>(str), 1);
89 TSize len1 = length(str1);
90 TSize len2 = length(str2);
91 TSize lo_row = (diagU <= 0) ? -diagU : 0;
92 TSize diagonalWidth = (TSize) (diagU - diagL + 1);
93
94 // Start the trace from the cell with the max value
95 TSize row = indexPair[0];
96 TSize col = indexPair[1];
97 TSize endRow = indexPair[0] + lo_row;
98 TSize endCol = indexPair[1] + diagL + endRow;
99 TSize actualRow = row + lo_row;
100 TSize actualCol = col + diagL + actualRow;
101 if ((actualCol == 0) || (actualRow == 0)) return;
102 if (actualCol < len1) _alignTracePrint(align, str, id1, actualCol, id2, actualRow, len1 - actualCol, Horizontal);
103 if (actualRow < len2) _alignTracePrint(align, str, id1, actualCol, id2, actualRow, len2 - actualRow, Vertical);
104 _setForbiddenCell(forbidden, row+1, col+1, diagonalWidth);
105
106 TTraceValue traceValue = initialDir;
107 TTraceValue nextTraceValue = traceValue;
108 if (traceValue == Diagonal) nextTraceValue = trace[(--row) * diagonalWidth + col];
109 else if (traceValue == Vertical) nextTraceValue = trace[(--row) * diagonalWidth + (++col)];
110 else if (traceValue == Horizontal) nextTraceValue = trace[row * diagonalWidth + (--col)];
111 actualRow = row + lo_row;
112 actualCol = col + diagL + actualRow;
113 if (nextTraceValue == Diagonal) _setForbiddenCell(forbidden, row+1, col+1, diagonalWidth);
114 TSize segLen = 1;
115
116 while (nextTraceValue != Stop) {
117 if (traceValue == nextTraceValue) {
118 ++segLen;
119 } else {
120 _alignTracePrint(align, str, id1, actualCol, id2, actualRow, segLen, traceValue);
121 segLen = 1;
122 }
123 traceValue = nextTraceValue;
124 if (traceValue == Diagonal) nextTraceValue = trace[(--row) * diagonalWidth + col];
125 else if (traceValue == Vertical) nextTraceValue = trace[(--row) * diagonalWidth + (++col)];
126 else if (traceValue == Horizontal) nextTraceValue = trace[row * diagonalWidth + (--col)];
127 actualRow = row + lo_row;
128 actualCol = col + diagL + actualRow;
129 if (nextTraceValue == Diagonal) _setForbiddenCell(forbidden, row+1, col+1, diagonalWidth);
130 }
131 if (segLen) _alignTracePrint(align, str, id1, actualCol, id2, actualRow, segLen, traceValue);
132
133 // Handle the remaining sequence
134 if (actualCol != 0) _alignTracePrint(align, str, (TId) id1, (TSize) 0, (TId) 0, (TSize) 0, (TSize) actualCol, Horizontal);
135 if (actualRow != 0) _alignTracePrint(align, str, (TId) 0, (TSize) 0, (TId) id2, (TSize) 0, (TSize) actualRow, Vertical);
136
137 _finishAlign(align, actualCol, endCol, actualRow, endRow);
138 }
139
140 ////////////////////////////////////////////////////////////////////////////
141
142 template <typename TTrace, typename TStringSet, typename TScore, typename TIndexPair, typename TDiagonal, typename TForbidden>
143 inline typename Value<TScore>::Type
_alignBandedSmithWaterman(TTrace & trace,TStringSet const & str,TScore const & sc,typename Value<TTrace>::Type & initialDir,TIndexPair & indexPair,TDiagonal diagL,TDiagonal diagU,TForbidden forbidden)144 _alignBandedSmithWaterman(TTrace& trace,
145 TStringSet const& str,
146 TScore const& sc,
147 typename Value<TTrace>::Type& initialDir,
148 TIndexPair& indexPair,
149 TDiagonal diagL,
150 TDiagonal diagU,
151 TForbidden forbidden) {
152 SEQAN_CHECKPOINT
153 typedef typename Value<TTrace>::Type TTraceValue;
154 typedef typename Value<TScore>::Type TScoreValue;
155 typedef typename Value<TStringSet>::Type TString;
156 typedef typename Size<TTrace>::Type TSize;
157
158 // TraceBack values for Smith Waterman
159 TTraceValue Diagonal = 0; TTraceValue Horizontal = 1; TTraceValue Vertical = 2; TTraceValue Stop = 3;
160
161 // Initialization
162 TString const& str1 = str[0];
163 TString const& str2 = str[1];
164 TSize len1 = length(str1) + 1;
165 TSize len2 = length(str2) + 1;
166 TSize diagonalWidth = (TSize) (diagU - diagL + 1);
167 TSize hi_diag = diagonalWidth;
168 TSize lo_diag = 0;
169 if (diagL > 0) lo_diag = 0;
170 else lo_diag = (diagU < 0) ? hi_diag : (TSize) (1 - diagL);
171 TSize lo_row = (diagU <= 0) ? -diagU : 0;
172 TSize hi_row = len2;
173 if (len1 - diagL < hi_row) hi_row = len1 - diagL;
174 TSize height = hi_row - lo_row;
175
176 typedef String<TScoreValue> TColumn;
177 TColumn mat;
178 resize(mat, diagonalWidth, 0);
179 resize(trace, height * diagonalWidth, Stop);
180
181 // Record the max score
182 TScoreValue score_max = 0;
183 indexPair[0] = 0; indexPair[1] = 0;
184 initialDir = Stop;
185
186 // Classical DP
187 //TScoreValue max_val = 0;
188 TSize actualCol, actualRow;
189 TScoreValue verti_val, hori_val;
190
191 // Initialize first row
192 typedef typename Iterator<TTrace, Standard>::Type TTraceIter;
193 typedef typename Iterator<TColumn, Standard>::Type TMatIter;
194 TTraceIter traceIt;
195 TMatIter matIt;
196
197 if (lo_diag > 0) --lo_diag;
198 if (lo_row >= len1 - diagU) --hi_diag;
199
200 for (TSize row = 1; row < height; ++row) {
201 actualRow = row + lo_row;
202 if (lo_diag > 0) --lo_diag;
203 if (actualRow >= len1 - diagU) --hi_diag;
204 traceIt = begin(trace, Standard()) + row * diagonalWidth + lo_diag;
205 matIt = begin(mat, Standard()) + lo_diag;
206 hori_val = 0;
207
208 for (TSize col = lo_diag; col < hi_diag; ++col, ++matIt, ++traceIt) {
209 actualCol = col + diagL + actualRow;
210 if (actualCol != 0) {
211 if (_isClumping(forbidden, col+1, row+1, diagonalWidth)) {
212 *matIt = 0;
213 } else {
214 // Get the new maximum for mat
215 *matIt += score(const_cast<TScore&>(sc), ((int) actualCol - 1), ((int) actualRow - 1), str1, str2);
216 *traceIt = Diagonal;
217 }
218
219 // Get the new maximum for vertical
220 if (col < diagonalWidth - 1) {
221 verti_val = *(matIt+1) + scoreGapExtendVertical(sc, ((int) actualCol - 1), ((int) actualRow - 1), str1, str2);
222 } else {
223 verti_val = -2;
224 }
225 if (verti_val > *matIt) {
226 *matIt = verti_val;
227 *traceIt = Vertical;
228 }
229
230 // Get the new maximum for horizontal
231 if (col > 0) {
232 hori_val = hori_val + scoreGapExtendHorizontal(sc, ((int) actualCol - 1), ((int) actualRow - 1), str1, str2);
233 } else {
234 hori_val = -2;
235 }
236 if (hori_val > *matIt) {
237 *matIt = hori_val;
238 *traceIt = Horizontal;
239 }
240
241 // Check if new maximum is greater than 0
242 if (0 > *matIt) {
243 *matIt = 0;
244 *traceIt = Stop;
245
246 }
247
248 // Record the new best score
249 if (*matIt > score_max) {
250 indexPair[0] = row; indexPair[1] = col;
251 score_max = *matIt;
252 initialDir = *traceIt;
253 }
254 } else {
255 // Init first col
256 *matIt = 0;
257 *traceIt = Stop;
258 }
259 hori_val = *matIt;
260 }
261 }
262 // // Debug code
263 // std::cerr << std::endl;
264 //for(TSize i= 0; i<height;++i) {
265 // for(TSize j= 0; j<diagonalWidth;++j) {
266 // std::cerr << (TSize) getValue(trace, i*diagonalWidth + j) << ',';
267 // }
268 // std::cerr << std::endl;
269 //}
270 //std::cerr << "Max score: " << indexPair[0] << ',' << indexPair[1] << ':' << score_max << " (" << (TSize) initialDir << ")" << std::endl;
271 return score_max;
272 }
273
274 ////////////////////////////////////////////////////////////////////////////
275
276 template<typename TAlign, typename TStringSet, typename TForbidden, typename TScore, typename TDiagonal>
277 inline typename Value<TScore>::Type
_localAlignment(TStringSet & str,TAlign & align,TForbidden & forbidden,TScore const & sc,TDiagonal diag1,TDiagonal diag2,BandedSmithWatermanClump)278 _localAlignment(TStringSet& str,
279 TAlign& align,
280 TForbidden& forbidden,
281 TScore const& sc,
282 TDiagonal diag1,
283 TDiagonal diag2,
284 BandedSmithWatermanClump) {
285 SEQAN_CHECKPOINT
286 typedef typename Value<TScore>::Type TScoreValue;
287 typedef typename Size<TStringSet>::Type TSize;
288 typedef unsigned char TDir;
289
290 // Maximum value
291 TScoreValue maxScore;
292 TSize indexPair[2];
293
294 // Create the trace
295 TDir initialDir;
296 String<TDir> trace;
297 maxScore = _alignBandedSmithWaterman(trace, str, sc, initialDir, indexPair, diag1, diag2, forbidden);
298
299 // Follow the trace and create the alignment
300 _alignBandedSmithWatermanTrace(str, align, trace, initialDir, indexPair, diag1, diag2, forbidden);
301
302 return maxScore;
303 }
304
305 template<typename TString, typename TAlignments, typename TScores, typename TScoreValue, typename TSpec2, typename TDiagonal>
306 inline void
_localAlignment(StringSet<TString,Dependent<>> const & str,TAlignments & alignments,TScores & scores,Score<TScoreValue,TSpec2> const & sc,TScoreValue minScore,TDiagonal diag1,TDiagonal diag2,BandedSmithWatermanClump)307 _localAlignment(StringSet<TString, Dependent<> > const& str,
308 TAlignments& alignments,
309 TScores& scores,
310 Score<TScoreValue, TSpec2> const& sc,
311 TScoreValue minScore,
312 TDiagonal diag1,
313 TDiagonal diag2,
314 BandedSmithWatermanClump) {
315 SEQAN_CHECKPOINT
316 typedef typename Value<TAlignments>::Type TAlign;
317 typedef typename Size<TString>::Type TSize;
318
319 // For clumpping remember the used positions
320 TSize len1 = length(str[0]);
321 TSize len2 = length(str[1]);
322 TSize diagonalWidth = (TSize) (diag2 - diag1 + 1);
323
324 TSize lo_row = (diag2 < 0) ? -diag2 : 0;
325 TSize hi_row = (len1 - diag1 < len2) ? len1 - diag1 : len2;
326 TSize height = hi_row - lo_row;
327
328 String<bool> forbidden;
329 resize(forbidden, (height+1) * diagonalWidth, false);//(len1+1) * (len2+1), false);
330
331 // Stop looking for local alignments, if their score is too low
332 while (true) {
333 // Create the local alignment
334 TAlign align;
335 TScoreValue local_score = _localAlignment(str, align, forbidden, sc, diag1, diag2, BandedSmithWatermanClump());
336
337 //// Debug code
338 //for (int i = 0; i < height; ++i) {
339 // for (int j = 0; j < diagonalWidth; ++j) {
340 // std::cerr << forbidden[i * diagonalWidth + j] << ", ";
341 // }
342 // std::cerr << std::endl;
343 //}
344
345 if (local_score >= minScore) {
346 appendValue(alignments, align);
347 appendValue(scores, local_score);
348 } else break;
349 }
350 }
351
352
353 }// namespace SEQAN_NAMESPACE_MAIN
354
355 #endif //#ifndef SEQAN_HEADER_...
356
357