1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2010, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 
33 #ifndef SEQAN_HEADER_GRAPH_ALIGN_BANDED_SMITH_WATERMAN_CLUMP_H
34 #define SEQAN_HEADER_GRAPH_ALIGN_BANDED_SMITH_WATERMAN_CLUMP_H
35 
36 namespace SEQAN_NAMESPACE_MAIN
37 {
38 
39 //////////////////////////////////////////////////////////////////////////////
40 
41 template<typename TString>
42 inline void
_initAlign(Graph<Alignment<StringSet<TString,Dependent<>>>> & align,StringSet<TString,Dependent<>> const & str)43 _initAlign(Graph<Alignment<StringSet<TString, Dependent<> > > >& align,
44            StringSet<TString, Dependent<> > const& str) {
45 SEQAN_CHECKPOINT
46 	assignStringSet(align, str);
47 }
48 
49 template<typename TString, typename TPos>
50 inline void
_finishAlign(Graph<Alignment<StringSet<TString,Dependent<>>>> &,TPos,TPos,TPos,TPos)51 _finishAlign(Graph<Alignment<StringSet<TString, Dependent<> > > >&,
52              TPos,
53              TPos,
54              TPos,
55              TPos) {
56 SEQAN_CHECKPOINT
57     // Nothing to be done?
58     //stringSet(g)[0] = infix(stringSet(g)[0], begin1, end1);
59     //stringSet(g)[1] = infix(stringSet(g)[1], begin2, end2);
60 }
61 
62 //////////////////////////////////////////////////////////////////////////////
63 
64 template <typename TStringSet, typename TAlign, typename TTrace, typename TVal, typename TIndexPair, typename TDiagonal, typename TForbidden>
65 inline void
_alignBandedSmithWatermanTrace(TStringSet const & str,TAlign & align,TTrace const & trace,TVal const initialDir,TIndexPair const & indexPair,TDiagonal diagL,TDiagonal diagU,TForbidden & forbidden)66 _alignBandedSmithWatermanTrace(TStringSet const& str,
67                        TAlign& align,
68                        TTrace const& trace,
69                        TVal const initialDir,
70                        TIndexPair const& indexPair,
71                        TDiagonal diagL,
72                        TDiagonal diagU,
73                        TForbidden& forbidden) {
74 SEQAN_CHECKPOINT
75     typedef typename Value<TStringSet>::Type TString;
76     typedef typename Id<TStringSet>::Type TId;
77     typedef typename Size<TTrace>::Type TSize;
78     typedef typename Value<TTrace>::Type TTraceValue;
79 
80     // Traceback values
81 	TTraceValue Diagonal = 0; TTraceValue Horizontal = 1; TTraceValue Vertical = 2; TTraceValue Stop = 3;
82 
83     // Initialization
84     _initAlign(align, str);
85     TString const& str1 = str[0];
86     TString const& str2 = str[1];
87     TId id1 = positionToId(const_cast<TStringSet&>(str), 0);
88     TId id2 = positionToId(const_cast<TStringSet&>(str), 1);
89     TSize len1 = length(str1);
90     TSize len2 = length(str2);
91     TSize lo_row = (diagU <= 0) ? -diagU : 0;
92     TSize diagonalWidth = (TSize) (diagU - diagL + 1);
93 
94     // Start the trace from the cell with the max value
95     TSize row = indexPair[0];
96     TSize col = indexPair[1];
97     TSize endRow = indexPair[0] + lo_row;
98     TSize endCol = indexPair[1] + diagL + endRow;
99 	TSize actualRow = row + lo_row;
100     TSize actualCol = col + diagL + actualRow;
101     if ((actualCol == 0) || (actualRow == 0)) return;
102 	if (actualCol < len1) _alignTracePrint(align, str, id1, actualCol, id2, actualRow, len1 - actualCol, Horizontal);
103 	if (actualRow < len2) _alignTracePrint(align, str, id1, actualCol, id2, actualRow, len2 - actualRow, Vertical);
104 	_setForbiddenCell(forbidden, row+1, col+1, diagonalWidth);
105 
106     TTraceValue traceValue = initialDir;
107     TTraceValue nextTraceValue = traceValue;
108     if (traceValue == Diagonal) nextTraceValue = trace[(--row) * diagonalWidth + col];
109     else if (traceValue == Vertical) nextTraceValue = trace[(--row) * diagonalWidth + (++col)];
110     else if (traceValue == Horizontal) nextTraceValue = trace[row * diagonalWidth + (--col)];
111     actualRow = row + lo_row;
112     actualCol = col + diagL + actualRow;
113 	if (nextTraceValue == Diagonal) _setForbiddenCell(forbidden, row+1, col+1, diagonalWidth);
114     TSize segLen = 1;
115 
116     while (nextTraceValue != Stop) {
117         if (traceValue == nextTraceValue) {
118             ++segLen;
119         } else {
120             _alignTracePrint(align, str, id1, actualCol, id2, actualRow, segLen, traceValue);
121             segLen = 1;
122         }
123         traceValue = nextTraceValue;
124         if (traceValue == Diagonal) nextTraceValue = trace[(--row) * diagonalWidth + col];
125         else if (traceValue == Vertical) nextTraceValue = trace[(--row) * diagonalWidth + (++col)];
126         else if (traceValue == Horizontal) nextTraceValue = trace[row * diagonalWidth + (--col)];
127         actualRow = row + lo_row;
128         actualCol = col + diagL + actualRow;
129 		if (nextTraceValue == Diagonal) _setForbiddenCell(forbidden, row+1, col+1, diagonalWidth);
130     }
131     if (segLen) _alignTracePrint(align, str, id1, actualCol, id2, actualRow, segLen, traceValue);
132 
133 	// Handle the remaining sequence
134 	if (actualCol != 0) _alignTracePrint(align, str, (TId) id1, (TSize) 0, (TId) 0, (TSize) 0, (TSize) actualCol, Horizontal);
135 	if (actualRow != 0) _alignTracePrint(align, str, (TId) 0, (TSize) 0, (TId) id2, (TSize) 0, (TSize) actualRow, Vertical);
136 
137     _finishAlign(align, actualCol, endCol, actualRow, endRow);
138 }
139 
140 ////////////////////////////////////////////////////////////////////////////
141 
142 template <typename TTrace, typename TStringSet, typename TScore, typename TIndexPair, typename TDiagonal, typename TForbidden>
143 inline typename Value<TScore>::Type
_alignBandedSmithWaterman(TTrace & trace,TStringSet const & str,TScore const & sc,typename Value<TTrace>::Type & initialDir,TIndexPair & indexPair,TDiagonal diagL,TDiagonal diagU,TForbidden forbidden)144 _alignBandedSmithWaterman(TTrace& trace,
145                  TStringSet const& str,
146                  TScore const& sc,
147                  typename Value<TTrace>::Type& initialDir,
148                  TIndexPair& indexPair,
149                  TDiagonal diagL,
150                  TDiagonal diagU,
151                  TForbidden forbidden) {
152 SEQAN_CHECKPOINT
153     typedef typename Value<TTrace>::Type TTraceValue;
154     typedef typename Value<TScore>::Type TScoreValue;
155     typedef typename Value<TStringSet>::Type TString;
156     typedef typename Size<TTrace>::Type TSize;
157 
158     // TraceBack values for Smith Waterman
159     TTraceValue Diagonal = 0; TTraceValue Horizontal = 1; TTraceValue Vertical = 2; TTraceValue Stop = 3;
160 
161     // Initialization
162     TString const& str1 = str[0];
163     TString const& str2 = str[1];
164     TSize len1 = length(str1) + 1;
165     TSize len2 = length(str2) + 1;
166     TSize diagonalWidth = (TSize) (diagU - diagL + 1);
167     TSize hi_diag = diagonalWidth;
168     TSize lo_diag = 0;
169     if (diagL > 0) lo_diag = 0;
170     else lo_diag = (diagU < 0) ? hi_diag : (TSize) (1 - diagL);
171     TSize lo_row = (diagU <= 0) ? -diagU : 0;
172     TSize hi_row = len2;
173     if (len1 - diagL < hi_row) hi_row = len1 - diagL;
174     TSize height = hi_row - lo_row;
175 
176     typedef String<TScoreValue> TColumn;
177     TColumn mat;
178     resize(mat, diagonalWidth, 0);
179     resize(trace, height * diagonalWidth, Stop);
180 
181     // Record the max score
182     TScoreValue score_max = 0;
183     indexPair[0] = 0; indexPair[1] = 0;
184     initialDir = Stop;
185 
186     // Classical DP
187     //TScoreValue max_val = 0;
188     TSize actualCol, actualRow;
189     TScoreValue verti_val, hori_val;
190 
191     // Initialize first row
192     typedef typename Iterator<TTrace, Standard>::Type TTraceIter;
193     typedef typename Iterator<TColumn, Standard>::Type TMatIter;
194     TTraceIter traceIt;
195     TMatIter matIt;
196 
197     if (lo_diag > 0) --lo_diag;
198     if (lo_row >= len1 - diagU) --hi_diag;
199 
200     for (TSize row = 1; row < height; ++row) {
201         actualRow = row + lo_row;
202         if (lo_diag > 0) --lo_diag;
203         if (actualRow >= len1 - diagU) --hi_diag;
204         traceIt = begin(trace, Standard()) + row * diagonalWidth + lo_diag;
205         matIt = begin(mat, Standard()) + lo_diag;
206         hori_val = 0;
207 
208         for (TSize col = lo_diag; col < hi_diag; ++col, ++matIt, ++traceIt) {
209             actualCol = col + diagL + actualRow;
210             if (actualCol != 0) {
211                 if (_isClumping(forbidden, col+1, row+1, diagonalWidth)) {
212                     *matIt = 0;
213                 } else {
214                     // Get the new maximum for mat
215                     *matIt += score(const_cast<TScore&>(sc), ((int) actualCol - 1), ((int) actualRow - 1), str1, str2);
216                     *traceIt = Diagonal;
217                 }
218 
219                 // Get the new maximum for vertical
220                 if (col < diagonalWidth - 1) {
221                     verti_val = *(matIt+1) + scoreGapExtendVertical(sc, ((int) actualCol - 1), ((int) actualRow - 1), str1, str2);
222                 } else {
223                     verti_val = -2;
224                 }
225                 if (verti_val > *matIt) {
226                     *matIt = verti_val;
227                     *traceIt = Vertical;
228                 }
229 
230                 // Get the new maximum for horizontal
231                 if (col > 0) {
232                     hori_val = hori_val + scoreGapExtendHorizontal(sc, ((int) actualCol - 1), ((int) actualRow - 1), str1, str2);
233                 } else {
234                     hori_val = -2;
235                 }
236                 if (hori_val > *matIt) {
237                     *matIt = hori_val;
238                     *traceIt = Horizontal;
239                 }
240 
241                 // Check if new maximum is greater than 0
242                 if (0 > *matIt) {
243                     *matIt = 0;
244                     *traceIt = Stop;
245 
246                 }
247 
248                 // Record the new best score
249                 if (*matIt > score_max) {
250                     indexPair[0] = row; indexPair[1] = col;
251                     score_max = *matIt;
252                     initialDir = *traceIt;
253                 }
254             } else {
255                 // Init first col
256                 *matIt = 0;
257                 *traceIt = Stop;
258             }
259             hori_val = *matIt;
260         }
261     }
262  //   // Debug code
263  //   std::cerr << std::endl;
264 	//for(TSize i= 0; i<height;++i) {
265 	//	for(TSize j= 0; j<diagonalWidth;++j) {
266 	//		std::cerr << (TSize) getValue(trace, i*diagonalWidth + j) << ',';
267 	//	}
268 	//	std::cerr << std::endl;
269 	//}
270 	//std::cerr << "Max score: " << indexPair[0] << ',' << indexPair[1] << ':' << score_max << " (" << (TSize) initialDir << ")" << std::endl;
271     return score_max;
272 }
273 
274 ////////////////////////////////////////////////////////////////////////////
275 
276 template<typename TAlign, typename TStringSet, typename TForbidden, typename TScore, typename TDiagonal>
277 inline typename Value<TScore>::Type
_localAlignment(TStringSet & str,TAlign & align,TForbidden & forbidden,TScore const & sc,TDiagonal diag1,TDiagonal diag2,BandedSmithWatermanClump)278 _localAlignment(TStringSet& str,
279                 TAlign& align,
280                 TForbidden& forbidden,
281                 TScore const& sc,
282                 TDiagonal diag1,
283                 TDiagonal diag2,
284                 BandedSmithWatermanClump) {
285 SEQAN_CHECKPOINT
286     typedef typename Value<TScore>::Type TScoreValue;
287     typedef typename Size<TStringSet>::Type TSize;
288     typedef unsigned char TDir;
289 
290     // Maximum value
291     TScoreValue maxScore;
292     TSize indexPair[2];
293 
294     // Create the trace
295     TDir initialDir;
296     String<TDir> trace;
297     maxScore = _alignBandedSmithWaterman(trace, str, sc, initialDir, indexPair, diag1, diag2, forbidden);
298 
299     // Follow the trace and create the alignment
300     _alignBandedSmithWatermanTrace(str, align, trace, initialDir, indexPair, diag1, diag2, forbidden);
301 
302     return maxScore;
303 }
304 
305 template<typename TString, typename TAlignments, typename TScores, typename TScoreValue, typename TSpec2, typename TDiagonal>
306 inline void
_localAlignment(StringSet<TString,Dependent<>> const & str,TAlignments & alignments,TScores & scores,Score<TScoreValue,TSpec2> const & sc,TScoreValue minScore,TDiagonal diag1,TDiagonal diag2,BandedSmithWatermanClump)307 _localAlignment(StringSet<TString, Dependent<> > const& str,
308 				TAlignments& alignments,
309 				TScores& scores,
310 				Score<TScoreValue, TSpec2> const& sc,
311 				TScoreValue minScore,
312 				TDiagonal diag1,
313 				TDiagonal diag2,
314 				BandedSmithWatermanClump) {
315 SEQAN_CHECKPOINT
316 	typedef typename Value<TAlignments>::Type TAlign;
317 	typedef typename Size<TString>::Type TSize;
318 
319 	// For clumpping remember the used positions
320 	TSize len1 = length(str[0]);
321 	TSize len2 = length(str[1]);
322     TSize diagonalWidth = (TSize) (diag2 - diag1 + 1);
323 
324     TSize lo_row = (diag2 < 0) ? -diag2 : 0;
325     TSize hi_row = (len1 - diag1 < len2) ? len1 - diag1 : len2;
326     TSize height = hi_row - lo_row;
327 
328 	String<bool> forbidden;
329 	resize(forbidden, (height+1) * diagonalWidth, false);//(len1+1) * (len2+1), false);
330 
331 	// Stop looking for local alignments, if their score is too low
332 	while (true) {
333 		// Create the local alignment
334         TAlign align;
335 		TScoreValue local_score = _localAlignment(str, align, forbidden, sc, diag1, diag2, BandedSmithWatermanClump());
336 
337         //// Debug code
338         //for (int i = 0; i < height; ++i) {
339         //    for (int j = 0; j < diagonalWidth; ++j) {
340         //        std::cerr << forbidden[i * diagonalWidth + j] << ", ";
341         //    }
342         //    std::cerr << std::endl;
343         //}
344 
345 		if (local_score >= minScore) {
346             appendValue(alignments, align);
347             appendValue(scores, local_score);
348 		} else break;
349 	}
350 }
351 
352 
353 }// namespace SEQAN_NAMESPACE_MAIN
354 
355 #endif //#ifndef SEQAN_HEADER_...
356 
357