1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2015, Knut Reinert, FU Berlin
5 // Copyright (c) 2013 NVIDIA Corporation
6 // All rights reserved.
7 //
8 // Redistribution and use in source and binary forms, with or without
9 // modification, are permitted provided that the following conditions are met:
10 //
11 //     * Redistributions of source code must retain the above copyright
12 //       notice, this list of conditions and the following disclaimer.
13 //     * Redistributions in binary form must reproduce the above copyright
14 //       notice, this list of conditions and the following disclaimer in the
15 //       documentation and/or other materials provided with the distribution.
16 //     * Neither the name of NVIDIA Corporation nor the names of
17 //       its contributors may be used to endorse or promote products derived
18 //       from this software without specific prior written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 // ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
24 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
30 // DAMAGE.
31 //
32 // ==========================================================================
33 // Author: Enrico Siragusa <enrico.siragusa@fu-berlin.de>
34 // ==========================================================================
35 // Approximate string matching via backtracking on a substring index.
36 // ==========================================================================
37 
38 #ifndef SEQAN_INDEX_FIND_INDEX_H_
39 #define SEQAN_INDEX_FIND_INDEX_H_
40 
41 namespace seqan {
42 
43 // ============================================================================
44 // Tags
45 // ============================================================================
46 
47 // ----------------------------------------------------------------------------
48 // Tags for backtracking specializations
49 // ----------------------------------------------------------------------------
50 
51 struct BacktrackingSemiGlobal_;
52 typedef Tag<BacktrackingSemiGlobal_>    BacktrackingSemiGlobal;
53 
54 struct BacktrackingGlobal_;
55 typedef Tag<BacktrackingGlobal_>        BacktrackingGlobal;
56 
57 // ----------------------------------------------------------------------------
58 // Tag Backtracking
59 // ----------------------------------------------------------------------------
60 
61 template <typename TDistance = HammingDistance, typename TSpec = BacktrackingSemiGlobal>
62 struct Backtracking {};
63 
64 // ============================================================================
65 // Metafunctions
66 // ============================================================================
67 
68 // ----------------------------------------------------------------------------
69 // Metafunction TextIterator_
70 // ----------------------------------------------------------------------------
71 
72 template <typename TText, typename TIndexSpec, typename TPattern, typename TSpec>
73 struct TextIterator_<Index<TText, TIndexSpec>, TPattern, TSpec>
74 {
75     typedef typename Iterator<Index<TText, TIndexSpec>, TopDown<> >::Type  Type;
76 };
77 
78 // ----------------------------------------------------------------------------
79 // Metafunction TextIterator_
80 // ----------------------------------------------------------------------------
81 
82 template <typename TText, typename TIndexSpec, typename TPattern, typename TDistance, typename TSpec>
83 struct TextIterator_<Index<TText, TIndexSpec>, TPattern, Backtracking<TDistance, TSpec> >
84 {
85     typedef typename Iterator<Index<TText, TIndexSpec>, TopDown<ParentLinks<> > >::Type  Type;
86 };
87 
88 // ============================================================================
89 // Classes
90 // ============================================================================
91 
92 // ----------------------------------------------------------------------------
93 // Class Finder
94 // ----------------------------------------------------------------------------
95 
96 template <typename TText, typename TIndexSpec, typename TPattern, typename TSpec>
97 struct Finder_<Index<TText, TIndexSpec>, TPattern, TSpec>
98 {
99     typedef Index<TText, TIndexSpec>                                    TIndex;
100     typedef typename TextIterator_<TIndex, TPattern, TSpec>::Type       TTextIterator;
101     typedef typename PatternIterator_<TIndex, TPattern, TSpec>::Type    TPatternIterator;
102     typedef typename Score_<TSpec>::Type                                TScore;
103 
104     TTextIterator       _textIt;
105     TPatternIterator    _patternIt;
106     TScore              _scoreThreshold;
107     TScore              _score;
108 
109     SEQAN_HOST_DEVICE
110     Finder_() :
111         _scoreThreshold(),
112         _score()
113     {}
114 
115     SEQAN_HOST_DEVICE
116     Finder_(TIndex /* const */ & index) :
117         _textIt(index),
118         _scoreThreshold(),
119         _score()
120     {}
121 
122     SEQAN_HOST_DEVICE
123     Finder_(TTextIterator const & textIt) :
124         _textIt(textIt),
125         _scoreThreshold(),
126         _score()
127     {}
128 };
129 
130 // ============================================================================
131 // Functions
132 // ============================================================================
133 
134 // ----------------------------------------------------------------------------
135 // Function clear()
136 // ----------------------------------------------------------------------------
137 
138 template <typename TText, typename TIndexSpec, typename TPattern, typename TSpec>
139 SEQAN_HOST_DEVICE inline void
140 clear(Finder_<Index<TText, TIndexSpec>, TPattern, TSpec> & finder)
141 {
142     // NOTE(esiragusa): should clear() be called on text/patternIt?
143     goRoot(_textIterator(finder));
144     // NOTE(esiragusa): if find wasn't called yet, _patternIterator is uninitialized.
145 //    goBegin(_patternIterator(finder));
146     finder._score = typename Score_<TSpec>::Type();
147 }
148 
149 // ----------------------------------------------------------------------------
150 // Function find_()
151 // ----------------------------------------------------------------------------
152 
153 template <typename TText, typename TIndexSpec, typename TPattern, typename TDelegate>
154 SEQAN_HOST_DEVICE inline void
155 _find(Finder_<Index<TText, TIndexSpec>, TPattern, FinderSTree> & finder,
156       TPattern const & pattern,
157       TDelegate & delegate)
158 {
159     if (goDown(_textIterator(finder), pattern))
160     {
161         // TODO(esiragusa): update _patternIterator.
162         delegate(finder);
163     }
164 }
165 
166 // ----------------------------------------------------------------------------
167 // Function _getVertexScore()
168 // ----------------------------------------------------------------------------
169 
170 template <typename TText, typename TPattern, typename TSpec>
171 SEQAN_HOST_DEVICE inline typename Score_<Backtracking<HammingDistance, TSpec> >::Type
172 _getVertexScore(Finder_<TText, TPattern, Backtracking<HammingDistance, TSpec> > const & finder)
173 {
174     return !ordEqual(parentEdgeLabel(_textIterator(finder)), value(_patternIterator(finder)));
175 }
176 
177 // ----------------------------------------------------------------------------
178 // Function _printState()
179 // ----------------------------------------------------------------------------
180 
181 template <typename TText, typename TIndexSpec, typename TPattern, typename TSpec>
182 SEQAN_HOST_DEVICE inline void
183 _printState(Finder_<Index<TText, TIndexSpec>, TPattern, Backtracking<HammingDistance, TSpec> > & finder)
184 {
185     std::cout << "Text:        " << parentEdgeLabel(_textIterator(finder)) << std::endl;
186     std::cout << "Pattern:     " << value(_patternIterator(finder)) << std::endl;
187     std::cout << "Text Len:    " << repLength(_textIterator(finder)) << std::endl;
188     std::cout << "Pattern Len: " << position(_patternIterator(finder)) + 1 << std::endl;
189     std::cout << "Errors:      " << static_cast<unsigned>(_getScore(finder)) << std::endl;
190     std::cout << "Max errors:  " << static_cast<unsigned>(finder._scoreThreshold) << std::endl;
191 }
192 
193 // ----------------------------------------------------------------------------
194 // Function _find()
195 // ----------------------------------------------------------------------------
196 
197 template <typename TText, typename TIndexSpec, typename TPattern, typename TSpec, typename TDelegate>
198 SEQAN_HOST_DEVICE inline void
199 _find(Finder_<Index<TText, TIndexSpec>, TPattern, Backtracking<HammingDistance, TSpec> > & finder,
200       TPattern const & pattern,
201       TDelegate & delegate)
202 {
203     typedef Index<TText, TIndexSpec>                                        TIndex;
204     typedef Backtracking<HammingDistance, TSpec>                            TFinderSpec;
205     typedef typename TextIterator_<TIndex, TPattern, TFinderSpec>::Type     TTextIterator;
206     typedef typename PatternIterator_<TIndex, TPattern, TFinderSpec>::Type  TPatternIterator;
207 
208     _patternIterator(finder) = begin(pattern);
209 
210     TTextIterator & textIt = _textIterator(finder);
211     TPatternIterator & patternIt = _patternIterator(finder);
212 
213     do
214     {
215         // Exact case.
216         if (finder._score == finder._scoreThreshold)
217         {
218             if (goDown(textIt, suffix(pattern, position(patternIt))))
219             {
220                 delegate(finder);
221             }
222 
223             goUp(textIt);
224         }
225 
226         // Approximate case.
227         else if (finder._score < finder._scoreThreshold)
228         {
229             // Base case.
230             if (atEnd(patternIt))
231             {
232                 delegate(finder);
233             }
234 
235             // Recursive case.
236             else if (goDown(textIt))
237             {
238                 finder._score += _getVertexScore(finder);
239                 goNext(patternIt);
240                 continue;
241             }
242         }
243 
244         // Backtrack.
245         do
246         {
247             // Termination.
248             if (isRoot(textIt)) break;
249 
250             goPrevious(patternIt);
251             finder._score -= _getVertexScore(finder);
252         }
253         while (!goRight(textIt) && goUp(textIt));
254 
255         // Termination.
256         if (isRoot(textIt)) break;
257 
258         finder._score += _getVertexScore(finder);
259         goNext(patternIt);
260     }
261     while (true);
262 }
263 
264 }
265 
266 #endif  // #ifndef SEQAN_INDEX_FIND_INDEX_H_
267