1 // ========================================================================== 2 // SeqAn - The Library for Sequence Analysis 3 // ========================================================================== 4 // Copyright (c) 2006-2015, Knut Reinert, FU Berlin 5 // Copyright (c) 2013 NVIDIA Corporation 6 // All rights reserved. 7 // 8 // Redistribution and use in source and binary forms, with or without 9 // modification, are permitted provided that the following conditions are met: 10 // 11 // * Redistributions of source code must retain the above copyright 12 // notice, this list of conditions and the following disclaimer. 13 // * Redistributions in binary form must reproduce the above copyright 14 // notice, this list of conditions and the following disclaimer in the 15 // documentation and/or other materials provided with the distribution. 16 // * Neither the name of NVIDIA Corporation nor the names of 17 // its contributors may be used to endorse or promote products derived 18 // from this software without specific prior written permission. 19 // 20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 // ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE 24 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 30 // DAMAGE. 31 // 32 // ========================================================================== 33 // Author: Enrico Siragusa <enrico.siragusa@fu-berlin.de> 34 // ========================================================================== 35 // Approximate string matching via backtracking on a substring index. 36 // ========================================================================== 37 38 #ifndef SEQAN_INDEX_FIND_INDEX_H_ 39 #define SEQAN_INDEX_FIND_INDEX_H_ 40 41 namespace seqan { 42 43 // ============================================================================ 44 // Tags 45 // ============================================================================ 46 47 // ---------------------------------------------------------------------------- 48 // Tags for backtracking specializations 49 // ---------------------------------------------------------------------------- 50 51 struct BacktrackingSemiGlobal_; 52 typedef Tag<BacktrackingSemiGlobal_> BacktrackingSemiGlobal; 53 54 struct BacktrackingGlobal_; 55 typedef Tag<BacktrackingGlobal_> BacktrackingGlobal; 56 57 // ---------------------------------------------------------------------------- 58 // Tag Backtracking 59 // ---------------------------------------------------------------------------- 60 61 template <typename TDistance = HammingDistance, typename TSpec = BacktrackingSemiGlobal> 62 struct Backtracking {}; 63 64 // ============================================================================ 65 // Metafunctions 66 // ============================================================================ 67 68 // ---------------------------------------------------------------------------- 69 // Metafunction TextIterator_ 70 // ---------------------------------------------------------------------------- 71 72 template <typename TText, typename TIndexSpec, typename TPattern, typename TSpec> 73 struct TextIterator_<Index<TText, TIndexSpec>, TPattern, TSpec> 74 { 75 typedef typename Iterator<Index<TText, TIndexSpec>, TopDown<> >::Type Type; 76 }; 77 78 // ---------------------------------------------------------------------------- 79 // Metafunction TextIterator_ 80 // ---------------------------------------------------------------------------- 81 82 template <typename TText, typename TIndexSpec, typename TPattern, typename TDistance, typename TSpec> 83 struct TextIterator_<Index<TText, TIndexSpec>, TPattern, Backtracking<TDistance, TSpec> > 84 { 85 typedef typename Iterator<Index<TText, TIndexSpec>, TopDown<ParentLinks<> > >::Type Type; 86 }; 87 88 // ============================================================================ 89 // Classes 90 // ============================================================================ 91 92 // ---------------------------------------------------------------------------- 93 // Class Finder 94 // ---------------------------------------------------------------------------- 95 96 template <typename TText, typename TIndexSpec, typename TPattern, typename TSpec> 97 struct Finder_<Index<TText, TIndexSpec>, TPattern, TSpec> 98 { 99 typedef Index<TText, TIndexSpec> TIndex; 100 typedef typename TextIterator_<TIndex, TPattern, TSpec>::Type TTextIterator; 101 typedef typename PatternIterator_<TIndex, TPattern, TSpec>::Type TPatternIterator; 102 typedef typename Score_<TSpec>::Type TScore; 103 104 TTextIterator _textIt; 105 TPatternIterator _patternIt; 106 TScore _scoreThreshold; 107 TScore _score; 108 109 SEQAN_HOST_DEVICE 110 Finder_() : 111 _scoreThreshold(), 112 _score() 113 {} 114 115 SEQAN_HOST_DEVICE 116 Finder_(TIndex /* const */ & index) : 117 _textIt(index), 118 _scoreThreshold(), 119 _score() 120 {} 121 122 SEQAN_HOST_DEVICE 123 Finder_(TTextIterator const & textIt) : 124 _textIt(textIt), 125 _scoreThreshold(), 126 _score() 127 {} 128 }; 129 130 // ============================================================================ 131 // Functions 132 // ============================================================================ 133 134 // ---------------------------------------------------------------------------- 135 // Function clear() 136 // ---------------------------------------------------------------------------- 137 138 template <typename TText, typename TIndexSpec, typename TPattern, typename TSpec> 139 SEQAN_HOST_DEVICE inline void 140 clear(Finder_<Index<TText, TIndexSpec>, TPattern, TSpec> & finder) 141 { 142 // NOTE(esiragusa): should clear() be called on text/patternIt? 143 goRoot(_textIterator(finder)); 144 // NOTE(esiragusa): if find wasn't called yet, _patternIterator is uninitialized. 145 // goBegin(_patternIterator(finder)); 146 finder._score = typename Score_<TSpec>::Type(); 147 } 148 149 // ---------------------------------------------------------------------------- 150 // Function find_() 151 // ---------------------------------------------------------------------------- 152 153 template <typename TText, typename TIndexSpec, typename TPattern, typename TDelegate> 154 SEQAN_HOST_DEVICE inline void 155 _find(Finder_<Index<TText, TIndexSpec>, TPattern, FinderSTree> & finder, 156 TPattern const & pattern, 157 TDelegate & delegate) 158 { 159 if (goDown(_textIterator(finder), pattern)) 160 { 161 // TODO(esiragusa): update _patternIterator. 162 delegate(finder); 163 } 164 } 165 166 // ---------------------------------------------------------------------------- 167 // Function _getVertexScore() 168 // ---------------------------------------------------------------------------- 169 170 template <typename TText, typename TPattern, typename TSpec> 171 SEQAN_HOST_DEVICE inline typename Score_<Backtracking<HammingDistance, TSpec> >::Type 172 _getVertexScore(Finder_<TText, TPattern, Backtracking<HammingDistance, TSpec> > const & finder) 173 { 174 return !ordEqual(parentEdgeLabel(_textIterator(finder)), value(_patternIterator(finder))); 175 } 176 177 // ---------------------------------------------------------------------------- 178 // Function _printState() 179 // ---------------------------------------------------------------------------- 180 181 template <typename TText, typename TIndexSpec, typename TPattern, typename TSpec> 182 SEQAN_HOST_DEVICE inline void 183 _printState(Finder_<Index<TText, TIndexSpec>, TPattern, Backtracking<HammingDistance, TSpec> > & finder) 184 { 185 std::cout << "Text: " << parentEdgeLabel(_textIterator(finder)) << std::endl; 186 std::cout << "Pattern: " << value(_patternIterator(finder)) << std::endl; 187 std::cout << "Text Len: " << repLength(_textIterator(finder)) << std::endl; 188 std::cout << "Pattern Len: " << position(_patternIterator(finder)) + 1 << std::endl; 189 std::cout << "Errors: " << static_cast<unsigned>(_getScore(finder)) << std::endl; 190 std::cout << "Max errors: " << static_cast<unsigned>(finder._scoreThreshold) << std::endl; 191 } 192 193 // ---------------------------------------------------------------------------- 194 // Function _find() 195 // ---------------------------------------------------------------------------- 196 197 template <typename TText, typename TIndexSpec, typename TPattern, typename TSpec, typename TDelegate> 198 SEQAN_HOST_DEVICE inline void 199 _find(Finder_<Index<TText, TIndexSpec>, TPattern, Backtracking<HammingDistance, TSpec> > & finder, 200 TPattern const & pattern, 201 TDelegate & delegate) 202 { 203 typedef Index<TText, TIndexSpec> TIndex; 204 typedef Backtracking<HammingDistance, TSpec> TFinderSpec; 205 typedef typename TextIterator_<TIndex, TPattern, TFinderSpec>::Type TTextIterator; 206 typedef typename PatternIterator_<TIndex, TPattern, TFinderSpec>::Type TPatternIterator; 207 208 _patternIterator(finder) = begin(pattern); 209 210 TTextIterator & textIt = _textIterator(finder); 211 TPatternIterator & patternIt = _patternIterator(finder); 212 213 do 214 { 215 // Exact case. 216 if (finder._score == finder._scoreThreshold) 217 { 218 if (goDown(textIt, suffix(pattern, position(patternIt)))) 219 { 220 delegate(finder); 221 } 222 223 goUp(textIt); 224 } 225 226 // Approximate case. 227 else if (finder._score < finder._scoreThreshold) 228 { 229 // Base case. 230 if (atEnd(patternIt)) 231 { 232 delegate(finder); 233 } 234 235 // Recursive case. 236 else if (goDown(textIt)) 237 { 238 finder._score += _getVertexScore(finder); 239 goNext(patternIt); 240 continue; 241 } 242 } 243 244 // Backtrack. 245 do 246 { 247 // Termination. 248 if (isRoot(textIt)) break; 249 250 goPrevious(patternIt); 251 finder._score -= _getVertexScore(finder); 252 } 253 while (!goRight(textIt) && goUp(textIt)); 254 255 // Termination. 256 if (isRoot(textIt)) break; 257 258 finder._score += _getVertexScore(finder); 259 goNext(patternIt); 260 } 261 while (true); 262 } 263 264 } 265 266 #endif // #ifndef SEQAN_INDEX_FIND_INDEX_H_ 267