1 //===----------------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H 10 #define _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H 11 12 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 13 # pragma GCC system_header 14 #endif 15 16 #include <__algorithm/fill_n.h> 17 #include <__config> 18 #include <__functional/hash.h> 19 #include <__functional/operations.h> 20 #include <__iterator/distance.h> 21 #include <__iterator/iterator_traits.h> 22 #include <__memory/shared_ptr.h> 23 #include <__type_traits/make_unsigned.h> 24 #include <__utility/pair.h> 25 #include <array> 26 #include <unordered_map> 27 #include <vector> 28 29 #if _LIBCPP_STD_VER >= 17 30 31 _LIBCPP_PUSH_MACROS 32 #include <__undef_macros> 33 34 _LIBCPP_BEGIN_NAMESPACE_STD 35 36 template <class _Key, 37 class _Value, 38 class _Hash, 39 class _BinaryPredicate, 40 bool /*useArray*/> 41 class _BMSkipTable; 42 43 // General case for BM data searching; use a map 44 template <class _Key, 45 class _Value, 46 class _Hash, 47 class _BinaryPredicate> 48 class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, false> { 49 private: 50 using value_type = _Value; 51 using key_type = _Key; 52 53 const value_type __default_value_; 54 unordered_map<_Key, _Value, _Hash, _BinaryPredicate> __table_; 55 56 public: 57 _LIBCPP_HIDE_FROM_ABI 58 explicit _BMSkipTable(size_t __sz, value_type __default_value, _Hash __hash, _BinaryPredicate __pred) 59 : __default_value_(__default_value), 60 __table_(__sz, __hash, __pred) {} 61 62 _LIBCPP_HIDE_FROM_ABI void insert(const key_type& __key, value_type __val) { 63 __table_[__key] = __val; 64 } 65 66 _LIBCPP_HIDE_FROM_ABI value_type operator[](const key_type& __key) const { 67 auto __it = __table_.find(__key); 68 return __it == __table_.end() ? __default_value_ : __it->second; 69 } 70 }; 71 72 // Special case small numeric values; use an array 73 template <class _Key, 74 class _Value, 75 class _Hash, 76 class _BinaryPredicate> 77 class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, true> { 78 private: 79 using value_type = _Value; 80 using key_type = _Key; 81 82 using unsigned_key_type = make_unsigned_t<key_type>; 83 std::array<value_type, 256> __table_; 84 static_assert(numeric_limits<unsigned_key_type>::max() < 256); 85 86 public: 87 _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(size_t, value_type __default_value, _Hash, _BinaryPredicate) { 88 std::fill_n(__table_.data(), __table_.size(), __default_value); 89 } 90 91 _LIBCPP_HIDE_FROM_ABI void insert(key_type __key, value_type __val) { 92 __table_[static_cast<unsigned_key_type>(__key)] = __val; 93 } 94 95 _LIBCPP_HIDE_FROM_ABI value_type operator[](key_type __key) const { 96 return __table_[static_cast<unsigned_key_type>(__key)]; 97 } 98 }; 99 100 template <class _RandomAccessIterator1, 101 class _Hash = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>, 102 class _BinaryPredicate = equal_to<>> 103 class _LIBCPP_TEMPLATE_VIS boyer_moore_searcher { 104 private: 105 using difference_type = typename std::iterator_traits<_RandomAccessIterator1>::difference_type; 106 using value_type = typename std::iterator_traits<_RandomAccessIterator1>::value_type; 107 using __skip_table_type = _BMSkipTable<value_type, 108 difference_type, 109 _Hash, 110 _BinaryPredicate, 111 is_integral_v<value_type> 112 && sizeof(value_type) == 1 113 && is_same_v<_Hash, hash<value_type>> 114 && is_same_v<_BinaryPredicate, equal_to<>>>; 115 116 public: 117 _LIBCPP_HIDE_FROM_ABI 118 boyer_moore_searcher(_RandomAccessIterator1 __first, 119 _RandomAccessIterator1 __last, 120 _Hash __hash = _Hash(), 121 _BinaryPredicate __pred = _BinaryPredicate()) 122 : __first_(__first), 123 __last_(__last), 124 __pred_(__pred), 125 __pattern_length_(__last - __first), 126 __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, -1, __hash, __pred_)), 127 __suffix_(std::__allocate_shared_unbounded_array<difference_type[]>( 128 allocator<difference_type>(), __pattern_length_ + 1)) { 129 difference_type __i = 0; 130 while (__first != __last) { 131 __skip_table_->insert(*__first, __i); 132 ++__first; 133 ++__i; 134 } 135 __build_suffix_table(__first_, __last_, __pred_); 136 } 137 138 template <class _RandomAccessIterator2> 139 _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> 140 operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const { 141 static_assert(__is_same_uncvref<typename iterator_traits<_RandomAccessIterator1>::value_type, 142 typename iterator_traits<_RandomAccessIterator2>::value_type>::value, 143 "Corpus and Pattern iterators must point to the same type"); 144 if (__first == __last) 145 return std::make_pair(__last, __last); 146 if (__first_ == __last_) 147 return std::make_pair(__first, __first); 148 149 if (__pattern_length_ > (__last - __first)) 150 return std::make_pair(__last, __last); 151 return __search(__first, __last); 152 } 153 154 private: 155 _RandomAccessIterator1 __first_; 156 _RandomAccessIterator1 __last_; 157 _BinaryPredicate __pred_; 158 difference_type __pattern_length_; 159 shared_ptr<__skip_table_type> __skip_table_; 160 shared_ptr<difference_type[]> __suffix_; 161 162 template <class _RandomAccessIterator2> 163 _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> 164 __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const { 165 _RandomAccessIterator2 __current = __f; 166 const _RandomAccessIterator2 __last = __l - __pattern_length_; 167 const __skip_table_type& __skip_table = *__skip_table_; 168 169 while (__current <= __last) { 170 difference_type __j = __pattern_length_; 171 while (__pred_(__first_[__j - 1], __current[__j - 1])) { 172 --__j; 173 if (__j == 0) 174 return std::make_pair(__current, __current + __pattern_length_); 175 } 176 177 difference_type __k = __skip_table[__current[__j - 1]]; 178 difference_type __m = __j - __k - 1; 179 if (__k < __j && __m > __suffix_[__j]) 180 __current += __m; 181 else 182 __current += __suffix_[__j]; 183 } 184 return std::make_pair(__l, __l); 185 } 186 187 template <class _Iterator, class _Container> 188 _LIBCPP_HIDE_FROM_ABI void 189 __compute_bm_prefix(_Iterator __first, _Iterator __last, _BinaryPredicate __pred, _Container& __prefix) { 190 const size_t __count = __last - __first; 191 192 __prefix[0] = 0; 193 size_t __k = 0; 194 195 for (size_t __i = 1; __i != __count; ++__i) { 196 while (__k > 0 && !__pred(__first[__k], __first[__i])) 197 __k = __prefix[__k - 1]; 198 199 if (__pred(__first[__k], __first[__i])) 200 ++__k; 201 __prefix[__i] = __k; 202 } 203 } 204 205 _LIBCPP_HIDE_FROM_ABI void 206 __build_suffix_table(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _BinaryPredicate __pred) { 207 const size_t __count = __last - __first; 208 209 if (__count == 0) 210 return; 211 212 vector<difference_type> __scratch(__count); 213 214 __compute_bm_prefix(__first, __last, __pred, __scratch); 215 for (size_t __i = 0; __i <= __count; ++__i) 216 __suffix_[__i] = __count - __scratch[__count - 1]; 217 218 using _ReverseIter = reverse_iterator<_RandomAccessIterator1>; 219 __compute_bm_prefix(_ReverseIter(__last), _ReverseIter(__first), __pred, __scratch); 220 221 for (size_t __i = 0; __i != __count; ++__i) { 222 const size_t __j = __count - __scratch[__i]; 223 const difference_type __k = __i - __scratch[__i] + 1; 224 225 if (__suffix_[__j] > __k) 226 __suffix_[__j] = __k; 227 } 228 } 229 }; 230 _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_searcher); 231 232 template <class _RandomAccessIterator1, 233 class _Hash = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>, 234 class _BinaryPredicate = equal_to<>> 235 class _LIBCPP_TEMPLATE_VIS boyer_moore_horspool_searcher { 236 private: 237 using difference_type = typename iterator_traits<_RandomAccessIterator1>::difference_type; 238 using value_type = typename iterator_traits<_RandomAccessIterator1>::value_type; 239 using __skip_table_type = _BMSkipTable<value_type, 240 difference_type, 241 _Hash, 242 _BinaryPredicate, 243 is_integral_v<value_type> 244 && sizeof(value_type) == 1 245 && is_same_v<_Hash, hash<value_type>> 246 && is_same_v<_BinaryPredicate, equal_to<>>>; 247 public: 248 _LIBCPP_HIDE_FROM_ABI 249 boyer_moore_horspool_searcher(_RandomAccessIterator1 __first, 250 _RandomAccessIterator1 __last, 251 _Hash __hash = _Hash(), 252 _BinaryPredicate __pred = _BinaryPredicate()) 253 : __first_(__first), 254 __last_(__last), 255 __pred_(__pred), 256 __pattern_length_(__last - __first), 257 __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, __pattern_length_, __hash, __pred_)) { 258 if (__first == __last) 259 return; 260 --__last; 261 difference_type __i = 0; 262 while (__first != __last) { 263 __skip_table_->insert(*__first, __pattern_length_ - 1 - __i); 264 ++__first; 265 ++__i; 266 } 267 } 268 269 template <class _RandomAccessIterator2> 270 _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> 271 operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const { 272 static_assert(__is_same_uncvref<typename std::iterator_traits<_RandomAccessIterator1>::value_type, 273 typename std::iterator_traits<_RandomAccessIterator2>::value_type>::value, 274 "Corpus and Pattern iterators must point to the same type"); 275 if (__first == __last) 276 return std::make_pair(__last, __last); 277 if (__first_ == __last_) 278 return std::make_pair(__first, __first); 279 280 if (__pattern_length_ > __last - __first) 281 return std::make_pair(__last, __last); 282 283 return __search(__first, __last); 284 } 285 286 private: 287 _RandomAccessIterator1 __first_; 288 _RandomAccessIterator1 __last_; 289 _BinaryPredicate __pred_; 290 difference_type __pattern_length_; 291 shared_ptr<__skip_table_type> __skip_table_; 292 293 template <class _RandomAccessIterator2> 294 _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2> 295 __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const { 296 _RandomAccessIterator2 __current = __f; 297 const _RandomAccessIterator2 __last = __l - __pattern_length_; 298 const __skip_table_type& __skip_table = *__skip_table_; 299 300 while (__current <= __last) { 301 difference_type __j = __pattern_length_; 302 while (__pred_(__first_[__j - 1], __current[__j - 1])) { 303 --__j; 304 if (__j == 0) 305 return std::make_pair(__current, __current + __pattern_length_); 306 } 307 __current += __skip_table[__current[__pattern_length_ - 1]]; 308 } 309 return std::make_pair(__l, __l); 310 } 311 }; 312 _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_horspool_searcher); 313 314 _LIBCPP_END_NAMESPACE_STD 315 316 _LIBCPP_POP_MACROS 317 318 #endif // _LIBCPP_STD_VER >= 17 319 320 #endif // _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H 321