1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
10 #define _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
11 
12 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
13 #  pragma GCC system_header
14 #endif
15 
16 #include <__algorithm/fill_n.h>
17 #include <__config>
18 #include <__functional/hash.h>
19 #include <__functional/operations.h>
20 #include <__iterator/distance.h>
21 #include <__iterator/iterator_traits.h>
22 #include <__memory/shared_ptr.h>
23 #include <__type_traits/make_unsigned.h>
24 #include <__utility/pair.h>
25 #include <array>
26 #include <unordered_map>
27 #include <vector>
28 
29 #if _LIBCPP_STD_VER >= 17
30 
31 _LIBCPP_PUSH_MACROS
32 #  include <__undef_macros>
33 
34 _LIBCPP_BEGIN_NAMESPACE_STD
35 
36 template <class _Key, class _Value, class _Hash, class _BinaryPredicate, bool /*useArray*/>
37 class _BMSkipTable;
38 
39 // General case for BM data searching; use a map
40 template <class _Key, class _Value, class _Hash, class _BinaryPredicate>
41 class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, false> {
42 private:
43   using value_type = _Value;
44   using key_type   = _Key;
45 
46   const value_type __default_value_;
47   unordered_map<_Key, _Value, _Hash, _BinaryPredicate> __table_;
48 
49 public:
50   _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(
51       size_t __sz, value_type __default_value, _Hash __hash, _BinaryPredicate __pred)
52       : __default_value_(__default_value), __table_(__sz, __hash, __pred) {}
53 
54   _LIBCPP_HIDE_FROM_ABI void insert(const key_type& __key, value_type __val) { __table_[__key] = __val; }
55 
56   _LIBCPP_HIDE_FROM_ABI value_type operator[](const key_type& __key) const {
57     auto __it = __table_.find(__key);
58     return __it == __table_.end() ? __default_value_ : __it->second;
59   }
60 };
61 
62 // Special case small numeric values; use an array
63 template <class _Key, class _Value, class _Hash, class _BinaryPredicate>
64 class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, true> {
65 private:
66   using value_type = _Value;
67   using key_type   = _Key;
68 
69   using unsigned_key_type = make_unsigned_t<key_type>;
70   std::array<value_type, 256> __table_;
71   static_assert(numeric_limits<unsigned_key_type>::max() < 256);
72 
73 public:
74   _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(size_t, value_type __default_value, _Hash, _BinaryPredicate) {
75     std::fill_n(__table_.data(), __table_.size(), __default_value);
76   }
77 
78   _LIBCPP_HIDE_FROM_ABI void insert(key_type __key, value_type __val) {
79     __table_[static_cast<unsigned_key_type>(__key)] = __val;
80   }
81 
82   _LIBCPP_HIDE_FROM_ABI value_type operator[](key_type __key) const {
83     return __table_[static_cast<unsigned_key_type>(__key)];
84   }
85 };
86 
87 template <class _RandomAccessIterator1,
88           class _Hash            = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>,
89           class _BinaryPredicate = equal_to<>>
90 class _LIBCPP_TEMPLATE_VIS boyer_moore_searcher {
91 private:
92   using difference_type = typename std::iterator_traits<_RandomAccessIterator1>::difference_type;
93   using value_type      = typename std::iterator_traits<_RandomAccessIterator1>::value_type;
94   using __skip_table_type =
95       _BMSkipTable<value_type,
96                    difference_type,
97                    _Hash,
98                    _BinaryPredicate,
99                    is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> &&
100                        is_same_v<_BinaryPredicate, equal_to<>>>;
101 
102 public:
103   _LIBCPP_HIDE_FROM_ABI boyer_moore_searcher(
104       _RandomAccessIterator1 __first,
105       _RandomAccessIterator1 __last,
106       _Hash __hash            = _Hash(),
107       _BinaryPredicate __pred = _BinaryPredicate())
108       : __first_(__first),
109         __last_(__last),
110         __pred_(__pred),
111         __pattern_length_(__last - __first),
112         __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, -1, __hash, __pred_)),
113         __suffix_(std::__allocate_shared_unbounded_array<difference_type[]>(
114             allocator<difference_type>(), __pattern_length_ + 1)) {
115     difference_type __i = 0;
116     while (__first != __last) {
117       __skip_table_->insert(*__first, __i);
118       ++__first;
119       ++__i;
120     }
121     __build_suffix_table(__first_, __last_, __pred_);
122   }
123 
124   template <class _RandomAccessIterator2>
125   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
126   operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const {
127     static_assert(__is_same_uncvref<typename iterator_traits<_RandomAccessIterator1>::value_type,
128                                     typename iterator_traits<_RandomAccessIterator2>::value_type>::value,
129                   "Corpus and Pattern iterators must point to the same type");
130     if (__first == __last)
131       return std::make_pair(__last, __last);
132     if (__first_ == __last_)
133       return std::make_pair(__first, __first);
134 
135     if (__pattern_length_ > (__last - __first))
136       return std::make_pair(__last, __last);
137     return __search(__first, __last);
138   }
139 
140 private:
141   _RandomAccessIterator1 __first_;
142   _RandomAccessIterator1 __last_;
143   _BinaryPredicate __pred_;
144   difference_type __pattern_length_;
145   shared_ptr<__skip_table_type> __skip_table_;
146   shared_ptr<difference_type[]> __suffix_;
147 
148   template <class _RandomAccessIterator2>
149   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
150   __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const {
151     _RandomAccessIterator2 __current      = __f;
152     const _RandomAccessIterator2 __last   = __l - __pattern_length_;
153     const __skip_table_type& __skip_table = *__skip_table_;
154 
155     while (__current <= __last) {
156       difference_type __j = __pattern_length_;
157       while (__pred_(__first_[__j - 1], __current[__j - 1])) {
158         --__j;
159         if (__j == 0)
160           return std::make_pair(__current, __current + __pattern_length_);
161       }
162 
163       difference_type __k = __skip_table[__current[__j - 1]];
164       difference_type __m = __j - __k - 1;
165       if (__k < __j && __m > __suffix_[__j])
166         __current += __m;
167       else
168         __current += __suffix_[__j];
169     }
170     return std::make_pair(__l, __l);
171   }
172 
173   template <class _Iterator, class _Container>
174   _LIBCPP_HIDE_FROM_ABI void
175   __compute_bm_prefix(_Iterator __first, _Iterator __last, _BinaryPredicate __pred, _Container& __prefix) {
176     const size_t __count = __last - __first;
177 
178     __prefix[0] = 0;
179     size_t __k  = 0;
180 
181     for (size_t __i = 1; __i != __count; ++__i) {
182       while (__k > 0 && !__pred(__first[__k], __first[__i]))
183         __k = __prefix[__k - 1];
184 
185       if (__pred(__first[__k], __first[__i]))
186         ++__k;
187       __prefix[__i] = __k;
188     }
189   }
190 
191   _LIBCPP_HIDE_FROM_ABI void
192   __build_suffix_table(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _BinaryPredicate __pred) {
193     const size_t __count = __last - __first;
194 
195     if (__count == 0)
196       return;
197 
198     vector<difference_type> __scratch(__count);
199 
200     __compute_bm_prefix(__first, __last, __pred, __scratch);
201     for (size_t __i = 0; __i <= __count; ++__i)
202       __suffix_[__i] = __count - __scratch[__count - 1];
203 
204     using _ReverseIter = reverse_iterator<_RandomAccessIterator1>;
205     __compute_bm_prefix(_ReverseIter(__last), _ReverseIter(__first), __pred, __scratch);
206 
207     for (size_t __i = 0; __i != __count; ++__i) {
208       const size_t __j          = __count - __scratch[__i];
209       const difference_type __k = __i - __scratch[__i] + 1;
210 
211       if (__suffix_[__j] > __k)
212         __suffix_[__j] = __k;
213     }
214   }
215 };
216 _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_searcher);
217 
218 template <class _RandomAccessIterator1,
219           class _Hash            = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>,
220           class _BinaryPredicate = equal_to<>>
221 class _LIBCPP_TEMPLATE_VIS boyer_moore_horspool_searcher {
222 private:
223   using difference_type = typename iterator_traits<_RandomAccessIterator1>::difference_type;
224   using value_type      = typename iterator_traits<_RandomAccessIterator1>::value_type;
225   using __skip_table_type =
226       _BMSkipTable<value_type,
227                    difference_type,
228                    _Hash,
229                    _BinaryPredicate,
230                    is_integral_v<value_type> && sizeof(value_type) == 1 && is_same_v<_Hash, hash<value_type>> &&
231                        is_same_v<_BinaryPredicate, equal_to<>>>;
232 
233 public:
234   _LIBCPP_HIDE_FROM_ABI boyer_moore_horspool_searcher(
235       _RandomAccessIterator1 __first,
236       _RandomAccessIterator1 __last,
237       _Hash __hash            = _Hash(),
238       _BinaryPredicate __pred = _BinaryPredicate())
239       : __first_(__first),
240         __last_(__last),
241         __pred_(__pred),
242         __pattern_length_(__last - __first),
243         __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, __pattern_length_, __hash, __pred_)) {
244     if (__first == __last)
245       return;
246     --__last;
247     difference_type __i = 0;
248     while (__first != __last) {
249       __skip_table_->insert(*__first, __pattern_length_ - 1 - __i);
250       ++__first;
251       ++__i;
252     }
253   }
254 
255   template <class _RandomAccessIterator2>
256   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
257   operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const {
258     static_assert(__is_same_uncvref<typename std::iterator_traits<_RandomAccessIterator1>::value_type,
259                                     typename std::iterator_traits<_RandomAccessIterator2>::value_type>::value,
260                   "Corpus and Pattern iterators must point to the same type");
261     if (__first == __last)
262       return std::make_pair(__last, __last);
263     if (__first_ == __last_)
264       return std::make_pair(__first, __first);
265 
266     if (__pattern_length_ > __last - __first)
267       return std::make_pair(__last, __last);
268 
269     return __search(__first, __last);
270   }
271 
272 private:
273   _RandomAccessIterator1 __first_;
274   _RandomAccessIterator1 __last_;
275   _BinaryPredicate __pred_;
276   difference_type __pattern_length_;
277   shared_ptr<__skip_table_type> __skip_table_;
278 
279   template <class _RandomAccessIterator2>
280   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
281   __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const {
282     _RandomAccessIterator2 __current      = __f;
283     const _RandomAccessIterator2 __last   = __l - __pattern_length_;
284     const __skip_table_type& __skip_table = *__skip_table_;
285 
286     while (__current <= __last) {
287       difference_type __j = __pattern_length_;
288       while (__pred_(__first_[__j - 1], __current[__j - 1])) {
289         --__j;
290         if (__j == 0)
291           return std::make_pair(__current, __current + __pattern_length_);
292       }
293       __current += __skip_table[__current[__pattern_length_ - 1]];
294     }
295     return std::make_pair(__l, __l);
296   }
297 };
298 _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_horspool_searcher);
299 
300 _LIBCPP_END_NAMESPACE_STD
301 
302 _LIBCPP_POP_MACROS
303 
304 #endif // _LIBCPP_STD_VER >= 17
305 
306 #endif // _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
307