1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
10 #define _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
11 
12 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
13 #  pragma GCC system_header
14 #endif
15 
16 #include <__algorithm/fill_n.h>
17 #include <__config>
18 #include <__functional/hash.h>
19 #include <__functional/operations.h>
20 #include <__iterator/distance.h>
21 #include <__iterator/iterator_traits.h>
22 #include <__memory/shared_ptr.h>
23 #include <__type_traits/make_unsigned.h>
24 #include <__utility/pair.h>
25 #include <array>
26 #include <unordered_map>
27 #include <vector>
28 
29 #if _LIBCPP_STD_VER >= 17
30 
31 _LIBCPP_PUSH_MACROS
32 #include <__undef_macros>
33 
34 _LIBCPP_BEGIN_NAMESPACE_STD
35 
36 template <class _Key,
37           class _Value,
38           class _Hash,
39           class _BinaryPredicate,
40           bool /*useArray*/>
41 class _BMSkipTable;
42 
43 // General case for BM data searching; use a map
44 template <class _Key,
45           class _Value,
46           class _Hash,
47           class _BinaryPredicate>
48 class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, false> {
49 private:
50   using value_type = _Value;
51   using key_type = _Key;
52 
53   const value_type __default_value_;
54   unordered_map<_Key, _Value, _Hash, _BinaryPredicate> __table_;
55 
56 public:
57   _LIBCPP_HIDE_FROM_ABI
58   explicit _BMSkipTable(size_t __sz, value_type __default_value, _Hash __hash, _BinaryPredicate __pred)
59       : __default_value_(__default_value),
60         __table_(__sz, __hash, __pred) {}
61 
62   _LIBCPP_HIDE_FROM_ABI void insert(const key_type& __key, value_type __val) {
63     __table_[__key] = __val;
64   }
65 
66   _LIBCPP_HIDE_FROM_ABI value_type operator[](const key_type& __key) const {
67     auto __it = __table_.find(__key);
68     return __it == __table_.end() ? __default_value_ : __it->second;
69   }
70 };
71 
72 // Special case small numeric values; use an array
73 template <class _Key,
74           class _Value,
75           class _Hash,
76           class _BinaryPredicate>
77 class _BMSkipTable<_Key, _Value, _Hash, _BinaryPredicate, true> {
78 private:
79   using value_type = _Value;
80   using key_type = _Key;
81 
82   using unsigned_key_type = make_unsigned_t<key_type>;
83   std::array<value_type, 256> __table_;
84   static_assert(numeric_limits<unsigned_key_type>::max() < 256);
85 
86 public:
87   _LIBCPP_HIDE_FROM_ABI explicit _BMSkipTable(size_t, value_type __default_value, _Hash, _BinaryPredicate) {
88     std::fill_n(__table_.data(), __table_.size(), __default_value);
89   }
90 
91   _LIBCPP_HIDE_FROM_ABI void insert(key_type __key, value_type __val) {
92     __table_[static_cast<unsigned_key_type>(__key)] = __val;
93   }
94 
95   _LIBCPP_HIDE_FROM_ABI value_type operator[](key_type __key) const {
96     return __table_[static_cast<unsigned_key_type>(__key)];
97   }
98 };
99 
100 template <class _RandomAccessIterator1,
101           class _Hash = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>,
102           class _BinaryPredicate = equal_to<>>
103 class _LIBCPP_TEMPLATE_VIS boyer_moore_searcher {
104 private:
105   using difference_type = typename std::iterator_traits<_RandomAccessIterator1>::difference_type;
106   using value_type = typename std::iterator_traits<_RandomAccessIterator1>::value_type;
107   using __skip_table_type = _BMSkipTable<value_type,
108                                          difference_type,
109                                          _Hash,
110                                          _BinaryPredicate,
111                                          is_integral_v<value_type>
112                                       && sizeof(value_type) == 1
113                                       && is_same_v<_Hash, hash<value_type>>
114                                       && is_same_v<_BinaryPredicate, equal_to<>>>;
115 
116 public:
117   _LIBCPP_HIDE_FROM_ABI
118   boyer_moore_searcher(_RandomAccessIterator1 __first,
119                        _RandomAccessIterator1 __last,
120                        _Hash __hash = _Hash(),
121                        _BinaryPredicate __pred = _BinaryPredicate())
122     : __first_(__first),
123       __last_(__last),
124       __pred_(__pred),
125       __pattern_length_(__last - __first),
126       __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, -1, __hash, __pred_)),
127       __suffix_(std::__allocate_shared_unbounded_array<difference_type[]>(
128           allocator<difference_type>(), __pattern_length_ + 1)) {
129     difference_type __i = 0;
130     while (__first != __last) {
131       __skip_table_->insert(*__first, __i);
132       ++__first;
133       ++__i;
134     }
135     __build_suffix_table(__first_, __last_, __pred_);
136   }
137 
138   template <class _RandomAccessIterator2>
139   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
140   operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const {
141     static_assert(__is_same_uncvref<typename iterator_traits<_RandomAccessIterator1>::value_type,
142                                     typename iterator_traits<_RandomAccessIterator2>::value_type>::value,
143                   "Corpus and Pattern iterators must point to the same type");
144     if (__first == __last)
145       return std::make_pair(__last, __last);
146     if (__first_ == __last_)
147       return std::make_pair(__first, __first);
148 
149     if (__pattern_length_ > (__last - __first))
150       return std::make_pair(__last, __last);
151     return __search(__first, __last);
152   }
153 
154 private:
155   _RandomAccessIterator1 __first_;
156   _RandomAccessIterator1 __last_;
157   _BinaryPredicate __pred_;
158   difference_type __pattern_length_;
159   shared_ptr<__skip_table_type> __skip_table_;
160   shared_ptr<difference_type[]> __suffix_;
161 
162   template <class _RandomAccessIterator2>
163   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
164   __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const {
165     _RandomAccessIterator2 __current = __f;
166     const _RandomAccessIterator2 __last = __l - __pattern_length_;
167     const __skip_table_type& __skip_table = *__skip_table_;
168 
169     while (__current <= __last) {
170       difference_type __j = __pattern_length_;
171       while (__pred_(__first_[__j - 1], __current[__j - 1])) {
172         --__j;
173         if (__j == 0)
174           return std::make_pair(__current, __current + __pattern_length_);
175       }
176 
177       difference_type __k = __skip_table[__current[__j - 1]];
178       difference_type __m = __j - __k - 1;
179       if (__k < __j && __m > __suffix_[__j])
180         __current += __m;
181       else
182         __current += __suffix_[__j];
183     }
184     return std::make_pair(__l, __l);
185   }
186 
187   template <class _Iterator, class _Container>
188   _LIBCPP_HIDE_FROM_ABI void
189   __compute_bm_prefix(_Iterator __first, _Iterator __last, _BinaryPredicate __pred, _Container& __prefix) {
190     const size_t __count = __last - __first;
191 
192     __prefix[0] = 0;
193     size_t __k = 0;
194 
195     for (size_t __i = 1; __i != __count; ++__i) {
196       while (__k > 0 && !__pred(__first[__k], __first[__i]))
197         __k = __prefix[__k - 1];
198 
199       if (__pred(__first[__k], __first[__i]))
200         ++__k;
201       __prefix[__i] = __k;
202     }
203   }
204 
205   _LIBCPP_HIDE_FROM_ABI void
206   __build_suffix_table(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _BinaryPredicate __pred) {
207     const size_t __count = __last - __first;
208 
209     if (__count == 0)
210       return;
211 
212     vector<difference_type> __scratch(__count);
213 
214     __compute_bm_prefix(__first, __last, __pred, __scratch);
215     for (size_t __i = 0; __i <= __count; ++__i)
216       __suffix_[__i] = __count - __scratch[__count - 1];
217 
218     using _ReverseIter = reverse_iterator<_RandomAccessIterator1>;
219     __compute_bm_prefix(_ReverseIter(__last), _ReverseIter(__first), __pred, __scratch);
220 
221     for (size_t __i = 0; __i != __count; ++__i) {
222       const size_t __j = __count - __scratch[__i];
223       const difference_type __k = __i - __scratch[__i] + 1;
224 
225       if (__suffix_[__j] > __k)
226         __suffix_[__j] = __k;
227     }
228   }
229 };
230 _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_searcher);
231 
232 template <class _RandomAccessIterator1,
233           class _Hash = hash<typename iterator_traits<_RandomAccessIterator1>::value_type>,
234           class _BinaryPredicate = equal_to<>>
235 class _LIBCPP_TEMPLATE_VIS boyer_moore_horspool_searcher {
236 private:
237   using difference_type = typename iterator_traits<_RandomAccessIterator1>::difference_type;
238   using value_type = typename iterator_traits<_RandomAccessIterator1>::value_type;
239   using __skip_table_type = _BMSkipTable<value_type,
240                                          difference_type,
241                                          _Hash,
242                                          _BinaryPredicate,
243                                          is_integral_v<value_type>
244                                       && sizeof(value_type) == 1
245                                       && is_same_v<_Hash, hash<value_type>>
246                                       && is_same_v<_BinaryPredicate, equal_to<>>>;
247 public:
248   _LIBCPP_HIDE_FROM_ABI
249   boyer_moore_horspool_searcher(_RandomAccessIterator1 __first,
250                                 _RandomAccessIterator1 __last,
251                                 _Hash __hash = _Hash(),
252                                 _BinaryPredicate __pred = _BinaryPredicate())
253     : __first_(__first),
254       __last_(__last),
255       __pred_(__pred),
256       __pattern_length_(__last - __first),
257       __skip_table_(std::make_shared<__skip_table_type>(__pattern_length_, __pattern_length_, __hash, __pred_)) {
258     if (__first == __last)
259       return;
260     --__last;
261     difference_type __i = 0;
262     while (__first != __last) {
263       __skip_table_->insert(*__first, __pattern_length_ - 1 - __i);
264       ++__first;
265       ++__i;
266     }
267   }
268 
269   template <class _RandomAccessIterator2>
270   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
271   operator()(_RandomAccessIterator2 __first, _RandomAccessIterator2 __last) const {
272     static_assert(__is_same_uncvref<typename std::iterator_traits<_RandomAccessIterator1>::value_type,
273                                     typename std::iterator_traits<_RandomAccessIterator2>::value_type>::value,
274                   "Corpus and Pattern iterators must point to the same type");
275     if (__first == __last)
276       return std::make_pair(__last, __last);
277     if (__first_ == __last_)
278       return std::make_pair(__first, __first);
279 
280     if (__pattern_length_ > __last - __first)
281       return std::make_pair(__last, __last);
282 
283     return __search(__first, __last);
284   }
285 
286 private:
287   _RandomAccessIterator1 __first_;
288   _RandomAccessIterator1 __last_;
289   _BinaryPredicate __pred_;
290   difference_type __pattern_length_;
291   shared_ptr<__skip_table_type> __skip_table_;
292 
293   template <class _RandomAccessIterator2>
294   _LIBCPP_HIDE_FROM_ABI pair<_RandomAccessIterator2, _RandomAccessIterator2>
295   __search(_RandomAccessIterator2 __f, _RandomAccessIterator2 __l) const {
296     _RandomAccessIterator2 __current = __f;
297     const _RandomAccessIterator2 __last = __l - __pattern_length_;
298     const __skip_table_type& __skip_table = *__skip_table_;
299 
300     while (__current <= __last) {
301       difference_type __j = __pattern_length_;
302       while (__pred_(__first_[__j - 1], __current[__j - 1])) {
303         --__j;
304         if (__j == 0)
305           return std::make_pair(__current, __current + __pattern_length_);
306       }
307       __current += __skip_table[__current[__pattern_length_ - 1]];
308     }
309     return std::make_pair(__l, __l);
310   }
311 };
312 _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(boyer_moore_horspool_searcher);
313 
314 _LIBCPP_END_NAMESPACE_STD
315 
316 _LIBCPP_POP_MACROS
317 
318 #endif // _LIBCPP_STD_VER >= 17
319 
320 #endif // _LIBCPP___FUNCTIONAL_BOYER_MOORE_SEARCHER_H
321