1 use crate::memmem::{rarebytes::RareNeedleBytes, NeedleInfo};
2 
3 mod fallback;
4 #[cfg(all(target_arch = "x86_64", memchr_runtime_simd))]
5 mod genericsimd;
6 #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
7 mod x86;
8 
/// The maximum frequency rank permitted for the fallback prefilter. If the
/// rarest byte in the needle has a frequency rank above this value, then no
/// prefilter is used if the fallback prefilter would otherwise be selected.
///
/// NOTE(review): ranks come from `RareNeedleBytes::as_ranks`; presumably a
/// higher rank means a more common byte (so a high-rank "rare" byte would
/// yield too many false positives) — confirm against the `rarebytes` module.
const MAX_FALLBACK_RANK: usize = 250;
13 
/// A combination of prefilter effectiveness state, the prefilter function and
/// the needle info required to run a prefilter.
///
/// For the most part, these are grouped into a single type for convenience,
/// instead of needing to pass around all three as distinct function
/// parameters.
pub(crate) struct Pre<'a> {
    /// State that tracks the effectiveness of a prefilter. Held by mutable
    /// borrow so that calls through this `Pre` can update skip statistics.
    pub(crate) state: &'a mut PrefilterState,
    /// The actual prefilter function.
    pub(crate) prefn: PrefilterFn,
    /// Information about a needle, such as its RK hash and rare byte offsets.
    pub(crate) ninfo: &'a NeedleInfo,
}
28 
29 impl<'a> Pre<'a> {
30     /// Call this prefilter on the given haystack with the given needle.
31     #[inline(always)]
call( &mut self, haystack: &[u8], needle: &[u8], ) -> Option<usize>32     pub(crate) fn call(
33         &mut self,
34         haystack: &[u8],
35         needle: &[u8],
36     ) -> Option<usize> {
37         self.prefn.call(self.state, self.ninfo, haystack, needle)
38     }
39 
40     /// Return true if and only if this prefilter should be used.
41     #[inline(always)]
should_call(&mut self) -> bool42     pub(crate) fn should_call(&mut self) -> bool {
43         self.state.is_effective()
44     }
45 }
46 
/// A prefilter function.
///
/// A prefilter function describes both forward and reverse searches.
/// (Although, we don't currently implement prefilters for reverse searching.)
/// In the case of a forward search, the position returned corresponds to
/// the starting offset of a match (confirmed or possible). Its minimum
/// value is `0`, and its maximum value is `haystack.len() - 1`. In the case
/// of a reverse search, the position returned corresponds to the position
/// immediately after a match (confirmed or possible). Its minimum value is `1`
/// and its maximum value is `haystack.len()`.
///
/// In both cases, the position returned is the starting (or ending) point of a
/// _possible_ match. That is, returning a false positive is okay. A prefilter,
/// however, must never return any false negatives. That is, if a match exists
/// at a particular position `i`, then a prefilter _must_ return that position.
/// It cannot skip past it.
///
/// # Safety
///
/// A prefilter function is not safe to create, since not all prefilters are
/// safe to call in all contexts. (e.g., A prefilter that uses AVX instructions
/// may only be called on x86_64 CPUs with the relevant AVX feature enabled.)
/// Thus, callers must ensure that when a prefilter function is created that it
/// is safe to call for the current environment.
// A thin newtype over a raw function pointer; `Copy` so it can be passed
// around freely without lifetime or ownership considerations.
#[derive(Clone, Copy)]
pub(crate) struct PrefilterFn(PrefilterFnTy);
73 
/// The type of a prefilter function. All prefilters must satisfy this
/// signature.
///
/// Using a function pointer like this does inhibit inlining, but it does
/// eliminate branching and the extra costs associated with copying a larger
/// enum. Note also, that using Box<dyn SomePrefilterTrait> can't really work
/// here, since we want to work in contexts that don't have dynamic memory
/// allocation. Moreover, in the default configuration of this crate on x86_64
/// CPUs released in the past ~decade, we will use an AVX2-optimized prefilter,
/// which generally won't be inlineable into the surrounding code anyway.
/// (Unless AVX2 is enabled at compile time, but this is typically rare, since
/// it produces a non-portable binary.)
///
/// The pointer is an `unsafe fn` because some implementations require CPU
/// features (e.g., AVX2) that must be verified by the caller before calling.
pub(crate) type PrefilterFnTy = unsafe fn(
    prestate: &mut PrefilterState,
    ninfo: &NeedleInfo,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize>;
92 
93 impl PrefilterFn {
94     /// Create a new prefilter function from the function pointer given.
95     ///
96     /// # Safety
97     ///
98     /// Callers must ensure that the given prefilter function is safe to call
99     /// for all inputs in the current environment. For example, if the given
100     /// prefilter function uses AVX instructions, then the caller must ensure
101     /// that the appropriate AVX CPU features are enabled.
new(prefn: PrefilterFnTy) -> PrefilterFn102     pub(crate) unsafe fn new(prefn: PrefilterFnTy) -> PrefilterFn {
103         PrefilterFn(prefn)
104     }
105 
106     /// Call the underlying prefilter function with the given arguments.
call( self, prestate: &mut PrefilterState, ninfo: &NeedleInfo, haystack: &[u8], needle: &[u8], ) -> Option<usize>107     pub fn call(
108         self,
109         prestate: &mut PrefilterState,
110         ninfo: &NeedleInfo,
111         haystack: &[u8],
112         needle: &[u8],
113     ) -> Option<usize> {
114         // SAFETY: Callers have the burden of ensuring that a prefilter
115         // function is safe to call for all inputs in the current environment.
116         unsafe { (self.0)(prestate, ninfo, haystack, needle) }
117     }
118 }
119 
120 impl core::fmt::Debug for PrefilterFn {
fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result121     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
122         "<prefilter-fn(...)>".fmt(f)
123     }
124 }
125 
126 /// Prefilter controls whether heuristics are used to accelerate searching.
127 ///
128 /// A prefilter refers to the idea of detecting candidate matches very quickly,
129 /// and then confirming whether those candidates are full matches. This
130 /// idea can be quite effective since it's often the case that looking for
131 /// candidates can be a lot faster than running a complete substring search
132 /// over the entire input. Namely, looking for candidates can be done with
133 /// extremely fast vectorized code.
134 ///
135 /// The downside of a prefilter is that it assumes false positives (which are
136 /// candidates generated by a prefilter that aren't matches) are somewhat rare
137 /// relative to the frequency of full matches. That is, if a lot of false
138 /// positives are generated, then it's possible for search time to be worse
139 /// than if the prefilter wasn't enabled in the first place.
140 ///
141 /// Another downside of a prefilter is that it can result in highly variable
142 /// performance, where some cases are extraordinarily fast and others aren't.
143 /// Typically, variable performance isn't a problem, but it may be for your use
144 /// case.
145 ///
146 /// The use of prefilters in this implementation does use a heuristic to detect
147 /// when a prefilter might not be carrying its weight, and will dynamically
148 /// disable its use. Nevertheless, this configuration option gives callers
149 /// the ability to disable prefilters if you have knowledge that they won't be
150 /// useful.
#[derive(Clone, Copy, Debug)]
#[non_exhaustive]
pub enum Prefilter {
    /// Never use a prefilter in substring search.
    None,
    /// Automatically detect whether a heuristic prefilter should be used. If
    /// it is used, then heuristics will be used to dynamically disable the
    /// prefilter if it is believed to not be carrying its weight.
    Auto,
}
161 
162 impl Default for Prefilter {
default() -> Prefilter163     fn default() -> Prefilter {
164         Prefilter::Auto
165     }
166 }
167 
168 impl Prefilter {
is_none(&self) -> bool169     pub(crate) fn is_none(&self) -> bool {
170         match *self {
171             Prefilter::None => true,
172             _ => false,
173         }
174     }
175 }
176 
/// PrefilterState tracks state associated with the effectiveness of a
/// prefilter. It is used to track how many bytes, on average, are skipped by
/// the prefilter. If this average dips below a certain threshold over time,
/// then the state renders the prefilter inert and stops using it.
///
/// A prefilter state should be created for each search. (Where creating an
/// iterator is treated as a single search.) A state may also be created
/// permanently inert, via `inert`, when a prefilter should never run.
#[derive(Clone, Debug)]
pub(crate) struct PrefilterState {
    /// One more than the number of skips executed so far. The sentinel value
    /// 0 means the prefilter is inert; folding inertness into this counter
    /// avoids a separate flag check on the hot path. Once inert, a prefilter
    /// should no longer be used (according to our heuristics).
    skips: u32,
    /// The total number of bytes that have been skipped.
    skipped: u32,
}

impl PrefilterState {
    /// The minimum number of skip attempts to try before considering whether
    /// a prefilter is effective or not.
    const MIN_SKIPS: u32 = 50;

    /// The minimum amount of bytes that skipping must average.
    ///
    /// This value was chosen based on varying it and checking
    /// the microbenchmarks. In particular, this can impact the
    /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set
    /// too low.
    const MIN_SKIP_BYTES: u32 = 8;

    /// Create a fresh prefilter state.
    pub(crate) fn new() -> PrefilterState {
        PrefilterState { skips: 1, skipped: 0 }
    }

    /// Create a fresh prefilter state that is always inert.
    pub(crate) fn inert() -> PrefilterState {
        PrefilterState { skips: 0, skipped: 0 }
    }

    /// Update this state with the number of bytes skipped on the last
    /// invocation of the prefilter.
    #[inline]
    pub(crate) fn update(&mut self, skipped: usize) {
        self.skips = self.skips.saturating_add(1);
        // `skipped` can technically exceed `u32::MAX` on 64-bit targets, so
        // clamp it before accumulating. (We use `u32` fields to keep this
        // struct small.)
        let amount = if skipped > core::u32::MAX as usize {
            core::u32::MAX
        } else {
            skipped as u32
        };
        self.skipped = self.skipped.saturating_add(amount);
    }

    /// Return true if and only if this state indicates that a prefilter is
    /// still effective.
    #[inline]
    pub(crate) fn is_effective(&mut self) -> bool {
        if self.is_inert() {
            return false;
        }
        let skips = self.skips();
        // Either we don't have enough samples yet, or the average number of
        // skipped bytes is still high enough to be worthwhile.
        if skips < PrefilterState::MIN_SKIPS
            || self.skipped >= PrefilterState::MIN_SKIP_BYTES * skips
        {
            return true;
        }
        // Not carrying its weight: render this state inert permanently.
        self.skips = 0;
        false
    }

    /// Returns true when this state has been rendered inert.
    #[inline]
    fn is_inert(&self) -> bool {
        self.skips == 0
    }

    /// The number of skips actually executed (undoing the +1 bias on the
    /// `skips` counter).
    #[inline]
    fn skips(&self) -> u32 {
        self.skips.saturating_sub(1)
    }
}
266 
/// Determine which prefilter function, if any, to use.
///
/// This only applies to x86_64 when runtime SIMD detection is enabled (which
/// is the default). In general, we try to use an AVX prefilter, followed by
/// SSE and then followed by a generic one based on memchr.
#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
#[inline(always)]
pub(crate) fn forward(
    config: &Prefilter,
    rare: &RareNeedleBytes,
    needle: &[u8],
) -> Option<PrefilterFn> {
    // No prefilter when explicitly disabled, or for trivial needles (length
    // 0 or 1), where a plain memchr-style search suffices.
    if config.is_none() || needle.len() <= 1 {
        return None;
    }

    // Runtime CPU feature detection (is_x86_feature_detected!) requires
    // `std`, so the AVX2 path is only compiled in when it's available.
    #[cfg(feature = "std")]
    {
        if cfg!(memchr_runtime_avx) {
            if is_x86_feature_detected!("avx2") {
                // SAFETY: x86::avx::find only requires the avx2 feature,
                // which we've just checked above.
                return unsafe { Some(PrefilterFn::new(x86::avx::find)) };
            }
        }
    }
    if cfg!(memchr_runtime_sse2) {
        // SAFETY: x86::sse::find only requires the sse2 feature, which is
        // guaranteed to be available on x86_64.
        return unsafe { Some(PrefilterFn::new(x86::sse::find)) };
    }
    // Check that our rarest byte has a reasonably low rank. The main issue
    // here is that the fallback prefilter can perform pretty poorly if it's
    // given common bytes. So we try to avoid the worst cases here.
    let (rare1_rank, _) = rare.as_ranks(needle);
    if rare1_rank <= MAX_FALLBACK_RANK {
        // SAFETY: fallback::find is safe to call in all environments.
        return unsafe { Some(PrefilterFn::new(fallback::find)) };
    }
    None
}
308 
309 /// Determine which prefilter function, if any, to use.
310 ///
311 /// Since SIMD is currently only supported on x86_64, this will just select
312 /// the fallback prefilter if the rare bytes provided have a low enough rank.
313 #[cfg(not(all(not(miri), target_arch = "x86_64", memchr_runtime_simd)))]
314 #[inline(always)]
forward( config: &Prefilter, rare: &RareNeedleBytes, needle: &[u8], ) -> Option<PrefilterFn>315 pub(crate) fn forward(
316     config: &Prefilter,
317     rare: &RareNeedleBytes,
318     needle: &[u8],
319 ) -> Option<PrefilterFn> {
320     if config.is_none() || needle.len() <= 1 {
321         return None;
322     }
323     let (rare1_rank, _) = rare.as_ranks(needle);
324     if rare1_rank <= MAX_FALLBACK_RANK {
325         // SAFETY: fallback::find is safe to call in all environments.
326         return unsafe { Some(PrefilterFn::new(fallback::find)) };
327     }
328     None
329 }
330 
/// Return the minimum length of the haystack in which a prefilter should be
/// used. If the haystack is below this length, then it's probably not worth
/// the overhead of running the prefilter.
///
/// We used to look at the length of a haystack here. That is, if it was too
/// small, then don't bother with the prefilter. But two things changed:
/// the prefilter falls back to memchr for small haystacks, and, at the
/// meta-searcher level, Rabin-Karp is employed for tiny haystacks anyway.
///
/// We keep it around for now in case we want to bring it back.
#[allow(dead_code)]
pub(crate) fn minimum_len(_haystack: &[u8], needle: &[u8]) -> usize {
    // If the haystack length isn't greater than needle.len() * FACTOR, then
    // no prefilter will be used. The presumption here is that since there
    // are so few bytes to check, it's not worth running the prefilter since
    // there will need to be a validation step anyway. Thus, the prefilter is
    // largely redundant work.
    //
    // Increasing the factor noticeably hurts the
    // memmem/krate/prebuilt/teeny-*/never-john-watson benchmarks.
    const PREFILTER_LENGTH_FACTOR: usize = 2;
    const VECTOR_MIN_LENGTH: usize = 16;
    let scaled = PREFILTER_LENGTH_FACTOR * needle.len();
    let floor = if scaled < VECTOR_MIN_LENGTH { VECTOR_MIN_LENGTH } else { scaled };
    // For haystacks whose length is exactly `floor`, we still want to avoid
    // the prefilter, so add 1.
    floor + 1
}
361 
362 #[cfg(all(test, feature = "std", not(miri)))]
363 pub(crate) mod tests {
364     use std::convert::{TryFrom, TryInto};
365 
366     use super::*;
367     use crate::memmem::{
368         prefilter::PrefilterFnTy, rabinkarp, rarebytes::RareNeedleBytes,
369     };
370 
371     // Below is a small jig that generates prefilter tests. The main purpose
372     // of this jig is to generate tests of varying needle/haystack lengths
373     // in order to try and exercise all code paths in our prefilters. And in
374     // particular, this is especially important for vectorized prefilters where
375     // certain code paths might only be exercised at certain lengths.
376 
    /// A test that represents the input and expected output to a prefilter
    /// function. The test should be able to run with any prefilter function
    /// and get the expected output.
    pub(crate) struct PrefilterTest {
        // These fields represent the inputs and expected output of a forwards
        // prefilter function.
        pub(crate) ninfo: NeedleInfo,
        pub(crate) haystack: Vec<u8>,
        pub(crate) needle: Vec<u8>,
        // The expected result: the start offset of a match when `Some`, or
        // `None` when the prefilter must report no candidate at all.
        pub(crate) output: Option<usize>,
    }
388 
    impl PrefilterTest {
        /// Run all generated forward prefilter tests on the given prefn.
        ///
        /// # Safety
        ///
        /// Callers must ensure that the given prefilter function pointer is
        /// safe to call for all inputs in the current environment.
        pub(crate) unsafe fn run_all_tests(prefn: PrefilterFnTy) {
            PrefilterTest::run_all_tests_filter(prefn, |_| true)
        }

        /// Run all generated forward prefilter tests that pass the given
        /// predicate on the given prefn.
        ///
        /// # Safety
        ///
        /// Callers must ensure that the given prefilter function pointer is
        /// safe to call for all inputs in the current environment.
        pub(crate) unsafe fn run_all_tests_filter(
            prefn: PrefilterFnTy,
            mut predicate: impl FnMut(&PrefilterTest) -> bool,
        ) {
            for seed in PREFILTER_TEST_SEEDS {
                for test in seed.generate() {
                    if predicate(&test) {
                        test.run(prefn);
                    }
                }
            }
        }

        /// Create a new prefilter test from a seed and some chosen offsets to
        /// rare bytes in the seed's needle.
        ///
        /// If a valid test could not be constructed, then None is returned.
        /// (Currently, we take the approach of massaging tests to be valid
        /// instead of rejecting them outright.)
        fn new(
            seed: &PrefilterTestSeed,
            rare1i: usize,
            rare2i: usize,
            haystack_len: usize,
            needle_len: usize,
            output: Option<usize>,
        ) -> Option<PrefilterTest> {
            // Rare byte offsets are stored as u8 elsewhere in this module,
            // so the generated offsets must fit; panic on a bad generator.
            let mut rare1i: u8 = rare1i.try_into().unwrap();
            let mut rare2i: u8 = rare2i.try_into().unwrap();
            // The '#' byte is never used in a haystack (unless we're expecting
            // a match), while the '@' byte is never used in a needle.
            let mut haystack = vec![b'@'; haystack_len];
            let mut needle = vec![b'#'; needle_len];
            needle[0] = seed.first;
            needle[rare1i as usize] = seed.rare1;
            needle[rare2i as usize] = seed.rare2;
            // If we're expecting a match, then make sure the needle occurs
            // in the haystack at the expected position.
            if let Some(i) = output {
                haystack[i..i + needle.len()].copy_from_slice(&needle);
            }
            // If the operations above lead to rare offsets pointing to the
            // non-first occurrence of a byte, then adjust it. This might lead
            // to redundant tests, but it's simpler than trying to change the
            // generation process I think.
            if let Some(i) = crate::memchr(seed.rare1, &needle) {
                rare1i = u8::try_from(i).unwrap();
            }
            if let Some(i) = crate::memchr(seed.rare2, &needle) {
                rare2i = u8::try_from(i).unwrap();
            }
            let ninfo = NeedleInfo {
                rarebytes: RareNeedleBytes::new(rare1i, rare2i),
                nhash: rabinkarp::NeedleHash::forward(&needle),
            };
            Some(PrefilterTest { ninfo, haystack, needle, output })
        }

        /// Run this specific test on the given prefilter function. If the
        /// outputs do not match, then this routine panics with a failure
        /// message.
        ///
        /// # Safety
        ///
        /// Callers must ensure that the given prefilter function pointer is
        /// safe to call for all inputs in the current environment.
        unsafe fn run(&self, prefn: PrefilterFnTy) {
            let mut prestate = PrefilterState::new();
            assert_eq!(
                self.output,
                prefn(
                    &mut prestate,
                    &self.ninfo,
                    &self.haystack,
                    &self.needle
                ),
                "ninfo: {:?}, haystack(len={}): {:?}, needle(len={}): {:?}",
                self.ninfo,
                self.haystack.len(),
                std::str::from_utf8(&self.haystack).unwrap(),
                self.needle.len(),
                std::str::from_utf8(&self.needle).unwrap(),
            );
        }
    }
492 
    /// A set of prefilter test seeds. Each seed serves as the base for the
    /// generation of many other tests. In essence, the seed captures the
    /// "rare" and first bytes among our needle. The tests generated from each
    /// seed essentially vary the length of the needle and haystack, while
    /// using the rare/first byte configuration from the seed.
    ///
    /// The purpose of this is to test many different needle/haystack lengths.
    /// In particular, some of the vector optimizations might only have bugs
    /// in haystacks of a certain size.
    ///
    /// Note that several seeds deliberately reuse the same byte across the
    /// first/rare1/rare2 roles, in order to exercise degenerate rare-byte
    /// configurations.
    const PREFILTER_TEST_SEEDS: &[PrefilterTestSeed] = &[
        PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'z' },
        PrefilterTestSeed { first: b'x', rare1: b'x', rare2: b'z' },
        PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'x' },
        PrefilterTestSeed { first: b'x', rare1: b'x', rare2: b'x' },
        PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'y' },
    ];
509 
    /// Data that describes a single prefilter test seed.
    struct PrefilterTestSeed {
        // The byte placed at position 0 of every generated needle.
        first: u8,
        // The bytes planted at the chosen rare1/rare2 offsets of each
        // generated needle.
        rare1: u8,
        rare2: u8,
    }
516 
517     impl PrefilterTestSeed {
518         /// Generate a series of prefilter tests from this seed.
generate(&self) -> Vec<PrefilterTest>519         fn generate(&self) -> Vec<PrefilterTest> {
520             let mut tests = vec![];
521             let mut push = |test: Option<PrefilterTest>| {
522                 if let Some(test) = test {
523                     tests.push(test);
524                 }
525             };
526             let len_start = 2;
527             // The loop below generates *a lot* of tests. The number of tests
528             // was chosen somewhat empirically to be "bearable" when running
529             // the test suite.
530             for needle_len in len_start..=40 {
531                 let rare_start = len_start - 1;
532                 for rare1i in rare_start..needle_len {
533                     for rare2i in rare1i..needle_len {
534                         for haystack_len in needle_len..=66 {
535                             push(PrefilterTest::new(
536                                 self,
537                                 rare1i,
538                                 rare2i,
539                                 haystack_len,
540                                 needle_len,
541                                 None,
542                             ));
543                             // Test all possible match scenarios for this
544                             // needle and haystack.
545                             for output in 0..=(haystack_len - needle_len) {
546                                 push(PrefilterTest::new(
547                                     self,
548                                     rare1i,
549                                     rare2i,
550                                     haystack_len,
551                                     needle_len,
552                                     Some(output),
553                                 ));
554                             }
555                         }
556                     }
557                 }
558             }
559             tests
560         }
561     }
562 }
563