1 use crate::memmem::{rarebytes::RareNeedleBytes, NeedleInfo};
2
3 mod fallback;
4 #[cfg(all(target_arch = "x86_64", memchr_runtime_simd))]
5 mod genericsimd;
6 #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
7 mod x86;
8
9 /// The maximum frequency rank permitted for the fallback prefilter. If the
10 /// rarest byte in the needle has a frequency rank above this value, then no
11 /// prefilter is used if the fallback prefilter would otherwise be selected.
12 const MAX_FALLBACK_RANK: usize = 250;
13
14 /// A combination of prefilter effectiveness state, the prefilter function and
15 /// the needle info required to run a prefilter.
16 ///
17 /// For the most part, these are grouped into a single type for convenience,
18 /// instead of needing to pass around all three as distinct function
19 /// parameters.
20 pub(crate) struct Pre<'a> {
21 /// State that tracks the effectiveness of a prefilter.
22 pub(crate) state: &'a mut PrefilterState,
23 /// The actual prefilter function.
24 pub(crate) prefn: PrefilterFn,
25 /// Information about a needle, such as its RK hash and rare byte offsets.
26 pub(crate) ninfo: &'a NeedleInfo,
27 }
28
29 impl<'a> Pre<'a> {
30 /// Call this prefilter on the given haystack with the given needle.
31 #[inline(always)]
call( &mut self, haystack: &[u8], needle: &[u8], ) -> Option<usize>32 pub(crate) fn call(
33 &mut self,
34 haystack: &[u8],
35 needle: &[u8],
36 ) -> Option<usize> {
37 self.prefn.call(self.state, self.ninfo, haystack, needle)
38 }
39
40 /// Return true if and only if this prefilter should be used.
41 #[inline(always)]
should_call(&mut self) -> bool42 pub(crate) fn should_call(&mut self) -> bool {
43 self.state.is_effective()
44 }
45 }
46
47 /// A prefilter function.
48 ///
49 /// A prefilter function describes both forward and reverse searches.
50 /// (Although, we don't currently implement prefilters for reverse searching.)
51 /// In the case of a forward search, the position returned corresponds to
52 /// the starting offset of a match (confirmed or possible). Its minimum
53 /// value is `0`, and its maximum value is `haystack.len() - 1`. In the case
54 /// of a reverse search, the position returned corresponds to the position
55 /// immediately after a match (confirmed or possible). Its minimum value is `1`
56 /// and its maximum value is `haystack.len()`.
57 ///
58 /// In both cases, the position returned is the starting (or ending) point of a
59 /// _possible_ match. That is, returning a false positive is okay. A prefilter,
60 /// however, must never return any false negatives. That is, if a match exists
61 /// at a particular position `i`, then a prefilter _must_ return that position.
62 /// It cannot skip past it.
63 ///
64 /// # Safety
65 ///
66 /// A prefilter function is not safe to create, since not all prefilters are
67 /// safe to call in all contexts. (e.g., A prefilter that uses AVX instructions
68 /// may only be called on x86_64 CPUs with the relevant AVX feature enabled.)
69 /// Thus, callers must ensure that when a prefilter function is created that it
70 /// is safe to call for the current environment.
71 #[derive(Clone, Copy)]
72 pub(crate) struct PrefilterFn(PrefilterFnTy);
73
74 /// The type of a prefilter function. All prefilters must satisfy this
75 /// signature.
76 ///
77 /// Using a function pointer like this does inhibit inlining, but it does
78 /// eliminate branching and the extra costs associated with copying a larger
79 /// enum. Note also, that using Box<dyn SomePrefilterTrait> can't really work
80 /// here, since we want to work in contexts that don't have dynamic memory
81 /// allocation. Moreover, in the default configuration of this crate on x86_64
82 /// CPUs released in the past ~decade, we will use an AVX2-optimized prefilter,
83 /// which generally won't be inlineable into the surrounding code anyway.
84 /// (Unless AVX2 is enabled at compile time, but this is typically rare, since
85 /// it produces a non-portable binary.)
86 pub(crate) type PrefilterFnTy = unsafe fn(
87 prestate: &mut PrefilterState,
88 ninfo: &NeedleInfo,
89 haystack: &[u8],
90 needle: &[u8],
91 ) -> Option<usize>;
92
93 impl PrefilterFn {
94 /// Create a new prefilter function from the function pointer given.
95 ///
96 /// # Safety
97 ///
98 /// Callers must ensure that the given prefilter function is safe to call
99 /// for all inputs in the current environment. For example, if the given
100 /// prefilter function uses AVX instructions, then the caller must ensure
101 /// that the appropriate AVX CPU features are enabled.
new(prefn: PrefilterFnTy) -> PrefilterFn102 pub(crate) unsafe fn new(prefn: PrefilterFnTy) -> PrefilterFn {
103 PrefilterFn(prefn)
104 }
105
106 /// Call the underlying prefilter function with the given arguments.
call( self, prestate: &mut PrefilterState, ninfo: &NeedleInfo, haystack: &[u8], needle: &[u8], ) -> Option<usize>107 pub fn call(
108 self,
109 prestate: &mut PrefilterState,
110 ninfo: &NeedleInfo,
111 haystack: &[u8],
112 needle: &[u8],
113 ) -> Option<usize> {
114 // SAFETY: Callers have the burden of ensuring that a prefilter
115 // function is safe to call for all inputs in the current environment.
116 unsafe { (self.0)(prestate, ninfo, haystack, needle) }
117 }
118 }
119
120 impl core::fmt::Debug for PrefilterFn {
fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result121 fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
122 "<prefilter-fn(...)>".fmt(f)
123 }
124 }
125
126 /// Prefilter controls whether heuristics are used to accelerate searching.
127 ///
128 /// A prefilter refers to the idea of detecting candidate matches very quickly,
129 /// and then confirming whether those candidates are full matches. This
130 /// idea can be quite effective since it's often the case that looking for
131 /// candidates can be a lot faster than running a complete substring search
132 /// over the entire input. Namely, looking for candidates can be done with
133 /// extremely fast vectorized code.
134 ///
135 /// The downside of a prefilter is that it assumes false positives (which are
136 /// candidates generated by a prefilter that aren't matches) are somewhat rare
137 /// relative to the frequency of full matches. That is, if a lot of false
138 /// positives are generated, then it's possible for search time to be worse
139 /// than if the prefilter wasn't enabled in the first place.
140 ///
141 /// Another downside of a prefilter is that it can result in highly variable
142 /// performance, where some cases are extraordinarily fast and others aren't.
143 /// Typically, variable performance isn't a problem, but it may be for your use
144 /// case.
145 ///
146 /// The use of prefilters in this implementation does use a heuristic to detect
147 /// when a prefilter might not be carrying its weight, and will dynamically
148 /// disable its use. Nevertheless, this configuration option gives callers
149 /// the ability to disable prefilters if you have knowledge that they won't be
150 /// useful.
151 #[derive(Clone, Copy, Debug)]
152 #[non_exhaustive]
153 pub enum Prefilter {
154 /// Never used a prefilter in substring search.
155 None,
156 /// Automatically detect whether a heuristic prefilter should be used. If
157 /// it is used, then heuristics will be used to dynamically disable the
158 /// prefilter if it is believed to not be carrying its weight.
159 Auto,
160 }
161
162 impl Default for Prefilter {
default() -> Prefilter163 fn default() -> Prefilter {
164 Prefilter::Auto
165 }
166 }
167
168 impl Prefilter {
is_none(&self) -> bool169 pub(crate) fn is_none(&self) -> bool {
170 match *self {
171 Prefilter::None => true,
172 _ => false,
173 }
174 }
175 }
176
177 /// PrefilterState tracks state associated with the effectiveness of a
178 /// prefilter. It is used to track how many bytes, on average, are skipped by
179 /// the prefilter. If this average dips below a certain threshold over time,
180 /// then the state renders the prefilter inert and stops using it.
181 ///
182 /// A prefilter state should be created for each search. (Where creating an
183 /// iterator is treated as a single search.) A prefilter state should only be
184 /// created from a `Freqy`. e.g., An inert `Freqy` will produce an inert
185 /// `PrefilterState`.
186 #[derive(Clone, Debug)]
187 pub(crate) struct PrefilterState {
188 /// The number of skips that has been executed. This is always 1 greater
189 /// than the actual number of skips. The special sentinel value of 0
190 /// indicates that the prefilter is inert. This is useful to avoid
191 /// additional checks to determine whether the prefilter is still
192 /// "effective." Once a prefilter becomes inert, it should no longer be
193 /// used (according to our heuristics).
194 skips: u32,
195 /// The total number of bytes that have been skipped.
196 skipped: u32,
197 }
198
199 impl PrefilterState {
200 /// The minimum number of skip attempts to try before considering whether
201 /// a prefilter is effective or not.
202 const MIN_SKIPS: u32 = 50;
203
204 /// The minimum amount of bytes that skipping must average.
205 ///
206 /// This value was chosen based on varying it and checking
207 /// the microbenchmarks. In particular, this can impact the
208 /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set
209 /// too low.
210 const MIN_SKIP_BYTES: u32 = 8;
211
212 /// Create a fresh prefilter state.
new() -> PrefilterState213 pub(crate) fn new() -> PrefilterState {
214 PrefilterState { skips: 1, skipped: 0 }
215 }
216
217 /// Create a fresh prefilter state that is always inert.
inert() -> PrefilterState218 pub(crate) fn inert() -> PrefilterState {
219 PrefilterState { skips: 0, skipped: 0 }
220 }
221
222 /// Update this state with the number of bytes skipped on the last
223 /// invocation of the prefilter.
224 #[inline]
update(&mut self, skipped: usize)225 pub(crate) fn update(&mut self, skipped: usize) {
226 self.skips = self.skips.saturating_add(1);
227 // We need to do this dance since it's technically possible for
228 // `skipped` to overflow a `u32`. (And we use a `u32` to reduce the
229 // size of a prefilter state.)
230 if skipped > core::u32::MAX as usize {
231 self.skipped = core::u32::MAX;
232 } else {
233 self.skipped = self.skipped.saturating_add(skipped as u32);
234 }
235 }
236
237 /// Return true if and only if this state indicates that a prefilter is
238 /// still effective.
239 #[inline]
is_effective(&mut self) -> bool240 pub(crate) fn is_effective(&mut self) -> bool {
241 if self.is_inert() {
242 return false;
243 }
244 if self.skips() < PrefilterState::MIN_SKIPS {
245 return true;
246 }
247 if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips() {
248 return true;
249 }
250
251 // We're inert.
252 self.skips = 0;
253 false
254 }
255
256 #[inline]
is_inert(&self) -> bool257 fn is_inert(&self) -> bool {
258 self.skips == 0
259 }
260
261 #[inline]
skips(&self) -> u32262 fn skips(&self) -> u32 {
263 self.skips.saturating_sub(1)
264 }
265 }
266
267 /// Determine which prefilter function, if any, to use.
268 ///
269 /// This only applies to x86_64 when runtime SIMD detection is enabled (which
270 /// is the default). In general, we try to use an AVX prefilter, followed by
271 /// SSE and then followed by a generic one based on memchr.
272 #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
273 #[inline(always)]
forward( config: &Prefilter, rare: &RareNeedleBytes, needle: &[u8], ) -> Option<PrefilterFn>274 pub(crate) fn forward(
275 config: &Prefilter,
276 rare: &RareNeedleBytes,
277 needle: &[u8],
278 ) -> Option<PrefilterFn> {
279 if config.is_none() || needle.len() <= 1 {
280 return None;
281 }
282
283 #[cfg(feature = "std")]
284 {
285 if cfg!(memchr_runtime_avx) {
286 if is_x86_feature_detected!("avx2") {
287 // SAFETY: x86::avx::find only requires the avx2 feature,
288 // which we've just checked above.
289 return unsafe { Some(PrefilterFn::new(x86::avx::find)) };
290 }
291 }
292 }
293 if cfg!(memchr_runtime_sse2) {
294 // SAFETY: x86::sse::find only requires the sse2 feature, which is
295 // guaranteed to be available on x86_64.
296 return unsafe { Some(PrefilterFn::new(x86::sse::find)) };
297 }
298 // Check that our rarest byte has a reasonably low rank. The main issue
299 // here is that the fallback prefilter can perform pretty poorly if it's
300 // given common bytes. So we try to avoid the worst cases here.
301 let (rare1_rank, _) = rare.as_ranks(needle);
302 if rare1_rank <= MAX_FALLBACK_RANK {
303 // SAFETY: fallback::find is safe to call in all environments.
304 return unsafe { Some(PrefilterFn::new(fallback::find)) };
305 }
306 None
307 }
308
309 /// Determine which prefilter function, if any, to use.
310 ///
311 /// Since SIMD is currently only supported on x86_64, this will just select
312 /// the fallback prefilter if the rare bytes provided have a low enough rank.
313 #[cfg(not(all(not(miri), target_arch = "x86_64", memchr_runtime_simd)))]
314 #[inline(always)]
forward( config: &Prefilter, rare: &RareNeedleBytes, needle: &[u8], ) -> Option<PrefilterFn>315 pub(crate) fn forward(
316 config: &Prefilter,
317 rare: &RareNeedleBytes,
318 needle: &[u8],
319 ) -> Option<PrefilterFn> {
320 if config.is_none() || needle.len() <= 1 {
321 return None;
322 }
323 let (rare1_rank, _) = rare.as_ranks(needle);
324 if rare1_rank <= MAX_FALLBACK_RANK {
325 // SAFETY: fallback::find is safe to call in all environments.
326 return unsafe { Some(PrefilterFn::new(fallback::find)) };
327 }
328 None
329 }
330
331 /// Return the minimum length of the haystack in which a prefilter should be
332 /// used. If the haystack is below this length, then it's probably not worth
333 /// the overhead of running the prefilter.
334 ///
335 /// We used to look at the length of a haystack here. That is, if it was too
336 /// small, then don't bother with the prefilter. But two things changed:
337 /// the prefilter falls back to memchr for small haystacks, and, at the
338 /// meta-searcher level, Rabin-Karp is employed for tiny haystacks anyway.
339 ///
340 /// We keep it around for now in case we want to bring it back.
341 #[allow(dead_code)]
minimum_len(_haystack: &[u8], needle: &[u8]) -> usize342 pub(crate) fn minimum_len(_haystack: &[u8], needle: &[u8]) -> usize {
343 // If the haystack length isn't greater than needle.len() * FACTOR, then
344 // no prefilter will be used. The presumption here is that since there
345 // are so few bytes to check, it's not worth running the prefilter since
346 // there will need to be a validation step anyway. Thus, the prefilter is
347 // largely redundant work.
348 //
349 // Increase the factor noticeably hurts the
350 // memmem/krate/prebuilt/teeny-*/never-john-watson benchmarks.
351 const PREFILTER_LENGTH_FACTOR: usize = 2;
352 const VECTOR_MIN_LENGTH: usize = 16;
353 let min = core::cmp::max(
354 VECTOR_MIN_LENGTH,
355 PREFILTER_LENGTH_FACTOR * needle.len(),
356 );
357 // For haystacks with length==min, we still want to avoid the prefilter,
358 // so add 1.
359 min + 1
360 }
361
362 #[cfg(all(test, feature = "std", not(miri)))]
363 pub(crate) mod tests {
364 use std::convert::{TryFrom, TryInto};
365
366 use super::*;
367 use crate::memmem::{
368 prefilter::PrefilterFnTy, rabinkarp, rarebytes::RareNeedleBytes,
369 };
370
371 // Below is a small jig that generates prefilter tests. The main purpose
372 // of this jig is to generate tests of varying needle/haystack lengths
373 // in order to try and exercise all code paths in our prefilters. And in
374 // particular, this is especially important for vectorized prefilters where
375 // certain code paths might only be exercised at certain lengths.
376
377 /// A test that represents the input and expected output to a prefilter
378 /// function. The test should be able to run with any prefilter function
379 /// and get the expected output.
380 pub(crate) struct PrefilterTest {
381 // These fields represent the inputs and expected output of a forwards
382 // prefilter function.
383 pub(crate) ninfo: NeedleInfo,
384 pub(crate) haystack: Vec<u8>,
385 pub(crate) needle: Vec<u8>,
386 pub(crate) output: Option<usize>,
387 }
388
389 impl PrefilterTest {
390 /// Run all generated forward prefilter tests on the given prefn.
391 ///
392 /// # Safety
393 ///
394 /// Callers must ensure that the given prefilter function pointer is
395 /// safe to call for all inputs in the current environment.
run_all_tests(prefn: PrefilterFnTy)396 pub(crate) unsafe fn run_all_tests(prefn: PrefilterFnTy) {
397 PrefilterTest::run_all_tests_filter(prefn, |_| true)
398 }
399
400 /// Run all generated forward prefilter tests that pass the given
401 /// predicate on the given prefn.
402 ///
403 /// # Safety
404 ///
405 /// Callers must ensure that the given prefilter function pointer is
406 /// safe to call for all inputs in the current environment.
run_all_tests_filter( prefn: PrefilterFnTy, mut predicate: impl FnMut(&PrefilterTest) -> bool, )407 pub(crate) unsafe fn run_all_tests_filter(
408 prefn: PrefilterFnTy,
409 mut predicate: impl FnMut(&PrefilterTest) -> bool,
410 ) {
411 for seed in PREFILTER_TEST_SEEDS {
412 for test in seed.generate() {
413 if predicate(&test) {
414 test.run(prefn);
415 }
416 }
417 }
418 }
419
420 /// Create a new prefilter test from a seed and some chose offsets to
421 /// rare bytes in the seed's needle.
422 ///
423 /// If a valid test could not be constructed, then None is returned.
424 /// (Currently, we take the approach of massaging tests to be valid
425 /// instead of rejecting them outright.)
new( seed: &PrefilterTestSeed, rare1i: usize, rare2i: usize, haystack_len: usize, needle_len: usize, output: Option<usize>, ) -> Option<PrefilterTest>426 fn new(
427 seed: &PrefilterTestSeed,
428 rare1i: usize,
429 rare2i: usize,
430 haystack_len: usize,
431 needle_len: usize,
432 output: Option<usize>,
433 ) -> Option<PrefilterTest> {
434 let mut rare1i: u8 = rare1i.try_into().unwrap();
435 let mut rare2i: u8 = rare2i.try_into().unwrap();
436 // The '#' byte is never used in a haystack (unless we're expecting
437 // a match), while the '@' byte is never used in a needle.
438 let mut haystack = vec![b'@'; haystack_len];
439 let mut needle = vec![b'#'; needle_len];
440 needle[0] = seed.first;
441 needle[rare1i as usize] = seed.rare1;
442 needle[rare2i as usize] = seed.rare2;
443 // If we're expecting a match, then make sure the needle occurs
444 // in the haystack at the expected position.
445 if let Some(i) = output {
446 haystack[i..i + needle.len()].copy_from_slice(&needle);
447 }
448 // If the operations above lead to rare offsets pointing to the
449 // non-first occurrence of a byte, then adjust it. This might lead
450 // to redundant tests, but it's simpler than trying to change the
451 // generation process I think.
452 if let Some(i) = crate::memchr(seed.rare1, &needle) {
453 rare1i = u8::try_from(i).unwrap();
454 }
455 if let Some(i) = crate::memchr(seed.rare2, &needle) {
456 rare2i = u8::try_from(i).unwrap();
457 }
458 let ninfo = NeedleInfo {
459 rarebytes: RareNeedleBytes::new(rare1i, rare2i),
460 nhash: rabinkarp::NeedleHash::forward(&needle),
461 };
462 Some(PrefilterTest { ninfo, haystack, needle, output })
463 }
464
465 /// Run this specific test on the given prefilter function. If the
466 /// outputs do no match, then this routine panics with a failure
467 /// message.
468 ///
469 /// # Safety
470 ///
471 /// Callers must ensure that the given prefilter function pointer is
472 /// safe to call for all inputs in the current environment.
run(&self, prefn: PrefilterFnTy)473 unsafe fn run(&self, prefn: PrefilterFnTy) {
474 let mut prestate = PrefilterState::new();
475 assert_eq!(
476 self.output,
477 prefn(
478 &mut prestate,
479 &self.ninfo,
480 &self.haystack,
481 &self.needle
482 ),
483 "ninfo: {:?}, haystack(len={}): {:?}, needle(len={}): {:?}",
484 self.ninfo,
485 self.haystack.len(),
486 std::str::from_utf8(&self.haystack).unwrap(),
487 self.needle.len(),
488 std::str::from_utf8(&self.needle).unwrap(),
489 );
490 }
491 }
492
493 /// A set of prefilter test seeds. Each seed serves as the base for the
494 /// generation of many other tests. In essence, the seed captures the
495 /// "rare" and first bytes among our needle. The tests generated from each
496 /// seed essentially vary the length of the needle and haystack, while
497 /// using the rare/first byte configuration from the seed.
498 ///
499 /// The purpose of this is to test many different needle/haystack lengths.
500 /// In particular, some of the vector optimizations might only have bugs
501 /// in haystacks of a certain size.
502 const PREFILTER_TEST_SEEDS: &[PrefilterTestSeed] = &[
503 PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'z' },
504 PrefilterTestSeed { first: b'x', rare1: b'x', rare2: b'z' },
505 PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'x' },
506 PrefilterTestSeed { first: b'x', rare1: b'x', rare2: b'x' },
507 PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'y' },
508 ];
509
510 /// Data that describes a single prefilter test seed.
511 struct PrefilterTestSeed {
512 first: u8,
513 rare1: u8,
514 rare2: u8,
515 }
516
517 impl PrefilterTestSeed {
518 /// Generate a series of prefilter tests from this seed.
generate(&self) -> Vec<PrefilterTest>519 fn generate(&self) -> Vec<PrefilterTest> {
520 let mut tests = vec![];
521 let mut push = |test: Option<PrefilterTest>| {
522 if let Some(test) = test {
523 tests.push(test);
524 }
525 };
526 let len_start = 2;
527 // The loop below generates *a lot* of tests. The number of tests
528 // was chosen somewhat empirically to be "bearable" when running
529 // the test suite.
530 for needle_len in len_start..=40 {
531 let rare_start = len_start - 1;
532 for rare1i in rare_start..needle_len {
533 for rare2i in rare1i..needle_len {
534 for haystack_len in needle_len..=66 {
535 push(PrefilterTest::new(
536 self,
537 rare1i,
538 rare2i,
539 haystack_len,
540 needle_len,
541 None,
542 ));
543 // Test all possible match scenarios for this
544 // needle and haystack.
545 for output in 0..=(haystack_len - needle_len) {
546 push(PrefilterTest::new(
547 self,
548 rare1i,
549 rare2i,
550 haystack_len,
551 needle_len,
552 Some(output),
553 ));
554 }
555 }
556 }
557 }
558 }
559 tests
560 }
561 }
562 }
563