use crate::memmem::{rarebytes::RareNeedleBytes, NeedleInfo};

mod fallback;
#[cfg(all(target_arch = "x86_64", memchr_runtime_simd))]
mod genericsimd;
#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
mod x86;

/// The maximum frequency rank permitted for the fallback prefilter. If the
/// rarest byte in the needle has a frequency rank above this value, then no
/// prefilter is used if the fallback prefilter would otherwise be selected.
const MAX_FALLBACK_RANK: usize = 250;

/// A combination of prefilter effectiveness state, the prefilter function and
/// the needle info required to run a prefilter.
///
/// For the most part, these are grouped into a single type for convenience,
/// instead of needing to pass around all three as distinct function
/// parameters.
pub(crate) struct Pre<'a> {
    /// State that tracks the effectiveness of a prefilter.
    pub(crate) state: &'a mut PrefilterState,
    /// The actual prefilter function.
    pub(crate) prefn: PrefilterFn,
    /// Information about a needle, such as its RK hash and rare byte offsets.
    pub(crate) ninfo: &'a NeedleInfo,
}

impl<'a> Pre<'a> {
    /// Call this prefilter on the given haystack with the given needle.
    #[inline(always)]
    pub(crate) fn call(
        &mut self,
        haystack: &[u8],
        needle: &[u8],
    ) -> Option<usize> {
        self.prefn.call(self.state, self.ninfo, haystack, needle)
    }

    /// Return true if and only if this prefilter should be used.
    #[inline(always)]
    pub(crate) fn should_call(&mut self) -> bool {
        self.state.is_effective()
    }
}

/// A prefilter function.
///
/// A prefilter function describes both forward and reverse searches.
/// (Although we don't currently implement prefilters for reverse searching.)
/// In the case of a forward search, the position returned corresponds to
/// the starting offset of a match (confirmed or possible). Its minimum
/// value is `0`, and its maximum value is `haystack.len() - 1`. In the case
/// of a reverse search, the position returned corresponds to the position
/// immediately after a match (confirmed or possible). Its minimum value is `1`
/// and its maximum value is `haystack.len()`.
///
/// In both cases, the position returned is the starting (or ending) point of a
/// _possible_ match. That is, returning a false positive is okay. A prefilter,
/// however, must never return any false negatives. That is, if a match exists
/// at a particular position `i`, then a prefilter _must_ return that position.
/// It cannot skip past it.
///
/// # Safety
///
/// A prefilter function is not safe to create, since not all prefilters are
/// safe to call in all contexts. (e.g., A prefilter that uses AVX instructions
/// may only be called on x86_64 CPUs with the relevant AVX feature enabled.)
/// Thus, callers must ensure that when a prefilter function is created, it is
/// safe to call for the current environment.
#[derive(Clone, Copy)]
pub(crate) struct PrefilterFn(PrefilterFnTy);
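
// As a concrete illustration of the forward contract above (hypothetical
// inputs, not anything defined in this crate): for haystack = b"zz_abc" and
// needle = b"abc", a prefilter may return Some(3) (the true start of the
// match), or an earlier false positive such as Some(1), but it must never
// return Some(4) or later, since that would skip past the match at offset 3.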

/// The type of a prefilter function. All prefilters must satisfy this
/// signature.
///
/// Using a function pointer like this does inhibit inlining, but it does
/// eliminate branching and the extra costs associated with copying a larger
/// enum. Note also that using `Box<dyn SomePrefilterTrait>` can't really work
/// here, since we want to work in contexts that don't have dynamic memory
/// allocation. Moreover, in the default configuration of this crate on x86_64
/// CPUs released in the past ~decade, we will use an AVX2-optimized prefilter,
/// which generally won't be inlineable into the surrounding code anyway.
/// (Unless AVX2 is enabled at compile time, but this is typically rare, since
/// it produces a non-portable binary.)
pub(crate) type PrefilterFnTy = unsafe fn(
    prestate: &mut PrefilterState,
    ninfo: &NeedleInfo,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize>;

impl PrefilterFn {
    /// Create a new prefilter function from the function pointer given.
    ///
    /// # Safety
    ///
    /// Callers must ensure that the given prefilter function is safe to call
    /// for all inputs in the current environment. For example, if the given
    /// prefilter function uses AVX instructions, then the caller must ensure
    /// that the appropriate AVX CPU features are enabled.
    pub(crate) unsafe fn new(prefn: PrefilterFnTy) -> PrefilterFn {
        PrefilterFn(prefn)
    }

    /// Call the underlying prefilter function with the given arguments.
    pub fn call(
        self,
        prestate: &mut PrefilterState,
        ninfo: &NeedleInfo,
        haystack: &[u8],
        needle: &[u8],
    ) -> Option<usize> {
        // SAFETY: Callers have the burden of ensuring that a prefilter
        // function is safe to call for all inputs in the current environment.
        unsafe { (self.0)(prestate, ninfo, haystack, needle) }
    }
}

impl core::fmt::Debug for PrefilterFn {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        "<prefilter-fn(...)>".fmt(f)
    }
}
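
// A minimal sketch of the intended construction pattern, assuming some
// hypothetical `my_prefilter` with the `PrefilterFnTy` signature that is
// unconditionally safe to call (the real selection logic lives in `forward`
// below):
//
//     // SAFETY: `my_prefilter` requires no special CPU features.
//     let prefn = unsafe { PrefilterFn::new(my_prefilter) };
//     let candidate = prefn.call(&mut prestate, &ninfo, haystack, needle);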

/// Prefilter controls whether heuristics are used to accelerate searching.
///
/// A prefilter refers to the idea of detecting candidate matches very quickly,
/// and then confirming whether those candidates are full matches. This
/// idea can be quite effective since it's often the case that looking for
/// candidates can be a lot faster than running a complete substring search
/// over the entire input. Namely, looking for candidates can be done with
/// extremely fast vectorized code.
///
/// The downside of a prefilter is that it assumes false positives (which are
/// candidates generated by a prefilter that aren't matches) are somewhat rare
/// relative to the frequency of full matches. That is, if a lot of false
/// positives are generated, then it's possible for search time to be worse
/// than if the prefilter wasn't enabled in the first place.
///
/// Another downside of a prefilter is that it can result in highly variable
/// performance, where some cases are extraordinarily fast and others aren't.
/// Typically, variable performance isn't a problem, but it may be for your use
/// case.
///
/// The prefilters in this implementation use a heuristic to detect when a
/// prefilter might not be carrying its weight, and will dynamically disable
/// its use. Nevertheless, this configuration option gives callers the ability
/// to disable prefilters entirely if they know prefilters won't be useful.
#[derive(Clone, Copy, Debug)]
#[non_exhaustive]
pub enum Prefilter {
    /// Never use a prefilter in substring search.
    None,
    /// Automatically detect whether a heuristic prefilter should be used. If
    /// it is used, then heuristics will be used to dynamically disable the
    /// prefilter if it is believed to not be carrying its weight.
    Auto,
}

impl Default for Prefilter {
    fn default() -> Prefilter {
        Prefilter::Auto
    }
}

impl Prefilter {
    pub(crate) fn is_none(&self) -> bool {
        match *self {
            Prefilter::None => true,
            _ => false,
        }
    }
}

/// PrefilterState tracks state associated with the effectiveness of a
/// prefilter. It is used to track how many bytes, on average, are skipped by
/// the prefilter. If this average dips below a certain threshold over time,
/// then the state renders the prefilter inert and stops using it.
///
/// A prefilter state should be created for each search. (Where creating an
/// iterator is treated as a single search.) A prefilter state should only be
/// created from a `Freqy`. e.g., an inert `Freqy` will produce an inert
/// `PrefilterState`.
#[derive(Clone, Debug)]
pub(crate) struct PrefilterState {
    /// The number of skips that have been executed. This is always 1 greater
    /// than the actual number of skips. The special sentinel value of 0
    /// indicates that the prefilter is inert. This is useful to avoid
    /// additional checks to determine whether the prefilter is still
    /// "effective." Once a prefilter becomes inert, it should no longer be
    /// used (according to our heuristics).
    skips: u32,
    /// The total number of bytes that have been skipped.
    skipped: u32,
}

impl PrefilterState {
    /// The minimum number of skip attempts to try before considering whether
    /// a prefilter is effective or not.
    const MIN_SKIPS: u32 = 50;

    /// The minimum amount of bytes that skipping must average.
    ///
    /// This value was chosen based on varying it and checking
    /// the microbenchmarks. In particular, this can impact the
    /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set
    /// too low.
    const MIN_SKIP_BYTES: u32 = 8;

    /// Create a fresh prefilter state.
    pub(crate) fn new() -> PrefilterState {
        PrefilterState { skips: 1, skipped: 0 }
    }

    /// Create a fresh prefilter state that is always inert.
    pub(crate) fn inert() -> PrefilterState {
        PrefilterState { skips: 0, skipped: 0 }
    }

    /// Update this state with the number of bytes skipped on the last
    /// invocation of the prefilter.
    #[inline]
    pub(crate) fn update(&mut self, skipped: usize) {
        self.skips = self.skips.saturating_add(1);
        // We need to do this dance since it's technically possible for
        // `skipped` to overflow a `u32`. (And we use a `u32` to reduce the
        // size of a prefilter state.)
        if skipped > core::u32::MAX as usize {
            self.skipped = core::u32::MAX;
        } else {
            self.skipped = self.skipped.saturating_add(skipped as u32);
        }
    }

    /// Return true if and only if this state indicates that a prefilter is
    /// still effective.
    #[inline]
    pub(crate) fn is_effective(&mut self) -> bool {
        if self.is_inert() {
            return false;
        }
        if self.skips() < PrefilterState::MIN_SKIPS {
            return true;
        }
        if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips() {
            return true;
        }

        // We're inert.
        self.skips = 0;
        false
    }

    #[inline]
    fn is_inert(&self) -> bool {
        self.skips == 0
    }

    #[inline]
    fn skips(&self) -> u32 {
        self.skips.saturating_sub(1)
    }
}
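
// A worked example of the heuristic above (a sketch, not code that appears
// in this crate): a prefilter that consistently skips only a byte or so per
// invocation gets disabled once enough evidence accumulates.
//
//     let mut state = PrefilterState::new();
//     for _ in 0..50 {
//         state.update(1); // each prefilter call skipped just 1 byte
//     }
//     // After MIN_SKIPS attempts averaging fewer than MIN_SKIP_BYTES bytes
//     // per skip, the state renders itself inert...
//     assert!(!state.is_effective());
//     // ... and it stays inert for the rest of the search.
//     assert!(!state.is_effective());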

/// Determine which prefilter function, if any, to use.
///
/// This only applies to x86_64 when runtime SIMD detection is enabled (which
/// is the default). In general, we try to use an AVX prefilter, then an SSE
/// prefilter, and finally a generic one based on memchr.
#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
#[inline(always)]
pub(crate) fn forward(
    config: &Prefilter,
    rare: &RareNeedleBytes,
    needle: &[u8],
) -> Option<PrefilterFn> {
    if config.is_none() || needle.len() <= 1 {
        return None;
    }

    #[cfg(feature = "std")]
    {
        if cfg!(memchr_runtime_avx) {
            if is_x86_feature_detected!("avx2") {
                // SAFETY: x86::avx::find only requires the avx2 feature,
                // which we've just checked above.
                return unsafe { Some(PrefilterFn::new(x86::avx::find)) };
            }
        }
    }
    if cfg!(memchr_runtime_sse2) {
        // SAFETY: x86::sse::find only requires the sse2 feature, which is
        // guaranteed to be available on x86_64.
        return unsafe { Some(PrefilterFn::new(x86::sse::find)) };
    }
    // Check that our rarest byte has a reasonably low rank. The main issue
    // here is that the fallback prefilter can perform pretty poorly if it's
    // given common bytes. So we try to avoid the worst cases here.
    let (rare1_rank, _) = rare.as_ranks(needle);
    if rare1_rank <= MAX_FALLBACK_RANK {
        // SAFETY: fallback::find is safe to call in all environments.
        return unsafe { Some(PrefilterFn::new(fallback::find)) };
    }
    None
}

/// Determine which prefilter function, if any, to use.
///
/// Since SIMD is currently only supported on x86_64, this will just select
/// the fallback prefilter if the rare bytes provided have a low enough rank.
#[cfg(not(all(not(miri), target_arch = "x86_64", memchr_runtime_simd)))]
#[inline(always)]
pub(crate) fn forward(
    config: &Prefilter,
    rare: &RareNeedleBytes,
    needle: &[u8],
) -> Option<PrefilterFn> {
    if config.is_none() || needle.len() <= 1 {
        return None;
    }
    let (rare1_rank, _) = rare.as_ranks(needle);
    if rare1_rank <= MAX_FALLBACK_RANK {
        // SAFETY: fallback::find is safe to call in all environments.
        return unsafe { Some(PrefilterFn::new(fallback::find)) };
    }
    None
}
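
// A rough sketch of how a searcher is expected to wire the pieces above
// together (`config`, `ninfo`, `haystack` and `needle` are hypothetical
// locals here; the real call sites live in the meta-searcher):
//
//     if let Some(prefn) = forward(&config, &ninfo.rarebytes, needle) {
//         let mut state = PrefilterState::new();
//         let mut pre = Pre { state: &mut state, prefn, ninfo: &ninfo };
//         if pre.should_call() {
//             if let Some(candidate) = pre.call(haystack, needle) {
//                 // `candidate` may be a false positive, so the searcher
//                 // must confirm it before reporting a match.
//             }
//         }
//     }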

/// Return the minimum length of the haystack in which a prefilter should be
/// used. If the haystack is below this length, then it's probably not worth
/// the overhead of running the prefilter.
///
/// We used to look at the length of a haystack here. That is, if it was too
/// small, then don't bother with the prefilter. But two things changed:
/// the prefilter falls back to memchr for small haystacks, and, at the
/// meta-searcher level, Rabin-Karp is employed for tiny haystacks anyway.
///
/// We keep it around for now in case we want to bring it back.
#[allow(dead_code)]
pub(crate) fn minimum_len(_haystack: &[u8], needle: &[u8]) -> usize {
    // If the haystack length isn't greater than needle.len() * FACTOR, then
    // no prefilter will be used. The presumption here is that since there
    // are so few bytes to check, it's not worth running the prefilter since
    // there will need to be a validation step anyway. Thus, the prefilter is
    // largely redundant work.
    //
    // Increasing the factor noticeably hurts the
    // memmem/krate/prebuilt/teeny-*/never-john-watson benchmarks.
    const PREFILTER_LENGTH_FACTOR: usize = 2;
    const VECTOR_MIN_LENGTH: usize = 16;
    let min = core::cmp::max(
        VECTOR_MIN_LENGTH,
        PREFILTER_LENGTH_FACTOR * needle.len(),
    );
    // For haystacks with length==min, we still want to avoid the prefilter,
    // so add 1.
    min + 1
}
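
// A worked example of the heuristic above: for a 3-byte needle,
// max(16, 2 * 3) + 1 = 17, so a prefilter would only be considered for
// haystacks of at least 17 bytes. For a 20-byte needle, the needle-derived
// bound takes over: max(16, 2 * 20) + 1 = 41.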

#[cfg(all(test, feature = "std", not(miri)))]
pub(crate) mod tests {
    use std::convert::{TryFrom, TryInto};

    use super::*;
    use crate::memmem::{
        prefilter::PrefilterFnTy, rabinkarp, rarebytes::RareNeedleBytes,
    };

    // Below is a small jig that generates prefilter tests. The main purpose
    // of this jig is to generate tests of varying needle/haystack lengths
    // in order to try and exercise all code paths in our prefilters. And in
    // particular, this is especially important for vectorized prefilters where
    // certain code paths might only be exercised at certain lengths.

    /// A test that represents the input and expected output to a prefilter
    /// function. The test should be able to run with any prefilter function
    /// and get the expected output.
    pub(crate) struct PrefilterTest {
        // These fields represent the inputs and expected output of a forwards
        // prefilter function.
        pub(crate) ninfo: NeedleInfo,
        pub(crate) haystack: Vec<u8>,
        pub(crate) needle: Vec<u8>,
        pub(crate) output: Option<usize>,
    }

    impl PrefilterTest {
        /// Run all generated forward prefilter tests on the given prefn.
        ///
        /// # Safety
        ///
        /// Callers must ensure that the given prefilter function pointer is
        /// safe to call for all inputs in the current environment.
        pub(crate) unsafe fn run_all_tests(prefn: PrefilterFnTy) {
            PrefilterTest::run_all_tests_filter(prefn, |_| true)
        }

        /// Run all generated forward prefilter tests that pass the given
        /// predicate on the given prefn.
        ///
        /// # Safety
        ///
        /// Callers must ensure that the given prefilter function pointer is
        /// safe to call for all inputs in the current environment.
        pub(crate) unsafe fn run_all_tests_filter(
            prefn: PrefilterFnTy,
            mut predicate: impl FnMut(&PrefilterTest) -> bool,
        ) {
            for seed in PREFILTER_TEST_SEEDS {
                for test in seed.generate() {
                    if predicate(&test) {
                        test.run(prefn);
                    }
                }
            }
        }

        /// Create a new prefilter test from a seed and some chosen offsets to
        /// rare bytes in the seed's needle.
        ///
        /// If a valid test could not be constructed, then None is returned.
        /// (Currently, we take the approach of massaging tests to be valid
        /// instead of rejecting them outright.)
        fn new(
            seed: &PrefilterTestSeed,
            rare1i: usize,
            rare2i: usize,
            haystack_len: usize,
            needle_len: usize,
            output: Option<usize>,
        ) -> Option<PrefilterTest> {
            let mut rare1i: u8 = rare1i.try_into().unwrap();
            let mut rare2i: u8 = rare2i.try_into().unwrap();
            // The '#' byte is never used in a haystack (unless we're expecting
            // a match), while the '@' byte is never used in a needle.
            let mut haystack = vec![b'@'; haystack_len];
            let mut needle = vec![b'#'; needle_len];
            needle[0] = seed.first;
            needle[rare1i as usize] = seed.rare1;
            needle[rare2i as usize] = seed.rare2;
            // If we're expecting a match, then make sure the needle occurs
            // in the haystack at the expected position.
            if let Some(i) = output {
                haystack[i..i + needle.len()].copy_from_slice(&needle);
            }
            // If the operations above lead to rare offsets pointing to the
            // non-first occurrence of a byte, then adjust it. This might lead
            // to redundant tests, but it's simpler than trying to change the
            // generation process I think.
            if let Some(i) = crate::memchr(seed.rare1, &needle) {
                rare1i = u8::try_from(i).unwrap();
            }
            if let Some(i) = crate::memchr(seed.rare2, &needle) {
                rare2i = u8::try_from(i).unwrap();
            }
            let ninfo = NeedleInfo {
                rarebytes: RareNeedleBytes::new(rare1i, rare2i),
                nhash: rabinkarp::NeedleHash::forward(&needle),
            };
            Some(PrefilterTest { ninfo, haystack, needle, output })
        }

        /// Run this specific test on the given prefilter function. If the
        /// outputs do not match, then this routine panics with a failure
        /// message.
        ///
        /// # Safety
        ///
        /// Callers must ensure that the given prefilter function pointer is
        /// safe to call for all inputs in the current environment.
        unsafe fn run(&self, prefn: PrefilterFnTy) {
            let mut prestate = PrefilterState::new();
            assert_eq!(
                self.output,
                prefn(
                    &mut prestate,
                    &self.ninfo,
                    &self.haystack,
                    &self.needle
                ),
                "ninfo: {:?}, haystack(len={}): {:?}, needle(len={}): {:?}",
                self.ninfo,
                self.haystack.len(),
                std::str::from_utf8(&self.haystack).unwrap(),
                self.needle.len(),
                std::str::from_utf8(&self.needle).unwrap(),
            );
        }
    }

    /// A set of prefilter test seeds. Each seed serves as the base for the
    /// generation of many other tests. In essence, the seed captures the
    /// "rare" and first bytes among our needle. The tests generated from each
    /// seed essentially vary the length of the needle and haystack, while
    /// using the rare/first byte configuration from the seed.
    ///
    /// The purpose of this is to test many different needle/haystack lengths.
    /// In particular, some of the vector optimizations might only have bugs
    /// in haystacks of a certain size.
    const PREFILTER_TEST_SEEDS: &[PrefilterTestSeed] = &[
        PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'z' },
        PrefilterTestSeed { first: b'x', rare1: b'x', rare2: b'z' },
        PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'x' },
        PrefilterTestSeed { first: b'x', rare1: b'x', rare2: b'x' },
        PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'y' },
    ];

    /// Data that describes a single prefilter test seed.
    struct PrefilterTestSeed {
        first: u8,
        rare1: u8,
        rare2: u8,
    }

    impl PrefilterTestSeed {
        /// Generate a series of prefilter tests from this seed.
        fn generate(&self) -> Vec<PrefilterTest> {
            let mut tests = vec![];
            let mut push = |test: Option<PrefilterTest>| {
                if let Some(test) = test {
                    tests.push(test);
                }
            };
            let len_start = 2;
            // The loop below generates *a lot* of tests. The number of tests
            // was chosen somewhat empirically to be "bearable" when running
            // the test suite.
            for needle_len in len_start..=40 {
                let rare_start = len_start - 1;
                for rare1i in rare_start..needle_len {
                    for rare2i in rare1i..needle_len {
                        for haystack_len in needle_len..=66 {
                            push(PrefilterTest::new(
                                self,
                                rare1i,
                                rare2i,
                                haystack_len,
                                needle_len,
                                None,
                            ));
                            // Test all possible match scenarios for this
                            // needle and haystack.
                            for output in 0..=(haystack_len - needle_len) {
                                push(PrefilterTest::new(
                                    self,
                                    rare1i,
                                    rare2i,
                                    haystack_len,
                                    needle_len,
                                    Some(output),
                                ));
                            }
                        }
                    }
                }
            }
            tests
        }
    }
}