1 use fallback;
2 
3 // We only use AVX when we can detect at runtime whether it's available, which
4 // requires std.
5 #[cfg(feature = "std")]
6 mod avx;
7 mod sse2;
8 
// This macro employs a gcc-like "ifunc" trick whereby upon first calling
// `memchr` (for example), CPU feature detection will be performed at runtime
// to determine the best implementation to use. After CPU feature detection
// is done, we replace `memchr`'s function pointer with the selection. Upon
// subsequent invocations, the CPU-specific routine is invoked directly, which
// skips the CPU feature detection and subsequent branch that's required.
//
// While this typically doesn't matter for rare occurrences or when used on
// larger haystacks, `memchr` can be called in tight loops where the overhead
// of this branch can actually add up *and is measurable*. This trick was
// necessary to bring this implementation up to glibc's speeds for the 'tiny'
// benchmarks, for example.
//
// At some point, I expect the Rust ecosystem will get a nice macro for doing
// exactly this, at which point, we can replace our hand-jammed version of it.
//
// N.B. The ifunc strategy does prevent function inlining of course, but on
// modern CPUs, you'll probably end up with the AVX2 implementation, which
// probably can't be inlined anyway---unless you've compiled your entire
// program with AVX2 enabled. However, even then, the various memchr
// implementations aren't exactly small, so inlining might not help anyway!
//
// Macro arguments:
//
//   $fnty     - function pointer type that the stored raw pointer is
//               transmuted to before it is called. It has to match the
//               signature of the local `detect` function: the needles first,
//               each a `u8`, followed by the `&[u8]` haystack.
//   $name     - name of the routine to pick out of the `avx`, `sse2` and
//               `fallback` modules.
//   $haystack - identifier of the haystack value at the call site.
//   $needle   - one or more identifiers of the needle bytes at the call site.
//
// The macro expands to a block expression whose value is the `Option<usize>`
// returned by the routine that ends up being called.
#[cfg(feature = "std")]
macro_rules! ifunc {
    ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
        use std::mem;
        use std::sync::atomic::{AtomicPtr, Ordering};

        // Function pointers are stored type-erased so that they fit in an
        // `AtomicPtr<()>`.
        type FnRaw = *mut ();

        // Declared inside the expansion's block, so every expansion site has
        // its own `FN`. It starts out pointing at `detect`, which makes the
        // first call go through the selection logic below.
        static FN: AtomicPtr<()> = AtomicPtr::new(detect as FnRaw);

        // Picks an implementation, records it in `FN`, and then forwards the
        // current call to it. Runs for every call that still observes the
        // initial value of `FN`.
        fn detect($($needle: u8),+, haystack: &[u8]) -> Option<usize> {
            // `memchr_runtime_avx` / `memchr_runtime_sse2` are compile-time
            // cfg flags (presumably emitted by the build script; confirm),
            // while the `avx2` check happens at runtime.
            let fun =
                if cfg!(memchr_runtime_avx) && is_x86_feature_detected!("avx2") {
                    avx::$name as FnRaw
                } else if cfg!(memchr_runtime_sse2) {
                    sse2::$name as FnRaw
                } else {
                    fallback::$name as FnRaw
                };
            // Only the pointer value itself is communicated through `FN`, and
            // it is derived solely from the cfg flags and the CPU feature
            // check above, so racing callers are expected to store the same
            // pointer. NOTE(review): that appears to be the justification for
            // `Relaxed`; confirm.
            FN.store(fun as FnRaw, Ordering::Relaxed);
            // SAFETY: `fun` is one of the three function items cast to
            // `FnRaw` above. NOTE(review): soundness relies on those routines
            // having exactly the signature `$fnty` and, for the AVX routine,
            // on the `avx2` check above; their definitions are outside this
            // view.
            unsafe {
                mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, haystack)
            }
        }

        // SAFETY: `FN` only ever holds `detect` (its initial value) or a
        // pointer stored by `detect`. `detect` matches `$fnty` as long as the
        // call site passes a type with the needles-then-haystack signature;
        // see the note inside `detect` for the stored routines.
        unsafe {
            let fun = FN.load(Ordering::Relaxed);
            mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, $haystack)
        }
    }}
}
61 
// When std isn't available to provide runtime CPU feature detection, or if
// runtime CPU feature detection has been explicitly disabled, then just call
// our optimized SSE2 routine directly. SSE2 is available on all x86_64 targets,
// so no CPU feature detection is necessary.
//
// This variant takes the same arguments as the `std` one, so call sites do
// not need to care which of the two is compiled in. `$fnty` is accepted but
// not used here, because no function pointer is stored or transmuted.
#[cfg(not(feature = "std"))]
macro_rules! ifunc {
    ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
        // `memchr_runtime_sse2` is a compile-time cfg flag, so this branch is
        // decided when the crate is built rather than on each call.
        if cfg!(memchr_runtime_sse2) {
            // SAFETY: NOTE(review): relies on `memchr_runtime_sse2` only being
            // set for targets where SSE2 is guaranteed (see the comment
            // above); the flag's definition is outside this view.
            unsafe { sse2::$name($($needle),+, $haystack) }
        } else {
            fallback::$name($($needle),+, $haystack)
        }
    }}
}
76 
77 #[inline(always)]
memchr(n1: u8, haystack: &[u8]) -> Option<usize>78 pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
79     ifunc!(fn(u8, &[u8]) -> Option<usize>, memchr, haystack, n1)
80 }
81 
82 #[inline(always)]
memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize>83 pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
84     ifunc!(fn(u8, u8, &[u8]) -> Option<usize>, memchr2, haystack, n1, n2)
85 }
86 
87 #[inline(always)]
memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize>88 pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
89     ifunc!(
90         fn(u8, u8, u8, &[u8]) -> Option<usize>,
91         memchr3,
92         haystack,
93         n1,
94         n2,
95         n3
96     )
97 }
98 
99 #[inline(always)]
memrchr(n1: u8, haystack: &[u8]) -> Option<usize>100 pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
101     ifunc!(fn(u8, &[u8]) -> Option<usize>, memrchr, haystack, n1)
102 }
103 
104 #[inline(always)]
memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize>105 pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
106     ifunc!(fn(u8, u8, &[u8]) -> Option<usize>, memrchr2, haystack, n1, n2)
107 }
108 
109 #[inline(always)]
memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize>110 pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
111     ifunc!(
112         fn(u8, u8, u8, &[u8]) -> Option<usize>,
113         memrchr3,
114         haystack,
115         n1,
116         n2,
117         n3
118     )
119 }
120