1 use super::fallback;
2 
3 // We only use AVX when we can detect at runtime whether it's available, which
4 // requires std.
5 #[cfg(feature = "std")]
6 mod avx;
7 mod sse2;
8 
/// This macro employs a gcc-like "ifunc" trick where by upon first calling
/// `memchr` (for example), CPU feature detection will be performed at runtime
/// to determine the best implementation to use. After CPU feature detection
/// is done, we replace `memchr`'s function pointer with the selection. Upon
/// subsequent invocations, the CPU-specific routine is invoked directly, which
/// skips the CPU feature detection and subsequent branch that's required.
///
/// While this typically doesn't matter for rare occurrences or when used on
/// larger haystacks, `memchr` can be called in tight loops where the overhead
/// of this branch can actually add up *and is measurable*. This trick was
/// necessary to bring this implementation up to glibc's speeds for the 'tiny'
/// benchmarks, for example.
///
/// At some point, I expect the Rust ecosystem will get a nice macro for doing
/// exactly this, at which point, we can replace our hand-jammed version of it.
///
/// N.B. The ifunc strategy does prevent function inlining of course, but
/// on modern CPUs, you'll probably end up with the AVX2 implementation,
/// which probably can't be inlined anyway---unless you've compiled your
/// entire program with AVX2 enabled. However, even then, the various memchr
/// implementations aren't exactly small, so inlining might not help anyway!
///
/// # Safety
///
/// Callers must ensure that `$fnty` is a function pointer type matching the
/// signature `fn($needle: u8, ..., haystack: &[u8]) -> Option<usize>` for the
/// given needles, i.e. the signature shared by `avx::$name`, `sse2::$name`
/// and `fallback::$name`.
#[cfg(feature = "std")]
macro_rules! unsafe_ifunc {
    ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
        use std::{mem, sync::atomic::{AtomicPtr, Ordering}};

        // Function pointers are stored type-erased as raw pointers, since
        // there is no atomic function-pointer type in std.
        type FnRaw = *mut ();

        // Initially points at the `detect` trampoline below; after the first
        // call it holds the CPU-specific routine selected by `detect`.
        static FN: AtomicPtr<()> = AtomicPtr::new(detect as FnRaw);

        // First-call trampoline: select the best routine for this CPU, cache
        // it in `FN`, then forward this first call to the selection.
        fn detect($($needle: u8),+, haystack: &[u8]) -> Option<usize> {
            let fun =
                if cfg!(memchr_runtime_avx) && is_x86_feature_detected!("avx2") {
                    avx::$name as FnRaw
                } else if cfg!(memchr_runtime_sse2) {
                    sse2::$name as FnRaw
                } else {
                    fallback::$name as FnRaw
                };
            // Relaxed ordering suffices: a thread that observes a stale
            // pointer merely runs `detect` again, which is still correct.
            FN.store(fun as FnRaw, Ordering::Relaxed);
            // SAFETY: By virtue of the caller contract, $fnty is a function
            // pointer, which is always safe to transmute with a *mut ().
            // Also, if 'fun is the AVX routine, then it is guaranteed to be
            // supported since we checked the avx2 feature.
            unsafe {
                mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, haystack)
            }
        }

        // SAFETY: By virtue of the caller contract, $fnty is a function
        // pointer, which is always safe to transmute with a *mut (). Also, if
        // 'fun is the AVX routine, then it is guaranteed to be supported since
        // we checked the avx2 feature.
        unsafe {
            let fun = FN.load(Ordering::Relaxed);
            mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, $haystack)
        }
    }}
}
72 
/// When std isn't available to provide runtime CPU feature detection, or if
/// runtime CPU feature detection has been explicitly disabled, then just
/// call our optimized SSE2 routine directly. SSE2 is available on all x86_64
/// targets, so no CPU feature detection is necessary.
///
/// # Safety
///
/// There are no safety requirements for this definition of the macro. It is
/// safe for all inputs since it is restricted to either the fallback routine
/// or the SSE routine, which is always safe to call on x86_64.
#[cfg(not(feature = "std"))]
macro_rules! unsafe_ifunc {
    ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
        if cfg!(memchr_runtime_sse2) {
            // SAFETY: SSE2 is part of the x86_64 baseline instruction set,
            // so the SSE2 routine is always supported on this target.
            unsafe { sse2::$name($($needle),+, $haystack) }
        } else {
            fallback::$name($($needle),+, $haystack)
        }
    }}
}
93 
94 #[inline(always)]
memchr(n1: u8, haystack: &[u8]) -> Option<usize>95 pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
96     unsafe_ifunc!(fn(u8, &[u8]) -> Option<usize>, memchr, haystack, n1)
97 }
98 
99 #[inline(always)]
memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize>100 pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
101     unsafe_ifunc!(
102         fn(u8, u8, &[u8]) -> Option<usize>,
103         memchr2,
104         haystack,
105         n1,
106         n2
107     )
108 }
109 
110 #[inline(always)]
memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize>111 pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
112     unsafe_ifunc!(
113         fn(u8, u8, u8, &[u8]) -> Option<usize>,
114         memchr3,
115         haystack,
116         n1,
117         n2,
118         n3
119     )
120 }
121 
122 #[inline(always)]
memrchr(n1: u8, haystack: &[u8]) -> Option<usize>123 pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
124     unsafe_ifunc!(fn(u8, &[u8]) -> Option<usize>, memrchr, haystack, n1)
125 }
126 
127 #[inline(always)]
memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize>128 pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
129     unsafe_ifunc!(
130         fn(u8, u8, &[u8]) -> Option<usize>,
131         memrchr2,
132         haystack,
133         n1,
134         n2
135     )
136 }
137 
138 #[inline(always)]
memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize>139 pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
140     unsafe_ifunc!(
141         fn(u8, u8, u8, &[u8]) -> Option<usize>,
142         memrchr3,
143         haystack,
144         n1,
145         n2,
146         n3
147     )
148 }
149