1 use super::fallback;
2
3 // We only use AVX when we can detect at runtime whether it's available, which
4 // requires std.
5 #[cfg(feature = "std")]
6 mod avx;
7 mod sse2;
8
/// This macro employs a gcc-like "ifunc" trick whereby upon first calling
/// `memchr` (for example), CPU feature detection will be performed at runtime
/// to determine the best implementation to use. After CPU feature detection
/// is done, we replace `memchr`'s function pointer with the selection. Upon
/// subsequent invocations, the CPU-specific routine is invoked directly, which
/// skips the CPU feature detection and subsequent branch that's required.
///
/// While this typically doesn't matter for rare occurrences or when used on
/// larger haystacks, `memchr` can be called in tight loops where the overhead
/// of this branch can actually add up *and is measurable*. This trick was
/// necessary to bring this implementation up to glibc's speeds for the 'tiny'
/// benchmarks, for example.
///
/// At some point, I expect the Rust ecosystem will get a nice macro for doing
/// exactly this, at which point, we can replace our hand-jammed version of it.
///
/// N.B. The ifunc strategy does prevent function inlining of course, but
/// on modern CPUs, you'll probably end up with the AVX2 implementation,
/// which probably can't be inlined anyway---unless you've compiled your
/// entire program with AVX2 enabled. However, even then, the various memchr
/// implementations aren't exactly small, so inlining might not help anyway!
///
/// # Safety
///
/// Callers must ensure that $fnty is a function pointer type.
#[cfg(feature = "std")]
macro_rules! unsafe_ifunc {
    ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
        use std::{mem, sync::atomic::{AtomicPtr, Ordering}};

        // Function pointers are stored type-erased as `*mut ()` so that one
        // `AtomicPtr` can hold any of the candidate routines.
        type FnRaw = *mut ();

        // Starts out pointing at `detect`; after the first invocation it
        // holds the CPU-specific routine selected by `detect`.
        static FN: AtomicPtr<()> = AtomicPtr::new(detect as FnRaw);

        // One-shot selector: picks the best routine for the running CPU,
        // caches it in `FN`, then services the current call itself. If
        // multiple threads race here, each performs the same detection and
        // stores the same kind of valid pointer, so the race is benign.
        fn detect($($needle: u8),+, haystack: &[u8]) -> Option<usize> {
            let fun =
                if cfg!(memchr_runtime_avx) && is_x86_feature_detected!("avx2") {
                    avx::$name as FnRaw
                } else if cfg!(memchr_runtime_sse2) {
                    sse2::$name as FnRaw
                } else {
                    fallback::$name as FnRaw
                };
            // NOTE(review): `Relaxed` appears sufficient since the atomic
            // only ever holds a valid function pointer and no other data is
            // published through it --- confirm no ordering is needed with
            // the callees.
            FN.store(fun as FnRaw, Ordering::Relaxed);
            // SAFETY: By virtue of the caller contract, $fnty is a function
            // pointer, which is always safe to transmute with a *mut ().
            // Also, if 'fun is the AVX routine, then it is guaranteed to be
            // supported since we checked the avx2 feature.
            unsafe {
                mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, haystack)
            }
        }

        // SAFETY: By virtue of the caller contract, $fnty is a function
        // pointer, which is always safe to transmute with a *mut (). Also, if
        // 'fun is the AVX routine, then it is guaranteed to be supported since
        // we checked the avx2 feature.
        unsafe {
            let fun = FN.load(Ordering::Relaxed);
            mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, $haystack)
        }
    }}
}
72
/// When std isn't available to provide runtime CPU feature detection, or if
/// runtime CPU feature detection has been explicitly disabled, then just
/// call our optimized SSE2 routine directly. SSE2 is available on all x86_64
/// targets, so no CPU feature detection is necessary.
///
/// # Safety
///
/// There are no safety requirements for this definition of the macro. It is
/// safe for all inputs since it is restricted to either the fallback routine
/// or the SSE routine, which is always safe to call on x86_64.
#[cfg(not(feature = "std"))]
macro_rules! unsafe_ifunc {
    ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
        if cfg!(memchr_runtime_sse2) {
            // SAFETY: SSE2 is part of the x86_64 baseline, so calling the
            // SSE2 routine is sound without any runtime feature check.
            unsafe { sse2::$name($($needle),+, $haystack) }
        } else {
            fallback::$name($($needle),+, $haystack)
        }
    }}
}
93
94 #[inline(always)]
memchr(n1: u8, haystack: &[u8]) -> Option<usize>95 pub fn memchr(n1: u8, haystack: &[u8]) -> Option<usize> {
96 unsafe_ifunc!(fn(u8, &[u8]) -> Option<usize>, memchr, haystack, n1)
97 }
98
99 #[inline(always)]
memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize>100 pub fn memchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
101 unsafe_ifunc!(
102 fn(u8, u8, &[u8]) -> Option<usize>,
103 memchr2,
104 haystack,
105 n1,
106 n2
107 )
108 }
109
110 #[inline(always)]
memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize>111 pub fn memchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
112 unsafe_ifunc!(
113 fn(u8, u8, u8, &[u8]) -> Option<usize>,
114 memchr3,
115 haystack,
116 n1,
117 n2,
118 n3
119 )
120 }
121
122 #[inline(always)]
memrchr(n1: u8, haystack: &[u8]) -> Option<usize>123 pub fn memrchr(n1: u8, haystack: &[u8]) -> Option<usize> {
124 unsafe_ifunc!(fn(u8, &[u8]) -> Option<usize>, memrchr, haystack, n1)
125 }
126
127 #[inline(always)]
memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize>128 pub fn memrchr2(n1: u8, n2: u8, haystack: &[u8]) -> Option<usize> {
129 unsafe_ifunc!(
130 fn(u8, u8, &[u8]) -> Option<usize>,
131 memrchr2,
132 haystack,
133 n1,
134 n2
135 )
136 }
137
138 #[inline(always)]
memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize>139 pub fn memrchr3(n1: u8, n2: u8, n3: u8, haystack: &[u8]) -> Option<usize> {
140 unsafe_ifunc!(
141 fn(u8, u8, u8, &[u8]) -> Option<usize>,
142 memrchr3,
143 haystack,
144 n1,
145 n2,
146 n3
147 )
148 }
149