1 //! This module implements minimal run-time feature detection for x86.
2 //!
3 //! The features are detected using the `detect_features` function below.
//! This function uses the CPUID instruction to read the feature flags from the
//! CPU and encodes them in a `usize` where each bit position represents
//! whether a feature is available (bit is set) or unavailable (bit is cleared).
7 //!
//! The enum `Feature` is used to map bit positions to feature names, and
//! the `__crate::detect::check_for!` macro is used to map string literals (e.g.,
//! "avx") to these bit positions (e.g., `Feature::avx`).
11 //!
//! The run-time feature detection is performed by the
//! `__crate::detect::check_for(Feature) -> bool` function. On its first call,
//! this function queries the CPU for the available features and stores them
//! in a global `AtomicUsize` variable. The query is then performed by just
//! checking whether the feature bit in this global variable is set or cleared.
17 
18 /// A macro to test at *runtime* whether a CPU feature is available on
19 /// x86/x86-64 platforms.
20 ///
21 /// This macro is provided in the standard library and will detect at runtime
22 /// whether the specified CPU feature is detected. This does **not** resolve at
23 /// compile time unless the specified feature is already enabled for the entire
24 /// crate. Runtime detection currently relies mostly on the `cpuid` instruction.
25 ///
26 /// This macro only takes one argument which is a string literal of the feature
27 /// being tested for. The feature names supported are the lowercase versions of
28 /// the ones defined by Intel in [their documentation][docs].
29 ///
30 /// ## Supported arguments
31 ///
32 /// This macro supports the same names that `#[target_feature]` supports. Unlike
33 /// `#[target_feature]`, however, this macro does not support names separated
34 /// with a comma. Instead testing for multiple features must be done through
35 /// separate macro invocations for now.
36 ///
37 /// Supported arguments are:
38 ///
39 /// * `"aes"`
40 /// * `"pclmulqdq"`
41 /// * `"rdrand"`
42 /// * `"rdseed"`
43 /// * `"tsc"`
44 /// * `"mmx"`
45 /// * `"sse"`
46 /// * `"sse2"`
47 /// * `"sse3"`
48 /// * `"ssse3"`
49 /// * `"sse4.1"`
50 /// * `"sse4.2"`
51 /// * `"sse4a"`
52 /// * `"sha"`
53 /// * `"avx"`
54 /// * `"avx2"`
55 /// * `"avx512f"`
56 /// * `"avx512cd"`
57 /// * `"avx512er"`
58 /// * `"avx512pf"`
59 /// * `"avx512bw"`
60 /// * `"avx512dq"`
61 /// * `"avx512vl"`
62 /// * `"avx512ifma"`
63 /// * `"avx512vbmi"`
64 /// * `"avx512vpopcntdq"`
65 /// * `"f16c"`
66 /// * `"fma"`
67 /// * `"bmi1"`
68 /// * `"bmi2"`
69 /// * `"abm"`
70 /// * `"lzcnt"`
71 /// * `"tbm"`
72 /// * `"popcnt"`
73 /// * `"fxsr"`
74 /// * `"xsave"`
75 /// * `"xsaveopt"`
76 /// * `"xsaves"`
77 /// * `"xsavec"`
78 /// * `"adx"`
79 /// * `"rtm"`
80 ///
81 /// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide
82 #[macro_export]
83 #[stable(feature = "simd_x86", since = "1.27.0")]
84 #[allow_internal_unstable(stdsimd_internal, stdsimd)]
85 macro_rules! is_x86_feature_detected {
86     ("aes") => {
87         cfg!(target_feature = "aes") || $crate::detect::check_for($crate::detect::Feature::aes)
88     };
89     ("pclmulqdq") => {
90         cfg!(target_feature = "pclmulqdq")
91             || $crate::detect::check_for($crate::detect::Feature::pclmulqdq)
92     };
93     ("rdrand") => {
94         cfg!(target_feature = "rdrand")
95             || $crate::detect::check_for($crate::detect::Feature::rdrand)
96     };
97     ("rdseed") => {
98         cfg!(target_feature = "rdseed")
99             || $crate::detect::check_for($crate::detect::Feature::rdseed)
100     };
101     ("tsc") => {
102         cfg!(target_feature = "tsc") || $crate::detect::check_for($crate::detect::Feature::tsc)
103     };
104     ("mmx") => {
105         cfg!(target_feature = "mmx") || $crate::detect::check_for($crate::detect::Feature::mmx)
106     };
107     ("sse") => {
108         cfg!(target_feature = "sse") || $crate::detect::check_for($crate::detect::Feature::sse)
109     };
110     ("sse2") => {
111         cfg!(target_feature = "sse2") || $crate::detect::check_for($crate::detect::Feature::sse2)
112     };
113     ("sse3") => {
114         cfg!(target_feature = "sse3") || $crate::detect::check_for($crate::detect::Feature::sse3)
115     };
116     ("ssse3") => {
117         cfg!(target_feature = "ssse3") || $crate::detect::check_for($crate::detect::Feature::ssse3)
118     };
119     ("sse4.1") => {
120         cfg!(target_feature = "sse4.1")
121             || $crate::detect::check_for($crate::detect::Feature::sse4_1)
122     };
123     ("sse4.2") => {
124         cfg!(target_feature = "sse4.2")
125             || $crate::detect::check_for($crate::detect::Feature::sse4_2)
126     };
127     ("sse4a") => {
128         cfg!(target_feature = "sse4a") || $crate::detect::check_for($crate::detect::Feature::sse4a)
129     };
130     ("sha") => {
131         cfg!(target_feature = "sha") || $crate::detect::check_for($crate::detect::Feature::sha)
132     };
133     ("avx") => {
134         cfg!(target_feature = "avx") || $crate::detect::check_for($crate::detect::Feature::avx)
135     };
136     ("avx2") => {
137         cfg!(target_feature = "avx2") || $crate::detect::check_for($crate::detect::Feature::avx2)
138     };
139     ("avx512f") => {
140         cfg!(target_feature = "avx512f")
141             || $crate::detect::check_for($crate::detect::Feature::avx512f)
142     };
143     ("avx512cd") => {
144         cfg!(target_feature = "avx512cd")
145             || $crate::detect::check_for($crate::detect::Feature::avx512cd)
146     };
147     ("avx512er") => {
148         cfg!(target_feature = "avx512er")
149             || $crate::detect::check_for($crate::detect::Feature::avx512er)
150     };
151     ("avx512pf") => {
152         cfg!(target_feature = "avx512pf")
153             || $crate::detect::check_for($crate::detect::Feature::avx512pf)
154     };
155     ("avx512bw") => {
156         cfg!(target_feature = "avx512bw")
157             || $crate::detect::check_for($crate::detect::Feature::avx512bw)
158     };
159     ("avx512dq") => {
160         cfg!(target_feature = "avx512dq")
161             || $crate::detect::check_for($crate::detect::Feature::avx512dq)
162     };
163     ("avx512vl") => {
164         cfg!(target_Feature = "avx512vl")
165             || $crate::detect::check_for($crate::detect::Feature::avx512vl)
166     };
167     ("avx512ifma") => {
168         cfg!(target_feature = "avx512ifma")
169             || $crate::detect::check_for($crate::detect::Feature::avx512_ifma)
170     };
171     ("avx512vbmi") => {
172         cfg!(target_feature = "avx512vbmi")
173             || $crate::detect::check_for($crate::detect::Feature::avx512_vbmi)
174     };
175     ("avx512vpopcntdq") => {
176         cfg!(target_feature = "avx512vpopcntdq")
177             || $crate::detect::check_for($crate::detect::Feature::avx512_vpopcntdq)
178     };
179     ("f16c") => {
180         cfg!(target_feature = "f16c") || $crate::detect::check_for($crate::detect::Feature::f16c)
181     };
182     ("fma") => {
183         cfg!(target_feature = "fma") || $crate::detect::check_for($crate::detect::Feature::fma)
184     };
185     ("bmi1") => {
186         cfg!(target_feature = "bmi1") || $crate::detect::check_for($crate::detect::Feature::bmi)
187     };
188     ("bmi2") => {
189         cfg!(target_feature = "bmi2") || $crate::detect::check_for($crate::detect::Feature::bmi2)
190     };
191     ("abm") => {
192         cfg!(target_feature = "abm") || $crate::detect::check_for($crate::detect::Feature::abm)
193     };
194     ("lzcnt") => {
195         cfg!(target_feature = "lzcnt") || $crate::detect::check_for($crate::detect::Feature::abm)
196     };
197     ("tbm") => {
198         cfg!(target_feature = "tbm") || $crate::detect::check_for($crate::detect::Feature::tbm)
199     };
200     ("popcnt") => {
201         cfg!(target_feature = "popcnt")
202             || $crate::detect::check_for($crate::detect::Feature::popcnt)
203     };
204     ("fxsr") => {
205         cfg!(target_feature = "fxsr") || $crate::detect::check_for($crate::detect::Feature::fxsr)
206     };
207     ("xsave") => {
208         cfg!(target_feature = "xsave") || $crate::detect::check_for($crate::detect::Feature::xsave)
209     };
210     ("xsaveopt") => {
211         cfg!(target_feature = "xsaveopt")
212             || $crate::detect::check_for($crate::detect::Feature::xsaveopt)
213     };
214     ("xsaves") => {
215         cfg!(target_feature = "xsaves")
216             || $crate::detect::check_for($crate::detect::Feature::xsaves)
217     };
218     ("xsavec") => {
219         cfg!(target_feature = "xsavec")
220             || $crate::detect::check_for($crate::detect::Feature::xsavec)
221     };
222     ("cmpxchg16b") => {
223         cfg!(target_feature = "cmpxchg16b")
224             || $crate::detect::check_for($crate::detect::Feature::cmpxchg16b)
225     };
226     ("adx") => {
227         cfg!(target_feature = "adx") || $crate::detect::check_for($crate::detect::Feature::adx)
228     };
229     ("rtm") => {
230         cfg!(target_feature = "rtm") || $crate::detect::check_for($crate::detect::Feature::rtm)
231     };
232     ($t:tt,) => {
233         is_x86_feature_detected!($t);
234     };
235     ($t:tt) => {
236         compile_error!(concat!("unknown target feature: ", $t))
237     };
238 }
239 
/// X86 CPU Feature enum. Each variant denotes a position in a bitset for a
/// particular feature.
///
/// The variants carry no explicit discriminants, so a variant's value is its
/// position in declaration order (starting at 0). Reordering, inserting or
/// removing variants therefore changes the bit positions.
///
/// This is an unstable implementation detail subject to change.
#[allow(non_camel_case_types)]
#[repr(u8)]
#[doc(hidden)]
#[unstable(feature = "stdsimd_internal", issue = "0")]
pub enum Feature {
    /// AES (Advanced Encryption Standard New Instructions AES-NI)
    aes,
    /// CLMUL (Carry-less Multiplication)
    pclmulqdq,
    /// RDRAND
    rdrand,
    /// RDSEED
    rdseed,
    /// TSC (Time Stamp Counter)
    tsc,
    /// MMX
    mmx,
    /// SSE (Streaming SIMD Extensions)
    sse,
    /// SSE2 (Streaming SIMD Extensions 2)
    sse2,
    /// SSE3 (Streaming SIMD Extensions 3)
    sse3,
    /// SSSE3 (Supplemental Streaming SIMD Extensions 3)
    ssse3,
    /// SSE4.1 (Streaming SIMD Extensions 4.1)
    sse4_1,
    /// SSE4.2 (Streaming SIMD Extensions 4.2)
    sse4_2,
    /// SSE4a (Streaming SIMD Extensions 4a)
    sse4a,
    /// SHA
    sha,
    /// AVX (Advanced Vector Extensions)
    avx,
    /// AVX2 (Advanced Vector Extensions 2)
    avx2,
    /// AVX-512 F (Foundation)
    avx512f,
    /// AVX-512 CD (Conflict Detection Instructions)
    avx512cd,
    /// AVX-512 ER (Exponential and Reciprocal Instructions)
    avx512er,
    /// AVX-512 PF (Prefetch Instructions)
    avx512pf,
    /// AVX-512 BW (Byte and Word Instructions)
    avx512bw,
    /// AVX-512 DQ (Doubleword and Quadword)
    avx512dq,
    /// AVX-512 VL (Vector Length Extensions)
    avx512vl,
    /// AVX-512 IFMA (Integer Fused Multiply Add)
    avx512_ifma,
    /// AVX-512 VBMI (Vector Byte Manipulation Instructions)
    avx512_vbmi,
    /// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and
    /// Quadword)
    avx512_vpopcntdq,
    /// F16C (Conversions between IEEE-754 `binary16` and `binary32` formats)
    f16c,
    /// FMA (Fused Multiply Add)
    fma,
    /// BMI1 (Bit Manipulation Instructions 1); selected by the `"bmi1"`
    /// argument of `is_x86_feature_detected!`
    bmi,
    /// BMI2 (Bit Manipulation Instructions 2)
    bmi2,
    /// ABM (Advanced Bit Manipulation) on AMD / LZCNT (Leading Zero
    /// Count) on Intel; selected by both the `"abm"` and `"lzcnt"`
    /// arguments of `is_x86_feature_detected!`
    abm,
    /// TBM (Trailing Bit Manipulation)
    tbm,
    /// POPCNT (Population Count)
    popcnt,
    /// FXSR (Floating-point context fast save and restore)
    fxsr,
    /// XSAVE (Save Processor Extended States)
    xsave,
    /// XSAVEOPT (Save Processor Extended States Optimized)
    xsaveopt,
    /// XSAVES (Save Processor Extended States Supervisor)
    xsaves,
    /// XSAVEC (Save Processor Extended States Compacted)
    xsavec,
    /// CMPXCHG16B, a 16-byte compare-and-swap instruction
    cmpxchg16b,
    /// ADX, Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
    adx,
    /// RTM, Intel (Restricted Transactional Memory)
    rtm,
}
334