/*===---- immintrin.h - Intel intrinsics -----------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#define __IMMINTRIN_H

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)
#include <mmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)
#include <xmmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)
#include <emmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)
#include <pmmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)
#include <tmmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__SSE4_2__) || defined(__SSE4_1__))
#include <smmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AES__) || defined(__PCLMUL__))
#include <wmmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)
#include <clflushoptintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)
#include <clwbintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
#include <avxintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
#include <avx2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)
#include <f16cintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)
#include <vpclmulqdqintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
#include <bmiintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
#include <bmi2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
#include <lzcntintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)
#include <popcntintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
#include <fmaintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)
#include <avx512fintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)
#include <avx512vlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)
#include <avx512bwintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)
#include <avx512bitalgintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
#include <avx512cdintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
#include <avx512vpopcntdqintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
#include <avx512vpopcntdqvlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)
#include <avx512vnniintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512VNNI__))
#include <avx512vlvnniintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
#include <avx512dqintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BITALG__))
#include <avx512vlbitalgintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BW__))
#include <avx512vlbwintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512CD__))
#include <avx512vlcdintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512DQ__))
#include <avx512vldqintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)
#include <avx512erintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)
#include <avx512ifmaintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512IFMA__) && defined(__AVX512VL__))
#include <avx512ifmavlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)
#include <avx512vbmiintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VBMI__) && defined(__AVX512VL__))
#include <avx512vbmivlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)
#include <avx512vbmi2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
#include <avx512vlvbmi2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
#include <avx512pfintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
#include <pkuintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)
#include <vaesintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)
#include <gfniintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__)
/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void) {
  return __builtin_ia32_rdpid();
}
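
/* A minimal usage sketch (illustrative only, not part of this header): the
 * contents of IA32_TSC_AUX are programmed by the operating system, so the
 * meaning of the returned bits is OS-defined and should not be decoded
 * without consulting the OS documentation.
 *
 *   unsigned int __aux = _rdpid_u32();   // raw IA32_TSC_AUX value
 */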
#endif // __RDPID__

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
  return __builtin_ia32_rdrand16_step(__p);
}

static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
  return __builtin_ia32_rdrand32_step(__p);
}
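
/* A minimal usage sketch (illustrative only): RDRAND can transiently fail, so
 * each *_step intrinsic returns 1 on success and 0 when no random value was
 * available; callers typically retry a bounded number of times.
 *
 *   unsigned int __r;
 *   int __ok = 0, __tries = 10;
 *   while (__tries-- > 0 && !(__ok = _rdrand32_step(&__r)))
 *     ;                         // use __r only if __ok is 1 after the loop
 */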

#ifdef __x86_64__
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
  return __builtin_ia32_rdrand64_step(__p);
}
#endif
#endif /* __RDRND__ */

/* __bit_scan_forward */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bit_scan_forward(int __A) {
  return __builtin_ctz(__A);
}

/* __bit_scan_reverse */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_bit_scan_reverse(int __A) {
  return 31 - __builtin_clz(__A);
}
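
/* Worked examples for the bit-scan helpers above (illustrative only):
 * _bit_scan_forward returns the index of the least significant set bit and
 * _bit_scan_reverse the index of the most significant set bit. The result is
 * undefined for an argument of 0, since __builtin_ctz(0) and __builtin_clz(0)
 * are undefined.
 *
 *   int __lo = _bit_scan_forward(0x18);  // 3: bit 3 is the lowest set bit
 *   int __hi = _bit_scan_reverse(0x18);  // 4: bit 4 is the highest set bit
 */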

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
#ifdef __x86_64__
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
  return __builtin_ia32_rdfsbase32();
}

static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
  return __builtin_ia32_rdfsbase64();
}

static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  return __builtin_ia32_rdgsbase32();
}

static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  return __builtin_ia32_rdgsbase64();
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrfsbase32(__V);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrfsbase64(__V);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrgsbase32(__V);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrgsbase64(__V);
}
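
/* A minimal usage sketch (illustrative only): these intrinsics execute the
 * RDFSBASE/WRFSBASE/RDGSBASE/WRGSBASE instructions, which fault unless the
 * operating system has enabled them (CR4.FSGSBASE); use them only where the
 * OS documents that they are available in user mode.
 *
 *   unsigned long long __fs = _readfsbase_u64();
 *   _writefsbase_u64(__fs);              // writing the same base back is a no-op
 */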

#endif
#endif /* __FSGSBASE__ */

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__)

/* The structs used below are to force the load/store to be unaligned. This
 * is accomplished with the __packed__ attribute. The __may_alias__ prevents
 * tbaa metadata from being generated based on the struct and the type of the
 * field inside of it.
 */

static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
  struct __loadu_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  return __builtin_bswap16(((struct __loadu_i16*)__P)->__v);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
  struct __storeu_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D);
}

static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
  struct __loadu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  return __builtin_bswap32(((struct __loadu_i32*)__P)->__v);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
  struct __storeu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D);
}
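
/* A minimal usage sketch (illustrative only): the load/store helpers above
 * read and write big-endian values at possibly unaligned addresses, so a
 * store followed by a load of the same location round-trips the value.
 *
 *   unsigned char __buf[4];
 *   _storebe_i32(__buf, 0x11223344);     // memory now holds 11 22 33 44
 *   int __v = _loadbe_i32(__buf);        // __v == 0x11223344 again
 */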

#ifdef __x86_64__
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
  struct __loadu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  return __builtin_bswap64(((struct __loadu_i64*)__P)->__v);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
  struct __storeu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D);
}
#endif
#endif /* __MOVBE__ */

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
#include <rtmintrin.h>
#include <xtestintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)
#include <shaintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)
#include <fxsrintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__)
#include <xsaveintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)
#include <xsaveoptintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)
#include <xsavecintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)
#include <xsavesintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)
#include <cetintrin.h>
#endif

/* Some intrinsics inside adxintrin.h are available only on processors with
 * ADX, whereas others are available at all times. */
#include <adxintrin.h>
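
/* A minimal usage sketch (illustrative only), assuming the _addcarry_u32
 * intrinsic declared in <adxintrin.h>, which is among those usable without
 * the ADX feature: chain the carry across the limbs of a wide addition.
 *
 *   unsigned int __lo, __hi;
 *   unsigned char __c = _addcarry_u32(0, 0xffffffffu, 1u, &__lo);  // __lo == 0, carry == 1
 *   __c = _addcarry_u32(__c, 0u, 0u, &__hi);                       // __hi == 1
 */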

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)
#include <rdseedintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__)
#include <wbnoinvdintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__)
#include <cldemoteintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__)
#include <waitpkgintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    defined(__MOVDIRI__) || defined(__MOVDIR64B__)
#include <movdirintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__)
#include <pconfigintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__)
#include <sgxintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__)
#include <ptwriteintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__INVPCID__)
#include <invpcidintrin.h>
#endif

#ifdef _MSC_VER
/* Define the default attributes for these intrinsics */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
#ifdef __cplusplus
extern "C" {
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange HLE
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
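
/* A minimal usage sketch (illustrative only): the 0xf2 (XACQUIRE) and 0xf3
 * (XRELEASE) prefixes emitted above let HLE-capable processors elide the
 * lock; a simple elided spinlock pairs the Acquire and Release variants so
 * the lock word is restored to its original value on release.
 *
 *   static long volatile __lock;                       // hypothetical lock word, 0 = free
 *   while (_InterlockedExchange_HLEAcquire(&__lock, 1) != 0)
 *     ;                                                // spin until the lock was free
 *   // ... critical section ...
 *   _InterlockedExchange_HLERelease(&__lock, 0);       // release, ending the elided region
 */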
#endif
#if defined(__x86_64__)
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Compare Exchange HLE
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
                                       long _Exchange, long _Comparand) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
                                       long _Exchange, long _Comparand) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
#endif
#if defined(__x86_64__)
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
                                         __int64 _Exchange, __int64 _Comparand) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
                                         __int64 _Exchange, __int64 _Comparand) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
#endif
#ifdef __cplusplus
}
#endif

#undef __DEFAULT_FN_ATTRS

#endif /* _MSC_VER */

#endif /* __IMMINTRIN_H */