/*===---- immintrin.h - Intel intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#define __IMMINTRIN_H

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MMX__)
#include <mmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE__)
#include <xmmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE2__)
#include <emmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSE3__)
#include <pmmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SSSE3__)
#include <tmmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__SSE4_2__) || defined(__SSE4_1__))
#include <smmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AES__) || defined(__PCLMUL__))
#include <wmmintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLFLUSHOPT__)
#include <clflushoptintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLWB__)
#include <clwbintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX__)
#include <avxintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX2__)
#include <avx2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__F16C__)
#include <f16cintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VPCLMULQDQ__)
#include <vpclmulqdqintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI__)
#include <bmiintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__BMI2__)
#include <bmi2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__LZCNT__)
#include <lzcntintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__POPCNT__)
#include <popcntintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FMA__)
#include <fmaintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512F__)
#include <avx512fintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VL__)
#include <avx512vlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BW__)
#include <avx512bwintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__)
#include <avx512bitalgintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__)
#include <avx512cdintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
#include <avx512vpopcntdqintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
#include <avx512vpopcntdqvlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VNNI__)
#include <avx512vnniintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512VNNI__))
#include <avx512vlvnniintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__)
#include <avx512dqintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BITALG__))
#include <avx512vlbitalgintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BW__))
#include <avx512vlbwintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512CD__))
#include <avx512vlcdintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512DQ__))
#include <avx512vldqintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512ER__)
#include <avx512erintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512IFMA__)
#include <avx512ifmaintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512IFMA__) && defined(__AVX512VL__))
#include <avx512ifmavlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI__)
#include <avx512vbmiintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VBMI__) && defined(__AVX512VL__))
#include <avx512vbmivlintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VBMI2__)
#include <avx512vbmi2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
#include <avx512vlvbmi2intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512PF__)
#include <avx512pfintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BF16__)
#include <avx512bf16intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
    (defined(__AVX512VL__) && defined(__AVX512BF16__))
#include <avx512vlbf16intrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__)
#include <pkuintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__)
#include <vaesintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__)
#include <gfniintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDPID__)
/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void) {
  return __builtin_ia32_rdpid();
}
#endif // __RDPID__
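
/* Illustrative usage sketch, not part of this header: on operating systems
 * that store the logical CPU number in IA32_TSC_AUX (Linux keeps it in the
 * low 12 bits), _rdpid_u32() gives a cheap "which CPU am I on" query. The
 * helper name and the bit layout are assumptions made for the example.
 *
 *   static unsigned int current_cpu(void) {   // hypothetical helper
 *     return _rdpid_u32() & 0xfffu;           // low 12 bits hold the CPU id on Linux
 *   }
 */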

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__)
/* Each _rdrand*_step() writes a hardware random value to *__p and returns 1,
 * or returns 0 if no random data was available. */
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
  return __builtin_ia32_rdrand16_step(__p);
}

static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
  return __builtin_ia32_rdrand32_step(__p);
}

#ifdef __x86_64__
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
  return __builtin_ia32_rdrand64_step(__p);
}
#endif
#endif /* __RDRND__ */
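
/* Illustrative usage sketch, not part of this header: RDRAND can transiently
 * report that no entropy is available, so callers normally retry a bounded
 * number of times. The helper name and retry count are assumptions made for
 * the example.
 *
 *   static int get_random_u32(unsigned int *__out) {   // hypothetical helper
 *     for (int __i = 0; __i < 10; ++__i)
 *       if (_rdrand32_step(__out))                      // 1 on success
 *         return 1;
 *     return 0;                                         // give up: no entropy delivered
 *   }
 */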

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__)
#ifdef __x86_64__
/* Read and write the FS and GS segment base registers directly. */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
  return __builtin_ia32_rdfsbase32();
}

static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
  return __builtin_ia32_rdfsbase64();
}

static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  return __builtin_ia32_rdgsbase32();
}

static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  return __builtin_ia32_rdgsbase64();
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrfsbase32(__V);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrfsbase64(__V);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrgsbase32(__V);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrgsbase64(__V);
}

#endif
#endif /* __FSGSBASE__ */
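
/* Illustrative usage sketch, not part of this header: user-space code can keep
 * a per-thread data block reachable through the GS base, provided the kernel
 * has enabled CR4.FSGSBASE (otherwise these instructions raise #UD). The
 * helper below is an assumption made for the example.
 *
 *   static void set_thread_block(void *__blk) {      // hypothetical helper
 *     _writegsbase_u64((unsigned long long)__blk);   // later accesses use a %gs-relative offset
 *   }
 */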

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__MOVBE__)

/* The structs used below force the loads and stores to be unaligned. This
 * is accomplished with the __packed__ attribute. The __may_alias__ attribute
 * prevents TBAA metadata from being generated based on the struct and the
 * type of the field inside of it.
 */

static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
  struct __loadu_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  return __builtin_bswap16(((struct __loadu_i16*)__P)->__v);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
  struct __storeu_i16 {
    short __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i16*)__P)->__v = __builtin_bswap16(__D);
}

static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
  struct __loadu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  return __builtin_bswap32(((struct __loadu_i32*)__P)->__v);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
  struct __storeu_i32 {
    int __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i32*)__P)->__v = __builtin_bswap32(__D);
}

#ifdef __x86_64__
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
  struct __loadu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  return __builtin_bswap64(((struct __loadu_i64*)__P)->__v);
}

static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
  struct __storeu_i64 {
    long long __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i64*)__P)->__v = __builtin_bswap64(__D);
}
#endif
#endif /* __MOVBE__ */
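
/* Illustrative usage sketch, not part of this header: the _loadbe_iNN and
 * _storebe_iNN intrinsics read and write big-endian values at possibly
 * unaligned addresses, which suits network and on-disk formats. The helper
 * name and field offset are assumptions made for the example.
 *
 *   static int parse_be32_field(const unsigned char *__buf) {  // hypothetical helper
 *     return _loadbe_i32(__buf + 4);    // 32-bit big-endian field at byte offset 4
 *   }
 */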

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RTM__)
#include <rtmintrin.h>
#include <xtestintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHA__)
#include <shaintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__FXSR__)
#include <fxsrintrin.h>
#endif

/* No feature check desired due to internal MSC_VER checks */
#include <xsaveintrin.h>

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__)
#include <xsaveoptintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEC__)
#include <xsavecintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVES__)
#include <xsavesintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SHSTK__)
#include <cetintrin.h>
#endif

/* Some intrinsics inside adxintrin.h are available only on processors with ADX,
 * whereas others are also available at all times. */
#include <adxintrin.h>
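
/* Illustrative usage sketch, not part of this header: _addcarry_u64 from
 * <adxintrin.h> chains the carry flag across limbs for multi-precision
 * addition. The helper name and the two-limb layout are assumptions made
 * for the example (x86-64 only).
 *
 *   static void add_u128(unsigned long long __a[2], const unsigned long long __b[2]) {
 *     unsigned char __c = _addcarry_u64(0, __a[0], __b[0], &__a[0]);  // low limb
 *     _addcarry_u64(__c, __a[1], __b[1], &__a[1]);                    // high limb plus carry
 *   }
 */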

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDSEED__)
#include <rdseedintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WBNOINVD__)
#include <wbnoinvdintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__CLDEMOTE__)
#include <cldemoteintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__WAITPKG__)
#include <waitpkgintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
  defined(__MOVDIRI__) || defined(__MOVDIR64B__)
#include <movdirintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PCONFIG__)
#include <pconfigintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__SGX__)
#include <sgxintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__PTWRITE__)
#include <ptwriteintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__INVPCID__)
#include <invpcidintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
  defined(__AVX512VP2INTERSECT__)
#include <avx512vp2intersectintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || \
  (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
#include <avx512vlvp2intersectintrin.h>
#endif

#if !defined(_MSC_VER) || __has_feature(modules) || defined(__ENQCMD__)
#include <enqcmdintrin.h>
#endif

#if defined(_MSC_VER) && __has_extension(gnu_asm)
/* Define the default attributes for these intrinsics */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
#ifdef __cplusplus
extern "C" {
#endif
/*----------------------------------------------------------------------------*\
|* Interlocked Exchange HLE
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
#endif
#if defined(__x86_64__)
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; xchg %0, %1"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; xchg %0, %1"
                       : "+r" (_Value), "+m" (*_Target) :: "memory");
  return _Value;
}
#endif
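
/* Illustrative usage sketch, not part of this header: the HLE-prefixed
 * exchange intrinsics above can back an elided spinlock. The lock layout and
 * helper names are assumptions made for the example.
 *
 *   static void spin_lock(long volatile *__l) {              // hypothetical helper
 *     while (_InterlockedExchange_HLEAcquire(__l, 1) != 0)    // XACQUIRE lock xchg
 *       ;                                                     // spin until the lock is free
 *   }
 *   static void spin_unlock(long volatile *__l) {
 *     _InterlockedExchange_HLERelease(__l, 0);                // XRELEASE lock xchg
 *   }
 */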
/*----------------------------------------------------------------------------*\
|* Interlocked Compare Exchange HLE
\*----------------------------------------------------------------------------*/
#if defined(__i386__) || defined(__x86_64__)
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
                              long _Exchange, long _Comparand) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLERelease(long volatile *_Destination,
                              long _Exchange, long _Comparand) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
#endif
#if defined(__x86_64__)
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
                              __int64 _Exchange, __int64 _Comparand) {
  __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg %2, %1"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
                              __int64 _Exchange, __int64 _Comparand) {
  __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg %2, %1"
                       : "+a" (_Comparand), "+m" (*_Destination)
                       : "r" (_Exchange) : "memory");
  return _Comparand;
}
#endif
#ifdef __cplusplus
}
#endif

#undef __DEFAULT_FN_ATTRS

#endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */

#endif /* __IMMINTRIN_H */