1 /*===---- immintrin.h - Intel intrinsics -----------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10 #ifndef __IMMINTRIN_H
11 #define __IMMINTRIN_H
12
13 #if !defined(__i386__) && !defined(__x86_64__)
14 #error "This header is only meant to be used on x86 and x64 architecture"
15 #endif
16
17 #include <x86gprintrin.h>
18
19 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
20 defined(__MMX__)
21 #include <mmintrin.h>
22 #endif
23
24 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
25 defined(__SSE__)
26 #include <xmmintrin.h>
27 #endif
28
29 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
30 defined(__SSE2__)
31 #include <emmintrin.h>
32 #endif
33
34 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
35 defined(__SSE3__)
36 #include <pmmintrin.h>
37 #endif
38
39 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
40 defined(__SSSE3__)
41 #include <tmmintrin.h>
42 #endif
43
44 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
45 (defined(__SSE4_2__) || defined(__SSE4_1__))
46 #include <smmintrin.h>
47 #endif
48
49 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
50 (defined(__AES__) || defined(__PCLMUL__))
51 #include <wmmintrin.h>
52 #endif
53
54 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
55 defined(__CLFLUSHOPT__)
56 #include <clflushoptintrin.h>
57 #endif
58
59 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
60 defined(__CLWB__)
61 #include <clwbintrin.h>
62 #endif
63
64 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
65 defined(__AVX__)
66 #include <avxintrin.h>
67 #endif
68
69 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
70 defined(__AVX2__)
71 #include <avx2intrin.h>
72 #endif
73
74 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
75 defined(__F16C__)
76 #include <f16cintrin.h>
77 #endif
78
79 /* No feature check desired due to internal checks */
80 #include <bmiintrin.h>
81
82 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
83 defined(__BMI2__)
84 #include <bmi2intrin.h>
85 #endif
86
87 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
88 defined(__LZCNT__)
89 #include <lzcntintrin.h>
90 #endif
91
92 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
93 defined(__POPCNT__)
94 #include <popcntintrin.h>
95 #endif
96
97 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
98 defined(__FMA__)
99 #include <fmaintrin.h>
100 #endif
101
102 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
103 defined(__AVX512F__)
104 #include <avx512fintrin.h>
105 #endif
106
107 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
108 defined(__AVX512VL__)
109 #include <avx512vlintrin.h>
110 #endif
111
112 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
113 defined(__AVX512BW__)
114 #include <avx512bwintrin.h>
115 #endif
116
117 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
118 defined(__AVX512BITALG__)
119 #include <avx512bitalgintrin.h>
120 #endif
121
122 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
123 defined(__AVX512CD__)
124 #include <avx512cdintrin.h>
125 #endif
126
127 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
128 defined(__AVX512VPOPCNTDQ__)
129 #include <avx512vpopcntdqintrin.h>
130 #endif
131
132 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
133 (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
134 #include <avx512vpopcntdqvlintrin.h>
135 #endif
136
137 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
138 defined(__AVX512VNNI__)
139 #include <avx512vnniintrin.h>
140 #endif
141
142 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
143 (defined(__AVX512VL__) && defined(__AVX512VNNI__))
144 #include <avx512vlvnniintrin.h>
145 #endif
146
147 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
148 defined(__AVXVNNI__)
149 #include <avxvnniintrin.h>
150 #endif
151
152 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
153 defined(__AVX512DQ__)
154 #include <avx512dqintrin.h>
155 #endif
156
157 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
158 (defined(__AVX512VL__) && defined(__AVX512BITALG__))
159 #include <avx512vlbitalgintrin.h>
160 #endif
161
162 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
163 (defined(__AVX512VL__) && defined(__AVX512BW__))
164 #include <avx512vlbwintrin.h>
165 #endif
166
167 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
168 (defined(__AVX512VL__) && defined(__AVX512CD__))
169 #include <avx512vlcdintrin.h>
170 #endif
171
172 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
173 (defined(__AVX512VL__) && defined(__AVX512DQ__))
174 #include <avx512vldqintrin.h>
175 #endif
176
177 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
178 defined(__AVX512ER__)
179 #include <avx512erintrin.h>
180 #endif
181
182 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
183 defined(__AVX512IFMA__)
184 #include <avx512ifmaintrin.h>
185 #endif
186
187 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
188 (defined(__AVX512IFMA__) && defined(__AVX512VL__))
189 #include <avx512ifmavlintrin.h>
190 #endif
191
192 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
193 defined(__AVXIFMA__)
194 #include <avxifmaintrin.h>
195 #endif
196
197 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
198 defined(__AVX512VBMI__)
199 #include <avx512vbmiintrin.h>
200 #endif
201
202 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
203 (defined(__AVX512VBMI__) && defined(__AVX512VL__))
204 #include <avx512vbmivlintrin.h>
205 #endif
206
207 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
208 defined(__AVX512VBMI2__)
209 #include <avx512vbmi2intrin.h>
210 #endif
211
212 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
213 (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
214 #include <avx512vlvbmi2intrin.h>
215 #endif
216
217 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
218 defined(__AVX512PF__)
219 #include <avx512pfintrin.h>
220 #endif
221
222 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
223 defined(__AVX512FP16__)
224 #include <avx512fp16intrin.h>
225 #endif
226
227 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
228 (defined(__AVX512VL__) && defined(__AVX512FP16__))
229 #include <avx512vlfp16intrin.h>
230 #endif
231
232 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
233 defined(__AVX512BF16__)
234 #include <avx512bf16intrin.h>
235 #endif
236
237 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
238 (defined(__AVX512VL__) && defined(__AVX512BF16__))
239 #include <avx512vlbf16intrin.h>
240 #endif
241
242 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
243 defined(__PKU__)
244 #include <pkuintrin.h>
245 #endif
246
247 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
248 defined(__VPCLMULQDQ__)
249 #include <vpclmulqdqintrin.h>
250 #endif
251
252 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
253 defined(__VAES__)
254 #include <vaesintrin.h>
255 #endif
256
257 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
258 defined(__GFNI__)
259 #include <gfniintrin.h>
260 #endif
261
262 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
263 defined(__AVXVNNIINT8__)
264 #include <avxvnniint8intrin.h>
265 #endif
266
267 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
268 defined(__AVXNECONVERT__)
269 #include <avxneconvertintrin.h>
270 #endif
271
272 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
273 defined(__SHA512__)
274 #include <sha512intrin.h>
275 #endif
276
277 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
278 defined(__SM3__)
279 #include <sm3intrin.h>
280 #endif
281
282 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
283 defined(__SM4__)
284 #include <sm4intrin.h>
285 #endif
286
287 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
288 defined(__AVXVNNIINT16__)
289 #include <avxvnniint16intrin.h>
290 #endif
291
292 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
293 defined(__RDPID__)
/// Reads the value of the IA32_TSC_AUX MSR (0xc0000103).
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDPID </c> instruction.
///
/// \returns The 32-bit contents of the MSR.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
_rdpid_u32(void) {
  return __builtin_ia32_rdpid();
}
305 #endif // __RDPID__
306
307 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
308 defined(__RDRND__)
/// Returns a 16-bit hardware-generated random value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    A pointer to a 16-bit memory location to place the random value.
/// \returns 1 if the value was successfully generated, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand16_step(unsigned short *__p)
{
  return (int)__builtin_ia32_rdrand16_step(__p);
}
323
/// Returns a 32-bit hardware-generated random value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// \param __p
///    A pointer to a 32-bit memory location to place the random value.
/// \returns 1 if the value was successfully generated, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand32_step(unsigned int *__p)
{
  return (int)__builtin_ia32_rdrand32_step(__p);
}
338
/// Returns a 64-bit hardware-generated random value.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDRAND </c> instruction.
///
/// When not compiling for x86-64 the value is assembled from two 32-bit
/// requests; both requests are always issued, and the result is reported as
/// successful only if both of them succeeded.
///
/// \param __p
///    A pointer to a 64-bit memory location to place the random value. In the
///    32-bit emulation path the location is set to 0 on failure.
/// \returns 1 if the value was successfully generated, 0 otherwise.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
_rdrand64_step(unsigned long long *__p)
{
#ifdef __x86_64__
  return (int)__builtin_ia32_rdrand64_step(__p);
#else
  // We need to emulate the functionality of 64-bit rdrand with 2 32-bit
  // rdrand instructions.
  unsigned int __lo, __hi;
  unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo);
  unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi);
  if (__res_lo && __res_hi) {
    // First request supplies the low half, second the high half.
    *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo;
    return 1;
  } else {
    *__p = 0;
    return 0;
  }
#endif
}
368 #endif /* __RDRND__ */
369
370 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
371 defined(__FSGSBASE__)
372 #ifdef __x86_64__
/// Reads the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The lower 32 bits of the FS base register.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u32(void)
{
  return __builtin_ia32_rdfsbase32();
}
385
/// Reads the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
///
/// \returns The contents of the FS base register.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readfsbase_u64(void)
{
  return __builtin_ia32_rdfsbase64();
}
398
/// Reads the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The lower 32 bits of the GS base register.
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u32(void)
{
  return __builtin_ia32_rdgsbase32();
}
411
/// Reads the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
///
/// \returns The contents of the GS base register.
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_readgsbase_u64(void)
{
  return __builtin_ia32_rdgsbase64();
}
424
/// Modifies the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the FS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrfsbase32(__V);
}
438
/// Modifies the FS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
///
/// \param __V
///    Value to use for the FS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writefsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrfsbase64(__V);
}
452
/// Modifies the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the lower 32 bits of the GS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u32(unsigned int __V)
{
  __builtin_ia32_wrgsbase32(__V);
}
466
/// Modifies the GS base register.
///
/// \headerfile <immintrin.h>
///
/// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
///
/// \param __V
///    Value to use for the GS base register.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
_writegsbase_u64(unsigned long long __V)
{
  __builtin_ia32_wrgsbase64(__V);
}
480
481 #endif
482 #endif /* __FSGSBASE__ */
483
484 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
485 defined(__MOVBE__)
486
487 /* The structs used below are to force the load/store to be unaligned. This
488 * is accomplished with the __packed__ attribute. The __may_alias__ prevents
489 * tbaa metadata from being generated based on the struct and the type of the
490 * field inside of it.
491 */
492
/// Load a 16-bit value from memory and swap its bytes.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 16-bit value to load. No alignment is required.
/// \returns The byte-swapped value.
static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i16(void const * __P) {
  /* Packed, may_alias wrapper: forces an unaligned load and keeps the access
   * from being typed by the struct for alias analysis. */
  struct __loadu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
}
509
/// Swap the bytes of a 16-bit value and store it to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value. No alignment is
///    required.
/// \param __D
///    The 16-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i16(void * __P, short __D) {
  /* Packed, may_alias wrapper: forces an unaligned store and keeps the access
   * from being typed by the struct for alias analysis. */
  struct __storeu_i16 {
    unsigned short __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D);
}
527
/// Load a 32-bit value from memory and swap its bytes.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 32-bit value to load. No alignment is required.
/// \returns The byte-swapped value.
static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i32(void const * __P) {
  /* Packed, may_alias wrapper: forces an unaligned load and keeps the access
   * from being typed by the struct for alias analysis. */
  struct __loadu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
}
544
/// Swap the bytes of a 32-bit value and store it to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value. No alignment is
///    required.
/// \param __D
///    The 32-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i32(void * __P, int __D) {
  /* Packed, may_alias wrapper: forces an unaligned store and keeps the access
   * from being typed by the struct for alias analysis. */
  struct __storeu_i32 {
    unsigned int __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i32*)__P)->__v = __builtin_bswap32((unsigned int)__D);
}
562
563 #ifdef __x86_64__
/// Load a 64-bit value from memory and swap its bytes.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the 64-bit value to load. No alignment is required.
/// \returns The byte-swapped value.
static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_loadbe_i64(void const * __P) {
  /* Packed, may_alias wrapper: forces an unaligned load and keeps the access
   * from being typed by the struct for alias analysis. */
  struct __loadu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
}
580
/// Swap the bytes of a 64-bit value and store it to memory.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the MOVBE instruction.
///
/// \param __P
///    A pointer to the memory for storing the swapped value. No alignment is
///    required.
/// \param __D
///    The 64-bit value to be byte-swapped.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
_storebe_i64(void * __P, long long __D) {
  /* Packed, may_alias wrapper: forces an unaligned store and keeps the access
   * from being typed by the struct for alias analysis. */
  struct __storeu_i64 {
    unsigned long long __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_i64*)__P)->__v = __builtin_bswap64((unsigned long long)__D);
}
598 #endif
#endif /* __MOVBE__ */
600
601 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
602 defined(__RTM__)
603 #include <rtmintrin.h>
604 #include <xtestintrin.h>
605 #endif
606
607 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
608 defined(__SHA__)
609 #include <shaintrin.h>
610 #endif
611
612 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
613 defined(__FXSR__)
614 #include <fxsrintrin.h>
615 #endif
616
617 /* No feature check desired due to internal MSC_VER checks */
618 #include <xsaveintrin.h>
619
620 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
621 defined(__XSAVEOPT__)
622 #include <xsaveoptintrin.h>
623 #endif
624
625 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
626 defined(__XSAVEC__)
627 #include <xsavecintrin.h>
628 #endif
629
630 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
631 defined(__XSAVES__)
632 #include <xsavesintrin.h>
633 #endif
634
635 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
636 defined(__SHSTK__)
637 #include <cetintrin.h>
638 #endif
639
640 /* Intrinsics inside adcintrin.h are available at all times. */
641 #include <adcintrin.h>
642
643 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
644 defined(__ADX__)
645 #include <adxintrin.h>
646 #endif
647
648 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
649 defined(__RDSEED__)
650 #include <rdseedintrin.h>
651 #endif
652
653 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
654 defined(__WBNOINVD__)
655 #include <wbnoinvdintrin.h>
656 #endif
657
658 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
659 defined(__CLDEMOTE__)
660 #include <cldemoteintrin.h>
661 #endif
662
663 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
664 defined(__WAITPKG__)
665 #include <waitpkgintrin.h>
666 #endif
667
668 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
669 defined(__MOVDIRI__) || defined(__MOVDIR64B__)
670 #include <movdirintrin.h>
671 #endif
672
673 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
674 defined(__PCONFIG__)
675 #include <pconfigintrin.h>
676 #endif
677
678 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
679 defined(__SGX__)
680 #include <sgxintrin.h>
681 #endif
682
683 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
684 defined(__PTWRITE__)
685 #include <ptwriteintrin.h>
686 #endif
687
688 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
689 defined(__INVPCID__)
690 #include <invpcidintrin.h>
691 #endif
692 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
693 defined(__AMX_FP16__)
694 #include <amxfp16intrin.h>
695 #endif
696
697 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
698 defined(__KL__) || defined(__WIDEKL__)
699 #include <keylockerintrin.h>
700 #endif
701
702 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
703 defined(__AMX_TILE__) || defined(__AMX_INT8__) || defined(__AMX_BF16__)
704 #include <amxintrin.h>
705 #endif
706
707 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
708 defined(__AMX_COMPLEX__)
709 #include <amxcomplexintrin.h>
710 #endif
711
712 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
713 defined(__AVX512VP2INTERSECT__)
714 #include <avx512vp2intersectintrin.h>
715 #endif
716
717 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
718 (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
719 #include <avx512vlvp2intersectintrin.h>
720 #endif
721
722 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
723 defined(__ENQCMD__)
724 #include <enqcmdintrin.h>
725 #endif
726
727 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
728 defined(__SERIALIZE__)
729 #include <serializeintrin.h>
730 #endif
731
732 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
733 defined(__TSXLDTRK__)
734 #include <tsxldtrkintrin.h>
735 #endif
736
737 #if defined(_MSC_VER) && __has_extension(gnu_asm)
738 /* Define the default attributes for these intrinsics */
739 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
740 #ifdef __cplusplus
741 extern "C" {
742 #endif
743 /*----------------------------------------------------------------------------*\
744 |* Interlocked Exchange HLE
745 \*----------------------------------------------------------------------------*/
746 #if defined(__i386__) || defined(__x86_64__)
747 static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLEAcquire(long volatile * _Target,long _Value)748 _InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
749 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
750 : "+r" (_Value), "+m" (*_Target) :: "memory");
751 return _Value;
752 }
753 static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedExchange_HLERelease(long volatile * _Target,long _Value)754 _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
755 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
756 : "+r" (_Value), "+m" (*_Target) :: "memory");
757 return _Value;
758 }
759 #endif
760 #if defined(__x86_64__)
761 static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLEAcquire(__int64 volatile * _Target,__int64 _Value)762 _InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
763 __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
764 : "+r" (_Value), "+m" (*_Target) :: "memory");
765 return _Value;
766 }
767 static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedExchange64_HLERelease(__int64 volatile * _Target,__int64 _Value)768 _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
769 __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
770 : "+r" (_Value), "+m" (*_Target) :: "memory");
771 return _Value;
772 }
773 #endif
774 /*----------------------------------------------------------------------------*\
775 |* Interlocked Compare Exchange HLE
776 \*----------------------------------------------------------------------------*/
777 #if defined(__i386__) || defined(__x86_64__)
778 static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLEAcquire(long volatile * _Destination,long _Exchange,long _Comparand)779 _InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
780 long _Exchange, long _Comparand) {
781 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
782 : "+a" (_Comparand), "+m" (*_Destination)
783 : "r" (_Exchange) : "memory");
784 return _Comparand;
785 }
786 static __inline__ long __DEFAULT_FN_ATTRS
_InterlockedCompareExchange_HLERelease(long volatile * _Destination,long _Exchange,long _Comparand)787 _InterlockedCompareExchange_HLERelease(long volatile *_Destination,
788 long _Exchange, long _Comparand) {
789 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
790 : "+a" (_Comparand), "+m" (*_Destination)
791 : "r" (_Exchange) : "memory");
792 return _Comparand;
793 }
794 #endif
795 #if defined(__x86_64__)
796 static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLEAcquire(__int64 volatile * _Destination,__int64 _Exchange,__int64 _Comparand)797 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
798 __int64 _Exchange, __int64 _Comparand) {
799 __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
800 : "+a" (_Comparand), "+m" (*_Destination)
801 : "r" (_Exchange) : "memory");
802 return _Comparand;
803 }
804 static __inline__ __int64 __DEFAULT_FN_ATTRS
_InterlockedCompareExchange64_HLERelease(__int64 volatile * _Destination,__int64 _Exchange,__int64 _Comparand)805 _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
806 __int64 _Exchange, __int64 _Comparand) {
807 __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
808 : "+a" (_Comparand), "+m" (*_Destination)
809 : "r" (_Exchange) : "memory");
810 return _Comparand;
811 }
812 #endif
813 #ifdef __cplusplus
814 }
815 #endif
816
817 #undef __DEFAULT_FN_ATTRS
818
819 #endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
820
821 #endif /* __IMMINTRIN_H */
822