/*
 * PROJECT: ReactOS SDK
 * LICENSE: MIT (https://spdx.org/licenses/MIT)
 * PURPOSE: Intrinsics for the AVX / AVX2 instruction set extensions
 * COPYRIGHT: Copyright 2024 Timo Kreuzer (timo.kreuzer@reactos.org)
 */

#pragma once

#define _INCLUDED_IMM

//#include <wmmintrin.h>
#include <emmintrin.h>

#if defined(_MSC_VER) && !defined(__clang__)

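/* 256-bit integer vector type, declared with the same element views as MSVC's __m256i union */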
typedef union _DECLSPEC_INTRIN_TYPE _CRT_ALIGN(32) __m256i
{
    __int8 m256i_i8[32];
    __int16 m256i_i16[16];
    __int32 m256i_i32[8];
    __int64 m256i_i64[4];
    unsigned __int8 m256i_u8[32];
    unsigned __int16 m256i_u16[16];
    unsigned __int32 m256i_u32[8];
    unsigned __int64 m256i_u64[4];
} __m256i;

#else /* _MSC_VER */

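/* GCC/Clang: raw vector types used to implement the intrinsics as inline functions below */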
typedef char __v32qi __attribute__ ((__vector_size__ (32)));
typedef short __v16hi __attribute__ ((__vector_size__ (32)));
typedef long long __v4di __attribute__ ((__vector_size__ (32)));

typedef long long __m256i __attribute__((__vector_size__(32), __may_alias__));

#endif /* _MSC_VER */

#ifdef __cplusplus
extern "C" {
#endif

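/* 256-bit integer (AVX/AVX2) vector intrinsics */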
extern __m256i __cdecl _mm256_cmpeq_epi8(__m256i, __m256i);
extern __m256i __cdecl _mm256_cmpeq_epi16(__m256i, __m256i);
extern int __cdecl _mm256_movemask_epi8(__m256i);
extern __m256i __cdecl _mm256_setzero_si256(void);
extern void __cdecl _mm256_zeroupper(void);

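/* RDRAND: store a hardware-generated random value through the pointer;
   each *_step function returns 1 on success, 0 if no value was available */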
extern int __cdecl _rdrand16_step(unsigned short *random_val);
extern int __cdecl _rdrand32_step(unsigned int *random_val);
#if defined(_M_X64)
extern int __cdecl _rdrand64_step(unsigned __int64 *random_val);
#endif

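/* RDSEED: like RDRAND, but returns entropy-source output suitable for seeding a PRNG */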
extern int __cdecl _rdseed16_step(unsigned short *random_val);
extern int __cdecl _rdseed32_step(unsigned int *random_val);
#if defined(_M_X64)
extern int __cdecl _rdseed64_step(unsigned __int64 *random_val);
#endif


#if defined(_MSC_VER) && !defined(__clang__)

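/* MSVC knows these as compiler intrinsics; request intrinsic expansion instead of function calls */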
#pragma intrinsic(_mm256_cmpeq_epi8)
#pragma intrinsic(_mm256_cmpeq_epi16)
#pragma intrinsic(_mm256_movemask_epi8)
#pragma intrinsic(_mm256_setzero_si256)
#pragma intrinsic(_mm256_zeroupper)

#pragma intrinsic(_rdrand16_step)
#pragma intrinsic(_rdrand32_step)
#if defined(_M_X64)
#pragma intrinsic(_rdrand64_step)
#endif
#pragma intrinsic(_rdseed16_step)
#pragma intrinsic(_rdseed32_step)
#if defined(_M_X64)
#pragma intrinsic(_rdseed64_step)
#endif

#else /* _MSC_VER */

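/* Per-function target attributes let GCC/Clang emit AVX/AVX2 code even when the
   translation unit is not compiled with -mavx; Clang additionally takes
   __min_vector_width__ so the 256-bit operations are not narrowed. */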
#ifdef __clang__
#define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2"),__min_vector_width__(128)))
#define __ATTRIBUTE_AVX__ __attribute__((__target__("avx"),__min_vector_width__(256)))
#define __ATTRIBUTE_AVX2__ __attribute__((__target__("avx2"),__min_vector_width__(256)))
#else
#define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2")))
#define __ATTRIBUTE_AVX__ __attribute__((__target__("avx")))
#define __ATTRIBUTE_AVX2__ __attribute__((__target__("avx2")))
#endif
#define __INTRIN_INLINE_SSE2 __INTRIN_INLINE __ATTRIBUTE_SSE2__
#define __INTRIN_INLINE_AVX __INTRIN_INLINE __ATTRIBUTE_AVX__
#define __INTRIN_INLINE_AVX2 __INTRIN_INLINE __ATTRIBUTE_AVX2__

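/* Compare packed 8-bit integers for equality: each byte lane is 0xFF on match, 0x00 otherwise */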
__INTRIN_INLINE_AVX __m256i __cdecl _mm256_cmpeq_epi8(__m256i __A, __m256i __B)
{
    return (__m256i)((__v32qi)__A == (__v32qi)__B);
}

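/* Compare packed 16-bit integers for equality: each word lane is 0xFFFF on match, 0x0000 otherwise */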
__INTRIN_INLINE_AVX __m256i __cdecl _mm256_cmpeq_epi16(__m256i __A, __m256i __B)
{
    return (__m256i)((__v16hi)__A == (__v16hi)__B);
}

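/* Gather the most significant bit of each of the 32 bytes into a 32-bit mask (VPMOVMSKB) */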
__INTRIN_INLINE_AVX2 int __cdecl _mm256_movemask_epi8(__m256i __A)
{
    return __builtin_ia32_pmovmskb256((__v32qi)__A);
}

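/* Return a 256-bit vector with all elements set to zero */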
__INTRIN_INLINE_AVX __m256i __cdecl _mm256_setzero_si256(void)
{
    return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };
}

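/* Zero the upper 128 bits of all YMM registers (VZEROUPPER), avoiding AVX/SSE transition penalties */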
__INTRIN_INLINE void __cdecl _mm256_zeroupper(void)
{
    __asm__ __volatile__("vzeroupper");
}

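/* RDRAND writes a random value into the destination register and sets CF on success;
   SETC captures CF so the *_step functions can return 1 (success) or 0 (retry later) */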
__INTRIN_INLINE int _rdrand16_step(unsigned short* random_val)
{
    unsigned char ok;
    __asm__ __volatile__("rdrand %0; setc %1" : "=r"(*random_val), "=qm"(ok));
    return (int)ok;
}

__INTRIN_INLINE int _rdrand32_step(unsigned int* random_val)
{
    unsigned char ok;
    __asm__ __volatile__("rdrand %0; setc %1" : "=r"(*random_val), "=qm"(ok));
    return (int)ok;
}

#if defined(__x86_64__)
__INTRIN_INLINE int _rdrand64_step(unsigned __int64* random_val)
{
    unsigned char ok;
    __asm__ __volatile__("rdrand %0; setc %1" : "=r"(*random_val), "=qm"(ok));
    return (int)ok;
}
#endif // __x86_64__

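/* RDSEED uses the same CF/SETC success protocol as RDRAND above */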
__INTRIN_INLINE int _rdseed16_step(unsigned short* random_val)
{
    unsigned char ok;
    __asm__ __volatile__("rdseed %0; setc %1" : "=r"(*random_val), "=qm"(ok));
    return (int)ok;
}

__INTRIN_INLINE int _rdseed32_step(unsigned int* random_val)
{
    unsigned char ok;
    __asm__ __volatile__("rdseed %0; setc %1" : "=r"(*random_val), "=qm"(ok));
    return (int)ok;
}

#if defined(__x86_64__)
__INTRIN_INLINE int _rdseed64_step(unsigned __int64* random_val)
{
    unsigned char ok;
    __asm__ __volatile__("rdseed %0; setc %1" : "=r"(*random_val), "=qm"(ok));
    return (int)ok;
}
#endif // __x86_64__

#endif /* _MSC_VER */

#ifdef __cplusplus
} // extern "C"
#endif