/*
 * PROJECT:     ReactOS SDK
 * LICENSE:     MIT (https://spdx.org/licenses/MIT)
 * PURPOSE:     Intrinsics for the AVX/AVX2, RDRAND and RDSEED instruction set extensions
 * COPYRIGHT:   Copyright 2024 Timo Kreuzer (timo.kreuzer@reactos.org)
 */

#pragma once

#define _INCLUDED_IMM

//#include <wmmintrin.h>
#include <emmintrin.h>

#if defined(_MSC_VER) && !defined(__clang__)

typedef union _DECLSPEC_INTRIN_TYPE _CRT_ALIGN(32) __m256i
{
    __int8 m256i_i8[32];
    __int16 m256i_i16[16];
    __int32 m256i_i32[8];
    __int64 m256i_i64[4];
    unsigned __int8 m256i_u8[32];
    unsigned __int16 m256i_u16[16];
    unsigned __int32 m256i_u32[8];
    unsigned __int64 m256i_u64[4];
} __m256i;

#else /* _MSC_VER */

typedef char __v32qi __attribute__ ((__vector_size__ (32)));
typedef short __v16hi __attribute__ ((__vector_size__ (32)));
typedef long long __v4di __attribute__ ((__vector_size__ (32)));

typedef long long __m256i __attribute__((__vector_size__(32), __may_alias__));

#endif /* _MSC_VER */

#ifdef __cplusplus
extern "C" {
#endif

extern __m256i __cdecl _mm256_cmpeq_epi8(__m256i, __m256i);
extern __m256i __cdecl _mm256_cmpeq_epi16(__m256i, __m256i);
extern int __cdecl _mm256_movemask_epi8(__m256i);
extern __m256i __cdecl _mm256_setzero_si256(void);
extern void __cdecl _mm256_zeroupper(void);

extern int __cdecl _rdrand16_step(unsigned short *random_val);
extern int __cdecl _rdrand32_step(unsigned int *random_val);
#if defined(_M_X64)
extern int __cdecl _rdrand64_step(unsigned __int64 *random_val);
#endif

extern int __cdecl _rdseed16_step(unsigned short *random_val);
extern int __cdecl _rdseed32_step(unsigned int *random_val);
#if defined(_M_X64)
extern int __cdecl _rdseed64_step(unsigned __int64 *random_val);
#endif


#if defined(_MSC_VER) && !defined(__clang__)

#pragma intrinsic(_mm256_cmpeq_epi8)
#pragma intrinsic(_mm256_cmpeq_epi16)
#pragma intrinsic(_mm256_movemask_epi8)
#pragma intrinsic(_mm256_setzero_si256)
#pragma intrinsic(_mm256_zeroupper)

#pragma intrinsic(_rdrand16_step)
#pragma intrinsic(_rdrand32_step)
#if defined(_M_X64)
#pragma intrinsic(_rdrand64_step)
#endif
#pragma intrinsic(_rdseed16_step)
#pragma intrinsic(_rdseed32_step)
#if defined(_M_X64)
#pragma intrinsic(_rdseed64_step)
#endif

#else /* _MSC_VER */

#ifdef __clang__
#define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2"),__min_vector_width__(128)))
#define __ATTRIBUTE_AVX__ __attribute__((__target__("avx"),__min_vector_width__(256)))
#define __ATTRIBUTE_AVX2__ __attribute__((__target__("avx2"),__min_vector_width__(256)))
#else
#define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2")))
#define __ATTRIBUTE_AVX__ __attribute__((__target__("avx")))
#define __ATTRIBUTE_AVX2__ __attribute__((__target__("avx2")))
#endif
#define __INTRIN_INLINE_SSE2 __INTRIN_INLINE __ATTRIBUTE_SSE2__
#define __INTRIN_INLINE_AVX __INTRIN_INLINE __ATTRIBUTE_AVX__
#define __INTRIN_INLINE_AVX2 __INTRIN_INLINE __ATTRIBUTE_AVX2__

__INTRIN_INLINE_AVX __m256i __cdecl _mm256_cmpeq_epi8(__m256i __A, __m256i __B)
{
    return (__m256i)((__v32qi)__A == (__v32qi)__B);
}

__INTRIN_INLINE_AVX __m256i __cdecl _mm256_cmpeq_epi16(__m256i __A, __m256i __B)
{
    return (__m256i)((__v16hi)__A == (__v16hi)__B);
}

__INTRIN_INLINE_AVX2 int __cdecl _mm256_movemask_epi8(__m256i __A)
{
    return __builtin_ia32_pmovmskb256((__v32qi)__A);
}

__INTRIN_INLINE_AVX __m256i __cdecl _mm256_setzero_si256(void)
{
    return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 };
}

__INTRIN_INLINE void __cdecl _mm256_zeroupper(void)
{
    __asm__ __volatile__("vzeroupper");
}

/*
 * RDRAND / RDSEED step intrinsics: each attempt stores a hardware random
 * value through random_val and returns 1 on success (CF set by the
 * instruction) or 0 if no value was available.
 */
__INTRIN_INLINE int _rdrand16_step(unsigned short* random_val)
{
    unsigned char ok;
    __asm__ __volatile__("rdrand %0; setc %1" : "=r"(*random_val), "=qm"(ok));
    return (int)ok;
}

__INTRIN_INLINE int _rdrand32_step(unsigned int* random_val)
{
    unsigned char ok;
    __asm__ __volatile__("rdrand %0; setc %1" : "=r"(*random_val), "=qm"(ok));
    return (int)ok;
}

#if defined(__x86_64__)
__INTRIN_INLINE int _rdrand64_step(unsigned __int64* random_val)
{
    unsigned char ok;
    __asm__ __volatile__("rdrand %0; setc %1" : "=r"(*random_val), "=qm"(ok));
    return (int)ok;
}
#endif // __x86_64__

__INTRIN_INLINE int _rdseed16_step(unsigned short* random_val)
{
    unsigned char ok;
    __asm__ __volatile__("rdseed %0; setc %1" : "=r"(*random_val), "=qm"(ok));
    return (int)ok;
}

__INTRIN_INLINE int _rdseed32_step(unsigned int* random_val)
{
    unsigned char ok;
    __asm__ __volatile__("rdseed %0; setc %1" : "=r"(*random_val), "=qm"(ok));
    return (int)ok;
}

#if defined(__x86_64__)
__INTRIN_INLINE int _rdseed64_step(unsigned __int64* random_val)
{
    unsigned char ok;
    __asm__ __volatile__("rdseed %0; setc %1" : "=r"(*random_val), "=qm"(ok));
    return (int)ok;
}
#endif // __x86_64__

#endif /* _MSC_VER */

#ifdef __cplusplus
} // extern "C"
#endif
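
#if 0
/*
 * Usage sketch (illustrative only, not part of the header): compare a
 * 32-byte block against zero with the AVX2 intrinsics declared above and
 * return a bitmask with one bit per zero byte. The function name and the
 * assumption that p points to 32-byte aligned data are hypothetical.
 */
static int example_zero_byte_mask(const __m256i *p)
{
    __m256i data = *p;                              /* 32-byte aligned load */
    __m256i zero = _mm256_setzero_si256();          /* all-zero vector */
    __m256i eq   = _mm256_cmpeq_epi8(data, zero);   /* 0xFF where byte == 0 */
    int mask     = _mm256_movemask_epi8(eq);        /* one bit per input byte */
    _mm256_zeroupper();                             /* leave AVX upper state clean */
    return mask;                                    /* non-zero => zero byte found */
}
#endif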