1 // 2 // corecrt_internal_simd.h 3 // 4 // Copyright (c) Microsoft Corporation. All rights reserved. 5 // 6 // This internal header defines internal SIMD utilities. This header may only 7 // be included in C++ translation units. 8 // 9 #pragma once 10 11 #include <intrin.h> 12 #include <isa_availability.h> 13 #include <stdint.h> 14 15 #if (defined _M_IX86 || defined _M_X64) && !defined(_M_HYBRID_X86_ARM64) && !defined(_M_ARM64EC) 16 #define _CRT_SIMD_SUPPORT_AVAILABLE 17 #endif 18 19 #if defined _CRT_SIMD_SUPPORT_AVAILABLE 20 21 #if defined(__clang__) 22 #define _UCRT_ENABLE_EXTENDED_ISA \ 23 _Pragma("clang attribute push(__attribute__((target(\"sse2,avx,avx2\"))), apply_to=function)") 24 #define _UCRT_RESTORE_DEFAULT_ISA \ 25 _Pragma("clang attribute pop") 26 #elif defined(__GNUC__) 27 #define _UCRT_ENABLE_EXTENDED_ISA \ 28 _Pragma("GCC push_options") \ 29 _Pragma("GCC target(\"avx2\")") 30 #define _UCRT_RESTORE_DEFAULT_ISA \ 31 _Pragma("GCC pop_options") 32 #else 33 #define _UCRT_ENABLE_EXTENDED_ISA 34 #define _UCRT_RESTORE_DEFAULT_ISA 35 #endif 36 37 _UCRT_ENABLE_EXTENDED_ISA 38 39 extern "C" int __isa_available; 40 41 enum class __crt_simd_isa 42 { 43 sse2, 44 avx2 45 }; 46 47 template <__crt_simd_isa Isa> 48 struct __crt_simd_cleanup_guard; 49 50 template <__crt_simd_isa Isa> 51 struct __crt_simd_pack_traits; 52 53 template <__crt_simd_isa Isa, typename Element> 54 struct __crt_simd_traits; 55 56 57 58 template <__crt_simd_isa Isa, typename Element> 59 struct __crt_simd_element_traits 60 : __crt_simd_pack_traits<Isa> 61 { 62 using element_type = Element; 63 using __crt_simd_pack_traits<Isa>::pack_size; 64 65 enum : size_t 66 { 67 element_size = sizeof(element_type), 68 elements_per_pack = pack_size / element_size 69 }; 70 }; 71 72 73 74 template <> 75 struct __crt_simd_cleanup_guard<__crt_simd_isa::sse2> 76 { 77 // No cleanup required for SSE2 usage, however we still need to define 78 // the no-op destructor in order to avoid unreferened local variable 79 // warnings when this cleanup guard is used. 80 ~__crt_simd_cleanup_guard() throw() 81 { 82 } 83 }; 84 85 template <> 86 struct __crt_simd_pack_traits<__crt_simd_isa::sse2> 87 { 88 using pack_type = __m128i; 89 90 enum : size_t { pack_size = sizeof(pack_type) }; 91 92 static __forceinline pack_type get_zero_pack() throw() 93 { 94 return _mm_setzero_si128(); 95 } 96 97 static __forceinline int compute_byte_mask(pack_type const x) throw() 98 { 99 return _mm_movemask_epi8(x); 100 } 101 }; 102 103 template <> 104 struct __crt_simd_traits<__crt_simd_isa::sse2, uint8_t> 105 : __crt_simd_element_traits<__crt_simd_isa::sse2, uint8_t> 106 { 107 static __forceinline pack_type compare_equals(pack_type const x, pack_type const y) throw() 108 { 109 return _mm_cmpeq_epi8(x, y); 110 } 111 }; 112 113 template <> 114 struct __crt_simd_traits<__crt_simd_isa::sse2, uint16_t> 115 : __crt_simd_element_traits<__crt_simd_isa::sse2, uint16_t> 116 { 117 static __forceinline pack_type compare_equals(pack_type const x, pack_type const y) throw() 118 { 119 return _mm_cmpeq_epi16(x, y); 120 } 121 }; 122 123 124 125 template <> 126 struct __crt_simd_cleanup_guard<__crt_simd_isa::avx2> 127 { 128 ~__crt_simd_cleanup_guard() 129 { 130 // After executing AVX2 instructions, we must zero the upper halves 131 // of the YMM registers before returning. See the Intel article 132 // "Intel AVX State Transitions: Migrating SSE Code to AVX" for 133 // further details. 134 _mm256_zeroupper(); 135 } 136 }; 137 138 template <> 139 struct __crt_simd_pack_traits<__crt_simd_isa::avx2> 140 { 141 using pack_type = __m256i; 142 143 enum : size_t { pack_size = sizeof(pack_type) }; 144 145 static __forceinline pack_type get_zero_pack() throw() 146 { 147 return _mm256_setzero_si256(); 148 } 149 150 static __forceinline int compute_byte_mask(pack_type const x) throw() 151 { 152 return _mm256_movemask_epi8(x); 153 } 154 }; 155 156 template <> 157 struct __crt_simd_traits<__crt_simd_isa::avx2, uint8_t> 158 : __crt_simd_element_traits<__crt_simd_isa::avx2, uint8_t> 159 { 160 static __forceinline pack_type compare_equals(pack_type const x, pack_type const y) throw() 161 { 162 return _mm256_cmpeq_epi8(x, y); 163 } 164 }; 165 166 template <> 167 struct __crt_simd_traits<__crt_simd_isa::avx2, uint16_t> 168 : __crt_simd_element_traits<__crt_simd_isa::avx2, uint16_t> 169 { 170 static __forceinline pack_type compare_equals(pack_type const x, pack_type const y) throw() 171 { 172 return _mm256_cmpeq_epi16(x, y); 173 } 174 }; 175 176 _UCRT_RESTORE_DEFAULT_ISA 177 178 #endif // _CRT_SIMD_SUPPORT_AVAILABLE 179