1 //
2 // corecrt_internal_simd.h
3 //
4 //      Copyright (c) Microsoft Corporation.  All rights reserved.
5 //
6 // This internal header defines internal SIMD utilities.  This header may only
7 // be included in C++ translation units.
8 //
9 #pragma once
10 
11 #include <intrin.h>
12 #include <isa_availability.h>
13 #include <stdint.h>
14 
// Enable the SIMD utilities when targeting x86 or x64, except in builds where
// _M_HYBRID_X86_ARM64 or _M_ARM64EC is defined.
#if (defined(_M_IX86) || defined(_M_X64)) \
    && !defined(_M_HYBRID_X86_ARM64)      \
    && !defined(_M_ARM64EC)
    #define _CRT_SIMD_SUPPORT_AVAILABLE
#endif
18 
19 #if defined _CRT_SIMD_SUPPORT_AVAILABLE
20 
// _UCRT_ENABLE_EXTENDED_ISA and _UCRT_RESTORE_DEFAULT_ISA bracket a region of
// declarations:
//
//  * Clang:  push a target("sse2,avx,avx2") attribute that applies to every
//            function in the region, then pop it again.
//  * GCC:    save the current options and switch the target to "avx2", then
//            restore the saved options.
//  * Other:  both macros expand to nothing.
//
// Clang is tested first because it also defines __GNUC__.
#if defined(__clang__)
    #define _UCRT_ENABLE_EXTENDED_ISA \
        _Pragma("clang attribute push(__attribute__((target(\"sse2,avx,avx2\"))), apply_to=function)")
    #define _UCRT_RESTORE_DEFAULT_ISA \
        _Pragma("clang attribute pop")
#elif defined(__GNUC__)
    #define _UCRT_ENABLE_EXTENDED_ISA \
        _Pragma("GCC push_options")   \
        _Pragma("GCC target(\"avx2\")")
    #define _UCRT_RESTORE_DEFAULT_ISA \
        _Pragma("GCC pop_options")
#else
    #define _UCRT_ENABLE_EXTENDED_ISA
    #define _UCRT_RESTORE_DEFAULT_ISA
#endif
36 
37 _UCRT_ENABLE_EXTENDED_ISA
38 
39     extern "C" int __isa_available;
40 
41     enum class __crt_simd_isa
42     {
43         sse2,
44         avx2
45     };
46 
47     template <__crt_simd_isa Isa>
48     struct __crt_simd_cleanup_guard;
49 
50     template <__crt_simd_isa Isa>
51     struct __crt_simd_pack_traits;
52 
53     template <__crt_simd_isa Isa, typename Element>
54     struct __crt_simd_traits;
55 
56 
57 
58     template <__crt_simd_isa Isa, typename Element>
59     struct __crt_simd_element_traits
60         : __crt_simd_pack_traits<Isa>
61     {
62         using element_type = Element;
63         using __crt_simd_pack_traits<Isa>::pack_size;
64 
65         enum : size_t
66         {
67             element_size      = sizeof(element_type),
68             elements_per_pack = pack_size / element_size
69         };
70     };
71 
72 
73 
74     template <>
75     struct __crt_simd_cleanup_guard<__crt_simd_isa::sse2>
76     {
77         // No cleanup required for SSE2 usage, however we still need to define
78         // the no-op destructor in order to avoid unreferened local variable
79         // warnings when this cleanup guard is used.
80         ~__crt_simd_cleanup_guard() throw()
81         {
82         }
83     };
84 
85     template <>
86     struct __crt_simd_pack_traits<__crt_simd_isa::sse2>
87     {
88         using pack_type = __m128i;
89 
90         enum : size_t { pack_size = sizeof(pack_type) };
91 
92         static __forceinline pack_type get_zero_pack() throw()
93         {
94             return _mm_setzero_si128();
95         }
96 
97         static __forceinline int compute_byte_mask(pack_type const x) throw()
98         {
99             return _mm_movemask_epi8(x);
100         }
101     };
102 
103     template <>
104     struct __crt_simd_traits<__crt_simd_isa::sse2, uint8_t>
105         : __crt_simd_element_traits<__crt_simd_isa::sse2, uint8_t>
106     {
107         static __forceinline pack_type compare_equals(pack_type const x, pack_type const y) throw()
108         {
109             return _mm_cmpeq_epi8(x, y);
110         }
111     };
112 
113     template <>
114     struct __crt_simd_traits<__crt_simd_isa::sse2, uint16_t>
115         : __crt_simd_element_traits<__crt_simd_isa::sse2, uint16_t>
116     {
117         static __forceinline pack_type compare_equals(pack_type const x, pack_type const y) throw()
118         {
119             return _mm_cmpeq_epi16(x, y);
120         }
121     };
122 
123 
124 
125     template <>
126     struct __crt_simd_cleanup_guard<__crt_simd_isa::avx2>
127     {
128         ~__crt_simd_cleanup_guard()
129         {
130             // After executing AVX2 instructions, we must zero the upper halves
131             // of the YMM registers before returning.  See the Intel article
132             // "Intel AVX State Transitions: Migrating SSE Code to AVX" for
133             // further details.
134             _mm256_zeroupper();
135         }
136     };
137 
138     template <>
139     struct __crt_simd_pack_traits<__crt_simd_isa::avx2>
140     {
141         using pack_type = __m256i;
142 
143         enum : size_t { pack_size = sizeof(pack_type) };
144 
145         static __forceinline pack_type get_zero_pack() throw()
146         {
147             return _mm256_setzero_si256();
148         }
149 
150         static __forceinline int compute_byte_mask(pack_type const x) throw()
151         {
152             return _mm256_movemask_epi8(x);
153         }
154     };
155 
156     template <>
157     struct __crt_simd_traits<__crt_simd_isa::avx2, uint8_t>
158         : __crt_simd_element_traits<__crt_simd_isa::avx2, uint8_t>
159     {
160         static __forceinline pack_type compare_equals(pack_type const x, pack_type const y) throw()
161         {
162             return _mm256_cmpeq_epi8(x, y);
163         }
164     };
165 
166     template <>
167     struct __crt_simd_traits<__crt_simd_isa::avx2, uint16_t>
168         : __crt_simd_element_traits<__crt_simd_isa::avx2, uint16_t>
169     {
170         static __forceinline pack_type compare_equals(pack_type const x, pack_type const y) throw()
171         {
172             return _mm256_cmpeq_epi16(x, y);
173         }
174     };
175 
176 _UCRT_RESTORE_DEFAULT_ISA
177 
178 #endif // _CRT_SIMD_SUPPORT_AVAILABLE
179