1 /* SPDX-License-Identifier: MIT
2 *
3 * Permission is hereby granted, free of charge, to any person
4 * obtaining a copy of this software and associated documentation
5 * files (the "Software"), to deal in the Software without
6 * restriction, including without limitation the rights to use, copy,
7 * modify, merge, publish, distribute, sublicense, and/or sell copies
8 * of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be
12 * included in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Copyright:
24 * 2020 Evan Nemerson <evan@nemerson.com>
25 * 2020 Ashleigh Newman-Jones <ashnewman-jones@hotmail.co.uk>
26 */
27
28 #if !defined(SIMDE_X86_AVX512_2INTERSECT_H)
29 #define SIMDE_X86_AVX512_2INTERSECT_H
30
31 #include "types.h"
32
33 HEDLEY_DIAGNOSTIC_PUSH
34 SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
35 SIMDE_BEGIN_DECLS_
36
37 SIMDE_FUNCTION_ATTRIBUTES
38 void
simde_mm_2intersect_epi32(simde__m128i a,simde__m128i b,simde__mmask8 * k1,simde__mmask8 * k2)39 simde_mm_2intersect_epi32(simde__m128i a, simde__m128i b, simde__mmask8 *k1, simde__mmask8 *k2) {
40 #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
41 _mm_2intersect_epi32(a, b, k1, k2);
42 #else
43 simde__m128i_private
44 a_ = simde__m128i_to_private(a),
45 b_ = simde__m128i_to_private(b);
46 simde__mmask8
47 k1_ = 0,
48 k2_ = 0;
49
50 for (size_t i = 0 ; i < sizeof(a_.i32) / sizeof(a_.i32[0]) ; i++) {
51 #if defined(SIMDE_ENABLE_OPENMP)
52 #pragma omp simd reduction(|:k1_) reduction(|:k2_)
53 #else
54 SIMDE_VECTORIZE
55 #endif
56 for (size_t j = 0 ; j < sizeof(b_.i32) / sizeof(b_.i32[0]) ; j++) {
57 const int32_t m = a_.i32[i] == b_.i32[j];
58 k1_ |= m << i;
59 k2_ |= m << j;
60 }
61 }
62
63 *k1 = k1_;
64 *k2 = k2_;
65 #endif
66 }
/* Native alias: maps the Intel intrinsic name _mm_2intersect_epi32 onto the
 * portable implementation when native aliases are enabled for both
 * VP2INTERSECT and VL.  The intrinsic's name has a single leading
 * underscore; a double-underscore spelling would leave the real name
 * unaliased. */
#if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_2intersect_epi32
  #define _mm_2intersect_epi32(a, b, k1, k2) simde_mm_2intersect_epi32(a, b, k1, k2)
#endif
71
72 SIMDE_FUNCTION_ATTRIBUTES
73 void
simde_mm_2intersect_epi64(simde__m128i a,simde__m128i b,simde__mmask8 * k1,simde__mmask8 * k2)74 simde_mm_2intersect_epi64(simde__m128i a, simde__m128i b, simde__mmask8 *k1, simde__mmask8 *k2) {
75 #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
76 _mm_2intersect_epi64(a, b, k1, k2);
77 #else
78 simde__m128i_private
79 a_ = simde__m128i_to_private(a),
80 b_ = simde__m128i_to_private(b);
81 simde__mmask8
82 k1_ = 0,
83 k2_ = 0;
84
85 for (size_t i = 0 ; i < sizeof(a_.i64) / sizeof(a_.i64[0]) ; i++) {
86 #if defined(SIMDE_ENABLE_OPENMP)
87 #pragma omp simd reduction(|:k1_) reduction(|:k2_)
88 #else
89 SIMDE_VECTORIZE
90 #endif
91 for (size_t j = 0 ; j < sizeof(b_.i64) / sizeof(b_.i64[0]) ; j++) {
92 const int32_t m = a_.i64[i] == b_.i64[j];
93 k1_ |= m << i;
94 k2_ |= m << j;
95 }
96 }
97
98 *k1 = k1_;
99 *k2 = k2_;
100 #endif
101 }
/* Native alias: maps the Intel intrinsic name _mm_2intersect_epi64 onto the
 * portable implementation when native aliases are enabled for both
 * VP2INTERSECT and VL.  The intrinsic's name has a single leading
 * underscore; a double-underscore spelling would leave the real name
 * unaliased. */
#if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
  #undef _mm_2intersect_epi64
  #define _mm_2intersect_epi64(a, b, k1, k2) simde_mm_2intersect_epi64(a, b, k1, k2)
#endif
106
107 SIMDE_FUNCTION_ATTRIBUTES
108 void
simde_mm256_2intersect_epi32(simde__m256i a,simde__m256i b,simde__mmask8 * k1,simde__mmask8 * k2)109 simde_mm256_2intersect_epi32(simde__m256i a, simde__m256i b, simde__mmask8 *k1, simde__mmask8 *k2) {
110 #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
111 _mm256_2intersect_epi32(a, b, k1, k2);
112 #else
113 simde__m256i_private
114 a_ = simde__m256i_to_private(a),
115 b_ = simde__m256i_to_private(b);
116 simde__mmask8
117 k1_ = 0,
118 k2_ = 0;
119
120 for (size_t i = 0 ; i < sizeof(a_.i32) / sizeof(a_.i32[0]) ; i++) {
121 #if defined(SIMDE_ENABLE_OPENMP)
122 #pragma omp simd reduction(|:k1_) reduction(|:k2_)
123 #else
124 SIMDE_VECTORIZE
125 #endif
126 for (size_t j = 0 ; j < sizeof(b_.i32) / sizeof(b_.i32[0]) ; j++) {
127 const int32_t m = a_.i32[i] == b_.i32[j];
128 k1_ |= m << i;
129 k2_ |= m << j;
130 }
131 }
132
133 *k1 = k1_;
134 *k2 = k2_;
135 #endif
136 }
/* Native alias: when aliases are enabled for both VP2INTERSECT and VL, the
 * Intel name _mm256_2intersect_epi32 expands to the portable implementation. */
#if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES)
  #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
    #undef _mm256_2intersect_epi32
    #define _mm256_2intersect_epi32(a, b, k1, k2) simde_mm256_2intersect_epi32(a, b, k1, k2)
  #endif
#endif
141
142 SIMDE_FUNCTION_ATTRIBUTES
143 void
simde_mm256_2intersect_epi64(simde__m256i a,simde__m256i b,simde__mmask8 * k1,simde__mmask8 * k2)144 simde_mm256_2intersect_epi64(simde__m256i a, simde__m256i b, simde__mmask8 *k1, simde__mmask8 *k2) {
145 #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
146 _mm256_2intersect_epi64(a, b, k1, k2);
147 #else
148 simde__m256i_private
149 a_ = simde__m256i_to_private(a),
150 b_ = simde__m256i_to_private(b);
151 simde__mmask8
152 k1_ = 0,
153 k2_ = 0;
154
155 for (size_t i = 0 ; i < sizeof(a_.i64) / sizeof(a_.i64[0]) ; i++) {
156 #if defined(SIMDE_ENABLE_OPENMP)
157 #pragma omp simd reduction(|:k1_) reduction(|:k2_)
158 #else
159 SIMDE_VECTORIZE
160 #endif
161 for (size_t j = 0 ; j < sizeof(b_.i64) / sizeof(b_.i64[0]) ; j++) {
162 const int32_t m = a_.i64[i] == b_.i64[j];
163 k1_ |= m << i;
164 k2_ |= m << j;
165 }
166 }
167
168 *k1 = k1_;
169 *k2 = k2_;
170 #endif
171 }
/* Native alias: when aliases are enabled for both VP2INTERSECT and VL, the
 * Intel name _mm256_2intersect_epi64 expands to the portable implementation. */
#if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES)
  #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
    #undef _mm256_2intersect_epi64
    #define _mm256_2intersect_epi64(a, b, k1, k2) simde_mm256_2intersect_epi64(a, b, k1, k2)
  #endif
#endif
176
177 SIMDE_FUNCTION_ATTRIBUTES
178 void
simde_mm512_2intersect_epi32(simde__m512i a,simde__m512i b,simde__mmask16 * k1,simde__mmask16 * k2)179 simde_mm512_2intersect_epi32(simde__m512i a, simde__m512i b, simde__mmask16 *k1, simde__mmask16 *k2) {
180 #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE)
181 _mm512_2intersect_epi32(a, b, k1, k2);
182 #else
183 simde__m512i_private
184 a_ = simde__m512i_to_private(a),
185 b_ = simde__m512i_to_private(b);
186 simde__mmask16
187 k1_ = 0,
188 k2_ = 0;
189
190 for (size_t i = 0 ; i < sizeof(a_.i32) / sizeof(a_.i32[0]) ; i++) {
191 #if defined(SIMDE_ENABLE_OPENMP)
192 #pragma omp simd reduction(|:k1_) reduction(|:k2_)
193 #else
194 SIMDE_VECTORIZE
195 #endif
196 for (size_t j = 0 ; j < sizeof(b_.i32) / sizeof(b_.i32[0]) ; j++) {
197 const int32_t m = a_.i32[i] == b_.i32[j];
198 k1_ |= m << i;
199 k2_ |= m << j;
200 }
201 }
202
203 *k1 = k1_;
204 *k2 = k2_;
205 #endif
206 }
/* Native alias: when VP2INTERSECT native aliases are enabled, the Intel name
 * _mm512_2intersect_epi32 expands to the portable implementation. */
#ifdef SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES
  #undef _mm512_2intersect_epi32
  #define _mm512_2intersect_epi32(a, b, k1, k2) \
    simde_mm512_2intersect_epi32(a, b, k1, k2)
#endif
211
212 SIMDE_FUNCTION_ATTRIBUTES
213 void
simde_mm512_2intersect_epi64(simde__m512i a,simde__m512i b,simde__mmask8 * k1,simde__mmask8 * k2)214 simde_mm512_2intersect_epi64(simde__m512i a, simde__m512i b, simde__mmask8 *k1, simde__mmask8 *k2) {
215 #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE)
216 _mm512_2intersect_epi64(a, b, k1, k2);
217 #else
218 simde__m512i_private
219 a_ = simde__m512i_to_private(a),
220 b_ = simde__m512i_to_private(b);
221 simde__mmask8
222 k1_ = 0,
223 k2_ = 0;
224
225 for (size_t i = 0 ; i < sizeof(a_.i64) / sizeof(a_.i64[0]) ; i++) {
226 #if defined(SIMDE_ENABLE_OPENMP)
227 #pragma omp simd reduction(|:k1_) reduction(|:k2_)
228 #else
229 SIMDE_VECTORIZE
230 #endif
231 for (size_t j = 0 ; j < sizeof(b_.i64) / sizeof(b_.i64[0]) ; j++) {
232 const int32_t m = a_.i64[i] == b_.i64[j];
233 k1_ |= m << i;
234 k2_ |= m << j;
235 }
236 }
237
238 *k1 = k1_;
239 *k2 = k2_;
240 #endif
241 }
/* Native alias: when VP2INTERSECT native aliases are enabled, the Intel name
 * _mm512_2intersect_epi64 expands to the portable implementation. */
#ifdef SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES
  #undef _mm512_2intersect_epi64
  #define _mm512_2intersect_epi64(a, b, k1, k2) \
    simde_mm512_2intersect_epi64(a, b, k1, k2)
#endif
246
247 SIMDE_END_DECLS_
248 HEDLEY_DIAGNOSTIC_POP
249
250 #endif /* !defined(SIMDE_X86_AVX512_2INTERSECT_H) */
251