1 /* SPDX-License-Identifier: MIT
2  *
3  * Permission is hereby granted, free of charge, to any person
4  * obtaining a copy of this software and associated documentation
5  * files (the "Software"), to deal in the Software without
6  * restriction, including without limitation the rights to use, copy,
7  * modify, merge, publish, distribute, sublicense, and/or sell copies
8  * of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be
12  * included in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Copyright:
24  *   2020      Evan Nemerson <evan@nemerson.com>
25  *   2020      Ashleigh Newman-Jones <ashnewman-jones@hotmail.co.uk>
26  */
27 
28 #if !defined(SIMDE_X86_AVX512_2INTERSECT_H)
29 #define SIMDE_X86_AVX512_2INTERSECT_H
30 
31 #include "types.h"
32 
33 HEDLEY_DIAGNOSTIC_PUSH
34 SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
35 SIMDE_BEGIN_DECLS_
36 
37 SIMDE_FUNCTION_ATTRIBUTES
38 void
simde_mm_2intersect_epi32(simde__m128i a,simde__m128i b,simde__mmask8 * k1,simde__mmask8 * k2)39 simde_mm_2intersect_epi32(simde__m128i a, simde__m128i b, simde__mmask8 *k1, simde__mmask8 *k2) {
40   #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
41     _mm_2intersect_epi32(a, b, k1, k2);
42   #else
43     simde__m128i_private
44       a_ = simde__m128i_to_private(a),
45       b_ = simde__m128i_to_private(b);
46     simde__mmask8
47       k1_ = 0,
48       k2_ = 0;
49 
50     for (size_t i = 0 ; i < sizeof(a_.i32) / sizeof(a_.i32[0]) ; i++) {
51       #if defined(SIMDE_ENABLE_OPENMP)
52         #pragma omp simd reduction(|:k1_) reduction(|:k2_)
53       #else
54         SIMDE_VECTORIZE
55       #endif
56       for (size_t j = 0 ; j < sizeof(b_.i32) / sizeof(b_.i32[0]) ; j++) {
57         const int32_t m = a_.i32[i] == b_.i32[j];
58         k1_ |= m << i;
59         k2_ |= m << j;
60       }
61     }
62 
63     *k1 = k1_;
64     *k2 = k2_;
65   #endif
66 }
67 #if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
68   #undef __mm_2intersect_epi32
69   #define __mm_2intersect_epi32(a,b, k1, k2) simde_mm_2intersect_epi32(a, b, k1, k2)
70 #endif
71 
72 SIMDE_FUNCTION_ATTRIBUTES
73 void
simde_mm_2intersect_epi64(simde__m128i a,simde__m128i b,simde__mmask8 * k1,simde__mmask8 * k2)74 simde_mm_2intersect_epi64(simde__m128i a, simde__m128i b, simde__mmask8 *k1, simde__mmask8 *k2) {
75   #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
76     _mm_2intersect_epi64(a, b, k1, k2);
77   #else
78     simde__m128i_private
79       a_ = simde__m128i_to_private(a),
80       b_ = simde__m128i_to_private(b);
81     simde__mmask8
82       k1_ = 0,
83       k2_ = 0;
84 
85     for (size_t i = 0 ; i < sizeof(a_.i64) / sizeof(a_.i64[0]) ; i++) {
86       #if defined(SIMDE_ENABLE_OPENMP)
87         #pragma omp simd reduction(|:k1_) reduction(|:k2_)
88       #else
89         SIMDE_VECTORIZE
90       #endif
91       for (size_t j = 0 ; j < sizeof(b_.i64) / sizeof(b_.i64[0]) ; j++) {
92         const int32_t m = a_.i64[i] == b_.i64[j];
93         k1_ |= m << i;
94         k2_ |= m << j;
95       }
96     }
97 
98     *k1 = k1_;
99     *k2 = k2_;
100   #endif
101 }
102 #if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
103   #undef __mm_2intersect_epi64
104   #define __mm_2intersect_epi64(a,b, k1, k2) simde_mm_2intersect_epi64(a, b, k1, k2)
105 #endif
106 
107 SIMDE_FUNCTION_ATTRIBUTES
108 void
simde_mm256_2intersect_epi32(simde__m256i a,simde__m256i b,simde__mmask8 * k1,simde__mmask8 * k2)109 simde_mm256_2intersect_epi32(simde__m256i a, simde__m256i b, simde__mmask8 *k1, simde__mmask8 *k2) {
110   #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
111     _mm256_2intersect_epi32(a, b, k1, k2);
112   #else
113     simde__m256i_private
114       a_ = simde__m256i_to_private(a),
115       b_ = simde__m256i_to_private(b);
116     simde__mmask8
117       k1_ = 0,
118       k2_ = 0;
119 
120     for (size_t i = 0 ; i < sizeof(a_.i32) / sizeof(a_.i32[0]) ; i++) {
121       #if defined(SIMDE_ENABLE_OPENMP)
122         #pragma omp simd reduction(|:k1_) reduction(|:k2_)
123       #else
124         SIMDE_VECTORIZE
125       #endif
126       for (size_t j = 0 ; j < sizeof(b_.i32) / sizeof(b_.i32[0]) ; j++) {
127         const int32_t m = a_.i32[i] == b_.i32[j];
128         k1_ |= m << i;
129         k2_ |= m << j;
130       }
131     }
132 
133     *k1 = k1_;
134     *k2 = k2_;
135   #endif
136 }
137 #if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
138   #undef _mm256_2intersect_epi32
139   #define _mm256_2intersect_epi32(a,b, k1, k2) simde_mm256_2intersect_epi32(a, b, k1, k2)
140 #endif
141 
142 SIMDE_FUNCTION_ATTRIBUTES
143 void
simde_mm256_2intersect_epi64(simde__m256i a,simde__m256i b,simde__mmask8 * k1,simde__mmask8 * k2)144 simde_mm256_2intersect_epi64(simde__m256i a, simde__m256i b, simde__mmask8 *k1, simde__mmask8 *k2) {
145   #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
146     _mm256_2intersect_epi64(a, b, k1, k2);
147   #else
148     simde__m256i_private
149       a_ = simde__m256i_to_private(a),
150       b_ = simde__m256i_to_private(b);
151     simde__mmask8
152       k1_ = 0,
153       k2_ = 0;
154 
155     for (size_t i = 0 ; i < sizeof(a_.i64) / sizeof(a_.i64[0]) ; i++) {
156       #if defined(SIMDE_ENABLE_OPENMP)
157         #pragma omp simd reduction(|:k1_) reduction(|:k2_)
158       #else
159         SIMDE_VECTORIZE
160       #endif
161       for (size_t j = 0 ; j < sizeof(b_.i64) / sizeof(b_.i64[0]) ; j++) {
162         const int32_t m = a_.i64[i] == b_.i64[j];
163         k1_ |= m << i;
164         k2_ |= m << j;
165       }
166     }
167 
168     *k1 = k1_;
169     *k2 = k2_;
170   #endif
171 }
172 #if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
173   #undef _mm256_2intersect_epi64
174   #define _mm256_2intersect_epi64(a,b, k1, k2) simde_mm256_2intersect_epi64(a, b, k1, k2)
175 #endif
176 
177 SIMDE_FUNCTION_ATTRIBUTES
178 void
simde_mm512_2intersect_epi32(simde__m512i a,simde__m512i b,simde__mmask16 * k1,simde__mmask16 * k2)179 simde_mm512_2intersect_epi32(simde__m512i a, simde__m512i b, simde__mmask16 *k1, simde__mmask16 *k2) {
180   #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE)
181     _mm512_2intersect_epi32(a, b, k1, k2);
182   #else
183     simde__m512i_private
184       a_ = simde__m512i_to_private(a),
185       b_ = simde__m512i_to_private(b);
186     simde__mmask16
187       k1_ = 0,
188       k2_ = 0;
189 
190     for (size_t i = 0 ; i < sizeof(a_.i32) / sizeof(a_.i32[0]) ; i++) {
191       #if defined(SIMDE_ENABLE_OPENMP)
192         #pragma omp simd reduction(|:k1_) reduction(|:k2_)
193       #else
194         SIMDE_VECTORIZE
195       #endif
196       for (size_t j = 0 ; j < sizeof(b_.i32) / sizeof(b_.i32[0]) ; j++) {
197         const int32_t m = a_.i32[i] == b_.i32[j];
198         k1_ |= m << i;
199         k2_ |= m << j;
200       }
201     }
202 
203     *k1 = k1_;
204     *k2 = k2_;
205   #endif
206 }
207 #if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES)
208   #undef _mm512_2intersect_epi32
209   #define _mm512_2intersect_epi32(a, b, k1, k2) simde_mm512_2intersect_epi32(a, b, k1, k2)
210 #endif
211 
212 SIMDE_FUNCTION_ATTRIBUTES
213 void
simde_mm512_2intersect_epi64(simde__m512i a,simde__m512i b,simde__mmask8 * k1,simde__mmask8 * k2)214 simde_mm512_2intersect_epi64(simde__m512i a, simde__m512i b, simde__mmask8 *k1, simde__mmask8 *k2) {
215   #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE)
216     _mm512_2intersect_epi64(a, b, k1, k2);
217   #else
218     simde__m512i_private
219       a_ = simde__m512i_to_private(a),
220       b_ = simde__m512i_to_private(b);
221     simde__mmask8
222       k1_ = 0,
223       k2_ = 0;
224 
225     for (size_t i = 0 ; i < sizeof(a_.i64) / sizeof(a_.i64[0]) ; i++) {
226       #if defined(SIMDE_ENABLE_OPENMP)
227         #pragma omp simd reduction(|:k1_) reduction(|:k2_)
228       #else
229         SIMDE_VECTORIZE
230       #endif
231       for (size_t j = 0 ; j < sizeof(b_.i64) / sizeof(b_.i64[0]) ; j++) {
232         const int32_t m = a_.i64[i] == b_.i64[j];
233         k1_ |= m << i;
234         k2_ |= m << j;
235       }
236     }
237 
238     *k1 = k1_;
239     *k2 = k2_;
240   #endif
241 }
242 #if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES)
243   #undef _mm512_2intersect_epi64
244   #define _mm512_2intersect_epi64(a, b, k1, k2) simde_mm512_2intersect_epi64(a, b, k1, k2)
245 #endif
246 
247 SIMDE_END_DECLS_
248 HEDLEY_DIAGNOSTIC_POP
249 
250 #endif /* !defined(SIMDE_X86_AVX512_2INTERSECT_H) */
251