1 /* SPDX-License-Identifier: MIT
2  *
3  * Permission is hereby granted, free of charge, to any person
4  * obtaining a copy of this software and associated documentation
5  * files (the "Software"), to deal in the Software without
6  * restriction, including without limitation the rights to use, copy,
7  * modify, merge, publish, distribute, sublicense, and/or sell copies
8  * of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be
12  * included in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
18  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Copyright:
24  *   2020      Evan Nemerson <evan@nemerson.com>
25  */
26 
27 #if !defined(SIMDE_ARM_NEON_BIC_H)
28 #define SIMDE_ARM_NEON_BIC_H
29 
30 #include "dup_n.h"
31 #include "types.h"
32 
33 HEDLEY_DIAGNOSTIC_PUSH
34 SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
35 SIMDE_BEGIN_DECLS_
36 
37 SIMDE_FUNCTION_ATTRIBUTES
38 simde_int8x8_t
simde_vbic_s8(simde_int8x8_t a,simde_int8x8_t b)39 simde_vbic_s8(simde_int8x8_t a, simde_int8x8_t b) {
40   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
41     return vbic_s8(a, b);
42   #elif defined(SIMDE_X86_MMX_NATIVE)
43     return _mm_andnot_si64(b, a);
44   #else
45     simde_int8x8_private
46       a_ = simde_int8x8_to_private(a),
47       b_ = simde_int8x8_to_private(b),
48       r_;
49 
50     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
51       r_.values[i] = a_.values[i] & ~b_.values[i];
52     }
53 
54     return simde_int8x8_from_private(r_);
55   #endif
56 }
57 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
58   #undef vbic_s8
59   #define vbic_s8(a, b) simde_vbic_s8((a), (b))
60 #endif
61 
62 SIMDE_FUNCTION_ATTRIBUTES
63 simde_int16x4_t
simde_vbic_s16(simde_int16x4_t a,simde_int16x4_t b)64 simde_vbic_s16(simde_int16x4_t a, simde_int16x4_t b) {
65   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
66     return vbic_s16(a, b);
67   #elif defined(SIMDE_X86_MMX_NATIVE)
68     return _mm_andnot_si64(b, a);
69   #else
70     simde_int16x4_private
71       a_ = simde_int16x4_to_private(a),
72       b_ = simde_int16x4_to_private(b),
73       r_;
74 
75     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
76       r_.values[i] = a_.values[i] & ~b_.values[i];
77     }
78 
79     return simde_int16x4_from_private(r_);
80   #endif
81 }
82 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
83   #undef vbic_s16
84   #define vbic_s16(a, b) simde_vbic_s16((a), (b))
85 #endif
86 
87 SIMDE_FUNCTION_ATTRIBUTES
88 simde_int32x2_t
simde_vbic_s32(simde_int32x2_t a,simde_int32x2_t b)89 simde_vbic_s32(simde_int32x2_t a, simde_int32x2_t b) {
90   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
91     return vbic_s32(a, b);
92   #elif defined(SIMDE_X86_MMX_NATIVE)
93     return _mm_andnot_si64(b, a);
94   #else
95     simde_int32x2_private
96       a_ = simde_int32x2_to_private(a),
97       b_ = simde_int32x2_to_private(b),
98       r_;
99 
100     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
101       r_.values[i] = a_.values[i] & ~b_.values[i];
102     }
103 
104     return simde_int32x2_from_private(r_);
105   #endif
106 }
107 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
108   #undef vbic_s32
109   #define vbic_s32(a, b) simde_vbic_s32((a), (b))
110 #endif
111 
112 SIMDE_FUNCTION_ATTRIBUTES
113 simde_int64x1_t
simde_vbic_s64(simde_int64x1_t a,simde_int64x1_t b)114 simde_vbic_s64(simde_int64x1_t a, simde_int64x1_t b) {
115   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
116     return vbic_s64(a, b);
117   #elif defined(SIMDE_X86_MMX_NATIVE)
118     return _mm_andnot_si64(b, a);
119   #else
120     simde_int64x1_private
121       a_ = simde_int64x1_to_private(a),
122       b_ = simde_int64x1_to_private(b),
123       r_;
124 
125     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
126       r_.values[i] = a_.values[i] & ~b_.values[i];
127     }
128 
129     return simde_int64x1_from_private(r_);
130   #endif
131 }
132 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
133   #undef vbic_s64
134   #define vbic_s64(a, b) simde_vbic_s64((a), (b))
135 #endif
136 
137 SIMDE_FUNCTION_ATTRIBUTES
138 simde_uint8x8_t
simde_vbic_u8(simde_uint8x8_t a,simde_uint8x8_t b)139 simde_vbic_u8(simde_uint8x8_t a, simde_uint8x8_t b) {
140   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
141     return vbic_u8(a, b);
142   #elif defined(SIMDE_X86_MMX_NATIVE)
143     return _mm_andnot_si64(b, a);
144   #else
145     simde_uint8x8_private
146       a_ = simde_uint8x8_to_private(a),
147       b_ = simde_uint8x8_to_private(b),
148       r_;
149 
150     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
151       r_.values[i] = a_.values[i] & ~b_.values[i];
152     }
153 
154     return simde_uint8x8_from_private(r_);
155   #endif
156 }
157 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
158   #undef vbic_u8
159   #define vbic_u8(a, b) simde_vbic_u8((a), (b))
160 #endif
161 
162 SIMDE_FUNCTION_ATTRIBUTES
163 simde_uint16x4_t
simde_vbic_u16(simde_uint16x4_t a,simde_uint16x4_t b)164 simde_vbic_u16(simde_uint16x4_t a, simde_uint16x4_t b) {
165   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
166     return vbic_u16(a, b);
167   #elif defined(SIMDE_X86_MMX_NATIVE)
168     return _mm_andnot_si64(b, a);
169   #else
170     simde_uint16x4_private
171       a_ = simde_uint16x4_to_private(a),
172       b_ = simde_uint16x4_to_private(b),
173       r_;
174 
175     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
176       r_.values[i] = a_.values[i] & ~b_.values[i];
177     }
178 
179     return simde_uint16x4_from_private(r_);
180   #endif
181 }
182 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
183   #undef vbic_u16
184   #define vbic_u16(a, b) simde_vbic_u16((a), (b))
185 #endif
186 
187 SIMDE_FUNCTION_ATTRIBUTES
188 simde_uint32x2_t
simde_vbic_u32(simde_uint32x2_t a,simde_uint32x2_t b)189 simde_vbic_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
190   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
191     return vbic_u32(a, b);
192   #elif defined(SIMDE_X86_MMX_NATIVE)
193     return _mm_andnot_si64(b, a);
194   #else
195     simde_uint32x2_private
196       a_ = simde_uint32x2_to_private(a),
197       b_ = simde_uint32x2_to_private(b),
198       r_;
199 
200     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
201       r_.values[i] = a_.values[i] & ~b_.values[i];
202     }
203 
204     return simde_uint32x2_from_private(r_);
205   #endif
206 }
207 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
208   #undef vbic_u32
209   #define vbic_u32(a, b) simde_vbic_u32((a), (b))
210 #endif
211 
212 SIMDE_FUNCTION_ATTRIBUTES
213 simde_uint64x1_t
simde_vbic_u64(simde_uint64x1_t a,simde_uint64x1_t b)214 simde_vbic_u64(simde_uint64x1_t a, simde_uint64x1_t b) {
215   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
216     return vbic_u64(a, b);
217   #elif defined(SIMDE_X86_MMX_NATIVE)
218     return _mm_andnot_si64(b, a);
219   #else
220     simde_uint64x1_private
221       a_ = simde_uint64x1_to_private(a),
222       b_ = simde_uint64x1_to_private(b),
223       r_;
224 
225     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
226       r_.values[i] = a_.values[i] & ~b_.values[i];
227     }
228 
229     return simde_uint64x1_from_private(r_);
230   #endif
231 }
232 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
233   #undef vbic_u64
234   #define vbic_u64(a, b) simde_vbic_u64((a), (b))
235 #endif
236 
237 SIMDE_FUNCTION_ATTRIBUTES
238 simde_int8x16_t
simde_vbicq_s8(simde_int8x16_t a,simde_int8x16_t b)239 simde_vbicq_s8(simde_int8x16_t a, simde_int8x16_t b) {
240   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
241     return vbicq_s8(a, b);
242   #elif defined(SIMDE_X86_SSE2_NATIVE)
243     return _mm_andnot_si128(b, a);
244   #elif defined(SIMDE_WASM_SIMD128_NATIVE)
245     return wasm_v128_andnot(a, b);
246   #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
247     return vec_andc(a, b);
248   #else
249     simde_int8x16_private
250       a_ = simde_int8x16_to_private(a),
251       b_ = simde_int8x16_to_private(b),
252       r_;
253 
254     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
255       r_.values[i] = a_.values[i] & ~b_.values[i];
256     }
257 
258     return simde_int8x16_from_private(r_);
259   #endif
260 }
261 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
262   #undef vbicq_s8
263   #define vbicq_s8(a, b) simde_vbicq_s8((a), (b))
264 #endif
265 
266 SIMDE_FUNCTION_ATTRIBUTES
267 simde_int16x8_t
simde_vbicq_s16(simde_int16x8_t a,simde_int16x8_t b)268 simde_vbicq_s16(simde_int16x8_t a, simde_int16x8_t b) {
269   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
270     return vbicq_s16(a, b);
271   #elif defined(SIMDE_X86_SSE2_NATIVE)
272     return _mm_andnot_si128(b, a);
273   #elif defined(SIMDE_WASM_SIMD128_NATIVE)
274     return wasm_v128_andnot(a, b);
275   #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
276     return vec_andc(a, b);
277   #else
278     simde_int16x8_private
279       a_ = simde_int16x8_to_private(a),
280       b_ = simde_int16x8_to_private(b),
281       r_;
282 
283     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
284       r_.values[i] = a_.values[i] & ~b_.values[i];
285     }
286 
287     return simde_int16x8_from_private(r_);
288   #endif
289 }
290 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
291   #undef vbicq_s16
292   #define vbicq_s16(a, b) simde_vbicq_s16((a), (b))
293 #endif
294 
295 SIMDE_FUNCTION_ATTRIBUTES
296 simde_int32x4_t
simde_vbicq_s32(simde_int32x4_t a,simde_int32x4_t b)297 simde_vbicq_s32(simde_int32x4_t a, simde_int32x4_t b) {
298   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
299     return vbicq_s32(a, b);
300   #elif defined(SIMDE_X86_SSE2_NATIVE)
301     return _mm_andnot_si128(b, a);
302   #elif defined(SIMDE_WASM_SIMD128_NATIVE)
303     return wasm_v128_andnot(a, b);
304   #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
305     return vec_andc(a, b);
306   #else
307     simde_int32x4_private
308       a_ = simde_int32x4_to_private(a),
309       b_ = simde_int32x4_to_private(b),
310       r_;
311 
312     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
313       r_.values[i] = a_.values[i] & ~b_.values[i];
314     }
315 
316     return simde_int32x4_from_private(r_);
317   #endif
318 }
319 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
320   #undef vbicq_s32
321   #define vbicq_s32(a, b) simde_vbicq_s32((a), (b))
322 #endif
323 
324 SIMDE_FUNCTION_ATTRIBUTES
325 simde_int64x2_t
simde_vbicq_s64(simde_int64x2_t a,simde_int64x2_t b)326 simde_vbicq_s64(simde_int64x2_t a, simde_int64x2_t b) {
327   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
328     return vbicq_s64(a, b);
329   #elif defined(SIMDE_X86_SSE2_NATIVE)
330     return _mm_andnot_si128(b, a);
331   #elif defined(SIMDE_WASM_SIMD128_NATIVE)
332     return wasm_v128_andnot(a, b);
333   #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
334     return vec_andc(a, b);
335   #else
336     simde_int64x2_private
337       a_ = simde_int64x2_to_private(a),
338       b_ = simde_int64x2_to_private(b),
339       r_;
340 
341     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
342       r_.values[i] = a_.values[i] & ~b_.values[i];
343     }
344 
345     return simde_int64x2_from_private(r_);
346   #endif
347 }
348 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
349   #undef vbicq_s64
350   #define vbicq_s64(a, b) simde_vbicq_s64((a), (b))
351 #endif
352 
353 SIMDE_FUNCTION_ATTRIBUTES
354 simde_uint8x16_t
simde_vbicq_u8(simde_uint8x16_t a,simde_uint8x16_t b)355 simde_vbicq_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
356   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
357     return vbicq_u8(a, b);
358   #elif defined(SIMDE_X86_SSE2_NATIVE)
359     return _mm_andnot_si128(b, a);
360   #elif defined(SIMDE_WASM_SIMD128_NATIVE)
361     return wasm_v128_andnot(a, b);
362   #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
363     return vec_andc(a, b);
364   #else
365     simde_uint8x16_private
366       a_ = simde_uint8x16_to_private(a),
367       b_ = simde_uint8x16_to_private(b),
368       r_;
369 
370     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
371       r_.values[i] = a_.values[i] & ~b_.values[i];
372     }
373 
374     return simde_uint8x16_from_private(r_);
375   #endif
376 }
377 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
378   #undef vbicq_u8
379   #define vbicq_u8(a, b) simde_vbicq_u8((a), (b))
380 #endif
381 
382 SIMDE_FUNCTION_ATTRIBUTES
383 simde_uint16x8_t
simde_vbicq_u16(simde_uint16x8_t a,simde_uint16x8_t b)384 simde_vbicq_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
385   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
386     return vbicq_u16(a, b);
387   #elif defined(SIMDE_X86_SSE2_NATIVE)
388     return _mm_andnot_si128(b, a);
389   #elif defined(SIMDE_WASM_SIMD128_NATIVE)
390     return wasm_v128_andnot(a, b);
391   #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
392     return vec_andc(a, b);
393   #else
394     simde_uint16x8_private
395       a_ = simde_uint16x8_to_private(a),
396       b_ = simde_uint16x8_to_private(b),
397       r_;
398 
399     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
400       r_.values[i] = a_.values[i] & ~b_.values[i];
401     }
402 
403     return simde_uint16x8_from_private(r_);
404   #endif
405 }
406 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
407   #undef vbicq_u16
408   #define vbicq_u16(a, b) simde_vbicq_u16((a), (b))
409 #endif
410 
411 SIMDE_FUNCTION_ATTRIBUTES
412 simde_uint32x4_t
simde_vbicq_u32(simde_uint32x4_t a,simde_uint32x4_t b)413 simde_vbicq_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
414   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
415     return vbicq_u32(a, b);
416   #elif defined(SIMDE_X86_SSE2_NATIVE)
417     return _mm_andnot_si128(b, a);
418   #elif defined(SIMDE_WASM_SIMD128_NATIVE)
419     return wasm_v128_andnot(a, b);
420   #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
421     return vec_andc(a, b);
422   #else
423     simde_uint32x4_private
424       a_ = simde_uint32x4_to_private(a),
425       b_ = simde_uint32x4_to_private(b),
426       r_;
427 
428     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
429       r_.values[i] = a_.values[i] & ~b_.values[i];
430     }
431 
432     return simde_uint32x4_from_private(r_);
433   #endif
434 }
435 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
436   #undef vbicq_u32
437   #define vbicq_u32(a, b) simde_vbicq_u32((a), (b))
438 #endif
439 
440 SIMDE_FUNCTION_ATTRIBUTES
441 simde_uint64x2_t
simde_vbicq_u64(simde_uint64x2_t a,simde_uint64x2_t b)442 simde_vbicq_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
443   #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
444     return vbicq_u64(a, b);
445   #elif defined(SIMDE_X86_SSE2_NATIVE)
446     return _mm_andnot_si128(b, a);
447   #elif defined(SIMDE_WASM_SIMD128_NATIVE)
448     return wasm_v128_andnot(a, b);
449   #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
450     return vec_andc(a, b);
451   #else
452     simde_uint64x2_private
453       a_ = simde_uint64x2_to_private(a),
454       b_ = simde_uint64x2_to_private(b),
455       r_;
456 
457     for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
458       r_.values[i] = a_.values[i] & ~b_.values[i];
459     }
460 
461     return simde_uint64x2_from_private(r_);
462   #endif
463 }
464 #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
465   #undef vbicq_u64
466   #define vbicq_u64(a, b) simde_vbicq_u64((a), (b))
467 #endif
468 
469 SIMDE_END_DECLS_
470 HEDLEY_DIAGNOSTIC_POP
471 
472 #endif /* !defined(SIMDE_ARM_NEON_BIC_H) */
473