use super::*;

pick! {
  if #[cfg(target_feature="avx2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct u32x8 { avx2: m256i }
  } else if #[cfg(target_feature="sse2")] {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct u32x8 { sse0: m128i, sse1: m128i }
  } else if #[cfg(target_feature="simd128")] {
    use core::arch::wasm32::*;

    #[derive(Clone, Copy)]
    #[repr(C, align(32))]
    pub struct u32x8 { simd0: v128, simd1: v128 }

    impl Default for u32x8 {
      fn default() -> Self {
        Self::splat(0)
      }
    }

    impl PartialEq for u32x8 {
      fn eq(&self, other: &Self) -> bool {
        !v128_any_true(v128_or(v128_xor(self.simd0, other.simd0), v128_xor(self.simd1, other.simd1)))
      }
    }

    impl Eq for u32x8 { }
  } else {
    #[derive(Default, Clone, Copy, PartialEq, Eq)]
    #[repr(C, align(32))]
    pub struct u32x8 { arr: [u32;8] }
  }
}

int_uint_consts!(u32, 8, u32x8, u32x8, u32a8, const_u32_as_u32x8, 256);

unsafe impl Zeroable for u32x8 {}
unsafe impl Pod for u32x8 {}

impl Add for u32x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn add(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: add_i32_m256i(self.avx2, rhs.avx2) }
      } else if #[cfg(target_feature="sse2")] {
        Self { sse0: add_i32_m128i(self.sse0, rhs.sse0), sse1: add_i32_m128i(self.sse1, rhs.sse1)}
      } else if #[cfg(target_feature="simd128")] {
        Self { simd0: u32x4_add(self.simd0, rhs.simd0), simd1: u32x4_add(self.simd1, rhs.simd1) }
      } else {
        Self { arr: [
          self.arr[0].wrapping_add(rhs.arr[0]),
          self.arr[1].wrapping_add(rhs.arr[1]),
          self.arr[2].wrapping_add(rhs.arr[2]),
          self.arr[3].wrapping_add(rhs.arr[3]),
          self.arr[4].wrapping_add(rhs.arr[4]),
          self.arr[5].wrapping_add(rhs.arr[5]),
          self.arr[6].wrapping_add(rhs.arr[6]),
          self.arr[7].wrapping_add(rhs.arr[7]),
        ]}
      }
    }
  }
}

impl Sub for u32x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn sub(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: sub_i32_m256i(self.avx2, rhs.avx2) }
      } else if #[cfg(target_feature="sse2")] {
        Self { sse0: sub_i32_m128i(self.sse0, rhs.sse0), sse1: sub_i32_m128i(self.sse1, rhs.sse1)}
      } else if #[cfg(target_feature="simd128")] {
        Self { simd0: u32x4_sub(self.simd0, rhs.simd0), simd1: u32x4_sub(self.simd1, rhs.simd1) }
      } else {
        Self { arr: [
          self.arr[0].wrapping_sub(rhs.arr[0]),
          self.arr[1].wrapping_sub(rhs.arr[1]),
          self.arr[2].wrapping_sub(rhs.arr[2]),
          self.arr[3].wrapping_sub(rhs.arr[3]),
          self.arr[4].wrapping_sub(rhs.arr[4]),
          self.arr[5].wrapping_sub(rhs.arr[5]),
          self.arr[6].wrapping_sub(rhs.arr[6]),
          self.arr[7].wrapping_sub(rhs.arr[7]),
        ]}
      }
    }
  }
}

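// Lanewise multiplication keeps only the low 32 bits of each product, which
// matches scalar `u32::wrapping_mul` (the fallback path below makes that
// explicit).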
impl Mul for u32x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn mul(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: mul_i32_keep_low_m256i(self.avx2, rhs.avx2) }
      } else if #[cfg(target_feature="sse4.1")] {
        Self { sse0: mul_i32_keep_low_m128i(self.sse0, rhs.sse0), sse1: mul_i32_keep_low_m128i(self.sse1, rhs.sse1)}
      } else if #[cfg(target_feature="simd128")] {
        Self { simd0: u32x4_mul(self.simd0, rhs.simd0), simd1: u32x4_mul(self.simd1, rhs.simd1) }
      } else {
        let arr1: [u32; 8] = cast(self);
        let arr2: [u32; 8] = cast(rhs);
        cast([
          arr1[0].wrapping_mul(arr2[0]),
          arr1[1].wrapping_mul(arr2[1]),
          arr1[2].wrapping_mul(arr2[2]),
          arr1[3].wrapping_mul(arr2[3]),
          arr1[4].wrapping_mul(arr2[4]),
          arr1[5].wrapping_mul(arr2[5]),
          arr1[6].wrapping_mul(arr2[6]),
          arr1[7].wrapping_mul(arr2[7]),
        ])
      }
    }
  }
}

impl BitAnd for u32x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitand(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitand_m256i(self.avx2, rhs.avx2) }
      } else if #[cfg(target_feature="sse2")] {
        Self { sse0: bitand_m128i(self.sse0, rhs.sse0), sse1: bitand_m128i(self.sse1, rhs.sse1)}
      } else if #[cfg(target_feature="simd128")] {
        Self { simd0: v128_and(self.simd0, rhs.simd0), simd1: v128_and(self.simd1, rhs.simd1) }
      } else {
        Self { arr: [
          self.arr[0].bitand(rhs.arr[0]),
          self.arr[1].bitand(rhs.arr[1]),
          self.arr[2].bitand(rhs.arr[2]),
          self.arr[3].bitand(rhs.arr[3]),
          self.arr[4].bitand(rhs.arr[4]),
          self.arr[5].bitand(rhs.arr[5]),
          self.arr[6].bitand(rhs.arr[6]),
          self.arr[7].bitand(rhs.arr[7]),
        ]}
      }
    }
  }
}

impl BitOr for u32x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitor_m256i(self.avx2, rhs.avx2) }
      } else if #[cfg(target_feature="sse2")] {
        Self { sse0: bitor_m128i(self.sse0, rhs.sse0), sse1: bitor_m128i(self.sse1, rhs.sse1)}
      } else if #[cfg(target_feature="simd128")] {
        Self { simd0: v128_or(self.simd0, rhs.simd0), simd1: v128_or(self.simd1, rhs.simd1) }
      } else {
        Self { arr: [
          self.arr[0].bitor(rhs.arr[0]),
          self.arr[1].bitor(rhs.arr[1]),
          self.arr[2].bitor(rhs.arr[2]),
          self.arr[3].bitor(rhs.arr[3]),
          self.arr[4].bitor(rhs.arr[4]),
          self.arr[5].bitor(rhs.arr[5]),
          self.arr[6].bitor(rhs.arr[6]),
          self.arr[7].bitor(rhs.arr[7]),
        ]}
      }
    }
  }
}

impl BitXor for u32x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn bitxor(self, rhs: Self) -> Self::Output {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: bitxor_m256i(self.avx2, rhs.avx2) }
      } else if #[cfg(target_feature="sse2")] {
        Self { sse0: bitxor_m128i(self.sse0, rhs.sse0), sse1: bitxor_m128i(self.sse1, rhs.sse1)}
      } else if #[cfg(target_feature="simd128")] {
        Self { simd0: v128_xor(self.simd0, rhs.simd0), simd1: v128_xor(self.simd1, rhs.simd1) }
      } else {
        Self { arr: [
          self.arr[0].bitxor(rhs.arr[0]),
          self.arr[1].bitxor(rhs.arr[1]),
          self.arr[2].bitxor(rhs.arr[2]),
          self.arr[3].bitxor(rhs.arr[3]),
          self.arr[4].bitxor(rhs.arr[4]),
          self.arr[5].bitxor(rhs.arr[5]),
          self.arr[6].bitxor(rhs.arr[6]),
          self.arr[7].bitxor(rhs.arr[7]),
        ]}
      }
    }
  }
}

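// `Shl`/`Shr` accept any primitive integer type as the shift amount, and every
// lane is shifted by that same amount. The backends disagree about shift
// amounts of 32 or more (the scalar fallback panics in debug builds), so keep
// shift amounts in `0..32`.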
macro_rules! impl_shl_t_for_u32x8 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shl<$shift_type> for u32x8 {
      type Output = Self;
      /// Shifts all lanes by the value given.
      #[inline]
      #[must_use]
      fn shl(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shl_all_u32_m256i(self.avx2, shift) }
          } else if #[cfg(target_feature="sse2")] {
            let shift = cast([rhs as u64, 0]);
            Self { sse0: shl_all_u32_m128i(self.sse0, shift), sse1: shl_all_u32_m128i(self.sse1, shift)}
          } else if #[cfg(target_feature="simd128")] {
            let u = rhs as u32;
            Self { simd0: u32x4_shl(self.simd0, u), simd1: u32x4_shl(self.simd1, u) }
          } else {
            let u = rhs as u64;
            Self { arr: [
              self.arr[0] << u,
              self.arr[1] << u,
              self.arr[2] << u,
              self.arr[3] << u,
              self.arr[4] << u,
              self.arr[5] << u,
              self.arr[6] << u,
              self.arr[7] << u,
            ]}
          }
        }
      }
    })+
  };
}
impl_shl_t_for_u32x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

macro_rules! impl_shr_t_for_u32x8 {
  ($($shift_type:ty),+ $(,)?) => {
    $(impl Shr<$shift_type> for u32x8 {
      type Output = Self;
      /// Shifts all lanes by the value given.
      #[inline]
      #[must_use]
      fn shr(self, rhs: $shift_type) -> Self::Output {
        pick! {
          if #[cfg(target_feature="avx2")] {
            let shift = cast([rhs as u64, 0]);
            Self { avx2: shr_all_u32_m256i(self.avx2, shift) }
          } else if #[cfg(target_feature="sse2")] {
            let shift = cast([rhs as u64, 0]);
            Self { sse0: shr_all_u32_m128i(self.sse0, shift), sse1: shr_all_u32_m128i(self.sse1, shift)}
          } else if #[cfg(target_feature="simd128")] {
            let u = rhs as u32;
            Self { simd0: u32x4_shr(self.simd0, u), simd1: u32x4_shr(self.simd1, u) }
          } else {
            let u = rhs as u64;
            Self { arr: [
              self.arr[0] >> u,
              self.arr[1] >> u,
              self.arr[2] >> u,
              self.arr[3] >> u,
              self.arr[4] >> u,
              self.arr[5] >> u,
              self.arr[6] >> u,
              self.arr[7] >> u,
            ]}
          }
        }
      }
    })+
  };
}

impl_shr_t_for_u32x8!(i8, u8, i16, u16, i32, u32, i64, u64, i128, u128);

impl u32x8 {
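  /// Builds a vector from an array of eight `u32` lanes.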
  #[inline]
  #[must_use]
  pub fn new(array: [u32; 8]) -> Self {
    Self::from(array)
  }
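  /// Lanewise `self == rhs`: each output lane is all ones (`u32::MAX`) where
  /// the lanes are equal and all zeroes otherwise, so the result can be used
  /// directly as a `blend` mask.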
  #[inline]
  #[must_use]
  pub fn cmp_eq(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: cmp_eq_mask_i32_m256i(self.avx2, rhs.avx2) }
      } else if #[cfg(target_feature="sse2")] {
        Self { sse0: cmp_eq_mask_i32_m128i(self.sse0, rhs.sse0), sse1: cmp_eq_mask_i32_m128i(self.sse1, rhs.sse1) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd0: u32x4_eq(self.simd0, rhs.simd0), simd1: u32x4_eq(self.simd1, rhs.simd1) }
      } else {
        Self { arr: [
          if self.arr[0] == rhs.arr[0] { u32::MAX } else { 0 },
          if self.arr[1] == rhs.arr[1] { u32::MAX } else { 0 },
          if self.arr[2] == rhs.arr[2] { u32::MAX } else { 0 },
          if self.arr[3] == rhs.arr[3] { u32::MAX } else { 0 },
          if self.arr[4] == rhs.arr[4] { u32::MAX } else { 0 },
          if self.arr[5] == rhs.arr[5] { u32::MAX } else { 0 },
          if self.arr[6] == rhs.arr[6] { u32::MAX } else { 0 },
          if self.arr[7] == rhs.arr[7] { u32::MAX } else { 0 },
        ]}
      }
    }
  }
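  /// Lanewise unsigned `self > rhs`, producing the same all-ones / all-zeroes
  /// lane masks as `cmp_eq`.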
  #[inline]
  #[must_use]
  pub fn cmp_gt(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // x86 only has signed compares, so flip the top bit of both sides to
        // turn the unsigned ordering into the signed one.
        let highbit = Self::splat(0x8000_0000);
        Self { avx2: cmp_gt_mask_i32_m256i((self ^ highbit).avx2, (rhs ^ highbit).avx2) }
      } else if #[cfg(target_feature="sse2")] {
        let highbit = Self::splat(0x8000_0000);
        let s = self ^ highbit;
        let r = rhs ^ highbit;
        Self { sse0: cmp_gt_mask_i32_m128i(s.sse0, r.sse0), sse1: cmp_gt_mask_i32_m128i(s.sse1, r.sse1) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd0: u32x4_gt(self.simd0, rhs.simd0), simd1: u32x4_gt(self.simd1, rhs.simd1) }
      } else {
        Self { arr: [
          if self.arr[0] > rhs.arr[0] { u32::MAX } else { 0 },
          if self.arr[1] > rhs.arr[1] { u32::MAX } else { 0 },
          if self.arr[2] > rhs.arr[2] { u32::MAX } else { 0 },
          if self.arr[3] > rhs.arr[3] { u32::MAX } else { 0 },
          if self.arr[4] > rhs.arr[4] { u32::MAX } else { 0 },
          if self.arr[5] > rhs.arr[5] { u32::MAX } else { 0 },
          if self.arr[6] > rhs.arr[6] { u32::MAX } else { 0 },
          if self.arr[7] > rhs.arr[7] { u32::MAX } else { 0 },
        ]}
      }
    }
  }
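  /// Lanewise unsigned `self < rhs`, producing the same all-ones / all-zeroes
  /// lane masks as `cmp_eq`.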
  #[inline]
  #[must_use]
  pub fn cmp_lt(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        // AVX2 has no "less than" compare, so flip the top bit (to get the
        // unsigned ordering) and swap the operands of the signed "greater than".
        let highbit = Self::splat(0x8000_0000);
        Self { avx2: cmp_gt_mask_i32_m256i((rhs ^ highbit).avx2, (self ^ highbit).avx2) }
      } else if #[cfg(target_feature="sse2")] {
        let highbit = Self::splat(0x8000_0000);
        let s = self ^ highbit;
        let r = rhs ^ highbit;
        Self { sse0: cmp_lt_mask_i32_m128i(s.sse0, r.sse0), sse1: cmp_lt_mask_i32_m128i(s.sse1, r.sse1) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd0: u32x4_lt(self.simd0, rhs.simd0), simd1: u32x4_lt(self.simd1, rhs.simd1) }
      } else {
        Self { arr: [
          if self.arr[0] < rhs.arr[0] { u32::MAX } else { 0 },
          if self.arr[1] < rhs.arr[1] { u32::MAX } else { 0 },
          if self.arr[2] < rhs.arr[2] { u32::MAX } else { 0 },
          if self.arr[3] < rhs.arr[3] { u32::MAX } else { 0 },
          if self.arr[4] < rhs.arr[4] { u32::MAX } else { 0 },
          if self.arr[5] < rhs.arr[5] { u32::MAX } else { 0 },
          if self.arr[6] < rhs.arr[6] { u32::MAX } else { 0 },
          if self.arr[7] < rhs.arr[7] { u32::MAX } else { 0 },
        ]}
      }
    }
  }
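  /// Bitwise merge: each result bit is taken from `t` where the corresponding
  /// bit of `self` is set and from `f` where it is clear. Intended for use
  /// with the all-ones / all-zeroes lane masks produced by the `cmp_*`
  /// methods.
  ///
  /// ```
  /// // Usage sketch; assumes this module is compiled into the `wide` crate
  /// // and that `u32x8` is re-exported at the crate root.
  /// use wide::u32x8;
  /// let mask = u32x8::new([u32::MAX, 0, u32::MAX, 0, u32::MAX, 0, u32::MAX, 0]);
  /// let t = u32x8::new([1, 2, 3, 4, 5, 6, 7, 8]);
  /// let f = u32x8::new([10, 20, 30, 40, 50, 60, 70, 80]);
  /// assert_eq!(mask.blend(t, f).to_array(), [1, 20, 3, 40, 5, 60, 7, 80]);
  /// ```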
  #[inline]
  #[must_use]
  pub fn blend(self, t: Self, f: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: blend_varying_i8_m256i(f.avx2, t.avx2, self.avx2) }
      } else if #[cfg(target_feature="sse4.1")] {
        Self { sse0: blend_varying_i8_m128i(f.sse0, t.sse0, self.sse0), sse1: blend_varying_i8_m128i(f.sse1, t.sse1, self.sse1)}
      } else if #[cfg(target_feature="simd128")] {
        Self { simd0: v128_bitselect(t.simd0, f.simd0, self.simd0), simd1: v128_bitselect(t.simd1, f.simd1, self.simd1) }
      } else {
        generic_bit_blend(self, t, f)
      }
    }
  }

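  /// Lanewise unsigned maximum of `self` and `rhs`.
  ///
  /// ```
  /// // Usage sketch; assumes this module is compiled into the `wide` crate.
  /// use wide::u32x8;
  /// let a = u32x8::new([1, 200, 3, 400, 5, 600, 7, 800]);
  /// let b = u32x8::new([100, 2, 300, 4, 500, 6, 700, 8]);
  /// assert_eq!(a.max(b).to_array(), [100, 200, 300, 400, 500, 600, 700, 800]);
  /// ```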
  #[inline]
  #[must_use]
  pub fn max(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: max_u32_m256i(self.avx2, rhs.avx2) }
      } else if #[cfg(target_feature="sse4.1")] {
        Self { sse0: max_u32_m128i(self.sse0, rhs.sse0), sse1: max_u32_m128i(self.sse1, rhs.sse1) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd0: u32x4_max(self.simd0, rhs.simd0), simd1: u32x4_max(self.simd1, rhs.simd1) }
      } else {
        self.cmp_lt(rhs).blend(rhs, self)
      }
    }
  }
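  /// Lanewise unsigned minimum of `self` and `rhs`.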
  #[inline]
  #[must_use]
  pub fn min(self, rhs: Self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: min_u32_m256i(self.avx2, rhs.avx2) }
      } else if #[cfg(target_feature="sse4.1")] {
        Self { sse0: min_u32_m128i(self.sse0, rhs.sse0), sse1: min_u32_m128i(self.sse1, rhs.sse1) }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd0: u32x4_min(self.simd0, rhs.simd0), simd1: u32x4_min(self.simd1, rhs.simd1) }
      } else {
        self.cmp_lt(rhs).blend(self, rhs)
      }
    }
  }

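  /// Copies the lanes out into a plain `[u32; 8]`.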
  #[inline]
  #[must_use]
  pub fn to_array(self) -> [u32; 8] {
    cast(self)
  }

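  /// Borrows the lanes as a `&[u32; 8]` without copying.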
  #[inline]
  #[must_use]
  pub fn as_array_ref(&self) -> &[u32; 8] {
    cast_ref(self)
  }
}

impl Not for u32x8 {
  type Output = Self;
  #[inline]
  #[must_use]
  fn not(self) -> Self {
    pick! {
      if #[cfg(target_feature="avx2")] {
        Self { avx2: self.avx2.not() }
      } else if #[cfg(target_feature="sse2")] {
        Self { sse0: self.sse0.not(), sse1: self.sse1.not() }
      } else if #[cfg(target_feature="simd128")] {
        Self { simd0: v128_not(self.simd0), simd1: v128_not(self.simd1) }
      } else {
        Self { arr: [
          !self.arr[0],
          !self.arr[1],
          !self.arr[2],
          !self.arr[3],
          !self.arr[4],
          !self.arr[5],
          !self.arr[6],
          !self.arr[7],
        ]}
      }
    }
  }
}