1 use super::*;
2 #[allow(unused_imports)]
3 use super::{
4     simd_eq, simd_ne, simd_lt, simd_le, simd_gt, simd_ge,
5     simd_shuffle2, simd_shuffle4, simd_shuffle8, simd_shuffle16,
6     simd_insert, simd_extract,
7     simd_cast,
8     simd_add, simd_sub, simd_mul, simd_div, simd_shl, simd_shr, simd_and, simd_or, simd_xor,
9 
10     Unalign, bitcast,
11 };
12 use core::{mem,ops};
13 
14 #[cfg(any(target_arch = "x86",
15           target_arch = "x86_64"))]
16 use x86::sse2::common;
17 #[cfg(any(target_arch = "arm"))]
18 use arm::neon::common;
19 #[cfg(any(target_arch = "aarch64"))]
20 use aarch64::neon::common;
21 
22 macro_rules! basic_impls {
23     ($(
24         $name: ident:
25         $elem: ident, $bool: ident, $shuffle: ident, $length: expr, $($first: ident),* | $($last: ident),*;
26         )*) => {
27         $(impl $name {
28             /// Create a new instance.
29             #[inline]
30             pub const fn new($($first: $elem),*, $($last: $elem),*) -> $name {
31                 $name($($first),*, $($last),*)
32             }
33 
34             /// Create a new instance where every lane has value `x`.
35             #[inline]
36             pub const fn splat(x: $elem) -> $name {
37                 $name($({ #[allow(dead_code)] struct $first; x }),*,
38                       $({ #[allow(dead_code)] struct $last; x }),*)
39             }
40 
41             /// Compare for equality.
42             #[inline]
43             pub fn eq(self, other: Self) -> $bool {
44                 unsafe {simd_eq(self, other)}
45             }
46             /// Compare for equality.
47             #[inline]
48             pub fn ne(self, other: Self) -> $bool {
49                 unsafe {simd_ne(self, other)}
50             }
51             /// Compare for equality.
52             #[inline]
53             pub fn lt(self, other: Self) -> $bool {
54                 unsafe {simd_lt(self, other)}
55             }
56             /// Compare for equality.
57             #[inline]
58             pub fn le(self, other: Self) -> $bool {
59                 unsafe {simd_le(self, other)}
60             }
61             /// Compare for equality.
62             #[inline]
63             pub fn gt(self, other: Self) -> $bool {
64                 unsafe {simd_gt(self, other)}
65             }
66             /// Compare for equality.
67             #[inline]
68             pub fn ge(self, other: Self) -> $bool {
69                 unsafe {simd_ge(self, other)}
70             }
71 
72             /// Extract the value of the `idx`th lane of `self`.
73             ///
74             /// # Panics
75             ///
76             /// `extract` will panic if `idx` is out of bounds.
77             #[inline]
78             pub fn extract(self, idx: u32) -> $elem {
79                 assert!(idx < $length);
80                 unsafe {simd_extract(self, idx)}
81             }
82             /// Return a new vector where the `idx`th lane is replaced
83             /// by `elem`.
84             ///
85             /// # Panics
86             ///
87             /// `replace` will panic if `idx` is out of bounds.
88             #[inline]
89             pub fn replace(self, idx: u32, elem: $elem) -> Self {
90                 assert!(idx < $length);
91                 unsafe {simd_insert(self, idx, elem)}
92             }
93 
94             /// Load a new value from the `idx`th position of `array`.
95             ///
96             /// This is equivalent to the following, but is possibly
97             /// more efficient:
98             ///
99             /// ```rust,ignore
100             /// Self::new(array[idx], array[idx + 1], ...)
101             /// ```
102             ///
103             /// # Panics
104             ///
105             /// `load` will panic if `idx` is out of bounds in
106             /// `array`, or if `array[idx..]` is too short.
107             #[inline]
108             pub fn load(array: &[$elem], idx: usize) -> Self {
109                 let data = &array[idx..idx + $length];
110                 let loaded = unsafe {
111                     *(data.as_ptr() as *const Unalign<Self>)
112                 };
113                 loaded.0
114             }
115 
116             /// Store the elements of `self` to `array`, starting at
117             /// the `idx`th position.
118             ///
119             /// This is equivalent to the following, but is possibly
120             /// more efficient:
121             ///
122             /// ```rust,ignore
123             /// array[i] = self.extract(0);
124             /// array[i + 1] = self.extract(1);
125             /// // ...
126             /// ```
127             ///
128             /// # Panics
129             ///
130             /// `store` will panic if `idx` is out of bounds in
131             /// `array`, or if `array[idx...]` is too short.
132             #[inline]
133             pub fn store(self, array: &mut [$elem], idx: usize) {
134                 let place = &mut array[idx..idx + $length];
135                 unsafe {
136                     *(place.as_mut_ptr() as *mut Unalign<Self>) = Unalign(self)
137                 }
138             }
139         })*
140     }
141 }
142 
143 basic_impls! {
144     u32x4: u32, bool32ix4, simd_shuffle4, 4, x0, x1 | x2, x3;
145     i32x4: i32, bool32ix4, simd_shuffle4, 4, x0, x1 | x2, x3;
146     f32x4: f32, bool32fx4, simd_shuffle4, 4, x0, x1 | x2, x3;
147 
148     u16x8: u16, bool16ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7;
149     i16x8: i16, bool16ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7;
150 
151     u8x16: u8, bool8ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15;
152     i8x16: i8, bool8ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15;
153 }
154 
155 macro_rules! bool_impls {
156     ($(
157         $name: ident:
158         $elem: ident, $repr: ident, $repr_elem: ident, $length: expr, $all: ident, $any: ident,
159         $($first: ident),* | $($last: ident),*
160         [$(#[$cvt_meta: meta] $cvt: ident -> $cvt_to: ident),*];
161         )*) => {
162         $(impl $name {
163             /// Convert to integer representation.
164             #[inline]
165             pub fn to_repr(self) -> $repr {
166                 unsafe {mem::transmute(self)}
167             }
168             /// Convert from integer representation.
169             #[inline]
170             #[inline]
171             pub fn from_repr(x: $repr) -> Self {
172                 unsafe {mem::transmute(x)}
173             }
174 
175             /// Create a new instance.
176             #[inline]
177             pub fn new($($first: bool),*, $($last: bool),*) -> $name {
178                 unsafe {
179                     // negate everything together
180                     simd_sub($name::splat(false),
181                              $name($( ($first as $repr_elem) ),*,
182                                    $( ($last as $repr_elem) ),*))
183                 }
184             }
185 
186             /// Create a new instance where every lane has value `x`.
187             #[allow(unused_variables)]
188             #[inline]
189             pub fn splat(x: bool) -> $name {
190                 let x = if x {!(0 as $repr_elem)} else {0};
191                 $name($({ let $first = (); x}),*,
192                       $({ let $last = (); x}),*)
193             }
194 
195             /// Extract the value of the `idx`th lane of `self`.
196             ///
197             /// # Panics
198             ///
199             /// `extract` will panic if `idx` is out of bounds.
200             #[inline]
201             pub fn extract(self, idx: u32) -> bool {
202                 assert!(idx < $length);
203                 unsafe {simd_extract(self.to_repr(), idx) != 0}
204             }
205             /// Return a new vector where the `idx`th lane is replaced
206             /// by `elem`.
207             ///
208             /// # Panics
209             ///
210             /// `replace` will panic if `idx` is out of bounds.
211             #[inline]
212             pub fn replace(self, idx: u32, elem: bool) -> Self {
213                 assert!(idx < $length);
214                 let x = if elem {!(0 as $repr_elem)} else {0};
215                 unsafe {Self::from_repr(simd_insert(self.to_repr(), idx, x))}
216             }
217             /// Select between elements of `then` and `else_`, based on
218             /// the corresponding element of `self`.
219             ///
220             /// This is equivalent to the following, but is possibly
221             /// more efficient:
222             ///
223             /// ```rust,ignore
224             /// T::new(if self.extract(0) { then.extract(0) } else { else_.extract(0) },
225             ///        if self.extract(1) { then.extract(1) } else { else_.extract(1) },
226             ///        ...)
227             /// ```
228             #[inline]
229             pub fn select<T: Simd<Bool = $name>>(self, then: T, else_: T) -> T {
230                 let then: $repr = bitcast(then);
231                 let else_: $repr = bitcast(else_);
232                 bitcast((then & self.to_repr()) | (else_ & (!self).to_repr()))
233             }
234 
235             /// Check if every element of `self` is true.
236             ///
237             /// This is equivalent to the following, but is possibly
238             /// more efficient:
239             ///
240             /// ```rust,ignore
241             /// self.extract(0) && self.extract(1) && ...
242             /// ```
243             #[inline]
244             pub fn all(self) -> bool {
245                 common::$all(self)
246             }
247             /// Check if any element of `self` is true.
248             ///
249             /// This is equivalent to the following, but is possibly
250             /// more efficient:
251             ///
252             /// ```rust,ignore
253             /// self.extract(0) || self.extract(1) || ...
254             /// ```
255             #[inline]
256             pub fn any(self) -> bool {
257                 common::$any(self)
258             }
259 
260             $(
261                 #[$cvt_meta]
262                 #[inline]
263                 pub fn $cvt(self) -> $cvt_to {
264                     bitcast(self)
265                 }
266                 )*
267         }
268           impl ops::Not for $name {
269               type Output = Self;
270 
271               #[inline]
272               fn not(self) -> Self {
273                   Self::from_repr($repr::splat(!(0 as $repr_elem)) ^ self.to_repr())
274               }
275           }
276           )*
277     }
278 }
279 
280 bool_impls! {
281     bool32ix4: bool32i, i32x4, i32, 4, bool32ix4_all, bool32ix4_any, x0, x1 | x2, x3
282         [/// Convert `self` to a boolean vector for interacting with floating point vectors.
283          to_f -> bool32fx4];
284     bool32fx4: bool32f, i32x4, i32, 4, bool32fx4_all, bool32fx4_any, x0, x1 | x2, x3
285         [/// Convert `self` to a boolean vector for interacting with integer vectors.
286          to_i -> bool32ix4];
287 
288     bool16ix8: bool16i, i16x8, i16, 8, bool16ix8_all, bool16ix8_any, x0, x1, x2, x3 | x4, x5, x6, x7 [];
289 
290     bool8ix16: bool8i, i8x16, i8, 16, bool8ix16_all, bool8ix16_any, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15 [];
291 }
292 
293 impl u32x4 {
294     /// Convert each lane to a signed integer.
295     #[inline]
to_i32(self) -> i32x4296     pub fn to_i32(self) -> i32x4 {
297         unsafe {simd_cast(self)}
298     }
299     /// Convert each lane to a 32-bit float.
300     #[inline]
to_f32(self) -> f32x4301     pub fn to_f32(self) -> f32x4 {
302         unsafe {simd_cast(self)}
303     }
304 }
305 impl i32x4 {
306     /// Convert each lane to an unsigned integer.
307     #[inline]
to_u32(self) -> u32x4308     pub fn to_u32(self) -> u32x4 {
309         unsafe {simd_cast(self)}
310     }
311     /// Convert each lane to a 32-bit float.
312     #[inline]
to_f32(self) -> f32x4313     pub fn to_f32(self) -> f32x4 {
314         unsafe {simd_cast(self)}
315     }
316 }
317 impl f32x4 {
318     /// Compute the square root of each lane.
319     #[inline]
sqrt(self) -> Self320     pub fn sqrt(self) -> Self {
321         common::f32x4_sqrt(self)
322     }
323     /// Compute an approximation to the reciprocal of the square root
324     /// of `self`, that is, `f32::splat(1.0) / self.sqrt()`.
325     ///
326     /// The accuracy of this approximation is platform dependent.
327     #[inline]
approx_rsqrt(self) -> Self328     pub fn approx_rsqrt(self) -> Self {
329         common::f32x4_approx_rsqrt(self)
330     }
331     /// Compute an approximation to the reciprocal of `self`, that is,
332     /// `f32::splat(1.0) / self`.
333     ///
334     /// The accuracy of this approximation is platform dependent.
335     #[inline]
approx_reciprocal(self) -> Self336     pub fn approx_reciprocal(self) -> Self {
337         common::f32x4_approx_reciprocal(self)
338     }
339     /// Compute the lane-wise maximum of `self` and `other`.
340     ///
341     /// This is equivalent to the following, but is possibly more
342     /// efficient:
343     ///
344     /// ```rust,ignore
345     /// f32x4::new(self.extract(0).max(other.extract(0)),
346     ///            self.extract(1).max(other.extract(1)),
347     ///            ...)
348     /// ```
349     #[inline]
max(self, other: Self) -> Self350     pub fn max(self, other: Self) -> Self {
351         common::f32x4_max(self, other)
352     }
353     /// Compute the lane-wise minimum of `self` and `other`.
354     ///
355     /// This is equivalent to the following, but is possibly more
356     /// efficient:
357     ///
358     /// ```rust,ignore
359     /// f32x4::new(self.extract(0).min(other.extract(0)),
360     ///            self.extract(1).min(other.extract(1)),
361     ///            ...)
362     /// ```
363     #[inline]
min(self, other: Self) -> Self364     pub fn min(self, other: Self) -> Self {
365         common::f32x4_min(self, other)
366     }
367     /// Convert each lane to a signed integer.
368     #[inline]
to_i32(self) -> i32x4369     pub fn to_i32(self) -> i32x4 {
370         unsafe {simd_cast(self)}
371     }
372     /// Convert each lane to an unsigned integer.
373     #[inline]
to_u32(self) -> u32x4374     pub fn to_u32(self) -> u32x4 {
375         unsafe {simd_cast(self)}
376     }
377 }
378 
379 impl i16x8 {
380     /// Convert each lane to an unsigned integer.
381     #[inline]
to_u16(self) -> u16x8382     pub fn to_u16(self) -> u16x8 {
383         unsafe {simd_cast(self)}
384     }
385 }
386 impl u16x8 {
387     /// Convert each lane to a signed integer.
388     #[inline]
to_i16(self) -> i16x8389     pub fn to_i16(self) -> i16x8 {
390         unsafe {simd_cast(self)}
391     }
392 }
393 
394 impl i8x16 {
395     /// Convert each lane to an unsigned integer.
396     #[inline]
to_u8(self) -> u8x16397     pub fn to_u8(self) -> u8x16 {
398         unsafe {simd_cast(self)}
399     }
400 }
401 impl u8x16 {
402     /// Convert each lane to a signed integer.
403     #[inline]
to_i8(self) -> i8x16404     pub fn to_i8(self) -> i8x16 {
405         unsafe {simd_cast(self)}
406     }
407 }
408 
409 
410 macro_rules! neg_impls {
411     ($zero: expr, $($ty: ident,)*) => {
412         $(impl ops::Neg for $ty {
413             type Output = Self;
414             fn neg(self) -> Self {
415                 $ty::splat($zero) - self
416             }
417         })*
418     }
419 }
420 neg_impls!{
421     0,
422     i32x4,
423     i16x8,
424     i8x16,
425 }
426 neg_impls! {
427     0.0,
428     f32x4,
429 }
430 macro_rules! not_impls {
431     ($($ty: ident,)*) => {
432         $(impl ops::Not for $ty {
433             type Output = Self;
434             fn not(self) -> Self {
435                 $ty::splat(!0) ^ self
436             }
437         })*
438     }
439 }
440 not_impls! {
441     i32x4,
442     i16x8,
443     i8x16,
444     u32x4,
445     u16x8,
446     u8x16,
447 }
448 
449 macro_rules! operators {
450     ($($trayt: ident ($func: ident, $method: ident): $($ty: ty),*;)*) => {
451         $(
452             $(impl ops::$trayt for $ty {
453                 type Output = Self;
454                 #[inline]
455                 fn $method(self, x: Self) -> Self {
456                     unsafe {$func(self, x)}
457                 }
458             })*
459                 )*
460     }
461 }
462 operators! {
463     Add (simd_add, add):
464         i8x16, u8x16, i16x8, u16x8, i32x4, u32x4,
465         f32x4;
466     Sub (simd_sub, sub):
467         i8x16, u8x16, i16x8, u16x8, i32x4, u32x4,
468         f32x4;
469     Mul (simd_mul, mul):
470         i8x16, u8x16, i16x8, u16x8, i32x4, u32x4,
471         f32x4;
472     Div (simd_div, div): f32x4;
473 
474     BitAnd (simd_and, bitand):
475         i8x16, u8x16, i16x8, u16x8, i32x4, u32x4,
476         bool8ix16, bool16ix8, bool32ix4,
477         bool32fx4;
478     BitOr (simd_or, bitor):
479         i8x16, u8x16, i16x8, u16x8, i32x4, u32x4,
480         bool8ix16, bool16ix8, bool32ix4,
481         bool32fx4;
482     BitXor (simd_xor, bitxor):
483         i8x16, u8x16, i16x8, u16x8, i32x4, u32x4,
484         bool8ix16, bool16ix8, bool32ix4,
485         bool32fx4;
486 }
487 
// Implements `<<` and `>>` on one vector type for each listed scalar
// shift-amount type.
//
// The scalar amount is cast to the vector's element type, broadcast to
// every lane, and handed to the shift intrinsic together with `self`.
// NOTE(review): the `as` cast truncates wide amounts, and amounts that are
// not smaller than the lane width are passed through unchecked; confirm
// what `simd_shl`/`simd_shr` do in that case.
macro_rules! shift_one {
    ($vector: ident, $($amount_ty: ident),*) => {
        $(
        impl ops::Shl<$amount_ty> for $vector {
            type Output = Self;

            #[inline]
            fn shl(self, amount: $amount_ty) -> Self {
                let count = $vector::splat(amount as <$vector as Simd>::Elem);
                unsafe { simd_shl(self, count) }
            }
        }

        impl ops::Shr<$amount_ty> for $vector {
            type Output = Self;

            #[inline]
            fn shr(self, amount: $amount_ty) -> Self {
                let count = $vector::splat(amount as <$vector as Simd>::Elem);
                unsafe { simd_shr(self, count) }
            }
        }
        )*
    }
}
508 
509 macro_rules! shift {
510     ($($ty: ident),*) => {
511         $(shift_one! {
512             $ty,
513             u8, u16, u32, u64, usize,
514             i8, i16, i32, i64, isize
515         })*
516     }
517 }
518 shift! {
519     i8x16, u8x16, i16x8, u16x8, i32x4, u32x4
520 }
521