1 #![no_std]
2 #![allow(non_camel_case_types)]
3 
4 //! A crate to help you go wide.
5 //!
6 //! This crate provides SIMD-compatible data types.
7 //!
8 //! When possible, explicit SIMD is used with all the math operations here. As a
//! fallback, the fact that all the lanes of a fixed length array are doing
10 //! the same thing will often make LLVM notice that it should use SIMD
11 //! instructions to complete the task. In the worst case, the code just becomes
12 //! totally scalar (though the math is still correct, at least).
13 //!
14 //! ## Crate Features
15 //!
//! * `std`: This causes the crate to link to `std`.
17 //!   * Currently this just improves the performance of `sqrt` when an explicit
18 //!     SIMD `sqrt` isn't available.
19 
20 // Note(Lokathor): Due to standard library magic, the std-only methods for f32
21 // and f64 will automatically be available simply by declaring this.
22 
23 // TODO
24 // Add/Sub/Mul/Div with constant
25 // Shuffle left/right/by index
26 
27 #[cfg(feature = "std")]
28 extern crate std;
29 
30 use core::{
31   fmt::{
32     Binary, Debug, Display, LowerExp, LowerHex, Octal, UpperExp, UpperHex,
33   },
34   ops::*,
35 };
36 
37 #[allow(unused_imports)]
38 use safe_arch::*;
39 
40 use bytemuck::*;
41 
42 #[macro_use]
43 mod macros;
44 
/// Emits one of several token groups depending on `cfg` predicates, written
/// as an `if #[cfg(..)] { .. } else if #[cfg(..)] { .. } else { .. }` chain.
///
/// A branch is emitted when all of its own predicates hold and none of the
/// individual predicates collected from the branches before it hold. Note
/// that a multi-predicate branch contributes each of its predicates
/// separately to that "earlier predicates" list.
macro_rules! pick {
  // Entry point: one or more conditional branches followed by a final
  // unconditional `else`.
  ($(if #[cfg($($pred:meta),*)] {
      $($branch:tt)*
    })else+ else {
      $($fallback:tt)*
    }) => {
    pick!{
      @__forests [ ] ;
      $( [ {$($pred),*} {$($branch)*} ], )*
      [ { } {$($fallback)*} ],
    }
  };
  // Entry point: a chain with no trailing unconditional `else`.
  (if #[cfg($($head_pred:meta),*)] {
      $($head_branch:tt)*
    } $(else if #[cfg($($tail_pred:meta),*)] {
      $($tail_branch:tt)*
    })*) => {
    pick!{
      @__forests [ ] ;
      [ {$($head_pred),*} {$($head_branch)*} ],
      $( [ {$($tail_pred),*} {$($tail_branch)*} ], )*
    }
  };
  // Internal: no branches remain, so the recursion stops here.
  (@__forests [$($seen:meta,)*];) => {
    /* halt expansion */
  };
  // Internal: emit the first remaining branch guarded by its own predicates
  // plus the negation of every predicate seen so far, then recurse on the
  // rest with this branch's predicates appended to the "seen" list.
  (@__forests [$($seen:meta,)*]; [{$($own:meta),*} {$($body:tt)*}], $($remaining:tt)*) => {
    #[cfg(all( $($own,)* not(any($($seen),*)) ))]
    pick!{ @__identity $($body)* }
    pick!{ @__forests [ $($seen,)* $($own,)* ] ; $($remaining)* }
  };
  // Internal: pass the tokens through unchanged. This gives the `#[cfg]`
  // attribute above a single macro invocation to attach to.
  (@__identity $($passthrough:tt)*) => {
    $($passthrough)*
  };
}
80 
81 // TODO: make these generic over `mul_add`? Worth it?
82 
/// Evaluates `c2*x^2 + c1*x + c0` using fused multiply-adds.
///
/// `$x` is evaluated once; its type must provide `mul_add` and `*`.
macro_rules! polynomial_2 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr $(,)?) => {{
    let t = $x;
    // c1*x + c0
    let linear_part = t.mul_add($c1, $c0);
    // x^2 * c2 + (c1*x + c0)
    (t * t).mul_add($c2, linear_part)
  }};
}
90 
/// Evaluates `c3*x^3 + c2*x^2 + c1*x + c0` using fused multiply-adds.
///
/// `$x` is evaluated once; the coefficient type must provide `mul_add`.
macro_rules! polynomial_3 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    // c3*x + c2, which gets scaled by x^2 below
    let upper_pair = $c3.mul_add(t, $c2);
    // c1*x + c0
    let lower_pair = $c1.mul_add(t, $c0);
    upper_pair.mul_add(t_sq, lower_pair)
  }};
}
98 
/// Evaluates `c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0`.
///
/// The cubic part uses fused multiply-adds; the quartic term is added with a
/// plain multiply and add.
macro_rules! polynomial_4 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr ,$c3:expr, $c4:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let t_4th = t_sq * t_sq;
    // c3*x + c2
    let upper_pair = $c3.mul_add(t, $c2);
    // c1*x + c0
    let lower_pair = $c1.mul_add(t, $c0);
    let cubic_part = upper_pair.mul_add(t_sq, lower_pair);
    cubic_part + $c4 * t_4th
  }};
}
107 
/// Evaluates `c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0` using fused
/// multiply-adds.
macro_rules! polynomial_5 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr, $c4:expr, $c5:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let t_4th = t_sq * t_sq;
    // c3*x + c2, scaled by x^2 in the final step
    let middle_pair = $c3.mul_add(t, $c2);
    // c5*x + c4, scaled by x^4
    let top_pair = $c5.mul_add(t, $c4);
    // c1*x + c0
    let bottom_pair = $c1.mul_add(t, $c0);
    let top_and_bottom = top_pair.mul_add(t_4th, bottom_pair);
    middle_pair.mul_add(t_sq, top_and_bottom)
  }};
}
118 
/// Evaluates the monic quintic `x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0`,
/// i.e. a degree 5 polynomial whose leading coefficient is fixed at one.
macro_rules! polynomial_5n {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr, $c4:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let t_4th = t_sq * t_sq;
    // c3*x + c2, scaled by x^2 in the final step
    let middle_pair = t.mul_add($c3, $c2);
    // (c4 + x), scaled by x^4: supplies both the x^4 and the x^5 terms
    let top_pair = $c4 + t;
    // c1*x + c0
    let bottom_pair = t.mul_add($c1, $c0);
    t_sq.mul_add(middle_pair, t_4th.mul_add(top_pair, bottom_pair))
  }};
}
127 
/// Evaluates `c6*x^6 + c5*x^5 + ... + c1*x + c0` using fused multiply-adds.
macro_rules! polynomial_6 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr, $c4:expr, $c5:expr ,$c6:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let t_4th = t_sq * t_sq;
    // c6*x^2 + c5*x + c4, scaled by x^4 in the final step
    let high_part = t_sq.mul_add($c6, t.mul_add($c5, $c4));
    // c3*x^3 + c2*x^2 + c1*x + c0
    let low_part = t_sq.mul_add(t.mul_add($c3, $c2), t.mul_add($c1, $c0));
    t_4th.mul_add(high_part, low_part)
  }};
}
139 
/// Evaluates the monic sextic `x^6 + c5*x^5 + c4*x^4 + ... + c1*x + c0`,
/// i.e. a degree 6 polynomial whose leading coefficient is fixed at one.
macro_rules! polynomial_6n {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr, $c4:expr, $c5:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let t_4th = t_sq * t_sq;
    // c5*x + (x^2 + c4), scaled by x^4 in the final step
    let high_part = t.mul_add($c5, t_sq + $c4);
    // c3*x^3 + c2*x^2 + c1*x + c0
    let low_part = t_sq.mul_add(t.mul_add($c3, $c2), t.mul_add($c1, $c0));
    t_4th.mul_add(high_part, low_part)
  }};
}
151 
/// Evaluates `c8*x^8 + c7*x^7 + ... + c1*x + c0` using fused multiply-adds.
macro_rules! polynomial_8 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr, $c4:expr, $c5:expr,  $c6:expr, $c7:expr, $c8:expr $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let t_4th = t_sq * t_sq;
    let t_8th = t_4th * t_4th;
    // c7*x^3 + c6*x^2 + c5*x + c4, scaled by x^4 in the final step
    let middle_part = t_sq.mul_add($c7.mul_add(t, $c6), t.mul_add($c5, $c4));
    // c8*x^8 + (c3*x^3 + c2*x^2 + c1*x + c0)
    let outer_part =
      t_8th.mul_add($c8, t_sq.mul_add(t.mul_add($c3, $c2), t.mul_add($c1, $c0)));
    t_4th.mul_add(middle_part, outer_part)
  }};
}
164 
/// Evaluates `c13*x^13 + c12*x^12 + ... + c3*x^3 + c2*x^2 + x` using fused
/// multiply-adds.
///
/// There are no `c1` or `c0` parameters: the linear term is always exactly
/// `x` and there is no constant term. The coefficients are passed in ascending
/// order starting from `c2`.
macro_rules! polynomial_13 {
  ($x:expr,  $c2:expr, $c3:expr, $c4:expr, $c5:expr,$c6:expr, $c7:expr, $c8:expr,$c9:expr, $c10:expr, $c11:expr, $c12:expr, $c13:expr  $(,)?) => {{
    let x = $x;
    let x2 = x * x;
    let x4 = x2 * x2;
    let x8 = x4 * x4;
    x8.mul_add(
      // (c13*x + c12)*x^4 + (c11*x + c10)*x^2 + (c9*x + c8), scaled by x^8
      x4.mul_add(
        x.mul_add($c13, $c12),
        x2.mul_add(x.mul_add($c11, $c10), x.mul_add($c9, $c8)),
      ),
      // ((c7*x + c6)*x^2 + (c5*x + c4))*x^4 + ((c3*x + c2)*x^2 + x)
      x4.mul_add(
        x2.mul_add(x.mul_add($c7, $c6), x.mul_add($c5, $c4)),
        x2.mul_add(x.mul_add($c3, $c2), x),
      ),
    )
  }};
}
184 
/// Evaluates a degree 13 polynomial with no constant term and a linear term
/// fixed at `x`:
///
/// `((c8+c9*x) + (c10+c11*x)*x2 + (c12+c13*x)*x4)*x8
///   + (((c6+c7*x)*x2 + (c4+c5*x))*x4 + ((c2+c3*x)*x2 + x))`
///
/// The coefficients are passed in ascending order starting from `c2`.
macro_rules! polynomial_13m {
  ($x:expr,  $c2:expr, $c3:expr, $c4:expr, $c5:expr,$c6:expr, $c7:expr, $c8:expr,$c9:expr, $c10:expr, $c11:expr, $c12:expr, $c13:expr  $(,)?) => {{
    let t = $x;
    let t_sq = t * t;
    let t_4th = t_sq * t_sq;
    let t_8th = t_4th * t_4th;

    // Terms c8..c13, which get scaled by x^8 in the final step.
    let upper_half = t_4th.mul_add(
      t.mul_add($c13, $c12),
      t_sq.mul_add(t.mul_add($c11, $c10), t.mul_add($c9, $c8)),
    );
    // Terms c2..c7 plus the bare linear `x`.
    let lower_half = t_4th.mul_add(
      t_sq.mul_add(t.mul_add($c7, $c6), t.mul_add($c5, $c4)),
      t_sq.mul_add(t.mul_add($c3, $c2), t),
    );

    t_8th.mul_add(upper_half, lower_half)
  }};
}
206 
207 mod f32x8_;
208 pub use f32x8_::*;
209 
210 mod f32x4_;
211 pub use f32x4_::*;
212 
213 mod f64x4_;
214 pub use f64x4_::*;
215 
216 mod f64x2_;
217 pub use f64x2_::*;
218 
219 mod i8x16_;
220 pub use i8x16_::*;
221 
222 mod i16x16_;
223 pub use i16x16_::*;
224 
225 mod i8x32_;
226 pub use i8x32_::*;
227 
228 mod i16x8_;
229 pub use i16x8_::*;
230 
231 mod i32x4_;
232 pub use i32x4_::*;
233 
234 mod i32x8_;
235 pub use i32x8_::*;
236 
237 mod i64x2_;
238 pub use i64x2_::*;
239 
240 mod i64x4_;
241 pub use i64x4_::*;
242 
243 mod u8x16_;
244 pub use u8x16_::*;
245 
246 mod u16x8_;
247 pub use u16x8_::*;
248 
249 mod u32x4_;
250 pub use u32x4_::*;
251 
252 mod u32x8_;
253 pub use u32x8_::*;
254 
255 mod u64x2_;
256 pub use u64x2_::*;
257 
258 mod u64x4_;
259 pub use u64x4_::*;
260 
261 #[allow(non_camel_case_types)]
262 #[repr(C, align(16))]
263 #[rustfmt::skip]
264 union ConstUnionHack128bit {
265   f32a4: [f32; 4],
266   f64a2: [f64; 2],
267   i8a16: [i8; 16],
268   i16a8: [i16; 8],
269   i32a4: [i32; 4],
270   i64a2: [i64; 2],
271   u8a16: [u8; 16],
272   u16a8: [u16; 8],
273   u32a4: [u32; 4],
274   u64a2: [u64; 2],
275   f32x4: f32x4,
276   f64x2: f64x2,
277   i8x16: i8x16,
278   i16x8: i16x8,
279   i32x4: i32x4,
280   i64x2: i64x2,
281   u8x16: u8x16,
282   u16x8: u16x8,
283   u32x4: u32x4,
284   u64x2: u64x2,
285   u128:  u128,
286 }
287 
288 #[allow(non_camel_case_types)]
289 #[repr(C, align(16))]
290 #[rustfmt::skip]
291 union ConstUnionHack256bit {
292   f32a8:  [f32; 8],
293   f64a4:  [f64; 4],
294   i8a32:  [i8; 32],
295   i16a16: [i16; 16],
296   i32a8:  [i32; 8],
297   i64a4:  [i64; 4],
298   u8a32:  [u8; 32],
299   u16a16: [u16; 16],
300   u32a8:  [u32; 8],
301   u64a4:  [u64; 4],
302   u128x2: [u128; 2],
303   f32x8:  f32x8,
304   f64x4:  f64x4,
305   i8x32:  i8x32,
306   i16x16: i16x16,
307   i32x8:  i32x8,
308   i64x4:  i64x4,
309   // u8x32:  u8x32,
310   // u16x16: u16x16,
311   u32x8:  u32x8,
312   u64x4:  u64x4,
313 }
314 
/// Bitwise select between two values.
///
/// For every bit position the result takes the bit from `y` where the
/// corresponding bit of `mask` is set, and the bit from `n` where it is clear.
///
/// Uses the identity `n ^ ((n ^ y) & mask)`, which only needs XOR and AND.
// NOTE(review): `dead_code` is allowed presumably because only some build
// configurations call this helper; confirm against the callers.
#[allow(dead_code)]
fn generic_bit_blend<T>(mask: T, y: T, n: T) -> T
where
  T: Copy + BitXor<Output = T> + BitAnd<Output = T>,
{
  n ^ ((n ^ y) & mask)
}
322 
/// Given `type.op(type)` where the type is `Copy`, implements `type.op(&type)`.
///
/// Input is a comma separated series of `(Trait, method) => [Type, ...]`
/// entries. For each listed type this generates `impl Trait<&Self> for Type`
/// which copies the right-hand side out of the reference and forwards to the
/// existing by-value `impl Trait<Self> for Type`.
macro_rules! bulk_impl_op_ref_self_for {
  ($(($op:ident, $method:ident) => [$($t:ty),+]),+ $(,)?) => {
    $( // do each trait/list matching given
      $( // do the current trait for each type in its list.
        impl $op<&Self> for $t {
          type Output = Self;
          // `#[must_use]` is deliberately absent: the attribute does nothing
          // on a method inside a trait implementation.
          #[inline]
          fn $method(self, rhs: &Self) -> Self::Output {
            // Name the by-value impl explicitly so it is clear this is not a
            // recursive call into the by-reference impl being defined.
            <Self as $op<Self>>::$method(self, *rhs)
          }
        }
      )+
    )+
  };
}
340 
341 bulk_impl_op_ref_self_for! {
342   (Add, add) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
343   (Sub, sub) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
344   (Mul, mul) => [f32x8, f32x4, f64x4, f64x2, i16x8, i16x16, i32x8, i32x4],
345   (Div, div) => [f32x8, f32x4, f64x4, f64x2],
346   (BitAnd, bitand) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
347   (BitOr, bitor) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
348   (BitXor, bitxor) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
349 }
350 
/// Given `type.op(rhs)` where the type is `Copy`, implements
/// `type.op_assign(rhs)`.
///
/// Each entry has the form
/// `(AssignTrait<Rhs>, method, method_assign) => [Type, ...]`. The generated
/// `method_assign` computes `self.method(rhs)` and stores the result back
/// into `self`.
macro_rules! bulk_impl_op_assign_for {
  ($(($assign_trait:ident<$rhs_ty:ty>, $binary_method:ident, $assign_method:ident) => [$($simd:ty),+]),+ $(,)?) => {
    $( // one pass per (trait, methods) entry
      $( // one impl per type listed for that entry
        impl $assign_trait<$rhs_ty> for $simd {
          #[inline]
          fn $assign_method(&mut self, rhs: $rhs_ty) {
            let updated = self.$binary_method(rhs);
            *self = updated;
          }
        }
      )+
    )+
  };
}
366 
367 // Note: remember to update bulk_impl_op_ref_self_for first or this will give
368 // weird errors!
369 bulk_impl_op_assign_for! {
370   (AddAssign<Self>, add, add_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
371   (AddAssign<&Self>, add, add_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
372   (SubAssign<Self>, sub, sub_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
373   (SubAssign<&Self>, sub, sub_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
374   (MulAssign<Self>, mul, mul_assign) => [f32x8, f32x4, f64x4, f64x2, i16x8, i16x16, i32x8, i32x4],
375   (MulAssign<&Self>, mul, mul_assign) => [f32x8, f32x4, f64x4, f64x2, i16x8, i16x16, i32x8, i32x4],
376   (DivAssign<Self>, div, div_assign) => [f32x8, f32x4, f64x4, f64x2],
377   (DivAssign<&Self>, div, div_assign) => [f32x8, f32x4, f64x4, f64x2],
378   (BitAndAssign<Self>, bitand, bitand_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
379   (BitAndAssign<&Self>, bitand, bitand_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
380   (BitOrAssign<Self>, bitor, bitor_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
381   (BitOrAssign<&Self>, bitor, bitor_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
382   (BitXorAssign<Self>, bitxor, bitxor_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
383   (BitXorAssign<&Self>, bitxor, bitxor_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
384 }
385 
/// Implements `Neg` for each listed type and for shared references to it.
///
/// Negation is computed as `Default::default() - value`, so every listed type
/// must be `Copy`, implement `Default`, and implement `Sub<Self>`.
///
/// NOTE(review): this assumes `default()` is the all-zero value. For floating
/// point lanes `0.0 - 0.0` is `+0.0`, so negating positive zero would not
/// produce `-0.0` the way the scalar `-x` does; confirm that is acceptable.
macro_rules! impl_simple_neg {
  ($($t:ty),+ $(,)?) => {
    $(
      impl Neg for $t {
        type Output = Self;
        // `#[must_use]` is deliberately absent: the attribute does nothing
        // on a method inside a trait implementation.
        #[inline]
        fn neg(self) -> Self::Output {
          Self::default() - self
        }
      }
      impl Neg for &'_ $t {
        type Output = $t;
        #[inline]
        fn neg(self) -> Self::Output {
          // Copy the value out and reuse the by-value impl above.
          -*self
        }
      }
    )+
  };
}
408 
409 impl_simple_neg! {
410   f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x2, u64x4
411 }
412 
/// Implements `Not` for each listed type and for shared references to it.
///
/// The complement is computed by XOR-ing the value with an all-ones mask.
/// The mask is built from a byte array whose length is `size_of` the target
/// type, so the `cast` sizes always match whatever the width of the type is.
/// Casting from a fixed 16-byte `u128` would fail the size check in `cast`
/// for any 32-byte type.
///
/// Every listed type must be `Copy`, implement `BitXor<Output = Self>`, and
/// be a valid target for `cast` from a byte array of its own size.
macro_rules! impl_simple_not {
  ($($t:ty),+ $(,)?) => {
    $(
      impl Not for $t {
        type Output = Self;
        // `#[must_use]` is deliberately absent: the attribute does nothing
        // on a method inside a trait implementation.
        #[inline]
        fn not(self) -> Self::Output {
          // Width of this particular type, resolved at compile time.
          const BYTES: usize = core::mem::size_of::<$t>();
          self ^ cast::<[u8; BYTES], $t>([u8::MAX; BYTES])
        }
      }
      impl Not for &'_ $t {
        type Output = $t;
        #[inline]
        fn not(self) -> Self::Output {
          // Copy the value out and reuse the by-value impl above.
          !*self
        }
      }
    )+
  };
}
435 
436 impl_simple_not! {
437   f32x4, i8x32, i8x16, i16x8, i16x16, i32x4, i64x2, u8x16, u16x8, u32x4, u64x2,
438 }
439 
/// Implements `core::iter::Sum<RHS>` for each listed type, for every `RHS`
/// that the type can `+=`.
///
/// The running total starts from `Self::zeroed()` and each item of the
/// iterator is added to it in order.
macro_rules! impl_simple_sum {
  ($($simd:ty),+ $(,)?) => {
    $(
      impl<RHS> core::iter::Sum<RHS> for $simd where $simd: AddAssign<RHS> {
        fn sum<I: Iterator<Item = RHS>>(iter: I) -> Self {
          iter.fold(Self::zeroed(), |mut running, item| {
            running += item;
            running
          })
        }
      }
    )+
  };
}
455 
456 impl_simple_sum! {
457   f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x2, u64x4
458 }
459 
/// Implements `core::iter::Product<RHS>` for each listed floating point
/// type, for every `RHS` that the type can `*=`.
///
/// The running product starts from `Self::from(1.0)` and each item of the
/// iterator is multiplied into it in order.
macro_rules! impl_floating_product {
  ($($simd:ty),+ $(,)?) => {
    $(
      impl<RHS> core::iter::Product<RHS> for $simd where $simd: MulAssign<RHS> {
        fn product<I: Iterator<Item = RHS>>(iter: I) -> Self {
          iter.fold(Self::from(1.0), |mut running, item| {
            running *= item;
            running
          })
        }
      }
    )+
  };
}
475 
476 impl_floating_product! {
477   f32x8, f32x4, f64x4, f64x2
478 }
479 
/// Implements `core::iter::Product<RHS>` for each listed integer type, for
/// every `RHS` that the type can `*=`.
///
/// The running product starts from `Self::from(1)` and each item of the
/// iterator is multiplied into it in order.
macro_rules! impl_integer_product {
  ($($simd:ty),+ $(,)?) => {
    $(
      impl<RHS> core::iter::Product<RHS> for $simd where $simd: MulAssign<RHS> {
        fn product<I: Iterator<Item = RHS>>(iter: I) -> Self {
          iter.fold(Self::from(1), |mut running, item| {
            running *= item;
            running
          })
        }
      }
    )+
  };
}
495 
496 impl_integer_product! {
497   i16x8, i32x4, i32x8,
498 }
499 
/// Implements `From<a> for b` and `From<b> for a` by just calling `cast`.
///
/// Input is a comma separated list of `(ArrayType, SimdType)` pairs. Both
/// conversions reinterpret the value through `cast`, so the two types in a
/// pair must be the same size and both acceptable to `cast`.
macro_rules! impl_from_a_for_b_with_cast {
  ($(($arr:ty, $simd:ty)),+  $(,)?) => {
    $(impl From<$arr> for $simd {
      // `#[must_use]` is deliberately absent: the attribute does nothing on
      // a method inside a trait implementation.
      #[inline]
      fn from(arr: $arr) -> Self {
        cast(arr)
      }
    }
    impl From<$simd> for $arr {
      #[inline]
      fn from(simd: $simd) -> Self {
        cast(simd)
      }
    })+
  };
}
519 
520 impl_from_a_for_b_with_cast! {
521   ([f32;8], f32x8),
522   ([f32;4], f32x4), ([f64;4], f64x4), ([f64;2], f64x2),
523   ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2), ([i64;4], i64x4),
524   ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2), ([u64;4], u64x4),
525 }
526 
/// Implements splatting a single element across every lane, both as
/// `From<Elem>` and as an inherent `splat` constructor.
///
/// Input is a comma separated list of `([Elem; LEN], SimdType)` pairs, where
/// `LEN` is the number of lanes. The value is built as `[elem; LEN]` and
/// reinterpreted through `cast`.
macro_rules! impl_from_single_value {
  ($(([$elem:ty;$len:expr], $simd:ty)),+  $(,)?) => {
    $(impl From<$elem> for $simd {
      /// Splats the single value given across all lanes.
      // `#[must_use]` is deliberately absent: the attribute does nothing on
      // a method inside a trait implementation.
      #[inline]
      fn from(elem: $elem) -> Self {
        // Single source of truth: `splat` holds the actual construction.
        Self::splat(elem)
      }
    }
    impl $simd {
      /// Returns a value with every lane set to `elem`.
      #[inline]
      #[must_use]
      pub fn splat(elem: $elem) -> $simd {
        cast([elem; $len])
      }
    })+
  };
}
546 
547 impl_from_single_value! {
548   ([f32;8], f32x8),
549   ([f32;4], f32x4), ([f64;4], f64x4), ([f64;2], f64x2),
550   ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2), ([i64;4], i64x4),
551   ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2), ([u64;4], u64x4),
552 }
553 
/// formatter => [(arr, simd)+],+
///
/// For each formatting trait and each `(array type, SIMD type)` pair, this
/// implements the trait for the SIMD type. The value is `cast` to the array
/// type and printed as `(lane0, lane1, ...)`, with each lane formatted by the
/// same trait using the caller's formatter, so flags such as width or `#`
/// apply to every lane.
macro_rules! impl_formatter_for {
  ($($fmt_trait:ident => [$(($lanes_ty:ty, $simd_ty:ty)),+]),+ $(,)?) => {
    $( // once per formatting trait
      $( // once per SIMD type listed for that trait
        impl $fmt_trait for $simd_ty {
          fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
            let lanes: $lanes_ty = cast(*self);
            f.write_str("(")?;
            let mut remaining = lanes.iter();
            // The first lane has no separator; every later lane is preceded
            // by ", ".
            if let Some(first) = remaining.next() {
              $fmt_trait::fmt(first, f)?;
              for lane in remaining {
                f.write_str(", ")?;
                $fmt_trait::fmt(lane, f)?;
              }
            }
            f.write_str(")")
          }
        }
      )+
    )+
  }
}
576 
577 impl_formatter_for! {
578   Binary => [([u32;8], f32x8), ([u32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
579   ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
580   ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
581   Debug => [([f32;8], f32x8), ([f32;4], f32x4), ([f64;4], f64x4), ([f64;2], f64x2),
582   ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
583   ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
584   Display => [([f32;8], f32x8), ([f32;4], f32x4), ([f64;4], f64x4), ([f64;2], f64x2),
585   ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
586   ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
587   LowerExp => [([f32;8], f32x8), ([f32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
588   ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
589   ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
590   LowerHex => [([u32;8], f32x8), ([u32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
591   ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
592   ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
593   Octal => [([u32;8], f32x8), ([u32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
594   ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
595   ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
596   UpperExp => [([u32;8], f32x8), ([u32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
597   ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
598   ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
599   UpperHex => [([u32;8], f32x8), ([u32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
600   ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
601   ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
602 }
603 
604 // With const generics this could be simplified I hope
605 macro_rules! from_array {
606   ($ty:ty,$dst:ty,$dst_wide:ident,32) => {
607     impl From<&[$ty]> for $dst_wide {
608       fn from(src: &[$ty]) -> $dst_wide {
609         match src.len() {
610           32 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst, src[27] as $dst, src[28] as $dst, src[29] as $dst, src[30] as $dst, src[31] as $dst,]),
611           31 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst, src[27] as $dst, src[28] as $dst, src[29] as $dst, src[30] as $dst,0 as $dst,]),
612           30 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst, src[27] as $dst, src[28] as $dst, src[29] as $dst,0 as $dst,0 as $dst,]),
613           29 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst, src[27] as $dst, src[28] as $dst,0 as $dst,0 as $dst,0 as $dst,]),
614           28 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst, src[27] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
615           27 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
616           26 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
617           25 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
618           24 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
619           23 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
620           22 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
621           21 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
622           20 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
623           19 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
624           18 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
625           17 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
626           16 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
627           15 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
628           14 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
629           13 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
630           12 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
631           11 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
632           10 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
633           9 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
634           8 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
635           7 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
636           6 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
637           5 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
638           4 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
639           3 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
640           2 => $dst_wide::from([src[0] as $dst, src[1] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
641           1 => $dst_wide::from([src[0] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
642           _ => panic!(
643             "Converting from an array larger than what can be stored in $dst_wide"
644           ),
645         }
646       }
647     }
648   };
649   ($ty:ty,$dst:ty,$dst_wide:ident,16) => {
650     impl From<&[$ty]> for $dst_wide {
651       fn from(src: &[$ty]) -> $dst_wide {
652         match src.len() {
653           16 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst,]),
654           15 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst,0 as $dst,]),
655           14 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst,0 as $dst,0 as $dst,]),
656           13 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst,0 as $dst,0 as $dst,0 as $dst,]),
657           12 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
658           11 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
659           10 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
660           9 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
661           8 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
662           7 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
663           6 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
664           5 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
665           4 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
666           3 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
667           2 => $dst_wide::from([src[0] as $dst, src[1] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
668           1 => $dst_wide::from([src[0] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
669           _ => panic!(
670             "Converting from an array larger than what can be stored in $dst_wide"
671           ),
672         }
673       }
674     }
675   };
676   ($ty:ty,$dst:ty,$dst_wide:ident,8) => {
677     impl From<&[$ty]> for $dst_wide {
678       fn from(src: &[$ty]) -> $dst_wide {
679         match src.len() {
680           8 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst,]),
681           7 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst,0 as $dst,]),
682           6 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst,0 as $dst,0 as $dst,]),
683           5 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst,0 as $dst,0 as $dst,0 as $dst,]),
684           4 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
685           3 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
686           2 => $dst_wide::from([src[0] as $dst, src[1] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
687           1 => $dst_wide::from([src[0] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
688           0 => $dst_wide::from([0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
689           _ => panic!(
690             "Converting from an array larger than what can be stored in $dst_wide"
691           ),
692         }
693       }
694     }
695   };
696   ($ty:ty,$dst:ty,$dst_wide:ident,4) => {
697     impl From<&[$ty]> for $dst_wide {
698       fn from(src: &[$ty]) -> $dst_wide {
699         match src.len() {
700           4 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst,]),
701           3 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst,0 as $dst,]),
702           2 => $dst_wide::from([src[0] as $dst, src[1] as $dst,0 as $dst,0 as $dst,]),
703           1 => $dst_wide::from([src[0] as $dst,0 as $dst,0 as $dst,0 as $dst,]),
704           _ => panic!(
705             "Converting from an array larger than what can be stored in $dst_wide"
706           ),
707         }
708       }
709     }
710   };
711 }
712 
// Slice-to-wide conversions generated by `from_array!`.
//
// Arguments, in order: source element type, destination lane type (each
// element is converted with `as`), destination wide type, lane count.
// Each expansion is an `impl From<&[src]> for wide` that zero-fills the lanes
// past the end of the slice and panics for any length its `match` does not
// list.
//
// NOTE(review): only the 8-lane arm lists length 0. The 32-, 16- and 4-lane
// arms send an empty slice to the "larger than what can be stored" panic
// branch — confirm that this difference is intended.
from_array!(i8, i8, i8x32, 32);
from_array!(i8, i8, i8x16, 16);
from_array!(i8, i32, i32x8, 8);
from_array!(u8, u8, u8x16, 16);
from_array!(i16, i16, i16x16, 16);
from_array!(i32, i32, i32x8, 8);
from_array!(f32, f32, f32x8, 8);
from_array!(f32, f32, f32x4, 4);
from_array!(f64, f64, f64x4, 4);
from_array!(u64, u64, u64x4, 4);
from_array!(i64, i64, i64x4, 4);
724 
/// Computes the square root of `x` using integer arithmetic on its IEEE-754
/// bit pattern, without calling any hardware or `std` square root.
///
/// The root is generated one bit at a time (one more bit than the result
/// keeps) and the extra bit is then used to round the result.
///
/// Special cases:
/// * NaN gives NaN.
/// * `+inf` gives `+inf`.
/// * `+0.0` and `-0.0` are returned unchanged, sign included.
/// * Every other negative input, `-inf` included, gives NaN.
///
/// NOTE(review): the structure follows the classic fdlibm `e_sqrt.c` routine;
/// keep the two in sync when touching the bit manipulation.
// May be unused depending on which sqrt backend the build selects.
#[allow(unused)]
fn software_sqrt(x: f64) -> f64 {
  use core::num::Wrapping;
  type W32 = Wrapping<u32>;
  const fn w(u: u32) -> W32 {
    Wrapping(u)
  }
  // Top bit of a 32-bit word.
  let sign: W32 = w(0x80000000);

  // Split the bit pattern into its high and low 32-bit words. Going through
  // `to_bits` keeps the split independent of the target's endianness.
  let bits: u64 = x.to_bits();
  let mut ix0: i32 = (bits >> 32) as u32 as i32;
  let mut ix1: W32 = w(bits as u32);

  // inf and nan
  if x.is_nan() {
    return f64::NAN;
  }
  if ix0 & 0x7ff00000 == 0x7ff00000 {
    // +inf stays +inf, -inf turns into inf - inf = NaN.
    return x * x + x;
  }

  // zero and negative values
  if ix0 <= 0 {
    if ((ix0 & (!sign).0 as i32) | (ix1.0 as i32)) == 0 {
      // +-0 is its own square root.
      return x;
    } else if ix0 < 0 {
      // 0 / 0 = NaN for every negative non-zero input.
      return (x - x) / (x - x);
    }
  }

  // normalize
  let mut m: i32 = ix0 >> 20;
  if m == 0 {
    // Subnormal: shift the 64-bit mantissa left until bit 20 of the high word
    // (the implicit-one position) is set, lowering the exponent to match.
    while ix0 == 0 {
      m -= 21;
      ix0 |= (ix1 >> 11).0 as i32;
      ix1 <<= 21;
    }
    let mut i: i32 = 0;
    while ix0 & 0x00100000 == 0 {
      ix0 <<= 1;
      i += 1;
    }
    m -= i - 1;
    // `ix0` was already shifted by `i` above; bring in the `i` bits that
    // leave the top of the low word. With `i == 0` nothing moves, and a
    // shift by 32 would be out of range, so skip it.
    if i > 0 {
      ix0 |= (ix1.0 >> (32 - i)) as i32;
      ix1 <<= i as usize;
    }
  }
  // un-bias exponent
  m -= 1023;
  ix0 = (ix0 & 0x000fffff) | 0x00100000;
  if (m & 1) != 0 {
    // odd m, double the input to make it even
    ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
    ix1 += ix1;
  }
  m >>= 1;

  // generate sqrt bit by bit
  ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
  ix1 += ix1;
  // q and q1 store the sqrt(x); s0 and s1 are the matching work words.
  let mut q: i32 = 0;
  let mut q1: W32 = w(0);
  let mut s0: i32 = 0;
  let mut s1: W32 = w(0);
  // our bit that moves from left to right through the high word
  let mut r: W32 = w(0x00200000);
  while r != w(0) {
    let t = s0 + r.0 as i32;
    if t <= ix0 {
      s0 = t + r.0 as i32;
      ix0 -= t;
      q += r.0 as i32;
    }
    ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
    ix1 += ix1;
    r >>= 1;
  }
  // same again for the low word, now with 64-bit compare and subtract
  r = sign;
  while r != w(0) {
    let t1 = s1 + r;
    let t = s0;
    if (t < ix0) || ((t == ix0) && (t1 <= ix1)) {
      s1 = t1 + r;
      if (t1 & sign) == sign && (s1 & sign) == w(0) {
        // carry out of the low word
        s0 += 1;
      }
      ix0 -= t;
      if ix1 < t1 {
        // borrow from the high word
        ix0 -= 1;
      }
      ix1 -= t1;
      q1 += r;
    }
    ix0 += ix0 + ((ix1 & sign) >> 31).0 as i32;
    ix1 += ix1;
    r >>= 1;
  }

  // use floating add to find out rounding direction
  if (ix0 | (ix1.0 as i32)) != 0 {
    let mut z: f64 = 1.0 - 1.0e-300;
    if z >= 1.0 {
      z = 1.0 + 1.0e-300;
      if q1 == w(0xffffffff) {
        q1 = w(0);
        q += 1;
      } else if z > 1.0 {
        if q1 == w(0xfffffffe) {
          q += 1;
        }
        q1 += w(2);
      } else {
        q1 += q1 & w(1);
      }
    }
  }

  // finish up: drop the extra bit and re-attach the halved exponent
  ix0 = (q >> 1) + 0x3fe00000;
  ix1 = q1 >> 1;
  if q & 1 == 1 {
    ix1 |= sign;
  }
  ix0 += m << 20;

  f64::from_bits(((ix0 as u32 as u64) << 32) | (ix1.0 as u64))
}
872 
// Checks `software_sqrt` on special values, signed zeros and exact squares.
#[test]
fn test_software_sqrt() {
  // NaN, infinities and negative inputs
  assert!(software_sqrt(f64::NAN).is_nan());
  assert_eq!(software_sqrt(f64::INFINITY), f64::INFINITY);
  assert!(software_sqrt(-1.0).is_nan());
  assert!(software_sqrt(f64::NEG_INFINITY).is_nan());

  // Signed zeros: `0.0 == -0.0` compares equal, so the sign bit has to be
  // checked separately to see that it is preserved.
  let pos_zero = software_sqrt(0.0);
  assert_eq!(pos_zero, 0.0);
  assert!(pos_zero.is_sign_positive());
  let neg_zero = software_sqrt(-0.0);
  assert_eq!(neg_zero, -0.0);
  assert!(neg_zero.is_sign_negative());

  // exact squares
  let squares: [(f64, f64); 5] = [
    (4.0, 2.0),
    (9.0, 3.0),
    (16.0, 4.0),
    (25.0, 5.0),
    (5000.0 * 5000.0, 5000.0),
  ];
  for &(input, root) in squares.iter() {
    assert_eq!(software_sqrt(input), root);
  }
}
887 
/// Comparison of `self` against a right-hand side of type `Rhs` (which
/// defaults to `Self`) through the `cmp_eq` method.
///
/// NOTE(review): by its name this is the "equal to" test; presumably the SIMD
/// types implement it lane by lane — confirm against the implementations.
pub trait CmpEq<Rhs = Self> {
  /// Type of the comparison result, chosen by each implementation.
  type Output;
  /// Compares `self` with `rhs`, consuming both.
  fn cmp_eq(self, rhs: Rhs) -> Self::Output;
}
892 
/// Comparison of `self` against a right-hand side of type `Rhs` (which
/// defaults to `Self`) through the `cmp_gt` method.
///
/// NOTE(review): by its name this is the "greater than" test; presumably the
/// SIMD types implement it lane by lane — confirm against the implementations.
pub trait CmpGt<Rhs = Self> {
  /// Type of the comparison result, chosen by each implementation.
  type Output;
  /// Compares `self` with `rhs`, consuming both.
  fn cmp_gt(self, rhs: Rhs) -> Self::Output;
}
897 
/// Comparison of `self` against a right-hand side of type `Rhs` (which
/// defaults to `Self`) through the `cmp_ge` method.
///
/// NOTE(review): by its name this is the "greater than or equal" test;
/// presumably the SIMD types implement it lane by lane — confirm against the
/// implementations.
pub trait CmpGe<Rhs = Self> {
  /// Type of the comparison result, chosen by each implementation.
  type Output;
  /// Compares `self` with `rhs`, consuming both.
  fn cmp_ge(self, rhs: Rhs) -> Self::Output;
}
902 
/// Comparison of `self` against a right-hand side of type `Rhs` (which
/// defaults to `Self`) through the `cmp_ne` method.
///
/// NOTE(review): by its name this is the "not equal" test; presumably the
/// SIMD types implement it lane by lane — confirm against the implementations.
pub trait CmpNe<Rhs = Self> {
  /// Type of the comparison result, chosen by each implementation.
  type Output;
  /// Compares `self` with `rhs`, consuming both.
  fn cmp_ne(self, rhs: Rhs) -> Self::Output;
}
907 
/// Comparison of `self` against a right-hand side of type `Rhs` (which
/// defaults to `Self`) through the `cmp_lt` method.
///
/// NOTE(review): by its name this is the "less than" test; presumably the
/// SIMD types implement it lane by lane — confirm against the implementations.
pub trait CmpLt<Rhs = Self> {
  /// Type of the comparison result, chosen by each implementation.
  type Output;
  /// Compares `self` with `rhs`, consuming both.
  fn cmp_lt(self, rhs: Rhs) -> Self::Output;
}
912 
/// Comparison of `self` against a right-hand side of type `Rhs` (which
/// defaults to `Self`) through the `cmp_le` method.
///
/// NOTE(review): by its name this is the "less than or equal" test;
/// presumably the SIMD types implement it lane by lane — confirm against the
/// implementations.
pub trait CmpLe<Rhs = Self> {
  /// Type of the comparison result, chosen by each implementation.
  type Output;
  /// Compares `self` with `rhs`, consuming both.
  fn cmp_le(self, rhs: Rhs) -> Self::Output;
}
917 
/// Implements a binary-operation trait for a wide type with a scalar
/// right-hand side.
///
/// Usage: `bulk_impl_const_rhs_op!((Trait, method) => [(Wide, Scalar), ...]);`
/// A trailing comma after the last pair is accepted but not required.
///
/// For every `(Wide, Scalar)` pair this emits `impl Trait<Scalar> for Wide`
/// with `Output = Wide`. The generated method broadcasts the scalar with
/// `Wide::splat` and then calls the method of the same name with the wide
/// value, so `Wide` must already provide `method` for a `Wide` right-hand
/// side.
macro_rules! bulk_impl_const_rhs_op {
  (($op:ident,$method:ident) => [$(($lhs:ty,$rhs:ty)),+ $(,)?]) => {
    $(
    impl $op<$rhs> for $lhs {
      type Output = Self;
      // No `#[must_use]` here: the attribute does nothing on a function in a
      // trait implementation.
      #[inline]
      fn $method(self, rhs: $rhs) -> Self::Output {
        self.$method(<$lhs>::splat(rhs))
      }
    }
    )+
  };
}
932 
// Scalar right-hand-side comparisons: for each listed wide float type, the
// comparison trait is also implemented against the paired scalar type. The
// generated method splats the scalar and calls the method of the same name
// with the resulting wide value.
bulk_impl_const_rhs_op!((CmpEq, cmp_eq) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
bulk_impl_const_rhs_op!((CmpLt, cmp_lt) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
bulk_impl_const_rhs_op!((CmpGt, cmp_gt) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
bulk_impl_const_rhs_op!((CmpNe, cmp_ne) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
bulk_impl_const_rhs_op!((CmpLe, cmp_le) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
bulk_impl_const_rhs_op!((CmpGe, cmp_ge) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
939