1 #![no_std]
2 #![allow(non_camel_case_types)]
3
4 //! A crate to help you go wide.
5 //!
6 //! This crate provides SIMD-compatible data types.
7 //!
//! When possible, explicit SIMD is used with all the math operations here. As a
//! fallback, the fact that all the lanes of a fixed-length array are doing
//! the same thing will often make LLVM notice that it should use SIMD
//! instructions to complete the task. In the worst case, the code just becomes
//! totally scalar (though the math is still correct, at least).
13 //!
14 //! ## Crate Features
15 //!
//! * `std`: This causes the crate to link to `std`.
//!   * Currently this just improves the performance of `sqrt` when an explicit
//!     SIMD `sqrt` isn't available.
19
// Note(Lokathor): Due to standard library magic, the std-only methods for f32
// and f64 will automatically be available simply by declaring the
// `extern crate std` item below.
22
23 // TODO
24 // Add/Sub/Mul/Div with constant
25 // Shuffle left/right/by index
26
27 #[cfg(feature = "std")]
28 extern crate std;
29
30 use core::{
31 fmt::{
32 Binary, Debug, Display, LowerExp, LowerHex, Octal, UpperExp, UpperHex,
33 },
34 ops::*,
35 };
36
37 #[allow(unused_imports)]
38 use safe_arch::*;
39
40 use bytemuck::*;
41
42 #[macro_use]
43 mod macros;
44
/// Compile-time `if` / `else if` / `else` chain over `cfg` predicates.
///
/// Exactly one branch is emitted: the first whose `#[cfg(...)]` predicates all
/// hold, or the trailing `else` branch (when present) if none do. Several
/// comma-separated predicates inside one `#[cfg(...)]` are combined with
/// `all(...)`.
///
/// ```ignore
/// pick! {
///   if #[cfg(target_feature = "sse2")] {
///     /* sse2 version */
///   } else {
///     /* fallback */
///   }
/// }
/// ```
macro_rules! pick {
  // Entry form 1: chain terminated by a plain `else { ... }`.
  ($(if #[cfg($($test:meta),*)] {
    $($if_tokens:tt)*
  })else+ else {
    $($else_tokens:tt)*
  }) => {
    pick!{
      @__forests [ ] ;
      $( [ {$($test),*} {$($if_tokens)*} ], )*
      [ { } {$($else_tokens)*} ],
    }
  };
  // Entry form 2: chain with no final `else`.
  (if #[cfg($($if_meta:meta),*)] {
    $($if_tokens:tt)*
  } $(else if #[cfg($($else_meta:meta),*)] {
    $($else_tokens:tt)*
  })*) => {
    pick!{
      @__forests [ ] ;
      [ {$($if_meta),*} {$($if_tokens)*} ],
      $( [ {$($else_meta),*} {$($else_tokens)*} ], )*
    }
  };
  // Internal: every branch has been handled.
  (@__forests [$($not:meta,)*];) => {
    /* halt expansion */
  };
  // Internal: emit the head branch guarded by its own predicates plus the
  // negation of every earlier branch, then recurse on the remaining branches.
  (@__forests [$($not:meta,)*]; [{$($m:meta),*} {$($tokens:tt)*}], $($rest:tt)*) => {
    #[cfg(all( $($m,)* not(any($($not),*)) ))]
    pick!{ @__identity $($tokens)* }
    // Record this branch as ONE condition (`all(...)` of its predicates).
    // Recording the predicates individually would make later branches
    // require that *none* of them hold, so a branch like `#[cfg(a, b)]` with
    // only `a` true would disable both itself and its `else`.
    pick!{ @__forests [ $($not,)* all($($m),*), ] ; $($rest)* }
  };
  // Internal: re-emit tokens unchanged so the `#[cfg]` above applies to them.
  (@__identity $($tokens:tt)*) => {
    $($tokens)*
  };
}
80
81 // TODO: make these generic over `mul_add`? Worth it?
82
/// Evaluates `c0 + c1*x + c2*x^2` using `mul_add`.
///
/// `$x` is evaluated once. The value must support `*` with itself and a
/// `mul_add(a, b)` method.
macro_rules! polynomial_2 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr $(,)?) => {{
    let v = $x;
    let v_sq = v * v;
    // v^2 * c2 + (v * c1 + c0)
    v_sq.mul_add($c2, v.mul_add($c1, $c0))
  }};
}
90
/// Evaluates `c0 + c1*x + c2*x^2 + c3*x^3` using `mul_add`.
///
/// `$x` is evaluated once. `$c1` and `$c3` are used as `mul_add` receivers, so
/// they must be expressions of a concrete type providing that method.
macro_rules! polynomial_3 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr $(,)?) => {{
    let v = $x;
    let v_sq = v * v;
    // (c3 * v + c2) * v^2 + (c1 * v + c0)
    $c3.mul_add(v, $c2).mul_add(v_sq, $c1.mul_add(v, $c0))
  }};
}
98
/// Evaluates `c0 + c1*x + c2*x^2 + c3*x^3 + c4*x^4`.
///
/// The cubic part is computed with `mul_add`; the quartic term is a plain
/// multiply that is added on afterwards. `$x` is evaluated once.
macro_rules! polynomial_4 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr ,$c3:expr, $c4:expr $(,)?) => {{
    let v = $x;
    let v_sq = v * v;
    let v_quad = v_sq * v_sq;
    // ((c3 * v + c2) * v^2 + (c1 * v + c0)) + c4 * v^4
    $c3.mul_add(v, $c2).mul_add(v_sq, $c1.mul_add(v, $c0)) + $c4 * v_quad
  }};
}
107
/// Evaluates `c0 + c1*x + c2*x^2 + c3*x^3 + c4*x^4 + c5*x^5` using `mul_add`.
///
/// `$x` is evaluated once. `$c1`, `$c3` and `$c5` are used as `mul_add`
/// receivers.
macro_rules! polynomial_5 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr, $c4:expr, $c5:expr $(,)?) => {{
    let v = $x;
    let v_sq = v * v;
    let v_quad = v_sq * v_sq;
    // (c3 * v + c2) * v^2 + ((c5 * v + c4) * v^4 + (c1 * v + c0))
    $c3.mul_add(v, $c2).mul_add(
      v_sq,
      $c5.mul_add(v, $c4).mul_add(v_quad, $c1.mul_add(v, $c0)),
    )
  }};
}
118
/// Evaluates the degree-5 polynomial whose leading coefficient is fixed at 1:
/// `c0 + c1*x + c2*x^2 + c3*x^3 + c4*x^4 + x^5`.
///
/// `$x` is evaluated once.
macro_rules! polynomial_5n {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr, $c4:expr $(,)?) => {{
    let v = $x;
    let v_sq = v * v;
    let v_quad = v_sq * v_sq;
    // v^2 * (v * c3 + c2) + (v^4 * (c4 + v) + (v * c1 + c0))
    v_sq.mul_add(
      v.mul_add($c3, $c2),
      v_quad.mul_add($c4 + v, v.mul_add($c1, $c0)),
    )
  }};
}
127
/// Evaluates `c0 + c1*x + ... + c6*x^6` using `mul_add`.
///
/// `$x` is evaluated once.
macro_rules! polynomial_6 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr, $c4:expr, $c5:expr ,$c6:expr $(,)?) => {{
    let v = $x;
    let v_sq = v * v;
    let v_quad = v_sq * v_sq;
    // v^4 * (v^2 * c6 + (v * c5 + c4))
    //   + (v^2 * (v * c3 + c2) + (v * c1 + c0))
    v_quad.mul_add(
      v_sq.mul_add($c6, v.mul_add($c5, $c4)),
      v_sq.mul_add(v.mul_add($c3, $c2), v.mul_add($c1, $c0)),
    )
  }};
}
139
/// Evaluates the degree-6 polynomial whose leading coefficient is fixed at 1:
/// `c0 + c1*x + ... + c5*x^5 + x^6`.
///
/// `$x` is evaluated once.
macro_rules! polynomial_6n {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr, $c4:expr, $c5:expr $(,)?) => {{
    let v = $x;
    let v_sq = v * v;
    let v_quad = v_sq * v_sq;
    // v^4 * (v * c5 + (v^2 + c4))
    //   + (v^2 * (v * c3 + c2) + (v * c1 + c0))
    v_quad.mul_add(
      v.mul_add($c5, v_sq + $c4),
      v_sq.mul_add(v.mul_add($c3, $c2), v.mul_add($c1, $c0)),
    )
  }};
}
151
/// Evaluates `c0 + c1*x + ... + c8*x^8` using `mul_add`.
///
/// `$x` is evaluated once. `$c7` is used as a `mul_add` receiver.
macro_rules! polynomial_8 {
  ($x:expr, $c0:expr, $c1:expr, $c2:expr, $c3:expr, $c4:expr, $c5:expr, $c6:expr, $c7:expr, $c8:expr $(,)?) => {{
    let v = $x;
    let v_sq = v * v;
    let v_quad = v_sq * v_sq;
    let v_oct = v_quad * v_quad;
    // v^4 * (v^2 * (c7 * v + c6) + (v * c5 + c4))
    //   + (v^8 * c8 + (v^2 * (v * c3 + c2) + (v * c1 + c0)))
    v_quad.mul_add(
      v_sq.mul_add($c7.mul_add(v, $c6), v.mul_add($c5, $c4)),
      v_oct.mul_add(
        $c8,
        v_sq.mul_add(v.mul_add($c3, $c2), v.mul_add($c1, $c0)),
      ),
    )
  }};
}
164
/// Evaluates `x + c2*x^2 + c3*x^3 + ... + c13*x^13` using `mul_add`.
///
/// There are no `c0`/`c1` parameters: the constant term is 0 and the linear
/// coefficient is 1, so the lowest-order term is just `x`. `$x` is evaluated
/// once.
macro_rules! polynomial_13 {
  ($x:expr, $c2:expr, $c3:expr, $c4:expr, $c5:expr,$c6:expr, $c7:expr, $c8:expr,$c9:expr, $c10:expr, $c11:expr, $c12:expr, $c13:expr $(,)?) => {{
    let v = $x;
    let v_sq = v * v;
    let v_quad = v_sq * v_sq;
    let v_oct = v_quad * v_quad;
    v_oct.mul_add(
      // powers 8..=13, before being scaled by v^8
      v_quad.mul_add(
        v.mul_add($c13, $c12),
        v_sq.mul_add(v.mul_add($c11, $c10), v.mul_add($c9, $c8)),
      ),
      // powers 1..=7
      v_quad.mul_add(
        v_sq.mul_add(v.mul_add($c7, $c6), v.mul_add($c5, $c4)),
        v_sq.mul_add(v.mul_add($c3, $c2), v),
      ),
    )
  }};
}
184
/// Evaluates `x + c2*x^2 + c3*x^3 + ... + c13*x^13` using `mul_add`.
///
/// Grouping of the terms:
/// `((c8+c9*x) + (c10+c11*x)*x2 + (c12+c13*x)*x4)*x8
///    + (((c6+c7*x)*x2 + (c4+c5*x))*x4 + ((c2+c3*x)*x2 + x))`.
/// `$x` is evaluated once.
macro_rules! polynomial_13m {
  ($x:expr, $c2:expr, $c3:expr, $c4:expr, $c5:expr,$c6:expr, $c7:expr, $c8:expr,$c9:expr, $c10:expr, $c11:expr, $c12:expr, $c13:expr $(,)?) => {{
    let v = $x;
    let v_sq = v * v;
    let v_quad = v_sq * v_sq;
    let v_oct = v_quad * v_quad;

    v_oct.mul_add(
      // powers 8..=13, before being scaled by v^8
      v_quad.mul_add(
        v.mul_add($c13, $c12),
        v_sq.mul_add(v.mul_add($c11, $c10), v.mul_add($c9, $c8)),
      ),
      // powers 1..=7
      v_quad.mul_add(
        v_sq.mul_add(v.mul_add($c7, $c6), v.mul_add($c5, $c4)),
        v_sq.mul_add(v.mul_add($c3, $c2), v),
      ),
    )
  }};
}
206
207 mod f32x8_;
208 pub use f32x8_::*;
209
210 mod f32x4_;
211 pub use f32x4_::*;
212
213 mod f64x4_;
214 pub use f64x4_::*;
215
216 mod f64x2_;
217 pub use f64x2_::*;
218
219 mod i8x16_;
220 pub use i8x16_::*;
221
222 mod i16x16_;
223 pub use i16x16_::*;
224
225 mod i8x32_;
226 pub use i8x32_::*;
227
228 mod i16x8_;
229 pub use i16x8_::*;
230
231 mod i32x4_;
232 pub use i32x4_::*;
233
234 mod i32x8_;
235 pub use i32x8_::*;
236
237 mod i64x2_;
238 pub use i64x2_::*;
239
240 mod i64x4_;
241 pub use i64x4_::*;
242
243 mod u8x16_;
244 pub use u8x16_::*;
245
246 mod u16x8_;
247 pub use u16x8_::*;
248
249 mod u32x4_;
250 pub use u32x4_::*;
251
252 mod u32x8_;
253 pub use u32x8_::*;
254
255 mod u64x2_;
256 pub use u64x2_::*;
257
258 mod u64x4_;
259 pub use u64x4_::*;
260
261 #[allow(non_camel_case_types)]
262 #[repr(C, align(16))]
263 #[rustfmt::skip]
264 union ConstUnionHack128bit {
265 f32a4: [f32; 4],
266 f64a2: [f64; 2],
267 i8a16: [i8; 16],
268 i16a8: [i16; 8],
269 i32a4: [i32; 4],
270 i64a2: [i64; 2],
271 u8a16: [u8; 16],
272 u16a8: [u16; 8],
273 u32a4: [u32; 4],
274 u64a2: [u64; 2],
275 f32x4: f32x4,
276 f64x2: f64x2,
277 i8x16: i8x16,
278 i16x8: i16x8,
279 i32x4: i32x4,
280 i64x2: i64x2,
281 u8x16: u8x16,
282 u16x8: u16x8,
283 u32x4: u32x4,
284 u64x2: u64x2,
285 u128: u128,
286 }
287
288 #[allow(non_camel_case_types)]
289 #[repr(C, align(16))]
290 #[rustfmt::skip]
291 union ConstUnionHack256bit {
292 f32a8: [f32; 8],
293 f64a4: [f64; 4],
294 i8a32: [i8; 32],
295 i16a16: [i16; 16],
296 i32a8: [i32; 8],
297 i64a4: [i64; 4],
298 u8a32: [u8; 32],
299 u16a16: [u16; 16],
300 u32a8: [u32; 8],
301 u64a4: [u64; 4],
302 u128x2: [u128; 2],
303 f32x8: f32x8,
304 f64x4: f64x4,
305 i8x32: i8x32,
306 i16x16: i16x16,
307 i32x8: i32x8,
308 i64x4: i64x4,
309 // u8x32: u8x32,
310 // u16x16: u16x16,
311 u32x8: u32x8,
312 u64x4: u64x4,
313 }
314
/// Bitwise select between `y` and `n`, controlled by `mask`.
///
/// For every bit position the result takes the bit from `y` where `mask` has
/// a 1 and the bit from `n` where `mask` has a 0. Only `^` and `&` are needed:
/// `n ^ ((n ^ y) & mask)`.
#[allow(dead_code)]
fn generic_bit_blend<T>(mask: T, y: T, n: T) -> T
where
  T: Copy + BitXor<Output = T> + BitAnd<Output = T>,
{
  // Bits in which the two candidates disagree.
  let differing = n ^ y;
  // Keep only the disagreements the mask asks to resolve in favour of `y`.
  let flip_to_y = differing & mask;
  // Flipping those bits in `n` turns them into the bits of `y`.
  n ^ flip_to_y
}
322
/// For each `(Trait, method) => [Type, ...]` entry, generates
/// `impl Trait<&Type> for Type`.
///
/// The generated method dereferences the right-hand side and calls the same
/// method with the resulting value, so each `Type` must be `Copy` and must
/// already support `type.method(type)`.
macro_rules! bulk_impl_op_ref_self_for {
  ($(($op_trait:ident, $op_fn:ident) => [$($simd:ty),+]),+ $(,)?) => {
    $( // one pass per trait/list pair
      $( // one impl per type in that list
        impl $op_trait<&Self> for $simd {
          type Output = Self;
          #[inline]
          #[must_use]
          fn $op_fn(self, other: &Self) -> Self::Output {
            let other: Self = *other;
            self.$op_fn(other)
          }
        }
      )+
    )+
  };
}
340
341 bulk_impl_op_ref_self_for! {
342 (Add, add) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
343 (Sub, sub) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
344 (Mul, mul) => [f32x8, f32x4, f64x4, f64x2, i16x8, i16x16, i32x8, i32x4],
345 (Div, div) => [f32x8, f32x4, f64x4, f64x2],
346 (BitAnd, bitand) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
347 (BitOr, bitor) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
348 (BitXor, bitxor) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
349 }
350
/// For each `(AssignTrait<Rhs>, method, method_assign) => [Type, ...]` entry,
/// generates `impl AssignTrait<Rhs> for Type`.
///
/// The generated `method_assign` computes `(*self).method(rhs)` and stores the
/// result back into `*self`, so each `Type` must be `Copy` and must already
/// support `type.method(rhs)`.
macro_rules! bulk_impl_op_assign_for {
  ($(($assign_trait:ident<$operand:ty>, $op_fn:ident, $assign_fn:ident) => [$($simd:ty),+]),+ $(,)?) => {
    $( // one pass per trait/list pair
      $( // one impl per type in that list
        impl $assign_trait<$operand> for $simd {
          #[inline]
          fn $assign_fn(&mut self, operand: $operand) {
            let updated = (*self).$op_fn(operand);
            *self = updated;
          }
        }
      )+
    )+
  };
}
366
367 // Note: remember to update bulk_impl_op_ref_self_for first or this will give
368 // weird errors!
369 bulk_impl_op_assign_for! {
370 (AddAssign<Self>, add, add_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
371 (AddAssign<&Self>, add, add_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
372 (SubAssign<Self>, sub, sub_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
373 (SubAssign<&Self>, sub, sub_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
374 (MulAssign<Self>, mul, mul_assign) => [f32x8, f32x4, f64x4, f64x2, i16x8, i16x16, i32x8, i32x4],
375 (MulAssign<&Self>, mul, mul_assign) => [f32x8, f32x4, f64x4, f64x2, i16x8, i16x16, i32x8, i32x4],
376 (DivAssign<Self>, div, div_assign) => [f32x8, f32x4, f64x4, f64x2],
377 (DivAssign<&Self>, div, div_assign) => [f32x8, f32x4, f64x4, f64x2],
378 (BitAndAssign<Self>, bitand, bitand_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
379 (BitAndAssign<&Self>, bitand, bitand_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
380 (BitOrAssign<Self>, bitor, bitor_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
381 (BitOrAssign<&Self>, bitor, bitor_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
382 (BitXorAssign<Self>, bitxor, bitxor_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
383 (BitXorAssign<&Self>, bitxor, bitxor_assign) => [f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x4, u64x2],
384 }
385
386 macro_rules! impl_simple_neg {
387 ($($t:ty),+ $(,)?) => {
388 $(
389 impl Neg for $t {
390 type Output = Self;
391 #[inline]
392 #[must_use]
393 fn neg(self) -> Self::Output {
394 Self::default() - self
395 }
396 }
397 impl Neg for &'_ $t {
398 type Output = $t;
399 #[inline]
400 #[must_use]
401 fn neg(self) -> Self::Output {
402 <$t>::default() - *self
403 }
404 }
405 )+
406 };
407 }
408
409 impl_simple_neg! {
410 f32x8, f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x2, u64x4
411 }
412
/// Implements `Not` for each listed type and for references to it by XOR-ing
/// the value with an all-ones bit pattern of the same size.
///
/// The mask is built from a `[u8; size_of::<T>()]` filled with `0xFF`, so the
/// macro works for any width. (Casting from a fixed `u128` only fits 128-bit
/// types; `cast` panics on a size mismatch, which made `!` panic for 256-bit
/// types such as `i8x32`.) Each type must support `^` with itself and be a
/// valid `cast` target.
macro_rules! impl_simple_not {
  ($($t:ty),+ $(,)?) => {
    $(
      impl Not for $t {
        type Output = Self;
        #[inline]
        #[must_use]
        fn not(self) -> Self::Output {
          // Width of this vector type in bytes, known at compile time.
          const BYTES: usize = core::mem::size_of::<$t>();
          self ^ cast::<[u8; BYTES], $t>([u8::MAX; BYTES])
        }
      }
      impl Not for &'_ $t {
        type Output = $t;
        #[inline]
        #[must_use]
        fn not(self) -> Self::Output {
          // Forward to the by-value impl above.
          !*self
        }
      }
    )+
  };
}
435
436 impl_simple_not! {
437 f32x4, i8x32, i8x16, i16x8, i16x16, i32x4, i64x2, u8x16, u16x8, u32x4, u64x2,
438 }
439
/// Implements `core::iter::Sum<RHS>` for each listed type, for every `RHS`
/// the type can `+=`.
///
/// The running total starts at `Self::zeroed()` and each item is added with
/// `+=`, so an empty iterator yields `Self::zeroed()`.
macro_rules! impl_simple_sum {
  ($($simd:ty),+ $(,)?) => {
    $(
      impl<RHS> core::iter::Sum<RHS> for $simd where $simd: AddAssign<RHS> {
        fn sum<I: Iterator<Item = RHS>>(iter: I) -> Self {
          iter.fold(Self::zeroed(), |mut acc, item| {
            acc += item;
            acc
          })
        }
      }
    )+
  };
}
455
456 impl_simple_sum! {
457 f32x4, f64x4, f64x2, i8x32, i8x16, i16x8, i16x16, i32x8, i32x4, i64x4, i64x2, u8x16, u16x8, u32x8, u32x4, u64x2, u64x4
458 }
459
/// Implements `core::iter::Product<RHS>` for each listed floating point type,
/// for every `RHS` the type can `*=`.
///
/// The running product starts at `Self::from(1.0)` and each item is
/// multiplied in with `*=`, so an empty iterator yields `Self::from(1.0)`.
macro_rules! impl_floating_product {
  ($($simd:ty),+ $(,)?) => {
    $(
      impl<RHS> core::iter::Product<RHS> for $simd where $simd: MulAssign<RHS> {
        fn product<I: Iterator<Item = RHS>>(iter: I) -> Self {
          iter.fold(Self::from(1.0), |mut acc, item| {
            acc *= item;
            acc
          })
        }
      }
    )+
  };
}
475
476 impl_floating_product! {
477 f32x8, f32x4, f64x4, f64x2
478 }
479
/// Implements `core::iter::Product<RHS>` for each listed integer type, for
/// every `RHS` the type can `*=`.
///
/// The running product starts at `Self::from(1)` and each item is multiplied
/// in with `*=`, so an empty iterator yields `Self::from(1)`.
macro_rules! impl_integer_product {
  ($($simd:ty),+ $(,)?) => {
    $(
      impl<RHS> core::iter::Product<RHS> for $simd where $simd: MulAssign<RHS> {
        fn product<I: Iterator<Item = RHS>>(iter: I) -> Self {
          iter.fold(Self::from(1), |mut acc, item| {
            acc *= item;
            acc
          })
        }
      }
    )+
  };
}
495
496 impl_integer_product! {
497 i16x8, i32x4, i32x8,
498 }
499
/// For each `(Array, Simd)` pair, generates `From<Array> for Simd` and
/// `From<Simd> for Array`, both implemented as a plain `cast` of the value.
macro_rules! impl_from_a_for_b_with_cast {
  ($(($array:ty, $wide:ty)),+ $(,)?) => {
    $(
      impl From<$array> for $wide {
        #[inline]
        #[must_use]
        fn from(lanes: $array) -> Self {
          cast::<$array, $wide>(lanes)
        }
      }
      impl From<$wide> for $array {
        #[inline]
        #[must_use]
        fn from(wide: $wide) -> Self {
          cast::<$wide, $array>(wide)
        }
      }
    )+
  };
}
519
520 impl_from_a_for_b_with_cast! {
521 ([f32;8], f32x8),
522 ([f32;4], f32x4), ([f64;4], f64x4), ([f64;2], f64x2),
523 ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2), ([i64;4], i64x4),
524 ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2), ([u64;4], u64x4),
525 }
526
/// For each `([Elem; LEN], Simd)` pair, generates `From<Elem> for Simd` and an
/// inherent `Simd::splat(Elem)`; both fill every lane with the given value.
macro_rules! impl_from_single_value {
  ($(([$lane:ty;$count:expr], $wide:ty)),+ $(,)?) => {
    $(
      impl From<$lane> for $wide {
        /// Splats the single value given across all lanes.
        #[inline]
        #[must_use]
        fn from(elem: $lane) -> Self {
          Self::splat(elem)
        }
      }
      impl $wide {
        #[inline]
        #[must_use]
        pub fn splat(elem: $lane) -> $wide {
          // Repeat the value `$count` times, then reinterpret the array.
          cast::<[$lane; $count], $wide>([elem; $count])
        }
      }
    )+
  };
}
546
547 impl_from_single_value! {
548 ([f32;8], f32x8),
549 ([f32;4], f32x4), ([f64;4], f64x4), ([f64;2], f64x2),
550 ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2), ([i64;4], i64x4),
551 ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2), ([u64;4], u64x4),
552 }
553
/// Input shape: `formatter => [(arr, simd)+],+`
///
/// For every formatting trait and every `(Array, Simd)` pair, implements the
/// trait for `Simd`: the value is `cast` to `Array` and printed as
/// `(lane0, lane1, ...)`, with each lane formatted through the same trait and
/// the same `Formatter`.
macro_rules! impl_formatter_for {
  ($($fmt_trait:ident => [$(($array:ty, $wide:ty)),+]),+ $(,)?) => {
    $( // one pass per formatting trait
      $( // one impl per SIMD type
        impl $fmt_trait for $wide {
          fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
            let lanes: $array = cast(*self);
            f.write_str("(")?;
            let mut remaining = lanes.iter();
            // The first lane has no separator in front of it.
            if let Some(first) = remaining.next() {
              $fmt_trait::fmt(first, f)?;
              for lane in remaining {
                f.write_str(", ")?;
                $fmt_trait::fmt(lane, f)?;
              }
            }
            f.write_str(")")
          }
        }
      )+
    )+
  }
}
576
577 impl_formatter_for! {
578 Binary => [([u32;8], f32x8), ([u32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
579 ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
580 ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
581 Debug => [([f32;8], f32x8), ([f32;4], f32x4), ([f64;4], f64x4), ([f64;2], f64x2),
582 ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
583 ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
584 Display => [([f32;8], f32x8), ([f32;4], f32x4), ([f64;4], f64x4), ([f64;2], f64x2),
585 ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
586 ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
587 LowerExp => [([f32;8], f32x8), ([f32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
588 ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
589 ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
590 LowerHex => [([u32;8], f32x8), ([u32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
591 ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
592 ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
593 Octal => [([u32;8], f32x8), ([u32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
594 ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
595 ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
596 UpperExp => [([u32;8], f32x8), ([u32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
597 ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
598 ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
599 UpperHex => [([u32;8], f32x8), ([u32;4], f32x4), ([u64;4], f64x4), ([u64;2], f64x2),
600 ([i8;32], i8x32), ([i8;16], i8x16), ([i16;8], i16x8), ([i16;16], i16x16), ([i32;8], i32x8), ([i32;4], i32x4), ([i64;2], i64x2),([i64;4], i64x4),
601 ([u8;16], u8x16), ([u16;8], u16x8), ([u32;8], u32x8), ([u32;4], u32x4), ([u64;2], u64x2),([u64;4], u64x4)],
602 }
603
604 // With const generics this could be simplified I hope
605 macro_rules! from_array {
606 ($ty:ty,$dst:ty,$dst_wide:ident,32) => {
607 impl From<&[$ty]> for $dst_wide {
608 fn from(src: &[$ty]) -> $dst_wide {
609 match src.len() {
610 32 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst, src[27] as $dst, src[28] as $dst, src[29] as $dst, src[30] as $dst, src[31] as $dst,]),
611 31 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst, src[27] as $dst, src[28] as $dst, src[29] as $dst, src[30] as $dst,0 as $dst,]),
612 30 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst, src[27] as $dst, src[28] as $dst, src[29] as $dst,0 as $dst,0 as $dst,]),
613 29 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst, src[27] as $dst, src[28] as $dst,0 as $dst,0 as $dst,0 as $dst,]),
614 28 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst, src[27] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
615 27 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst, src[26] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
616 26 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst, src[25] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
617 25 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst, src[24] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
618 24 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst, src[23] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
619 23 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst, src[22] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
620 22 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst, src[21] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
621 21 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst, src[20] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
622 20 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst, src[19] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
623 19 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst, src[18] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
624 18 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst, src[17] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
625 17 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst, src[16] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
626 16 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
627 15 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
628 14 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
629 13 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
630 12 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
631 11 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
632 10 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
633 9 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
634 8 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
635 7 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
636 6 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
637 5 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
638 4 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
639 3 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
640 2 => $dst_wide::from([src[0] as $dst, src[1] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
641 1 => $dst_wide::from([src[0] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
642 _ => panic!(
643 "Converting from an array larger than what can be stored in $dst_wide"
644 ),
645 }
646 }
647 }
648 };
649 ($ty:ty,$dst:ty,$dst_wide:ident,16) => {
650 impl From<&[$ty]> for $dst_wide {
651 fn from(src: &[$ty]) -> $dst_wide {
652 match src.len() {
653 16 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst, src[15] as $dst,]),
654 15 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst, src[14] as $dst,0 as $dst,]),
655 14 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst, src[13] as $dst,0 as $dst,0 as $dst,]),
656 13 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst, src[12] as $dst,0 as $dst,0 as $dst,0 as $dst,]),
657 12 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst, src[11] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
658 11 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst, src[10] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
659 10 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst, src[9] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
660 9 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst, src[8] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
661 8 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
662 7 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
663 6 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
664 5 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
665 4 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
666 3 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
667 2 => $dst_wide::from([src[0] as $dst, src[1] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
668 1 => $dst_wide::from([src[0] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
669 _ => panic!(
670 "Converting from an array larger than what can be stored in $dst_wide"
671 ),
672 }
673 }
674 }
675 };
676 ($ty:ty,$dst:ty,$dst_wide:ident,8) => {
677 impl From<&[$ty]> for $dst_wide {
678 fn from(src: &[$ty]) -> $dst_wide {
679 match src.len() {
680 8 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst, src[7] as $dst,]),
681 7 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst, src[6] as $dst,0 as $dst,]),
682 6 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst, src[5] as $dst,0 as $dst,0 as $dst,]),
683 5 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst, src[4] as $dst,0 as $dst,0 as $dst,0 as $dst,]),
684 4 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
685 3 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
686 2 => $dst_wide::from([src[0] as $dst, src[1] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
687 1 => $dst_wide::from([src[0] as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
688 0 => $dst_wide::from([0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,0 as $dst,]),
689 _ => panic!(
690 "Converting from an array larger than what can be stored in $dst_wide"
691 ),
692 }
693 }
694 }
695 };
696 ($ty:ty,$dst:ty,$dst_wide:ident,4) => {
697 impl From<&[$ty]> for $dst_wide {
698 fn from(src: &[$ty]) -> $dst_wide {
699 match src.len() {
700 4 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst, src[3] as $dst,]),
701 3 => $dst_wide::from([src[0] as $dst, src[1] as $dst, src[2] as $dst,0 as $dst,]),
702 2 => $dst_wide::from([src[0] as $dst, src[1] as $dst,0 as $dst,0 as $dst,]),
703 1 => $dst_wide::from([src[0] as $dst,0 as $dst,0 as $dst,0 as $dst,]),
704 _ => panic!(
705 "Converting from an array larger than what can be stored in $dst_wide"
706 ),
707 }
708 }
709 }
710 };
711 }
712
// Generate `impl From<&[T]> for Wide` for each listed combination.
// Arguments are: source element type, lane type the elements are cast to
// with `as`, the wide vector type, and its lane count (selects the macro arm).
// Note that `i32x8` gets two impls: one from `&[i8]` and one from `&[i32]`.
from_array!(i8, i8, i8x32, 32);
from_array!(i8, i8, i8x16, 16);
from_array!(i8, i32, i32x8, 8);
from_array!(u8, u8, u8x16, 16);
from_array!(i16, i16, i16x16, 16);
from_array!(i32, i32, i32x8, 8);
from_array!(f32, f32, f32x8, 8);
from_array!(f32, f32, f32x4, 4);
from_array!(f64, f64, f64x4, 4);
from_array!(u64, u64, u64x4, 4);
from_array!(i64, i64, i64x4, 4);
724
/// Computes the square root of `x` using integer arithmetic only.
///
/// The value is split into a high word (sign, exponent, top 20 mantissa bits)
/// and a low word (bottom 32 mantissa bits), and the root is produced one bit
/// at a time with a shift-and-subtract loop.
///
/// Special cases:
/// * `NaN` returns `NaN`.
/// * `+inf` returns `+inf`; `-inf` returns `NaN`.
/// * `+0.0` and `-0.0` are returned unchanged.
/// * Any other negative input returns `NaN`.
///
/// The bits are read with `f64::to_bits` and written with `f64::from_bits`, so
/// the result does not depend on the byte order of the target.
// NOTE(review): presumably only called when neither an explicit SIMD sqrt nor
// `std` is available, hence the `allow` — confirm against the call sites.
#[allow(unused)]
fn software_sqrt(x: f64) -> f64 {
  /// Top bit of a 32-bit word.
  const SIGN: u32 = 0x8000_0000;

  /// Shifts the 64-bit quantity `hi:lo` left by one bit, carrying the top
  /// bit of `lo` into `hi`.
  #[inline(always)]
  fn shift_left_one(hi: &mut i32, lo: &mut u32) {
    *hi += *hi + (*lo >> 31) as i32;
    *lo <<= 1;
  }

  // extract data
  let bits = x.to_bits();
  let mut ix0 = (bits >> 32) as i32;
  let mut ix1 = bits as u32;

  // inf and nan
  if x.is_nan() {
    return f64::NAN;
  }
  if (ix0 & 0x7ff0_0000) == 0x7ff0_0000 {
    // +inf stays +inf, -inf becomes NaN (inf + -inf).
    return x * x + x;
  }

  // handle zero and negative inputs
  if ix0 <= 0 {
    if ((ix0 & 0x7fff_ffff) | (ix1 as i32)) == 0 {
      // +0.0 or -0.0
      return x;
    } else if ix0 < 0 {
      // 0.0 / 0.0 produces NaN.
      return (x - x) / (x - x);
    }
  }

  // normalize
  let mut m = ix0 >> 20;
  if m == 0 {
    // subnormal: move whole chunks of the low word up until the high word is
    // non-zero ...
    while ix0 == 0 {
      m -= 21;
      ix0 |= (ix1 >> 11) as i32;
      ix1 <<= 21;
    }
    // ... then shift bit by bit until the implicit-one position is set.
    let mut i: u32 = 0;
    while (ix0 & 0x0010_0000) == 0 {
      ix0 <<= 1;
      i += 1;
    }
    m -= i as i32 - 1;
    // Bring the top `i` bits of the low word into the bottom of the high
    // word. For `i == 0` the shift amount is 32, which must contribute
    // nothing; `checked_shr` returns `None` in that case.
    ix0 |= ix1.checked_shr(32 - i).unwrap_or(0) as i32;
    ix1 <<= i;
  }
  // un-bias exponent
  m -= 1023;
  ix0 = (ix0 & 0x000f_ffff) | 0x0010_0000;
  if (m & 1) != 0 {
    // odd m, double the input to make it even
    shift_left_one(&mut ix0, &mut ix1);
  }
  m >>= 1;

  // generate sqrt bit by bit
  shift_left_one(&mut ix0, &mut ix1);
  // q and q1 store the sqrt(x);
  let mut q: i32 = 0;
  let mut q1: u32 = 0;
  let mut s0: i32 = 0;
  let mut s1: u32 = 0;

  // our bit that moves from left to right through the high word
  let mut r: u32 = 0x0020_0000;
  while r != 0 {
    let t = s0 + r as i32;
    if t <= ix0 {
      s0 = t + r as i32;
      ix0 -= t;
      q += r as i32;
    }
    shift_left_one(&mut ix0, &mut ix1);
    r >>= 1;
  }

  // same procedure for the low word, with manual carry/borrow handling
  r = SIGN;
  while r != 0 {
    let t1 = s1.wrapping_add(r);
    let t = s0;
    if t < ix0 || (t == ix0 && t1 <= ix1) {
      s1 = t1.wrapping_add(r);
      if (t1 & SIGN) == SIGN && (s1 & SIGN) == 0 {
        // carry out of the low word of `s`
        s0 += 1;
      }
      ix0 -= t;
      if ix1 < t1 {
        // borrow from the high word of the remainder
        ix0 -= 1;
      }
      ix1 = ix1.wrapping_sub(t1);
      q1 = q1.wrapping_add(r);
    }
    shift_left_one(&mut ix0, &mut ix1);
    r >>= 1;
  }

  // use floating add to find out rounding direction
  if ix0 != 0 || ix1 != 0 {
    // A non-zero remainder means the result is inexact.
    let mut z: f64 = 1.0 - 1.0e-300;
    if z >= 1.0 {
      z = 1.0 + 1.0e-300;
      if q1 == 0xffff_ffff {
        q1 = 0;
        q += 1;
      } else if z > 1.0 {
        if q1 == 0xffff_fffe {
          q += 1;
        }
        q1 = q1.wrapping_add(2);
      } else {
        q1 = q1.wrapping_add(q1 & 1);
      }
    }
  }

  // finish up: drop the extra low bit of `q:q1`, re-bias, and reassemble
  ix0 = (q >> 1) + 0x3fe0_0000;
  ix1 = q1 >> 1;
  if (q & 1) == 1 {
    ix1 |= SIGN;
  }
  ix0 += m << 20;

  f64::from_bits((u64::from(ix0 as u32) << 32) | u64::from(ix1))
}
872
/// Checks `software_sqrt` on special values, signed zeros, perfect squares
/// and one input whose root is not exactly representable.
#[test]
fn test_software_sqrt() {
  // NaN, infinities and negative inputs
  assert!(software_sqrt(f64::NAN).is_nan());
  assert_eq!(software_sqrt(f64::INFINITY), f64::INFINITY);
  assert!(software_sqrt(-1.0).is_nan());
  assert!(software_sqrt(f64::NEG_INFINITY).is_nan());

  // Signed zeros. `0.0 == -0.0` is true, so comparing values alone cannot
  // detect a wrong sign; check the sign bit explicitly.
  let positive_zero = software_sqrt(0.0);
  assert_eq!(positive_zero, 0.0);
  assert!(positive_zero.is_sign_positive());
  let negative_zero = software_sqrt(-0.0);
  assert_eq!(negative_zero, 0.0);
  assert!(negative_zero.is_sign_negative());

  // perfect squares
  assert_eq!(software_sqrt(4.0), 2.0);
  assert_eq!(software_sqrt(9.0), 3.0);
  assert_eq!(software_sqrt(16.0), 4.0);
  assert_eq!(software_sqrt(25.0), 5.0);
  assert_eq!(software_sqrt(5000.0 * 5000.0), 5000.0);

  // inexact result, exercises the rounding step
  assert_eq!(software_sqrt(2.0), core::f64::consts::SQRT_2);
}
887
/// Comparison trait exposing a `cmp_eq` method whose result type is chosen
/// by the implementor.
///
/// `Rhs` defaults to `Self`.
// NOTE(review): presumably a lane-wise `==` that yields a mask; the mask
// encoding is up to each impl — confirm against the implementations.
pub trait CmpEq<Rhs = Self> {
  /// Type returned by [`cmp_eq`](CmpEq::cmp_eq).
  type Output;
  /// Compares `self` against `rhs`, consuming both.
  fn cmp_eq(self, rhs: Rhs) -> Self::Output;
}
892
/// Comparison trait exposing a `cmp_gt` method whose result type is chosen
/// by the implementor.
///
/// `Rhs` defaults to `Self`.
// NOTE(review): presumably a lane-wise `>` that yields a mask; the mask
// encoding is up to each impl — confirm against the implementations.
pub trait CmpGt<Rhs = Self> {
  /// Type returned by [`cmp_gt`](CmpGt::cmp_gt).
  type Output;
  /// Compares `self` against `rhs`, consuming both.
  fn cmp_gt(self, rhs: Rhs) -> Self::Output;
}
897
/// Comparison trait exposing a `cmp_ge` method whose result type is chosen
/// by the implementor.
///
/// `Rhs` defaults to `Self`.
// NOTE(review): presumably a lane-wise `>=` that yields a mask; the mask
// encoding is up to each impl — confirm against the implementations.
pub trait CmpGe<Rhs = Self> {
  /// Type returned by [`cmp_ge`](CmpGe::cmp_ge).
  type Output;
  /// Compares `self` against `rhs`, consuming both.
  fn cmp_ge(self, rhs: Rhs) -> Self::Output;
}
902
/// Comparison trait exposing a `cmp_ne` method whose result type is chosen
/// by the implementor.
///
/// `Rhs` defaults to `Self`.
// NOTE(review): presumably a lane-wise `!=` that yields a mask; the mask
// encoding is up to each impl — confirm against the implementations.
pub trait CmpNe<Rhs = Self> {
  /// Type returned by [`cmp_ne`](CmpNe::cmp_ne).
  type Output;
  /// Compares `self` against `rhs`, consuming both.
  fn cmp_ne(self, rhs: Rhs) -> Self::Output;
}
907
/// Comparison trait exposing a `cmp_lt` method whose result type is chosen
/// by the implementor.
///
/// `Rhs` defaults to `Self`.
// NOTE(review): presumably a lane-wise `<` that yields a mask; the mask
// encoding is up to each impl — confirm against the implementations.
pub trait CmpLt<Rhs = Self> {
  /// Type returned by [`cmp_lt`](CmpLt::cmp_lt).
  type Output;
  /// Compares `self` against `rhs`, consuming both.
  fn cmp_lt(self, rhs: Rhs) -> Self::Output;
}
912
/// Comparison trait exposing a `cmp_le` method whose result type is chosen
/// by the implementor.
///
/// `Rhs` defaults to `Self`.
// NOTE(review): presumably a lane-wise `<=` that yields a mask; the mask
// encoding is up to each impl — confirm against the implementations.
pub trait CmpLe<Rhs = Self> {
  /// Type returned by [`cmp_le`](CmpLe::cmp_le).
  type Output;
  /// Compares `self` against `rhs`, consuming both.
  fn cmp_le(self, rhs: Rhs) -> Self::Output;
}
917
/// Implements a binary-operation trait for several `(vector, scalar)` pairs.
///
/// For every pair, the generated impl broadcasts the scalar right-hand side
/// into a full vector with `splat` and then forwards to the method of the
/// same name that takes a vector on the right-hand side. The output type is
/// the vector type itself.
///
/// Usage: `bulk_impl_const_rhs_op!((Trait, method) => [(Vector, Scalar),]);`
/// (every pair, including the last, is followed by a comma).
macro_rules! bulk_impl_const_rhs_op {
  (($trait_name:ident,$func:ident) => [$(($vector:ty,$scalar:ty),)+]) => {
    $(
      impl $trait_name<$scalar> for $vector {
        type Output = Self;

        #[inline]
        #[must_use]
        fn $func(self, rhs: $scalar) -> Self::Output {
          // Widen the scalar to every lane, then reuse the vector/vector op.
          let broadcast: $vector = <$vector>::splat(rhs);
          self.$func(broadcast)
        }
      }
    )+
  };
}
932
// Scalar right-hand-side overloads of the comparison traits: for each float
// vector type, comparing against a single `f32`/`f64` splats the scalar to
// all lanes and forwards to the vector/vector comparison.
bulk_impl_const_rhs_op!((CmpEq, cmp_eq) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
bulk_impl_const_rhs_op!((CmpLt, cmp_lt) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
bulk_impl_const_rhs_op!((CmpGt, cmp_gt) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
bulk_impl_const_rhs_op!((CmpNe, cmp_ne) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
bulk_impl_const_rhs_op!((CmpLe, cmp_le) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
bulk_impl_const_rhs_op!((CmpGe, cmp_ge) => [(f64x4, f64), (f64x2, f64), (f32x4,f32), (f32x8,f32),]);
939