1 #![allow(non_camel_case_types)]
2 
3 use crate::soft::{x2, x4};
4 use crate::types::*;
5 use core::ops::*;
6 
/// Untyped 128-bit storage that can be viewed either as four 32-bit words
/// (`d`) or as two 64-bit words (`q`).
///
/// Both views alias the same 16 bytes; reading the view that was not written
/// reinterprets those bytes in the target's native layout.
#[repr(C)]
#[derive(Clone, Copy)]
pub union vec128_storage {
    d: [u32; 4],
    q: [u64; 2],
}

impl From<[u32; 4]> for vec128_storage {
    /// Stores four 32-bit words.
    #[inline]
    fn from(words: [u32; 4]) -> Self {
        vec128_storage { d: words }
    }
}

impl From<vec128_storage> for [u32; 4] {
    /// Reads the storage through its 32-bit view.
    #[inline]
    fn from(storage: vec128_storage) -> Self {
        // SAFETY: both fields are integer arrays covering the same 16 bytes,
        // so any stored bit pattern is a valid `[u32; 4]`.
        let words: [u32; 4] = unsafe { storage.d };
        words
    }
}

impl From<[u64; 2]> for vec128_storage {
    /// Stores two 64-bit words.
    #[inline]
    fn from(words: [u64; 2]) -> Self {
        vec128_storage { q: words }
    }
}

impl From<vec128_storage> for [u64; 2] {
    /// Reads the storage through its 64-bit view.
    #[inline]
    fn from(storage: vec128_storage) -> Self {
        // SAFETY: both fields are integer arrays covering the same 16 bytes,
        // so any stored bit pattern is a valid `[u64; 2]`.
        let words: [u64; 2] = unsafe { storage.q };
        words
    }
}

impl Default for vec128_storage {
    /// All-zero storage.
    #[inline]
    fn default() -> Self {
        vec128_storage { q: [0u64; 2] }
    }
}

impl Eq for vec128_storage {}

impl PartialEq for vec128_storage {
    /// Compares all 128 bits through the 64-bit view.
    #[inline]
    fn eq(&self, rhs: &Self) -> bool {
        // SAFETY: the 64-bit view spans the whole union and every bit pattern
        // is a valid `[u64; 2]`.
        let (left, right) = unsafe { (self.q, rhs.q) };
        left == right
    }
}
50 #[derive(Clone, Copy, PartialEq, Eq, Default)]
51 pub struct vec256_storage {
52     v128: [vec128_storage; 2],
53 }
54 impl vec256_storage {
55     #[inline(always)]
new128(v128: [vec128_storage; 2]) -> Self56     pub fn new128(v128: [vec128_storage; 2]) -> Self {
57         Self { v128 }
58     }
59     #[inline(always)]
split128(self) -> [vec128_storage; 2]60     pub fn split128(self) -> [vec128_storage; 2] {
61         self.v128
62     }
63 }
64 impl From<vec256_storage> for [u64; 4] {
65     #[inline]
from(q: vec256_storage) -> Self66     fn from(q: vec256_storage) -> Self {
67         let [a, b]: [u64; 2] = q.v128[0].into();
68         let [c, d]: [u64; 2] = q.v128[1].into();
69         [a, b, c, d]
70     }
71 }
72 #[derive(Clone, Copy, PartialEq, Eq, Default)]
73 pub struct vec512_storage {
74     v128: [vec128_storage; 4],
75 }
76 impl vec512_storage {
77     #[inline(always)]
new128(v128: [vec128_storage; 4]) -> Self78     pub fn new128(v128: [vec128_storage; 4]) -> Self {
79         Self { v128 }
80     }
81     #[inline(always)]
split128(self) -> [vec128_storage; 4]82     pub fn split128(self) -> [vec128_storage; 4] {
83         self.v128
84     }
85 }
86 
dmap<T, F>(t: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u32) -> u32,87 fn dmap<T, F>(t: T, f: F) -> T
88 where
89     T: Store<vec128_storage> + Into<vec128_storage>,
90     F: Fn(u32) -> u32,
91 {
92     let t: vec128_storage = t.into();
93     let d = unsafe { t.d };
94     let d = vec128_storage {
95         d: [f(d[0]), f(d[1]), f(d[2]), f(d[3])],
96     };
97     unsafe { T::unpack(d) }
98 }
99 
dmap2<T, F>(a: T, b: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u32, u32) -> u32,100 fn dmap2<T, F>(a: T, b: T, f: F) -> T
101 where
102     T: Store<vec128_storage> + Into<vec128_storage>,
103     F: Fn(u32, u32) -> u32,
104 {
105     let a: vec128_storage = a.into();
106     let b: vec128_storage = b.into();
107     let ao = unsafe { a.d };
108     let bo = unsafe { b.d };
109     let d = vec128_storage {
110         d: [
111             f(ao[0], bo[0]),
112             f(ao[1], bo[1]),
113             f(ao[2], bo[2]),
114             f(ao[3], bo[3]),
115         ],
116     };
117     unsafe { T::unpack(d) }
118 }
119 
qmap<T, F>(t: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u64) -> u64,120 fn qmap<T, F>(t: T, f: F) -> T
121 where
122     T: Store<vec128_storage> + Into<vec128_storage>,
123     F: Fn(u64) -> u64,
124 {
125     let t: vec128_storage = t.into();
126     let q = unsafe { t.q };
127     let q = vec128_storage {
128         q: [f(q[0]), f(q[1])],
129     };
130     unsafe { T::unpack(q) }
131 }
132 
qmap2<T, F>(a: T, b: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u64, u64) -> u64,133 fn qmap2<T, F>(a: T, b: T, f: F) -> T
134 where
135     T: Store<vec128_storage> + Into<vec128_storage>,
136     F: Fn(u64, u64) -> u64,
137 {
138     let a: vec128_storage = a.into();
139     let b: vec128_storage = b.into();
140     let ao = unsafe { a.q };
141     let bo = unsafe { b.q };
142     let q = vec128_storage {
143         q: [f(ao[0], bo[0]), f(ao[1], bo[1])],
144     };
145     unsafe { T::unpack(q) }
146 }
147 
/// Joins two 64-bit words into one 128-bit value; `q[0]` supplies the low
/// 64 bits and `q[1]` the high 64 bits.
fn o_of_q(q: [u64; 2]) -> u128 {
    let [low, high] = q;
    (u128::from(high) << 64) | u128::from(low)
}
151 
/// Splits a 128-bit value into two 64-bit words: index 0 holds the low
/// 64 bits and index 1 the high 64 bits.
fn q_of_o(o: u128) -> [u64; 2] {
    // The `as u64` casts deliberately truncate to the low 64 bits.
    let low = o as u64;
    let high = (o >> 64) as u64;
    [low, high]
}
155 
omap<T, F>(a: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u128) -> u128,156 fn omap<T, F>(a: T, f: F) -> T
157 where
158     T: Store<vec128_storage> + Into<vec128_storage>,
159     F: Fn(u128) -> u128,
160 {
161     let a: vec128_storage = a.into();
162     let ao = o_of_q(unsafe { a.q });
163     let o = vec128_storage { q: q_of_o(f(ao)) };
164     unsafe { T::unpack(o) }
165 }
166 
omap2<T, F>(a: T, b: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u128, u128) -> u128,167 fn omap2<T, F>(a: T, b: T, f: F) -> T
168 where
169     T: Store<vec128_storage> + Into<vec128_storage>,
170     F: Fn(u128, u128) -> u128,
171 {
172     let a: vec128_storage = a.into();
173     let b: vec128_storage = b.into();
174     let ao = o_of_q(unsafe { a.q });
175     let bo = o_of_q(unsafe { b.q });
176     let o = vec128_storage {
177         q: q_of_o(f(ao, bo)),
178     };
179     unsafe { T::unpack(o) }
180 }
181 
// Empty-bodied trait impls for the three generic 128-bit vector types.
// NOTE(review): these traits are declared outside this file (presumably in
// `crate::types`); whether they are pure markers or carry supertrait bounds
// satisfied by the other impls in this file should be confirmed there.
impl RotateEachWord128 for u128x1_generic {}
impl BitOps128 for u128x1_generic {}
impl BitOps64 for u128x1_generic {}
impl BitOps64 for u64x2_generic {}
impl BitOps32 for u128x1_generic {}
impl BitOps32 for u64x2_generic {}
impl BitOps32 for u32x4_generic {}
impl BitOps0 for u128x1_generic {}
impl BitOps0 for u64x2_generic {}
impl BitOps0 for u32x4_generic {}
192 
193 macro_rules! impl_bitops {
194     ($vec:ident) => {
195         impl Not for $vec {
196             type Output = Self;
197             #[inline(always)]
198             fn not(self) -> Self::Output {
199                 omap(self, |x| !x)
200             }
201         }
202         impl BitAnd for $vec {
203             type Output = Self;
204             #[inline(always)]
205             fn bitand(self, rhs: Self) -> Self::Output {
206                 omap2(self, rhs, |x, y| x & y)
207             }
208         }
209         impl BitOr for $vec {
210             type Output = Self;
211             #[inline(always)]
212             fn bitor(self, rhs: Self) -> Self::Output {
213                 omap2(self, rhs, |x, y| x | y)
214             }
215         }
216         impl BitXor for $vec {
217             type Output = Self;
218             #[inline(always)]
219             fn bitxor(self, rhs: Self) -> Self::Output {
220                 omap2(self, rhs, |x, y| x ^ y)
221             }
222         }
223         impl AndNot for $vec {
224             type Output = Self;
225             #[inline(always)]
226             fn andnot(self, rhs: Self) -> Self::Output {
227                 omap2(self, rhs, |x, y| !x & y)
228             }
229         }
230         impl BitAndAssign for $vec {
231             #[inline(always)]
232             fn bitand_assign(&mut self, rhs: Self) {
233                 *self = *self & rhs
234             }
235         }
236         impl BitOrAssign for $vec {
237             #[inline(always)]
238             fn bitor_assign(&mut self, rhs: Self) {
239                 *self = *self | rhs
240             }
241         }
242         impl BitXorAssign for $vec {
243             #[inline(always)]
244             fn bitxor_assign(&mut self, rhs: Self) {
245                 *self = *self ^ rhs
246             }
247         }
248 
249         impl Swap64 for $vec {
250             #[inline]
251             fn swap1(self) -> Self {
252                 qmap(self, |x| {
253                     ((x & 0x5555555555555555) << 1) | ((x & 0xaaaaaaaaaaaaaaaa) >> 1)
254                 })
255             }
256             #[inline]
257             fn swap2(self) -> Self {
258                 qmap(self, |x| {
259                     ((x & 0x3333333333333333) << 2) | ((x & 0xcccccccccccccccc) >> 2)
260                 })
261             }
262             #[inline]
263             fn swap4(self) -> Self {
264                 qmap(self, |x| {
265                     ((x & 0x0f0f0f0f0f0f0f0f) << 4) | ((x & 0xf0f0f0f0f0f0f0f0) >> 4)
266                 })
267             }
268             #[inline]
269             fn swap8(self) -> Self {
270                 qmap(self, |x| {
271                     ((x & 0x00ff00ff00ff00ff) << 8) | ((x & 0xff00ff00ff00ff00) >> 8)
272                 })
273             }
274             #[inline]
275             fn swap16(self) -> Self {
276                 dmap(self, |x| x.rotate_left(16))
277             }
278             #[inline]
279             fn swap32(self) -> Self {
280                 qmap(self, |x| x.rotate_left(32))
281             }
282             #[inline]
283             fn swap64(self) -> Self {
284                 omap(self, |x| (x << 64) | (x >> 64))
285             }
286         }
287     };
288 }
289 impl_bitops!(u32x4_generic);
290 impl_bitops!(u64x2_generic);
291 impl_bitops!(u128x1_generic);
292 
293 impl RotateEachWord32 for u32x4_generic {
294     #[inline]
rotate_each_word_right7(self) -> Self295     fn rotate_each_word_right7(self) -> Self {
296         dmap(self, |x| x.rotate_right(7))
297     }
298     #[inline]
rotate_each_word_right8(self) -> Self299     fn rotate_each_word_right8(self) -> Self {
300         dmap(self, |x| x.rotate_right(8))
301     }
302     #[inline]
rotate_each_word_right11(self) -> Self303     fn rotate_each_word_right11(self) -> Self {
304         dmap(self, |x| x.rotate_right(11))
305     }
306     #[inline]
rotate_each_word_right12(self) -> Self307     fn rotate_each_word_right12(self) -> Self {
308         dmap(self, |x| x.rotate_right(12))
309     }
310     #[inline]
rotate_each_word_right16(self) -> Self311     fn rotate_each_word_right16(self) -> Self {
312         dmap(self, |x| x.rotate_right(16))
313     }
314     #[inline]
rotate_each_word_right20(self) -> Self315     fn rotate_each_word_right20(self) -> Self {
316         dmap(self, |x| x.rotate_right(20))
317     }
318     #[inline]
rotate_each_word_right24(self) -> Self319     fn rotate_each_word_right24(self) -> Self {
320         dmap(self, |x| x.rotate_right(24))
321     }
322     #[inline]
rotate_each_word_right25(self) -> Self323     fn rotate_each_word_right25(self) -> Self {
324         dmap(self, |x| x.rotate_right(25))
325     }
326 }
327 
328 impl RotateEachWord32 for u64x2_generic {
329     #[inline]
rotate_each_word_right7(self) -> Self330     fn rotate_each_word_right7(self) -> Self {
331         qmap(self, |x| x.rotate_right(7))
332     }
333     #[inline]
rotate_each_word_right8(self) -> Self334     fn rotate_each_word_right8(self) -> Self {
335         qmap(self, |x| x.rotate_right(8))
336     }
337     #[inline]
rotate_each_word_right11(self) -> Self338     fn rotate_each_word_right11(self) -> Self {
339         qmap(self, |x| x.rotate_right(11))
340     }
341     #[inline]
rotate_each_word_right12(self) -> Self342     fn rotate_each_word_right12(self) -> Self {
343         qmap(self, |x| x.rotate_right(12))
344     }
345     #[inline]
rotate_each_word_right16(self) -> Self346     fn rotate_each_word_right16(self) -> Self {
347         qmap(self, |x| x.rotate_right(16))
348     }
349     #[inline]
rotate_each_word_right20(self) -> Self350     fn rotate_each_word_right20(self) -> Self {
351         qmap(self, |x| x.rotate_right(20))
352     }
353     #[inline]
rotate_each_word_right24(self) -> Self354     fn rotate_each_word_right24(self) -> Self {
355         qmap(self, |x| x.rotate_right(24))
356     }
357     #[inline]
rotate_each_word_right25(self) -> Self358     fn rotate_each_word_right25(self) -> Self {
359         qmap(self, |x| x.rotate_right(25))
360     }
361 }
362 impl RotateEachWord64 for u64x2_generic {
363     #[inline]
rotate_each_word_right32(self) -> Self364     fn rotate_each_word_right32(self) -> Self {
365         qmap(self, |x| x.rotate_right(32))
366     }
367 }
368 
// workaround for koute/cargo-web#52 (u128::rotate_* broken with cargo web)
/// Rotates `x` right by `i` bits using plain shifts only.
///
/// The count is reduced modulo 128, so `i == 0` and multiples of 128 return
/// `x` unchanged. Without that reduction the left shift would be by 128 bits,
/// which is a shift overflow (a panic when overflow checks are enabled).
fn rotate_u128_right(x: u128, i: u32) -> u128 {
    let right = i % 128;
    // `(128 - right) % 128` maps the `right == 0` case to a left shift by 0.
    let left = (128 - right) % 128;
    (x >> right) | (x << left)
}
#[test]
fn test_rotate_u128() {
    const X: u128 = 0x0001_0203_0405_0607_0809_0a0b_0c0d_0e0f;
    assert_eq!(rotate_u128_right(X, 17), X.rotate_right(17));
    // Boundary counts: no rotation, almost-full rotation, full rotation.
    assert_eq!(rotate_u128_right(X, 0), X);
    assert_eq!(rotate_u128_right(X, 127), X.rotate_right(127));
    assert_eq!(rotate_u128_right(X, 128), X);
}
378 
379 impl RotateEachWord32 for u128x1_generic {
380     #[inline]
rotate_each_word_right7(self) -> Self381     fn rotate_each_word_right7(self) -> Self {
382         Self([rotate_u128_right(self.0[0], 7)])
383     }
384     #[inline]
rotate_each_word_right8(self) -> Self385     fn rotate_each_word_right8(self) -> Self {
386         Self([rotate_u128_right(self.0[0], 8)])
387     }
388     #[inline]
rotate_each_word_right11(self) -> Self389     fn rotate_each_word_right11(self) -> Self {
390         Self([rotate_u128_right(self.0[0], 11)])
391     }
392     #[inline]
rotate_each_word_right12(self) -> Self393     fn rotate_each_word_right12(self) -> Self {
394         Self([rotate_u128_right(self.0[0], 12)])
395     }
396     #[inline]
rotate_each_word_right16(self) -> Self397     fn rotate_each_word_right16(self) -> Self {
398         Self([rotate_u128_right(self.0[0], 16)])
399     }
400     #[inline]
rotate_each_word_right20(self) -> Self401     fn rotate_each_word_right20(self) -> Self {
402         Self([rotate_u128_right(self.0[0], 20)])
403     }
404     #[inline]
rotate_each_word_right24(self) -> Self405     fn rotate_each_word_right24(self) -> Self {
406         Self([rotate_u128_right(self.0[0], 24)])
407     }
408     #[inline]
rotate_each_word_right25(self) -> Self409     fn rotate_each_word_right25(self) -> Self {
410         Self([rotate_u128_right(self.0[0], 25)])
411     }
412 }
413 impl RotateEachWord64 for u128x1_generic {
414     #[inline]
rotate_each_word_right32(self) -> Self415     fn rotate_each_word_right32(self) -> Self {
416         Self([rotate_u128_right(self.0[0], 32)])
417     }
418 }
419 
420 #[derive(Copy, Clone)]
421 pub struct GenericMachine;
422 impl Machine for GenericMachine {
423     type u32x4 = u32x4_generic;
424     type u64x2 = u64x2_generic;
425     type u128x1 = u128x1_generic;
426     type u32x4x2 = u32x4x2_generic;
427     type u64x2x2 = u64x2x2_generic;
428     type u64x4 = u64x4_generic;
429     type u128x2 = u128x2_generic;
430     type u32x4x4 = u32x4x4_generic;
431     type u64x2x4 = u64x2x4_generic;
432     type u128x4 = u128x4_generic;
433     #[inline]
instance() -> Self434     unsafe fn instance() -> Self {
435         Self
436     }
437 }
438 
/// 128-bit vector stored as an array of four 32-bit lanes.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u32x4_generic([u32; 4]);
/// 128-bit vector stored as an array of two 64-bit lanes.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u64x2_generic([u64; 2]);
/// 128-bit vector stored as a one-element array holding a single `u128`.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u128x1_generic([u128; 1]);
445 
446 impl From<u32x4_generic> for vec128_storage {
447     #[inline(always)]
from(d: u32x4_generic) -> Self448     fn from(d: u32x4_generic) -> Self {
449         Self { d: d.0 }
450     }
451 }
452 impl From<u64x2_generic> for vec128_storage {
453     #[inline(always)]
from(q: u64x2_generic) -> Self454     fn from(q: u64x2_generic) -> Self {
455         Self { q: q.0 }
456     }
457 }
458 impl From<u128x1_generic> for vec128_storage {
459     #[inline(always)]
from(o: u128x1_generic) -> Self460     fn from(o: u128x1_generic) -> Self {
461         Self { q: q_of_o(o.0[0]) }
462     }
463 }
464 
465 impl Store<vec128_storage> for u32x4_generic {
466     #[inline(always)]
unpack(s: vec128_storage) -> Self467     unsafe fn unpack(s: vec128_storage) -> Self {
468         Self(s.d)
469     }
470 }
471 impl Store<vec128_storage> for u64x2_generic {
472     #[inline(always)]
unpack(s: vec128_storage) -> Self473     unsafe fn unpack(s: vec128_storage) -> Self {
474         Self(s.q)
475     }
476 }
477 impl Store<vec128_storage> for u128x1_generic {
478     #[inline(always)]
unpack(s: vec128_storage) -> Self479     unsafe fn unpack(s: vec128_storage) -> Self {
480         Self([o_of_q(s.q); 1])
481     }
482 }
483 
// Empty-bodied `ArithOps` impls for the three generic vector types.
// NOTE(review): `ArithOps` is declared outside this file; it presumably bundles
// the `Add`/`AddAssign` impls that follow — confirm against the trait definition.
impl ArithOps for u32x4_generic {}
impl ArithOps for u64x2_generic {}
impl ArithOps for u128x1_generic {}
487 
488 impl Add for u32x4_generic {
489     type Output = Self;
490     #[inline(always)]
add(self, rhs: Self) -> Self::Output491     fn add(self, rhs: Self) -> Self::Output {
492         dmap2(self, rhs, |x, y| x.wrapping_add(y))
493     }
494 }
495 impl Add for u64x2_generic {
496     type Output = Self;
497     #[inline(always)]
add(self, rhs: Self) -> Self::Output498     fn add(self, rhs: Self) -> Self::Output {
499         qmap2(self, rhs, |x, y| x.wrapping_add(y))
500     }
501 }
502 impl Add for u128x1_generic {
503     type Output = Self;
504     #[inline(always)]
add(self, rhs: Self) -> Self::Output505     fn add(self, rhs: Self) -> Self::Output {
506         omap2(self, rhs, |x, y| x.wrapping_add(y))
507     }
508 }
509 impl AddAssign for u32x4_generic {
510     #[inline(always)]
add_assign(&mut self, rhs: Self)511     fn add_assign(&mut self, rhs: Self) {
512         *self = *self + rhs
513     }
514 }
515 impl AddAssign for u64x2_generic {
516     #[inline(always)]
add_assign(&mut self, rhs: Self)517     fn add_assign(&mut self, rhs: Self) {
518         *self = *self + rhs
519     }
520 }
521 impl AddAssign for u128x1_generic {
522     #[inline(always)]
add_assign(&mut self, rhs: Self)523     fn add_assign(&mut self, rhs: Self) {
524         *self = *self + rhs
525     }
526 }
527 impl BSwap for u32x4_generic {
528     #[inline(always)]
bswap(self) -> Self529     fn bswap(self) -> Self {
530         dmap(self, |x| x.swap_bytes())
531     }
532 }
533 impl BSwap for u64x2_generic {
534     #[inline(always)]
bswap(self) -> Self535     fn bswap(self) -> Self {
536         qmap(self, |x| x.swap_bytes())
537     }
538 }
539 impl BSwap for u128x1_generic {
540     #[inline(always)]
bswap(self) -> Self541     fn bswap(self) -> Self {
542         omap(self, |x| x.swap_bytes())
543     }
544 }
545 impl StoreBytes for u32x4_generic {
546     #[inline(always)]
unsafe_read_le(input: &[u8]) -> Self547     unsafe fn unsafe_read_le(input: &[u8]) -> Self {
548         assert_eq!(input.len(), 16);
549         let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
550         dmap(x, |x| x.to_le())
551     }
552     #[inline(always)]
unsafe_read_be(input: &[u8]) -> Self553     unsafe fn unsafe_read_be(input: &[u8]) -> Self {
554         assert_eq!(input.len(), 16);
555         let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
556         dmap(x, |x| x.to_be())
557     }
558     #[inline(always)]
write_le(self, out: &mut [u8])559     fn write_le(self, out: &mut [u8]) {
560         assert_eq!(out.len(), 16);
561         let x = dmap(self, |x| x.to_le());
562         unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
563     }
564     #[inline(always)]
write_be(self, out: &mut [u8])565     fn write_be(self, out: &mut [u8]) {
566         assert_eq!(out.len(), 16);
567         let x = dmap(self, |x| x.to_be());
568         unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
569     }
570 }
571 impl StoreBytes for u64x2_generic {
572     #[inline(always)]
unsafe_read_le(input: &[u8]) -> Self573     unsafe fn unsafe_read_le(input: &[u8]) -> Self {
574         assert_eq!(input.len(), 16);
575         let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
576         qmap(x, |x| x.to_le())
577     }
578     #[inline(always)]
unsafe_read_be(input: &[u8]) -> Self579     unsafe fn unsafe_read_be(input: &[u8]) -> Self {
580         assert_eq!(input.len(), 16);
581         let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
582         qmap(x, |x| x.to_be())
583     }
584     #[inline(always)]
write_le(self, out: &mut [u8])585     fn write_le(self, out: &mut [u8]) {
586         assert_eq!(out.len(), 16);
587         let x = qmap(self, |x| x.to_le());
588         unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
589     }
590     #[inline(always)]
write_be(self, out: &mut [u8])591     fn write_be(self, out: &mut [u8]) {
592         assert_eq!(out.len(), 16);
593         let x = qmap(self, |x| x.to_be());
594         unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
595     }
596 }
597 
/// Tag type used as the second parameter of `x2`.
#[derive(Copy, Clone)]
pub struct G0;
/// Second tag type for `x2`; it makes `u64x4_generic` a distinct type from
/// `u64x2x2_generic` even though both wrap two `u64x2_generic` values.
#[derive(Copy, Clone)]
pub struct G1;
// Wider vectors composed from the 128-bit generic vectors via the `x2`/`x4`
// wrappers imported from `crate::soft`.
pub type u32x4x2_generic = x2<u32x4_generic, G0>;
pub type u64x2x2_generic = x2<u64x2_generic, G0>;
pub type u64x4_generic = x2<u64x2_generic, G1>;
pub type u128x2_generic = x2<u128x1_generic, G0>;
pub type u32x4x4_generic = x4<u32x4_generic>;
pub type u64x2x4_generic = x4<u64x2_generic>;
pub type u128x4_generic = x4<u128x1_generic>;
609 
610 impl MultiLane<[u32; 4]> for u32x4_generic {
611     #[inline(always)]
to_lanes(self) -> [u32; 4]612     fn to_lanes(self) -> [u32; 4] {
613         self.0
614     }
615     #[inline(always)]
from_lanes(xs: [u32; 4]) -> Self616     fn from_lanes(xs: [u32; 4]) -> Self {
617         Self(xs)
618     }
619 }
620 impl MultiLane<[u64; 2]> for u64x2_generic {
621     #[inline(always)]
to_lanes(self) -> [u64; 2]622     fn to_lanes(self) -> [u64; 2] {
623         self.0
624     }
625     #[inline(always)]
from_lanes(xs: [u64; 2]) -> Self626     fn from_lanes(xs: [u64; 2]) -> Self {
627         Self(xs)
628     }
629 }
630 impl MultiLane<[u64; 4]> for u64x4_generic {
631     #[inline(always)]
to_lanes(self) -> [u64; 4]632     fn to_lanes(self) -> [u64; 4] {
633         let (a, b) = (self.0[0].to_lanes(), self.0[1].to_lanes());
634         [a[0], a[1], b[0], b[1]]
635     }
636     #[inline(always)]
from_lanes(xs: [u64; 4]) -> Self637     fn from_lanes(xs: [u64; 4]) -> Self {
638         let (a, b) = (
639             u64x2_generic::from_lanes([xs[0], xs[1]]),
640             u64x2_generic::from_lanes([xs[2], xs[3]]),
641         );
642         x2::new([a, b])
643     }
644 }
645 impl MultiLane<[u128; 1]> for u128x1_generic {
646     #[inline(always)]
to_lanes(self) -> [u128; 1]647     fn to_lanes(self) -> [u128; 1] {
648         self.0
649     }
650     #[inline(always)]
from_lanes(xs: [u128; 1]) -> Self651     fn from_lanes(xs: [u128; 1]) -> Self {
652         Self(xs)
653     }
654 }
655 impl Vec4<u32> for u32x4_generic {
656     #[inline(always)]
extract(self, i: u32) -> u32657     fn extract(self, i: u32) -> u32 {
658         self.0[i as usize]
659     }
660     #[inline(always)]
insert(mut self, v: u32, i: u32) -> Self661     fn insert(mut self, v: u32, i: u32) -> Self {
662         self.0[i as usize] = v;
663         self
664     }
665 }
666 impl Vec4<u64> for u64x4_generic {
667     #[inline(always)]
extract(self, i: u32) -> u64668     fn extract(self, i: u32) -> u64 {
669         let d: [u64; 4] = self.to_lanes();
670         d[i as usize]
671     }
672     #[inline(always)]
insert(self, v: u64, i: u32) -> Self673     fn insert(self, v: u64, i: u32) -> Self {
674         self.0[(i / 2) as usize].insert(v, i % 2);
675         self
676     }
677 }
678 impl Vec2<u64> for u64x2_generic {
679     #[inline(always)]
extract(self, i: u32) -> u64680     fn extract(self, i: u32) -> u64 {
681         self.0[i as usize]
682     }
683     #[inline(always)]
insert(mut self, v: u64, i: u32) -> Self684     fn insert(mut self, v: u64, i: u32) -> Self {
685         self.0[i as usize] = v;
686         self
687     }
688 }
689 
690 impl Words4 for u32x4_generic {
691     #[inline(always)]
shuffle2301(self) -> Self692     fn shuffle2301(self) -> Self {
693         self.swap64()
694     }
695     #[inline(always)]
shuffle1230(self) -> Self696     fn shuffle1230(self) -> Self {
697         let x = self.0;
698         Self([x[3], x[0], x[1], x[2]])
699     }
700     #[inline(always)]
shuffle3012(self) -> Self701     fn shuffle3012(self) -> Self {
702         let x = self.0;
703         Self([x[1], x[2], x[3], x[0]])
704     }
705 }
706 impl LaneWords4 for u32x4_generic {
707     #[inline(always)]
shuffle_lane_words2301(self) -> Self708     fn shuffle_lane_words2301(self) -> Self {
709         self.shuffle2301()
710     }
711     #[inline(always)]
shuffle_lane_words1230(self) -> Self712     fn shuffle_lane_words1230(self) -> Self {
713         self.shuffle1230()
714     }
715     #[inline(always)]
shuffle_lane_words3012(self) -> Self716     fn shuffle_lane_words3012(self) -> Self {
717         self.shuffle3012()
718     }
719 }
720 
721 impl Words4 for u64x4_generic {
722     #[inline(always)]
shuffle2301(self) -> Self723     fn shuffle2301(self) -> Self {
724         x2::new([self.0[1], self.0[0]])
725     }
726     #[inline(always)]
shuffle1230(self) -> Self727     fn shuffle1230(self) -> Self {
728         unimplemented!()
729     }
730     #[inline(always)]
shuffle3012(self) -> Self731     fn shuffle3012(self) -> Self {
732         unimplemented!()
733     }
734 }
735 
// Empty-bodied impls tying each generic vector type to the `GenericMachine`
// vector trait of the same name.
// NOTE(review): these traits are declared outside this file; they presumably
// only aggregate supertrait bounds satisfied by the impls above — confirm
// against their definitions.
impl u32x4<GenericMachine> for u32x4_generic {}
impl u64x2<GenericMachine> for u64x2_generic {}
impl u128x1<GenericMachine> for u128x1_generic {}
impl u32x4x2<GenericMachine> for u32x4x2_generic {}
impl u64x2x2<GenericMachine> for u64x2x2_generic {}
impl u64x4<GenericMachine> for u64x4_generic {}
impl u128x2<GenericMachine> for u128x2_generic {}
impl u32x4x4<GenericMachine> for u32x4x4_generic {}
impl u64x2x4<GenericMachine> for u64x2x4_generic {}
impl u128x4<GenericMachine> for u128x4_generic {}
746 
/// Defines `$name` as a function that obtains a `GenericMachine` and forwards
/// it, together with the function's own arguments, to an inner `fn_impl` that
/// is generic over `$MTy: Machine` and holds `$body`.
///
/// The second arm accepts a function without a return type and re-invokes the
/// macro with `-> ()`. It goes through `$crate::` so the recursion resolves
/// even when the caller invoked this macro by path without importing it.
#[macro_export]
macro_rules! dispatch {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Same expansion as `dispatch!` in this generic backend: defines `$name` as a
/// function that obtains a `GenericMachine` and forwards to an inner
/// `fn_impl` generic over `$MTy: Machine`.
///
/// The arm without a return type delegates to `$crate::dispatch!` with
/// `-> ()`. The `$crate::` qualification lets it resolve even when the caller
/// imported only this macro.
#[macro_export]
macro_rules! dispatch_light128 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Same expansion as `dispatch!` in this generic backend: defines `$name` as a
/// function that obtains a `GenericMachine` and forwards to an inner
/// `fn_impl` generic over `$MTy: Machine`.
///
/// The arm without a return type delegates to `$crate::dispatch!` with
/// `-> ()`. The `$crate::` qualification lets it resolve even when the caller
/// imported only this macro.
#[macro_export]
macro_rules! dispatch_light256 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Same expansion as `dispatch!` in this generic backend: defines `$name` as a
/// function that obtains a `GenericMachine` and forwards to an inner
/// `fn_impl` generic over `$MTy: Machine`.
///
/// The arm without a return type delegates to `$crate::dispatch!` with
/// `-> ()`. The `$crate::` qualification lets it resolve even when the caller
/// imported only this macro.
#[macro_export]
macro_rules! dispatch_light512 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
815 
#[cfg(test)]
mod test {
    use super::*;

    /// `bswap` on a `u32x4` must reverse the bytes inside each 32-bit word
    /// while leaving the word order untouched.
    #[test]
    fn test_bswap32() {
        let input = [0x0f0e_0d0c, 0x0b0a_0908, 0x0706_0504, 0x0302_0100];
        let expected = [0x0c0d_0e0f, 0x0809_0a0b, 0x0405_0607, 0x0001_0203];

        let machine = unsafe { GenericMachine::instance() };

        let loaded: <GenericMachine as Machine>::u32x4 = machine.vec(input);
        let swapped = loaded.bswap();

        let wanted = machine.vec(expected);
        assert_eq!(swapped, wanted);
    }
}
834