1 #![allow(non_camel_case_types)]
2 
3 use crate::soft::{x2, x4};
4 use crate::types::*;
5 use core::ops::*;
6 
/// 128-bit storage value that can be viewed either as four `u32` words or as
/// two `u64` words.
///
/// Both fields are plain integer arrays of 16 bytes, so the two views overlay
/// the same bytes.
#[repr(C)]
#[derive(Clone, Copy)]
pub union vec128_storage {
    /// View as four 32-bit words.
    d: [u32; 4],
    /// View as two 64-bit words.
    q: [u64; 2],
}
13 impl From<[u32; 4]> for vec128_storage {
14     #[inline(always)]
from(d: [u32; 4]) -> Self15     fn from(d: [u32; 4]) -> Self {
16         Self { d }
17     }
18 }
19 impl From<vec128_storage> for [u32; 4] {
20     #[inline(always)]
from(d: vec128_storage) -> Self21     fn from(d: vec128_storage) -> Self {
22         unsafe { d.d }
23     }
24 }
25 impl From<[u64; 2]> for vec128_storage {
26     #[inline(always)]
from(q: [u64; 2]) -> Self27     fn from(q: [u64; 2]) -> Self {
28         Self { q }
29     }
30 }
31 impl From<vec128_storage> for [u64; 2] {
32     #[inline(always)]
from(q: vec128_storage) -> Self33     fn from(q: vec128_storage) -> Self {
34         unsafe { q.q }
35     }
36 }
37 impl Default for vec128_storage {
38     #[inline(always)]
default() -> Self39     fn default() -> Self {
40         Self { q: [0, 0] }
41     }
42 }
43 impl Eq for vec128_storage {}
44 impl PartialEq<vec128_storage> for vec128_storage {
45     #[inline(always)]
eq(&self, rhs: &Self) -> bool46     fn eq(&self, rhs: &Self) -> bool {
47         unsafe { self.q == rhs.q }
48     }
49 }
50 #[derive(Clone, Copy, PartialEq, Eq, Default)]
51 pub struct vec256_storage {
52     v128: [vec128_storage; 2],
53 }
54 impl vec256_storage {
55     #[inline(always)]
new128(v128: [vec128_storage; 2]) -> Self56     pub fn new128(v128: [vec128_storage; 2]) -> Self {
57         Self { v128 }
58     }
59     #[inline(always)]
split128(self) -> [vec128_storage; 2]60     pub fn split128(self) -> [vec128_storage; 2] {
61         self.v128
62     }
63 }
64 impl From<vec256_storage> for [u64; 4] {
65     #[inline(always)]
from(q: vec256_storage) -> Self66     fn from(q: vec256_storage) -> Self {
67         let [a, b]: [u64; 2] = q.v128[0].into();
68         let [c, d]: [u64; 2] = q.v128[1].into();
69         [a, b, c, d]
70     }
71 }
72 #[derive(Clone, Copy, PartialEq, Eq, Default)]
73 pub struct vec512_storage {
74     v128: [vec128_storage; 4],
75 }
76 impl vec512_storage {
77     #[inline(always)]
new128(v128: [vec128_storage; 4]) -> Self78     pub fn new128(v128: [vec128_storage; 4]) -> Self {
79         Self { v128 }
80     }
81     #[inline(always)]
split128(self) -> [vec128_storage; 4]82     pub fn split128(self) -> [vec128_storage; 4] {
83         self.v128
84     }
85 }
86 
87 #[inline(always)]
dmap<T, F>(t: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u32) -> u32,88 fn dmap<T, F>(t: T, f: F) -> T
89 where
90     T: Store<vec128_storage> + Into<vec128_storage>,
91     F: Fn(u32) -> u32,
92 {
93     let t: vec128_storage = t.into();
94     let d = unsafe { t.d };
95     let d = vec128_storage {
96         d: [f(d[0]), f(d[1]), f(d[2]), f(d[3])],
97     };
98     unsafe { T::unpack(d) }
99 }
100 
dmap2<T, F>(a: T, b: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u32, u32) -> u32,101 fn dmap2<T, F>(a: T, b: T, f: F) -> T
102 where
103     T: Store<vec128_storage> + Into<vec128_storage>,
104     F: Fn(u32, u32) -> u32,
105 {
106     let a: vec128_storage = a.into();
107     let b: vec128_storage = b.into();
108     let ao = unsafe { a.d };
109     let bo = unsafe { b.d };
110     let d = vec128_storage {
111         d: [
112             f(ao[0], bo[0]),
113             f(ao[1], bo[1]),
114             f(ao[2], bo[2]),
115             f(ao[3], bo[3]),
116         ],
117     };
118     unsafe { T::unpack(d) }
119 }
120 
121 #[inline(always)]
qmap<T, F>(t: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u64) -> u64,122 fn qmap<T, F>(t: T, f: F) -> T
123 where
124     T: Store<vec128_storage> + Into<vec128_storage>,
125     F: Fn(u64) -> u64,
126 {
127     let t: vec128_storage = t.into();
128     let q = unsafe { t.q };
129     let q = vec128_storage {
130         q: [f(q[0]), f(q[1])],
131     };
132     unsafe { T::unpack(q) }
133 }
134 
135 #[inline(always)]
qmap2<T, F>(a: T, b: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u64, u64) -> u64,136 fn qmap2<T, F>(a: T, b: T, f: F) -> T
137 where
138     T: Store<vec128_storage> + Into<vec128_storage>,
139     F: Fn(u64, u64) -> u64,
140 {
141     let a: vec128_storage = a.into();
142     let b: vec128_storage = b.into();
143     let ao = unsafe { a.q };
144     let bo = unsafe { b.q };
145     let q = vec128_storage {
146         q: [f(ao[0], bo[0]), f(ao[1], bo[1])],
147     };
148     unsafe { T::unpack(q) }
149 }
150 
/// Joins two 64-bit words into one `u128`; `q[0]` becomes the low 64 bits and
/// `q[1]` the high 64 bits.
#[inline(always)]
fn o_of_q(q: [u64; 2]) -> u128 {
    let [low, high] = q;
    (u128::from(high) << 64) | u128::from(low)
}
155 
/// Splits a `u128` into two 64-bit words: index 0 holds the low 64 bits and
/// index 1 the high 64 bits.
#[inline(always)]
fn q_of_o(o: u128) -> [u64; 2] {
    // The `as u64` casts intentionally keep only the low 64 bits.
    let low = o as u64;
    let high = (o >> 64) as u64;
    [low, high]
}
160 
161 #[inline(always)]
omap<T, F>(a: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u128) -> u128,162 fn omap<T, F>(a: T, f: F) -> T
163 where
164     T: Store<vec128_storage> + Into<vec128_storage>,
165     F: Fn(u128) -> u128,
166 {
167     let a: vec128_storage = a.into();
168     let ao = o_of_q(unsafe { a.q });
169     let o = vec128_storage { q: q_of_o(f(ao)) };
170     unsafe { T::unpack(o) }
171 }
172 
173 #[inline(always)]
omap2<T, F>(a: T, b: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u128, u128) -> u128,174 fn omap2<T, F>(a: T, b: T, f: F) -> T
175 where
176     T: Store<vec128_storage> + Into<vec128_storage>,
177     F: Fn(u128, u128) -> u128,
178 {
179     let a: vec128_storage = a.into();
180     let b: vec128_storage = b.into();
181     let ao = o_of_q(unsafe { a.q });
182     let bo = o_of_q(unsafe { b.q });
183     let o = vec128_storage {
184         q: q_of_o(f(ao, bo)),
185     };
186     unsafe { T::unpack(o) }
187 }
188 
// Empty-bodied trait impls for the three generic 128-bit vector types. The
// traits themselves are declared outside this file; nothing is overridden
// here, so any behavior comes from the trait definitions.
impl RotateEachWord128 for u128x1_generic {}
impl BitOps128 for u128x1_generic {}
impl BitOps64 for u128x1_generic {}
impl BitOps64 for u64x2_generic {}
impl BitOps32 for u128x1_generic {}
impl BitOps32 for u64x2_generic {}
impl BitOps32 for u32x4_generic {}
impl BitOps0 for u128x1_generic {}
impl BitOps0 for u64x2_generic {}
impl BitOps0 for u32x4_generic {}
199 
200 macro_rules! impl_bitops {
201     ($vec:ident) => {
202         impl Not for $vec {
203             type Output = Self;
204             #[inline(always)]
205             fn not(self) -> Self::Output {
206                 omap(self, |x| !x)
207             }
208         }
209         impl BitAnd for $vec {
210             type Output = Self;
211             #[inline(always)]
212             fn bitand(self, rhs: Self) -> Self::Output {
213                 omap2(self, rhs, |x, y| x & y)
214             }
215         }
216         impl BitOr for $vec {
217             type Output = Self;
218             #[inline(always)]
219             fn bitor(self, rhs: Self) -> Self::Output {
220                 omap2(self, rhs, |x, y| x | y)
221             }
222         }
223         impl BitXor for $vec {
224             type Output = Self;
225             #[inline(always)]
226             fn bitxor(self, rhs: Self) -> Self::Output {
227                 omap2(self, rhs, |x, y| x ^ y)
228             }
229         }
230         impl AndNot for $vec {
231             type Output = Self;
232             #[inline(always)]
233             fn andnot(self, rhs: Self) -> Self::Output {
234                 omap2(self, rhs, |x, y| !x & y)
235             }
236         }
237         impl BitAndAssign for $vec {
238             #[inline(always)]
239             fn bitand_assign(&mut self, rhs: Self) {
240                 *self = *self & rhs
241             }
242         }
243         impl BitOrAssign for $vec {
244             #[inline(always)]
245             fn bitor_assign(&mut self, rhs: Self) {
246                 *self = *self | rhs
247             }
248         }
249         impl BitXorAssign for $vec {
250             #[inline(always)]
251             fn bitxor_assign(&mut self, rhs: Self) {
252                 *self = *self ^ rhs
253             }
254         }
255 
256         impl Swap64 for $vec {
257             #[inline(always)]
258             fn swap1(self) -> Self {
259                 qmap(self, |x| {
260                     ((x & 0x5555555555555555) << 1) | ((x & 0xaaaaaaaaaaaaaaaa) >> 1)
261                 })
262             }
263             #[inline(always)]
264             fn swap2(self) -> Self {
265                 qmap(self, |x| {
266                     ((x & 0x3333333333333333) << 2) | ((x & 0xcccccccccccccccc) >> 2)
267                 })
268             }
269             #[inline(always)]
270             fn swap4(self) -> Self {
271                 qmap(self, |x| {
272                     ((x & 0x0f0f0f0f0f0f0f0f) << 4) | ((x & 0xf0f0f0f0f0f0f0f0) >> 4)
273                 })
274             }
275             #[inline(always)]
276             fn swap8(self) -> Self {
277                 qmap(self, |x| {
278                     ((x & 0x00ff00ff00ff00ff) << 8) | ((x & 0xff00ff00ff00ff00) >> 8)
279                 })
280             }
281             #[inline(always)]
282             fn swap16(self) -> Self {
283                 dmap(self, |x| x.rotate_left(16))
284             }
285             #[inline(always)]
286             fn swap32(self) -> Self {
287                 qmap(self, |x| x.rotate_left(32))
288             }
289             #[inline(always)]
290             fn swap64(self) -> Self {
291                 omap(self, |x| (x << 64) | (x >> 64))
292             }
293         }
294     };
295 }
296 impl_bitops!(u32x4_generic);
297 impl_bitops!(u64x2_generic);
298 impl_bitops!(u128x1_generic);
299 
300 impl RotateEachWord32 for u32x4_generic {
301     #[inline(always)]
rotate_each_word_right7(self) -> Self302     fn rotate_each_word_right7(self) -> Self {
303         dmap(self, |x| x.rotate_right(7))
304     }
305     #[inline(always)]
rotate_each_word_right8(self) -> Self306     fn rotate_each_word_right8(self) -> Self {
307         dmap(self, |x| x.rotate_right(8))
308     }
309     #[inline(always)]
rotate_each_word_right11(self) -> Self310     fn rotate_each_word_right11(self) -> Self {
311         dmap(self, |x| x.rotate_right(11))
312     }
313     #[inline(always)]
rotate_each_word_right12(self) -> Self314     fn rotate_each_word_right12(self) -> Self {
315         dmap(self, |x| x.rotate_right(12))
316     }
317     #[inline(always)]
rotate_each_word_right16(self) -> Self318     fn rotate_each_word_right16(self) -> Self {
319         dmap(self, |x| x.rotate_right(16))
320     }
321     #[inline(always)]
rotate_each_word_right20(self) -> Self322     fn rotate_each_word_right20(self) -> Self {
323         dmap(self, |x| x.rotate_right(20))
324     }
325     #[inline(always)]
rotate_each_word_right24(self) -> Self326     fn rotate_each_word_right24(self) -> Self {
327         dmap(self, |x| x.rotate_right(24))
328     }
329     #[inline(always)]
rotate_each_word_right25(self) -> Self330     fn rotate_each_word_right25(self) -> Self {
331         dmap(self, |x| x.rotate_right(25))
332     }
333 }
334 
335 impl RotateEachWord32 for u64x2_generic {
336     #[inline(always)]
rotate_each_word_right7(self) -> Self337     fn rotate_each_word_right7(self) -> Self {
338         qmap(self, |x| x.rotate_right(7))
339     }
340     #[inline(always)]
rotate_each_word_right8(self) -> Self341     fn rotate_each_word_right8(self) -> Self {
342         qmap(self, |x| x.rotate_right(8))
343     }
344     #[inline(always)]
rotate_each_word_right11(self) -> Self345     fn rotate_each_word_right11(self) -> Self {
346         qmap(self, |x| x.rotate_right(11))
347     }
348     #[inline(always)]
rotate_each_word_right12(self) -> Self349     fn rotate_each_word_right12(self) -> Self {
350         qmap(self, |x| x.rotate_right(12))
351     }
352     #[inline(always)]
rotate_each_word_right16(self) -> Self353     fn rotate_each_word_right16(self) -> Self {
354         qmap(self, |x| x.rotate_right(16))
355     }
356     #[inline(always)]
rotate_each_word_right20(self) -> Self357     fn rotate_each_word_right20(self) -> Self {
358         qmap(self, |x| x.rotate_right(20))
359     }
360     #[inline(always)]
rotate_each_word_right24(self) -> Self361     fn rotate_each_word_right24(self) -> Self {
362         qmap(self, |x| x.rotate_right(24))
363     }
364     #[inline(always)]
rotate_each_word_right25(self) -> Self365     fn rotate_each_word_right25(self) -> Self {
366         qmap(self, |x| x.rotate_right(25))
367     }
368 }
369 impl RotateEachWord64 for u64x2_generic {
370     #[inline(always)]
rotate_each_word_right32(self) -> Self371     fn rotate_each_word_right32(self) -> Self {
372         qmap(self, |x| x.rotate_right(32))
373     }
374 }
375 
376 // workaround for koute/cargo-web#52 (u128::rotate_* broken with cargo web)
/// Rotates `x` right by `i` bits using plain shifts instead of
/// `u128::rotate_right`.
///
/// The amount is reduced modulo 128, so `i == 0` and `i >= 128` are handled
/// without ever shifting by the full bit width (which would overflow and
/// panic in debug builds).
#[inline(always)]
fn rotate_u128_right(x: u128, i: u32) -> u128 {
    let amount = i & 127;
    // `(128 - amount) & 127` maps the `amount == 0` case to a left shift of 0.
    (x >> amount) | (x << ((128 - amount) & 127))
}
#[test]
fn test_rotate_u128() {
    const X: u128 = 0x0001_0203_0405_0607_0809_0a0b_0c0d_0e0f;
    assert_eq!(rotate_u128_right(X, 17), X.rotate_right(17));
    // Edge cases: no rotation and amounts at or beyond the bit width.
    assert_eq!(rotate_u128_right(X, 0), X);
    assert_eq!(rotate_u128_right(X, 128), X);
    assert_eq!(rotate_u128_right(X, 135), X.rotate_right(7));
}
386 
387 impl RotateEachWord32 for u128x1_generic {
388     #[inline(always)]
rotate_each_word_right7(self) -> Self389     fn rotate_each_word_right7(self) -> Self {
390         Self([rotate_u128_right(self.0[0], 7)])
391     }
392     #[inline(always)]
rotate_each_word_right8(self) -> Self393     fn rotate_each_word_right8(self) -> Self {
394         Self([rotate_u128_right(self.0[0], 8)])
395     }
396     #[inline(always)]
rotate_each_word_right11(self) -> Self397     fn rotate_each_word_right11(self) -> Self {
398         Self([rotate_u128_right(self.0[0], 11)])
399     }
400     #[inline(always)]
rotate_each_word_right12(self) -> Self401     fn rotate_each_word_right12(self) -> Self {
402         Self([rotate_u128_right(self.0[0], 12)])
403     }
404     #[inline(always)]
rotate_each_word_right16(self) -> Self405     fn rotate_each_word_right16(self) -> Self {
406         Self([rotate_u128_right(self.0[0], 16)])
407     }
408     #[inline(always)]
rotate_each_word_right20(self) -> Self409     fn rotate_each_word_right20(self) -> Self {
410         Self([rotate_u128_right(self.0[0], 20)])
411     }
412     #[inline(always)]
rotate_each_word_right24(self) -> Self413     fn rotate_each_word_right24(self) -> Self {
414         Self([rotate_u128_right(self.0[0], 24)])
415     }
416     #[inline(always)]
rotate_each_word_right25(self) -> Self417     fn rotate_each_word_right25(self) -> Self {
418         Self([rotate_u128_right(self.0[0], 25)])
419     }
420 }
421 impl RotateEachWord64 for u128x1_generic {
422     #[inline(always)]
rotate_each_word_right32(self) -> Self423     fn rotate_each_word_right32(self) -> Self {
424         Self([rotate_u128_right(self.0[0], 32)])
425     }
426 }
427 
428 #[derive(Copy, Clone)]
429 pub struct GenericMachine;
430 impl Machine for GenericMachine {
431     type u32x4 = u32x4_generic;
432     type u64x2 = u64x2_generic;
433     type u128x1 = u128x1_generic;
434     type u32x4x2 = u32x4x2_generic;
435     type u64x2x2 = u64x2x2_generic;
436     type u64x4 = u64x4_generic;
437     type u128x2 = u128x2_generic;
438     type u32x4x4 = u32x4x4_generic;
439     type u64x2x4 = u64x2x4_generic;
440     type u128x4 = u128x4_generic;
441     #[inline(always)]
instance() -> Self442     unsafe fn instance() -> Self {
443         Self
444     }
445 }
446 
/// Portable 128-bit vector stored as four `u32` words.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u32x4_generic([u32; 4]);
/// Portable 128-bit vector stored as two `u64` words.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u64x2_generic([u64; 2]);
/// Portable 128-bit vector stored as a single `u128` word.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u128x1_generic([u128; 1]);
453 
454 impl From<u32x4_generic> for vec128_storage {
455     #[inline(always)]
from(d: u32x4_generic) -> Self456     fn from(d: u32x4_generic) -> Self {
457         Self { d: d.0 }
458     }
459 }
460 impl From<u64x2_generic> for vec128_storage {
461     #[inline(always)]
from(q: u64x2_generic) -> Self462     fn from(q: u64x2_generic) -> Self {
463         Self { q: q.0 }
464     }
465 }
466 impl From<u128x1_generic> for vec128_storage {
467     #[inline(always)]
from(o: u128x1_generic) -> Self468     fn from(o: u128x1_generic) -> Self {
469         Self { q: q_of_o(o.0[0]) }
470     }
471 }
472 
473 impl Store<vec128_storage> for u32x4_generic {
474     #[inline(always)]
unpack(s: vec128_storage) -> Self475     unsafe fn unpack(s: vec128_storage) -> Self {
476         Self(s.d)
477     }
478 }
479 impl Store<vec128_storage> for u64x2_generic {
480     #[inline(always)]
unpack(s: vec128_storage) -> Self481     unsafe fn unpack(s: vec128_storage) -> Self {
482         Self(s.q)
483     }
484 }
485 impl Store<vec128_storage> for u128x1_generic {
486     #[inline(always)]
unpack(s: vec128_storage) -> Self487     unsafe fn unpack(s: vec128_storage) -> Self {
488         Self([o_of_q(s.q); 1])
489     }
490 }
491 
// Empty-bodied `ArithOps` impls for the three 128-bit generic vector types;
// the trait is declared outside this file.
impl ArithOps for u32x4_generic {}
impl ArithOps for u64x2_generic {}
impl ArithOps for u128x1_generic {}
495 
496 impl Add for u32x4_generic {
497     type Output = Self;
498     #[inline(always)]
add(self, rhs: Self) -> Self::Output499     fn add(self, rhs: Self) -> Self::Output {
500         dmap2(self, rhs, |x, y| x.wrapping_add(y))
501     }
502 }
503 impl Add for u64x2_generic {
504     type Output = Self;
505     #[inline(always)]
add(self, rhs: Self) -> Self::Output506     fn add(self, rhs: Self) -> Self::Output {
507         qmap2(self, rhs, |x, y| x.wrapping_add(y))
508     }
509 }
510 impl Add for u128x1_generic {
511     type Output = Self;
512     #[inline(always)]
add(self, rhs: Self) -> Self::Output513     fn add(self, rhs: Self) -> Self::Output {
514         omap2(self, rhs, |x, y| x.wrapping_add(y))
515     }
516 }
517 impl AddAssign for u32x4_generic {
518     #[inline(always)]
add_assign(&mut self, rhs: Self)519     fn add_assign(&mut self, rhs: Self) {
520         *self = *self + rhs
521     }
522 }
523 impl AddAssign for u64x2_generic {
524     #[inline(always)]
add_assign(&mut self, rhs: Self)525     fn add_assign(&mut self, rhs: Self) {
526         *self = *self + rhs
527     }
528 }
529 impl AddAssign for u128x1_generic {
530     #[inline(always)]
add_assign(&mut self, rhs: Self)531     fn add_assign(&mut self, rhs: Self) {
532         *self = *self + rhs
533     }
534 }
535 impl BSwap for u32x4_generic {
536     #[inline(always)]
bswap(self) -> Self537     fn bswap(self) -> Self {
538         dmap(self, |x| x.swap_bytes())
539     }
540 }
541 impl BSwap for u64x2_generic {
542     #[inline(always)]
bswap(self) -> Self543     fn bswap(self) -> Self {
544         qmap(self, |x| x.swap_bytes())
545     }
546 }
547 impl BSwap for u128x1_generic {
548     #[inline(always)]
bswap(self) -> Self549     fn bswap(self) -> Self {
550         omap(self, |x| x.swap_bytes())
551     }
552 }
553 impl StoreBytes for u32x4_generic {
554     #[inline(always)]
unsafe_read_le(input: &[u8]) -> Self555     unsafe fn unsafe_read_le(input: &[u8]) -> Self {
556         assert_eq!(input.len(), 16);
557         let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
558         dmap(x, |x| x.to_le())
559     }
560     #[inline(always)]
unsafe_read_be(input: &[u8]) -> Self561     unsafe fn unsafe_read_be(input: &[u8]) -> Self {
562         assert_eq!(input.len(), 16);
563         let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
564         dmap(x, |x| x.to_be())
565     }
566     #[inline(always)]
write_le(self, out: &mut [u8])567     fn write_le(self, out: &mut [u8]) {
568         assert_eq!(out.len(), 16);
569         let x = dmap(self, |x| x.to_le());
570         unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
571     }
572     #[inline(always)]
write_be(self, out: &mut [u8])573     fn write_be(self, out: &mut [u8]) {
574         assert_eq!(out.len(), 16);
575         let x = dmap(self, |x| x.to_be());
576         unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
577     }
578 }
579 impl StoreBytes for u64x2_generic {
580     #[inline(always)]
unsafe_read_le(input: &[u8]) -> Self581     unsafe fn unsafe_read_le(input: &[u8]) -> Self {
582         assert_eq!(input.len(), 16);
583         let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
584         qmap(x, |x| x.to_le())
585     }
586     #[inline(always)]
unsafe_read_be(input: &[u8]) -> Self587     unsafe fn unsafe_read_be(input: &[u8]) -> Self {
588         assert_eq!(input.len(), 16);
589         let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
590         qmap(x, |x| x.to_be())
591     }
592     #[inline(always)]
write_le(self, out: &mut [u8])593     fn write_le(self, out: &mut [u8]) {
594         assert_eq!(out.len(), 16);
595         let x = qmap(self, |x| x.to_le());
596         unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
597     }
598     #[inline(always)]
write_be(self, out: &mut [u8])599     fn write_be(self, out: &mut [u8]) {
600         assert_eq!(out.len(), 16);
601         let x = qmap(self, |x| x.to_be());
602         unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
603     }
604 }
605 
/// Tag type used as the second parameter of `x2` in the aliases below.
#[derive(Copy, Clone)]
pub struct G0;
/// Second tag type; it makes `u64x4_generic` a distinct type from
/// `u64x2x2_generic`, which wraps the same element type.
#[derive(Copy, Clone)]
pub struct G1;
// Wider vectors built from pairs (`x2`) or quadruples (`x4`) of the 128-bit
// generic vectors.
pub type u32x4x2_generic = x2<u32x4_generic, G0>;
pub type u64x2x2_generic = x2<u64x2_generic, G0>;
pub type u64x4_generic = x2<u64x2_generic, G1>;
pub type u128x2_generic = x2<u128x1_generic, G0>;
pub type u32x4x4_generic = x4<u32x4_generic>;
pub type u64x2x4_generic = x4<u64x2_generic>;
pub type u128x4_generic = x4<u128x1_generic>;
617 
618 impl Vector<[u32; 16]> for u32x4x4_generic {
to_scalars(self) -> [u32; 16]619     fn to_scalars(self) -> [u32; 16] {
620         let [a, b, c, d] = self.0;
621         let a = a.0;
622         let b = b.0;
623         let c = c.0;
624         let d = d.0;
625         [
626             a[0], a[1], a[2], a[3],
627             b[0], b[1], b[2], b[3],
628             c[0], c[1], c[2], c[3],
629             d[0], d[1], d[2], d[3],
630         ]
631     }
632 }
633 
634 impl MultiLane<[u32; 4]> for u32x4_generic {
635     #[inline(always)]
to_lanes(self) -> [u32; 4]636     fn to_lanes(self) -> [u32; 4] {
637         self.0
638     }
639     #[inline(always)]
from_lanes(xs: [u32; 4]) -> Self640     fn from_lanes(xs: [u32; 4]) -> Self {
641         Self(xs)
642     }
643 }
644 impl MultiLane<[u64; 2]> for u64x2_generic {
645     #[inline(always)]
to_lanes(self) -> [u64; 2]646     fn to_lanes(self) -> [u64; 2] {
647         self.0
648     }
649     #[inline(always)]
from_lanes(xs: [u64; 2]) -> Self650     fn from_lanes(xs: [u64; 2]) -> Self {
651         Self(xs)
652     }
653 }
654 impl MultiLane<[u64; 4]> for u64x4_generic {
655     #[inline(always)]
to_lanes(self) -> [u64; 4]656     fn to_lanes(self) -> [u64; 4] {
657         let (a, b) = (self.0[0].to_lanes(), self.0[1].to_lanes());
658         [a[0], a[1], b[0], b[1]]
659     }
660     #[inline(always)]
from_lanes(xs: [u64; 4]) -> Self661     fn from_lanes(xs: [u64; 4]) -> Self {
662         let (a, b) = (
663             u64x2_generic::from_lanes([xs[0], xs[1]]),
664             u64x2_generic::from_lanes([xs[2], xs[3]]),
665         );
666         x2::new([a, b])
667     }
668 }
669 impl MultiLane<[u128; 1]> for u128x1_generic {
670     #[inline(always)]
to_lanes(self) -> [u128; 1]671     fn to_lanes(self) -> [u128; 1] {
672         self.0
673     }
674     #[inline(always)]
from_lanes(xs: [u128; 1]) -> Self675     fn from_lanes(xs: [u128; 1]) -> Self {
676         Self(xs)
677     }
678 }
679 impl Vec4<u32> for u32x4_generic {
680     #[inline(always)]
extract(self, i: u32) -> u32681     fn extract(self, i: u32) -> u32 {
682         self.0[i as usize]
683     }
684     #[inline(always)]
insert(mut self, v: u32, i: u32) -> Self685     fn insert(mut self, v: u32, i: u32) -> Self {
686         self.0[i as usize] = v;
687         self
688     }
689 }
690 impl Vec4<u64> for u64x4_generic {
691     #[inline(always)]
extract(self, i: u32) -> u64692     fn extract(self, i: u32) -> u64 {
693         let d: [u64; 4] = self.to_lanes();
694         d[i as usize]
695     }
696     #[inline(always)]
insert(self, v: u64, i: u32) -> Self697     fn insert(self, v: u64, i: u32) -> Self {
698         self.0[(i / 2) as usize].insert(v, i % 2);
699         self
700     }
701 }
702 impl Vec2<u64> for u64x2_generic {
703     #[inline(always)]
extract(self, i: u32) -> u64704     fn extract(self, i: u32) -> u64 {
705         self.0[i as usize]
706     }
707     #[inline(always)]
insert(mut self, v: u64, i: u32) -> Self708     fn insert(mut self, v: u64, i: u32) -> Self {
709         self.0[i as usize] = v;
710         self
711     }
712 }
713 
714 impl Words4 for u32x4_generic {
715     #[inline(always)]
shuffle2301(self) -> Self716     fn shuffle2301(self) -> Self {
717         self.swap64()
718     }
719     #[inline(always)]
shuffle1230(self) -> Self720     fn shuffle1230(self) -> Self {
721         let x = self.0;
722         Self([x[3], x[0], x[1], x[2]])
723     }
724     #[inline(always)]
shuffle3012(self) -> Self725     fn shuffle3012(self) -> Self {
726         let x = self.0;
727         Self([x[1], x[2], x[3], x[0]])
728     }
729 }
730 impl LaneWords4 for u32x4_generic {
731     #[inline(always)]
shuffle_lane_words2301(self) -> Self732     fn shuffle_lane_words2301(self) -> Self {
733         self.shuffle2301()
734     }
735     #[inline(always)]
shuffle_lane_words1230(self) -> Self736     fn shuffle_lane_words1230(self) -> Self {
737         self.shuffle1230()
738     }
739     #[inline(always)]
shuffle_lane_words3012(self) -> Self740     fn shuffle_lane_words3012(self) -> Self {
741         self.shuffle3012()
742     }
743 }
744 
745 impl Words4 for u64x4_generic {
746     #[inline(always)]
shuffle2301(self) -> Self747     fn shuffle2301(self) -> Self {
748         x2::new([self.0[1], self.0[0]])
749     }
750     #[inline(always)]
shuffle1230(self) -> Self751     fn shuffle1230(self) -> Self {
752         unimplemented!()
753     }
754     #[inline(always)]
shuffle3012(self) -> Self755     fn shuffle3012(self) -> Self {
756         unimplemented!()
757     }
758 }
759 
// Empty-bodied impls of the per-width vector traits (parameterized by
// `GenericMachine`) for the generic vector types; these are the types named
// by `GenericMachine`'s associated types.
impl u32x4<GenericMachine> for u32x4_generic {}
impl u64x2<GenericMachine> for u64x2_generic {}
impl u128x1<GenericMachine> for u128x1_generic {}
impl u32x4x2<GenericMachine> for u32x4x2_generic {}
impl u64x2x2<GenericMachine> for u64x2x2_generic {}
impl u64x4<GenericMachine> for u64x4_generic {}
impl u128x2<GenericMachine> for u128x2_generic {}
impl u32x4x4<GenericMachine> for u32x4x4_generic {}
impl u64x2x4<GenericMachine> for u64x2x4_generic {}
impl u128x4<GenericMachine> for u128x4_generic {}
770 
/// Generates a function `$name` whose body is written generically over a
/// machine type `$MTy: Machine` and is called with a `GenericMachine`
/// instance bound to `$mach`.
///
/// The first arm handles functions with an explicit return type; the second
/// arm handles functions without one by re-invoking the macro with `-> ()`.
/// The re-invocation goes through `$crate::` so it resolves even when the
/// caller reached this macro by path and has no `dispatch!` in textual scope.
#[macro_export]
macro_rules! dispatch {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Generates a function `$name` whose body is written generically over a
/// machine type `$MTy: Machine` and is called with a `GenericMachine`
/// instance bound to `$mach`.
///
/// In this file the expansion is the same as `dispatch!`. The unit-return arm
/// re-invokes this macro through `$crate::` with `-> ()`, so it does not
/// depend on another macro being in textual scope at the call site.
#[macro_export]
macro_rules! dispatch_light128 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch_light128!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Generates a function `$name` whose body is written generically over a
/// machine type `$MTy: Machine` and is called with a `GenericMachine`
/// instance bound to `$mach`.
///
/// In this file the expansion is the same as `dispatch!`. The unit-return arm
/// re-invokes this macro through `$crate::` with `-> ()`, so it does not
/// depend on another macro being in textual scope at the call site.
#[macro_export]
macro_rules! dispatch_light256 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch_light256!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Generates a function `$name` whose body is written generically over a
/// machine type `$MTy: Machine` and is called with a `GenericMachine`
/// instance bound to `$mach`.
///
/// In this file the expansion is the same as `dispatch!`. The unit-return arm
/// re-invokes this macro through `$crate::` with `-> ()`, so it does not
/// depend on another macro being in textual scope at the call site.
#[macro_export]
macro_rules! dispatch_light512 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch_light512!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
839 
#[cfg(test)]
mod test {
    use super::*;

    /// `bswap` on a `u32x4` must reverse the bytes inside each 32-bit word.
    #[test]
    fn test_bswap32() {
        let machine = unsafe { GenericMachine::instance() };

        let input = [0x0f0e_0d0c, 0x0b0a_0908, 0x0706_0504, 0x0302_0100];
        let expected = [0x0c0d_0e0f, 0x0809_0a0b, 0x0405_0607, 0x0001_0203];

        let loaded: <GenericMachine as Machine>::u32x4 = machine.vec(input);
        let swapped = loaded.bswap();

        let want = machine.vec(expected);
        assert_eq!(swapped, want);
    }
}
858