1 #![allow(non_camel_case_types)]
2
3 use crate::soft::{x2, x4};
4 use crate::types::*;
5 use core::ops::*;
6
/// Plain 128-bit storage that can be viewed either as four `u32` words (`d`)
/// or as two `u64` words (`q`).
///
/// Being a `#[repr(C)]` union, both views start at offset zero and alias the
/// same 16 bytes.
#[repr(C)]
#[derive(Clone, Copy)]
pub union vec128_storage {
    /// View of the storage as four 32-bit words.
    d: [u32; 4],
    /// View of the storage as two 64-bit words.
    q: [u64; 2],
}
13 impl From<[u32; 4]> for vec128_storage {
14 #[inline(always)]
from(d: [u32; 4]) -> Self15 fn from(d: [u32; 4]) -> Self {
16 Self { d }
17 }
18 }
19 impl From<vec128_storage> for [u32; 4] {
20 #[inline(always)]
from(d: vec128_storage) -> Self21 fn from(d: vec128_storage) -> Self {
22 unsafe { d.d }
23 }
24 }
25 impl From<[u64; 2]> for vec128_storage {
26 #[inline(always)]
from(q: [u64; 2]) -> Self27 fn from(q: [u64; 2]) -> Self {
28 Self { q }
29 }
30 }
31 impl From<vec128_storage> for [u64; 2] {
32 #[inline(always)]
from(q: vec128_storage) -> Self33 fn from(q: vec128_storage) -> Self {
34 unsafe { q.q }
35 }
36 }
37 impl Default for vec128_storage {
38 #[inline(always)]
default() -> Self39 fn default() -> Self {
40 Self { q: [0, 0] }
41 }
42 }
43 impl Eq for vec128_storage {}
44 impl PartialEq<vec128_storage> for vec128_storage {
45 #[inline(always)]
eq(&self, rhs: &Self) -> bool46 fn eq(&self, rhs: &Self) -> bool {
47 unsafe { self.q == rhs.q }
48 }
49 }
/// 256-bit storage made of two `vec128_storage` halves.
#[derive(Clone, Copy, PartialEq, Eq, Default)]
pub struct vec256_storage {
    /// The two 128-bit halves, in index order.
    v128: [vec128_storage; 2],
}
54 impl vec256_storage {
55 #[inline(always)]
new128(v128: [vec128_storage; 2]) -> Self56 pub fn new128(v128: [vec128_storage; 2]) -> Self {
57 Self { v128 }
58 }
59 #[inline(always)]
split128(self) -> [vec128_storage; 2]60 pub fn split128(self) -> [vec128_storage; 2] {
61 self.v128
62 }
63 }
64 impl From<vec256_storage> for [u64; 4] {
65 #[inline(always)]
from(q: vec256_storage) -> Self66 fn from(q: vec256_storage) -> Self {
67 let [a, b]: [u64; 2] = q.v128[0].into();
68 let [c, d]: [u64; 2] = q.v128[1].into();
69 [a, b, c, d]
70 }
71 }
/// 512-bit storage made of four `vec128_storage` parts.
#[derive(Clone, Copy, PartialEq, Eq, Default)]
pub struct vec512_storage {
    /// The four 128-bit parts, in index order.
    v128: [vec128_storage; 4],
}
76 impl vec512_storage {
77 #[inline(always)]
new128(v128: [vec128_storage; 4]) -> Self78 pub fn new128(v128: [vec128_storage; 4]) -> Self {
79 Self { v128 }
80 }
81 #[inline(always)]
split128(self) -> [vec128_storage; 4]82 pub fn split128(self) -> [vec128_storage; 4] {
83 self.v128
84 }
85 }
86
87 #[inline(always)]
dmap<T, F>(t: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u32) -> u32,88 fn dmap<T, F>(t: T, f: F) -> T
89 where
90 T: Store<vec128_storage> + Into<vec128_storage>,
91 F: Fn(u32) -> u32,
92 {
93 let t: vec128_storage = t.into();
94 let d = unsafe { t.d };
95 let d = vec128_storage {
96 d: [f(d[0]), f(d[1]), f(d[2]), f(d[3])],
97 };
98 unsafe { T::unpack(d) }
99 }
100
dmap2<T, F>(a: T, b: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u32, u32) -> u32,101 fn dmap2<T, F>(a: T, b: T, f: F) -> T
102 where
103 T: Store<vec128_storage> + Into<vec128_storage>,
104 F: Fn(u32, u32) -> u32,
105 {
106 let a: vec128_storage = a.into();
107 let b: vec128_storage = b.into();
108 let ao = unsafe { a.d };
109 let bo = unsafe { b.d };
110 let d = vec128_storage {
111 d: [
112 f(ao[0], bo[0]),
113 f(ao[1], bo[1]),
114 f(ao[2], bo[2]),
115 f(ao[3], bo[3]),
116 ],
117 };
118 unsafe { T::unpack(d) }
119 }
120
121 #[inline(always)]
qmap<T, F>(t: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u64) -> u64,122 fn qmap<T, F>(t: T, f: F) -> T
123 where
124 T: Store<vec128_storage> + Into<vec128_storage>,
125 F: Fn(u64) -> u64,
126 {
127 let t: vec128_storage = t.into();
128 let q = unsafe { t.q };
129 let q = vec128_storage {
130 q: [f(q[0]), f(q[1])],
131 };
132 unsafe { T::unpack(q) }
133 }
134
135 #[inline(always)]
qmap2<T, F>(a: T, b: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u64, u64) -> u64,136 fn qmap2<T, F>(a: T, b: T, f: F) -> T
137 where
138 T: Store<vec128_storage> + Into<vec128_storage>,
139 F: Fn(u64, u64) -> u64,
140 {
141 let a: vec128_storage = a.into();
142 let b: vec128_storage = b.into();
143 let ao = unsafe { a.q };
144 let bo = unsafe { b.q };
145 let q = vec128_storage {
146 q: [f(ao[0], bo[0]), f(ao[1], bo[1])],
147 };
148 unsafe { T::unpack(q) }
149 }
150
/// Joins two 64-bit words into one `u128`: `q[0]` becomes the low half and
/// `q[1]` the high half.
#[inline(always)]
fn o_of_q(q: [u64; 2]) -> u128 {
    let [low, high] = q;
    (u128::from(high) << 64) | u128::from(low)
}
155
/// Splits a `u128` into two 64-bit words: index 0 holds the low half and
/// index 1 the high half.
#[inline(always)]
fn q_of_o(o: u128) -> [u64; 2] {
    let low = o as u64; // truncation keeps the low 64 bits
    let high = (o >> 64) as u64;
    [low, high]
}
160
161 #[inline(always)]
omap<T, F>(a: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u128) -> u128,162 fn omap<T, F>(a: T, f: F) -> T
163 where
164 T: Store<vec128_storage> + Into<vec128_storage>,
165 F: Fn(u128) -> u128,
166 {
167 let a: vec128_storage = a.into();
168 let ao = o_of_q(unsafe { a.q });
169 let o = vec128_storage { q: q_of_o(f(ao)) };
170 unsafe { T::unpack(o) }
171 }
172
173 #[inline(always)]
omap2<T, F>(a: T, b: T, f: F) -> T where T: Store<vec128_storage> + Into<vec128_storage>, F: Fn(u128, u128) -> u128,174 fn omap2<T, F>(a: T, b: T, f: F) -> T
175 where
176 T: Store<vec128_storage> + Into<vec128_storage>,
177 F: Fn(u128, u128) -> u128,
178 {
179 let a: vec128_storage = a.into();
180 let b: vec128_storage = b.into();
181 let ao = o_of_q(unsafe { a.q });
182 let bo = o_of_q(unsafe { b.q });
183 let o = vec128_storage {
184 q: q_of_o(f(ao, bo)),
185 };
186 unsafe { T::unpack(o) }
187 }
188
// Empty impls: no methods are supplied here, so whatever these traits require
// or provide is defined with the traits themselves (outside this file).
impl RotateEachWord128 for u128x1_generic {}
impl BitOps128 for u128x1_generic {}
impl BitOps64 for u128x1_generic {}
impl BitOps64 for u64x2_generic {}
impl BitOps32 for u128x1_generic {}
impl BitOps32 for u64x2_generic {}
impl BitOps32 for u32x4_generic {}
impl BitOps0 for u128x1_generic {}
impl BitOps0 for u64x2_generic {}
impl BitOps0 for u32x4_generic {}
199
// Implements the bitwise operators, `AndNot`, the `*Assign` operator variants
// and `Swap64` for a 128-bit vector type `$vec`. Every operation goes through
// `omap`/`omap2`/`qmap`/`dmap`, i.e. through `vec128_storage`.
macro_rules! impl_bitops {
    ($vec:ident) => {
        impl Not for $vec {
            type Output = Self;
            #[inline(always)]
            fn not(self) -> Self::Output {
                omap(self, |x| !x)
            }
        }
        impl BitAnd for $vec {
            type Output = Self;
            #[inline(always)]
            fn bitand(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| x & y)
            }
        }
        impl BitOr for $vec {
            type Output = Self;
            #[inline(always)]
            fn bitor(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| x | y)
            }
        }
        impl BitXor for $vec {
            type Output = Self;
            #[inline(always)]
            fn bitxor(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| x ^ y)
            }
        }
        impl AndNot for $vec {
            type Output = Self;
            // Computes `!self & rhs`: the receiver is the operand that gets complemented.
            #[inline(always)]
            fn andnot(self, rhs: Self) -> Self::Output {
                omap2(self, rhs, |x, y| !x & y)
            }
        }
        impl BitAndAssign for $vec {
            #[inline(always)]
            fn bitand_assign(&mut self, rhs: Self) {
                *self = *self & rhs
            }
        }
        impl BitOrAssign for $vec {
            #[inline(always)]
            fn bitor_assign(&mut self, rhs: Self) {
                *self = *self | rhs
            }
        }
        impl BitXorAssign for $vec {
            #[inline(always)]
            fn bitxor_assign(&mut self, rhs: Self) {
                *self = *self ^ rhs
            }
        }

        impl Swap64 for $vec {
            // Exchanges each even-positioned bit with its odd neighbour (per 64-bit word).
            #[inline(always)]
            fn swap1(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x5555555555555555) << 1) | ((x & 0xaaaaaaaaaaaaaaaa) >> 1)
                })
            }
            // Exchanges adjacent 2-bit groups.
            #[inline(always)]
            fn swap2(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x3333333333333333) << 2) | ((x & 0xcccccccccccccccc) >> 2)
                })
            }
            // Exchanges the two nibbles of every byte.
            #[inline(always)]
            fn swap4(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x0f0f0f0f0f0f0f0f) << 4) | ((x & 0xf0f0f0f0f0f0f0f0) >> 4)
                })
            }
            // Exchanges adjacent bytes.
            #[inline(always)]
            fn swap8(self) -> Self {
                qmap(self, |x| {
                    ((x & 0x00ff00ff00ff00ff) << 8) | ((x & 0xff00ff00ff00ff00) >> 8)
                })
            }
            // Rotating each 32-bit word by 16 exchanges its two 16-bit halves.
            #[inline(always)]
            fn swap16(self) -> Self {
                dmap(self, |x| x.rotate_left(16))
            }
            // Rotating each 64-bit word by 32 exchanges its two 32-bit halves.
            #[inline(always)]
            fn swap32(self) -> Self {
                qmap(self, |x| x.rotate_left(32))
            }
            // Exchanges the low and high 64-bit halves of the 128-bit value.
            #[inline(always)]
            fn swap64(self) -> Self {
                omap(self, |x| (x << 64) | (x >> 64))
            }
        }
    };
}
impl_bitops!(u32x4_generic);
impl_bitops!(u64x2_generic);
impl_bitops!(u128x1_generic);
299
300 impl RotateEachWord32 for u32x4_generic {
301 #[inline(always)]
rotate_each_word_right7(self) -> Self302 fn rotate_each_word_right7(self) -> Self {
303 dmap(self, |x| x.rotate_right(7))
304 }
305 #[inline(always)]
rotate_each_word_right8(self) -> Self306 fn rotate_each_word_right8(self) -> Self {
307 dmap(self, |x| x.rotate_right(8))
308 }
309 #[inline(always)]
rotate_each_word_right11(self) -> Self310 fn rotate_each_word_right11(self) -> Self {
311 dmap(self, |x| x.rotate_right(11))
312 }
313 #[inline(always)]
rotate_each_word_right12(self) -> Self314 fn rotate_each_word_right12(self) -> Self {
315 dmap(self, |x| x.rotate_right(12))
316 }
317 #[inline(always)]
rotate_each_word_right16(self) -> Self318 fn rotate_each_word_right16(self) -> Self {
319 dmap(self, |x| x.rotate_right(16))
320 }
321 #[inline(always)]
rotate_each_word_right20(self) -> Self322 fn rotate_each_word_right20(self) -> Self {
323 dmap(self, |x| x.rotate_right(20))
324 }
325 #[inline(always)]
rotate_each_word_right24(self) -> Self326 fn rotate_each_word_right24(self) -> Self {
327 dmap(self, |x| x.rotate_right(24))
328 }
329 #[inline(always)]
rotate_each_word_right25(self) -> Self330 fn rotate_each_word_right25(self) -> Self {
331 dmap(self, |x| x.rotate_right(25))
332 }
333 }
334
335 impl RotateEachWord32 for u64x2_generic {
336 #[inline(always)]
rotate_each_word_right7(self) -> Self337 fn rotate_each_word_right7(self) -> Self {
338 qmap(self, |x| x.rotate_right(7))
339 }
340 #[inline(always)]
rotate_each_word_right8(self) -> Self341 fn rotate_each_word_right8(self) -> Self {
342 qmap(self, |x| x.rotate_right(8))
343 }
344 #[inline(always)]
rotate_each_word_right11(self) -> Self345 fn rotate_each_word_right11(self) -> Self {
346 qmap(self, |x| x.rotate_right(11))
347 }
348 #[inline(always)]
rotate_each_word_right12(self) -> Self349 fn rotate_each_word_right12(self) -> Self {
350 qmap(self, |x| x.rotate_right(12))
351 }
352 #[inline(always)]
rotate_each_word_right16(self) -> Self353 fn rotate_each_word_right16(self) -> Self {
354 qmap(self, |x| x.rotate_right(16))
355 }
356 #[inline(always)]
rotate_each_word_right20(self) -> Self357 fn rotate_each_word_right20(self) -> Self {
358 qmap(self, |x| x.rotate_right(20))
359 }
360 #[inline(always)]
rotate_each_word_right24(self) -> Self361 fn rotate_each_word_right24(self) -> Self {
362 qmap(self, |x| x.rotate_right(24))
363 }
364 #[inline(always)]
rotate_each_word_right25(self) -> Self365 fn rotate_each_word_right25(self) -> Self {
366 qmap(self, |x| x.rotate_right(25))
367 }
368 }
369 impl RotateEachWord64 for u64x2_generic {
370 #[inline(always)]
rotate_each_word_right32(self) -> Self371 fn rotate_each_word_right32(self) -> Self {
372 qmap(self, |x| x.rotate_right(32))
373 }
374 }
375
/// Rotates `x` right by `i` bits using plain shifts instead of
/// `u128::rotate_right`.
///
/// The count is reduced modulo 128, so `i == 0` and `i >= 128` behave like
/// `u128::rotate_right` rather than overflowing a shift.
// workaround for koute/cargo-web#52 (u128::rotate_* broken with cargo web)
#[inline(always)]
fn rotate_u128_right(x: u128, i: u32) -> u128 {
    let i = i % 128;
    // `(128 - i) % 128` keeps the left-shift amount in `0..128` when `i == 0`.
    (x >> i) | (x << ((128 - i) % 128))
}
/// The shift-based rotation must agree with `u128::rotate_right`.
#[test]
fn test_rotate_u128() {
    const X: u128 = 0x0001_0203_0405_0607_0809_0a0b_0c0d_0e0f;
    let actual = rotate_u128_right(X, 17);
    let expected = X.rotate_right(17);
    assert_eq!(actual, expected);
}
386
387 impl RotateEachWord32 for u128x1_generic {
388 #[inline(always)]
rotate_each_word_right7(self) -> Self389 fn rotate_each_word_right7(self) -> Self {
390 Self([rotate_u128_right(self.0[0], 7)])
391 }
392 #[inline(always)]
rotate_each_word_right8(self) -> Self393 fn rotate_each_word_right8(self) -> Self {
394 Self([rotate_u128_right(self.0[0], 8)])
395 }
396 #[inline(always)]
rotate_each_word_right11(self) -> Self397 fn rotate_each_word_right11(self) -> Self {
398 Self([rotate_u128_right(self.0[0], 11)])
399 }
400 #[inline(always)]
rotate_each_word_right12(self) -> Self401 fn rotate_each_word_right12(self) -> Self {
402 Self([rotate_u128_right(self.0[0], 12)])
403 }
404 #[inline(always)]
rotate_each_word_right16(self) -> Self405 fn rotate_each_word_right16(self) -> Self {
406 Self([rotate_u128_right(self.0[0], 16)])
407 }
408 #[inline(always)]
rotate_each_word_right20(self) -> Self409 fn rotate_each_word_right20(self) -> Self {
410 Self([rotate_u128_right(self.0[0], 20)])
411 }
412 #[inline(always)]
rotate_each_word_right24(self) -> Self413 fn rotate_each_word_right24(self) -> Self {
414 Self([rotate_u128_right(self.0[0], 24)])
415 }
416 #[inline(always)]
rotate_each_word_right25(self) -> Self417 fn rotate_each_word_right25(self) -> Self {
418 Self([rotate_u128_right(self.0[0], 25)])
419 }
420 }
421 impl RotateEachWord64 for u128x1_generic {
422 #[inline(always)]
rotate_each_word_right32(self) -> Self423 fn rotate_each_word_right32(self) -> Self {
424 Self([rotate_u128_right(self.0[0], 32)])
425 }
426 }
427
/// Unit-struct `Machine` whose vector types are all the portable `*_generic`
/// types defined in this file.
#[derive(Copy, Clone)]
pub struct GenericMachine;
impl Machine for GenericMachine {
    // 128-bit vector types.
    type u32x4 = u32x4_generic;
    type u64x2 = u64x2_generic;
    type u128x1 = u128x1_generic;
    // 256-bit vector types.
    type u32x4x2 = u32x4x2_generic;
    type u64x2x2 = u64x2x2_generic;
    type u64x4 = u64x4_generic;
    type u128x2 = u128x2_generic;
    // 512-bit vector types.
    type u32x4x4 = u32x4x4_generic;
    type u64x2x4 = u64x2x4_generic;
    type u128x4 = u128x4_generic;
    /// Returns the unit value. The trait declares this `unsafe`, but the body
    /// here performs no operation that depends on a precondition.
    #[inline(always)]
    unsafe fn instance() -> Self {
        Self
    }
}
446
/// Portable 128-bit vector of four `u32` lanes.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u32x4_generic([u32; 4]);
/// Portable 128-bit vector of two `u64` lanes.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u64x2_generic([u64; 2]);
/// Portable 128-bit vector holding a single `u128` lane.
#[derive(Copy, Clone, Debug, PartialEq)]
pub struct u128x1_generic([u128; 1]);
453
454 impl From<u32x4_generic> for vec128_storage {
455 #[inline(always)]
from(d: u32x4_generic) -> Self456 fn from(d: u32x4_generic) -> Self {
457 Self { d: d.0 }
458 }
459 }
460 impl From<u64x2_generic> for vec128_storage {
461 #[inline(always)]
from(q: u64x2_generic) -> Self462 fn from(q: u64x2_generic) -> Self {
463 Self { q: q.0 }
464 }
465 }
466 impl From<u128x1_generic> for vec128_storage {
467 #[inline(always)]
from(o: u128x1_generic) -> Self468 fn from(o: u128x1_generic) -> Self {
469 Self { q: q_of_o(o.0[0]) }
470 }
471 }
472
473 impl Store<vec128_storage> for u32x4_generic {
474 #[inline(always)]
unpack(s: vec128_storage) -> Self475 unsafe fn unpack(s: vec128_storage) -> Self {
476 Self(s.d)
477 }
478 }
479 impl Store<vec128_storage> for u64x2_generic {
480 #[inline(always)]
unpack(s: vec128_storage) -> Self481 unsafe fn unpack(s: vec128_storage) -> Self {
482 Self(s.q)
483 }
484 }
485 impl Store<vec128_storage> for u128x1_generic {
486 #[inline(always)]
unpack(s: vec128_storage) -> Self487 unsafe fn unpack(s: vec128_storage) -> Self {
488 Self([o_of_q(s.q); 1])
489 }
490 }
491
// Empty impls: `ArithOps` gets no method bodies here.
impl ArithOps for u32x4_generic {}
impl ArithOps for u64x2_generic {}
impl ArithOps for u128x1_generic {}
495
496 impl Add for u32x4_generic {
497 type Output = Self;
498 #[inline(always)]
add(self, rhs: Self) -> Self::Output499 fn add(self, rhs: Self) -> Self::Output {
500 dmap2(self, rhs, |x, y| x.wrapping_add(y))
501 }
502 }
503 impl Add for u64x2_generic {
504 type Output = Self;
505 #[inline(always)]
add(self, rhs: Self) -> Self::Output506 fn add(self, rhs: Self) -> Self::Output {
507 qmap2(self, rhs, |x, y| x.wrapping_add(y))
508 }
509 }
510 impl Add for u128x1_generic {
511 type Output = Self;
512 #[inline(always)]
add(self, rhs: Self) -> Self::Output513 fn add(self, rhs: Self) -> Self::Output {
514 omap2(self, rhs, |x, y| x.wrapping_add(y))
515 }
516 }
517 impl AddAssign for u32x4_generic {
518 #[inline(always)]
add_assign(&mut self, rhs: Self)519 fn add_assign(&mut self, rhs: Self) {
520 *self = *self + rhs
521 }
522 }
523 impl AddAssign for u64x2_generic {
524 #[inline(always)]
add_assign(&mut self, rhs: Self)525 fn add_assign(&mut self, rhs: Self) {
526 *self = *self + rhs
527 }
528 }
529 impl AddAssign for u128x1_generic {
530 #[inline(always)]
add_assign(&mut self, rhs: Self)531 fn add_assign(&mut self, rhs: Self) {
532 *self = *self + rhs
533 }
534 }
535 impl BSwap for u32x4_generic {
536 #[inline(always)]
bswap(self) -> Self537 fn bswap(self) -> Self {
538 dmap(self, |x| x.swap_bytes())
539 }
540 }
541 impl BSwap for u64x2_generic {
542 #[inline(always)]
bswap(self) -> Self543 fn bswap(self) -> Self {
544 qmap(self, |x| x.swap_bytes())
545 }
546 }
547 impl BSwap for u128x1_generic {
548 #[inline(always)]
bswap(self) -> Self549 fn bswap(self) -> Self {
550 omap(self, |x| x.swap_bytes())
551 }
552 }
553 impl StoreBytes for u32x4_generic {
554 #[inline(always)]
unsafe_read_le(input: &[u8]) -> Self555 unsafe fn unsafe_read_le(input: &[u8]) -> Self {
556 assert_eq!(input.len(), 16);
557 let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
558 dmap(x, |x| x.to_le())
559 }
560 #[inline(always)]
unsafe_read_be(input: &[u8]) -> Self561 unsafe fn unsafe_read_be(input: &[u8]) -> Self {
562 assert_eq!(input.len(), 16);
563 let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
564 dmap(x, |x| x.to_be())
565 }
566 #[inline(always)]
write_le(self, out: &mut [u8])567 fn write_le(self, out: &mut [u8]) {
568 assert_eq!(out.len(), 16);
569 let x = dmap(self, |x| x.to_le());
570 unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
571 }
572 #[inline(always)]
write_be(self, out: &mut [u8])573 fn write_be(self, out: &mut [u8]) {
574 assert_eq!(out.len(), 16);
575 let x = dmap(self, |x| x.to_be());
576 unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
577 }
578 }
579 impl StoreBytes for u64x2_generic {
580 #[inline(always)]
unsafe_read_le(input: &[u8]) -> Self581 unsafe fn unsafe_read_le(input: &[u8]) -> Self {
582 assert_eq!(input.len(), 16);
583 let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
584 qmap(x, |x| x.to_le())
585 }
586 #[inline(always)]
unsafe_read_be(input: &[u8]) -> Self587 unsafe fn unsafe_read_be(input: &[u8]) -> Self {
588 assert_eq!(input.len(), 16);
589 let x = core::mem::transmute(core::ptr::read(input as *const _ as *const [u8; 16]));
590 qmap(x, |x| x.to_be())
591 }
592 #[inline(always)]
write_le(self, out: &mut [u8])593 fn write_le(self, out: &mut [u8]) {
594 assert_eq!(out.len(), 16);
595 let x = qmap(self, |x| x.to_le());
596 unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
597 }
598 #[inline(always)]
write_be(self, out: &mut [u8])599 fn write_be(self, out: &mut [u8]) {
600 assert_eq!(out.len(), 16);
601 let x = qmap(self, |x| x.to_be());
602 unsafe { core::ptr::write(out as *mut _ as *mut [u8; 16], core::mem::transmute(x)) }
603 }
604 }
605
/// Marker type passed as the second parameter of `x2` by the `*x2_generic`
/// aliases below.
#[derive(Copy, Clone)]
pub struct G0;
/// Second marker for `x2`; it makes `u64x4_generic` a type distinct from
/// `u64x2x2_generic` even though both wrap two `u64x2_generic` values.
#[derive(Copy, Clone)]
pub struct G1;
// 256-bit types: pairs of 128-bit generic vectors.
pub type u32x4x2_generic = x2<u32x4_generic, G0>;
pub type u64x2x2_generic = x2<u64x2_generic, G0>;
pub type u64x4_generic = x2<u64x2_generic, G1>;
pub type u128x2_generic = x2<u128x1_generic, G0>;
// 512-bit types: quadruples of 128-bit generic vectors.
pub type u32x4x4_generic = x4<u32x4_generic>;
pub type u64x2x4_generic = x4<u64x2_generic>;
pub type u128x4_generic = x4<u128x1_generic>;
617
618 impl Vector<[u32; 16]> for u32x4x4_generic {
to_scalars(self) -> [u32; 16]619 fn to_scalars(self) -> [u32; 16] {
620 let [a, b, c, d] = self.0;
621 let a = a.0;
622 let b = b.0;
623 let c = c.0;
624 let d = d.0;
625 [
626 a[0], a[1], a[2], a[3],
627 b[0], b[1], b[2], b[3],
628 c[0], c[1], c[2], c[3],
629 d[0], d[1], d[2], d[3],
630 ]
631 }
632 }
633
634 impl MultiLane<[u32; 4]> for u32x4_generic {
635 #[inline(always)]
to_lanes(self) -> [u32; 4]636 fn to_lanes(self) -> [u32; 4] {
637 self.0
638 }
639 #[inline(always)]
from_lanes(xs: [u32; 4]) -> Self640 fn from_lanes(xs: [u32; 4]) -> Self {
641 Self(xs)
642 }
643 }
644 impl MultiLane<[u64; 2]> for u64x2_generic {
645 #[inline(always)]
to_lanes(self) -> [u64; 2]646 fn to_lanes(self) -> [u64; 2] {
647 self.0
648 }
649 #[inline(always)]
from_lanes(xs: [u64; 2]) -> Self650 fn from_lanes(xs: [u64; 2]) -> Self {
651 Self(xs)
652 }
653 }
654 impl MultiLane<[u64; 4]> for u64x4_generic {
655 #[inline(always)]
to_lanes(self) -> [u64; 4]656 fn to_lanes(self) -> [u64; 4] {
657 let (a, b) = (self.0[0].to_lanes(), self.0[1].to_lanes());
658 [a[0], a[1], b[0], b[1]]
659 }
660 #[inline(always)]
from_lanes(xs: [u64; 4]) -> Self661 fn from_lanes(xs: [u64; 4]) -> Self {
662 let (a, b) = (
663 u64x2_generic::from_lanes([xs[0], xs[1]]),
664 u64x2_generic::from_lanes([xs[2], xs[3]]),
665 );
666 x2::new([a, b])
667 }
668 }
669 impl MultiLane<[u128; 1]> for u128x1_generic {
670 #[inline(always)]
to_lanes(self) -> [u128; 1]671 fn to_lanes(self) -> [u128; 1] {
672 self.0
673 }
674 #[inline(always)]
from_lanes(xs: [u128; 1]) -> Self675 fn from_lanes(xs: [u128; 1]) -> Self {
676 Self(xs)
677 }
678 }
679 impl Vec4<u32> for u32x4_generic {
680 #[inline(always)]
extract(self, i: u32) -> u32681 fn extract(self, i: u32) -> u32 {
682 self.0[i as usize]
683 }
684 #[inline(always)]
insert(mut self, v: u32, i: u32) -> Self685 fn insert(mut self, v: u32, i: u32) -> Self {
686 self.0[i as usize] = v;
687 self
688 }
689 }
690 impl Vec4<u64> for u64x4_generic {
691 #[inline(always)]
extract(self, i: u32) -> u64692 fn extract(self, i: u32) -> u64 {
693 let d: [u64; 4] = self.to_lanes();
694 d[i as usize]
695 }
696 #[inline(always)]
insert(self, v: u64, i: u32) -> Self697 fn insert(self, v: u64, i: u32) -> Self {
698 self.0[(i / 2) as usize].insert(v, i % 2);
699 self
700 }
701 }
702 impl Vec2<u64> for u64x2_generic {
703 #[inline(always)]
extract(self, i: u32) -> u64704 fn extract(self, i: u32) -> u64 {
705 self.0[i as usize]
706 }
707 #[inline(always)]
insert(mut self, v: u64, i: u32) -> Self708 fn insert(mut self, v: u64, i: u32) -> Self {
709 self.0[i as usize] = v;
710 self
711 }
712 }
713
714 impl Words4 for u32x4_generic {
715 #[inline(always)]
shuffle2301(self) -> Self716 fn shuffle2301(self) -> Self {
717 self.swap64()
718 }
719 #[inline(always)]
shuffle1230(self) -> Self720 fn shuffle1230(self) -> Self {
721 let x = self.0;
722 Self([x[3], x[0], x[1], x[2]])
723 }
724 #[inline(always)]
shuffle3012(self) -> Self725 fn shuffle3012(self) -> Self {
726 let x = self.0;
727 Self([x[1], x[2], x[3], x[0]])
728 }
729 }
730 impl LaneWords4 for u32x4_generic {
731 #[inline(always)]
shuffle_lane_words2301(self) -> Self732 fn shuffle_lane_words2301(self) -> Self {
733 self.shuffle2301()
734 }
735 #[inline(always)]
shuffle_lane_words1230(self) -> Self736 fn shuffle_lane_words1230(self) -> Self {
737 self.shuffle1230()
738 }
739 #[inline(always)]
shuffle_lane_words3012(self) -> Self740 fn shuffle_lane_words3012(self) -> Self {
741 self.shuffle3012()
742 }
743 }
744
745 impl Words4 for u64x4_generic {
746 #[inline(always)]
shuffle2301(self) -> Self747 fn shuffle2301(self) -> Self {
748 x2::new([self.0[1], self.0[0]])
749 }
750 #[inline(always)]
shuffle1230(self) -> Self751 fn shuffle1230(self) -> Self {
752 unimplemented!()
753 }
754 #[inline(always)]
shuffle3012(self) -> Self755 fn shuffle3012(self) -> Self {
756 unimplemented!()
757 }
758 }
759
// Ties each portable vector type to `GenericMachine` through the trait of the
// same name as the matching `Machine` associated type. The impl bodies are
// empty; no methods are supplied here.
impl u32x4<GenericMachine> for u32x4_generic {}
impl u64x2<GenericMachine> for u64x2_generic {}
impl u128x1<GenericMachine> for u128x1_generic {}
impl u32x4x2<GenericMachine> for u32x4x2_generic {}
impl u64x2x2<GenericMachine> for u64x2x2_generic {}
impl u64x4<GenericMachine> for u64x4_generic {}
impl u128x2<GenericMachine> for u128x2_generic {}
impl u32x4x4<GenericMachine> for u32x4x4_generic {}
impl u64x2x4<GenericMachine> for u64x2x4_generic {}
impl u128x4<GenericMachine> for u128x4_generic {}
770
/// Defines `fn $name(...)` that obtains a `GenericMachine` and forwards to an
/// inner `fn_impl`, generic over `$MTy: Machine`, whose body is `$body`.
///
/// The second arm accepts a function written without a return type and
/// re-invokes the macro with `-> ()`. The recursive call is written as
/// `$crate::dispatch!` so it resolves even when the macro was invoked by path
/// and `dispatch` is not in scope at the call site.
#[macro_export]
macro_rules! dispatch {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Defines `fn $name(...)` that obtains a `GenericMachine` and forwards to an
/// inner `fn_impl`, generic over `$MTy: Machine`, whose body is `$body`.
///
/// The second arm accepts a function written without a return type and
/// delegates to `dispatch!` with `-> ()`. The call is written as
/// `$crate::dispatch!` so that callers who imported only `dispatch_light128`
/// do not also need `dispatch` in scope.
#[macro_export]
macro_rules! dispatch_light128 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Defines `fn $name(...)` that obtains a `GenericMachine` and forwards to an
/// inner `fn_impl`, generic over `$MTy: Machine`, whose body is `$body`.
///
/// The second arm accepts a function written without a return type and
/// delegates to `dispatch!` with `-> ()`. The call is written as
/// `$crate::dispatch!` so that callers who imported only `dispatch_light256`
/// do not also need `dispatch` in scope.
#[macro_export]
macro_rules! dispatch_light256 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
/// Defines `fn $name(...)` that obtains a `GenericMachine` and forwards to an
/// inner `fn_impl`, generic over `$MTy: Machine`, whose body is `$body`.
///
/// The second arm accepts a function written without a return type and
/// delegates to `dispatch!` with `-> ()`. The call is written as
/// `$crate::dispatch!` so that callers who imported only `dispatch_light512`
/// do not also need `dispatch` in scope.
#[macro_export]
macro_rules! dispatch_light512 {
    ($mach:ident, $MTy:ident, { $([$pub:tt$(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) -> $ret:ty $body:block }) => {
        #[inline(always)]
        $($pub$(($krate))*)* fn $name($($arg: $argty),*) -> $ret {
            let $mach = unsafe { $crate::generic::GenericMachine::instance() };
            #[inline(always)]
            fn fn_impl<$MTy: $crate::Machine>($mach: $MTy, $($arg: $argty),*) -> $ret $body
            fn_impl($mach, $($arg),*)
        }
    };
    ($mach:ident, $MTy:ident, { $([$pub:tt $(($krate:tt))*])* fn $name:ident($($arg:ident: $argty:ty),* $(,)*) $body:block }) => {
        $crate::dispatch!($mach, $MTy, {
            $([$pub $(($krate))*])* fn $name($($arg: $argty),*) -> () $body
        });
    }
}
839
#[cfg(test)]
mod test {
    use super::*;

    /// `bswap` on a `u32x4` must reverse the bytes inside every 32-bit lane.
    #[test]
    fn test_bswap32() {
        let input = [0x0f0e_0d0c, 0x0b0a_0908, 0x0706_0504, 0x0302_0100];
        let reversed = [0x0c0d_0e0f, 0x0809_0a0b, 0x0405_0607, 0x0001_0203];

        let machine = unsafe { GenericMachine::instance() };

        let vector: <GenericMachine as Machine>::u32x4 = machine.vec(input);
        let swapped = vector.bswap();

        let expected = machine.vec(reversed);
        assert_eq!(swapped, expected);
    }
}
858