//! Implement 256- and 512-bit in terms of 128-bit, for machines without native wide SIMD.
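//!
//! The scheme: a 256-bit vector is two 128-bit lanes (`x2`) and a 512-bit
//! vector is four (`x4`); every operation is forwarded lane-wise. A rough
//! sketch of how that reads (`W` stands in for any 128-bit word type
//! implementing this crate's traits; the names here are placeholders):
//!
//! ```ignore
//! let a = x2::<W, G>::new([a_lo, a_hi]);
//! let b = x2::<W, G>::new([b_lo, b_hi]);
//! let c = a ^ b; // == x2::new([a_lo ^ b_lo, a_hi ^ b_hi])
//! ```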

use crate::types::*;
use crate::{vec128_storage, vec256_storage, vec512_storage};
use core::marker::PhantomData;
use core::ops::*;

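/// A 256-bit vector modeled as two 128-bit lanes. `G` is a zero-sized tag
/// carried only in the type (via `PhantomData`); it never affects the data.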
#[derive(Copy, Clone, Default)]
#[allow(non_camel_case_types)]
pub struct x2<W, G>(pub [W; 2], PhantomData<G>);
impl<W, G> x2<W, G> {
    #[inline(always)]
    pub fn new(xs: [W; 2]) -> Self {
        x2(xs, PhantomData)
    }
}
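// Forward a binary-operator trait (e.g. BitXor) to x2 by applying it to each
// 128-bit lane independently.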
macro_rules! fwd_binop_x2 {
    ($trait:ident, $fn:ident) => {
        impl<W: $trait + Copy, G> $trait for x2<W, G> {
            type Output = x2<W::Output, G>;
            #[inline(always)]
            fn $fn(self, rhs: Self) -> Self::Output {
                x2::new([self.0[0].$fn(rhs.0[0]), self.0[1].$fn(rhs.0[1])])
            }
        }
    };
}
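// Same as above for the compound-assignment operators (e.g. BitXorAssign).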
macro_rules! fwd_binop_assign_x2 {
    ($trait:ident, $fn_assign:ident) => {
        impl<W: $trait + Copy, G> $trait for x2<W, G> {
            #[inline(always)]
            fn $fn_assign(&mut self, rhs: Self) {
                (self.0[0]).$fn_assign(rhs.0[0]);
                (self.0[1]).$fn_assign(rhs.0[1]);
            }
        }
    };
}
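// Expand to a method body that forwards a unary operation to both lanes.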
macro_rules! fwd_unop_x2 {
    ($fn:ident) => {
        #[inline(always)]
        fn $fn(self) -> Self {
            x2::new([self.0[0].$fn(), self.0[1].$fn()])
        }
    };
}
impl<W, G> RotateEachWord32 for x2<W, G>
where
    W: Copy + RotateEachWord32,
{
    fwd_unop_x2!(rotate_each_word_right7);
    fwd_unop_x2!(rotate_each_word_right8);
    fwd_unop_x2!(rotate_each_word_right11);
    fwd_unop_x2!(rotate_each_word_right12);
    fwd_unop_x2!(rotate_each_word_right16);
    fwd_unop_x2!(rotate_each_word_right20);
    fwd_unop_x2!(rotate_each_word_right24);
    fwd_unop_x2!(rotate_each_word_right25);
}
impl<W, G> RotateEachWord64 for x2<W, G>
where
    W: Copy + RotateEachWord64,
{
    fwd_unop_x2!(rotate_each_word_right32);
}
impl<W, G> RotateEachWord128 for x2<W, G> where W: RotateEachWord128 {}
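// The BitOps* impls are intentionally empty marker impls; the actual
// operators come from the forwarded impls below.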
impl<W, G> BitOps0 for x2<W, G>
where
    W: BitOps0,
    G: Copy,
{
}
impl<W, G> BitOps32 for x2<W, G>
where
    W: BitOps32 + BitOps0,
    G: Copy,
{
}
impl<W, G> BitOps64 for x2<W, G>
where
    W: BitOps64 + BitOps0,
    G: Copy,
{
}
impl<W, G> BitOps128 for x2<W, G>
where
    W: BitOps128 + BitOps0,
    G: Copy,
{
}
fwd_binop_x2!(BitAnd, bitand);
fwd_binop_x2!(BitOr, bitor);
fwd_binop_x2!(BitXor, bitxor);
fwd_binop_x2!(AndNot, andnot);
fwd_binop_assign_x2!(BitAndAssign, bitand_assign);
fwd_binop_assign_x2!(BitOrAssign, bitor_assign);
fwd_binop_assign_x2!(BitXorAssign, bitxor_assign);
impl<W, G> ArithOps for x2<W, G>
where
    W: ArithOps,
    G: Copy,
{
}
fwd_binop_x2!(Add, add);
fwd_binop_assign_x2!(AddAssign, add_assign);
impl<W: Not + Copy, G> Not for x2<W, G> {
    type Output = x2<W::Output, G>;
    #[inline(always)]
    fn not(self) -> Self::Output {
        x2::new([self.0[0].not(), self.0[1].not()])
    }
}
impl<W, G> UnsafeFrom<[W; 2]> for x2<W, G> {
    #[inline(always)]
    unsafe fn unsafe_from(xs: [W; 2]) -> Self {
        x2::new(xs)
    }
}
impl<W: Copy, G> Vec2<W> for x2<W, G> {
    #[inline(always)]
    fn extract(self, i: u32) -> W {
        self.0[i as usize]
    }
    #[inline(always)]
    fn insert(mut self, w: W, i: u32) -> Self {
        self.0[i as usize] = w;
        self
    }
}
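// Storage conversion: a vec256_storage splits into two vec128_storage halves,
// one per lane, and is reassembled the same way.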
impl<W: Copy + Store<vec128_storage>, G> Store<vec256_storage> for x2<W, G> {
    #[inline(always)]
    unsafe fn unpack(p: vec256_storage) -> Self {
        let p = p.split128();
        x2::new([W::unpack(p[0]), W::unpack(p[1])])
    }
}
impl<W, G> From<x2<W, G>> for vec256_storage
where
    W: Copy,
    vec128_storage: From<W>,
{
    #[inline(always)]
    fn from(x: x2<W, G>) -> Self {
        vec256_storage::new128([x.0[0].into(), x.0[1].into()])
    }
}
impl<W, G> Swap64 for x2<W, G>
where
    W: Swap64 + Copy,
{
    fwd_unop_x2!(swap1);
    fwd_unop_x2!(swap2);
    fwd_unop_x2!(swap4);
    fwd_unop_x2!(swap8);
    fwd_unop_x2!(swap16);
    fwd_unop_x2!(swap32);
    fwd_unop_x2!(swap64);
}
impl<W: Copy, G> MultiLane<[W; 2]> for x2<W, G> {
    #[inline(always)]
    fn to_lanes(self) -> [W; 2] {
        self.0
    }
    #[inline(always)]
    fn from_lanes(lanes: [W; 2]) -> Self {
        x2::new(lanes)
    }
}
impl<W: BSwap + Copy, G> BSwap for x2<W, G> {
    #[inline(always)]
    fn bswap(self) -> Self {
        x2::new([self.0[0].bswap(), self.0[1].bswap()])
    }
}
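// Byte-level I/O splits the byte slice in half, one 128-bit lane per half.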
impl<W: StoreBytes + BSwap + Copy, G> StoreBytes for x2<W, G> {
    #[inline(always)]
    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
        let input = input.split_at(input.len() / 2);
        x2::new([W::unsafe_read_le(input.0), W::unsafe_read_le(input.1)])
    }
    #[inline(always)]
    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
        let input = input.split_at(input.len() / 2);
        x2::new([W::unsafe_read_be(input.0), W::unsafe_read_be(input.1)])
    }
    #[inline(always)]
    fn write_le(self, out: &mut [u8]) {
        let out = out.split_at_mut(out.len() / 2);
        self.0[0].write_le(out.0);
        self.0[1].write_le(out.1);
    }
    #[inline(always)]
    fn write_be(self, out: &mut [u8]) {
        let out = out.split_at_mut(out.len() / 2);
        self.0[0].write_be(out.0);
        self.0[1].write_be(out.1);
    }
}
impl<W: Copy + LaneWords4, G: Copy> LaneWords4 for x2<W, G> {
    #[inline(always)]
    fn shuffle_lane_words2301(self) -> Self {
        Self::new([
            self.0[0].shuffle_lane_words2301(),
            self.0[1].shuffle_lane_words2301(),
        ])
    }
    #[inline(always)]
    fn shuffle_lane_words1230(self) -> Self {
        Self::new([
            self.0[0].shuffle_lane_words1230(),
            self.0[1].shuffle_lane_words1230(),
        ])
    }
    #[inline(always)]
    fn shuffle_lane_words3012(self) -> Self {
        Self::new([
            self.0[0].shuffle_lane_words3012(),
            self.0[1].shuffle_lane_words3012(),
        ])
    }
}

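/// A 512-bit vector modeled as four 128-bit lanes; unlike `x2`, it carries no
/// marker type parameter.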
#[derive(Copy, Clone, Default)]
#[allow(non_camel_case_types)]
pub struct x4<W>(pub [W; 4]);
impl<W> x4<W> {
    #[inline(always)]
    pub fn new(xs: [W; 4]) -> Self {
        x4(xs)
    }
}
macro_rules! fwd_binop_x4 {
    ($trait:ident, $fn:ident) => {
        impl<W: $trait + Copy> $trait for x4<W> {
            type Output = x4<W::Output>;
            #[inline(always)]
            fn $fn(self, rhs: Self) -> Self::Output {
                x4([
                    self.0[0].$fn(rhs.0[0]),
                    self.0[1].$fn(rhs.0[1]),
                    self.0[2].$fn(rhs.0[2]),
                    self.0[3].$fn(rhs.0[3]),
                ])
            }
        }
    };
}
macro_rules! fwd_binop_assign_x4 {
    ($trait:ident, $fn_assign:ident) => {
        impl<W: $trait + Copy> $trait for x4<W> {
            #[inline(always)]
            fn $fn_assign(&mut self, rhs: Self) {
                self.0[0].$fn_assign(rhs.0[0]);
                self.0[1].$fn_assign(rhs.0[1]);
                self.0[2].$fn_assign(rhs.0[2]);
                self.0[3].$fn_assign(rhs.0[3]);
            }
        }
    };
}
macro_rules! fwd_unop_x4 {
    ($fn:ident) => {
        #[inline(always)]
        fn $fn(self) -> Self {
            x4([
                self.0[0].$fn(),
                self.0[1].$fn(),
                self.0[2].$fn(),
                self.0[3].$fn(),
            ])
        }
    };
}
impl<W> RotateEachWord32 for x4<W>
where
    W: Copy + RotateEachWord32,
{
    fwd_unop_x4!(rotate_each_word_right7);
    fwd_unop_x4!(rotate_each_word_right8);
    fwd_unop_x4!(rotate_each_word_right11);
    fwd_unop_x4!(rotate_each_word_right12);
    fwd_unop_x4!(rotate_each_word_right16);
    fwd_unop_x4!(rotate_each_word_right20);
    fwd_unop_x4!(rotate_each_word_right24);
    fwd_unop_x4!(rotate_each_word_right25);
}
impl<W> RotateEachWord64 for x4<W>
where
    W: Copy + RotateEachWord64,
{
    fwd_unop_x4!(rotate_each_word_right32);
}
impl<W> RotateEachWord128 for x4<W> where W: RotateEachWord128 {}
impl<W> BitOps0 for x4<W> where W: BitOps0 {}
impl<W> BitOps32 for x4<W> where W: BitOps32 + BitOps0 {}
impl<W> BitOps64 for x4<W> where W: BitOps64 + BitOps0 {}
impl<W> BitOps128 for x4<W> where W: BitOps128 + BitOps0 {}
fwd_binop_x4!(BitAnd, bitand);
fwd_binop_x4!(BitOr, bitor);
fwd_binop_x4!(BitXor, bitxor);
fwd_binop_x4!(AndNot, andnot);
fwd_binop_assign_x4!(BitAndAssign, bitand_assign);
fwd_binop_assign_x4!(BitOrAssign, bitor_assign);
fwd_binop_assign_x4!(BitXorAssign, bitxor_assign);
impl<W> ArithOps for x4<W> where W: ArithOps {}
fwd_binop_x4!(Add, add);
fwd_binop_assign_x4!(AddAssign, add_assign);
impl<W: Not + Copy> Not for x4<W> {
    type Output = x4<W::Output>;
    #[inline(always)]
    fn not(self) -> Self::Output {
        x4([
            self.0[0].not(),
            self.0[1].not(),
            self.0[2].not(),
            self.0[3].not(),
        ])
    }
}
impl<W> UnsafeFrom<[W; 4]> for x4<W> {
    #[inline(always)]
    unsafe fn unsafe_from(xs: [W; 4]) -> Self {
        x4(xs)
    }
}
impl<W: Copy> Vec4<W> for x4<W> {
    #[inline(always)]
    fn extract(self, i: u32) -> W {
        self.0[i as usize]
    }
    #[inline(always)]
    fn insert(mut self, w: W, i: u32) -> Self {
        self.0[i as usize] = w;
        self
    }
}
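// Treat (a, b, c, d) as a 4x4 matrix of 128-bit lanes and transpose it:
// output vector i collects lane i of each input.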
impl<W: Copy> Vec4Ext<W> for x4<W> {
    #[inline(always)]
    fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self)
    where
        Self: Sized,
    {
        (
            x4([a.0[0], b.0[0], c.0[0], d.0[0]]),
            x4([a.0[1], b.0[1], c.0[1], d.0[1]]),
            x4([a.0[2], b.0[2], c.0[2], d.0[2]]),
            x4([a.0[3], b.0[3], c.0[3], d.0[3]]),
        )
    }
}
impl<W: Copy + Store<vec128_storage>> Store<vec512_storage> for x4<W> {
    #[inline(always)]
    unsafe fn unpack(p: vec512_storage) -> Self {
        let p = p.split128();
        x4([
            W::unpack(p[0]),
            W::unpack(p[1]),
            W::unpack(p[2]),
            W::unpack(p[3]),
        ])
    }
}
impl<W> From<x4<W>> for vec512_storage
where
    W: Copy,
    vec128_storage: From<W>,
{
    #[inline(always)]
    fn from(x: x4<W>) -> Self {
        vec512_storage::new128([x.0[0].into(), x.0[1].into(), x.0[2].into(), x.0[3].into()])
    }
}
impl<W> Swap64 for x4<W>
where
    W: Swap64 + Copy,
{
    fwd_unop_x4!(swap1);
    fwd_unop_x4!(swap2);
    fwd_unop_x4!(swap4);
    fwd_unop_x4!(swap8);
    fwd_unop_x4!(swap16);
    fwd_unop_x4!(swap32);
    fwd_unop_x4!(swap64);
}
impl<W: Copy> MultiLane<[W; 4]> for x4<W> {
    #[inline(always)]
    fn to_lanes(self) -> [W; 4] {
        self.0
    }
    #[inline(always)]
    fn from_lanes(lanes: [W; 4]) -> Self {
        x4(lanes)
    }
}
impl<W: BSwap + Copy> BSwap for x4<W> {
    #[inline(always)]
    fn bswap(self) -> Self {
        x4([
            self.0[0].bswap(),
            self.0[1].bswap(),
            self.0[2].bswap(),
            self.0[3].bswap(),
        ])
    }
}
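// Byte-level I/O splits the byte slice into four equal quarters, one per lane.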
impl<W: StoreBytes + BSwap + Copy> StoreBytes for x4<W> {
    #[inline(always)]
    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
        let n = input.len() / 4;
        x4([
            W::unsafe_read_le(&input[..n]),
            W::unsafe_read_le(&input[n..n * 2]),
            W::unsafe_read_le(&input[n * 2..n * 3]),
            W::unsafe_read_le(&input[n * 3..]),
        ])
    }
    #[inline(always)]
    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
        let n = input.len() / 4;
        x4([
            W::unsafe_read_be(&input[..n]),
            W::unsafe_read_be(&input[n..n * 2]),
            W::unsafe_read_be(&input[n * 2..n * 3]),
            W::unsafe_read_be(&input[n * 3..]),
        ])
    }
    #[inline(always)]
    fn write_le(self, out: &mut [u8]) {
        let n = out.len() / 4;
        self.0[0].write_le(&mut out[..n]);
        self.0[1].write_le(&mut out[n..n * 2]);
        self.0[2].write_le(&mut out[n * 2..n * 3]);
        self.0[3].write_le(&mut out[n * 3..]);
    }
    #[inline(always)]
    fn write_be(self, out: &mut [u8]) {
        let n = out.len() / 4;
        self.0[0].write_be(&mut out[..n]);
        self.0[1].write_be(&mut out[n..n * 2]);
        self.0[2].write_be(&mut out[n * 2..n * 3]);
        self.0[3].write_be(&mut out[n * 3..]);
    }
}
impl<W: Copy + LaneWords4> LaneWords4 for x4<W> {
    #[inline(always)]
    fn shuffle_lane_words2301(self) -> Self {
        x4([
            self.0[0].shuffle_lane_words2301(),
            self.0[1].shuffle_lane_words2301(),
            self.0[2].shuffle_lane_words2301(),
            self.0[3].shuffle_lane_words2301(),
        ])
    }
    #[inline(always)]
    fn shuffle_lane_words1230(self) -> Self {
        x4([
            self.0[0].shuffle_lane_words1230(),
            self.0[1].shuffle_lane_words1230(),
            self.0[2].shuffle_lane_words1230(),
            self.0[3].shuffle_lane_words1230(),
        ])
    }
    #[inline(always)]
    fn shuffle_lane_words3012(self) -> Self {
        x4([
            self.0[0].shuffle_lane_words3012(),
            self.0[1].shuffle_lane_words3012(),
            self.0[2].shuffle_lane_words3012(),
            self.0[3].shuffle_lane_words3012(),
        ])
    }
}