//! Implement 256- and 512-bit vector operations in terms of 128-bit ones, for machines without native wide SIMD.
2 
3 use crate::types::*;
4 use crate::{vec128_storage, vec256_storage, vec512_storage};
5 use core::marker::PhantomData;
6 use core::ops::*;
7 
/// A pair of 128-bit lanes used to emulate one 256-bit vector.
///
/// `G` is a zero-sized tag type carried only in `PhantomData`; it is never
/// stored at runtime.
#[derive(Copy, Clone, Default)]
#[allow(non_camel_case_types)]
pub struct x2<W, G>(pub [W; 2], PhantomData<G>);
impl<W, G> x2<W, G> {
    /// Wrap two lanes into an `x2`.
    #[inline(always)]
    pub fn new(xs: [W; 2]) -> Self {
        Self(xs, PhantomData)
    }
}
// Generate a binary-operator impl that applies the operator to each of the
// two lanes independently.
macro_rules! fwd_binop_x2 {
    ($t:ident, $f:ident) => {
        impl<W: $t + Copy, G> $t for x2<W, G> {
            type Output = x2<W::Output, G>;
            #[inline(always)]
            fn $f(self, rhs: Self) -> Self::Output {
                let [l0, l1] = self.0;
                let [r0, r1] = rhs.0;
                x2::new([l0.$f(r0), l1.$f(r1)])
            }
        }
    };
}
// Generate a compound-assignment impl that updates each lane in place from
// the corresponding lane of `rhs`.
macro_rules! fwd_binop_assign_x2 {
    ($t:ident, $f:ident) => {
        impl<W: $t + Copy, G> $t for x2<W, G> {
            #[inline(always)]
            fn $f(&mut self, rhs: Self) {
                for i in 0..2 {
                    self.0[i].$f(rhs.0[i]);
                }
            }
        }
    };
}
// Generate a unary trait-method body that applies the method to every lane.
macro_rules! fwd_unop_x2 {
    ($f:ident) => {
        #[inline(always)]
        fn $f(self) -> Self {
            x2::new(self.0.map(|lane| lane.$f()))
        }
    };
}
// Word rotations forward lane-by-lane: each method rotates every word inside
// both 128-bit halves via the underlying `W` implementation.
impl<W, G> RotateEachWord32 for x2<W, G>
where
    W: Copy + RotateEachWord32,
{
    fwd_unop_x2!(rotate_each_word_right7);
    fwd_unop_x2!(rotate_each_word_right8);
    fwd_unop_x2!(rotate_each_word_right11);
    fwd_unop_x2!(rotate_each_word_right12);
    fwd_unop_x2!(rotate_each_word_right16);
    fwd_unop_x2!(rotate_each_word_right20);
    fwd_unop_x2!(rotate_each_word_right24);
    fwd_unop_x2!(rotate_each_word_right25);
}
impl<W, G> RotateEachWord64 for x2<W, G>
where
    W: Copy + RotateEachWord64,
{
    fwd_unop_x2!(rotate_each_word_right32);
}
// Marker impl only: `RotateEachWord128` requires no methods here.
impl<W, G> RotateEachWord128 for x2<W, G> where W: RotateEachWord128 {}
// Marker impls: the `BitOps*` traits bundle operator requirements and add no
// methods of their own, so the impl bodies are empty.
impl<W, G> BitOps0 for x2<W, G>
where
    W: BitOps0,
    G: Copy,
{
}
impl<W, G> BitOps32 for x2<W, G>
where
    W: BitOps32 + BitOps0,
    G: Copy,
{
}
impl<W, G> BitOps64 for x2<W, G>
where
    W: BitOps64 + BitOps0,
    G: Copy,
{
}
impl<W, G> BitOps128 for x2<W, G>
where
    W: BitOps128 + BitOps0,
    G: Copy,
{
}
// Bitwise operators forward to each 128-bit half independently.
fwd_binop_x2!(BitAnd, bitand);
fwd_binop_x2!(BitOr, bitor);
fwd_binop_x2!(BitXor, bitxor);
fwd_binop_x2!(AndNot, andnot);
fwd_binop_assign_x2!(BitAndAssign, bitand_assign);
fwd_binop_assign_x2!(BitOrAssign, bitor_assign);
fwd_binop_assign_x2!(BitXorAssign, bitxor_assign);
// Marker impl; the actual arithmetic is supplied by the forwards below.
impl<W, G> ArithOps for x2<W, G>
where
    W: ArithOps,
    G: Copy,
{
}
// Addition forwards lane-by-lane to `W`'s own `Add`/`AddAssign`.
fwd_binop_x2!(Add, add);
fwd_binop_assign_x2!(AddAssign, add_assign);
106 impl<W: Not + Copy, G> Not for x2<W, G> {
107     type Output = x2<W::Output, G>;
108     #[inline(always)]
not(self) -> Self::Output109     fn not(self) -> Self::Output {
110         x2::new([self.0[0].not(), self.0[1].not()])
111     }
112 }
// Nothing unsafe happens in the body itself; the `unsafe` obligation comes
// solely from the `UnsafeFrom` trait contract.
impl<W, G> UnsafeFrom<[W; 2]> for x2<W, G> {
    #[inline(always)]
    unsafe fn unsafe_from(xs: [W; 2]) -> Self {
        x2::new(xs)
    }
}
impl<W: Copy, G> Vec2<W> for x2<W, G> {
    /// Return lane `i`. Panics if `i >= 2` (array index out of bounds).
    #[inline(always)]
    fn extract(self, i: u32) -> W {
        self.0[i as usize]
    }
    /// Return a copy of `self` with lane `i` replaced by `w`.
    /// Panics if `i >= 2`.
    #[inline(always)]
    fn insert(mut self, w: W, i: u32) -> Self {
        self.0[i as usize] = w;
        self
    }
}
impl<W: Copy + Store<vec128_storage>, G> Store<vec256_storage> for x2<W, G> {
    /// Split a 256-bit storage value into two 128-bit halves and unpack each.
    ///
    /// # Safety
    /// Forwards to `W::unpack`; the caller must uphold whatever contract that
    /// impl requires (see the `Store` trait).
    #[inline(always)]
    unsafe fn unpack(p: vec256_storage) -> Self {
        let p = p.split128();
        x2::new([W::unpack(p[0]), W::unpack(p[1])])
    }
}
137 impl<W, G> From<x2<W, G>> for vec256_storage
138 where
139     W: Copy,
140     vec128_storage: From<W>,
141 {
142     #[inline(always)]
from(x: x2<W, G>) -> Self143     fn from(x: x2<W, G>) -> Self {
144         vec256_storage::new128([x.0[0].into(), x.0[1].into()])
145     }
146 }
// Swizzle operations forward lane-by-lane to `W`'s `Swap64` impl.
impl<W, G> Swap64 for x2<W, G>
where
    W: Swap64 + Copy,
{
    fwd_unop_x2!(swap1);
    fwd_unop_x2!(swap2);
    fwd_unop_x2!(swap4);
    fwd_unop_x2!(swap8);
    fwd_unop_x2!(swap16);
    fwd_unop_x2!(swap32);
    fwd_unop_x2!(swap64);
}
impl<W: Copy, G> MultiLane<[W; 2]> for x2<W, G> {
    /// Expose the two lanes as a plain array.
    #[inline(always)]
    fn to_lanes(self) -> [W; 2] {
        self.0
    }
    /// Build an `x2` from a plain array of lanes.
    #[inline(always)]
    fn from_lanes(lanes: [W; 2]) -> Self {
        x2::new(lanes)
    }
}
169 impl<W: BSwap + Copy, G> BSwap for x2<W, G> {
170     #[inline(always)]
bswap(self) -> Self171     fn bswap(self) -> Self {
172         x2::new([self.0[0].bswap(), self.0[1].bswap()])
173     }
174 }
impl<W: StoreBytes + BSwap + Copy, G> StoreBytes for x2<W, G> {
    /// Load both lanes from little-endian bytes: the first 16 bytes feed
    /// lane 0, the remainder feeds lane 1.
    ///
    /// # Safety
    /// `input` must be at least 16 bytes (`split_at(16)` panics otherwise);
    /// each half is handed to `W::unsafe_read_le`, whose own length
    /// requirements must also hold — presumably 16 bytes per lane
    /// (TODO confirm against the `StoreBytes` contract).
    #[inline(always)]
    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
        let input = input.split_at(16);
        x2::new([W::unsafe_read_le(input.0), W::unsafe_read_le(input.1)])
    }
    /// Load big-endian bytes by reading little-endian and byte-swapping
    /// every word.
    ///
    /// # Safety
    /// Same requirements as [`unsafe_read_le`].
    #[inline(always)]
    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
        x2::unsafe_read_le(input).bswap()
    }
    /// Store both lanes as little-endian bytes. Panics if `out` is shorter
    /// than 16 bytes (`split_at_mut`); `W::write_le` presumably requires
    /// 16 bytes per lane — TODO confirm.
    #[inline(always)]
    fn write_le(self, out: &mut [u8]) {
        let out = out.split_at_mut(16);
        self.0[0].write_le(out.0);
        self.0[1].write_le(out.1);
    }
    /// Store both lanes as big-endian bytes. Lane order is unchanged; only
    /// bytes within each lane are reordered by `W::write_be`.
    #[inline(always)]
    fn write_be(self, out: &mut [u8]) {
        let out = out.split_at_mut(16);
        self.0[0].write_be(out.0);
        self.0[1].write_be(out.1);
    }
}
198 
/// Four 128-bit lanes used to emulate one 512-bit vector.
#[derive(Copy, Clone, Default)]
#[allow(non_camel_case_types)]
pub struct x4<W>(pub [W; 4]);
impl<W> x4<W> {
    /// Wrap four lanes into an `x4`.
    #[inline(always)]
    pub fn new(xs: [W; 4]) -> Self {
        Self(xs)
    }
}
// Generate a binary-operator impl that applies the operator to each of the
// four lanes independently.
macro_rules! fwd_binop_x4 {
    ($t:ident, $f:ident) => {
        impl<W: $t + Copy> $t for x4<W> {
            type Output = x4<W::Output>;
            #[inline(always)]
            fn $f(self, rhs: Self) -> Self::Output {
                let [l0, l1, l2, l3] = self.0;
                let [r0, r1, r2, r3] = rhs.0;
                x4([l0.$f(r0), l1.$f(r1), l2.$f(r2), l3.$f(r3)])
            }
        }
    };
}
// Generate a compound-assignment impl that updates each of the four lanes in
// place from the corresponding lane of `rhs`.
macro_rules! fwd_binop_assign_x4 {
    ($t:ident, $f:ident) => {
        impl<W: $t + Copy> $t for x4<W> {
            #[inline(always)]
            fn $f(&mut self, rhs: Self) {
                for i in 0..4 {
                    self.0[i].$f(rhs.0[i]);
                }
            }
        }
    };
}
// Generate a unary trait-method body that applies the method to every lane.
macro_rules! fwd_unop_x4 {
    ($f:ident) => {
        #[inline(always)]
        fn $f(self) -> Self {
            x4(self.0.map(|lane| lane.$f()))
        }
    };
}
// Word rotations forward lane-by-lane: each method rotates every word inside
// all four 128-bit lanes via the underlying `W` implementation.
impl<W> RotateEachWord32 for x4<W>
where
    W: Copy + RotateEachWord32,
{
    fwd_unop_x4!(rotate_each_word_right7);
    fwd_unop_x4!(rotate_each_word_right8);
    fwd_unop_x4!(rotate_each_word_right11);
    fwd_unop_x4!(rotate_each_word_right12);
    fwd_unop_x4!(rotate_each_word_right16);
    fwd_unop_x4!(rotate_each_word_right20);
    fwd_unop_x4!(rotate_each_word_right24);
    fwd_unop_x4!(rotate_each_word_right25);
}
impl<W> RotateEachWord64 for x4<W>
where
    W: Copy + RotateEachWord64,
{
    fwd_unop_x4!(rotate_each_word_right32);
}
// Marker impl only: `RotateEachWord128` requires no methods here.
impl<W> RotateEachWord128 for x4<W> where W: RotateEachWord128 {}
// Marker impls: the `BitOps*` traits bundle operator requirements and add no
// methods of their own.
impl<W> BitOps0 for x4<W> where W: BitOps0 {}
impl<W> BitOps32 for x4<W> where W: BitOps32 + BitOps0 {}
impl<W> BitOps64 for x4<W> where W: BitOps64 + BitOps0 {}
impl<W> BitOps128 for x4<W> where W: BitOps128 + BitOps0 {}
// Bitwise operators forward to each of the four lanes independently.
fwd_binop_x4!(BitAnd, bitand);
fwd_binop_x4!(BitOr, bitor);
fwd_binop_x4!(BitXor, bitxor);
fwd_binop_x4!(AndNot, andnot);
fwd_binop_assign_x4!(BitAndAssign, bitand_assign);
fwd_binop_assign_x4!(BitOrAssign, bitor_assign);
fwd_binop_assign_x4!(BitXorAssign, bitxor_assign);
// Marker impl; arithmetic is supplied by the forwards below.
impl<W> ArithOps for x4<W> where W: ArithOps {}
// Addition forwards lane-by-lane to `W`'s own `Add`/`AddAssign`.
fwd_binop_x4!(Add, add);
fwd_binop_assign_x4!(AddAssign, add_assign);
284 impl<W: Not + Copy> Not for x4<W> {
285     type Output = x4<W::Output>;
286     #[inline(always)]
not(self) -> Self::Output287     fn not(self) -> Self::Output {
288         x4([
289             self.0[0].not(),
290             self.0[1].not(),
291             self.0[2].not(),
292             self.0[3].not(),
293         ])
294     }
295 }
// Nothing unsafe happens in the body itself; the `unsafe` obligation comes
// solely from the `UnsafeFrom` trait contract.
impl<W> UnsafeFrom<[W; 4]> for x4<W> {
    #[inline(always)]
    unsafe fn unsafe_from(xs: [W; 4]) -> Self {
        x4(xs)
    }
}
impl<W: Copy> Vec4<W> for x4<W> {
    /// Return lane `i`. Panics if `i >= 4` (array index out of bounds).
    #[inline(always)]
    fn extract(self, i: u32) -> W {
        self.0[i as usize]
    }
    /// Return a copy of `self` with lane `i` replaced by `w`.
    /// Panics if `i >= 4`.
    #[inline(always)]
    fn insert(mut self, w: W, i: u32) -> Self {
        self.0[i as usize] = w;
        self
    }
}
impl<W: Copy + Store<vec128_storage>> Store<vec512_storage> for x4<W> {
    /// Split a 512-bit storage value into four 128-bit parts and unpack each.
    ///
    /// # Safety
    /// Forwards to `W::unpack`; the caller must uphold whatever contract that
    /// impl requires (see the `Store` trait).
    #[inline(always)]
    unsafe fn unpack(p: vec512_storage) -> Self {
        let p = p.split128();
        x4([
            W::unpack(p[0]),
            W::unpack(p[1]),
            W::unpack(p[2]),
            W::unpack(p[3]),
        ])
    }
}
325 impl<W> From<x4<W>> for vec512_storage
326 where
327     W: Copy,
328     vec128_storage: From<W>,
329 {
330     #[inline(always)]
from(x: x4<W>) -> Self331     fn from(x: x4<W>) -> Self {
332         vec512_storage::new128([x.0[0].into(), x.0[1].into(), x.0[2].into(), x.0[3].into()])
333     }
334 }
// Swizzle operations forward lane-by-lane to `W`'s `Swap64` impl.
impl<W> Swap64 for x4<W>
where
    W: Swap64 + Copy,
{
    fwd_unop_x4!(swap1);
    fwd_unop_x4!(swap2);
    fwd_unop_x4!(swap4);
    fwd_unop_x4!(swap8);
    fwd_unop_x4!(swap16);
    fwd_unop_x4!(swap32);
    fwd_unop_x4!(swap64);
}
impl<W: Copy> MultiLane<[W; 4]> for x4<W> {
    /// Expose the four lanes as a plain array.
    #[inline(always)]
    fn to_lanes(self) -> [W; 4] {
        self.0
    }
    /// Build an `x4` from a plain array of lanes.
    #[inline(always)]
    fn from_lanes(lanes: [W; 4]) -> Self {
        x4(lanes)
    }
}
357 impl<W: BSwap + Copy> BSwap for x4<W> {
358     #[inline(always)]
bswap(self) -> Self359     fn bswap(self) -> Self {
360         x4([
361             self.0[0].bswap(),
362             self.0[1].bswap(),
363             self.0[2].bswap(),
364             self.0[3].bswap(),
365         ])
366     }
367 }
impl<W: StoreBytes + BSwap + Copy> StoreBytes for x4<W> {
    /// Load the four lanes from consecutive 16-byte little-endian chunks.
    ///
    /// # Safety
    /// `input` must be at least 64 bytes; the explicit `[48..64]` slice
    /// panics otherwise. Each 16-byte chunk is handed to
    /// `W::unsafe_read_le`, whose own contract must also hold.
    #[inline(always)]
    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
        x4([
            W::unsafe_read_le(&input[0..16]),
            W::unsafe_read_le(&input[16..32]),
            W::unsafe_read_le(&input[32..48]),
            W::unsafe_read_le(&input[48..64]),
        ])
    }
    /// Load big-endian bytes by reading little-endian and byte-swapping
    /// every word.
    ///
    /// # Safety
    /// Same requirements as [`unsafe_read_le`].
    #[inline(always)]
    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
        x4::unsafe_read_le(input).bswap()
    }
    /// Store the four lanes as consecutive 16-byte little-endian chunks.
    /// Panics if `out` is shorter than 64 bytes.
    #[inline(always)]
    fn write_le(self, out: &mut [u8]) {
        self.0[0].write_le(&mut out[0..16]);
        self.0[1].write_le(&mut out[16..32]);
        self.0[2].write_le(&mut out[32..48]);
        self.0[3].write_le(&mut out[48..64]);
    }
    /// Store the four lanes as consecutive 16-byte big-endian chunks; lane
    /// order is unchanged, only bytes within each lane are reordered by
    /// `W::write_be`. Panics if `out` is shorter than 64 bytes.
    #[inline(always)]
    fn write_be(self, out: &mut [u8]) {
        self.0[0].write_be(&mut out[0..16]);
        self.0[1].write_be(&mut out[16..32]);
        self.0[2].write_be(&mut out[32..48]);
        self.0[3].write_be(&mut out[48..64]);
    }
}
397 impl<W: Copy + LaneWords4> LaneWords4 for x4<W> {
398     #[inline(always)]
shuffle_lane_words2301(self) -> Self399     fn shuffle_lane_words2301(self) -> Self {
400         x4([
401             self.0[0].shuffle_lane_words2301(),
402             self.0[1].shuffle_lane_words2301(),
403             self.0[2].shuffle_lane_words2301(),
404             self.0[3].shuffle_lane_words2301(),
405         ])
406     }
407     #[inline(always)]
shuffle_lane_words1230(self) -> Self408     fn shuffle_lane_words1230(self) -> Self {
409         x4([
410             self.0[0].shuffle_lane_words1230(),
411             self.0[1].shuffle_lane_words1230(),
412             self.0[2].shuffle_lane_words1230(),
413             self.0[3].shuffle_lane_words1230(),
414         ])
415     }
416     #[inline(always)]
shuffle_lane_words3012(self) -> Self417     fn shuffle_lane_words3012(self) -> Self {
418         x4([
419             self.0[0].shuffle_lane_words3012(),
420             self.0[1].shuffle_lane_words3012(),
421             self.0[2].shuffle_lane_words3012(),
422             self.0[3].shuffle_lane_words3012(),
423         ])
424     }
425 }
426