//! Horizontal swap bytes: reverses the order of all bytes in a vector.
2 
3 // FIXME: investigate using `llvm.bswap`
4 // https://github.com/rust-lang-nursery/packed_simd/issues/19
5 
6 use crate::*;
7 
/// Byte-swapping of a whole value.
///
/// The implementations generated in this module reverse the order of all
/// bytes of a vector and return a vector of the same type.
pub(crate) trait SwapBytes {
    /// Consumes `self` and returns it with its bytes swapped.
    fn swap_bytes(self) -> Self;
}
11 
/// Implements `SwapBytes` for vector types, grouped by total vector width.
///
/// Invocation form: `impl_swap_bytes!(<width>: Type, Type, ...,);` where
/// `<width>` is one of `v16`, `v32`, `v64`, `v128`, `v256` or `v512`. The
/// matcher requires a comma after every type, including the last one.
///
/// Every arm reverses the order of all bytes of the vector:
///
/// * `v16` shuffles the vector itself, exchanging its two lanes;
/// * the wider arms transmute the vector to a `u8xN` view (N = 4, 8, 16, 32
///   or 64), shuffle that view with the indices `N-1, ..., 0`, and transmute
///   the shuffled bytes back to `Self`.
///
/// NOTE(review): the transmutes rely on each listed type being exactly as
/// wide as the `u8xN` view of its arm. Assuming `crate::mem::transmute` is
/// `core::mem::transmute`, a mismatch is rejected at compile time; confirm.
macro_rules! impl_swap_bytes {
    (v16: $($vec:ident,)+) => {
        $(
            impl SwapBytes for $vec {
                #[inline]
                fn swap_bytes(self) -> Self {
                    // No byte view needed: the two lanes are simply exchanged.
                    unsafe { shuffle!(self, [1, 0]) }
                }
            }
        )+
    };
    (v32: $($vec:ident,)+) => {
        $(
            impl SwapBytes for $vec {
                #[inline]
                #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))]
                fn swap_bytes(self) -> Self {
                    unsafe {
                        // View as 4 bytes, reverse them, view as `Self` again.
                        let raw: u8x4 = crate::mem::transmute(self);
                        let reversed: u8x4 = shuffle!(raw, [3, 2, 1, 0]);
                        crate::mem::transmute(reversed)
                    }
                }
            }
        )+
    };
    (v64: $($vec:ident,)+) => {
        $(
            impl SwapBytes for $vec {
                #[inline]
                #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))]
                fn swap_bytes(self) -> Self {
                    unsafe {
                        // View as 8 bytes, reverse them, view as `Self` again.
                        let raw: u8x8 = crate::mem::transmute(self);
                        let reversed: u8x8 = shuffle!(raw, [7, 6, 5, 4, 3, 2, 1, 0]);
                        crate::mem::transmute(reversed)
                    }
                }
            }
        )+
    };
    (v128: $($vec:ident,)+) => {
        $(
            impl SwapBytes for $vec {
                #[inline]
                #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))]
                fn swap_bytes(self) -> Self {
                    unsafe {
                        // View as 16 bytes, reverse them, view as `Self` again.
                        let raw: u8x16 = crate::mem::transmute(self);
                        let reversed: u8x16 = shuffle!(
                            raw,
                            [
                                15, 14, 13, 12, 11, 10, 9, 8,
                                7, 6, 5, 4, 3, 2, 1, 0
                            ]
                        );
                        crate::mem::transmute(reversed)
                    }
                }
            }
        )+
    };
    (v256: $($vec:ident,)+) => {
        $(
            impl SwapBytes for $vec {
                #[inline]
                #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))]
                fn swap_bytes(self) -> Self {
                    unsafe {
                        // View as 32 bytes, reverse them, view as `Self` again.
                        let raw: u8x32 = crate::mem::transmute(self);
                        let reversed: u8x32 = shuffle!(
                            raw,
                            [
                                31, 30, 29, 28, 27, 26, 25, 24,
                                23, 22, 21, 20, 19, 18, 17, 16,
                                15, 14, 13, 12, 11, 10, 9, 8,
                                7, 6, 5, 4, 3, 2, 1, 0
                            ]
                        );
                        crate::mem::transmute(reversed)
                    }
                }
            }
        )+
    };
    (v512: $($vec:ident,)+) => {
        $(
            impl SwapBytes for $vec {
                #[inline]
                #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))]
                fn swap_bytes(self) -> Self {
                    unsafe {
                        // View as 64 bytes, reverse them, view as `Self` again.
                        let raw: u8x64 = crate::mem::transmute(self);
                        let reversed: u8x64 = shuffle!(
                            raw,
                            [
                                63, 62, 61, 60, 59, 58, 57, 56,
                                55, 54, 53, 52, 51, 50, 49, 48,
                                47, 46, 45, 44, 43, 42, 41, 40,
                                39, 38, 37, 36, 35, 34, 33, 32,
                                31, 30, 29, 28, 27, 26, 25, 24,
                                23, 22, 21, 20, 19, 18, 17, 16,
                                15, 14, 13, 12, 11, 10, 9, 8,
                                7, 6, 5, 4, 3, 2, 1, 0
                            ]
                        );
                        crate::mem::transmute(reversed)
                    }
                }
            }
        )+
    };
}
118 
119 impl_swap_bytes!(v16: u8x2, i8x2,);
120 impl_swap_bytes!(v32: u8x4, i8x4, u16x2, i16x2,);
121 // FIXME: 64-bit single element vector
122 impl_swap_bytes!(
123     v64: u8x8,
124     i8x8,
125     u16x4,
126     i16x4,
127     u32x2,
128     i32x2, /* u64x1, i64x1, */
129 );
130 
131 impl_swap_bytes!(
132     v128: u8x16,
133     i8x16,
134     u16x8,
135     i16x8,
136     u32x4,
137     i32x4,
138     u64x2,
139     i64x2,
140     u128x1,
141     i128x1,
142 );
143 impl_swap_bytes!(
144     v256: u8x32,
145     i8x32,
146     u16x16,
147     i16x16,
148     u32x8,
149     i32x8,
150     u64x4,
151     i64x4,
152     u128x2,
153     i128x2,
154 );
155 
156 impl_swap_bytes!(
157     v512: u8x64,
158     i8x64,
159     u16x32,
160     i16x32,
161     u32x16,
162     i32x16,
163     u64x8,
164     i64x8,
165     u128x4,
166     i128x4,
167 );
168 
169 cfg_if! {
170     if #[cfg(target_pointer_width = "8")] {
171         impl_swap_bytes!(v16: isizex2, usizex2,);
172         impl_swap_bytes!(v32: isizex4, usizex4,);
173         impl_swap_bytes!(v64: isizex8, usizex8,);
174     } else if #[cfg(target_pointer_width = "16")] {
175         impl_swap_bytes!(v32: isizex2, usizex2,);
176         impl_swap_bytes!(v64: isizex4, usizex4,);
177         impl_swap_bytes!(v128: isizex8, usizex8,);
178     } else if #[cfg(target_pointer_width = "32")] {
179         impl_swap_bytes!(v64: isizex2, usizex2,);
180         impl_swap_bytes!(v128: isizex4, usizex4,);
181         impl_swap_bytes!(v256: isizex8, usizex8,);
182     } else if #[cfg(target_pointer_width = "64")] {
183         impl_swap_bytes!(v128: isizex2, usizex2,);
184         impl_swap_bytes!(v256: isizex4, usizex4,);
185         impl_swap_bytes!(v512: isizex8, usizex8,);
186     } else {
187         compile_error!("unsupported target_pointer_width");
188     }
189 }
190