1 //! Horizontal swap bytes reductions. 2 3 // FIXME: investigate using `llvm.bswap` 4 // https://github.com/rust-lang-nursery/packed_simd/issues/19 5 6 use crate::*; 7 8 crate trait SwapBytes { swap_bytes(self) -> Self9 fn swap_bytes(self) -> Self; 10 } 11 12 macro_rules! impl_swap_bytes { 13 (v16: $($id:ident,)+) => { 14 $( 15 impl SwapBytes for $id { 16 #[inline] 17 fn swap_bytes(self) -> Self { 18 unsafe { shuffle!(self, [1, 0]) } 19 } 20 } 21 )+ 22 }; 23 (v32: $($id:ident,)+) => { 24 $( 25 impl SwapBytes for $id { 26 #[inline] 27 #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))] 28 fn swap_bytes(self) -> Self { 29 unsafe { 30 let bytes: u8x4 = crate::mem::transmute(self); 31 let result: u8x4 = shuffle!(bytes, [3, 2, 1, 0]); 32 crate::mem::transmute(result) 33 } 34 } 35 } 36 )+ 37 }; 38 (v64: $($id:ident,)+) => { 39 $( 40 impl SwapBytes for $id { 41 #[inline] 42 #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))] 43 fn swap_bytes(self) -> Self { 44 unsafe { 45 let bytes: u8x8 = crate::mem::transmute(self); 46 let result: u8x8 = shuffle!( 47 bytes, [7, 6, 5, 4, 3, 2, 1, 0] 48 ); 49 crate::mem::transmute(result) 50 } 51 } 52 } 53 )+ 54 }; 55 (v128: $($id:ident,)+) => { 56 $( 57 impl SwapBytes for $id { 58 #[inline] 59 #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))] 60 fn swap_bytes(self) -> Self { 61 unsafe { 62 let bytes: u8x16 = crate::mem::transmute(self); 63 let result: u8x16 = shuffle!(bytes, [ 64 15, 14, 13, 12, 11, 10, 9, 8, 65 7, 6, 5, 4, 3, 2, 1, 0 66 ]); 67 crate::mem::transmute(result) 68 } 69 } 70 } 71 )+ 72 }; 73 (v256: $($id:ident,)+) => { 74 $( 75 impl SwapBytes for $id { 76 #[inline] 77 #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))] 78 fn swap_bytes(self) -> Self { 79 unsafe { 80 let bytes: u8x32 = crate::mem::transmute(self); 81 let result: u8x32 = shuffle!(bytes, [ 82 31, 30, 29, 28, 27, 26, 25, 24, 83 23, 22, 21, 20, 19, 18, 17, 16, 84 15, 14, 13, 12, 11, 10, 9, 8, 85 7, 6, 5, 4, 3, 2, 1, 0 86 ]); 87 crate::mem::transmute(result) 88 } 89 } 90 } 91 )+ 92 }; 93 (v512: $($id:ident,)+) => { 94 $( 95 impl SwapBytes for $id { 96 #[inline] 97 #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))] 98 fn swap_bytes(self) -> Self { 99 unsafe { 100 let bytes: u8x64 = crate::mem::transmute(self); 101 let result: u8x64 = shuffle!(bytes, [ 102 63, 62, 61, 60, 59, 58, 57, 56, 103 55, 54, 53, 52, 51, 50, 49, 48, 104 47, 46, 45, 44, 43, 42, 41, 40, 105 39, 38, 37, 36, 35, 34, 33, 32, 106 31, 30, 29, 28, 27, 26, 25, 24, 107 23, 22, 21, 20, 19, 18, 17, 16, 108 15, 14, 13, 12, 11, 10, 9, 8, 109 7, 6, 5, 4, 3, 2, 1, 0 110 ]); 111 crate::mem::transmute(result) 112 } 113 } 114 } 115 )+ 116 }; 117 } 118 119 impl_swap_bytes!(v16: u8x2, i8x2,); 120 impl_swap_bytes!(v32: u8x4, i8x4, u16x2, i16x2,); 121 // FIXME: 64-bit single element vector 122 impl_swap_bytes!( 123 v64: u8x8, 124 i8x8, 125 u16x4, 126 i16x4, 127 u32x2, 128 i32x2, /* u64x1, i64x1, */ 129 ); 130 131 impl_swap_bytes!( 132 v128: u8x16, 133 i8x16, 134 u16x8, 135 i16x8, 136 u32x4, 137 i32x4, 138 u64x2, 139 i64x2, 140 u128x1, 141 i128x1, 142 ); 143 impl_swap_bytes!( 144 v256: u8x32, 145 i8x32, 146 u16x16, 147 i16x16, 148 u32x8, 149 i32x8, 150 u64x4, 151 i64x4, 152 u128x2, 153 i128x2, 154 ); 155 156 impl_swap_bytes!( 157 v512: u8x64, 158 i8x64, 159 u16x32, 160 i16x32, 161 u32x16, 162 i32x16, 163 u64x8, 164 i64x8, 165 u128x4, 166 i128x4, 167 ); 168 169 cfg_if! { 170 if #[cfg(target_pointer_width = "8")] { 171 impl_swap_bytes!(v16: isizex2, usizex2,); 172 impl_swap_bytes!(v32: isizex4, usizex4,); 173 impl_swap_bytes!(v64: isizex8, usizex8,); 174 } else if #[cfg(target_pointer_width = "16")] { 175 impl_swap_bytes!(v32: isizex2, usizex2,); 176 impl_swap_bytes!(v64: isizex4, usizex4,); 177 impl_swap_bytes!(v128: isizex8, usizex8,); 178 } else if #[cfg(target_pointer_width = "32")] { 179 impl_swap_bytes!(v64: isizex2, usizex2,); 180 impl_swap_bytes!(v128: isizex4, usizex4,); 181 impl_swap_bytes!(v256: isizex8, usizex8,); 182 } else if #[cfg(target_pointer_width = "64")] { 183 impl_swap_bytes!(v128: isizex2, usizex2,); 184 impl_swap_bytes!(v256: isizex4, usizex4,); 185 impl_swap_bytes!(v512: isizex8, usizex8,); 186 } else { 187 compile_error!("unsupported target_pointer_width"); 188 } 189 } 190