1 //! Implement 256- and 512- bit in terms of 128-bit, for machines without native wide SIMD. 2 3 use crate::types::*; 4 use crate::{vec128_storage, vec256_storage, vec512_storage}; 5 use core::marker::PhantomData; 6 use core::ops::*; 7 8 #[derive(Copy, Clone, Default)] 9 #[allow(non_camel_case_types)] 10 pub struct x2<W, G>(pub [W; 2], PhantomData<G>); 11 impl<W, G> x2<W, G> { 12 #[inline(always)] new(xs: [W; 2]) -> Self13 pub fn new(xs: [W; 2]) -> Self { 14 x2(xs, PhantomData) 15 } 16 } 17 macro_rules! fwd_binop_x2 { 18 ($trait:ident, $fn:ident) => { 19 impl<W: $trait + Copy, G> $trait for x2<W, G> { 20 type Output = x2<W::Output, G>; 21 #[inline(always)] 22 fn $fn(self, rhs: Self) -> Self::Output { 23 x2::new([self.0[0].$fn(rhs.0[0]), self.0[1].$fn(rhs.0[1])]) 24 } 25 } 26 }; 27 } 28 macro_rules! fwd_binop_assign_x2 { 29 ($trait:ident, $fn_assign:ident) => { 30 impl<W: $trait + Copy, G> $trait for x2<W, G> { 31 #[inline(always)] 32 fn $fn_assign(&mut self, rhs: Self) { 33 (self.0[0]).$fn_assign(rhs.0[0]); 34 (self.0[1]).$fn_assign(rhs.0[1]); 35 } 36 } 37 }; 38 } 39 macro_rules! fwd_unop_x2 { 40 ($fn:ident) => { 41 #[inline(always)] 42 fn $fn(self) -> Self { 43 x2::new([self.0[0].$fn(), self.0[1].$fn()]) 44 } 45 }; 46 } 47 impl<W, G> RotateEachWord32 for x2<W, G> 48 where 49 W: Copy + RotateEachWord32, 50 { 51 fwd_unop_x2!(rotate_each_word_right7); 52 fwd_unop_x2!(rotate_each_word_right8); 53 fwd_unop_x2!(rotate_each_word_right11); 54 fwd_unop_x2!(rotate_each_word_right12); 55 fwd_unop_x2!(rotate_each_word_right16); 56 fwd_unop_x2!(rotate_each_word_right20); 57 fwd_unop_x2!(rotate_each_word_right24); 58 fwd_unop_x2!(rotate_each_word_right25); 59 } 60 impl<W, G> RotateEachWord64 for x2<W, G> 61 where 62 W: Copy + RotateEachWord64, 63 { 64 fwd_unop_x2!(rotate_each_word_right32); 65 } 66 impl<W, G> RotateEachWord128 for x2<W, G> where W: RotateEachWord128 {} 67 impl<W, G> BitOps0 for x2<W, G> 68 where 69 W: BitOps0, 70 G: Copy, 71 { 72 } 73 impl<W, G> BitOps32 for x2<W, G> 74 where 75 W: BitOps32 + BitOps0, 76 G: Copy, 77 { 78 } 79 impl<W, G> BitOps64 for x2<W, G> 80 where 81 W: BitOps64 + BitOps0, 82 G: Copy, 83 { 84 } 85 impl<W, G> BitOps128 for x2<W, G> 86 where 87 W: BitOps128 + BitOps0, 88 G: Copy, 89 { 90 } 91 fwd_binop_x2!(BitAnd, bitand); 92 fwd_binop_x2!(BitOr, bitor); 93 fwd_binop_x2!(BitXor, bitxor); 94 fwd_binop_x2!(AndNot, andnot); 95 fwd_binop_assign_x2!(BitAndAssign, bitand_assign); 96 fwd_binop_assign_x2!(BitOrAssign, bitor_assign); 97 fwd_binop_assign_x2!(BitXorAssign, bitxor_assign); 98 impl<W, G> ArithOps for x2<W, G> 99 where 100 W: ArithOps, 101 G: Copy, 102 { 103 } 104 fwd_binop_x2!(Add, add); 105 fwd_binop_assign_x2!(AddAssign, add_assign); 106 impl<W: Not + Copy, G> Not for x2<W, G> { 107 type Output = x2<W::Output, G>; 108 #[inline(always)] not(self) -> Self::Output109 fn not(self) -> Self::Output { 110 x2::new([self.0[0].not(), self.0[1].not()]) 111 } 112 } 113 impl<W, G> UnsafeFrom<[W; 2]> for x2<W, G> { 114 #[inline(always)] unsafe_from(xs: [W; 2]) -> Self115 unsafe fn unsafe_from(xs: [W; 2]) -> Self { 116 x2::new(xs) 117 } 118 } 119 impl<W: Copy, G> Vec2<W> for x2<W, G> { 120 #[inline(always)] extract(self, i: u32) -> W121 fn extract(self, i: u32) -> W { 122 self.0[i as usize] 123 } 124 #[inline(always)] insert(mut self, w: W, i: u32) -> Self125 fn insert(mut self, w: W, i: u32) -> Self { 126 self.0[i as usize] = w; 127 self 128 } 129 } 130 impl<W: Copy + Store<vec128_storage>, G> Store<vec256_storage> for x2<W, G> { 131 #[inline(always)] unpack(p: vec256_storage) -> Self132 unsafe fn unpack(p: vec256_storage) -> Self { 133 let p = p.split128(); 134 x2::new([W::unpack(p[0]), W::unpack(p[1])]) 135 } 136 } 137 impl<W, G> From<x2<W, G>> for vec256_storage 138 where 139 W: Copy, 140 vec128_storage: From<W>, 141 { 142 #[inline(always)] from(x: x2<W, G>) -> Self143 fn from(x: x2<W, G>) -> Self { 144 vec256_storage::new128([x.0[0].into(), x.0[1].into()]) 145 } 146 } 147 impl<W, G> Swap64 for x2<W, G> 148 where 149 W: Swap64 + Copy, 150 { 151 fwd_unop_x2!(swap1); 152 fwd_unop_x2!(swap2); 153 fwd_unop_x2!(swap4); 154 fwd_unop_x2!(swap8); 155 fwd_unop_x2!(swap16); 156 fwd_unop_x2!(swap32); 157 fwd_unop_x2!(swap64); 158 } 159 impl<W: Copy, G> MultiLane<[W; 2]> for x2<W, G> { 160 #[inline(always)] to_lanes(self) -> [W; 2]161 fn to_lanes(self) -> [W; 2] { 162 self.0 163 } 164 #[inline(always)] from_lanes(lanes: [W; 2]) -> Self165 fn from_lanes(lanes: [W; 2]) -> Self { 166 x2::new(lanes) 167 } 168 } 169 impl<W: BSwap + Copy, G> BSwap for x2<W, G> { 170 #[inline(always)] bswap(self) -> Self171 fn bswap(self) -> Self { 172 x2::new([self.0[0].bswap(), self.0[1].bswap()]) 173 } 174 } 175 impl<W: StoreBytes + BSwap + Copy, G> StoreBytes for x2<W, G> { 176 #[inline(always)] unsafe_read_le(input: &[u8]) -> Self177 unsafe fn unsafe_read_le(input: &[u8]) -> Self { 178 let input = input.split_at(16); 179 x2::new([W::unsafe_read_le(input.0), W::unsafe_read_le(input.1)]) 180 } 181 #[inline(always)] unsafe_read_be(input: &[u8]) -> Self182 unsafe fn unsafe_read_be(input: &[u8]) -> Self { 183 x2::unsafe_read_le(input).bswap() 184 } 185 #[inline(always)] write_le(self, out: &mut [u8])186 fn write_le(self, out: &mut [u8]) { 187 let out = out.split_at_mut(16); 188 self.0[0].write_le(out.0); 189 self.0[1].write_le(out.1); 190 } 191 #[inline(always)] write_be(self, out: &mut [u8])192 fn write_be(self, out: &mut [u8]) { 193 let out = out.split_at_mut(16); 194 self.0[0].write_be(out.0); 195 self.0[1].write_be(out.1); 196 } 197 } 198 199 #[derive(Copy, Clone, Default)] 200 #[allow(non_camel_case_types)] 201 pub struct x4<W>(pub [W; 4]); 202 impl<W> x4<W> { 203 #[inline(always)] new(xs: [W; 4]) -> Self204 pub fn new(xs: [W; 4]) -> Self { 205 x4(xs) 206 } 207 } 208 macro_rules! fwd_binop_x4 { 209 ($trait:ident, $fn:ident) => { 210 impl<W: $trait + Copy> $trait for x4<W> { 211 type Output = x4<W::Output>; 212 #[inline(always)] 213 fn $fn(self, rhs: Self) -> Self::Output { 214 x4([ 215 self.0[0].$fn(rhs.0[0]), 216 self.0[1].$fn(rhs.0[1]), 217 self.0[2].$fn(rhs.0[2]), 218 self.0[3].$fn(rhs.0[3]), 219 ]) 220 } 221 } 222 }; 223 } 224 macro_rules! fwd_binop_assign_x4 { 225 ($trait:ident, $fn_assign:ident) => { 226 impl<W: $trait + Copy> $trait for x4<W> { 227 #[inline(always)] 228 fn $fn_assign(&mut self, rhs: Self) { 229 self.0[0].$fn_assign(rhs.0[0]); 230 self.0[1].$fn_assign(rhs.0[1]); 231 self.0[2].$fn_assign(rhs.0[2]); 232 self.0[3].$fn_assign(rhs.0[3]); 233 } 234 } 235 }; 236 } 237 macro_rules! fwd_unop_x4 { 238 ($fn:ident) => { 239 #[inline(always)] 240 fn $fn(self) -> Self { 241 x4([ 242 self.0[0].$fn(), 243 self.0[1].$fn(), 244 self.0[2].$fn(), 245 self.0[3].$fn(), 246 ]) 247 } 248 }; 249 } 250 impl<W> RotateEachWord32 for x4<W> 251 where 252 W: Copy + RotateEachWord32, 253 { 254 fwd_unop_x4!(rotate_each_word_right7); 255 fwd_unop_x4!(rotate_each_word_right8); 256 fwd_unop_x4!(rotate_each_word_right11); 257 fwd_unop_x4!(rotate_each_word_right12); 258 fwd_unop_x4!(rotate_each_word_right16); 259 fwd_unop_x4!(rotate_each_word_right20); 260 fwd_unop_x4!(rotate_each_word_right24); 261 fwd_unop_x4!(rotate_each_word_right25); 262 } 263 impl<W> RotateEachWord64 for x4<W> 264 where 265 W: Copy + RotateEachWord64, 266 { 267 fwd_unop_x4!(rotate_each_word_right32); 268 } 269 impl<W> RotateEachWord128 for x4<W> where W: RotateEachWord128 {} 270 impl<W> BitOps0 for x4<W> where W: BitOps0 {} 271 impl<W> BitOps32 for x4<W> where W: BitOps32 + BitOps0 {} 272 impl<W> BitOps64 for x4<W> where W: BitOps64 + BitOps0 {} 273 impl<W> BitOps128 for x4<W> where W: BitOps128 + BitOps0 {} 274 fwd_binop_x4!(BitAnd, bitand); 275 fwd_binop_x4!(BitOr, bitor); 276 fwd_binop_x4!(BitXor, bitxor); 277 fwd_binop_x4!(AndNot, andnot); 278 fwd_binop_assign_x4!(BitAndAssign, bitand_assign); 279 fwd_binop_assign_x4!(BitOrAssign, bitor_assign); 280 fwd_binop_assign_x4!(BitXorAssign, bitxor_assign); 281 impl<W> ArithOps for x4<W> where W: ArithOps {} 282 fwd_binop_x4!(Add, add); 283 fwd_binop_assign_x4!(AddAssign, add_assign); 284 impl<W: Not + Copy> Not for x4<W> { 285 type Output = x4<W::Output>; 286 #[inline(always)] not(self) -> Self::Output287 fn not(self) -> Self::Output { 288 x4([ 289 self.0[0].not(), 290 self.0[1].not(), 291 self.0[2].not(), 292 self.0[3].not(), 293 ]) 294 } 295 } 296 impl<W> UnsafeFrom<[W; 4]> for x4<W> { 297 #[inline(always)] unsafe_from(xs: [W; 4]) -> Self298 unsafe fn unsafe_from(xs: [W; 4]) -> Self { 299 x4(xs) 300 } 301 } 302 impl<W: Copy> Vec4<W> for x4<W> { 303 #[inline(always)] extract(self, i: u32) -> W304 fn extract(self, i: u32) -> W { 305 self.0[i as usize] 306 } 307 #[inline(always)] insert(mut self, w: W, i: u32) -> Self308 fn insert(mut self, w: W, i: u32) -> Self { 309 self.0[i as usize] = w; 310 self 311 } 312 } 313 impl<W: Copy + Store<vec128_storage>> Store<vec512_storage> for x4<W> { 314 #[inline(always)] unpack(p: vec512_storage) -> Self315 unsafe fn unpack(p: vec512_storage) -> Self { 316 let p = p.split128(); 317 x4([ 318 W::unpack(p[0]), 319 W::unpack(p[1]), 320 W::unpack(p[2]), 321 W::unpack(p[3]), 322 ]) 323 } 324 } 325 impl<W> From<x4<W>> for vec512_storage 326 where 327 W: Copy, 328 vec128_storage: From<W>, 329 { 330 #[inline(always)] from(x: x4<W>) -> Self331 fn from(x: x4<W>) -> Self { 332 vec512_storage::new128([x.0[0].into(), x.0[1].into(), x.0[2].into(), x.0[3].into()]) 333 } 334 } 335 impl<W> Swap64 for x4<W> 336 where 337 W: Swap64 + Copy, 338 { 339 fwd_unop_x4!(swap1); 340 fwd_unop_x4!(swap2); 341 fwd_unop_x4!(swap4); 342 fwd_unop_x4!(swap8); 343 fwd_unop_x4!(swap16); 344 fwd_unop_x4!(swap32); 345 fwd_unop_x4!(swap64); 346 } 347 impl<W: Copy> MultiLane<[W; 4]> for x4<W> { 348 #[inline(always)] to_lanes(self) -> [W; 4]349 fn to_lanes(self) -> [W; 4] { 350 self.0 351 } 352 #[inline(always)] from_lanes(lanes: [W; 4]) -> Self353 fn from_lanes(lanes: [W; 4]) -> Self { 354 x4(lanes) 355 } 356 } 357 impl<W: BSwap + Copy> BSwap for x4<W> { 358 #[inline(always)] bswap(self) -> Self359 fn bswap(self) -> Self { 360 x4([ 361 self.0[0].bswap(), 362 self.0[1].bswap(), 363 self.0[2].bswap(), 364 self.0[3].bswap(), 365 ]) 366 } 367 } 368 impl<W: StoreBytes + BSwap + Copy> StoreBytes for x4<W> { 369 #[inline(always)] unsafe_read_le(input: &[u8]) -> Self370 unsafe fn unsafe_read_le(input: &[u8]) -> Self { 371 x4([ 372 W::unsafe_read_le(&input[0..16]), 373 W::unsafe_read_le(&input[16..32]), 374 W::unsafe_read_le(&input[32..48]), 375 W::unsafe_read_le(&input[48..64]), 376 ]) 377 } 378 #[inline(always)] unsafe_read_be(input: &[u8]) -> Self379 unsafe fn unsafe_read_be(input: &[u8]) -> Self { 380 x4::unsafe_read_le(input).bswap() 381 } 382 #[inline(always)] write_le(self, out: &mut [u8])383 fn write_le(self, out: &mut [u8]) { 384 self.0[0].write_le(&mut out[0..16]); 385 self.0[1].write_le(&mut out[16..32]); 386 self.0[2].write_le(&mut out[32..48]); 387 self.0[3].write_le(&mut out[48..64]); 388 } 389 #[inline(always)] write_be(self, out: &mut [u8])390 fn write_be(self, out: &mut [u8]) { 391 self.0[0].write_be(&mut out[0..16]); 392 self.0[1].write_be(&mut out[16..32]); 393 self.0[2].write_be(&mut out[32..48]); 394 self.0[3].write_be(&mut out[48..64]); 395 } 396 } 397 impl<W: Copy + LaneWords4> LaneWords4 for x4<W> { 398 #[inline(always)] shuffle_lane_words2301(self) -> Self399 fn shuffle_lane_words2301(self) -> Self { 400 x4([ 401 self.0[0].shuffle_lane_words2301(), 402 self.0[1].shuffle_lane_words2301(), 403 self.0[2].shuffle_lane_words2301(), 404 self.0[3].shuffle_lane_words2301(), 405 ]) 406 } 407 #[inline(always)] shuffle_lane_words1230(self) -> Self408 fn shuffle_lane_words1230(self) -> Self { 409 x4([ 410 self.0[0].shuffle_lane_words1230(), 411 self.0[1].shuffle_lane_words1230(), 412 self.0[2].shuffle_lane_words1230(), 413 self.0[3].shuffle_lane_words1230(), 414 ]) 415 } 416 #[inline(always)] shuffle_lane_words3012(self) -> Self417 fn shuffle_lane_words3012(self) -> Self { 418 x4([ 419 self.0[0].shuffle_lane_words3012(), 420 self.0[1].shuffle_lane_words3012(), 421 self.0[2].shuffle_lane_words3012(), 422 self.0[3].shuffle_lane_words3012(), 423 ]) 424 } 425 } 426