1 //! Implement 256- and 512- bit in terms of 128-bit, for machines without native wide SIMD. 2 3 use crate::types::*; 4 use crate::{vec128_storage, vec256_storage, vec512_storage}; 5 use core::marker::PhantomData; 6 use core::ops::*; 7 8 #[derive(Copy, Clone, Default)] 9 #[allow(non_camel_case_types)] 10 pub struct x2<W, G>(pub [W; 2], PhantomData<G>); 11 impl<W, G> x2<W, G> { 12 #[inline(always)] new(xs: [W; 2]) -> Self13 pub fn new(xs: [W; 2]) -> Self { 14 x2(xs, PhantomData) 15 } 16 } 17 macro_rules! fwd_binop_x2 { 18 ($trait:ident, $fn:ident) => { 19 impl<W: $trait + Copy, G> $trait for x2<W, G> { 20 type Output = x2<W::Output, G>; 21 #[inline(always)] 22 fn $fn(self, rhs: Self) -> Self::Output { 23 x2::new([self.0[0].$fn(rhs.0[0]), self.0[1].$fn(rhs.0[1])]) 24 } 25 } 26 }; 27 } 28 macro_rules! fwd_binop_assign_x2 { 29 ($trait:ident, $fn_assign:ident) => { 30 impl<W: $trait + Copy, G> $trait for x2<W, G> { 31 #[inline(always)] 32 fn $fn_assign(&mut self, rhs: Self) { 33 (self.0[0]).$fn_assign(rhs.0[0]); 34 (self.0[1]).$fn_assign(rhs.0[1]); 35 } 36 } 37 }; 38 } 39 macro_rules! fwd_unop_x2 { 40 ($fn:ident) => { 41 #[inline(always)] 42 fn $fn(self) -> Self { 43 x2::new([self.0[0].$fn(), self.0[1].$fn()]) 44 } 45 }; 46 } 47 impl<W, G> RotateEachWord32 for x2<W, G> 48 where 49 W: Copy + RotateEachWord32, 50 { 51 fwd_unop_x2!(rotate_each_word_right7); 52 fwd_unop_x2!(rotate_each_word_right8); 53 fwd_unop_x2!(rotate_each_word_right11); 54 fwd_unop_x2!(rotate_each_word_right12); 55 fwd_unop_x2!(rotate_each_word_right16); 56 fwd_unop_x2!(rotate_each_word_right20); 57 fwd_unop_x2!(rotate_each_word_right24); 58 fwd_unop_x2!(rotate_each_word_right25); 59 } 60 impl<W, G> RotateEachWord64 for x2<W, G> 61 where 62 W: Copy + RotateEachWord64, 63 { 64 fwd_unop_x2!(rotate_each_word_right32); 65 } 66 impl<W, G> RotateEachWord128 for x2<W, G> where W: RotateEachWord128 {} 67 impl<W, G> BitOps0 for x2<W, G> 68 where 69 W: BitOps0, 70 G: Copy, 71 { 72 } 73 impl<W, G> BitOps32 for x2<W, G> 74 where 75 W: BitOps32 + BitOps0, 76 G: Copy, 77 { 78 } 79 impl<W, G> BitOps64 for x2<W, G> 80 where 81 W: BitOps64 + BitOps0, 82 G: Copy, 83 { 84 } 85 impl<W, G> BitOps128 for x2<W, G> 86 where 87 W: BitOps128 + BitOps0, 88 G: Copy, 89 { 90 } 91 fwd_binop_x2!(BitAnd, bitand); 92 fwd_binop_x2!(BitOr, bitor); 93 fwd_binop_x2!(BitXor, bitxor); 94 fwd_binop_x2!(AndNot, andnot); 95 fwd_binop_assign_x2!(BitAndAssign, bitand_assign); 96 fwd_binop_assign_x2!(BitOrAssign, bitor_assign); 97 fwd_binop_assign_x2!(BitXorAssign, bitxor_assign); 98 impl<W, G> ArithOps for x2<W, G> 99 where 100 W: ArithOps, 101 G: Copy, 102 { 103 } 104 fwd_binop_x2!(Add, add); 105 fwd_binop_assign_x2!(AddAssign, add_assign); 106 impl<W: Not + Copy, G> Not for x2<W, G> { 107 type Output = x2<W::Output, G>; 108 #[inline(always)] not(self) -> Self::Output109 fn not(self) -> Self::Output { 110 x2::new([self.0[0].not(), self.0[1].not()]) 111 } 112 } 113 impl<W, G> UnsafeFrom<[W; 2]> for x2<W, G> { 114 #[inline(always)] unsafe_from(xs: [W; 2]) -> Self115 unsafe fn unsafe_from(xs: [W; 2]) -> Self { 116 x2::new(xs) 117 } 118 } 119 impl<W: Copy, G> Vec2<W> for x2<W, G> { 120 #[inline(always)] extract(self, i: u32) -> W121 fn extract(self, i: u32) -> W { 122 self.0[i as usize] 123 } 124 #[inline(always)] insert(mut self, w: W, i: u32) -> Self125 fn insert(mut self, w: W, i: u32) -> Self { 126 self.0[i as usize] = w; 127 self 128 } 129 } 130 impl<W: Copy + Store<vec128_storage>, G> Store<vec256_storage> for x2<W, G> { 131 #[inline(always)] unpack(p: vec256_storage) -> Self132 unsafe fn unpack(p: vec256_storage) -> Self { 133 let p = p.split128(); 134 x2::new([W::unpack(p[0]), W::unpack(p[1])]) 135 } 136 } 137 impl<W, G> From<x2<W, G>> for vec256_storage 138 where 139 W: Copy, 140 vec128_storage: From<W>, 141 { 142 #[inline(always)] from(x: x2<W, G>) -> Self143 fn from(x: x2<W, G>) -> Self { 144 vec256_storage::new128([x.0[0].into(), x.0[1].into()]) 145 } 146 } 147 impl<W, G> Swap64 for x2<W, G> 148 where 149 W: Swap64 + Copy, 150 { 151 fwd_unop_x2!(swap1); 152 fwd_unop_x2!(swap2); 153 fwd_unop_x2!(swap4); 154 fwd_unop_x2!(swap8); 155 fwd_unop_x2!(swap16); 156 fwd_unop_x2!(swap32); 157 fwd_unop_x2!(swap64); 158 } 159 impl<W: Copy, G> MultiLane<[W; 2]> for x2<W, G> { 160 #[inline(always)] to_lanes(self) -> [W; 2]161 fn to_lanes(self) -> [W; 2] { 162 self.0 163 } 164 #[inline(always)] from_lanes(lanes: [W; 2]) -> Self165 fn from_lanes(lanes: [W; 2]) -> Self { 166 x2::new(lanes) 167 } 168 } 169 impl<W: BSwap + Copy, G> BSwap for x2<W, G> { 170 #[inline(always)] bswap(self) -> Self171 fn bswap(self) -> Self { 172 x2::new([self.0[0].bswap(), self.0[1].bswap()]) 173 } 174 } 175 impl<W: StoreBytes + BSwap + Copy, G> StoreBytes for x2<W, G> { 176 #[inline(always)] unsafe_read_le(input: &[u8]) -> Self177 unsafe fn unsafe_read_le(input: &[u8]) -> Self { 178 let input = input.split_at(input.len() / 2); 179 x2::new([W::unsafe_read_le(input.0), W::unsafe_read_le(input.1)]) 180 } 181 #[inline(always)] unsafe_read_be(input: &[u8]) -> Self182 unsafe fn unsafe_read_be(input: &[u8]) -> Self { 183 let input = input.split_at(input.len() / 2); 184 x2::new([W::unsafe_read_be(input.0), W::unsafe_read_be(input.1)]) 185 } 186 #[inline(always)] write_le(self, out: &mut [u8])187 fn write_le(self, out: &mut [u8]) { 188 let out = out.split_at_mut(out.len() / 2); 189 self.0[0].write_le(out.0); 190 self.0[1].write_le(out.1); 191 } 192 #[inline(always)] write_be(self, out: &mut [u8])193 fn write_be(self, out: &mut [u8]) { 194 let out = out.split_at_mut(out.len() / 2); 195 self.0[0].write_be(out.0); 196 self.0[1].write_be(out.1); 197 } 198 } 199 impl<W: Copy + LaneWords4, G: Copy> LaneWords4 for x2<W, G> { 200 #[inline(always)] shuffle_lane_words2301(self) -> Self201 fn shuffle_lane_words2301(self) -> Self { 202 Self::new([ 203 self.0[0].shuffle_lane_words2301(), 204 self.0[1].shuffle_lane_words2301(), 205 ]) 206 } 207 #[inline(always)] shuffle_lane_words1230(self) -> Self208 fn shuffle_lane_words1230(self) -> Self { 209 Self::new([ 210 self.0[0].shuffle_lane_words1230(), 211 self.0[1].shuffle_lane_words1230(), 212 ]) 213 } 214 #[inline(always)] shuffle_lane_words3012(self) -> Self215 fn shuffle_lane_words3012(self) -> Self { 216 Self::new([ 217 self.0[0].shuffle_lane_words3012(), 218 self.0[1].shuffle_lane_words3012(), 219 ]) 220 } 221 } 222 223 #[derive(Copy, Clone, Default)] 224 #[allow(non_camel_case_types)] 225 pub struct x4<W>(pub [W; 4]); 226 impl<W> x4<W> { 227 #[inline(always)] new(xs: [W; 4]) -> Self228 pub fn new(xs: [W; 4]) -> Self { 229 x4(xs) 230 } 231 } 232 macro_rules! fwd_binop_x4 { 233 ($trait:ident, $fn:ident) => { 234 impl<W: $trait + Copy> $trait for x4<W> { 235 type Output = x4<W::Output>; 236 #[inline(always)] 237 fn $fn(self, rhs: Self) -> Self::Output { 238 x4([ 239 self.0[0].$fn(rhs.0[0]), 240 self.0[1].$fn(rhs.0[1]), 241 self.0[2].$fn(rhs.0[2]), 242 self.0[3].$fn(rhs.0[3]), 243 ]) 244 } 245 } 246 }; 247 } 248 macro_rules! fwd_binop_assign_x4 { 249 ($trait:ident, $fn_assign:ident) => { 250 impl<W: $trait + Copy> $trait for x4<W> { 251 #[inline(always)] 252 fn $fn_assign(&mut self, rhs: Self) { 253 self.0[0].$fn_assign(rhs.0[0]); 254 self.0[1].$fn_assign(rhs.0[1]); 255 self.0[2].$fn_assign(rhs.0[2]); 256 self.0[3].$fn_assign(rhs.0[3]); 257 } 258 } 259 }; 260 } 261 macro_rules! fwd_unop_x4 { 262 ($fn:ident) => { 263 #[inline(always)] 264 fn $fn(self) -> Self { 265 x4([ 266 self.0[0].$fn(), 267 self.0[1].$fn(), 268 self.0[2].$fn(), 269 self.0[3].$fn(), 270 ]) 271 } 272 }; 273 } 274 impl<W> RotateEachWord32 for x4<W> 275 where 276 W: Copy + RotateEachWord32, 277 { 278 fwd_unop_x4!(rotate_each_word_right7); 279 fwd_unop_x4!(rotate_each_word_right8); 280 fwd_unop_x4!(rotate_each_word_right11); 281 fwd_unop_x4!(rotate_each_word_right12); 282 fwd_unop_x4!(rotate_each_word_right16); 283 fwd_unop_x4!(rotate_each_word_right20); 284 fwd_unop_x4!(rotate_each_word_right24); 285 fwd_unop_x4!(rotate_each_word_right25); 286 } 287 impl<W> RotateEachWord64 for x4<W> 288 where 289 W: Copy + RotateEachWord64, 290 { 291 fwd_unop_x4!(rotate_each_word_right32); 292 } 293 impl<W> RotateEachWord128 for x4<W> where W: RotateEachWord128 {} 294 impl<W> BitOps0 for x4<W> where W: BitOps0 {} 295 impl<W> BitOps32 for x4<W> where W: BitOps32 + BitOps0 {} 296 impl<W> BitOps64 for x4<W> where W: BitOps64 + BitOps0 {} 297 impl<W> BitOps128 for x4<W> where W: BitOps128 + BitOps0 {} 298 fwd_binop_x4!(BitAnd, bitand); 299 fwd_binop_x4!(BitOr, bitor); 300 fwd_binop_x4!(BitXor, bitxor); 301 fwd_binop_x4!(AndNot, andnot); 302 fwd_binop_assign_x4!(BitAndAssign, bitand_assign); 303 fwd_binop_assign_x4!(BitOrAssign, bitor_assign); 304 fwd_binop_assign_x4!(BitXorAssign, bitxor_assign); 305 impl<W> ArithOps for x4<W> where W: ArithOps {} 306 fwd_binop_x4!(Add, add); 307 fwd_binop_assign_x4!(AddAssign, add_assign); 308 impl<W: Not + Copy> Not for x4<W> { 309 type Output = x4<W::Output>; 310 #[inline(always)] not(self) -> Self::Output311 fn not(self) -> Self::Output { 312 x4([ 313 self.0[0].not(), 314 self.0[1].not(), 315 self.0[2].not(), 316 self.0[3].not(), 317 ]) 318 } 319 } 320 impl<W> UnsafeFrom<[W; 4]> for x4<W> { 321 #[inline(always)] unsafe_from(xs: [W; 4]) -> Self322 unsafe fn unsafe_from(xs: [W; 4]) -> Self { 323 x4(xs) 324 } 325 } 326 impl<W: Copy> Vec4<W> for x4<W> { 327 #[inline(always)] extract(self, i: u32) -> W328 fn extract(self, i: u32) -> W { 329 self.0[i as usize] 330 } 331 #[inline(always)] insert(mut self, w: W, i: u32) -> Self332 fn insert(mut self, w: W, i: u32) -> Self { 333 self.0[i as usize] = w; 334 self 335 } 336 } 337 impl<W: Copy> Vec4Ext<W> for x4<W> { 338 #[inline(always)] transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self) where Self: Sized,339 fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self) 340 where 341 Self: Sized, 342 { 343 ( 344 x4([a.0[0], b.0[0], c.0[0], d.0[0]]), 345 x4([a.0[1], b.0[1], c.0[1], d.0[1]]), 346 x4([a.0[2], b.0[2], c.0[2], d.0[2]]), 347 x4([a.0[3], b.0[3], c.0[3], d.0[3]]), 348 ) 349 } 350 } 351 impl<W: Copy + Store<vec128_storage>> Store<vec512_storage> for x4<W> { 352 #[inline(always)] unpack(p: vec512_storage) -> Self353 unsafe fn unpack(p: vec512_storage) -> Self { 354 let p = p.split128(); 355 x4([ 356 W::unpack(p[0]), 357 W::unpack(p[1]), 358 W::unpack(p[2]), 359 W::unpack(p[3]), 360 ]) 361 } 362 } 363 impl<W> From<x4<W>> for vec512_storage 364 where 365 W: Copy, 366 vec128_storage: From<W>, 367 { 368 #[inline(always)] from(x: x4<W>) -> Self369 fn from(x: x4<W>) -> Self { 370 vec512_storage::new128([x.0[0].into(), x.0[1].into(), x.0[2].into(), x.0[3].into()]) 371 } 372 } 373 impl<W> Swap64 for x4<W> 374 where 375 W: Swap64 + Copy, 376 { 377 fwd_unop_x4!(swap1); 378 fwd_unop_x4!(swap2); 379 fwd_unop_x4!(swap4); 380 fwd_unop_x4!(swap8); 381 fwd_unop_x4!(swap16); 382 fwd_unop_x4!(swap32); 383 fwd_unop_x4!(swap64); 384 } 385 impl<W: Copy> MultiLane<[W; 4]> for x4<W> { 386 #[inline(always)] to_lanes(self) -> [W; 4]387 fn to_lanes(self) -> [W; 4] { 388 self.0 389 } 390 #[inline(always)] from_lanes(lanes: [W; 4]) -> Self391 fn from_lanes(lanes: [W; 4]) -> Self { 392 x4(lanes) 393 } 394 } 395 impl<W: BSwap + Copy> BSwap for x4<W> { 396 #[inline(always)] bswap(self) -> Self397 fn bswap(self) -> Self { 398 x4([ 399 self.0[0].bswap(), 400 self.0[1].bswap(), 401 self.0[2].bswap(), 402 self.0[3].bswap(), 403 ]) 404 } 405 } 406 impl<W: StoreBytes + BSwap + Copy> StoreBytes for x4<W> { 407 #[inline(always)] unsafe_read_le(input: &[u8]) -> Self408 unsafe fn unsafe_read_le(input: &[u8]) -> Self { 409 let n = input.len() / 4; 410 x4([ 411 W::unsafe_read_le(&input[..n]), 412 W::unsafe_read_le(&input[n..n * 2]), 413 W::unsafe_read_le(&input[n * 2..n * 3]), 414 W::unsafe_read_le(&input[n * 3..]), 415 ]) 416 } 417 #[inline(always)] unsafe_read_be(input: &[u8]) -> Self418 unsafe fn unsafe_read_be(input: &[u8]) -> Self { 419 let n = input.len() / 4; 420 x4([ 421 W::unsafe_read_be(&input[..n]), 422 W::unsafe_read_be(&input[n..n * 2]), 423 W::unsafe_read_be(&input[n * 2..n * 3]), 424 W::unsafe_read_be(&input[n * 3..]), 425 ]) 426 } 427 #[inline(always)] write_le(self, out: &mut [u8])428 fn write_le(self, out: &mut [u8]) { 429 let n = out.len() / 4; 430 self.0[0].write_le(&mut out[..n]); 431 self.0[1].write_le(&mut out[n..n * 2]); 432 self.0[2].write_le(&mut out[n * 2..n * 3]); 433 self.0[3].write_le(&mut out[n * 3..]); 434 } 435 #[inline(always)] write_be(self, out: &mut [u8])436 fn write_be(self, out: &mut [u8]) { 437 let n = out.len() / 4; 438 self.0[0].write_be(&mut out[..n]); 439 self.0[1].write_be(&mut out[n..n * 2]); 440 self.0[2].write_be(&mut out[n * 2..n * 3]); 441 self.0[3].write_be(&mut out[n * 3..]); 442 } 443 } 444 impl<W: Copy + LaneWords4> LaneWords4 for x4<W> { 445 #[inline(always)] shuffle_lane_words2301(self) -> Self446 fn shuffle_lane_words2301(self) -> Self { 447 x4([ 448 self.0[0].shuffle_lane_words2301(), 449 self.0[1].shuffle_lane_words2301(), 450 self.0[2].shuffle_lane_words2301(), 451 self.0[3].shuffle_lane_words2301(), 452 ]) 453 } 454 #[inline(always)] shuffle_lane_words1230(self) -> Self455 fn shuffle_lane_words1230(self) -> Self { 456 x4([ 457 self.0[0].shuffle_lane_words1230(), 458 self.0[1].shuffle_lane_words1230(), 459 self.0[2].shuffle_lane_words1230(), 460 self.0[3].shuffle_lane_words1230(), 461 ]) 462 } 463 #[inline(always)] shuffle_lane_words3012(self) -> Self464 fn shuffle_lane_words3012(self) -> Self { 465 x4([ 466 self.0[0].shuffle_lane_words3012(), 467 self.0[1].shuffle_lane_words3012(), 468 self.0[2].shuffle_lane_words3012(), 469 self.0[3].shuffle_lane_words3012(), 470 ]) 471 } 472 } 473