1 //! Shuffle vector lanes with run-time indices. 2 3 use crate::*; 4 5 pub trait Shuffle1Dyn { 6 type Indices; shuffle1_dyn(self, _: Self::Indices) -> Self7 fn shuffle1_dyn(self, _: Self::Indices) -> Self; 8 } 9 10 // Fallback implementation 11 macro_rules! impl_fallback { 12 ($id:ident) => { 13 impl Shuffle1Dyn for $id { 14 type Indices = Self; 15 #[inline] 16 fn shuffle1_dyn(self, indices: Self::Indices) -> Self { 17 let mut result = Self::splat(0); 18 for i in 0..$id::lanes() { 19 result = result 20 .replace(i, self.extract(indices.extract(i) as usize)); 21 } 22 result 23 } 24 } 25 }; 26 } 27 28 macro_rules! impl_shuffle1_dyn { 29 (u8x8) => { 30 cfg_if! { 31 if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), 32 target_feature = "ssse3"))] { 33 impl Shuffle1Dyn for u8x8 { 34 type Indices = Self; 35 #[inline] 36 fn shuffle1_dyn(self, indices: Self::Indices) -> Self { 37 #[cfg(target_arch = "x86")] 38 use crate::arch::x86::_mm_shuffle_pi8; 39 #[cfg(target_arch = "x86_64")] 40 use crate::arch::x86_64::_mm_shuffle_pi8; 41 42 unsafe { 43 crate::mem::transmute( 44 _mm_shuffle_pi8( 45 crate::mem::transmute(self.0), 46 crate::mem::transmute(indices.0) 47 ) 48 ) 49 } 50 } 51 } 52 } else if #[cfg(all( 53 any( 54 all(target_aarch = "aarch64", target_feature = "neon"), 55 all(target_aarch = "arm", target_feature = "v7", 56 target_feature = "neon") 57 ), 58 any(feature = "core_arch", libcore_neon) 59 ) 60 )] { 61 impl Shuffle1Dyn for u8x8 { 62 type Indices = Self; 63 #[inline] 64 fn shuffle1_dyn(self, indices: Self::Indices) -> Self { 65 #[cfg(targt_arch = "aarch64")] 66 use crate::arch::aarch64::vtbl1_u8; 67 #[cfg(targt_arch = "arm")] 68 use crate::arch::arm::vtbl1_u8; 69 70 // This is safe because the binary is compiled with 71 // neon enabled at compile-time and can therefore only 72 // run on CPUs that have it enabled. 73 unsafe { 74 Simd(mem::transmute( 75 vtbl1_u8(mem::transmute(self.0), 76 crate::mem::transmute(indices.0)) 77 )) 78 } 79 } 80 } 81 } else { 82 impl_fallback!(u8x8); 83 } 84 } 85 }; 86 (u8x16) => { 87 cfg_if! { 88 if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), 89 target_feature = "ssse3"))] { 90 impl Shuffle1Dyn for u8x16 { 91 type Indices = Self; 92 #[inline] 93 fn shuffle1_dyn(self, indices: Self::Indices) -> Self { 94 #[cfg(target_arch = "x86")] 95 use crate::arch::x86::_mm_shuffle_epi8; 96 #[cfg(target_arch = "x86_64")] 97 use crate::arch::x86_64::_mm_shuffle_epi8; 98 // This is safe because the binary is compiled with 99 // ssse3 enabled at compile-time and can therefore only 100 // run on CPUs that have it enabled. 101 unsafe { 102 Simd(mem::transmute( 103 _mm_shuffle_epi8(mem::transmute(self.0), 104 crate::mem::transmute(indices)) 105 )) 106 } 107 } 108 } 109 } else if #[cfg(all(target_aarch = "aarch64", target_feature = "neon", 110 any(feature = "core_arch", libcore_neon)))] { 111 impl Shuffle1Dyn for u8x16 { 112 type Indices = Self; 113 #[inline] 114 fn shuffle1_dyn(self, indices: Self::Indices) -> Self { 115 use crate::arch::aarch64::vqtbl1q_u8; 116 117 // This is safe because the binary is compiled with 118 // neon enabled at compile-time and can therefore only 119 // run on CPUs that have it enabled. 120 unsafe { 121 Simd(mem::transmute( 122 vqtbl1q_u8(mem::transmute(self.0), 123 crate::mem::transmute(indices.0)) 124 )) 125 } 126 } 127 } 128 } else if #[cfg(all(target_aarch = "arm", target_feature = "v7", 129 target_feature = "neon", 130 any(feature = "core_arch", libcore_neon)))] { 131 impl Shuffle1Dyn for u8x16 { 132 type Indices = Self; 133 #[inline] 134 fn shuffle1_dyn(self, indices: Self::Indices) -> Self { 135 use crate::arch::arm::vtbl2_u8; 136 137 // This is safe because the binary is compiled with 138 // neon enabled at compile-time and can therefore only 139 // run on CPUs that have it enabled. 140 unsafe { 141 union U { 142 j: u8x16, 143 s: (u8x8, u8x8), 144 } 145 146 let (i0, i1) = U { j: y }.s; 147 148 let r0 = vtbl2_u8( 149 mem::transmute(x), 150 crate::mem::transmute(i0) 151 ); 152 let r1 = vtbl2_u8( 153 mem::transmute(x), 154 crate::mem::transmute(i1) 155 ); 156 157 let r = U { s: (r0, r1) }.j; 158 159 Simd(mem::transmute(r)) 160 } 161 } 162 } 163 } else { 164 impl_fallback!(u8x16); 165 } 166 } 167 }; 168 (u16x8) => { 169 impl Shuffle1Dyn for u16x8 { 170 type Indices = Self; 171 #[inline] 172 fn shuffle1_dyn(self, indices: Self::Indices) -> Self { 173 let indices: u8x8 = (indices * 2).cast(); 174 let indices: u8x16 = shuffle!( 175 indices, [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7] 176 ); 177 let v = u8x16::new( 178 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 179 ); 180 let indices = indices + v; 181 unsafe { 182 let s: u8x16 =crate::mem::transmute(self); 183 crate::mem::transmute(s.shuffle1_dyn(indices)) 184 } 185 } 186 } 187 }; 188 (u32x4) => { 189 cfg_if! { 190 if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), 191 target_feature = "avx"))] { 192 impl Shuffle1Dyn for u32x4 { 193 type Indices = Self; 194 #[inline] 195 fn shuffle1_dyn(self, indices: Self::Indices) -> Self { 196 #[cfg(target_arch = "x86")] 197 use crate::arch::x86::{_mm_permutevar_ps}; 198 #[cfg(target_arch = "x86_64")] 199 use crate::arch::x86_64::{_mm_permutevar_ps}; 200 201 unsafe { 202 crate::mem::transmute( 203 _mm_permutevar_ps( 204 crate::mem::transmute(self.0), 205 crate::mem::transmute(indices.0) 206 ) 207 ) 208 } 209 } 210 } 211 } else { 212 impl Shuffle1Dyn for u32x4 { 213 type Indices = Self; 214 #[inline] 215 fn shuffle1_dyn(self, indices: Self::Indices) -> Self { 216 let indices: u8x4 = (indices * 4).cast(); 217 let indices: u8x16 = shuffle!( 218 indices, 219 [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3] 220 ); 221 let v = u8x16::new( 222 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 223 ); 224 let indices = indices + v; 225 unsafe { 226 let s: u8x16 =crate::mem::transmute(self); 227 crate::mem::transmute(s.shuffle1_dyn(indices)) 228 } 229 } 230 } 231 } 232 } 233 }; 234 (u64x2) => { 235 cfg_if! { 236 if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), 237 target_feature = "avx"))] { 238 impl Shuffle1Dyn for u64x2 { 239 type Indices = Self; 240 #[inline] 241 fn shuffle1_dyn(self, indices: Self::Indices) -> Self { 242 #[cfg(target_arch = "x86")] 243 use crate::arch::x86::{_mm_permutevar_pd}; 244 #[cfg(target_arch = "x86_64")] 245 use crate::arch::x86_64::{_mm_permutevar_pd}; 246 // _mm_permutevar_pd uses the _second_ bit of each 247 // element to perform the selection, that is: 0b00 => 0, 248 // 0b10 => 1: 249 let indices = indices << 1; 250 unsafe { 251 crate::mem::transmute( 252 _mm_permutevar_pd( 253 crate::mem::transmute(self), 254 crate::mem::transmute(indices) 255 ) 256 ) 257 } 258 } 259 } 260 } else { 261 impl Shuffle1Dyn for u64x2 { 262 type Indices = Self; 263 #[inline] 264 fn shuffle1_dyn(self, indices: Self::Indices) -> Self { 265 let indices: u8x2 = (indices * 8).cast(); 266 let indices: u8x16 = shuffle!( 267 indices, 268 [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] 269 ); 270 let v = u8x16::new( 271 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 272 ); 273 let indices = indices + v; 274 unsafe { 275 let s: u8x16 =crate::mem::transmute(self); 276 crate::mem::transmute(s.shuffle1_dyn(indices)) 277 } 278 } 279 } 280 } 281 } 282 }; 283 (u128x1) => { 284 impl Shuffle1Dyn for u128x1 { 285 type Indices = Self; 286 #[inline] 287 fn shuffle1_dyn(self, _indices: Self::Indices) -> Self { 288 self 289 } 290 } 291 }; 292 ($id:ident) => { impl_fallback!($id); } 293 } 294 295 impl_shuffle1_dyn!(u8x2); 296 impl_shuffle1_dyn!(u8x4); 297 impl_shuffle1_dyn!(u8x8); 298 impl_shuffle1_dyn!(u8x16); 299 impl_shuffle1_dyn!(u8x32); 300 impl_shuffle1_dyn!(u8x64); 301 302 impl_shuffle1_dyn!(u16x2); 303 impl_shuffle1_dyn!(u16x4); 304 impl_shuffle1_dyn!(u16x8); 305 impl_shuffle1_dyn!(u16x16); 306 impl_shuffle1_dyn!(u16x32); 307 308 impl_shuffle1_dyn!(u32x2); 309 impl_shuffle1_dyn!(u32x4); 310 impl_shuffle1_dyn!(u32x8); 311 impl_shuffle1_dyn!(u32x16); 312 313 impl_shuffle1_dyn!(u64x2); 314 impl_shuffle1_dyn!(u64x4); 315 impl_shuffle1_dyn!(u64x8); 316 317 impl_shuffle1_dyn!(usizex2); 318 impl_shuffle1_dyn!(usizex4); 319 impl_shuffle1_dyn!(usizex8); 320 321 impl_shuffle1_dyn!(u128x1); 322 impl_shuffle1_dyn!(u128x2); 323 impl_shuffle1_dyn!(u128x4); 324 325 // Implementation for non-unsigned vector types 326 macro_rules! impl_shuffle1_dyn_non_u { 327 ($id:ident, $uid:ident) => { 328 impl Shuffle1Dyn for $id { 329 type Indices = $uid; 330 #[inline] 331 fn shuffle1_dyn(self, indices: Self::Indices) -> Self { 332 unsafe { 333 let u: $uid = crate::mem::transmute(self); 334 crate::mem::transmute(u.shuffle1_dyn(indices)) 335 } 336 } 337 } 338 }; 339 } 340 341 impl_shuffle1_dyn_non_u!(i8x2, u8x2); 342 impl_shuffle1_dyn_non_u!(i8x4, u8x4); 343 impl_shuffle1_dyn_non_u!(i8x8, u8x8); 344 impl_shuffle1_dyn_non_u!(i8x16, u8x16); 345 impl_shuffle1_dyn_non_u!(i8x32, u8x32); 346 impl_shuffle1_dyn_non_u!(i8x64, u8x64); 347 348 impl_shuffle1_dyn_non_u!(i16x2, u16x2); 349 impl_shuffle1_dyn_non_u!(i16x4, u16x4); 350 impl_shuffle1_dyn_non_u!(i16x8, u16x8); 351 impl_shuffle1_dyn_non_u!(i16x16, u16x16); 352 impl_shuffle1_dyn_non_u!(i16x32, u16x32); 353 354 impl_shuffle1_dyn_non_u!(i32x2, u32x2); 355 impl_shuffle1_dyn_non_u!(i32x4, u32x4); 356 impl_shuffle1_dyn_non_u!(i32x8, u32x8); 357 impl_shuffle1_dyn_non_u!(i32x16, u32x16); 358 359 impl_shuffle1_dyn_non_u!(i64x2, u64x2); 360 impl_shuffle1_dyn_non_u!(i64x4, u64x4); 361 impl_shuffle1_dyn_non_u!(i64x8, u64x8); 362 363 impl_shuffle1_dyn_non_u!(isizex2, usizex2); 364 impl_shuffle1_dyn_non_u!(isizex4, usizex4); 365 impl_shuffle1_dyn_non_u!(isizex8, usizex8); 366 367 impl_shuffle1_dyn_non_u!(i128x1, u128x1); 368 impl_shuffle1_dyn_non_u!(i128x2, u128x2); 369 impl_shuffle1_dyn_non_u!(i128x4, u128x4); 370 371 impl_shuffle1_dyn_non_u!(m8x2, u8x2); 372 impl_shuffle1_dyn_non_u!(m8x4, u8x4); 373 impl_shuffle1_dyn_non_u!(m8x8, u8x8); 374 impl_shuffle1_dyn_non_u!(m8x16, u8x16); 375 impl_shuffle1_dyn_non_u!(m8x32, u8x32); 376 impl_shuffle1_dyn_non_u!(m8x64, u8x64); 377 378 impl_shuffle1_dyn_non_u!(m16x2, u16x2); 379 impl_shuffle1_dyn_non_u!(m16x4, u16x4); 380 impl_shuffle1_dyn_non_u!(m16x8, u16x8); 381 impl_shuffle1_dyn_non_u!(m16x16, u16x16); 382 impl_shuffle1_dyn_non_u!(m16x32, u16x32); 383 384 impl_shuffle1_dyn_non_u!(m32x2, u32x2); 385 impl_shuffle1_dyn_non_u!(m32x4, u32x4); 386 impl_shuffle1_dyn_non_u!(m32x8, u32x8); 387 impl_shuffle1_dyn_non_u!(m32x16, u32x16); 388 389 impl_shuffle1_dyn_non_u!(m64x2, u64x2); 390 impl_shuffle1_dyn_non_u!(m64x4, u64x4); 391 impl_shuffle1_dyn_non_u!(m64x8, u64x8); 392 393 impl_shuffle1_dyn_non_u!(msizex2, usizex2); 394 impl_shuffle1_dyn_non_u!(msizex4, usizex4); 395 impl_shuffle1_dyn_non_u!(msizex8, usizex8); 396 397 impl_shuffle1_dyn_non_u!(m128x1, u128x1); 398 impl_shuffle1_dyn_non_u!(m128x2, u128x2); 399 impl_shuffle1_dyn_non_u!(m128x4, u128x4); 400 401 impl_shuffle1_dyn_non_u!(f32x2, u32x2); 402 impl_shuffle1_dyn_non_u!(f32x4, u32x4); 403 impl_shuffle1_dyn_non_u!(f32x8, u32x8); 404 impl_shuffle1_dyn_non_u!(f32x16, u32x16); 405 406 impl_shuffle1_dyn_non_u!(f64x2, u64x2); 407 impl_shuffle1_dyn_non_u!(f64x4, u64x4); 408 impl_shuffle1_dyn_non_u!(f64x8, u64x8); 409 410 // Implementation for non-unsigned vector types 411 macro_rules! impl_shuffle1_dyn_ptr { 412 ($id:ident, $uid:ident) => { 413 impl<T> Shuffle1Dyn for $id<T> { 414 type Indices = $uid; 415 #[inline] 416 fn shuffle1_dyn(self, indices: Self::Indices) -> Self { 417 unsafe { 418 let u: $uid = crate::mem::transmute(self); 419 crate::mem::transmute(u.shuffle1_dyn(indices)) 420 } 421 } 422 } 423 }; 424 } 425 426 impl_shuffle1_dyn_ptr!(cptrx2, usizex2); 427 impl_shuffle1_dyn_ptr!(cptrx4, usizex4); 428 impl_shuffle1_dyn_ptr!(cptrx8, usizex8); 429 430 impl_shuffle1_dyn_ptr!(mptrx2, usizex2); 431 impl_shuffle1_dyn_ptr!(mptrx4, usizex4); 432 impl_shuffle1_dyn_ptr!(mptrx8, usizex8); 433