1 //! Shuffle vector lanes with run-time indices.
2 
3 use crate::*;
4 
/// Shuffles the lanes of a vector using indices that are only known at
/// run time.
///
/// The portable implementation in this file builds the result lane by lane:
/// lane `i` of the result is the lane of `self` selected by lane `i` of the
/// index vector.
pub trait Shuffle1Dyn {
    /// Vector type that carries one source-lane index per result lane.
    type Indices;

    /// Returns a vector whose lanes are picked from `self` at the positions
    /// given by `indices`.
    fn shuffle1_dyn(self, indices: Self::Indices) -> Self;
}
9 
10 // Fallback implementation
// Generates a portable `Shuffle1Dyn` impl for the vector type `$id`, using the
// type itself as the index vector. No target-specific intrinsics are involved:
// the result is assembled one lane at a time via `extract` / `replace`.
macro_rules! impl_fallback {
    ($id:ident) => {
        impl Shuffle1Dyn for $id {
            type Indices = Self;
            #[inline]
            fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
                // Start from an all-zero vector and overwrite every lane with
                // the lane of `self` that the matching index lane points at.
                (0..Self::lanes()).fold(Self::splat(0), |acc, lane| {
                    let source = indices.extract(lane) as usize;
                    acc.replace(lane, self.extract(source))
                })
            }
        }
    };
}
27 
// Implements `Shuffle1Dyn` for one unsigned vector type, selected by its
// literal name.
//
// * `u8x8`, `u8x16`, `u32x4` and `u64x2` use a hardware table-lookup / permute
//   intrinsic when the matching target feature is enabled at compile time.
// * `u16x8`, and the non-accelerated `u32x4` / `u64x2` paths, are lowered to a
//   byte shuffle on `u8x16`.
// * `u128x1` has a single lane, so the shuffle is the identity.
// * Every other identifier gets the portable `impl_fallback!` implementation.
macro_rules! impl_shuffle1_dyn {
    (u8x8) => {
        cfg_if! {
            // NOTE: `target_arch = "doesnotexist"` never matches, so in
            // practice only the aarch64 half of this condition can be true.
            if #[cfg(all(
                any(
                    all(target_arch = "aarch64", target_feature = "neon"),
                    all(target_arch = "doesnotexist", target_feature = "v7",
                        target_feature = "neon")
                ),
                any(feature = "core_arch", libcore_neon)
            )
            )] {
                impl Shuffle1Dyn for u8x8 {
                    type Indices = Self;
                    #[inline]
                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
                        #[cfg(target_arch = "aarch64")]
                        use crate::arch::aarch64::vtbl1_u8;
                        #[cfg(target_arch = "doesnotexist")]
                        use crate::arch::arm::vtbl1_u8;

                        // This is safe because the binary is compiled with
                        // neon enabled at compile-time and can therefore only
                        // run on CPUs that have it enabled.
                        unsafe {
                            Simd(mem::transmute(
                                vtbl1_u8(mem::transmute(self.0),
                                        crate::mem::transmute(indices.0))
                            ))
                        }
                    }
                }
            } else {
                impl_fallback!(u8x8);
            }
        }
    };
    (u8x16) => {
        cfg_if! {
            if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
                         target_feature = "ssse3"))] {
                impl Shuffle1Dyn for u8x16 {
                    type Indices = Self;
                    #[inline]
                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
                        #[cfg(target_arch = "x86")]
                        use crate::arch::x86::_mm_shuffle_epi8;
                        #[cfg(target_arch = "x86_64")]
                        use crate::arch::x86_64::_mm_shuffle_epi8;
                        // This is safe because the binary is compiled with
                        // ssse3 enabled at compile-time and can therefore only
                        // run on CPUs that have it enabled.
                        unsafe {
                            Simd(mem::transmute(
                                _mm_shuffle_epi8(mem::transmute(self.0),
                                                crate::mem::transmute(indices))
                            ))
                        }
                    }
                }
            } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon",
                                any(feature = "core_arch", libcore_neon)))] {
                impl Shuffle1Dyn for u8x16 {
                    type Indices = Self;
                    #[inline]
                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
                        use crate::arch::aarch64::vqtbl1q_u8;

                        // This is safe because the binary is compiled with
                        // neon enabled at compile-time and can therefore only
                        // run on CPUs that have it enabled.
                        unsafe {
                            Simd(mem::transmute(
                                vqtbl1q_u8(mem::transmute(self.0),
                                          crate::mem::transmute(indices.0))
                            ))
                        }
                    }
                }
            } else if #[cfg(all(target_arch = "doesnotexist", target_feature = "v7",
                                target_feature = "neon",
                                any(feature = "core_arch", libcore_neon)))] {
                // NOTE(review): this branch is disabled by the
                // `target_arch = "doesnotexist"` condition above and has not
                // been compiled or tested; re-verify it before enabling.
                impl Shuffle1Dyn for u8x16 {
                    type Indices = Self;
                    #[inline]
                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
                        use crate::arch::arm::vtbl2_u8;

                        // This is safe because the binary is compiled with
                        // neon enabled at compile-time and can therefore only
                        // run on CPUs that have it enabled.
                        unsafe {
                            // Splits a 16-lane vector into two 8-lane halves
                            // (and joins them back) without going through
                            // memory.
                            union U {
                                j: u8x16,
                                s: (u8x8, u8x8),
                            }

                            let (i0, i1) = U { j: indices }.s;

                            // The whole of `self` is the 16-byte lookup table;
                            // each call resolves eight of the sixteen indices.
                            let r0: u8x8 = mem::transmute(vtbl2_u8(
                                mem::transmute(self),
                                crate::mem::transmute(i0)
                            ));
                            let r1: u8x8 = mem::transmute(vtbl2_u8(
                                mem::transmute(self),
                                crate::mem::transmute(i1)
                            ));

                            let r = U { s: (r0, r1) }.j;

                            Simd(mem::transmute(r))
                        }
                    }
                }
            } else {
                impl_fallback!(u8x16);
            }
        }
    };
    (u16x8) => {
        impl Shuffle1Dyn for u16x8 {
            type Indices = Self;
            #[inline]
            fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
                // Turn every 16-bit lane index `i` into the byte indices
                // `2 * i` and `2 * i + 1`, then shuffle the raw bytes with the
                // `u8x16` implementation.
                let indices: u8x8 = (indices * 2).cast();
                let indices: u8x16 = shuffle!(
                    indices, [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7]
                );
                let v = u8x16::new(
                    0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1
                );
                let indices = indices + v;
                // SAFETY: both transmutes only reinterpret the vector between
                // `u16x8` and `u8x16`; `transmute` rejects a size mismatch at
                // compile time.
                unsafe {
                    let s: u8x16 = crate::mem::transmute(self);
                    crate::mem::transmute(s.shuffle1_dyn(indices))
                }
            }
        }
    };
    (u32x4) => {
        cfg_if! {
            if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
                         target_feature = "avx"))] {
                impl Shuffle1Dyn for u32x4 {
                    type Indices = Self;
                    #[inline]
                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
                        #[cfg(target_arch = "x86")]
                        use crate::arch::x86::{_mm_permutevar_ps};
                        #[cfg(target_arch = "x86_64")]
                        use crate::arch::x86_64::{_mm_permutevar_ps};

                        // SAFETY: this branch is only compiled when `avx` is
                        // enabled at compile-time (see the `cfg` above), so
                        // the binary can only run on CPUs that support the
                        // intrinsic.
                        unsafe {
                            crate::mem::transmute(
                                _mm_permutevar_ps(
                                    crate::mem::transmute(self.0),
                                    crate::mem::transmute(indices.0)
                                )
                            )
                        }
                    }
                }
            } else {
                impl Shuffle1Dyn for u32x4 {
                    type Indices = Self;
                    #[inline]
                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
                        // Turn every 32-bit lane index `i` into the byte
                        // indices `4 * i ..= 4 * i + 3`, then shuffle the raw
                        // bytes with the `u8x16` implementation.
                        let indices: u8x4 = (indices * 4).cast();
                        let indices: u8x16 = shuffle!(
                            indices,
                            [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3]
                        );
                        let v = u8x16::new(
                            0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3
                        );
                        let indices = indices + v;
                        // SAFETY: both transmutes only reinterpret the vector
                        // between `u32x4` and `u8x16`; `transmute` rejects a
                        // size mismatch at compile time.
                        unsafe {
                            let s: u8x16 = crate::mem::transmute(self);
                            crate::mem::transmute(s.shuffle1_dyn(indices))
                        }
                    }
                }
            }
        }
    };
    (u64x2) => {
        cfg_if! {
            if #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"),
                         target_feature = "avx"))] {
                impl Shuffle1Dyn for u64x2 {
                    type Indices = Self;
                    #[inline]
                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
                        #[cfg(target_arch = "x86")]
                        use crate::arch::x86::{_mm_permutevar_pd};
                        #[cfg(target_arch = "x86_64")]
                        use crate::arch::x86_64::{_mm_permutevar_pd};
                        // _mm_permutevar_pd uses the _second_ bit of each
                        // element to perform the selection, that is: 0b00 => 0,
                        // 0b10 => 1:
                        let indices = indices << 1;
                        // SAFETY: this branch is only compiled when `avx` is
                        // enabled at compile-time (see the `cfg` above), so
                        // the binary can only run on CPUs that support the
                        // intrinsic.
                        unsafe {
                            crate::mem::transmute(
                                _mm_permutevar_pd(
                                    crate::mem::transmute(self),
                                    crate::mem::transmute(indices)
                                )
                            )
                        }
                    }
                }
            } else {
                impl Shuffle1Dyn for u64x2 {
                    type Indices = Self;
                    #[inline]
                    fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
                        // Turn every 64-bit lane index `i` into the byte
                        // indices `8 * i ..= 8 * i + 7`, then shuffle the raw
                        // bytes with the `u8x16` implementation.
                        let indices: u8x2 = (indices * 8).cast();
                        let indices: u8x16 = shuffle!(
                            indices,
                            [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
                        );
                        let v = u8x16::new(
                            0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7
                        );
                        let indices = indices + v;
                        // SAFETY: both transmutes only reinterpret the vector
                        // between `u64x2` and `u8x16`; `transmute` rejects a
                        // size mismatch at compile time.
                        unsafe {
                            let s: u8x16 = crate::mem::transmute(self);
                            crate::mem::transmute(s.shuffle1_dyn(indices))
                        }
                    }
                }
            }
        }
    };
    (u128x1) => {
        impl Shuffle1Dyn for u128x1 {
            type Indices = Self;
            #[inline]
            fn shuffle1_dyn(self, _indices: Self::Indices) -> Self {
                // The indices are ignored: the vector is returned unchanged.
                self
            }
        }
    };
    // Any type without a dedicated arm above uses the portable implementation.
    ($id:ident) => { impl_fallback!($id); }
}
273 
// Unsigned vectors use themselves as the index type. `u8x8`, `u8x16`,
// `u16x8`, `u32x4`, `u64x2` and `u128x1` hit a dedicated macro arm; every
// other type below falls through to `impl_fallback!`.

// 8-bit lanes
impl_shuffle1_dyn!(u8x2);
impl_shuffle1_dyn!(u8x4);
impl_shuffle1_dyn!(u8x8);
impl_shuffle1_dyn!(u8x16);
impl_shuffle1_dyn!(u8x32);
impl_shuffle1_dyn!(u8x64);

// 16-bit lanes
impl_shuffle1_dyn!(u16x2);
impl_shuffle1_dyn!(u16x4);
impl_shuffle1_dyn!(u16x8);
impl_shuffle1_dyn!(u16x16);
impl_shuffle1_dyn!(u16x32);

// 32-bit lanes
impl_shuffle1_dyn!(u32x2);
impl_shuffle1_dyn!(u32x4);
impl_shuffle1_dyn!(u32x8);
impl_shuffle1_dyn!(u32x16);

// 64-bit lanes
impl_shuffle1_dyn!(u64x2);
impl_shuffle1_dyn!(u64x4);
impl_shuffle1_dyn!(u64x8);

// Pointer-sized lanes
impl_shuffle1_dyn!(usizex2);
impl_shuffle1_dyn!(usizex4);
impl_shuffle1_dyn!(usizex8);

// 128-bit lanes
impl_shuffle1_dyn!(u128x1);
impl_shuffle1_dyn!(u128x2);
impl_shuffle1_dyn!(u128x4);
303 
304 // Implementation for non-unsigned vector types
// Generates `Shuffle1Dyn` for `$id` by delegating to the implementation of the
// unsigned vector `$uid`, which is also used as the index type. The value is
// reinterpreted as `$uid`, shuffled there, and reinterpreted back.
macro_rules! impl_shuffle1_dyn_non_u {
    ($id:ident, $uid:ident) => {
        impl Shuffle1Dyn for $id {
            type Indices = $uid;
            #[inline]
            fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
                // SAFETY: `transmute` only compiles when `Self` and `$uid`
                // have the same size; the bits are moved between lanes but
                // never modified.
                let raw = unsafe { crate::mem::transmute::<Self, $uid>(self) };
                let shuffled = raw.shuffle1_dyn(indices);
                // SAFETY: same size argument as above, in the other direction.
                unsafe { crate::mem::transmute::<$uid, Self>(shuffled) }
            }
        }
    };
}
319 
// Each invocation pairs a vector type with the unsigned vector type whose
// `Shuffle1Dyn` impl it delegates to; that unsigned type is also its index
// type.

// `i*` vectors
impl_shuffle1_dyn_non_u!(i8x2, u8x2);
impl_shuffle1_dyn_non_u!(i8x4, u8x4);
impl_shuffle1_dyn_non_u!(i8x8, u8x8);
impl_shuffle1_dyn_non_u!(i8x16, u8x16);
impl_shuffle1_dyn_non_u!(i8x32, u8x32);
impl_shuffle1_dyn_non_u!(i8x64, u8x64);

impl_shuffle1_dyn_non_u!(i16x2, u16x2);
impl_shuffle1_dyn_non_u!(i16x4, u16x4);
impl_shuffle1_dyn_non_u!(i16x8, u16x8);
impl_shuffle1_dyn_non_u!(i16x16, u16x16);
impl_shuffle1_dyn_non_u!(i16x32, u16x32);

impl_shuffle1_dyn_non_u!(i32x2, u32x2);
impl_shuffle1_dyn_non_u!(i32x4, u32x4);
impl_shuffle1_dyn_non_u!(i32x8, u32x8);
impl_shuffle1_dyn_non_u!(i32x16, u32x16);

impl_shuffle1_dyn_non_u!(i64x2, u64x2);
impl_shuffle1_dyn_non_u!(i64x4, u64x4);
impl_shuffle1_dyn_non_u!(i64x8, u64x8);

impl_shuffle1_dyn_non_u!(isizex2, usizex2);
impl_shuffle1_dyn_non_u!(isizex4, usizex4);
impl_shuffle1_dyn_non_u!(isizex8, usizex8);

impl_shuffle1_dyn_non_u!(i128x1, u128x1);
impl_shuffle1_dyn_non_u!(i128x2, u128x2);
impl_shuffle1_dyn_non_u!(i128x4, u128x4);

// `m*` vectors (presumably the mask types — confirm against their definitions)
impl_shuffle1_dyn_non_u!(m8x2, u8x2);
impl_shuffle1_dyn_non_u!(m8x4, u8x4);
impl_shuffle1_dyn_non_u!(m8x8, u8x8);
impl_shuffle1_dyn_non_u!(m8x16, u8x16);
impl_shuffle1_dyn_non_u!(m8x32, u8x32);
impl_shuffle1_dyn_non_u!(m8x64, u8x64);

impl_shuffle1_dyn_non_u!(m16x2, u16x2);
impl_shuffle1_dyn_non_u!(m16x4, u16x4);
impl_shuffle1_dyn_non_u!(m16x8, u16x8);
impl_shuffle1_dyn_non_u!(m16x16, u16x16);
impl_shuffle1_dyn_non_u!(m16x32, u16x32);

impl_shuffle1_dyn_non_u!(m32x2, u32x2);
impl_shuffle1_dyn_non_u!(m32x4, u32x4);
impl_shuffle1_dyn_non_u!(m32x8, u32x8);
impl_shuffle1_dyn_non_u!(m32x16, u32x16);

impl_shuffle1_dyn_non_u!(m64x2, u64x2);
impl_shuffle1_dyn_non_u!(m64x4, u64x4);
impl_shuffle1_dyn_non_u!(m64x8, u64x8);

impl_shuffle1_dyn_non_u!(msizex2, usizex2);
impl_shuffle1_dyn_non_u!(msizex4, usizex4);
impl_shuffle1_dyn_non_u!(msizex8, usizex8);

impl_shuffle1_dyn_non_u!(m128x1, u128x1);
impl_shuffle1_dyn_non_u!(m128x2, u128x2);
impl_shuffle1_dyn_non_u!(m128x4, u128x4);

// `f*` vectors, shuffled through the unsigned vector of the same lane width
impl_shuffle1_dyn_non_u!(f32x2, u32x2);
impl_shuffle1_dyn_non_u!(f32x4, u32x4);
impl_shuffle1_dyn_non_u!(f32x8, u32x8);
impl_shuffle1_dyn_non_u!(f32x16, u32x16);

impl_shuffle1_dyn_non_u!(f64x2, u64x2);
impl_shuffle1_dyn_non_u!(f64x4, u64x4);
impl_shuffle1_dyn_non_u!(f64x8, u64x8);
388 
// Implementation for pointer vector types
// Generates `Shuffle1Dyn` for the generic vector `$id<T>` by delegating to the
// implementation of `$uid`, which is also used as the index type. The value is
// reinterpreted as `$uid`, shuffled there, and reinterpreted back.
macro_rules! impl_shuffle1_dyn_ptr {
    ($id:ident, $uid:ident) => {
        impl<T> Shuffle1Dyn for $id<T> {
            type Indices = $uid;
            #[inline]
            fn shuffle1_dyn(self, indices: Self::Indices) -> Self {
                // SAFETY: `transmute` only compiles when `Self` and `$uid`
                // have the same size; the bits are moved between lanes but
                // never modified.
                let raw = unsafe { crate::mem::transmute::<Self, $uid>(self) };
                let shuffled = raw.shuffle1_dyn(indices);
                // SAFETY: same size argument as above, in the other direction.
                unsafe { crate::mem::transmute::<$uid, Self>(shuffled) }
            }
        }
    };
}
404 
// Pointer vectors are shuffled through, and indexed by, the `usize` vector
// with the same number of lanes.

// `cptr*` vectors
impl_shuffle1_dyn_ptr!(cptrx2, usizex2);
impl_shuffle1_dyn_ptr!(cptrx4, usizex4);
impl_shuffle1_dyn_ptr!(cptrx8, usizex8);

// `mptr*` vectors
impl_shuffle1_dyn_ptr!(mptrx2, usizex2);
impl_shuffle1_dyn_ptr!(mptrx4, usizex4);
impl_shuffle1_dyn_ptr!(mptrx8, usizex8);
412