1 use super::*; 2 #[allow(unused_imports)] 3 use super::{ 4 simd_eq, simd_ne, simd_lt, simd_le, simd_gt, simd_ge, 5 simd_shuffle2, simd_shuffle4, simd_shuffle8, simd_shuffle16, 6 simd_insert, simd_extract, 7 simd_cast, 8 simd_add, simd_sub, simd_mul, simd_div, simd_shl, simd_shr, simd_and, simd_or, simd_xor, 9 10 Unalign, bitcast, 11 }; 12 use core::{mem,ops}; 13 14 #[cfg(any(target_arch = "x86", 15 target_arch = "x86_64"))] 16 use x86::sse2::common; 17 #[cfg(any(target_arch = "arm"))] 18 use arm::neon::common; 19 #[cfg(any(target_arch = "aarch64"))] 20 use aarch64::neon::common; 21 22 macro_rules! basic_impls { 23 ($( 24 $name: ident: 25 $elem: ident, $bool: ident, $shuffle: ident, $length: expr, $($first: ident),* | $($last: ident),*; 26 )*) => { 27 $(impl $name { 28 /// Create a new instance. 29 #[inline] 30 pub const fn new($($first: $elem),*, $($last: $elem),*) -> $name { 31 $name($($first),*, $($last),*) 32 } 33 34 /// Create a new instance where every lane has value `x`. 35 #[inline] 36 pub const fn splat(x: $elem) -> $name { 37 $name($({ #[allow(dead_code)] struct $first; x }),*, 38 $({ #[allow(dead_code)] struct $last; x }),*) 39 } 40 41 /// Compare for equality. 42 #[inline] 43 pub fn eq(self, other: Self) -> $bool { 44 unsafe {simd_eq(self, other)} 45 } 46 /// Compare for equality. 47 #[inline] 48 pub fn ne(self, other: Self) -> $bool { 49 unsafe {simd_ne(self, other)} 50 } 51 /// Compare for equality. 52 #[inline] 53 pub fn lt(self, other: Self) -> $bool { 54 unsafe {simd_lt(self, other)} 55 } 56 /// Compare for equality. 57 #[inline] 58 pub fn le(self, other: Self) -> $bool { 59 unsafe {simd_le(self, other)} 60 } 61 /// Compare for equality. 62 #[inline] 63 pub fn gt(self, other: Self) -> $bool { 64 unsafe {simd_gt(self, other)} 65 } 66 /// Compare for equality. 67 #[inline] 68 pub fn ge(self, other: Self) -> $bool { 69 unsafe {simd_ge(self, other)} 70 } 71 72 /// Extract the value of the `idx`th lane of `self`. 73 /// 74 /// # Panics 75 /// 76 /// `extract` will panic if `idx` is out of bounds. 77 #[inline] 78 pub fn extract(self, idx: u32) -> $elem { 79 assert!(idx < $length); 80 unsafe {simd_extract(self, idx)} 81 } 82 /// Return a new vector where the `idx`th lane is replaced 83 /// by `elem`. 84 /// 85 /// # Panics 86 /// 87 /// `replace` will panic if `idx` is out of bounds. 88 #[inline] 89 pub fn replace(self, idx: u32, elem: $elem) -> Self { 90 assert!(idx < $length); 91 unsafe {simd_insert(self, idx, elem)} 92 } 93 94 /// Load a new value from the `idx`th position of `array`. 95 /// 96 /// This is equivalent to the following, but is possibly 97 /// more efficient: 98 /// 99 /// ```rust,ignore 100 /// Self::new(array[idx], array[idx + 1], ...) 101 /// ``` 102 /// 103 /// # Panics 104 /// 105 /// `load` will panic if `idx` is out of bounds in 106 /// `array`, or if `array[idx..]` is too short. 107 #[inline] 108 pub fn load(array: &[$elem], idx: usize) -> Self { 109 let data = &array[idx..idx + $length]; 110 let loaded = unsafe { 111 *(data.as_ptr() as *const Unalign<Self>) 112 }; 113 loaded.0 114 } 115 116 /// Store the elements of `self` to `array`, starting at 117 /// the `idx`th position. 118 /// 119 /// This is equivalent to the following, but is possibly 120 /// more efficient: 121 /// 122 /// ```rust,ignore 123 /// array[i] = self.extract(0); 124 /// array[i + 1] = self.extract(1); 125 /// // ... 126 /// ``` 127 /// 128 /// # Panics 129 /// 130 /// `store` will panic if `idx` is out of bounds in 131 /// `array`, or if `array[idx...]` is too short. 132 #[inline] 133 pub fn store(self, array: &mut [$elem], idx: usize) { 134 let place = &mut array[idx..idx + $length]; 135 unsafe { 136 *(place.as_mut_ptr() as *mut Unalign<Self>) = Unalign(self) 137 } 138 } 139 })* 140 } 141 } 142 143 basic_impls! { 144 u32x4: u32, bool32ix4, simd_shuffle4, 4, x0, x1 | x2, x3; 145 i32x4: i32, bool32ix4, simd_shuffle4, 4, x0, x1 | x2, x3; 146 f32x4: f32, bool32fx4, simd_shuffle4, 4, x0, x1 | x2, x3; 147 148 u16x8: u16, bool16ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; 149 i16x8: i16, bool16ix8, simd_shuffle8, 8, x0, x1, x2, x3 | x4, x5, x6, x7; 150 151 u8x16: u8, bool8ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15; 152 i8x16: i8, bool8ix16, simd_shuffle16, 16, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15; 153 } 154 155 macro_rules! bool_impls { 156 ($( 157 $name: ident: 158 $elem: ident, $repr: ident, $repr_elem: ident, $length: expr, $all: ident, $any: ident, 159 $($first: ident),* | $($last: ident),* 160 [$(#[$cvt_meta: meta] $cvt: ident -> $cvt_to: ident),*]; 161 )*) => { 162 $(impl $name { 163 /// Convert to integer representation. 164 #[inline] 165 pub fn to_repr(self) -> $repr { 166 unsafe {mem::transmute(self)} 167 } 168 /// Convert from integer representation. 169 #[inline] 170 #[inline] 171 pub fn from_repr(x: $repr) -> Self { 172 unsafe {mem::transmute(x)} 173 } 174 175 /// Create a new instance. 176 #[inline] 177 pub fn new($($first: bool),*, $($last: bool),*) -> $name { 178 unsafe { 179 // negate everything together 180 simd_sub($name::splat(false), 181 $name($( ($first as $repr_elem) ),*, 182 $( ($last as $repr_elem) ),*)) 183 } 184 } 185 186 /// Create a new instance where every lane has value `x`. 187 #[allow(unused_variables)] 188 #[inline] 189 pub fn splat(x: bool) -> $name { 190 let x = if x {!(0 as $repr_elem)} else {0}; 191 $name($({ let $first = (); x}),*, 192 $({ let $last = (); x}),*) 193 } 194 195 /// Extract the value of the `idx`th lane of `self`. 196 /// 197 /// # Panics 198 /// 199 /// `extract` will panic if `idx` is out of bounds. 200 #[inline] 201 pub fn extract(self, idx: u32) -> bool { 202 assert!(idx < $length); 203 unsafe {simd_extract(self.to_repr(), idx) != 0} 204 } 205 /// Return a new vector where the `idx`th lane is replaced 206 /// by `elem`. 207 /// 208 /// # Panics 209 /// 210 /// `replace` will panic if `idx` is out of bounds. 211 #[inline] 212 pub fn replace(self, idx: u32, elem: bool) -> Self { 213 assert!(idx < $length); 214 let x = if elem {!(0 as $repr_elem)} else {0}; 215 unsafe {Self::from_repr(simd_insert(self.to_repr(), idx, x))} 216 } 217 /// Select between elements of `then` and `else_`, based on 218 /// the corresponding element of `self`. 219 /// 220 /// This is equivalent to the following, but is possibly 221 /// more efficient: 222 /// 223 /// ```rust,ignore 224 /// T::new(if self.extract(0) { then.extract(0) } else { else_.extract(0) }, 225 /// if self.extract(1) { then.extract(1) } else { else_.extract(1) }, 226 /// ...) 227 /// ``` 228 #[inline] 229 pub fn select<T: Simd<Bool = $name>>(self, then: T, else_: T) -> T { 230 let then: $repr = bitcast(then); 231 let else_: $repr = bitcast(else_); 232 bitcast((then & self.to_repr()) | (else_ & (!self).to_repr())) 233 } 234 235 /// Check if every element of `self` is true. 236 /// 237 /// This is equivalent to the following, but is possibly 238 /// more efficient: 239 /// 240 /// ```rust,ignore 241 /// self.extract(0) && self.extract(1) && ... 242 /// ``` 243 #[inline] 244 pub fn all(self) -> bool { 245 common::$all(self) 246 } 247 /// Check if any element of `self` is true. 248 /// 249 /// This is equivalent to the following, but is possibly 250 /// more efficient: 251 /// 252 /// ```rust,ignore 253 /// self.extract(0) || self.extract(1) || ... 254 /// ``` 255 #[inline] 256 pub fn any(self) -> bool { 257 common::$any(self) 258 } 259 260 $( 261 #[$cvt_meta] 262 #[inline] 263 pub fn $cvt(self) -> $cvt_to { 264 bitcast(self) 265 } 266 )* 267 } 268 impl ops::Not for $name { 269 type Output = Self; 270 271 #[inline] 272 fn not(self) -> Self { 273 Self::from_repr($repr::splat(!(0 as $repr_elem)) ^ self.to_repr()) 274 } 275 } 276 )* 277 } 278 } 279 280 bool_impls! { 281 bool32ix4: bool32i, i32x4, i32, 4, bool32ix4_all, bool32ix4_any, x0, x1 | x2, x3 282 [/// Convert `self` to a boolean vector for interacting with floating point vectors. 283 to_f -> bool32fx4]; 284 bool32fx4: bool32f, i32x4, i32, 4, bool32fx4_all, bool32fx4_any, x0, x1 | x2, x3 285 [/// Convert `self` to a boolean vector for interacting with integer vectors. 286 to_i -> bool32ix4]; 287 288 bool16ix8: bool16i, i16x8, i16, 8, bool16ix8_all, bool16ix8_any, x0, x1, x2, x3 | x4, x5, x6, x7 []; 289 290 bool8ix16: bool8i, i8x16, i8, 16, bool8ix16_all, bool8ix16_any, x0, x1, x2, x3, x4, x5, x6, x7 | x8, x9, x10, x11, x12, x13, x14, x15 []; 291 } 292 293 impl u32x4 { 294 /// Convert each lane to a signed integer. 295 #[inline] to_i32(self) -> i32x4296 pub fn to_i32(self) -> i32x4 { 297 unsafe {simd_cast(self)} 298 } 299 /// Convert each lane to a 32-bit float. 300 #[inline] to_f32(self) -> f32x4301 pub fn to_f32(self) -> f32x4 { 302 unsafe {simd_cast(self)} 303 } 304 } 305 impl i32x4 { 306 /// Convert each lane to an unsigned integer. 307 #[inline] to_u32(self) -> u32x4308 pub fn to_u32(self) -> u32x4 { 309 unsafe {simd_cast(self)} 310 } 311 /// Convert each lane to a 32-bit float. 312 #[inline] to_f32(self) -> f32x4313 pub fn to_f32(self) -> f32x4 { 314 unsafe {simd_cast(self)} 315 } 316 } 317 impl f32x4 { 318 /// Compute the square root of each lane. 319 #[inline] sqrt(self) -> Self320 pub fn sqrt(self) -> Self { 321 common::f32x4_sqrt(self) 322 } 323 /// Compute an approximation to the reciprocal of the square root 324 /// of `self`, that is, `f32::splat(1.0) / self.sqrt()`. 325 /// 326 /// The accuracy of this approximation is platform dependent. 327 #[inline] approx_rsqrt(self) -> Self328 pub fn approx_rsqrt(self) -> Self { 329 common::f32x4_approx_rsqrt(self) 330 } 331 /// Compute an approximation to the reciprocal of `self`, that is, 332 /// `f32::splat(1.0) / self`. 333 /// 334 /// The accuracy of this approximation is platform dependent. 335 #[inline] approx_reciprocal(self) -> Self336 pub fn approx_reciprocal(self) -> Self { 337 common::f32x4_approx_reciprocal(self) 338 } 339 /// Compute the lane-wise maximum of `self` and `other`. 340 /// 341 /// This is equivalent to the following, but is possibly more 342 /// efficient: 343 /// 344 /// ```rust,ignore 345 /// f32x4::new(self.extract(0).max(other.extract(0)), 346 /// self.extract(1).max(other.extract(1)), 347 /// ...) 348 /// ``` 349 #[inline] max(self, other: Self) -> Self350 pub fn max(self, other: Self) -> Self { 351 common::f32x4_max(self, other) 352 } 353 /// Compute the lane-wise minimum of `self` and `other`. 354 /// 355 /// This is equivalent to the following, but is possibly more 356 /// efficient: 357 /// 358 /// ```rust,ignore 359 /// f32x4::new(self.extract(0).min(other.extract(0)), 360 /// self.extract(1).min(other.extract(1)), 361 /// ...) 362 /// ``` 363 #[inline] min(self, other: Self) -> Self364 pub fn min(self, other: Self) -> Self { 365 common::f32x4_min(self, other) 366 } 367 /// Convert each lane to a signed integer. 368 #[inline] to_i32(self) -> i32x4369 pub fn to_i32(self) -> i32x4 { 370 unsafe {simd_cast(self)} 371 } 372 /// Convert each lane to an unsigned integer. 373 #[inline] to_u32(self) -> u32x4374 pub fn to_u32(self) -> u32x4 { 375 unsafe {simd_cast(self)} 376 } 377 } 378 379 impl i16x8 { 380 /// Convert each lane to an unsigned integer. 381 #[inline] to_u16(self) -> u16x8382 pub fn to_u16(self) -> u16x8 { 383 unsafe {simd_cast(self)} 384 } 385 } 386 impl u16x8 { 387 /// Convert each lane to a signed integer. 388 #[inline] to_i16(self) -> i16x8389 pub fn to_i16(self) -> i16x8 { 390 unsafe {simd_cast(self)} 391 } 392 } 393 394 impl i8x16 { 395 /// Convert each lane to an unsigned integer. 396 #[inline] to_u8(self) -> u8x16397 pub fn to_u8(self) -> u8x16 { 398 unsafe {simd_cast(self)} 399 } 400 } 401 impl u8x16 { 402 /// Convert each lane to a signed integer. 403 #[inline] to_i8(self) -> i8x16404 pub fn to_i8(self) -> i8x16 { 405 unsafe {simd_cast(self)} 406 } 407 } 408 409 410 macro_rules! neg_impls { 411 ($zero: expr, $($ty: ident,)*) => { 412 $(impl ops::Neg for $ty { 413 type Output = Self; 414 fn neg(self) -> Self { 415 $ty::splat($zero) - self 416 } 417 })* 418 } 419 } 420 neg_impls!{ 421 0, 422 i32x4, 423 i16x8, 424 i8x16, 425 } 426 neg_impls! { 427 0.0, 428 f32x4, 429 } 430 macro_rules! not_impls { 431 ($($ty: ident,)*) => { 432 $(impl ops::Not for $ty { 433 type Output = Self; 434 fn not(self) -> Self { 435 $ty::splat(!0) ^ self 436 } 437 })* 438 } 439 } 440 not_impls! { 441 i32x4, 442 i16x8, 443 i8x16, 444 u32x4, 445 u16x8, 446 u8x16, 447 } 448 449 macro_rules! operators { 450 ($($trayt: ident ($func: ident, $method: ident): $($ty: ty),*;)*) => { 451 $( 452 $(impl ops::$trayt for $ty { 453 type Output = Self; 454 #[inline] 455 fn $method(self, x: Self) -> Self { 456 unsafe {$func(self, x)} 457 } 458 })* 459 )* 460 } 461 } 462 operators! { 463 Add (simd_add, add): 464 i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, 465 f32x4; 466 Sub (simd_sub, sub): 467 i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, 468 f32x4; 469 Mul (simd_mul, mul): 470 i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, 471 f32x4; 472 Div (simd_div, div): f32x4; 473 474 BitAnd (simd_and, bitand): 475 i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, 476 bool8ix16, bool16ix8, bool32ix4, 477 bool32fx4; 478 BitOr (simd_or, bitor): 479 i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, 480 bool8ix16, bool16ix8, bool32ix4, 481 bool32fx4; 482 BitXor (simd_xor, bitxor): 483 i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, 484 bool8ix16, bool16ix8, bool32ix4, 485 bool32fx4; 486 } 487 488 macro_rules! shift_one { 489 ($ty: ident, $($by: ident),*) => { 490 $( 491 impl ops::Shl<$by> for $ty { 492 type Output = Self; 493 #[inline] 494 fn shl(self, other: $by) -> Self { 495 unsafe { simd_shl(self, $ty::splat(other as <$ty as Simd>::Elem)) } 496 } 497 } 498 impl ops::Shr<$by> for $ty { 499 type Output = Self; 500 #[inline] 501 fn shr(self, other: $by) -> Self { 502 unsafe {simd_shr(self, $ty::splat(other as <$ty as Simd>::Elem))} 503 } 504 } 505 )* 506 } 507 } 508 509 macro_rules! shift { 510 ($($ty: ident),*) => { 511 $(shift_one! { 512 $ty, 513 u8, u16, u32, u64, usize, 514 i8, i16, i32, i64, isize 515 })* 516 } 517 } 518 shift! { 519 i8x16, u8x16, i16x8, u16x8, i32x4, u32x4 520 } 521