1 //! Streaming SIMD Extensions 4.1 (SSE4.1)
2
3 use crate::{
4 core_arch::{simd::*, simd_llvm::*, x86::*},
5 mem::transmute,
6 };
7
8 #[cfg(test)]
9 use stdarch_test::assert_instr;
10
// SSE4 rounding constants. The `_MM_FROUND_TO_*` mode bits may be combined
// with the exception-control bits (`_MM_FROUND_RAISE_EXC` / `_MM_FROUND_NO_EXC`)
// via bitwise OR; the named combinations below are provided for convenience.
/// round to nearest
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00;
/// round down
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_NEG_INF: i32 = 0x01;
/// round up
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_POS_INF: i32 = 0x02;
/// truncate
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_ZERO: i32 = 0x03;
/// use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_CUR_DIRECTION: i32 = 0x04;
/// do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_RAISE_EXC: i32 = 0x00;
/// suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NO_EXC: i32 = 0x08;
/// round to nearest and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NINT: i32 = 0x00;
/// round down and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_FLOOR: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF;
/// round up and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_CEIL: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF;
/// truncate and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TRUNC: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO;
/// use MXCSR.RC and do not suppress exceptions; see
/// `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_RINT: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION;
/// use MXCSR.RC and suppress exceptions; see `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NEARBYINT: i32 = _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION;
52
/// Blend packed 8-bit integers from `a` and `b` using `mask`
///
/// The high bit of each corresponding mask byte determines the selection.
/// If the high bit is set the element of `b` is selected. The element
/// of `a` is selected otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i {
    // `pblendvb`: per byte, dst = (mask sign bit set) ? b : a.
    transmute(pblendvb(a.as_i8x16(), b.as_i8x16(), mask.as_i8x16()))
}
67
/// Blend packed 16-bit integers from `a` and `b` using the mask `IMM8`.
///
/// The mask bits determine the selection. A clear bit selects the
/// corresponding element of `a`, and a set bit the corresponding
/// element of `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_epi16)
#[inline]
#[target_feature(enable = "sse4.1")]
// Note: LLVM7 prefers the single-precision floating-point domain when possible
// see https://bugs.llvm.org/show_bug.cgi?id=38195
// #[cfg_attr(test, assert_instr(pblendw, IMM8 = 0xF0))]
#[cfg_attr(test, assert_instr(blendps, IMM8 = 0xF0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    // The immediate must fit in 8 bits: one select bit per 16-bit lane.
    static_assert_imm8!(IMM8);
    transmute(pblendw(a.as_i16x8(), b.as_i16x8(), IMM8 as u8))
}
87
/// Blend packed double-precision (64-bit) floating-point elements from `a`
/// and `b` using `mask`
///
/// The sign bit of each 64-bit mask lane selects: set selects `b`,
/// clear selects `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendvpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
    blendvpd(a, b, mask)
}
99
/// Blend packed single-precision (32-bit) floating-point elements from `a`
/// and `b` using `mask`
///
/// The sign bit of each 32-bit mask lane selects: set selects `b`,
/// clear selects `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blendv_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendvps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
    blendvps(a, b, mask)
}
111
/// Blend packed double-precision (64-bit) floating-point elements from `a`
/// and `b` using control mask `IMM2`
///
/// A clear bit selects the corresponding lane of `a`, a set bit the
/// corresponding lane of `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
// Note: LLVM7 prefers the single-precision floating-point domain when possible
// see https://bugs.llvm.org/show_bug.cgi?id=38195
// #[cfg_attr(test, assert_instr(blendpd, IMM2 = 0b10))]
#[cfg_attr(test, assert_instr(blendps, IMM2 = 0b10))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d {
    // Two f64 lanes, hence a 2-bit immediate.
    static_assert_imm2!(IMM2);
    blendpd(a, b, IMM2 as u8)
}
128
/// Blend packed single-precision (32-bit) floating-point elements from `a`
/// and `b` using mask `IMM4`
///
/// A clear bit selects the corresponding lane of `a`, a set bit the
/// corresponding lane of `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_blend_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendps, IMM4 = 0b0101))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 {
    // Four f32 lanes, hence a 4-bit immediate.
    static_assert_imm4!(IMM4);
    blendps(a, b, IMM4 as u8)
}
142
/// Extracts a single-precision (32-bit) floating-point element from `a`,
/// selected with `IMM8`. The returned `i32` stores the float's bit-pattern,
/// and may be converted back to a floating point number via casting.
///
/// # Example
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let mut float_store = vec![1.0, 1.0, 2.0, 3.0];
/// unsafe {
///     let simd_floats = _mm_set_ps(2.5, 5.0, 7.5, 10.0);
///     let x: i32 = _mm_extract_ps::<2>(simd_floats);
///     float_store.push(f32::from_bits(x as u32));
/// }
/// assert_eq!(float_store, vec![1.0, 1.0, 2.0, 3.0, 5.0]);
/// # }
/// # unsafe { worker() }
/// # }
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(extractps, IMM8 = 0)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32 {
    // Only 2 immediate bits are meaningful: one of four f32 lanes.
    static_assert_imm2!(IMM8);
    // Reinterpret the extracted f32's bits as an i32 (no numeric conversion).
    transmute(simd_extract::<_, f32>(a, IMM8 as u32))
}
182
/// Extracts an 8-bit integer from `a`, selected with `IMM8`. Returns a 32-bit
/// integer containing the zero-extended integer data.
///
/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pextrb, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_epi8<const IMM8: i32>(a: __m128i) -> i32 {
    // 4 immediate bits select one of sixteen byte lanes.
    static_assert_imm4!(IMM8);
    // Extract as u8 so the widening to i32 is a zero-extension, matching pextrb.
    simd_extract::<_, u8>(a.as_u8x16(), IMM8 as u32) as i32
}
198
/// Extracts a 32-bit integer from `a` selected with `IMM8`
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(extractps, IMM8 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_epi32<const IMM8: i32>(a: __m128i) -> i32 {
    // 2 immediate bits select one of four 32-bit lanes.
    static_assert_imm2!(IMM8);
    simd_extract::<_, i32>(a.as_i32x4(), IMM8 as u32)
}
214
/// Select a single value in `a` to store at some position in `b`,
/// Then zero elements according to `IMM8`.
///
/// `IMM8` specifies which bits from operand `a` will be copied, which bits in
/// the result they will be copied to, and which bits in the result will be
/// cleared. The following assignments are made:
///
/// * Bits `[7:6]` specify the bits to copy from operand `a`:
///     - `00`: Selects bits `[31:0]` from operand `a`.
///     - `01`: Selects bits `[63:32]` from operand `a`.
///     - `10`: Selects bits `[95:64]` from operand `a`.
///     - `11`: Selects bits `[127:96]` from operand `a`.
///
/// * Bits `[5:4]` specify the bits in the result to which the selected bits
/// from operand `a` are copied:
///     - `00`: Copies the selected bits from `a` to result bits `[31:0]`.
///     - `01`: Copies the selected bits from `a` to result bits `[63:32]`.
///     - `10`: Copies the selected bits from `a` to result bits `[95:64]`.
///     - `11`: Copies the selected bits from `a` to result bits `[127:96]`.
///
/// * Bits `[3:0]`: If any of these bits are set, the corresponding result
/// element is cleared.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(insertps, IMM8 = 0b1010))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
    // All 8 immediate bits are meaningful (source, destination, zero mask).
    static_assert_imm8!(IMM8);
    insertps(a, b, IMM8 as u8)
}
248
/// Returns a copy of `a` with the 8-bit integer from `i` inserted at a
/// location specified by `IMM8`.
///
/// Only the low 8 bits of `i` are used.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pinsrb, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi8<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
    // 4 immediate bits select one of sixteen byte lanes.
    static_assert_imm4!(IMM8);
    transmute(simd_insert(a.as_i8x16(), IMM8 as u32, i as i8))
}
262
/// Returns a copy of `a` with the 32-bit integer from `i` inserted at a
/// location specified by `IMM8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pinsrd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
    // 2 immediate bits select one of four 32-bit lanes.
    static_assert_imm2!(IMM8);
    transmute(simd_insert(a.as_i32x4(), IMM8 as u32, i))
}
276
/// Compares packed 8-bit integers in `a` and `b` and returns packed maximum
/// values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise signed max via the `pmaxsb` intrinsic.
    transmute(pmaxsb(a.as_i8x16(), b.as_i8x16()))
}
288
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
/// maximum.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise unsigned max via the `pmaxuw` intrinsic.
    transmute(pmaxuw(a.as_u16x8(), b.as_u16x8()))
}
300
/// Compares packed 32-bit integers in `a` and `b`, and returns packed maximum
/// values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise signed max via the `pmaxsd` intrinsic.
    transmute(pmaxsd(a.as_i32x4(), b.as_i32x4()))
}
312
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
/// maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise unsigned max via the `pmaxud` intrinsic.
    transmute(pmaxud(a.as_u32x4(), b.as_u32x4()))
}
324
/// Compares packed 8-bit integers in `a` and `b` and returns packed minimum
/// values in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise signed min via the `pminsb` intrinsic.
    transmute(pminsb(a.as_i8x16(), b.as_i8x16()))
}
336
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
/// minimum.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise unsigned min via the `pminuw` intrinsic.
    transmute(pminuw(a.as_u16x8(), b.as_u16x8()))
}
348
/// Compares packed 32-bit integers in `a` and `b`, and returns packed minimum
/// values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise signed min via the `pminsd` intrinsic.
    transmute(pminsd(a.as_i32x4(), b.as_i32x4()))
}
360
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
/// minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise unsigned min via the `pminud` intrinsic.
    transmute(pminud(a.as_u32x4(), b.as_u32x4()))
}
372
/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using unsigned saturation
///
/// Values below 0 saturate to 0 and values above `u16::MAX` saturate to
/// `u16::MAX`; the eight results from `a` occupy the low half of the output.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(packusdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(packusdw(a.as_i32x4(), b.as_i32x4()))
}
384
/// Compares packed 64-bit integers in `a` and `b` for equality
///
/// Each lane of the result is all-ones (`-1`) where the lanes are equal and
/// all-zeros otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2()))
}
395
/// Sign extend packed 8-bit integers in `a` to packed 16-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
    let a = a.as_i8x16();
    // Keep only the low eight bytes, then sign-extend each to 16 bits.
    let a: i8x8 = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute(simd_cast::<_, i16x8>(a))
}
408
/// Sign extend packed 8-bit integers in `a` to packed 32-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
    let a = a.as_i8x16();
    // Keep only the low four bytes, then sign-extend each to 32 bits.
    let a: i8x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]);
    transmute(simd_cast::<_, i32x4>(a))
}
421
/// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed
/// 64-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi8_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
    let a = a.as_i8x16();
    // Keep only the low two bytes, then sign-extend each to 64 bits.
    let a: i8x2 = simd_shuffle2!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
435
/// Sign extend packed 16-bit integers in `a` to packed 32-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
    let a = a.as_i16x8();
    // Keep only the low four words, then sign-extend each to 32 bits.
    let a: i16x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]);
    transmute(simd_cast::<_, i32x4>(a))
}
448
/// Sign extend packed 16-bit integers in `a` to packed 64-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi16_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
    let a = a.as_i16x8();
    // Keep only the low two words, then sign-extend each to 64 bits.
    let a: i16x2 = simd_shuffle2!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
461
/// Sign extend packed 32-bit integers in `a` to packed 64-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
    let a = a.as_i32x4();
    // Keep only the low two dwords, then sign-extend each to 64 bits.
    let a: i32x2 = simd_shuffle2!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
474
/// Zero-extends packed unsigned 8-bit integers in `a` to packed 16-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
    let a = a.as_u8x16();
    // Keep only the low eight bytes; the unsigned cast zero-extends to 16 bits.
    let a: u8x8 = simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute(simd_cast::<_, i16x8>(a))
}
487
/// Zero-extends packed unsigned 8-bit integers in `a` to packed 32-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
    let a = a.as_u8x16();
    // Keep only the low four bytes; the unsigned cast zero-extends to 32 bits.
    let a: u8x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]);
    transmute(simd_cast::<_, i32x4>(a))
}
500
/// Zero-extends packed unsigned 8-bit integers in `a` to packed 64-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu8_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
    let a = a.as_u8x16();
    // Keep only the low two bytes; the unsigned cast zero-extends to 64 bits.
    let a: u8x2 = simd_shuffle2!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
513
/// Zero-extends packed unsigned 16-bit integers in `a`
/// to packed 32-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
    let a = a.as_u16x8();
    // Keep only the low four words; the unsigned cast zero-extends to 32 bits.
    let a: u16x4 = simd_shuffle4!(a, a, [0, 1, 2, 3]);
    transmute(simd_cast::<_, i32x4>(a))
}
527
/// Zero-extends packed unsigned 16-bit integers in `a`
/// to packed 64-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu16_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
    let a = a.as_u16x8();
    // Keep only the low two words; the unsigned cast zero-extends to 64 bits.
    let a: u16x2 = simd_shuffle2!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
541
/// Zero-extends packed unsigned 32-bit integers in `a`
/// to packed 64-bit integers
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepu32_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
    let a = a.as_u32x4();
    // Keep only the low two dwords; the unsigned cast zero-extends to 64 bits.
    let a: u32x2 = simd_shuffle2!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
555
/// Returns the dot product of two __m128d vectors.
///
/// `IMM8[1:0]` is the broadcast mask, and `IMM8[5:4]` is the condition mask.
/// If a condition mask bit is zero, the corresponding multiplication is
/// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of
/// the dot product will be stored in the return value component. Otherwise if
/// the broadcast mask bit is zero then the return component will be zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(dppd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_dp_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
    // The full 8-bit immediate is passed through to `dppd`; the hardware
    // ignores the bits not described above.
    static_assert_imm8!(IMM8);
    dppd(a, b, IMM8 as u8)
}
574
/// Returns the dot product of two __m128 vectors.
///
/// `IMM8[3:0]` is the broadcast mask, and `IMM8[7:4]` is the condition mask.
/// If a condition mask bit is zero, the corresponding multiplication is
/// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of
/// the dot product will be stored in the return value component. Otherwise if
/// the broadcast mask bit is zero then the return component will be zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_dp_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(dpps, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_dp_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
    // All 8 immediate bits are meaningful (4 condition + 4 broadcast bits).
    static_assert_imm8!(IMM8);
    dpps(a, b, IMM8 as u8)
}
593
/// Round the packed double-precision (64-bit) floating-point elements in `a`
/// down to an integer value, and stores the results as packed double-precision
/// floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d {
    // Lowered to `roundpd` with the round-down immediate (see assert_instr).
    simd_floor(a)
}
606
/// Round the packed single-precision (32-bit) floating-point elements in `a`
/// down to an integer value, and stores the results as packed single-precision
/// floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 {
    // Lowered to `roundps` with the round-down immediate (see assert_instr).
    simd_floor(a)
}
619
/// Round the lower double-precision (64-bit) floating-point element in `b`
/// down to an integer value, store the result as a double-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copies the upper element from `a` to the upper element of the intrinsic
/// result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_sd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d {
    // `_MM_FROUND_FLOOR` = round toward -inf without suppressing exceptions.
    roundsd(a, b, _MM_FROUND_FLOOR)
}
634
/// Round the lower single-precision (32-bit) floating-point element in `b`
/// down to an integer value, store the result as a single-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copies the upper 3 packed elements from `a` to the upper elements
/// of the intrinsic result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_floor_ss)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 {
    // `_MM_FROUND_FLOOR` = round toward -inf without suppressing exceptions.
    roundss(a, b, _MM_FROUND_FLOOR)
}
649
/// Round the packed double-precision (64-bit) floating-point elements in `a`
/// up to an integer value, and stores the results as packed double-precision
/// floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d {
    // Lowered to `roundpd` with the round-up immediate (see assert_instr).
    simd_ceil(a)
}
662
/// Round the packed single-precision (32-bit) floating-point elements in `a`
/// up to an integer value, and stores the results as packed single-precision
/// floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 {
    // Lowered to `roundps` with the round-up immediate (see assert_instr).
    simd_ceil(a)
}
675
/// Round the lower double-precision (64-bit) floating-point element in `b`
/// up to an integer value, store the result as a double-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copies the upper element from `a` to the upper element
/// of the intrinsic result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_sd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d {
    // `_MM_FROUND_CEIL` = round toward +inf without suppressing exceptions.
    roundsd(a, b, _MM_FROUND_CEIL)
}
690
/// Round the lower single-precision (32-bit) floating-point element in `b`
/// up to an integer value, store the result as a single-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copies the upper 3 packed elements from `a` to the upper elements
/// of the intrinsic result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ceil_ss)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
    // Fixed round-up immediate, exceptions not suppressed
    // (`_MM_FROUND_CEIL` = `_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF`).
    roundss(a, b, _MM_FROUND_CEIL)
}
705
/// Round the packed double-precision (64-bit) floating-point elements in `a`
/// using the `ROUNDING` parameter, and stores the results as packed
/// double-precision floating-point elements.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// // round to nearest, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
/// // round down, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
/// // round up, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
/// // truncate, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
/// # let _x =
/// _MM_FROUND_CUR_DIRECTION;
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd, ROUNDING = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_pd<const ROUNDING: i32>(a: __m128d) -> __m128d {
    // The rounding-control immediate is 4 bits wide (rounding mode plus
    // the exception-suppression flag); reject anything larger at compile time.
    static_assert_imm4!(ROUNDING);
    roundpd(a, ROUNDING)
}
746
/// Round the packed single-precision (32-bit) floating-point elements in `a`
/// using the `ROUNDING` parameter, and stores the results as packed
/// single-precision floating-point elements.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// // round to nearest, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
/// // round down, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
/// // round up, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
/// // truncate, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
/// # let _x =
/// _MM_FROUND_CUR_DIRECTION;
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps, ROUNDING = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_ps<const ROUNDING: i32>(a: __m128) -> __m128 {
    // The rounding-control immediate is 4 bits wide (rounding mode plus
    // the exception-suppression flag); reject anything larger at compile time.
    static_assert_imm4!(ROUNDING);
    roundps(a, ROUNDING)
}
787
/// Round the lower double-precision (64-bit) floating-point element in `b`
/// using the `ROUNDING` parameter, store the result as a double-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copies the upper element from `a` to the upper element of the intrinsic
/// result.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// // round to nearest, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
/// // round down, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
/// // round up, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
/// // truncate, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
/// # let _x =
/// _MM_FROUND_CUR_DIRECTION;
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_sd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundsd, ROUNDING = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
    // The rounding-control immediate is 4 bits wide (rounding mode plus
    // the exception-suppression flag); reject anything larger at compile time.
    static_assert_imm4!(ROUNDING);
    roundsd(a, b, ROUNDING)
}
830
/// Round the lower single-precision (32-bit) floating-point element in `b`
/// using the `ROUNDING` parameter, store the result as a single-precision
/// floating-point element in the lower element of the intrinsic result,
/// and copies the upper 3 packed elements from `a` to the upper elements
/// of the intrinsic result.
/// Rounding is done according to the rounding parameter, which can be one of:
///
/// ```
/// #[cfg(target_arch = "x86")]
/// use std::arch::x86::*;
/// #[cfg(target_arch = "x86_64")]
/// use std::arch::x86_64::*;
///
/// # fn main() {
/// // round to nearest, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
/// // round down, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
/// // round up, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
/// // truncate, and suppress exceptions:
/// # let _x =
/// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
/// # let _x =
/// _MM_FROUND_CUR_DIRECTION;
/// # }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_round_ss)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundss, ROUNDING = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
    // The rounding-control immediate is 4 bits wide (rounding mode plus
    // the exception-suppression flag); reject anything larger at compile time.
    static_assert_imm4!(ROUNDING);
    roundss(a, b, ROUNDING)
}
873
/// Finds the minimum unsigned 16-bit element in the 128-bit __m128i vector,
/// returning a vector containing its value in its first position, and its
/// index in its second position; all other elements are set to zero.
///
/// This intrinsic corresponds to the `VPHMINPOSUW` / `PHMINPOSUW`
/// instruction.
///
/// Arguments:
///
/// * `a` - A 128-bit vector of type `__m128i`.
///
/// Returns:
///
/// A 128-bit value where:
///
/// * bits `[15:0]` - contain the minimum value found in parameter `a`,
/// * bits `[18:16]` - contain the index of the minimum value
/// * remaining bits are set to `0`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_minpos_epu16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(phminposuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i {
    // Reinterpret the lanes as unsigned 16-bit and transmute the u16x8
    // result back into the opaque `__m128i` wrapper.
    transmute(phminposuw(a.as_u16x8()))
}
902
/// Multiplies the low 32-bit integers from each packed 64-bit
/// element in `a` and `b`, and returns the signed 64-bit result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmuldq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
    // `pmuldq` consumes only the even (low) 32-bit lane of each 64-bit
    // element and widens the products to full signed 64-bit results.
    transmute(pmuldq(a.as_i32x4(), b.as_i32x4()))
}
914
/// Multiplies the packed 32-bit integers in `a` and `b`, producing intermediate
/// 64-bit integers, and returns the lowest 32-bit, whatever they might be,
/// reinterpreted as a signed integer. While `pmulld __m128i::splat(2),
/// __m128i::splat(2)` returns the obvious `__m128i::splat(4)`, due to wrapping
/// arithmetic `pmulld __m128i::splat(i32::MAX), __m128i::splat(2)` would
/// return a negative number.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mullo_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmulld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise wrapping multiply; `simd_mul` lowers to `pmulld` here
    // (checked by the `assert_instr` attribute above).
    transmute(simd_mul(a.as_i32x4(), b.as_i32x4()))
}
930
/// Subtracts 8-bit unsigned integer values and computes the absolute
/// values of the differences to the corresponding bits in the destination.
/// Then sums of the absolute differences are returned according to the bit
/// fields in the immediate operand.
///
/// The following algorithm is performed:
///
/// ```ignore
/// i = IMM8[2] * 4
/// j = IMM8[1:0] * 4
/// for k := 0 to 7
///     d0 = abs(a[i + k + 0] - b[j + 0])
///     d1 = abs(a[i + k + 1] - b[j + 1])
///     d2 = abs(a[i + k + 2] - b[j + 2])
///     d3 = abs(a[i + k + 3] - b[j + 3])
///     r[k] = d0 + d1 + d2 + d3
/// ```
///
/// Arguments:
///
/// * `a` - A 128-bit vector of type `__m128i`.
/// * `b` - A 128-bit vector of type `__m128i`.
/// * `IMM8` - An 8-bit immediate operand specifying how the absolute
///   differences are to be calculated
///     * Bit `[2]` specify the offset for operand `a`
///     * Bits `[1:0]` specify the offset for operand `b`
///
/// Returns:
///
/// * A `__m128i` vector containing the sums of the sets of absolute
///   differences between both operands.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mpsadbw_epu8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(mpsadbw, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mpsadbw_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    // Only bits `[2:0]` of the immediate are meaningful (see the algorithm
    // above); enforce the 3-bit range at compile time.
    static_assert_imm3!(IMM8);
    transmute(mpsadbw(a.as_u8x16(), b.as_u8x16(), IMM8 as u8))
}
973
/// Tests whether the specified bits in a 128-bit integer vector are all
/// zeros.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
///   operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are all zeros,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testz_si128)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
    // Returns the ZF flag computed by `ptest`: 1 iff `a & mask` is all zeros.
    ptestz(a.as_i64x2(), mask.as_i64x2())
}
996
/// Tests whether the specified bits in a 128-bit integer vector are all
/// ones.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
///   operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are all ones,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testc_si128)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
    // Returns the CF flag computed by `ptest`: 1 iff `!a & mask` is all
    // zeros, i.e. every bit selected by `mask` is set in `a`.
    ptestc(a.as_i64x2(), mask.as_i64x2())
}
1019
/// Tests whether the specified bits in a 128-bit integer vector are
/// neither all zeros nor all ones.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
///   operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are neither all zeros nor all ones,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_testnzc_si128)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
    // Returns 1 iff `ptest` leaves both ZF and CF clear: the masked bits of
    // `a` are a mix of zeros and ones.
    ptestnzc(a.as_i64x2(), mask.as_i64x2())
}
1042
/// Tests whether the specified bits in a 128-bit integer vector are all
/// zeros.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
///   operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are all zeros,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_zeros)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
    // This intrinsic is defined as an alias of `_mm_testz_si128`.
    _mm_testz_si128(a, mask)
}
1065
/// Tests whether the specified bits in `a` 128-bit integer vector are all
/// ones.
///
/// Argument:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
///
/// Returns:
///
/// * `1` - if the bits specified in the operand are all set to 1,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_all_ones)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 {
    // `_mm_cmpeq_epi32(a, a)` produces an all-ones mask, so `testc` checks
    // every bit of `a`. Codegen is thus `pcmpeqd` followed by `ptest`
    // (both asserted above).
    _mm_testc_si128(a, _mm_cmpeq_epi32(a, a))
}
1087
/// Tests whether the specified bits in a 128-bit integer vector are
/// neither all zeros nor all ones.
///
/// Arguments:
///
/// * `a` - A 128-bit integer vector containing the bits to be tested.
/// * `mask` - A 128-bit integer vector selecting which bits to test in
///   operand `a`.
///
/// Returns:
///
/// * `1` - if the specified bits are neither all zeros nor all ones,
/// * `0` - otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_test_mix_ones_zeros)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(ptest))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
    // This intrinsic is defined as an alias of `_mm_testnzc_si128`.
    _mm_testnzc_si128(a, mask)
}
1110
// Declarations of the LLVM intrinsics that back the SSE4.1 wrappers above.
// Each `link_name` refers to an LLVM builtin (`llvm.x86.sse41.*`), not a real
// C symbol, which is why `improper_ctypes` is allowed on this block.
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.sse41.pblendvb"]
    fn pblendvb(a: i8x16, b: i8x16, mask: i8x16) -> i8x16;
    #[link_name = "llvm.x86.sse41.blendvpd"]
    fn blendvpd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse41.blendvps"]
    fn blendvps(a: __m128, b: __m128, mask: __m128) -> __m128;
    #[link_name = "llvm.x86.sse41.blendpd"]
    fn blendpd(a: __m128d, b: __m128d, imm2: u8) -> __m128d;
    #[link_name = "llvm.x86.sse41.blendps"]
    fn blendps(a: __m128, b: __m128, imm4: u8) -> __m128;
    #[link_name = "llvm.x86.sse41.pblendw"]
    fn pblendw(a: i16x8, b: i16x8, imm8: u8) -> i16x8;
    #[link_name = "llvm.x86.sse41.insertps"]
    fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128;
    #[link_name = "llvm.x86.sse41.pmaxsb"]
    fn pmaxsb(a: i8x16, b: i8x16) -> i8x16;
    #[link_name = "llvm.x86.sse41.pmaxuw"]
    fn pmaxuw(a: u16x8, b: u16x8) -> u16x8;
    #[link_name = "llvm.x86.sse41.pmaxsd"]
    fn pmaxsd(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse41.pmaxud"]
    fn pmaxud(a: u32x4, b: u32x4) -> u32x4;
    #[link_name = "llvm.x86.sse41.pminsb"]
    fn pminsb(a: i8x16, b: i8x16) -> i8x16;
    #[link_name = "llvm.x86.sse41.pminuw"]
    fn pminuw(a: u16x8, b: u16x8) -> u16x8;
    #[link_name = "llvm.x86.sse41.pminsd"]
    fn pminsd(a: i32x4, b: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse41.pminud"]
    fn pminud(a: u32x4, b: u32x4) -> u32x4;
    #[link_name = "llvm.x86.sse41.packusdw"]
    fn packusdw(a: i32x4, b: i32x4) -> u16x8;
    #[link_name = "llvm.x86.sse41.dppd"]
    fn dppd(a: __m128d, b: __m128d, imm8: u8) -> __m128d;
    #[link_name = "llvm.x86.sse41.dpps"]
    fn dpps(a: __m128, b: __m128, imm8: u8) -> __m128;
    #[link_name = "llvm.x86.sse41.round.pd"]
    fn roundpd(a: __m128d, rounding: i32) -> __m128d;
    #[link_name = "llvm.x86.sse41.round.ps"]
    fn roundps(a: __m128, rounding: i32) -> __m128;
    #[link_name = "llvm.x86.sse41.round.sd"]
    fn roundsd(a: __m128d, b: __m128d, rounding: i32) -> __m128d;
    #[link_name = "llvm.x86.sse41.round.ss"]
    fn roundss(a: __m128, b: __m128, rounding: i32) -> __m128;
    #[link_name = "llvm.x86.sse41.phminposuw"]
    fn phminposuw(a: u16x8) -> u16x8;
    #[link_name = "llvm.x86.sse41.pmuldq"]
    fn pmuldq(a: i32x4, b: i32x4) -> i64x2;
    #[link_name = "llvm.x86.sse41.mpsadbw"]
    fn mpsadbw(a: u8x16, b: u8x16, imm8: u8) -> u16x8;
    #[link_name = "llvm.x86.sse41.ptestz"]
    fn ptestz(a: i64x2, mask: i64x2) -> i32;
    #[link_name = "llvm.x86.sse41.ptestc"]
    fn ptestc(a: i64x2, mask: i64x2) -> i32;
    #[link_name = "llvm.x86.sse41.ptestnzc"]
    fn ptestnzc(a: i64x2, mask: i64x2) -> i32;
}
1170
1171 #[cfg(test)]
1172 mod tests {
1173 use crate::core_arch::x86::*;
1174 use std::mem;
1175 use stdarch_test::simd_test;
1176
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_blendv_epi8() {
        // Lanes whose mask byte is -1 (sign bit set) come from `b`;
        // lanes with a 0 mask byte come from `a`.
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        #[rustfmt::skip]
        let mask = _mm_setr_epi8(
            0, -1, 0, -1, 0, -1, 0, -1,
            0, -1, 0, -1, 0, -1, 0, -1,
        );
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31,
        );
        assert_eq_m128i(_mm_blendv_epi8(a, b, mask), e);
    }
1199
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_blendv_pd() {
        // Only the upper lane's mask has its sign bit set, so only that
        // lane takes the value from `b`.
        let a = _mm_set1_pd(0.0);
        let b = _mm_set1_pd(1.0);
        let mask = transmute(_mm_setr_epi64x(0, -1));
        let r = _mm_blendv_pd(a, b, mask);
        let e = _mm_setr_pd(0.0, 1.0);
        assert_eq_m128d(r, e);
    }
1209
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_blendv_ps() {
        // Lanes 1 and 3 have the mask sign bit set and take values from `b`.
        let a = _mm_set1_ps(0.0);
        let b = _mm_set1_ps(1.0);
        let mask = transmute(_mm_setr_epi32(0, -1, 0, -1));
        let r = _mm_blendv_ps(a, b, mask);
        let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
        assert_eq_m128(r, e);
    }
1219
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_blend_pd() {
        // Immediate 0b10: bit 1 set selects `b` for the upper lane only.
        let a = _mm_set1_pd(0.0);
        let b = _mm_set1_pd(1.0);
        let r = _mm_blend_pd::<0b10>(a, b);
        let e = _mm_setr_pd(0.0, 1.0);
        assert_eq_m128d(r, e);
    }
1228
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_blend_ps() {
        // Immediate 0b1010 selects `b` for lanes 1 and 3.
        let a = _mm_set1_ps(0.0);
        let b = _mm_set1_ps(1.0);
        let r = _mm_blend_ps::<0b1010>(a, b);
        let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
        assert_eq_m128(r, e);
    }
1237
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_blend_epi16() {
        // Each set bit of the immediate picks the corresponding 16-bit lane
        // from `b` (LSB = lane 0).
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let r = _mm_blend_epi16::<0b1010_1100>(a, b);
        let e = _mm_setr_epi16(0, 0, 1, 1, 0, 1, 0, 1);
        assert_eq_m128i(r, e);
    }
1246
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_extract_ps() {
        // `_mm_extract_ps` returns the lane's raw bits as an integer;
        // transmute back to f32 to compare against the original value.
        let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0);
        let r: f32 = transmute(_mm_extract_ps::<1>(a));
        assert_eq!(r, 1.0);
        let r: f32 = transmute(_mm_extract_ps::<3>(a));
        assert_eq!(r, 3.0);
    }
1255
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_extract_epi8() {
        // Extraction zero-extends the byte, so the -1 lane reads back
        // as 0xFF rather than -1.
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            -1, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15
        );
        let r1 = _mm_extract_epi8::<0>(a);
        let r2 = _mm_extract_epi8::<3>(a);
        assert_eq!(r1, 0xFF);
        assert_eq!(r2, 3);
    }
1268
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_extract_epi32() {
        // Extract 32-bit lanes 1 and 3 by immediate index.
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let r = _mm_extract_epi32::<1>(a);
        assert_eq!(r, 1);
        let r = _mm_extract_epi32::<3>(a);
        assert_eq!(r, 3);
    }
1277
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_insert_ps() {
        // Immediate 0b11_00_1100: take `b` lane 3 (4.0), insert it at result
        // lane 0, and zero lanes 2 and 3 via the low-nibble zero mask.
        let a = _mm_set1_ps(1.0);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let r = _mm_insert_ps::<0b11_00_1100>(a, b);
        let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0);
        assert_eq_m128(r, e);
    }
1286
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_insert_epi8() {
        // Insert the byte 32 at immediate lane indices 1 and 14.
        let a = _mm_set1_epi8(0);
        let e = _mm_setr_epi8(0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_insert_epi8::<1>(a, 32);
        assert_eq_m128i(r, e);
        let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0);
        let r = _mm_insert_epi8::<14>(a, 32);
        assert_eq_m128i(r, e);
    }
1297
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_insert_epi32() {
        // Insert the value 32 at immediate lane indices 1 and 3.
        let a = _mm_set1_epi32(0);
        let e = _mm_setr_epi32(0, 32, 0, 0);
        let r = _mm_insert_epi32::<1>(a, 32);
        assert_eq_m128i(r, e);
        let e = _mm_setr_epi32(0, 0, 0, 32);
        let r = _mm_insert_epi32::<3>(a, 32);
        assert_eq_m128i(r, e);
    }
1308
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_max_epi8() {
        // Lane-wise signed 8-bit maximum of interleaved values.
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 4, 5, 8, 9, 12, 13, 16,
            17, 20, 21, 24, 25, 28, 29, 32,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            2, 3, 6, 7, 10, 11, 14, 15,
            18, 19, 22, 23, 26, 27, 30, 31,
        );
        let r = _mm_max_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            2, 4, 6, 8, 10, 12, 14, 16,
            18, 20, 22, 24, 26, 28, 30, 32,
        );
        assert_eq_m128i(r, e);
    }
1329
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_max_epu16() {
        // Lane-wise unsigned 16-bit maximum.
        let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm_max_epu16(a, b);
        let e = _mm_setr_epi16(2, 4, 6, 8, 10, 12, 14, 16);
        assert_eq_m128i(r, e);
    }
1338
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_max_epi32() {
        // Lane-wise signed 32-bit maximum.
        let a = _mm_setr_epi32(1, 4, 5, 8);
        let b = _mm_setr_epi32(2, 3, 6, 7);
        let r = _mm_max_epi32(a, b);
        let e = _mm_setr_epi32(2, 4, 6, 8);
        assert_eq_m128i(r, e);
    }
1347
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_max_epu32() {
        // Lane-wise unsigned 32-bit maximum.
        let a = _mm_setr_epi32(1, 4, 5, 8);
        let b = _mm_setr_epi32(2, 3, 6, 7);
        let r = _mm_max_epu32(a, b);
        let e = _mm_setr_epi32(2, 4, 6, 8);
        assert_eq_m128i(r, e);
    }
1356
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_min_epi8_1() {
        // Lane-wise signed 8-bit minimum, all-positive inputs.
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 4, 5, 8, 9, 12, 13, 16,
            17, 20, 21, 24, 25, 28, 29, 32,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            2, 3, 6, 7, 10, 11, 14, 15,
            18, 19, 22, 23, 26, 27, 30, 31,
        );
        let r = _mm_min_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            1, 3, 5, 7, 9, 11, 13, 15,
            17, 19, 21, 23, 25, 27, 29, 31,
        );
        assert_eq_m128i(r, e);
    }
1377
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_min_epi8_2() {
        // Negative lanes verify that the comparison is signed, not unsigned.
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, -4, -5, 8, -9, -12, 13, -16,
            17, 20, 21, 24, 25, 28, 29, 32,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            2, -3, -6, 7, -10, -11, 14, -15,
            18, 19, 22, 23, 26, 27, 30, 31,
        );
        let r = _mm_min_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            1, -4, -6, 7, -10, -12, 13, -16,
            17, 19, 21, 23, 25, 27, 29, 31,
        );
        assert_eq_m128i(r, e);
    }
1398
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_min_epu16() {
        // Lane-wise unsigned 16-bit minimum.
        let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16);
        let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15);
        let r = _mm_min_epu16(a, b);
        let e = _mm_setr_epi16(1, 3, 5, 7, 9, 11, 13, 15);
        assert_eq_m128i(r, e);
    }
1407
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_min_epi32_1() {
        // Lane-wise signed 32-bit minimum, all-positive inputs.
        let a = _mm_setr_epi32(1, 4, 5, 8);
        let b = _mm_setr_epi32(2, 3, 6, 7);
        let r = _mm_min_epi32(a, b);
        let e = _mm_setr_epi32(1, 3, 5, 7);
        assert_eq_m128i(r, e);
    }
1416
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_min_epi32_2() {
        // Negative lanes verify that the comparison is signed, not unsigned.
        let a = _mm_setr_epi32(-1, 4, 5, -7);
        let b = _mm_setr_epi32(-2, 3, -6, 8);
        let r = _mm_min_epi32(a, b);
        let e = _mm_setr_epi32(-2, 3, -6, -7);
        assert_eq_m128i(r, e);
    }
1425
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_min_epu32() {
        // Lane-wise unsigned 32-bit minimum.
        let a = _mm_setr_epi32(1, 4, 5, 8);
        let b = _mm_setr_epi32(2, 3, 6, 7);
        let r = _mm_min_epu32(a, b);
        let e = _mm_setr_epi32(1, 3, 5, 7);
        assert_eq_m128i(r, e);
    }
1434
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_packus_epi32() {
        // Packing is unsigned-saturating: the negative lanes from `b`
        // clamp to 0 in the 16-bit result.
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(-1, -2, -3, -4);
        let r = _mm_packus_epi32(a, b);
        let e = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }
1443
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cmpeq_epi64() {
        // Equal 64-bit lanes become all-ones (-1); unequal lanes become 0.
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(0, 0);
        let r = _mm_cmpeq_epi64(a, b);
        let e = _mm_setr_epi64x(-1, 0);
        assert_eq_m128i(r, e);
    }
1452
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepi8_epi16() {
        // Sign extension i8 -> i16 must preserve positive and negative values.
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepi8_epi16(a);
        let e = _mm_set1_epi16(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi8(-10);
        let r = _mm_cvtepi8_epi16(a);
        let e = _mm_set1_epi16(-10);
        assert_eq_m128i(r, e);
    }
1464
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepi8_epi32() {
        // Sign extension i8 -> i32 must preserve positive and negative values.
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepi8_epi32(a);
        let e = _mm_set1_epi32(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi8(-10);
        let r = _mm_cvtepi8_epi32(a);
        let e = _mm_set1_epi32(-10);
        assert_eq_m128i(r, e);
    }
1476
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepi8_epi64() {
        // Sign extension i8 -> i64 must preserve positive and negative values.
        let a = _mm_set1_epi8(10);
        let r = _mm_cvtepi8_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi8(-10);
        let r = _mm_cvtepi8_epi64(a);
        let e = _mm_set1_epi64x(-10);
        assert_eq_m128i(r, e);
    }
1488
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepi16_epi32() {
        // Sign extension i16 -> i32 must preserve positive and negative values.
        let a = _mm_set1_epi16(10);
        let r = _mm_cvtepi16_epi32(a);
        let e = _mm_set1_epi32(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi16(-10);
        let r = _mm_cvtepi16_epi32(a);
        let e = _mm_set1_epi32(-10);
        assert_eq_m128i(r, e);
    }
1500
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepi16_epi64() {
        // Sign extension i16 -> i64 must preserve positive and negative values.
        let a = _mm_set1_epi16(10);
        let r = _mm_cvtepi16_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi16(-10);
        let r = _mm_cvtepi16_epi64(a);
        let e = _mm_set1_epi64x(-10);
        assert_eq_m128i(r, e);
    }
1512
    #[simd_test(enable = "sse4.1")]
    unsafe fn test_mm_cvtepi32_epi64() {
        // Sign extension i32 -> i64 must preserve positive and negative values.
        let a = _mm_set1_epi32(10);
        let r = _mm_cvtepi32_epi64(a);
        let e = _mm_set1_epi64x(10);
        assert_eq_m128i(r, e);
        let a = _mm_set1_epi32(-10);
        let r = _mm_cvtepi32_epi64(a);
        let e = _mm_set1_epi64x(-10);
        assert_eq_m128i(r, e);
    }
1524
1525 #[simd_test(enable = "sse4.1")]
test_mm_cvtepu8_epi16()1526 unsafe fn test_mm_cvtepu8_epi16() {
1527 let a = _mm_set1_epi8(10);
1528 let r = _mm_cvtepu8_epi16(a);
1529 let e = _mm_set1_epi16(10);
1530 assert_eq_m128i(r, e);
1531 }
1532
1533 #[simd_test(enable = "sse4.1")]
test_mm_cvtepu8_epi32()1534 unsafe fn test_mm_cvtepu8_epi32() {
1535 let a = _mm_set1_epi8(10);
1536 let r = _mm_cvtepu8_epi32(a);
1537 let e = _mm_set1_epi32(10);
1538 assert_eq_m128i(r, e);
1539 }
1540
1541 #[simd_test(enable = "sse4.1")]
test_mm_cvtepu8_epi64()1542 unsafe fn test_mm_cvtepu8_epi64() {
1543 let a = _mm_set1_epi8(10);
1544 let r = _mm_cvtepu8_epi64(a);
1545 let e = _mm_set1_epi64x(10);
1546 assert_eq_m128i(r, e);
1547 }
1548
1549 #[simd_test(enable = "sse4.1")]
test_mm_cvtepu16_epi32()1550 unsafe fn test_mm_cvtepu16_epi32() {
1551 let a = _mm_set1_epi16(10);
1552 let r = _mm_cvtepu16_epi32(a);
1553 let e = _mm_set1_epi32(10);
1554 assert_eq_m128i(r, e);
1555 }
1556
1557 #[simd_test(enable = "sse4.1")]
test_mm_cvtepu16_epi64()1558 unsafe fn test_mm_cvtepu16_epi64() {
1559 let a = _mm_set1_epi16(10);
1560 let r = _mm_cvtepu16_epi64(a);
1561 let e = _mm_set1_epi64x(10);
1562 assert_eq_m128i(r, e);
1563 }
1564
1565 #[simd_test(enable = "sse4.1")]
test_mm_cvtepu32_epi64()1566 unsafe fn test_mm_cvtepu32_epi64() {
1567 let a = _mm_set1_epi32(10);
1568 let r = _mm_cvtepu32_epi64(a);
1569 let e = _mm_set1_epi64x(10);
1570 assert_eq_m128i(r, e);
1571 }
1572
1573 #[simd_test(enable = "sse4.1")]
test_mm_dp_pd()1574 unsafe fn test_mm_dp_pd() {
1575 let a = _mm_setr_pd(2.0, 3.0);
1576 let b = _mm_setr_pd(1.0, 4.0);
1577 let e = _mm_setr_pd(14.0, 0.0);
1578 assert_eq_m128d(_mm_dp_pd::<0b00110001>(a, b), e);
1579 }
1580
1581 #[simd_test(enable = "sse4.1")]
test_mm_dp_ps()1582 unsafe fn test_mm_dp_ps() {
1583 let a = _mm_setr_ps(2.0, 3.0, 1.0, 10.0);
1584 let b = _mm_setr_ps(1.0, 4.0, 0.5, 10.0);
1585 let e = _mm_setr_ps(14.5, 0.0, 14.5, 0.0);
1586 assert_eq_m128(_mm_dp_ps::<0b01110101>(a, b), e);
1587 }
1588
1589 #[simd_test(enable = "sse4.1")]
test_mm_floor_pd()1590 unsafe fn test_mm_floor_pd() {
1591 let a = _mm_setr_pd(2.5, 4.5);
1592 let r = _mm_floor_pd(a);
1593 let e = _mm_setr_pd(2.0, 4.0);
1594 assert_eq_m128d(r, e);
1595 }
1596
1597 #[simd_test(enable = "sse4.1")]
test_mm_floor_ps()1598 unsafe fn test_mm_floor_ps() {
1599 let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
1600 let r = _mm_floor_ps(a);
1601 let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0);
1602 assert_eq_m128(r, e);
1603 }
1604
1605 #[simd_test(enable = "sse4.1")]
test_mm_floor_sd()1606 unsafe fn test_mm_floor_sd() {
1607 let a = _mm_setr_pd(2.5, 4.5);
1608 let b = _mm_setr_pd(-1.5, -3.5);
1609 let r = _mm_floor_sd(a, b);
1610 let e = _mm_setr_pd(-2.0, 4.5);
1611 assert_eq_m128d(r, e);
1612 }
1613
1614 #[simd_test(enable = "sse4.1")]
test_mm_floor_ss()1615 unsafe fn test_mm_floor_ss() {
1616 let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
1617 let b = _mm_setr_ps(-1.5, -3.5, -7.5, -15.5);
1618 let r = _mm_floor_ss(a, b);
1619 let e = _mm_setr_ps(-2.0, 4.5, 8.5, 16.5);
1620 assert_eq_m128(r, e);
1621 }
1622
1623 #[simd_test(enable = "sse4.1")]
test_mm_ceil_pd()1624 unsafe fn test_mm_ceil_pd() {
1625 let a = _mm_setr_pd(1.5, 3.5);
1626 let r = _mm_ceil_pd(a);
1627 let e = _mm_setr_pd(2.0, 4.0);
1628 assert_eq_m128d(r, e);
1629 }
1630
1631 #[simd_test(enable = "sse4.1")]
test_mm_ceil_ps()1632 unsafe fn test_mm_ceil_ps() {
1633 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1634 let r = _mm_ceil_ps(a);
1635 let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0);
1636 assert_eq_m128(r, e);
1637 }
1638
1639 #[simd_test(enable = "sse4.1")]
test_mm_ceil_sd()1640 unsafe fn test_mm_ceil_sd() {
1641 let a = _mm_setr_pd(1.5, 3.5);
1642 let b = _mm_setr_pd(-2.5, -4.5);
1643 let r = _mm_ceil_sd(a, b);
1644 let e = _mm_setr_pd(-2.0, 3.5);
1645 assert_eq_m128d(r, e);
1646 }
1647
1648 #[simd_test(enable = "sse4.1")]
test_mm_ceil_ss()1649 unsafe fn test_mm_ceil_ss() {
1650 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1651 let b = _mm_setr_ps(-2.5, -4.5, -8.5, -16.5);
1652 let r = _mm_ceil_ss(a, b);
1653 let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
1654 assert_eq_m128(r, e);
1655 }
1656
1657 #[simd_test(enable = "sse4.1")]
test_mm_round_pd()1658 unsafe fn test_mm_round_pd() {
1659 let a = _mm_setr_pd(1.25, 3.75);
1660 let r = _mm_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a);
1661 let e = _mm_setr_pd(1.0, 4.0);
1662 assert_eq_m128d(r, e);
1663 }
1664
1665 #[simd_test(enable = "sse4.1")]
test_mm_round_ps()1666 unsafe fn test_mm_round_ps() {
1667 let a = _mm_setr_ps(2.25, 4.75, -1.75, -4.25);
1668 let r = _mm_round_ps::<_MM_FROUND_TO_ZERO>(a);
1669 let e = _mm_setr_ps(2.0, 4.0, -1.0, -4.0);
1670 assert_eq_m128(r, e);
1671 }
1672
1673 #[simd_test(enable = "sse4.1")]
test_mm_round_sd()1674 unsafe fn test_mm_round_sd() {
1675 let a = _mm_setr_pd(1.5, 3.5);
1676 let b = _mm_setr_pd(-2.5, -4.5);
1677 let old_mode = _MM_GET_ROUNDING_MODE();
1678 _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);
1679 let r = _mm_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
1680 _MM_SET_ROUNDING_MODE(old_mode);
1681 let e = _mm_setr_pd(-2.0, 3.5);
1682 assert_eq_m128d(r, e);
1683 }
1684
1685 #[simd_test(enable = "sse4.1")]
test_mm_round_ss()1686 unsafe fn test_mm_round_ss() {
1687 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1688 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
1689 let old_mode = _MM_GET_ROUNDING_MODE();
1690 _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST);
1691 let r = _mm_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
1692 _MM_SET_ROUNDING_MODE(old_mode);
1693 let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
1694 assert_eq_m128(r, e);
1695 }
1696
1697 #[simd_test(enable = "sse4.1")]
test_mm_minpos_epu16_1()1698 unsafe fn test_mm_minpos_epu16_1() {
1699 let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 66);
1700 let r = _mm_minpos_epu16(a);
1701 let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
1702 assert_eq_m128i(r, e);
1703 }
1704
1705 #[simd_test(enable = "sse4.1")]
test_mm_minpos_epu16_2()1706 unsafe fn test_mm_minpos_epu16_2() {
1707 let a = _mm_setr_epi16(0, 18, 44, 97, 50, 13, 67, 66);
1708 let r = _mm_minpos_epu16(a);
1709 let e = _mm_setr_epi16(0, 0, 0, 0, 0, 0, 0, 0);
1710 assert_eq_m128i(r, e);
1711 }
1712
1713 #[simd_test(enable = "sse4.1")]
test_mm_mul_epi32()1714 unsafe fn test_mm_mul_epi32() {
1715 {
1716 let a = _mm_setr_epi32(1, 1, 1, 1);
1717 let b = _mm_setr_epi32(1, 2, 3, 4);
1718 let r = _mm_mul_epi32(a, b);
1719 let e = _mm_setr_epi64x(1, 3);
1720 assert_eq_m128i(r, e);
1721 }
1722 {
1723 let a = _mm_setr_epi32(15, 2 /* ignored */, 1234567, 4 /* ignored */);
1724 let b = _mm_setr_epi32(
1725 -20, -256, /* ignored */
1726 666666, 666666, /* ignored */
1727 );
1728 let r = _mm_mul_epi32(a, b);
1729 let e = _mm_setr_epi64x(-300, 823043843622);
1730 assert_eq_m128i(r, e);
1731 }
1732 }
1733
1734 #[simd_test(enable = "sse4.1")]
test_mm_mullo_epi32()1735 unsafe fn test_mm_mullo_epi32() {
1736 {
1737 let a = _mm_setr_epi32(1, 1, 1, 1);
1738 let b = _mm_setr_epi32(1, 2, 3, 4);
1739 let r = _mm_mullo_epi32(a, b);
1740 let e = _mm_setr_epi32(1, 2, 3, 4);
1741 assert_eq_m128i(r, e);
1742 }
1743 {
1744 let a = _mm_setr_epi32(15, -2, 1234567, 99999);
1745 let b = _mm_setr_epi32(-20, -256, 666666, -99999);
1746 let r = _mm_mullo_epi32(a, b);
1747 // Attention, most significant bit in r[2] is treated
1748 // as a sign bit:
1749 // 1234567 * 666666 = -1589877210
1750 let e = _mm_setr_epi32(-300, 512, -1589877210, -1409865409);
1751 assert_eq_m128i(r, e);
1752 }
1753 }
1754
1755 #[simd_test(enable = "sse4.1")]
test_mm_minpos_epu16()1756 unsafe fn test_mm_minpos_epu16() {
1757 let a = _mm_setr_epi16(8, 7, 6, 5, 4, 1, 2, 3);
1758 let r = _mm_minpos_epu16(a);
1759 let e = _mm_setr_epi16(1, 5, 0, 0, 0, 0, 0, 0);
1760 assert_eq_m128i(r, e);
1761 }
1762
1763 #[simd_test(enable = "sse4.1")]
test_mm_mpsadbw_epu8()1764 unsafe fn test_mm_mpsadbw_epu8() {
1765 #[rustfmt::skip]
1766 let a = _mm_setr_epi8(
1767 0, 1, 2, 3, 4, 5, 6, 7,
1768 8, 9, 10, 11, 12, 13, 14, 15,
1769 );
1770
1771 let r = _mm_mpsadbw_epu8::<0b000>(a, a);
1772 let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
1773 assert_eq_m128i(r, e);
1774
1775 let r = _mm_mpsadbw_epu8::<0b001>(a, a);
1776 let e = _mm_setr_epi16(16, 12, 8, 4, 0, 4, 8, 12);
1777 assert_eq_m128i(r, e);
1778
1779 let r = _mm_mpsadbw_epu8::<0b100>(a, a);
1780 let e = _mm_setr_epi16(16, 20, 24, 28, 32, 36, 40, 44);
1781 assert_eq_m128i(r, e);
1782
1783 let r = _mm_mpsadbw_epu8::<0b101>(a, a);
1784 let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
1785 assert_eq_m128i(r, e);
1786
1787 let r = _mm_mpsadbw_epu8::<0b111>(a, a);
1788 let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4);
1789 assert_eq_m128i(r, e);
1790 }
1791
1792 #[simd_test(enable = "sse4.1")]
test_mm_testz_si128()1793 unsafe fn test_mm_testz_si128() {
1794 let a = _mm_set1_epi8(1);
1795 let mask = _mm_set1_epi8(0);
1796 let r = _mm_testz_si128(a, mask);
1797 assert_eq!(r, 1);
1798 let a = _mm_set1_epi8(0b101);
1799 let mask = _mm_set1_epi8(0b110);
1800 let r = _mm_testz_si128(a, mask);
1801 assert_eq!(r, 0);
1802 let a = _mm_set1_epi8(0b011);
1803 let mask = _mm_set1_epi8(0b100);
1804 let r = _mm_testz_si128(a, mask);
1805 assert_eq!(r, 1);
1806 }
1807
1808 #[simd_test(enable = "sse4.1")]
test_mm_testc_si128()1809 unsafe fn test_mm_testc_si128() {
1810 let a = _mm_set1_epi8(-1);
1811 let mask = _mm_set1_epi8(0);
1812 let r = _mm_testc_si128(a, mask);
1813 assert_eq!(r, 1);
1814 let a = _mm_set1_epi8(0b101);
1815 let mask = _mm_set1_epi8(0b110);
1816 let r = _mm_testc_si128(a, mask);
1817 assert_eq!(r, 0);
1818 let a = _mm_set1_epi8(0b101);
1819 let mask = _mm_set1_epi8(0b100);
1820 let r = _mm_testc_si128(a, mask);
1821 assert_eq!(r, 1);
1822 }
1823
1824 #[simd_test(enable = "sse4.1")]
test_mm_testnzc_si128()1825 unsafe fn test_mm_testnzc_si128() {
1826 let a = _mm_set1_epi8(0);
1827 let mask = _mm_set1_epi8(1);
1828 let r = _mm_testnzc_si128(a, mask);
1829 assert_eq!(r, 0);
1830 let a = _mm_set1_epi8(-1);
1831 let mask = _mm_set1_epi8(0);
1832 let r = _mm_testnzc_si128(a, mask);
1833 assert_eq!(r, 0);
1834 let a = _mm_set1_epi8(0b101);
1835 let mask = _mm_set1_epi8(0b110);
1836 let r = _mm_testnzc_si128(a, mask);
1837 assert_eq!(r, 1);
1838 let a = _mm_set1_epi8(0b101);
1839 let mask = _mm_set1_epi8(0b101);
1840 let r = _mm_testnzc_si128(a, mask);
1841 assert_eq!(r, 0);
1842 }
1843
1844 #[simd_test(enable = "sse4.1")]
test_mm_test_all_zeros()1845 unsafe fn test_mm_test_all_zeros() {
1846 let a = _mm_set1_epi8(1);
1847 let mask = _mm_set1_epi8(0);
1848 let r = _mm_test_all_zeros(a, mask);
1849 assert_eq!(r, 1);
1850 let a = _mm_set1_epi8(0b101);
1851 let mask = _mm_set1_epi8(0b110);
1852 let r = _mm_test_all_zeros(a, mask);
1853 assert_eq!(r, 0);
1854 let a = _mm_set1_epi8(0b011);
1855 let mask = _mm_set1_epi8(0b100);
1856 let r = _mm_test_all_zeros(a, mask);
1857 assert_eq!(r, 1);
1858 }
1859
1860 #[simd_test(enable = "sse4.1")]
test_mm_test_all_ones()1861 unsafe fn test_mm_test_all_ones() {
1862 let a = _mm_set1_epi8(-1);
1863 let r = _mm_test_all_ones(a);
1864 assert_eq!(r, 1);
1865 let a = _mm_set1_epi8(0b101);
1866 let r = _mm_test_all_ones(a);
1867 assert_eq!(r, 0);
1868 }
1869
1870 #[simd_test(enable = "sse4.1")]
test_mm_test_mix_ones_zeros()1871 unsafe fn test_mm_test_mix_ones_zeros() {
1872 let a = _mm_set1_epi8(0);
1873 let mask = _mm_set1_epi8(1);
1874 let r = _mm_test_mix_ones_zeros(a, mask);
1875 assert_eq!(r, 0);
1876 let a = _mm_set1_epi8(-1);
1877 let mask = _mm_set1_epi8(0);
1878 let r = _mm_test_mix_ones_zeros(a, mask);
1879 assert_eq!(r, 0);
1880 let a = _mm_set1_epi8(0b101);
1881 let mask = _mm_set1_epi8(0b110);
1882 let r = _mm_test_mix_ones_zeros(a, mask);
1883 assert_eq!(r, 1);
1884 let a = _mm_set1_epi8(0b101);
1885 let mask = _mm_set1_epi8(0b101);
1886 let r = _mm_test_mix_ones_zeros(a, mask);
1887 assert_eq!(r, 0);
1888 }
1889 }
1890