//! Streaming SIMD Extensions 2 (SSE2)

#[cfg(test)]
use stdarch_test::assert_instr;

use crate::{
    core_arch::{simd::*, simd_llvm::*, x86::*},
    intrinsics,
    mem::{self, transmute},
    ptr,
};

/// Provides a hint to the processor that the code sequence is a spin-wait loop.
///
/// This can help improve the performance and power consumption of spin-wait
/// loops.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_pause)
#[inline]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_pause() {
    // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without
    // the SSE2 target-feature - therefore it does not require any target features
    pause()
}
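
// A minimal spin-wait sketch (illustrative; not part of the original file):
// `_mm_pause` sits in the body of a polling loop. The `ready` flag and the
// `core::sync::atomic` path are assumptions for the example.
#[cfg(test)]
unsafe fn spin_until_ready_example(ready: &core::sync::atomic::AtomicBool) {
    use core::sync::atomic::Ordering;
    while !ready.load(Ordering::Acquire) {
        // Hint that this is a spin-wait loop; `pause` decodes as `nop` on
        // CPUs without SSE2, so no target feature is required.
        _mm_pause();
    }
}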

/// Invalidates and flushes the cache line that contains `p` from all levels of
/// the cache hierarchy.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clflush)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clflush(p: *const u8) {
    clflush(p)
}

/// Performs a serializing operation on all load-from-memory instructions
/// that were issued prior to this instruction.
///
/// Guarantees that every load instruction that precedes, in program order, the
/// load fence instruction is globally visible before any load instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_lfence() {
    lfence()
}

/// Performs a serializing operation on all load-from-memory and store-to-memory
/// instructions that were issued prior to this instruction.
///
/// Guarantees that every memory access that precedes, in program order, the
/// memory fence instruction is globally visible before any memory instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mfence() {
    mfence()
}

/// Adds packed 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i8x16(), b.as_i8x16()))
}

/// Adds packed 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i16x8(), b.as_i16x8()))
}

/// Adds packed 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i32x4(), b.as_i32x4()))
}

/// Adds packed 64-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i64x2(), b.as_i64x2()))
}

/// Adds packed 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16()))
}

/// Adds packed 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8()))
}

/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16()))
}

/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8()))
}
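
// Sketch of the saturating-add behavior above (illustrative; not part of the
// original file): results clamp to the element type's range instead of
// wrapping. Assumes an SSE2-capable CPU at runtime.
#[cfg(test)]
unsafe fn adds_saturation_example() {
    // Signed: 127 + 1 saturates to 127 rather than wrapping to -128.
    let r = _mm_adds_epi8(_mm_set1_epi8(127), _mm_set1_epi8(1));
    assert_eq!(_mm_cvtsi128_si32(r) as i8, 127);
    // Unsigned: 255 + 1 saturates to 255 rather than wrapping to 0.
    let r = _mm_adds_epu8(_mm_set1_epi8(-1), _mm_set1_epi8(1));
    assert_eq!(_mm_cvtsi128_si32(r) as u8, 255);
}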

/// Averages packed unsigned 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(pavgb(a.as_u8x16(), b.as_u8x16()))
}

/// Averages packed unsigned 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pavgw(a.as_u16x8(), b.as_u16x8()))
}
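
// Sketch of the rounding used by the averages above (illustrative; not part
// of the original file): each lane is computed as (a + b + 1) >> 1 in widened
// arithmetic, so ties round up.
#[cfg(test)]
unsafe fn avg_rounding_example() {
    let r = _mm_avg_epu8(_mm_set1_epi8(1), _mm_set1_epi8(2));
    // (1 + 2 + 1) >> 1 == 2 in every lane.
    assert_eq!(_mm_cvtsi128_si32(r) as u8, 2);
}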

/// Multiplies and then horizontally adds signed 16-bit integers in `a` and `b`.
///
/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally adds adjacent pairs of
/// intermediate 32-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_madd_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmaddwd(a.as_i16x8(), b.as_i16x8()))
}
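
// Sketch of `_mm_madd_epi16` (illustrative; not part of the original file):
// each 32-bit result lane is a[2i]*b[2i] + a[2i+1]*b[2i+1].
#[cfg(test)]
unsafe fn madd_epi16_example() {
    let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
    let r = _mm_madd_epi16(a, a);
    // Lowest 32-bit lane: 1*1 + 2*2 == 5.
    assert_eq!(_mm_cvtsi128_si32(r), 5);
}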

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmaxsw(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmaxub(a.as_u8x16(), b.as_u8x16()))
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pminsw(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(pminub(a.as_u8x16(), b.as_u8x16()))
}

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmulhw(a.as_i16x8(), b.as_i16x8()))
}

/// Multiplies the packed unsigned 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmulhuw(a.as_u16x8(), b.as_u16x8()))
}

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// low 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mullo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_mul(a.as_i16x8(), b.as_i16x8()))
}

/// Multiplies the low unsigned 32-bit integers from each packed 64-bit element
/// in `a` and `b`.
///
/// Returns the unsigned 64-bit results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epu32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmuludq(a.as_u32x4(), b.as_u32x4()))
}
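
// Sketch of the widening multiply above (illustrative; not part of the
// original file): only the even-indexed 32-bit lanes participate, and each
// full 64-bit product is kept, so no bits are lost.
#[cfg(test)]
unsafe fn mul_epu32_example() {
    let a = _mm_setr_epi32(0x1000_0000, -1, 2, -1); // lanes 1 and 3 ignored
    let b = _mm_setr_epi32(0x10, -1, 3, -1);
    let r = _mm_mul_epu32(a, b);
    let lanes = transmute::<__m128i, [u64; 2]>(r);
    // 0x1000_0000 * 0x10 needs 33 bits; the 64-bit lane holds it exactly.
    assert_eq!(lanes, [0x1_0000_0000, 6]);
}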

/// Sums the absolute differences of packed unsigned 8-bit integers.
///
/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sums each consecutive 8 differences to produce
/// two unsigned 16-bit integers, and packs these unsigned 16-bit integers in
/// the low 16 bits of the 64-bit elements returned.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sad_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(psadbw(a.as_u8x16(), b.as_u8x16()))
}
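
// Sketch of `_mm_sad_epu8` (illustrative; not part of the original file):
// absolute byte differences are summed separately within each 8-byte half.
#[cfg(test)]
unsafe fn sad_epu8_example() {
    let r = _mm_sad_epu8(_mm_set1_epi8(3), _mm_set1_epi8(1));
    // |3 - 1| == 2 per byte, eight bytes per half: 16 in the low 16 bits of
    // each 64-bit lane.
    assert_eq!(_mm_cvtsi128_si32(r), 16);
}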

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i8x16(), b.as_i8x16()))
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i16x8(), b.as_i16x8()))
}

/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i32x4(), b.as_i32x4()))
}

/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i64x2(), b.as_i64x2()))
}

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16()))
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8()))
}

/// Subtracts packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16()))
}

/// Subtracts packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8()))
}

/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    _mm_slli_si128_impl::<IMM8>(a)
}
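
// Sketch of the byte-granular shift above (illustrative; not part of the
// original file): `IMM8` counts bytes, not bits, and counts above 15 produce
// an all-zero vector.
#[cfg(test)]
unsafe fn slli_si128_example() {
    let a = _mm_setr_epi32(1, 0, 0, 0);
    // Shifting left by 4 bytes moves the low 32-bit lane up by one position.
    let r = _mm_slli_si128::<4>(a);
    assert_eq!(transmute::<__m128i, [i32; 4]>(r), [0, 1, 0, 0]);
}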

/// Implementation detail: converts the immediate argument of the
/// `_mm_slli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 {
            i
        } else {
            16 - shift + i
        }
    }
    let zero = _mm_set1_epi8(0).as_i8x16();
    transmute::<i8x16, _>(simd_shuffle16!(
        zero,
        a.as_i8x16(),
        <const IMM8: i32> [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    ))
}

/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bslli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    _mm_slli_si128_impl::<IMM8>(a)
}

/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bsrli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    _mm_srli_si128_impl::<IMM8>(a)
}

/// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(pslliw(a.as_i16x8(), IMM8))
}

/// Shifts packed 16-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllw(a.as_i16x8(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psllid(a.as_i32x4(), IMM8))
}

/// Shifts packed 32-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(pslld(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(pslliq(a.as_i64x2(), IMM8))
}

/// Shifts packed 64-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllq(a.as_i64x2(), count.as_i64x2()))
}

/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psraiw(a.as_i16x8(), IMM8))
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psraw(a.as_i16x8(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psraid(a.as_i32x4(), IMM8))
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrad(a.as_i32x4(), count.as_i32x4()))
}
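
// Sketch contrasting the arithmetic right shifts above with the logical right
// shifts below (illustrative; not part of the original file): `psra*`
// replicates the sign bit, `psrl*` shifts in zeros.
#[cfg(test)]
unsafe fn sra_vs_srl_example() {
    let a = _mm_set1_epi32(-16); // 0xFFFF_FFF0
    // Arithmetic: sign bits shifted in, so -16 >> 2 == -4.
    assert_eq!(_mm_cvtsi128_si32(_mm_srai_epi32::<2>(a)), -4);
    // Logical: zeros shifted in, giving a large positive value.
    assert_eq!(
        _mm_cvtsi128_si32(_mm_srli_epi32::<2>(a)) as u32,
        0xFFFF_FFF0_u32 >> 2
    );
}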

/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    _mm_srli_si128_impl::<IMM8>(a)
}

/// Implementation detail: converts the immediate argument of the
/// `_mm_srli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    const fn mask(shift: i32, i: u32) -> u32 {
        if (shift as u32) > 15 {
            i + 16
        } else {
            i + (shift as u32)
        }
    }
    let zero = _mm_set1_epi8(0).as_i8x16();
    let x: i8x16 = simd_shuffle16!(
        a.as_i8x16(),
        zero,
        <const IMM8: i32> [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    );
    transmute(x)
}

/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psrliw(a.as_i16x8(), IMM8))
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrlw(a.as_i16x8(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psrlid(a.as_i32x4(), IMM8))
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrld(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psrliq(a.as_i64x2(), IMM8))
}

/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrlq(a.as_i64x2(), count.as_i64x2()))
}

/// Computes the bitwise AND of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_and(a, b)
}

/// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and
/// then ANDs the result with `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_and(simd_xor(_mm_set1_epi8(-1), a), b)
}
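
// Sketch of the operand order in `_mm_andnot_si128` (illustrative; not part
// of the original file): the *first* operand is the one that is negated.
#[cfg(test)]
unsafe fn andnot_example() {
    let a = _mm_set1_epi32(0b1100);
    let b = _mm_set1_epi32(0b1010);
    // (!a) & b == 0b0010 in every lane.
    assert_eq!(_mm_cvtsi128_si32(_mm_andnot_si128(a, b)), 0b0010);
}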

/// Computes the bitwise OR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_or(a, b)
}

/// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_xor(a, b)
}

/// Compares packed 8-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16()))
}

/// Compares packed 16-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed 32-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4()))
}

/// Compares packed 8-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16()))
}

/// Compares packed 16-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed 32-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4()))
}

/// Compares packed 8-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16()))
}

/// Compares packed 16-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed 32-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4()))
}
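
// Sketch of the comparison results above (illustrative; not part of the
// original file): each lane becomes all ones (-1) when the predicate holds
// and all zeros otherwise, so the result works directly as a select mask.
#[cfg(test)]
unsafe fn cmp_mask_example() {
    let a = _mm_setr_epi32(1, 5, 3, 7);
    let b = _mm_setr_epi32(2, 4, 3, 9);
    let lt = _mm_cmplt_epi32(a, b);
    assert_eq!(transmute::<__m128i, [i32; 4]>(lt), [-1, 0, 0, -1]);
    // Mask use: pick a[i] where a[i] < b[i], else b[i] (a lane-wise minimum).
    let min = _mm_or_si128(_mm_and_si128(lt, a), _mm_andnot_si128(lt, b));
    assert_eq!(transmute::<__m128i, [i32; 4]>(min), [1, 4, 3, 7]);
}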

/// Converts the lower two packed 32-bit integers in `a` to packed
/// double-precision (64-bit) floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
    let a = a.as_i32x4();
    simd_cast::<i32x2, __m128d>(simd_shuffle2!(a, a, [0, 1]))
}

/// Returns `a` with its lower element replaced by `b` after converting it to
/// an `f64`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
    simd_insert(a, 0, b as f64)
}

/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
/// floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
    cvtdq2ps(a.as_i32x4())
}

/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtps_epi32(a: __m128) -> __m128i {
    transmute(cvtps2dq(a))
}

/// Returns a vector whose lowest element is `a` and all higher elements are
/// `0`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i {
    transmute(i32x4::new(a, 0, 0, 0))
}

/// Returns the lowest element of `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
    simd_extract(a.as_i32x4(), 0)
}

/// Sets packed 64-bit integers with the supplied values, from highest to
/// lowest.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
    transmute(i64x2::new(e0, e1))
}

/// Sets packed 32-bit integers with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    transmute(i32x4::new(e0, e1, e2, e3))
}

/// Sets packed 16-bit integers with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
}

/// Sets packed 8-bit integers with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    #[rustfmt::skip]
    transmute(i8x16::new(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    ))
}

/// Broadcasts 64-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi64x(a: i64) -> __m128i {
    _mm_set_epi64x(a, a)
}

/// Broadcasts 32-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi32(a: i32) -> __m128i {
    _mm_set_epi32(a, a, a, a)
}

/// Broadcasts 16-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi16(a: i16) -> __m128i {
    _mm_set_epi16(a, a, a, a, a, a, a, a)
}

/// Broadcasts 8-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi8(a: i8) -> __m128i {
    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}

/// Sets packed 32-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    _mm_set_epi32(e0, e1, e2, e3)
}
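
// Sketch of the argument order (illustrative; not part of the original file):
// `_mm_set_*` takes arguments from the highest element down, while
// `_mm_setr_*` takes them from the lowest element up, so these two calls
// build the same vector.
#[cfg(test)]
unsafe fn set_vs_setr_example() {
    let a = _mm_set_epi32(3, 2, 1, 0); // element 0 is the *last* argument
    let b = _mm_setr_epi32(0, 1, 2, 3); // element 0 is the *first* argument
    let eq = _mm_cmpeq_epi32(a, b);
    assert_eq!(transmute::<__m128i, [i32; 4]>(eq), [-1; 4]);
}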

/// Sets packed 16-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
}

/// Sets packed 8-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    #[rustfmt::skip]
    _mm_set_epi8(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
}

/// Returns a vector with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setzero_si128() -> __m128i {
    _mm_set1_epi64x(0)
}

/// Loads a 64-bit integer from memory into the first element of the returned
/// vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movsd on windows
#[cfg_attr(
    all(
        test,
        not(windows),
        not(all(target_os = "linux", target_arch = "x86_64")),
        target_arch = "x86_64"
    ),
    assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
    _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
}

/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
    *mem_addr
}

/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
    let mut dst: __m128i = _mm_undefined_si128();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        &mut dst as *mut __m128i as *mut u8,
        mem::size_of::<__m128i>(),
    );
    dst
}
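
// Sketch of choosing between the two loads above (illustrative; not part of
// the original file): `_mm_load_si128` requires a 16-byte-aligned pointer,
// while `_mm_loadu_si128` accepts any address, such as an odd offset into a
// byte buffer. The `buf` slice is an assumption for the example.
#[cfg(test)]
unsafe fn load_unaligned_example(buf: &[u8]) -> __m128i {
    assert!(buf.len() >= 17);
    // `buf.as_ptr().add(1)` is misaligned for `__m128i` whenever `buf` itself
    // is aligned, so the unaligned load is the only correct choice here.
    _mm_loadu_si128(buf.as_ptr().add(1) as *const __m128i)
}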
1203 
1204 /// Conditionally store 8-bit integer elements from `a` into memory using
1205 /// `mask`.
1206 ///
1207 /// Elements are not stored when the highest bit is not set in the
1208 /// corresponding element.
1209 ///
1210 /// `mem_addr` should correspond to a 128-bit memory location and does not need
1211 /// to be aligned on any particular boundary.
1212 ///
1213 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskmoveu_si128)
1214 #[inline]
1215 #[target_feature(enable = "sse2")]
1216 #[cfg_attr(test, assert_instr(maskmovdqu))]
1217 #[stable(feature = "simd_x86", since = "1.27.0")]
_mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8)1218 pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
1219     maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
1220 }
1221 
1222 /// Stores 128-bits of integer data from `a` into memory.
1223 ///
1224 /// `mem_addr` must be aligned on a 16-byte boundary.
1225 ///
1226 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
    *mem_addr = a;
}

/// Stores 128-bits of integer data from `a` into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
    storeudq(mem_addr as *mut i8, a);
}

/// Stores the lower 64-bit integer `a` to a memory location.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME mov on windows, movlps on i686
#[cfg_attr(
    all(
        test,
        not(windows),
        not(all(target_os = "linux", target_arch = "x86_64")),
        target_arch = "x86_64"
    ),
    assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
    ptr::copy_nonoverlapping(&a as *const _ as *const u8, mem_addr as *mut u8, 8);
}

/// Stores a 128-bit integer vector to a 128-bit aligned memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
    intrinsics::nontemporal_store(mem_addr, a);
}

/// Stores a 32-bit integer value in the specified memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movnti))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
    intrinsics::nontemporal_store(mem_addr, a);
}

/// Returns a vector where the low element is extracted from `a` and its upper
/// element is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movd on windows, movd on i686
#[cfg_attr(all(test, not(windows), target_arch = "x86_64"), assert_instr(movq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i {
    let zero = _mm_setzero_si128();
    let r: i64x2 = simd_shuffle2!(a.as_i64x2(), zero.as_i64x2(), [0, 2]);
    transmute(r)
}

/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi16)
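///
/// A short sketch of the saturating behavior (assumes an `x86_64` target):
///
/// ```
/// use std::arch::x86_64::*;
/// unsafe {
///     let a = _mm_set1_epi16(300);  // above i8::MAX, saturates to 127
///     let b = _mm_set1_epi16(-300); // below i8::MIN, saturates to -128
///     let r = _mm_packs_epi16(a, b);
///     let lanes: [i8; 16] = std::mem::transmute(r);
///     assert_eq!(lanes[0], 127);  // low 8 lanes come from `a`
///     assert_eq!(lanes[8], -128); // high 8 lanes come from `b`
/// }
/// ```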
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packsswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(packsswb(a.as_i16x8(), b.as_i16x8()))
}

/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using signed saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packssdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(packssdw(a.as_i32x4(), b.as_i32x4()))
}

/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using unsigned saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packuswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(packuswb(a.as_i16x8(), b.as_i16x8()))
}

/// Returns the `imm8` element of `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi16)
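///
/// A minimal usage sketch (assumes an `x86_64` target); the selected 16-bit
/// lane is zero-extended into the `i32` result:
///
/// ```
/// use std::arch::x86_64::*;
/// unsafe {
///     let a = _mm_set_epi16(7, 6, 5, 4, 3, 2, 1, 0); // lanes 7..=0
///     assert_eq!(_mm_extract_epi16::<0>(a), 0);
///     assert_eq!(_mm_extract_epi16::<7>(a), 7);
/// }
/// ```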
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
    static_assert_imm3!(IMM8);
    simd_extract::<_, u16>(a.as_u16x8(), IMM8 as u32) as i32
}

/// Returns a new vector where the `imm8` element of `a` is replaced with `i`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
    static_assert_imm3!(IMM8);
    transmute(simd_insert(a.as_i16x8(), IMM8 as u32, i as i16))
}

/// Returns a mask of the most significant bit of each element in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_epi8)
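///
/// A minimal usage sketch (assumes an `x86_64` target); bit `i` of the result
/// is the most significant bit of byte lane `i`:
///
/// ```
/// use std::arch::x86_64::*;
/// unsafe {
///     assert_eq!(_mm_movemask_epi8(_mm_set1_epi8(-1)), 0xFFFF);
///     assert_eq!(_mm_movemask_epi8(_mm_setzero_si128()), 0);
/// }
/// ```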
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 {
    pmovmskb(a.as_i8x16())
}

/// Shuffles 32-bit integers in `a` using the control in `IMM8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi32)
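///
/// Each 2-bit field of `IMM8` selects the source lane for one output lane; a
/// sketch that reverses the four lanes (assumes an `x86_64` target):
///
/// ```
/// use std::arch::x86_64::*;
/// unsafe {
///     let a = _mm_set_epi32(3, 2, 1, 0); // lanes, low to high: 0, 1, 2, 3
///     let r = _mm_shuffle_epi32::<0b00_01_10_11>(a);
///     let lanes: [i32; 4] = std::mem::transmute(r);
///     assert_eq!(lanes, [3, 2, 1, 0]);
/// }
/// ```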
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    let a = a.as_i32x4();
    let x: i32x4 = simd_shuffle4!(
        a,
        a,
        <const IMM8: i32> [
            IMM8 as u32 & 0b11,
            (IMM8 as u32 >> 2) & 0b11,
            (IMM8 as u32 >> 4) & 0b11,
            (IMM8 as u32 >> 6) & 0b11,
        ],
    );
    transmute(x)
}

/// Shuffles 16-bit integers in the high 64 bits of `a` using the control in
/// `IMM8`.
///
/// Put the results in the high 64 bits of the returned vector, with the low 64
/// bits being copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflehi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    let a = a.as_i16x8();
    let x: i16x8 = simd_shuffle8!(
        a,
        a,
        <const IMM8: i32> [
            0,
            1,
            2,
            3,
            (IMM8 as u32 & 0b11) + 4,
            ((IMM8 as u32 >> 2) & 0b11) + 4,
            ((IMM8 as u32 >> 4) & 0b11) + 4,
            ((IMM8 as u32 >> 6) & 0b11) + 4,
        ],
    );
    transmute(x)
}

/// Shuffles 16-bit integers in the low 64 bits of `a` using the control in
/// `IMM8`.
///
/// Put the results in the low 64 bits of the returned vector, with the high 64
/// bits being copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflelo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    let a = a.as_i16x8();
    let x: i16x8 = simd_shuffle8!(
        a,
        a,
        <const IMM8: i32> [
            IMM8 as u32 & 0b11,
            (IMM8 as u32 >> 2) & 0b11,
            (IMM8 as u32 >> 4) & 0b11,
            (IMM8 as u32 >> 6) & 0b11,
            4,
            5,
            6,
            7,
        ],
    );
    transmute(x)
}

/// Unpacks and interleaves 8-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckhbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_shuffle16!(
        a.as_i8x16(),
        b.as_i8x16(),
        [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
    ))
}

/// Unpacks and interleaves 16-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckhwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    let x = simd_shuffle8!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
    transmute::<i16x8, _>(x)
}

/// Unpacks and interleaves 32-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_shuffle4!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7]))
}

/// Unpacks and interleaves 64-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i64x2, _>(simd_shuffle2!(a.as_i64x2(), b.as_i64x2(), [1, 3]))
}

/// Unpacks and interleaves 8-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi8)
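///
/// A minimal sketch of the interleaving (assumes an `x86_64` target):
///
/// ```
/// use std::arch::x86_64::*;
/// unsafe {
///     let a = _mm_set1_epi8(1);
///     let b = _mm_set1_epi8(2);
///     let r = _mm_unpacklo_epi8(a, b); // a0, b0, a1, b1, ...
///     let lanes: [i8; 16] = std::mem::transmute(r);
///     assert_eq!(&lanes[..4], &[1, 2, 1, 2]);
/// }
/// ```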
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpcklbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_shuffle16!(
        a.as_i8x16(),
        b.as_i8x16(),
        [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
    ))
}

/// Unpacks and interleaves 16-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpcklwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
    let x = simd_shuffle8!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
    transmute::<i16x8, _>(x)
}

/// Unpacks and interleaves 32-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_shuffle4!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5]))
}

/// Unpacks and interleaves 64-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i64x2, _>(simd_shuffle2!(a.as_i64x2(), b.as_i64x2(), [0, 2]))
}

/// Returns a new vector with the low element of `a` replaced by the sum of the
/// low elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sd)
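///
/// A minimal sketch of the scalar (`_sd`) behavior (assumes an `x86_64`
/// target): only the low lane is computed; the high lane is taken from `a`:
///
/// ```
/// use std::arch::x86_64::*;
/// unsafe {
///     let a = _mm_set_pd(10.0, 1.0); // lanes, low to high: 1.0, 10.0
///     let b = _mm_set_pd(20.0, 2.0);
///     let lanes: [f64; 2] = std::mem::transmute(_mm_add_sd(a, b));
///     assert_eq!(lanes, [3.0, 10.0]);
/// }
/// ```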
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b))
}

/// Adds packed double-precision (64-bit) floating-point elements in `a` and
/// `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_pd)
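///
/// A minimal usage sketch (assumes an `x86_64` target):
///
/// ```
/// use std::arch::x86_64::*;
/// unsafe {
///     let a = _mm_set_pd(2.0, 1.0);
///     let b = _mm_set_pd(20.0, 10.0);
///     let lanes: [f64; 2] = std::mem::transmute(_mm_add_pd(a, b));
///     assert_eq!(lanes, [11.0, 22.0]);
/// }
/// ```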
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_add(a, b)
}

/// Returns a new vector with the low element of `a` replaced by the result of
/// dividing the lower element of `a` by the lower element of `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b))
}

/// Divides packed double-precision (64-bit) floating-point elements in `a` by
/// packed elements in `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_div(a, b)
}

/// Returns a new vector with the low element of `a` replaced by the maximum
/// of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
    maxsd(a, b)
}

/// Returns a new vector with the maximum values from corresponding elements in
/// `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
    maxpd(a, b)
}

/// Returns a new vector with the low element of `a` replaced by the minimum
/// of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
    minsd(a, b)
}

/// Returns a new vector with the minimum values from corresponding elements in
/// `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
    minpd(a, b)
}

/// Returns a new vector with the low element of `a` replaced by multiplying the
/// low elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b))
}

/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
/// and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_mul(a, b)
}

/// Returns a new vector with the low element of `a` replaced by the square
/// root of the lower element of `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(a, 0, _mm_cvtsd_f64(sqrtsd(b)))
}

/// Returns a new vector with the square root of each of the values in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sqrt_pd(a: __m128d) -> __m128d {
    simd_fsqrt(a)
}

/// Returns a new vector with the low element of `a` replaced by the result of
/// subtracting the low element of `b` from the low element of `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b))
}

/// Subtracts packed double-precision (64-bit) floating-point elements in `b`
/// from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_sub(a, b)
}

/// Computes the bitwise AND of packed double-precision (64-bit) floating-point
/// elements in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_and_si128(a, b))
}

/// Computes the bitwise NOT of `a` and then AND with `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_andnot_si128(a, b))
}

/// Computes the bitwise OR of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_or_si128(a, b))
}

/// Computes the bitwise XOR of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_xor_si128(a, b))
}

/// Returns a new vector with the low element of `a` replaced by the equality
/// comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_sd)
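///
/// The comparison writes an all-ones or all-zeros bit pattern, not a boolean;
/// a sketch (assumes an `x86_64` target):
///
/// ```
/// use std::arch::x86_64::*;
/// unsafe {
///     let a = _mm_set_pd(9.0, 1.0);
///     let b = _mm_set_pd(8.0, 1.0);
///     let bits: [u64; 2] = std::mem::transmute(_mm_cmpeq_sd(a, b));
///     assert_eq!(bits[0], u64::MAX);         // low lanes compare equal
///     assert_eq!(bits[1], 9.0f64.to_bits()); // high lane is copied from `a`
/// }
/// ```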
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 0)
}

/// Returns a new vector with the low element of `a` replaced by the less-than
/// comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 1)
}

/// Returns a new vector with the low element of `a` replaced by the
/// less-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 2)
}

/// Returns a new vector with the low element of `a` replaced by the
/// greater-than comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(_mm_cmplt_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}

/// Returns a new vector with the low element of `a` replaced by the
/// greater-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(_mm_cmple_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}

/// Returns a new vector with the low element of `a` replaced by the result
/// of comparing both of the lower elements of `a` and `b` to `NaN`. If
/// neither is equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0`
/// otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 7)
}

/// Returns a new vector with the low element of `a` replaced by the result of
/// comparing both of the lower elements of `a` and `b` to `NaN`. If either is
/// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 3)
}

/// Returns a new vector with the low element of `a` replaced by the not-equal
/// comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 4)
}

/// Returns a new vector with the low element of `a` replaced by the
/// not-less-than comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 5)
}

/// Returns a new vector with the low element of `a` replaced by the
/// not-less-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
    cmpsd(a, b, 6)
}

/// Returns a new vector with the low element of `a` replaced by the
/// not-greater-than comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(_mm_cmpnlt_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}

/// Returns a new vector with the low element of `a` replaced by the
/// not-greater-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(_mm_cmpnle_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}

/// Compares corresponding elements in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_pd)
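///
/// A sketch combining a packed comparison with `_mm_movemask_pd` (assumes an
/// `x86_64` target):
///
/// ```
/// use std::arch::x86_64::*;
/// unsafe {
///     let a = _mm_set_pd(1.0, 2.0);
///     let b = _mm_set_pd(1.0, 3.0);
///     let eq = _mm_cmpeq_pd(a, b);
///     // Only the high lanes are equal, so only mask bit 1 is set.
///     assert_eq!(_mm_movemask_pd(eq), 0b10);
/// }
/// ```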
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 0)
}

/// Compares corresponding elements in `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 1)
}

/// Compares corresponding elements in `a` and `b` for less-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 2)
}

/// Compares corresponding elements in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmplt_pd(b, a)
}

/// Compares corresponding elements in `a` and `b` for greater-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmple_pd(b, a)
}

/// Compares corresponding elements in `a` and `b` to see if neither is `NaN`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 7)
}

/// Compares corresponding elements in `a` and `b` to see if either is `NaN`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 3)
}

/// Compares corresponding elements in `a` and `b` for not-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 4)
}

/// Compares corresponding elements in `a` and `b` for not-less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 5)
}

/// Compares corresponding elements in `a` and `b` for not-less-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
    cmppd(a, b, 6)
}

/// Compares corresponding elements in `a` and `b` for not-greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmpnlt_pd(b, a)
}

/// Compares corresponding elements in `a` and `b` for
/// not-greater-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
    _mm_cmpnle_pd(b, a)
}

/// Compares the lower element of `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sd)
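///
/// Unlike the `_mm_cmp*_sd` family, the `comi*` intrinsics return a plain
/// `i32` flag; a sketch (assumes an `x86_64` target):
///
/// ```
/// use std::arch::x86_64::*;
/// unsafe {
///     let a = _mm_set_sd(1.0);
///     let b = _mm_set_sd(1.0);
///     assert_eq!(_mm_comieq_sd(a, b), 1);
///     assert_eq!(_mm_comilt_sd(a, b), 0);
/// }
/// ```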
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
    comieqsd(a, b)
}

/// Compares the lower element of `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
    comiltsd(a, b)
}

/// Compares the lower element of `a` and `b` for less-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
    comilesd(a, b)
}

/// Compares the lower element of `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
    comigtsd(a, b)
}

/// Compares the lower element of `a` and `b` for greater-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
    comigesd(a, b)
}

/// Compares the lower element of `a` and `b` for not-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
    comineqsd(a, b)
}

/// Compares the lower element of `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
    ucomieqsd(a, b)
}

/// Compares the lower element of `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
    ucomiltsd(a, b)
}

/// Compares the lower element of `a` and `b` for less-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
    ucomilesd(a, b)
}

/// Compares the lower element of `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
    ucomigtsd(a, b)
}

/// Compares the lower element of `a` and `b` for greater-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
    ucomigesd(a, b)
}

/// Compares the lower element of `a` and `b` for not-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
    ucomineqsd(a, b)
}

/// Converts packed double-precision (64-bit) floating-point elements in `a` to
/// packed single-precision (32-bit) floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtpd2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
    cvtpd2ps(a)
}

/// Converts packed single-precision (32-bit) floating-point elements in `a` to
/// packed double-precision (64-bit) floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtps_pd(a: __m128) -> __m128d {
    cvtps2pd(a)
}

/// Converts packed double-precision (64-bit) floating-point elements in `a` to
/// packed 32-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_epi32)
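///
/// A sketch (assumes an `x86_64` target and the default round-to-nearest-even
/// rounding mode); the two upper lanes of the result are zeroed:
///
/// ```
/// use std::arch::x86_64::*;
/// unsafe {
///     let a = _mm_set_pd(2.5, 1.5); // both round to 2 (ties to even)
///     let lanes: [i32; 4] = std::mem::transmute(_mm_cvtpd_epi32(a));
///     assert_eq!(lanes, [2, 2, 0, 0]);
/// }
/// ```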
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
    transmute(cvtpd2dq(a))
}

/// Converts the lower double-precision (64-bit) floating-point element in `a`
/// to a 32-bit integer.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsd_si32(a: __m128d) -> i32 {
    cvtsd2si(a)
}

/// Converts the lower double-precision (64-bit) floating-point element in `b`
/// to a single-precision (32-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper elements from
/// `a` to the upper elements of the return value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_ss)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
    cvtsd2ss(a, b)
}

/// Returns the lower double-precision (64-bit) floating-point element of `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_f64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 {
    simd_extract(a, 0)
}

/// Converts the lower single-precision (32-bit) floating-point element in `b`
/// to a double-precision (64-bit) floating-point element, stores the result in
/// the lower element of the return value, and copies the upper element from
/// `a` to the upper element of the return value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtss2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
    cvtss2sd(a, b)
}

/// Converts packed double-precision (64-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
    transmute(cvttpd2dq(a))
}

/// Converts the lower double-precision (64-bit) floating-point element in `a`
/// to a 32-bit integer with truncation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si32)
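///
/// A sketch contrasting truncation with the rounding conversion (assumes an
/// `x86_64` target and the default rounding mode):
///
/// ```
/// use std::arch::x86_64::*;
/// unsafe {
///     let a = _mm_set_sd(-1.9);
///     assert_eq!(_mm_cvttsd_si32(a), -1); // truncates toward zero
///     assert_eq!(_mm_cvtsd_si32(a), -2);  // rounds to nearest
/// }
/// ```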
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttsd_si32(a: __m128d) -> i32 {
    cvttsd2si(a)
}

/// Converts packed single-precision (32-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttps_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttps_epi32(a: __m128) -> __m128i {
    transmute(cvttps2dq(a))
}

/// Copies double-precision (64-bit) floating-point element `a` to the lower
/// element of the return value, and zeroes the upper element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_sd(a: f64) -> __m128d {
    _mm_set_pd(0.0, a)
}

/// Broadcasts double-precision (64-bit) floating-point value `a` to all
/// elements of the return value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_pd(a: f64) -> __m128d {
    _mm_set_pd(a, a)
}

/// Broadcasts double-precision (64-bit) floating-point value `a` to all
/// elements of the return value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd1)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_pd1(a: f64) -> __m128d {
    _mm_set_pd(a, a)
}

/// Sets packed double-precision (64-bit) floating-point elements in the return
/// value with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd)
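///
/// Note the argument order: the first argument becomes the *high* lane. A
/// sketch (assumes an `x86_64` target):
///
/// ```
/// use std::arch::x86_64::*;
/// unsafe {
///     let v = _mm_set_pd(2.0, 1.0);  // (high, low)
///     let r = _mm_setr_pd(1.0, 2.0); // (low, high)
///     let vl: [f64; 2] = std::mem::transmute(v);
///     let rl: [f64; 2] = std::mem::transmute(r);
///     assert_eq!(vl, [1.0, 2.0]); // memory order: low lane first
///     assert_eq!(vl, rl);
/// }
/// ```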
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_pd(a: f64, b: f64) -> __m128d {
    __m128d(b, a)
}

/// Sets packed double-precision (64-bit) floating-point elements in the return
/// value with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
    _mm_set_pd(b, a)
}

/// Returns packed double-precision (64-bit) floating-point elements with all
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))] // FIXME xorpd expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setzero_pd() -> __m128d {
    _mm_set_pd(0.0, 0.0)
}

/// Returns a mask of the most significant bit of each element in `a`.
///
/// The mask is stored in the 2 least significant bits of the return value.
/// All other bits are set to `0`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movmskpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 {
    movmskpd(a)
}

/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd)
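///
/// A sketch using an over-aligned wrapper to satisfy the 16-byte requirement
/// (assumes an `x86_64` target; `Aligned` is a local helper, not an API):
///
/// ```
/// use std::arch::x86_64::*;
/// #[repr(align(16))]
/// struct Aligned([f64; 2]);
/// unsafe {
///     let data = Aligned([1.0, 2.0]);
///     let v = _mm_load_pd(data.0.as_ptr());
///     assert_eq!(_mm_cvtsd_f64(v), 1.0);
/// }
/// ```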
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
    *(mem_addr as *const __m128d)
}

/// Loads a 64-bit double-precision value to the low element of a 128-bit
/// vector of `[2 x double]` and clears the upper element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(*mem_addr, 0.)
}

/// Loads a double-precision value into the high-order bits of a 128-bit
/// vector of `[2 x double]`. The low-order bits are copied from the low-order
/// bits of the first operand.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadh_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(simd_extract(a, 0), *mem_addr)
}

/// Loads a double-precision value into the low-order bits of a 128-bit
/// vector of `[2 x double]`. The high-order bits are copied from the
/// high-order bits of the first operand.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
    _mm_setr_pd(*mem_addr, simd_extract(a, 1))
}

/// Stores a 128-bit floating point vector of `[2 x double]` to a 128-bit
/// aligned memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
    intrinsics::nontemporal_store(mem_addr as *mut __m128d, a);
}

/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
    *mem_addr = simd_extract(a, 0)
}

/// Stores 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory. `mem_addr` must be aligned
/// on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd)
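///
/// A sketch mirroring the aligned load above (assumes an `x86_64` target;
/// `Aligned` is a local helper, not an API):
///
/// ```
/// use std::arch::x86_64::*;
/// #[repr(align(16))]
/// struct Aligned([f64; 2]);
/// unsafe {
///     let mut out = Aligned([0.0; 2]);
///     _mm_store_pd(out.0.as_mut_ptr(), _mm_set_pd(2.0, 1.0));
///     assert_eq!(out.0, [1.0, 2.0]);
/// }
/// ```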
2491 #[inline]
2492 #[target_feature(enable = "sse2")]
2493 #[cfg_attr(test, assert_instr(movaps))]
2494 #[stable(feature = "simd_x86", since = "1.27.0")]
2495 #[allow(clippy::cast_ptr_alignment)]
_mm_store_pd(mem_addr: *mut f64, a: __m128d)2496 pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
2497     *(mem_addr as *mut __m128d) = a;
2498 }
2499 
2500 /// Stores 128-bits (composed of 2 packed double-precision (64-bit)
2501 /// floating-point elements) from `a` into memory.
2502 /// `mem_addr` does not need to be aligned on any particular boundary.
2503 ///
2504 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_pd)
2505 #[inline]
2506 #[target_feature(enable = "sse2")]
2507 #[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected
2508 #[stable(feature = "simd_x86", since = "1.27.0")]
_mm_storeu_pd(mem_addr: *mut f64, a: __m128d)2509 pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
2510     storeupd(mem_addr as *mut i8, a);
2511 }
2512 
2513 /// Stores the lower double-precision (64-bit) floating-point element from `a`
2514 /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
2515 /// 16-byte boundary or a general-protection exception may be generated.
2516 ///
2517 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_pd)
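///
/// # Examples
///
/// A sketch showing the low element duplicated into both slots (illustrative
/// only; `Aligned` is a local helper for the 16-byte alignment requirement):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// unsafe {
///     use core::arch::x86_64::*;
///     #[repr(align(16))]
///     struct Aligned([f64; 2]);
///     let mut out = Aligned([0.0; 2]);
///     let a = _mm_setr_pd(3.0, 4.0);
///     _mm_store1_pd(out.0.as_mut_ptr(), a);
///     assert_eq!(out.0, [3.0, 3.0]); // the low element is written twice
/// }
/// ```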
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
    let b: __m128d = simd_shuffle2!(a, a, [0, 0]);
    *(mem_addr as *mut __m128d) = b;
}

/// Stores the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd1)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
    let b: __m128d = simd_shuffle2!(a, a, [0, 0]);
    *(mem_addr as *mut __m128d) = b;
}

/// Stores 2 double-precision (64-bit) floating-point elements from `a` into
/// memory in reverse order.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_pd)
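///
/// # Examples
///
/// A sketch of the reversed store order (illustrative only; `Aligned` is a
/// local helper for the alignment requirement):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// unsafe {
///     use core::arch::x86_64::*;
///     #[repr(align(16))]
///     struct Aligned([f64; 2]);
///     let mut out = Aligned([0.0; 2]);
///     let a = _mm_setr_pd(3.0, 4.0);
///     _mm_storer_pd(out.0.as_mut_ptr(), a);
///     assert_eq!(out.0, [4.0, 3.0]); // high element first, then low
/// }
/// ```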
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
    let b: __m128d = simd_shuffle2!(a, a, [1, 0]);
    *(mem_addr as *mut __m128d) = b;
}

/// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeh_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
    *mem_addr = simd_extract(a, 1);
}

/// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
/// memory location.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
    *mem_addr = simd_extract(a, 0);
}

/// Loads a double-precision (64-bit) floating-point element from memory
/// into both elements of the returned vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load1_pd)
#[inline]
#[target_feature(enable = "sse2")]
// #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
    let d = *mem_addr;
    _mm_setr_pd(d, d)
}

/// Loads a double-precision (64-bit) floating-point element from memory
/// into both elements of the returned vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd1)
#[inline]
#[target_feature(enable = "sse2")]
// #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
    _mm_load1_pd(mem_addr)
}

/// Loads 2 double-precision (64-bit) floating-point elements from memory into
/// the returned vector in reverse order. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_pd)
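///
/// # Examples
///
/// A sketch of the reversed load order (illustrative only; `Aligned` is a
/// local helper for the alignment requirement):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// unsafe {
///     use core::arch::x86_64::*;
///     #[repr(align(16))]
///     struct Aligned([f64; 2]);
///     let mem = Aligned([1.0, 2.0]);
///     let r = _mm_loadr_pd(mem.0.as_ptr()); // r = [2.0, 1.0]
///     assert_eq!(_mm_cvtsd_f64(r), 2.0);
/// }
/// ```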
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
    let a = _mm_load_pd(mem_addr);
    simd_shuffle2!(a, a, [1, 0])
}

/// Loads 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
    let mut dst = _mm_undefined_pd();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        &mut dst as *mut __m128d as *mut u8,
        mem::size_of::<__m128d>(),
    );
    dst
}

/// Constructs a 128-bit floating-point vector of `[2 x double]` from two
/// 128-bit vector parameters of `[2 x double]`, using the immediate-value
/// parameter as a specifier.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pd)
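///
/// # Examples
///
/// A sketch of how the two low bits of `MASK` select the lanes (illustrative
/// only): bit 0 picks the low lane from `a`, bit 1 picks the high lane from
/// `b`.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// unsafe {
///     use core::arch::x86_64::*;
///     let a = _mm_setr_pd(1.0, 2.0);
///     let b = _mm_setr_pd(3.0, 4.0);
///     // MASK = 0b01: low lane = a[1], high lane = b[0].
///     let r = _mm_shuffle_pd::<0b01>(a, b); // r = [2.0, 3.0]
/// }
/// ```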
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(shufps, MASK = 2))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_imm8!(MASK);
    simd_shuffle2!(a, b, <const MASK: i32> [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2])
}

/// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower
/// 64 bits are set to the lower 64 bits of the second parameter. The upper
/// 64 bits are set to the upper 64 bits of the first parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sd)
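///
/// # Examples
///
/// A minimal lane-layout sketch (illustrative only):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// unsafe {
///     use core::arch::x86_64::*;
///     let a = _mm_setr_pd(1.0, 2.0);
///     let b = _mm_setr_pd(3.0, 4.0);
///     let r = _mm_move_sd(a, b); // r = [3.0, 2.0]: low from `b`, high from `a`
/// }
/// ```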
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
    _mm_setr_pd(simd_extract(b, 0), simd_extract(a, 1))
}

/// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
/// floating-point vector of `[4 x float]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castpd_ps(a: __m128d) -> __m128 {
    transmute(a)
}

/// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
/// integer vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castpd_si128(a: __m128d) -> __m128i {
    transmute(a)
}

/// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
/// floating-point vector of `[2 x double]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castps_pd(a: __m128) -> __m128d {
    transmute(a)
}

/// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
/// integer vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castps_si128(a: __m128) -> __m128i {
    transmute(a)
}

/// Casts a 128-bit integer vector into a 128-bit floating-point vector
/// of `[2 x double]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castsi128_pd(a: __m128i) -> __m128d {
    transmute(a)
}

/// Casts a 128-bit integer vector into a 128-bit floating-point vector
/// of `[4 x float]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castsi128_ps(a: __m128i) -> __m128 {
    transmute(a)
}

/// Returns vector of type __m128d with undefined elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_undefined_pd() -> __m128d {
    // FIXME: this function should return MaybeUninit<__m128d>
    mem::MaybeUninit::<__m128d>::uninit().assume_init()
}

/// Returns vector of type __m128i with undefined elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_undefined_si128() -> __m128i {
    // FIXME: this function should return MaybeUninit<__m128i>
    mem::MaybeUninit::<__m128i>::uninit().assume_init()
}

/// The resulting `__m128d` element is composed of the high-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[127:64]` bits of the second
///   input
/// * The `[63:0]` bits are copied from the `[127:64]` bits of the first
///   input
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_pd)
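///
/// # Examples
///
/// A sketch of the interleaving (illustrative only):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// unsafe {
///     use core::arch::x86_64::*;
///     let a = _mm_setr_pd(1.0, 2.0);
///     let b = _mm_setr_pd(3.0, 4.0);
///     let r = _mm_unpackhi_pd(a, b); // r = [2.0, 4.0]: the two high lanes
/// }
/// ```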
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_shuffle2!(a, b, [1, 3])
}

/// The resulting `__m128d` element is composed of the low-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input
/// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_pd)
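///
/// # Examples
///
/// A sketch of the interleaving (illustrative only):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// unsafe {
///     use core::arch::x86_64::*;
///     let a = _mm_setr_pd(1.0, 2.0);
///     let b = _mm_setr_pd(3.0, 4.0);
///     let r = _mm_unpacklo_pd(a, b); // r = [1.0, 3.0]: the two low lanes
/// }
/// ```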
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_shuffle2!(a, b, [0, 2])
}

#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.sse2.pause"]
    fn pause();
    #[link_name = "llvm.x86.sse2.clflush"]
    fn clflush(p: *const u8);
    #[link_name = "llvm.x86.sse2.lfence"]
    fn lfence();
    #[link_name = "llvm.x86.sse2.mfence"]
    fn mfence();
    #[link_name = "llvm.x86.sse2.pavg.b"]
    fn pavgb(a: u8x16, b: u8x16) -> u8x16;
    #[link_name = "llvm.x86.sse2.pavg.w"]
    fn pavgw(a: u16x8, b: u16x8) -> u16x8;
    #[link_name = "llvm.x86.sse2.pmadd.wd"]
    fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
    #[link_name = "llvm.x86.sse2.pmaxs.w"]
    fn pmaxsw(a: i16x8, b: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.pmaxu.b"]
    fn pmaxub(a: u8x16, b: u8x16) -> u8x16;
    #[link_name = "llvm.x86.sse2.pmins.w"]
    fn pminsw(a: i16x8, b: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.pminu.b"]
    fn pminub(a: u8x16, b: u8x16) -> u8x16;
    #[link_name = "llvm.x86.sse2.pmulh.w"]
    fn pmulhw(a: i16x8, b: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.pmulhu.w"]
    fn pmulhuw(a: u16x8, b: u16x8) -> u16x8;
    #[link_name = "llvm.x86.sse2.pmulu.dq"]
    fn pmuludq(a: u32x4, b: u32x4) -> u64x2;
    #[link_name = "llvm.x86.sse2.psad.bw"]
    fn psadbw(a: u8x16, b: u8x16) -> u64x2;
    #[link_name = "llvm.x86.sse2.pslli.w"]
    fn pslliw(a: i16x8, imm8: i32) -> i16x8;
    #[link_name = "llvm.x86.sse2.psll.w"]
    fn psllw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.pslli.d"]
    fn psllid(a: i32x4, imm8: i32) -> i32x4;
    #[link_name = "llvm.x86.sse2.psll.d"]
    fn pslld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.pslli.q"]
    fn pslliq(a: i64x2, imm8: i32) -> i64x2;
    #[link_name = "llvm.x86.sse2.psll.q"]
    fn psllq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.psrai.w"]
    fn psraiw(a: i16x8, imm8: i32) -> i16x8;
    #[link_name = "llvm.x86.sse2.psra.w"]
    fn psraw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrai.d"]
    fn psraid(a: i32x4, imm8: i32) -> i32x4;
    #[link_name = "llvm.x86.sse2.psra.d"]
    fn psrad(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrli.w"]
    fn psrliw(a: i16x8, imm8: i32) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrl.w"]
    fn psrlw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrli.d"]
    fn psrlid(a: i32x4, imm8: i32) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.d"]
    fn psrld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrli.q"]
    fn psrliq(a: i64x2, imm8: i32) -> i64x2;
    #[link_name = "llvm.x86.sse2.psrl.q"]
    fn psrlq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.cvtdq2ps"]
    fn cvtdq2ps(a: i32x4) -> __m128;
    #[link_name = "llvm.x86.sse2.cvtps2dq"]
    fn cvtps2dq(a: __m128) -> i32x4;
    #[link_name = "llvm.x86.sse2.maskmov.dqu"]
    fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
    #[link_name = "llvm.x86.sse2.packsswb.128"]
    fn packsswb(a: i16x8, b: i16x8) -> i8x16;
    #[link_name = "llvm.x86.sse2.packssdw.128"]
    fn packssdw(a: i32x4, b: i32x4) -> i16x8;
    #[link_name = "llvm.x86.sse2.packuswb.128"]
    fn packuswb(a: i16x8, b: i16x8) -> u8x16;
    #[link_name = "llvm.x86.sse2.pmovmskb.128"]
    fn pmovmskb(a: i8x16) -> i32;
    #[link_name = "llvm.x86.sse2.max.sd"]
    fn maxsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.max.pd"]
    fn maxpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.sd"]
    fn minsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.pd"]
    fn minpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.sqrt.sd"]
    fn sqrtsd(a: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.sqrt.pd"]
    fn sqrtpd(a: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.sd"]
    fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.pd"]
    fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.comieq.sd"]
    fn comieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comilt.sd"]
    fn comiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comile.sd"]
    fn comilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comigt.sd"]
    fn comigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comige.sd"]
    fn comigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comineq.sd"]
    fn comineqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomieq.sd"]
    fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomilt.sd"]
    fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomile.sd"]
    fn ucomilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomigt.sd"]
    fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomige.sd"]
    fn ucomigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomineq.sd"]
    fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.movmsk.pd"]
    fn movmskpd(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtpd2ps"]
    fn cvtpd2ps(a: __m128d) -> __m128;
    #[link_name = "llvm.x86.sse2.cvtps2pd"]
    fn cvtps2pd(a: __m128) -> __m128d;
    #[link_name = "llvm.x86.sse2.cvtpd2dq"]
    fn cvtpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvtsd2si"]
    fn cvtsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtsd2ss"]
    fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
    #[link_name = "llvm.x86.sse2.cvtss2sd"]
    fn cvtss2sd(a: __m128d, b: __m128) -> __m128d;
    #[link_name = "llvm.x86.sse2.cvttpd2dq"]
    fn cvttpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvttsd2si"]
    fn cvttsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvttps2dq"]
    fn cvttps2dq(a: __m128) -> i32x4;
    #[link_name = "llvm.x86.sse2.storeu.dq"]
    fn storeudq(mem_addr: *mut i8, a: __m128i);
    #[link_name = "llvm.x86.sse2.storeu.pd"]
    fn storeupd(mem_addr: *mut i8, a: __m128d);
}

#[cfg(test)]
mod tests {
    use crate::{
        core_arch::{simd::*, x86::*},
        hint::black_box,
    };
    use std::{
        boxed, f32,
        f64::{self, NAN},
        i32,
        mem::{self, transmute},
    };
    use stdarch_test::simd_test;
    #[test]
    fn test_mm_pause() {
        unsafe { _mm_pause() }
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_clflush() {
        let x = 0_u8;
        _mm_clflush(&x as *const _);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_lfence() {
        _mm_lfence();
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mfence() {
        _mm_mfence();
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_add_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi8_overflow() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(1);
        let r = _mm_add_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(-128));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_add_epi16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(4, 5, 6, 7);
        let r = _mm_add_epi32(a, b);
        let e = _mm_setr_epi32(4, 6, 8, 10);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(2, 3);
        let r = _mm_add_epi64(a, b);
        let e = _mm_setr_epi64x(2, 4);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_adds_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8_saturate_positive() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(1);
        let r = _mm_adds_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi8_saturate_negative() {
        let a = _mm_set1_epi8(-0x80);
        let b = _mm_set1_epi8(-1);
        let r = _mm_adds_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_adds_epi16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16_saturate_positive() {
        let a = _mm_set1_epi16(0x7FFF);
        let b = _mm_set1_epi16(1);
        let r = _mm_adds_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epi16_saturate_negative() {
        let a = _mm_set1_epi16(-0x8000);
        let b = _mm_set1_epi16(-1);
        let r = _mm_adds_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_adds_epu8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu8_saturate() {
        let a = _mm_set1_epi8(!0);
        let b = _mm_set1_epi8(1);
        let r = _mm_adds_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_adds_epu16(a, b);
        let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_adds_epu16_saturate() {
        let a = _mm_set1_epi16(!0);
        let b = _mm_set1_epi16(1);
        let r = _mm_adds_epu16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_avg_epu8() {
        let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
        let r = _mm_avg_epu8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(6));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_avg_epu16() {
        let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
        let r = _mm_avg_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(6));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_madd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
        let r = _mm_madd_epi16(a, b);
        let e = _mm_setr_epi32(29, 81, 149, 233);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let r = _mm_max_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(!0);
        let r = _mm_max_epu8(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_epi16() {
        let a = _mm_set1_epi16(1);
        let b = _mm_set1_epi16(-1);
        let r = _mm_min_epi16(a, b);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_epu8() {
        let a = _mm_set1_epi8(1);
        let b = _mm_set1_epi8(!0);
        let r = _mm_min_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mulhi_epi16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
        let r = _mm_mulhi_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-16));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mulhi_epu16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
        let r = _mm_mulhi_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(15));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mullo_epi16() {
        let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
        let r = _mm_mullo_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-17960));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mul_epu32() {
        let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
        let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
        let r = _mm_mul_epu32(a, b);
        let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sad_epu8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
            1, 2, 3, 4,
            155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
            1, 2, 3, 4,
        );
        let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
        let r = _mm_sad_epu8(a, b);
        let e = _mm_setr_epi64x(1020, 614);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
        let r = _mm_sub_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
        let r = _mm_sub_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi32() {
        let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
        let r = _mm_sub_epi32(a, b);
        assert_eq_m128i(r, _mm_set1_epi32(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_epi64() {
        let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
        let r = _mm_sub_epi64(a, b);
        assert_eq_m128i(r, _mm_set1_epi64x(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8_saturate_positive() {
        let a = _mm_set1_epi8(0x7F);
        let b = _mm_set1_epi8(-1);
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi8_saturate_negative() {
        let a = _mm_set1_epi8(-0x80);
        let b = _mm_set1_epi8(1);
        let r = _mm_subs_epi8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16_saturate_positive() {
        let a = _mm_set1_epi16(0x7FFF);
        let b = _mm_set1_epi16(-1);
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epi16_saturate_negative() {
        let a = _mm_set1_epi16(-0x8000);
        let b = _mm_set1_epi16(1);
        let r = _mm_subs_epi16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu8() {
        let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
        let r = _mm_subs_epu8(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu8_saturate() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set1_epi8(1);
        let r = _mm_subs_epu8(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu16() {
        let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
        let r = _mm_subs_epu16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_subs_epu16_saturate() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set1_epi16(1);
        let r = _mm_subs_epu16(a, b);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_si128() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<1>(a);
        let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<15>(a);
        let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_slli_si128::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi8(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi16(
            0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0,
        );
        let r = _mm_slli_epi16::<4>(a);

        #[rustfmt::skip]
        let e = _mm_setr_epi16(
            0xFFF0 as u16 as i16, 0xFFF0 as u16 as i16, 0x0FF0, 0x00F0,
            0, 0, 0, 0,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi16() {
        let a = _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_sll_epi16(a, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0));
        assert_eq_m128i(r, _mm_setr_epi16(0xFF0, 0, 0, 0, 0, 0, 0, 0));
        let r = _mm_sll_epi16(a, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0));
        assert_eq_m128i(r, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi32() {
        let r = _mm_slli_epi32::<4>(_mm_set1_epi32(0xFFFF));
        assert_eq_m128i(r, _mm_set1_epi32(0xFFFF0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi32() {
        let a = _mm_set1_epi32(0xFFFF);
        let b = _mm_setr_epi32(4, 0, 0, 0);
        let r = _mm_sll_epi32(a, b);
        assert_eq_m128i(r, _mm_set1_epi32(0xFFFF0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_slli_epi64() {
        let r = _mm_slli_epi64::<4>(_mm_set1_epi64x(0xFFFFFFFF));
        assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFFF0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sll_epi64() {
        let a = _mm_set1_epi64x(0xFFFFFFFF);
        let b = _mm_setr_epi64x(4, 0);
        let r = _mm_sll_epi64(a, b);
        assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFFF0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srai_epi16() {
        let r = _mm_srai_epi16::<1>(_mm_set1_epi16(-1));
        assert_eq_m128i(r, _mm_set1_epi16(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sra_epi16() {
        let a = _mm_set1_epi16(-1);
        let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_sra_epi16(a, b);
        assert_eq_m128i(r, _mm_set1_epi16(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srai_epi32() {
        let r = _mm_srai_epi32::<1>(_mm_set1_epi32(-1));
        assert_eq_m128i(r, _mm_set1_epi32(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sra_epi32() {
        let a = _mm_set1_epi32(-1);
        let b = _mm_setr_epi32(1, 0, 0, 0);
        let r = _mm_sra_epi32(a, b);
        assert_eq_m128i(r, _mm_set1_epi32(-1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_si128() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<1>(a);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<15>(a);
        let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);

        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
        );
        let r = _mm_srli_si128::<16>(a);
        assert_eq_m128i(r, _mm_set1_epi8(0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi16(
            0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0,
        );
        let r = _mm_srli_epi16::<4>(a);
        #[rustfmt::skip]
        let e = _mm_setr_epi16(
            0xFFF as u16 as i16, 0xFF as u16 as i16, 0xF, 0, 0, 0, 0, 0,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi16() {
        let a = _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_srl_epi16(a, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0));
        assert_eq_m128i(r, _mm_setr_epi16(0xF, 0, 0, 0, 0, 0, 0, 0));
        let r = _mm_srl_epi16(a, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0));
        assert_eq_m128i(r, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi32() {
        let r = _mm_srli_epi32::<4>(_mm_set1_epi32(0xFFFF));
        assert_eq_m128i(r, _mm_set1_epi32(0xFFF));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi32() {
        let a = _mm_set1_epi32(0xFFFF);
        let b = _mm_setr_epi32(4, 0, 0, 0);
        let r = _mm_srl_epi32(a, b);
        assert_eq_m128i(r, _mm_set1_epi32(0xFFF));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srli_epi64() {
        let r = _mm_srli_epi64::<4>(_mm_set1_epi64x(0xFFFFFFFF));
        assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFF));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_srl_epi64() {
        let a = _mm_set1_epi64x(0xFFFFFFFF);
        let b = _mm_setr_epi64x(4, 0);
        let r = _mm_srl_epi64(a, b);
        assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFF));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_and_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_and_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_andnot_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_andnot_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(2));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_or_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_or_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(7));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_xor_si128() {
        let a = _mm_set1_epi8(5);
        let b = _mm_set1_epi8(3);
        let r = _mm_xor_si128(a, b);
        assert_eq_m128i(r, _mm_set1_epi8(6));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi8() {
        let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
        let r = _mm_cmpeq_epi8(a, b);
        #[rustfmt::skip]
        assert_eq_m128i(
            r,
            _mm_setr_epi8(
                0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
            )
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
        let r = _mm_cmpeq_epi16(a, b);
        assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(3, 2, 2, 0);
        let r = _mm_cmpeq_epi32(a, b);
        assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi8() {
        let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let b = _mm_set1_epi8(0);
        let r = _mm_cmpgt_epi8(a, b);
        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi16() {
        let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
        let b = _mm_set1_epi16(0);
        let r = _mm_cmpgt_epi16(a, b);
        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_epi32() {
        let a = _mm_set_epi32(5, 0, 0, 0);
        let b = _mm_set1_epi32(0);
        let r = _mm_cmpgt_epi32(a, b);
        assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi8() {
        let a = _mm_set1_epi8(0);
        let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_cmplt_epi8(a, b);
        let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi16() {
        let a = _mm_set1_epi16(0);
        let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
        let r = _mm_cmplt_epi16(a, b);
        let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_epi32() {
        let a = _mm_set1_epi32(0);
        let b = _mm_set_epi32(5, 0, 0, 0);
        let r = _mm_cmplt_epi32(a, b);
        assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtepi32_pd() {
        let a = _mm_set_epi32(35, 25, 15, 5);
        let r = _mm_cvtepi32_pd(a);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi32_sd() {
        let a = _mm_set1_pd(3.5);
        let r = _mm_cvtsi32_sd(a, 5);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtepi32_ps() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let r = _mm_cvtepi32_ps(a);
        assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtps_epi32() {
        let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
        let r = _mm_cvtps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi32_si128() {
        let r = _mm_cvtsi32_si128(5);
        assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsi128_si32() {
        let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
        assert_eq!(r, 5);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_epi64x() {
        let r = _mm_set_epi64x(0, 1);
        assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_epi32() {
        let r = _mm_set_epi32(0, 1, 2, 3);
        assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_epi16() {
        let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_epi8() {
        #[rustfmt::skip]
        let r = _mm_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            15, 14, 13, 12, 11, 10, 9, 8,
            7, 6, 5, 4, 3, 2, 1, 0,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_epi64x() {
        let r = _mm_set1_epi64x(1);
        assert_eq_m128i(r, _mm_set1_epi64x(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_epi32() {
        let r = _mm_set1_epi32(1);
        assert_eq_m128i(r, _mm_set1_epi32(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_epi16() {
        let r = _mm_set1_epi16(1);
        assert_eq_m128i(r, _mm_set1_epi16(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_epi8() {
        let r = _mm_set1_epi8(1);
        assert_eq_m128i(r, _mm_set1_epi8(1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_epi32() {
        let r = _mm_setr_epi32(0, 1, 2, 3);
        assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_epi16() {
        let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_epi8() {
        #[rustfmt::skip]
        let r = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(r, e);
    }
3736 
3737     #[simd_test(enable = "sse2")]
test_mm_setzero_si128()3738     unsafe fn test_mm_setzero_si128() {
3739         let r = _mm_setzero_si128();
3740         assert_eq_m128i(r, _mm_set1_epi64x(0));
3741     }
3742 
3743     #[simd_test(enable = "sse2")]
test_mm_loadl_epi64()3744     unsafe fn test_mm_loadl_epi64() {
3745         let a = _mm_setr_epi64x(6, 5);
3746         let r = _mm_loadl_epi64(&a as *const _);
3747         assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
3748     }
3749 
3750     #[simd_test(enable = "sse2")]
test_mm_load_si128()3751     unsafe fn test_mm_load_si128() {
3752         let a = _mm_set_epi64x(5, 6);
3753         let r = _mm_load_si128(&a as *const _ as *const _);
3754         assert_eq_m128i(a, r);
3755     }
3756 
3757     #[simd_test(enable = "sse2")]
test_mm_loadu_si128()3758     unsafe fn test_mm_loadu_si128() {
3759         let a = _mm_set_epi64x(5, 6);
3760         let r = _mm_loadu_si128(&a as *const _ as *const _);
3761         assert_eq_m128i(a, r);
3762     }
3763 
3764     #[simd_test(enable = "sse2")]
test_mm_maskmoveu_si128()3765     unsafe fn test_mm_maskmoveu_si128() {
3766         let a = _mm_set1_epi8(9);
3767         #[rustfmt::skip]
3768         let mask = _mm_set_epi8(
3769             0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
3770             0, 0, 0, 0, 0, 0, 0, 0,
3771         );
3772         let mut r = _mm_set1_epi8(0);
3773         _mm_maskmoveu_si128(a, mask, &mut r as *mut _ as *mut i8);
3774         let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3775         assert_eq_m128i(r, e);
3776     }
3777 
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_si128() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi8(0);
        _mm_store_si128(&mut r as *mut _ as *mut __m128i, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_si128() {
        let a = _mm_set1_epi8(9);
        let mut r = _mm_set1_epi8(0);
        _mm_storeu_si128(&mut r as *mut _ as *mut __m128i, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storel_epi64() {
        let a = _mm_setr_epi64x(2, 9);
        let mut r = _mm_set1_epi8(0);
        _mm_storel_epi64(&mut r as *mut _ as *mut __m128i, a);
        assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
    }

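    // The `_mm_stream_*` intrinsics are non-temporal stores: they write the
    // value to memory with a hint to bypass the cache hierarchy. The stored
    // value is still fully visible to subsequent reads, which is what these
    // tests verify.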
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_stream_si128() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let mut r = _mm_undefined_si128();
        _mm_stream_si128(&mut r as *mut _, a);
        assert_eq_m128i(r, a);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_stream_si32() {
        let a: i32 = 7;
        let mut mem = boxed::Box::<i32>::new(-1);
        _mm_stream_si32(&mut *mem as *mut i32, a);
        assert_eq!(a, *mem);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_move_epi64() {
        let a = _mm_setr_epi64x(5, 6);
        let r = _mm_move_epi64(a);
        assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
    }

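    // The pack intrinsics narrow each lane with saturation: in
    // `_mm_packs_epi16`, 0x80 exceeds i8::MAX and clamps to 0x7F, while
    // -0x81 falls below i8::MIN and clamps to -0x80. `_mm_packus_epi16`
    // saturates to the unsigned byte range instead, so negative inputs
    // become 0 and 0x100 becomes 0xFF.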
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packs_epi16() {
        let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
        let r = _mm_packs_epi16(a, b);
        #[rustfmt::skip]
        assert_eq_m128i(
            r,
            _mm_setr_epi8(
                0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
            )
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packs_epi32() {
        let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
        let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
        let r = _mm_packs_epi32(a, b);
        assert_eq_m128i(
            r,
            _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_packus_epi16() {
        let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
        let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
        let r = _mm_packus_epi16(a, b);
        assert_eq_m128i(
            r,
            _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
        );
    }

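    // `_mm_extract_epi16` zero-extends the selected 16-bit lane into the i32
    // result, so extracting a lane holding -1 yields 0xFFFF rather than -1.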
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_extract_epi16() {
        let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
        let r1 = _mm_extract_epi16::<0>(a);
        let r2 = _mm_extract_epi16::<3>(a);
        assert_eq!(r1, 0xFFFF);
        assert_eq!(r2, 3);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_insert_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let r = _mm_insert_epi16::<0>(a, 9);
        let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(r, e);
    }

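    // `_mm_movemask_epi8` gathers the sign (most significant) bit of each of
    // the 16 bytes into the low 16 bits of the result, with byte 0 mapping
    // to the least significant bit.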
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_movemask_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
            0b0101, 0b1111_0000u8 as i8, 0, 0,
            0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
            0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
        );
        let r = _mm_movemask_epi8(a);
        assert_eq!(r, 0b10100110_00100101);
    }

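    // The shuffle immediate encodes one 2-bit source-lane index per output
    // lane, with bits 1:0 selecting output lane 0. Here 0b00_01_01_11 picks
    // source lanes 3, 1, 1, 0 for output lanes 0 through 3.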
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shuffle_epi32() {
        let a = _mm_setr_epi32(5, 10, 15, 20);
        let r = _mm_shuffle_epi32::<0b00_01_01_11>(a);
        let e = _mm_setr_epi32(20, 10, 10, 5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shufflehi_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
        let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a);
        let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shufflelo_epi16() {
        let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
        let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a);
        let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
        assert_eq_m128i(r, e);
    }

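    // The unpack intrinsics interleave lanes from the two inputs:
    // `unpackhi` alternates lanes taken from the high halves of `a` and `b`,
    // and `unpacklo` does the same with the low halves.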
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_unpackhi_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_unpackhi_epi16(a, b);
        let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(4, 5, 6, 7);
        let r = _mm_unpackhi_epi32(a, b);
        let e = _mm_setr_epi32(2, 6, 3, 7);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(2, 3);
        let r = _mm_unpackhi_epi64(a, b);
        let e = _mm_setr_epi64x(1, 3);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        );
        let r = _mm_unpacklo_epi8(a, b);
        #[rustfmt::skip]
        let e = _mm_setr_epi8(
            0, 16, 1, 17, 2, 18, 3, 19,
            4, 20, 5, 21, 6, 22, 7, 23,
        );
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_epi16() {
        let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
        let r = _mm_unpacklo_epi16(a, b);
        let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_epi32() {
        let a = _mm_setr_epi32(0, 1, 2, 3);
        let b = _mm_setr_epi32(4, 5, 6, 7);
        let r = _mm_unpacklo_epi32(a, b);
        let e = _mm_setr_epi32(0, 4, 1, 5);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_epi64() {
        let a = _mm_setr_epi64x(0, 1);
        let b = _mm_setr_epi64x(2, 3);
        let r = _mm_unpacklo_epi64(a, b);
        let e = _mm_setr_epi64x(0, 2);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_add_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_add_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_add_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_div_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_div_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_div_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_div_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_max_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_max_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_max_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_min_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_min_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_min_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mul_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_mul_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_mul_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_mul_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sqrt_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_sqrt_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sqrt_pd() {
        let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
        assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_sd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_sub_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_sub_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(5.0, 10.0);
        let r = _mm_sub_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_and_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_and_pd(a, b);
        let e = transmute(u64x2::splat(1));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_andnot_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_andnot_pd(a, b);
        let e = transmute(u64x2::splat(2));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_or_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_or_pd(a, b);
        let e = transmute(u64x2::splat(7));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_xor_pd() {
        let a = transmute(u64x2::splat(5));
        let b = transmute(u64x2::splat(3));
        let r = _mm_xor_pd(a, b);
        let e = transmute(u64x2::splat(6));
        assert_eq_m128d(r, e);
    }

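    // The scalar (`_sd`) comparisons produce an all-ones mask (!0) in the
    // lower lane when the predicate holds and all-zeros otherwise; the upper
    // lane is copied through from `a` unchanged, hence the
    // `transmute(2.0f64)` in the expected values below.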
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmple_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_sd() {
        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpge_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpord_sd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpunord_sd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpneq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnlt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnle_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpngt_sd() {
        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnge_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, transmute(2.0f64));
        let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
        assert_eq_m128i(r, e);
    }

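    // The packed (`_pd`) comparisons evaluate the predicate independently in
    // both lanes, yielding a per-lane all-ones or all-zeros mask.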
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpeq_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmplt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmple_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, !0);
        let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpgt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpge_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpord_pd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpunord_pd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpneq_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnlt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnle_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpngt_pd() {
        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnge_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
        assert_eq_m128i(r, e);
    }

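    // `_mm_comi*_sd` and `_mm_ucomi*_sd` compare only the lower lanes and
    // return the result as an integer (non-zero for true). The `ucomi`
    // variants are identical except that they do not signal an invalid
    // floating-point exception on quiet NaN operands.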
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comieq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comieq_sd(a, b) != 0);

        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comieq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comilt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comilt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comile_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comile_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comigt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comigt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comige_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comige_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comineq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comineq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomieq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomieq_sd(a, b) != 0);

        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
        assert!(_mm_ucomieq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomilt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomilt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomile_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomile_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomigt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomigt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomige_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomige_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomineq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomineq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_movemask_pd() {
        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
        assert_eq!(r, 0b01);

        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
        assert_eq!(r, 0b11);
    }

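    // A 16-byte-aligned buffer for exercising the load/store intrinsics;
    // the aligned variants such as `_mm_load_pd` and `_mm_store_pd` require
    // their pointer to be 16-byte aligned.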
    #[repr(align(16))]
    struct Memory {
        data: [f64; 4],
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_pd() {
        let mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mem.data;
        let d = vals.as_ptr();

        let r = _mm_load_pd(d);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_sd() {
        let a = 1.;
        let expected = _mm_setr_pd(a, 0.);
        let r = _mm_load_sd(&a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadh_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = 3.;
        let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
        let r = _mm_loadh_pd(a, &b);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadl_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = 3.;
        let expected = _mm_setr_pd(3., get_m128d(a, 1));
        let r = _mm_loadl_pd(a, &b);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_stream_pd() {
        #[repr(align(128))]
        struct Memory {
            pub data: [f64; 2],
        }
        let a = _mm_set1_pd(7.0);
        let mut mem = Memory { data: [-1.0; 2] };

        _mm_stream_pd(&mut mem.data[0] as *mut f64, a);
        for i in 0..2 {
            assert_eq!(mem.data[i], get_m128d(a, i));
        }
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_sd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_store_sd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store_pd(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 2.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);

        let mut ofs = 0;
        let mut p = vals.as_mut_ptr();

        // Make sure p is **not** aligned to 16-byte boundary
        if (p as usize) & 0xf == 0 {
            ofs = 1;
            p = p.offset(1);
        }

        _mm_storeu_pd(p, *black_box(&a));

        if ofs > 0 {
            assert_eq!(vals[ofs - 1], 0.0);
        }
        assert_eq!(vals[ofs + 0], 1.0);
        assert_eq!(vals[ofs + 1], 2.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store1_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store1_pd(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_pd1() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store_pd1(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storer_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_storer_pd(d, *black_box(&a));
        assert_eq!(vals[0], 2.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeh_pd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storeh_pd(&mut dest, a);
        assert_eq!(dest, get_m128d(a, 1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storel_pd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storel_pd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadr_pd() {
        let mut mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mut mem.data;
        let d = vals.as_ptr();

        let r = _mm_loadr_pd(d);
        assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadu_pd() {
        let mut mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mut mem.data;
        let mut d = vals.as_ptr();

        // make sure d is not aligned to 16-byte boundary
        let mut offset = 0;
        if (d as usize) & 0xf == 0 {
            offset = 1;
            d = d.offset(offset as isize);
        }

        let r = _mm_loadu_pd(d);
        let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_ps() {
        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
        assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtps_pd() {
        let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
        assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));

        let r = _mm_cvtps_pd(_mm_setr_ps(
            f32::MAX,
            f32::INFINITY,
            f32::NEG_INFINITY,
            f32::MIN,
        ));
        assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
    }

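    // Conversions to integers that overflow the destination, or that start
    // from NaN or an infinity, produce the "integer indefinite" value, which
    // is i32::MIN (0x8000_0000) for 32-bit results.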
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_epi32() {
        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_si32() {
        let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
        assert_eq!(r, -2);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq!(r, i32::MIN);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq!(r, i32::MIN);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_ss() {
        let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
        let b = _mm_setr_pd(2.0, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));

        let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
        let b = _mm_setr_pd(f64::INFINITY, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(
            r,
            _mm_setr_ps(
                f32::INFINITY,
                f32::NEG_INFINITY,
                f32::MAX,
                f32::NEG_INFINITY,
            ),
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_f64() {
        let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
        assert_eq!(r, -1.1);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtss_sd() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));

        let a = _mm_setr_pd(-1.1, f64::INFINITY);
        let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
    }

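    // The `_mm_cvtt*` variants truncate toward zero instead of using the
    // current rounding mode, so -1.1 converts to -1 and 2.2 to 2.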
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttpd_epi32() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttsd_si32() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, -1);

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, i32::MIN);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttps_epi32() {
        let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));

        let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_sd() {
        let r = _mm_set_sd(-1.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_pd() {
        let r = _mm_set1_pd(-1.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_pd1() {
        let r = _mm_set_pd1(-2.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_pd() {
        let r = _mm_set_pd(1.0_f64, 5.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_pd() {
        let r = _mm_setr_pd(1.0_f64, -5.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setzero_pd() {
        let r = _mm_setzero_pd();
        assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load1_pd() {
        let d = -5.0;
        let r = _mm_load1_pd(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_pd1() {
        let d = -5.0;
        let r = _mm_load_pd1(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        let r = _mm_unpackhi_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        let r = _mm_unpacklo_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
    }

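    // For `_mm_shuffle_pd`, bit 0 of the immediate selects the lower output
    // lane from `a` and bit 1 selects the upper output lane from `b`; an
    // immediate of 0 therefore produces `(a[0], b[0])`.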
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shuffle_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(3., 4.);
        let expected = _mm_setr_pd(1., 3.);
        let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_move_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(3., 4.);
        let expected = _mm_setr_pd(3., 2.);
        let r = _mm_move_sd(a, b);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castpd_ps() {
        let a = _mm_set1_pd(0.);
        let expected = _mm_set1_ps(0.);
        let r = _mm_castpd_ps(a);
        assert_eq_m128(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castpd_si128() {
        let a = _mm_set1_pd(0.);
        let expected = _mm_set1_epi64x(0);
        let r = _mm_castpd_si128(a);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castps_pd() {
        let a = _mm_set1_ps(0.);
        let expected = _mm_set1_pd(0.);
        let r = _mm_castps_pd(a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castps_si128() {
        let a = _mm_set1_ps(0.);
        let expected = _mm_set1_epi32(0);
        let r = _mm_castps_si128(a);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castsi128_pd() {
        let a = _mm_set1_epi64x(0);
        let expected = _mm_set1_pd(0.);
        let r = _mm_castsi128_pd(a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castsi128_ps() {
        let a = _mm_set1_epi32(0);
        let expected = _mm_set1_ps(0.);
        let r = _mm_castsi128_ps(a);
        assert_eq_m128(r, expected);
    }
}