1 #![cfg(target_feature = "sse2")]
2 
3 use super::*;
4 
5 /// Lanewise `a + b` with lanes as `i8`.
6 /// ```
7 /// # use safe_arch::*;
8 /// let a = m128i::from([0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
9 /// let b = m128i::from([0_i8, 11, 2, 13, 4, 15, 6, 17, 8, 19, -20, 21, 22, -23, 24, 127]);
10 /// let c: [i8; 16] = add_i8_m128i(a, b).into();
11 /// assert_eq!(c, [0, 12, 4, 16, 8, 20, 12, 24, 16, 28, -10, 32, 34, -10, 38, -114]);
12 /// ```
13 #[must_use]
14 #[inline(always)]
15 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
add_i8_m128i(a: m128i, b: m128i) -> m128i16 pub fn add_i8_m128i(a: m128i, b: m128i) -> m128i {
17   m128i(unsafe { _mm_add_epi8(a.0, b.0) })
18 }
19 
20 /// Lanewise `a + b` with lanes as `i16`.
21 /// ```
22 /// # use safe_arch::*;
23 /// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
24 /// let b = m128i::from([5_i16, 6, 7, 8, -15, -26, -37, 48]);
25 /// let c: [i16; 8] = add_i16_m128i(a, b).into();
26 /// assert_eq!(c, [6, 8, 10, 12, -16, -28, -40, 44]);
27 /// ```
28 #[must_use]
29 #[inline(always)]
30 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
add_i16_m128i(a: m128i, b: m128i) -> m128i31 pub fn add_i16_m128i(a: m128i, b: m128i) -> m128i {
32   m128i(unsafe { _mm_add_epi16(a.0, b.0) })
33 }
34 
35 /// Lanewise `a + b` with lanes as `i32`.
36 /// ```
37 /// # use safe_arch::*;
38 /// let a = m128i::from([1, 2, 3, 4]);
39 /// let b = m128i::from([5, 6, 7, 8]);
40 /// let c: [i32; 4] = add_i32_m128i(a, b).into();
41 /// assert_eq!(c, [6, 8, 10, 12]);
42 /// ```
43 #[must_use]
44 #[inline(always)]
45 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
add_i32_m128i(a: m128i, b: m128i) -> m128i46 pub fn add_i32_m128i(a: m128i, b: m128i) -> m128i {
47   m128i(unsafe { _mm_add_epi32(a.0, b.0) })
48 }
49 
50 /// Lanewise `a + b` with lanes as `i64`.
51 /// ```
52 /// # use safe_arch::*;
53 /// let a = m128i::from([92_i64, 87]);
54 /// let b = m128i::from([-9001_i64, 1]);
55 /// let c: [i64; 2] = add_i64_m128i(a, b).into();
56 /// assert_eq!(c, [-8909, 88]);
57 /// ```
58 #[must_use]
59 #[inline(always)]
60 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
add_i64_m128i(a: m128i, b: m128i) -> m128i61 pub fn add_i64_m128i(a: m128i, b: m128i) -> m128i {
62   m128i(unsafe { _mm_add_epi64(a.0, b.0) })
63 }
64 
65 /// Lanewise `a + b`.
66 /// ```
67 /// # use safe_arch::*;
68 /// let a = m128d::from_array([92.0, 87.5]);
69 /// let b = m128d::from_array([100.0, -6.0]);
70 /// let c = add_m128d(a, b).to_array();
71 /// assert_eq!(c, [192.0, 81.5]);
72 /// ```
73 #[must_use]
74 #[inline(always)]
75 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
add_m128d(a: m128d, b: m128d) -> m128d76 pub fn add_m128d(a: m128d, b: m128d) -> m128d {
77   m128d(unsafe { _mm_add_pd(a.0, b.0) })
78 }
79 
80 /// Lowest lane `a + b`, high lane unchanged.
81 /// ```
82 /// # use safe_arch::*;
83 /// let a = m128d::from_array([92.0, 87.5]);
84 /// let b = m128d::from_array([100.0, -600.0]);
85 /// let c = add_m128d_s(a, b).to_array();
86 /// assert_eq!(c, [192.0, 87.5]);
87 /// ```
88 #[must_use]
89 #[inline(always)]
90 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
add_m128d_s(a: m128d, b: m128d) -> m128d91 pub fn add_m128d_s(a: m128d, b: m128d) -> m128d {
92   m128d(unsafe { _mm_add_sd(a.0, b.0) })
93 }
94 
95 /// Lanewise saturating `a + b` with lanes as `i8`.
96 /// ```
97 /// # use safe_arch::*;
98 /// let a = m128i::from([
99 ///   i8::MAX, i8::MIN, 3, 4, -1, -2, -3, -4,
100 ///   3, 4, -1, -2, -1, -2, -3, -4,
101 /// ]);
102 /// let b = m128i::from([
103 ///   i8::MAX, i8::MIN, 7, 8, -15, -26, -37, 48,
104 ///   7, 8, -15, -26, -15, -26, -37, 48,
105 /// ]);
106 /// let c: [i8; 16] = add_saturating_i8_m128i(a, b).into();
107 /// assert_eq!(
108 ///   c,
109 ///   [
110 ///     i8::MAX, i8::MIN, 10, 12, -16, -28, -40, 44,
111 ///     10, 12, -16, -28, -16, -28, -40, 44
112 ///   ]
113 /// );
114 /// ```
115 #[must_use]
116 #[inline(always)]
117 #[rustfmt::skip]
118 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
add_saturating_i8_m128i(a: m128i, b: m128i) -> m128i119 pub fn add_saturating_i8_m128i(a: m128i, b: m128i) -> m128i {
120   m128i(unsafe { _mm_adds_epi8(a.0, b.0) })
121 }
122 
123 /// Lanewise saturating `a + b` with lanes as `i16`.
124 /// ```
125 /// # use safe_arch::*;
126 /// let a = m128i::from([i16::MAX, i16::MIN, 3, 4, -1, -2, -3, -4]);
127 /// let b = m128i::from([i16::MAX, i16::MIN, 7, 8, -15, -26, -37, 48]);
128 /// let c: [i16; 8] = add_saturating_i16_m128i(a, b).into();
129 /// assert_eq!(c, [i16::MAX, i16::MIN, 10, 12, -16, -28, -40, 44]);
130 /// ```
131 #[must_use]
132 #[inline(always)]
133 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
add_saturating_i16_m128i(a: m128i, b: m128i) -> m128i134 pub fn add_saturating_i16_m128i(a: m128i, b: m128i) -> m128i {
135   m128i(unsafe { _mm_adds_epi16(a.0, b.0) })
136 }
137 
138 /// Lanewise saturating `a + b` with lanes as `u8`.
139 /// ```
140 /// # use safe_arch::*;
141 /// let a = m128i::from([
142 ///   u8::MAX, 0, 3, 4, 254, 2, 3, 4,
143 ///   3, 4, 1, 2, 1, 2, 128, 4,
144 /// ]);
145 /// let b = m128i::from([
146 ///   u8::MAX, 0, 7, 8, 15, 26, 37, 48,
147 ///   7, 8, 15, 26, 15, 26, 37, 48,
148 /// ]);
149 /// let c: [u8; 16] = add_saturating_u8_m128i(a, b).into();
150 /// assert_eq!(
151 ///   c,
152 ///   [
153 ///     u8::MAX, 0, 10, 12, 255, 28, 40, 52,
154 ///     10, 12, 16, 28, 16, 28, 165, 52
155 ///   ]
156 /// );
157 /// ```
158 #[must_use]
159 #[inline(always)]
160 #[rustfmt::skip]
161 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
add_saturating_u8_m128i(a: m128i, b: m128i) -> m128i162 pub fn add_saturating_u8_m128i(a: m128i, b: m128i) -> m128i {
163   m128i(unsafe { _mm_adds_epu8(a.0, b.0) })
164 }
165 
166 /// Lanewise saturating `a + b` with lanes as `u16`.
167 /// ```
168 /// # use safe_arch::*;
169 /// let a = m128i::from([u16::MAX, 0, 3, 4, 1, 2, 3, 4]);
170 /// let b = m128i::from([u16::MAX, 0, 7, 8, 15, 26, 37, 48]);
171 /// let c: [u16; 8] = add_saturating_u16_m128i(a, b).into();
172 /// assert_eq!(c, [u16::MAX, 0, 10, 12, 16, 28, 40, 52]);
173 /// ```
174 #[must_use]
175 #[inline(always)]
176 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
add_saturating_u16_m128i(a: m128i, b: m128i) -> m128i177 pub fn add_saturating_u16_m128i(a: m128i, b: m128i) -> m128i {
178   m128i(unsafe { _mm_adds_epu16(a.0, b.0) })
179 }
180 
181 /// Bitwise `a & b`.
182 /// ```
183 /// # use safe_arch::*;
184 /// let a = m128d::from_array([1.0, 0.0]);
185 /// let b = m128d::from_array([1.0, 1.0]);
186 /// let c = bitand_m128d(a, b).to_array();
187 /// assert_eq!(c, [1.0, 0.0]);
188 /// ```
189 #[must_use]
190 #[inline(always)]
191 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
bitand_m128d(a: m128d, b: m128d) -> m128d192 pub fn bitand_m128d(a: m128d, b: m128d) -> m128d {
193   m128d(unsafe { _mm_and_pd(a.0, b.0) })
194 }
195 
196 /// Bitwise `a & b`.
197 /// ```
198 /// # use safe_arch::*;
199 /// let a = m128i::from([1, 0, 1, 0]);
200 /// let b = m128i::from([1, 1, 0, 0]);
201 /// let c: [i32; 4] = bitand_m128i(a, b).into();
202 /// assert_eq!(c, [1, 0, 0, 0]);
203 /// ```
204 #[must_use]
205 #[inline(always)]
206 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
bitand_m128i(a: m128i, b: m128i) -> m128i207 pub fn bitand_m128i(a: m128i, b: m128i) -> m128i {
208   m128i(unsafe { _mm_and_si128(a.0, b.0) })
209 }
210 
211 /// Bitwise `(!a) & b`.
212 /// ```
213 /// # use safe_arch::*;
214 /// let a = m128d::from_array([1.0, 0.0]);
215 /// let b = m128d::from_array([1.0, 1.0]);
216 /// let c = bitandnot_m128d(a, b).to_array();
217 /// assert_eq!(c, [0.0, 1.0]);
218 /// ```
219 #[must_use]
220 #[inline(always)]
221 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
bitandnot_m128d(a: m128d, b: m128d) -> m128d222 pub fn bitandnot_m128d(a: m128d, b: m128d) -> m128d {
223   m128d(unsafe { _mm_andnot_pd(a.0, b.0) })
224 }
225 
226 /// Bitwise `(!a) & b`.
227 /// ```
228 /// # use safe_arch::*;
229 /// let a = m128i::from([1, 0, 1, 0]);
230 /// let b = m128i::from([1, 1, 0, 0]);
231 /// let c: [i32; 4] = bitandnot_m128i(a, b).into();
232 /// assert_eq!(c, [0, 1, 0, 0]);
233 /// ```
234 #[must_use]
235 #[inline(always)]
236 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
bitandnot_m128i(a: m128i, b: m128i) -> m128i237 pub fn bitandnot_m128i(a: m128i, b: m128i) -> m128i {
238   m128i(unsafe { _mm_andnot_si128(a.0, b.0) })
239 }
240 
241 /// Lanewise average of the `u8` values.
242 /// ```
243 /// # use safe_arch::*;
244 /// let a = m128i::from([
245 ///   u8::MAX, 0, 3, 4, 254, 2, 3, 4,
246 ///   3, 4, 1, 2, 1, 2, 128, 4,
247 /// ]);
248 /// let b = m128i::from([
249 ///   u8::MAX, 0, 7, 8, 15, 26, 37, 48,
250 ///   7, 8, 15, 26, 15, 26, 37, 48,
251 /// ]);
252 /// let c: [u8; 16] = average_u8_m128i(a, b).into();
253 /// assert_eq!(
254 ///   c,
255 ///   [
256 ///     u8::MAX, 0, 5, 6, 135, 14, 20, 26,
257 ///     5, 6, 8, 14, 8, 14, 83, 26
258 ///   ]
259 /// );
260 /// ```
261 #[must_use]
262 #[inline(always)]
263 #[rustfmt::skip]
264 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
average_u8_m128i(a: m128i, b: m128i) -> m128i265 pub fn average_u8_m128i(a: m128i, b: m128i) -> m128i {
266   m128i(unsafe { _mm_avg_epu8(a.0, b.0) })
267 }
268 
269 /// Lanewise average of the `u16` values.
270 /// ```
271 /// # use safe_arch::*;
272 /// let a = m128i::from([u16::MAX, 0, 3, 4, 1, 2, 3, 4]);
273 /// let b = m128i::from([u16::MAX, 0, 7, 8, 15, 26, 37, 48]);
274 /// let c: [u16; 8] = average_u16_m128i(a, b).into();
275 /// assert_eq!(c, [u16::MAX, 0, 5, 6, 8, 14, 20, 26]);
276 /// ```
277 #[must_use]
278 #[inline(always)]
279 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
average_u16_m128i(a: m128i, b: m128i) -> m128i280 pub fn average_u16_m128i(a: m128i, b: m128i) -> m128i {
281   m128i(unsafe { _mm_avg_epu16(a.0, b.0) })
282 }
283 
284 /// Shifts all bits in the entire register left by a number of **bytes**.
285 ///
286 /// ```
287 /// # use safe_arch::*;
288 /// let a = m128i::from(0x0000000B_0000000A_0000000F_11111111_u128);
289 /// //
290 /// let b: u128 = byte_shl_imm_u128_m128i::<1>(a).into();
291 /// assert_eq!(b, 0x00000B00_00000A00_00000F11_11111100);
292 /// ```
293 #[must_use]
294 #[inline(always)]
295 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
byte_shl_imm_u128_m128i<const IMM: i32>(a: m128i) -> m128i296 pub fn byte_shl_imm_u128_m128i<const IMM: i32>(a: m128i) -> m128i {
297   m128i(unsafe { _mm_bslli_si128(a.0, IMM) })
298 }
299 
300 /// Shifts all bits in the entire register right by a number of **bytes**.
301 ///
302 /// ```
303 /// # use safe_arch::*;
304 /// let a = m128i::from(0x0000000B_0000000A_0000000F_11111111_u128);
305 /// //
306 /// let c: u128 = byte_shr_imm_u128_m128i::<1>(a).into();
307 /// assert_eq!(c, 0x00000000_0B000000_0A000000_0F111111);
308 /// ```
309 #[must_use]
310 #[inline(always)]
311 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
byte_shr_imm_u128_m128i<const IMM: i32>(a: m128i) -> m128i312 pub fn byte_shr_imm_u128_m128i<const IMM: i32>(a: m128i) -> m128i {
313   m128i(unsafe { _mm_bsrli_si128(a.0, IMM) })
314 }
315 
316 /// Bit-preserving cast to `m128` from `m128d`
317 /// ```
318 /// # use safe_arch::*;
319 /// let a = m128d::from_array([1.0, 2.0]);
320 /// let c: [u32; 4] = cast_to_m128_from_m128d(a).to_bits();
321 /// assert_eq!(c, [0, 0x3FF00000, 0, 0x40000000]);
322 /// ```
323 #[must_use]
324 #[inline(always)]
325 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cast_to_m128_from_m128d(a: m128d) -> m128326 pub fn cast_to_m128_from_m128d(a: m128d) -> m128 {
327   m128(unsafe { _mm_castpd_ps(a.0) })
328 }
329 
330 /// Bit-preserving cast to `m128i` from `m128d`
331 /// ```
332 /// # use safe_arch::*;
333 /// let a = m128d::from_array([1.0, 2.0]);
334 /// let c: [u32; 4] = cast_to_m128i_from_m128d(a).into();
335 /// assert_eq!(c, [0, 0x3FF00000, 0, 0x40000000]);
336 /// ```
337 #[must_use]
338 #[inline(always)]
339 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cast_to_m128i_from_m128d(a: m128d) -> m128i340 pub fn cast_to_m128i_from_m128d(a: m128d) -> m128i {
341   m128i(unsafe { _mm_castpd_si128(a.0) })
342 }
343 
344 /// Bit-preserving cast to `m128d` from `m128`
345 /// ```
346 /// # use safe_arch::*;
347 /// let a = m128::from_array([1.0, 2.0, 3.0, 4.0]);
348 /// let c: [u64; 2] = cast_to_m128d_from_m128(a).to_bits();
349 /// assert_eq!(c, [0x400000003F800000, 0x4080000040400000]);
350 /// ```
351 #[must_use]
352 #[inline(always)]
353 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cast_to_m128d_from_m128(a: m128) -> m128d354 pub fn cast_to_m128d_from_m128(a: m128) -> m128d {
355   m128d(unsafe { _mm_castps_pd(a.0) })
356 }
357 
358 /// Bit-preserving cast to `m128i` from `m128`
359 /// ```
360 /// # use safe_arch::*;
361 /// let a = m128::from_array([1.0, 2.0, 3.0, 4.0]);
362 /// let c: [u32; 4] = cast_to_m128i_from_m128(a).into();
363 /// assert_eq!(c, [0x3F800000, 0x40000000, 0x40400000, 0x40800000]);
364 /// ```
365 #[must_use]
366 #[inline(always)]
367 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cast_to_m128i_from_m128(a: m128) -> m128i368 pub fn cast_to_m128i_from_m128(a: m128) -> m128i {
369   m128i(unsafe { _mm_castps_si128(a.0) })
370 }
371 
372 /// Bit-preserving cast to `m128d` from `m128i`
373 /// ```
374 /// # use safe_arch::*;
375 /// let a = m128i::from([1, 2, 3, 4]);
376 /// let c: [u64; 2] = cast_to_m128d_from_m128i(a).to_bits();
377 /// assert_eq!(c, [0x200000001, 0x400000003]);
378 /// ```
379 #[must_use]
380 #[inline(always)]
381 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cast_to_m128d_from_m128i(a: m128i) -> m128d382 pub fn cast_to_m128d_from_m128i(a: m128i) -> m128d {
383   m128d(unsafe { _mm_castsi128_pd(a.0) })
384 }
385 
386 /// Bit-preserving cast to `m128` from `m128i`
387 /// ```
388 /// # use safe_arch::*;
389 /// let a = m128i::from([1, 2, 3, 4]);
390 /// let c: [u32; 4] = cast_to_m128_from_m128i(a).to_bits();
391 /// assert_eq!(c, [1, 2, 3, 4]);
392 /// ```
393 #[must_use]
394 #[inline(always)]
395 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cast_to_m128_from_m128i(a: m128i) -> m128396 pub fn cast_to_m128_from_m128i(a: m128i) -> m128 {
397   m128(unsafe { _mm_castsi128_ps(a.0) })
398 }
399 
400 /// Lanewise `a == b` with lanes as `i8`.
401 ///
402 /// All bits 1 for true (`-1`), all bit 0 for false (`0`).
403 /// ```
404 /// # use safe_arch::*;
405 /// let a = m128i::from([0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 127]);
406 /// let b = m128i::from([0_i8, 11, 2, 13, 4, 15, 6, 17, 8, 19, -20, 21, 22, -23, 24, 127]);
407 /// let c: [i8; 16] = cmp_eq_mask_i8_m128i(a, b).into();
408 /// assert_eq!(c, [-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, 0, 0, 0, 0, -1]);
409 /// ```
410 #[must_use]
411 #[inline(always)]
412 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_eq_mask_i8_m128i(a: m128i, b: m128i) -> m128i413 pub fn cmp_eq_mask_i8_m128i(a: m128i, b: m128i) -> m128i {
414   m128i(unsafe { _mm_cmpeq_epi8(a.0, b.0) })
415 }
416 
417 /// Lanewise `a == b` with lanes as `i16`.
418 ///
419 /// All bits 1 for true (`-1`), all bit 0 for false (`0`).
420 /// ```
421 /// # use safe_arch::*;
422 /// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
423 /// let b = m128i::from([5_i16, 2, 7, 4, -15, -26, -37, -4]);
424 /// let c: [i16; 8] = cmp_eq_mask_i16_m128i(a, b).into();
425 /// assert_eq!(c, [0, -1, 0, -1, 0, 0, 0, -1]);
426 /// ```
427 #[must_use]
428 #[inline(always)]
429 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_eq_mask_i16_m128i(a: m128i, b: m128i) -> m128i430 pub fn cmp_eq_mask_i16_m128i(a: m128i, b: m128i) -> m128i {
431   m128i(unsafe { _mm_cmpeq_epi16(a.0, b.0) })
432 }
433 
434 /// Lanewise `a == b` with lanes as `i32`.
435 ///
436 /// All bits 1 for true (`-1`), all bit 0 for false (`0`).
437 /// ```
438 /// # use safe_arch::*;
439 /// let a = m128i::from([1, 2, 3, 4]);
440 /// let b = m128i::from([5, 2, 7, 4]);
441 /// let c: [i32; 4] = cmp_eq_mask_i32_m128i(a, b).into();
442 /// assert_eq!(c, [0, -1, 0, -1]);
443 /// ```
444 #[must_use]
445 #[inline(always)]
446 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_eq_mask_i32_m128i(a: m128i, b: m128i) -> m128i447 pub fn cmp_eq_mask_i32_m128i(a: m128i, b: m128i) -> m128i {
448   m128i(unsafe { _mm_cmpeq_epi32(a.0, b.0) })
449 }
450 
451 /// Lanewise `a == b`, mask output.
452 ///
453 /// Mask output.
454 /// ```
455 /// # use safe_arch::*;
456 /// let a = m128d::from_array([1.0, 0.0]);
457 /// let b = m128d::from_array([1.0, 1.0]);
458 /// let c = cmp_eq_mask_m128d(a, b).to_bits();
459 /// assert_eq!(c, [u64::MAX, 0]);
460 /// ```
461 #[must_use]
462 #[inline(always)]
463 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_eq_mask_m128d(a: m128d, b: m128d) -> m128d464 pub fn cmp_eq_mask_m128d(a: m128d, b: m128d) -> m128d {
465   m128d(unsafe { _mm_cmpeq_pd(a.0, b.0) })
466 }
467 
468 /// Low lane `a == b`, other lanes unchanged.
469 ///
470 /// Mask output.
471 /// ```
472 /// # use safe_arch::*;
473 /// let a = m128d::from_array([1.0, 5.0]);
474 /// let b = m128d::from_array([1.0, 1.0]);
475 /// let c = cmp_eq_mask_m128d_s(a, b).to_bits();
476 /// assert_eq!(c, [u64::MAX, 5_f64.to_bits()]);
477 /// ```
478 #[must_use]
479 #[inline(always)]
480 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_eq_mask_m128d_s(a: m128d, b: m128d) -> m128d481 pub fn cmp_eq_mask_m128d_s(a: m128d, b: m128d) -> m128d {
482   m128d(unsafe { _mm_cmpeq_sd(a.0, b.0) })
483 }
484 
485 /// Lanewise `a >= b`.
486 ///
487 /// Mask output.
488 /// ```
489 /// # use safe_arch::*;
490 /// let a = m128d::from_array([3.0, 1.0]);
491 /// let b = m128d::from_array([1.0, 1.0]);
492 /// let c = cmp_ge_mask_m128d(a, b).to_bits();
493 /// assert_eq!(c, [u64::MAX, u64::MAX]);
494 /// ```
495 #[must_use]
496 #[inline(always)]
497 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_ge_mask_m128d(a: m128d, b: m128d) -> m128d498 pub fn cmp_ge_mask_m128d(a: m128d, b: m128d) -> m128d {
499   m128d(unsafe { _mm_cmpge_pd(a.0, b.0) })
500 }
501 
502 /// Low lane `a >= b`, other lanes unchanged.
503 ///
504 /// Mask output.
505 /// ```
506 /// # use safe_arch::*;
507 /// let a = m128d::from_array([1.0, 5.0]);
508 /// let b = m128d::from_array([1.0, 1.0]);
509 /// let c = cmp_ge_mask_m128d_s(a, b).to_bits();
510 /// assert_eq!(c, [u64::MAX, 5_f64.to_bits()]);
511 /// ```
512 #[must_use]
513 #[inline(always)]
514 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_ge_mask_m128d_s(a: m128d, b: m128d) -> m128d515 pub fn cmp_ge_mask_m128d_s(a: m128d, b: m128d) -> m128d {
516   m128d(unsafe { _mm_cmpge_sd(a.0, b.0) })
517 }
518 
519 /// Lanewise `a > b` with lanes as `i8`.
520 ///
521 /// All bits 1 for true (`-1`), all bit 0 for false (`0`).
522 /// ```
523 /// # use safe_arch::*;
524 /// let a = m128i::from([1_i8, 1, 20, 3, 40, 5, 60, 7, 80, 9, 10, 11, 12, 13, 14, 127]);
525 /// let b = m128i::from([0_i8, 11, 2, 13, 4, 15, 6, 17, 8, 19, -20, 21, 22, -23, 24, 120]);
526 /// let c: [i8; 16] = cmp_gt_mask_i8_m128i(a, b).into();
527 /// assert_eq!(c, [-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, 0, -1, 0, -1]);
528 /// ```
529 #[must_use]
530 #[inline(always)]
531 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_gt_mask_i8_m128i(a: m128i, b: m128i) -> m128i532 pub fn cmp_gt_mask_i8_m128i(a: m128i, b: m128i) -> m128i {
533   m128i(unsafe { _mm_cmpgt_epi8(a.0, b.0) })
534 }
535 
536 /// Lanewise `a > b` with lanes as `i16`.
537 ///
538 /// All bits 1 for true (`-1`), all bit 0 for false (`0`).
539 /// ```
540 /// # use safe_arch::*;
541 /// let a = m128i::from([1_i16, 20, 3, 40, -1, -2, -3, 0]);
542 /// let b = m128i::from([5_i16, 2, 7, 4, -15, -26, -37, -4]);
543 /// let c: [i16; 8] = cmp_gt_mask_i16_m128i(a, b).into();
544 /// assert_eq!(c, [0, -1, 0, -1, -1, -1, -1, -1]);
545 /// ```
546 #[must_use]
547 #[inline(always)]
548 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_gt_mask_i16_m128i(a: m128i, b: m128i) -> m128i549 pub fn cmp_gt_mask_i16_m128i(a: m128i, b: m128i) -> m128i {
550   m128i(unsafe { _mm_cmpgt_epi16(a.0, b.0) })
551 }
552 
553 /// Lanewise `a > b` with lanes as `i32`.
554 ///
555 /// All bits 1 for true (`-1`), all bit 0 for false (`0`).
556 /// ```
557 /// # use safe_arch::*;
558 /// let a = m128i::from([1, 20, 7, 40]);
559 /// let b = m128i::from([5, 2, 7, 4]);
560 /// let c: [i32; 4] = cmp_gt_mask_i32_m128i(a, b).into();
561 /// assert_eq!(c, [0, -1, 0, -1]);
562 /// ```
563 #[must_use]
564 #[inline(always)]
565 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_gt_mask_i32_m128i(a: m128i, b: m128i) -> m128i566 pub fn cmp_gt_mask_i32_m128i(a: m128i, b: m128i) -> m128i {
567   m128i(unsafe { _mm_cmpgt_epi32(a.0, b.0) })
568 }
569 
570 /// Lanewise `a > b`.
571 ///
572 /// Mask output.
573 /// ```
574 /// # use safe_arch::*;
575 /// let a = m128d::from_array([2.0, 0.0]);
576 /// let b = m128d::from_array([1.0, 1.0]);
577 /// let c = cmp_gt_mask_m128d(a, b).to_bits();
578 /// assert_eq!(c, [u64::MAX, 0]);
579 /// ```
580 #[must_use]
581 #[inline(always)]
582 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_gt_mask_m128d(a: m128d, b: m128d) -> m128d583 pub fn cmp_gt_mask_m128d(a: m128d, b: m128d) -> m128d {
584   m128d(unsafe { _mm_cmpgt_pd(a.0, b.0) })
585 }
586 
587 /// Low lane `a > b`, other lanes unchanged.
588 ///
589 /// Mask output.
590 /// ```
591 /// # use safe_arch::*;
592 /// let a = m128d::from_array([2.0, 5.0]);
593 /// let b = m128d::from_array([1.0, 1.0]);
594 /// let c = cmp_gt_mask_m128d_s(a, b).to_bits();
595 /// assert_eq!(c, [u64::MAX, 5_f64.to_bits()]);
596 /// ```
597 #[must_use]
598 #[inline(always)]
599 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_gt_mask_m128d_s(a: m128d, b: m128d) -> m128d600 pub fn cmp_gt_mask_m128d_s(a: m128d, b: m128d) -> m128d {
601   m128d(unsafe { _mm_cmpgt_sd(a.0, b.0) })
602 }
603 
604 /// Lanewise `a <= b`.
605 ///
606 /// Mask output.
607 /// ```
608 /// # use safe_arch::*;
609 /// let a = m128d::from_array([0.0, 1.0]);
610 /// let b = m128d::from_array([1.0, 1.0]);
611 /// let c = cmp_le_mask_m128d(a, b).to_bits();
612 /// assert_eq!(c, [u64::MAX, u64::MAX]);
613 /// ```
614 #[must_use]
615 #[inline(always)]
616 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_le_mask_m128d(a: m128d, b: m128d) -> m128d617 pub fn cmp_le_mask_m128d(a: m128d, b: m128d) -> m128d {
618   m128d(unsafe { _mm_cmple_pd(a.0, b.0) })
619 }
620 
621 /// Low lane `a <= b`, other lanes unchanged.
622 ///
623 /// Mask output.
624 /// ```
625 /// # use safe_arch::*;
626 /// let a = m128d::from_array([0.0, 5.0]);
627 /// let b = m128d::from_array([1.0, 1.0]);
628 /// let c = cmp_le_mask_m128d_s(a, b).to_bits();
629 /// assert_eq!(c, [u64::MAX, 5_f64.to_bits()]);
630 /// ```
631 #[must_use]
632 #[inline(always)]
633 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_le_mask_m128d_s(a: m128d, b: m128d) -> m128d634 pub fn cmp_le_mask_m128d_s(a: m128d, b: m128d) -> m128d {
635   m128d(unsafe { _mm_cmple_sd(a.0, b.0) })
636 }
637 
638 /// Lanewise `a < b` with lanes as `i8`.
639 ///
640 /// All bits 1 for true (`-1`), all bit 0 for false (`0`).
641 /// ```
642 /// # use safe_arch::*;
643 /// let a = m128i::from([1_i8, 1, 20, 3, 40, 5, 60, 7, 80, 9, 10, 11, 12, 13, 14, 127]);
644 /// let b = m128i::from([0_i8, 11, 2, 13, 4, 15, 6, 17, 8, 19, -20, 21, 22, -23, 24, 120]);
645 /// let c: [i8; 16] = cmp_lt_mask_i8_m128i(a, b).into();
646 /// assert_eq!(c, [0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, -1, 0]);
647 /// ```
648 #[must_use]
649 #[inline(always)]
650 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_lt_mask_i8_m128i(a: m128i, b: m128i) -> m128i651 pub fn cmp_lt_mask_i8_m128i(a: m128i, b: m128i) -> m128i {
652   m128i(unsafe { _mm_cmplt_epi8(a.0, b.0) })
653 }
654 
655 /// Lanewise `a < b` with lanes as `i16`.
656 ///
657 /// All bits 1 for true (`-1`), all bit 0 for false (`0`).
658 /// ```
659 /// # use safe_arch::*;
660 /// let a = m128i::from([1_i16, 20, 3, 40, -1, -2, -3, 0]);
661 /// let b = m128i::from([5_i16, 2, 7, 4, -15, -26, -37, -4]);
662 /// let c: [i16; 8] = cmp_lt_mask_i16_m128i(a, b).into();
663 /// assert_eq!(c, [-1, 0, -1, 0, 0, 0, 0, 0]);
664 /// ```
665 #[must_use]
666 #[inline(always)]
667 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_lt_mask_i16_m128i(a: m128i, b: m128i) -> m128i668 pub fn cmp_lt_mask_i16_m128i(a: m128i, b: m128i) -> m128i {
669   m128i(unsafe { _mm_cmplt_epi16(a.0, b.0) })
670 }
671 
672 /// Lanewise `a < b` with lanes as `i32`.
673 ///
674 /// All bits 1 for true (`-1`), all bit 0 for false (`0`).
675 /// ```
676 /// # use safe_arch::*;
677 /// let a = m128i::from([1, 20, 7, 40]);
678 /// let b = m128i::from([5, 2, 7, 4]);
679 /// let c: [i32; 4] = cmp_lt_mask_i32_m128i(a, b).into();
680 /// assert_eq!(c, [-1, 0, 0, 0]);
681 /// ```
682 #[must_use]
683 #[inline(always)]
684 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_lt_mask_i32_m128i(a: m128i, b: m128i) -> m128i685 pub fn cmp_lt_mask_i32_m128i(a: m128i, b: m128i) -> m128i {
686   m128i(unsafe { _mm_cmplt_epi32(a.0, b.0) })
687 }
688 
689 /// Lanewise `a < b`.
690 ///
691 /// Mask output.
692 /// ```
693 /// # use safe_arch::*;
694 /// let a = m128d::from_array([0.0, 7.0]);
695 /// let b = m128d::from_array([1.0, 1.0]);
696 /// let c = cmp_lt_mask_m128d(a, b).to_bits();
697 /// assert_eq!(c, [u64::MAX, 0]);
698 /// ```
699 #[must_use]
700 #[inline(always)]
701 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_lt_mask_m128d(a: m128d, b: m128d) -> m128d702 pub fn cmp_lt_mask_m128d(a: m128d, b: m128d) -> m128d {
703   m128d(unsafe { _mm_cmplt_pd(a.0, b.0) })
704 }
705 
706 /// Low lane `a < b`, other lane unchanged.
707 ///
708 /// Mask output.
709 /// ```
710 /// # use safe_arch::*;
711 /// let a = m128d::from_array([0.0, 5.0]);
712 /// let b = m128d::from_array([1.0, 1.0]);
713 /// let c = cmp_lt_mask_m128d_s(a, b).to_bits();
714 /// assert_eq!(c, [u64::MAX, 5_f64.to_bits()]);
715 /// ```
716 #[must_use]
717 #[inline(always)]
718 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_lt_mask_m128d_s(a: m128d, b: m128d) -> m128d719 pub fn cmp_lt_mask_m128d_s(a: m128d, b: m128d) -> m128d {
720   m128d(unsafe { _mm_cmplt_sd(a.0, b.0) })
721 }
722 
723 /// Lanewise `a != b`.
724 ///
725 /// Mask output.
726 /// ```
727 /// # use safe_arch::*;
728 /// let a = m128d::from_array([3.0, 1.0]);
729 /// let b = m128d::from_array([1.0, 1.0]);
730 /// let c = cmp_neq_mask_m128d(a, b).to_bits();
731 /// assert_eq!(c, [u64::MAX, 0]);
732 /// ```
733 #[must_use]
734 #[inline(always)]
735 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_neq_mask_m128d(a: m128d, b: m128d) -> m128d736 pub fn cmp_neq_mask_m128d(a: m128d, b: m128d) -> m128d {
737   m128d(unsafe { _mm_cmpneq_pd(a.0, b.0) })
738 }
739 
740 /// Low lane `a != b`, other lane unchanged.
741 ///
742 /// Mask output.
743 /// ```
744 /// # use safe_arch::*;
745 /// let a = m128d::from_array([2.0, 5.0]);
746 /// let b = m128d::from_array([1.0, 1.0]);
747 /// let c = cmp_neq_mask_m128d_s(a, b).to_bits();
748 /// assert_eq!(c, [u64::MAX, 5_f64.to_bits()]);
749 /// ```
750 #[must_use]
751 #[inline(always)]
752 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_neq_mask_m128d_s(a: m128d, b: m128d) -> m128d753 pub fn cmp_neq_mask_m128d_s(a: m128d, b: m128d) -> m128d {
754   m128d(unsafe { _mm_cmpneq_sd(a.0, b.0) })
755 }
756 
757 /// Lanewise `!(a >= b)`.
758 ///
759 /// Mask output.
760 /// ```
761 /// # use safe_arch::*;
762 /// let a = m128d::from_array([3.0, 0.0]);
763 /// let b = m128d::from_array([1.0, 1.0]);
764 /// let c = cmp_nge_mask_m128d(a, b).to_bits();
765 /// assert_eq!(c, [0, u64::MAX]);
766 /// ```
767 #[must_use]
768 #[inline(always)]
769 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_nge_mask_m128d(a: m128d, b: m128d) -> m128d770 pub fn cmp_nge_mask_m128d(a: m128d, b: m128d) -> m128d {
771   m128d(unsafe { _mm_cmpnge_pd(a.0, b.0) })
772 }
773 
774 /// Low lane `!(a >= b)`, other lane unchanged.
775 ///
776 /// Mask output.
777 /// ```
778 /// # use safe_arch::*;
779 /// let a = m128d::from_array([2.0, 5.0]);
780 /// let b = m128d::from_array([1.0, 1.0]);
781 /// let c = cmp_nge_mask_m128d_s(a, b).to_bits();
782 /// assert_eq!(c, [0, 5_f64.to_bits()]);
783 /// ```
784 #[must_use]
785 #[inline(always)]
786 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_nge_mask_m128d_s(a: m128d, b: m128d) -> m128d787 pub fn cmp_nge_mask_m128d_s(a: m128d, b: m128d) -> m128d {
788   m128d(unsafe { _mm_cmpnge_sd(a.0, b.0) })
789 }
790 
791 /// Lanewise `!(a > b)`.
792 ///
793 /// Mask output.
794 /// ```
795 /// # use safe_arch::*;
796 /// let a = m128d::from_array([3.0, 0.0]);
797 /// let b = m128d::from_array([1.0, 1.0]);
798 /// let c = cmp_ngt_mask_m128d(a, b).to_bits();
799 /// assert_eq!(c, [0, u64::MAX]);
800 /// ```
801 #[must_use]
802 #[inline(always)]
803 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_ngt_mask_m128d(a: m128d, b: m128d) -> m128d804 pub fn cmp_ngt_mask_m128d(a: m128d, b: m128d) -> m128d {
805   m128d(unsafe { _mm_cmpngt_pd(a.0, b.0) })
806 }
807 
808 /// Low lane `!(a > b)`, other lane unchanged.
809 ///
810 /// Mask output.
811 /// ```
812 /// # use safe_arch::*;
813 /// let a = m128d::from_array([2.0, 5.0]);
814 /// let b = m128d::from_array([1.0, 1.0]);
815 /// let c = cmp_ngt_mask_m128d_s(a, b).to_bits();
816 /// assert_eq!(c, [0, 5_f64.to_bits()]);
817 /// ```
818 #[must_use]
819 #[inline(always)]
820 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_ngt_mask_m128d_s(a: m128d, b: m128d) -> m128d821 pub fn cmp_ngt_mask_m128d_s(a: m128d, b: m128d) -> m128d {
822   m128d(unsafe { _mm_cmpngt_sd(a.0, b.0) })
823 }
824 
825 /// Lanewise `!(a <= b)`.
826 ///
827 /// Mask output.
828 /// ```
829 /// # use safe_arch::*;
830 /// let a = m128d::from_array([3.0, 0.0]);
831 /// let b = m128d::from_array([1.0, 1.0]);
832 /// let c = cmp_nle_mask_m128d(a, b).to_bits();
833 /// assert_eq!(c, [u64::MAX, 0]);
834 /// ```
835 #[must_use]
836 #[inline(always)]
837 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_nle_mask_m128d(a: m128d, b: m128d) -> m128d838 pub fn cmp_nle_mask_m128d(a: m128d, b: m128d) -> m128d {
839   m128d(unsafe { _mm_cmpnle_pd(a.0, b.0) })
840 }
841 
842 /// Low lane `!(a <= b)`, other lane unchanged.
843 ///
844 /// Mask output.
845 /// ```
846 /// # use safe_arch::*;
847 /// let a = m128d::from_array([2.0, 5.0]);
848 /// let b = m128d::from_array([1.0, 1.0]);
849 /// let c = cmp_nle_mask_m128d_s(a, b).to_bits();
850 /// assert_eq!(c, [u64::MAX, 5_f64.to_bits()]);
851 /// ```
852 #[must_use]
853 #[inline(always)]
854 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_nle_mask_m128d_s(a: m128d, b: m128d) -> m128d855 pub fn cmp_nle_mask_m128d_s(a: m128d, b: m128d) -> m128d {
856   m128d(unsafe { _mm_cmpnle_sd(a.0, b.0) })
857 }
858 
859 /// Lanewise `!(a < b)`.
860 ///
861 /// Mask output.
862 /// ```
863 /// # use safe_arch::*;
864 /// let a = m128d::from_array([3.0, 0.0]);
865 /// let b = m128d::from_array([1.0, 1.0]);
866 /// let c = cmp_nlt_mask_m128d(a, b).to_bits();
867 /// assert_eq!(c, [u64::MAX, 0]);
868 /// ```
869 #[must_use]
870 #[inline(always)]
871 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_nlt_mask_m128d(a: m128d, b: m128d) -> m128d872 pub fn cmp_nlt_mask_m128d(a: m128d, b: m128d) -> m128d {
873   m128d(unsafe { _mm_cmpnlt_pd(a.0, b.0) })
874 }
875 
876 /// Low lane `!(a < b)`, other lane unchanged.
877 ///
878 /// Mask output.
879 /// ```
880 /// # use safe_arch::*;
881 /// let a = m128d::from_array([2.0, 5.0]);
882 /// let b = m128d::from_array([1.0, 1.0]);
883 /// let c = cmp_nlt_mask_m128d_s(a, b).to_bits();
884 /// assert_eq!(c, [u64::MAX, 5_f64.to_bits()]);
885 /// ```
886 #[must_use]
887 #[inline(always)]
888 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_nlt_mask_m128d_s(a: m128d, b: m128d) -> m128d889 pub fn cmp_nlt_mask_m128d_s(a: m128d, b: m128d) -> m128d {
890   m128d(unsafe { _mm_cmpnlt_sd(a.0, b.0) })
891 }
892 
893 /// Lanewise `(!a.is_nan()) & (!b.is_nan())`.
894 ///
895 /// Mask output.
896 /// ```
897 /// # use safe_arch::*;
898 /// let a = m128d::from_array([3.0, f64::NAN]);
899 /// let b = m128d::from_array([1.0, 1.0]);
900 /// let c = cmp_ordered_mask_m128d(a, b).to_bits();
901 /// assert_eq!(c, [u64::MAX, 0]);
902 /// ```
903 #[must_use]
904 #[inline(always)]
905 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_ordered_mask_m128d(a: m128d, b: m128d) -> m128d906 pub fn cmp_ordered_mask_m128d(a: m128d, b: m128d) -> m128d {
907   m128d(unsafe { _mm_cmpord_pd(a.0, b.0) })
908 }
909 
910 /// Low lane `(!a.is_nan()) & (!b.is_nan())`, other lane unchanged.
911 ///
912 /// Mask output.
913 /// ```
914 /// # use safe_arch::*;
915 /// let a = m128d::from_array([2.0, 5.0]);
916 /// let b = m128d::from_array([1.0, 1.0]);
917 /// let c = cmp_ordered_mask_m128d_s(a, b).to_bits();
918 /// assert_eq!(c, [u64::MAX, 5_f64.to_bits()]);
919 /// ```
920 #[must_use]
921 #[inline(always)]
922 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_ordered_mask_m128d_s(a: m128d, b: m128d) -> m128d923 pub fn cmp_ordered_mask_m128d_s(a: m128d, b: m128d) -> m128d {
924   m128d(unsafe { _mm_cmpord_sd(a.0, b.0) })
925 }
926 
927 /// Lanewise `a.is_nan() | b.is_nan()`.
928 ///
929 /// Mask output.
930 /// ```
931 /// # use safe_arch::*;
932 /// let a = m128d::from_array([f64::NAN, 0.0]);
933 /// let b = m128d::from_array([1.0, 1.0]);
934 /// let c = cmp_unord_mask_m128d(a, b).to_bits();
935 /// assert_eq!(c, [u64::MAX, 0]);
936 /// ```
937 #[must_use]
938 #[inline(always)]
939 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_unord_mask_m128d(a: m128d, b: m128d) -> m128d940 pub fn cmp_unord_mask_m128d(a: m128d, b: m128d) -> m128d {
941   m128d(unsafe { _mm_cmpunord_pd(a.0, b.0) })
942 }
943 
944 /// Low lane `a.is_nan() | b.is_nan()`, other lane unchanged.
945 ///
946 /// Mask output.
947 /// ```
948 /// # use safe_arch::*;
949 /// let a = m128d::from_array([f64::NAN, 5.0]);
950 /// let b = m128d::from_array([1.0, 1.0]);
951 /// let c = cmp_unord_mask_m128d_s(a, b).to_bits();
952 /// assert_eq!(c, [u64::MAX, 5_f64.to_bits()]);
953 /// ```
954 #[must_use]
955 #[inline(always)]
956 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_unord_mask_m128d_s(a: m128d, b: m128d) -> m128d957 pub fn cmp_unord_mask_m128d_s(a: m128d, b: m128d) -> m128d {
958   m128d(unsafe { _mm_cmpunord_sd(a.0, b.0) })
959 }
960 
961 /// Low lane `f64` equal to.
962 ///
963 /// `i32` output.
964 /// ```
965 /// # use safe_arch::*;
966 /// let a = m128d::from_array([1.0, 5.0]);
967 /// let b = m128d::from_array([1.0, 1.0]);
968 /// assert_eq!(1_i32, cmp_eq_i32_m128d_s(a, b));
969 /// ```
970 #[must_use]
971 #[inline(always)]
972 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_eq_i32_m128d_s(a: m128d, b: m128d) -> i32973 pub fn cmp_eq_i32_m128d_s(a: m128d, b: m128d) -> i32 {
974   unsafe { _mm_comieq_sd(a.0, b.0) }
975 }
976 
977 /// Low lane `f64` greater than or equal to.
978 ///
979 /// `i32` output.
980 /// ```
981 /// # use safe_arch::*;
982 /// let a = m128d::from_array([1.0, 5.0]);
983 /// let b = m128d::from_array([1.0, 1.0]);
984 /// assert_eq!(1_i32, cmp_ge_i32_m128d_s(a, b));
985 /// ```
986 #[must_use]
987 #[inline(always)]
988 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_ge_i32_m128d_s(a: m128d, b: m128d) -> i32989 pub fn cmp_ge_i32_m128d_s(a: m128d, b: m128d) -> i32 {
990   unsafe { _mm_comige_sd(a.0, b.0) }
991 }
992 
993 /// Low lane `f64` greater than.
994 ///
995 /// `i32` output.
996 /// ```
997 /// # use safe_arch::*;
998 /// let a = m128d::from_array([1.0, 5.0]);
999 /// let b = m128d::from_array([1.0, 1.0]);
1000 /// assert_eq!(1_i32, cmp_ge_i32_m128d_s(a, b));
1001 /// ```
1002 #[must_use]
1003 #[inline(always)]
1004 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_gt_i32_m128d_s(a: m128d, b: m128d) -> i321005 pub fn cmp_gt_i32_m128d_s(a: m128d, b: m128d) -> i32 {
1006   unsafe { _mm_comigt_sd(a.0, b.0) }
1007 }
1008 
1009 /// Low lane `f64` less than or equal to.
1010 ///
1011 /// `i32` output.
1012 /// ```
1013 /// # use safe_arch::*;
1014 /// let a = m128d::from_array([1.0, 5.0]);
1015 /// let b = m128d::from_array([1.0, 1.0]);
1016 /// assert_eq!(1_i32, cmp_le_i32_m128d_s(a, b));
1017 /// ```
1018 #[must_use]
1019 #[inline(always)]
1020 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_le_i32_m128d_s(a: m128d, b: m128d) -> i321021 pub fn cmp_le_i32_m128d_s(a: m128d, b: m128d) -> i32 {
1022   unsafe { _mm_comile_sd(a.0, b.0) }
1023 }
1024 
1025 /// Low lane `f64` less than.
1026 ///
1027 /// `i32` output.
1028 /// ```
1029 /// # use safe_arch::*;
1030 /// let a = m128d::from_array([0.0, 5.0]);
1031 /// let b = m128d::from_array([1.0, 1.0]);
1032 /// assert_eq!(1_i32, cmp_lt_i32_m128d_s(a, b));
1033 /// ```
1034 #[must_use]
1035 #[inline(always)]
1036 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_lt_i32_m128d_s(a: m128d, b: m128d) -> i321037 pub fn cmp_lt_i32_m128d_s(a: m128d, b: m128d) -> i32 {
1038   unsafe { _mm_comilt_sd(a.0, b.0) }
1039 }
1040 
1041 /// Low lane `f64` less than.
1042 ///
1043 /// `i32` output.
1044 /// ```
1045 /// # use safe_arch::*;
1046 /// let a = m128d::from_array([0.0, 5.0]);
1047 /// let b = m128d::from_array([1.0, 1.0]);
1048 /// assert_eq!(1_i32, cmp_neq_i32_m128d_s(a, b));
1049 /// ```
1050 #[must_use]
1051 #[inline(always)]
1052 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
cmp_neq_i32_m128d_s(a: m128d, b: m128d) -> i321053 pub fn cmp_neq_i32_m128d_s(a: m128d, b: m128d) -> i32 {
1054   unsafe { _mm_comineq_sd(a.0, b.0) }
1055 }
1056 
1057 /// Rounds the lower two `i32` lanes to two `f64` lanes.
1058 /// ```
1059 /// # use safe_arch::*;
1060 /// let a = m128i::from([1, 2, 3, 4]);
1061 /// let b = convert_to_m128d_from_lower2_i32_m128i(a);
1062 /// let c = m128d::from_array([1.0, 2.0]);
1063 /// assert_eq!(b.to_bits(), c.to_bits());
1064 /// ```
1065 /// * **Intrinsic:** [`_mm_cvtepi32_pd`]
1066 /// * **Assembly:** `cvtdq2pd xmm, xmm`
1067 #[must_use]
1068 #[inline(always)]
1069 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
convert_to_m128d_from_lower2_i32_m128i(a: m128i) -> m128d1070 pub fn convert_to_m128d_from_lower2_i32_m128i(a: m128i) -> m128d {
1071   m128d(unsafe { _mm_cvtepi32_pd(a.0) })
1072 }
1073 
1074 /// Rounds the four `i32` lanes to four `f32` lanes.
1075 /// ```
1076 /// # use safe_arch::*;
1077 /// let a = m128i::from([1, 2, 3, 4]);
1078 /// let b = convert_to_m128_from_i32_m128i(a);
1079 /// let c = m128::from_array([1.0, 2.0, 3.0, 4.0]);
1080 /// assert_eq!(b.to_bits(), c.to_bits());
1081 /// ```
1082 /// * **Intrinsic:** [`_mm_cvtepi32_ps`]
1083 /// * **Assembly:** `cvtdq2ps xmm, xmm`
1084 #[must_use]
1085 #[inline(always)]
1086 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
convert_to_m128_from_i32_m128i(a: m128i) -> m1281087 pub fn convert_to_m128_from_i32_m128i(a: m128i) -> m128 {
1088   m128(unsafe { _mm_cvtepi32_ps(a.0) })
1089 }
1090 
1091 /// Rounds the two `f64` lanes to the low two `i32` lanes.
1092 /// ```
1093 /// # use safe_arch::*;
1094 /// let a = m128d::from_array([1.0, 2.5]);
1095 /// let b = convert_to_i32_m128i_from_m128d(a);
1096 /// let c: [i32; 4] = b.into();
1097 /// assert_eq!(c, [1, 2, 0, 0]);
1098 /// ```
1099 /// * **Intrinsic:** [`_mm_cvtpd_epi32`]
1100 /// * **Assembly:** `cvtpd2dq xmm, xmm`
1101 #[must_use]
1102 #[inline(always)]
1103 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
convert_to_i32_m128i_from_m128d(a: m128d) -> m128i1104 pub fn convert_to_i32_m128i_from_m128d(a: m128d) -> m128i {
1105   m128i(unsafe { _mm_cvtpd_epi32(a.0) })
1106 }
1107 
1108 /// Rounds the two `f64` lanes to the low two `f32` lanes.
1109 /// ```
1110 /// # use safe_arch::*;
1111 /// let a = m128d::from_array([1.0, 2.5]);
1112 /// let b = convert_to_m128_from_m128d(a);
1113 /// assert_eq!(b.to_bits(), [1_f32.to_bits(), 2.5_f32.to_bits(), 0, 0]);
1114 /// ```
1115 /// * **Intrinsic:** [`_mm_cvtpd_ps`]
1116 /// * **Assembly:** `cvtpd2ps xmm, xmm`
1117 #[must_use]
1118 #[inline(always)]
1119 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
convert_to_m128_from_m128d(a: m128d) -> m1281120 pub fn convert_to_m128_from_m128d(a: m128d) -> m128 {
1121   m128(unsafe { _mm_cvtpd_ps(a.0) })
1122 }
1123 
1124 /// Rounds the `f32` lanes to `i32` lanes.
1125 /// ```
1126 /// # use safe_arch::*;
1127 /// let a = m128::from_array([1.0, 2.5, 3.0, 4.0]);
1128 /// let b = convert_to_i32_m128i_from_m128(a);
1129 /// let c: [i32; 4] = b.into();
1130 /// assert_eq!(c, [1, 2, 3, 4]);
1131 /// ```
1132 /// * **Intrinsic:** [`_mm_cvtps_epi32`]
1133 /// * **Assembly:** `cvtps2dq xmm, xmm`
1134 #[must_use]
1135 #[inline(always)]
1136 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
convert_to_i32_m128i_from_m128(a: m128) -> m128i1137 pub fn convert_to_i32_m128i_from_m128(a: m128) -> m128i {
1138   m128i(unsafe { _mm_cvtps_epi32(a.0) })
1139 }
1140 
1141 /// Rounds the two `f64` lanes to the low two `f32` lanes.
1142 /// ```
1143 /// # use safe_arch::*;
1144 /// let a = m128::from_array([1.0, 2.5, 3.6, 4.7]);
1145 /// let b = convert_to_m128d_from_lower2_m128(a);
1146 /// assert_eq!(b.to_bits(), [1_f64.to_bits(), 2.5_f64.to_bits()]);
1147 /// ```
1148 /// * **Intrinsic:** [`_mm_cvtps_pd`]
1149 /// * **Assembly:** `cvtps2pd xmm, xmm`
1150 #[must_use]
1151 #[inline(always)]
1152 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
convert_to_m128d_from_lower2_m128(a: m128) -> m128d1153 pub fn convert_to_m128d_from_lower2_m128(a: m128) -> m128d {
1154   m128d(unsafe { _mm_cvtps_pd(a.0) })
1155 }
1156 
1157 /// Gets the lower lane as an `f64` value.
1158 /// ```
1159 /// # use safe_arch::*;
1160 /// let a = m128d::from_array([1.0, 2.5]);
1161 /// let b = get_f64_from_m128d_s(a);
1162 /// assert_eq!(b, 1.0_f64);
1163 /// ```
1164 #[must_use]
1165 #[inline(always)]
1166 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
get_f64_from_m128d_s(a: m128d) -> f641167 pub fn get_f64_from_m128d_s(a: m128d) -> f64 {
1168   unsafe { _mm_cvtsd_f64(a.0) }
1169 }
1170 
1171 /// Converts the lower lane to an `i32` value.
1172 /// ```
1173 /// # use safe_arch::*;
1174 /// let a = m128d::from_array([1.0, 2.5]);
1175 /// let b = get_i32_from_m128d_s(a);
1176 /// assert_eq!(b, 1_i32);
1177 /// ```
1178 #[must_use]
1179 #[inline(always)]
1180 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
get_i32_from_m128d_s(a: m128d) -> i321181 pub fn get_i32_from_m128d_s(a: m128d) -> i32 {
1182   unsafe { _mm_cvtsd_si32(a.0) }
1183 }
1184 
1185 /// Converts the lower lane to an `i64` value.
1186 /// ```
1187 /// # use safe_arch::*;
1188 /// let a = m128d::from_array([1.0, 2.5]);
1189 /// let b = get_i64_from_m128d_s(a);
1190 /// assert_eq!(b, 1_i64);
1191 /// ```
1192 #[must_use]
1193 #[inline(always)]
1194 #[cfg(target_arch = "x86_64")]
1195 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
get_i64_from_m128d_s(a: m128d) -> i641196 pub fn get_i64_from_m128d_s(a: m128d) -> i64 {
1197   unsafe { _mm_cvtsd_si64(a.0) }
1198 }
1199 
1200 /// Converts the low `f64` to `f32` and replaces the low lane of the input.
1201 /// ```
1202 /// # use safe_arch::*;
1203 /// let a = m128::from_array([3.0, 4.0, 5.0, 6.0]);
1204 /// let b = m128d::from_array([1.0, 2.5]);
1205 /// let c = convert_m128d_s_replace_m128_s(a, b);
1206 /// assert_eq!(c.to_array(), [1.0, 4.0, 5.0, 6.0]);
1207 /// ```
1208 /// * **Intrinsic:** [`_mm_cvtsd_ss`]
1209 /// * **Assembly:** `cvtsd2ss xmm, xmm`
1210 #[must_use]
1211 #[inline(always)]
1212 #[cfg(target_arch = "x86_64")]
1213 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
convert_m128d_s_replace_m128_s(a: m128, b: m128d) -> m1281214 pub fn convert_m128d_s_replace_m128_s(a: m128, b: m128d) -> m128 {
1215   m128(unsafe { _mm_cvtsd_ss(a.0, b.0) })
1216 }
1217 
1218 /// Converts the lower lane to an `i32` value.
1219 /// ```
1220 /// # use safe_arch::*;
1221 /// let a = m128i::from([1, 3, 5, 7]);
1222 /// let b = get_i32_from_m128i_s(a);
1223 /// assert_eq!(b, 1_i32);
1224 /// ```
1225 #[must_use]
1226 #[inline(always)]
1227 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
get_i32_from_m128i_s(a: m128i) -> i321228 pub fn get_i32_from_m128i_s(a: m128i) -> i32 {
1229   unsafe { _mm_cvtsi128_si32(a.0) }
1230 }
1231 
1232 /// Converts the lower lane to an `i64` value.
1233 /// ```
1234 /// # use safe_arch::*;
1235 /// let a = m128i::from([1_i64, 3]);
1236 /// let b = get_i64_from_m128i_s(a);
1237 /// assert_eq!(b, 1_i64);
1238 /// ```
1239 #[must_use]
1240 #[inline(always)]
1241 #[cfg(target_arch = "x86_64")]
1242 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
get_i64_from_m128i_s(a: m128i) -> i641243 pub fn get_i64_from_m128i_s(a: m128i) -> i64 {
1244   unsafe { _mm_cvtsi128_si64(a.0) }
1245 }
1246 
1247 /// Convert `i32` to `f64` and replace the low lane of the input.
1248 /// ```
1249 /// # use safe_arch::*;
1250 /// let a = m128d::from_array([1.0, 2.0]);
1251 /// let b = convert_i32_replace_m128d_s(a, 5_i32);
1252 /// assert_eq!(b.to_array(), [5.0, 2.0]);
1253 /// ```
1254 /// * **Intrinsic:** [`_mm_cvtsi32_sd`]
1255 /// * **Assembly:** `cvtsi2sd xmm, r32`
1256 #[must_use]
1257 #[inline(always)]
1258 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
convert_i32_replace_m128d_s(a: m128d, i: i32) -> m128d1259 pub fn convert_i32_replace_m128d_s(a: m128d, i: i32) -> m128d {
1260   m128d(unsafe { _mm_cvtsi32_sd(a.0, i) })
1261 }
1262 
1263 /// Set an `i32` as the low 32-bit lane of an `m128i`, other lanes blank.
1264 /// ```
1265 /// # use safe_arch::*;
1266 /// let a: [i32; 4] = set_i32_m128i_s(1_i32).into();
1267 /// let b: [i32; 4] = m128i::from([1, 0, 0, 0]).into();
1268 /// assert_eq!(a, b);
1269 /// ```
1270 #[must_use]
1271 #[inline(always)]
1272 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
set_i32_m128i_s(i: i32) -> m128i1273 pub fn set_i32_m128i_s(i: i32) -> m128i {
1274   m128i(unsafe { _mm_cvtsi32_si128(i) })
1275 }
1276 
1277 /// Convert `i64` to `f64` and replace the low lane of the input.
1278 /// ```
1279 /// # use safe_arch::*;
1280 /// let a = m128d::from_array([1.0, 2.0]);
1281 /// let b = convert_i64_replace_m128d_s(a, 5_i64);
1282 /// assert_eq!(b.to_array(), [5.0, 2.0]);
1283 /// ```
1284 /// * **Intrinsic:** [`_mm_cvtsi64_sd`]
1285 /// * **Assembly:** `cvtsi2sd xmm, r64`
1286 #[must_use]
1287 #[inline(always)]
1288 #[cfg(target_arch = "x86_64")]
1289 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
convert_i64_replace_m128d_s(a: m128d, i: i64) -> m128d1290 pub fn convert_i64_replace_m128d_s(a: m128d, i: i64) -> m128d {
1291   m128d(unsafe { _mm_cvtsi64_sd(a.0, i) })
1292 }
1293 
1294 /// Set an `i64` as the low 64-bit lane of an `m128i`, other lanes blank.
1295 /// ```
1296 /// # use safe_arch::*;
1297 /// let a: [i64; 2] = set_i64_m128i_s(1_i64).into();
1298 /// let b: [i64; 2] = m128i::from([1_i64, 0]).into();
1299 /// assert_eq!(a, b);
1300 /// ```
1301 #[must_use]
1302 #[inline(always)]
1303 #[cfg(target_arch = "x86_64")]
1304 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
set_i64_m128i_s(i: i64) -> m128i1305 pub fn set_i64_m128i_s(i: i64) -> m128i {
1306   m128i(unsafe { _mm_cvtsi64_si128(i) })
1307 }
1308 
1309 /// Converts the lower `f32` to `f64` and replace the low lane of the input
1310 /// ```
1311 /// # use safe_arch::*;
1312 /// let a = m128d::from_array([1.0, 2.5]);
1313 /// let b = m128::from_array([3.0, 4.0, 5.0, 6.0]);
1314 /// let c = convert_m128_s_replace_m128d_s(a, b);
1315 /// assert_eq!(c.to_array(), [3.0, 2.5]);
1316 /// ```
1317 /// * **Intrinsic:** [`_mm_cvtss_sd`]
1318 /// * **Assembly:** `cvtss2sd xmm, xmm`
1319 #[must_use]
1320 #[inline(always)]
1321 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
convert_m128_s_replace_m128d_s(a: m128d, b: m128) -> m128d1322 pub fn convert_m128_s_replace_m128d_s(a: m128d, b: m128) -> m128d {
1323   m128d(unsafe { _mm_cvtss_sd(a.0, b.0) })
1324 }
1325 
1326 /// Truncate the `f64` lanes to the lower `i32` lanes (upper `i32` lanes 0).
1327 /// ```
1328 /// # use safe_arch::*;
1329 /// let a = m128d::from_array([1.1, 2.6]);
1330 /// let b = truncate_m128d_to_m128i(a);
1331 /// assert_eq!(<[i32; 4]>::from(b), [1, 2, 0, 0]);
1332 /// ```
1333 #[must_use]
1334 #[inline(always)]
1335 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
truncate_m128d_to_m128i(a: m128d) -> m128i1336 pub fn truncate_m128d_to_m128i(a: m128d) -> m128i {
1337   m128i(unsafe { _mm_cvttpd_epi32(a.0) })
1338 }
1339 
1340 /// Truncate the `f32` lanes to `i32` lanes.
1341 /// ```
1342 /// # use safe_arch::*;
1343 /// let a = m128::from_array([1.1, 2.6, 3.5, 4.0]);
1344 /// let b = truncate_m128_to_m128i(a);
1345 /// assert_eq!(<[i32; 4]>::from(b), [1, 2, 3, 4]);
1346 /// ```
1347 #[must_use]
1348 #[inline(always)]
1349 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
truncate_m128_to_m128i(a: m128) -> m128i1350 pub fn truncate_m128_to_m128i(a: m128) -> m128i {
1351   m128i(unsafe { _mm_cvttps_epi32(a.0) })
1352 }
1353 
1354 /// Truncate the lower lane into an `i32`.
1355 /// ```
1356 /// # use safe_arch::*;
1357 /// let a = m128d::from_array([1.7, 2.6]);
1358 /// assert_eq!(truncate_to_i32_m128d_s(a), 1_i32);
1359 /// ```
1360 #[must_use]
1361 #[inline(always)]
1362 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
truncate_to_i32_m128d_s(a: m128d) -> i321363 pub fn truncate_to_i32_m128d_s(a: m128d) -> i32 {
1364   unsafe { _mm_cvttsd_si32(a.0) }
1365 }
1366 
1367 /// Truncate the lower lane into an `i64`.
1368 /// ```
1369 /// # use safe_arch::*;
1370 /// let a = m128d::from_array([1.7, 2.6]);
1371 /// assert_eq!(truncate_to_i64_m128d_s(a), 1_i64);
1372 /// ```
1373 #[must_use]
1374 #[inline(always)]
1375 #[cfg(target_arch = "x86_64")]
1376 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
truncate_to_i64_m128d_s(a: m128d) -> i641377 pub fn truncate_to_i64_m128d_s(a: m128d) -> i64 {
1378   unsafe { _mm_cvttsd_si64(a.0) }
1379 }
1380 
1381 /// Lanewise `a / b`.
1382 /// ```
1383 /// # use safe_arch::*;
1384 /// let a = m128d::from_array([92.0, 42.0]);
1385 /// let b = m128d::from_array([100.0, -6.0]);
1386 /// let c = div_m128d(a, b).to_array();
1387 /// assert_eq!(c, [0.92, -7.0]);
1388 /// ```
1389 #[must_use]
1390 #[inline(always)]
1391 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
div_m128d(a: m128d, b: m128d) -> m128d1392 pub fn div_m128d(a: m128d, b: m128d) -> m128d {
1393   m128d(unsafe { _mm_div_pd(a.0, b.0) })
1394 }
1395 
1396 /// Lowest lane `a / b`, high lane unchanged.
1397 /// ```
1398 /// # use safe_arch::*;
1399 /// let a = m128d::from_array([92.0, 87.5]);
1400 /// let b = m128d::from_array([100.0, -600.0]);
1401 /// let c = div_m128d_s(a, b).to_array();
1402 /// assert_eq!(c, [0.92, 87.5]);
1403 /// ```
1404 #[must_use]
1405 #[inline(always)]
1406 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
div_m128d_s(a: m128d, b: m128d) -> m128d1407 pub fn div_m128d_s(a: m128d, b: m128d) -> m128d {
1408   m128d(unsafe { _mm_div_sd(a.0, b.0) })
1409 }
1410 
1411 /// Gets an `i16` value out of an `m128i`, returns as `i32`.
1412 ///
1413 /// The lane to get must be a constant in `0..8`.
1414 ///
1415 /// ```
1416 /// # use safe_arch::*;
1417 /// let a = m128i::from([0xA_i16, 0xB, 0xC, 0xD, 0, 0, 0, 0]);
1418 /// //
1419 /// assert_eq!(extract_i16_as_i32_m128i::<0>(a), 0xA);
1420 /// assert_eq!(extract_i16_as_i32_m128i::<1>(a), 0xB);
1421 /// ```
1422 #[must_use]
1423 #[inline(always)]
1424 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
extract_i16_as_i32_m128i<const LANE: i32>(a: m128i) -> i321425 pub fn extract_i16_as_i32_m128i<const LANE: i32>(a: m128i) -> i32 {
1426   unsafe { _mm_extract_epi16(a.0, LANE) }
1427 }
1428 
1429 /// Inserts the low 16 bits of an `i32` value into an `m128i`.
1430 ///
1431 /// The lane to get must be a constant in `0..8`.
1432 ///
1433 /// ```
1434 /// # use safe_arch::*;
1435 /// let a = m128i::from([0xA_i16, 0xB, 0xC, 0xD, 0, 0, 0, 0]);
1436 /// //
1437 /// let b = insert_i16_from_i32_m128i::<0>(a, -1);
1438 /// assert_eq!(<[i16; 8]>::from(b), [-1, 0xB, 0xC, 0xD, 0, 0, 0, 0]);
1439 /// ```
1440 #[must_use]
1441 #[inline(always)]
1442 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
insert_i16_from_i32_m128i<const LANE: i32>(a: m128i, i: i32) -> m128i1443 pub fn insert_i16_from_i32_m128i<const LANE: i32>(a: m128i, i: i32) -> m128i {
1444   m128i(unsafe { _mm_insert_epi16(a.0, i, LANE) })
1445 }
1446 
1447 /// Loads the reference into a register.
1448 /// ```
1449 /// # use safe_arch::*;
1450 /// let a = m128d::from_array([10.0, 12.0]);
1451 /// let b = load_m128d(&a);
1452 /// assert_eq!(a.to_bits(), b.to_bits());
1453 /// ```
1454 #[must_use]
1455 #[inline(always)]
1456 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
load_m128d(a: &m128d) -> m128d1457 pub fn load_m128d(a: &m128d) -> m128d {
1458   m128d(unsafe { _mm_load_pd(a as *const m128d as *const f64) })
1459 }
1460 
1461 /// Loads the `f64` reference into all lanes of a register.
1462 /// ```
1463 /// # use safe_arch::*;
1464 /// let a = 1.0;
1465 /// let b = load_f64_splat_m128d(&a);
1466 /// assert_eq!(m128d::from_array([1.0, 1.0]).to_bits(), b.to_bits());
1467 /// ```
1468 #[must_use]
1469 #[inline(always)]
1470 #[allow(clippy::trivially_copy_pass_by_ref)]
1471 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
load_f64_splat_m128d(a: &f64) -> m128d1472 pub fn load_f64_splat_m128d(a: &f64) -> m128d {
1473   m128d(unsafe { _mm_load1_pd(a) })
1474 }
1475 
1476 /// Loads the reference into the low lane of the register.
1477 /// ```
1478 /// # use safe_arch::*;
1479 /// let a = 1.0;
1480 /// let b = load_f64_m128d_s(&a);
1481 /// assert_eq!(m128d::from_array([1.0, 0.0]).to_bits(), b.to_bits());
1482 /// ```
1483 #[must_use]
1484 #[inline(always)]
1485 #[allow(clippy::trivially_copy_pass_by_ref)]
1486 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
load_f64_m128d_s(a: &f64) -> m128d1487 pub fn load_f64_m128d_s(a: &f64) -> m128d {
1488   m128d(unsafe { _mm_load_sd(a) })
1489 }
1490 
1491 /// Loads the reference into a register.
1492 /// ```
1493 /// # use safe_arch::*;
1494 /// let a = m128i::from([1, 2, 3, 4]);
1495 /// let b = load_m128i(&a);
1496 /// assert_eq!(<[i32; 4]>::from(a), <[i32; 4]>::from(b));
1497 /// ```
1498 #[must_use]
1499 #[inline(always)]
1500 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
load_m128i(a: &m128i) -> m128i1501 pub fn load_m128i(a: &m128i) -> m128i {
1502   m128i(unsafe { _mm_load_si128(a as *const m128i as *const __m128i) })
1503 }
1504 
1505 /// Loads the reference into a register, replacing the high lane.
1506 /// ```
1507 /// # use safe_arch::*;
1508 /// let a = m128d::from([1.0, 2.0]);
1509 /// let double = 7.0;
1510 /// let b = load_replace_high_m128d(a, &double);
1511 /// assert_eq!(b.to_array(), [1.0, 7.0]);
1512 /// ```
1513 #[must_use]
1514 #[inline(always)]
1515 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
load_replace_high_m128d(a: m128d, b: &f64) -> m128d1516 pub fn load_replace_high_m128d(a: m128d, b: &f64) -> m128d {
1517   m128d(unsafe { _mm_loadh_pd(a.0, b) })
1518 }
1519 
1520 /// Loads the low `i64` into a register.
1521 /// ```
1522 /// # use safe_arch::*;
1523 /// let a = m128i::from([1_i64, 2]);
1524 /// let b = load_i64_m128i_s(&a);
1525 /// assert_eq!([1_i64, 0], <[i64; 2]>::from(b));
1526 /// ```
1527 #[must_use]
1528 #[inline(always)]
1529 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
load_i64_m128i_s(a: &m128i) -> m128i1530 pub fn load_i64_m128i_s(a: &m128i) -> m128i {
1531   m128i(unsafe { _mm_loadl_epi64(a as *const m128i as *const __m128i) })
1532 }
1533 
1534 /// Loads the reference into a register, replacing the low lane.
1535 /// ```
1536 /// # use safe_arch::*;
1537 /// let a = m128d::from([1.0, 2.0]);
1538 /// let double = 7.0;
1539 /// let b = load_replace_low_m128d(a, &double);
1540 /// assert_eq!(b.to_array(), [7.0, 2.0]);
1541 /// ```
1542 #[must_use]
1543 #[inline(always)]
1544 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
load_replace_low_m128d(a: m128d, b: &f64) -> m128d1545 pub fn load_replace_low_m128d(a: m128d, b: &f64) -> m128d {
1546   m128d(unsafe { _mm_loadl_pd(a.0, b) })
1547 }
1548 
1549 /// Loads the reference into a register with reversed order.
1550 /// ```
1551 /// # use safe_arch::*;
1552 /// let a = m128d::from_array([10.0, 12.0]);
1553 /// let b = load_reverse_m128d(&a);
1554 /// assert_eq!(m128d::from_array([12.0, 10.0]).to_bits(), b.to_bits());
1555 /// ```
1556 #[must_use]
1557 #[inline(always)]
1558 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
load_reverse_m128d(a: &m128d) -> m128d1559 pub fn load_reverse_m128d(a: &m128d) -> m128d {
1560   m128d(unsafe { _mm_loadr_pd(a as *const m128d as *const f64) })
1561 }
1562 
1563 /// Loads the reference into a register.
1564 ///
1565 /// This generally has no speed penalty if the reference happens to be 16-byte
1566 /// aligned, but there is a slight speed penalty if the reference is only 8-byte
1567 /// aligned.
1568 /// ```
1569 /// # use safe_arch::*;
1570 /// let a = [10.0, 12.0];
1571 /// let b = load_unaligned_m128d(&a);
1572 /// assert_eq!(m128d::from_array(a).to_bits(), b.to_bits());
1573 /// ```
1574 #[must_use]
1575 #[inline(always)]
1576 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
load_unaligned_m128d(a: &[f64; 2]) -> m128d1577 pub fn load_unaligned_m128d(a: &[f64; 2]) -> m128d {
1578   m128d(unsafe { _mm_loadu_pd(a as *const [f64; 2] as *const f64) })
1579 }
1580 
1581 /// Loads the reference into a register.
1582 ///
1583 /// This generally has no speed penalty if the reference happens to be 16-byte
1584 /// aligned, but there is a slight speed penalty if the reference is less
1585 /// aligned.
1586 /// ```
1587 /// # use safe_arch::*;
1588 /// let a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
1589 /// let b = load_unaligned_m128i(&a);
1590 /// assert_eq!(a, <[u8; 16]>::from(b));
1591 /// ```
1592 #[must_use]
1593 #[inline(always)]
1594 #[allow(clippy::cast_ptr_alignment)]
1595 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
load_unaligned_m128i(a: &[u8; 16]) -> m128i1596 pub fn load_unaligned_m128i(a: &[u8; 16]) -> m128i {
1597   m128i(unsafe { _mm_loadu_si128(a as *const [u8; 16] as *const __m128i) })
1598 }
1599 
1600 /// Multiply `i16` lanes producing `i32` values, horizontal add pairs of `i32`
1601 /// values to produce the final output.
1602 /// ```
1603 /// # use safe_arch::*;
1604 /// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
1605 /// let b = m128i::from([5_i16, 6, 7, 8, -15, -26, -37, 48]);
1606 /// let c: [i32; 4] = mul_i16_horizontal_add_m128i(a, b).into();
1607 /// assert_eq!(c, [17, 53, 67, -81]);
1608 /// ```
1609 #[must_use]
1610 #[inline(always)]
1611 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
mul_i16_horizontal_add_m128i(a: m128i, b: m128i) -> m128i1612 pub fn mul_i16_horizontal_add_m128i(a: m128i, b: m128i) -> m128i {
1613   m128i(unsafe { _mm_madd_epi16(a.0, b.0) })
1614 }
1615 
1616 /// Lanewise `max(a, b)` with lanes as `u8`.
1617 /// ```
1618 /// # use safe_arch::*;
1619 /// let a = m128i::from([0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
1620 /// let b = m128i::from([0_u8, 11, 2, 13, 4, 15, 6, 17, 8, 19, 20, 21, 22, 23, 24, 127]);
1621 /// let c: [u8; 16] = max_u8_m128i(a, b).into();
1622 /// assert_eq!(c, [0, 11, 2, 13, 4, 15, 6, 17, 8, 19, 20, 21, 22, 23, 24, 127]);
1623 /// ```
1624 #[must_use]
1625 #[inline(always)]
1626 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
max_u8_m128i(a: m128i, b: m128i) -> m128i1627 pub fn max_u8_m128i(a: m128i, b: m128i) -> m128i {
1628   m128i(unsafe { _mm_max_epu8(a.0, b.0) })
1629 }
1630 
1631 /// Lanewise `max(a, b)` with lanes as `i16`.
1632 /// ```
1633 /// # use safe_arch::*;
1634 /// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
1635 /// let b = m128i::from([5_i16, 6, 7, 8, -15, -26, -37, 48]);
1636 /// let c: [i16; 8] = max_i16_m128i(a, b).into();
1637 /// assert_eq!(c, [5_i16, 6, 7, 8, -1, -2, -3, 48]);
1638 /// ```
1639 #[must_use]
1640 #[inline(always)]
1641 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
max_i16_m128i(a: m128i, b: m128i) -> m128i1642 pub fn max_i16_m128i(a: m128i, b: m128i) -> m128i {
1643   m128i(unsafe { _mm_max_epi16(a.0, b.0) })
1644 }
1645 
1646 /// Lanewise `max(a, b)`.
1647 /// ```
1648 /// # use safe_arch::*;
1649 /// let a = m128d::from_array([5.0, 2.0]);
1650 /// let b = m128d::from_array([1.0, 6.0]);
1651 /// let c = max_m128d(a, b).to_array();
1652 /// assert_eq!(c, [5.0, 6.0]);
1653 /// ```
1654 #[must_use]
1655 #[inline(always)]
1656 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
max_m128d(a: m128d, b: m128d) -> m128d1657 pub fn max_m128d(a: m128d, b: m128d) -> m128d {
1658   m128d(unsafe { _mm_max_pd(a.0, b.0) })
1659 }
1660 
1661 /// Low lane `max(a, b)`, other lanes unchanged.
1662 /// ```
1663 /// # use safe_arch::*;
1664 /// let a = m128d::from_array([1.0, 12.0]);
1665 /// let b = m128d::from_array([5.0, 6.0]);
1666 /// let c = max_m128d_s(a, b).to_array();
1667 /// assert_eq!(c, [5.0, 12.0]);
1668 /// ```
1669 #[must_use]
1670 #[inline(always)]
1671 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
max_m128d_s(a: m128d, b: m128d) -> m128d1672 pub fn max_m128d_s(a: m128d, b: m128d) -> m128d {
1673   m128d(unsafe { _mm_max_sd(a.0, b.0) })
1674 }
1675 
1676 /// Lanewise `min(a, b)` with lanes as `u8`.
1677 /// ```
1678 /// # use safe_arch::*;
1679 /// let a = m128i::from([0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
1680 /// let b = m128i::from([0_u8, 11, 2, 13, 4, 15, 6, 17, 8, 0, 20, 0, 22, 0, 24, 0]);
1681 /// let c: [u8; 16] = min_u8_m128i(a, b).into();
1682 /// assert_eq!(c, [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 0, 10, 0, 12, 0, 14, 0]);
1683 /// ```
1684 #[must_use]
1685 #[inline(always)]
1686 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
min_u8_m128i(a: m128i, b: m128i) -> m128i1687 pub fn min_u8_m128i(a: m128i, b: m128i) -> m128i {
1688   m128i(unsafe { _mm_min_epu8(a.0, b.0) })
1689 }
1690 
1691 /// Lanewise `min(a, b)` with lanes as `i16`.
1692 /// ```
1693 /// # use safe_arch::*;
1694 /// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
1695 /// let b = m128i::from([5_i16, 6, 7, 8, -15, -26, -37, 48]);
1696 /// let c: [i16; 8] = min_i16_m128i(a, b).into();
1697 /// assert_eq!(c, [1_i16, 2, 3, 4, -15, -26, -37, -4]);
1698 /// ```
1699 #[must_use]
1700 #[inline(always)]
1701 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
min_i16_m128i(a: m128i, b: m128i) -> m128i1702 pub fn min_i16_m128i(a: m128i, b: m128i) -> m128i {
1703   m128i(unsafe { _mm_min_epi16(a.0, b.0) })
1704 }
1705 
1706 /// Lanewise `min(a, b)`.
1707 /// ```
1708 /// # use safe_arch::*;
1709 /// let a = m128d::from_array([1.0, 12.0]);
1710 /// let b = m128d::from_array([5.0, 6.0]);
1711 /// let c = min_m128d(a, b).to_array();
1712 /// assert_eq!(c, [1.0, 6.0]);
1713 /// ```
1714 #[must_use]
1715 #[inline(always)]
1716 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
min_m128d(a: m128d, b: m128d) -> m128d1717 pub fn min_m128d(a: m128d, b: m128d) -> m128d {
1718   m128d(unsafe { _mm_min_pd(a.0, b.0) })
1719 }
1720 
1721 /// Low lane `min(a, b)`, other lanes unchanged.
1722 /// ```
1723 /// # use safe_arch::*;
1724 /// let a = m128d::from_array([1.0, 12.0]);
1725 /// let b = m128d::from_array([0.0, 6.0]);
1726 /// let c = min_m128d_s(a, b).to_array();
1727 /// assert_eq!(c, [0.0, 12.0]);
1728 /// ```
1729 #[must_use]
1730 #[inline(always)]
1731 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
min_m128d_s(a: m128d, b: m128d) -> m128d1732 pub fn min_m128d_s(a: m128d, b: m128d) -> m128d {
1733   m128d(unsafe { _mm_min_sd(a.0, b.0) })
1734 }
1735 
1736 /// Copy the low `i64` lane to a new register, upper bits 0.
1737 /// ```
1738 /// # use safe_arch::*;
1739 /// let a = m128i::from([1_i64, 2]);
1740 /// let b = copy_i64_m128i_s(a);
1741 /// assert_eq!(<[i64; 2]>::from(b), [1, 0]);
1742 /// ```
1743 #[must_use]
1744 #[inline(always)]
1745 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
copy_i64_m128i_s(a: m128i) -> m128i1746 pub fn copy_i64_m128i_s(a: m128i) -> m128i {
1747   m128i(unsafe { _mm_move_epi64(a.0) })
1748 }
1749 
1750 /// Copies the `a` value and replaces the low lane with the low `b` value.
1751 /// ```
1752 /// # use safe_arch::*;
1753 /// let a = m128d::from([1.0, 2.0]);
1754 /// let b = m128d::from([3.0, 4.0]);
1755 /// let c = copy_replace_low_f64_m128d(a, b);
1756 /// assert_eq!(c.to_array(), [3.0, 2.0]);
1757 /// ```
1758 #[must_use]
1759 #[inline(always)]
1760 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
copy_replace_low_f64_m128d(a: m128d, b: m128d) -> m128d1761 pub fn copy_replace_low_f64_m128d(a: m128d, b: m128d) -> m128d {
1762   m128d(unsafe { _mm_move_sd(a.0, b.0) })
1763 }
1764 
1765 /// Gathers the `i8` sign bit of each lane.
1766 ///
1767 /// The output has lane 0 as bit 0, lane 1 as bit 1, and so on.
1768 /// ```
1769 /// # use safe_arch::*;
1770 /// let a = m128i::from([0_i8, -11, -2, 13, 4, 15, -6, 17, 8, 19, -20, 21, 22, 23, -24, 127]);
1771 /// let i = move_mask_i8_m128i(a);
1772 /// assert_eq!(i, 0b0100010001000110);
1773 /// ```
1774 #[must_use]
1775 #[inline(always)]
1776 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
move_mask_i8_m128i(a: m128i) -> i321777 pub fn move_mask_i8_m128i(a: m128i) -> i32 {
1778   unsafe { _mm_movemask_epi8(a.0) }
1779 }
1780 
1781 /// Gathers the sign bit of each lane.
1782 ///
1783 /// The output has lane 0 as bit 0, lane 1 as bit 1.
1784 /// ```
1785 /// # use safe_arch::*;
1786 /// let a = m128d::from_array([-1.0, 12.0]);
1787 /// let i = move_mask_m128d(a);
1788 /// assert_eq!(i, 0b01);
1789 /// ```
1790 #[must_use]
1791 #[inline(always)]
1792 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
move_mask_m128d(a: m128d) -> i321793 pub fn move_mask_m128d(a: m128d) -> i32 {
1794   unsafe { _mm_movemask_pd(a.0) }
1795 }
1796 
1797 /// Multiplies the odd `u32` lanes and gives the widened (`u64`) results.
1798 ///
1799 /// ```
1800 /// # use safe_arch::*;
1801 /// let a = m128i::from([1, 7, u32::MAX, 7]);
1802 /// let b = m128i::from([5, 7, u32::MAX, 7]);
1803 /// let c: [u64; 2] = mul_widen_u32_odd_m128i(a, b).into();
1804 /// assert_eq!(c, [(1 * 5), (u32::MAX as u64 * u32::MAX as u64)]);
1805 /// ```
1806 #[must_use]
1807 #[inline(always)]
1808 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
mul_widen_u32_odd_m128i(a: m128i, b: m128i) -> m128i1809 pub fn mul_widen_u32_odd_m128i(a: m128i, b: m128i) -> m128i {
1810   m128i(unsafe { _mm_mul_epu32(a.0, b.0) })
1811 }
1812 
1813 /// Lanewise `a * b`.
1814 /// ```
1815 /// # use safe_arch::*;
1816 /// let a = m128d::from_array([92.0, 87.5]);
1817 /// let b = m128d::from_array([100.0, -6.0]);
1818 /// let c = mul_m128d(a, b).to_array();
1819 /// assert_eq!(c, [9200.0, -525.0]);
1820 /// ```
1821 #[must_use]
1822 #[inline(always)]
1823 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
mul_m128d(a: m128d, b: m128d) -> m128d1824 pub fn mul_m128d(a: m128d, b: m128d) -> m128d {
1825   m128d(unsafe { _mm_mul_pd(a.0, b.0) })
1826 }
1827 
1828 /// Lowest lane `a * b`, high lane unchanged.
1829 /// ```
1830 /// # use safe_arch::*;
1831 /// let a = m128d::from_array([92.0, 87.5]);
1832 /// let b = m128d::from_array([100.0, -600.0]);
1833 /// let c = mul_m128d_s(a, b).to_array();
1834 /// assert_eq!(c, [9200.0, 87.5]);
1835 /// ```
1836 #[must_use]
1837 #[inline(always)]
1838 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
mul_m128d_s(a: m128d, b: m128d) -> m128d1839 pub fn mul_m128d_s(a: m128d, b: m128d) -> m128d {
1840   m128d(unsafe { _mm_mul_sd(a.0, b.0) })
1841 }
1842 
1843 /// Lanewise `a * b` with lanes as `i16`, keep the high bits of the `i32`
1844 /// intermediates.
1845 /// ```
1846 /// # use safe_arch::*;
1847 /// let a = m128i::from([1_i16, 200, 300, 4568, -1, -2, -3, -4]);
1848 /// let b = m128i::from([5_i16, 600, 700, 8910, -15, -26, -37, 48]);
1849 /// let c: [i16; 8] = mul_i16_keep_high_m128i(a, b).into();
1850 /// assert_eq!(c, [0, 1, 3, 621, 0, 0, 0, -1]);
1851 /// ```
1852 #[must_use]
1853 #[inline(always)]
1854 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
mul_i16_keep_high_m128i(a: m128i, b: m128i) -> m128i1855 pub fn mul_i16_keep_high_m128i(a: m128i, b: m128i) -> m128i {
1856   m128i(unsafe { _mm_mulhi_epi16(a.0, b.0) })
1857 }
1858 
1859 /// Lanewise `a * b` with lanes as `u16`, keep the high bits of the `u32`
1860 /// intermediates.
1861 /// ```
1862 /// # use safe_arch::*;
1863 /// let a = m128i::from([1_u16, 2003, 3005, 45687, 1, 2, 3, 4]);
1864 /// let b = m128i::from([5_u16, 6004, 7006, 8910, 15, 26, 37, 48]);
1865 /// let c: [u16; 8] = mul_u16_keep_high_m128i(a, b).into();
1866 /// assert_eq!(c, [0, 183, 321, 6211, 0, 0, 0, 0]);
1867 /// ```
1868 #[must_use]
1869 #[inline(always)]
1870 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
mul_u16_keep_high_m128i(a: m128i, b: m128i) -> m128i1871 pub fn mul_u16_keep_high_m128i(a: m128i, b: m128i) -> m128i {
1872   m128i(unsafe { _mm_mulhi_epu16(a.0, b.0) })
1873 }
1874 
1875 /// Lanewise `a * b` with lanes as `i16`, keep the low bits of the `i32`
1876 /// intermediates.
1877 /// ```
1878 /// # use safe_arch::*;
1879 /// let a = m128i::from([1_i16, 200, 300, 4568, -1, -2, -3, -4]);
1880 /// let b = m128i::from([5_i16, 600, 700, 8910, -15, -26, -37, 48]);
1881 /// let c: [i16; 8] = mul_i16_keep_low_m128i(a, b).into();
1882 /// assert_eq!(c, [5, -11072, 13392, 3024, 15, 52, 111, -192]);
1883 /// ```
1884 #[must_use]
1885 #[inline(always)]
1886 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
mul_i16_keep_low_m128i(a: m128i, b: m128i) -> m128i1887 pub fn mul_i16_keep_low_m128i(a: m128i, b: m128i) -> m128i {
1888   m128i(unsafe { _mm_mullo_epi16(a.0, b.0) })
1889 }
1890 
1891 /// Bitwise `a | b`.
1892 /// ```
1893 /// # use safe_arch::*;
1894 /// let a = m128d::from_array([1.0, 0.0]);
1895 /// let b = m128d::from_array([1.0, 1.0]);
1896 /// let c = bitor_m128d(a, b).to_array();
1897 /// assert_eq!(c, [1.0, 1.0]);
1898 /// ```
1899 #[must_use]
1900 #[inline(always)]
1901 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
bitor_m128d(a: m128d, b: m128d) -> m128d1902 pub fn bitor_m128d(a: m128d, b: m128d) -> m128d {
1903   m128d(unsafe { _mm_or_pd(a.0, b.0) })
1904 }
1905 
1906 /// Bitwise `a | b`.
1907 /// ```
1908 /// # use safe_arch::*;
1909 /// let a = m128i::from([1, 0, 1, 0]);
1910 /// let b = m128i::from([1, 1, 0, 0]);
1911 /// let c: [i32; 4] = bitor_m128i(a, b).into();
1912 /// assert_eq!(c, [1, 1, 1, 0]);
1913 /// ```
1914 #[must_use]
1915 #[inline(always)]
1916 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
bitor_m128i(a: m128i, b: m128i) -> m128i1917 pub fn bitor_m128i(a: m128i, b: m128i) -> m128i {
1918   m128i(unsafe { _mm_or_si128(a.0, b.0) })
1919 }
1920 
1921 /// Saturating convert `i16` to `i8`, and pack the values.
1922 /// ```
1923 /// # use safe_arch::*;
1924 /// let a = m128i::from([1_i16, 2, 3, 4, 5, 6, 7, 8]);
1925 /// let b = m128i::from([9_i16, 10, 11, 12, 13, 14, 15, 16]);
1926 /// let c: [i8; 16] = pack_i16_to_i8_m128i(a, b).into();
1927 /// assert_eq!(c, [1_i8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]);
1928 /// ```
1929 #[must_use]
1930 #[inline(always)]
1931 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
pack_i16_to_i8_m128i(a: m128i, b: m128i) -> m128i1932 pub fn pack_i16_to_i8_m128i(a: m128i, b: m128i) -> m128i {
1933   m128i(unsafe { _mm_packs_epi16(a.0, b.0) })
1934 }
1935 
1936 /// Saturating convert `i32` to `i16`, and pack the values.
1937 /// ```
1938 /// # use safe_arch::*;
1939 /// let a = m128i::from([1_i32, 2, 3, 4]);
1940 /// let b = m128i::from([5_i32, 6, 7, 8]);
1941 /// let c: [i16; 8] = pack_i32_to_i16_m128i(a, b).into();
1942 /// assert_eq!(c, [1_i16, 2, 3, 4, 5, 6, 7, 8]);
1943 /// ```
1944 #[must_use]
1945 #[inline(always)]
1946 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
pack_i32_to_i16_m128i(a: m128i, b: m128i) -> m128i1947 pub fn pack_i32_to_i16_m128i(a: m128i, b: m128i) -> m128i {
1948   m128i(unsafe { _mm_packs_epi32(a.0, b.0) })
1949 }
1950 
1951 /// Saturating convert `i16` to `u8`, and pack the values.
1952 /// ```
1953 /// # use safe_arch::*;
1954 /// let a = m128i::from([-1_i16, 2, -3, 4, -5, 6, -7, 8]);
1955 /// let b = m128i::from([9_i16, 10, 11, 12, 13, -14, 15, -16]);
1956 /// let c: [u8; 16] = pack_i16_to_i8_m128i(a, b).into();
1957 /// assert_eq!(c, [255_u8, 2, 253, 4, 251, 6, 249, 8, 9, 10, 11, 12, 13, 242, 15, 240]);
1958 /// ```
1959 #[must_use]
1960 #[inline(always)]
1961 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
pack_i16_to_u8_m128i(a: m128i, b: m128i) -> m128i1962 pub fn pack_i16_to_u8_m128i(a: m128i, b: m128i) -> m128i {
1963   m128i(unsafe { _mm_packus_epi16(a.0, b.0) })
1964 }
1965 
1966 /// Compute "sum of `u8` absolute differences".
1967 ///
1968 /// * `u8` lanewise `abs(a - b)`, producing `u8` intermediate values.
1969 /// * Sum the first eight and second eight values.
1970 /// * Place into the low 16 bits of two `u64` lanes.
1971 /// ```
1972 /// # use safe_arch::*;
1973 /// let a = m128i::from([0_u8, 11, 2, 13, 4, 15, 6, 17, 8, 19, 20, 21, 22, 23, 24, 127]);
1974 /// let b = m128i::from([20_u8, 110, 250, 103, 34, 105, 60, 217, 8, 19, 210, 201, 202, 203, 204, 127]);
1975 /// let c: [u64; 2] = sum_of_u8_abs_diff_m128i(a, b).into();
1976 /// assert_eq!(c, [831_u64, 910]);
1977 /// ```
1978 #[must_use]
1979 #[inline(always)]
1980 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
sum_of_u8_abs_diff_m128i(a: m128i, b: m128i) -> m128i1981 pub fn sum_of_u8_abs_diff_m128i(a: m128i, b: m128i) -> m128i {
1982   m128i(unsafe { _mm_sad_epu8(a.0, b.0) })
1983 }
1984 
1985 /// Sets the args into an `m128i`, first arg is the high lane.
1986 /// ```
1987 /// # use safe_arch::*;
1988 /// let a = m128i::from([15_i8, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]);
1989 /// let b = set_i8_m128i(0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
1990 /// assert_eq!(<[i8; 16]>::from(a), <[i8; 16]>::from(b));
1991 /// ```
1992 #[must_use]
1993 #[inline(always)]
1994 #[allow(clippy::too_many_arguments)]
1995 #[allow(clippy::many_single_char_names)]
1996 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
set_i8_m128i(a: i8, b: i8, c: i8, d: i8, e: i8, f: i8, g: i8, h: i8, i: i8, j: i8, k: i8, l: i8, m: i8, n: i8, o: i8, p: i8) -> m128i1997 pub fn set_i8_m128i(a: i8, b: i8, c: i8, d: i8, e: i8, f: i8, g: i8, h: i8, i: i8, j: i8, k: i8, l: i8, m: i8, n: i8, o: i8, p: i8) -> m128i {
1998   m128i(unsafe { _mm_set_epi8(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) })
1999 }
2000 
2001 /// Sets the args into an `m128i`, first arg is the high lane.
2002 /// ```
2003 /// # use safe_arch::*;
2004 /// let a = m128i::from([7_i16, 6, 5, 4, 3, 2, 1, 0]);
2005 /// let b = set_i16_m128i(0_i16, 1, 2, 3, 4, 5, 6, 7);
2006 /// assert_eq!(<[i16; 8]>::from(a), <[i16; 8]>::from(b));
2007 /// ```
2008 #[must_use]
2009 #[inline(always)]
2010 #[allow(clippy::too_many_arguments)]
2011 #[allow(clippy::many_single_char_names)]
2012 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
set_i16_m128i(a: i16, b: i16, c: i16, d: i16, e: i16, f: i16, g: i16, h: i16) -> m128i2013 pub fn set_i16_m128i(a: i16, b: i16, c: i16, d: i16, e: i16, f: i16, g: i16, h: i16) -> m128i {
2014   m128i(unsafe { _mm_set_epi16(a, b, c, d, e, f, g, h) })
2015 }
2016 
2017 /// Sets the args into an `m128i`, first arg is the high lane.
2018 /// ```
2019 /// # use safe_arch::*;
2020 /// let a = m128i::from([3, 2, 1, 0]);
2021 /// let b = set_i32_m128i(0, 1, 2, 3);
2022 /// assert_eq!(<[i32; 4]>::from(a), <[i32; 4]>::from(b));
2023 /// ```
2024 #[must_use]
2025 #[inline(always)]
2026 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
set_i32_m128i(a: i32, b: i32, c: i32, d: i32) -> m128i2027 pub fn set_i32_m128i(a: i32, b: i32, c: i32, d: i32) -> m128i {
2028   m128i(unsafe { _mm_set_epi32(a, b, c, d) })
2029 }
2030 
2031 /// Sets the args into an `m128i`, first arg is the high lane.
2032 /// ```
2033 /// # use safe_arch::*;
2034 /// let a = m128i::from([1_i64, 0]);
2035 /// let b = set_i64_m128i(0, 1);
2036 /// assert_eq!(<[i64; 2]>::from(a), <[i64; 2]>::from(b));
2037 /// ```
2038 #[must_use]
2039 #[inline(always)]
2040 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
set_i64_m128i(a: i64, b: i64) -> m128i2041 pub fn set_i64_m128i(a: i64, b: i64) -> m128i {
2042   m128i(unsafe { _mm_set_epi64x(a, b) })
2043 }
2044 
2045 /// Sets the args into an `m128d`, first arg is the high lane.
2046 /// ```
2047 /// # use safe_arch::*;
2048 /// let a = m128d::from_array([1.0, 0.0]);
2049 /// let b = set_m128d(0.0, 1.0);
2050 /// assert_eq!(a.to_array(), b.to_array());
2051 /// ```
2052 #[must_use]
2053 #[inline(always)]
2054 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
set_m128d(a: f64, b: f64) -> m128d2055 pub fn set_m128d(a: f64, b: f64) -> m128d {
2056   m128d(unsafe { _mm_set_pd(a, b) })
2057 }
2058 
2059 /// Sets the args into the low lane of a `m128d`.
2060 /// ```
2061 /// # use safe_arch::*;
2062 /// let a = m128d::from_array([1.0, 0.0]);
2063 /// let b = set_m128d_s(1.0);
2064 /// assert_eq!(a.to_array(), b.to_array());
2065 /// ```
2066 #[must_use]
2067 #[inline(always)]
2068 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
set_m128d_s(a: f64) -> m128d2069 pub fn set_m128d_s(a: f64) -> m128d {
2070   m128d(unsafe { _mm_set_sd(a) })
2071 }
2072 
2073 /// Splats the args into both lanes of the `m128d`.
2074 /// ```
2075 /// # use safe_arch::*;
2076 /// let a = m128d::from_array([1.0, 1.0]);
2077 /// let b = set_splat_m128d(1.0);
2078 /// assert_eq!(a.to_array(), b.to_array());
2079 /// ```
2080 #[must_use]
2081 #[inline(always)]
2082 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
set_splat_m128d(a: f64) -> m128d2083 pub fn set_splat_m128d(a: f64) -> m128d {
2084   m128d(unsafe { _mm_set1_pd(a) })
2085 }
2086 
2087 /// Splats the `i8` to all lanes of the `m128i`.
2088 /// ```
2089 /// # use safe_arch::*;
2090 /// let a = m128i::from([1_i8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]);
2091 /// let b = set_splat_i8_m128i(1);
2092 /// assert_eq!(<[i8; 16]>::from(a), <[i8; 16]>::from(a));
2093 /// ```
2094 #[must_use]
2095 #[inline(always)]
2096 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
set_splat_i8_m128i(i: i8) -> m128i2097 pub fn set_splat_i8_m128i(i: i8) -> m128i {
2098   m128i(unsafe { _mm_set1_epi8(i) })
2099 }
2100 
2101 /// Splats the `i16` to all lanes of the `m128i`.
2102 /// ```
2103 /// # use safe_arch::*;
2104 /// let a = m128i::from([1_i16, 1, 1, 1, 1, 1, 1, 1]);
2105 /// let b = set_splat_i16_m128i(1);
2106 /// assert_eq!(<[i16; 8]>::from(a), <[i16; 8]>::from(a));
2107 /// ```
2108 #[must_use]
2109 #[inline(always)]
2110 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
set_splat_i16_m128i(i: i16) -> m128i2111 pub fn set_splat_i16_m128i(i: i16) -> m128i {
2112   m128i(unsafe { _mm_set1_epi16(i) })
2113 }
2114 
2115 /// Splats the `i32` to all lanes of the `m128i`.
2116 /// ```
2117 /// # use safe_arch::*;
2118 /// let a = m128i::from([1, 1, 1, 1]);
2119 /// let b = set_splat_i32_m128i(1);
2120 /// assert_eq!(<[i32; 4]>::from(a), <[i32; 4]>::from(a));
2121 /// ```
2122 #[must_use]
2123 #[inline(always)]
2124 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
set_splat_i32_m128i(i: i32) -> m128i2125 pub fn set_splat_i32_m128i(i: i32) -> m128i {
2126   m128i(unsafe { _mm_set1_epi32(i) })
2127 }
2128 
2129 /// Splats the `i64` to both lanes of the `m128i`.
2130 /// ```
2131 /// # use safe_arch::*;
2132 /// let a = m128i::from([1_i64, 1]);
2133 /// let b = set_splat_i64_m128i(1);
2134 /// assert_eq!(<[i64; 2]>::from(a), <[i64; 2]>::from(a));
2135 /// ```
2136 #[must_use]
2137 #[inline(always)]
2138 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
set_splat_i64_m128i(i: i64) -> m128i2139 pub fn set_splat_i64_m128i(i: i64) -> m128i {
2140   m128i(unsafe { _mm_set1_epi64x(i) })
2141 }
2142 
2143 /// Sets the args into an `m128i`, first arg is the low lane.
2144 /// ```
2145 /// # use safe_arch::*;
2146 /// let a = m128i::from([0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
2147 /// let b = set_reversed_i8_m128i(0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
2148 /// assert_eq!(<[i8; 16]>::from(a), <[i8; 16]>::from(b));
2149 /// ```
2150 #[must_use]
2151 #[inline(always)]
2152 #[allow(clippy::too_many_arguments)]
2153 #[allow(clippy::many_single_char_names)]
2154 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
set_reversed_i8_m128i(a: i8, b: i8, c: i8, d: i8, e: i8, f: i8, g: i8, h: i8, i: i8, j: i8, k: i8, l: i8, m: i8, n: i8, o: i8, p: i8) -> m128i2155 pub fn set_reversed_i8_m128i(a: i8, b: i8, c: i8, d: i8, e: i8, f: i8, g: i8, h: i8, i: i8, j: i8, k: i8, l: i8, m: i8, n: i8, o: i8, p: i8) -> m128i {
2156   m128i(unsafe { _mm_setr_epi8(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) })
2157 }
2158 
2159 /// Sets the args into an `m128i`, first arg is the low lane.
2160 /// ```
2161 /// # use safe_arch::*;
2162 /// let a = m128i::from([0_i16, 1, 2, 3, 4, 5, 6, 7]);
2163 /// let b = set_reversed_i16_m128i(0_i16, 1, 2, 3, 4, 5, 6, 7);
2164 /// assert_eq!(<[i16; 8]>::from(a), <[i16; 8]>::from(b));
2165 /// ```
2166 #[must_use]
2167 #[inline(always)]
2168 #[allow(clippy::too_many_arguments)]
2169 #[allow(clippy::many_single_char_names)]
2170 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
set_reversed_i16_m128i(a: i16, b: i16, c: i16, d: i16, e: i16, f: i16, g: i16, h: i16) -> m128i2171 pub fn set_reversed_i16_m128i(a: i16, b: i16, c: i16, d: i16, e: i16, f: i16, g: i16, h: i16) -> m128i {
2172   m128i(unsafe { _mm_setr_epi16(a, b, c, d, e, f, g, h) })
2173 }
2174 
2175 /// Sets the args into an `m128i`, first arg is the low lane.
2176 /// ```
2177 /// # use safe_arch::*;
2178 /// let a = m128i::from([0, 1, 2, 3]);
2179 /// let b = set_reversed_i32_m128i(0, 1, 2, 3);
2180 /// assert_eq!(<[i32; 4]>::from(a), <[i32; 4]>::from(b));
2181 /// ```
2182 #[must_use]
2183 #[inline(always)]
2184 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
set_reversed_i32_m128i(a: i32, b: i32, c: i32, d: i32) -> m128i2185 pub fn set_reversed_i32_m128i(a: i32, b: i32, c: i32, d: i32) -> m128i {
2186   m128i(unsafe { _mm_setr_epi32(a, b, c, d) })
2187 }
2188 
2189 /// Sets the args into an `m128d`, first arg is the low lane.
2190 /// ```
2191 /// # use safe_arch::*;
2192 /// let a = m128d::from_array([0.0, 1.0]);
2193 /// let b = set_reversed_m128d(0.0, 1.0);
2194 /// assert_eq!(a.to_array(), b.to_array());
2195 /// ```
2196 #[must_use]
2197 #[inline(always)]
2198 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
set_reversed_m128d(a: f64, b: f64) -> m128d2199 pub fn set_reversed_m128d(a: f64, b: f64) -> m128d {
2200   m128d(unsafe { _mm_setr_pd(a, b) })
2201 }
2202 
2203 /// All lanes zero.
2204 /// ```
2205 /// # use safe_arch::*;
2206 /// let a = zeroed_m128i();
2207 /// assert_eq!(u128::from(a), 0);
2208 /// ```
2209 #[must_use]
2210 #[inline(always)]
2211 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
zeroed_m128i() -> m128i2212 pub fn zeroed_m128i() -> m128i {
2213   m128i(unsafe { _mm_setzero_si128() })
2214 }
2215 
2216 /// Both lanes zero.
2217 /// ```
2218 /// # use safe_arch::*;
2219 /// let a = zeroed_m128d();
2220 /// assert_eq!(a.to_array(), [0.0, 0.0]);
2221 /// ```
2222 #[must_use]
2223 #[inline(always)]
2224 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
zeroed_m128d() -> m128d2225 pub fn zeroed_m128d() -> m128d {
2226   m128d(unsafe { _mm_setzero_pd() })
2227 }
2228 
2229 /// Shuffle the `i32` lanes in `$a` using an immediate
2230 /// control value.
2231 ///
2232 /// ```
2233 /// # use safe_arch::*;
2234 /// let a = m128i::from([6, 7, 8, 9]);
2235 /// //
2236 /// let c = shuffle_ai_f32_all_m128i::<0b01_10_10_00>(a);
2237 /// assert_eq!(<[i32; 4]>::from(c), [6, 8, 8, 7]);
2238 /// ```
2239 /// * **Intrinsic:** [`_mm_shuffle_epi32`]
2240 /// * **Assembly:** `pshufd xmm, xmm, imm8`
2241 #[must_use]
2242 #[inline(always)]
2243 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
shuffle_ai_f32_all_m128i<const MASK: i32>(a: m128i) -> m128i2244 pub fn shuffle_ai_f32_all_m128i<const MASK: i32>(a: m128i) -> m128i {
2245   m128i(unsafe { _mm_shuffle_epi32(a.0, MASK) })
2246 }
2247 
2248 /// Shuffle the `f64` lanes from `$a` and `$b` together using an immediate
2249 /// control value.
2250 ///
2251 /// The `a:` and `b:` prefixes on the index selection values are literal tokens
2252 /// that you type. It helps keep clear what value comes from where. The first
2253 /// two output lanes come from `$a`, the second two output lanes come from `$b`.
2254 ///
2255 /// You can pass the same value as both arguments, but if you want to swizzle
2256 /// within only a single register and you have `avx` available consider using
2257 /// [`shuffle_ai_f64_all_m128d`] instead. You'll get much better performance.
2258 /// ```
2259 /// # use safe_arch::*;
2260 /// let a = m128d::from_array([1.0, 2.0]);
2261 /// let b = m128d::from_array([3.0, 4.0]);
2262 /// //
2263 /// let c = shuffle_abi_f64_all_m128d::<0b00>(a, b).to_array();
2264 /// assert_eq!(c, [1.0, 3.0]);
2265 /// //
2266 /// let c = shuffle_abi_f64_all_m128d::<0b10>(a, b).to_array();
2267 /// assert_eq!(c, [1.0, 4.0]);
2268 /// ```
2269 #[must_use]
2270 #[inline(always)]
2271 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
shuffle_abi_f64_all_m128d<const MASK: i32>(a: m128d, b: m128d) -> m128d2272 pub fn shuffle_abi_f64_all_m128d<const MASK: i32>(a: m128d, b: m128d) -> m128d {
2273   m128d(unsafe { _mm_shuffle_pd(a.0, b.0, MASK) })
2274 }
2275 
2276 /// Shuffle the high `i16` lanes in `$a` using an immediate control value.
2277 /// ```
2278 /// # use safe_arch::*;
2279 /// let a = m128i::from([1_i16, 2, 3, 4, 5, 6, 7, 8]);
2280 /// let c = shuffle_ai_i16_h64all_m128i::<0b01_00_10_11>(a);
2281 /// assert_eq!(<[i16; 8]>::from(c), [1_i16, 2, 3, 4, 8, 7, 5, 6]);
2282 /// ```
2283 /// * **Intrinsic:** [`_mm_shufflehi_epi16`]
2284 /// * **Assembly:** `pshufhw xmm, xmm, imm8`
2285 #[must_use]
2286 #[inline(always)]
2287 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
shuffle_ai_i16_h64all_m128i<const MASK: i32>(a: m128i) -> m128i2288 pub fn shuffle_ai_i16_h64all_m128i<const MASK: i32>(a: m128i) -> m128i {
2289   m128i(unsafe { _mm_shufflehi_epi16(a.0, MASK) })
2290 }
2291 
2292 /// Shuffle the low `i16` lanes in `$a` using an immediate control value.
2293 /// ```
2294 /// # use safe_arch::*;
2295 /// let a = m128i::from([1_i16, 2, 3, 4, 5, 6, 7, 8]);
2296 /// //
2297 /// let c = shuffle_ai_i16_l64all_m128i::<0b01_11_10_00>(a);
2298 /// assert_eq!(<[i16; 8]>::from(c), [1_i16, 3, 4, 2, 5, 6, 7, 8]);
2299 /// ```
2300 /// * **Intrinsic:** [`_mm_shufflelo_epi16`]
2301 /// * **Assembly:** `pshuflw xmm, xmm, imm8`
2302 #[must_use]
2303 #[inline(always)]
2304 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
shuffle_ai_i16_l64all_m128i<const MASK: i32>(a: m128i) -> m128i2305 pub fn shuffle_ai_i16_l64all_m128i<const MASK: i32>(a: m128i) -> m128i {
2306   m128i(unsafe { _mm_shufflelo_epi16(a.0, MASK) })
2307 }
2308 
2309 /// Shift all `u16` lanes to the left by the `count` in the lower `u64` lane.
2310 ///
2311 /// New bits are 0s.
2312 /// ```
2313 /// # use safe_arch::*;
2314 /// let a = m128i::from([1_u16, 2, 3, 4, 1, 2, 3, 4]);
2315 /// let b = m128i::from([3_u64, 0]);
2316 /// let c: [u16; 8] = shl_all_u16_m128i(a, b).into();
2317 /// assert_eq!(c, [1_u16 << 3, 2 << 3, 3 << 3, 4 << 3, 1 << 3, 2 << 3, 3 << 3, 4 << 3]);
2318 /// ```
2319 #[must_use]
2320 #[inline(always)]
2321 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
shl_all_u16_m128i(a: m128i, count: m128i) -> m128i2322 pub fn shl_all_u16_m128i(a: m128i, count: m128i) -> m128i {
2323   m128i(unsafe { _mm_sll_epi16(a.0, count.0) })
2324 }
2325 
2326 /// Shift all `u32` lanes to the left by the `count` in the lower `u64` lane.
2327 ///
2328 /// New bits are 0s.
2329 /// ```
2330 /// # use safe_arch::*;
2331 /// let a = m128i::from([1_u32, 2, 3, 4]);
2332 /// let b = m128i::from([3_u64, 0]);
2333 /// let c: [u32; 4] = shl_all_u32_m128i(a, b).into();
2334 /// assert_eq!(c, [1 << 3, 2 << 3, 3 << 3, 4 << 3]);
2335 /// ```
2336 #[must_use]
2337 #[inline(always)]
2338 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
shl_all_u32_m128i(a: m128i, count: m128i) -> m128i2339 pub fn shl_all_u32_m128i(a: m128i, count: m128i) -> m128i {
2340   m128i(unsafe { _mm_sll_epi32(a.0, count.0) })
2341 }
2342 
2343 /// Shift all `u64` lanes to the left by the `count` in the lower `u64` lane.
2344 ///
2345 /// New bits are 0s.
2346 /// ```
2347 /// # use safe_arch::*;
2348 /// let a = m128i::from([1_u64, 2]);
2349 /// let b = m128i::from([3_u64, 0]);
2350 /// let c: [u64; 2] = shl_all_u64_m128i(a, b).into();
2351 /// assert_eq!(c, [1 << 3, 2 << 3]);
2352 /// ```
2353 #[must_use]
2354 #[inline(always)]
2355 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
shl_all_u64_m128i(a: m128i, count: m128i) -> m128i2356 pub fn shl_all_u64_m128i(a: m128i, count: m128i) -> m128i {
2357   m128i(unsafe { _mm_sll_epi64(a.0, count.0) })
2358 }
2359 
2360 /// Shifts all `u16` lanes left by an immediate.
2361 ///
2362 /// ```
2363 /// # use safe_arch::*;
2364 /// let a = m128i::from([1_u16, 2, 3, 4, 1, 2, 3, 4]);
2365 /// let c: [u16; 8] = shl_imm_u16_m128i::<3>(a).into();
2366 /// assert_eq!(c, [1_u16 << 3, 2 << 3, 3 << 3, 4 << 3, 1 << 3, 2 << 3, 3 << 3, 4 << 3]);
2367 /// ```
2368 #[must_use]
2369 #[inline(always)]
2370 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
shl_imm_u16_m128i<const IMM: i32>(a: m128i) -> m128i2371 pub fn shl_imm_u16_m128i<const IMM: i32>(a: m128i) -> m128i {
2372   m128i(unsafe { _mm_slli_epi16(a.0, IMM) })
2373 }
2374 
2375 /// Shifts all `u32` lanes left by an immediate.
2376 ///
2377 /// ```
2378 /// # use safe_arch::*;
2379 /// let a = m128i::from([1, 2, 3, 4]);
2380 /// let c: [u32; 4] = shl_imm_u32_m128i::<3>(a).into();
2381 /// assert_eq!(c, [1 << 3, 2 << 3, 3 << 3, 4 << 3]);
2382 /// ```
2383 #[must_use]
2384 #[inline(always)]
2385 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
shl_imm_u32_m128i<const IMM: i32>(a: m128i) -> m128i2386 pub fn shl_imm_u32_m128i<const IMM: i32>(a: m128i) -> m128i {
2387   m128i(unsafe { _mm_slli_epi32(a.0, IMM) })
2388 }
2389 
2390 /// Shifts both `u64` lanes left by an immediate.
2391 ///
2392 /// ```
2393 /// # use safe_arch::*;
2394 /// let a = m128i::from([1_u64, 2]);
2395 /// let c: [u64; 2] = shl_imm_u64_m128i::<3>(a).into();
2396 /// assert_eq!(c, [1_u64 << 3, 2 << 3]);
2397 /// ```
2398 #[must_use]
2399 #[inline(always)]
2400 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
shl_imm_u64_m128i<const IMM: i32>(a: m128i) -> m128i2401 pub fn shl_imm_u64_m128i<const IMM: i32>(a: m128i) -> m128i {
2402   m128i(unsafe { _mm_slli_epi64(a.0, IMM) })
2403 }
2404 
2405 /// Lanewise `sqrt(a)`.
2406 /// ```
2407 /// # use safe_arch::*;
2408 /// let a = m128d::from_array([25.0, 16.0]);
2409 /// let b = sqrt_m128d(a).to_array();
2410 /// assert_eq!(b, [5.0, 4.0]);
2411 /// ```
2412 #[must_use]
2413 #[inline(always)]
2414 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
sqrt_m128d(a: m128d) -> m128d2415 pub fn sqrt_m128d(a: m128d) -> m128d {
2416   m128d(unsafe { _mm_sqrt_pd(a.0) })
2417 }
2418 
2419 /// Low lane `sqrt(b)`, upper lane is unchanged from `a`.
2420 /// ```
2421 /// # use safe_arch::*;
2422 /// let a = m128d::from_array([1.0, 2.0]);
2423 /// let b = m128d::from_array([25.0, 4.0]);
2424 /// let c = sqrt_m128d_s(a, b);
2425 /// assert_eq!(c.to_array(), [5.0, 2.0]);
2426 /// ```
2427 #[must_use]
2428 #[inline(always)]
2429 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
sqrt_m128d_s(a: m128d, b: m128d) -> m128d2430 pub fn sqrt_m128d_s(a: m128d, b: m128d) -> m128d {
2431   m128d(unsafe { _mm_sqrt_sd(a.0, b.0) })
2432 }
2433 
2434 /// Shift each `i16` lane to the right by the `count` in the lower `i64` lane.
2435 ///
2436 /// New bits are the sign bit.
2437 /// ```
2438 /// # use safe_arch::*;
2439 /// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
2440 /// let b = m128i::from([3_i64, 0]);
2441 /// let c: [i16; 8] = shr_all_i16_m128i(a, b).into();
2442 /// assert_eq!(c, [1_i16 >> 3, 2 >> 3, 3 >> 3, 4 >> 3, -1 >> 3, -2 >> 3, -3 >> 3, -4 >> 3]);
2443 /// ```
2444 #[must_use]
2445 #[inline(always)]
2446 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
shr_all_i16_m128i(a: m128i, count: m128i) -> m128i2447 pub fn shr_all_i16_m128i(a: m128i, count: m128i) -> m128i {
2448   m128i(unsafe { _mm_sra_epi16(a.0, count.0) })
2449 }
2450 
2451 /// Shift each `i32` lane to the right by the `count` in the lower `i64` lane.
2452 ///
2453 /// New bits are the sign bit.
2454 /// ```
2455 /// # use safe_arch::*;
2456 /// let a = m128i::from([1_i32, 2, -3, -4]);
2457 /// let b = m128i::from([3_i64, 0]);
2458 /// let c: [i32; 4] = shr_all_i32_m128i(a, b).into();
2459 /// assert_eq!(c, [1 >> 3, 2 >> 3, -3 >> 3, -4 >> 3]);
2460 /// ```
2461 #[must_use]
2462 #[inline(always)]
2463 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
shr_all_i32_m128i(a: m128i, count: m128i) -> m128i2464 pub fn shr_all_i32_m128i(a: m128i, count: m128i) -> m128i {
2465   m128i(unsafe { _mm_sra_epi32(a.0, count.0) })
2466 }
2467 
2468 /// Shifts all `i16` lanes right by an immediate.
2469 ///
2470 /// New bits are the sign bit.
2471 ///
2472 /// ```
2473 /// # use safe_arch::*;
2474 /// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
2475 /// let c: [i16; 8] = shr_imm_i16_m128i::<3>(a).into();
2476 /// assert_eq!(c, [1_i16 >> 3, 2 >> 3, 3 >> 3, 4 >> 3, -1 >> 3, -2 >> 3, -3 >> 3, -4 >> 3]);
2477 /// ```
2478 #[must_use]
2479 #[inline(always)]
2480 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
shr_imm_i16_m128i<const IMM: i32>(a: m128i) -> m128i2481 pub fn shr_imm_i16_m128i<const IMM: i32>(a: m128i) -> m128i {
2482   m128i(unsafe { _mm_srai_epi16(a.0, IMM) })
2483 }
2484 
2485 /// Shifts all `i32` lanes right by an immediate.
2486 ///
2487 /// New bits are the sign bit.
2488 ///
2489 /// ```
2490 /// # use safe_arch::*;
2491 /// let a = m128i::from([1, 2, -3, -4]);
2492 /// let c: [i32; 4] = shr_imm_i32_m128i::<3>(a).into();
2493 /// assert_eq!(c, [1 >> 3, 2 >> 3, -3 >> 3, -4 >> 3]);
2494 /// ```
2495 #[must_use]
2496 #[inline(always)]
2497 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
shr_imm_i32_m128i<const IMM: i32>(a: m128i) -> m128i2498 pub fn shr_imm_i32_m128i<const IMM: i32>(a: m128i) -> m128i {
2499   m128i(unsafe { _mm_srai_epi32(a.0, IMM) })
2500 }
2501 
2502 /// Shift each `u16` lane to the right by the `count` in the lower `u64` lane.
2503 ///
2504 /// ```
2505 /// # use safe_arch::*;
2506 /// let a = m128i::from([1_u16, 2, 3, 4, 100, 200, 300, 400]);
2507 /// let b = m128i::from([3_u64, 0]);
2508 /// let c: [u16; 8] = shr_all_u16_m128i(a, b).into();
2509 /// assert_eq!(c, [1_u16 >> 3, 2 >> 3, 3 >> 3, 4 >> 3, 100 >> 3, 200 >> 3, 300 >> 3, 400 >> 3,]);
2510 /// ```
2511 #[must_use]
2512 #[inline(always)]
2513 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
shr_all_u16_m128i(a: m128i, count: m128i) -> m128i2514 pub fn shr_all_u16_m128i(a: m128i, count: m128i) -> m128i {
2515   m128i(unsafe { _mm_srl_epi16(a.0, count.0) })
2516 }
2517 
2518 /// Shift each `u32` lane to the right by the `count` in the lower `u64` lane.
2519 ///
2520 /// ```
2521 /// # use safe_arch::*;
2522 /// let a = m128i::from([1_u32, 2, 300, 400]);
2523 /// let b = m128i::from([3_u64, 0]);
2524 /// let c: [u32; 4] = shr_all_u32_m128i(a, b).into();
2525 /// assert_eq!(c, [1 >> 3, 2 >> 3, 300 >> 3, 400 >> 3,]);
2526 /// ```
2527 #[must_use]
2528 #[inline(always)]
2529 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
shr_all_u32_m128i(a: m128i, count: m128i) -> m128i2530 pub fn shr_all_u32_m128i(a: m128i, count: m128i) -> m128i {
2531   m128i(unsafe { _mm_srl_epi32(a.0, count.0) })
2532 }
2533 
2534 /// Shift each `u64` lane to the right by the `count` in the lower `u64` lane.
2535 ///
2536 /// New bits are 0s.
2537 /// ```
2538 /// # use safe_arch::*;
2539 /// let a = m128i::from([1_u64, 56]);
2540 /// let b = m128i::from([3_u64, 0]);
2541 /// let c: [u64; 2] = shr_all_u64_m128i(a, b).into();
2542 /// assert_eq!(c, [1 >> 3, 56 >> 3]);
2543 /// ```
2544 #[must_use]
2545 #[inline(always)]
2546 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
shr_all_u64_m128i(a: m128i, count: m128i) -> m128i2547 pub fn shr_all_u64_m128i(a: m128i, count: m128i) -> m128i {
2548   m128i(unsafe { _mm_srl_epi64(a.0, count.0) })
2549 }
2550 
2551 /// Shifts all `u16` lanes right by an immediate.
2552 ///
2553 /// New bits are 0s.
2554 ///
2555 /// ```
2556 /// # use safe_arch::*;
2557 /// let a = m128i::from([1_u16, 2, 3, 4, 100, 200, 300, 400]);
2558 /// let c: [u16; 8] = shr_imm_u16_m128i::<3>(a).into();
2559 /// assert_eq!(c, [1_u16 >> 3, 2 >> 3, 3 >> 3, 4 >> 3, 100 >> 3, 200 >> 3, 300 >> 3, 400 >> 3,]);
2560 /// ```
2561 /// * **Intrinsic:** [`_mm_srli_epi16`]
2562 /// * **Assembly:** `psrlw xmm, imm8`
2563 #[must_use]
2564 #[inline(always)]
2565 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
shr_imm_u16_m128i<const IMM: i32>(a: m128i) -> m128i2566 pub fn shr_imm_u16_m128i<const IMM: i32>(a: m128i) -> m128i {
2567   m128i(unsafe { _mm_srli_epi16(a.0, IMM) })
2568 }
2569 
2570 /// Shifts all `u32` lanes right by an immediate.
2571 ///
2572 /// ```
2573 /// # use safe_arch::*;
2574 /// let a = m128i::from([1, 2, 300, 400]);
2575 /// let c: [u32; 4] = shr_imm_u32_m128i::<3>(a).into();
2576 /// assert_eq!(c, [1 >> 3, 2 >> 3, 300 >> 3, 400 >> 3]);
2577 /// ```
2578 /// * **Intrinsic:** [`_mm_srli_epi32`]
2579 /// * **Assembly:** `psrld xmm, imm8`
2580 #[must_use]
2581 #[inline(always)]
2582 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
shr_imm_u32_m128i<const IMM: i32>(a: m128i) -> m128i2583 pub fn shr_imm_u32_m128i<const IMM: i32>(a: m128i) -> m128i {
2584   m128i(unsafe { _mm_srli_epi32(a.0, IMM) })
2585 }
2586 
2587 /// Shifts both `u64` lanes right by an immediate.
2588 ///
2589 /// ```
2590 /// # use safe_arch::*;
2591 /// let a = m128i::from([1_u64, 200]);
2592 /// let c: [u64; 2] = shr_imm_u64_m128i::<3>(a).into();
2593 /// assert_eq!(c, [1_u64 >> 3, 200 >> 3]);
2594 /// ```
2595 /// * **Intrinsic:** [`_mm_srli_epi64`]
2596 /// * **Assembly:** `psrlq xmm, imm8`
2597 #[must_use]
2598 #[inline(always)]
2599 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
shr_imm_u64_m128i<const IMM: i32>(a: m128i) -> m128i2600 pub fn shr_imm_u64_m128i<const IMM: i32>(a: m128i) -> m128i {
2601   m128i(unsafe { _mm_srli_epi64(a.0, IMM) })
2602 }
2603 
2604 /// Stores the value to the reference given.
2605 /// ```
2606 /// # use safe_arch::*;
2607 /// let a = m128d::from_array([10.0, 12.0]);
2608 /// let mut b = zeroed_m128d();
2609 /// store_m128d(&mut b, a);
2610 /// let c = b.to_array();
2611 /// assert_eq!(c, [10.0, 12.0]);
2612 /// ```
2613 #[inline(always)]
2614 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
store_m128d(r: &mut m128d, a: m128d)2615 pub fn store_m128d(r: &mut m128d, a: m128d) {
2616   unsafe { _mm_store_pd(r as *mut m128d as *mut f64, a.0) }
2617 }
2618 
2619 /// Stores the low lane value to the reference given.
2620 /// ```
2621 /// # use safe_arch::*;
2622 /// let a = m128d::from_array([10.0, 12.0]);
2623 /// let mut f = 0.0;
2624 /// store_m128d_s(&mut f, a);
2625 /// assert_eq!(f, 10.0);
2626 /// ```
2627 #[inline(always)]
2628 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
store_m128d_s(r: &mut f64, a: m128d)2629 pub fn store_m128d_s(r: &mut f64, a: m128d) {
2630   unsafe { _mm_store_sd(r as *mut f64, a.0) }
2631 }
2632 
2633 /// Stores the low lane value to all lanes of the reference given.
2634 /// ```
2635 /// # use safe_arch::*;
2636 /// let a = m128d::from_array([10.0, 12.0]);
2637 /// let mut b = zeroed_m128d();
2638 /// store_splat_m128d(&mut b, a);
2639 /// let c = b.to_array();
2640 /// assert_eq!(c, [10.0, 10.0]);
2641 /// ```
2642 #[inline(always)]
2643 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
store_splat_m128d(r: &mut m128d, a: m128d)2644 pub fn store_splat_m128d(r: &mut m128d, a: m128d) {
2645   unsafe { _mm_store1_pd(r as *mut m128d as *mut f64, a.0) }
2646 }
2647 
2648 /// Stores the value to the reference given.
2649 /// ```
2650 /// # use safe_arch::*;
2651 /// let a = m128i::from([1, 2, 3, 4]);
2652 /// let mut b = zeroed_m128i();
2653 /// store_m128i(&mut b, a);
2654 /// let c: [i32; 4] = b.into();
2655 /// assert_eq!(c, [1, 2, 3, 4]);
2656 /// ```
2657 #[inline(always)]
2658 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
store_m128i(r: &mut m128i, a: m128i)2659 pub fn store_m128i(r: &mut m128i, a: m128i) {
2660   unsafe { _mm_store_si128(&mut r.0, a.0) }
2661 }
2662 
2663 /// Stores the high lane value to the reference given.
2664 /// ```
2665 /// # use safe_arch::*;
2666 /// let a = m128d::from_array([10.0, 12.0]);
2667 /// let mut f = 0.0;
2668 /// store_high_m128d_s(&mut f, a);
2669 /// assert_eq!(f, 12.0);
2670 /// ```
2671 #[inline(always)]
2672 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
store_high_m128d_s(r: &mut f64, a: m128d)2673 pub fn store_high_m128d_s(r: &mut f64, a: m128d) {
2674   unsafe { _mm_storeh_pd(r as *mut f64, a.0) }
2675 }
2676 
2677 /// Stores the value to the reference given.
2678 /// ```
2679 /// # use safe_arch::*;
2680 /// let a = m128i::from([1_i64, 2]);
2681 /// let mut b = 0_i64;
2682 /// store_i64_m128i_s(&mut b, a);
2683 /// assert_eq!(b, 1_i64);
2684 /// ```
2685 #[inline(always)]
2686 #[allow(clippy::cast_ptr_alignment)]
2687 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
store_i64_m128i_s(r: &mut i64, a: m128i)2688 pub fn store_i64_m128i_s(r: &mut i64, a: m128i) {
2689   unsafe { _mm_storel_epi64(r as *mut i64 as *mut __m128i, a.0) }
2690 }
2691 
2692 /// Stores the value to the reference given.
2693 /// ```
2694 /// # use safe_arch::*;
2695 /// let a = m128d::from_array([10.0, 12.0]);
2696 /// let mut b = zeroed_m128d();
2697 /// store_reversed_m128d(&mut b, a);
2698 /// let c = b.to_array();
2699 /// assert_eq!(c, [12.0, 10.0]);
2700 /// ```
2701 #[inline(always)]
2702 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
store_reversed_m128d(r: &mut m128d, a: m128d)2703 pub fn store_reversed_m128d(r: &mut m128d, a: m128d) {
2704   unsafe { _mm_storer_pd(r as *mut m128d as *mut f64, a.0) }
2705 }
2706 
2707 /// Stores the value to the reference given.
2708 /// ```
2709 /// # use safe_arch::*;
2710 /// let a = m128d::from_array([10.0, 12.0]);
2711 /// let mut b = [0.0, 0.0];
2712 /// store_unaligned_m128d(&mut b, a);
2713 /// assert_eq!(b, [10.0, 12.0]);
2714 /// ```
2715 #[inline(always)]
2716 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
store_unaligned_m128d(r: &mut [f64; 2], a: m128d)2717 pub fn store_unaligned_m128d(r: &mut [f64; 2], a: m128d) {
2718   unsafe { _mm_storeu_pd(r.as_mut_ptr(), a.0) }
2719 }
2720 
2721 /// Stores the value to the reference given.
2722 /// ```
2723 /// # use safe_arch::*;
2724 /// let a = m128i::from([0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
2725 /// let mut b = [0_u8; 16];
2726 /// store_unaligned_m128i(&mut b, a);
2727 /// assert_eq!(b, [0_u8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
2728 /// ```
2729 #[inline(always)]
2730 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
store_unaligned_m128i(r: &mut [u8; 16], a: m128i)2731 pub fn store_unaligned_m128i(r: &mut [u8; 16], a: m128i) {
2732   unsafe { _mm_storeu_si128(r.as_mut_ptr().cast(), a.0) }
2733 }
2734 
2735 /// Lanewise `a - b` with lanes as `i8`.
2736 /// ```
2737 /// # use safe_arch::*;
2738 /// let a = m128i::from([0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
2739 /// let b = m128i::from([0_i8, 11, 2, 13, 4, 15, 6, 17, 8, 19, -20, 21, 22, -23, 24, 127]);
2740 /// let c: [i8; 16] = sub_i8_m128i(a, b).into();
2741 /// assert_eq!(c, [0, -10, 0, -10, 0, -10, 0, -10, 0, -10, 30, -10, -10, 36, -10, -112]);
2742 /// ```
2743 #[must_use]
2744 #[inline(always)]
2745 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
sub_i8_m128i(a: m128i, b: m128i) -> m128i2746 pub fn sub_i8_m128i(a: m128i, b: m128i) -> m128i {
2747   m128i(unsafe { _mm_sub_epi8(a.0, b.0) })
2748 }
2749 
2750 /// Lanewise `a - b` with lanes as `i16`.
2751 /// ```
2752 /// # use safe_arch::*;
2753 /// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
2754 /// let b = m128i::from([51_i16, 61, 71, 81, -15, -26, -37, 48]);
2755 /// let c: [i16; 8] = sub_i16_m128i(a, b).into();
2756 /// assert_eq!(c, [-50, -59, -68, -77, 14, 24, 34, -52]);
2757 /// ```
2758 #[must_use]
2759 #[inline(always)]
2760 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
sub_i16_m128i(a: m128i, b: m128i) -> m128i2761 pub fn sub_i16_m128i(a: m128i, b: m128i) -> m128i {
2762   m128i(unsafe { _mm_sub_epi16(a.0, b.0) })
2763 }
2764 
2765 /// Lanewise `a - b` with lanes as `i32`.
2766 /// ```
2767 /// # use safe_arch::*;
2768 /// let a = m128i::from([1, 2, 3, 4]);
2769 /// let b = m128i::from([50, 60, 70, 87]);
2770 /// let c: [i32; 4] = sub_i32_m128i(a, b).into();
2771 /// assert_eq!(c, [-49, -58, -67, -83]);
2772 /// ```
2773 #[must_use]
2774 #[inline(always)]
2775 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
sub_i32_m128i(a: m128i, b: m128i) -> m128i2776 pub fn sub_i32_m128i(a: m128i, b: m128i) -> m128i {
2777   m128i(unsafe { _mm_sub_epi32(a.0, b.0) })
2778 }
2779 
2780 /// Lanewise `a - b` with lanes as `i64`.
2781 /// ```
2782 /// # use safe_arch::*;
2783 /// let a = m128i::from([92_i64, 87]);
2784 /// let b = m128i::from([-9001_i64, 1]);
2785 /// let c: [i64; 2] = sub_i64_m128i(a, b).into();
2786 /// assert_eq!(c, [9093, 86]);
2787 /// ```
2788 #[must_use]
2789 #[inline(always)]
2790 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
sub_i64_m128i(a: m128i, b: m128i) -> m128i2791 pub fn sub_i64_m128i(a: m128i, b: m128i) -> m128i {
2792   m128i(unsafe { _mm_sub_epi64(a.0, b.0) })
2793 }
2794 
2795 /// Lanewise `a - b`.
2796 /// ```
2797 /// # use safe_arch::*;
2798 /// let a = m128d::from_array([92.0, 87.5]);
2799 /// let b = m128d::from_array([100.0, -6.0]);
2800 /// let c = sub_m128d(a, b).to_array();
2801 /// assert_eq!(c, [-8.0, 93.5]);
2802 /// ```
2803 #[must_use]
2804 #[inline(always)]
2805 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
sub_m128d(a: m128d, b: m128d) -> m128d2806 pub fn sub_m128d(a: m128d, b: m128d) -> m128d {
2807   m128d(unsafe { _mm_sub_pd(a.0, b.0) })
2808 }
2809 
2810 /// Lowest lane `a - b`, high lane unchanged.
2811 /// ```
2812 /// # use safe_arch::*;
2813 /// let a = m128d::from_array([92.0, 87.5]);
2814 /// let b = m128d::from_array([100.0, -600.0]);
2815 /// let c = sub_m128d_s(a, b).to_array();
2816 /// assert_eq!(c, [-8.0, 87.5]);
2817 /// ```
2818 #[must_use]
2819 #[inline(always)]
2820 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
sub_m128d_s(a: m128d, b: m128d) -> m128d2821 pub fn sub_m128d_s(a: m128d, b: m128d) -> m128d {
2822   m128d(unsafe { _mm_sub_sd(a.0, b.0) })
2823 }
2824 
2825 /// Lanewise saturating `a - b` with lanes as `i8`.
2826 /// ```
2827 /// # use safe_arch::*;
2828 /// let a = m128i::from([0_i8, -128, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -127]);
2829 /// let b = m128i::from([0_i8, 1, 2, 13, 4, 15, 6, 17, 8, 19, -20, 21, 22, -23, 24, 127]);
2830 /// let c: [i8; 16] = sub_saturating_i8_m128i(a, b).into();
2831 /// assert_eq!(c, [0, -128, 0, -10, 0, -10, 0, -10, 0, -10, 30, -10, -10, 36, -10, -128]);
2832 /// ```
2833 #[must_use]
2834 #[inline(always)]
2835 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
sub_saturating_i8_m128i(a: m128i, b: m128i) -> m128i2836 pub fn sub_saturating_i8_m128i(a: m128i, b: m128i) -> m128i {
2837   m128i(unsafe { _mm_subs_epi8(a.0, b.0) })
2838 }
2839 
2840 /// Lanewise saturating `a - b` with lanes as `i16`.
2841 /// ```
2842 /// # use safe_arch::*;
2843 /// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
2844 /// let b = m128i::from([51_i16, 61, 71, 81, i16::MAX, -26, -37, 48]);
2845 /// let c: [i16; 8] = sub_saturating_i16_m128i(a, b).into();
2846 /// assert_eq!(c, [-50, -59, -68, -77, -32768, 24, 34, -52]);
2847 /// ```
2848 #[must_use]
2849 #[inline(always)]
2850 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
sub_saturating_i16_m128i(a: m128i, b: m128i) -> m128i2851 pub fn sub_saturating_i16_m128i(a: m128i, b: m128i) -> m128i {
2852   m128i(unsafe { _mm_subs_epi16(a.0, b.0) })
2853 }
2854 
2855 /// Lanewise saturating `a - b` with lanes as `u8`.
2856 /// ```
2857 /// # use safe_arch::*;
2858 /// let a = m128i::from([10_u8, 255, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 255]);
2859 /// let b = m128i::from([1_u8, 1, 2, 13, 4, 15, 6, 17, 8, 19, 20, 21, 22, 23, 24, 127]);
2860 /// let c: [u8; 16] = sub_saturating_u8_m128i(a, b).into();
2861 /// assert_eq!(c, [9_u8, 254, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128]);
2862 /// ```
2863 #[must_use]
2864 #[inline(always)]
2865 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
sub_saturating_u8_m128i(a: m128i, b: m128i) -> m128i2866 pub fn sub_saturating_u8_m128i(a: m128i, b: m128i) -> m128i {
2867   m128i(unsafe { _mm_subs_epu8(a.0, b.0) })
2868 }
2869 
2870 /// Lanewise saturating `a - b` with lanes as `u16`.
2871 /// ```
2872 /// # use safe_arch::*;
2873 /// let a = m128i::from([51_u16, 61, 3, 4, u16::MAX, 2, 3, u16::MAX]);
2874 /// let b = m128i::from([5_u16, 2, 71, 81, u16::MAX, 26, 37, u16::MIN]);
2875 /// let c: [u16; 8] = sub_saturating_u16_m128i(a, b).into();
2876 /// assert_eq!(c, [46, 59, 0, 0, 0, 0, 0, u16::MAX]);
2877 /// ```
2878 #[must_use]
2879 #[inline(always)]
2880 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
sub_saturating_u16_m128i(a: m128i, b: m128i) -> m128i2881 pub fn sub_saturating_u16_m128i(a: m128i, b: m128i) -> m128i {
2882   m128i(unsafe { _mm_subs_epu16(a.0, b.0) })
2883 }
2884 
2885 /// Unpack and interleave high `i8` lanes of `a` and `b`.
2886 /// ```
2887 /// # use safe_arch::*;
2888 /// let a = m128i::from([0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
2889 /// let b = m128i::from([0_i8, 11, 2, 13, 4, 15, 6, 17, 8, 19, -20, 21, 22, -23, 24, 127]);
2890 /// let c: [i8; 16] = unpack_high_i8_m128i(a, b).into();
2891 /// assert_eq!(c, [8, 8, 9, 19, 10, -20, 11, 21, 12, 22, 13, -23, 14, 24, 15, 127]);
2892 /// ```
2893 #[must_use]
2894 #[inline(always)]
2895 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
unpack_high_i8_m128i(a: m128i, b: m128i) -> m128i2896 pub fn unpack_high_i8_m128i(a: m128i, b: m128i) -> m128i {
2897   m128i(unsafe { _mm_unpackhi_epi8(a.0, b.0) })
2898 }
2899 
2900 /// Unpack and interleave high `i16` lanes of `a` and `b`.
2901 /// ```
2902 /// # use safe_arch::*;
2903 /// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
2904 /// let b = m128i::from([5_i16, 6, 7, 8, -15, -26, -37, 48]);
2905 /// let c: [i16; 8] = unpack_high_i16_m128i(a, b).into();
2906 /// assert_eq!(c, [-1, -15, -2, -26, -3, -37, -4, 48]);
2907 /// ```
2908 #[must_use]
2909 #[inline(always)]
2910 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
unpack_high_i16_m128i(a: m128i, b: m128i) -> m128i2911 pub fn unpack_high_i16_m128i(a: m128i, b: m128i) -> m128i {
2912   m128i(unsafe { _mm_unpackhi_epi16(a.0, b.0) })
2913 }
2914 
2915 /// Unpack and interleave high `i32` lanes of `a` and `b`.
2916 /// ```
2917 /// # use safe_arch::*;
2918 /// let a = m128i::from([1, 2, 3, 4]);
2919 /// let b = m128i::from([5, 6, 7, 8]);
2920 /// let c: [i32; 4] = unpack_high_i32_m128i(a, b).into();
2921 /// assert_eq!(c, [3, 7, 4, 8]);
2922 /// ```
2923 #[must_use]
2924 #[inline(always)]
2925 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
unpack_high_i32_m128i(a: m128i, b: m128i) -> m128i2926 pub fn unpack_high_i32_m128i(a: m128i, b: m128i) -> m128i {
2927   m128i(unsafe { _mm_unpackhi_epi32(a.0, b.0) })
2928 }
2929 
2930 /// Unpack and interleave high `i64` lanes of `a` and `b`.
2931 /// ```
2932 /// # use safe_arch::*;
2933 /// let a = m128i::from([92_i64, 87]);
2934 /// let b = m128i::from([-9001_i64, 1]);
2935 /// let c: [i64; 2] = unpack_high_i64_m128i(a, b).into();
2936 /// assert_eq!(c, [87, 1]);
2937 /// ```
2938 #[must_use]
2939 #[inline(always)]
2940 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
unpack_high_i64_m128i(a: m128i, b: m128i) -> m128i2941 pub fn unpack_high_i64_m128i(a: m128i, b: m128i) -> m128i {
2942   m128i(unsafe { _mm_unpackhi_epi64(a.0, b.0) })
2943 }
2944 
2945 /// Unpack and interleave high lanes of `a` and `b`.
2946 /// ```
2947 /// # use safe_arch::*;
2948 /// let a = m128d::from_array([92.0, 87.5]);
2949 /// let b = m128d::from_array([100.0, -6.0]);
2950 /// let c = unpack_high_m128d(a, b).to_array();
2951 /// assert_eq!(c, [87.5, -6.0]);
2952 /// ```
2953 #[must_use]
2954 #[inline(always)]
2955 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
unpack_high_m128d(a: m128d, b: m128d) -> m128d2956 pub fn unpack_high_m128d(a: m128d, b: m128d) -> m128d {
2957   m128d(unsafe { _mm_unpackhi_pd(a.0, b.0) })
2958 }
2959 
2960 /// Unpack and interleave low `i8` lanes of `a` and `b`.
2961 /// ```
2962 /// # use safe_arch::*;
2963 /// let a = m128i::from([0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]);
2964 /// let b = m128i::from([12_i8, 11, 22, 13, 99, 15, 16, 17, 8, 19, -20, 21, 22, -23, 24, 127]);
2965 /// let c: [i8; 16] = unpack_low_i8_m128i(a, b).into();
2966 /// assert_eq!(c, [0, 12, 1, 11, 2, 22, 3, 13, 4, 99, 5, 15, 6, 16, 7, 17]);
2967 /// ```
2968 #[must_use]
2969 #[inline(always)]
2970 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
unpack_low_i8_m128i(a: m128i, b: m128i) -> m128i2971 pub fn unpack_low_i8_m128i(a: m128i, b: m128i) -> m128i {
2972   m128i(unsafe { _mm_unpacklo_epi8(a.0, b.0) })
2973 }
2974 
2975 /// Unpack and interleave low `i16` lanes of `a` and `b`.
2976 /// ```
2977 /// # use safe_arch::*;
2978 /// let a = m128i::from([1_i16, 2, 3, 4, -1, -2, -3, -4]);
2979 /// let b = m128i::from([5_i16, 6, 7, 8, -15, -26, -37, 48]);
2980 /// let c: [i16; 8] = unpack_low_i16_m128i(a, b).into();
2981 /// assert_eq!(c, [1, 5, 2, 6, 3, 7, 4, 8]);
2982 /// ```
2983 #[must_use]
2984 #[inline(always)]
2985 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
unpack_low_i16_m128i(a: m128i, b: m128i) -> m128i2986 pub fn unpack_low_i16_m128i(a: m128i, b: m128i) -> m128i {
2987   m128i(unsafe { _mm_unpacklo_epi16(a.0, b.0) })
2988 }
2989 
2990 /// Unpack and interleave low `i32` lanes of `a` and `b`.
2991 /// ```
2992 /// # use safe_arch::*;
2993 /// let a = m128i::from([1, 2, 3, 4]);
2994 /// let b = m128i::from([5, 6, 7, 8]);
2995 /// let c: [i32; 4] = unpack_low_i32_m128i(a, b).into();
2996 /// assert_eq!(c, [1, 5, 2, 6]);
2997 /// ```
2998 #[must_use]
2999 #[inline(always)]
3000 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
unpack_low_i32_m128i(a: m128i, b: m128i) -> m128i3001 pub fn unpack_low_i32_m128i(a: m128i, b: m128i) -> m128i {
3002   m128i(unsafe { _mm_unpacklo_epi32(a.0, b.0) })
3003 }
3004 
3005 /// Unpack and interleave low `i64` lanes of `a` and `b`.
3006 /// ```
3007 /// # use safe_arch::*;
3008 /// let a = m128i::from([92_i64, 87]);
3009 /// let b = m128i::from([-9001_i64, 1]);
3010 /// let c: [i64; 2] = unpack_low_i64_m128i(a, b).into();
3011 /// assert_eq!(c, [92, -9001]);
3012 /// ```
3013 #[must_use]
3014 #[inline(always)]
3015 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
unpack_low_i64_m128i(a: m128i, b: m128i) -> m128i3016 pub fn unpack_low_i64_m128i(a: m128i, b: m128i) -> m128i {
3017   m128i(unsafe { _mm_unpacklo_epi64(a.0, b.0) })
3018 }
3019 
3020 /// Unpack and interleave low lanes of `a` and `b`.
3021 /// ```
3022 /// # use safe_arch::*;
3023 /// let a = m128d::from_array([92.0, 87.5]);
3024 /// let b = m128d::from_array([100.0, -6.0]);
3025 /// let c = unpack_low_m128d(a, b).to_array();
3026 /// assert_eq!(c, [92.0, 100.0]);
3027 /// ```
3028 #[must_use]
3029 #[inline(always)]
3030 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
unpack_low_m128d(a: m128d, b: m128d) -> m128d3031 pub fn unpack_low_m128d(a: m128d, b: m128d) -> m128d {
3032   m128d(unsafe { _mm_unpacklo_pd(a.0, b.0) })
3033 }
3034 
3035 /// Bitwise `a ^ b`.
3036 /// ```
3037 /// # use safe_arch::*;
3038 /// let a = m128d::from_array([1.0, 0.0]);
3039 /// let b = m128d::from_array([1.0, 1.0]);
3040 /// let c = bitxor_m128d(a, b).to_array();
3041 /// assert_eq!(c, [0.0, 1.0]);
3042 /// ```
3043 #[must_use]
3044 #[inline(always)]
3045 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
bitxor_m128d(a: m128d, b: m128d) -> m128d3046 pub fn bitxor_m128d(a: m128d, b: m128d) -> m128d {
3047   m128d(unsafe { _mm_xor_pd(a.0, b.0) })
3048 }
3049 
3050 /// Bitwise `a ^ b`.
3051 /// ```
3052 /// # use safe_arch::*;
3053 /// let a = m128i::from([1, 0, 1, 0]);
3054 /// let b = m128i::from([1, 1, 0, 0]);
3055 /// let c: [i32; 4] = bitxor_m128i(a, b).into();
3056 /// assert_eq!(c, [0, 1, 1, 0]);
3057 /// ```
3058 #[must_use]
3059 #[inline(always)]
3060 #[cfg_attr(docs_rs, doc(cfg(target_feature = "sse2")))]
bitxor_m128i(a: m128i, b: m128i) -> m128i3061 pub fn bitxor_m128i(a: m128i, b: m128i) -> m128i {
3062   m128i(unsafe { _mm_xor_si128(a.0, b.0) })
3063 }
3064 
//
// Here we define the Operator Overloads for `m128d` and `m128i`. Each one just
// calls the correct function from above. By putting the impls here and not with
// the types themselves we theoretically would be able to build the crate safely
// even if there's no `sse2` feature enabled. You'd just have `m128d` and `m128i`
// types without the operator overloads is all. Not that the standard Rust
// distribution for `x86_64` can build properly without `sse2` enabled, but maybe
// you're using a custom target or something. It doesn't really put us out of
// our way, so it doesn't hurt to try and accommodate the potential use case.
//
3075 
3076 // First we provide all `m128d` impls.
3077 
3078 impl Add for m128d {
3079   type Output = Self;
3080   #[must_use]
3081   #[inline(always)]
add(self, rhs: Self) -> Self3082   fn add(self, rhs: Self) -> Self {
3083     add_m128d(self, rhs)
3084   }
3085 }
3086 impl AddAssign for m128d {
3087   #[inline(always)]
add_assign(&mut self, rhs: Self)3088   fn add_assign(&mut self, rhs: Self) {
3089     *self = *self + rhs;
3090   }
3091 }
3092 
3093 impl BitAnd for m128d {
3094   type Output = Self;
3095   #[must_use]
3096   #[inline(always)]
bitand(self, rhs: Self) -> Self3097   fn bitand(self, rhs: Self) -> Self {
3098     bitand_m128d(self, rhs)
3099   }
3100 }
3101 impl BitAndAssign for m128d {
3102   #[inline(always)]
bitand_assign(&mut self, rhs: Self)3103   fn bitand_assign(&mut self, rhs: Self) {
3104     *self = *self & rhs;
3105   }
3106 }
3107 
3108 impl BitOr for m128d {
3109   type Output = Self;
3110   #[must_use]
3111   #[inline(always)]
bitor(self, rhs: Self) -> Self3112   fn bitor(self, rhs: Self) -> Self {
3113     bitor_m128d(self, rhs)
3114   }
3115 }
3116 impl BitOrAssign for m128d {
3117   #[inline(always)]
bitor_assign(&mut self, rhs: Self)3118   fn bitor_assign(&mut self, rhs: Self) {
3119     *self = *self | rhs;
3120   }
3121 }
3122 
3123 impl BitXor for m128d {
3124   type Output = Self;
3125   #[must_use]
3126   #[inline(always)]
bitxor(self, rhs: Self) -> Self3127   fn bitxor(self, rhs: Self) -> Self {
3128     bitxor_m128d(self, rhs)
3129   }
3130 }
3131 impl BitXorAssign for m128d {
3132   #[inline(always)]
bitxor_assign(&mut self, rhs: Self)3133   fn bitxor_assign(&mut self, rhs: Self) {
3134     *self = *self ^ rhs;
3135   }
3136 }
3137 
3138 impl Div for m128d {
3139   type Output = Self;
3140   #[must_use]
3141   #[inline(always)]
div(self, rhs: Self) -> Self3142   fn div(self, rhs: Self) -> Self {
3143     div_m128d(self, rhs)
3144   }
3145 }
3146 impl DivAssign for m128d {
3147   #[inline(always)]
div_assign(&mut self, rhs: Self)3148   fn div_assign(&mut self, rhs: Self) {
3149     *self = *self / rhs;
3150   }
3151 }
3152 
3153 impl Mul for m128d {
3154   type Output = Self;
3155   #[must_use]
3156   #[inline(always)]
mul(self, rhs: Self) -> Self3157   fn mul(self, rhs: Self) -> Self {
3158     mul_m128d(self, rhs)
3159   }
3160 }
3161 impl MulAssign for m128d {
3162   #[inline(always)]
mul_assign(&mut self, rhs: Self)3163   fn mul_assign(&mut self, rhs: Self) {
3164     *self = *self * rhs;
3165   }
3166 }
3167 
3168 impl Neg for m128d {
3169   type Output = Self;
3170   #[must_use]
3171   #[inline(always)]
neg(self) -> Self3172   fn neg(self) -> Self {
3173     sub_m128d(zeroed_m128d(), self)
3174   }
3175 }
3176 
3177 impl Not for m128d {
3178   type Output = Self;
3179   /// Not a direct intrinsic, but it's very useful and the implementation is
3180   /// simple enough.
3181   ///
3182   /// Negates the bits by performing an `xor` with an all-1s bit pattern.
3183   #[must_use]
3184   #[inline(always)]
not(self) -> Self3185   fn not(self) -> Self {
3186     let all_bits = set_splat_m128d(f64::from_bits(u64::MAX));
3187     self ^ all_bits
3188   }
3189 }
3190 
3191 impl Sub for m128d {
3192   type Output = Self;
3193   #[must_use]
3194   #[inline(always)]
sub(self, rhs: Self) -> Self3195   fn sub(self, rhs: Self) -> Self {
3196     sub_m128d(self, rhs)
3197   }
3198 }
3199 impl SubAssign for m128d {
3200   #[inline(always)]
sub_assign(&mut self, rhs: Self)3201   fn sub_assign(&mut self, rhs: Self) {
3202     *self = *self - rhs;
3203   }
3204 }
3205 
3206 impl PartialEq for m128d {
3207   /// Not a direct intrinsic, this is a `cmp_eq_mask` and then a `move_mask`.
3208   #[must_use]
3209   #[inline(always)]
eq(&self, other: &Self) -> bool3210   fn eq(&self, other: &Self) -> bool {
3211     move_mask_m128d(cmp_eq_mask_m128d(*self, *other)) == 0b11
3212   }
3213 }
3214 
3215 // Next we provide all `m128i` impls. Since the interpretation of the lanes
3216 // depends on the operation used, we only provide the bit ops (which are "lane
3217 // agnostic").
3218 
3219 impl BitAnd for m128i {
3220   type Output = Self;
3221   #[must_use]
3222   #[inline(always)]
bitand(self, rhs: Self) -> Self3223   fn bitand(self, rhs: Self) -> Self {
3224     bitand_m128i(self, rhs)
3225   }
3226 }
3227 impl BitAndAssign for m128i {
3228   #[inline(always)]
bitand_assign(&mut self, rhs: Self)3229   fn bitand_assign(&mut self, rhs: Self) {
3230     *self = *self & rhs;
3231   }
3232 }
3233 
3234 impl BitOr for m128i {
3235   type Output = Self;
3236   #[must_use]
3237   #[inline(always)]
bitor(self, rhs: Self) -> Self3238   fn bitor(self, rhs: Self) -> Self {
3239     bitor_m128i(self, rhs)
3240   }
3241 }
3242 impl BitOrAssign for m128i {
3243   #[inline(always)]
bitor_assign(&mut self, rhs: Self)3244   fn bitor_assign(&mut self, rhs: Self) {
3245     *self = *self | rhs;
3246   }
3247 }
3248 
3249 impl BitXor for m128i {
3250   type Output = Self;
3251   #[must_use]
3252   #[inline(always)]
bitxor(self, rhs: Self) -> Self3253   fn bitxor(self, rhs: Self) -> Self {
3254     bitxor_m128i(self, rhs)
3255   }
3256 }
3257 impl BitXorAssign for m128i {
3258   #[inline(always)]
bitxor_assign(&mut self, rhs: Self)3259   fn bitxor_assign(&mut self, rhs: Self) {
3260     *self = *self ^ rhs;
3261   }
3262 }
3263 
3264 impl Not for m128i {
3265   type Output = Self;
3266   /// Not a direct intrinsic, but it's very useful and the implementation is
3267   /// simple enough.
3268   ///
3269   /// Negates the bits by performing an `xor` with an all-1s bit pattern.
3270   #[must_use]
3271   #[inline(always)]
not(self) -> Self3272   fn not(self) -> Self {
3273     let all_bits = set_splat_i32_m128i(-1);
3274     self ^ all_bits
3275   }
3276 }
3277 
3278 impl PartialEq for m128i {
3279   /// Not a direct intrinsic, this is a `cmp_eq_mask_i8_m128i` and then a
3280   /// `move_mask_i8_m128i`.
3281   #[must_use]
3282   #[inline(always)]
eq(&self, other: &Self) -> bool3283   fn eq(&self, other: &Self) -> bool {
3284     move_mask_i8_m128i(cmp_eq_mask_i8_m128i(*self, *other)) == 0b11111111_11111111
3285   }
3286 }
/// Unlike with the floating types, ints have absolute equality, so the
/// bytewise comparison in `PartialEq` is a full equivalence relation and the
/// `Eq` marker can be implemented.
impl Eq for m128i {}
3289