/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9
10 #ifndef __TMMINTRIN_H
11 #define __TMMINTRIN_H
12
13 #include <pmmintrin.h>
14
/* Define the default attributes for the functions in this file.
 *
 * __DEFAULT_FN_ATTRS is applied to the intrinsics operating on 128-bit
 * (__m128i) vectors and names the "ssse3" target feature.
 * __DEFAULT_FN_ATTRS_MMX is applied to the intrinsics operating on 64-bit
 * (__m64) vectors and names both the "mmx" and "ssse3" target features.
 * Both are #undef'd again at the end of this header. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), \
                 __min_vector_width__(64)))
#define __DEFAULT_FN_ATTRS_MMX \
  __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), \
                 __min_vector_width__(64)))
18
19 /// Computes the absolute value of each of the packed 8-bit signed
20 /// integers in the source operand and stores the 8-bit unsigned integer
21 /// results in the destination.
22 ///
23 /// \headerfile <x86intrin.h>
24 ///
25 /// This intrinsic corresponds to the \c PABSB instruction.
26 ///
27 /// \param __a
28 /// A 64-bit vector of [8 x i8].
29 /// \returns A 64-bit integer vector containing the absolute values of the
30 /// elements in the operand.
31 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_abs_pi8(__m64 __a)32 _mm_abs_pi8(__m64 __a)
33 {
34 return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
35 }
36
37 /// Computes the absolute value of each of the packed 8-bit signed
38 /// integers in the source operand and stores the 8-bit unsigned integer
39 /// results in the destination.
40 ///
41 /// \headerfile <x86intrin.h>
42 ///
43 /// This intrinsic corresponds to the \c VPABSB instruction.
44 ///
45 /// \param __a
46 /// A 128-bit vector of [16 x i8].
47 /// \returns A 128-bit integer vector containing the absolute values of the
48 /// elements in the operand.
49 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_abs_epi8(__m128i __a)50 _mm_abs_epi8(__m128i __a)
51 {
52 return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
53 }
54
55 /// Computes the absolute value of each of the packed 16-bit signed
56 /// integers in the source operand and stores the 16-bit unsigned integer
57 /// results in the destination.
58 ///
59 /// \headerfile <x86intrin.h>
60 ///
61 /// This intrinsic corresponds to the \c PABSW instruction.
62 ///
63 /// \param __a
64 /// A 64-bit vector of [4 x i16].
65 /// \returns A 64-bit integer vector containing the absolute values of the
66 /// elements in the operand.
67 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_abs_pi16(__m64 __a)68 _mm_abs_pi16(__m64 __a)
69 {
70 return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
71 }
72
73 /// Computes the absolute value of each of the packed 16-bit signed
74 /// integers in the source operand and stores the 16-bit unsigned integer
75 /// results in the destination.
76 ///
77 /// \headerfile <x86intrin.h>
78 ///
79 /// This intrinsic corresponds to the \c VPABSW instruction.
80 ///
81 /// \param __a
82 /// A 128-bit vector of [8 x i16].
83 /// \returns A 128-bit integer vector containing the absolute values of the
84 /// elements in the operand.
85 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_abs_epi16(__m128i __a)86 _mm_abs_epi16(__m128i __a)
87 {
88 return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
89 }
90
91 /// Computes the absolute value of each of the packed 32-bit signed
92 /// integers in the source operand and stores the 32-bit unsigned integer
93 /// results in the destination.
94 ///
95 /// \headerfile <x86intrin.h>
96 ///
97 /// This intrinsic corresponds to the \c PABSD instruction.
98 ///
99 /// \param __a
100 /// A 64-bit vector of [2 x i32].
101 /// \returns A 64-bit integer vector containing the absolute values of the
102 /// elements in the operand.
103 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_abs_pi32(__m64 __a)104 _mm_abs_pi32(__m64 __a)
105 {
106 return (__m64)__builtin_ia32_pabsd((__v2si)__a);
107 }
108
109 /// Computes the absolute value of each of the packed 32-bit signed
110 /// integers in the source operand and stores the 32-bit unsigned integer
111 /// results in the destination.
112 ///
113 /// \headerfile <x86intrin.h>
114 ///
115 /// This intrinsic corresponds to the \c VPABSD instruction.
116 ///
117 /// \param __a
118 /// A 128-bit vector of [4 x i32].
119 /// \returns A 128-bit integer vector containing the absolute values of the
120 /// elements in the operand.
121 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_abs_epi32(__m128i __a)122 _mm_abs_epi32(__m128i __a)
123 {
124 return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
125 }
126
/// Concatenates the two 128-bit integer vector operands, and
///    right-shifts the result by the number of bytes specified in the immediate
///    operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param b
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 128-bit integer vector containing the concatenated right-shifted
///    value.
///
/// The whole expansion is parenthesized so that the result cast applies to
/// the complete builtin call even when the macro is followed by a postfix
/// operator or embedded in a larger expression.
#define _mm_alignr_epi8(a, b, n) \
  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
                                      (__v16qi)(__m128i)(b), (n)))
150
/// Concatenates the two 64-bit integer vector operands, and right-shifts
///    the result by the number of bytes specified in the immediate operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param b
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 64-bit integer vector containing the concatenated right-shifted
///    value.
///
/// The whole expansion is parenthesized so that the result cast applies to
/// the complete builtin call even when the macro is followed by a postfix
/// operator or embedded in a larger expression.
#define _mm_alignr_pi8(a, b, n) \
  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))
172
173 /// Horizontally adds the adjacent pairs of values contained in 2 packed
174 /// 128-bit vectors of [8 x i16].
175 ///
176 /// \headerfile <x86intrin.h>
177 ///
178 /// This intrinsic corresponds to the \c VPHADDW instruction.
179 ///
180 /// \param __a
181 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
182 /// horizontal sums of the values are stored in the lower bits of the
183 /// destination.
184 /// \param __b
185 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
186 /// horizontal sums of the values are stored in the upper bits of the
187 /// destination.
188 /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
189 /// both operands.
190 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hadd_epi16(__m128i __a,__m128i __b)191 _mm_hadd_epi16(__m128i __a, __m128i __b)
192 {
193 return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
194 }
195
196 /// Horizontally adds the adjacent pairs of values contained in 2 packed
197 /// 128-bit vectors of [4 x i32].
198 ///
199 /// \headerfile <x86intrin.h>
200 ///
201 /// This intrinsic corresponds to the \c VPHADDD instruction.
202 ///
203 /// \param __a
204 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
205 /// horizontal sums of the values are stored in the lower bits of the
206 /// destination.
207 /// \param __b
208 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
209 /// horizontal sums of the values are stored in the upper bits of the
210 /// destination.
211 /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
212 /// both operands.
213 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hadd_epi32(__m128i __a,__m128i __b)214 _mm_hadd_epi32(__m128i __a, __m128i __b)
215 {
216 return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
217 }
218
219 /// Horizontally adds the adjacent pairs of values contained in 2 packed
220 /// 64-bit vectors of [4 x i16].
221 ///
222 /// \headerfile <x86intrin.h>
223 ///
224 /// This intrinsic corresponds to the \c PHADDW instruction.
225 ///
226 /// \param __a
227 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
228 /// horizontal sums of the values are stored in the lower bits of the
229 /// destination.
230 /// \param __b
231 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
232 /// horizontal sums of the values are stored in the upper bits of the
233 /// destination.
234 /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
235 /// operands.
236 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hadd_pi16(__m64 __a,__m64 __b)237 _mm_hadd_pi16(__m64 __a, __m64 __b)
238 {
239 return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
240 }
241
242 /// Horizontally adds the adjacent pairs of values contained in 2 packed
243 /// 64-bit vectors of [2 x i32].
244 ///
245 /// \headerfile <x86intrin.h>
246 ///
247 /// This intrinsic corresponds to the \c PHADDD instruction.
248 ///
249 /// \param __a
250 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
251 /// horizontal sums of the values are stored in the lower bits of the
252 /// destination.
253 /// \param __b
254 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
255 /// horizontal sums of the values are stored in the upper bits of the
256 /// destination.
257 /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
258 /// operands.
259 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hadd_pi32(__m64 __a,__m64 __b)260 _mm_hadd_pi32(__m64 __a, __m64 __b)
261 {
262 return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
263 }
264
265 /// Horizontally adds the adjacent pairs of values contained in 2 packed
266 /// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are
267 /// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
268 /// 0x8000.
269 ///
270 /// \headerfile <x86intrin.h>
271 ///
272 /// This intrinsic corresponds to the \c VPHADDSW instruction.
273 ///
274 /// \param __a
275 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
276 /// horizontal sums of the values are stored in the lower bits of the
277 /// destination.
278 /// \param __b
279 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
280 /// horizontal sums of the values are stored in the upper bits of the
281 /// destination.
282 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
283 /// sums of both operands.
284 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hadds_epi16(__m128i __a,__m128i __b)285 _mm_hadds_epi16(__m128i __a, __m128i __b)
286 {
287 return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
288 }
289
290 /// Horizontally adds the adjacent pairs of values contained in 2 packed
291 /// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are
292 /// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
293 /// 0x8000.
294 ///
295 /// \headerfile <x86intrin.h>
296 ///
297 /// This intrinsic corresponds to the \c PHADDSW instruction.
298 ///
299 /// \param __a
300 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
301 /// horizontal sums of the values are stored in the lower bits of the
302 /// destination.
303 /// \param __b
304 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
305 /// horizontal sums of the values are stored in the upper bits of the
306 /// destination.
307 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
308 /// sums of both operands.
309 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hadds_pi16(__m64 __a,__m64 __b)310 _mm_hadds_pi16(__m64 __a, __m64 __b)
311 {
312 return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
313 }
314
315 /// Horizontally subtracts the adjacent pairs of values contained in 2
316 /// packed 128-bit vectors of [8 x i16].
317 ///
318 /// \headerfile <x86intrin.h>
319 ///
320 /// This intrinsic corresponds to the \c VPHSUBW instruction.
321 ///
322 /// \param __a
323 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
324 /// horizontal differences between the values are stored in the lower bits of
325 /// the destination.
326 /// \param __b
327 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
328 /// horizontal differences between the values are stored in the upper bits of
329 /// the destination.
330 /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
331 /// of both operands.
332 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsub_epi16(__m128i __a,__m128i __b)333 _mm_hsub_epi16(__m128i __a, __m128i __b)
334 {
335 return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
336 }
337
338 /// Horizontally subtracts the adjacent pairs of values contained in 2
339 /// packed 128-bit vectors of [4 x i32].
340 ///
341 /// \headerfile <x86intrin.h>
342 ///
343 /// This intrinsic corresponds to the \c VPHSUBD instruction.
344 ///
345 /// \param __a
346 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
347 /// horizontal differences between the values are stored in the lower bits of
348 /// the destination.
349 /// \param __b
350 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
351 /// horizontal differences between the values are stored in the upper bits of
352 /// the destination.
353 /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
354 /// of both operands.
355 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsub_epi32(__m128i __a,__m128i __b)356 _mm_hsub_epi32(__m128i __a, __m128i __b)
357 {
358 return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
359 }
360
361 /// Horizontally subtracts the adjacent pairs of values contained in 2
362 /// packed 64-bit vectors of [4 x i16].
363 ///
364 /// \headerfile <x86intrin.h>
365 ///
366 /// This intrinsic corresponds to the \c PHSUBW instruction.
367 ///
368 /// \param __a
369 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
370 /// horizontal differences between the values are stored in the lower bits of
371 /// the destination.
372 /// \param __b
373 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
374 /// horizontal differences between the values are stored in the upper bits of
375 /// the destination.
376 /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
377 /// of both operands.
378 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hsub_pi16(__m64 __a,__m64 __b)379 _mm_hsub_pi16(__m64 __a, __m64 __b)
380 {
381 return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
382 }
383
384 /// Horizontally subtracts the adjacent pairs of values contained in 2
385 /// packed 64-bit vectors of [2 x i32].
386 ///
387 /// \headerfile <x86intrin.h>
388 ///
389 /// This intrinsic corresponds to the \c PHSUBD instruction.
390 ///
391 /// \param __a
392 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
393 /// horizontal differences between the values are stored in the lower bits of
394 /// the destination.
395 /// \param __b
396 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
397 /// horizontal differences between the values are stored in the upper bits of
398 /// the destination.
399 /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
400 /// of both operands.
401 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hsub_pi32(__m64 __a,__m64 __b)402 _mm_hsub_pi32(__m64 __a, __m64 __b)
403 {
404 return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
405 }
406
407 /// Horizontally subtracts the adjacent pairs of values contained in 2
408 /// packed 128-bit vectors of [8 x i16]. Positive differences greater than
409 /// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
410 /// saturated to 0x8000.
411 ///
412 /// \headerfile <x86intrin.h>
413 ///
414 /// This intrinsic corresponds to the \c VPHSUBSW instruction.
415 ///
416 /// \param __a
417 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
418 /// horizontal differences between the values are stored in the lower bits of
419 /// the destination.
420 /// \param __b
421 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
422 /// horizontal differences between the values are stored in the upper bits of
423 /// the destination.
424 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
425 /// differences of both operands.
426 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_hsubs_epi16(__m128i __a,__m128i __b)427 _mm_hsubs_epi16(__m128i __a, __m128i __b)
428 {
429 return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
430 }
431
432 /// Horizontally subtracts the adjacent pairs of values contained in 2
433 /// packed 64-bit vectors of [4 x i16]. Positive differences greater than
434 /// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
435 /// saturated to 0x8000.
436 ///
437 /// \headerfile <x86intrin.h>
438 ///
439 /// This intrinsic corresponds to the \c PHSUBSW instruction.
440 ///
441 /// \param __a
442 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
443 /// horizontal differences between the values are stored in the lower bits of
444 /// the destination.
445 /// \param __b
446 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
447 /// horizontal differences between the values are stored in the upper bits of
448 /// the destination.
449 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
450 /// differences of both operands.
451 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_hsubs_pi16(__m64 __a,__m64 __b)452 _mm_hsubs_pi16(__m64 __a, __m64 __b)
453 {
454 return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
455 }
456
457 /// Multiplies corresponding pairs of packed 8-bit unsigned integer
458 /// values contained in the first source operand and packed 8-bit signed
459 /// integer values contained in the second source operand, adds pairs of
460 /// contiguous products with signed saturation, and writes the 16-bit sums to
461 /// the corresponding bits in the destination.
462 ///
463 /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
464 /// both operands are multiplied, and the sum of both results is written to
465 /// bits [15:0] of the destination.
466 ///
467 /// \headerfile <x86intrin.h>
468 ///
469 /// This intrinsic corresponds to the \c VPMADDUBSW instruction.
470 ///
471 /// \param __a
472 /// A 128-bit integer vector containing the first source operand.
473 /// \param __b
474 /// A 128-bit integer vector containing the second source operand.
475 /// \returns A 128-bit integer vector containing the sums of products of both
476 /// operands: \n
477 /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
478 /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
479 /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
480 /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
481 /// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
482 /// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
483 /// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
484 /// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
485 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_maddubs_epi16(__m128i __a,__m128i __b)486 _mm_maddubs_epi16(__m128i __a, __m128i __b)
487 {
488 return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
489 }
490
491 /// Multiplies corresponding pairs of packed 8-bit unsigned integer
492 /// values contained in the first source operand and packed 8-bit signed
493 /// integer values contained in the second source operand, adds pairs of
494 /// contiguous products with signed saturation, and writes the 16-bit sums to
495 /// the corresponding bits in the destination.
496 ///
497 /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
498 /// both operands are multiplied, and the sum of both results is written to
499 /// bits [15:0] of the destination.
500 ///
501 /// \headerfile <x86intrin.h>
502 ///
503 /// This intrinsic corresponds to the \c PMADDUBSW instruction.
504 ///
505 /// \param __a
506 /// A 64-bit integer vector containing the first source operand.
507 /// \param __b
508 /// A 64-bit integer vector containing the second source operand.
509 /// \returns A 64-bit integer vector containing the sums of products of both
510 /// operands: \n
511 /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
512 /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
513 /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
514 /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
515 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_maddubs_pi16(__m64 __a,__m64 __b)516 _mm_maddubs_pi16(__m64 __a, __m64 __b)
517 {
518 return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
519 }
520
521 /// Multiplies packed 16-bit signed integer values, truncates the 32-bit
522 /// products to the 18 most significant bits by right-shifting, rounds the
523 /// truncated value by adding 1, and writes bits [16:1] to the destination.
524 ///
525 /// \headerfile <x86intrin.h>
526 ///
527 /// This intrinsic corresponds to the \c VPMULHRSW instruction.
528 ///
529 /// \param __a
530 /// A 128-bit vector of [8 x i16] containing one of the source operands.
531 /// \param __b
532 /// A 128-bit vector of [8 x i16] containing one of the source operands.
533 /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
534 /// products of both operands.
535 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_mulhrs_epi16(__m128i __a,__m128i __b)536 _mm_mulhrs_epi16(__m128i __a, __m128i __b)
537 {
538 return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
539 }
540
541 /// Multiplies packed 16-bit signed integer values, truncates the 32-bit
542 /// products to the 18 most significant bits by right-shifting, rounds the
543 /// truncated value by adding 1, and writes bits [16:1] to the destination.
544 ///
545 /// \headerfile <x86intrin.h>
546 ///
547 /// This intrinsic corresponds to the \c PMULHRSW instruction.
548 ///
549 /// \param __a
550 /// A 64-bit vector of [4 x i16] containing one of the source operands.
551 /// \param __b
552 /// A 64-bit vector of [4 x i16] containing one of the source operands.
553 /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
554 /// products of both operands.
555 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_mulhrs_pi16(__m64 __a,__m64 __b)556 _mm_mulhrs_pi16(__m64 __a, __m64 __b)
557 {
558 return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
559 }
560
561 /// Copies the 8-bit integers from a 128-bit integer vector to the
562 /// destination or clears 8-bit values in the destination, as specified by
563 /// the second source operand.
564 ///
565 /// \headerfile <x86intrin.h>
566 ///
567 /// This intrinsic corresponds to the \c VPSHUFB instruction.
568 ///
569 /// \param __a
570 /// A 128-bit integer vector containing the values to be copied.
571 /// \param __b
572 /// A 128-bit integer vector containing control bytes corresponding to
573 /// positions in the destination:
574 /// Bit 7: \n
575 /// 1: Clear the corresponding byte in the destination. \n
576 /// 0: Copy the selected source byte to the corresponding byte in the
577 /// destination. \n
578 /// Bits [6:4] Reserved. \n
579 /// Bits [3:0] select the source byte to be copied.
580 /// \returns A 128-bit integer vector containing the copied or cleared values.
581 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_shuffle_epi8(__m128i __a,__m128i __b)582 _mm_shuffle_epi8(__m128i __a, __m128i __b)
583 {
584 return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
585 }
586
587 /// Copies the 8-bit integers from a 64-bit integer vector to the
588 /// destination or clears 8-bit values in the destination, as specified by
589 /// the second source operand.
590 ///
591 /// \headerfile <x86intrin.h>
592 ///
593 /// This intrinsic corresponds to the \c PSHUFB instruction.
594 ///
595 /// \param __a
596 /// A 64-bit integer vector containing the values to be copied.
597 /// \param __b
598 /// A 64-bit integer vector containing control bytes corresponding to
599 /// positions in the destination:
600 /// Bit 7: \n
601 /// 1: Clear the corresponding byte in the destination. \n
602 /// 0: Copy the selected source byte to the corresponding byte in the
603 /// destination. \n
604 /// Bits [3:0] select the source byte to be copied.
605 /// \returns A 64-bit integer vector containing the copied or cleared values.
606 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_shuffle_pi8(__m64 __a,__m64 __b)607 _mm_shuffle_pi8(__m64 __a, __m64 __b)
608 {
609 return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
610 }
611
612 /// For each 8-bit integer in the first source operand, perform one of
613 /// the following actions as specified by the second source operand.
614 ///
615 /// If the byte in the second source is negative, calculate the two's
616 /// complement of the corresponding byte in the first source, and write that
617 /// value to the destination. If the byte in the second source is positive,
618 /// copy the corresponding byte from the first source to the destination. If
619 /// the byte in the second source is zero, clear the corresponding byte in
620 /// the destination.
621 ///
622 /// \headerfile <x86intrin.h>
623 ///
624 /// This intrinsic corresponds to the \c VPSIGNB instruction.
625 ///
626 /// \param __a
627 /// A 128-bit integer vector containing the values to be copied.
628 /// \param __b
629 /// A 128-bit integer vector containing control bytes corresponding to
630 /// positions in the destination.
631 /// \returns A 128-bit integer vector containing the resultant values.
632 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sign_epi8(__m128i __a,__m128i __b)633 _mm_sign_epi8(__m128i __a, __m128i __b)
634 {
635 return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
636 }
637
638 /// For each 16-bit integer in the first source operand, perform one of
639 /// the following actions as specified by the second source operand.
640 ///
641 /// If the word in the second source is negative, calculate the two's
642 /// complement of the corresponding word in the first source, and write that
643 /// value to the destination. If the word in the second source is positive,
644 /// copy the corresponding word from the first source to the destination. If
645 /// the word in the second source is zero, clear the corresponding word in
646 /// the destination.
647 ///
648 /// \headerfile <x86intrin.h>
649 ///
650 /// This intrinsic corresponds to the \c VPSIGNW instruction.
651 ///
652 /// \param __a
653 /// A 128-bit integer vector containing the values to be copied.
654 /// \param __b
655 /// A 128-bit integer vector containing control words corresponding to
656 /// positions in the destination.
657 /// \returns A 128-bit integer vector containing the resultant values.
658 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sign_epi16(__m128i __a,__m128i __b)659 _mm_sign_epi16(__m128i __a, __m128i __b)
660 {
661 return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
662 }
663
664 /// For each 32-bit integer in the first source operand, perform one of
665 /// the following actions as specified by the second source operand.
666 ///
667 /// If the doubleword in the second source is negative, calculate the two's
668 /// complement of the corresponding word in the first source, and write that
669 /// value to the destination. If the doubleword in the second source is
670 /// positive, copy the corresponding word from the first source to the
671 /// destination. If the doubleword in the second source is zero, clear the
672 /// corresponding word in the destination.
673 ///
674 /// \headerfile <x86intrin.h>
675 ///
676 /// This intrinsic corresponds to the \c VPSIGND instruction.
677 ///
678 /// \param __a
679 /// A 128-bit integer vector containing the values to be copied.
680 /// \param __b
681 /// A 128-bit integer vector containing control doublewords corresponding to
682 /// positions in the destination.
683 /// \returns A 128-bit integer vector containing the resultant values.
684 static __inline__ __m128i __DEFAULT_FN_ATTRS
_mm_sign_epi32(__m128i __a,__m128i __b)685 _mm_sign_epi32(__m128i __a, __m128i __b)
686 {
687 return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
688 }
689
690 /// For each 8-bit integer in the first source operand, perform one of
691 /// the following actions as specified by the second source operand.
692 ///
693 /// If the byte in the second source is negative, calculate the two's
694 /// complement of the corresponding byte in the first source, and write that
695 /// value to the destination. If the byte in the second source is positive,
696 /// copy the corresponding byte from the first source to the destination. If
697 /// the byte in the second source is zero, clear the corresponding byte in
698 /// the destination.
699 ///
700 /// \headerfile <x86intrin.h>
701 ///
702 /// This intrinsic corresponds to the \c PSIGNB instruction.
703 ///
704 /// \param __a
705 /// A 64-bit integer vector containing the values to be copied.
706 /// \param __b
707 /// A 64-bit integer vector containing control bytes corresponding to
708 /// positions in the destination.
709 /// \returns A 64-bit integer vector containing the resultant values.
710 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_sign_pi8(__m64 __a,__m64 __b)711 _mm_sign_pi8(__m64 __a, __m64 __b)
712 {
713 return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
714 }
715
716 /// For each 16-bit integer in the first source operand, perform one of
717 /// the following actions as specified by the second source operand.
718 ///
719 /// If the word in the second source is negative, calculate the two's
720 /// complement of the corresponding word in the first source, and write that
721 /// value to the destination. If the word in the second source is positive,
722 /// copy the corresponding word from the first source to the destination. If
723 /// the word in the second source is zero, clear the corresponding word in
724 /// the destination.
725 ///
726 /// \headerfile <x86intrin.h>
727 ///
728 /// This intrinsic corresponds to the \c PSIGNW instruction.
729 ///
730 /// \param __a
731 /// A 64-bit integer vector containing the values to be copied.
732 /// \param __b
733 /// A 64-bit integer vector containing control words corresponding to
734 /// positions in the destination.
735 /// \returns A 64-bit integer vector containing the resultant values.
736 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_sign_pi16(__m64 __a,__m64 __b)737 _mm_sign_pi16(__m64 __a, __m64 __b)
738 {
739 return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
740 }
741
742 /// For each 32-bit integer in the first source operand, perform one of
743 /// the following actions as specified by the second source operand.
744 ///
745 /// If the doubleword in the second source is negative, calculate the two's
746 /// complement of the corresponding doubleword in the first source, and
747 /// write that value to the destination. If the doubleword in the second
748 /// source is positive, copy the corresponding doubleword from the first
749 /// source to the destination. If the doubleword in the second source is
750 /// zero, clear the corresponding doubleword in the destination.
751 ///
752 /// \headerfile <x86intrin.h>
753 ///
754 /// This intrinsic corresponds to the \c PSIGND instruction.
755 ///
756 /// \param __a
757 /// A 64-bit integer vector containing the values to be copied.
758 /// \param __b
759 /// A 64-bit integer vector containing two control doublewords corresponding
760 /// to positions in the destination.
761 /// \returns A 64-bit integer vector containing the resultant values.
762 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
_mm_sign_pi32(__m64 __a,__m64 __b)763 _mm_sign_pi32(__m64 __a, __m64 __b)
764 {
765 return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
766 }
767
/* The attribute helper macros are private to this header; remove them so
 * they do not leak into code that includes it. */
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_MMX
770
771 #endif /* __TMMINTRIN_H */
772