/* Copyright (C) 2002-2021 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.  */

#ifndef _MMINTRIN_H_INCLUDED
#define _MMINTRIN_H_INCLUDED

#if defined __x86_64__ && !defined __SSE__ || !defined __MMX__
#pragma GCC push_options
#ifdef __MMX_WITH_SSE__
#pragma GCC target("sse2")
#elif defined __x86_64__
#pragma GCC target("sse,mmx")
#else
#pragma GCC target("mmx")
#endif
#define __DISABLE_MMX__
#endif /* __MMX__ */

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
typedef int __m32 __attribute__ ((__vector_size__ (4), __may_alias__));
typedef short __m16 __attribute__ ((__vector_size__ (2), __may_alias__));

/* Unaligned versions of the same types.  */
typedef int __m64_u __attribute__ ((__vector_size__ (8), __may_alias__, __aligned__ (1)));
typedef int __m32_u __attribute__ ((__vector_size__ (4), \
				    __may_alias__, __aligned__ (1)));
typedef short __m16_u __attribute__ ((__vector_size__ (2), \
				      __may_alias__, __aligned__ (1)));

/* Internal data types for implementing the intrinsics.  */
typedef int __v2si __attribute__ ((__vector_size__ (8)));
typedef short __v4hi __attribute__ ((__vector_size__ (8)));
typedef char __v8qi __attribute__ ((__vector_size__ (8)));
typedef long long __v1di __attribute__ ((__vector_size__ (8)));
typedef float __v2sf __attribute__ ((__vector_size__ (8)));

/* Empty the multimedia state.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_empty (void)
{
  __builtin_ia32_emms ();
}

extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_empty (void)
{
  _mm_empty ();
}
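
/* Illustrative sketch (hypothetical helper, not part of this header's
   API): MMX registers alias the x87 floating-point stack, so the usual
   pattern is to finish a block of MMX work with _mm_empty before any
   x87 code runs.  */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__example_finish_mmx_block (void)
{
  /* ... MMX intrinsics would run here ... */
  _mm_empty ();   /* reset the x87 tag word for later FP code */
}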

/* Convert I to a __m64 object.  The integer is zero-extended to 64 bits.  */
extern __inline __m64  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi32_si64 (int __i)
{
  return (__m64) __builtin_ia32_vec_init_v2si (__i, 0);
}

extern __inline __m64  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_int (int __i)
{
  return _mm_cvtsi32_si64 (__i);
}

#ifdef __x86_64__
/* Convert I to a __m64 object.  */

/* Intel intrinsic.  */
extern __inline __m64  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_from_int64 (long long __i)
{
  return (__m64) __i;
}

extern __inline __m64  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_m64 (long long __i)
{
  return (__m64) __i;
}

/* Microsoft intrinsic.  */
extern __inline __m64  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64x_si64 (long long __i)
{
  return (__m64) __i;
}

extern __inline __m64  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi64x (long long __i)
{
  return (__m64) __i;
}
#endif

/* Convert the lower 32 bits of the __m64 object into an integer.  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_si32 (__m64 __i)
{
  return __builtin_ia32_vec_ext_v2si ((__v2si)__i, 0);
}

extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_int (__m64 __i)
{
  return _mm_cvtsi64_si32 (__i);
}
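
/* Illustrative sketch (hypothetical helper): round-tripping a 32-bit
   integer through a __m64 recovers the original value, since
   _mm_cvtsi32_si64 zero-extends and _mm_cvtsi64_si32 reads the low half.  */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__example_roundtrip_si32 (int __x)
{
  return _mm_cvtsi64_si32 (_mm_cvtsi32_si64 (__x));  /* == __x */
}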

#ifdef __x86_64__
/* Convert the __m64 object to a 64-bit integer.  */

/* Intel intrinsic.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_to_int64 (__m64 __i)
{
  return (long long)__i;
}

extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtm64_si64 (__m64 __i)
{
  return (long long)__i;
}

/* Microsoft intrinsic.  */
extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtsi64_si64x (__m64 __i)
{
  return (long long)__i;
}
#endif

/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
   the result, and the four 16-bit values from M2 into the upper four 8-bit
   values of the result, all with signed saturation.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packsswb (__m64 __m1, __m64 __m2)
{
  return _mm_packs_pi16 (__m1, __m2);
}

/* Pack the two 32-bit values from M1 into the lower two 16-bit values of
   the result, and the two 32-bit values from M2 into the upper two 16-bit
   values of the result, all with signed saturation.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pi32 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packssdw (__m64 __m1, __m64 __m2)
{
  return _mm_packs_pi32 (__m1, __m2);
}

/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
   the result, and the four 16-bit values from M2 into the upper four 8-bit
   values of the result, all with unsigned saturation.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_packs_pu16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_packuswb (__m64 __m1, __m64 __m2)
{
  return _mm_packs_pu16 (__m1, __m2);
}
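
/* Illustrative sketch (hypothetical helper): with signed saturation each
   16-bit lane is clamped to [-128, 127] before narrowing, so 300 becomes
   127 and -300 becomes -128.  The low four result bytes come from __lo,
   the high four from __hi.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__example_narrow_s16_to_s8 (__m64 __lo, __m64 __hi)
{
  return _mm_packs_pi16 (__lo, __hi);
}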

/* Interleave the four 8-bit values from the high half of M1 with the four
   8-bit values from the high half of M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhbw (__m64 __m1, __m64 __m2)
{
  return _mm_unpackhi_pi8 (__m1, __m2);
}

/* Interleave the two 16-bit values from the high half of M1 with the two
   16-bit values from the high half of M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhwd (__m64 __m1, __m64 __m2)
{
  return _mm_unpackhi_pi16 (__m1, __m2);
}

/* Interleave the 32-bit value from the high half of M1 with the 32-bit
   value from the high half of M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckhdq (__m64 __m1, __m64 __m2)
{
  return _mm_unpackhi_pi32 (__m1, __m2);
}

/* Interleave the four 8-bit values from the low half of M1 with the four
   8-bit values from the low half of M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpcklbw (__m64 __m1, __m64 __m2)
{
  return _mm_unpacklo_pi8 (__m1, __m2);
}

/* Interleave the two 16-bit values from the low half of M1 with the two
   16-bit values from the low half of M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpcklwd (__m64 __m1, __m64 __m2)
{
  return _mm_unpacklo_pi16 (__m1, __m2);
}

/* Interleave the 32-bit value from the low half of M1 with the 32-bit
   value from the low half of M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_punpckldq (__m64 __m1, __m64 __m2)
{
  return _mm_unpacklo_pi32 (__m1, __m2);
}
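
/* Illustrative sketch (hypothetical helper): interleaving with zero is
   the classic MMX way to widen unsigned bytes; here the low four bytes
   of __bytes become four zero-extended 16-bit lanes.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__example_widen_lo_u8 (__m64 __bytes)
{
  return _mm_unpacklo_pi8 (__bytes, (__m64) 0LL);
}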

/* Add the 8-bit values in M1 to the 8-bit values in M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pi8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddb (__m64 __m1, __m64 __m2)
{
  return _mm_add_pi8 (__m1, __m2);
}

/* Add the 16-bit values in M1 to the 16-bit values in M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddw (__m64 __m1, __m64 __m2)
{
  return _mm_add_pi16 (__m1, __m2);
}

/* Add the 32-bit values in M1 to the 32-bit values in M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_pi32 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddd (__m64 __m1, __m64 __m2)
{
  return _mm_add_pi32 (__m1, __m2);
}

/* Add the 64-bit value in M1 to the 64-bit value in M2.  */
#ifndef __SSE2__
#pragma GCC push_options
#ifdef __MMX_WITH_SSE__
#pragma GCC target("sse2")
#else
#pragma GCC target("sse2,mmx")
#endif
#define __DISABLE_SSE2__
#endif /* __SSE2__ */

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_si64 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
}
#ifdef __DISABLE_SSE2__
#undef __DISABLE_SSE2__
#pragma GCC pop_options
#endif /* __DISABLE_SSE2__ */

/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
   saturated arithmetic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pi8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddsb (__m64 __m1, __m64 __m2)
{
  return _mm_adds_pi8 (__m1, __m2);
}

/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
   saturated arithmetic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddsw (__m64 __m1, __m64 __m2)
{
  return _mm_adds_pi16 (__m1, __m2);
}

/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
   saturated arithmetic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pu8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddusb (__m64 __m1, __m64 __m2)
{
  return _mm_adds_pu8 (__m1, __m2);
}

/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
   saturated arithmetic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_adds_pu16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_paddusw (__m64 __m1, __m64 __m2)
{
  return _mm_adds_pu16 (__m1, __m2);
}
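
/* Illustrative sketch (hypothetical helper): brightening eight 8-bit
   pixels at once.  Unsigned saturation clamps at 255 instead of
   wrapping, so 250 + 10 yields 255 rather than 4.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__example_brighten_pixels (__m64 __pixels, __m64 __delta)
{
  return _mm_adds_pu8 (__pixels, __delta);
}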

/* Subtract the 8-bit values in M2 from the 8-bit values in M1.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubb (__m64 __m1, __m64 __m2)
{
  return _mm_sub_pi8 (__m1, __m2);
}

/* Subtract the 16-bit values in M2 from the 16-bit values in M1.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubw (__m64 __m1, __m64 __m2)
{
  return _mm_sub_pi16 (__m1, __m2);
}

/* Subtract the 32-bit values in M2 from the 32-bit values in M1.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_pi32 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubd (__m64 __m1, __m64 __m2)
{
  return _mm_sub_pi32 (__m1, __m2);
}

/* Subtract the 64-bit value in M2 from the 64-bit value in M1.  */
#ifndef __SSE2__
#pragma GCC push_options
#ifdef __MMX_WITH_SSE__
#pragma GCC target("sse2")
#else
#pragma GCC target("sse2,mmx")
#endif
#define __DISABLE_SSE2__
#endif /* __SSE2__ */

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_si64 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
}
#ifdef __DISABLE_SSE2__
#undef __DISABLE_SSE2__
#pragma GCC pop_options
#endif /* __DISABLE_SSE2__ */

/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
   saturating arithmetic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pi8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubsb (__m64 __m1, __m64 __m2)
{
  return _mm_subs_pi8 (__m1, __m2);
}

/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
   signed saturating arithmetic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubsw (__m64 __m1, __m64 __m2)
{
  return _mm_subs_pi16 (__m1, __m2);
}

/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
   unsigned saturating arithmetic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pu8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubusb (__m64 __m1, __m64 __m2)
{
  return _mm_subs_pu8 (__m1, __m2);
}

/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
   unsigned saturating arithmetic.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_subs_pu16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psubusw (__m64 __m1, __m64 __m2)
{
  return _mm_subs_pu16 (__m1, __m2);
}
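
/* Illustrative sketch (hypothetical helper): MMX has no unsigned byte
   maximum, but one can be synthesized as (a -sat b) + b, since the
   saturating difference is a - b when a > b and 0 otherwise.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__example_max_u8 (__m64 __a, __m64 __b)
{
  return _mm_add_pi8 (_mm_subs_pu8 (__a, __b), __b);
}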

/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
   four 32-bit intermediate results, which are then summed by pairs to
   produce two 32-bit results.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_madd_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmaddwd (__m64 __m1, __m64 __m2)
{
  return _mm_madd_pi16 (__m1, __m2);
}
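
/* Illustrative sketch (hypothetical helper): pmaddwd computes two
   partial dot products; the low 32-bit lane of the result is
   a0*b0 + a1*b1 and the high lane is a2*b2 + a3*b3.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__example_dot_pairs (__m64 __a, __m64 __b)
{
  return _mm_madd_pi16 (__a, __b);
}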

/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
   M2 and produce the high 16 bits of the 32-bit results.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmulhw (__m64 __m1, __m64 __m2)
{
  return _mm_mulhi_pi16 (__m1, __m2);
}

/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
   the low 16 bits of the results.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pmullw (__m64 __m1, __m64 __m2)
{
  return _mm_mullo_pi16 (__m1, __m2);
}
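
/* Illustrative sketch (hypothetical helper): the full signed 32-bit
   products of the two low 16-bit lanes can be reassembled by
   interleaving the low and high halves of the multiplication.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__example_mul_lo_s16_full (__m64 __a, __m64 __b)
{
  return _mm_unpacklo_pi16 (_mm_mullo_pi16 (__a, __b),
			    _mm_mulhi_pi16 (__a, __b));
}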

/* Shift four 16-bit values in M left by COUNT.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_pi16 (__m64 __m, __m64 __count)
{
  return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (__v4hi)__count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllw (__m64 __m, __m64 __count)
{
  return _mm_sll_pi16 (__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_pi16 (__m64 __m, int __count)
{
  return (__m64) __builtin_ia32_psllwi ((__v4hi)__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllwi (__m64 __m, int __count)
{
  return _mm_slli_pi16 (__m, __count);
}

/* Shift two 32-bit values in M left by COUNT.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_pi32 (__m64 __m, __m64 __count)
{
  return (__m64) __builtin_ia32_pslld ((__v2si)__m, (__v2si)__count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pslld (__m64 __m, __m64 __count)
{
  return _mm_sll_pi32 (__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_pi32 (__m64 __m, int __count)
{
  return (__m64) __builtin_ia32_pslldi ((__v2si)__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pslldi (__m64 __m, int __count)
{
  return _mm_slli_pi32 (__m, __count);
}

/* Shift the 64-bit value in M left by COUNT.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sll_si64 (__m64 __m, __m64 __count)
{
  return (__m64) __builtin_ia32_psllq ((__v1di)__m, (__v1di)__count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllq (__m64 __m, __m64 __count)
{
  return _mm_sll_si64 (__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_slli_si64 (__m64 __m, int __count)
{
  return (__m64) __builtin_ia32_psllqi ((__v1di)__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psllqi (__m64 __m, int __count)
{
  return _mm_slli_si64 (__m, __count);
}
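
/* Illustrative sketch (hypothetical helper): each shift exists in two
   forms, one taking the count in a __m64 and one taking a plain int;
   both calls below shift __m left by four bits.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__example_shift_left_4 (__m64 __m)
{
  /* Equivalent: _mm_sll_si64 (__m, _mm_cvtsi32_si64 (4)).  */
  return _mm_slli_si64 (__m, 4);
}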

/* Shift four 16-bit values in M right by COUNT; shift in the sign bit.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_pi16 (__m64 __m, __m64 __count)
{
  return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (__v4hi)__count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psraw (__m64 __m, __m64 __count)
{
  return _mm_sra_pi16 (__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_pi16 (__m64 __m, int __count)
{
  return (__m64) __builtin_ia32_psrawi ((__v4hi)__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrawi (__m64 __m, int __count)
{
  return _mm_srai_pi16 (__m, __count);
}

/* Shift two 32-bit values in M right by COUNT; shift in the sign bit.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sra_pi32 (__m64 __m, __m64 __count)
{
  return (__m64) __builtin_ia32_psrad ((__v2si)__m, (__v2si)__count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrad (__m64 __m, __m64 __count)
{
  return _mm_sra_pi32 (__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srai_pi32 (__m64 __m, int __count)
{
  return (__m64) __builtin_ia32_psradi ((__v2si)__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psradi (__m64 __m, int __count)
{
  return _mm_srai_pi32 (__m, __count);
}
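
/* Illustrative sketch (hypothetical helper): an arithmetic right shift
   by 15 smears the sign bit across each 16-bit lane, producing 0x0000
   for non-negative lanes and 0xFFFF for negative ones.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__example_sign_mask_pi16 (__m64 __m)
{
  return _mm_srai_pi16 (__m, 15);
}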

/* Shift four 16-bit values in M right by COUNT; shift in zeros.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_pi16 (__m64 __m, __m64 __count)
{
  return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (__v4hi)__count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlw (__m64 __m, __m64 __count)
{
  return _mm_srl_pi16 (__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_pi16 (__m64 __m, int __count)
{
  return (__m64) __builtin_ia32_psrlwi ((__v4hi)__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlwi (__m64 __m, int __count)
{
  return _mm_srli_pi16 (__m, __count);
}

/* Shift two 32-bit values in M right by COUNT; shift in zeros.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_pi32 (__m64 __m, __m64 __count)
{
  return (__m64) __builtin_ia32_psrld ((__v2si)__m, (__v2si)__count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrld (__m64 __m, __m64 __count)
{
  return _mm_srl_pi32 (__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_pi32 (__m64 __m, int __count)
{
  return (__m64) __builtin_ia32_psrldi ((__v2si)__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrldi (__m64 __m, int __count)
{
  return _mm_srli_pi32 (__m, __count);
}

/* Shift the 64-bit value in M right by COUNT; shift in zeros.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srl_si64 (__m64 __m, __m64 __count)
{
  return (__m64) __builtin_ia32_psrlq ((__v1di)__m, (__v1di)__count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlq (__m64 __m, __m64 __count)
{
  return _mm_srl_si64 (__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_srli_si64 (__m64 __m, int __count)
{
  return (__m64) __builtin_ia32_psrlqi ((__v1di)__m, __count);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_psrlqi (__m64 __m, int __count)
{
  return _mm_srli_si64 (__m, __count);
}

/* Bit-wise AND the 64-bit values in M1 and M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_and_si64 (__m64 __m1, __m64 __m2)
{
  return __builtin_ia32_pand (__m1, __m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pand (__m64 __m1, __m64 __m2)
{
  return _mm_and_si64 (__m1, __m2);
}

/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
   64-bit value in M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_andnot_si64 (__m64 __m1, __m64 __m2)
{
  return __builtin_ia32_pandn (__m1, __m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pandn (__m64 __m1, __m64 __m2)
{
  return _mm_andnot_si64 (__m1, __m2);
}

/* Bit-wise inclusive OR the 64-bit values in M1 and M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_or_si64 (__m64 __m1, __m64 __m2)
{
  return __builtin_ia32_por (__m1, __m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_por (__m64 __m1, __m64 __m2)
{
  return _mm_or_si64 (__m1, __m2);
}

/* Bit-wise exclusive OR the 64-bit values in M1 and M2.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_xor_si64 (__m64 __m1, __m64 __m2)
{
  return __builtin_ia32_pxor (__m1, __m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pxor (__m64 __m1, __m64 __m2)
{
  return _mm_xor_si64 (__m1, __m2);
}
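
/* Illustrative sketch (hypothetical helper): the classic MMX select,
   (__mask & __a) | (~__mask & __b).  Note that _mm_andnot_si64
   complements its FIRST operand.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__example_select (__m64 __mask, __m64 __a, __m64 __b)
{
  return _mm_or_si64 (_mm_and_si64 (__mask, __a),
		      _mm_andnot_si64 (__mask, __b));
}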

/* Compare eight 8-bit values.  The result of the comparison is 0xFF if the
   test is true and zero if false.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpeqb (__m64 __m1, __m64 __m2)
{
  return _mm_cmpeq_pi8 (__m1, __m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpgtb (__m64 __m1, __m64 __m2)
{
  return _mm_cmpgt_pi8 (__m1, __m2);
}

/* Compare four 16-bit values.  The result of the comparison is 0xFFFF if
   the test is true and zero if false.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpeqw (__m64 __m1, __m64 __m2)
{
  return _mm_cmpeq_pi16 (__m1, __m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpgtw (__m64 __m1, __m64 __m2)
{
  return _mm_cmpgt_pi16 (__m1, __m2);
}

/* Compare two 32-bit values.  The result of the comparison is 0xFFFFFFFF if
   the test is true and zero if false.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpeqd (__m64 __m1, __m64 __m2)
{
  return _mm_cmpeq_pi32 (__m1, __m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
{
  return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_pcmpgtd (__m64 __m1, __m64 __m2)
{
  return _mm_cmpgt_pi32 (__m1, __m2);
}
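
/* Illustrative sketch (hypothetical helper): a signed 16-bit maximum
   built from a compare mask and the pand/pandn/por select idiom.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__example_max_s16 (__m64 __a, __m64 __b)
{
  __m64 __gt = _mm_cmpgt_pi16 (__a, __b);
  return _mm_or_si64 (_mm_and_si64 (__gt, __a),
		      _mm_andnot_si64 (__gt, __b));
}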

/* Creates a 64-bit zero.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setzero_si64 (void)
{
  return (__m64)0LL;
}

/* Creates a vector of two 32-bit values; I0 is least significant.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi32 (int __i1, int __i0)
{
  return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
}

/* Creates a vector of four 16-bit values; W0 is least significant.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi16 (short __w3, short __w2, short __w1, short __w0)
{
  return (__m64) __builtin_ia32_vec_init_v4hi (__w0, __w1, __w2, __w3);
}

/* Creates a vector of eight 8-bit values; B0 is least significant.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set_pi8 (char __b7, char __b6, char __b5, char __b4,
	     char __b3, char __b2, char __b1, char __b0)
{
  return (__m64) __builtin_ia32_vec_init_v8qi (__b0, __b1, __b2, __b3,
					       __b4, __b5, __b6, __b7);
}

/* Similar, but with the arguments in reverse order.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pi32 (int __i0, int __i1)
{
  return _mm_set_pi32 (__i1, __i0);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pi16 (short __w0, short __w1, short __w2, short __w3)
{
  return _mm_set_pi16 (__w3, __w2, __w1, __w0);
}

extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3,
	      char __b4, char __b5, char __b6, char __b7)
{
  return _mm_set_pi8 (__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
}
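
/* Illustrative sketch (hypothetical helper): _mm_set_* takes the most
   significant element first, _mm_setr_* the least; both calls below
   build the same vector, with 1 in the low 16-bit lane.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__example_same_vector (void)
{
  /* Equivalent: _mm_set_pi16 (4, 3, 2, 1).  */
  return _mm_setr_pi16 (1, 2, 3, 4);
}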

/* Creates a vector of two 32-bit values, both elements containing I.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pi32 (int __i)
{
  return _mm_set_pi32 (__i, __i);
}

/* Creates a vector of four 16-bit values, all elements containing W.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pi16 (short __w)
{
  return _mm_set_pi16 (__w, __w, __w, __w);
}

/* Creates a vector of eight 8-bit values, all elements containing B.  */
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_set1_pi8 (char __b)
{
  return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
}
#ifdef __DISABLE_MMX__
#undef __DISABLE_MMX__
#pragma GCC pop_options
#endif /* __DISABLE_MMX__ */

#endif /* _MMINTRIN_H_INCLUDED */