xref: /reactos/sdk/include/vcruntime/mmintrin.h (revision abbc7840)
1 /*
2  * mmintrin.h
3  *
4  * This file is part of the ReactOS CRT package.
5  *
6  * Contributors:
7  *   Timo Kreuzer (timo.kreuzer@reactos.org)
8  *
9  * THIS SOFTWARE IS NOT COPYRIGHTED
10  *
11  * This source code is offered for use in the public domain. You may
12  * use, modify or distribute it freely.
13  *
14  * This code is distributed in the hope that it will be useful but
15  * WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESS OR IMPLIED ARE HEREBY
16  * DISCLAIMED. This includes but is not limited to warranties of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
18  *
19  */
20 
21 #pragma once
22 #ifndef _MMINTRIN_H_INCLUDED
23 #define _MMINTRIN_H_INCLUDED
24 
25 #include <vcruntime.h>
26 
27 #ifdef __cplusplus
28 extern "C" {
29 #endif
30 
/* DECLSPEC_INTRINTYPE: __declspec(intrin_type) when _MSC_VER is defined,
   otherwise it expands to nothing. */
#ifndef _MSC_VER
#define DECLSPEC_INTRINTYPE
#else
#define DECLSPEC_INTRINTYPE __declspec(intrin_type)
#endif
36 
37 #if defined(_MSC_VER) && !defined(__clang__)
38 
    /*
     * MSVC (non-Clang) definition of __m64: an 8-byte aligned union whose
     * members expose the same 64 bits as differently typed scalars or arrays.
     */
    typedef union DECLSPEC_INTRINTYPE _CRT_ALIGN(8) __m64
    {
        unsigned __int64 m64_u64;       /* one unsigned 64-bit value */
        float m64_f32[2];               /* two floats */
        __int8 m64_i8[8];               /* eight signed 8-bit values */
        __int16 m64_i16[4];             /* four signed 16-bit values */
        __int32 m64_i32[2];             /* two signed 32-bit values */
        __int64 m64_i64;                /* one signed 64-bit value */
        unsigned __int8 m64_u8[8];      /* eight unsigned 8-bit values */
        unsigned __int16 m64_u16[4];    /* four unsigned 16-bit values */
        unsigned __int32 m64_u32[2];    /* two unsigned 32-bit values */
    } __m64;
51 
52 #else /* _MSC_VER */
53 
54     typedef long long __v1di __attribute__((__vector_size__(8)));
55     typedef int __v2si __attribute__((__vector_size__(8)));
56     typedef short __v4hi __attribute__((__vector_size__(8)));
57     typedef char __v8qi __attribute__((__vector_size__(8)));
58 
59     typedef float __m64 __attribute__((__vector_size__(8), __aligned__(16)));
60 
/* Attribute set for the MMX inline wrappers: __INTRIN_INLINE plus
   __target__("mmx"); Clang additionally gets __min_vector_width__(64). */
#ifndef __clang__
#define __INTRIN_INLINE_MMX __INTRIN_INLINE __attribute__((__target__("mmx")))
#else
#define __INTRIN_INLINE_MMX __INTRIN_INLINE __attribute__((__target__("mmx"),__min_vector_width__(64)))
#endif
66 
67 #endif /* _MSC_VER */
68 
69 #ifdef _M_IX86
70 
/*
 * MMX intrinsic prototypes (only declared when _M_IX86 is defined).
 * Further down, the MSVC branch marks each of them with #pragma intrinsic,
 * while the GCC/Clang branch supplies inline definitions built on
 * __builtin_ia32_* calls.
 */

/* State management and int <-> __m64 conversion */
void  _m_empty(void);
__m64 _m_from_int(int i);
int   _m_to_int(__m64 m);

/* Pack / unpack */
__m64 _m_packsswb(__m64 a, __m64 b);
__m64 _m_packssdw(__m64 a, __m64 b);
__m64 _m_packuswb(__m64 a, __m64 b);
__m64 _m_punpckhbw(__m64 a, __m64 b);
__m64 _m_punpckhwd(__m64 a, __m64 b);
__m64 _m_punpckhdq(__m64 a, __m64 b);
__m64 _m_punpcklbw(__m64 a, __m64 b);
__m64 _m_punpcklwd(__m64 a, __m64 b);
__m64 _m_punpckldq(__m64 a, __m64 b);

/* Add / subtract */
__m64 _m_paddb(__m64 a, __m64 b);
__m64 _m_paddw(__m64 a, __m64 b);
__m64 _m_paddd(__m64 a, __m64 b);
__m64 _m_paddsb(__m64 a, __m64 b);
__m64 _m_paddsw(__m64 a, __m64 b);
__m64 _m_paddusb(__m64 a, __m64 b);
__m64 _m_paddusw(__m64 a, __m64 b);
__m64 _m_psubb(__m64 a, __m64 b);
__m64 _m_psubw(__m64 a, __m64 b);
__m64 _m_psubd(__m64 a, __m64 b);
__m64 _m_psubsb(__m64 a, __m64 b);
__m64 _m_psubsw(__m64 a, __m64 b);
__m64 _m_psubusb(__m64 a, __m64 b);
__m64 _m_psubusw(__m64 a, __m64 b);

/* Multiply */
__m64 _m_pmaddwd(__m64 a, __m64 b);
__m64 _m_pmulhw(__m64 a, __m64 b);
__m64 _m_pmullw(__m64 a, __m64 b);

/* Shifts: the plain variants take the count in an __m64, the ...i variants as an int */
__m64 _m_psllw(__m64 a, __m64 count);
__m64 _m_psllwi(__m64 a, int imm8);
__m64 _m_pslld(__m64 a, __m64 count);
__m64 _m_pslldi(__m64 a, int imm8);
__m64 _m_psllq(__m64 a, __m64 count);
__m64 _m_psllqi(__m64 a, int imm8);
__m64 _m_psraw(__m64 a, __m64 count);
__m64 _m_psrawi(__m64 a, int imm8);
__m64 _m_psrad(__m64 a, __m64 count);
__m64 _m_psradi(__m64 a, int imm8);
__m64 _m_psrlw(__m64 a, __m64 count);
__m64 _m_psrlwi(__m64 a, int imm8);
__m64 _m_psrld(__m64 a, __m64 count);
__m64 _m_psrldi(__m64 a, int imm8);
__m64 _m_psrlq(__m64 a, __m64 count);
__m64 _m_psrlqi(__m64 a, int imm8);

/* Bitwise logic */
__m64 _m_pand(__m64 a, __m64 b);
__m64 _m_pandn(__m64 a, __m64 b);
__m64 _m_por(__m64 a, __m64 b);
__m64 _m_pxor(__m64 a, __m64 b);

/* Compare */
__m64 _m_pcmpeqb(__m64 a, __m64 b);
__m64 _m_pcmpgtb(__m64 a, __m64 b);
__m64 _m_pcmpeqw(__m64 a, __m64 b);
__m64 _m_pcmpgtw(__m64 a, __m64 b);
__m64 _m_pcmpeqd(__m64 a, __m64 b);
__m64 _m_pcmpgtd(__m64 a, __m64 b);

/* Construction from scalars (these have no _m_* spelling) */
__m64 _mm_setzero_si64(void);
__m64 _mm_set_pi32(int i1, int i0);
__m64 _mm_set_pi16(short s3, short s2, short s1, short s0);
__m64 _mm_set_pi8(char b7, char b6, char b5, char b4,
                  char b3, char b2, char b1, char b0);
__m64 _mm_setr_pi32(int i1, int i0);
__m64 _mm_setr_pi16(short s3, short s2, short s1, short s0);
__m64 _mm_setr_pi8(char b7, char b6, char b5, char b4,
                   char b3, char b2, char b1, char b0);
__m64 _mm_set1_pi32(int i);
__m64 _mm_set1_pi16(short s);
__m64 _mm_set1_pi8(char b);
138 
/*
 * Alternate names: every _mm_* spelling below is an object-like macro that
 * expands to the matching _m_* function declared above. Because the
 * replacement is purely textual, the _mm_* function definitions in the
 * GCC/Clang branch further down are in fact definitions of the _m_* names.
 */
#define _mm_empty _m_empty
#define _mm_cvtsi32_si64 _m_from_int
#define _mm_cvtsi64_si32 _m_to_int
#define _mm_packs_pi16 _m_packsswb
#define _mm_packs_pi32 _m_packssdw
#define _mm_packs_pu16 _m_packuswb
#define _mm_unpackhi_pi8 _m_punpckhbw
#define _mm_unpackhi_pi16 _m_punpckhwd
#define _mm_unpackhi_pi32 _m_punpckhdq
#define _mm_unpacklo_pi8 _m_punpcklbw
#define _mm_unpacklo_pi16 _m_punpcklwd
#define _mm_unpacklo_pi32 _m_punpckldq
#define _mm_add_pi8 _m_paddb
#define _mm_add_pi16 _m_paddw
#define _mm_add_pi32 _m_paddd
#define _mm_adds_pi8 _m_paddsb
#define _mm_adds_pi16 _m_paddsw
#define _mm_adds_pu8 _m_paddusb
#define _mm_adds_pu16 _m_paddusw
#define _mm_sub_pi8 _m_psubb
#define _mm_sub_pi16 _m_psubw
#define _mm_sub_pi32 _m_psubd
#define _mm_subs_pi8 _m_psubsb
#define _mm_subs_pi16 _m_psubsw
#define _mm_subs_pu8 _m_psubusb
#define _mm_subs_pu16 _m_psubusw
#define _mm_madd_pi16 _m_pmaddwd
#define _mm_mulhi_pi16 _m_pmulhw
#define _mm_mullo_pi16 _m_pmullw
#define _mm_sll_pi16 _m_psllw
#define _mm_slli_pi16 _m_psllwi
#define _mm_sll_pi32 _m_pslld
#define _mm_slli_pi32 _m_pslldi
#define _mm_sll_si64 _m_psllq
#define _mm_slli_si64 _m_psllqi
#define _mm_sra_pi16 _m_psraw
#define _mm_srai_pi16 _m_psrawi
#define _mm_sra_pi32 _m_psrad
#define _mm_srai_pi32 _m_psradi
#define _mm_srl_pi16 _m_psrlw
#define _mm_srli_pi16 _m_psrlwi
#define _mm_srl_pi32 _m_psrld
#define _mm_srli_pi32 _m_psrldi
#define _mm_srl_si64 _m_psrlq
#define _mm_srli_si64 _m_psrlqi
#define _mm_and_si64 _m_pand
#define _mm_andnot_si64 _m_pandn
#define _mm_or_si64 _m_por
#define _mm_xor_si64 _m_pxor
#define _mm_cmpeq_pi8 _m_pcmpeqb
#define _mm_cmpgt_pi8 _m_pcmpgtb
#define _mm_cmpeq_pi16 _m_pcmpeqw
#define _mm_cmpgt_pi16 _m_pcmpgtw
#define _mm_cmpeq_pi32 _m_pcmpeqd
#define _mm_cmpgt_pi32 _m_pcmpgtd
195 
/*
 * MSVC without Clang: no function bodies are provided in this branch.
 * Each prototype above is named in a #pragma intrinsic instead.
 */
#if defined(_MSC_VER) && !defined(__clang__)
#pragma intrinsic(_m_empty)
#pragma intrinsic(_m_from_int)
#pragma intrinsic(_m_to_int)
#pragma intrinsic(_m_packsswb)
#pragma intrinsic(_m_packssdw)
#pragma intrinsic(_m_packuswb)
#pragma intrinsic(_m_punpckhbw)
#pragma intrinsic(_m_punpckhwd)
#pragma intrinsic(_m_punpckhdq)
#pragma intrinsic(_m_punpcklbw)
#pragma intrinsic(_m_punpcklwd)
#pragma intrinsic(_m_punpckldq)
#pragma intrinsic(_m_paddb)
#pragma intrinsic(_m_paddw)
#pragma intrinsic(_m_paddd)
#pragma intrinsic(_m_paddsb)
#pragma intrinsic(_m_paddsw)
#pragma intrinsic(_m_paddusb)
#pragma intrinsic(_m_paddusw)
#pragma intrinsic(_m_psubb)
#pragma intrinsic(_m_psubw)
#pragma intrinsic(_m_psubd)
#pragma intrinsic(_m_psubsb)
#pragma intrinsic(_m_psubsw)
#pragma intrinsic(_m_psubusb)
#pragma intrinsic(_m_psubusw)
#pragma intrinsic(_m_pmaddwd)
#pragma intrinsic(_m_pmulhw)
#pragma intrinsic(_m_pmullw)
#pragma intrinsic(_m_psllw)
#pragma intrinsic(_m_psllwi)
#pragma intrinsic(_m_pslld)
#pragma intrinsic(_m_pslldi)
#pragma intrinsic(_m_psllq)
#pragma intrinsic(_m_psllqi)
#pragma intrinsic(_m_psraw)
#pragma intrinsic(_m_psrawi)
#pragma intrinsic(_m_psrad)
#pragma intrinsic(_m_psradi)
#pragma intrinsic(_m_psrlw)
#pragma intrinsic(_m_psrlwi)
#pragma intrinsic(_m_psrld)
#pragma intrinsic(_m_psrldi)
#pragma intrinsic(_m_psrlq)
#pragma intrinsic(_m_psrlqi)
#pragma intrinsic(_m_pand)
#pragma intrinsic(_m_pandn)
#pragma intrinsic(_m_por)
#pragma intrinsic(_m_pxor)
#pragma intrinsic(_m_pcmpeqb)
#pragma intrinsic(_m_pcmpgtb)
#pragma intrinsic(_m_pcmpeqw)
#pragma intrinsic(_m_pcmpgtw)
#pragma intrinsic(_m_pcmpeqd)
#pragma intrinsic(_m_pcmpgtd)
#pragma intrinsic(_mm_setzero_si64)
#pragma intrinsic(_mm_set_pi32)
#pragma intrinsic(_mm_set_pi16)
#pragma intrinsic(_mm_set_pi8)
#pragma intrinsic(_mm_setr_pi32)
#pragma intrinsic(_mm_setr_pi16)
#pragma intrinsic(_mm_setr_pi8)
#pragma intrinsic(_mm_set1_pi32)
#pragma intrinsic(_mm_set1_pi16)
#pragma intrinsic(_mm_set1_pi8)
263 
/* Every other compiler: use inline functions */
#else /* GCC / Clang / Clang-CL */
266 
267 /*
268 - GCC: https://github.com/gcc-mirror/gcc/blob/master/gcc/config/i386/mmintrin.h
269 - Clang: https://github.com/llvm/llvm-project/blob/main/clang/lib/Headers/mmintrin.h
270 */
271 
272 // _m_empty
_mm_empty(void)273 __INTRIN_INLINE_MMX void _mm_empty(void)
274 {
275     __builtin_ia32_emms();
276 }
277 
278 // _m_from_int
_mm_cvtsi32_si64(int i)279 __INTRIN_INLINE_MMX __m64 _mm_cvtsi32_si64(int i)
280 {
281     return (__m64)__builtin_ia32_vec_init_v2si(i, 0);
282 }
283 
284 // _m_to_int
_mm_cvtsi64_si32(__m64 m)285 __INTRIN_INLINE_MMX int _mm_cvtsi64_si32(__m64 m)
286 {
287     return __builtin_ia32_vec_ext_v2si((__v2si)m, 0);
288 }
289 
290 // _m_packsswb
_mm_packs_pi16(__m64 a,__m64 b)291 __INTRIN_INLINE_MMX __m64 _mm_packs_pi16(__m64 a, __m64 b)
292 {
293     return (__m64)__builtin_ia32_packsswb((__v4hi)a, (__v4hi)b);
294 }
295 
296 // _m_packssdw
_mm_packs_pi32(__m64 a,__m64 b)297 __INTRIN_INLINE_MMX __m64 _mm_packs_pi32(__m64 a, __m64 b)
298 {
299     return (__m64)__builtin_ia32_packssdw((__v2si)a, (__v2si)b);
300 }
301 
302 // _m_packuswb
_mm_packs_pu16(__m64 a,__m64 b)303 __INTRIN_INLINE_MMX __m64 _mm_packs_pu16(__m64 a, __m64 b)
304 {
305     return (__m64)__builtin_ia32_packuswb((__v4hi)a, (__v4hi)b);
306 }
307 
308 // _m_punpckhbw
_mm_unpackhi_pi8(__m64 a,__m64 b)309 __INTRIN_INLINE_MMX __m64 _mm_unpackhi_pi8(__m64 a, __m64 b)
310 {
311     return (__m64)__builtin_ia32_punpckhbw((__v8qi)a, (__v8qi)b);
312 }
313 
314 // _m_punpckhwd
_mm_unpackhi_pi16(__m64 a,__m64 b)315 __INTRIN_INLINE_MMX __m64 _mm_unpackhi_pi16(__m64 a, __m64 b)
316 {
317     return (__m64)__builtin_ia32_punpckhwd((__v4hi)a, (__v4hi)b);
318 }
319 
320 // _m_punpckhdq
_mm_unpackhi_pi32(__m64 a,__m64 b)321 __INTRIN_INLINE_MMX __m64 _mm_unpackhi_pi32(__m64 a, __m64 b)
322 {
323     return (__m64)__builtin_ia32_punpckhdq((__v2si)a, (__v2si)b);
324 }
325 
326 // _m_punpcklbw
_mm_unpacklo_pi8(__m64 a,__m64 b)327 __INTRIN_INLINE_MMX __m64 _mm_unpacklo_pi8(__m64 a, __m64 b)
328 {
329     return (__m64)__builtin_ia32_punpcklbw((__v8qi)a, (__v8qi)b);
330 }
331 
332 // _m_punpcklwd
_mm_unpacklo_pi16(__m64 a,__m64 b)333 __INTRIN_INLINE_MMX __m64 _mm_unpacklo_pi16(__m64 a, __m64 b)
334 {
335     return (__m64)__builtin_ia32_punpcklwd((__v4hi)a, (__v4hi)b);
336 }
337 
338 // _m_punpckldq
_mm_unpacklo_pi32(__m64 a,__m64 b)339 __INTRIN_INLINE_MMX __m64 _mm_unpacklo_pi32(__m64 a, __m64 b)
340 {
341     return (__m64)__builtin_ia32_punpckldq((__v2si)a, (__v2si)b);
342 }
343 
344 // _m_paddb
_mm_add_pi8(__m64 a,__m64 b)345 __INTRIN_INLINE_MMX __m64 _mm_add_pi8(__m64 a, __m64 b)
346 {
347     return (__m64)__builtin_ia32_paddb((__v8qi)a, (__v8qi)b);
348 }
349 
350 // _m_paddw
_mm_add_pi16(__m64 a,__m64 b)351 __INTRIN_INLINE_MMX __m64 _mm_add_pi16(__m64 a, __m64 b)
352 {
353     return (__m64)__builtin_ia32_paddw((__v4hi)a, (__v4hi)b);
354 }
355 
356 // _m_paddd
_mm_add_pi32(__m64 a,__m64 b)357 __INTRIN_INLINE_MMX __m64 _mm_add_pi32(__m64 a, __m64 b)
358 {
359     return (__m64)__builtin_ia32_paddd((__v2si)a, (__v2si)b);
360 }
361 
362 // _m_paddsb
_mm_adds_pi8(__m64 a,__m64 b)363 __INTRIN_INLINE_MMX __m64 _mm_adds_pi8(__m64 a, __m64 b)
364 {
365     return (__m64)__builtin_ia32_paddsb((__v8qi)a, (__v8qi)b);
366 }
367 
368 // _m_paddsw
_mm_adds_pi16(__m64 a,__m64 b)369 __INTRIN_INLINE_MMX __m64 _mm_adds_pi16(__m64 a, __m64 b)
370 {
371     return (__m64)__builtin_ia32_paddsw((__v4hi)a, (__v4hi)b);
372 }
373 
374 // _m_paddusb
_mm_adds_pu8(__m64 a,__m64 b)375 __INTRIN_INLINE_MMX __m64 _mm_adds_pu8(__m64 a, __m64 b)
376 {
377     return (__m64)__builtin_ia32_paddusb((__v8qi)a, (__v8qi)b);
378 }
379 
380 // _m_paddusw
_mm_adds_pu16(__m64 a,__m64 b)381 __INTRIN_INLINE_MMX __m64 _mm_adds_pu16(__m64 a, __m64 b)
382 {
383     return (__m64)__builtin_ia32_paddusw((__v4hi)a, (__v4hi)b);
384 }
385 
386 // _m_psubb
_mm_sub_pi8(__m64 a,__m64 b)387 __INTRIN_INLINE_MMX __m64 _mm_sub_pi8(__m64 a, __m64 b)
388 {
389     return (__m64)__builtin_ia32_psubb((__v8qi)a, (__v8qi)b);
390 }
391 
392 // _m_psubw
_mm_sub_pi16(__m64 a,__m64 b)393 __INTRIN_INLINE_MMX __m64 _mm_sub_pi16(__m64 a, __m64 b)
394 {
395     return (__m64)__builtin_ia32_psubw((__v4hi)a, (__v4hi)b);
396 }
397 
398 // _m_psubd
_mm_sub_pi32(__m64 a,__m64 b)399 __INTRIN_INLINE_MMX __m64 _mm_sub_pi32(__m64 a, __m64 b)
400 {
401     return (__m64)__builtin_ia32_psubd((__v2si)a, (__v2si)b);
402 }
403 
404 // _m_psubsb
_mm_subs_pi8(__m64 a,__m64 b)405 __INTRIN_INLINE_MMX __m64 _mm_subs_pi8(__m64 a, __m64 b)
406 {
407     return (__m64)__builtin_ia32_psubsb((__v8qi)a, (__v8qi)b);
408 }
409 
410 // _m_psubsw
_mm_subs_pi16(__m64 a,__m64 b)411 __INTRIN_INLINE_MMX __m64 _mm_subs_pi16(__m64 a, __m64 b)
412 {
413     return (__m64)__builtin_ia32_psubsw((__v4hi)a, (__v4hi)b);
414 }
415 
416 // _m_psubusb
_mm_subs_pu8(__m64 a,__m64 b)417 __INTRIN_INLINE_MMX __m64 _mm_subs_pu8(__m64 a, __m64 b)
418 {
419     return (__m64)__builtin_ia32_psubusb((__v8qi)a, (__v8qi)b);
420 }
421 
422 // _m_psubusw
_mm_subs_pu16(__m64 a,__m64 b)423 __INTRIN_INLINE_MMX __m64 _mm_subs_pu16(__m64 a, __m64 b)
424 {
425     return (__m64)__builtin_ia32_psubusw((__v4hi)a, (__v4hi)b);
426 }
427 
428 // _m_pmaddwd
_mm_madd_pi16(__m64 a,__m64 b)429 __INTRIN_INLINE_MMX __m64 _mm_madd_pi16(__m64 a, __m64 b)
430 {
431     return (__m64)__builtin_ia32_pmaddwd((__v4hi)a, (__v4hi)b);
432 }
433 
434 // _m_pmulhw
_mm_mulhi_pi16(__m64 a,__m64 b)435 __INTRIN_INLINE_MMX __m64 _mm_mulhi_pi16(__m64 a, __m64 b)
436 {
437     return (__m64)__builtin_ia32_pmulhw((__v4hi)a, (__v4hi)b);
438 }
439 
440 // _m_pmullw
_mm_mullo_pi16(__m64 a,__m64 b)441 __INTRIN_INLINE_MMX __m64 _mm_mullo_pi16(__m64 a, __m64 b)
442 {
443     return (__m64)__builtin_ia32_pmullw((__v4hi)a, (__v4hi)b);
444 }
445 
446 // _m_psllw
_mm_sll_pi16(__m64 a,__m64 count)447 __INTRIN_INLINE_MMX __m64 _mm_sll_pi16(__m64 a, __m64 count)
448 {
449     return (__m64)__builtin_ia32_psllw((__v4hi)a, (__v4hi)count);
450 }
451 
452 // _m_psllwi
_mm_slli_pi16(__m64 a,int imm8)453 __INTRIN_INLINE_MMX __m64 _mm_slli_pi16(__m64 a, int imm8)
454 {
455     return (__m64)__builtin_ia32_psllwi((__v4hi)a, imm8);
456 }
457 
458 // _m_pslld
_mm_sll_pi32(__m64 a,__m64 count)459 __INTRIN_INLINE_MMX __m64 _mm_sll_pi32(__m64 a, __m64 count)
460 {
461     return (__m64)__builtin_ia32_pslld((__v2si)a, (__v2si)count);
462 }
463 
464 // _m_pslldi
_mm_slli_pi32(__m64 a,int imm8)465 __INTRIN_INLINE_MMX __m64 _mm_slli_pi32(__m64 a, int imm8)
466 {
467     return (__m64)__builtin_ia32_pslldi((__v2si)a, imm8);
468 }
469 
470 // _m_psllq
_mm_sll_si64(__m64 a,__m64 count)471 __INTRIN_INLINE_MMX __m64 _mm_sll_si64(__m64 a, __m64 count)
472 {
473     return (__m64)__builtin_ia32_psllq((__v1di)a, (__v1di)count);
474 }
475 
476 // _m_psllqi
_mm_slli_si64(__m64 a,int imm8)477 __INTRIN_INLINE_MMX __m64 _mm_slli_si64(__m64 a, int imm8)
478 {
479     return (__m64)__builtin_ia32_psllqi((__v1di)a, imm8);
480 }
481 
482 // _m_psraw
_mm_sra_pi16(__m64 a,__m64 count)483 __INTRIN_INLINE_MMX __m64 _mm_sra_pi16(__m64 a, __m64 count)
484 {
485     return (__m64)__builtin_ia32_psraw((__v4hi)a, (__v4hi)count);
486 }
487 
488 // _m_psrawi
_mm_srai_pi16(__m64 a,int imm8)489 __INTRIN_INLINE_MMX __m64 _mm_srai_pi16(__m64 a, int imm8)
490 {
491     return (__m64)__builtin_ia32_psrawi((__v4hi)a, imm8);
492 }
493 
494 // _m_psrad
_mm_sra_pi32(__m64 a,__m64 count)495 __INTRIN_INLINE_MMX __m64 _mm_sra_pi32(__m64 a, __m64 count)
496 {
497     return (__m64)__builtin_ia32_psrad((__v2si)a, (__v2si)count);
498 }
499 
500 // _m_psradi
_mm_srai_pi32(__m64 a,int imm8)501 __INTRIN_INLINE_MMX __m64 _mm_srai_pi32(__m64 a, int imm8)
502 {
503     return (__m64)__builtin_ia32_psradi((__v2si)a, imm8);
504 }
505 
506 // _m_psrlw
_mm_srl_pi16(__m64 a,__m64 count)507 __INTRIN_INLINE_MMX __m64 _mm_srl_pi16(__m64 a, __m64 count)
508 {
509     return (__m64)__builtin_ia32_psrlw((__v4hi)a, (__v4hi)count);
510 }
511 
512 // _m_psrlwi
_mm_srli_pi16(__m64 a,int imm8)513 __INTRIN_INLINE_MMX __m64 _mm_srli_pi16(__m64 a, int imm8)
514 {
515     return (__m64)__builtin_ia32_psrlwi((__v4hi)a, imm8);
516 }
517 
518 // _m_psrld
_mm_srl_pi32(__m64 a,__m64 count)519 __INTRIN_INLINE_MMX __m64 _mm_srl_pi32(__m64 a, __m64 count)
520 {
521     return (__m64)__builtin_ia32_psrld((__v2si)a, (__v2si)count);
522 }
523 
524 // _m_psrldi
_mm_srli_pi32(__m64 a,int imm8)525 __INTRIN_INLINE_MMX __m64 _mm_srli_pi32(__m64 a, int imm8)
526 {
527     return (__m64)__builtin_ia32_psrldi((__v2si)a, imm8);
528 }
529 
530 // _m_psrlq
_mm_srl_si64(__m64 a,__m64 count)531 __INTRIN_INLINE_MMX __m64 _mm_srl_si64(__m64 a, __m64 count)
532 {
533     return (__m64)__builtin_ia32_psrlq((__v1di)a, (__v1di)count);
534 }
535 
536 // _m_psrlqi
_mm_srli_si64(__m64 a,int imm8)537 __INTRIN_INLINE_MMX __m64 _mm_srli_si64(__m64 a, int imm8)
538 {
539     return (__m64)__builtin_ia32_psrlqi((__v1di)a, imm8);
540 }
541 
542 // _m_pand
_mm_and_si64(__m64 a,__m64 b)543 __INTRIN_INLINE_MMX __m64 _mm_and_si64(__m64 a, __m64 b)
544 {
545     return (__m64)__builtin_ia32_pand((__v2si)a, (__v2si)b);
546 }
547 
548 // _m_pandn
_mm_andnot_si64(__m64 a,__m64 b)549 __INTRIN_INLINE_MMX __m64 _mm_andnot_si64(__m64 a, __m64 b)
550 {
551     return (__m64)__builtin_ia32_pandn((__v2si)a, (__v2si)b);
552 }
553 
554 // _m_por
_mm_or_si64(__m64 a,__m64 b)555 __INTRIN_INLINE_MMX __m64 _mm_or_si64(__m64 a, __m64 b)
556 {
557     return (__m64)__builtin_ia32_por((__v2si)a, (__v2si)b);
558 }
559 
560 // _m_pxor
_mm_xor_si64(__m64 a,__m64 b)561 __INTRIN_INLINE_MMX __m64 _mm_xor_si64(__m64 a, __m64 b)
562 {
563     return (__m64)__builtin_ia32_pxor((__v2si)a, (__v2si)b);
564 }
565 
566 // _m_pcmpeqb
_mm_cmpeq_pi8(__m64 a,__m64 b)567 __INTRIN_INLINE_MMX __m64 _mm_cmpeq_pi8(__m64 a, __m64 b)
568 {
569     return (__m64)__builtin_ia32_pcmpeqb((__v8qi)a, (__v8qi)b);
570 }
571 
572 // _m_pcmpgtb
_mm_cmpgt_pi8(__m64 a,__m64 b)573 __INTRIN_INLINE_MMX __m64 _mm_cmpgt_pi8(__m64 a, __m64 b)
574 {
575     return (__m64)__builtin_ia32_pcmpgtb((__v8qi)a, (__v8qi)b);
576 }
577 
578 // _m_pcmpeqw
_mm_cmpeq_pi16(__m64 a,__m64 b)579 __INTRIN_INLINE_MMX __m64 _mm_cmpeq_pi16(__m64 a, __m64 b)
580 {
581     return (__m64)__builtin_ia32_pcmpeqw((__v4hi)a, (__v4hi)b);
582 }
583 
584 // _m_pcmpgtw
_mm_cmpgt_pi16(__m64 a,__m64 b)585 __INTRIN_INLINE_MMX __m64 _mm_cmpgt_pi16(__m64 a, __m64 b)
586 {
587     return (__m64)__builtin_ia32_pcmpgtw((__v4hi)a, (__v4hi)b);
588 }
589 
590 // _m_pcmpeqd
_mm_cmpeq_pi32(__m64 a,__m64 b)591 __INTRIN_INLINE_MMX __m64 _mm_cmpeq_pi32(__m64 a, __m64 b)
592 {
593     return (__m64)__builtin_ia32_pcmpeqd((__v2si)a, (__v2si)b);
594 }
595 
596 // _m_pcmpgtd
_mm_cmpgt_pi32(__m64 a,__m64 b)597 __INTRIN_INLINE_MMX __m64 _mm_cmpgt_pi32(__m64 a, __m64 b)
598 {
599     return (__m64)__builtin_ia32_pcmpgtd((__v2si)a, (__v2si)b);
600 }
601 
_mm_setzero_si64(void)602 __INTRIN_INLINE_MMX __m64 _mm_setzero_si64(void)
603 {
604     return (__m64) { 0 };
605 }
606 
_mm_set_pi32(int i1,int i0)607 __INTRIN_INLINE_MMX __m64 _mm_set_pi32(int i1, int i0)
608 {
609     return (__m64)__builtin_ia32_vec_init_v2si(i0, i1);
610 }
611 
_mm_set_pi16(short s3,short s2,short s1,short s0)612 __INTRIN_INLINE_MMX __m64 _mm_set_pi16(short s3, short s2, short s1, short s0)
613 {
614     return (__m64)__builtin_ia32_vec_init_v4hi(s0, s1, s2, s3);
615 }
616 
_mm_set_pi8(char b7,char b6,char b5,char b4,char b3,char b2,char b1,char b0)617 __INTRIN_INLINE_MMX __m64 _mm_set_pi8(char b7, char b6, char b5, char b4,
618                                   char b3, char b2, char b1, char b0)
619 {
620     return (__m64)__builtin_ia32_vec_init_v8qi(b0, b1, b2, b3, b4, b5, b6, b7);
621 }
622 
_mm_setr_pi32(int i1,int i0)623 __INTRIN_INLINE_MMX __m64 _mm_setr_pi32(int i1, int i0)
624 {
625     return _mm_set_pi32(i0, i1);
626 }
627 
_mm_setr_pi16(short s3,short s2,short s1,short s0)628 __INTRIN_INLINE_MMX __m64 _mm_setr_pi16(short s3, short s2, short s1, short s0)
629 {
630     return _mm_set_pi16(s0, s1, s2, s3);
631 }
632 
_mm_setr_pi8(char b7,char b6,char b5,char b4,char b3,char b2,char b1,char b0)633 __INTRIN_INLINE_MMX __m64 _mm_setr_pi8(char b7, char b6, char b5, char b4,
634                                    char b3, char b2, char b1, char b0)
635 {
636     return _mm_set_pi8(b7, b6, b5, b4, b3, b2, b1, b0);
637 }
638 
_mm_set1_pi32(int i)639 __INTRIN_INLINE_MMX __m64 _mm_set1_pi32(int i)
640 {
641     return _mm_set_pi32(i, i);
642 }
643 
_mm_set1_pi16(short s)644 __INTRIN_INLINE_MMX __m64 _mm_set1_pi16(short s)
645 {
646     return _mm_set_pi16(s, s, s, s);
647 }
648 
_mm_set1_pi8(char b)649 __INTRIN_INLINE_MMX __m64 _mm_set1_pi8(char b)
650 {
651     return _mm_set_pi8(b, b, b, b, b, b, b, b);
652 }
653 
#endif /* defined(_MSC_VER) && !defined(__clang__) */
655 
656 #endif /* _M_IX86 */
657 
658 #ifdef __cplusplus
659 }
660 #endif
661 
662 #endif /* _MMINTRIN_H_INCLUDED */
663