xref: /reactos/sdk/include/vcruntime/emmintrin.h (revision a67f3688)
/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
984344399STimo Kreuzer 
1084344399STimo Kreuzer #pragma once
1184344399STimo Kreuzer #ifndef _INCLUDED_EMM
1284344399STimo Kreuzer #define _INCLUDED_EMM
1384344399STimo Kreuzer 
14abbc7840STimo Kreuzer #include <vcruntime.h>
1584344399STimo Kreuzer #include <xmmintrin.h>
1684344399STimo Kreuzer 
#if defined(_MSC_VER) && !defined(__clang__)

/*
 * MSVC (non-clang) definitions: the 128-bit types are declared as 16-byte
 * aligned aggregates whose members expose the individual lanes.
 */

/* 128-bit integer vector, viewable as signed or unsigned 8/16/32/64-bit lanes. */
typedef union _DECLSPEC_INTRIN_TYPE _CRT_ALIGN(16) __m128i
{
    __int8  m128i_i8[16];
    __int16 m128i_i16[8];
    __int32 m128i_i32[4];
    __int64 m128i_i64[2];
    unsigned __int8  m128i_u8[16];
    unsigned __int16 m128i_u16[8];
    unsigned __int32 m128i_u32[4];
    unsigned __int64 m128i_u64[2];
} __m128i;
_STATIC_ASSERT(sizeof(__m128i) == 16);

/* 128-bit vector holding two doubles. */
typedef struct _DECLSPEC_INTRIN_TYPE _CRT_ALIGN(16) __m128d
{
    double m128d_f64[2];
} __m128d;
_STATIC_ASSERT(sizeof(__m128d) == 16);

/* __m128i declared with align(1); used as the pointee type of the
 * _mm_loadu_si128 / _mm_storeu_si128 style prototypes in this header. */
typedef __declspec(align(1)) __m128i __m128i_u;

/* Expands to nothing for this compiler. */
#define __ATTRIBUTE_SSE2__

#else /* _MSC_VER */

/* Public 128-bit vector types, 16-byte aligned. */
typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));

/* Same element types and size, declared with 1-byte alignment. */
typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1)));
typedef long long __m128i_u __attribute__((__vector_size__(16), __aligned__(1)));

/* Type defines.  */
typedef double __v2df __attribute__((__vector_size__(16)));
typedef long long __v2di __attribute__((__vector_size__(16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));

/* Unsigned types */
typedef unsigned long long __v2du __attribute__((__vector_size__(16)));
typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
typedef unsigned char __v16qu __attribute__((__vector_size__(16)));

/* We need an explicitly signed variant for char. Note that this shouldn't
 * appear in the interface though. */
typedef signed char __v16qs __attribute__((__vector_size__(16)));

/*
 * Function attributes selecting the "sse2" (or "mmx,sse2") target.
 * With clang the attribute additionally carries __min_vector_width__(128).
 */
#ifdef __clang__
#define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2"),__min_vector_width__(128)))
#define __ATTRIBUTE_MMXSSE2__ __attribute__((__target__("mmx,sse2"),__min_vector_width__(128)))
#else
#define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2")))
#define __ATTRIBUTE_MMXSSE2__ __attribute__((__target__("mmx,sse2")))
#endif

/* __INTRIN_INLINE combined with the matching target attribute. */
#define __INTRIN_INLINE_SSE2 __INTRIN_INLINE __ATTRIBUTE_SSE2__
#define __INTRIN_INLINE_MMXSSE2 __INTRIN_INLINE __ATTRIBUTE_MMXSSE2__

#endif /* _MSC_VER */
7584344399STimo Kreuzer 
76*a67f3688STimo Kreuzer #ifdef __cplusplus
77*a67f3688STimo Kreuzer extern "C" {
78*a67f3688STimo Kreuzer #endif
79*a67f3688STimo Kreuzer 
8084344399STimo Kreuzer extern __m128d _mm_add_sd(__m128d a, __m128d b);
8184344399STimo Kreuzer extern __m128d _mm_add_pd(__m128d a, __m128d b);
8284344399STimo Kreuzer extern __m128d _mm_sub_sd(__m128d a, __m128d b);
8384344399STimo Kreuzer extern __m128d _mm_sub_pd(__m128d a, __m128d b);
8484344399STimo Kreuzer extern __m128d _mm_mul_sd(__m128d a, __m128d b);
8584344399STimo Kreuzer extern __m128d _mm_mul_pd(__m128d a, __m128d b);
8684344399STimo Kreuzer extern __m128d _mm_div_sd(__m128d a, __m128d b);
8784344399STimo Kreuzer extern __m128d _mm_div_pd(__m128d a, __m128d b);
8884344399STimo Kreuzer extern __m128d _mm_sqrt_sd(__m128d a, __m128d b);
8984344399STimo Kreuzer extern __m128d _mm_sqrt_pd(__m128d a);
9084344399STimo Kreuzer extern __m128d _mm_min_sd(__m128d a, __m128d b);
9184344399STimo Kreuzer extern __m128d _mm_min_pd(__m128d a, __m128d b);
9284344399STimo Kreuzer extern __m128d _mm_max_sd(__m128d a, __m128d b);
9384344399STimo Kreuzer extern __m128d _mm_max_pd(__m128d a, __m128d b);
9484344399STimo Kreuzer extern __m128d _mm_and_pd(__m128d a, __m128d b);
9584344399STimo Kreuzer extern __m128d _mm_andnot_pd(__m128d a, __m128d b);
9684344399STimo Kreuzer extern __m128d _mm_or_pd(__m128d a, __m128d b);
9784344399STimo Kreuzer extern __m128d _mm_xor_pd(__m128d a, __m128d b);
9884344399STimo Kreuzer extern __m128d _mm_cmpeq_pd(__m128d a, __m128d b);
9984344399STimo Kreuzer extern __m128d _mm_cmplt_pd(__m128d a, __m128d b);
10084344399STimo Kreuzer extern __m128d _mm_cmple_pd(__m128d a, __m128d b);
10184344399STimo Kreuzer extern __m128d _mm_cmpgt_pd(__m128d a, __m128d b);
10284344399STimo Kreuzer extern __m128d _mm_cmpge_pd(__m128d a, __m128d b);
10384344399STimo Kreuzer extern __m128d _mm_cmpord_pd(__m128d a, __m128d b);
10484344399STimo Kreuzer extern __m128d _mm_cmpunord_pd(__m128d a, __m128d b);
10584344399STimo Kreuzer extern __m128d _mm_cmpneq_pd(__m128d a, __m128d b);
10684344399STimo Kreuzer extern __m128d _mm_cmpnlt_pd(__m128d a, __m128d b);
10784344399STimo Kreuzer extern __m128d _mm_cmpnle_pd(__m128d a, __m128d b);
10884344399STimo Kreuzer extern __m128d _mm_cmpngt_pd(__m128d a, __m128d b);
10984344399STimo Kreuzer extern __m128d _mm_cmpnge_pd(__m128d a, __m128d b);
11084344399STimo Kreuzer extern __m128d _mm_cmpeq_sd(__m128d a, __m128d b);
11184344399STimo Kreuzer extern __m128d _mm_cmplt_sd(__m128d a, __m128d b);
11284344399STimo Kreuzer extern __m128d _mm_cmple_sd(__m128d a, __m128d b);
11384344399STimo Kreuzer extern __m128d _mm_cmpgt_sd(__m128d a, __m128d b);
11484344399STimo Kreuzer extern __m128d _mm_cmpge_sd(__m128d a, __m128d b);
11584344399STimo Kreuzer extern __m128d _mm_cmpord_sd(__m128d a, __m128d b);
11684344399STimo Kreuzer extern __m128d _mm_cmpunord_sd(__m128d a, __m128d b);
11784344399STimo Kreuzer extern __m128d _mm_cmpneq_sd(__m128d a, __m128d b);
11884344399STimo Kreuzer extern __m128d _mm_cmpnlt_sd(__m128d a, __m128d b);
11984344399STimo Kreuzer extern __m128d _mm_cmpnle_sd(__m128d a, __m128d b);
12084344399STimo Kreuzer extern __m128d _mm_cmpngt_sd(__m128d a, __m128d b);
12184344399STimo Kreuzer extern __m128d _mm_cmpnge_sd(__m128d a, __m128d b);
12284344399STimo Kreuzer extern int _mm_comieq_sd(__m128d a, __m128d b);
12384344399STimo Kreuzer extern int _mm_comilt_sd(__m128d a, __m128d b);
12484344399STimo Kreuzer extern int _mm_comile_sd(__m128d a, __m128d b);
12584344399STimo Kreuzer extern int _mm_comigt_sd(__m128d a, __m128d b);
12684344399STimo Kreuzer extern int _mm_comige_sd(__m128d a, __m128d b);
12784344399STimo Kreuzer extern int _mm_comineq_sd(__m128d a, __m128d b);
12884344399STimo Kreuzer extern int _mm_ucomieq_sd(__m128d a, __m128d b);
12984344399STimo Kreuzer extern int _mm_ucomilt_sd(__m128d a, __m128d b);
13084344399STimo Kreuzer extern int _mm_ucomile_sd(__m128d a, __m128d b);
13184344399STimo Kreuzer extern int _mm_ucomigt_sd(__m128d a, __m128d b);
13284344399STimo Kreuzer extern int _mm_ucomige_sd(__m128d a, __m128d b);
13384344399STimo Kreuzer extern int _mm_ucomineq_sd(__m128d a, __m128d b);
13484344399STimo Kreuzer extern __m128 _mm_cvtpd_ps(__m128d a);
13584344399STimo Kreuzer extern __m128d _mm_cvtps_pd(__m128 a);
13684344399STimo Kreuzer extern __m128d _mm_cvtepi32_pd(__m128i a);
13784344399STimo Kreuzer extern __m128i _mm_cvtpd_epi32(__m128d a);
13884344399STimo Kreuzer extern int _mm_cvtsd_si32(__m128d a);
13984344399STimo Kreuzer extern __m128 _mm_cvtsd_ss(__m128 a, __m128d b);
14084344399STimo Kreuzer extern __m128d _mm_cvtsi32_sd(__m128d a, int b);
14184344399STimo Kreuzer extern __m128d _mm_cvtss_sd(__m128d a, __m128 b);
14284344399STimo Kreuzer extern __m128i _mm_cvttpd_epi32(__m128d a);
14384344399STimo Kreuzer extern int _mm_cvttsd_si32(__m128d a);
14484344399STimo Kreuzer extern __m64 _mm_cvtpd_pi32(__m128d a);
14584344399STimo Kreuzer extern __m64 _mm_cvttpd_pi32(__m128d a);
14684344399STimo Kreuzer extern __m128d _mm_cvtpi32_pd(__m64 a);
14784344399STimo Kreuzer extern double _mm_cvtsd_f64(__m128d a);
14884344399STimo Kreuzer extern __m128d _mm_load_pd(double const *dp);
14984344399STimo Kreuzer extern __m128d _mm_load1_pd(double const *dp);
15084344399STimo Kreuzer extern __m128d _mm_loadr_pd(double const *dp);
15184344399STimo Kreuzer extern __m128d _mm_loadu_pd(double const *dp);
15284344399STimo Kreuzer //extern __m128i _mm_loadu_si64(void const *a);
15384344399STimo Kreuzer //extern __m128i _mm_loadu_si32(void const *a);
15484344399STimo Kreuzer //extern __m128i _mm_loadu_si16(void const *a);
15584344399STimo Kreuzer extern __m128d _mm_load_sd(double const *dp);
15684344399STimo Kreuzer extern __m128d _mm_loadh_pd(__m128d a, double const *dp);
15784344399STimo Kreuzer extern __m128d _mm_loadl_pd(__m128d a, double const *dp);
15884344399STimo Kreuzer //extern __m128d _mm_undefined_pd(void);
15984344399STimo Kreuzer extern __m128d _mm_set_sd(double w);
16084344399STimo Kreuzer extern __m128d _mm_set1_pd(double w);
16184344399STimo Kreuzer extern __m128d _mm_set_pd(double w, double x);
16284344399STimo Kreuzer extern __m128d _mm_setr_pd(double w, double x);
16384344399STimo Kreuzer extern __m128d _mm_setzero_pd(void);
16484344399STimo Kreuzer extern __m128d _mm_move_sd(__m128d a, __m128d b);
16584344399STimo Kreuzer extern void _mm_store_sd(double *dp, __m128d a);
16684344399STimo Kreuzer extern void _mm_store_pd(double *dp, __m128d a);
16784344399STimo Kreuzer extern void _mm_store1_pd(double *dp, __m128d a);
16884344399STimo Kreuzer extern void _mm_storeu_pd(double *dp, __m128d a);
16984344399STimo Kreuzer extern void _mm_storer_pd(double *dp, __m128d a);
17084344399STimo Kreuzer extern void _mm_storeh_pd(double *dp, __m128d a);
17184344399STimo Kreuzer extern void _mm_storel_pd(double *dp, __m128d a);
17284344399STimo Kreuzer extern __m128i _mm_add_epi8(__m128i a, __m128i b);
17384344399STimo Kreuzer extern __m128i _mm_add_epi16(__m128i a, __m128i b);
17484344399STimo Kreuzer extern __m128i _mm_add_epi32(__m128i a, __m128i b);
17584344399STimo Kreuzer extern __m64 _mm_add_si64(__m64 a, __m64 b);
17684344399STimo Kreuzer extern __m128i _mm_add_epi64(__m128i a, __m128i b);
17784344399STimo Kreuzer extern __m128i _mm_adds_epi8(__m128i a, __m128i b);
17884344399STimo Kreuzer extern __m128i _mm_adds_epi16(__m128i a, __m128i b);
17984344399STimo Kreuzer extern __m128i _mm_adds_epu8(__m128i a, __m128i b);
18084344399STimo Kreuzer extern __m128i _mm_adds_epu16(__m128i a, __m128i b);
18184344399STimo Kreuzer extern __m128i _mm_avg_epu8(__m128i a, __m128i b);
18284344399STimo Kreuzer extern __m128i _mm_avg_epu16(__m128i a, __m128i b);
18384344399STimo Kreuzer extern __m128i _mm_madd_epi16(__m128i a, __m128i b);
18484344399STimo Kreuzer extern __m128i _mm_max_epi16(__m128i a, __m128i b);
18584344399STimo Kreuzer extern __m128i _mm_max_epu8(__m128i a, __m128i b);
18684344399STimo Kreuzer extern __m128i _mm_min_epi16(__m128i a, __m128i b);
18784344399STimo Kreuzer extern __m128i _mm_min_epu8(__m128i a, __m128i b);
18884344399STimo Kreuzer extern __m128i _mm_mulhi_epi16(__m128i a, __m128i b);
18984344399STimo Kreuzer extern __m128i _mm_mulhi_epu16(__m128i a, __m128i b);
19084344399STimo Kreuzer extern __m128i _mm_mullo_epi16(__m128i a, __m128i b);
19184344399STimo Kreuzer extern __m64 _mm_mul_su32(__m64 a, __m64 b);
19284344399STimo Kreuzer extern __m128i _mm_mul_epu32(__m128i a, __m128i b);
19384344399STimo Kreuzer extern __m128i _mm_sad_epu8(__m128i a, __m128i b);
19484344399STimo Kreuzer extern __m128i _mm_sub_epi8(__m128i a, __m128i b);
19584344399STimo Kreuzer extern __m128i _mm_sub_epi16(__m128i a, __m128i b);
19684344399STimo Kreuzer extern __m128i _mm_sub_epi32(__m128i a, __m128i b);
19784344399STimo Kreuzer extern __m64 _mm_sub_si64(__m64 a, __m64 b);
19884344399STimo Kreuzer extern __m128i _mm_sub_epi64(__m128i a, __m128i b);
19984344399STimo Kreuzer extern __m128i _mm_subs_epi8(__m128i a, __m128i b);
20084344399STimo Kreuzer extern __m128i _mm_subs_epi16(__m128i a, __m128i b);
20184344399STimo Kreuzer extern __m128i _mm_subs_epu8(__m128i a, __m128i b);
20284344399STimo Kreuzer extern __m128i _mm_subs_epu16(__m128i a, __m128i b);
20384344399STimo Kreuzer extern __m128i _mm_and_si128(__m128i a, __m128i b);
20484344399STimo Kreuzer extern __m128i _mm_andnot_si128(__m128i a, __m128i b);
20584344399STimo Kreuzer extern __m128i _mm_or_si128(__m128i a, __m128i b);
20684344399STimo Kreuzer extern __m128i _mm_xor_si128(__m128i a, __m128i b);
20784344399STimo Kreuzer extern __m128i _mm_slli_si128(__m128i a, int i);
20884344399STimo Kreuzer extern __m128i _mm_slli_epi16(__m128i a, int count);
20984344399STimo Kreuzer extern __m128i _mm_sll_epi16(__m128i a, __m128i count);
21084344399STimo Kreuzer extern __m128i _mm_slli_epi32(__m128i a, int count);
21184344399STimo Kreuzer extern __m128i _mm_sll_epi32(__m128i a, __m128i count);
21284344399STimo Kreuzer extern __m128i _mm_slli_epi64(__m128i a, int count);
21384344399STimo Kreuzer extern __m128i _mm_sll_epi64(__m128i a, __m128i count);
21484344399STimo Kreuzer extern __m128i _mm_srai_epi16(__m128i a, int count);
21584344399STimo Kreuzer extern __m128i _mm_sra_epi16(__m128i a, __m128i count);
21684344399STimo Kreuzer extern __m128i _mm_srai_epi32(__m128i a, int count);
21784344399STimo Kreuzer extern __m128i _mm_sra_epi32(__m128i a, __m128i count);
21884344399STimo Kreuzer extern __m128i _mm_srli_si128(__m128i a, int imm);
21984344399STimo Kreuzer extern __m128i _mm_srli_epi16(__m128i a, int count);
22084344399STimo Kreuzer extern __m128i _mm_srl_epi16(__m128i a, __m128i count);
22184344399STimo Kreuzer extern __m128i _mm_srli_epi32(__m128i a, int count);
22284344399STimo Kreuzer extern __m128i _mm_srl_epi32(__m128i a, __m128i count);
22384344399STimo Kreuzer extern __m128i _mm_srli_epi64(__m128i a, int count);
22484344399STimo Kreuzer extern __m128i _mm_srl_epi64(__m128i a, __m128i count);
22584344399STimo Kreuzer extern __m128i _mm_cmpeq_epi8(__m128i a, __m128i b);
22684344399STimo Kreuzer extern __m128i _mm_cmpeq_epi16(__m128i a, __m128i b);
22784344399STimo Kreuzer extern __m128i _mm_cmpeq_epi32(__m128i a, __m128i b);
22884344399STimo Kreuzer extern __m128i _mm_cmpgt_epi8(__m128i a, __m128i b);
22984344399STimo Kreuzer extern __m128i _mm_cmpgt_epi16(__m128i a, __m128i b);
23084344399STimo Kreuzer extern __m128i _mm_cmpgt_epi32(__m128i a, __m128i b);
23184344399STimo Kreuzer extern __m128i _mm_cmplt_epi8(__m128i a, __m128i b);
23284344399STimo Kreuzer extern __m128i _mm_cmplt_epi16(__m128i a, __m128i b);
23384344399STimo Kreuzer extern __m128i _mm_cmplt_epi32(__m128i a, __m128i b);
23484344399STimo Kreuzer #ifdef _M_AMD64
23584344399STimo Kreuzer extern __m128d _mm_cvtsi64_sd(__m128d a, long long b);
23684344399STimo Kreuzer extern long long _mm_cvtsd_si64(__m128d a);
23784344399STimo Kreuzer extern long long _mm_cvttsd_si64(__m128d a);
23884344399STimo Kreuzer #endif
23984344399STimo Kreuzer extern __m128 _mm_cvtepi32_ps(__m128i a);
24084344399STimo Kreuzer extern __m128i _mm_cvtps_epi32(__m128 a);
24184344399STimo Kreuzer extern __m128i _mm_cvttps_epi32(__m128 a);
24284344399STimo Kreuzer extern __m128i _mm_cvtsi32_si128(int a);
24384344399STimo Kreuzer #ifdef _M_AMD64
24484344399STimo Kreuzer extern __m128i _mm_cvtsi64_si128(long long a);
24584344399STimo Kreuzer #endif
24684344399STimo Kreuzer extern int _mm_cvtsi128_si32(__m128i a);
24784344399STimo Kreuzer #ifdef _M_AMD64
24884344399STimo Kreuzer extern long long _mm_cvtsi128_si64(__m128i a);
24984344399STimo Kreuzer #endif
25084344399STimo Kreuzer extern __m128i _mm_load_si128(__m128i const *p);
25184344399STimo Kreuzer extern __m128i _mm_loadu_si128(__m128i_u const *p);
25284344399STimo Kreuzer extern __m128i _mm_loadl_epi64(__m128i_u const *p);
25384344399STimo Kreuzer //extern __m128i _mm_undefined_si128(void);
25484344399STimo Kreuzer //extern __m128i _mm_set_epi64x(long long q1, long long q0); // FIXME
25584344399STimo Kreuzer extern __m128i _mm_set_epi64(__m64 q1, __m64 q0);
25684344399STimo Kreuzer //extern __m128i _mm_set_epi32(int i3, int i1, int i0);
25784344399STimo Kreuzer extern __m128i _mm_set_epi32(int i3, int i2, int i1, int i0);
25884344399STimo Kreuzer //extern __m128i _mm_set_epi16(short w7, short w2, short w1, short w0);
25984344399STimo Kreuzer extern __m128i _mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0);
26084344399STimo Kreuzer //extern __m128i _mm_set_epi8(char b15, char b10, char b4, char b3, char b2, char b1, char b0);
26184344399STimo Kreuzer extern __m128i _mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0);
26284344399STimo Kreuzer //extern __m128i _mm_set1_epi64x(long long q); // FIXME
26384344399STimo Kreuzer extern __m128i _mm_set1_epi64(__m64 q);
26484344399STimo Kreuzer extern __m128i _mm_set1_epi32(int i);
26584344399STimo Kreuzer extern __m128i _mm_set1_epi16(short w);
26684344399STimo Kreuzer extern __m128i _mm_set1_epi8(char b);
26784344399STimo Kreuzer extern __m128i _mm_setl_epi64(__m128i q); // FIXME: clang?
26884344399STimo Kreuzer extern __m128i _mm_setr_epi64(__m64 q0, __m64 q1);
26984344399STimo Kreuzer //extern __m128i _mm_setr_epi32(int i0, int i2, int i3);
27084344399STimo Kreuzer extern __m128i _mm_setr_epi32(int i0, int i1, int i2, int i3);
27184344399STimo Kreuzer //extern __m128i _mm_setr_epi16(short w0, short w5, short w6, short w7);
27284344399STimo Kreuzer extern __m128i _mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7);
27384344399STimo Kreuzer //extern __m128i _mm_setr_epi8(char b0, char b6, char b11, char b12, char b13, char b14, char b15);
27484344399STimo Kreuzer extern __m128i _mm_setr_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0);
27584344399STimo Kreuzer extern __m128i _mm_setzero_si128(void);
27684344399STimo Kreuzer extern void _mm_store_si128(__m128i *p, __m128i b);
27784344399STimo Kreuzer extern void _mm_storeu_si128(__m128i_u *p, __m128i b);
27884344399STimo Kreuzer //extern void _mm_storeu_si64(void *p, __m128i b);
27984344399STimo Kreuzer //extern void _mm_storeu_si32(void *p, __m128i b);
28084344399STimo Kreuzer //extern void _mm_storeu_si16(void *p, __m128i b);
28184344399STimo Kreuzer extern void _mm_maskmoveu_si128(__m128i d, __m128i n, _Out_writes_bytes_(16) char *p);
28284344399STimo Kreuzer extern void _mm_storel_epi64(__m128i_u *p, __m128i a);
28384344399STimo Kreuzer extern void _mm_stream_pd(double *p, __m128d a);
28484344399STimo Kreuzer extern void _mm_stream_si128(__m128i *p, __m128i a);
28584344399STimo Kreuzer extern void _mm_stream_si32(int *p, int a);
28684344399STimo Kreuzer extern void _mm_clflush(void const *p);
28784344399STimo Kreuzer extern void _mm_lfence(void);
28884344399STimo Kreuzer extern void _mm_mfence(void);
28984344399STimo Kreuzer extern __m128i _mm_packs_epi16(__m128i a, __m128i b);
29084344399STimo Kreuzer extern __m128i _mm_packs_epi32(__m128i a, __m128i b);
29184344399STimo Kreuzer extern __m128i _mm_packus_epi16(__m128i a, __m128i b);
29284344399STimo Kreuzer extern int _mm_extract_epi16(__m128i a, int imm);
29384344399STimo Kreuzer extern __m128i _mm_insert_epi16(__m128i a, int b, int imm);
29484344399STimo Kreuzer extern int _mm_movemask_epi8(__m128i a);
29584344399STimo Kreuzer extern __m128i _mm_shuffle_epi32(__m128i a, int imm);
29684344399STimo Kreuzer extern __m128i _mm_shufflelo_epi16(__m128i a, int imm);
29784344399STimo Kreuzer extern __m128i _mm_shufflehi_epi16(__m128i a, int imm);
29884344399STimo Kreuzer extern __m128i _mm_unpackhi_epi8(__m128i a, __m128i b);
29984344399STimo Kreuzer extern __m128i _mm_unpackhi_epi16(__m128i a, __m128i b);
30084344399STimo Kreuzer extern __m128i _mm_unpackhi_epi32(__m128i a, __m128i b);
30184344399STimo Kreuzer extern __m128i _mm_unpackhi_epi64(__m128i a, __m128i b);
30284344399STimo Kreuzer extern __m128i _mm_unpacklo_epi8(__m128i a, __m128i b);
30384344399STimo Kreuzer extern __m128i _mm_unpacklo_epi16(__m128i a, __m128i b);
30484344399STimo Kreuzer extern __m128i _mm_unpacklo_epi32(__m128i a, __m128i b);
30584344399STimo Kreuzer extern __m128i _mm_unpacklo_epi64(__m128i a, __m128i b);
30684344399STimo Kreuzer extern __m64 _mm_movepi64_pi64(__m128i a);
30784344399STimo Kreuzer extern __m128i _mm_movpi64_epi64(__m64 a);
30884344399STimo Kreuzer extern __m128i _mm_move_epi64(__m128i a);
30984344399STimo Kreuzer extern __m128d _mm_unpackhi_pd(__m128d a, __m128d b);
31084344399STimo Kreuzer extern __m128d _mm_unpacklo_pd(__m128d a, __m128d b);
31184344399STimo Kreuzer extern int _mm_movemask_pd(__m128d a);
31284344399STimo Kreuzer extern __m128d _mm_shuffle_pd(__m128d a, __m128d b, int imm);
31384344399STimo Kreuzer extern __m128 _mm_castpd_ps(__m128d a);
31484344399STimo Kreuzer extern __m128i _mm_castpd_si128(__m128d a);
31584344399STimo Kreuzer extern __m128d _mm_castps_pd(__m128 a);
31684344399STimo Kreuzer extern __m128i _mm_castps_si128(__m128 a);
31784344399STimo Kreuzer extern __m128 _mm_castsi128_ps(__m128i a);
31884344399STimo Kreuzer extern __m128d _mm_castsi128_pd(__m128i a);
31984344399STimo Kreuzer void _mm_pause(void);
32084344399STimo Kreuzer 
/* Alternate names */
/* Function-like aliases: forward their arguments to the primary name. */
#define _mm_set_pd1(a) _mm_set1_pd(a)
#define _mm_load_pd1(p) _mm_load1_pd(p)
#define _mm_store_pd1(p, a) _mm_store1_pd((p), (a))
/* Object-like aliases: plain renames of the primary name. */
#define _mm_bslli_si128 _mm_slli_si128
#define _mm_bsrli_si128 _mm_srli_si128
#define _mm_stream_si64 _mm_stream_si64x
32884344399STimo Kreuzer 
32984344399STimo Kreuzer #if defined(_MSC_VER) && !defined(__clang__)
33084344399STimo Kreuzer 
33184344399STimo Kreuzer #pragma intrinsic(_mm_add_sd)
33284344399STimo Kreuzer #pragma intrinsic(_mm_add_pd)
33384344399STimo Kreuzer #pragma intrinsic(_mm_sub_sd)
33484344399STimo Kreuzer #pragma intrinsic(_mm_sub_pd)
33584344399STimo Kreuzer #pragma intrinsic(_mm_mul_sd)
33684344399STimo Kreuzer #pragma intrinsic(_mm_mul_pd)
33784344399STimo Kreuzer #pragma intrinsic(_mm_div_sd)
33884344399STimo Kreuzer #pragma intrinsic(_mm_div_pd)
33984344399STimo Kreuzer #pragma intrinsic(_mm_sqrt_sd)
34084344399STimo Kreuzer #pragma intrinsic(_mm_sqrt_pd)
34184344399STimo Kreuzer #pragma intrinsic(_mm_min_sd)
34284344399STimo Kreuzer #pragma intrinsic(_mm_min_pd)
34384344399STimo Kreuzer #pragma intrinsic(_mm_max_sd)
34484344399STimo Kreuzer #pragma intrinsic(_mm_max_pd)
34584344399STimo Kreuzer #pragma intrinsic(_mm_and_pd)
34684344399STimo Kreuzer #pragma intrinsic(_mm_andnot_pd)
34784344399STimo Kreuzer #pragma intrinsic(_mm_or_pd)
34884344399STimo Kreuzer #pragma intrinsic(_mm_xor_pd)
34984344399STimo Kreuzer #pragma intrinsic(_mm_cmpeq_pd)
35084344399STimo Kreuzer #pragma intrinsic(_mm_cmplt_pd)
35184344399STimo Kreuzer #pragma intrinsic(_mm_cmple_pd)
35284344399STimo Kreuzer #pragma intrinsic(_mm_cmpgt_pd)
35384344399STimo Kreuzer #pragma intrinsic(_mm_cmpge_pd)
35484344399STimo Kreuzer #pragma intrinsic(_mm_cmpord_pd)
35584344399STimo Kreuzer #pragma intrinsic(_mm_cmpunord_pd)
35684344399STimo Kreuzer #pragma intrinsic(_mm_cmpneq_pd)
35784344399STimo Kreuzer #pragma intrinsic(_mm_cmpnlt_pd)
35884344399STimo Kreuzer #pragma intrinsic(_mm_cmpnle_pd)
35984344399STimo Kreuzer #pragma intrinsic(_mm_cmpngt_pd)
36084344399STimo Kreuzer #pragma intrinsic(_mm_cmpnge_pd)
36184344399STimo Kreuzer #pragma intrinsic(_mm_cmpeq_sd)
36284344399STimo Kreuzer #pragma intrinsic(_mm_cmplt_sd)
36384344399STimo Kreuzer #pragma intrinsic(_mm_cmple_sd)
36484344399STimo Kreuzer #pragma intrinsic(_mm_cmpgt_sd)
36584344399STimo Kreuzer #pragma intrinsic(_mm_cmpge_sd)
36684344399STimo Kreuzer #pragma intrinsic(_mm_cmpord_sd)
36784344399STimo Kreuzer #pragma intrinsic(_mm_cmpunord_sd)
36884344399STimo Kreuzer #pragma intrinsic(_mm_cmpneq_sd)
36984344399STimo Kreuzer #pragma intrinsic(_mm_cmpnlt_sd)
37084344399STimo Kreuzer #pragma intrinsic(_mm_cmpnle_sd)
37184344399STimo Kreuzer #pragma intrinsic(_mm_cmpngt_sd)
37284344399STimo Kreuzer #pragma intrinsic(_mm_cmpnge_sd)
37384344399STimo Kreuzer #pragma intrinsic(_mm_comieq_sd)
37484344399STimo Kreuzer #pragma intrinsic(_mm_comilt_sd)
37584344399STimo Kreuzer #pragma intrinsic(_mm_comile_sd)
37684344399STimo Kreuzer #pragma intrinsic(_mm_comigt_sd)
37784344399STimo Kreuzer #pragma intrinsic(_mm_comige_sd)
37884344399STimo Kreuzer #pragma intrinsic(_mm_comineq_sd)
37984344399STimo Kreuzer #pragma intrinsic(_mm_ucomieq_sd)
38084344399STimo Kreuzer #pragma intrinsic(_mm_ucomilt_sd)
38184344399STimo Kreuzer #pragma intrinsic(_mm_ucomile_sd)
38284344399STimo Kreuzer #pragma intrinsic(_mm_ucomigt_sd)
38384344399STimo Kreuzer #pragma intrinsic(_mm_ucomige_sd)
38484344399STimo Kreuzer #pragma intrinsic(_mm_ucomineq_sd)
38584344399STimo Kreuzer #pragma intrinsic(_mm_cvtpd_ps)
38684344399STimo Kreuzer #pragma intrinsic(_mm_cvtps_pd)
38784344399STimo Kreuzer #pragma intrinsic(_mm_cvtepi32_pd)
38884344399STimo Kreuzer #pragma intrinsic(_mm_cvtpd_epi32)
38984344399STimo Kreuzer #pragma intrinsic(_mm_cvtsd_si32)
39084344399STimo Kreuzer #pragma intrinsic(_mm_cvtsd_ss)
39184344399STimo Kreuzer #pragma intrinsic(_mm_cvtsi32_sd)
39284344399STimo Kreuzer #pragma intrinsic(_mm_cvtss_sd)
39384344399STimo Kreuzer #pragma intrinsic(_mm_cvttpd_epi32)
39484344399STimo Kreuzer #pragma intrinsic(_mm_cvttsd_si32)
39584344399STimo Kreuzer //#pragma intrinsic(_mm_cvtpd_pi32)
39684344399STimo Kreuzer //#pragma intrinsic(_mm_cvttpd_pi32)
39784344399STimo Kreuzer //#pragma intrinsic(_mm_cvtpi32_pd)
39884344399STimo Kreuzer #pragma intrinsic(_mm_cvtsd_f64)
39984344399STimo Kreuzer #pragma intrinsic(_mm_load_pd)
40084344399STimo Kreuzer #pragma intrinsic(_mm_load1_pd)
40184344399STimo Kreuzer #pragma intrinsic(_mm_loadr_pd)
40284344399STimo Kreuzer #pragma intrinsic(_mm_loadu_pd)
40384344399STimo Kreuzer //#pragma intrinsic(_mm_loadu_si64)
40484344399STimo Kreuzer //#pragma intrinsic(_mm_loadu_si32)
40584344399STimo Kreuzer //#pragma intrinsic(_mm_loadu_si16)
40684344399STimo Kreuzer #pragma intrinsic(_mm_load_sd)
40784344399STimo Kreuzer #pragma intrinsic(_mm_loadh_pd)
40884344399STimo Kreuzer #pragma intrinsic(_mm_loadl_pd)
40984344399STimo Kreuzer //#pragma intrinsic(_mm_undefined_pd)
41084344399STimo Kreuzer #pragma intrinsic(_mm_set_sd)
41184344399STimo Kreuzer #pragma intrinsic(_mm_set1_pd)
41284344399STimo Kreuzer #pragma intrinsic(_mm_set_pd)
41384344399STimo Kreuzer #pragma intrinsic(_mm_setr_pd)
41484344399STimo Kreuzer #pragma intrinsic(_mm_setzero_pd)
41584344399STimo Kreuzer #pragma intrinsic(_mm_move_sd)
41684344399STimo Kreuzer #pragma intrinsic(_mm_store_sd)
41784344399STimo Kreuzer #pragma intrinsic(_mm_store_pd)
41884344399STimo Kreuzer #pragma intrinsic(_mm_store1_pd)
41984344399STimo Kreuzer #pragma intrinsic(_mm_storeu_pd)
42084344399STimo Kreuzer #pragma intrinsic(_mm_storer_pd)
42184344399STimo Kreuzer #pragma intrinsic(_mm_storeh_pd)
42284344399STimo Kreuzer #pragma intrinsic(_mm_storel_pd)
42384344399STimo Kreuzer #pragma intrinsic(_mm_add_epi8)
42484344399STimo Kreuzer #pragma intrinsic(_mm_add_epi16)
42584344399STimo Kreuzer #pragma intrinsic(_mm_add_epi32)
42684344399STimo Kreuzer //#pragma intrinsic(_mm_add_si64)
42784344399STimo Kreuzer #pragma intrinsic(_mm_add_epi64)
42884344399STimo Kreuzer #pragma intrinsic(_mm_adds_epi8)
42984344399STimo Kreuzer #pragma intrinsic(_mm_adds_epi16)
43084344399STimo Kreuzer #pragma intrinsic(_mm_adds_epu8)
43184344399STimo Kreuzer #pragma intrinsic(_mm_adds_epu16)
43284344399STimo Kreuzer #pragma intrinsic(_mm_avg_epu8)
43384344399STimo Kreuzer #pragma intrinsic(_mm_avg_epu16)
43484344399STimo Kreuzer #pragma intrinsic(_mm_madd_epi16)
43584344399STimo Kreuzer #pragma intrinsic(_mm_max_epi16)
43684344399STimo Kreuzer #pragma intrinsic(_mm_max_epu8)
43784344399STimo Kreuzer #pragma intrinsic(_mm_min_epi16)
43884344399STimo Kreuzer #pragma intrinsic(_mm_min_epu8)
43984344399STimo Kreuzer #pragma intrinsic(_mm_mulhi_epi16)
44084344399STimo Kreuzer #pragma intrinsic(_mm_mulhi_epu16)
44184344399STimo Kreuzer #pragma intrinsic(_mm_mullo_epi16)
44284344399STimo Kreuzer //#pragma intrinsic(_mm_mul_su32)
44384344399STimo Kreuzer #pragma intrinsic(_mm_mul_epu32)
44484344399STimo Kreuzer #pragma intrinsic(_mm_sad_epu8)
44584344399STimo Kreuzer #pragma intrinsic(_mm_sub_epi8)
44684344399STimo Kreuzer #pragma intrinsic(_mm_sub_epi16)
44784344399STimo Kreuzer #pragma intrinsic(_mm_sub_epi32)
44884344399STimo Kreuzer //#pragma intrinsic(_mm_sub_si64)
44984344399STimo Kreuzer #pragma intrinsic(_mm_sub_epi64)
45084344399STimo Kreuzer #pragma intrinsic(_mm_subs_epi8)
45184344399STimo Kreuzer #pragma intrinsic(_mm_subs_epi16)
45284344399STimo Kreuzer #pragma intrinsic(_mm_subs_epu8)
45384344399STimo Kreuzer #pragma intrinsic(_mm_subs_epu16)
45484344399STimo Kreuzer #pragma intrinsic(_mm_and_si128)
45584344399STimo Kreuzer #pragma intrinsic(_mm_andnot_si128)
45684344399STimo Kreuzer #pragma intrinsic(_mm_or_si128)
45784344399STimo Kreuzer #pragma intrinsic(_mm_xor_si128)
45884344399STimo Kreuzer #pragma intrinsic(_mm_slli_si128)
45984344399STimo Kreuzer #pragma intrinsic(_mm_slli_epi16)
46084344399STimo Kreuzer #pragma intrinsic(_mm_sll_epi16)
46184344399STimo Kreuzer #pragma intrinsic(_mm_slli_epi32)
46284344399STimo Kreuzer #pragma intrinsic(_mm_sll_epi32)
46384344399STimo Kreuzer #pragma intrinsic(_mm_slli_epi64)
46484344399STimo Kreuzer #pragma intrinsic(_mm_sll_epi64)
46584344399STimo Kreuzer #pragma intrinsic(_mm_srai_epi16)
46684344399STimo Kreuzer #pragma intrinsic(_mm_sra_epi16)
46784344399STimo Kreuzer #pragma intrinsic(_mm_srai_epi32)
46884344399STimo Kreuzer #pragma intrinsic(_mm_sra_epi32)
46984344399STimo Kreuzer #pragma intrinsic(_mm_srli_si128)
47084344399STimo Kreuzer #pragma intrinsic(_mm_srli_epi16)
47184344399STimo Kreuzer #pragma intrinsic(_mm_srl_epi16)
47284344399STimo Kreuzer #pragma intrinsic(_mm_srli_epi32)
47384344399STimo Kreuzer #pragma intrinsic(_mm_srl_epi32)
47484344399STimo Kreuzer #pragma intrinsic(_mm_srli_epi64)
47584344399STimo Kreuzer #pragma intrinsic(_mm_srl_epi64)
47684344399STimo Kreuzer #pragma intrinsic(_mm_cmpeq_epi8)
47784344399STimo Kreuzer #pragma intrinsic(_mm_cmpeq_epi16)
47884344399STimo Kreuzer #pragma intrinsic(_mm_cmpeq_epi32)
47984344399STimo Kreuzer #pragma intrinsic(_mm_cmpgt_epi8)
48084344399STimo Kreuzer #pragma intrinsic(_mm_cmpgt_epi16)
48184344399STimo Kreuzer #pragma intrinsic(_mm_cmpgt_epi32)
48284344399STimo Kreuzer #pragma intrinsic(_mm_cmplt_epi8)
48384344399STimo Kreuzer #pragma intrinsic(_mm_cmplt_epi16)
48484344399STimo Kreuzer #pragma intrinsic(_mm_cmplt_epi32)
48584344399STimo Kreuzer #ifdef _M_AMD64
48684344399STimo Kreuzer #pragma intrinsic(_mm_cvtsi64_sd)
48784344399STimo Kreuzer #pragma intrinsic(_mm_cvtsd_si64)
48884344399STimo Kreuzer #pragma intrinsic(_mm_cvttsd_si64)
48984344399STimo Kreuzer #endif
49084344399STimo Kreuzer #pragma intrinsic(_mm_cvtepi32_ps)
49184344399STimo Kreuzer #pragma intrinsic(_mm_cvtps_epi32)
49284344399STimo Kreuzer #pragma intrinsic(_mm_cvttps_epi32)
49384344399STimo Kreuzer #pragma intrinsic(_mm_cvtsi32_si128)
49484344399STimo Kreuzer #ifdef _M_AMD64
49584344399STimo Kreuzer #pragma intrinsic(_mm_cvtsi64_si128)
49684344399STimo Kreuzer #endif
49784344399STimo Kreuzer #pragma intrinsic(_mm_cvtsi128_si32)
49884344399STimo Kreuzer #ifdef _M_AMD64
49984344399STimo Kreuzer #pragma intrinsic(_mm_cvtsi128_si64)
50084344399STimo Kreuzer #endif
50184344399STimo Kreuzer #pragma intrinsic(_mm_load_si128)
50284344399STimo Kreuzer #pragma intrinsic(_mm_loadu_si128)
50384344399STimo Kreuzer #pragma intrinsic(_mm_loadl_epi64)
50484344399STimo Kreuzer //#pragma intrinsic(_mm_undefined_si128)
505*a67f3688STimo Kreuzer //#pragma intrinsic(_mm_set_epi64x)
50684344399STimo Kreuzer //#pragma intrinsic(_mm_set_epi64)
50784344399STimo Kreuzer #pragma intrinsic(_mm_set_epi32)
50884344399STimo Kreuzer #pragma intrinsic(_mm_set_epi16)
50984344399STimo Kreuzer #pragma intrinsic(_mm_set_epi8)
510*a67f3688STimo Kreuzer //#pragma intrinsic(_mm_set1_epi64x)
51184344399STimo Kreuzer //#pragma intrinsic(_mm_set1_epi64)
51284344399STimo Kreuzer #pragma intrinsic(_mm_set1_epi32)
51384344399STimo Kreuzer #pragma intrinsic(_mm_set1_epi16)
51484344399STimo Kreuzer #pragma intrinsic(_mm_set1_epi8)
51584344399STimo Kreuzer #pragma intrinsic(_mm_setl_epi64)
51684344399STimo Kreuzer //#pragma intrinsic(_mm_setr_epi64)
51784344399STimo Kreuzer #pragma intrinsic(_mm_setr_epi32)
51884344399STimo Kreuzer #pragma intrinsic(_mm_setr_epi16)
51984344399STimo Kreuzer #pragma intrinsic(_mm_setr_epi8)
52084344399STimo Kreuzer #pragma intrinsic(_mm_setzero_si128)
52184344399STimo Kreuzer #pragma intrinsic(_mm_store_si128)
52284344399STimo Kreuzer #pragma intrinsic(_mm_storeu_si128)
52384344399STimo Kreuzer //#pragma intrinsic(_mm_storeu_si64)
52484344399STimo Kreuzer //#pragma intrinsic(_mm_storeu_si32)
52584344399STimo Kreuzer //#pragma intrinsic(_mm_storeu_si16)
52684344399STimo Kreuzer #pragma intrinsic(_mm_maskmoveu_si128)
52784344399STimo Kreuzer #pragma intrinsic(_mm_storel_epi64)
52884344399STimo Kreuzer #pragma intrinsic(_mm_stream_pd)
52984344399STimo Kreuzer #pragma intrinsic(_mm_stream_si128)
53084344399STimo Kreuzer #pragma intrinsic(_mm_stream_si32)
53184344399STimo Kreuzer #pragma intrinsic(_mm_clflush)
53284344399STimo Kreuzer #pragma intrinsic(_mm_lfence)
53384344399STimo Kreuzer #pragma intrinsic(_mm_mfence)
53484344399STimo Kreuzer #pragma intrinsic(_mm_packs_epi16)
53584344399STimo Kreuzer #pragma intrinsic(_mm_packs_epi32)
53684344399STimo Kreuzer #pragma intrinsic(_mm_packus_epi16)
53784344399STimo Kreuzer #pragma intrinsic(_mm_extract_epi16)
53884344399STimo Kreuzer #pragma intrinsic(_mm_insert_epi16)
53984344399STimo Kreuzer #pragma intrinsic(_mm_movemask_epi8)
54084344399STimo Kreuzer #pragma intrinsic(_mm_shuffle_epi32)
54184344399STimo Kreuzer #pragma intrinsic(_mm_shufflelo_epi16)
54284344399STimo Kreuzer #pragma intrinsic(_mm_shufflehi_epi16)
54384344399STimo Kreuzer #pragma intrinsic(_mm_unpackhi_epi8)
54484344399STimo Kreuzer #pragma intrinsic(_mm_unpackhi_epi16)
54584344399STimo Kreuzer #pragma intrinsic(_mm_unpackhi_epi32)
54684344399STimo Kreuzer #pragma intrinsic(_mm_unpackhi_epi64)
54784344399STimo Kreuzer #pragma intrinsic(_mm_unpacklo_epi8)
54884344399STimo Kreuzer #pragma intrinsic(_mm_unpacklo_epi16)
54984344399STimo Kreuzer #pragma intrinsic(_mm_unpacklo_epi32)
55084344399STimo Kreuzer #pragma intrinsic(_mm_unpacklo_epi64)
55184344399STimo Kreuzer //#pragma intrinsic(_mm_movepi64_pi64)
55284344399STimo Kreuzer //#pragma intrinsic(_mm_movpi64_epi64)
55384344399STimo Kreuzer #pragma intrinsic(_mm_move_epi64)
55484344399STimo Kreuzer #pragma intrinsic(_mm_unpackhi_pd)
55584344399STimo Kreuzer #pragma intrinsic(_mm_unpacklo_pd)
55684344399STimo Kreuzer #pragma intrinsic(_mm_movemask_pd)
55784344399STimo Kreuzer #pragma intrinsic(_mm_shuffle_pd)
55884344399STimo Kreuzer #pragma intrinsic(_mm_castpd_ps)
55984344399STimo Kreuzer #pragma intrinsic(_mm_castpd_si128)
56084344399STimo Kreuzer #pragma intrinsic(_mm_castps_pd)
56184344399STimo Kreuzer #pragma intrinsic(_mm_castps_si128)
56284344399STimo Kreuzer #pragma intrinsic(_mm_castsi128_ps)
56384344399STimo Kreuzer #pragma intrinsic(_mm_castsi128_pd)
56484344399STimo Kreuzer #pragma intrinsic(_mm_pause)
56584344399STimo Kreuzer 
56684344399STimo Kreuzer #else /* _MSC_VER */
56784344399STimo Kreuzer 
56884344399STimo Kreuzer /*
56984344399STimo Kreuzer   Clang: https://github.com/llvm/llvm-project/blob/main/clang/lib/Headers/emmintrin.h
57084344399STimo Kreuzer   Clang older version: https://github.com/llvm/llvm-project/blob/3ef88b31843e040c95f23ff2c3c206f1fa399c05/clang/lib/Headers/emmintrin.h
57184344399STimo Kreuzer   unikraft: https://github.com/unikraft/lib-intel-intrinsics/blob/staging/include/emmintrin.h
57284344399STimo Kreuzer */
57384344399STimo Kreuzer 
_mm_add_sd(__m128d a,__m128d b)57484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_add_sd(__m128d a, __m128d b)
57584344399STimo Kreuzer {
57684344399STimo Kreuzer     a[0] += b[0];
57784344399STimo Kreuzer     return a;
57884344399STimo Kreuzer }
57984344399STimo Kreuzer 
_mm_add_pd(__m128d a,__m128d b)58084344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_add_pd(__m128d a, __m128d b)
58184344399STimo Kreuzer {
58284344399STimo Kreuzer     return (__m128d)((__v2df)a + (__v2df)b);
58384344399STimo Kreuzer }
58484344399STimo Kreuzer 
_mm_sub_sd(__m128d a,__m128d b)58584344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_sub_sd(__m128d a, __m128d b)
58684344399STimo Kreuzer {
58784344399STimo Kreuzer     a[0] -= b[0];
58884344399STimo Kreuzer     return a;
58984344399STimo Kreuzer }
59084344399STimo Kreuzer 
_mm_sub_pd(__m128d a,__m128d b)59184344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_sub_pd(__m128d a, __m128d b)
59284344399STimo Kreuzer {
59384344399STimo Kreuzer     return (__m128d)((__v2df)a - (__v2df)b);
59484344399STimo Kreuzer }
59584344399STimo Kreuzer 
_mm_mul_sd(__m128d a,__m128d b)59684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_mul_sd(__m128d a, __m128d b)
59784344399STimo Kreuzer {
59884344399STimo Kreuzer     a[0] *= b[0];
59984344399STimo Kreuzer     return a;
60084344399STimo Kreuzer }
60184344399STimo Kreuzer 
_mm_mul_pd(__m128d a,__m128d b)60284344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_mul_pd(__m128d a, __m128d b)
60384344399STimo Kreuzer {
60484344399STimo Kreuzer     return (__m128d)((__v2df)a * (__v2df)b);
60584344399STimo Kreuzer }
60684344399STimo Kreuzer 
_mm_div_sd(__m128d a,__m128d b)60784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_div_sd(__m128d a, __m128d b)
60884344399STimo Kreuzer {
60984344399STimo Kreuzer     a[0] /= b[0];
61084344399STimo Kreuzer     return a;
61184344399STimo Kreuzer }
61284344399STimo Kreuzer 
_mm_div_pd(__m128d a,__m128d b)61384344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_div_pd(__m128d a, __m128d b)
61484344399STimo Kreuzer {
61584344399STimo Kreuzer     return (__m128d)((__v2df)a / (__v2df)b);
61684344399STimo Kreuzer }
61784344399STimo Kreuzer 
_mm_sqrt_sd(__m128d a,__m128d b)61884344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_sqrt_sd(__m128d a, __m128d b)
61984344399STimo Kreuzer {
62084344399STimo Kreuzer     __m128d __c = __builtin_ia32_sqrtsd((__v2df)b);
62184344399STimo Kreuzer     return __extension__(__m128d){__c[0], a[1]};
62284344399STimo Kreuzer }
62384344399STimo Kreuzer 
_mm_sqrt_pd(__m128d a)62484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_sqrt_pd(__m128d a)
62584344399STimo Kreuzer {
62684344399STimo Kreuzer     return __builtin_ia32_sqrtpd((__v2df)a);
62784344399STimo Kreuzer }
62884344399STimo Kreuzer 
_mm_min_sd(__m128d a,__m128d b)62984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_min_sd(__m128d a, __m128d b)
63084344399STimo Kreuzer {
63184344399STimo Kreuzer     return __builtin_ia32_minsd((__v2df)a, (__v2df)b);
63284344399STimo Kreuzer }
63384344399STimo Kreuzer 
_mm_min_pd(__m128d a,__m128d b)63484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_min_pd(__m128d a, __m128d b)
63584344399STimo Kreuzer {
63684344399STimo Kreuzer     return __builtin_ia32_minpd((__v2df)a, (__v2df)b);
63784344399STimo Kreuzer }
63884344399STimo Kreuzer 
_mm_max_sd(__m128d a,__m128d b)63984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_max_sd(__m128d a, __m128d b)
64084344399STimo Kreuzer {
64184344399STimo Kreuzer     return __builtin_ia32_maxsd((__v2df)a, (__v2df)b);
64284344399STimo Kreuzer }
64384344399STimo Kreuzer 
_mm_max_pd(__m128d a,__m128d b)64484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_max_pd(__m128d a, __m128d b)
64584344399STimo Kreuzer {
64684344399STimo Kreuzer     return __builtin_ia32_maxpd((__v2df)a, (__v2df)b);
64784344399STimo Kreuzer }
64884344399STimo Kreuzer 
_mm_and_pd(__m128d a,__m128d b)64984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_and_pd(__m128d a, __m128d b)
65084344399STimo Kreuzer {
65184344399STimo Kreuzer     return (__m128d)((__v2du)a & (__v2du)b);
65284344399STimo Kreuzer }
65384344399STimo Kreuzer 
_mm_andnot_pd(__m128d a,__m128d b)65484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_andnot_pd(__m128d a, __m128d b)
65584344399STimo Kreuzer {
65684344399STimo Kreuzer     return (__m128d)(~(__v2du)a & (__v2du)b);
65784344399STimo Kreuzer }
65884344399STimo Kreuzer 
_mm_or_pd(__m128d a,__m128d b)65984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_or_pd(__m128d a, __m128d b)
66084344399STimo Kreuzer {
66184344399STimo Kreuzer     return (__m128d)((__v2du)a | (__v2du)b);
66284344399STimo Kreuzer }
66384344399STimo Kreuzer 
_mm_xor_pd(__m128d a,__m128d b)66484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_xor_pd(__m128d a, __m128d b)
66584344399STimo Kreuzer {
66684344399STimo Kreuzer     return (__m128d)((__v2du)a ^ (__v2du)b);
66784344399STimo Kreuzer }
66884344399STimo Kreuzer 
_mm_cmpeq_pd(__m128d a,__m128d b)66984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmpeq_pd(__m128d a, __m128d b)
67084344399STimo Kreuzer {
67184344399STimo Kreuzer     return (__m128d)__builtin_ia32_cmpeqpd((__v2df)a, (__v2df)b);
67284344399STimo Kreuzer }
67384344399STimo Kreuzer 
_mm_cmplt_pd(__m128d a,__m128d b)67484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmplt_pd(__m128d a, __m128d b)
67584344399STimo Kreuzer {
67684344399STimo Kreuzer     return (__m128d)__builtin_ia32_cmpltpd((__v2df)a, (__v2df)b);
67784344399STimo Kreuzer }
67884344399STimo Kreuzer 
_mm_cmple_pd(__m128d a,__m128d b)67984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmple_pd(__m128d a, __m128d b)
68084344399STimo Kreuzer {
68184344399STimo Kreuzer     return (__m128d)__builtin_ia32_cmplepd((__v2df)a, (__v2df)b);
68284344399STimo Kreuzer }
68384344399STimo Kreuzer 
_mm_cmpgt_pd(__m128d a,__m128d b)68484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmpgt_pd(__m128d a, __m128d b)
68584344399STimo Kreuzer {
68684344399STimo Kreuzer     return (__m128d)__builtin_ia32_cmpltpd((__v2df)b, (__v2df)a);
68784344399STimo Kreuzer }
68884344399STimo Kreuzer 
_mm_cmpge_pd(__m128d a,__m128d b)68984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmpge_pd(__m128d a, __m128d b)
69084344399STimo Kreuzer {
69184344399STimo Kreuzer     return (__m128d)__builtin_ia32_cmplepd((__v2df)b, (__v2df)a);
69284344399STimo Kreuzer }
69384344399STimo Kreuzer 
_mm_cmpord_pd(__m128d a,__m128d b)69484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmpord_pd(__m128d a, __m128d b)
69584344399STimo Kreuzer {
69684344399STimo Kreuzer     return (__m128d)__builtin_ia32_cmpordpd((__v2df)a, (__v2df)b);
69784344399STimo Kreuzer }
69884344399STimo Kreuzer 
_mm_cmpunord_pd(__m128d a,__m128d b)69984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmpunord_pd(__m128d a, __m128d b)
70084344399STimo Kreuzer {
70184344399STimo Kreuzer     return (__m128d)__builtin_ia32_cmpunordpd((__v2df)a, (__v2df)b);
70284344399STimo Kreuzer }
70384344399STimo Kreuzer 
_mm_cmpneq_pd(__m128d a,__m128d b)70484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmpneq_pd(__m128d a, __m128d b)
70584344399STimo Kreuzer {
70684344399STimo Kreuzer     return (__m128d)__builtin_ia32_cmpneqpd((__v2df)a, (__v2df)b);
70784344399STimo Kreuzer }
70884344399STimo Kreuzer 
_mm_cmpnlt_pd(__m128d a,__m128d b)70984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmpnlt_pd(__m128d a, __m128d b)
71084344399STimo Kreuzer {
71184344399STimo Kreuzer     return (__m128d)__builtin_ia32_cmpnltpd((__v2df)a, (__v2df)b);
71284344399STimo Kreuzer }
71384344399STimo Kreuzer 
_mm_cmpnle_pd(__m128d a,__m128d b)71484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmpnle_pd(__m128d a, __m128d b)
71584344399STimo Kreuzer {
71684344399STimo Kreuzer     return (__m128d)__builtin_ia32_cmpnlepd((__v2df)a, (__v2df)b);
71784344399STimo Kreuzer }
71884344399STimo Kreuzer 
_mm_cmpngt_pd(__m128d a,__m128d b)71984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmpngt_pd(__m128d a, __m128d b)
72084344399STimo Kreuzer {
72184344399STimo Kreuzer     return (__m128d)__builtin_ia32_cmpnltpd((__v2df)b, (__v2df)a);
72284344399STimo Kreuzer }
72384344399STimo Kreuzer 
_mm_cmpnge_pd(__m128d a,__m128d b)72484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmpnge_pd(__m128d a, __m128d b)
72584344399STimo Kreuzer {
72684344399STimo Kreuzer     return (__m128d)__builtin_ia32_cmpnlepd((__v2df)b, (__v2df)a);
72784344399STimo Kreuzer }
72884344399STimo Kreuzer 
_mm_cmpeq_sd(__m128d a,__m128d b)72984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmpeq_sd(__m128d a, __m128d b)
73084344399STimo Kreuzer {
73184344399STimo Kreuzer     return (__m128d)__builtin_ia32_cmpeqsd((__v2df)a, (__v2df)b);
73284344399STimo Kreuzer }
73384344399STimo Kreuzer 
_mm_cmplt_sd(__m128d a,__m128d b)73484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmplt_sd(__m128d a, __m128d b)
73584344399STimo Kreuzer {
73684344399STimo Kreuzer     return (__m128d)__builtin_ia32_cmpltsd((__v2df)a, (__v2df)b);
73784344399STimo Kreuzer }
73884344399STimo Kreuzer 
_mm_cmple_sd(__m128d a,__m128d b)73984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmple_sd(__m128d a, __m128d b)
74084344399STimo Kreuzer {
74184344399STimo Kreuzer     return (__m128d)__builtin_ia32_cmplesd((__v2df)a, (__v2df)b);
74284344399STimo Kreuzer }
74384344399STimo Kreuzer 
_mm_cmpgt_sd(__m128d a,__m128d b)74484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmpgt_sd(__m128d a, __m128d b)
74584344399STimo Kreuzer {
74684344399STimo Kreuzer     __m128d __c = __builtin_ia32_cmpltsd((__v2df)b, (__v2df)a);
74784344399STimo Kreuzer     return __extension__(__m128d){__c[0], a[1]};
74884344399STimo Kreuzer }
74984344399STimo Kreuzer 
_mm_cmpge_sd(__m128d a,__m128d b)75084344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmpge_sd(__m128d a, __m128d b)
75184344399STimo Kreuzer {
75284344399STimo Kreuzer     __m128d __c = __builtin_ia32_cmplesd((__v2df)b, (__v2df)a);
75384344399STimo Kreuzer     return __extension__(__m128d){__c[0], a[1]};
75484344399STimo Kreuzer }
75584344399STimo Kreuzer 
_mm_cmpord_sd(__m128d a,__m128d b)75684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmpord_sd(__m128d a, __m128d b)
75784344399STimo Kreuzer {
75884344399STimo Kreuzer     return (__m128d)__builtin_ia32_cmpordsd((__v2df)a, (__v2df)b);
75984344399STimo Kreuzer }
76084344399STimo Kreuzer 
_mm_cmpunord_sd(__m128d a,__m128d b)76184344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmpunord_sd(__m128d a, __m128d b)
76284344399STimo Kreuzer {
76384344399STimo Kreuzer     return (__m128d)__builtin_ia32_cmpunordsd((__v2df)a, (__v2df)b);
76484344399STimo Kreuzer }
76584344399STimo Kreuzer 
_mm_cmpneq_sd(__m128d a,__m128d b)76684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmpneq_sd(__m128d a, __m128d b)
76784344399STimo Kreuzer {
76884344399STimo Kreuzer     return (__m128d)__builtin_ia32_cmpneqsd((__v2df)a, (__v2df)b);
76984344399STimo Kreuzer }
77084344399STimo Kreuzer 
_mm_cmpnlt_sd(__m128d a,__m128d b)77184344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmpnlt_sd(__m128d a, __m128d b)
77284344399STimo Kreuzer {
77384344399STimo Kreuzer     return (__m128d)__builtin_ia32_cmpnltsd((__v2df)a, (__v2df)b);
77484344399STimo Kreuzer }
77584344399STimo Kreuzer 
_mm_cmpnle_sd(__m128d a,__m128d b)77684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmpnle_sd(__m128d a, __m128d b)
77784344399STimo Kreuzer {
77884344399STimo Kreuzer     return (__m128d)__builtin_ia32_cmpnlesd((__v2df)a, (__v2df)b);
77984344399STimo Kreuzer }
78084344399STimo Kreuzer 
_mm_cmpngt_sd(__m128d a,__m128d b)78184344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmpngt_sd(__m128d a, __m128d b)
78284344399STimo Kreuzer {
78384344399STimo Kreuzer     __m128d __c = __builtin_ia32_cmpnltsd((__v2df)b, (__v2df)a);
78484344399STimo Kreuzer     return __extension__(__m128d){__c[0], a[1]};
78584344399STimo Kreuzer }
78684344399STimo Kreuzer 
_mm_cmpnge_sd(__m128d a,__m128d b)78784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cmpnge_sd(__m128d a, __m128d b)
78884344399STimo Kreuzer {
78984344399STimo Kreuzer     __m128d __c = __builtin_ia32_cmpnlesd((__v2df)b, (__v2df)a);
79084344399STimo Kreuzer     return __extension__(__m128d){__c[0], a[1]};
79184344399STimo Kreuzer }
79284344399STimo Kreuzer 
_mm_comieq_sd(__m128d a,__m128d b)79384344399STimo Kreuzer __INTRIN_INLINE_SSE2 int _mm_comieq_sd(__m128d a, __m128d b)
79484344399STimo Kreuzer {
79584344399STimo Kreuzer     return __builtin_ia32_comisdeq((__v2df)a, (__v2df)b);
79684344399STimo Kreuzer }
79784344399STimo Kreuzer 
_mm_comilt_sd(__m128d a,__m128d b)79884344399STimo Kreuzer __INTRIN_INLINE_SSE2 int _mm_comilt_sd(__m128d a, __m128d b)
79984344399STimo Kreuzer {
80084344399STimo Kreuzer     return __builtin_ia32_comisdlt((__v2df)a, (__v2df)b);
80184344399STimo Kreuzer }
80284344399STimo Kreuzer 
_mm_comile_sd(__m128d a,__m128d b)80384344399STimo Kreuzer __INTRIN_INLINE_SSE2 int _mm_comile_sd(__m128d a, __m128d b)
80484344399STimo Kreuzer {
80584344399STimo Kreuzer     return __builtin_ia32_comisdle((__v2df)a, (__v2df)b);
80684344399STimo Kreuzer }
80784344399STimo Kreuzer 
_mm_comigt_sd(__m128d a,__m128d b)80884344399STimo Kreuzer __INTRIN_INLINE_SSE2 int _mm_comigt_sd(__m128d a, __m128d b)
80984344399STimo Kreuzer {
81084344399STimo Kreuzer     return __builtin_ia32_comisdgt((__v2df)a, (__v2df)b);
81184344399STimo Kreuzer }
81284344399STimo Kreuzer 
_mm_comige_sd(__m128d a,__m128d b)81384344399STimo Kreuzer __INTRIN_INLINE_SSE2 int _mm_comige_sd(__m128d a, __m128d b)
81484344399STimo Kreuzer {
81584344399STimo Kreuzer     return __builtin_ia32_comisdge((__v2df)a, (__v2df)b);
81684344399STimo Kreuzer }
81784344399STimo Kreuzer 
_mm_comineq_sd(__m128d a,__m128d b)81884344399STimo Kreuzer __INTRIN_INLINE_SSE2 int _mm_comineq_sd(__m128d a, __m128d b)
81984344399STimo Kreuzer {
82084344399STimo Kreuzer     return __builtin_ia32_comisdneq((__v2df)a, (__v2df)b);
82184344399STimo Kreuzer }
82284344399STimo Kreuzer 
_mm_ucomieq_sd(__m128d a,__m128d b)82384344399STimo Kreuzer __INTRIN_INLINE_SSE2 int _mm_ucomieq_sd(__m128d a, __m128d b)
82484344399STimo Kreuzer {
82584344399STimo Kreuzer     return __builtin_ia32_ucomisdeq((__v2df)a, (__v2df)b);
82684344399STimo Kreuzer }
82784344399STimo Kreuzer 
_mm_ucomilt_sd(__m128d a,__m128d b)82884344399STimo Kreuzer __INTRIN_INLINE_SSE2 int _mm_ucomilt_sd(__m128d a, __m128d b)
82984344399STimo Kreuzer {
83084344399STimo Kreuzer     return __builtin_ia32_ucomisdlt((__v2df)a, (__v2df)b);
83184344399STimo Kreuzer }
83284344399STimo Kreuzer 
_mm_ucomile_sd(__m128d a,__m128d b)83384344399STimo Kreuzer __INTRIN_INLINE_SSE2 int _mm_ucomile_sd(__m128d a, __m128d b)
83484344399STimo Kreuzer {
83584344399STimo Kreuzer     return __builtin_ia32_ucomisdle((__v2df)a, (__v2df)b);
83684344399STimo Kreuzer }
83784344399STimo Kreuzer 
_mm_ucomigt_sd(__m128d a,__m128d b)83884344399STimo Kreuzer __INTRIN_INLINE_SSE2 int _mm_ucomigt_sd(__m128d a, __m128d b)
83984344399STimo Kreuzer {
84084344399STimo Kreuzer     return __builtin_ia32_ucomisdgt((__v2df)a, (__v2df)b);
84184344399STimo Kreuzer }
84284344399STimo Kreuzer 
_mm_ucomige_sd(__m128d a,__m128d b)84384344399STimo Kreuzer __INTRIN_INLINE_SSE2 int _mm_ucomige_sd(__m128d a, __m128d b)
84484344399STimo Kreuzer {
84584344399STimo Kreuzer     return __builtin_ia32_ucomisdge((__v2df)a, (__v2df)b);
84684344399STimo Kreuzer }
84784344399STimo Kreuzer 
_mm_ucomineq_sd(__m128d a,__m128d b)84884344399STimo Kreuzer __INTRIN_INLINE_SSE2 int _mm_ucomineq_sd(__m128d a, __m128d b)
84984344399STimo Kreuzer {
85084344399STimo Kreuzer     return __builtin_ia32_ucomisdneq((__v2df)a, (__v2df)b);
85184344399STimo Kreuzer }
85284344399STimo Kreuzer 
_mm_cvtpd_ps(__m128d a)85384344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128 _mm_cvtpd_ps(__m128d a)
85484344399STimo Kreuzer {
85584344399STimo Kreuzer     return __builtin_ia32_cvtpd2ps((__v2df)a);
85684344399STimo Kreuzer }
85784344399STimo Kreuzer 
_mm_cvtps_pd(__m128 a)85884344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cvtps_pd(__m128 a)
85984344399STimo Kreuzer {
86084344399STimo Kreuzer #if HAS_BUILTIN(__builtin_convertvector)
86184344399STimo Kreuzer     return (__m128d)__builtin_convertvector(__builtin_shufflevector((__v4sf)a, (__v4sf)a, 0, 1), __v2df);
86284344399STimo Kreuzer #else
86384344399STimo Kreuzer     return __builtin_ia32_cvtps2pd(a);
86484344399STimo Kreuzer #endif
86584344399STimo Kreuzer }
86684344399STimo Kreuzer 
_mm_cvtepi32_pd(__m128i a)86784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cvtepi32_pd(__m128i a)
86884344399STimo Kreuzer {
86984344399STimo Kreuzer #if HAS_BUILTIN(__builtin_convertvector)
87084344399STimo Kreuzer     return (__m128d)__builtin_convertvector(__builtin_shufflevector((__v4si)a, (__v4si)a, 0, 1), __v2df);
87184344399STimo Kreuzer #else
87284344399STimo Kreuzer     return __builtin_ia32_cvtdq2pd((__v4si)a);
87384344399STimo Kreuzer #endif
87484344399STimo Kreuzer }
87584344399STimo Kreuzer 
_mm_cvtpd_epi32(__m128d a)87684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_cvtpd_epi32(__m128d a)
87784344399STimo Kreuzer {
87884344399STimo Kreuzer     return (__m128i)__builtin_ia32_cvtpd2dq((__v2df)a);
87984344399STimo Kreuzer }
88084344399STimo Kreuzer 
_mm_cvtsd_si32(__m128d a)88184344399STimo Kreuzer __INTRIN_INLINE_SSE2 int _mm_cvtsd_si32(__m128d a)
88284344399STimo Kreuzer {
88384344399STimo Kreuzer     return __builtin_ia32_cvtsd2si((__v2df)a);
88484344399STimo Kreuzer }
88584344399STimo Kreuzer 
_mm_cvtsd_ss(__m128 a,__m128d b)88684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128 _mm_cvtsd_ss(__m128 a, __m128d b)
88784344399STimo Kreuzer {
88884344399STimo Kreuzer     return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)a, (__v2df)b);
88984344399STimo Kreuzer }
89084344399STimo Kreuzer 
_mm_cvtsi32_sd(__m128d a,int b)89184344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cvtsi32_sd(__m128d a,
89284344399STimo Kreuzer                                                               int b)
89384344399STimo Kreuzer {
89484344399STimo Kreuzer     a[0] = b;
89584344399STimo Kreuzer     return a;
89684344399STimo Kreuzer }
89784344399STimo Kreuzer 
_mm_cvtss_sd(__m128d a,__m128 b)89884344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cvtss_sd(__m128d a, __m128 b)
89984344399STimo Kreuzer {
90084344399STimo Kreuzer     a[0] = b[0];
90184344399STimo Kreuzer     return a;
90284344399STimo Kreuzer }
90384344399STimo Kreuzer 
_mm_cvttpd_epi32(__m128d a)90484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_cvttpd_epi32(__m128d a)
90584344399STimo Kreuzer {
90684344399STimo Kreuzer     return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)a);
90784344399STimo Kreuzer }
90884344399STimo Kreuzer 
_mm_cvttsd_si32(__m128d a)90984344399STimo Kreuzer __INTRIN_INLINE_SSE2 int _mm_cvttsd_si32(__m128d a)
91084344399STimo Kreuzer {
91184344399STimo Kreuzer     return __builtin_ia32_cvttsd2si((__v2df)a);
91284344399STimo Kreuzer }
91384344399STimo Kreuzer 
_mm_cvtpd_pi32(__m128d a)914*a67f3688STimo Kreuzer __INTRIN_INLINE_MMXSSE2 __m64 _mm_cvtpd_pi32(__m128d a)
91584344399STimo Kreuzer {
91684344399STimo Kreuzer     return (__m64)__builtin_ia32_cvtpd2pi((__v2df)a);
91784344399STimo Kreuzer }
91884344399STimo Kreuzer 
_mm_cvttpd_pi32(__m128d a)919*a67f3688STimo Kreuzer __INTRIN_INLINE_MMXSSE2 __m64 _mm_cvttpd_pi32(__m128d a)
92084344399STimo Kreuzer {
92184344399STimo Kreuzer     return (__m64)__builtin_ia32_cvttpd2pi((__v2df)a);
92284344399STimo Kreuzer }
92384344399STimo Kreuzer 
_mm_cvtpi32_pd(__m64 a)924*a67f3688STimo Kreuzer __INTRIN_INLINE_MMXSSE2 __m128d _mm_cvtpi32_pd(__m64 a)
92584344399STimo Kreuzer {
92684344399STimo Kreuzer     return __builtin_ia32_cvtpi2pd((__v2si)a);
92784344399STimo Kreuzer }
92884344399STimo Kreuzer 
_mm_cvtsd_f64(__m128d a)92984344399STimo Kreuzer __INTRIN_INLINE_SSE2 double _mm_cvtsd_f64(__m128d a)
93084344399STimo Kreuzer {
93184344399STimo Kreuzer     return a[0];
93284344399STimo Kreuzer }
93384344399STimo Kreuzer 
_mm_load_pd(double const * dp)93484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_load_pd(double const *dp)
93584344399STimo Kreuzer {
93684344399STimo Kreuzer     return *(const __m128d *)dp;
93784344399STimo Kreuzer }
93884344399STimo Kreuzer 
_mm_load1_pd(double const * dp)93984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_load1_pd(double const *dp)
94084344399STimo Kreuzer {
94184344399STimo Kreuzer     struct __mm_load1_pd_struct {
94284344399STimo Kreuzer       double __u;
94384344399STimo Kreuzer     } __attribute__((__packed__, __may_alias__));
94484344399STimo Kreuzer     double __u = ((const struct __mm_load1_pd_struct *)dp)->__u;
94584344399STimo Kreuzer     return __extension__(__m128d){__u, __u};
94684344399STimo Kreuzer }
94784344399STimo Kreuzer 
/* Taken from GCC: builds a selector for use with the SHUFPD instruction.
   fp1 ends up in bit 1 and fp0 in bit 0 of the result. */
#define _MM_SHUFFLE2(hi, lo) (((hi) << 1) | (lo))
95284344399STimo Kreuzer 
_mm_loadr_pd(double const * dp)95384344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_loadr_pd(double const *dp)
95484344399STimo Kreuzer {
95584344399STimo Kreuzer #if HAS_BUILTIN(__builtin_shufflevector)
95684344399STimo Kreuzer     __m128d u = *(const __m128d *)dp;
95784344399STimo Kreuzer     return __builtin_shufflevector((__v2df)u, (__v2df)u, 1, 0);
95884344399STimo Kreuzer #else
95984344399STimo Kreuzer     return (__m128d){ dp[1], dp[0] };
96084344399STimo Kreuzer #endif
96184344399STimo Kreuzer }
96284344399STimo Kreuzer 
_mm_loadu_pd(double const * dp)96384344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_loadu_pd(double const *dp)
96484344399STimo Kreuzer {
96584344399STimo Kreuzer     struct __loadu_pd {
96684344399STimo Kreuzer       __m128d_u __v;
96784344399STimo Kreuzer     } __attribute__((__packed__, __may_alias__));
96884344399STimo Kreuzer     return ((const struct __loadu_pd *)dp)->__v;
96984344399STimo Kreuzer }
97084344399STimo Kreuzer 
_mm_loadu_si64(void const * a)97184344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_loadu_si64(void const *a)
97284344399STimo Kreuzer {
97384344399STimo Kreuzer     struct __loadu_si64 {
97484344399STimo Kreuzer       long long __v;
97584344399STimo Kreuzer     } __attribute__((__packed__, __may_alias__));
97684344399STimo Kreuzer     long long __u = ((const struct __loadu_si64 *)a)->__v;
97784344399STimo Kreuzer     return __extension__(__m128i)(__v2di){__u, 0LL};
97884344399STimo Kreuzer }
97984344399STimo Kreuzer 
_mm_loadu_si32(void const * a)98084344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_loadu_si32(void const *a)
98184344399STimo Kreuzer {
98284344399STimo Kreuzer     struct __loadu_si32 {
98384344399STimo Kreuzer       int __v;
98484344399STimo Kreuzer     } __attribute__((__packed__, __may_alias__));
98584344399STimo Kreuzer     int __u = ((const struct __loadu_si32 *)a)->__v;
98684344399STimo Kreuzer     return __extension__(__m128i)(__v4si){__u, 0, 0, 0};
98784344399STimo Kreuzer }
98884344399STimo Kreuzer 
_mm_loadu_si16(void const * a)98984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_loadu_si16(void const *a)
99084344399STimo Kreuzer {
99184344399STimo Kreuzer     struct __loadu_si16 {
99284344399STimo Kreuzer       short __v;
99384344399STimo Kreuzer     } __attribute__((__packed__, __may_alias__));
99484344399STimo Kreuzer     short __u = ((const struct __loadu_si16 *)a)->__v;
99584344399STimo Kreuzer     return __extension__(__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0};
99684344399STimo Kreuzer }
99784344399STimo Kreuzer 
_mm_load_sd(double const * dp)99884344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_load_sd(double const *dp)
99984344399STimo Kreuzer {
100084344399STimo Kreuzer     struct __mm_load_sd_struct {
100184344399STimo Kreuzer       double __u;
100284344399STimo Kreuzer     } __attribute__((__packed__, __may_alias__));
100384344399STimo Kreuzer     double __u = ((const struct __mm_load_sd_struct *)dp)->__u;
100484344399STimo Kreuzer     return __extension__(__m128d){__u, 0};
100584344399STimo Kreuzer }
100684344399STimo Kreuzer 
_mm_loadh_pd(__m128d a,double const * dp)100784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_loadh_pd(__m128d a, double const *dp)
100884344399STimo Kreuzer {
100984344399STimo Kreuzer     struct __mm_loadh_pd_struct {
101084344399STimo Kreuzer       double __u;
101184344399STimo Kreuzer     } __attribute__((__packed__, __may_alias__));
101284344399STimo Kreuzer     double __u = ((const struct __mm_loadh_pd_struct *)dp)->__u;
101384344399STimo Kreuzer     return __extension__(__m128d){a[0], __u};
101484344399STimo Kreuzer }
101584344399STimo Kreuzer 
_mm_loadl_pd(__m128d a,double const * dp)101684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_loadl_pd(__m128d a, double const *dp)
101784344399STimo Kreuzer {
101884344399STimo Kreuzer     struct __mm_loadl_pd_struct {
101984344399STimo Kreuzer       double __u;
102084344399STimo Kreuzer     } __attribute__((__packed__, __may_alias__));
102184344399STimo Kreuzer     double __u = ((const struct __mm_loadl_pd_struct *)dp)->__u;
102284344399STimo Kreuzer     return __extension__(__m128d){__u, a[1]};
102384344399STimo Kreuzer }
102484344399STimo Kreuzer 
_mm_undefined_pd(void)102584344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_undefined_pd(void)
102684344399STimo Kreuzer {
102784344399STimo Kreuzer #if HAS_BUILTIN(__builtin_ia32_undef128)
102884344399STimo Kreuzer     return (__m128d)__builtin_ia32_undef128();
102984344399STimo Kreuzer #else
103084344399STimo Kreuzer     __m128d undef = undef;
103184344399STimo Kreuzer     return undef;
103284344399STimo Kreuzer #endif
103384344399STimo Kreuzer }
103484344399STimo Kreuzer 
_mm_set_sd(double w)103584344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_set_sd(double w)
103684344399STimo Kreuzer {
103784344399STimo Kreuzer     return __extension__(__m128d){w, 0};
103884344399STimo Kreuzer }
103984344399STimo Kreuzer 
_mm_set1_pd(double w)104084344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_set1_pd(double w)
104184344399STimo Kreuzer {
104284344399STimo Kreuzer     return __extension__(__m128d){w, w};
104384344399STimo Kreuzer }
104484344399STimo Kreuzer 
_mm_set_pd(double w,double x)104584344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_set_pd(double w, double x)
104684344399STimo Kreuzer {
104784344399STimo Kreuzer     return __extension__(__m128d){x, w};
104884344399STimo Kreuzer }
104984344399STimo Kreuzer 
_mm_setr_pd(double w,double x)105084344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_setr_pd(double w, double x)
105184344399STimo Kreuzer {
105284344399STimo Kreuzer     return __extension__(__m128d){w, x};
105384344399STimo Kreuzer }
105484344399STimo Kreuzer 
_mm_setzero_pd(void)105584344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_setzero_pd(void)
105684344399STimo Kreuzer {
105784344399STimo Kreuzer     return __extension__(__m128d){0, 0};
105884344399STimo Kreuzer }
105984344399STimo Kreuzer 
_mm_move_sd(__m128d a,__m128d b)106084344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_move_sd(__m128d a, __m128d b)
106184344399STimo Kreuzer {
106284344399STimo Kreuzer     a[0] = b[0];
106384344399STimo Kreuzer     return a;
106484344399STimo Kreuzer }
106584344399STimo Kreuzer 
_mm_store_sd(double * dp,__m128d a)106684344399STimo Kreuzer __INTRIN_INLINE_SSE2 void _mm_store_sd(double *dp, __m128d a)
106784344399STimo Kreuzer {
106884344399STimo Kreuzer     struct __mm_store_sd_struct {
106984344399STimo Kreuzer       double __u;
107084344399STimo Kreuzer     } __attribute__((__packed__, __may_alias__));
107184344399STimo Kreuzer     ((struct __mm_store_sd_struct *)dp)->__u = a[0];
107284344399STimo Kreuzer }
107384344399STimo Kreuzer 
_mm_store_pd(double * dp,__m128d a)107484344399STimo Kreuzer __INTRIN_INLINE_SSE2 void _mm_store_pd(double *dp, __m128d a)
107584344399STimo Kreuzer {
107684344399STimo Kreuzer     *(__m128d *)dp = a;
107784344399STimo Kreuzer }
107884344399STimo Kreuzer 
_mm_store1_pd(double * dp,__m128d a)107984344399STimo Kreuzer __INTRIN_INLINE_SSE2 void _mm_store1_pd(double *dp, __m128d a)
108084344399STimo Kreuzer {
108184344399STimo Kreuzer #if HAS_BUILTIN(__builtin_shufflevector)
108284344399STimo Kreuzer     a = __builtin_shufflevector((__v2df)a, (__v2df)a, 0, 0);
108384344399STimo Kreuzer     _mm_store_pd(dp, a);
108484344399STimo Kreuzer #else
108584344399STimo Kreuzer     dp[0] = a[0];
108684344399STimo Kreuzer     dp[1] = a[0];
108784344399STimo Kreuzer #endif
108884344399STimo Kreuzer }
108984344399STimo Kreuzer 
_mm_storeu_pd(double * dp,__m128d a)109084344399STimo Kreuzer __INTRIN_INLINE_SSE2 void _mm_storeu_pd(double *dp, __m128d a)
109184344399STimo Kreuzer {
109284344399STimo Kreuzer     struct __storeu_pd {
109384344399STimo Kreuzer       __m128d_u __v;
109484344399STimo Kreuzer     } __attribute__((__packed__, __may_alias__));
109584344399STimo Kreuzer     ((struct __storeu_pd *)dp)->__v = a;
109684344399STimo Kreuzer }
109784344399STimo Kreuzer 
_mm_storer_pd(double * dp,__m128d a)109884344399STimo Kreuzer __INTRIN_INLINE_SSE2 void _mm_storer_pd(double *dp, __m128d a)
109984344399STimo Kreuzer {
110084344399STimo Kreuzer #if HAS_BUILTIN(__builtin_shufflevector)
110184344399STimo Kreuzer     a = __builtin_shufflevector((__v2df)a, (__v2df)a, 1, 0);
110284344399STimo Kreuzer     *(__m128d *)dp = a;
110384344399STimo Kreuzer #else
110484344399STimo Kreuzer     dp[0] = a[1];
110584344399STimo Kreuzer     dp[1] = a[0];
110684344399STimo Kreuzer #endif
110784344399STimo Kreuzer }
110884344399STimo Kreuzer 
_mm_storeh_pd(double * dp,__m128d a)110984344399STimo Kreuzer __INTRIN_INLINE_SSE2 void _mm_storeh_pd(double *dp, __m128d a)
111084344399STimo Kreuzer {
111184344399STimo Kreuzer     struct __mm_storeh_pd_struct {
111284344399STimo Kreuzer       double __u;
111384344399STimo Kreuzer     } __attribute__((__packed__, __may_alias__));
111484344399STimo Kreuzer     ((struct __mm_storeh_pd_struct *)dp)->__u = a[1];
111584344399STimo Kreuzer }
111684344399STimo Kreuzer 
_mm_storel_pd(double * dp,__m128d a)111784344399STimo Kreuzer __INTRIN_INLINE_SSE2 void _mm_storel_pd(double *dp, __m128d a)
111884344399STimo Kreuzer {
111984344399STimo Kreuzer     struct __mm_storeh_pd_struct {
112084344399STimo Kreuzer       double __u;
112184344399STimo Kreuzer     } __attribute__((__packed__, __may_alias__));
112284344399STimo Kreuzer     ((struct __mm_storeh_pd_struct *)dp)->__u = a[0];
112384344399STimo Kreuzer }
112484344399STimo Kreuzer 
_mm_add_epi8(__m128i a,__m128i b)112584344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_add_epi8(__m128i a, __m128i b)
112684344399STimo Kreuzer {
112784344399STimo Kreuzer     return (__m128i)((__v16qu)a + (__v16qu)b);
112884344399STimo Kreuzer }
112984344399STimo Kreuzer 
_mm_add_epi16(__m128i a,__m128i b)113084344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_add_epi16(__m128i a, __m128i b)
113184344399STimo Kreuzer {
113284344399STimo Kreuzer     return (__m128i)((__v8hu)a + (__v8hu)b);
113384344399STimo Kreuzer }
113484344399STimo Kreuzer 
_mm_add_epi32(__m128i a,__m128i b)113584344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_add_epi32(__m128i a, __m128i b)
113684344399STimo Kreuzer {
113784344399STimo Kreuzer     return (__m128i)((__v4su)a + (__v4su)b);
113884344399STimo Kreuzer }
113984344399STimo Kreuzer 
_mm_add_si64(__m64 a,__m64 b)1140*a67f3688STimo Kreuzer __INTRIN_INLINE_MMXSSE2 __m64 _mm_add_si64(__m64 a, __m64 b)
114184344399STimo Kreuzer {
114284344399STimo Kreuzer     return (__m64)__builtin_ia32_paddq((__v1di)a, (__v1di)b);
114384344399STimo Kreuzer }
114484344399STimo Kreuzer 
_mm_add_epi64(__m128i a,__m128i b)114584344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_add_epi64(__m128i a, __m128i b)
114684344399STimo Kreuzer {
114784344399STimo Kreuzer     return (__m128i)((__v2du)a + (__v2du)b);
114884344399STimo Kreuzer }
114984344399STimo Kreuzer 
_mm_adds_epi8(__m128i a,__m128i b)115084344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_adds_epi8(__m128i a, __m128i b)
115184344399STimo Kreuzer {
115284344399STimo Kreuzer #if HAS_BUILTIN(__builtin_elementwise_add_sat)
115384344399STimo Kreuzer     return (__m128i)__builtin_elementwise_add_sat((__v16qs)a, (__v16qs)b);
115484344399STimo Kreuzer #else
115584344399STimo Kreuzer     return (__m128i)__builtin_ia32_paddsb128((__v16qi)a, (__v16qi)b);
115684344399STimo Kreuzer #endif
115784344399STimo Kreuzer }
115884344399STimo Kreuzer 
_mm_adds_epi16(__m128i a,__m128i b)115984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_adds_epi16(__m128i a, __m128i b)
116084344399STimo Kreuzer {
116184344399STimo Kreuzer #if HAS_BUILTIN(__builtin_elementwise_add_sat)
116284344399STimo Kreuzer     return (__m128i)__builtin_elementwise_add_sat((__v8hi)a, (__v8hi)b);
116384344399STimo Kreuzer #else
116484344399STimo Kreuzer     return (__m128i)__builtin_ia32_paddsw128((__v8hi)a, (__v8hi)b);
116584344399STimo Kreuzer #endif
116684344399STimo Kreuzer }
116784344399STimo Kreuzer 
_mm_adds_epu8(__m128i a,__m128i b)116884344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_adds_epu8(__m128i a, __m128i b)
116984344399STimo Kreuzer {
117084344399STimo Kreuzer #if HAS_BUILTIN(__builtin_elementwise_add_sat)
117184344399STimo Kreuzer     return (__m128i)__builtin_elementwise_add_sat((__v16qu)a, (__v16qu)b);
117284344399STimo Kreuzer #else
117384344399STimo Kreuzer     return (__m128i)__builtin_ia32_paddusb128((__v16qi)a, (__v16qi)b);
117484344399STimo Kreuzer #endif
117584344399STimo Kreuzer }
117684344399STimo Kreuzer 
_mm_adds_epu16(__m128i a,__m128i b)117784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_adds_epu16(__m128i a, __m128i b)
117884344399STimo Kreuzer {
117984344399STimo Kreuzer #if HAS_BUILTIN(__builtin_elementwise_add_sat)
118084344399STimo Kreuzer     return (__m128i)__builtin_elementwise_add_sat((__v8hu)a, (__v8hu)b);
118184344399STimo Kreuzer #else
118284344399STimo Kreuzer     return (__m128i)__builtin_ia32_paddusw128((__v8hi)a, (__v8hi)b);
118384344399STimo Kreuzer #endif
118484344399STimo Kreuzer }
118584344399STimo Kreuzer 
_mm_avg_epu8(__m128i a,__m128i b)118684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_avg_epu8(__m128i a, __m128i b)
118784344399STimo Kreuzer {
118884344399STimo Kreuzer     return (__m128i)__builtin_ia32_pavgb128((__v16qi)a, (__v16qi)b);
118984344399STimo Kreuzer }
119084344399STimo Kreuzer 
_mm_avg_epu16(__m128i a,__m128i b)119184344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_avg_epu16(__m128i a, __m128i b)
119284344399STimo Kreuzer {
119384344399STimo Kreuzer     return (__m128i)__builtin_ia32_pavgw128((__v8hi)a, (__v8hi)b);
119484344399STimo Kreuzer }
119584344399STimo Kreuzer 
_mm_madd_epi16(__m128i a,__m128i b)119684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_madd_epi16(__m128i a, __m128i b)
119784344399STimo Kreuzer {
119884344399STimo Kreuzer     return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)a, (__v8hi)b);
119984344399STimo Kreuzer }
120084344399STimo Kreuzer 
_mm_max_epi16(__m128i a,__m128i b)120184344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_max_epi16(__m128i a, __m128i b)
120284344399STimo Kreuzer {
120384344399STimo Kreuzer #if HAS_BUILTIN(__builtin_elementwise_max)
120484344399STimo Kreuzer     return (__m128i)__builtin_elementwise_max((__v8hi)a, (__v8hi)b);
120584344399STimo Kreuzer #else
120684344399STimo Kreuzer     return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)a, (__v8hi)b);
120784344399STimo Kreuzer #endif
120884344399STimo Kreuzer }
120984344399STimo Kreuzer 
_mm_max_epu8(__m128i a,__m128i b)121084344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_max_epu8(__m128i a, __m128i b)
121184344399STimo Kreuzer {
121284344399STimo Kreuzer #if HAS_BUILTIN(__builtin_elementwise_max)
121384344399STimo Kreuzer     return (__m128i)__builtin_elementwise_max((__v16qu)a, (__v16qu)b);
121484344399STimo Kreuzer #else
121584344399STimo Kreuzer     return (__m128i)__builtin_ia32_pmaxub128((__v16qi)a, (__v16qi)b);
121684344399STimo Kreuzer #endif
121784344399STimo Kreuzer }
121884344399STimo Kreuzer 
_mm_min_epi16(__m128i a,__m128i b)121984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_min_epi16(__m128i a, __m128i b)
122084344399STimo Kreuzer {
122184344399STimo Kreuzer #if HAS_BUILTIN(__builtin_elementwise_min)
122284344399STimo Kreuzer     return (__m128i)__builtin_elementwise_min((__v8hi)a, (__v8hi)b);
122384344399STimo Kreuzer #else
122484344399STimo Kreuzer     return (__m128i)__builtin_ia32_pminsw128((__v8hi)a, (__v8hi)b);
122584344399STimo Kreuzer #endif
122684344399STimo Kreuzer }
122784344399STimo Kreuzer 
_mm_min_epu8(__m128i a,__m128i b)122884344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_min_epu8(__m128i a, __m128i b)
122984344399STimo Kreuzer {
123084344399STimo Kreuzer #if HAS_BUILTIN(__builtin_elementwise_min)
123184344399STimo Kreuzer     return (__m128i)__builtin_elementwise_min((__v16qu)a, (__v16qu)b);
123284344399STimo Kreuzer #else
123384344399STimo Kreuzer     return (__m128i)__builtin_ia32_pminub128((__v16qi)a, (__v16qi)b);
123484344399STimo Kreuzer #endif
123584344399STimo Kreuzer }
123684344399STimo Kreuzer 
_mm_mulhi_epi16(__m128i a,__m128i b)123784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_mulhi_epi16(__m128i a, __m128i b)
123884344399STimo Kreuzer {
123984344399STimo Kreuzer     return (__m128i)__builtin_ia32_pmulhw128((__v8hi)a, (__v8hi)b);
124084344399STimo Kreuzer }
124184344399STimo Kreuzer 
_mm_mulhi_epu16(__m128i a,__m128i b)124284344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_mulhi_epu16(__m128i a, __m128i b)
124384344399STimo Kreuzer {
124484344399STimo Kreuzer     return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)a, (__v8hi)b);
124584344399STimo Kreuzer }
124684344399STimo Kreuzer 
_mm_mullo_epi16(__m128i a,__m128i b)124784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_mullo_epi16(__m128i a, __m128i b)
124884344399STimo Kreuzer {
124984344399STimo Kreuzer     return (__m128i)((__v8hu)a * (__v8hu)b);
125084344399STimo Kreuzer }
125184344399STimo Kreuzer 
_mm_mul_su32(__m64 a,__m64 b)1252*a67f3688STimo Kreuzer __INTRIN_INLINE_MMXSSE2 __m64 _mm_mul_su32(__m64 a, __m64 b)
125384344399STimo Kreuzer {
125484344399STimo Kreuzer     return (__m64)__builtin_ia32_pmuludq((__v2si)a, (__v2si)b);
125584344399STimo Kreuzer }
125684344399STimo Kreuzer 
_mm_mul_epu32(__m128i a,__m128i b)125784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_mul_epu32(__m128i a, __m128i b)
125884344399STimo Kreuzer {
125984344399STimo Kreuzer     return __builtin_ia32_pmuludq128((__v4si)a, (__v4si)b);
126084344399STimo Kreuzer }
126184344399STimo Kreuzer 
_mm_sad_epu8(__m128i a,__m128i b)126284344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_sad_epu8(__m128i a, __m128i b)
126384344399STimo Kreuzer {
126484344399STimo Kreuzer     return __builtin_ia32_psadbw128((__v16qi)a, (__v16qi)b);
126584344399STimo Kreuzer }
126684344399STimo Kreuzer 
_mm_sub_epi8(__m128i a,__m128i b)126784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_sub_epi8(__m128i a, __m128i b)
126884344399STimo Kreuzer {
126984344399STimo Kreuzer     return (__m128i)((__v16qu)a - (__v16qu)b);
127084344399STimo Kreuzer }
127184344399STimo Kreuzer 
_mm_sub_epi16(__m128i a,__m128i b)127284344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_sub_epi16(__m128i a, __m128i b)
127384344399STimo Kreuzer {
127484344399STimo Kreuzer     return (__m128i)((__v8hu)a - (__v8hu)b);
127584344399STimo Kreuzer }
127684344399STimo Kreuzer 
_mm_sub_epi32(__m128i a,__m128i b)127784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_sub_epi32(__m128i a, __m128i b)
127884344399STimo Kreuzer {
127984344399STimo Kreuzer     return (__m128i)((__v4su)a - (__v4su)b);
128084344399STimo Kreuzer }
128184344399STimo Kreuzer 
_mm_sub_si64(__m64 a,__m64 b)1282*a67f3688STimo Kreuzer __INTRIN_INLINE_MMXSSE2 __m64 _mm_sub_si64(__m64 a, __m64 b)
128384344399STimo Kreuzer {
128484344399STimo Kreuzer     return (__m64)__builtin_ia32_psubq((__v1di)a, (__v1di)b);
128584344399STimo Kreuzer }
128684344399STimo Kreuzer 
_mm_sub_epi64(__m128i a,__m128i b)128784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_sub_epi64(__m128i a, __m128i b)
128884344399STimo Kreuzer {
128984344399STimo Kreuzer     return (__m128i)((__v2du)a - (__v2du)b);
129084344399STimo Kreuzer }
129184344399STimo Kreuzer 
_mm_subs_epi8(__m128i a,__m128i b)129284344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_subs_epi8(__m128i a, __m128i b)
129384344399STimo Kreuzer {
129484344399STimo Kreuzer #if HAS_BUILTIN(__builtin_elementwise_sub_sat)
129584344399STimo Kreuzer     return (__m128i)__builtin_elementwise_sub_sat((__v16qs)a, (__v16qs)b);
129684344399STimo Kreuzer #else
129784344399STimo Kreuzer     return (__m128i)__builtin_ia32_psubsb128((__v16qi)a, (__v16qi)b);
129884344399STimo Kreuzer #endif
129984344399STimo Kreuzer }
130084344399STimo Kreuzer 
_mm_subs_epi16(__m128i a,__m128i b)130184344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_subs_epi16(__m128i a, __m128i b)
130284344399STimo Kreuzer {
130384344399STimo Kreuzer #if HAS_BUILTIN(__builtin_elementwise_sub_sat)
130484344399STimo Kreuzer     return (__m128i)__builtin_elementwise_sub_sat((__v8hi)a, (__v8hi)b);
130584344399STimo Kreuzer #else
130684344399STimo Kreuzer     return (__m128i)__builtin_ia32_psubsw128((__v8hi)a, (__v8hi)b);
130784344399STimo Kreuzer #endif
130884344399STimo Kreuzer }
130984344399STimo Kreuzer 
_mm_subs_epu8(__m128i a,__m128i b)131084344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_subs_epu8(__m128i a, __m128i b)
131184344399STimo Kreuzer {
131284344399STimo Kreuzer #if HAS_BUILTIN(__builtin_elementwise_sub_sat)
131384344399STimo Kreuzer     return (__m128i)__builtin_elementwise_sub_sat((__v16qu)a, (__v16qu)b);
131484344399STimo Kreuzer #else
131584344399STimo Kreuzer     return (__m128i)__builtin_ia32_psubusb128((__v16qi)a, (__v16qi)b);
131684344399STimo Kreuzer #endif
131784344399STimo Kreuzer }
131884344399STimo Kreuzer 
_mm_subs_epu16(__m128i a,__m128i b)131984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_subs_epu16(__m128i a, __m128i b)
132084344399STimo Kreuzer {
132184344399STimo Kreuzer #if HAS_BUILTIN(__builtin_elementwise_sub_sat)
132284344399STimo Kreuzer     return (__m128i)__builtin_elementwise_sub_sat((__v8hu)a, (__v8hu)b);
132384344399STimo Kreuzer #else
132484344399STimo Kreuzer     return (__m128i)__builtin_ia32_psubusw128((__v8hi)a, (__v8hi)b);
132584344399STimo Kreuzer #endif
132684344399STimo Kreuzer }
132784344399STimo Kreuzer 
_mm_and_si128(__m128i a,__m128i b)132884344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_and_si128(__m128i a, __m128i b)
132984344399STimo Kreuzer {
133084344399STimo Kreuzer     return (__m128i)((__v2du)a & (__v2du)b);
133184344399STimo Kreuzer }
133284344399STimo Kreuzer 
_mm_andnot_si128(__m128i a,__m128i b)133384344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_andnot_si128(__m128i a, __m128i b)
133484344399STimo Kreuzer {
133584344399STimo Kreuzer     return (__m128i)(~(__v2du)a & (__v2du)b);
133684344399STimo Kreuzer }
133784344399STimo Kreuzer 
_mm_or_si128(__m128i a,__m128i b)133884344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_or_si128(__m128i a, __m128i b)
133984344399STimo Kreuzer {
134084344399STimo Kreuzer     return (__m128i)((__v2du)a | (__v2du)b);
134184344399STimo Kreuzer }
134284344399STimo Kreuzer 
_mm_xor_si128(__m128i a,__m128i b)134384344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_xor_si128(__m128i a, __m128i b)
134484344399STimo Kreuzer {
134584344399STimo Kreuzer     return (__m128i)((__v2du)a ^ (__v2du)b);
134684344399STimo Kreuzer }
134784344399STimo Kreuzer 
/* Expands to a call of __builtin_ia32_pslldqi128_byteshift on (a), converted
   to __m128i and viewed as __v2di, with (imm) converted to int.  The result
   is cast to __m128i.  Implemented as a macro rather than a function. */
#define _mm_slli_si128(a, imm)                        \
    ((__m128i)__builtin_ia32_pslldqi128_byteshift(    \
        (__v2di)(__m128i)(a), (int)(imm)))
135084344399STimo Kreuzer 
_mm_slli_epi16(__m128i a,int count)135184344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_slli_epi16(__m128i a, int count)
135284344399STimo Kreuzer {
135384344399STimo Kreuzer     return (__m128i)__builtin_ia32_psllwi128((__v8hi)a, count);
135484344399STimo Kreuzer }
135584344399STimo Kreuzer 
_mm_sll_epi16(__m128i a,__m128i count)135684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_sll_epi16(__m128i a, __m128i count)
135784344399STimo Kreuzer {
135884344399STimo Kreuzer     return (__m128i)__builtin_ia32_psllw128((__v8hi)a, (__v8hi)count);
135984344399STimo Kreuzer }
136084344399STimo Kreuzer 
_mm_slli_epi32(__m128i a,int count)136184344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_slli_epi32(__m128i a, int count)
136284344399STimo Kreuzer {
136384344399STimo Kreuzer     return (__m128i)__builtin_ia32_pslldi128((__v4si)a, count);
136484344399STimo Kreuzer }
136584344399STimo Kreuzer 
_mm_sll_epi32(__m128i a,__m128i count)136684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_sll_epi32(__m128i a, __m128i count)
136784344399STimo Kreuzer {
136884344399STimo Kreuzer     return (__m128i)__builtin_ia32_pslld128((__v4si)a, (__v4si)count);
136984344399STimo Kreuzer }
137084344399STimo Kreuzer 
_mm_slli_epi64(__m128i a,int count)137184344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_slli_epi64(__m128i a, int count)
137284344399STimo Kreuzer {
137384344399STimo Kreuzer     return __builtin_ia32_psllqi128((__v2di)a, count);
137484344399STimo Kreuzer }
137584344399STimo Kreuzer 
_mm_sll_epi64(__m128i a,__m128i count)137684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_sll_epi64(__m128i a, __m128i count)
137784344399STimo Kreuzer {
137884344399STimo Kreuzer     return __builtin_ia32_psllq128((__v2di)a, (__v2di)count);
137984344399STimo Kreuzer }
138084344399STimo Kreuzer 
_mm_srai_epi16(__m128i a,int count)138184344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_srai_epi16(__m128i a, int count)
138284344399STimo Kreuzer {
138384344399STimo Kreuzer     return (__m128i)__builtin_ia32_psrawi128((__v8hi)a, count);
138484344399STimo Kreuzer }
138584344399STimo Kreuzer 
_mm_sra_epi16(__m128i a,__m128i count)138684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_sra_epi16(__m128i a, __m128i count)
138784344399STimo Kreuzer {
138884344399STimo Kreuzer     return (__m128i)__builtin_ia32_psraw128((__v8hi)a, (__v8hi)count);
138984344399STimo Kreuzer }
139084344399STimo Kreuzer 
_mm_srai_epi32(__m128i a,int count)139184344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_srai_epi32(__m128i a, int count)
139284344399STimo Kreuzer {
139384344399STimo Kreuzer     return (__m128i)__builtin_ia32_psradi128((__v4si)a, count);
139484344399STimo Kreuzer }
139584344399STimo Kreuzer 
_mm_sra_epi32(__m128i a,__m128i count)139684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_sra_epi32(__m128i a, __m128i count)
139784344399STimo Kreuzer {
139884344399STimo Kreuzer     return (__m128i)__builtin_ia32_psrad128((__v4si)a, (__v4si)count);
139984344399STimo Kreuzer }
140084344399STimo Kreuzer 
/* Expands to a call of __builtin_ia32_psrldqi128_byteshift on (a), converted
   to __m128i and viewed as __v2di, with (imm) converted to int.  The result
   is cast to __m128i.  Implemented as a macro rather than a function. */
#define _mm_srli_si128(a, imm)                        \
    ((__m128i)__builtin_ia32_psrldqi128_byteshift(    \
        (__v2di)(__m128i)(a), (int)(imm)))
140384344399STimo Kreuzer 
_mm_srli_epi16(__m128i a,int count)140484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_srli_epi16(__m128i a, int count)
140584344399STimo Kreuzer {
140684344399STimo Kreuzer     return (__m128i)__builtin_ia32_psrlwi128((__v8hi)a, count);
140784344399STimo Kreuzer }
140884344399STimo Kreuzer 
_mm_srl_epi16(__m128i a,__m128i count)140984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_srl_epi16(__m128i a, __m128i count)
141084344399STimo Kreuzer {
141184344399STimo Kreuzer     return (__m128i)__builtin_ia32_psrlw128((__v8hi)a, (__v8hi)count);
141284344399STimo Kreuzer }
141384344399STimo Kreuzer 
_mm_srli_epi32(__m128i a,int count)141484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_srli_epi32(__m128i a, int count)
141584344399STimo Kreuzer {
141684344399STimo Kreuzer     return (__m128i)__builtin_ia32_psrldi128((__v4si)a, count);
141784344399STimo Kreuzer }
141884344399STimo Kreuzer 
_mm_srl_epi32(__m128i a,__m128i count)141984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_srl_epi32(__m128i a, __m128i count)
142084344399STimo Kreuzer {
142184344399STimo Kreuzer     return (__m128i)__builtin_ia32_psrld128((__v4si)a, (__v4si)count);
142284344399STimo Kreuzer }
142384344399STimo Kreuzer 
_mm_srli_epi64(__m128i a,int count)142484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_srli_epi64(__m128i a, int count)
142584344399STimo Kreuzer {
142684344399STimo Kreuzer     return __builtin_ia32_psrlqi128((__v2di)a, count);
142784344399STimo Kreuzer }
142884344399STimo Kreuzer 
_mm_srl_epi64(__m128i a,__m128i count)142984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_srl_epi64(__m128i a, __m128i count)
143084344399STimo Kreuzer {
143184344399STimo Kreuzer     return __builtin_ia32_psrlq128((__v2di)a, (__v2di)count);
143284344399STimo Kreuzer }
143384344399STimo Kreuzer 
_mm_cmpeq_epi8(__m128i a,__m128i b)143484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_cmpeq_epi8(__m128i a, __m128i b)
143584344399STimo Kreuzer {
143684344399STimo Kreuzer     return (__m128i)((__v16qi)a == (__v16qi)b);
143784344399STimo Kreuzer }
143884344399STimo Kreuzer 
_mm_cmpeq_epi16(__m128i a,__m128i b)143984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_cmpeq_epi16(__m128i a, __m128i b)
144084344399STimo Kreuzer {
144184344399STimo Kreuzer     return (__m128i)((__v8hi)a == (__v8hi)b);
144284344399STimo Kreuzer }
144384344399STimo Kreuzer 
_mm_cmpeq_epi32(__m128i a,__m128i b)144484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_cmpeq_epi32(__m128i a, __m128i b)
144584344399STimo Kreuzer {
144684344399STimo Kreuzer     return (__m128i)((__v4si)a == (__v4si)b);
144784344399STimo Kreuzer }
144884344399STimo Kreuzer 
_mm_cmpgt_epi8(__m128i a,__m128i b)144984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_cmpgt_epi8(__m128i a, __m128i b)
145084344399STimo Kreuzer {
145184344399STimo Kreuzer     /* This function always performs a signed comparison, but __v16qi is a char
145284344399STimo Kreuzer        which may be signed or unsigned, so use __v16qs. */
145384344399STimo Kreuzer     return (__m128i)((__v16qs)a > (__v16qs)b);
145484344399STimo Kreuzer }
145584344399STimo Kreuzer 
_mm_cmpgt_epi16(__m128i a,__m128i b)145684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_cmpgt_epi16(__m128i a, __m128i b)
145784344399STimo Kreuzer {
145884344399STimo Kreuzer     return (__m128i)((__v8hi)a > (__v8hi)b);
145984344399STimo Kreuzer }
146084344399STimo Kreuzer 
_mm_cmpgt_epi32(__m128i a,__m128i b)146184344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_cmpgt_epi32(__m128i a, __m128i b)
146284344399STimo Kreuzer {
146384344399STimo Kreuzer     return (__m128i)((__v4si)a > (__v4si)b);
146484344399STimo Kreuzer }
146584344399STimo Kreuzer 
_mm_cmplt_epi8(__m128i a,__m128i b)146684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_cmplt_epi8(__m128i a, __m128i b)
146784344399STimo Kreuzer {
146884344399STimo Kreuzer     return _mm_cmpgt_epi8(b, a);
146984344399STimo Kreuzer }
147084344399STimo Kreuzer 
_mm_cmplt_epi16(__m128i a,__m128i b)147184344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_cmplt_epi16(__m128i a, __m128i b)
147284344399STimo Kreuzer {
147384344399STimo Kreuzer     return _mm_cmpgt_epi16(b, a);
147484344399STimo Kreuzer }
147584344399STimo Kreuzer 
_mm_cmplt_epi32(__m128i a,__m128i b)147684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_cmplt_epi32(__m128i a, __m128i b)
147784344399STimo Kreuzer {
147884344399STimo Kreuzer     return _mm_cmpgt_epi32(b, a);
147984344399STimo Kreuzer }
148084344399STimo Kreuzer 
148184344399STimo Kreuzer #ifdef _M_AMD64
148284344399STimo Kreuzer 
_mm_cvtsi64_sd(__m128d a,long long b)148384344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_cvtsi64_sd(__m128d a, long long b)
148484344399STimo Kreuzer {
148584344399STimo Kreuzer     a[0] = b;
148684344399STimo Kreuzer     return a;
148784344399STimo Kreuzer }
148884344399STimo Kreuzer 
_mm_cvtsd_si64(__m128d a)148984344399STimo Kreuzer __INTRIN_INLINE_SSE2 long long _mm_cvtsd_si64(__m128d a)
149084344399STimo Kreuzer {
149184344399STimo Kreuzer     return __builtin_ia32_cvtsd2si64((__v2df)a);
149284344399STimo Kreuzer }
149384344399STimo Kreuzer 
_mm_cvttsd_si64(__m128d a)149484344399STimo Kreuzer __INTRIN_INLINE_SSE2 long long _mm_cvttsd_si64(__m128d a)
149584344399STimo Kreuzer {
149684344399STimo Kreuzer     return __builtin_ia32_cvttsd2si64((__v2df)a);
149784344399STimo Kreuzer }
149884344399STimo Kreuzer #endif
149984344399STimo Kreuzer 
_mm_cvtepi32_ps(__m128i a)150084344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128 _mm_cvtepi32_ps(__m128i a)
150184344399STimo Kreuzer {
150284344399STimo Kreuzer #if HAS_BUILTIN(__builtin_convertvector)
150384344399STimo Kreuzer     return (__m128)__builtin_convertvector((__v4si)a, __v4sf);
150484344399STimo Kreuzer #else
150584344399STimo Kreuzer     return __builtin_ia32_cvtdq2ps((__v4si)a);
150684344399STimo Kreuzer #endif
150784344399STimo Kreuzer }
150884344399STimo Kreuzer 
_mm_cvtps_epi32(__m128 a)150984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_cvtps_epi32(__m128 a)
151084344399STimo Kreuzer {
151184344399STimo Kreuzer     return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)a);
151284344399STimo Kreuzer }
151384344399STimo Kreuzer 
_mm_cvttps_epi32(__m128 a)151484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_cvttps_epi32(__m128 a)
151584344399STimo Kreuzer {
151684344399STimo Kreuzer     return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)a);
151784344399STimo Kreuzer }
151884344399STimo Kreuzer 
_mm_cvtsi32_si128(int a)151984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_cvtsi32_si128(int a)
152084344399STimo Kreuzer {
152184344399STimo Kreuzer     return __extension__(__m128i)(__v4si){a, 0, 0, 0};
152284344399STimo Kreuzer }
152384344399STimo Kreuzer 
_mm_cvtsi64_si128(long long a)152484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_cvtsi64_si128(long long a)
152584344399STimo Kreuzer {
152684344399STimo Kreuzer     return __extension__(__m128i)(__v2di){a, 0};
152784344399STimo Kreuzer }
152884344399STimo Kreuzer 
_mm_cvtsi128_si32(__m128i a)152984344399STimo Kreuzer __INTRIN_INLINE_SSE2 int _mm_cvtsi128_si32(__m128i a)
153084344399STimo Kreuzer {
153184344399STimo Kreuzer     __v4si b = (__v4si)a;
153284344399STimo Kreuzer     return b[0];
153384344399STimo Kreuzer }
153484344399STimo Kreuzer 
_mm_cvtsi128_si64(__m128i a)153584344399STimo Kreuzer __INTRIN_INLINE_SSE2 long long _mm_cvtsi128_si64(__m128i a)
153684344399STimo Kreuzer {
153784344399STimo Kreuzer     return a[0];
153884344399STimo Kreuzer }
153984344399STimo Kreuzer 
_mm_load_si128(__m128i const * p)154084344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_load_si128(__m128i const *p)
154184344399STimo Kreuzer {
154284344399STimo Kreuzer     return *p;
154384344399STimo Kreuzer }
154484344399STimo Kreuzer 
_mm_loadu_si128(__m128i_u const * p)154584344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_loadu_si128(__m128i_u const *p)
154684344399STimo Kreuzer {
154784344399STimo Kreuzer     struct __loadu_si128 {
154884344399STimo Kreuzer       __m128i_u __v;
154984344399STimo Kreuzer     } __attribute__((__packed__, __may_alias__));
155084344399STimo Kreuzer     return ((const struct __loadu_si128 *)p)->__v;
155184344399STimo Kreuzer }
155284344399STimo Kreuzer 
_mm_loadl_epi64(__m128i_u const * p)155384344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_loadl_epi64(__m128i_u const *p)
155484344399STimo Kreuzer {
155584344399STimo Kreuzer     struct __mm_loadl_epi64_struct {
155684344399STimo Kreuzer       long long __u;
155784344399STimo Kreuzer     } __attribute__((__packed__, __may_alias__));
155884344399STimo Kreuzer     return __extension__(__m128i){
155984344399STimo Kreuzer         ((const struct __mm_loadl_epi64_struct *)p)->__u, 0};
156084344399STimo Kreuzer }
156184344399STimo Kreuzer 
_mm_undefined_si128(void)156284344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_undefined_si128(void)
156384344399STimo Kreuzer {
156484344399STimo Kreuzer #if HAS_BUILTIN(__builtin_ia32_undef128)
156584344399STimo Kreuzer     return (__m128i)__builtin_ia32_undef128();
156684344399STimo Kreuzer #else
156784344399STimo Kreuzer     __m128i undef = undef;
156884344399STimo Kreuzer     return undef;
156984344399STimo Kreuzer #endif
157084344399STimo Kreuzer }
157184344399STimo Kreuzer 
_mm_set_epi64x(long long q1,long long q0)157284344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_set_epi64x(long long q1, long long q0)
157384344399STimo Kreuzer {
157484344399STimo Kreuzer     return __extension__(__m128i)(__v2di){q0, q1};
157584344399STimo Kreuzer }
157684344399STimo Kreuzer 
_mm_set_epi64(__m64 q1,__m64 q0)157784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_set_epi64(__m64 q1, __m64 q0)
157884344399STimo Kreuzer {
157984344399STimo Kreuzer     return _mm_set_epi64x((long long)q1, (long long)q0);
158084344399STimo Kreuzer }
158184344399STimo Kreuzer 
_mm_set_epi32(int i3,int i2,int i1,int i0)158284344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_set_epi32(int i3, int i2, int i1, int i0)
158384344399STimo Kreuzer {
158484344399STimo Kreuzer     return __extension__(__m128i)(__v4si){i0, i1, i2, i3};
158584344399STimo Kreuzer }
158684344399STimo Kreuzer 
_mm_set_epi16(short w7,short w6,short w5,short w4,short w3,short w2,short w1,short w0)158784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_set_epi16(
158884344399STimo Kreuzer     short w7, short w6, short w5, short w4,
158984344399STimo Kreuzer     short w3, short w2, short w1, short w0)
159084344399STimo Kreuzer {
159184344399STimo Kreuzer     return __extension__(__m128i)(__v8hi){w0, w1, w2, w3, w4, w5, w6, w7};
159284344399STimo Kreuzer }
159384344399STimo Kreuzer 
_mm_set_epi8(char b15,char b14,char b13,char b12,char b11,char b10,char b9,char b8,char b7,char b6,char b5,char b4,char b3,char b2,char b1,char b0)159484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_set_epi8(
159584344399STimo Kreuzer     char b15, char b14, char b13, char b12,
159684344399STimo Kreuzer     char b11, char b10, char b9, char b8,
159784344399STimo Kreuzer     char b7, char b6, char b5, char b4,
159884344399STimo Kreuzer     char b3, char b2, char b1, char b0)
159984344399STimo Kreuzer {
160084344399STimo Kreuzer     return __extension__(__m128i)(__v16qi){
160184344399STimo Kreuzer         b0, b1, b2,  b3,  b4,  b5,  b6,  b7,
160284344399STimo Kreuzer         b8, b9, b10, b11, b12, b13, b14, b15};
160384344399STimo Kreuzer }
160484344399STimo Kreuzer 
_mm_set1_epi64x(long long q)160584344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_set1_epi64x(long long q)
160684344399STimo Kreuzer {
160784344399STimo Kreuzer     return _mm_set_epi64x(q, q);
160884344399STimo Kreuzer }
160984344399STimo Kreuzer 
_mm_set1_epi64(__m64 q)161084344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_set1_epi64(__m64 q)
161184344399STimo Kreuzer {
161284344399STimo Kreuzer     return _mm_set_epi64(q, q);
161384344399STimo Kreuzer }
161484344399STimo Kreuzer 
_mm_set1_epi32(int i)161584344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_set1_epi32(int i)
161684344399STimo Kreuzer {
161784344399STimo Kreuzer     return _mm_set_epi32(i, i, i, i);
161884344399STimo Kreuzer }
161984344399STimo Kreuzer 
_mm_set1_epi16(short w)162084344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_set1_epi16(short w)
162184344399STimo Kreuzer {
162284344399STimo Kreuzer     return _mm_set_epi16(w, w, w, w, w, w, w, w);
162384344399STimo Kreuzer }
162484344399STimo Kreuzer 
_mm_set1_epi8(char b)162584344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_set1_epi8(char b)
162684344399STimo Kreuzer {
162784344399STimo Kreuzer     return _mm_set_epi8(b, b, b, b, b, b, b, b, b, b, b,
162884344399STimo Kreuzer                         b, b, b, b, b);
162984344399STimo Kreuzer }
163084344399STimo Kreuzer 
_mm_setr_epi64(__m64 q0,__m64 q1)163184344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_setr_epi64(__m64 q0, __m64 q1)
163284344399STimo Kreuzer {
163384344399STimo Kreuzer     return _mm_set_epi64(q1, q0);
163484344399STimo Kreuzer }
163584344399STimo Kreuzer 
_mm_setr_epi32(int i0,int i1,int i2,int i3)163684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_setr_epi32(int i0, int i1, int i2, int i3)
163784344399STimo Kreuzer {
163884344399STimo Kreuzer     return _mm_set_epi32(i3, i2, i1, i0);
163984344399STimo Kreuzer }
164084344399STimo Kreuzer 
_mm_setr_epi16(short w0,short w1,short w2,short w3,short w4,short w5,short w6,short w7)164184344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_setr_epi16(
164284344399STimo Kreuzer     short w0, short w1, short w2, short w3,
164384344399STimo Kreuzer     short w4, short w5, short w6, short w7)
164484344399STimo Kreuzer {
164584344399STimo Kreuzer     return _mm_set_epi16(w7, w6, w5, w4, w3, w2, w1, w0);
164684344399STimo Kreuzer }
164784344399STimo Kreuzer 
_mm_setr_epi8(char b0,char b1,char b2,char b3,char b4,char b5,char b6,char b7,char b8,char b9,char b10,char b11,char b12,char b13,char b14,char b15)164884344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_setr_epi8(
164984344399STimo Kreuzer     char b0, char b1, char b2, char b3,
165084344399STimo Kreuzer     char b4, char b5, char b6, char b7,
165184344399STimo Kreuzer     char b8, char b9, char b10,  char b11,
165284344399STimo Kreuzer     char b12, char b13, char b14, char b15)
165384344399STimo Kreuzer {
165484344399STimo Kreuzer     return _mm_set_epi8(b15, b14, b13, b12, b11, b10, b9, b8,
165584344399STimo Kreuzer                         b7, b6, b5, b4, b3, b2, b1, b0);
165684344399STimo Kreuzer }
165784344399STimo Kreuzer 
_mm_setzero_si128(void)165884344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_setzero_si128(void)
165984344399STimo Kreuzer {
166084344399STimo Kreuzer     return __extension__(__m128i)(__v2di){0LL, 0LL};
166184344399STimo Kreuzer }
166284344399STimo Kreuzer 
_mm_store_si128(__m128i * p,__m128i b)166384344399STimo Kreuzer __INTRIN_INLINE_SSE2 void _mm_store_si128(__m128i *p, __m128i b)
166484344399STimo Kreuzer {
166584344399STimo Kreuzer     *p = b;
166684344399STimo Kreuzer }
166784344399STimo Kreuzer 
_mm_storeu_si128(__m128i_u * p,__m128i b)166884344399STimo Kreuzer __INTRIN_INLINE_SSE2 void _mm_storeu_si128(__m128i_u *p, __m128i b)
166984344399STimo Kreuzer {
167084344399STimo Kreuzer     struct __storeu_si128 {
167184344399STimo Kreuzer       __m128i_u __v;
167284344399STimo Kreuzer     } __attribute__((__packed__, __may_alias__));
167384344399STimo Kreuzer     ((struct __storeu_si128 *)p)->__v = b;
167484344399STimo Kreuzer }
167584344399STimo Kreuzer 
_mm_storeu_si64(void * p,__m128i b)167684344399STimo Kreuzer __INTRIN_INLINE_SSE2 void _mm_storeu_si64(void *p, __m128i b)
167784344399STimo Kreuzer {
167884344399STimo Kreuzer     struct __storeu_si64 {
167984344399STimo Kreuzer       long long __v;
168084344399STimo Kreuzer     } __attribute__((__packed__, __may_alias__));
168184344399STimo Kreuzer     ((struct __storeu_si64 *)p)->__v = ((__v2di)b)[0];
168284344399STimo Kreuzer }
168384344399STimo Kreuzer 
_mm_storeu_si32(void * p,__m128i b)168484344399STimo Kreuzer __INTRIN_INLINE_SSE2 void _mm_storeu_si32(void *p, __m128i b)
168584344399STimo Kreuzer {
168684344399STimo Kreuzer     struct __storeu_si32 {
168784344399STimo Kreuzer       int __v;
168884344399STimo Kreuzer     } __attribute__((__packed__, __may_alias__));
168984344399STimo Kreuzer     ((struct __storeu_si32 *)p)->__v = ((__v4si)b)[0];
169084344399STimo Kreuzer }
169184344399STimo Kreuzer 
_mm_storeu_si16(void * p,__m128i b)169284344399STimo Kreuzer __INTRIN_INLINE_SSE2 void _mm_storeu_si16(void *p, __m128i b)
169384344399STimo Kreuzer {
169484344399STimo Kreuzer     struct __storeu_si16 {
169584344399STimo Kreuzer       short __v;
169684344399STimo Kreuzer     } __attribute__((__packed__, __may_alias__));
169784344399STimo Kreuzer     ((struct __storeu_si16 *)p)->__v = ((__v8hi)b)[0];
169884344399STimo Kreuzer }
169984344399STimo Kreuzer 
_mm_maskmoveu_si128(__m128i d,__m128i n,char * p)170084344399STimo Kreuzer __INTRIN_INLINE_SSE2 void _mm_maskmoveu_si128(__m128i d, __m128i n, char *p)
170184344399STimo Kreuzer {
170284344399STimo Kreuzer     __builtin_ia32_maskmovdqu((__v16qi)d, (__v16qi)n, p);
170384344399STimo Kreuzer }
170484344399STimo Kreuzer 
_mm_storel_epi64(__m128i_u * p,__m128i a)170584344399STimo Kreuzer __INTRIN_INLINE_SSE2 void _mm_storel_epi64(__m128i_u *p, __m128i a)
170684344399STimo Kreuzer {
170784344399STimo Kreuzer     struct __mm_storel_epi64_struct {
170884344399STimo Kreuzer       long long __u;
170984344399STimo Kreuzer     } __attribute__((__packed__, __may_alias__));
171084344399STimo Kreuzer     ((struct __mm_storel_epi64_struct *)p)->__u = a[0];
171184344399STimo Kreuzer }
171284344399STimo Kreuzer 
_mm_stream_pd(double * p,__m128d a)171384344399STimo Kreuzer __INTRIN_INLINE_SSE2 void _mm_stream_pd(double *p, __m128d a)
171484344399STimo Kreuzer {
171584344399STimo Kreuzer #if HAS_BUILTIN(__builtin_nontemporal_store)
171684344399STimo Kreuzer     __builtin_nontemporal_store((__v2df)a, (__v2df *)p);
171784344399STimo Kreuzer #else
171884344399STimo Kreuzer     __builtin_ia32_movntpd(p, a);
171984344399STimo Kreuzer #endif
172084344399STimo Kreuzer }
172184344399STimo Kreuzer 
_mm_stream_si128(__m128i * p,__m128i a)172284344399STimo Kreuzer __INTRIN_INLINE_SSE2 void _mm_stream_si128(__m128i *p, __m128i a)
172384344399STimo Kreuzer {
172484344399STimo Kreuzer #if HAS_BUILTIN(__builtin_nontemporal_store)
172584344399STimo Kreuzer     __builtin_nontemporal_store((__v2di)a, (__v2di*)p);
172684344399STimo Kreuzer #else
172784344399STimo Kreuzer     __builtin_ia32_movntdq(p, a);
172884344399STimo Kreuzer #endif
172984344399STimo Kreuzer }
173084344399STimo Kreuzer 
_mm_stream_si32(int * p,int a)173184344399STimo Kreuzer __INTRIN_INLINE_SSE2 void _mm_stream_si32(int *p, int a)
173284344399STimo Kreuzer {
173384344399STimo Kreuzer     __builtin_ia32_movnti(p, a);
173484344399STimo Kreuzer }
173584344399STimo Kreuzer 
#ifdef _M_AMD64
/* Stores the 64-bit value a to p by forwarding both arguments to
 * __builtin_ia32_movnti64. Only provided when _M_AMD64 is defined. */
__INTRIN_INLINE_SSE2 void _mm_stream_si64(long long *p, long long a)
{
    __builtin_ia32_movnti64(p, a);
}
#endif /* _M_AMD64 */
174284344399STimo Kreuzer 
/* Prototypes only; this part of the header supplies no inline bodies
 * for the cache-flush and fence intrinsics. */
void _mm_clflush(const void *p);

void _mm_lfence(void);

void _mm_mfence(void);
174884344399STimo Kreuzer 
_mm_packs_epi16(__m128i a,__m128i b)174984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_packs_epi16(__m128i a, __m128i b)
175084344399STimo Kreuzer {
175184344399STimo Kreuzer     return (__m128i)__builtin_ia32_packsswb128((__v8hi)a, (__v8hi)b);
175284344399STimo Kreuzer }
175384344399STimo Kreuzer 
_mm_packs_epi32(__m128i a,__m128i b)175484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_packs_epi32(__m128i a, __m128i b)
175584344399STimo Kreuzer {
175684344399STimo Kreuzer     return (__m128i)__builtin_ia32_packssdw128((__v4si)a, (__v4si)b);
175784344399STimo Kreuzer }
175884344399STimo Kreuzer 
_mm_packus_epi16(__m128i a,__m128i b)175984344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_packus_epi16(__m128i a, __m128i b)
176084344399STimo Kreuzer {
176184344399STimo Kreuzer     return (__m128i)__builtin_ia32_packuswb128((__v8hi)a, (__v8hi)b);
176284344399STimo Kreuzer }
176384344399STimo Kreuzer 
/* Extracts the 16-bit lane selected by imm from a. The lane is converted to
 * unsigned short before being widened to int, so the result is never
 * negative. Kept as a macro; imm is handed straight to the builtin. */
#define _mm_extract_epi16(a, imm)                                              \
    ((int)(unsigned short)__builtin_ia32_vec_ext_v8hi(                         \
        (__v8hi)(__m128i)(a), (int)(imm)))
176784344399STimo Kreuzer 
/* Forwards a (viewed as eight 16-bit lanes), the value b and the lane
 * selector imm to __builtin_ia32_vec_set_v8hi and yields the result as
 * __m128i. Kept as a macro; imm is handed straight to the builtin. */
#define _mm_insert_epi16(a, b, imm)                                            \
    ((__m128i)__builtin_ia32_vec_set_v8hi(                                     \
        (__v8hi)(__m128i)(a), (int)(b), (int)(imm)))
177184344399STimo Kreuzer 
_mm_movemask_epi8(__m128i a)177284344399STimo Kreuzer __INTRIN_INLINE_SSE2 int _mm_movemask_epi8(__m128i a)
177384344399STimo Kreuzer {
177484344399STimo Kreuzer     return __builtin_ia32_pmovmskb128((__v16qi)a);
177584344399STimo Kreuzer }
177684344399STimo Kreuzer 
/* Forwards a (viewed as four 32-bit lanes) and the control value imm to
 * __builtin_ia32_pshufd. Kept as a macro; imm is handed straight to the
 * builtin. */
#define _mm_shuffle_epi32(a, imm)                                              \
    ((__m128i)__builtin_ia32_pshufd(                                           \
        (__v4si)(__m128i)(a), (int)(imm)))
177984344399STimo Kreuzer 
/* Forwards a (viewed as eight 16-bit lanes) and the control value imm to
 * __builtin_ia32_pshuflw. Kept as a macro; imm is handed straight to the
 * builtin. */
#define _mm_shufflelo_epi16(a, imm)                                            \
    ((__m128i)__builtin_ia32_pshuflw(                                          \
        (__v8hi)(__m128i)(a), (int)(imm)))
178284344399STimo Kreuzer 
/* Forwards a (viewed as eight 16-bit lanes) and the control value imm to
 * __builtin_ia32_pshufhw. Kept as a macro; imm is handed straight to the
 * builtin. */
#define _mm_shufflehi_epi16(a, imm)                                            \
    ((__m128i)__builtin_ia32_pshufhw(                                          \
        (__v8hi)(__m128i)(a), (int)(imm)))
178584344399STimo Kreuzer 
_mm_unpackhi_epi8(__m128i a,__m128i b)178684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_unpackhi_epi8(__m128i a, __m128i b)
178784344399STimo Kreuzer {
178884344399STimo Kreuzer #if HAS_BUILTIN(__builtin_shufflevector)
178984344399STimo Kreuzer     return (__m128i)__builtin_shufflevector(
179084344399STimo Kreuzer         (__v16qi)a, (__v16qi)b, 8, 16 + 8, 9, 16 + 9, 10, 16 + 10, 11,
179184344399STimo Kreuzer         16 + 11, 12, 16 + 12, 13, 16 + 13, 14, 16 + 14, 15, 16 + 15);
179284344399STimo Kreuzer #else
179384344399STimo Kreuzer     return (__m128i)__builtin_ia32_punpckhbw128((__v16qi)a, (__v16qi)b);
179484344399STimo Kreuzer #endif
179584344399STimo Kreuzer }
179684344399STimo Kreuzer 
_mm_unpackhi_epi16(__m128i a,__m128i b)179784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_unpackhi_epi16(__m128i a, __m128i b)
179884344399STimo Kreuzer {
179984344399STimo Kreuzer #if HAS_BUILTIN(__builtin_shufflevector)
180084344399STimo Kreuzer     return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 4, 8 + 4, 5,
180184344399STimo Kreuzer                                             8 + 5, 6, 8 + 6, 7, 8 + 7);
180284344399STimo Kreuzer #else
180384344399STimo Kreuzer     return (__m128i)__builtin_ia32_punpckhwd128((__v8hi)a, (__v8hi)b);
180484344399STimo Kreuzer #endif
180584344399STimo Kreuzer }
180684344399STimo Kreuzer 
_mm_unpackhi_epi32(__m128i a,__m128i b)180784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_unpackhi_epi32(__m128i a, __m128i b)
180884344399STimo Kreuzer {
180984344399STimo Kreuzer #if HAS_BUILTIN(__builtin_shufflevector)
181084344399STimo Kreuzer     return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 2, 4 + 2, 3,
181184344399STimo Kreuzer                                             4 + 3);
181284344399STimo Kreuzer #else
181384344399STimo Kreuzer     return (__m128i)__builtin_ia32_punpckhdq128((__v4si)a, (__v4si)b);
181484344399STimo Kreuzer #endif
181584344399STimo Kreuzer }
181684344399STimo Kreuzer 
_mm_unpackhi_epi64(__m128i a,__m128i b)181784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_unpackhi_epi64(__m128i a, __m128i b)
181884344399STimo Kreuzer {
181984344399STimo Kreuzer #if HAS_BUILTIN(__builtin_shufflevector)
182084344399STimo Kreuzer     return (__m128i)__builtin_shufflevector((__v2di)a, (__v2di)b, 1, 2 + 1);
182184344399STimo Kreuzer #else
182284344399STimo Kreuzer     return (__m128i)__builtin_ia32_punpckhqdq128((__v2di)a, (__v2di)b);
182384344399STimo Kreuzer #endif
182484344399STimo Kreuzer }
182584344399STimo Kreuzer 
_mm_unpacklo_epi8(__m128i a,__m128i b)182684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_unpacklo_epi8(__m128i a, __m128i b)
182784344399STimo Kreuzer {
182884344399STimo Kreuzer #if HAS_BUILTIN(__builtin_shufflevector)
182984344399STimo Kreuzer     return (__m128i)__builtin_shufflevector(
183084344399STimo Kreuzer         (__v16qi)a, (__v16qi)b, 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4,
183184344399STimo Kreuzer         16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7);
183284344399STimo Kreuzer #else
183384344399STimo Kreuzer     return (__m128i)__builtin_ia32_punpcklbw128((__v16qi)a, (__v16qi)b);
183484344399STimo Kreuzer #endif
183584344399STimo Kreuzer }
183684344399STimo Kreuzer 
_mm_unpacklo_epi16(__m128i a,__m128i b)183784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_unpacklo_epi16(__m128i a, __m128i b)
183884344399STimo Kreuzer {
183984344399STimo Kreuzer #if HAS_BUILTIN(__builtin_shufflevector)
184084344399STimo Kreuzer     return (__m128i)__builtin_shufflevector((__v8hi)a, (__v8hi)b, 0, 8 + 0, 1,
184184344399STimo Kreuzer                                             8 + 1, 2, 8 + 2, 3, 8 + 3);
184284344399STimo Kreuzer #else
184384344399STimo Kreuzer     return (__m128i)__builtin_ia32_punpcklwd128((__v8hi)a, (__v8hi)b);
184484344399STimo Kreuzer #endif
184584344399STimo Kreuzer }
184684344399STimo Kreuzer 
_mm_unpacklo_epi32(__m128i a,__m128i b)184784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_unpacklo_epi32(__m128i a, __m128i b)
184884344399STimo Kreuzer {
184984344399STimo Kreuzer #if HAS_BUILTIN(__builtin_shufflevector)
185084344399STimo Kreuzer     return (__m128i)__builtin_shufflevector((__v4si)a, (__v4si)b, 0, 4 + 0, 1,
185184344399STimo Kreuzer                                             4 + 1);
185284344399STimo Kreuzer #else
185384344399STimo Kreuzer     return (__m128i)__builtin_ia32_punpckldq128((__v4si)a, (__v4si)b);
185484344399STimo Kreuzer #endif
185584344399STimo Kreuzer }
185684344399STimo Kreuzer 
_mm_unpacklo_epi64(__m128i a,__m128i b)185784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_unpacklo_epi64(__m128i a, __m128i b)
185884344399STimo Kreuzer {
185984344399STimo Kreuzer #if HAS_BUILTIN(__builtin_shufflevector)
186084344399STimo Kreuzer     return (__m128i)__builtin_shufflevector((__v2di)a, (__v2di)b, 0, 2 + 0);
186184344399STimo Kreuzer #else
186284344399STimo Kreuzer     return (__m128i)__builtin_ia32_punpcklqdq128((__v2di)a, (__v2di)b);
186384344399STimo Kreuzer #endif
186484344399STimo Kreuzer }
186584344399STimo Kreuzer 
_mm_movepi64_pi64(__m128i a)186684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m64 _mm_movepi64_pi64(__m128i a)
186784344399STimo Kreuzer {
186884344399STimo Kreuzer     return (__m64)a[0];
186984344399STimo Kreuzer }
187084344399STimo Kreuzer 
_mm_movpi64_epi64(__m64 a)187184344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_movpi64_epi64(__m64 a)
187284344399STimo Kreuzer {
187384344399STimo Kreuzer     return __extension__(__m128i)(__v2di){(long long)a, 0};
187484344399STimo Kreuzer }
187584344399STimo Kreuzer 
_mm_move_epi64(__m128i a)187684344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_move_epi64(__m128i a)
187784344399STimo Kreuzer {
187884344399STimo Kreuzer #if HAS_BUILTIN(__builtin_shufflevector)
187984344399STimo Kreuzer     return __builtin_shufflevector((__v2di)a, _mm_setzero_si128(), 0, 2);
188084344399STimo Kreuzer #else
188184344399STimo Kreuzer     return (__m128i)__builtin_ia32_movq128((__v2di)a);
188284344399STimo Kreuzer #endif
188384344399STimo Kreuzer }
188484344399STimo Kreuzer 
_mm_unpackhi_pd(__m128d a,__m128d b)188584344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_unpackhi_pd(__m128d a, __m128d b)
188684344399STimo Kreuzer {
188784344399STimo Kreuzer #if HAS_BUILTIN(__builtin_shufflevector)
188884344399STimo Kreuzer     return __builtin_shufflevector((__v2df)a, (__v2df)b, 1, 2 + 1);
188984344399STimo Kreuzer #else
189084344399STimo Kreuzer     return (__m128d)__builtin_ia32_unpckhpd((__v2df)a, (__v2df)b);
189184344399STimo Kreuzer #endif
189284344399STimo Kreuzer }
189384344399STimo Kreuzer 
_mm_unpacklo_pd(__m128d a,__m128d b)189484344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_unpacklo_pd(__m128d a, __m128d b)
189584344399STimo Kreuzer {
189684344399STimo Kreuzer #if HAS_BUILTIN(__builtin_shufflevector)
189784344399STimo Kreuzer     return __builtin_shufflevector((__v2df)a, (__v2df)b, 0, 2 + 0);
189884344399STimo Kreuzer #else
189984344399STimo Kreuzer     return (__m128d)__builtin_ia32_unpcklpd((__v2df)a, (__v2df)b);
190084344399STimo Kreuzer #endif
190184344399STimo Kreuzer }
190284344399STimo Kreuzer 
_mm_movemask_pd(__m128d a)190384344399STimo Kreuzer __INTRIN_INLINE_SSE2 int _mm_movemask_pd(__m128d a)
190484344399STimo Kreuzer {
190584344399STimo Kreuzer     return __builtin_ia32_movmskpd((__v2df)a);
190684344399STimo Kreuzer }
190784344399STimo Kreuzer 
/* Forwards a and b (each viewed as two doubles) and the control value i to
 * __builtin_ia32_shufpd. Kept as a macro; i is handed straight to the
 * builtin. */
#define _mm_shuffle_pd(a, b, i)                                                \
    ((__m128d)__builtin_ia32_shufpd(                                           \
        (__v2df)(__m128d)(a), (__v2df)(__m128d)(b), (int)(i)))
191184344399STimo Kreuzer 
_mm_castpd_ps(__m128d a)191284344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128 _mm_castpd_ps(__m128d a)
191384344399STimo Kreuzer {
191484344399STimo Kreuzer     return (__m128)a;
191584344399STimo Kreuzer }
191684344399STimo Kreuzer 
_mm_castpd_si128(__m128d a)191784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_castpd_si128(__m128d a)
191884344399STimo Kreuzer {
191984344399STimo Kreuzer     return (__m128i)a;
192084344399STimo Kreuzer }
192184344399STimo Kreuzer 
_mm_castps_pd(__m128 a)192284344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_castps_pd(__m128 a)
192384344399STimo Kreuzer {
192484344399STimo Kreuzer     return (__m128d)a;
192584344399STimo Kreuzer }
192684344399STimo Kreuzer 
_mm_castps_si128(__m128 a)192784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128i _mm_castps_si128(__m128 a)
192884344399STimo Kreuzer {
192984344399STimo Kreuzer     return (__m128i)a;
193084344399STimo Kreuzer }
193184344399STimo Kreuzer 
_mm_castsi128_ps(__m128i a)193284344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128 _mm_castsi128_ps(__m128i a)
193384344399STimo Kreuzer {
193484344399STimo Kreuzer     return (__m128)a;
193584344399STimo Kreuzer }
193684344399STimo Kreuzer 
_mm_castsi128_pd(__m128i a)193784344399STimo Kreuzer __INTRIN_INLINE_SSE2 __m128d _mm_castsi128_pd(__m128i a)
193884344399STimo Kreuzer {
193984344399STimo Kreuzer     return (__m128d)a;
194084344399STimo Kreuzer }
194184344399STimo Kreuzer 
/* Prototype only; no inline body is supplied here. */
void _mm_pause(void);
194384344399STimo Kreuzer 
194484344399STimo Kreuzer #endif /* _MSC_VER */
194584344399STimo Kreuzer 
1946*a67f3688STimo Kreuzer #ifdef __cplusplus
1947*a67f3688STimo Kreuzer } // extern "C"
1948*a67f3688STimo Kreuzer #endif
194984344399STimo Kreuzer 
195084344399STimo Kreuzer #endif /* _INCLUDED_EMM */
1951