/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

24f4a2713aSLionel Sambuc #ifndef __EMMINTRIN_H
25f4a2713aSLionel Sambuc #define __EMMINTRIN_H
26f4a2713aSLionel Sambuc 
27f4a2713aSLionel Sambuc #ifndef __SSE2__
28f4a2713aSLionel Sambuc #error "SSE2 instruction set not enabled"
29f4a2713aSLionel Sambuc #else
30f4a2713aSLionel Sambuc 
31f4a2713aSLionel Sambuc #include <xmmintrin.h>
32f4a2713aSLionel Sambuc 
/* Public 16-byte vector types: __m128d holds doubles, __m128i is declared
 * with a long long element type. */
typedef double __m128d __attribute__((__vector_size__(16)));
typedef long long __m128i __attribute__((__vector_size__(16)));

/* Type defines.  Element-typed 16-byte vectors used for casts and builtins. */
typedef double __v2df __attribute__((__vector_size__(16)));
typedef long long __v2di __attribute__((__vector_size__(16)));
typedef short __v8hi __attribute__((__vector_size__(16)));
typedef char __v16qi __attribute__((__vector_size__(16)));

42f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_add_sd(__m128d __a,__m128d __b)43f4a2713aSLionel Sambuc _mm_add_sd(__m128d __a, __m128d __b)
44f4a2713aSLionel Sambuc {
45f4a2713aSLionel Sambuc   __a[0] += __b[0];
46f4a2713aSLionel Sambuc   return __a;
47f4a2713aSLionel Sambuc }
48f4a2713aSLionel Sambuc 
49f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_add_pd(__m128d __a,__m128d __b)50f4a2713aSLionel Sambuc _mm_add_pd(__m128d __a, __m128d __b)
51f4a2713aSLionel Sambuc {
52f4a2713aSLionel Sambuc   return __a + __b;
53f4a2713aSLionel Sambuc }
54f4a2713aSLionel Sambuc 
55f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_sub_sd(__m128d __a,__m128d __b)56f4a2713aSLionel Sambuc _mm_sub_sd(__m128d __a, __m128d __b)
57f4a2713aSLionel Sambuc {
58f4a2713aSLionel Sambuc   __a[0] -= __b[0];
59f4a2713aSLionel Sambuc   return __a;
60f4a2713aSLionel Sambuc }
61f4a2713aSLionel Sambuc 
62f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_sub_pd(__m128d __a,__m128d __b)63f4a2713aSLionel Sambuc _mm_sub_pd(__m128d __a, __m128d __b)
64f4a2713aSLionel Sambuc {
65f4a2713aSLionel Sambuc   return __a - __b;
66f4a2713aSLionel Sambuc }
67f4a2713aSLionel Sambuc 
68f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_mul_sd(__m128d __a,__m128d __b)69f4a2713aSLionel Sambuc _mm_mul_sd(__m128d __a, __m128d __b)
70f4a2713aSLionel Sambuc {
71f4a2713aSLionel Sambuc   __a[0] *= __b[0];
72f4a2713aSLionel Sambuc   return __a;
73f4a2713aSLionel Sambuc }
74f4a2713aSLionel Sambuc 
75f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_mul_pd(__m128d __a,__m128d __b)76f4a2713aSLionel Sambuc _mm_mul_pd(__m128d __a, __m128d __b)
77f4a2713aSLionel Sambuc {
78f4a2713aSLionel Sambuc   return __a * __b;
79f4a2713aSLionel Sambuc }
80f4a2713aSLionel Sambuc 
81f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_div_sd(__m128d __a,__m128d __b)82f4a2713aSLionel Sambuc _mm_div_sd(__m128d __a, __m128d __b)
83f4a2713aSLionel Sambuc {
84f4a2713aSLionel Sambuc   __a[0] /= __b[0];
85f4a2713aSLionel Sambuc   return __a;
86f4a2713aSLionel Sambuc }
87f4a2713aSLionel Sambuc 
88f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_div_pd(__m128d __a,__m128d __b)89f4a2713aSLionel Sambuc _mm_div_pd(__m128d __a, __m128d __b)
90f4a2713aSLionel Sambuc {
91f4a2713aSLionel Sambuc   return __a / __b;
92f4a2713aSLionel Sambuc }
93f4a2713aSLionel Sambuc 
94f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_sqrt_sd(__m128d __a,__m128d __b)95f4a2713aSLionel Sambuc _mm_sqrt_sd(__m128d __a, __m128d __b)
96f4a2713aSLionel Sambuc {
97f4a2713aSLionel Sambuc   __m128d __c = __builtin_ia32_sqrtsd(__b);
98f4a2713aSLionel Sambuc   return (__m128d) { __c[0], __a[1] };
99f4a2713aSLionel Sambuc }
100f4a2713aSLionel Sambuc 
101f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_sqrt_pd(__m128d __a)102f4a2713aSLionel Sambuc _mm_sqrt_pd(__m128d __a)
103f4a2713aSLionel Sambuc {
104f4a2713aSLionel Sambuc   return __builtin_ia32_sqrtpd(__a);
105f4a2713aSLionel Sambuc }
106f4a2713aSLionel Sambuc 
107f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_min_sd(__m128d __a,__m128d __b)108f4a2713aSLionel Sambuc _mm_min_sd(__m128d __a, __m128d __b)
109f4a2713aSLionel Sambuc {
110f4a2713aSLionel Sambuc   return __builtin_ia32_minsd(__a, __b);
111f4a2713aSLionel Sambuc }
112f4a2713aSLionel Sambuc 
113f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_min_pd(__m128d __a,__m128d __b)114f4a2713aSLionel Sambuc _mm_min_pd(__m128d __a, __m128d __b)
115f4a2713aSLionel Sambuc {
116f4a2713aSLionel Sambuc   return __builtin_ia32_minpd(__a, __b);
117f4a2713aSLionel Sambuc }
118f4a2713aSLionel Sambuc 
119f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_max_sd(__m128d __a,__m128d __b)120f4a2713aSLionel Sambuc _mm_max_sd(__m128d __a, __m128d __b)
121f4a2713aSLionel Sambuc {
122f4a2713aSLionel Sambuc   return __builtin_ia32_maxsd(__a, __b);
123f4a2713aSLionel Sambuc }
124f4a2713aSLionel Sambuc 
125f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_max_pd(__m128d __a,__m128d __b)126f4a2713aSLionel Sambuc _mm_max_pd(__m128d __a, __m128d __b)
127f4a2713aSLionel Sambuc {
128f4a2713aSLionel Sambuc   return __builtin_ia32_maxpd(__a, __b);
129f4a2713aSLionel Sambuc }
130f4a2713aSLionel Sambuc 
131f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_and_pd(__m128d __a,__m128d __b)132f4a2713aSLionel Sambuc _mm_and_pd(__m128d __a, __m128d __b)
133f4a2713aSLionel Sambuc {
134f4a2713aSLionel Sambuc   return (__m128d)((__v4si)__a & (__v4si)__b);
135f4a2713aSLionel Sambuc }
136f4a2713aSLionel Sambuc 
137f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_andnot_pd(__m128d __a,__m128d __b)138f4a2713aSLionel Sambuc _mm_andnot_pd(__m128d __a, __m128d __b)
139f4a2713aSLionel Sambuc {
140f4a2713aSLionel Sambuc   return (__m128d)(~(__v4si)__a & (__v4si)__b);
141f4a2713aSLionel Sambuc }
142f4a2713aSLionel Sambuc 
143f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_or_pd(__m128d __a,__m128d __b)144f4a2713aSLionel Sambuc _mm_or_pd(__m128d __a, __m128d __b)
145f4a2713aSLionel Sambuc {
146f4a2713aSLionel Sambuc   return (__m128d)((__v4si)__a | (__v4si)__b);
147f4a2713aSLionel Sambuc }
148f4a2713aSLionel Sambuc 
149f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_xor_pd(__m128d __a,__m128d __b)150f4a2713aSLionel Sambuc _mm_xor_pd(__m128d __a, __m128d __b)
151f4a2713aSLionel Sambuc {
152f4a2713aSLionel Sambuc   return (__m128d)((__v4si)__a ^ (__v4si)__b);
153f4a2713aSLionel Sambuc }
154f4a2713aSLionel Sambuc 
155f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_pd(__m128d __a,__m128d __b)156f4a2713aSLionel Sambuc _mm_cmpeq_pd(__m128d __a, __m128d __b)
157f4a2713aSLionel Sambuc {
158*0a6a1f1dSLionel Sambuc   return (__m128d)__builtin_ia32_cmpeqpd(__a, __b);
159f4a2713aSLionel Sambuc }
160f4a2713aSLionel Sambuc 
161f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmplt_pd(__m128d __a,__m128d __b)162f4a2713aSLionel Sambuc _mm_cmplt_pd(__m128d __a, __m128d __b)
163f4a2713aSLionel Sambuc {
164*0a6a1f1dSLionel Sambuc   return (__m128d)__builtin_ia32_cmpltpd(__a, __b);
165f4a2713aSLionel Sambuc }
166f4a2713aSLionel Sambuc 
167f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmple_pd(__m128d __a,__m128d __b)168f4a2713aSLionel Sambuc _mm_cmple_pd(__m128d __a, __m128d __b)
169f4a2713aSLionel Sambuc {
170*0a6a1f1dSLionel Sambuc   return (__m128d)__builtin_ia32_cmplepd(__a, __b);
171f4a2713aSLionel Sambuc }
172f4a2713aSLionel Sambuc 
173f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_pd(__m128d __a,__m128d __b)174f4a2713aSLionel Sambuc _mm_cmpgt_pd(__m128d __a, __m128d __b)
175f4a2713aSLionel Sambuc {
176*0a6a1f1dSLionel Sambuc   return (__m128d)__builtin_ia32_cmpltpd(__b, __a);
177f4a2713aSLionel Sambuc }
178f4a2713aSLionel Sambuc 
179f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpge_pd(__m128d __a,__m128d __b)180f4a2713aSLionel Sambuc _mm_cmpge_pd(__m128d __a, __m128d __b)
181f4a2713aSLionel Sambuc {
182*0a6a1f1dSLionel Sambuc   return (__m128d)__builtin_ia32_cmplepd(__b, __a);
183f4a2713aSLionel Sambuc }
184f4a2713aSLionel Sambuc 
185f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpord_pd(__m128d __a,__m128d __b)186f4a2713aSLionel Sambuc _mm_cmpord_pd(__m128d __a, __m128d __b)
187f4a2713aSLionel Sambuc {
188*0a6a1f1dSLionel Sambuc   return (__m128d)__builtin_ia32_cmpordpd(__a, __b);
189f4a2713aSLionel Sambuc }
190f4a2713aSLionel Sambuc 
191f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpunord_pd(__m128d __a,__m128d __b)192f4a2713aSLionel Sambuc _mm_cmpunord_pd(__m128d __a, __m128d __b)
193f4a2713aSLionel Sambuc {
194*0a6a1f1dSLionel Sambuc   return (__m128d)__builtin_ia32_cmpunordpd(__a, __b);
195f4a2713aSLionel Sambuc }
196f4a2713aSLionel Sambuc 
197f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpneq_pd(__m128d __a,__m128d __b)198f4a2713aSLionel Sambuc _mm_cmpneq_pd(__m128d __a, __m128d __b)
199f4a2713aSLionel Sambuc {
200*0a6a1f1dSLionel Sambuc   return (__m128d)__builtin_ia32_cmpneqpd(__a, __b);
201f4a2713aSLionel Sambuc }
202f4a2713aSLionel Sambuc 
203f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnlt_pd(__m128d __a,__m128d __b)204f4a2713aSLionel Sambuc _mm_cmpnlt_pd(__m128d __a, __m128d __b)
205f4a2713aSLionel Sambuc {
206*0a6a1f1dSLionel Sambuc   return (__m128d)__builtin_ia32_cmpnltpd(__a, __b);
207f4a2713aSLionel Sambuc }
208f4a2713aSLionel Sambuc 
209f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnle_pd(__m128d __a,__m128d __b)210f4a2713aSLionel Sambuc _mm_cmpnle_pd(__m128d __a, __m128d __b)
211f4a2713aSLionel Sambuc {
212*0a6a1f1dSLionel Sambuc   return (__m128d)__builtin_ia32_cmpnlepd(__a, __b);
213f4a2713aSLionel Sambuc }
214f4a2713aSLionel Sambuc 
215f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpngt_pd(__m128d __a,__m128d __b)216f4a2713aSLionel Sambuc _mm_cmpngt_pd(__m128d __a, __m128d __b)
217f4a2713aSLionel Sambuc {
218*0a6a1f1dSLionel Sambuc   return (__m128d)__builtin_ia32_cmpnltpd(__b, __a);
219f4a2713aSLionel Sambuc }
220f4a2713aSLionel Sambuc 
221f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnge_pd(__m128d __a,__m128d __b)222f4a2713aSLionel Sambuc _mm_cmpnge_pd(__m128d __a, __m128d __b)
223f4a2713aSLionel Sambuc {
224*0a6a1f1dSLionel Sambuc   return (__m128d)__builtin_ia32_cmpnlepd(__b, __a);
225f4a2713aSLionel Sambuc }
226f4a2713aSLionel Sambuc 
227f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_sd(__m128d __a,__m128d __b)228f4a2713aSLionel Sambuc _mm_cmpeq_sd(__m128d __a, __m128d __b)
229f4a2713aSLionel Sambuc {
230*0a6a1f1dSLionel Sambuc   return (__m128d)__builtin_ia32_cmpeqsd(__a, __b);
231f4a2713aSLionel Sambuc }
232f4a2713aSLionel Sambuc 
233f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmplt_sd(__m128d __a,__m128d __b)234f4a2713aSLionel Sambuc _mm_cmplt_sd(__m128d __a, __m128d __b)
235f4a2713aSLionel Sambuc {
236*0a6a1f1dSLionel Sambuc   return (__m128d)__builtin_ia32_cmpltsd(__a, __b);
237f4a2713aSLionel Sambuc }
238f4a2713aSLionel Sambuc 
239f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmple_sd(__m128d __a,__m128d __b)240f4a2713aSLionel Sambuc _mm_cmple_sd(__m128d __a, __m128d __b)
241f4a2713aSLionel Sambuc {
242*0a6a1f1dSLionel Sambuc   return (__m128d)__builtin_ia32_cmplesd(__a, __b);
243f4a2713aSLionel Sambuc }
244f4a2713aSLionel Sambuc 
245f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_sd(__m128d __a,__m128d __b)246f4a2713aSLionel Sambuc _mm_cmpgt_sd(__m128d __a, __m128d __b)
247f4a2713aSLionel Sambuc {
248*0a6a1f1dSLionel Sambuc   __m128d __c = __builtin_ia32_cmpltsd(__b, __a);
249f4a2713aSLionel Sambuc   return (__m128d) { __c[0], __a[1] };
250f4a2713aSLionel Sambuc }
251f4a2713aSLionel Sambuc 
252f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpge_sd(__m128d __a,__m128d __b)253f4a2713aSLionel Sambuc _mm_cmpge_sd(__m128d __a, __m128d __b)
254f4a2713aSLionel Sambuc {
255*0a6a1f1dSLionel Sambuc   __m128d __c = __builtin_ia32_cmplesd(__b, __a);
256f4a2713aSLionel Sambuc   return (__m128d) { __c[0], __a[1] };
257f4a2713aSLionel Sambuc }
258f4a2713aSLionel Sambuc 
259f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpord_sd(__m128d __a,__m128d __b)260f4a2713aSLionel Sambuc _mm_cmpord_sd(__m128d __a, __m128d __b)
261f4a2713aSLionel Sambuc {
262*0a6a1f1dSLionel Sambuc   return (__m128d)__builtin_ia32_cmpordsd(__a, __b);
263f4a2713aSLionel Sambuc }
264f4a2713aSLionel Sambuc 
265f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpunord_sd(__m128d __a,__m128d __b)266f4a2713aSLionel Sambuc _mm_cmpunord_sd(__m128d __a, __m128d __b)
267f4a2713aSLionel Sambuc {
268*0a6a1f1dSLionel Sambuc   return (__m128d)__builtin_ia32_cmpunordsd(__a, __b);
269f4a2713aSLionel Sambuc }
270f4a2713aSLionel Sambuc 
271f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpneq_sd(__m128d __a,__m128d __b)272f4a2713aSLionel Sambuc _mm_cmpneq_sd(__m128d __a, __m128d __b)
273f4a2713aSLionel Sambuc {
274*0a6a1f1dSLionel Sambuc   return (__m128d)__builtin_ia32_cmpneqsd(__a, __b);
275f4a2713aSLionel Sambuc }
276f4a2713aSLionel Sambuc 
277f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnlt_sd(__m128d __a,__m128d __b)278f4a2713aSLionel Sambuc _mm_cmpnlt_sd(__m128d __a, __m128d __b)
279f4a2713aSLionel Sambuc {
280*0a6a1f1dSLionel Sambuc   return (__m128d)__builtin_ia32_cmpnltsd(__a, __b);
281f4a2713aSLionel Sambuc }
282f4a2713aSLionel Sambuc 
283f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnle_sd(__m128d __a,__m128d __b)284f4a2713aSLionel Sambuc _mm_cmpnle_sd(__m128d __a, __m128d __b)
285f4a2713aSLionel Sambuc {
286*0a6a1f1dSLionel Sambuc   return (__m128d)__builtin_ia32_cmpnlesd(__a, __b);
287f4a2713aSLionel Sambuc }
288f4a2713aSLionel Sambuc 
289f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpngt_sd(__m128d __a,__m128d __b)290f4a2713aSLionel Sambuc _mm_cmpngt_sd(__m128d __a, __m128d __b)
291f4a2713aSLionel Sambuc {
292*0a6a1f1dSLionel Sambuc   __m128d __c = __builtin_ia32_cmpnltsd(__b, __a);
293f4a2713aSLionel Sambuc   return (__m128d) { __c[0], __a[1] };
294f4a2713aSLionel Sambuc }
295f4a2713aSLionel Sambuc 
296f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cmpnge_sd(__m128d __a,__m128d __b)297f4a2713aSLionel Sambuc _mm_cmpnge_sd(__m128d __a, __m128d __b)
298f4a2713aSLionel Sambuc {
299*0a6a1f1dSLionel Sambuc   __m128d __c = __builtin_ia32_cmpnlesd(__b, __a);
300f4a2713aSLionel Sambuc   return (__m128d) { __c[0], __a[1] };
301f4a2713aSLionel Sambuc }
302f4a2713aSLionel Sambuc 
303f4a2713aSLionel Sambuc static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_comieq_sd(__m128d __a,__m128d __b)304f4a2713aSLionel Sambuc _mm_comieq_sd(__m128d __a, __m128d __b)
305f4a2713aSLionel Sambuc {
306f4a2713aSLionel Sambuc   return __builtin_ia32_comisdeq(__a, __b);
307f4a2713aSLionel Sambuc }
308f4a2713aSLionel Sambuc 
309f4a2713aSLionel Sambuc static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_comilt_sd(__m128d __a,__m128d __b)310f4a2713aSLionel Sambuc _mm_comilt_sd(__m128d __a, __m128d __b)
311f4a2713aSLionel Sambuc {
312f4a2713aSLionel Sambuc   return __builtin_ia32_comisdlt(__a, __b);
313f4a2713aSLionel Sambuc }
314f4a2713aSLionel Sambuc 
315f4a2713aSLionel Sambuc static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_comile_sd(__m128d __a,__m128d __b)316f4a2713aSLionel Sambuc _mm_comile_sd(__m128d __a, __m128d __b)
317f4a2713aSLionel Sambuc {
318f4a2713aSLionel Sambuc   return __builtin_ia32_comisdle(__a, __b);
319f4a2713aSLionel Sambuc }
320f4a2713aSLionel Sambuc 
321f4a2713aSLionel Sambuc static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_comigt_sd(__m128d __a,__m128d __b)322f4a2713aSLionel Sambuc _mm_comigt_sd(__m128d __a, __m128d __b)
323f4a2713aSLionel Sambuc {
324f4a2713aSLionel Sambuc   return __builtin_ia32_comisdgt(__a, __b);
325f4a2713aSLionel Sambuc }
326f4a2713aSLionel Sambuc 
327f4a2713aSLionel Sambuc static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_comige_sd(__m128d __a,__m128d __b)328f4a2713aSLionel Sambuc _mm_comige_sd(__m128d __a, __m128d __b)
329f4a2713aSLionel Sambuc {
330f4a2713aSLionel Sambuc   return __builtin_ia32_comisdge(__a, __b);
331f4a2713aSLionel Sambuc }
332f4a2713aSLionel Sambuc 
333f4a2713aSLionel Sambuc static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_comineq_sd(__m128d __a,__m128d __b)334f4a2713aSLionel Sambuc _mm_comineq_sd(__m128d __a, __m128d __b)
335f4a2713aSLionel Sambuc {
336f4a2713aSLionel Sambuc   return __builtin_ia32_comisdneq(__a, __b);
337f4a2713aSLionel Sambuc }
338f4a2713aSLionel Sambuc 
339f4a2713aSLionel Sambuc static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_ucomieq_sd(__m128d __a,__m128d __b)340f4a2713aSLionel Sambuc _mm_ucomieq_sd(__m128d __a, __m128d __b)
341f4a2713aSLionel Sambuc {
342f4a2713aSLionel Sambuc   return __builtin_ia32_ucomisdeq(__a, __b);
343f4a2713aSLionel Sambuc }
344f4a2713aSLionel Sambuc 
345f4a2713aSLionel Sambuc static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_ucomilt_sd(__m128d __a,__m128d __b)346f4a2713aSLionel Sambuc _mm_ucomilt_sd(__m128d __a, __m128d __b)
347f4a2713aSLionel Sambuc {
348f4a2713aSLionel Sambuc   return __builtin_ia32_ucomisdlt(__a, __b);
349f4a2713aSLionel Sambuc }
350f4a2713aSLionel Sambuc 
351f4a2713aSLionel Sambuc static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_ucomile_sd(__m128d __a,__m128d __b)352f4a2713aSLionel Sambuc _mm_ucomile_sd(__m128d __a, __m128d __b)
353f4a2713aSLionel Sambuc {
354f4a2713aSLionel Sambuc   return __builtin_ia32_ucomisdle(__a, __b);
355f4a2713aSLionel Sambuc }
356f4a2713aSLionel Sambuc 
357f4a2713aSLionel Sambuc static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_ucomigt_sd(__m128d __a,__m128d __b)358f4a2713aSLionel Sambuc _mm_ucomigt_sd(__m128d __a, __m128d __b)
359f4a2713aSLionel Sambuc {
360f4a2713aSLionel Sambuc   return __builtin_ia32_ucomisdgt(__a, __b);
361f4a2713aSLionel Sambuc }
362f4a2713aSLionel Sambuc 
363f4a2713aSLionel Sambuc static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_ucomige_sd(__m128d __a,__m128d __b)364f4a2713aSLionel Sambuc _mm_ucomige_sd(__m128d __a, __m128d __b)
365f4a2713aSLionel Sambuc {
366f4a2713aSLionel Sambuc   return __builtin_ia32_ucomisdge(__a, __b);
367f4a2713aSLionel Sambuc }
368f4a2713aSLionel Sambuc 
369f4a2713aSLionel Sambuc static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_ucomineq_sd(__m128d __a,__m128d __b)370f4a2713aSLionel Sambuc _mm_ucomineq_sd(__m128d __a, __m128d __b)
371f4a2713aSLionel Sambuc {
372f4a2713aSLionel Sambuc   return __builtin_ia32_ucomisdneq(__a, __b);
373f4a2713aSLionel Sambuc }
374f4a2713aSLionel Sambuc 
375f4a2713aSLionel Sambuc static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cvtpd_ps(__m128d __a)376f4a2713aSLionel Sambuc _mm_cvtpd_ps(__m128d __a)
377f4a2713aSLionel Sambuc {
378f4a2713aSLionel Sambuc   return __builtin_ia32_cvtpd2ps(__a);
379f4a2713aSLionel Sambuc }
380f4a2713aSLionel Sambuc 
381f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cvtps_pd(__m128 __a)382f4a2713aSLionel Sambuc _mm_cvtps_pd(__m128 __a)
383f4a2713aSLionel Sambuc {
384f4a2713aSLionel Sambuc   return __builtin_ia32_cvtps2pd(__a);
385f4a2713aSLionel Sambuc }
386f4a2713aSLionel Sambuc 
387f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cvtepi32_pd(__m128i __a)388f4a2713aSLionel Sambuc _mm_cvtepi32_pd(__m128i __a)
389f4a2713aSLionel Sambuc {
390f4a2713aSLionel Sambuc   return __builtin_ia32_cvtdq2pd((__v4si)__a);
391f4a2713aSLionel Sambuc }
392f4a2713aSLionel Sambuc 
393f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cvtpd_epi32(__m128d __a)394f4a2713aSLionel Sambuc _mm_cvtpd_epi32(__m128d __a)
395f4a2713aSLionel Sambuc {
396f4a2713aSLionel Sambuc   return __builtin_ia32_cvtpd2dq(__a);
397f4a2713aSLionel Sambuc }
398f4a2713aSLionel Sambuc 
399f4a2713aSLionel Sambuc static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_cvtsd_si32(__m128d __a)400f4a2713aSLionel Sambuc _mm_cvtsd_si32(__m128d __a)
401f4a2713aSLionel Sambuc {
402f4a2713aSLionel Sambuc   return __builtin_ia32_cvtsd2si(__a);
403f4a2713aSLionel Sambuc }
404f4a2713aSLionel Sambuc 
405f4a2713aSLionel Sambuc static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cvtsd_ss(__m128 __a,__m128d __b)406f4a2713aSLionel Sambuc _mm_cvtsd_ss(__m128 __a, __m128d __b)
407f4a2713aSLionel Sambuc {
408f4a2713aSLionel Sambuc   __a[0] = __b[0];
409f4a2713aSLionel Sambuc   return __a;
410f4a2713aSLionel Sambuc }
411f4a2713aSLionel Sambuc 
412f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cvtsi32_sd(__m128d __a,int __b)413f4a2713aSLionel Sambuc _mm_cvtsi32_sd(__m128d __a, int __b)
414f4a2713aSLionel Sambuc {
415f4a2713aSLionel Sambuc   __a[0] = __b;
416f4a2713aSLionel Sambuc   return __a;
417f4a2713aSLionel Sambuc }
418f4a2713aSLionel Sambuc 
419f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cvtss_sd(__m128d __a,__m128 __b)420f4a2713aSLionel Sambuc _mm_cvtss_sd(__m128d __a, __m128 __b)
421f4a2713aSLionel Sambuc {
422f4a2713aSLionel Sambuc   __a[0] = __b[0];
423f4a2713aSLionel Sambuc   return __a;
424f4a2713aSLionel Sambuc }
425f4a2713aSLionel Sambuc 
426f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cvttpd_epi32(__m128d __a)427f4a2713aSLionel Sambuc _mm_cvttpd_epi32(__m128d __a)
428f4a2713aSLionel Sambuc {
429f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_cvttpd2dq(__a);
430f4a2713aSLionel Sambuc }
431f4a2713aSLionel Sambuc 
432f4a2713aSLionel Sambuc static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_cvttsd_si32(__m128d __a)433f4a2713aSLionel Sambuc _mm_cvttsd_si32(__m128d __a)
434f4a2713aSLionel Sambuc {
435f4a2713aSLionel Sambuc   return __a[0];
436f4a2713aSLionel Sambuc }
437f4a2713aSLionel Sambuc 
438f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_cvtpd_pi32(__m128d __a)439f4a2713aSLionel Sambuc _mm_cvtpd_pi32(__m128d __a)
440f4a2713aSLionel Sambuc {
441f4a2713aSLionel Sambuc   return (__m64)__builtin_ia32_cvtpd2pi(__a);
442f4a2713aSLionel Sambuc }
443f4a2713aSLionel Sambuc 
444f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_cvttpd_pi32(__m128d __a)445f4a2713aSLionel Sambuc _mm_cvttpd_pi32(__m128d __a)
446f4a2713aSLionel Sambuc {
447f4a2713aSLionel Sambuc   return (__m64)__builtin_ia32_cvttpd2pi(__a);
448f4a2713aSLionel Sambuc }
449f4a2713aSLionel Sambuc 
450f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cvtpi32_pd(__m64 __a)451f4a2713aSLionel Sambuc _mm_cvtpi32_pd(__m64 __a)
452f4a2713aSLionel Sambuc {
453f4a2713aSLionel Sambuc   return __builtin_ia32_cvtpi2pd((__v2si)__a);
454f4a2713aSLionel Sambuc }
455f4a2713aSLionel Sambuc 
456f4a2713aSLionel Sambuc static __inline__ double __attribute__((__always_inline__, __nodebug__))
_mm_cvtsd_f64(__m128d __a)457f4a2713aSLionel Sambuc _mm_cvtsd_f64(__m128d __a)
458f4a2713aSLionel Sambuc {
459f4a2713aSLionel Sambuc   return __a[0];
460f4a2713aSLionel Sambuc }
461f4a2713aSLionel Sambuc 
462f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_load_pd(double const * __dp)463f4a2713aSLionel Sambuc _mm_load_pd(double const *__dp)
464f4a2713aSLionel Sambuc {
465f4a2713aSLionel Sambuc   return *(__m128d*)__dp;
466f4a2713aSLionel Sambuc }
467f4a2713aSLionel Sambuc 
468f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_load1_pd(double const * __dp)469f4a2713aSLionel Sambuc _mm_load1_pd(double const *__dp)
470f4a2713aSLionel Sambuc {
471f4a2713aSLionel Sambuc   struct __mm_load1_pd_struct {
472f4a2713aSLionel Sambuc     double __u;
473f4a2713aSLionel Sambuc   } __attribute__((__packed__, __may_alias__));
474f4a2713aSLionel Sambuc   double __u = ((struct __mm_load1_pd_struct*)__dp)->__u;
475f4a2713aSLionel Sambuc   return (__m128d){ __u, __u };
476f4a2713aSLionel Sambuc }
477f4a2713aSLionel Sambuc 
/* Alternate spelling: expands to a call of _mm_load1_pd with the same
 * argument. */
#define _mm_load_pd1(dp) _mm_load1_pd(dp)

480f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_loadr_pd(double const * __dp)481f4a2713aSLionel Sambuc _mm_loadr_pd(double const *__dp)
482f4a2713aSLionel Sambuc {
483f4a2713aSLionel Sambuc   __m128d __u = *(__m128d*)__dp;
484f4a2713aSLionel Sambuc   return __builtin_shufflevector(__u, __u, 1, 0);
485f4a2713aSLionel Sambuc }
486f4a2713aSLionel Sambuc 
487f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_loadu_pd(double const * __dp)488f4a2713aSLionel Sambuc _mm_loadu_pd(double const *__dp)
489f4a2713aSLionel Sambuc {
490f4a2713aSLionel Sambuc   struct __loadu_pd {
491f4a2713aSLionel Sambuc     __m128d __v;
492f4a2713aSLionel Sambuc   } __attribute__((packed, may_alias));
493f4a2713aSLionel Sambuc   return ((struct __loadu_pd*)__dp)->__v;
494f4a2713aSLionel Sambuc }
495f4a2713aSLionel Sambuc 
496f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_load_sd(double const * __dp)497f4a2713aSLionel Sambuc _mm_load_sd(double const *__dp)
498f4a2713aSLionel Sambuc {
499f4a2713aSLionel Sambuc   struct __mm_load_sd_struct {
500f4a2713aSLionel Sambuc     double __u;
501f4a2713aSLionel Sambuc   } __attribute__((__packed__, __may_alias__));
502f4a2713aSLionel Sambuc   double __u = ((struct __mm_load_sd_struct*)__dp)->__u;
503f4a2713aSLionel Sambuc   return (__m128d){ __u, 0 };
504f4a2713aSLionel Sambuc }
505f4a2713aSLionel Sambuc 
506f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_loadh_pd(__m128d __a,double const * __dp)507f4a2713aSLionel Sambuc _mm_loadh_pd(__m128d __a, double const *__dp)
508f4a2713aSLionel Sambuc {
509f4a2713aSLionel Sambuc   struct __mm_loadh_pd_struct {
510f4a2713aSLionel Sambuc     double __u;
511f4a2713aSLionel Sambuc   } __attribute__((__packed__, __may_alias__));
512f4a2713aSLionel Sambuc   double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u;
513f4a2713aSLionel Sambuc   return (__m128d){ __a[0], __u };
514f4a2713aSLionel Sambuc }
515f4a2713aSLionel Sambuc 
516f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_loadl_pd(__m128d __a,double const * __dp)517f4a2713aSLionel Sambuc _mm_loadl_pd(__m128d __a, double const *__dp)
518f4a2713aSLionel Sambuc {
519f4a2713aSLionel Sambuc   struct __mm_loadl_pd_struct {
520f4a2713aSLionel Sambuc     double __u;
521f4a2713aSLionel Sambuc   } __attribute__((__packed__, __may_alias__));
522f4a2713aSLionel Sambuc   double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u;
523f4a2713aSLionel Sambuc   return (__m128d){ __u, __a[1] };
524f4a2713aSLionel Sambuc }
525f4a2713aSLionel Sambuc 
526f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_set_sd(double __w)527f4a2713aSLionel Sambuc _mm_set_sd(double __w)
528f4a2713aSLionel Sambuc {
529f4a2713aSLionel Sambuc   return (__m128d){ __w, 0 };
530f4a2713aSLionel Sambuc }
531f4a2713aSLionel Sambuc 
532f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_set1_pd(double __w)533f4a2713aSLionel Sambuc _mm_set1_pd(double __w)
534f4a2713aSLionel Sambuc {
535f4a2713aSLionel Sambuc   return (__m128d){ __w, __w };
536f4a2713aSLionel Sambuc }
537f4a2713aSLionel Sambuc 
538f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_set_pd(double __w,double __x)539f4a2713aSLionel Sambuc _mm_set_pd(double __w, double __x)
540f4a2713aSLionel Sambuc {
541f4a2713aSLionel Sambuc   return (__m128d){ __x, __w };
542f4a2713aSLionel Sambuc }
543f4a2713aSLionel Sambuc 
544f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_setr_pd(double __w,double __x)545f4a2713aSLionel Sambuc _mm_setr_pd(double __w, double __x)
546f4a2713aSLionel Sambuc {
547f4a2713aSLionel Sambuc   return (__m128d){ __w, __x };
548f4a2713aSLionel Sambuc }
549f4a2713aSLionel Sambuc 
550f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_setzero_pd(void)551f4a2713aSLionel Sambuc _mm_setzero_pd(void)
552f4a2713aSLionel Sambuc {
553f4a2713aSLionel Sambuc   return (__m128d){ 0, 0 };
554f4a2713aSLionel Sambuc }
555f4a2713aSLionel Sambuc 
556f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_move_sd(__m128d __a,__m128d __b)557f4a2713aSLionel Sambuc _mm_move_sd(__m128d __a, __m128d __b)
558f4a2713aSLionel Sambuc {
559f4a2713aSLionel Sambuc   return (__m128d){ __b[0], __a[1] };
560f4a2713aSLionel Sambuc }
561f4a2713aSLionel Sambuc 
562f4a2713aSLionel Sambuc static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_store_sd(double * __dp,__m128d __a)563f4a2713aSLionel Sambuc _mm_store_sd(double *__dp, __m128d __a)
564f4a2713aSLionel Sambuc {
565f4a2713aSLionel Sambuc   struct __mm_store_sd_struct {
566f4a2713aSLionel Sambuc     double __u;
567f4a2713aSLionel Sambuc   } __attribute__((__packed__, __may_alias__));
568f4a2713aSLionel Sambuc   ((struct __mm_store_sd_struct*)__dp)->__u = __a[0];
569f4a2713aSLionel Sambuc }
570f4a2713aSLionel Sambuc 
571f4a2713aSLionel Sambuc static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_store1_pd(double * __dp,__m128d __a)572f4a2713aSLionel Sambuc _mm_store1_pd(double *__dp, __m128d __a)
573f4a2713aSLionel Sambuc {
574f4a2713aSLionel Sambuc   struct __mm_store1_pd_struct {
575f4a2713aSLionel Sambuc     double __u[2];
576f4a2713aSLionel Sambuc   } __attribute__((__packed__, __may_alias__));
577f4a2713aSLionel Sambuc   ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0];
578f4a2713aSLionel Sambuc   ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0];
579f4a2713aSLionel Sambuc }
580f4a2713aSLionel Sambuc 
581f4a2713aSLionel Sambuc static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_store_pd(double * __dp,__m128d __a)582f4a2713aSLionel Sambuc _mm_store_pd(double *__dp, __m128d __a)
583f4a2713aSLionel Sambuc {
584f4a2713aSLionel Sambuc   *(__m128d *)__dp = __a;
585f4a2713aSLionel Sambuc }
586f4a2713aSLionel Sambuc 
587f4a2713aSLionel Sambuc static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_storeu_pd(double * __dp,__m128d __a)588f4a2713aSLionel Sambuc _mm_storeu_pd(double *__dp, __m128d __a)
589f4a2713aSLionel Sambuc {
590f4a2713aSLionel Sambuc   __builtin_ia32_storeupd(__dp, __a);
591f4a2713aSLionel Sambuc }
592f4a2713aSLionel Sambuc 
593f4a2713aSLionel Sambuc static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_storer_pd(double * __dp,__m128d __a)594f4a2713aSLionel Sambuc _mm_storer_pd(double *__dp, __m128d __a)
595f4a2713aSLionel Sambuc {
596f4a2713aSLionel Sambuc   __a = __builtin_shufflevector(__a, __a, 1, 0);
597f4a2713aSLionel Sambuc   *(__m128d *)__dp = __a;
598f4a2713aSLionel Sambuc }
599f4a2713aSLionel Sambuc 
600f4a2713aSLionel Sambuc static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_storeh_pd(double * __dp,__m128d __a)601f4a2713aSLionel Sambuc _mm_storeh_pd(double *__dp, __m128d __a)
602f4a2713aSLionel Sambuc {
603f4a2713aSLionel Sambuc   struct __mm_storeh_pd_struct {
604f4a2713aSLionel Sambuc     double __u;
605f4a2713aSLionel Sambuc   } __attribute__((__packed__, __may_alias__));
606f4a2713aSLionel Sambuc   ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];
607f4a2713aSLionel Sambuc }
608f4a2713aSLionel Sambuc 
609f4a2713aSLionel Sambuc static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_storel_pd(double * __dp,__m128d __a)610f4a2713aSLionel Sambuc _mm_storel_pd(double *__dp, __m128d __a)
611f4a2713aSLionel Sambuc {
612f4a2713aSLionel Sambuc   struct __mm_storeh_pd_struct {
613f4a2713aSLionel Sambuc     double __u;
614f4a2713aSLionel Sambuc   } __attribute__((__packed__, __may_alias__));
615f4a2713aSLionel Sambuc   ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];
616f4a2713aSLionel Sambuc }
617f4a2713aSLionel Sambuc 
618f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_add_epi8(__m128i __a,__m128i __b)619f4a2713aSLionel Sambuc _mm_add_epi8(__m128i __a, __m128i __b)
620f4a2713aSLionel Sambuc {
621f4a2713aSLionel Sambuc   return (__m128i)((__v16qi)__a + (__v16qi)__b);
622f4a2713aSLionel Sambuc }
623f4a2713aSLionel Sambuc 
624f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_add_epi16(__m128i __a,__m128i __b)625f4a2713aSLionel Sambuc _mm_add_epi16(__m128i __a, __m128i __b)
626f4a2713aSLionel Sambuc {
627f4a2713aSLionel Sambuc   return (__m128i)((__v8hi)__a + (__v8hi)__b);
628f4a2713aSLionel Sambuc }
629f4a2713aSLionel Sambuc 
630f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_add_epi32(__m128i __a,__m128i __b)631f4a2713aSLionel Sambuc _mm_add_epi32(__m128i __a, __m128i __b)
632f4a2713aSLionel Sambuc {
633f4a2713aSLionel Sambuc   return (__m128i)((__v4si)__a + (__v4si)__b);
634f4a2713aSLionel Sambuc }
635f4a2713aSLionel Sambuc 
636f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_add_si64(__m64 __a,__m64 __b)637f4a2713aSLionel Sambuc _mm_add_si64(__m64 __a, __m64 __b)
638f4a2713aSLionel Sambuc {
639f4a2713aSLionel Sambuc   return __a + __b;
640f4a2713aSLionel Sambuc }
641f4a2713aSLionel Sambuc 
642f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_add_epi64(__m128i __a,__m128i __b)643f4a2713aSLionel Sambuc _mm_add_epi64(__m128i __a, __m128i __b)
644f4a2713aSLionel Sambuc {
645f4a2713aSLionel Sambuc   return __a + __b;
646f4a2713aSLionel Sambuc }
647f4a2713aSLionel Sambuc 
648f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_adds_epi8(__m128i __a,__m128i __b)649f4a2713aSLionel Sambuc _mm_adds_epi8(__m128i __a, __m128i __b)
650f4a2713aSLionel Sambuc {
651f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);
652f4a2713aSLionel Sambuc }
653f4a2713aSLionel Sambuc 
654f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_adds_epi16(__m128i __a,__m128i __b)655f4a2713aSLionel Sambuc _mm_adds_epi16(__m128i __a, __m128i __b)
656f4a2713aSLionel Sambuc {
657f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);
658f4a2713aSLionel Sambuc }
659f4a2713aSLionel Sambuc 
660f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_adds_epu8(__m128i __a,__m128i __b)661f4a2713aSLionel Sambuc _mm_adds_epu8(__m128i __a, __m128i __b)
662f4a2713aSLionel Sambuc {
663f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);
664f4a2713aSLionel Sambuc }
665f4a2713aSLionel Sambuc 
666f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_adds_epu16(__m128i __a,__m128i __b)667f4a2713aSLionel Sambuc _mm_adds_epu16(__m128i __a, __m128i __b)
668f4a2713aSLionel Sambuc {
669f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
670f4a2713aSLionel Sambuc }
671f4a2713aSLionel Sambuc 
672f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_avg_epu8(__m128i __a,__m128i __b)673f4a2713aSLionel Sambuc _mm_avg_epu8(__m128i __a, __m128i __b)
674f4a2713aSLionel Sambuc {
675f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
676f4a2713aSLionel Sambuc }
677f4a2713aSLionel Sambuc 
678f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_avg_epu16(__m128i __a,__m128i __b)679f4a2713aSLionel Sambuc _mm_avg_epu16(__m128i __a, __m128i __b)
680f4a2713aSLionel Sambuc {
681f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
682f4a2713aSLionel Sambuc }
683f4a2713aSLionel Sambuc 
684f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_madd_epi16(__m128i __a,__m128i __b)685f4a2713aSLionel Sambuc _mm_madd_epi16(__m128i __a, __m128i __b)
686f4a2713aSLionel Sambuc {
687f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
688f4a2713aSLionel Sambuc }
689f4a2713aSLionel Sambuc 
690f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_max_epi16(__m128i __a,__m128i __b)691f4a2713aSLionel Sambuc _mm_max_epi16(__m128i __a, __m128i __b)
692f4a2713aSLionel Sambuc {
693f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
694f4a2713aSLionel Sambuc }
695f4a2713aSLionel Sambuc 
696f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_max_epu8(__m128i __a,__m128i __b)697f4a2713aSLionel Sambuc _mm_max_epu8(__m128i __a, __m128i __b)
698f4a2713aSLionel Sambuc {
699f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
700f4a2713aSLionel Sambuc }
701f4a2713aSLionel Sambuc 
702f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_min_epi16(__m128i __a,__m128i __b)703f4a2713aSLionel Sambuc _mm_min_epi16(__m128i __a, __m128i __b)
704f4a2713aSLionel Sambuc {
705f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
706f4a2713aSLionel Sambuc }
707f4a2713aSLionel Sambuc 
708f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_min_epu8(__m128i __a,__m128i __b)709f4a2713aSLionel Sambuc _mm_min_epu8(__m128i __a, __m128i __b)
710f4a2713aSLionel Sambuc {
711f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
712f4a2713aSLionel Sambuc }
713f4a2713aSLionel Sambuc 
714f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_mulhi_epi16(__m128i __a,__m128i __b)715f4a2713aSLionel Sambuc _mm_mulhi_epi16(__m128i __a, __m128i __b)
716f4a2713aSLionel Sambuc {
717f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
718f4a2713aSLionel Sambuc }
719f4a2713aSLionel Sambuc 
720f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_mulhi_epu16(__m128i __a,__m128i __b)721f4a2713aSLionel Sambuc _mm_mulhi_epu16(__m128i __a, __m128i __b)
722f4a2713aSLionel Sambuc {
723f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
724f4a2713aSLionel Sambuc }
725f4a2713aSLionel Sambuc 
726f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_mullo_epi16(__m128i __a,__m128i __b)727f4a2713aSLionel Sambuc _mm_mullo_epi16(__m128i __a, __m128i __b)
728f4a2713aSLionel Sambuc {
729f4a2713aSLionel Sambuc   return (__m128i)((__v8hi)__a * (__v8hi)__b);
730f4a2713aSLionel Sambuc }
731f4a2713aSLionel Sambuc 
732f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_mul_su32(__m64 __a,__m64 __b)733f4a2713aSLionel Sambuc _mm_mul_su32(__m64 __a, __m64 __b)
734f4a2713aSLionel Sambuc {
735f4a2713aSLionel Sambuc   return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
736f4a2713aSLionel Sambuc }
737f4a2713aSLionel Sambuc 
738f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_mul_epu32(__m128i __a,__m128i __b)739f4a2713aSLionel Sambuc _mm_mul_epu32(__m128i __a, __m128i __b)
740f4a2713aSLionel Sambuc {
741f4a2713aSLionel Sambuc   return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
742f4a2713aSLionel Sambuc }
743f4a2713aSLionel Sambuc 
744f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sad_epu8(__m128i __a,__m128i __b)745f4a2713aSLionel Sambuc _mm_sad_epu8(__m128i __a, __m128i __b)
746f4a2713aSLionel Sambuc {
747f4a2713aSLionel Sambuc   return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
748f4a2713aSLionel Sambuc }
749f4a2713aSLionel Sambuc 
750f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sub_epi8(__m128i __a,__m128i __b)751f4a2713aSLionel Sambuc _mm_sub_epi8(__m128i __a, __m128i __b)
752f4a2713aSLionel Sambuc {
753f4a2713aSLionel Sambuc   return (__m128i)((__v16qi)__a - (__v16qi)__b);
754f4a2713aSLionel Sambuc }
755f4a2713aSLionel Sambuc 
756f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sub_epi16(__m128i __a,__m128i __b)757f4a2713aSLionel Sambuc _mm_sub_epi16(__m128i __a, __m128i __b)
758f4a2713aSLionel Sambuc {
759f4a2713aSLionel Sambuc   return (__m128i)((__v8hi)__a - (__v8hi)__b);
760f4a2713aSLionel Sambuc }
761f4a2713aSLionel Sambuc 
762f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sub_epi32(__m128i __a,__m128i __b)763f4a2713aSLionel Sambuc _mm_sub_epi32(__m128i __a, __m128i __b)
764f4a2713aSLionel Sambuc {
765f4a2713aSLionel Sambuc   return (__m128i)((__v4si)__a - (__v4si)__b);
766f4a2713aSLionel Sambuc }
767f4a2713aSLionel Sambuc 
768f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sub_si64(__m64 __a,__m64 __b)769f4a2713aSLionel Sambuc _mm_sub_si64(__m64 __a, __m64 __b)
770f4a2713aSLionel Sambuc {
771f4a2713aSLionel Sambuc   return __a - __b;
772f4a2713aSLionel Sambuc }
773f4a2713aSLionel Sambuc 
774f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sub_epi64(__m128i __a,__m128i __b)775f4a2713aSLionel Sambuc _mm_sub_epi64(__m128i __a, __m128i __b)
776f4a2713aSLionel Sambuc {
777f4a2713aSLionel Sambuc   return __a - __b;
778f4a2713aSLionel Sambuc }
779f4a2713aSLionel Sambuc 
780f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_subs_epi8(__m128i __a,__m128i __b)781f4a2713aSLionel Sambuc _mm_subs_epi8(__m128i __a, __m128i __b)
782f4a2713aSLionel Sambuc {
783f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
784f4a2713aSLionel Sambuc }
785f4a2713aSLionel Sambuc 
786f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_subs_epi16(__m128i __a,__m128i __b)787f4a2713aSLionel Sambuc _mm_subs_epi16(__m128i __a, __m128i __b)
788f4a2713aSLionel Sambuc {
789f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
790f4a2713aSLionel Sambuc }
791f4a2713aSLionel Sambuc 
792f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_subs_epu8(__m128i __a,__m128i __b)793f4a2713aSLionel Sambuc _mm_subs_epu8(__m128i __a, __m128i __b)
794f4a2713aSLionel Sambuc {
795f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
796f4a2713aSLionel Sambuc }
797f4a2713aSLionel Sambuc 
798f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_subs_epu16(__m128i __a,__m128i __b)799f4a2713aSLionel Sambuc _mm_subs_epu16(__m128i __a, __m128i __b)
800f4a2713aSLionel Sambuc {
801f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
802f4a2713aSLionel Sambuc }
803f4a2713aSLionel Sambuc 
804f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_and_si128(__m128i __a,__m128i __b)805f4a2713aSLionel Sambuc _mm_and_si128(__m128i __a, __m128i __b)
806f4a2713aSLionel Sambuc {
807f4a2713aSLionel Sambuc   return __a & __b;
808f4a2713aSLionel Sambuc }
809f4a2713aSLionel Sambuc 
810f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_andnot_si128(__m128i __a,__m128i __b)811f4a2713aSLionel Sambuc _mm_andnot_si128(__m128i __a, __m128i __b)
812f4a2713aSLionel Sambuc {
813f4a2713aSLionel Sambuc   return ~__a & __b;
814f4a2713aSLionel Sambuc }
815f4a2713aSLionel Sambuc 
816f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_or_si128(__m128i __a,__m128i __b)817f4a2713aSLionel Sambuc _mm_or_si128(__m128i __a, __m128i __b)
818f4a2713aSLionel Sambuc {
819f4a2713aSLionel Sambuc   return __a | __b;
820f4a2713aSLionel Sambuc }
821f4a2713aSLionel Sambuc 
822f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_xor_si128(__m128i __a,__m128i __b)823f4a2713aSLionel Sambuc _mm_xor_si128(__m128i __a, __m128i __b)
824f4a2713aSLionel Sambuc {
825f4a2713aSLionel Sambuc   return __a ^ __b;
826f4a2713aSLionel Sambuc }
827f4a2713aSLionel Sambuc 
828f4a2713aSLionel Sambuc #define _mm_slli_si128(a, count) __extension__ ({ \
829f4a2713aSLionel Sambuc   _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
830f4a2713aSLionel Sambuc   __m128i __a = (a); \
831f4a2713aSLionel Sambuc    _Pragma("clang diagnostic pop"); \
832f4a2713aSLionel Sambuc   (__m128i)__builtin_ia32_pslldqi128(__a, (count)*8); })
833f4a2713aSLionel Sambuc 
834f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_slli_epi16(__m128i __a,int __count)835f4a2713aSLionel Sambuc _mm_slli_epi16(__m128i __a, int __count)
836f4a2713aSLionel Sambuc {
837f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
838f4a2713aSLionel Sambuc }
839f4a2713aSLionel Sambuc 
840f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sll_epi16(__m128i __a,__m128i __count)841f4a2713aSLionel Sambuc _mm_sll_epi16(__m128i __a, __m128i __count)
842f4a2713aSLionel Sambuc {
843f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
844f4a2713aSLionel Sambuc }
845f4a2713aSLionel Sambuc 
846f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_slli_epi32(__m128i __a,int __count)847f4a2713aSLionel Sambuc _mm_slli_epi32(__m128i __a, int __count)
848f4a2713aSLionel Sambuc {
849f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
850f4a2713aSLionel Sambuc }
851f4a2713aSLionel Sambuc 
852f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sll_epi32(__m128i __a,__m128i __count)853f4a2713aSLionel Sambuc _mm_sll_epi32(__m128i __a, __m128i __count)
854f4a2713aSLionel Sambuc {
855f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
856f4a2713aSLionel Sambuc }
857f4a2713aSLionel Sambuc 
858f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_slli_epi64(__m128i __a,int __count)859f4a2713aSLionel Sambuc _mm_slli_epi64(__m128i __a, int __count)
860f4a2713aSLionel Sambuc {
861f4a2713aSLionel Sambuc   return __builtin_ia32_psllqi128(__a, __count);
862f4a2713aSLionel Sambuc }
863f4a2713aSLionel Sambuc 
864f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sll_epi64(__m128i __a,__m128i __count)865f4a2713aSLionel Sambuc _mm_sll_epi64(__m128i __a, __m128i __count)
866f4a2713aSLionel Sambuc {
867f4a2713aSLionel Sambuc   return __builtin_ia32_psllq128(__a, __count);
868f4a2713aSLionel Sambuc }
869f4a2713aSLionel Sambuc 
870f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_srai_epi16(__m128i __a,int __count)871f4a2713aSLionel Sambuc _mm_srai_epi16(__m128i __a, int __count)
872f4a2713aSLionel Sambuc {
873f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
874f4a2713aSLionel Sambuc }
875f4a2713aSLionel Sambuc 
876f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sra_epi16(__m128i __a,__m128i __count)877f4a2713aSLionel Sambuc _mm_sra_epi16(__m128i __a, __m128i __count)
878f4a2713aSLionel Sambuc {
879f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
880f4a2713aSLionel Sambuc }
881f4a2713aSLionel Sambuc 
882f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_srai_epi32(__m128i __a,int __count)883f4a2713aSLionel Sambuc _mm_srai_epi32(__m128i __a, int __count)
884f4a2713aSLionel Sambuc {
885f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
886f4a2713aSLionel Sambuc }
887f4a2713aSLionel Sambuc 
888f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_sra_epi32(__m128i __a,__m128i __count)889f4a2713aSLionel Sambuc _mm_sra_epi32(__m128i __a, __m128i __count)
890f4a2713aSLionel Sambuc {
891f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
892f4a2713aSLionel Sambuc }
893f4a2713aSLionel Sambuc 
894f4a2713aSLionel Sambuc 
895f4a2713aSLionel Sambuc #define _mm_srli_si128(a, count) __extension__ ({ \
896f4a2713aSLionel Sambuc   _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \
897f4a2713aSLionel Sambuc   __m128i __a = (a); \
898f4a2713aSLionel Sambuc   _Pragma("clang diagnostic pop"); \
899f4a2713aSLionel Sambuc   (__m128i)__builtin_ia32_psrldqi128(__a, (count)*8); })
900f4a2713aSLionel Sambuc 
901f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_srli_epi16(__m128i __a,int __count)902f4a2713aSLionel Sambuc _mm_srli_epi16(__m128i __a, int __count)
903f4a2713aSLionel Sambuc {
904f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
905f4a2713aSLionel Sambuc }
906f4a2713aSLionel Sambuc 
907f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_srl_epi16(__m128i __a,__m128i __count)908f4a2713aSLionel Sambuc _mm_srl_epi16(__m128i __a, __m128i __count)
909f4a2713aSLionel Sambuc {
910f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
911f4a2713aSLionel Sambuc }
912f4a2713aSLionel Sambuc 
913f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_srli_epi32(__m128i __a,int __count)914f4a2713aSLionel Sambuc _mm_srli_epi32(__m128i __a, int __count)
915f4a2713aSLionel Sambuc {
916f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
917f4a2713aSLionel Sambuc }
918f4a2713aSLionel Sambuc 
919f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_srl_epi32(__m128i __a,__m128i __count)920f4a2713aSLionel Sambuc _mm_srl_epi32(__m128i __a, __m128i __count)
921f4a2713aSLionel Sambuc {
922f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
923f4a2713aSLionel Sambuc }
924f4a2713aSLionel Sambuc 
925f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_srli_epi64(__m128i __a,int __count)926f4a2713aSLionel Sambuc _mm_srli_epi64(__m128i __a, int __count)
927f4a2713aSLionel Sambuc {
928f4a2713aSLionel Sambuc   return __builtin_ia32_psrlqi128(__a, __count);
929f4a2713aSLionel Sambuc }
930f4a2713aSLionel Sambuc 
931f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_srl_epi64(__m128i __a,__m128i __count)932f4a2713aSLionel Sambuc _mm_srl_epi64(__m128i __a, __m128i __count)
933f4a2713aSLionel Sambuc {
934f4a2713aSLionel Sambuc   return __builtin_ia32_psrlq128(__a, __count);
935f4a2713aSLionel Sambuc }
936f4a2713aSLionel Sambuc 
937f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_epi8(__m128i __a,__m128i __b)938f4a2713aSLionel Sambuc _mm_cmpeq_epi8(__m128i __a, __m128i __b)
939f4a2713aSLionel Sambuc {
940f4a2713aSLionel Sambuc   return (__m128i)((__v16qi)__a == (__v16qi)__b);
941f4a2713aSLionel Sambuc }
942f4a2713aSLionel Sambuc 
943f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_epi16(__m128i __a,__m128i __b)944f4a2713aSLionel Sambuc _mm_cmpeq_epi16(__m128i __a, __m128i __b)
945f4a2713aSLionel Sambuc {
946f4a2713aSLionel Sambuc   return (__m128i)((__v8hi)__a == (__v8hi)__b);
947f4a2713aSLionel Sambuc }
948f4a2713aSLionel Sambuc 
949f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmpeq_epi32(__m128i __a,__m128i __b)950f4a2713aSLionel Sambuc _mm_cmpeq_epi32(__m128i __a, __m128i __b)
951f4a2713aSLionel Sambuc {
952f4a2713aSLionel Sambuc   return (__m128i)((__v4si)__a == (__v4si)__b);
953f4a2713aSLionel Sambuc }
954f4a2713aSLionel Sambuc 
955f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_epi8(__m128i __a,__m128i __b)956f4a2713aSLionel Sambuc _mm_cmpgt_epi8(__m128i __a, __m128i __b)
957f4a2713aSLionel Sambuc {
958f4a2713aSLionel Sambuc   /* This function always performs a signed comparison, but __v16qi is a char
959f4a2713aSLionel Sambuc      which may be signed or unsigned. */
960f4a2713aSLionel Sambuc   typedef signed char __v16qs __attribute__((__vector_size__(16)));
961f4a2713aSLionel Sambuc   return (__m128i)((__v16qs)__a > (__v16qs)__b);
962f4a2713aSLionel Sambuc }
963f4a2713aSLionel Sambuc 
964f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_epi16(__m128i __a,__m128i __b)965f4a2713aSLionel Sambuc _mm_cmpgt_epi16(__m128i __a, __m128i __b)
966f4a2713aSLionel Sambuc {
967f4a2713aSLionel Sambuc   return (__m128i)((__v8hi)__a > (__v8hi)__b);
968f4a2713aSLionel Sambuc }
969f4a2713aSLionel Sambuc 
970f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmpgt_epi32(__m128i __a,__m128i __b)971f4a2713aSLionel Sambuc _mm_cmpgt_epi32(__m128i __a, __m128i __b)
972f4a2713aSLionel Sambuc {
973f4a2713aSLionel Sambuc   return (__m128i)((__v4si)__a > (__v4si)__b);
974f4a2713aSLionel Sambuc }
975f4a2713aSLionel Sambuc 
976f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmplt_epi8(__m128i __a,__m128i __b)977f4a2713aSLionel Sambuc _mm_cmplt_epi8(__m128i __a, __m128i __b)
978f4a2713aSLionel Sambuc {
979f4a2713aSLionel Sambuc   return _mm_cmpgt_epi8(__b, __a);
980f4a2713aSLionel Sambuc }
981f4a2713aSLionel Sambuc 
982f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmplt_epi16(__m128i __a,__m128i __b)983f4a2713aSLionel Sambuc _mm_cmplt_epi16(__m128i __a, __m128i __b)
984f4a2713aSLionel Sambuc {
985f4a2713aSLionel Sambuc   return _mm_cmpgt_epi16(__b, __a);
986f4a2713aSLionel Sambuc }
987f4a2713aSLionel Sambuc 
988f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cmplt_epi32(__m128i __a,__m128i __b)989f4a2713aSLionel Sambuc _mm_cmplt_epi32(__m128i __a, __m128i __b)
990f4a2713aSLionel Sambuc {
991f4a2713aSLionel Sambuc   return _mm_cmpgt_epi32(__b, __a);
992f4a2713aSLionel Sambuc }
993f4a2713aSLionel Sambuc 
994f4a2713aSLionel Sambuc #ifdef __x86_64__
995f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cvtsi64_sd(__m128d __a,long long __b)996f4a2713aSLionel Sambuc _mm_cvtsi64_sd(__m128d __a, long long __b)
997f4a2713aSLionel Sambuc {
998f4a2713aSLionel Sambuc   __a[0] = __b;
999f4a2713aSLionel Sambuc   return __a;
1000f4a2713aSLionel Sambuc }
1001f4a2713aSLionel Sambuc 
1002f4a2713aSLionel Sambuc static __inline__ long long __attribute__((__always_inline__, __nodebug__))
_mm_cvtsd_si64(__m128d __a)1003f4a2713aSLionel Sambuc _mm_cvtsd_si64(__m128d __a)
1004f4a2713aSLionel Sambuc {
1005f4a2713aSLionel Sambuc   return __builtin_ia32_cvtsd2si64(__a);
1006f4a2713aSLionel Sambuc }
1007f4a2713aSLionel Sambuc 
1008f4a2713aSLionel Sambuc static __inline__ long long __attribute__((__always_inline__, __nodebug__))
_mm_cvttsd_si64(__m128d __a)1009f4a2713aSLionel Sambuc _mm_cvttsd_si64(__m128d __a)
1010f4a2713aSLionel Sambuc {
1011f4a2713aSLionel Sambuc   return __a[0];
1012f4a2713aSLionel Sambuc }
1013f4a2713aSLionel Sambuc #endif
1014f4a2713aSLionel Sambuc 
1015f4a2713aSLionel Sambuc static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cvtepi32_ps(__m128i __a)1016f4a2713aSLionel Sambuc _mm_cvtepi32_ps(__m128i __a)
1017f4a2713aSLionel Sambuc {
1018f4a2713aSLionel Sambuc   return __builtin_ia32_cvtdq2ps((__v4si)__a);
1019f4a2713aSLionel Sambuc }
1020f4a2713aSLionel Sambuc 
1021f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cvtps_epi32(__m128 __a)1022f4a2713aSLionel Sambuc _mm_cvtps_epi32(__m128 __a)
1023f4a2713aSLionel Sambuc {
1024f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_cvtps2dq(__a);
1025f4a2713aSLionel Sambuc }
1026f4a2713aSLionel Sambuc 
1027f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cvttps_epi32(__m128 __a)1028f4a2713aSLionel Sambuc _mm_cvttps_epi32(__m128 __a)
1029f4a2713aSLionel Sambuc {
1030f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_cvttps2dq(__a);
1031f4a2713aSLionel Sambuc }
1032f4a2713aSLionel Sambuc 
1033f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cvtsi32_si128(int __a)1034f4a2713aSLionel Sambuc _mm_cvtsi32_si128(int __a)
1035f4a2713aSLionel Sambuc {
1036f4a2713aSLionel Sambuc   return (__m128i)(__v4si){ __a, 0, 0, 0 };
1037f4a2713aSLionel Sambuc }
1038f4a2713aSLionel Sambuc 
1039f4a2713aSLionel Sambuc #ifdef __x86_64__
1040f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_cvtsi64_si128(long long __a)1041f4a2713aSLionel Sambuc _mm_cvtsi64_si128(long long __a)
1042f4a2713aSLionel Sambuc {
1043f4a2713aSLionel Sambuc   return (__m128i){ __a, 0 };
1044f4a2713aSLionel Sambuc }
1045f4a2713aSLionel Sambuc #endif
1046f4a2713aSLionel Sambuc 
1047f4a2713aSLionel Sambuc static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_cvtsi128_si32(__m128i __a)1048f4a2713aSLionel Sambuc _mm_cvtsi128_si32(__m128i __a)
1049f4a2713aSLionel Sambuc {
1050f4a2713aSLionel Sambuc   __v4si __b = (__v4si)__a;
1051f4a2713aSLionel Sambuc   return __b[0];
1052f4a2713aSLionel Sambuc }
1053f4a2713aSLionel Sambuc 
1054f4a2713aSLionel Sambuc #ifdef __x86_64__
1055f4a2713aSLionel Sambuc static __inline__ long long __attribute__((__always_inline__, __nodebug__))
_mm_cvtsi128_si64(__m128i __a)1056f4a2713aSLionel Sambuc _mm_cvtsi128_si64(__m128i __a)
1057f4a2713aSLionel Sambuc {
1058f4a2713aSLionel Sambuc   return __a[0];
1059f4a2713aSLionel Sambuc }
1060f4a2713aSLionel Sambuc #endif
1061f4a2713aSLionel Sambuc 
1062f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_load_si128(__m128i const * __p)1063f4a2713aSLionel Sambuc _mm_load_si128(__m128i const *__p)
1064f4a2713aSLionel Sambuc {
1065f4a2713aSLionel Sambuc   return *__p;
1066f4a2713aSLionel Sambuc }
1067f4a2713aSLionel Sambuc 
1068f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_loadu_si128(__m128i const * __p)1069f4a2713aSLionel Sambuc _mm_loadu_si128(__m128i const *__p)
1070f4a2713aSLionel Sambuc {
1071f4a2713aSLionel Sambuc   struct __loadu_si128 {
1072f4a2713aSLionel Sambuc     __m128i __v;
1073f4a2713aSLionel Sambuc   } __attribute__((packed, may_alias));
1074f4a2713aSLionel Sambuc   return ((struct __loadu_si128*)__p)->__v;
1075f4a2713aSLionel Sambuc }
1076f4a2713aSLionel Sambuc 
1077f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_loadl_epi64(__m128i const * __p)1078f4a2713aSLionel Sambuc _mm_loadl_epi64(__m128i const *__p)
1079f4a2713aSLionel Sambuc {
1080f4a2713aSLionel Sambuc   struct __mm_loadl_epi64_struct {
1081f4a2713aSLionel Sambuc     long long __u;
1082f4a2713aSLionel Sambuc   } __attribute__((__packed__, __may_alias__));
1083f4a2713aSLionel Sambuc   return (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0};
1084f4a2713aSLionel Sambuc }
1085f4a2713aSLionel Sambuc 
1086f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_set_epi64x(long long q1,long long q0)1087f4a2713aSLionel Sambuc _mm_set_epi64x(long long q1, long long q0)
1088f4a2713aSLionel Sambuc {
1089f4a2713aSLionel Sambuc   return (__m128i){ q0, q1 };
1090f4a2713aSLionel Sambuc }
1091f4a2713aSLionel Sambuc 
1092f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_set_epi64(__m64 q1,__m64 q0)1093f4a2713aSLionel Sambuc _mm_set_epi64(__m64 q1, __m64 q0)
1094f4a2713aSLionel Sambuc {
1095f4a2713aSLionel Sambuc   return (__m128i){ (long long)q0, (long long)q1 };
1096f4a2713aSLionel Sambuc }
1097f4a2713aSLionel Sambuc 
1098f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_set_epi32(int i3,int i2,int i1,int i0)1099f4a2713aSLionel Sambuc _mm_set_epi32(int i3, int i2, int i1, int i0)
1100f4a2713aSLionel Sambuc {
1101f4a2713aSLionel Sambuc   return (__m128i)(__v4si){ i0, i1, i2, i3};
1102f4a2713aSLionel Sambuc }
1103f4a2713aSLionel Sambuc 
1104f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_set_epi16(short w7,short w6,short w5,short w4,short w3,short w2,short w1,short w0)1105f4a2713aSLionel Sambuc _mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0)
1106f4a2713aSLionel Sambuc {
1107f4a2713aSLionel Sambuc   return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1108f4a2713aSLionel Sambuc }
1109f4a2713aSLionel Sambuc 
1110f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_set_epi8(char b15,char b14,char b13,char b12,char b11,char b10,char b9,char b8,char b7,char b6,char b5,char b4,char b3,char b2,char b1,char b0)1111f4a2713aSLionel Sambuc _mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0)
1112f4a2713aSLionel Sambuc {
1113f4a2713aSLionel Sambuc   return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
1114f4a2713aSLionel Sambuc }
1115f4a2713aSLionel Sambuc 
1116f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_set1_epi64x(long long __q)1117f4a2713aSLionel Sambuc _mm_set1_epi64x(long long __q)
1118f4a2713aSLionel Sambuc {
1119f4a2713aSLionel Sambuc   return (__m128i){ __q, __q };
1120f4a2713aSLionel Sambuc }
1121f4a2713aSLionel Sambuc 
1122f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_set1_epi64(__m64 __q)1123f4a2713aSLionel Sambuc _mm_set1_epi64(__m64 __q)
1124f4a2713aSLionel Sambuc {
1125f4a2713aSLionel Sambuc   return (__m128i){ (long long)__q, (long long)__q };
1126f4a2713aSLionel Sambuc }
1127f4a2713aSLionel Sambuc 
1128f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_set1_epi32(int __i)1129f4a2713aSLionel Sambuc _mm_set1_epi32(int __i)
1130f4a2713aSLionel Sambuc {
1131f4a2713aSLionel Sambuc   return (__m128i)(__v4si){ __i, __i, __i, __i };
1132f4a2713aSLionel Sambuc }
1133f4a2713aSLionel Sambuc 
1134f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_set1_epi16(short __w)1135f4a2713aSLionel Sambuc _mm_set1_epi16(short __w)
1136f4a2713aSLionel Sambuc {
1137f4a2713aSLionel Sambuc   return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w };
1138f4a2713aSLionel Sambuc }
1139f4a2713aSLionel Sambuc 
1140f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_set1_epi8(char __b)1141f4a2713aSLionel Sambuc _mm_set1_epi8(char __b)
1142f4a2713aSLionel Sambuc {
1143f4a2713aSLionel Sambuc   return (__m128i)(__v16qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b };
1144f4a2713aSLionel Sambuc }
1145f4a2713aSLionel Sambuc 
1146f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_setr_epi64(__m64 q0,__m64 q1)1147f4a2713aSLionel Sambuc _mm_setr_epi64(__m64 q0, __m64 q1)
1148f4a2713aSLionel Sambuc {
1149f4a2713aSLionel Sambuc   return (__m128i){ (long long)q0, (long long)q1 };
1150f4a2713aSLionel Sambuc }
1151f4a2713aSLionel Sambuc 
1152f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_setr_epi32(int i0,int i1,int i2,int i3)1153f4a2713aSLionel Sambuc _mm_setr_epi32(int i0, int i1, int i2, int i3)
1154f4a2713aSLionel Sambuc {
1155f4a2713aSLionel Sambuc   return (__m128i)(__v4si){ i0, i1, i2, i3};
1156f4a2713aSLionel Sambuc }
1157f4a2713aSLionel Sambuc 
1158f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_setr_epi16(short w0,short w1,short w2,short w3,short w4,short w5,short w6,short w7)1159f4a2713aSLionel Sambuc _mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7)
1160f4a2713aSLionel Sambuc {
1161f4a2713aSLionel Sambuc   return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 };
1162f4a2713aSLionel Sambuc }
1163f4a2713aSLionel Sambuc 
1164f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_setr_epi8(char b0,char b1,char b2,char b3,char b4,char b5,char b6,char b7,char b8,char b9,char b10,char b11,char b12,char b13,char b14,char b15)1165f4a2713aSLionel Sambuc _mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15)
1166f4a2713aSLionel Sambuc {
1167f4a2713aSLionel Sambuc   return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 };
1168f4a2713aSLionel Sambuc }
1169f4a2713aSLionel Sambuc 
1170f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_setzero_si128(void)1171f4a2713aSLionel Sambuc _mm_setzero_si128(void)
1172f4a2713aSLionel Sambuc {
1173f4a2713aSLionel Sambuc   return (__m128i){ 0LL, 0LL };
1174f4a2713aSLionel Sambuc }
1175f4a2713aSLionel Sambuc 
1176f4a2713aSLionel Sambuc static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_store_si128(__m128i * __p,__m128i __b)1177f4a2713aSLionel Sambuc _mm_store_si128(__m128i *__p, __m128i __b)
1178f4a2713aSLionel Sambuc {
1179f4a2713aSLionel Sambuc   *__p = __b;
1180f4a2713aSLionel Sambuc }
1181f4a2713aSLionel Sambuc 
1182f4a2713aSLionel Sambuc static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_storeu_si128(__m128i * __p,__m128i __b)1183f4a2713aSLionel Sambuc _mm_storeu_si128(__m128i *__p, __m128i __b)
1184f4a2713aSLionel Sambuc {
1185f4a2713aSLionel Sambuc   __builtin_ia32_storedqu((char *)__p, (__v16qi)__b);
1186f4a2713aSLionel Sambuc }
1187f4a2713aSLionel Sambuc 
1188f4a2713aSLionel Sambuc static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_maskmoveu_si128(__m128i __d,__m128i __n,char * __p)1189f4a2713aSLionel Sambuc _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
1190f4a2713aSLionel Sambuc {
1191f4a2713aSLionel Sambuc   __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);
1192f4a2713aSLionel Sambuc }
1193f4a2713aSLionel Sambuc 
1194f4a2713aSLionel Sambuc static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_storel_epi64(__m128i * __p,__m128i __a)1195f4a2713aSLionel Sambuc _mm_storel_epi64(__m128i *__p, __m128i __a)
1196f4a2713aSLionel Sambuc {
1197f4a2713aSLionel Sambuc   struct __mm_storel_epi64_struct {
1198f4a2713aSLionel Sambuc     long long __u;
1199f4a2713aSLionel Sambuc   } __attribute__((__packed__, __may_alias__));
1200f4a2713aSLionel Sambuc   ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];
1201f4a2713aSLionel Sambuc }
1202f4a2713aSLionel Sambuc 
1203f4a2713aSLionel Sambuc static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_stream_pd(double * __p,__m128d __a)1204f4a2713aSLionel Sambuc _mm_stream_pd(double *__p, __m128d __a)
1205f4a2713aSLionel Sambuc {
1206f4a2713aSLionel Sambuc   __builtin_ia32_movntpd(__p, __a);
1207f4a2713aSLionel Sambuc }
1208f4a2713aSLionel Sambuc 
1209f4a2713aSLionel Sambuc static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_stream_si128(__m128i * __p,__m128i __a)1210f4a2713aSLionel Sambuc _mm_stream_si128(__m128i *__p, __m128i __a)
1211f4a2713aSLionel Sambuc {
1212f4a2713aSLionel Sambuc   __builtin_ia32_movntdq(__p, __a);
1213f4a2713aSLionel Sambuc }
1214f4a2713aSLionel Sambuc 
/* Thin wrapper: stores the int __a at __p by calling
 * __builtin_ia32_movnti(__p, __a). */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_stream_si32(int *__p, int __a)
{
  int *__dst = __p;
  __builtin_ia32_movnti(__dst, __a);
}
1220f4a2713aSLionel Sambuc 
1221f4a2713aSLionel Sambuc #ifdef __x86_64__
/* Thin wrapper: stores the long long __a at __p by calling
 * __builtin_ia32_movnti64(__p, __a). */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_stream_si64(long long *__p, long long __a)
{
  long long *__dst = __p;
  __builtin_ia32_movnti64(__dst, __a);
}
1227f4a2713aSLionel Sambuc #endif
1228f4a2713aSLionel Sambuc 
/* Thin wrapper: passes the address __p to __builtin_ia32_clflush. Nothing is
 * read or written through __p by this function itself. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_clflush(void const *__p)
{
  void const *__addr = __p;
  __builtin_ia32_clflush(__addr);
}
1234f4a2713aSLionel Sambuc 
/* Thin wrapper around __builtin_ia32_lfence(); takes no arguments and returns
 * nothing. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_lfence(void)
{
  (void)__builtin_ia32_lfence();
}
1240f4a2713aSLionel Sambuc 
/* Thin wrapper around __builtin_ia32_mfence(); takes no arguments and returns
 * nothing. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_mfence(void)
{
  (void)__builtin_ia32_mfence();
}
1246f4a2713aSLionel Sambuc 
1247f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_packs_epi16(__m128i __a,__m128i __b)1248f4a2713aSLionel Sambuc _mm_packs_epi16(__m128i __a, __m128i __b)
1249f4a2713aSLionel Sambuc {
1250f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
1251f4a2713aSLionel Sambuc }
1252f4a2713aSLionel Sambuc 
1253f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_packs_epi32(__m128i __a,__m128i __b)1254f4a2713aSLionel Sambuc _mm_packs_epi32(__m128i __a, __m128i __b)
1255f4a2713aSLionel Sambuc {
1256f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
1257f4a2713aSLionel Sambuc }
1258f4a2713aSLionel Sambuc 
1259f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_packus_epi16(__m128i __a,__m128i __b)1260f4a2713aSLionel Sambuc _mm_packus_epi16(__m128i __a, __m128i __b)
1261f4a2713aSLionel Sambuc {
1262f4a2713aSLionel Sambuc   return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
1263f4a2713aSLionel Sambuc }
1264f4a2713aSLionel Sambuc 
1265f4a2713aSLionel Sambuc static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_extract_epi16(__m128i __a,int __imm)1266f4a2713aSLionel Sambuc _mm_extract_epi16(__m128i __a, int __imm)
1267f4a2713aSLionel Sambuc {
1268f4a2713aSLionel Sambuc   __v8hi __b = (__v8hi)__a;
1269f4a2713aSLionel Sambuc   return (unsigned short)__b[__imm & 7];
1270f4a2713aSLionel Sambuc }
1271f4a2713aSLionel Sambuc 
1272f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_insert_epi16(__m128i __a,int __b,int __imm)1273f4a2713aSLionel Sambuc _mm_insert_epi16(__m128i __a, int __b, int __imm)
1274f4a2713aSLionel Sambuc {
1275f4a2713aSLionel Sambuc   __v8hi __c = (__v8hi)__a;
1276f4a2713aSLionel Sambuc   __c[__imm & 7] = __b;
1277f4a2713aSLionel Sambuc   return (__m128i)__c;
1278f4a2713aSLionel Sambuc }
1279f4a2713aSLionel Sambuc 
1280f4a2713aSLionel Sambuc static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_movemask_epi8(__m128i __a)1281f4a2713aSLionel Sambuc _mm_movemask_epi8(__m128i __a)
1282f4a2713aSLionel Sambuc {
1283f4a2713aSLionel Sambuc   return __builtin_ia32_pmovmskb128((__v16qi)__a);
1284f4a2713aSLionel Sambuc }
1285f4a2713aSLionel Sambuc 
/* Shuffles the four __v4si elements of `a`: result element k is the element
 * of `a` selected by the two-bit field of `imm` at bits 2k and 2k+1. All four
 * indices are in 0..3, so the second (zero) shuffle operand is never picked.
 * `imm` supplies __builtin_shufflevector indices and so must be a constant.
 *
 * `a` is cast in place rather than copied into a macro-local named __a: with
 * a local, an argument that itself mentions __a expanded to
 * `__m128i __a = (__a);` and read the new, uninitialized variable. `a` is
 * still evaluated exactly once. */
#define _mm_shuffle_epi32(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v4si)(__m128i)(a), \
                                   (__v4si) _mm_set1_epi32(0), \
                                   (imm) & 0x3, ((imm) & 0xc) >> 2, \
                                   ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); })
1293f4a2713aSLionel Sambuc 
/* Shuffles the low four __v8hi elements of `a`: result element k (k = 0..3)
 * is the element of `a` selected by the two-bit field of `imm` at bits 2k and
 * 2k+1; elements 4..7 are copied unchanged. The second (zero) shuffle operand
 * is never picked. `imm` supplies __builtin_shufflevector indices and so must
 * be a constant.
 *
 * `a` is cast in place rather than copied into a macro-local named __a: with
 * a local, an argument that itself mentions __a expanded to
 * `__m128i __a = (__a);` and read the new, uninitialized variable. `a` is
 * still evaluated exactly once. */
#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
                                   (__v8hi) _mm_set1_epi16(0), \
                                   (imm) & 0x3, ((imm) & 0xc) >> 2, \
                                   ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \
                                   4, 5, 6, 7); })
1302f4a2713aSLionel Sambuc 
/* Shuffles the high four __v8hi elements of `a`: elements 0..3 are copied
 * unchanged, and result element 4+k (k = 0..3) is element 4+f of `a`, where f
 * is the two-bit field of `imm` at bits 2k and 2k+1. The second (zero)
 * shuffle operand is never picked. `imm` supplies __builtin_shufflevector
 * indices and so must be a constant.
 *
 * `a` is cast in place rather than copied into a macro-local named __a: with
 * a local, an argument that itself mentions __a expanded to
 * `__m128i __a = (__a);` and read the new, uninitialized variable. `a` is
 * still evaluated exactly once. */
#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \
  (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(a), \
                                   (__v8hi) _mm_set1_epi16(0), \
                                   0, 1, 2, 3, \
                                   4 + (((imm) & 0x03) >> 0), \
                                   4 + (((imm) & 0x0c) >> 2), \
                                   4 + (((imm) & 0x30) >> 4), \
                                   4 + (((imm) & 0xc0) >> 6)); })
1313f4a2713aSLionel Sambuc 
1314f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_unpackhi_epi8(__m128i __a,__m128i __b)1315f4a2713aSLionel Sambuc _mm_unpackhi_epi8(__m128i __a, __m128i __b)
1316f4a2713aSLionel Sambuc {
1317f4a2713aSLionel Sambuc   return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
1318f4a2713aSLionel Sambuc }
1319f4a2713aSLionel Sambuc 
1320f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_unpackhi_epi16(__m128i __a,__m128i __b)1321f4a2713aSLionel Sambuc _mm_unpackhi_epi16(__m128i __a, __m128i __b)
1322f4a2713aSLionel Sambuc {
1323f4a2713aSLionel Sambuc   return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
1324f4a2713aSLionel Sambuc }
1325f4a2713aSLionel Sambuc 
1326f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_unpackhi_epi32(__m128i __a,__m128i __b)1327f4a2713aSLionel Sambuc _mm_unpackhi_epi32(__m128i __a, __m128i __b)
1328f4a2713aSLionel Sambuc {
1329f4a2713aSLionel Sambuc   return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);
1330f4a2713aSLionel Sambuc }
1331f4a2713aSLionel Sambuc 
1332f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_unpackhi_epi64(__m128i __a,__m128i __b)1333f4a2713aSLionel Sambuc _mm_unpackhi_epi64(__m128i __a, __m128i __b)
1334f4a2713aSLionel Sambuc {
1335f4a2713aSLionel Sambuc   return (__m128i)__builtin_shufflevector(__a, __b, 1, 2+1);
1336f4a2713aSLionel Sambuc }
1337f4a2713aSLionel Sambuc 
1338f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_unpacklo_epi8(__m128i __a,__m128i __b)1339f4a2713aSLionel Sambuc _mm_unpacklo_epi8(__m128i __a, __m128i __b)
1340f4a2713aSLionel Sambuc {
1341f4a2713aSLionel Sambuc   return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
1342f4a2713aSLionel Sambuc }
1343f4a2713aSLionel Sambuc 
1344f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_unpacklo_epi16(__m128i __a,__m128i __b)1345f4a2713aSLionel Sambuc _mm_unpacklo_epi16(__m128i __a, __m128i __b)
1346f4a2713aSLionel Sambuc {
1347f4a2713aSLionel Sambuc   return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
1348f4a2713aSLionel Sambuc }
1349f4a2713aSLionel Sambuc 
1350f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_unpacklo_epi32(__m128i __a,__m128i __b)1351f4a2713aSLionel Sambuc _mm_unpacklo_epi32(__m128i __a, __m128i __b)
1352f4a2713aSLionel Sambuc {
1353f4a2713aSLionel Sambuc   return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);
1354f4a2713aSLionel Sambuc }
1355f4a2713aSLionel Sambuc 
1356f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_unpacklo_epi64(__m128i __a,__m128i __b)1357f4a2713aSLionel Sambuc _mm_unpacklo_epi64(__m128i __a, __m128i __b)
1358f4a2713aSLionel Sambuc {
1359f4a2713aSLionel Sambuc   return (__m128i)__builtin_shufflevector(__a, __b, 0, 2+0);
1360f4a2713aSLionel Sambuc }
1361f4a2713aSLionel Sambuc 
1362f4a2713aSLionel Sambuc static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_movepi64_pi64(__m128i __a)1363f4a2713aSLionel Sambuc _mm_movepi64_pi64(__m128i __a)
1364f4a2713aSLionel Sambuc {
1365f4a2713aSLionel Sambuc   return (__m64)__a[0];
1366f4a2713aSLionel Sambuc }
1367f4a2713aSLionel Sambuc 
1368f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_movpi64_epi64(__m64 __a)1369f4a2713aSLionel Sambuc _mm_movpi64_epi64(__m64 __a)
1370f4a2713aSLionel Sambuc {
1371f4a2713aSLionel Sambuc   return (__m128i){ (long long)__a, 0 };
1372f4a2713aSLionel Sambuc }
1373f4a2713aSLionel Sambuc 
1374f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_move_epi64(__m128i __a)1375f4a2713aSLionel Sambuc _mm_move_epi64(__m128i __a)
1376f4a2713aSLionel Sambuc {
1377f4a2713aSLionel Sambuc   return __builtin_shufflevector(__a, (__m128i){ 0 }, 0, 2);
1378f4a2713aSLionel Sambuc }
1379f4a2713aSLionel Sambuc 
1380f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_unpackhi_pd(__m128d __a,__m128d __b)1381f4a2713aSLionel Sambuc _mm_unpackhi_pd(__m128d __a, __m128d __b)
1382f4a2713aSLionel Sambuc {
1383f4a2713aSLionel Sambuc   return __builtin_shufflevector(__a, __b, 1, 2+1);
1384f4a2713aSLionel Sambuc }
1385f4a2713aSLionel Sambuc 
1386f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_unpacklo_pd(__m128d __a,__m128d __b)1387f4a2713aSLionel Sambuc _mm_unpacklo_pd(__m128d __a, __m128d __b)
1388f4a2713aSLionel Sambuc {
1389f4a2713aSLionel Sambuc   return __builtin_shufflevector(__a, __b, 0, 2+0);
1390f4a2713aSLionel Sambuc }
1391f4a2713aSLionel Sambuc 
1392f4a2713aSLionel Sambuc static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_movemask_pd(__m128d __a)1393f4a2713aSLionel Sambuc _mm_movemask_pd(__m128d __a)
1394f4a2713aSLionel Sambuc {
1395f4a2713aSLionel Sambuc   return __builtin_ia32_movmskpd(__a);
1396f4a2713aSLionel Sambuc }
1397f4a2713aSLionel Sambuc 
/* Builds a two-element result from `a` and `b`: element 0 is element
 * ((i) & 1) of `a`, and element 1 is element (((i) & 2) >> 1) of `b` (shuffle
 * indices 2 and 3 select from the second operand). `i` supplies
 * __builtin_shufflevector indices and so must be a constant.
 *
 * `a` and `b` are cast in place rather than copied into macro-locals named
 * __a and __b: with locals, an argument that itself mentions __a or __b read
 * the new, uninitialized variable (e.g. `__m128d __a = (__a);`), and passing
 * __a as `b` silently picked up the copy of `a`. Each argument is still
 * evaluated exactly once. */
#define _mm_shuffle_pd(a, b, i) __extension__ ({ \
  __builtin_shufflevector((__m128d)(a), (__m128d)(b), \
                          (i) & 1, (((i) & 2) >> 1) + 2); })
1404f4a2713aSLionel Sambuc 
1405f4a2713aSLionel Sambuc static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_castpd_ps(__m128d __a)1406f4a2713aSLionel Sambuc _mm_castpd_ps(__m128d __a)
1407f4a2713aSLionel Sambuc {
1408f4a2713aSLionel Sambuc   return (__m128)__a;
1409f4a2713aSLionel Sambuc }
1410f4a2713aSLionel Sambuc 
1411f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_castpd_si128(__m128d __a)1412f4a2713aSLionel Sambuc _mm_castpd_si128(__m128d __a)
1413f4a2713aSLionel Sambuc {
1414f4a2713aSLionel Sambuc   return (__m128i)__a;
1415f4a2713aSLionel Sambuc }
1416f4a2713aSLionel Sambuc 
1417f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_castps_pd(__m128 __a)1418f4a2713aSLionel Sambuc _mm_castps_pd(__m128 __a)
1419f4a2713aSLionel Sambuc {
1420f4a2713aSLionel Sambuc   return (__m128d)__a;
1421f4a2713aSLionel Sambuc }
1422f4a2713aSLionel Sambuc 
1423f4a2713aSLionel Sambuc static __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
_mm_castps_si128(__m128 __a)1424f4a2713aSLionel Sambuc _mm_castps_si128(__m128 __a)
1425f4a2713aSLionel Sambuc {
1426f4a2713aSLionel Sambuc   return (__m128i)__a;
1427f4a2713aSLionel Sambuc }
1428f4a2713aSLionel Sambuc 
1429f4a2713aSLionel Sambuc static __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
_mm_castsi128_ps(__m128i __a)1430f4a2713aSLionel Sambuc _mm_castsi128_ps(__m128i __a)
1431f4a2713aSLionel Sambuc {
1432f4a2713aSLionel Sambuc   return (__m128)__a;
1433f4a2713aSLionel Sambuc }
1434f4a2713aSLionel Sambuc 
1435f4a2713aSLionel Sambuc static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_castsi128_pd(__m128i __a)1436f4a2713aSLionel Sambuc _mm_castsi128_pd(__m128i __a)
1437f4a2713aSLionel Sambuc {
1438f4a2713aSLionel Sambuc   return (__m128d)__a;
1439f4a2713aSLionel Sambuc }
1440f4a2713aSLionel Sambuc 
/* Emits a single `pause` instruction through a volatile inline-asm statement
 * with no operands and no clobbers. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_pause(void)
{
  __asm__ __volatile__ ("pause");
}
1446f4a2713aSLionel Sambuc 
/* Packs two selectors into one value: x is shifted left by one bit and OR-ed
 * with y, i.e. (x << 1) | y. */
#define _MM_SHUFFLE2(x, y) ((y) | ((x) << 1))
1448f4a2713aSLionel Sambuc 
1449f4a2713aSLionel Sambuc #endif /* __SSE2__ */
1450f4a2713aSLionel Sambuc 
1451f4a2713aSLionel Sambuc #endif /* __EMMINTRIN_H */
1452