1 /* Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
2 
3    This file is part of GCC.
4 
5    GCC is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3, or (at your option)
8    any later version.
9 
10    GCC is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    Under Section 7 of GPL version 3, you are granted additional
16    permissions described in the GCC Runtime Library Exception, version
17    3.1, as published by the Free Software Foundation.
18 
19    You should have received a copy of the GNU General Public License and
20    a copy of the GCC Runtime Library Exception along with this program;
21    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22    <http://www.gnu.org/licenses/>.  */
23 
24 #ifndef _X86INTRIN_H_INCLUDED
25 # error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
26 #endif
27 
28 #ifndef _XOPMMINTRIN_H_INCLUDED
29 #define _XOPMMINTRIN_H_INCLUDED
30 
31 #ifndef __XOP__
32 # error "XOP instruction set not enabled"
33 #else
34 
35 #include <fma4intrin.h>
36 
37 /* Integer multiply/add instructions. */
38 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
39 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
40 {
41   return (__m128i) __builtin_ia32_vpmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C);
42 }
43 
44 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
45 _mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
46 {
47   return (__m128i) __builtin_ia32_vpmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
48 }
49 
50 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
51 _mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
52 {
53   return  (__m128i) __builtin_ia32_vpmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
54 }
55 
56 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
57 _mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
58 {
59   return  (__m128i) __builtin_ia32_vpmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
60 }
61 
62 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
63 _mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
64 {
65   return  (__m128i) __builtin_ia32_vpmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
66 }
67 
68 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
69 _mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
70 {
71   return  (__m128i) __builtin_ia32_vpmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
72 }
73 
74 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
75 _mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
76 {
77   return  (__m128i) __builtin_ia32_vpmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
78 }
79 
80 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
81 _mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
82 {
83   return  (__m128i) __builtin_ia32_vpmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
84 }
85 
86 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
87 _mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
88 {
89   return  (__m128i) __builtin_ia32_vpmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
90 }
91 
92 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
93 _mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
94 {
95   return  (__m128i) __builtin_ia32_vpmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
96 }
97 
98 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
99 _mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
100 {
101   return  (__m128i) __builtin_ia32_vpmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
102 }
103 
104 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
105 _mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
106 {
107   return  (__m128i) __builtin_ia32_vpmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
108 }
109 
110 /* Packed Integer Horizontal Add and Subtract */
111 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
112 _mm_haddw_epi8(__m128i __A)
113 {
114   return  (__m128i) __builtin_ia32_vphaddbw ((__v16qi)__A);
115 }
116 
117 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
118 _mm_haddd_epi8(__m128i __A)
119 {
120   return  (__m128i) __builtin_ia32_vphaddbd ((__v16qi)__A);
121 }
122 
123 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
124 _mm_haddq_epi8(__m128i __A)
125 {
126   return  (__m128i) __builtin_ia32_vphaddbq ((__v16qi)__A);
127 }
128 
129 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
130 _mm_haddd_epi16(__m128i __A)
131 {
132   return  (__m128i) __builtin_ia32_vphaddwd ((__v8hi)__A);
133 }
134 
135 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
136 _mm_haddq_epi16(__m128i __A)
137 {
138   return  (__m128i) __builtin_ia32_vphaddwq ((__v8hi)__A);
139 }
140 
141 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
142 _mm_haddq_epi32(__m128i __A)
143 {
144   return  (__m128i) __builtin_ia32_vphadddq ((__v4si)__A);
145 }
146 
147 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
148 _mm_haddw_epu8(__m128i __A)
149 {
150   return  (__m128i) __builtin_ia32_vphaddubw ((__v16qi)__A);
151 }
152 
153 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
154 _mm_haddd_epu8(__m128i __A)
155 {
156   return  (__m128i) __builtin_ia32_vphaddubd ((__v16qi)__A);
157 }
158 
159 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
160 _mm_haddq_epu8(__m128i __A)
161 {
162   return  (__m128i) __builtin_ia32_vphaddubq ((__v16qi)__A);
163 }
164 
165 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
166 _mm_haddd_epu16(__m128i __A)
167 {
168   return  (__m128i) __builtin_ia32_vphadduwd ((__v8hi)__A);
169 }
170 
171 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
172 _mm_haddq_epu16(__m128i __A)
173 {
174   return  (__m128i) __builtin_ia32_vphadduwq ((__v8hi)__A);
175 }
176 
177 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
178 _mm_haddq_epu32(__m128i __A)
179 {
180   return  (__m128i) __builtin_ia32_vphaddudq ((__v4si)__A);
181 }
182 
183 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
184 _mm_hsubw_epi8(__m128i __A)
185 {
186   return  (__m128i) __builtin_ia32_vphsubbw ((__v16qi)__A);
187 }
188 
189 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
190 _mm_hsubd_epi16(__m128i __A)
191 {
192   return  (__m128i) __builtin_ia32_vphsubwd ((__v8hi)__A);
193 }
194 
195 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
196 _mm_hsubq_epi32(__m128i __A)
197 {
198   return  (__m128i) __builtin_ia32_vphsubdq ((__v4si)__A);
199 }
200 
201 /* Vector conditional move and permute */
202 
203 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
204 _mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
205 {
206   return  (__m128i) __builtin_ia32_vpcmov (__A, __B, __C);
207 }
208 
209 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
210 _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
211 {
212   return  (__m128i) __builtin_ia32_vpperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
213 }
214 
215 /* Packed Integer Rotates and Shifts
216    Rotates - Non-Immediate form */
217 
218 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
219 _mm_rot_epi8(__m128i __A,  __m128i __B)
220 {
221   return  (__m128i) __builtin_ia32_vprotb ((__v16qi)__A, (__v16qi)__B);
222 }
223 
224 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
225 _mm_rot_epi16(__m128i __A,  __m128i __B)
226 {
227   return  (__m128i) __builtin_ia32_vprotw ((__v8hi)__A, (__v8hi)__B);
228 }
229 
230 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
231 _mm_rot_epi32(__m128i __A,  __m128i __B)
232 {
233   return  (__m128i) __builtin_ia32_vprotd ((__v4si)__A, (__v4si)__B);
234 }
235 
236 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
237 _mm_rot_epi64(__m128i __A,  __m128i __B)
238 {
239   return (__m128i)  __builtin_ia32_vprotq ((__v2di)__A, (__v2di)__B);
240 }
241 
242 /* Rotates - Immediate form */
243 
244 #ifdef __OPTIMIZE__
245 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
246 _mm_roti_epi8(__m128i __A, const int __B)
247 {
248   return  (__m128i) __builtin_ia32_vprotbi ((__v16qi)__A, __B);
249 }
250 
251 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
252 _mm_roti_epi16(__m128i __A, const int __B)
253 {
254   return  (__m128i) __builtin_ia32_vprotwi ((__v8hi)__A, __B);
255 }
256 
257 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
258 _mm_roti_epi32(__m128i __A, const int __B)
259 {
260   return  (__m128i) __builtin_ia32_vprotdi ((__v4si)__A, __B);
261 }
262 
263 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
264 _mm_roti_epi64(__m128i __A, const int __B)
265 {
266   return  (__m128i) __builtin_ia32_vprotqi ((__v2di)__A, __B);
267 }
268 #else
/* Macro form of _mm_roti_epi8, used when __OPTIMIZE__ is not defined.  */
#define _mm_roti_epi8(V, IMM)                                          \
  ((__m128i) __builtin_ia32_vprotbi ((__v16qi) (__m128i) (V),          \
                                     (int) (IMM)))
/* Macro form of _mm_roti_epi16, used when __OPTIMIZE__ is not defined.  */
#define _mm_roti_epi16(V, IMM)                                         \
  ((__m128i) __builtin_ia32_vprotwi ((__v8hi) (__m128i) (V),           \
                                     (int) (IMM)))
/* Macro form of _mm_roti_epi32, used when __OPTIMIZE__ is not defined.  */
#define _mm_roti_epi32(V, IMM)                                         \
  ((__m128i) __builtin_ia32_vprotdi ((__v4si) (__m128i) (V),           \
                                     (int) (IMM)))
/* Macro form of _mm_roti_epi64, used when __OPTIMIZE__ is not defined.  */
#define _mm_roti_epi64(V, IMM)                                         \
  ((__m128i) __builtin_ia32_vprotqi ((__v2di) (__m128i) (V),           \
                                     (int) (IMM)))
277 #endif
278 
279 /* Shifts */
280 
281 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
282 _mm_shl_epi8(__m128i __A,  __m128i __B)
283 {
284   return  (__m128i) __builtin_ia32_vpshlb ((__v16qi)__A, (__v16qi)__B);
285 }
286 
287 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
288 _mm_shl_epi16(__m128i __A,  __m128i __B)
289 {
290   return  (__m128i) __builtin_ia32_vpshlw ((__v8hi)__A, (__v8hi)__B);
291 }
292 
293 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
294 _mm_shl_epi32(__m128i __A,  __m128i __B)
295 {
296   return  (__m128i) __builtin_ia32_vpshld ((__v4si)__A, (__v4si)__B);
297 }
298 
299 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
300 _mm_shl_epi64(__m128i __A,  __m128i __B)
301 {
302   return  (__m128i) __builtin_ia32_vpshlq ((__v2di)__A, (__v2di)__B);
303 }
304 
305 
306 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
307 _mm_sha_epi8(__m128i __A,  __m128i __B)
308 {
309   return  (__m128i) __builtin_ia32_vpshab ((__v16qi)__A, (__v16qi)__B);
310 }
311 
312 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
313 _mm_sha_epi16(__m128i __A,  __m128i __B)
314 {
315   return  (__m128i) __builtin_ia32_vpshaw ((__v8hi)__A, (__v8hi)__B);
316 }
317 
318 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
319 _mm_sha_epi32(__m128i __A,  __m128i __B)
320 {
321   return  (__m128i) __builtin_ia32_vpshad ((__v4si)__A, (__v4si)__B);
322 }
323 
324 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
325 _mm_sha_epi64(__m128i __A,  __m128i __B)
326 {
327   return  (__m128i) __builtin_ia32_vpshaq ((__v2di)__A, (__v2di)__B);
328 }
329 
330 /* Compare and Predicate Generation
331    pcom (integer, unsigned bytes) */
332 
333 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
334 _mm_comlt_epu8(__m128i __A, __m128i __B)
335 {
336   return (__m128i) __builtin_ia32_vpcomltub ((__v16qi)__A, (__v16qi)__B);
337 }
338 
339 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
340 _mm_comle_epu8(__m128i __A, __m128i __B)
341 {
342   return (__m128i) __builtin_ia32_vpcomleub ((__v16qi)__A, (__v16qi)__B);
343 }
344 
345 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
346 _mm_comgt_epu8(__m128i __A, __m128i __B)
347 {
348   return (__m128i) __builtin_ia32_vpcomgtub ((__v16qi)__A, (__v16qi)__B);
349 }
350 
351 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
352 _mm_comge_epu8(__m128i __A, __m128i __B)
353 {
354   return (__m128i) __builtin_ia32_vpcomgeub ((__v16qi)__A, (__v16qi)__B);
355 }
356 
357 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
358 _mm_comeq_epu8(__m128i __A, __m128i __B)
359 {
360   return (__m128i) __builtin_ia32_vpcomequb ((__v16qi)__A, (__v16qi)__B);
361 }
362 
363 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
364 _mm_comneq_epu8(__m128i __A, __m128i __B)
365 {
366   return (__m128i) __builtin_ia32_vpcomnequb ((__v16qi)__A, (__v16qi)__B);
367 }
368 
369 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
370 _mm_comfalse_epu8(__m128i __A, __m128i __B)
371 {
372   return (__m128i) __builtin_ia32_vpcomfalseub ((__v16qi)__A, (__v16qi)__B);
373 }
374 
375 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
376 _mm_comtrue_epu8(__m128i __A, __m128i __B)
377 {
378   return (__m128i) __builtin_ia32_vpcomtrueub ((__v16qi)__A, (__v16qi)__B);
379 }
380 
381 /* pcom (integer, unsigned words) */
382 
383 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
384 _mm_comlt_epu16(__m128i __A, __m128i __B)
385 {
386   return (__m128i) __builtin_ia32_vpcomltuw ((__v8hi)__A, (__v8hi)__B);
387 }
388 
389 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
390 _mm_comle_epu16(__m128i __A, __m128i __B)
391 {
392   return (__m128i) __builtin_ia32_vpcomleuw ((__v8hi)__A, (__v8hi)__B);
393 }
394 
395 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
396 _mm_comgt_epu16(__m128i __A, __m128i __B)
397 {
398   return (__m128i) __builtin_ia32_vpcomgtuw ((__v8hi)__A, (__v8hi)__B);
399 }
400 
401 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
402 _mm_comge_epu16(__m128i __A, __m128i __B)
403 {
404   return (__m128i) __builtin_ia32_vpcomgeuw ((__v8hi)__A, (__v8hi)__B);
405 }
406 
407 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
408 _mm_comeq_epu16(__m128i __A, __m128i __B)
409 {
410   return (__m128i) __builtin_ia32_vpcomequw ((__v8hi)__A, (__v8hi)__B);
411 }
412 
413 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
414 _mm_comneq_epu16(__m128i __A, __m128i __B)
415 {
416   return (__m128i) __builtin_ia32_vpcomnequw ((__v8hi)__A, (__v8hi)__B);
417 }
418 
419 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
420 _mm_comfalse_epu16(__m128i __A, __m128i __B)
421 {
422   return (__m128i) __builtin_ia32_vpcomfalseuw ((__v8hi)__A, (__v8hi)__B);
423 }
424 
425 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
426 _mm_comtrue_epu16(__m128i __A, __m128i __B)
427 {
428   return (__m128i) __builtin_ia32_vpcomtrueuw ((__v8hi)__A, (__v8hi)__B);
429 }
430 
431 /* pcom (integer, unsigned double words) */
432 
433 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
434 _mm_comlt_epu32(__m128i __A, __m128i __B)
435 {
436   return (__m128i) __builtin_ia32_vpcomltud ((__v4si)__A, (__v4si)__B);
437 }
438 
439 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
440 _mm_comle_epu32(__m128i __A, __m128i __B)
441 {
442   return (__m128i) __builtin_ia32_vpcomleud ((__v4si)__A, (__v4si)__B);
443 }
444 
445 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
446 _mm_comgt_epu32(__m128i __A, __m128i __B)
447 {
448   return (__m128i) __builtin_ia32_vpcomgtud ((__v4si)__A, (__v4si)__B);
449 }
450 
451 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
452 _mm_comge_epu32(__m128i __A, __m128i __B)
453 {
454   return (__m128i) __builtin_ia32_vpcomgeud ((__v4si)__A, (__v4si)__B);
455 }
456 
457 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
458 _mm_comeq_epu32(__m128i __A, __m128i __B)
459 {
460   return (__m128i) __builtin_ia32_vpcomequd ((__v4si)__A, (__v4si)__B);
461 }
462 
463 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
464 _mm_comneq_epu32(__m128i __A, __m128i __B)
465 {
466   return (__m128i) __builtin_ia32_vpcomnequd ((__v4si)__A, (__v4si)__B);
467 }
468 
469 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
470 _mm_comfalse_epu32(__m128i __A, __m128i __B)
471 {
472   return (__m128i) __builtin_ia32_vpcomfalseud ((__v4si)__A, (__v4si)__B);
473 }
474 
475 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
476 _mm_comtrue_epu32(__m128i __A, __m128i __B)
477 {
478   return (__m128i) __builtin_ia32_vpcomtrueud ((__v4si)__A, (__v4si)__B);
479 }
480 
481 /* pcom (integer, unsigned quad words) */
482 
483 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
484 _mm_comlt_epu64(__m128i __A, __m128i __B)
485 {
486   return (__m128i) __builtin_ia32_vpcomltuq ((__v2di)__A, (__v2di)__B);
487 }
488 
489 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
490 _mm_comle_epu64(__m128i __A, __m128i __B)
491 {
492   return (__m128i) __builtin_ia32_vpcomleuq ((__v2di)__A, (__v2di)__B);
493 }
494 
495 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
496 _mm_comgt_epu64(__m128i __A, __m128i __B)
497 {
498   return (__m128i) __builtin_ia32_vpcomgtuq ((__v2di)__A, (__v2di)__B);
499 }
500 
501 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
502 _mm_comge_epu64(__m128i __A, __m128i __B)
503 {
504   return (__m128i) __builtin_ia32_vpcomgeuq ((__v2di)__A, (__v2di)__B);
505 }
506 
507 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
508 _mm_comeq_epu64(__m128i __A, __m128i __B)
509 {
510   return (__m128i) __builtin_ia32_vpcomequq ((__v2di)__A, (__v2di)__B);
511 }
512 
513 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
514 _mm_comneq_epu64(__m128i __A, __m128i __B)
515 {
516   return (__m128i) __builtin_ia32_vpcomnequq ((__v2di)__A, (__v2di)__B);
517 }
518 
519 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
520 _mm_comfalse_epu64(__m128i __A, __m128i __B)
521 {
522   return (__m128i) __builtin_ia32_vpcomfalseuq ((__v2di)__A, (__v2di)__B);
523 }
524 
525 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
526 _mm_comtrue_epu64(__m128i __A, __m128i __B)
527 {
528   return (__m128i) __builtin_ia32_vpcomtrueuq ((__v2di)__A, (__v2di)__B);
529 }
530 
531 /*pcom (integer, signed bytes) */
532 
533 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
534 _mm_comlt_epi8(__m128i __A, __m128i __B)
535 {
536   return (__m128i) __builtin_ia32_vpcomltb ((__v16qi)__A, (__v16qi)__B);
537 }
538 
539 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
540 _mm_comle_epi8(__m128i __A, __m128i __B)
541 {
542   return (__m128i) __builtin_ia32_vpcomleb ((__v16qi)__A, (__v16qi)__B);
543 }
544 
545 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
546 _mm_comgt_epi8(__m128i __A, __m128i __B)
547 {
548   return (__m128i) __builtin_ia32_vpcomgtb ((__v16qi)__A, (__v16qi)__B);
549 }
550 
551 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
552 _mm_comge_epi8(__m128i __A, __m128i __B)
553 {
554   return (__m128i) __builtin_ia32_vpcomgeb ((__v16qi)__A, (__v16qi)__B);
555 }
556 
557 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
558 _mm_comeq_epi8(__m128i __A, __m128i __B)
559 {
560   return (__m128i) __builtin_ia32_vpcomeqb ((__v16qi)__A, (__v16qi)__B);
561 }
562 
563 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
564 _mm_comneq_epi8(__m128i __A, __m128i __B)
565 {
566   return (__m128i) __builtin_ia32_vpcomneqb ((__v16qi)__A, (__v16qi)__B);
567 }
568 
569 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
570 _mm_comfalse_epi8(__m128i __A, __m128i __B)
571 {
572   return (__m128i) __builtin_ia32_vpcomfalseb ((__v16qi)__A, (__v16qi)__B);
573 }
574 
575 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
576 _mm_comtrue_epi8(__m128i __A, __m128i __B)
577 {
578   return (__m128i) __builtin_ia32_vpcomtrueb ((__v16qi)__A, (__v16qi)__B);
579 }
580 
581 /*pcom (integer, signed words) */
582 
583 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
584 _mm_comlt_epi16(__m128i __A, __m128i __B)
585 {
586   return (__m128i) __builtin_ia32_vpcomltw ((__v8hi)__A, (__v8hi)__B);
587 }
588 
589 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
590 _mm_comle_epi16(__m128i __A, __m128i __B)
591 {
592   return (__m128i) __builtin_ia32_vpcomlew ((__v8hi)__A, (__v8hi)__B);
593 }
594 
595 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
596 _mm_comgt_epi16(__m128i __A, __m128i __B)
597 {
598   return (__m128i) __builtin_ia32_vpcomgtw ((__v8hi)__A, (__v8hi)__B);
599 }
600 
601 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
602 _mm_comge_epi16(__m128i __A, __m128i __B)
603 {
604   return (__m128i) __builtin_ia32_vpcomgew ((__v8hi)__A, (__v8hi)__B);
605 }
606 
607 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
608 _mm_comeq_epi16(__m128i __A, __m128i __B)
609 {
610   return (__m128i) __builtin_ia32_vpcomeqw ((__v8hi)__A, (__v8hi)__B);
611 }
612 
613 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
614 _mm_comneq_epi16(__m128i __A, __m128i __B)
615 {
616   return (__m128i) __builtin_ia32_vpcomneqw ((__v8hi)__A, (__v8hi)__B);
617 }
618 
619 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
620 _mm_comfalse_epi16(__m128i __A, __m128i __B)
621 {
622   return (__m128i) __builtin_ia32_vpcomfalsew ((__v8hi)__A, (__v8hi)__B);
623 }
624 
625 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
626 _mm_comtrue_epi16(__m128i __A, __m128i __B)
627 {
628   return (__m128i) __builtin_ia32_vpcomtruew ((__v8hi)__A, (__v8hi)__B);
629 }
630 
631 /*pcom (integer, signed double words) */
632 
633 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
634 _mm_comlt_epi32(__m128i __A, __m128i __B)
635 {
636   return (__m128i) __builtin_ia32_vpcomltd ((__v4si)__A, (__v4si)__B);
637 }
638 
639 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
640 _mm_comle_epi32(__m128i __A, __m128i __B)
641 {
642   return (__m128i) __builtin_ia32_vpcomled ((__v4si)__A, (__v4si)__B);
643 }
644 
645 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
646 _mm_comgt_epi32(__m128i __A, __m128i __B)
647 {
648   return (__m128i) __builtin_ia32_vpcomgtd ((__v4si)__A, (__v4si)__B);
649 }
650 
651 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
652 _mm_comge_epi32(__m128i __A, __m128i __B)
653 {
654   return (__m128i) __builtin_ia32_vpcomged ((__v4si)__A, (__v4si)__B);
655 }
656 
657 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
658 _mm_comeq_epi32(__m128i __A, __m128i __B)
659 {
660   return (__m128i) __builtin_ia32_vpcomeqd ((__v4si)__A, (__v4si)__B);
661 }
662 
663 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
664 _mm_comneq_epi32(__m128i __A, __m128i __B)
665 {
666   return (__m128i) __builtin_ia32_vpcomneqd ((__v4si)__A, (__v4si)__B);
667 }
668 
669 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
670 _mm_comfalse_epi32(__m128i __A, __m128i __B)
671 {
672   return (__m128i) __builtin_ia32_vpcomfalsed ((__v4si)__A, (__v4si)__B);
673 }
674 
675 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
676 _mm_comtrue_epi32(__m128i __A, __m128i __B)
677 {
678   return (__m128i) __builtin_ia32_vpcomtrued ((__v4si)__A, (__v4si)__B);
679 }
680 
681 /*pcom (integer, signed quad words) */
682 
683 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
684 _mm_comlt_epi64(__m128i __A, __m128i __B)
685 {
686   return (__m128i) __builtin_ia32_vpcomltq ((__v2di)__A, (__v2di)__B);
687 }
688 
689 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
690 _mm_comle_epi64(__m128i __A, __m128i __B)
691 {
692   return (__m128i) __builtin_ia32_vpcomleq ((__v2di)__A, (__v2di)__B);
693 }
694 
695 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
696 _mm_comgt_epi64(__m128i __A, __m128i __B)
697 {
698   return (__m128i) __builtin_ia32_vpcomgtq ((__v2di)__A, (__v2di)__B);
699 }
700 
701 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
702 _mm_comge_epi64(__m128i __A, __m128i __B)
703 {
704   return (__m128i) __builtin_ia32_vpcomgeq ((__v2di)__A, (__v2di)__B);
705 }
706 
707 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
708 _mm_comeq_epi64(__m128i __A, __m128i __B)
709 {
710   return (__m128i) __builtin_ia32_vpcomeqq ((__v2di)__A, (__v2di)__B);
711 }
712 
713 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
714 _mm_comneq_epi64(__m128i __A, __m128i __B)
715 {
716   return (__m128i) __builtin_ia32_vpcomneqq ((__v2di)__A, (__v2di)__B);
717 }
718 
719 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
720 _mm_comfalse_epi64(__m128i __A, __m128i __B)
721 {
722   return (__m128i) __builtin_ia32_vpcomfalseq ((__v2di)__A, (__v2di)__B);
723 }
724 
725 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
726 _mm_comtrue_epi64(__m128i __A, __m128i __B)
727 {
728   return (__m128i) __builtin_ia32_vpcomtrueq ((__v2di)__A, (__v2di)__B);
729 }
730 
731 /* FRCZ */
732 
733 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
734 _mm_frcz_ps (__m128 __A)
735 {
736   return (__m128) __builtin_ia32_vfrczps ((__v4sf)__A);
737 }
738 
739 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
740 _mm_frcz_pd (__m128d __A)
741 {
742   return (__m128d) __builtin_ia32_vfrczpd ((__v2df)__A);
743 }
744 
745 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
746 _mm_frcz_ss (__m128 __A, __m128 __B)
747 {
748   return (__m128) __builtin_ia32_vfrczss ((__v4sf)__A, (__v4sf)__B);
749 }
750 
751 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
752 _mm_frcz_sd (__m128d __A, __m128d __B)
753 {
754   return (__m128d) __builtin_ia32_vfrczsd ((__v2df)__A, (__v2df)__B);
755 }
756 
757 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
758 _mm256_frcz_ps (__m256 __A)
759 {
760   return (__m256) __builtin_ia32_vfrczps256 ((__v8sf)__A);
761 }
762 
763 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
764 _mm256_frcz_pd (__m256d __A)
765 {
766   return (__m256d) __builtin_ia32_vfrczpd256 ((__v4df)__A);
767 }
768 
769 /* PERMIL2 */
770 
771 #ifdef __OPTIMIZE__
772 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
773 _mm_permute2_pd (__m128d __X, __m128d __Y, __m128i __C, const int __I)
774 {
775   return (__m128d) __builtin_ia32_vpermil2pd ((__v2df)__X,
776 					      (__v2df)__Y,
777 					      (__v2di)__C,
778 					      __I);
779 }
780 
781 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
782 _mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I)
783 {
784   return (__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)__X,
785 						 (__v4df)__Y,
786 						 (__v4di)__C,
787 						 __I);
788 }
789 
790 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
791 _mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I)
792 {
793   return (__m128) __builtin_ia32_vpermil2ps ((__v4sf)__X,
794 					     (__v4sf)__Y,
795 					     (__v4si)__C,
796 					     __I);
797 }
798 
799 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
800 _mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I)
801 {
802   return (__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)__X,
803 						(__v8sf)__Y,
804 						(__v8si)__C,
805 						__I);
806 }
807 #else
/* Macro form of _mm_permute2_pd, used when __OPTIMIZE__ is not defined.
   X and Y are cast through __m128d.  C is cast through __m128i, the type
   the inline form of this intrinsic declares for its __C parameter,
   before being viewed as __v2di.  I is cast to int.  */
#define _mm_permute2_pd(X, Y, C, I)                                    \
  ((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X),          \
                                        (__v2df)(__m128d)(Y),          \
                                        (__v2di)(__m128i)(C),          \
                                        (int)(I)))
813 
/* Macro form of _mm256_permute2_pd, used when __OPTIMIZE__ is not defined.
   X and Y are cast through __m256d.  C is cast through __m256i, the type
   the inline form of this intrinsic declares for its __C parameter,
   before being viewed as __v4di.  I is cast to int.  */
#define _mm256_permute2_pd(X, Y, C, I)                                 \
  ((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X),       \
                                           (__v4df)(__m256d)(Y),       \
                                           (__v4di)(__m256i)(C),       \
                                           (int)(I)))
819 
/* Macro form of _mm_permute2_ps, used when __OPTIMIZE__ is not defined.
   X and Y are cast through __m128.  C is cast through __m128i, the type
   the inline form of this intrinsic declares for its __C parameter,
   before being viewed as __v4si.  I is cast to int.  */
#define _mm_permute2_ps(X, Y, C, I)                                    \
  ((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X),            \
                                       (__v4sf)(__m128)(Y),            \
                                       (__v4si)(__m128i)(C),           \
                                       (int)(I)))
825 
/* Macro form of _mm256_permute2_ps, used when __OPTIMIZE__ is not defined.
   X and Y are cast through __m256.  C is cast through __m256i, the type
   the inline form of this intrinsic declares for its __C parameter,
   before being viewed as __v8si.  I is cast to int.  */
#define _mm256_permute2_ps(X, Y, C, I)                                 \
  ((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X),         \
                                          (__v8sf)(__m256)(Y),         \
                                          (__v8si)(__m256i)(C),        \
                                          (int)(I)))
831 #endif /* __OPTIMIZE__ */
832 
833 #endif /* __XOP__ */
834 
835 #endif /* _XOPMMINTRIN_H_INCLUDED */
836