1 /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 #ifndef __IMMINTRIN_H
10 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
11 #endif
12 
13 #ifndef __AVX512FINTRIN_H
14 #define __AVX512FINTRIN_H
15 
/* Internal 512-bit (64-byte) vector types; the element type fixes the lane
 * count (64 x i8, 32 x i16, 8 x f64, 16 x f32, 8 x i64, 16 x i32). */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Unsigned types */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* We need an explicitly signed variant for char. Note that this shouldn't
 * appear in the interface though. */
typedef signed char __v64qs __attribute__((__vector_size__(64)));

/* Public 512-bit vector types.  The *_u variants are 1-byte aligned, for
 * use by intrinsics that operate on unaligned memory. */
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));

typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));

/* Opmask types: one bit per lane (8 bits for 64-bit lanes, 16 bits for
 * 32-bit lanes). */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
43 
/* Rounding mode macros.  These encode the rounding-control immediate taken
 * by the explicitly-rounded (_round_) intrinsic forms. */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04

/* Constants for integer comparison predicates.  Values are sequential
 * (EQ=0 ... NLE=6); slot 3 is a placeholder that keeps the later values
 * aligned with the instruction's predicate immediate encoding —
 * NOTE(review): presumably the VPCMP imm8 encoding, verify vs. Intel SDM. */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;
63 
/* Shuffle-control immediates for 4-element permutes.  Each name spells the
 * selected source elements (A=0, B=1, C=2, D=3) from the highest result
 * position down to the lowest; the value packs them two bits apiece, so the
 * last letter occupies bits [1:0] (e.g. _MM_PERM_AAAB == 0x01,
 * _MM_PERM_DCBA == 0xE4, the identity for high-to-low ordering). */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
153 
/* Normalization-interval selectors, sequential from 0.
 * NOTE(review): presumably consumed by the _mm512_getmant_* intrinsics,
 * which are not in this chunk — confirm against the full header. */
typedef enum
{
  _MM_MANT_NORM_1_2,    /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,   /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,   /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;

/* Sign-control selectors for the same intrinsic family, sequential from 0. */
typedef enum
{
  _MM_MANT_SIGN_src,    /* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero,   /* sign = 0             */
  _MM_MANT_SIGN_nan   /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
168 
/* Define the default attributes for the functions in this file.  Every
 * intrinsic is force-inlined, emits no debug info, and requires the avx512f
 * target feature; the 512/128 variants also declare the minimum vector
 * register width they need. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
173 
174 /* Create vectors with repeated elements */
175 
/* Returns a 512-bit integer vector with all bits zero. */
static  __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_setzero_si512(void)
{
  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}

#define _mm512_setzero_epi32 _mm512_setzero_si512

/* Returns a 512-bit vector of [8 x double] with unspecified contents. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_undefined_pd(void)
{
  return (__m512d)__builtin_ia32_undef512();
}

/* Returns a 512-bit vector of [16 x float] with unspecified contents. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_undefined(void)
{
  return (__m512)__builtin_ia32_undef512();
}

/* Same as _mm512_undefined; kept for naming symmetry with other widths. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_undefined_ps(void)
{
  return (__m512)__builtin_ia32_undef512();
}

/* Returns a 512-bit integer vector with unspecified contents. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_undefined_epi32(void)
{
  return (__m512i)__builtin_ia32_undef512();
}

/* Broadcasts the low 32-bit element of __A to all 16 result lanes
 * (shuffle index 0 repeated 16 times). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcastd_epi32 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}

/* Merge-masked broadcast: lanes whose bit is set in __M take the broadcast
 * value; the remaining lanes keep the corresponding lane of __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si) _mm512_broadcastd_epi32(__A),
                                             (__v16si) __O);
}

/* Zero-masked broadcast: lanes not selected by __M are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si) _mm512_broadcastd_epi32(__A),
                                             (__v16si) _mm512_setzero_si512());
}

/* Broadcasts the low 64-bit element of __A to all 8 result lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcastq_epi64 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
                                          0, 0, 0, 0, 0, 0, 0, 0);
}

/* Merge-masked 64-bit broadcast: unselected lanes keep __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512(__M,
                                             (__v8di) _mm512_broadcastq_epi64(__A),
                                             (__v8di) __O);

}

/* Zero-masked 64-bit broadcast: unselected lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512(__M,
                                             (__v8di) _mm512_broadcastq_epi64(__A),
                                             (__v8di) _mm512_setzero_si512());
}
254 
255 
/* Returns a 512-bit vector of [16 x float] with all elements zero. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_setzero_ps(void)
{
  return __extension__ (__m512){ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
                                 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f };
}

#define _mm512_setzero _mm512_setzero_ps

/* Returns a 512-bit vector of [8 x double] with all elements zero. */
static  __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_setzero_pd(void)
{
  return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

/* Splats __w into all 16 float lanes. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_set1_ps(float __w)
{
  return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
                                 __w, __w, __w, __w, __w, __w, __w, __w  };
}

/* Splats __w into all 8 double lanes. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_set1_pd(double __w)
{
  return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
}

/* Splats __w into all 64 byte lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set1_epi8(char __w)
{
  return __extension__ (__m512i)(__v64qi){
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w  };
}

/* Splats __w into all 32 16-bit lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set1_epi16(short __w)
{
  return __extension__ (__m512i)(__v32hi){
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w,
    __w, __w, __w, __w, __w, __w, __w, __w };
}

/* Splats __s into all 16 32-bit lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set1_epi32(int __s)
{
  return __extension__ (__m512i)(__v16si){
    __s, __s, __s, __s, __s, __s, __s, __s,
    __s, __s, __s, __s, __s, __s, __s, __s };
}

/* Zero-masked 32-bit splat: lanes not selected by __M are zeroed. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
{
  return (__m512i)__builtin_ia32_selectd_512(__M,
                                             (__v16si)_mm512_set1_epi32(__A),
                                             (__v16si)_mm512_setzero_si512());
}

/* Splats __d into all 8 64-bit lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set1_epi64(long long __d)
{
  return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
}

/* Zero-masked 64-bit splat: lanes not selected by __M are zeroed. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
{
  return (__m512i)__builtin_ia32_selectq_512(__M,
                                             (__v8di)_mm512_set1_epi64(__A),
                                             (__v8di)_mm512_setzero_si512());
}
337 
/* Broadcasts the low float element of __A to all 16 result lanes. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_broadcastss_ps(__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
                                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}

/* Repeats the 4-element pattern {__D,__C,__B,__A} four times.  Arguments
 * are listed highest-element-first, matching the _mm512_set* convention,
 * so __D lands in the lowest lane. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
{
  return __extension__ (__m512i)(__v16si)
   { __D, __C, __B, __A, __D, __C, __B, __A,
     __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Repeats the 4-element 64-bit pattern {__D,__C,__B,__A} twice. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set4_epi64 (long long __A, long long __B, long long __C,
       long long __D)
{
  return __extension__ (__m512i) (__v8di)
   { __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Repeats the 4-element double pattern {__D,__C,__B,__A} twice. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_set4_pd (double __A, double __B, double __C, double __D)
{
  return __extension__ (__m512d)
   { __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Repeats the 4-element float pattern {__D,__C,__B,__A} four times. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_set4_ps (float __A, float __B, float __C, float __D)
{
  return __extension__ (__m512)
   { __D, __C, __B, __A, __D, __C, __B, __A,
     __D, __C, __B, __A, __D, __C, __B, __A };
}

/* setr4 variants take arguments in memory (low-to-high) order and forward
 * to set4 with the order reversed. */
#define _mm512_setr4_epi32(e0,e1,e2,e3)               \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3)               \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3)                \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3)                \
  _mm512_set4_ps((e3),(e2),(e1),(e0))

/* Broadcasts the low double element of __A to all 8 result lanes. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_broadcastsd_pd(__m128d __A)
{
  return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
                                          0, 0, 0, 0, 0, 0, 0, 0);
}
394 
395 /* Cast between vector types */
396 
/* Widens a 256-bit vector to 512 bits; the upper 256 bits are left
 * unspecified via __builtin_nondeterministic_value. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_castpd256_pd512(__m256d __a)
{
  return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
                                 1, 2, 3, 4, 5, 6, 7);
}

/* Widens a 256-bit float vector to 512 bits; upper half unspecified. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_castps256_ps512(__m256 __a)
{
  return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
                                 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}

/* Truncates a 512-bit vector to its low 128 bits (lanes 0-1). */
static __inline __m128d __DEFAULT_FN_ATTRS512
_mm512_castpd512_pd128(__m512d __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1);
}

/* Truncates a 512-bit vector to its low 256 bits (lanes 0-3). */
static __inline __m256d __DEFAULT_FN_ATTRS512
_mm512_castpd512_pd256 (__m512d __A)
{
  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
}

/* Truncates a 512-bit float vector to its low 128 bits. */
static __inline __m128 __DEFAULT_FN_ATTRS512
_mm512_castps512_ps128(__m512 __a)
{
  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
}

/* Truncates a 512-bit float vector to its low 256 bits. */
static __inline __m256 __DEFAULT_FN_ATTRS512
_mm512_castps512_ps256 (__m512 __A)
{
  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
}

/* Bit-pattern reinterpretation; no value change, no code generated. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_castpd_ps (__m512d __A)
{
  return (__m512) (__A);
}

/* Bit-pattern reinterpretation; no value change. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_castpd_si512 (__m512d __A)
{
  return (__m512i) (__A);
}

/* Widens 128 -> 512 bits; the upper 384 bits are unspecified.  The
 * self-referential initializer of __B is the intended idiom for producing
 * an unspecified value rather than a bug. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_castpd128_pd512 (__m128d __A)
{
  __m256d __B = __builtin_nondeterministic_value(__B);
  return __builtin_shufflevector(
      __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
      __B, 0, 1, 2, 3, 4, 5, 6, 7);
}

/* Bit-pattern reinterpretation; no value change. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_castps_pd (__m512 __A)
{
  return (__m512d) (__A);
}

/* Bit-pattern reinterpretation; no value change. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_castps_si512 (__m512 __A)
{
  return (__m512i) (__A);
}

/* Widens 128 -> 512 bits; upper 384 bits unspecified (same idiom as
 * _mm512_castpd128_pd512). */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_castps128_ps512 (__m128 __A)
{
  __m256 __B = __builtin_nondeterministic_value(__B);
  return __builtin_shufflevector(
      __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7),
      __B, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}

/* Widens 128 -> 512 bits; upper 384 bits unspecified. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_castsi128_si512 (__m128i __A)
{
  __m256i __B = __builtin_nondeterministic_value(__B);
  return __builtin_shufflevector(
      __builtin_shufflevector(__A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3),
      __B, 0, 1, 2, 3, 4, 5, 6, 7);
}

/* Widens 256 -> 512 bits; upper 256 bits unspecified. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_castsi256_si512 (__m256i __A)
{
   return  __builtin_shufflevector( __A, __builtin_nondeterministic_value(__A), 0, 1, 2, 3, 4, 5, 6, 7);
}

/* Bit-pattern reinterpretation; no value change. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_castsi512_ps (__m512i __A)
{
  return (__m512) (__A);
}

/* Bit-pattern reinterpretation; no value change. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_castsi512_pd (__m512i __A)
{
  return (__m512d) (__A);
}

/* Truncates a 512-bit integer vector to its low 128 bits. */
static __inline __m128i __DEFAULT_FN_ATTRS512
_mm512_castsi512_si128 (__m512i __A)
{
  return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
}

/* Truncates a 512-bit integer vector to its low 256 bits. */
static __inline __m256i __DEFAULT_FN_ATTRS512
_mm512_castsi512_si256 (__m512i __A)
{
  return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
}
515 
516 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
517 _mm512_int2mask(int __a)
518 {
519   return (__mmask16)__a;
520 }
521 
522 static __inline__ int __DEFAULT_FN_ATTRS
523 _mm512_mask2int(__mmask16 __a)
524 {
525   return (int)__a;
526 }
527 
528 /// Constructs a 512-bit floating-point vector of [8 x double] from a
529 ///    128-bit floating-point vector of [2 x double]. The lower 128 bits
530 ///    contain the value of the source vector. The upper 384 bits are set
531 ///    to zero.
532 ///
533 /// \headerfile <x86intrin.h>
534 ///
535 /// This intrinsic has no corresponding instruction.
536 ///
537 /// \param __a
538 ///    A 128-bit vector of [2 x double].
539 /// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
540 ///    contain the value of the parameter. The upper 384 bits are set to zero.
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_zextpd128_pd512(__m128d __a)
{
  /* Indices 0-1 select __a; indices 2-3 (repeated) select the zero lanes of
   * the second operand, zero-extending into the upper 384 bits. */
  return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
}
546 
547 /// Constructs a 512-bit floating-point vector of [8 x double] from a
548 ///    256-bit floating-point vector of [4 x double]. The lower 256 bits
549 ///    contain the value of the source vector. The upper 256 bits are set
550 ///    to zero.
551 ///
552 /// \headerfile <x86intrin.h>
553 ///
554 /// This intrinsic has no corresponding instruction.
555 ///
556 /// \param __a
557 ///    A 256-bit vector of [4 x double].
558 /// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
559 ///    contain the value of the parameter. The upper 256 bits are set to zero.
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_zextpd256_pd512(__m256d __a)
{
  /* Indices 0-3 select __a; indices 4-7 select the zero lanes of the
   * second operand, zeroing the upper 256 bits. */
  return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
}
565 
566 /// Constructs a 512-bit floating-point vector of [16 x float] from a
567 ///    128-bit floating-point vector of [4 x float]. The lower 128 bits contain
568 ///    the value of the source vector. The upper 384 bits are set to zero.
569 ///
570 /// \headerfile <x86intrin.h>
571 ///
572 /// This intrinsic has no corresponding instruction.
573 ///
574 /// \param __a
575 ///    A 128-bit vector of [4 x float].
576 /// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
577 ///    contain the value of the parameter. The upper 384 bits are set to zero.
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_zextps128_ps512(__m128 __a)
{
  /* Indices 0-3 select __a; the repeated 4-7 runs select zero lanes,
   * zero-extending into the upper 384 bits. */
  return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
}
583 
584 /// Constructs a 512-bit floating-point vector of [16 x float] from a
585 ///    256-bit floating-point vector of [8 x float]. The lower 256 bits contain
586 ///    the value of the source vector. The upper 256 bits are set to zero.
587 ///
588 /// \headerfile <x86intrin.h>
589 ///
590 /// This intrinsic has no corresponding instruction.
591 ///
592 /// \param __a
593 ///    A 256-bit vector of [8 x float].
594 /// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
595 ///    contain the value of the parameter. The upper 256 bits are set to zero.
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_zextps256_ps512(__m256 __a)
{
  /* Indices 0-7 select __a; indices 8-15 select zero lanes, zeroing the
   * upper 256 bits. */
  return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}
601 
602 /// Constructs a 512-bit integer vector from a 128-bit integer vector.
603 ///    The lower 128 bits contain the value of the source vector. The upper
604 ///    384 bits are set to zero.
605 ///
606 /// \headerfile <x86intrin.h>
607 ///
608 /// This intrinsic has no corresponding instruction.
609 ///
610 /// \param __a
611 ///    A 128-bit integer vector.
612 /// \returns A 512-bit integer vector. The lower 128 bits contain the value of
613 ///    the parameter. The upper 384 bits are set to zero.
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_zextsi128_si512(__m128i __a)
{
  /* Indices 0-1 select __a; the repeated 2-3 runs select zero lanes,
   * zero-extending into the upper 384 bits. */
  return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
}
619 
620 /// Constructs a 512-bit integer vector from a 256-bit integer vector.
621 ///    The lower 256 bits contain the value of the source vector. The upper
622 ///    256 bits are set to zero.
623 ///
624 /// \headerfile <x86intrin.h>
625 ///
626 /// This intrinsic has no corresponding instruction.
627 ///
628 /// \param __a
629 ///    A 256-bit integer vector.
630 /// \returns A 512-bit integer vector. The lower 256 bits contain the value of
631 ///    the parameter. The upper 256 bits are set to zero.
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_zextsi256_si512(__m256i __a)
{
  /* Indices 0-3 select __a; indices 4-7 select zero lanes, zeroing the
   * upper 256 bits. */
  return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
}
637 
638 /* Bitwise operators */
/* Lanewise AND viewed as 16 x u32 (bit result is identical for any lane
 * width; the element type only matters for the masked forms). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_and_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a & (__v16su)__b);
}

/* Merge-masked AND: unselected 32-bit lanes keep __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                (__v16si) _mm512_and_epi32(__a, __b),
                (__v16si) __src);
}

/* Zero-masked AND: unselected 32-bit lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
                                         __k, __a, __b);
}

/* Lanewise AND viewed as 8 x u64. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_and_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a & (__v8du)__b);
}

/* Merge-masked AND: unselected 64-bit lanes keep __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
    return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
                (__v8di) _mm512_and_epi64(__a, __b),
                (__v8di) __src);
}

/* Zero-masked AND: unselected 64-bit lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
                                         __k, __a, __b);
}

/* Computes (~__A) & __B over the full 512 bits. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_andnot_si512 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)__A & (__v8du)__B);
}

/* (~__A) & __B viewed as 16 x u32. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_andnot_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v16su)__A & (__v16su)__B);
}

/* Merge-masked ANDNOT: unselected 32-bit lanes keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_andnot_epi32(__A, __B),
                                         (__v16si)__W);
}

/* Zero-masked ANDNOT: unselected 32-bit lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
                                           __U, __A, __B);
}

/* (~__A) & __B viewed as 8 x u64. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_andnot_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)(~(__v8du)__A & (__v8du)__B);
}

/* Merge-masked ANDNOT: unselected 64-bit lanes keep __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_andnot_epi64(__A, __B),
                                          (__v8di)__W);
}

/* Zero-masked ANDNOT: unselected 64-bit lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
                                           __U, __A, __B);
}

/* Lanewise OR viewed as 16 x u32. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_or_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a | (__v16su)__b);
}

/* Merge-masked OR: unselected 32-bit lanes keep __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                             (__v16si)_mm512_or_epi32(__a, __b),
                                             (__v16si)__src);
}

/* Zero-masked OR: unselected 32-bit lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
}

/* Lanewise OR viewed as 8 x u64. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_or_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a | (__v8du)__b);
}

/* Merge-masked OR: unselected 64-bit lanes keep __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_or_epi64(__a, __b),
                                             (__v8di)__src);
}

/* Zero-masked OR: unselected 64-bit lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
}

/* Lanewise XOR viewed as 16 x u32. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_xor_epi32(__m512i __a, __m512i __b)
{
  return (__m512i)((__v16su)__a ^ (__v16su)__b);
}

/* Merge-masked XOR: unselected 32-bit lanes keep __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
                                            (__v16si)_mm512_xor_epi32(__a, __b),
                                            (__v16si)__src);
}

/* Zero-masked XOR: unselected 32-bit lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
}

/* Lanewise XOR viewed as 8 x u64. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_xor_epi64(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a ^ (__v8du)__b);
}

/* Merge-masked XOR: unselected 64-bit lanes keep __src. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
                                             (__v8di)_mm512_xor_epi64(__a, __b),
                                             (__v8di)__src);
}

/* Zero-masked XOR: unselected 64-bit lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
{
  return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
}
808 
/* Whole-register (type-agnostic) bitwise AND of two 512-bit vectors. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_and_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a & (__v8du)__b);
}

/* Whole-register bitwise OR of two 512-bit vectors. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_or_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a | (__v8du)__b);
}

/* Whole-register bitwise XOR of two 512-bit vectors. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_xor_si512(__m512i __a, __m512i __b)
{
  return (__m512i)((__v8du)__a ^ (__v8du)__b);
}
826 
827 /* Arithmetic */
828 
/* Lane-wise addition of two vectors of [8 x double]. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_add_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a + (__v8df)__b);
}

/* Lane-wise addition of two vectors of [16 x float]. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_add_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a + (__v16sf)__b);
}

/* Lane-wise multiplication of two vectors of [8 x double]. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_mul_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a * (__v8df)__b);
}

/* Lane-wise multiplication of two vectors of [16 x float]. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_mul_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a * (__v16sf)__b);
}

/* Lane-wise subtraction (__a - __b) of two vectors of [8 x double]. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_sub_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a - (__v8df)__b);
}

/* Lane-wise subtraction (__a - __b) of two vectors of [16 x float]. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_sub_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a - (__v16sf)__b);
}
864 
/* Lane-wise addition of [8 x i64]; unsigned vector type gives defined
   wraparound on overflow. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_add_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A + (__v8du) __B);
}

/* Merge-masked add of [8 x i64]: clear mask bits keep __W lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked add of [8 x i64]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_add_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Lane-wise subtraction (__A - __B) of [8 x i64]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A - (__v8du) __B);
}

/* Merge-masked subtract of [8 x i64]: clear mask bits keep __W lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked subtract of [8 x i64]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sub_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
908 
/* Lane-wise addition of [16 x i32]; unsigned vector type gives defined
   wraparound on overflow. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_add_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A + (__v16su) __B);
}

/* Merge-masked add of [16 x i32]: clear mask bits keep __W lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masked add of [16 x i32]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_add_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Lane-wise subtraction (__A - __B) of [16 x i32]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A - (__v16su) __B);
}

/* Merge-masked subtract of [16 x i32]: clear mask bits keep __W lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Zero-masked subtract of [16 x i32]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_sub_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}
952 
/* Lane-wise maximum of [8 x double] with an explicit rounding/SAE control R
   (R must be a compile-time constant, hence the macro form). */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Merge-masked variant: clear mask bits keep W lanes. */
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                   (__v8df)(W)))

/* Zero-masked variant: clear mask bits produce 0.0. */
#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))

/* Lane-wise maximum of [8 x double] using the current rounding mode. */
static  __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_max_pd(__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
                                           _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked max of [8 x double]: clear mask bits keep __W lanes. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_max_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked max of [8 x double]. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_max_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}
989 
/* Lane-wise maximum of [16 x float] with an explicit rounding/SAE control R
   (compile-time constant, hence the macro form). */
#define _mm512_max_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Merge-masked variant: clear mask bits keep W lanes. */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                  (__v16sf)(W)))

/* Zero-masked variant: clear mask bits produce 0.0f. */
#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))

/* Lane-wise maximum of [16 x float] using the current rounding mode. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_max_ps(__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
                                          _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked max of [16 x float]: clear mask bits keep __W lanes. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_max_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked max of [16 x float]. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_max_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
1026 
/* Scalar single-precision max: lane 0 is max(__A[0], __B[0]) merged under
   mask bit 0 (keeps __W[0] if clear); upper lanes are copied from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked scalar single-precision max: lane 0 is 0.0f if mask bit 0 is
   clear; upper lanes come from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Unmasked scalar max with explicit rounding/SAE control R (all-ones mask). */
#define _mm_max_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked scalar max with rounding control. */
#define _mm_mask_max_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked scalar max with rounding control. */
#define _mm_maskz_max_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1062 
/* Scalar double-precision max: lane 0 is max(__A[0], __B[0]) merged under
   mask bit 0 (keeps __W[0] if clear); upper lane is copied from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked scalar double-precision max: lane 0 is 0.0 if mask bit 0 is
   clear; upper lane comes from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Unmasked scalar max with explicit rounding/SAE control R (all-ones mask). */
#define _mm_max_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked scalar max with rounding control. */
#define _mm_mask_max_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked scalar max with rounding control. */
#define _mm_maskz_max_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1098 
/* Lane-wise signed maximum of [16 x i32]. */
static __inline __m512i
__DEFAULT_FN_ATTRS512
_mm512_max_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
}

/* Merge-masked signed max of [16 x i32]: clear mask bits keep __W lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_max_epi32(__A, __B),
                                            (__v16si)__W);
}

/* Zero-masked signed max of [16 x i32]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_max_epi32(__A, __B),
                                            (__v16si)_mm512_setzero_si512());
}

/* Lane-wise unsigned maximum of [16 x u32]. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_max_epu32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
}

/* Merge-masked unsigned max of [16 x u32]: clear mask bits keep __W lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_max_epu32(__A, __B),
                                            (__v16si)__W);
}

/* Zero-masked unsigned max of [16 x u32]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_max_epu32(__A, __B),
                                            (__v16si)_mm512_setzero_si512());
}
1143 
/* Lane-wise signed maximum of [8 x i64]. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_max_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
}

/* Merge-masked signed max of [8 x i64]: clear mask bits keep __W lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_max_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked signed max of [8 x i64]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_max_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Lane-wise unsigned maximum of [8 x u64]. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_max_epu64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
}

/* Merge-masked unsigned max of [8 x u64]: clear mask bits keep __W lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_max_epu64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked unsigned max of [8 x u64]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_max_epu64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
1187 
/* Lane-wise minimum of [8 x double] with an explicit rounding/SAE control R
   (compile-time constant, hence the macro form). */
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Merge-masked variant: clear mask bits keep W lanes. */
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                   (__v8df)(W)))

/* Zero-masked variant: clear mask bits produce 0.0. */
#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))

/* Lane-wise minimum of [8 x double] using the current rounding mode. */
static  __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_min_pd(__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
                                           _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked min of [8 x double]: clear mask bits keep __W lanes. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_min_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked min of [8 x double]. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_min_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}
1224 
/* Lane-wise minimum of [16 x float] with an explicit rounding/SAE control R
   (compile-time constant, hence the macro form). */
#define _mm512_min_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Merge-masked variant: clear mask bits keep W lanes. */
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                  (__v16sf)(W)))

/* Zero-masked variant: clear mask bits produce 0.0f. */
#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))

/* Lane-wise minimum of [16 x float] using the current rounding mode. */
static  __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_min_ps(__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
                                          _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked min of [16 x float]: clear mask bits keep __W lanes. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_min_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked min of [16 x float]. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                             (__v16sf)_mm512_min_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
1261 
/* Scalar single-precision min: lane 0 is min(__A[0], __B[0]) merged under
   mask bit 0 (keeps __W[0] if clear); upper lanes are copied from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked scalar single-precision min: lane 0 is 0.0f if mask bit 0 is
   clear; upper lanes come from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
                (__v4sf) __B,
                (__v4sf)  _mm_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Unmasked scalar min with explicit rounding/SAE control R (all-ones mask). */
#define _mm_min_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked scalar min with rounding control. */
#define _mm_mask_min_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked scalar min with rounding control. */
#define _mm_maskz_min_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1297 
/* Scalar double-precision min: lane 0 is min(__A[0], __B[0]) merged under
   mask bit 0 (keeps __W[0] if clear); upper lane is copied from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked scalar double-precision min: lane 0 is 0.0 if mask bit 0 is
   clear; upper lane comes from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
                (__v2df) __B,
                (__v2df)  _mm_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Unmasked scalar min with explicit rounding/SAE control R (all-ones mask). */
#define _mm_min_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked scalar min with rounding control. */
#define _mm_mask_min_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked scalar min with rounding control. */
#define _mm_maskz_min_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1333 
/* Lane-wise signed minimum of [16 x i32]. */
static __inline __m512i
__DEFAULT_FN_ATTRS512
_mm512_min_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
}

/* Merge-masked signed min of [16 x i32]: clear mask bits keep __W lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_min_epi32(__A, __B),
                                            (__v16si)__W);
}

/* Zero-masked signed min of [16 x i32]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_min_epi32(__A, __B),
                                            (__v16si)_mm512_setzero_si512());
}

/* Lane-wise unsigned minimum of [16 x u32]. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_min_epu32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
}

/* Merge-masked unsigned min of [16 x u32]: clear mask bits keep __W lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_min_epu32(__A, __B),
                                            (__v16si)__W);
}

/* Zero-masked unsigned min of [16 x u32]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                            (__v16si)_mm512_min_epu32(__A, __B),
                                            (__v16si)_mm512_setzero_si512());
}
1378 
/* Lane-wise signed minimum of [8 x i64]. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_min_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
}

/* Merge-masked signed min of [8 x i64]: clear mask bits keep __W lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_min_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked signed min of [8 x i64]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_min_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}

/* Lane-wise unsigned minimum of [8 x u64]. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_min_epu64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
}

/* Merge-masked unsigned min of [8 x u64]: clear mask bits keep __W lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_min_epu64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked unsigned min of [8 x u64]. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_min_epu64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
1422 
/* Signed widening multiply: multiplies the even-indexed 32-bit lanes of
   __X and __Y, producing eight 64-bit products. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mul_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
}

/* Merge-masked widening signed multiply: clear mask bits keep __W lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epi32(__X, __Y),
                                             (__v8di)__W);
}

/* Zero-masked widening signed multiply. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epi32(__X, __Y),
                                             (__v8di)_mm512_setzero_si512 ());
}

/* Unsigned widening multiply of the even-indexed 32-bit lanes, producing
   eight 64-bit products. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mul_epu32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
}

/* Merge-masked widening unsigned multiply: clear mask bits keep __W lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epu32(__X, __Y),
                                             (__v8di)__W);
}

/* Zero-masked widening unsigned multiply. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                             (__v8di)_mm512_mul_epu32(__X, __Y),
                                             (__v8di)_mm512_setzero_si512 ());
}
1466 
/* Lane-wise multiply of [16 x i32], keeping the low 32 bits of each product;
   unsigned vector type gives defined wraparound on overflow. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mullo_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A * (__v16su) __B);
}

/* Zero-masked low multiply of [16 x i32]. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_mullo_epi32(__A, __B),
                                             (__v16si)_mm512_setzero_si512());
}

/* Merge-masked low multiply of [16 x i32]: clear mask bits keep __W lanes. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_mullo_epi32(__A, __B),
                                             (__v16si)__W);
}

/* Lane-wise multiply of [8 x i64], keeping the low 64 bits of each product. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mullox_epi64 (__m512i __A, __m512i __B) {
  return (__m512i) ((__v8du) __A * (__v8du) __B);
}

/* Merge-masked low multiply of [8 x i64]: clear mask bits keep __W lanes. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_mullox_epi64(__A, __B),
                                             (__v8di)__W);
}
1500 
/* Lane-wise square root of [8 x double] with an explicit rounding control R
   (compile-time constant, hence the macro form). */
#define _mm512_sqrt_round_pd(A, R) \
  ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))

/* Merge-masked variant: clear mask bits keep W lanes. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                       (__v8df)(__m512d)(W)))

/* Zero-masked variant: clear mask bits produce 0.0. */
#define _mm512_maskz_sqrt_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                       (__v8df)_mm512_setzero_pd()))

/* Lane-wise square root of [8 x double] using the current rounding mode. */
static  __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_sqrt_pd(__m512d __A)
{
  return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
                                           _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked sqrt of [8 x double]: clear mask bits keep __W lanes. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_sqrt_pd(__A),
                                              (__v8df)__W);
}

/* Zero-masked sqrt of [8 x double]. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                              (__v8df)_mm512_sqrt_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}
1536 
/* Square root of packed floats with an explicit rounding mode R
   (_MM_FROUND_* constant). */
#define _mm512_sqrt_round_ps(A, R) \
  ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))

/* Merge-masked variant: lanes with the U bit clear come from W. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                      (__v16sf)(__m512)(W)))

/* Zero-masked variant: lanes with the U bit clear are zeroed. */
#define _mm512_maskz_sqrt_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                      (__v16sf)_mm512_setzero_ps()))
1549 
1550 static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1551 _mm512_sqrt_ps(__m512 __A)
1552 {
1553   return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1554                                           _MM_FROUND_CUR_DIRECTION);
1555 }
1556 
1557 static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1558 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1559 {
1560   return (__m512)__builtin_ia32_selectps_512(__U,
1561                                              (__v16sf)_mm512_sqrt_ps(__A),
1562                                              (__v16sf)__W);
1563 }
1564 
1565 static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1566 _mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
1567 {
1568   return (__m512)__builtin_ia32_selectps_512(__U,
1569                                              (__v16sf)_mm512_sqrt_ps(__A),
1570                                              (__v16sf)_mm512_setzero_ps());
1571 }
1572 
1573 static  __inline__ __m512d __DEFAULT_FN_ATTRS512
1574 _mm512_rsqrt14_pd(__m512d __A)
1575 {
1576   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1577                  (__v8df)
1578                  _mm512_setzero_pd (),
1579                  (__mmask8) -1);}
1580 
1581 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1582 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1583 {
1584   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1585                   (__v8df) __W,
1586                   (__mmask8) __U);
1587 }
1588 
1589 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1590 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1591 {
1592   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1593                   (__v8df)
1594                   _mm512_setzero_pd (),
1595                   (__mmask8) __U);
1596 }
1597 
1598 static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1599 _mm512_rsqrt14_ps(__m512 __A)
1600 {
1601   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1602                 (__v16sf)
1603                 _mm512_setzero_ps (),
1604                 (__mmask16) -1);
1605 }
1606 
1607 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1608 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1609 {
1610   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1611                  (__v16sf) __W,
1612                  (__mmask16) __U);
1613 }
1614 
1615 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1616 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1617 {
1618   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1619                  (__v16sf)
1620                  _mm512_setzero_ps (),
1621                  (__mmask16) __U);
1622 }
1623 
1624 static  __inline__ __m128 __DEFAULT_FN_ATTRS128
1625 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
1626 {
1627   return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1628              (__v4sf) __B,
1629              (__v4sf)
1630              _mm_setzero_ps (),
1631              (__mmask8) -1);
1632 }
1633 
1634 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1635 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1636 {
1637  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1638           (__v4sf) __B,
1639           (__v4sf) __W,
1640           (__mmask8) __U);
1641 }
1642 
1643 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1644 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1645 {
1646  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1647           (__v4sf) __B,
1648           (__v4sf) _mm_setzero_ps (),
1649           (__mmask8) __U);
1650 }
1651 
1652 static  __inline__ __m128d __DEFAULT_FN_ATTRS128
1653 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
1654 {
1655   return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1656               (__v2df) __B,
1657               (__v2df)
1658               _mm_setzero_pd (),
1659               (__mmask8) -1);
1660 }
1661 
1662 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1663 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1664 {
1665  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1666           (__v2df) __B,
1667           (__v2df) __W,
1668           (__mmask8) __U);
1669 }
1670 
1671 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1672 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1673 {
1674  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1675           (__v2df) __B,
1676           (__v2df) _mm_setzero_pd (),
1677           (__mmask8) __U);
1678 }
1679 
1680 static  __inline__ __m512d __DEFAULT_FN_ATTRS512
1681 _mm512_rcp14_pd(__m512d __A)
1682 {
1683   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1684                (__v8df)
1685                _mm512_setzero_pd (),
1686                (__mmask8) -1);
1687 }
1688 
1689 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1690 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1691 {
1692   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1693                 (__v8df) __W,
1694                 (__mmask8) __U);
1695 }
1696 
1697 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1698 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1699 {
1700   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1701                 (__v8df)
1702                 _mm512_setzero_pd (),
1703                 (__mmask8) __U);
1704 }
1705 
1706 static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1707 _mm512_rcp14_ps(__m512 __A)
1708 {
1709   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1710               (__v16sf)
1711               _mm512_setzero_ps (),
1712               (__mmask16) -1);
1713 }
1714 
1715 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1716 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1717 {
1718   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1719                    (__v16sf) __W,
1720                    (__mmask16) __U);
1721 }
1722 
1723 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1724 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1725 {
1726   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1727                    (__v16sf)
1728                    _mm512_setzero_ps (),
1729                    (__mmask16) __U);
1730 }
1731 
1732 static  __inline__ __m128 __DEFAULT_FN_ATTRS128
1733 _mm_rcp14_ss(__m128 __A, __m128 __B)
1734 {
1735   return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1736                  (__v4sf) __B,
1737                  (__v4sf)
1738                  _mm_setzero_ps (),
1739                  (__mmask8) -1);
1740 }
1741 
1742 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1743 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1744 {
1745  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1746           (__v4sf) __B,
1747           (__v4sf) __W,
1748           (__mmask8) __U);
1749 }
1750 
1751 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1752 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1753 {
1754  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1755           (__v4sf) __B,
1756           (__v4sf) _mm_setzero_ps (),
1757           (__mmask8) __U);
1758 }
1759 
1760 static  __inline__ __m128d __DEFAULT_FN_ATTRS128
1761 _mm_rcp14_sd(__m128d __A, __m128d __B)
1762 {
1763   return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1764             (__v2df) __B,
1765             (__v2df)
1766             _mm_setzero_pd (),
1767             (__mmask8) -1);
1768 }
1769 
1770 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1771 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1772 {
1773  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1774           (__v2df) __B,
1775           (__v2df) __W,
1776           (__mmask8) __U);
1777 }
1778 
1779 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1780 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1781 {
1782  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1783           (__v2df) __B,
1784           (__v2df) _mm_setzero_pd (),
1785           (__mmask8) __U);
1786 }
1787 
1788 static __inline __m512 __DEFAULT_FN_ATTRS512
1789 _mm512_floor_ps(__m512 __A)
1790 {
1791   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1792                                                   _MM_FROUND_FLOOR,
1793                                                   (__v16sf) __A, (unsigned short)-1,
1794                                                   _MM_FROUND_CUR_DIRECTION);
1795 }
1796 
1797 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1798 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1799 {
1800   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1801                    _MM_FROUND_FLOOR,
1802                    (__v16sf) __W, __U,
1803                    _MM_FROUND_CUR_DIRECTION);
1804 }
1805 
1806 static __inline __m512d __DEFAULT_FN_ATTRS512
1807 _mm512_floor_pd(__m512d __A)
1808 {
1809   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1810                                                    _MM_FROUND_FLOOR,
1811                                                    (__v8df) __A, (unsigned char)-1,
1812                                                    _MM_FROUND_CUR_DIRECTION);
1813 }
1814 
1815 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1816 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1817 {
1818   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1819                 _MM_FROUND_FLOOR,
1820                 (__v8df) __W, __U,
1821                 _MM_FROUND_CUR_DIRECTION);
1822 }
1823 
1824 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1825 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1826 {
1827   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1828                    _MM_FROUND_CEIL,
1829                    (__v16sf) __W, __U,
1830                    _MM_FROUND_CUR_DIRECTION);
1831 }
1832 
1833 static __inline __m512 __DEFAULT_FN_ATTRS512
1834 _mm512_ceil_ps(__m512 __A)
1835 {
1836   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1837                                                   _MM_FROUND_CEIL,
1838                                                   (__v16sf) __A, (unsigned short)-1,
1839                                                   _MM_FROUND_CUR_DIRECTION);
1840 }
1841 
1842 static __inline __m512d __DEFAULT_FN_ATTRS512
1843 _mm512_ceil_pd(__m512d __A)
1844 {
1845   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1846                                                    _MM_FROUND_CEIL,
1847                                                    (__v8df) __A, (unsigned char)-1,
1848                                                    _MM_FROUND_CUR_DIRECTION);
1849 }
1850 
1851 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1852 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1853 {
1854   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1855                 _MM_FROUND_CEIL,
1856                 (__v8df) __W, __U,
1857                 _MM_FROUND_CUR_DIRECTION);
1858 }
1859 
1860 static __inline __m512i __DEFAULT_FN_ATTRS512
1861 _mm512_abs_epi64(__m512i __A)
1862 {
1863   return (__m512i)__builtin_elementwise_abs((__v8di)__A);
1864 }
1865 
1866 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1867 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1868 {
1869   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1870                                              (__v8di)_mm512_abs_epi64(__A),
1871                                              (__v8di)__W);
1872 }
1873 
1874 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1875 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
1876 {
1877   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1878                                              (__v8di)_mm512_abs_epi64(__A),
1879                                              (__v8di)_mm512_setzero_si512());
1880 }
1881 
1882 static __inline __m512i __DEFAULT_FN_ATTRS512
1883 _mm512_abs_epi32(__m512i __A)
1884 {
1885   return (__m512i)__builtin_elementwise_abs((__v16si) __A);
1886 }
1887 
1888 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1889 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
1890 {
1891   return (__m512i)__builtin_ia32_selectd_512(__U,
1892                                              (__v16si)_mm512_abs_epi32(__A),
1893                                              (__v16si)__W);
1894 }
1895 
1896 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1897 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
1898 {
1899   return (__m512i)__builtin_ia32_selectd_512(__U,
1900                                              (__v16si)_mm512_abs_epi32(__A),
1901                                              (__v16si)_mm512_setzero_si512());
1902 }
1903 
1904 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1905 _mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1906   __A = _mm_add_ss(__A, __B);
1907   return __builtin_ia32_selectss_128(__U, __A, __W);
1908 }
1909 
1910 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1911 _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1912   __A = _mm_add_ss(__A, __B);
1913   return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1914 }
1915 
/* Scalar float add with explicit rounding mode R (_MM_FROUND_*). */
#define _mm_add_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked rounded scalar add; W supplies the fallback low element. */
#define _mm_mask_add_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked rounded scalar add; fallback low element is zero. */
#define _mm_maskz_add_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1933 
1934 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1935 _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1936   __A = _mm_add_sd(__A, __B);
1937   return __builtin_ia32_selectsd_128(__U, __A, __W);
1938 }
1939 
1940 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1941 _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1942   __A = _mm_add_sd(__A, __B);
1943   return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1944 }
/* Scalar double add with explicit rounding mode R (_MM_FROUND_*). */
#define _mm_add_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked rounded scalar add; W supplies the fallback low element. */
#define _mm_mask_add_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked rounded scalar add; fallback low element is zero. */
#define _mm_maskz_add_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1962 
1963 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1964 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1965   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1966                                               (__v8df)_mm512_add_pd(__A, __B),
1967                                               (__v8df)__W);
1968 }
1969 
1970 static __inline__ __m512d __DEFAULT_FN_ATTRS512
1971 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1972   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1973                                               (__v8df)_mm512_add_pd(__A, __B),
1974                                               (__v8df)_mm512_setzero_pd());
1975 }
1976 
1977 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1978 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1979   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1980                                              (__v16sf)_mm512_add_ps(__A, __B),
1981                                              (__v16sf)__W);
1982 }
1983 
1984 static __inline__ __m512 __DEFAULT_FN_ATTRS512
1985 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1986   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1987                                              (__v16sf)_mm512_add_ps(__A, __B),
1988                                              (__v16sf)_mm512_setzero_ps());
1989 }
1990 
/* Packed double add with explicit rounding mode R (_MM_FROUND_*). */
#define _mm512_add_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Merge-masked rounded add: lanes with the U bit clear come from W. */
#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

/* Zero-masked rounded add: lanes with the U bit clear are zeroed. */
#define _mm512_maskz_add_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))

/* Packed float add with explicit rounding mode R (_MM_FROUND_*). */
#define _mm512_add_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Merge-masked rounded add: lanes with the U bit clear come from W. */
#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

/* Zero-masked rounded add: lanes with the U bit clear are zeroed. */
#define _mm512_maskz_add_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2018 
2019 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2020 _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2021   __A = _mm_sub_ss(__A, __B);
2022   return __builtin_ia32_selectss_128(__U, __A, __W);
2023 }
2024 
2025 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2026 _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2027   __A = _mm_sub_ss(__A, __B);
2028   return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2029 }
/* Scalar float subtract with explicit rounding mode R (_MM_FROUND_*). */
#define _mm_sub_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked rounded scalar subtract; W supplies the fallback element. */
#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked rounded scalar subtract; fallback low element is zero. */
#define _mm_maskz_sub_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2047 
2048 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2049 _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2050   __A = _mm_sub_sd(__A, __B);
2051   return __builtin_ia32_selectsd_128(__U, __A, __W);
2052 }
2053 
2054 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2055 _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2056   __A = _mm_sub_sd(__A, __B);
2057   return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2058 }
2059 
/* Scalar double subtract with explicit rounding mode R (_MM_FROUND_*). */
#define _mm_sub_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked rounded scalar subtract; W supplies the fallback element. */
#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked rounded scalar subtract; fallback low element is zero. */
#define _mm_maskz_sub_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2077 
2078 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2079 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2080   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2081                                               (__v8df)_mm512_sub_pd(__A, __B),
2082                                               (__v8df)__W);
2083 }
2084 
2085 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2086 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2087   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2088                                               (__v8df)_mm512_sub_pd(__A, __B),
2089                                               (__v8df)_mm512_setzero_pd());
2090 }
2091 
2092 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2093 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2094   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2095                                              (__v16sf)_mm512_sub_ps(__A, __B),
2096                                              (__v16sf)__W);
2097 }
2098 
2099 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2100 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2101   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2102                                              (__v16sf)_mm512_sub_ps(__A, __B),
2103                                              (__v16sf)_mm512_setzero_ps());
2104 }
2105 
/* Packed double subtract with explicit rounding mode R (_MM_FROUND_*). */
#define _mm512_sub_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Merge-masked rounded subtract: lanes with the U bit clear come from W. */
#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

/* Zero-masked rounded subtract: lanes with the U bit clear are zeroed. */
#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))

/* Packed float subtract with explicit rounding mode R (_MM_FROUND_*). */
#define _mm512_sub_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Merge-masked rounded subtract: lanes with the U bit clear come from W. */
#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

/* Zero-masked rounded subtract: lanes with the U bit clear are zeroed. */
#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2133 
2134 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2135 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2136   __A = _mm_mul_ss(__A, __B);
2137   return __builtin_ia32_selectss_128(__U, __A, __W);
2138 }
2139 
2140 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2141 _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2142   __A = _mm_mul_ss(__A, __B);
2143   return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2144 }
/* Scalar float multiply with explicit rounding mode R (_MM_FROUND_*). */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked rounded scalar multiply; W supplies the fallback element. */
#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked rounded scalar multiply; fallback low element is zero. */
#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2162 
2163 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2164 _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2165   __A = _mm_mul_sd(__A, __B);
2166   return __builtin_ia32_selectsd_128(__U, __A, __W);
2167 }
2168 
2169 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2170 _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2171   __A = _mm_mul_sd(__A, __B);
2172   return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2173 }
2174 
/* Scalar double multiply with explicit rounding mode R (_MM_FROUND_*). */
#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Merge-masked rounded scalar multiply; W supplies the fallback element. */
#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked rounded scalar multiply; fallback low element is zero. */
#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2192 
2193 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2194 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2195   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2196                                               (__v8df)_mm512_mul_pd(__A, __B),
2197                                               (__v8df)__W);
2198 }
2199 
2200 static __inline__ __m512d __DEFAULT_FN_ATTRS512
2201 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2202   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2203                                               (__v8df)_mm512_mul_pd(__A, __B),
2204                                               (__v8df)_mm512_setzero_pd());
2205 }
2206 
2207 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2208 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2209   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2210                                              (__v16sf)_mm512_mul_ps(__A, __B),
2211                                              (__v16sf)__W);
2212 }
2213 
2214 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2215 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2216   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2217                                              (__v16sf)_mm512_mul_ps(__A, __B),
2218                                              (__v16sf)_mm512_setzero_ps());
2219 }
2220 
/* Packed double multiply with explicit rounding mode R (_MM_FROUND_*). */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Merge-masked rounded multiply: lanes with the U bit clear come from W. */
#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

/* Zero-masked rounded multiply: lanes with the U bit clear are zeroed. */
#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))

/* Packed float multiply with explicit rounding mode R (_MM_FROUND_*). */
#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R)))

/* Merge-masked rounded multiply: lanes with the U bit clear come from W. */
#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

/* Zero-masked rounded multiply: lanes with the U bit clear are zeroed. */
#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2248 
2249 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2250 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2251   __A = _mm_div_ss(__A, __B);
2252   return __builtin_ia32_selectss_128(__U, __A, __W);
2253 }
2254 
2255 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2256 _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2257   __A = _mm_div_ss(__A, __B);
2258   return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2259 }
2260 
/* Scalar float divide with explicit rounding mode R (_MM_FROUND_*). */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Merge-masked rounded scalar divide; W supplies the fallback element. */
#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked rounded scalar divide; fallback low element is zero. */
#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2278 
2279 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2280 _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2281   __A = _mm_div_sd(__A, __B);
2282   return __builtin_ia32_selectsd_128(__U, __A, __W);
2283 }
2284 
2285 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2286 _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2287   __A = _mm_div_sd(__A, __B);
2288   return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2289 }
2290 
/* Scalar double-precision divide with explicit rounding mode R.  Only the
 * low lane is computed; the upper lane passes through from A.  The unmasked
 * form uses an all-ones mask, so the zero operand is never selected. */
#define _mm_div_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Masked form: low lane comes from W when bit 0 of U is clear. */
#define _mm_mask_div_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked form: low lane is zeroed when bit 0 of U is clear. */
#define _mm_maskz_div_round_sd(U, A, B, R) \
  ((__m512d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2308 
/* Divide 8 packed doubles in __a by the corresponding elements of __b.
 * Expressed as a plain vector division so the compiler can fold/vectorize
 * it like ordinary arithmetic. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_div_pd(__m512d __a, __m512d __b)
{
  return (__m512d)((__v8df)__a/(__v8df)__b);
}

/* Masked divide: lanes whose bit in __U is clear are taken from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_div_pd(__A, __B),
                                              (__v8df)__W);
}

/* Zero-masked divide: lanes whose bit in __U is clear are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_div_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

/* Divide 16 packed floats in __a by the corresponding elements of __b. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_div_ps(__m512 __a, __m512 __b)
{
  return (__m512)((__v16sf)__a/(__v16sf)__b);
}

/* Masked divide: lanes whose bit in __U is clear are taken from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_div_ps(__A, __B),
                                             (__v16sf)__W);
}

/* Zero-masked divide: lanes whose bit in __U is clear are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_div_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
2348 
/* Packed double divide with explicit rounding mode R (_MM_FROUND_*). */
#define _mm512_div_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/* Masked form: lanes whose bit in U is clear come from W. */
#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))

/* Zero-masked form: lanes whose bit in U is clear are zeroed. */
#define _mm512_maskz_div_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))

/* Packed float divide with explicit rounding mode R. */
#define _mm512_div_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/* Masked form: lanes whose bit in U is clear come from W. */
#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))

/* Zero-masked form: lanes whose bit in U is clear are zeroed. */
#define _mm512_maskz_div_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2376 
/* VRNDSCALEPS: round each float to the number of fraction bits encoded in
 * the immediate (low 4 bits select the rounding mode / SAE, high 4 bits the
 * fixed-point scale M; result is rounded to 2^-M granularity).
 * NOTE(review): parameter order differs between the variants below —
 * mask forms take (dst, mask, src, imm), maskz forms take (mask, src, imm). */
#define _mm512_roundscale_ps(A, B) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

/* Masked roundscale: A is the passthrough source, B the mask, C the input. */
#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         _MM_FROUND_CUR_DIRECTION))

/* Zero-masked roundscale: A is the mask, B the input. */
#define _mm512_maskz_roundscale_ps(A, B, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), \
                                          _MM_FROUND_CUR_DIRECTION))

/* _round variants additionally take an explicit rounding/SAE operand R. */
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         (int)(R)))

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), (int)(R)))

#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)))

/* VRNDSCALEPD: double-precision counterpart of the macros above; same
 * immediate encoding and same (inconsistent) parameter ordering. */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

/* Masked roundscale: A is the passthrough source, B the mask, C the input. */
#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          _MM_FROUND_CUR_DIRECTION))

/* Zero-masked roundscale: A is the mask, B the input. */
#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          (int)(R)))

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), (int)(R)))

#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)))
2440 
/* Fused multiply-add family (packed double) with explicit rounding mode R.
 * All variants lower through the vfmaddpd512 builtins; fmsub/fnmadd/fnmsub
 * are expressed by negating the C and/or A operand of A*B+C:
 *   fmadd  =  A*B + C      fmsub  =  A*B - C
 *   fnmadd = -A*B + C      fnmsub = -A*B - C
 * _mask  keeps lanes of A (the multiplicand) where the mask bit is clear,
 * _mask3 keeps lanes of C (the addend), _maskz zeroes them. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


/* fmsub: A*B - C, i.e. fmadd with C negated. */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


/* fnmadd: -(A*B) + C, i.e. fmadd with A negated. */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


/* fnmsub: -(A*B) - C, i.e. fmadd with both A and C negated. */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))


#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
2524 
/* Non-rounding FMA entry points (packed double): same builtins as the
 * _round macros above, with the rounding operand fixed to
 * _MM_FROUND_CUR_DIRECTION (use the current MXCSR rounding mode). */

/* A*B + C for 8 packed doubles. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df) __C,
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Masked: lanes of __A pass through where the __U bit is clear. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df) __C,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Mask3: lanes of __C pass through where the __U bit is clear. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* Maskz: lanes are zeroed where the __U bit is clear. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* A*B - C (fmadd with __C negated). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    -(__v8df) __C,
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    (__v8df) __B,
                                                    -(__v8df) __C,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
                                                     (__v8df) __B,
                                                     -(__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* -(A*B) + C; negating __B is equivalent to negating __A here. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    -(__v8df) __B,
                                                    (__v8df) __C,
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* -(A*B) - C. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    -(__v8df) __B,
                                                    -(__v8df) __C,
                                                    (__mmask8) -1,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
                                                     (__v8df) __B,
                                                     -(__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}
2644 
/* Fused multiply-add family (packed float) with explicit rounding mode R.
 * Mirrors the _pd macros above: fmsub/fnmadd/fnmsub negate C and/or A;
 * _mask keeps lanes of A, _mask3 keeps lanes of C, _maskz zeroes them. */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


/* fmsub: A*B - C. */
#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


/* fnmadd: -(A*B) + C; negating B is equivalent to negating A. */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


/* fnmsub: -(A*B) - C. */
#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))


#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
2728 
/* Non-rounding FMA entry points (packed float): rounding operand fixed to
 * _MM_FROUND_CUR_DIRECTION (current MXCSR rounding mode). */

/* A*B + C for 16 packed floats. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   (__v16sf) __C,
                                                   (__mmask16) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Masked: lanes of __A pass through where the __U bit is clear. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   (__v16sf) __C,
                                                   (__mmask16) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

/* Mask3: lanes of __C pass through where the __U bit is clear. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Maskz: lanes are zeroed where the __U bit is clear. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* A*B - C. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   -(__v16sf) __C,
                                                   (__mmask16) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   -(__v16sf) __C,
                                                   (__mmask16) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
                                                    (__v16sf) __B,
                                                    -(__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* -(A*B) + C; negating __B is equivalent to negating __A here. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   -(__v16sf) __B,
                                                   (__v16sf) __C,
                                                   (__mmask16) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* -(A*B) - C. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   -(__v16sf) __B,
                                                   -(__v16sf) __C,
                                                   (__mmask16) -1,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
                                                    (__v16sf) __B,
                                                    -(__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}
2848 
/* VFMADDSUB (packed double) with explicit rounding mode R:
 * odd lanes compute A*B + C, even lanes A*B - C.  fmsubadd is the
 * opposite interleave, obtained by negating C. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))


#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))


/* fmsubadd: odd lanes A*B - C, even lanes A*B + C (C negated). */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))


#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))


#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                -(__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
2896 
2897 
/* fmaddsub_pd, current-rounding forms.  Same builtins as the _round_
 * macros above but with _MM_FROUND_CUR_DIRECTION fixed, so these can be
 * ordinary inline functions. */

/* Unmasked: mask of -1 activates all 8 lanes. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
                                                      (__v8df) __B,
                                                      (__v8df) __C,
                                                      (__mmask8) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: inactive lanes take their value from __A. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
                                                      (__v8df) __B,
                                                      (__v8df) __C,
                                                      (__mmask8) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked variant 3: inactive lanes take their value from __C. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
                                                       (__v8df) __B,
                                                       (__v8df) __C,
                                                       (__mmask8) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked: inactive lanes are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
                                                       (__v8df) __B,
                                                       (__v8df) __C,
                                                       (__mmask8) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}
2937 
/* fmsubadd_pd, current-rounding forms: fmaddsub with __C negated, which
 * swaps the per-lane add/subtract pattern. */

/* Unmasked. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
                                                       (__v8df) __B,
                                                       -(__v8df) __C,
                                                       (__mmask8) -1,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: inactive lanes take their value from __A. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
                                                       (__v8df) __B,
                                                       -(__v8df) __C,
                                                       (__mmask8) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked: inactive lanes are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
                                                        (__v8df) __B,
                                                        -(__v8df) __C,
                                                        (__mmask8) __U,
                                                        _MM_FROUND_CUR_DIRECTION);
}
2967 
/* Single-precision counterparts of the pd rounding macros above: 16 float
 * lanes under a 16-bit mask.  Macros keep R and the mask constant
 * expressions; fmsubadd is fmaddsub with C negated. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))


#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))


#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))


#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               -(__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
3016 
/* fmaddsub_ps, current-rounding forms (inline functions; rounding fixed
 * at _MM_FROUND_CUR_DIRECTION). */

/* Unmasked: mask of -1 activates all 16 lanes. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      (__v16sf) __C,
                                                      (__mmask16) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: inactive lanes take their value from __A. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      (__v16sf) __C,
                                                      (__mmask16) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked variant 3: inactive lanes take their value from __C. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
                                                       (__v16sf) __B,
                                                       (__v16sf) __C,
                                                       (__mmask16) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked: inactive lanes are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
                                                       (__v16sf) __B,
                                                       (__v16sf) __C,
                                                       (__mmask16) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}
3056 
/* fmsubadd_ps, current-rounding forms: fmaddsub with __C negated. */

/* Unmasked. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      -(__v16sf) __C,
                                                      (__mmask16) -1,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: inactive lanes take their value from __A. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      -(__v16sf) __C,
                                                      (__mmask16) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked: inactive lanes are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
                                                       (__v16sf) __B,
                                                       -(__v16sf) __C,
                                                       (__mmask16) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}
3086 
/* mask3 FMSUB (A*B - C): only the _mask3 variant needs a dedicated
 * vfmsub builtin, because inactive lanes must preserve __C and negating
 * C (the fmadd trick) would corrupt that passthrough value. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
                                                    (__v8df) __B,
                                                    (__v8df) __C,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Single-precision counterpart. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
                                                   (__v16sf) __B,
                                                   (__v16sf) __C,
                                                   (__mmask16) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}
3119 
/* mask3 FMSUBADD: as with mask3 fmsub, a dedicated vfmsubadd builtin is
 * used so inactive lanes preserve __C unmodified (the negated-C trick
 * would alter the passthrough value). */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))


static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
                                                       (__v8df) __B,
                                                       (__v8df) __C,
                                                       (__mmask8) __U,
                                                       _MM_FROUND_CUR_DIRECTION);
}

/* Single-precision counterpart. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))


static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
                                                      (__v16sf) __B,
                                                      (__v16sf) __C,
                                                      (__mmask16) __U,
                                                      _MM_FROUND_CUR_DIRECTION);
}
3153 
/* Merge-masked FNMAD D(-(A*B) + C), built on the FMADD builtin by negating
 * __B rather than __A: the _mask builtin passes its FIRST operand through
 * for inactive lanes, so __A must be left unmodified. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    -(__v8df) __B,
                                                    (__v8df) __C,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

/* Single-precision counterpart. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   -(__v16sf) __B,
                                                   (__v16sf) __C,
                                                   (__mmask16) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}
3187 
/* FNMSUB (-(A*B) - C), masked variants.  The negation is placed so the
 * passthrough operand of each builtin stays intact:
 *  - _mask merges from the first operand, so __B and __C are negated
 *    ((-B) and (-C): A*(-B) + (-C) == -(A*B) - C);
 *  - _mask3 merges from __C, so the vfmsub builtin is used with __A
 *    negated ((-A)*B - C == -(A*B) - C). */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))


#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))


static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
                                                    -(__v8df) __B,
                                                    -(__v8df) __C,
                                                    (__mmask8) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
{
  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
                                                     (__v8df) __B,
                                                     (__v8df) __C,
                                                     (__mmask8) __U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

/* Single-precision counterparts. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))


#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))


static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
                                                   -(__v16sf) __B,
                                                   -(__v16sf) __C,
                                                   (__mmask16) __U,
                                                   _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
{
  return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
                                                    (__v16sf) __B,
                                                    (__v16sf) __C,
                                                    (__mmask16) __U,
                                                    _MM_FROUND_CUR_DIRECTION);
}
3255 
3256 
3257 
3258 /* Vector permutations */
3259 
/* Full two-source permute: each 32-bit result element is selected from the
 * concatenated table {__A, __B} by the corresponding index in __I
 * (VPERMI2D).  The masked variants compute the full permute and then use
 * the generic lane-select builtin to choose the passthrough source. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
                                                (__v16si) __B);
}

/* Merge-masked: lanes with a clear bit in __U keep __A's value. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
                               __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
                              (__v16si)__A);
}

/* mask2 form: lanes with a clear bit in __U keep the index vector __I. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
                                __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
                              (__v16si)__I);
}

/* Zero-masked: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
                                __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
                              (__v16si)_mm512_setzero_si512());
}
3293 
/* 64-bit element counterpart of permutex2var_epi32 (VPERMI2Q): each
 * qword result element is selected from {__A, __B} by the index in __I. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
{
  return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
                                                (__v8di) __B);
}

/* Merge-masked: lanes with a clear bit in __U keep __A's value. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
                               __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
                               (__v8di)__A);
}

/* mask2 form: lanes with a clear bit in __U keep the index vector __I. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
                                __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
                               (__v8di)__I);
}

/* Zero-masked: lanes with a clear bit in __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
                                __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
                               (__v8di)_mm512_setzero_si512());
}
3327 
/* VALIGNQ/VALIGND: concatenate A:B and shift right by I whole elements
 * (64-bit for epi64, 32-bit for epi32), keeping the low 512 bits.  Macros
 * because I must be an integer constant expression (instruction
 * immediate).  Masked forms wrap the unmasked macro in a lane select. */
#define _mm512_alignr_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(I)))

/* Merge-masked: inactive lanes come from W. */
#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                  (__v8di)(__m512i)(W)))

/* Zero-masked: inactive lanes are zeroed. */
#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                  (__v8di)_mm512_setzero_si512()))

#define _mm512_alignr_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(I)))

/* Merge-masked: inactive lanes come from W. */
#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                 (__v16si)(__m512i)(W)))

/* Zero-masked: inactive lanes are zeroed. */
#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512()))
3355 /* Vector Extract */
3356 
/* Vector extract: pull a 256-bit (4 x double) or 128-bit (4 x float) field
 * out of a 512-bit register, selected by the immediate I/imm.  Macros so
 * the field selector stays an instruction immediate.  The unmasked forms
 * pass an undefined vector and an all-ones mask; masked forms merge
 * inactive lanes from W, maskz forms zero them. */
#define _mm512_extractf64x4_pd(A, I) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                             (__v4df)_mm256_undefined_pd(), \
                                             (__mmask8)-1))

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))

#define _mm512_extractf32x4_ps(A, I) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v4sf)_mm_undefined_ps(), \
                                            (__mmask8)-1))

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))
3386 
3387 /* Vector Blend */
3388 
/* Mask-driven blends: per lane, take the element from __W where the
 * corresponding bit of __U is set, otherwise from __A.  Implemented
 * directly on the generic per-lane select builtins. */

static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
{
  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
                 (__v8df) __W,
                 (__v8df) __A);
}

static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
                (__v16sf) __W,
                (__v16sf) __A);
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
{
  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
                (__v8di) __W,
                (__v8di) __A);
}

static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
{
  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
                (__v16si) __W,
                (__v16si) __A);
}
3420 
3421 /* Compare */
3422 
/* Packed-float compares producing a 16-bit lane mask (VCMPPS).  P is the
 * comparison predicate immediate (_CMP_*), R the rounding/SAE control;
 * macros keep both constant expressions.  The masked forms AND the result
 * with U via the builtin's mask operand.  The named shorthands below map
 * the classic comparison names onto the corresponding _CMP_* predicates
 * (ordered/unordered, quiet/signaling per the suffix). */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
3477 
/* Packed-double compares producing an 8-bit lane mask (VCMPPD); same
 * structure as the ps family above: P is the _CMP_* predicate immediate,
 * R the rounding/SAE control, and the named shorthands fix P. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)-1, (int)(R)))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)(U), (int)(R)))

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3532 
3533 /* Conversion */
3534 
3535 #define _mm512_cvtt_roundps_epu32(A, R) \
3536   ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3537                                               (__v16si)_mm512_undefined_epi32(), \
3538                                               (__mmask16)-1, (int)(R)))
3539 
3540 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
3541   ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3542                                               (__v16si)(__m512i)(W), \
3543                                               (__mmask16)(U), (int)(R)))
3544 
3545 #define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
3546   ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3547                                               (__v16si)_mm512_setzero_si512(), \
3548                                               (__mmask16)(U), (int)(R)))
3549 
3550 
3551 static __inline __m512i __DEFAULT_FN_ATTRS512
3552 _mm512_cvttps_epu32(__m512 __A)
3553 {
3554   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3555                   (__v16si)
3556                   _mm512_setzero_si512 (),
3557                   (__mmask16) -1,
3558                   _MM_FROUND_CUR_DIRECTION);
3559 }
3560 
3561 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3562 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3563 {
3564   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3565                    (__v16si) __W,
3566                    (__mmask16) __U,
3567                    _MM_FROUND_CUR_DIRECTION);
3568 }
3569 
3570 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3571 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
3572 {
3573   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3574                    (__v16si) _mm512_setzero_si512 (),
3575                    (__mmask16) __U,
3576                    _MM_FROUND_CUR_DIRECTION);
3577 }
3578 
/* Convert 16 signed 32-bit ints to floats; R is a compile-time rounding
 * constant. */
#define _mm512_cvt_roundepi32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))

/* Unsigned variants of the above. */
#define _mm512_cvt_roundepu32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3608 
3609 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3610 _mm512_cvtepu32_ps (__m512i __A)
3611 {
3612   return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3613 }
3614 
3615 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3616 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3617 {
3618   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3619                                              (__v16sf)_mm512_cvtepu32_ps(__A),
3620                                              (__v16sf)__W);
3621 }
3622 
3623 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3624 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
3625 {
3626   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3627                                              (__v16sf)_mm512_cvtepu32_ps(__A),
3628                                              (__v16sf)_mm512_setzero_ps());
3629 }
3630 
3631 static __inline __m512d __DEFAULT_FN_ATTRS512
3632 _mm512_cvtepi32_pd(__m256i __A)
3633 {
3634   return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3635 }
3636 
3637 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3638 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3639 {
3640   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3641                                               (__v8df)_mm512_cvtepi32_pd(__A),
3642                                               (__v8df)__W);
3643 }
3644 
3645 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3646 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
3647 {
3648   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3649                                               (__v8df)_mm512_cvtepi32_pd(__A),
3650                                               (__v8df)_mm512_setzero_pd());
3651 }
3652 
3653 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3654 _mm512_cvtepi32lo_pd(__m512i __A)
3655 {
3656   return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3657 }
3658 
3659 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3660 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3661 {
3662   return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3663 }
3664 
3665 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3666 _mm512_cvtepi32_ps (__m512i __A)
3667 {
3668   return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3669 }
3670 
3671 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3672 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3673 {
3674   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3675                                              (__v16sf)_mm512_cvtepi32_ps(__A),
3676                                              (__v16sf)__W);
3677 }
3678 
3679 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3680 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
3681 {
3682   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3683                                              (__v16sf)_mm512_cvtepi32_ps(__A),
3684                                              (__v16sf)_mm512_setzero_ps());
3685 }
3686 
3687 static __inline __m512d __DEFAULT_FN_ATTRS512
3688 _mm512_cvtepu32_pd(__m256i __A)
3689 {
3690   return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3691 }
3692 
3693 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3694 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3695 {
3696   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3697                                               (__v8df)_mm512_cvtepu32_pd(__A),
3698                                               (__v8df)__W);
3699 }
3700 
3701 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3702 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
3703 {
3704   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3705                                               (__v8df)_mm512_cvtepu32_pd(__A),
3706                                               (__v8df)_mm512_setzero_pd());
3707 }
3708 
3709 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3710 _mm512_cvtepu32lo_pd(__m512i __A)
3711 {
3712   return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3713 }
3714 
3715 static __inline__ __m512d __DEFAULT_FN_ATTRS512
3716 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3717 {
3718   return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3719 }
3720 
/* Narrowing convert of 8 doubles to 8 floats; R is a compile-time rounding
 * constant. */
#define _mm512_cvt_roundpd_ps(A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R)))

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))

/* Non-rounding forms using the current MXCSR rounding mode.  The unmasked
 * form passes an undefined passthrough because all lanes are written. */
static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_cvtpd_ps (__m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                (__v8sf) _mm256_undefined_ps (),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: unselected lanes come from __W. */
static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                (__v8sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked: unselected lanes are zeroed. */
static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
                (__v8sf) _mm256_setzero_ps (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
3762 
/* Convert 8 doubles to 8 floats placed in the low half of a 512-bit vector;
 * the shuffle pulls indices 8..15 from a zero vector, so the upper 8 float
 * lanes of the result are zero. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_cvtpd_pslo (__m512d __A)
{
  return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
                (__v8sf) _mm256_setzero_ps (),
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}

/* Masked variant: the merge source is the low 256 bits of __W (masking is
 * applied inside _mm512_mask_cvtpd_ps); the upper 8 lanes are still zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
{
  return (__m512) __builtin_shufflevector (
                (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
                                               __U, __A),
                (__v8sf) _mm256_setzero_ps (),
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
}
3780 
/* Convert 16 floats to 16 half-precision values; I is a compile-time
 * rounding-control immediate. */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_undefined_si256(), \
                                             (__mmask16)-1))

/* NOTE(review): here the parameter names are swapped relative to the other
 * mask_ macros in this file — U is the merge source vector and W is the
 * mask.  The expansion is correct; only the names are unconventional. */
#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)(__m256i)(U), \
                                             (__mmask16)(W)))

/* Likewise, W here is the mask operand. */
#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)(W)))

/* The non-round names take the same immediate, so they alias directly. */
#define _mm512_cvtps_ph       _mm512_cvt_roundps_ph
#define _mm512_mask_cvtps_ph  _mm512_mask_cvt_roundps_ph
#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph

/* Convert 16 half-precision values to 16 floats; R is a compile-time
 * SAE/rounding constant. */
#define _mm512_cvt_roundph_ps(A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3814 
3815 
3816 static  __inline __m512 __DEFAULT_FN_ATTRS512
3817 _mm512_cvtph_ps(__m256i __A)
3818 {
3819   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3820                 (__v16sf)
3821                 _mm512_setzero_ps (),
3822                 (__mmask16) -1,
3823                 _MM_FROUND_CUR_DIRECTION);
3824 }
3825 
3826 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3827 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3828 {
3829   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3830                  (__v16sf) __W,
3831                  (__mmask16) __U,
3832                  _MM_FROUND_CUR_DIRECTION);
3833 }
3834 
3835 static __inline__ __m512 __DEFAULT_FN_ATTRS512
3836 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
3837 {
3838   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3839                  (__v16sf) _mm512_setzero_ps (),
3840                  (__mmask16) __U,
3841                  _MM_FROUND_CUR_DIRECTION);
3842 }
3843 
3844 #define _mm512_cvtt_roundpd_epi32(A, R) \
3845   ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3846                                              (__v8si)_mm256_setzero_si256(), \
3847                                              (__mmask8)-1, (int)(R)))
3848 
3849 #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
3850   ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3851                                              (__v8si)(__m256i)(W), \
3852                                              (__mmask8)(U), (int)(R)))
3853 
3854 #define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
3855   ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3856                                              (__v8si)_mm256_setzero_si256(), \
3857                                              (__mmask8)(U), (int)(R)))
3858 
3859 static __inline __m256i __DEFAULT_FN_ATTRS512
3860 _mm512_cvttpd_epi32(__m512d __a)
3861 {
3862   return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3863                                                    (__v8si)_mm256_setzero_si256(),
3864                                                    (__mmask8) -1,
3865                                                     _MM_FROUND_CUR_DIRECTION);
3866 }
3867 
3868 static __inline__ __m256i __DEFAULT_FN_ATTRS512
3869 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3870 {
3871   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3872                   (__v8si) __W,
3873                   (__mmask8) __U,
3874                   _MM_FROUND_CUR_DIRECTION);
3875 }
3876 
3877 static __inline__ __m256i __DEFAULT_FN_ATTRS512
3878 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
3879 {
3880   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3881                   (__v8si) _mm256_setzero_si256 (),
3882                   (__mmask8) __U,
3883                   _MM_FROUND_CUR_DIRECTION);
3884 }
3885 
3886 #define _mm512_cvtt_roundps_epi32(A, R) \
3887   ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3888                                              (__v16si)_mm512_setzero_si512(), \
3889                                              (__mmask16)-1, (int)(R)))
3890 
3891 #define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
3892   ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3893                                              (__v16si)(__m512i)(W), \
3894                                              (__mmask16)(U), (int)(R)))
3895 
3896 #define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
3897   ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3898                                              (__v16si)_mm512_setzero_si512(), \
3899                                              (__mmask16)(U), (int)(R)))
3900 
3901 static __inline __m512i __DEFAULT_FN_ATTRS512
3902 _mm512_cvttps_epi32(__m512 __a)
3903 {
3904   return (__m512i)
3905     __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3906                                      (__v16si) _mm512_setzero_si512 (),
3907                                      (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
3908 }
3909 
3910 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3911 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3912 {
3913   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3914                   (__v16si) __W,
3915                   (__mmask16) __U,
3916                   _MM_FROUND_CUR_DIRECTION);
3917 }
3918 
3919 static __inline__ __m512i __DEFAULT_FN_ATTRS512
3920 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
3921 {
3922   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3923                   (__v16si) _mm512_setzero_si512 (),
3924                   (__mmask16) __U,
3925                   _MM_FROUND_CUR_DIRECTION);
3926 }
3927 
/* Rounding convert of 16 floats to 16 signed 32-bit ints; R is a
 * compile-time rounding constant. */
#define _mm512_cvt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)))

/* Current-MXCSR form.  All-ones mask: the passthrough may be undefined. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epi32 (__m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                 (__v16si) _mm512_undefined_epi32 (),
                 (__mmask16) -1,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: unselected lanes come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                 (__v16si) __W,
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked: unselected lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                 (__v16si)
                 _mm512_setzero_si512 (),
                 (__mmask16) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
3970 
/* Rounding convert of 8 doubles to 8 signed 32-bit ints; R is a
 * compile-time rounding constant. */
#define _mm512_cvt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)))

/* Current-MXCSR form.  All-ones mask: the passthrough may be undefined. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epi32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                 (__v8si)
                 _mm256_undefined_si256 (),
                 (__mmask8) -1,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: unselected lanes come from __W. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                 (__v8si) __W,
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked: unselected lanes are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                 (__v8si)
                 _mm256_setzero_si256 (),
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}
4014 
4015 #define _mm512_cvt_roundps_epu32(A, R) \
4016   ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4017                                              (__v16si)_mm512_setzero_si512(), \
4018                                              (__mmask16)-1, (int)(R)))
4019 
4020 #define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
4021   ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4022                                              (__v16si)(__m512i)(W), \
4023                                              (__mmask16)(U), (int)(R)))
4024 
4025 #define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
4026   ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4027                                              (__v16si)_mm512_setzero_si512(), \
4028                                              (__mmask16)(U), (int)(R)))
4029 
4030 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4031 _mm512_cvtps_epu32 ( __m512 __A)
4032 {
4033   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4034                   (__v16si)\
4035                   _mm512_undefined_epi32 (),
4036                   (__mmask16) -1,\
4037                   _MM_FROUND_CUR_DIRECTION);
4038 }
4039 
4040 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4041 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4042 {
4043   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4044                   (__v16si) __W,
4045                   (__mmask16) __U,
4046                   _MM_FROUND_CUR_DIRECTION);
4047 }
4048 
4049 static __inline__ __m512i __DEFAULT_FN_ATTRS512
4050 _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
4051 {
4052   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4053                   (__v16si)
4054                   _mm512_setzero_si512 (),
4055                   (__mmask16) __U ,
4056                   _MM_FROUND_CUR_DIRECTION);
4057 }
4058 
/* Rounding convert of 8 doubles to 8 unsigned 32-bit ints; R is a
 * compile-time rounding constant. */
#define _mm512_cvt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))

/* Current-MXCSR form.  All-ones mask: the passthrough may be undefined. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epu32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                  (__v8si)
                  _mm256_undefined_si256 (),
                  (__mmask8) -1,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked: unselected lanes come from __W. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                  (__v8si) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked: unselected lanes are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                  (__v8si)
                  _mm256_setzero_si256 (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
4102 
/* Extract the lowest double-precision element of a 512-bit vector. */
static __inline__ double __DEFAULT_FN_ATTRS512
_mm512_cvtsd_f64(__m512d __a)
{
  return __a[0];
}

/* Extract the lowest single-precision element of a 512-bit vector. */
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_cvtss_f32(__m512 __a)
{
  return __a[0];
}
4114 
4115 /* Unpack and Interleave */
4116 
4117 static __inline __m512d __DEFAULT_FN_ATTRS512
4118 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
4119 {
4120   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4121                                           1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4122 }
4123 
4124 static __inline__ __m512d __DEFAULT_FN_ATTRS512
4125 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4126 {
4127   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4128                                            (__v8df)_mm512_unpackhi_pd(__A, __B),
4129                                            (__v8df)__W);
4130 }
4131 
4132 static __inline__ __m512d __DEFAULT_FN_ATTRS512
4133 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4134 {
4135   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4136                                            (__v8df)_mm512_unpackhi_pd(__A, __B),
4137                                            (__v8df)_mm512_setzero_pd());
4138 }
4139 
4140 static __inline __m512d __DEFAULT_FN_ATTRS512
4141 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
4142 {
4143   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4144                                           0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4145 }
4146 
4147 static __inline__ __m512d __DEFAULT_FN_ATTRS512
4148 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4149 {
4150   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4151                                            (__v8df)_mm512_unpacklo_pd(__A, __B),
4152                                            (__v8df)__W);
4153 }
4154 
4155 static __inline__ __m512d __DEFAULT_FN_ATTRS512
4156 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4157 {
4158   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4159                                            (__v8df)_mm512_unpacklo_pd(__A, __B),
4160                                            (__v8df)_mm512_setzero_pd());
4161 }
4162 
4163 static __inline __m512 __DEFAULT_FN_ATTRS512
4164 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
4165 {
4166   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4167                                          2,    18,    3,    19,
4168                                          2+4,  18+4,  3+4,  19+4,
4169                                          2+8,  18+8,  3+8,  19+8,
4170                                          2+12, 18+12, 3+12, 19+12);
4171 }
4172 
4173 static __inline__ __m512 __DEFAULT_FN_ATTRS512
4174 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4175 {
4176   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4177                                           (__v16sf)_mm512_unpackhi_ps(__A, __B),
4178                                           (__v16sf)__W);
4179 }
4180 
4181 static __inline__ __m512 __DEFAULT_FN_ATTRS512
4182 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4183 {
4184   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4185                                           (__v16sf)_mm512_unpackhi_ps(__A, __B),
4186                                           (__v16sf)_mm512_setzero_ps());
4187 }
4188 
4189 static __inline __m512 __DEFAULT_FN_ATTRS512
4190 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
4191 {
4192   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4193                                          0,    16,    1,    17,
4194                                          0+4,  16+4,  1+4,  17+4,
4195                                          0+8,  16+8,  1+8,  17+8,
4196                                          0+12, 16+12, 1+12, 17+12);
4197 }
4198 
4199 static __inline__ __m512 __DEFAULT_FN_ATTRS512
4200 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4201 {
4202   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4203                                           (__v16sf)_mm512_unpacklo_ps(__A, __B),
4204                                           (__v16sf)__W);
4205 }
4206 
4207 static __inline__ __m512 __DEFAULT_FN_ATTRS512
4208 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4209 {
4210   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4211                                           (__v16sf)_mm512_unpacklo_ps(__A, __B),
4212                                           (__v16sf)_mm512_setzero_ps());
4213 }
4214 
/* Interleave the two high-order 32-bit elements of each 128-bit lane:
 * within each lane, take elements 2,3 alternately from __A and __B
 * (source indices 16..31 refer to __B in the shuffle below). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          2,    18,    3,    19,
                                          2+4,  18+4,  3+4,  19+4,
                                          2+8,  18+8,  3+8,  19+8,
                                          2+12, 18+12, 3+12, 19+12);
}

/* Writemask form: elements whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                       (__v16si)__W);
}

/* Zeromask form: elements whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
                                       (__v16si)_mm512_setzero_si512());
}

/* Interleave the two low-order 32-bit elements of each 128-bit lane
 * (elements 0,1 alternately from __A and __B). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
                                          0,    16,    1,    17,
                                          0+4,  16+4,  1+4,  17+4,
                                          0+8,  16+8,  1+8,  17+8,
                                          0+12, 16+12, 1+12, 17+12);
}

/* Writemask form: elements whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                       (__v16si)__W);
}

/* Zeromask form: elements whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
                                       (__v16si)_mm512_setzero_si512());
}
4266 
/* Interleave the high 64-bit element of each 128-bit lane of __A with
 * that of __B (indices 8..15 refer to __B in the shuffle below). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
}

/* Writemask form: elements whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                        (__v8di)__W);
}

/* Zeromask form: elements whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
                                        (__v8di)_mm512_setzero_si512());
}

/* Interleave the low 64-bit element of each 128-bit lane of __A with
 * that of __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
}

/* Writemask form: elements whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                        (__v8di)__W);
}

/* Zeromask form: elements whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
                                        (__v8di)_mm512_setzero_si512());
}
4312 
4313 
4314 /* SIMD load ops */
4315 
/* Unaligned 512-bit integer load.  The read goes through a packed,
 * may_alias struct member so the access is performed with alignment 1
 * and is allowed to alias objects of any type. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_loadu_si512 (void const *__P)
{
  struct __loadu_si512 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((const struct __loadu_si512*)__P)->__v;
}

/* Same unaligned 512-bit load, exposed under the _epi32 name. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_loadu_epi32 (void const *__P)
{
  struct __loadu_epi32 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((const struct __loadu_epi32*)__P)->__v;
}

/* Masked unaligned load of 16 x i32: result elements whose bit in __U is
 * clear are taken from __W. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
                  (__v16si) __W,
                  (__mmask16) __U);
}


/* Zero-masked unaligned load of 16 x i32: unselected elements are 0. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
                                                     (__v16si)
                                                     _mm512_setzero_si512 (),
                                                     (__mmask16) __U);
}

/* Unaligned load of 8 x i64 via the packed/may_alias struct idiom. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_loadu_epi64 (void const *__P)
{
  struct __loadu_epi64 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((const struct __loadu_epi64*)__P)->__v;
}

/* Masked unaligned load of 8 x i64: unselected elements come from __W. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
                  (__v8di) __W,
                  (__mmask8) __U);
}

/* Zero-masked unaligned load of 8 x i64: unselected elements are 0. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
                                                     (__v8di)
                                                     _mm512_setzero_si512 (),
                                                     (__mmask8) __U);
}
4377 
/* Masked unaligned load of 16 floats: result elements whose bit in __U is
 * clear are taken from __W. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

/* Zero-masked unaligned load of 16 floats: unselected elements are 0. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) __U);
}

/* Masked unaligned load of 8 doubles: unselected elements come from __W. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
                (__v8df) __W,
                (__mmask8) __U);
}

/* Zero-masked unaligned load of 8 doubles: unselected elements are 0. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) __U);
}

/* Unaligned load of 8 doubles via the packed/may_alias struct idiom
 * (access made with alignment 1, allowed to alias any type). */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_loadu_pd(void const *__p)
{
  struct __loadu_pd {
    __m512d_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((const struct __loadu_pd*)__p)->__v;
}

/* Unaligned load of 16 floats via the packed/may_alias struct idiom. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_loadu_ps(void const *__p)
{
  struct __loadu_ps {
    __m512_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((const struct __loadu_ps*)__p)->__v;
}
4429 
/* Aligned load of 16 floats.  __p must be 64-byte aligned: the access is
 * a plain dereference of __m512, which carries 64-byte alignment. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_load_ps(void const *__p)
{
  return *(const __m512*)__p;
}

/* Masked aligned load of 16 floats: unselected elements come from __W. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

/* Zero-masked aligned load of 16 floats: unselected elements are 0. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
                                                  (__v16sf)
                                                  _mm512_setzero_ps (),
                                                  (__mmask16) __U);
}

/* Aligned load of 8 doubles (64-byte-aligned __p required). */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_load_pd(void const *__p)
{
  return *(const __m512d*)__p;
}

/* Masked aligned load of 8 doubles: unselected elements come from __W. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
                          (__v8df) __W,
                          (__mmask8) __U);
}

/* Zero-masked aligned load of 8 doubles: unselected elements are 0. */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
                                                   (__v8df)
                                                   _mm512_setzero_pd (),
                                                   (__mmask8) __U);
}

/* Aligned 512-bit integer load (64-byte-aligned __P required). */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_load_si512 (void const *__P)
{
  return *(const __m512i *) __P;
}

/* Same aligned integer load, exposed under the _epi32 name. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_load_epi32 (void const *__P)
{
  return *(const __m512i *) __P;
}

/* Same aligned integer load, exposed under the _epi64 name. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_load_epi64 (void const *__P)
{
  return *(const __m512i *) __P;
}
4493 
4494 /* SIMD store ops */
4495 
/* Unaligned store of 8 x i64.  The write goes through a packed,
 * may_alias struct member so it is performed with alignment 1 and may
 * alias objects of any type. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi64 (void *__P, __m512i __A)
{
  struct __storeu_epi64 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_epi64*)__P)->__v = __A;
}

/* Masked unaligned store of 8 x i64: only elements whose bit in __U is
 * set are written to memory. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
                                     (__mmask8) __U);
}

/* Unaligned 512-bit integer store via the packed/may_alias idiom. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_si512 (void *__P, __m512i __A)
{
  struct __storeu_si512 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_si512*)__P)->__v = __A;
}

/* Same unaligned store, exposed under the _epi32 name. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_epi32 (void *__P, __m512i __A)
{
  struct __storeu_epi32 {
    __m512i_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_epi32*)__P)->__v = __A;
}

/* Masked unaligned store of 16 x i32: only selected elements written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
                                     (__mmask16) __U);
}
4536 
/* Masked unaligned store of 8 doubles: only elements whose bit in __U is
 * set are written to memory. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
}

/* Unaligned store of 8 doubles via the packed/may_alias struct idiom
 * (write performed with alignment 1, allowed to alias any type). */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_pd(void *__P, __m512d __A)
{
  struct __storeu_pd {
    __m512d_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_pd*)__P)->__v = __A;
}

/* Masked unaligned store of 16 floats: only selected elements written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}

/* Unaligned store of 16 floats via the packed/may_alias struct idiom. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_storeu_ps(void *__P, __m512 __A)
{
  struct __storeu_ps {
    __m512_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_ps*)__P)->__v = __A;
}
4567 
/* Masked aligned store of 8 doubles: only elements whose bit in __U is
 * set are written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
}

/* Aligned store of 8 doubles.  __P must be 64-byte aligned: the access
 * is a plain assignment through __m512d, which carries 64-byte alignment. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_pd(void *__P, __m512d __A)
{
  *(__m512d*)__P = __A;
}

/* Masked aligned store of 16 floats: only selected elements written. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
                                   (__mmask16) __U);
}

/* Aligned store of 16 floats (64-byte-aligned __P required). */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_ps(void *__P, __m512 __A)
{
  *(__m512*)__P = __A;
}

/* Aligned 512-bit integer store (64-byte-aligned __P required). */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_si512 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

/* Same aligned integer store, exposed under the _epi32 name. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_epi32 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

/* Same aligned integer store, exposed under the _epi64 name. */
static __inline void __DEFAULT_FN_ATTRS512
_mm512_store_epi64 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}
4610 
4611 /* Mask ops */
4612 
/* Bitwise NOT of a 16-bit mask register value (KNOTW). */
static __inline __mmask16 __DEFAULT_FN_ATTRS
_mm512_knot(__mmask16 __M)
{
  return __builtin_ia32_knothi(__M);
}
4618 
4619 /* Integer compare */
4620 
/* Named comparison forms, each expanding to the generic _mm512_cmp_*_mask
 * with the matching _MM_CMPINT_* predicate.  The _mask_ variants take an
 * input mask k that restricts which elements are compared.
 *
 * Signed 32-bit element compares (16-bit result mask): */
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

/* Unsigned 32-bit element compares (16-bit result mask): */
#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)

/* Signed 64-bit element compares (8-bit result mask): */
#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

/* Unsigned 64-bit element compares (8-bit result mask): */
#define _mm512_cmpeq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4720 
/* Sign-extend 16 bytes to 16 x i32. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepi8_epi32(__m128i __A)
{
  /* This function always performs a signed extension, but __v16qi is a char
     which may be signed or unsigned, so use __v16qs. */
  return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
}

/* Writemask form: elements whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepi8_epi32(__A),
                                             (__v16si)__W);
}

/* Zeromask form: elements whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepi8_epi32(__A),
                                             (__v16si)_mm512_setzero_si512());
}

/* Sign-extend the low 8 bytes of __A to 8 x i64.  The shufflevector
   selects elements 0..7 before the widening conversion. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepi8_epi64(__m128i __A)
{
  /* This function always performs a signed extension, but __v16qi is a char
     which may be signed or unsigned, so use __v16qs. */
  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
}

/* Writemask form: elements whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi8_epi64(__A),
                                             (__v8di)__W);
}

/* Zeromask form: elements whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi8_epi64(__A),
                                             (__v8di)_mm512_setzero_si512 ());
}
4768 
/* Sign-extend 8 x i32 to 8 x i64 (__v8si is signed, so the conversion
   extends the sign). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepi32_epi64(__m256i __X)
{
  return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
}

/* Writemask form: elements whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi32_epi64(__X),
                                             (__v8di)__W);
}

/* Zeromask form: elements whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi32_epi64(__X),
                                             (__v8di)_mm512_setzero_si512());
}

/* Sign-extend 16 x i16 to 16 x i32. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepi16_epi32(__m256i __A)
{
  return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
}

/* Writemask form: elements whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_cvtepi16_epi32(__A),
                                            (__v16si)__W);
}

/* Zeromask form: elements whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_cvtepi16_epi32(__A),
                                            (__v16si)_mm512_setzero_si512 ());
}

/* Sign-extend 8 x i16 to 8 x i64. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepi16_epi64(__m128i __A)
{
  return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
}

/* Writemask form: elements whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi16_epi64(__A),
                                             (__v8di)__W);
}

/* Zeromask form: elements whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepi16_epi64(__A),
                                             (__v8di)_mm512_setzero_si512());
}
4834 
/* Zero-extend 16 bytes to 16 x i32 (__v16qu is unsigned, so the
   conversion zero-extends). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepu8_epi32(__m128i __A)
{
  return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
}

/* Writemask form: elements whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepu8_epi32(__A),
                                             (__v16si)__W);
}

/* Zeromask form: elements whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                             (__v16si)_mm512_cvtepu8_epi32(__A),
                                             (__v16si)_mm512_setzero_si512());
}

/* Zero-extend the low 8 bytes of __A (elements 0..7 selected by the
   shufflevector) to 8 x i64. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepu8_epi64(__m128i __A)
{
  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
}

/* Writemask form: elements whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu8_epi64(__A),
                                             (__v8di)__W);
}

/* Zeromask form: elements whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu8_epi64(__A),
                                             (__v8di)_mm512_setzero_si512());
}
4878 
/* Zero-extend 8 x u32 to 8 x i64 (__v8su is unsigned, so the conversion
   zero-extends). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepu32_epi64(__m256i __X)
{
  return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
}

/* Writemask form: elements whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu32_epi64(__X),
                                             (__v8di)__W);
}

/* Zeromask form: elements whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu32_epi64(__X),
                                             (__v8di)_mm512_setzero_si512());
}

/* Zero-extend 16 x u16 to 16 x i32. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepu16_epi32(__m256i __A)
{
  return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
}

/* Writemask form: elements whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_cvtepu16_epi32(__A),
                                            (__v16si)__W);
}

/* Zeromask form: elements whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                            (__v16si)_mm512_cvtepu16_epi32(__A),
                                            (__v16si)_mm512_setzero_si512());
}

/* Zero-extend 8 x u16 to 8 x i64. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtepu16_epi64(__m128i __A)
{
  return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
}

/* Writemask form: elements whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu16_epi64(__A),
                                             (__v8di)__W);
}

/* Zeromask form: elements whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_cvtepu16_epi64(__A),
                                             (__v8di)_mm512_setzero_si512());
}
4944 
/* Rotate each 32-bit element of __A right by the count in the
 * corresponding element of __B (variable-count rotate, VPRORVD). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_rorv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
}

/* Writemask form: elements whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                           (__v16si)_mm512_rorv_epi32(__A, __B),
                                           (__v16si)__W);
}

/* Zeromask form: elements whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                           (__v16si)_mm512_rorv_epi32(__A, __B),
                                           (__v16si)_mm512_setzero_si512());
}

/* Rotate each 64-bit element of __A right by the count in the
 * corresponding element of __B (VPRORVQ). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_rorv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
}

/* Writemask form: elements whose bit in __U is clear come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                            (__v8di)_mm512_rorv_epi64(__A, __B),
                                            (__v8di)__W);
}

/* Zeromask form: elements whose bit in __U is clear are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                            (__v8di)_mm512_rorv_epi64(__A, __B),
                                            (__v8di)_mm512_setzero_si512());
}
4988 
4989 
4990 
/* Generic element compares: compare each element of (a) with (b) using
 * predicate (p), one of the _MM_CMPINT_* values, and return the results
 * as a bitmask.  The unmasked forms pass an all-ones mask; the _mask_
 * forms pass (m) so that elements masked off by (m) produce 0 bits.
 * These are macros because (p) must reach the builtin as a constant. */
#define _mm512_cmp_epi32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1))

#define _mm512_cmp_epu32_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)-1))

#define _mm512_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm512_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m)))

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                           (__v16si)(__m512i)(b), (int)(p), \
                                           (__mmask16)(m)))

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                          (__v8di)(__m512i)(b), (int)(p), \
                                          (__mmask8)(m)))
5030 
/* Rotate each element of a left by the immediate count b.  The _mask_
 * forms merge elements whose mask bit in U is 0 from W; the _maskz_
 * forms zero those elements instead. */
#define _mm512_rol_epi32(a, b) \
  ((__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi32(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_rol_epi32(U, a, b) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_rol_epi32((a), (b)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_rol_epi64(a, b) \
  ((__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b)))

#define _mm512_mask_rol_epi64(W, U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_rol_epi64(U, a, b) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)_mm512_setzero_si512()))
5056 
/* Rotate each 32-bit element of __A left by the count held in the
 * corresponding element of __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_rolv_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
}
5062 
/* Variable left-rotate with merge-masking: elements whose bit in __U is 0
 * take their value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                           (__v16si)_mm512_rolv_epi32(__A, __B),
                                           (__v16si)__W);
}
5070 
/* Variable left-rotate with zero-masking: elements whose bit in __U is 0
 * are set to zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectd_512(__U,
                                           (__v16si)_mm512_rolv_epi32(__A, __B),
                                           (__v16si)_mm512_setzero_si512());
}
5078 
/* Rotate each 64-bit element of __A left by the count held in the
 * corresponding element of __B. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_rolv_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
}
5084 
/* 64-bit variable left-rotate, merge-masked: zero bits in __U select the
 * corresponding element of __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                            (__v8di)_mm512_rolv_epi64(__A, __B),
                                            (__v8di)__W);
}
5092 
/* 64-bit variable left-rotate, zero-masked: zero bits in __U yield zero
 * elements. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i)__builtin_ia32_selectq_512(__U,
                                            (__v8di)_mm512_rolv_epi64(__A, __B),
                                            (__v8di)_mm512_setzero_si512());
}
5100 
/* Rotate each element of A right by the immediate count B; masked forms
 * follow the usual merge (_mask_) / zero (_maskz_) convention. */
#define _mm512_ror_epi32(A, B) \
  ((__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_ror_epi32(U, A, B) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_ror_epi32((A), (B)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_ror_epi64(A, B) \
  ((__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B)))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_ror_epi64(U, A, B) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)_mm512_setzero_si512()))
5126 
/* Shift each 32-bit element of __A left by the immediate count __B,
 * shifting in zeros. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_slli_epi32(__m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, (int)__B);
}
5132 
/* Immediate left-shift, merge-masked: elements with a 0 bit in __U come
 * from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
                       unsigned int __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_slli_epi32(__A, __B),
                                         (__v16si)__W);
}
5141 
/* Immediate left-shift, zero-masked: elements with a 0 bit in __U become
 * zero. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_slli_epi32(__A, __B),
                                         (__v16si)_mm512_setzero_si512());
}
5148 
/* Shift each 64-bit element of __A left by the immediate count __B,
 * shifting in zeros. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_slli_epi64(__m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, (int)__B);
}
5154 
/* 64-bit immediate left-shift, merge-masked against __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_slli_epi64(__A, __B),
                                          (__v8di)__W);
}
5162 
/* 64-bit immediate left-shift, zero-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_slli_epi64(__A, __B),
                                          (__v8di)_mm512_setzero_si512());
}
5170 
/* Logical right-shift of each 32-bit element of __A by the immediate
 * count __B (zeros shifted in). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srli_epi32(__m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, (int)__B);
}
5176 
/* Immediate logical right-shift, merge-masked against __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
                       unsigned int __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_srli_epi32(__A, __B),
                                         (__v16si)__W);
}
5185 
/* Immediate logical right-shift, zero-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                         (__v16si)_mm512_srli_epi32(__A, __B),
                                         (__v16si)_mm512_setzero_si512());
}
5192 
/* Logical right-shift of each 64-bit element of __A by the immediate
 * count __B (zeros shifted in). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srli_epi64(__m512i __A, unsigned int __B)
{
  return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, (int)__B);
}
5198 
/* 64-bit immediate logical right-shift, merge-masked against __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
                       unsigned int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_srli_epi64(__A, __B),
                                          (__v8di)__W);
}
5207 
/* 64-bit immediate logical right-shift, zero-masked. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                          (__v8di)_mm512_srli_epi64(__A, __B),
                                          (__v8di)_mm512_setzero_si512());
}
5216 
/* Masked aligned load of 16 x 32-bit elements from __P; elements whose
 * mask bit in __U is 0 take their value from __W.  The movdqa builtin is
 * the aligned form — __P is expected to be 64-byte aligned (TODO confirm
 * against the documented contract for _mm512_mask_load_epi32). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
              (__v16si) __W,
              (__mmask16) __U);
}
5224 
/* Masked aligned load of 16 x 32-bit elements from __P; elements whose
 * mask bit in __U is 0 are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
              (__v16si)
              _mm512_setzero_si512 (),
              (__mmask16) __U);
}
5233 
/* Masked aligned store: write only those 32-bit elements of __A to __P
 * whose mask bit in __U is 1. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
          (__mmask16) __U);
}
5240 
/* Per-element select: result takes 32-bit elements from __A where the
 * bit in __U is 1, and from __W where it is 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
                 (__v16si) __A,
                 (__v16si) __W);
}
5248 
/* Per-element select against zero: elements of __A where the bit in __U
 * is 1, zero elsewhere. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
                 (__v16si) __A,
                 (__v16si) _mm512_setzero_si512 ());
}
5256 
/* Per-element select: result takes 64-bit elements from __A where the
 * bit in __U is 1, and from __W where it is 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
                 (__v8di) __A,
                 (__v8di) __W);
}
5264 
/* Per-element select against zero: 64-bit elements of __A where the bit
 * in __U is 1, zero elsewhere. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
                 (__v8di) __A,
                 (__v8di) _mm512_setzero_si512 ());
}
5272 
/* Masked aligned load of 8 x 64-bit elements from __P; elements whose
 * mask bit in __U is 0 take their value from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
              (__v8di) __W,
              (__mmask8) __U);
}
5280 
/* Masked aligned load of 8 x 64-bit elements from __P; elements whose
 * mask bit in __U is 0 are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
              (__v8di)
              _mm512_setzero_si512 (),
              (__mmask8) __U);
}
5289 
/* Masked aligned store: write only those 64-bit elements of __A to __P
 * whose mask bit in __U is 1. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
          (__mmask8) __U);
}
5296 
/* Duplicate each even-indexed double of __A into the following odd lane:
 * result = { a0,a0, a2,a2, a4,a4, a6,a6 }. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_movedup_pd (__m512d __A)
{
  return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
                                          0, 0, 2, 2, 4, 4, 6, 6);
}
5303 
/* Even-lane duplication, merge-masked: elements with a 0 bit in __U come
 * from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_movedup_pd(__A),
                                              (__v8df)__W);
}
5311 
/* Even-lane duplication, zero-masked: elements with a 0 bit in __U are
 * zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_movedup_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}
5319 
/* Fix up special double values (per-element): the lookup table in C and
 * the immediate imm choose a replacement for special inputs in A/B.
 * _round_ variants take an explicit SAE/rounding argument R; the others
 * use _MM_FROUND_CUR_DIRECTION.  _maskz_ forms zero unselected elements. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), (int)(R)))

#define _mm512_fixupimm_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), (int)(imm), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               (int)(R)))

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8di)(__m512i)(C), \
                                               (int)(imm), (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))
5359 
/* Single-precision counterparts of the fixupimm_pd family above: 16
 * float elements, table in C, immediate imm; _round_ forms take R. */
#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, (int)(R)))

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), (int)(R)))

#define _mm512_fixupimm_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16si)(__m512i)(C), (int)(imm), \
                                             (__mmask16)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              (int)(R)))

#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16si)(__m512i)(C), \
                                              (int)(imm), (__mmask16)(U), \
                                              _MM_FROUND_CUR_DIRECTION))
5399 
/* Scalar double fixupimm: only the low element is fixed up; the upper
 * element of the result comes from A (per the scalar-op convention).
 * NOTE(review): upper-element behavior inferred from the instruction
 * family, not visible here — confirm against the intrinsics guide. */
#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_sd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2di)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2di)(__m128i)(C), (int)(imm), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
5438 
/* Scalar float fixupimm: single-precision counterparts of the _sd
 * family above (low element fixed up, table in C, immediate imm). */
#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, (int)(R)))

#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), (int)(R)))

#define _mm_fixupimm_ss(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4si)(__m128i)(C), (int)(imm), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), (int)(R)))

#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4si)(__m128i)(C), (int)(imm), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))
5477 
/* Extract the exponent of the low double of B (as a double) into the low
 * result element, with explicit SAE/rounding control R. */
#define _mm_getexp_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)-1, (int)(R)))
5483 
5484 
/* Extract the exponent of the low double of __B; uses the current
 * rounding direction. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_getexp_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
                 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}
5491 
/* Scalar getexp, merge-masked: if bit 0 of __U is clear the low result
 * element comes from __W. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) __W,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
5501 
/* Merge-masked scalar getexp with explicit SAE/rounding control R. */
#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)(__m128d)(W), \
                                                  (__mmask8)(U), (int)(R)))
5507 
/* Scalar getexp, zero-masked: if bit 0 of __U is clear the low result
 * element is zero. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
          (__v2df) __B,
          (__v2df) _mm_setzero_pd (),
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
5517 
/* Zero-masked scalar getexp with explicit SAE/rounding control R. */
#define _mm_maskz_getexp_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                  (__v2df)(__m128d)(B), \
                                                  (__v2df)_mm_setzero_pd(), \
                                                  (__mmask8)(U), (int)(R)))
5523 
/* Extract the exponent of the low float of B (as a float) into the low
 * result element, with explicit SAE/rounding control R. */
#define _mm_getexp_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)-1, (int)(R)))
5529 
/* Extract the exponent of the low float of __B; uses the current
 * rounding direction. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_getexp_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
                (__v4sf) __B, (__v4sf)  _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
}
5536 
/* Scalar float getexp, merge-masked against __W. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) __W,
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
5546 
/* Merge-masked scalar float getexp with explicit SAE/rounding control R. */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)(__m128)(W), \
                                                 (__mmask8)(U), (int)(R)))
5552 
/* Scalar float getexp, zero-masked. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
          (__v4sf) __B,
          (__v4sf) _mm_setzero_ps (),
          (__mmask8) __U,
          _MM_FROUND_CUR_DIRECTION);
}
5562 
/* Zero-masked scalar float getexp with explicit SAE/rounding control R. */
#define _mm_maskz_getexp_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                 (__v4sf)(__m128)(B), \
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)(U), (int)(R)))
5568 
/* Extract the normalized mantissa of the low double of B.  C selects the
 * normalization interval and D the sign control; they are packed into the
 * instruction immediate as (D<<2)|C.  _round_ variants take an explicit
 * SAE/rounding argument R; masked forms follow the usual merge/zero
 * convention. */
#define _mm_getmant_round_sd(A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(R)))

#define _mm_getmant_sd(A, B, C, D)  \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_sd(U, A, B, C, D) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  ((__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (int)(((D)<<2) | (C)), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(R)))
5613 
/* Single-precision counterparts of the getmant_sd family above: extract
 * the normalized mantissa of the low float of B; interval C and sign
 * control D are packed into the immediate as (D<<2)|C. */
#define _mm_getmant_round_ss(A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_getmant_ss(A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_ss(U, A, B, C, D) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  ((__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R)))
5658 
5659 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5660 _mm512_kmov (__mmask16 __A)
5661 {
5662   return  __A;
5663 }
5664 
/* Ordered scalar compare of the low elements of A and B with comparison
 * predicate P and rounding/SAE control R; returns the comparison result as
 * an int. */
#define _mm_comi_round_sd(A, B, P, R) \
  ((int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                               (int)(P), (int)(R)))

/* Single-precision counterpart of _mm_comi_round_sd. */
#define _mm_comi_round_ss(A, B, P, R) \
  ((int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                               (int)(P), (int)(R)))
5672 
#ifdef __x86_64__
/* Convert the low double of A to a signed 64-bit integer using rounding mode
 * R.  64-bit-only: the underlying builtin needs a 64-bit GPR destination. */
#define _mm_cvt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif
5677 
/* Logical left shift of the 16 32-bit elements of __A by a count taken from
 * __B (see _mm512_sll_epi32 in the Intel Intrinsics Guide for the exact
 * count semantics of the VPSLLD form). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sll_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
}

/* Merge-masked: result lanes not selected by __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sll_epi32(__A, __B),
                                          (__v16si)__W);
}

/* Zero-masked: result lanes not selected by __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sll_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
}

/* 64-bit-element counterpart of _mm512_sll_epi32 (8 x 64-bit lanes). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sll_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
}

/* Merge-masked: result lanes not selected by __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_sll_epi64(__A, __B),
                                             (__v8di)__W);
}

/* Zero-masked: result lanes not selected by __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_sll_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
}
5721 
/* Variable logical left shift: each 32-bit element of __X is shifted by the
 * count in the corresponding element of __Y (VPSLLVD form). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sllv_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
}

/* Merge-masked: result lanes not selected by __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
                                           (__v16si)__W);
}

/* Zero-masked: result lanes not selected by __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_sllv_epi32(__X, __Y),
                                           (__v16si)_mm512_setzero_si512());
}

/* 64-bit-element counterpart of _mm512_sllv_epi32 (per-lane counts). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sllv_epi64(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
}

/* Merge-masked: result lanes not selected by __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
                                            (__v8di)__W);
}

/* Zero-masked: result lanes not selected by __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_sllv_epi64(__X, __Y),
                                            (__v8di)_mm512_setzero_si512());
}
5765 
/* Arithmetic right shift (sign-extending) of the 16 32-bit elements of __A
 * by a count taken from __B (VPSRAD form). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sra_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
}

/* Merge-masked: result lanes not selected by __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sra_epi32(__A, __B),
                                          (__v16si)__W);
}

/* Zero-masked: result lanes not selected by __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_sra_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
}

/* 64-bit-element arithmetic right shift (VPSRAQ; new to AVX-512). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_sra_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
}

/* Merge-masked: result lanes not selected by __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_sra_epi64(__A, __B),
                                           (__v8di)__W);
}

/* Zero-masked: result lanes not selected by __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_sra_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
}
5809 
/* Variable arithmetic right shift: each 32-bit element of __X is shifted by
 * the count in the corresponding element of __Y (VPSRAVD form). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srav_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
}

/* Merge-masked: result lanes not selected by __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srav_epi32(__X, __Y),
                                           (__v16si)__W);
}

/* Zero-masked: result lanes not selected by __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srav_epi32(__X, __Y),
                                           (__v16si)_mm512_setzero_si512());
}

/* 64-bit-element counterpart of _mm512_srav_epi32 (per-lane counts). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srav_epi64(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
}

/* Merge-masked: result lanes not selected by __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srav_epi64(__X, __Y),
                                            (__v8di)__W);
}

/* Zero-masked: result lanes not selected by __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srav_epi64(__X, __Y),
                                            (__v8di)_mm512_setzero_si512());
}
5853 
/* Logical right shift (zero-filling) of the 16 32-bit elements of __A by a
 * count taken from __B (VPSRLD form). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srl_epi32(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
}

/* Merge-masked: result lanes not selected by __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srl_epi32(__A, __B),
                                          (__v16si)__W);
}

/* Zero-masked: result lanes not selected by __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srl_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
}

/* 64-bit-element counterpart of _mm512_srl_epi32 (8 x 64-bit lanes). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srl_epi64(__m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
}

/* Merge-masked: result lanes not selected by __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_srl_epi64(__A, __B),
                                           (__v8di)__W);
}

/* Zero-masked: result lanes not selected by __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                           (__v8di)_mm512_srl_epi64(__A, __B),
                                           (__v8di)_mm512_setzero_si512());
}
5897 
/* Variable logical right shift: each 32-bit element of __X is shifted by the
 * count in the corresponding element of __Y (VPSRLVD form). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srlv_epi32(__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
}

/* Merge-masked: result lanes not selected by __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
                                           (__v16si)__W);
}

/* Zero-masked: result lanes not selected by __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                           (__v16si)_mm512_srlv_epi32(__X, __Y),
                                           (__v16si)_mm512_setzero_si512());
}

/* 64-bit-element counterpart of _mm512_srlv_epi32 (per-lane counts). */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
}

/* Merge-masked: result lanes not selected by __U are taken from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
                                            (__v8di)__W);
}

/* Zero-masked: result lanes not selected by __U are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                            (__v8di)_mm512_srlv_epi64(__X, __Y),
                                            (__v8di)_mm512_setzero_si512());
}
5941 
/// \enum _MM_TERNLOG_ENUM
///    A helper to represent the ternary logic operations among vector \a A,
///    \a B and \a C. The representation is passed to \a imm.
///    Each constant is the truth-table column of one operand; bitwise
///    combinations form the imm8 for the ternarylogic intrinsics, e.g.
///    (_MM_TERNLOG_A & _MM_TERNLOG_B) | _MM_TERNLOG_C.
typedef enum {
  _MM_TERNLOG_A = 0xF0,
  _MM_TERNLOG_B = 0xCC,
  _MM_TERNLOG_C = 0xAA
} _MM_TERNLOG_ENUM;
5950 
/* Bitwise ternary logic on 32-bit lanes of A, B, C; imm is the 8-bit truth
 * table (see _MM_TERNLOG_ENUM above).  Unmasked form uses an all-ones mask. */
#define _mm512_ternarylogic_epi32(A, B, C, imm)                                \
  ((__m512i)__builtin_ia32_pternlogd512_mask(                                  \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C),     \
      (unsigned char)(imm), (__mmask16)-1))

/* Merge-masked: masked-off lanes keep A (the first/source operand). */
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm)                        \
  ((__m512i)__builtin_ia32_pternlogd512_mask(                                  \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C),     \
      (unsigned char)(imm), (__mmask16)(U)))

/* Zero-masked: uses the _maskz builtin so masked-off lanes are zeroed. */
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm)                       \
  ((__m512i)__builtin_ia32_pternlogd512_maskz(                                 \
      (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (__v16si)(__m512i)(C),     \
      (unsigned char)(imm), (__mmask16)(U)))

/* 64-bit-lane counterpart of _mm512_ternarylogic_epi32. */
#define _mm512_ternarylogic_epi64(A, B, C, imm)                                \
  ((__m512i)__builtin_ia32_pternlogq512_mask(                                  \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C),        \
      (unsigned char)(imm), (__mmask8)-1))

/* Merge-masked 64-bit-lane form. */
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm)                        \
  ((__m512i)__builtin_ia32_pternlogq512_mask(                                  \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C),        \
      (unsigned char)(imm), (__mmask8)(U)))

/* Zero-masked 64-bit-lane form. */
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm)                       \
  ((__m512i)__builtin_ia32_pternlogq512_maskz(                                 \
      (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (__v8di)(__m512i)(C),        \
      (unsigned char)(imm), (__mmask8)(U)))
5980 
#ifdef __x86_64__
/* Alias spelling of _mm_cvt_roundsd_si64: low double -> signed 64-bit int
 * with explicit rounding mode R (64-bit targets only). */
#define _mm_cvt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)))
#endif

/* Low double of A -> signed 32-bit int with explicit rounding mode R. */
#define _mm_cvt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

/* Alias spelling of _mm_cvt_roundsd_si32. */
#define _mm_cvt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R)))

/* Low double of A -> unsigned 32-bit int with explicit rounding mode R. */
#define _mm_cvt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)))

/* Low double of __A -> unsigned 32-bit int, current rounding direction. */
static __inline__ unsigned __DEFAULT_FN_ATTRS128
_mm_cvtsd_u32 (__m128d __A)
{
  return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
             _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/* Low double of A -> unsigned 64-bit int with explicit rounding mode R. */
#define _mm_cvt_roundsd_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
                                                   (int)(R)))

/* Low double of __A -> unsigned 64-bit int, current rounding direction. */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
_mm_cvtsd_u64 (__m128d __A)
{
  return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
                 __A,
                 _MM_FROUND_CUR_DIRECTION);
}
#endif
6015 
/* Low float of A -> signed 32-bit int with explicit rounding mode R. */
#define _mm_cvt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))

/* Alias spelling of _mm_cvt_roundss_si32. */
#define _mm_cvt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R)))

#ifdef __x86_64__
/* Low float of A -> signed 64-bit int (64-bit targets only). */
#define _mm_cvt_roundss_si64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))

/* Alias spelling of _mm_cvt_roundss_si64. */
#define _mm_cvt_roundss_i64(A, R) \
  ((long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R)))
#endif

/* Low float of A -> unsigned 32-bit int with explicit rounding mode R. */
#define _mm_cvt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)))

/* Low float of __A -> unsigned 32-bit int, current rounding direction. */
static __inline__ unsigned __DEFAULT_FN_ATTRS128
_mm_cvtss_u32 (__m128 __A)
{
  return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
             _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/* Low float of A -> unsigned 64-bit int with explicit rounding mode R. */
#define _mm_cvt_roundss_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
                                                   (int)(R)))

/* Low float of __A -> unsigned 64-bit int, current rounding direction. */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
_mm_cvtss_u64 (__m128 __A)
{
  return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
                 __A,
                 _MM_FROUND_CUR_DIRECTION);
}
#endif
6053 
/* Truncating conversions ("cvtt"): the R argument supplies SAE control only;
 * the rounding itself is always toward zero. */

/* Low double of A -> signed 32-bit int, truncating. */
#define _mm_cvtt_roundsd_i32(A, R) \
  ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))

/* Alias spelling of _mm_cvtt_roundsd_i32. */
#define _mm_cvtt_roundsd_si32(A, R) \
  ((int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)))

/* Low double of __A -> signed 32-bit int, truncating; current exception
 * behavior. */
static __inline__ int __DEFAULT_FN_ATTRS128
_mm_cvttsd_i32 (__m128d __A)
{
  return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
              _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/* Low double of A -> signed 64-bit int, truncating (64-bit targets only). */
#define _mm_cvtt_roundsd_si64(A, R) \
  ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))

/* Alias spelling of _mm_cvtt_roundsd_si64. */
#define _mm_cvtt_roundsd_i64(A, R) \
  ((long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)))

/* Low double of __A -> signed 64-bit int, truncating. */
static __inline__ long long __DEFAULT_FN_ATTRS128
_mm_cvttsd_i64 (__m128d __A)
{
  return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
              _MM_FROUND_CUR_DIRECTION);
}
#endif

/* Low double of A -> unsigned 32-bit int, truncating. */
#define _mm_cvtt_roundsd_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)))

/* Low double of __A -> unsigned 32-bit int, truncating. */
static __inline__ unsigned __DEFAULT_FN_ATTRS128
_mm_cvttsd_u32 (__m128d __A)
{
  return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
              _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/* Low double of A -> unsigned 64-bit int, truncating (64-bit only). */
#define _mm_cvtt_roundsd_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
                                                    (int)(R)))

/* Low double of __A -> unsigned 64-bit int, truncating. */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
_mm_cvttsd_u64 (__m128d __A)
{
  return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
                  __A,
                  _MM_FROUND_CUR_DIRECTION);
}
#endif
6105 
/* Single-precision counterparts of the truncating scalar conversions
 * above: low float of A/__A to signed/unsigned 32/64-bit integers. */

/* Low float of A -> signed 32-bit int, truncating. */
#define _mm_cvtt_roundss_i32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))

/* Alias spelling of _mm_cvtt_roundss_i32. */
#define _mm_cvtt_roundss_si32(A, R) \
  ((int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)))

/* Low float of __A -> signed 32-bit int, truncating. */
static __inline__ int __DEFAULT_FN_ATTRS128
_mm_cvttss_i32 (__m128 __A)
{
  return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
              _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/* Low float of A -> signed 64-bit int, truncating (64-bit targets only). */
#define _mm_cvtt_roundss_i64(A, R) \
  ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))

/* Alias spelling of _mm_cvtt_roundss_i64. */
#define _mm_cvtt_roundss_si64(A, R) \
  ((long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)))

/* Low float of __A -> signed 64-bit int, truncating. */
static __inline__ long long __DEFAULT_FN_ATTRS128
_mm_cvttss_i64 (__m128 __A)
{
  return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
              _MM_FROUND_CUR_DIRECTION);
}
#endif

/* Low float of A -> unsigned 32-bit int, truncating. */
#define _mm_cvtt_roundss_u32(A, R) \
  ((unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)))

/* Low float of __A -> unsigned 32-bit int, truncating. */
static __inline__ unsigned __DEFAULT_FN_ATTRS128
_mm_cvttss_u32 (__m128 __A)
{
  return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
              _MM_FROUND_CUR_DIRECTION);
}

#ifdef __x86_64__
/* Low float of A -> unsigned 64-bit int, truncating (64-bit only). */
#define _mm_cvtt_roundss_u64(A, R) \
  ((unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
                                                    (int)(R)))

/* Low float of __A -> unsigned 64-bit int, truncating. */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
_mm_cvttss_u64 (__m128 __A)
{
  return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
                  __A,
                  _MM_FROUND_CUR_DIRECTION);
}
#endif
6157 
/* In-lane permute of double elements of X controlled by immediate C
 * (VPERMILPD immediate form). */
#define _mm512_permute_pd(X, C) \
  ((__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C)))

/* Merge-masked: result lanes not selected by U are taken from W. */
#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

/* Zero-masked: result lanes not selected by U are zeroed. */
#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permute_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

/* Single-precision counterpart (VPERMILPS immediate form). */
#define _mm512_permute_ps(X, C) \
  ((__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C)))

/* Merge-masked: result lanes not selected by U are taken from W. */
#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)(__m512)(W)))

/* Zero-masked: result lanes not selected by U are zeroed. */
#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)_mm512_setzero_ps()))
6183 
/* In-lane permute of double elements of __A with per-element selectors taken
 * from __C (VPERMILPD variable form). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_permutevar_pd(__m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
}

/* Merge-masked: result lanes not selected by __U are taken from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                         (__v8df)_mm512_permutevar_pd(__A, __C),
                                         (__v8df)__W);
}

/* Zero-masked: result lanes not selected by __U are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                         (__v8df)_mm512_permutevar_pd(__A, __C),
                                         (__v8df)_mm512_setzero_pd());
}

/* Single-precision counterpart (VPERMILPS variable form). */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_permutevar_ps(__m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
}

/* Merge-masked: result lanes not selected by __U are taken from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
                                        (__v16sf)__W);
}

/* Zero-masked: result lanes not selected by __U are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                        (__v16sf)_mm512_permutevar_ps(__A, __C),
                                        (__v16sf)_mm512_setzero_ps());
}
6227 
/* Two-source permute: each element of __I selects one element from the
 * concatenation of __A and __B (VPERMI2PD form). */
static __inline __m512d __DEFAULT_FN_ATTRS512
_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
{
  return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
                                                 (__v8df)__B);
}

/* Merge-masked: masked-off lanes keep __A (the first source operand). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                  (__v8df)__A);
}

/* "mask2" variant: masked-off lanes keep the index operand __I
 * (bit-reinterpreted as doubles), matching the VPERMI2PD overwrite-index
 * register semantics. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
                             __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                  (__v8df)(__m512d)__I);
}

/* Zero-masked: masked-off lanes are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
                             __m512d __B)
{
  return (__m512d)__builtin_ia32_selectpd_512(__U,
                                  (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
                                  (__v8df)_mm512_setzero_pd());
}

/* Single-precision counterpart of _mm512_permutex2var_pd. */
static __inline __m512 __DEFAULT_FN_ATTRS512
_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
{
  return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
                                                (__v16sf) __B);
}

/* Merge-masked: masked-off lanes keep __A. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                 (__v16sf)__A);
}

/* "mask2" variant: masked-off lanes keep the index operand __I
 * (bit-reinterpreted as floats). */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                 (__v16sf)(__m512)__I);
}

/* Zero-masked: masked-off lanes are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
{
  return (__m512)__builtin_ia32_selectps_512(__U,
                                 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
                                 (__v16sf)_mm512_setzero_ps());
}
6291 
6292 
/* Truncating packed conversion: 8 doubles of A -> 8 unsigned 32-bit ints in
 * a 256-bit result.  R supplies SAE control; unmasked form passes an
 * undefined merge source and an all-ones mask. */
#define _mm512_cvtt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_undefined_si256(), \
                                              (__mmask8)-1, (int)(R)))

/* Merge-masked: masked-off elements are taken from W. */
#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)(__m256i)(W), \
                                              (__mmask8)(U), (int)(R)))

/* Zero-masked: masked-off elements are zeroed. */
#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)(U), (int)(R)))

/* Function form of _mm512_cvtt_roundpd_epu32 with the current rounding
 * direction. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvttpd_epu32 (__m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
                  (__v8si)
                  _mm256_undefined_si256 (),
                  (__mmask8) -1,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked function form: masked-off elements are taken from __W. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
                  (__v8si) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked function form: masked-off elements are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
{
  return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
                  (__v8si)
                  _mm256_setzero_si256 (),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}
6336 
/* Scalar roundscale of the low double of B (imm packs the scale and
 * rounding-control fields).  Variants differ in merge source (setzero vs. W),
 * mask (-1 vs. U), and rounding argument (explicit R vs. current direction). */
#define _mm_roundscale_round_sd(A, B, imm, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 (int)(R)))

/* Unmasked, current rounding direction. */
#define _mm_roundscale_sd(A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

/* Merge-masked, current rounding direction. */
#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(imm), \
                                                 _MM_FROUND_CUR_DIRECTION))

/* Merge-masked with explicit rounding argument R. */
#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))

/* Zero-masked, current rounding direction. */
#define _mm_maskz_roundscale_sd(U, A, B, I) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 _MM_FROUND_CUR_DIRECTION))

/* Zero-masked with explicit rounding argument R. */
#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  ((__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(I), \
                                                 (int)(R)))
6378 
/* Single-precision counterparts of the scalar roundscale macros above,
 * operating on the low float of B. */
#define _mm_roundscale_round_ss(A, B, imm, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                (int)(R)))

/* Unmasked, current rounding direction. */
#define _mm_roundscale_ss(A, B, imm) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION))

/* Merge-masked, current rounding direction. */
#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))

/* Merge-masked with explicit rounding argument R. */
#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R)))

/* Zero-masked, current rounding direction. */
#define _mm_maskz_roundscale_ss(U, A, B, I) \
  ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION))
6413 
6414 #define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
6415   ((__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6416                                                 (__v4sf)(__m128)(B), \
6417                                                 (__v4sf)_mm_setzero_ps(), \
6418                                                 (__mmask8)(U), (int)(I), \
6419                                                 (int)(R)))
6420 
/* Packed scalef (VSCALEFPD): each double of A is scaled by two raised to the
 * integer part of the corresponding element of B, with explicit rounding
 * control R.  Unmasked / merge-masked (W under U) / zero-masked variants. */
#define _mm512_scalef_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
6438 
6439 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6440 _mm512_scalef_pd (__m512d __A, __m512d __B)
6441 {
6442   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6443                 (__v8df) __B,
6444                 (__v8df)
6445                 _mm512_undefined_pd (),
6446                 (__mmask8) -1,
6447                 _MM_FROUND_CUR_DIRECTION);
6448 }
6449 
6450 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6451 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6452 {
6453   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6454                 (__v8df) __B,
6455                 (__v8df) __W,
6456                 (__mmask8) __U,
6457                 _MM_FROUND_CUR_DIRECTION);
6458 }
6459 
6460 static __inline__ __m512d __DEFAULT_FN_ATTRS512
6461 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6462 {
6463   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6464                 (__v8df) __B,
6465                 (__v8df)
6466                 _mm512_setzero_pd (),
6467                 (__mmask8) __U,
6468                 _MM_FROUND_CUR_DIRECTION);
6469 }
6470 
/* Single-precision packed scalef (VSCALEFPS) with explicit rounding R;
 * unmasked / merge-masked / zero-masked variants (16 lanes, __mmask16). */
#define _mm512_scalef_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
6488 
6489 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6490 _mm512_scalef_ps (__m512 __A, __m512 __B)
6491 {
6492   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6493                (__v16sf) __B,
6494                (__v16sf)
6495                _mm512_undefined_ps (),
6496                (__mmask16) -1,
6497                _MM_FROUND_CUR_DIRECTION);
6498 }
6499 
6500 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6501 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6502 {
6503   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6504                (__v16sf) __B,
6505                (__v16sf) __W,
6506                (__mmask16) __U,
6507                _MM_FROUND_CUR_DIRECTION);
6508 }
6509 
6510 static __inline__ __m512 __DEFAULT_FN_ATTRS512
6511 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6512 {
6513   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6514                (__v16sf) __B,
6515                (__v16sf)
6516                _mm512_setzero_ps (),
6517                (__mmask16) __U,
6518                _MM_FROUND_CUR_DIRECTION);
6519 }
6520 
/* Scalar scalef (VSCALEFSD): low lane = scalef(A[0], B[0]) with explicit
 * rounding R, upper lane from A; all-ones mask. */
#define _mm_scalef_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R)))
6526 
6527 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6528 _mm_scalef_sd (__m128d __A, __m128d __B)
6529 {
6530   return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6531               (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6532               (__mmask8) -1,
6533               _MM_FROUND_CUR_DIRECTION);
6534 }
6535 
6536 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6537 _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6538 {
6539  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6540                  (__v2df) __B,
6541                 (__v2df) __W,
6542                 (__mmask8) __U,
6543                 _MM_FROUND_CUR_DIRECTION);
6544 }
6545 
/* Merge-masked scalar scalef with explicit rounding operand R. */
#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)))
6551 
6552 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6553 _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
6554 {
6555  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6556                  (__v2df) __B,
6557                 (__v2df) _mm_setzero_pd (),
6558                 (__mmask8) __U,
6559                 _MM_FROUND_CUR_DIRECTION);
6560 }
6561 
/* Zero-masked scalar scalef (double) with explicit rounding operand R. */
#define _mm_maskz_scalef_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)))

/* Scalar scalef (VSCALEFSS, float): all-ones mask, explicit rounding R. */
#define _mm_scalef_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R)))
6573 
6574 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6575 _mm_scalef_ss (__m128 __A, __m128 __B)
6576 {
6577   return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6578              (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6579              (__mmask8) -1,
6580              _MM_FROUND_CUR_DIRECTION);
6581 }
6582 
6583 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6584 _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6585 {
6586  return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6587                 (__v4sf) __B,
6588                 (__v4sf) __W,
6589                 (__mmask8) __U,
6590                 _MM_FROUND_CUR_DIRECTION);
6591 }
6592 
/* Merge-masked scalar scalef (float) with explicit rounding operand R. */
#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)))
6598 
6599 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6600 _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
6601 {
6602  return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6603                  (__v4sf) __B,
6604                 (__v4sf) _mm_setzero_ps (),
6605                 (__mmask8) __U,
6606                 _MM_FROUND_CUR_DIRECTION);
6607 }
6608 
/* Zero-masked scalar scalef (float) with explicit rounding operand R. */
#define _mm_maskz_scalef_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), \
                                              (int)(R)))
6615 
6616 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6617 _mm512_srai_epi32(__m512i __A, unsigned int __B)
6618 {
6619   return (__m512i)__builtin_ia32_psradi512((__v16si)__A, (int)__B);
6620 }
6621 
6622 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6623 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A,
6624                        unsigned int __B)
6625 {
6626   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6627                                          (__v16si)_mm512_srai_epi32(__A, __B),
6628                                          (__v16si)__W);
6629 }
6630 
6631 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6632 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A,
6633                         unsigned int __B) {
6634   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6635                                          (__v16si)_mm512_srai_epi32(__A, __B),
6636                                          (__v16si)_mm512_setzero_si512());
6637 }
6638 
6639 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6640 _mm512_srai_epi64(__m512i __A, unsigned int __B)
6641 {
6642   return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, (int)__B);
6643 }
6644 
6645 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6646 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
6647 {
6648   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6649                                           (__v8di)_mm512_srai_epi64(__A, __B),
6650                                           (__v8di)__W);
6651 }
6652 
6653 static __inline__ __m512i __DEFAULT_FN_ATTRS512
6654 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
6655 {
6656   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6657                                           (__v8di)_mm512_srai_epi64(__A, __B),
6658                                           (__v8di)_mm512_setzero_si512());
6659 }
6660 
/* 128-bit-lane shuffles (VSHUFF32X4/VSHUFF64X2/VSHUFI32X4/VSHUFI64X2): the
 * immediate selects which 128-bit chunks of A and B form the result.  Each
 * masked variant is built from the unmasked shuffle plus a select under the
 * mask (merge from W, or zero). */
#define _mm512_shuffle_f32x4(A, B, imm) \
  ((__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(B), (int)(imm)))

#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                       (__v16sf)_mm512_setzero_ps()))

#define _mm512_shuffle_f64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(B), (int)(imm)))

#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_i32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)(__m512i)(W)))

#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                       (__v16si)_mm512_setzero_si512()))

#define _mm512_shuffle_i64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(imm)))

#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                       (__v8di)_mm512_setzero_si512()))

/* Element-wise shuffles (VSHUFPD/VSHUFPS) controlled by immediate M, with
 * the same merge-/zero-masked select pattern as above. */
#define _mm512_shuffle_pd(A, B, M) \
  ((__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
                                     (__v8df)(__m512d)(B), (int)(M)))

#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_shuffle_ps(A, B, M) \
  ((__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
                                    (__v16sf)(__m512)(B), (int)(M)))

#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)(__m512)(W)))

#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                       (__v16sf)_mm512_setzero_ps()))
6744 
/* Scalar square root (VSQRTSD): low lane = sqrt(B[0]) with explicit rounding
 * R, upper lane from A; all-ones mask. */
#define _mm_sqrt_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)-1, (int)(R)))
6750 
6751 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6752 _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6753 {
6754  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6755                  (__v2df) __B,
6756                 (__v2df) __W,
6757                 (__mmask8) __U,
6758                 _MM_FROUND_CUR_DIRECTION);
6759 }
6760 
/* Merge-masked scalar sqrt (double) with explicit rounding operand R. */
#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)(__m128d)(W), \
                                             (__mmask8)(U), (int)(R)))
6766 
6767 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6768 _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
6769 {
6770  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6771                  (__v2df) __B,
6772                 (__v2df) _mm_setzero_pd (),
6773                 (__mmask8) __U,
6774                 _MM_FROUND_CUR_DIRECTION);
6775 }
6776 
/* Zero-masked scalar sqrt (double) with explicit rounding operand R. */
#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))

/* Scalar square root (VSQRTSS, float): all-ones mask, explicit rounding R. */
#define _mm_sqrt_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1, (int)(R)))
6788 
6789 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6790 _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6791 {
6792  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6793                  (__v4sf) __B,
6794                 (__v4sf) __W,
6795                 (__mmask8) __U,
6796                 _MM_FROUND_CUR_DIRECTION);
6797 }
6798 
/* Merge-masked scalar sqrt (float) with explicit rounding operand R. */
#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)(__m128)(W), (__mmask8)(U), \
                                            (int)(R)))
6804 
6805 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6806 _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
6807 {
6808  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6809                  (__v4sf) __B,
6810                 (__v4sf) _mm_setzero_ps (),
6811                 (__mmask8) __U,
6812                 _MM_FROUND_CUR_DIRECTION);
6813 }
6814 
/* Zero-masked scalar sqrt (float) with explicit rounding operand R. */
#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
6820 
/* Replicate the 4-float source vector into all four 128-bit chunks of the
 * 512-bit result (the shuffle indices repeat 0..3 four times). */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_broadcast_f32x4(__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                         0, 1, 2, 3, 0, 1, 2, 3,
                                         0, 1, 2, 3, 0, 1, 2, 3);
}

/* Merge-masked broadcast: masked-off lanes keep the element of __O. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                           (__v16sf)_mm512_broadcast_f32x4(__A),
                                           (__v16sf)__O);
}

/* Zero-masked broadcast: masked-off lanes are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                           (__v16sf)_mm512_broadcast_f32x4(__A),
                                           (__v16sf)_mm512_setzero_ps());
}
6844 
/* Replicate the 4-double source vector into both 256-bit halves of the
 * 512-bit result. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_broadcast_f64x4(__m256d __A)
{
  return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
                                          0, 1, 2, 3, 0, 1, 2, 3);
}

/* Merge-masked broadcast: masked-off lanes keep the element of __O. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                            (__v8df)_mm512_broadcast_f64x4(__A),
                                            (__v8df)__O);
}

/* Zero-masked broadcast: masked-off lanes are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                            (__v8df)_mm512_broadcast_f64x4(__A),
                                            (__v8df)_mm512_setzero_pd());
}
6867 
/* Replicate the 4 x i32 source vector into all four 128-bit chunks of the
 * 512-bit result. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i32x4(__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                          0, 1, 2, 3, 0, 1, 2, 3,
                                          0, 1, 2, 3, 0, 1, 2, 3);
}

/* Merge-masked broadcast: masked-off lanes keep the element of __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                           (__v16si)_mm512_broadcast_i32x4(__A),
                                           (__v16si)__O);
}

/* Zero-masked broadcast: masked-off lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                           (__v16si)_mm512_broadcast_i32x4(__A),
                                           (__v16si)_mm512_setzero_si512());
}
6891 
/* Replicate the 4 x i64 source vector into both 256-bit halves of the
 * 512-bit result. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i64x4(__m256i __A)
{
  return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
                                          0, 1, 2, 3, 0, 1, 2, 3);
}

/* Merge-masked broadcast: masked-off lanes keep the element of __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                            (__v8di)_mm512_broadcast_i64x4(__A),
                                            (__v8di)__O);
}

/* Zero-masked broadcast: masked-off lanes are zeroed. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                            (__v8di)_mm512_broadcast_i64x4(__A),
                                            (__v8di)_mm512_setzero_si512());
}
6914 
/* Masked forms of the scalar-element broadcasts: the unmasked broadcast
 * (_mm512_broadcastsd_pd / _mm512_broadcastss_ps, defined elsewhere in this
 * header) is combined with a select that merges from __O or zeros. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512(__M,
                                              (__v8df) _mm512_broadcastsd_pd(__A),
                                              (__v8df) __O);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512(__M,
                                              (__v8df) _mm512_broadcastsd_pd(__A),
                                              (__v8df) _mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512(__M,
                                             (__v16sf) _mm512_broadcastss_ps(__A),
                                             (__v16sf) __O);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512(__M,
                                             (__v16sf) _mm512_broadcastss_ps(__A),
                                             (__v16sf) _mm512_setzero_ps());
}
6946 
/* Down-convert 16 x i32 to 16 x i8 with signed saturation (VPMOVSDB).
 * Unmasked / merge-masked / zero-masked register forms, plus a masked
 * store-to-memory form (no alignment requirement on __P). */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtsepi32_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
               (__v16qi) _mm_undefined_si128 (),
               (__mmask16) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
               (__v16qi) __O, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
               (__v16qi) _mm_setzero_si128 (),
               __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
}
6975 
/* Down-convert 16 x i32 to 16 x i16 with signed saturation (VPMOVSDW):
 * register forms and a masked unaligned store. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtsepi32_epi16 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
               (__v16hi) _mm256_undefined_si256 (),
               (__mmask16) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
               (__v16hi) __O, __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
               (__v16hi) _mm256_setzero_si256 (),
               __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
}
7004 
/* Down-convert 8 x i64 to 8 x i8 with signed saturation (VPMOVSQB); the
 * result occupies the low 8 bytes of the __m128i.  Register forms and a
 * masked unaligned store. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtsepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
               (__v16qi) _mm_undefined_si128 (),
               (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
               (__v16qi) __O, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
               (__v16qi) _mm_setzero_si128 (),
               __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}
7033 
/* Down-convert 8 x i64 to 8 x i32 with signed saturation (VPMOVSQD):
 * register forms and a masked unaligned store. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtsepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
               (__v8si) _mm256_undefined_si256 (),
               (__mmask8) -1);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
               (__v8si) __O, __M);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
               (__v8si) _mm256_setzero_si256 (),
               __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
}
7062 
/* Down-convert 8 x i64 to 8 x i16 with signed saturation (VPMOVSQW):
 * register forms and a masked unaligned store. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtsepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) _mm_undefined_si128 (),
               (__mmask8) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) __O, __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
               (__v8hi) _mm_setzero_si128 (),
               __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}
7091 
/* Down-convert 16 x i32 to 16 x i8 with unsigned saturation (VPMOVUSDB):
 * register forms and a masked unaligned store. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtusepi32_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) _mm_undefined_si128 (),
                (__mmask16) -1);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) __O,
                __M);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
                (__v16qi) _mm_setzero_si128 (),
                __M);
}

static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
}
7121 
/* VPMOVUSDW: narrow the sixteen 32-bit elements of __A to 16 bits with
 * unsigned saturation.  Unmasked form: all lanes written, so the undefined
 * passthrough vector is never observed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtusepi32_epi16 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) _mm256_undefined_si256 (),
                (__mmask16) -1);
}

/* Merge-masked form: lanes whose bit in __M is clear come from __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) __O,
                __M);
}

/* Zero-masked form: lanes whose bit in __M is clear are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
                (__v16hi) _mm256_setzero_si256 (),
                __M);
}

/* Store the unsigned-saturated 16-bit results to unaligned memory at __P;
 * lanes whose bit in __M is clear are not written. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
}
7151 
/* VPMOVUSQB: narrow the eight 64-bit elements of __A to 8 bits with
 * unsigned saturation.  Results occupy the low 8 bytes of the xmm result;
 * the upper bytes are zeroed by the instruction. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtusepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) _mm_undefined_si128 (),
                (__mmask8) -1);
}

/* Merge-masked form: lanes whose bit in __M is clear come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) __O,
                __M);
}

/* Zero-masked form: lanes whose bit in __M is clear are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
                (__v16qi) _mm_setzero_si128 (),
                __M);
}

/* Store the unsigned-saturated 8-bit results to unaligned memory at __P;
 * lanes whose bit in __M is clear are not written. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}
7181 
/* VPMOVUSQD: narrow the eight 64-bit elements of __A to 32 bits with
 * unsigned saturation.  Unmasked form: all lanes written, so the undefined
 * passthrough vector is never observed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtusepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) _mm256_undefined_si256 (),
                (__mmask8) -1);
}

/* Merge-masked form: lanes whose bit in __M is clear come from __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) __O, __M);
}

/* Zero-masked form: lanes whose bit in __M is clear are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                (__v8si) _mm256_setzero_si256 (),
                __M);
}

/* Store the unsigned-saturated 32-bit results to unaligned memory at __P;
 * lanes whose bit in __M is clear are not written. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
}
7210 
/* VPMOVUSQW: narrow the eight 64-bit elements of __A to 16 bits with
 * unsigned saturation.  Unmasked form: all lanes written, so the undefined
 * passthrough vector is never observed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtusepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) _mm_undefined_si128 (),
                (__mmask8) -1);
}

/* Merge-masked form: lanes whose bit in __M is clear come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) __O, __M);
}

/* Zero-masked form: lanes whose bit in __M is clear are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                (__v8hi) _mm_setzero_si128 (),
                __M);
}

/* Store the unsigned-saturated 16-bit results to unaligned memory at __P;
 * lanes whose bit in __M is clear are not written. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
}
7239 
/* VPMOVDB: truncate (no saturation) the sixteen 32-bit elements of __A to
 * their low 8 bits.  Unmasked form: all lanes written, so the undefined
 * passthrough vector is never observed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtepi32_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
              (__v16qi) _mm_undefined_si128 (),
              (__mmask16) -1);
}

/* Merge-masked form: lanes whose bit in __M is clear come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
              (__v16qi) __O, __M);
}

/* Zero-masked form: lanes whose bit in __M is clear are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
              (__v16qi) _mm_setzero_si128 (),
              __M);
}

/* Store the truncated 8-bit results to unaligned memory at __P; lanes whose
 * bit in __M is clear are not written. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
}
7268 
/* VPMOVDW: truncate (no saturation) the sixteen 32-bit elements of __A to
 * their low 16 bits.  Unmasked form: all lanes written, so the undefined
 * passthrough vector is never observed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtepi32_epi16 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
              (__v16hi) _mm256_undefined_si256 (),
              (__mmask16) -1);
}

/* Merge-masked form: lanes whose bit in __M is clear come from __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
              (__v16hi) __O, __M);
}

/* Zero-masked form: lanes whose bit in __M is clear are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
              (__v16hi) _mm256_setzero_si256 (),
              __M);
}

/* Store the truncated 16-bit results to unaligned memory at __P; lanes
 * whose bit in __M is clear are not written. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
{
  __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
}
7297 
/* VPMOVQB: truncate (no saturation) the eight 64-bit elements of __A to
 * their low 8 bits.  Results occupy the low 8 bytes of the xmm result. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_epi8 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
              (__v16qi) _mm_undefined_si128 (),
              (__mmask8) -1);
}

/* Merge-masked form: lanes whose bit in __M is clear come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
              (__v16qi) __O, __M);
}

/* Zero-masked form: lanes whose bit in __M is clear are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
              (__v16qi) _mm_setzero_si128 (),
              __M);
}

/* Store the truncated 8-bit results to unaligned memory at __P; lanes whose
 * bit in __M is clear are not written. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
}
7326 
/* VPMOVQD: truncate (no saturation) the eight 64-bit elements of __A to
 * their low 32 bits.  Unmasked form: all lanes written, so the undefined
 * passthrough vector is never observed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_epi32 (__m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) _mm256_undefined_si256 (),
              (__mmask8) -1);
}

/* Merge-masked form: lanes whose bit in __M is clear come from __O. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) __O, __M);
}

/* Zero-masked form: lanes whose bit in __M is clear are zeroed. */
static __inline__ __m256i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
{
  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
              (__v8si) _mm256_setzero_si256 (),
              __M);
}

/* Store the truncated 32-bit results to unaligned memory at __P; lanes
 * whose bit in __M is clear are not written. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
}
7355 
/* VPMOVQW: truncate (no saturation) the eight 64-bit elements of __A to
 * their low 16 bits.  Unmasked form: all lanes written, so the undefined
 * passthrough vector is never observed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_epi16 (__m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
              (__v8hi) _mm_undefined_si128 (),
              (__mmask8) -1);
}

/* Merge-masked form: lanes whose bit in __M is clear come from __O. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
              (__v8hi) __O, __M);
}

/* Zero-masked form: lanes whose bit in __M is clear are zeroed. */
static __inline__ __m128i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
{
  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
              (__v8hi) _mm_setzero_si128 (),
              __M);
}

/* Store the truncated 16-bit results to unaligned memory at __P; lanes
 * whose bit in __M is clear are not written. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
{
  __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}
7384 
/* VEXTRACTI32X4 / VEXTRACTI64X4: extract one 128-bit (four 32-bit lanes)
 * or 256-bit (four 64-bit lanes) slice of A selected by imm.  Implemented
 * as macros because imm must be a compile-time constant.  Mask/maskz
 * variants merge with W or zero per-element under the low bits of U. */
#define _mm512_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)_mm_undefined_si128(), \
                                             (__mmask8)-1))

#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)(__m128i)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v4si)_mm_setzero_si128(), \
                                             (__mmask8)(U)))

#define _mm512_extracti64x4_epi64(A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_undefined_si256(), \
                                             (__mmask8)-1))

#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)(__m256i)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                             (__v4di)_mm256_setzero_si256(), \
                                             (__mmask8)(U)))
7414 
/* VINSERTF64X4 / VINSERTI64X4 / VINSERTF32X4 / VINSERTI32X4: insert B into
 * the 256-bit or 128-bit slice of A selected by the constant imm.  Masked
 * variants are built by performing the unmasked insert and then blending
 * with W (mask) or zero (maskz) through the element-select builtins. */
#define _mm512_insertf64x4(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
                                       (__v4df)(__m256d)(B), (int)(imm)))

#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                   (__v8df)(__m512d)(W)))

#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                   (__v8df)_mm512_setzero_pd()))

#define _mm512_inserti64x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
                                       (__v4di)(__m256i)(B), (int)(imm)))

#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                   (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                   (__v8di)(__m512i)(W)))

#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                   (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                   (__v8di)_mm512_setzero_si512()))

#define _mm512_insertf32x4(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
                                      (__v4sf)(__m128)(B), (int)(imm)))

#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                  (__v16sf)(__m512)(W)))

#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                  (__v16sf)_mm512_setzero_ps()))

#define _mm512_inserti32x4(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
                                       (__v4si)(__m128i)(B), (int)(imm)))

#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                  (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                  (__v16si)(__m512i)(W)))

#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                  (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                  (__v16si)_mm512_setzero_si512()))
7470 
/* VGETMANTPD / VGETMANTPS: extract the normalized mantissa of each element.
 * B selects the normalization interval and C the sign control; the builtin
 * takes them packed as the 4-bit immediate (C<<2)|B, so both must be
 * compile-time constants.  _round variants additionally take R (rounding /
 * SAE control); the non-round forms use _MM_FROUND_CUR_DIRECTION. */
#define _mm512_getmant_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_undefined_pd(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_getmant_pd(A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION))

#define _mm512_getmant_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_getmant_ps(A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                            (int)(((C)<<2)|(B)), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
7548 
/* VGETEXPPD with explicit rounding/SAE control R (compile-time constant).
 * Computes the exponent of each double element as a double.  Mask/maskz
 * variants merge with W or zero under U. */
#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
7563 
/* VGETEXPPD with the current rounding direction: per-element exponent of
 * __A, returned as doubles. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_getexp_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) _mm512_undefined_pd (),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked form: lanes whose bit in __U is clear come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked form: lanes whose bit in __U is clear are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
                (__v8df) _mm512_setzero_pd (),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}
7590 
/* VGETEXPPS with explicit rounding/SAE control R (compile-time constant).
 * Computes the exponent of each float element as a float.  Mask/maskz
 * variants merge with W or zero under U. */
#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
7605 
/* VGETEXPPS with the current rounding direction: per-element exponent of
 * __A, returned as floats. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_getexp_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) _mm512_undefined_ps (),
               (__mmask16) -1,
               _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked form: lanes whose bit in __U is clear come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) __W,
               (__mmask16) __U,
               _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked form: lanes whose bit in __U is clear are zeroed. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
               (__v16sf) _mm512_setzero_ps (),
               (__mmask16) __U,
               _MM_FROUND_CUR_DIRECTION);
}
7632 
/* VGATHER* family: load elements from addr + index[i]*scale.  Macros
 * because scale must be a compile-time constant (1, 2, 4 or 8 per the
 * instruction encoding).  Unmasked forms gather every lane; masked forms
 * gather only lanes whose mask bit is set and keep v1_old elsewhere (the
 * hardware also clears mask bits as lanes complete). */
#define _mm512_i64gather_ps(index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi32(index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)-1, (int)(scale)))

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8di)(__m512i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i64gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

/* NOTE(review): the two i32gather_ps macros below cast index through
 * (__m512) rather than (__m512i).  Same-size vector casts are bitcasts in
 * Clang's vector extension, so the generated code is unaffected, but
 * (__m512i) would better match the documented __m512i index type —
 * confirm against the upstream header before changing. */
#define _mm512_i32gather_ps(index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512)(index), \
                                        (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_epi32(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)-1, (int)(scale)))

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v16si)(__m512i)(index), \
                                         (__mmask16)(mask), (int)(scale)))

#define _mm512_i32gather_pd(index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm512_i32gather_epi64(index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), (__mmask8)-1, \
                                        (int)(scale)))

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))
7728 
/* VSCATTER* family: store elements of v1 to addr + index[i]*scale.  Macros
 * because scale must be a compile-time constant (1, 2, 4 or 8 per the
 * instruction encoding).  Unmasked forms store every lane; masked forms
 * store only lanes whose mask bit is set. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))
7808 
/* Masked scalar single-precision FMA intrinsics.  Only element 0 of the
 * vectors participates in the arithmetic; all four operations (fmadd,
 * fmsub, fnmadd, fnmsub) are expressed through the same vfmaddss3 builtins
 * by negating the appropriate source operands at the call site:
 *   fmsub  =   a*b - c   ->  fmadd(a,  b, -c)
 *   fnmadd = -(a*b) + c  ->  fmadd(a, -b,  c)
 *   fnmsub = -(a*b) - c  ->  fmadd(a, -b, -c)
 * Masking (merge from the first source, zeroing, or merge-into-third for
 * the mask3 forms) is performed by the _mask/_maskz/_mask3 builtin
 * variants.  _MM_FROUND_CUR_DIRECTION selects the current MXCSR rounding
 * mode; the *_round_* macros accept an explicit rounding/SAE immediate
 * instead (hence macros, since the immediate must be a constant). */

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
                                       (__v4sf)__A,
                                       (__v4sf)__B,
                                       (__mmask8)__U,
                                       _MM_FROUND_CUR_DIRECTION);
}

#define _mm_fmadd_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))

#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
                                        (__v4sf)__B,
                                        (__v4sf)__C,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
  return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
                                        (__v4sf)__X,
                                        (__v4sf)__Y,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
                                          (__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))

/* fmsub: negate the addend passed to the fmadd builtin. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
                                       (__v4sf)__A,
                                       -(__v4sf)__B,
                                       (__mmask8)__U,
                                       _MM_FROUND_CUR_DIRECTION);
}

#define _mm_fmsub_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))

#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
                                        (__v4sf)__B,
                                        -(__v4sf)__C,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))

/* mask3 fmsub cannot negate its addend (that operand is also the
 * masked-merge destination), so it uses the dedicated vfmsub builtin. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
  return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
                                        (__v4sf)__X,
                                        (__v4sf)__Y,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
                                          (__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))

/* fnmadd: negate one multiplicand passed to the fmadd builtin. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
                                       -(__v4sf)__A,
                                       (__v4sf)__B,
                                       (__mmask8)__U,
                                       _MM_FROUND_CUR_DIRECTION);
}

#define _mm_fnmadd_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))

#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         -(__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
                                        -(__v4sf)__B,
                                        (__v4sf)__C,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          -(__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
  return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
                                        -(__v4sf)__X,
                                        (__v4sf)__Y,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
                                          -(__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))

/* fnmsub: negate one multiplicand and the addend. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
                                       -(__v4sf)__A,
                                       -(__v4sf)__B,
                                       (__mmask8)__U,
                                       _MM_FROUND_CUR_DIRECTION);
}

#define _mm_fnmsub_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)-1, \
                                         (int)(R)))

#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                         -(__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R)))

static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
{
  return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
                                        -(__v4sf)__B,
                                        -(__v4sf)__C,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                          -(__v4sf)(__m128)(B), \
                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R)))

/* mask3 fnmsub: the addend (merge destination) stays unnegated, so the
 * subtraction comes from the vfmsub builtin and only X is negated. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
{
  return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
                                        -(__v4sf)__X,
                                        (__v4sf)__Y,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
  ((__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
                                          -(__v4sf)(__m128)(X), \
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R)))
8024 
/* Masked scalar double-precision FMA intrinsics.  These mirror the _ss
 * family above exactly, operating on element 0 of __m128d operands: the
 * four operations are built from the vfmaddsd3 builtins by negating
 * source operands (fmsub negates the addend, fnmadd negates a
 * multiplicand, fnmsub negates both), and the mask3 fmsub/fnmsub forms
 * use the vfmsubsd3 builtin because their addend is also the masked-merge
 * destination and must not be negated.  _MM_FROUND_CUR_DIRECTION uses the
 * current MXCSR rounding mode; the *_round_* macros take an explicit
 * rounding/SAE constant. */

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
                                       (__v2df)__A,
                                       (__v2df)__B,
                                       (__mmask8)__U,
                                       _MM_FROUND_CUR_DIRECTION);
}

#define _mm_fmadd_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))

#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
                                        (__v2df)__B,
                                        (__v2df)__C,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
                                           (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
  return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
                                        (__v2df)__X,
                                        (__v2df)__Y,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
                                           (__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                           (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
                                       (__v2df)__A,
                                       -(__v2df)__B,
                                       (__mmask8)__U,
                                       _MM_FROUND_CUR_DIRECTION);
}

#define _mm_fmsub_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))

#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
                                        (__v2df)__B,
                                        -(__v2df)__C,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           -(__v2df)(__m128d)(C), \
                                           (__mmask8)(U), (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
  return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
                                        (__v2df)__X,
                                        (__v2df)__Y,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                           (__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), \
                                           (__mmask8)(U), (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
                                       -(__v2df)__A,
                                       (__v2df)__B,
                                       (__mmask8)__U,
                                       _MM_FROUND_CUR_DIRECTION);
}

#define _mm_fnmadd_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))

#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          -(__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
                                        -(__v2df)__B,
                                        (__v2df)__C,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           -(__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
                                           (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
  return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
                                        -(__v2df)__X,
                                        (__v2df)__Y,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
                                           -(__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                           (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
                                       -(__v2df)__A,
                                       -(__v2df)__B,
                                       (__mmask8)__U,
                                       _MM_FROUND_CUR_DIRECTION);
}

#define _mm_fnmsub_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), (__mmask8)-1, \
                                          (int)(R)))

#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                          -(__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
{
  return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
                                        -(__v2df)__B,
                                        -(__v2df)__C,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                           -(__v2df)(__m128d)(B), \
                                           -(__v2df)(__m128d)(C), \
                                           (__mmask8)(U), \
                                           (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
{
  return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
                                        -(__v2df)__X,
                                        (__v2df)__Y,
                                        (__mmask8)__U,
                                        _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
  ((__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                           -(__v2df)(__m128d)(X), \
                                           (__v2df)(__m128d)(Y), \
                                           (__mmask8)(U), (int)(R)))
8241 
/* Permute the 64-bit elements of X using the immediate selector C
 * (macros because C must be a compile-time constant).  The masked forms
 * apply the permute and then blend with W (mask) or zero (maskz) via the
 * per-element select builtins. */

#define _mm512_permutex_pd(X, C) \
  ((__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C)))

#define _mm512_mask_permutex_pd(W, U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)(__m512d)(W)))

#define _mm512_maskz_permutex_pd(U, X, C) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_permutex_pd((X), (C)), \
                                        (__v8df)_mm512_setzero_pd()))

#define _mm512_permutex_epi64(X, C) \
  ((__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C)))

#define _mm512_mask_permutex_epi64(W, U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)(__m512i)(W)))

#define _mm512_maskz_permutex_epi64(U, X, C) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)_mm512_setzero_si512()))
8267 
/* Variable permutes: select elements of __Y using per-element indices
 * from __X.  Note the operand swap at each builtin call: the intrinsics
 * take (index, data) but the permvar builtins take (data, index).  The
 * mask/maskz variants blend the permuted result with __W or zero through
 * the select builtins. */

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
{
  return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                        (__v8df)_mm512_permutexvar_pd(__X, __Y),
                                        (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                        (__v8df)_mm512_permutexvar_pd(__X, __Y),
                                        (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                     (__v8di)_mm512_permutexvar_epi64(__X, __Y),
                                     (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
             __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                     (__v8di)_mm512_permutexvar_epi64(__X, __Y),
                                     (__v8di)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
{
  return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                       (__v16sf)_mm512_permutexvar_ps(__X, __Y),
                                       (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                       (__v16sf)_mm512_permutexvar_ps(__X, __Y),
                                       (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
}

/* Legacy alias kept for source compatibility. */
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                    (__v16si)_mm512_permutexvar_epi32(__X, __Y),
                                    (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
             __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                    (__v16si)_mm512_permutexvar_epi32(__X, __Y),
                                    (__v16si)__W);
}

/* Legacy alias kept for source compatibility. */
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8361 
/* Bitwise operations on 16-bit mask registers, each wrapping the
 * corresponding k-register builtin. */

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kand (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
}

/* kandn: AND of B with the complement of A. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kandn (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
}

/* kortestc: nonzero iff (A | B) has all 16 bits set. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestc (__mmask16 __A, __mmask16 __B)
{
  return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
}

/* kortestz: nonzero iff (A | B) is zero. */
static __inline__ int __DEFAULT_FN_ATTRS
_mm512_kortestz (__mmask16 __A, __mmask16 __B)
{
  return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
{
  return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
{
  return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
}

/* Combined form: stores the "all ones" result through __C and returns
 * the "all zeros" result. */
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
  *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
  return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
}

/* kunpackb: concatenate the low 8 bits of each operand into one mask. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxnor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_kxor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
}

/* Spellings introduced with the *_mask16 naming scheme; same operations. */
#define _kand_mask16 _mm512_kand
#define _kandn_mask16 _mm512_kandn
#define _knot_mask16 _mm512_knot
#define _kor_mask16 _mm512_kor
#define _kxnor_mask16 _mm512_kxnor
#define _kxor_mask16 _mm512_kxor
8434 
8435 #define _kshiftli_mask16(A, I) \
8436   ((__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I)))
8437 
8438 #define _kshiftri_mask16(A, I) \
8439   ((__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I)))
8440 
/* Copy a 16-bit mask out to a general-purpose unsigned int.  Routed
 * through __builtin_ia32_kmovw so the value travels via a k-register
 * move (KMOVW) rather than an arbitrary integer copy. */
static __inline__ unsigned int __DEFAULT_FN_ATTRS
_cvtmask16_u32(__mmask16 __A) {
  return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
}

/* Copy the low 16 bits of an unsigned int into a 16-bit mask. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_cvtu32_mask16(unsigned int __A) {
  return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
}

/* Load a 16-bit mask from memory (via a k-register move). */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_load_mask16(__mmask16 *__A) {
  return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
}

/* Store a 16-bit mask to memory (via a k-register move). */
static __inline__ void __DEFAULT_FN_ATTRS
_store_mask16(__mmask16 *__A, __mmask16 __B) {
  *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
}
8460 
/* Non-temporal 64-byte store of an integer vector.  The local typedef
 * re-asserts 64-byte alignment on the destination; __P must be
 * 64-byte aligned. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_stream_si512 (void * __P, __m512i __A)
{
  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
  __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
}

/* Non-temporal 64-byte load of an integer vector from aligned memory. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_stream_load_si512 (void const *__P)
{
  typedef __v8di __v8di_aligned __attribute__((aligned(64)));
  return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
}

/* Non-temporal store of 8 doubles to 64-byte-aligned memory. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_stream_pd (void *__P, __m512d __A)
{
  typedef __v8df __v8df_aligned __attribute__((aligned(64)));
  __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
}

/* Non-temporal store of 16 floats to 64-byte-aligned memory. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_stream_ps (void *__P, __m512 __A)
{
  typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
  __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
}
8488 
/* Compress: pack the elements of __A selected by __U contiguously
 * toward element 0; positions past the packed run come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
                  (__v8df) __W,
                  (__mmask8) __U);
}

/* Compress with zeroing: unpacked tail positions are set to 0.0. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
                  (__v8df)
                  _mm512_setzero_pd (),
                  (__mmask8) __U);
}

/* 64-bit integer compress; tail positions come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
                  (__v8di) __W,
                  (__mmask8) __U);
}

/* 64-bit integer compress with zeroed tail. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
                  (__v8di)
                  _mm512_setzero_si512 (),
                  (__mmask8) __U);
}

/* Single-precision compress; tail positions come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
                 (__v16sf) __W,
                 (__mmask16) __U);
}

/* Single-precision compress with zeroed tail. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
                 (__v16sf)
                 _mm512_setzero_ps (),
                 (__mmask16) __U);
}

/* 32-bit integer compress; tail positions come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
                  (__v16si) __W,
                  (__mmask16) __U);
}

/* 32-bit integer compress with zeroed tail. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
                  (__v16si)
                  _mm512_setzero_si512 (),
                  (__mmask16) __U);
}
8556 
/* Scalar compares: evaluate predicate P on the lowest float/double of
 * X and Y, producing the result in bit 0 of an 8-bit mask.  The
 * _round_ variants take explicit rounding/SAE control R; the others
 * use _MM_FROUND_CUR_DIRECTION.  _mask_ variants AND the result with
 * bit 0 of M (zeroing when M's bit 0 is clear). */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_ss_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                       (__v4sf)(__m128)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))

/* Double-precision scalar compares, same shape as the _ss_ forms. */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_sd_mask(X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)-1, \
                                       _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  ((__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                       (__v2df)(__m128d)(Y), (int)(P), \
                                       (__mmask8)(M), \
                                       _MM_FROUND_CUR_DIRECTION))
8600 
/* Bit Test */

/* Per-element "test": mask bit i is set iff (__A & __B) element i is
 * nonzero (implemented as AND followed by compare-not-equal to zero). */
static __inline __mmask16 __DEFAULT_FN_ATTRS512
_mm512_test_epi32_mask (__m512i __A, __m512i __B)
{
  return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B),
                                   _mm512_setzero_si512());
}

/* As above, additionally ANDed with the incoming mask __U. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
                                        _mm512_setzero_si512());
}

/* 64-bit test.  Note: _mm512_and_epi32 is reused deliberately — a
 * bitwise AND is identical regardless of element width; only the
 * compare is element-size specific. */
static __inline __mmask8 __DEFAULT_FN_ATTRS512
_mm512_test_epi64_mask (__m512i __A, __m512i __B)
{
  return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
                                   _mm512_setzero_si512());
}

/* 64-bit test under an incoming mask. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
                                        _mm512_setzero_si512());
}

/* "Test-not": mask bit i is set iff (__A & __B) element i is zero. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
{
  return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
                                  _mm512_setzero_si512());
}

/* 32-bit test-not under an incoming mask. */
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
{
  return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
                                       _mm512_setzero_si512());
}

/* 64-bit test-not (AND reuse as noted above). */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
{
  return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
                                  _mm512_setzero_si512());
}

/* 64-bit test-not under an incoming mask. */
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
{
  return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
                                       _mm512_setzero_si512());
}
8658 
/* Duplicate each odd-indexed float into the even slot below it
 * (shuffle indices 1,1,3,3,...). */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_movehdup_ps (__m512 __A)
{
  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
                         1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
}

/* movehdup with merge masking: inactive lanes come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_movehdup_ps(__A),
                                             (__v16sf)__W);
}

/* movehdup with zero masking: inactive lanes are 0.0f. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_movehdup_ps(__A),
                                             (__v16sf)_mm512_setzero_ps());
}

/* Duplicate each even-indexed float into the odd slot above it
 * (shuffle indices 0,0,2,2,...). */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_moveldup_ps (__m512 __A)
{
  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
                         0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
}

/* moveldup with merge masking. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_moveldup_ps(__A),
                                             (__v16sf)__W);
}

/* moveldup with zero masking. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_moveldup_ps(__A),
                                             (__v16sf)_mm512_setzero_ps());
}
8704 
/* Masked scalar move: element 0 is __B[0] when bit 0 of __U is set,
 * else __W[0]; upper elements come from _mm_move_ss(__A, __B), i.e. __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
  return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
}

/* As above with zero masking: element 0 becomes 0.0f when masked off. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
  return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
                                     _mm_setzero_ps());
}

/* Double-precision analogue of _mm_mask_move_ss. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
  return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
}

/* Double-precision analogue with zero masking. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
  return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
                                     _mm_setzero_pd());
}
8730 
/* Store the low float of __A to *__W only when bit 0 of __U is set
 * (only bit 0 is honoured — hence the __U & 1). */
static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
{
  __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
}

/* Store the low double of __A to *__W only when bit 0 of __U is set. */
static __inline__ void __DEFAULT_FN_ATTRS128
_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
{
  __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
}
8742 
/* Masked scalar load: when bit 0 of __U is set, element 0 is loaded
 * from *__A; otherwise it is __W[0].  The shufflevector builds the
 * pass-through source as {__W[0], 0, 0, 0} (indices 4,4,4 pick lanes
 * of the zero vector), so elements 1..3 of the result are always 0. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
{
  __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
                                                (__v4sf)_mm_setzero_ps(),
                                                0, 4, 4, 4);

  return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1);
}

/* Zero-masked scalar load: element 0 is *__A or 0.0f; upper lanes 0. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_load_ss (__mmask8 __U, const float* __A)
{
  return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A,
                                                (__v4sf) _mm_setzero_ps(),
                                                __U & 1);
}

/* Double-precision masked scalar load; pass-through is {__W[0], 0}. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
{
  __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
                                                 (__v2df)_mm_setzero_pd(),
                                                 0, 2);

  return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1);
}

/* Zero-masked double-precision scalar load. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_load_sd (__mmask8 __U, const double* __A)
{
  return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A,
                                                  (__v2df) _mm_setzero_pd(),
                                                  __U & 1);
}
8778 
/* imm8-controlled 32-bit element shuffle (VPSHUFD); I must be a
 * compile-time constant. */
#define _mm512_shuffle_epi32(A, I) \
  ((__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I)))

/* Shuffle with merge masking: inactive elements come from W. */
#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)(__m512i)(W)))

/* Shuffle with zero masking: inactive elements become 0. */
#define _mm512_maskz_shuffle_epi32(U, A, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)_mm512_setzero_si512()))
8791 
/* Expand (inverse of compress): distribute consecutive low elements of
 * __A to the positions whose mask bit in __U is set; other positions
 * come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
                (__v8df) __W,
                (__mmask8) __U);
}

/* Expand with zeroing of unselected positions. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
                (__v8df) _mm512_setzero_pd (),
                (__mmask8) __U);
}

/* 64-bit integer expand; unselected positions come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
                (__v8di) __W,
                (__mmask8) __U);
}

/* 64-bit integer expand with zeroed unselected positions. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
                (__v8di) _mm512_setzero_si512 (),
                (__mmask8) __U);
}
8823 
/* Expand-load: read only as many consecutive elements from unaligned
 * memory __P as there are set bits in __U, scattering them to the set
 * positions; other positions come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
              (__v8df) __W,
              (__mmask8) __U);
}

/* Expand-load with zeroing of unselected positions. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
              (__v8df) _mm512_setzero_pd(),
              (__mmask8) __U);
}

/* 64-bit integer expand-load; unselected positions come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
              (__v8di) __W,
              (__mmask8) __U);
}

/* 64-bit integer expand-load with zeroing. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
              (__v8di) _mm512_setzero_si512(),
              (__mmask8) __U);
}

/* Single-precision expand-load; unselected positions come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
                   (__v16sf) __W,
                   (__mmask16) __U);
}

/* Single-precision expand-load with zeroing. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
                   (__v16sf) _mm512_setzero_ps(),
                   (__mmask16) __U);
}

/* 32-bit integer expand-load; unselected positions come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
              (__v16si) __W,
              (__mmask16) __U);
}

/* 32-bit integer expand-load with zeroing. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
              (__v16si) _mm512_setzero_si512(),
              (__mmask16) __U);
}
8887 
/* Single-precision register expand; unselected positions come from __W. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
               (__v16sf) __W,
               (__mmask16) __U);
}

/* Single-precision expand with zeroing. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
               (__v16sf) _mm512_setzero_ps(),
               (__mmask16) __U);
}

/* 32-bit integer expand; unselected positions come from __W. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
                (__v16si) __W,
                (__mmask16) __U);
}

/* 32-bit integer expand with zeroing. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
                (__v16si) _mm512_setzero_si512(),
                (__mmask16) __U);
}
8919 
/* Convert 8 floats to 8 doubles with explicit rounding/SAE control R.
 * Plain form leaves a don't-care destination; _mask_ merges with W;
 * _maskz_ zeroes inactive elements. */
#define _mm512_cvt_roundps_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
8934 
/* Widen 8 floats to 8 doubles (exact conversion, expressed as a
 * generic vector convert so the optimizer can reason about it). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_cvtps_pd (__m256 __A)
{
  return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
}

/* Widening conversion with merge masking. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtps_pd(__A),
                                              (__v8df)__W);
}

/* Widening conversion with zero masking. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtps_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}

/* Convert the low 8 floats of a 512-bit vector to 8 doubles. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_cvtpslo_pd (__m512 __A)
{
  return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
}

/* Low-half conversion with merge masking. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
{
  return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
}
8968 
/* Masked move (blend): element i is __A[i] where bit i of __U is set,
 * else __W[i]. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
              (__v8df) __A,
              (__v8df) __W);
}

/* Masked move with zeroing of inactive elements. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
              (__v8df) __A,
              (__v8df) _mm512_setzero_pd ());
}

/* Single-precision masked move (blend with __W). */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
             (__v16sf) __A,
             (__v16sf) __W);
}

/* Single-precision masked move with zeroing. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
             (__v16sf) __A,
             (__v16sf) _mm512_setzero_ps ());
}
9000 
/* Compress-store: write only the mask-selected elements of __A
 * contiguously to unaligned memory __P (popcount(__U) elements
 * stored). */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
            (__mmask8) __U);
}

/* 64-bit integer compress-store. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
            (__mmask8) __U);
}

/* Single-precision compress-store. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
            (__mmask16) __U);
}

/* 32-bit integer compress-store. */
static __inline__ void __DEFAULT_FN_ATTRS512
_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
            (__mmask16) __U);
}
9028 
/* Convert the low double of B to a float in element 0 of the result;
 * upper three elements come from A.  R is the rounding control; the
 * _mask_/_maskz_ variants blend element 0 with W / 0.0f when bit 0 of
 * U is clear. */
#define _mm_cvt_roundsd_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_undefined_ps(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R)))

/* Function form of the masked sd->ss conversion, current rounding mode. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
{
  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
                                             (__v2df)__B,
                                             (__v4sf)__W,
                                             (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked sd->ss conversion, current rounding mode. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
{
  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
                                             (__v2df)__B,
                                             (__v4sf)_mm_setzero_ps(),
                                             (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}
9064 
/* The _i32/_i64 names are aliases for the classic _si32/_si64
 * scalar-conversion intrinsics. */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif

/* Signed int -> scalar fp conversions with explicit rounding R.
 * Each _roundi*/_roundsi* pair expands identically; both names are
 * provided for compatibility. */
#ifdef __x86_64__
#define _mm_cvt_roundi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))

#define _mm_cvt_roundsi64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                      (int)(R)))
#endif

#define _mm_cvt_roundsi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#define _mm_cvt_roundi32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R)))

#ifdef __x86_64__
#define _mm_cvt_roundsi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))

#define _mm_cvt_roundi64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                     (int)(R)))
#endif
9101 
/* Convert the low float of B to a double in element 0 of the result;
 * element 1 comes from A.  R is the rounding/SAE control; _mask_/
 * _maskz_ variants blend element 0 with W / 0.0 when bit 0 of U is
 * clear. */
#define _mm_cvt_roundss_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_undefined_pd(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R)))

/* Function form of the masked ss->sd conversion, current rounding mode. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
{
  return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
                                            (__v4sf)__B,
                                            (__v2df)__W,
                                            (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked ss->sd conversion, current rounding mode. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
{
  return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
                                            (__v4sf)__B,
                                            (__v2df)_mm_setzero_pd(),
                                            (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}
9137 
9138 static __inline__ __m128d __DEFAULT_FN_ATTRS128
9139 _mm_cvtu32_sd (__m128d __A, unsigned __B)
9140 {
9141   __A[0] = __B;
9142   return __A;
9143 }
9144 
#ifdef __x86_64__
/* Convert unsigned 64-bit B to double in element 0 with explicit
 * rounding R (VCVTUSI2SD); element 1 comes from A. */
#define _mm_cvt_roundu64_sd(A, B, R) \
  ((__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
                                       (unsigned long long)(B), (int)(R)))

/* Convert unsigned 64-bit __B to double in element 0 using the
 * current rounding mode (a u64 may not be exactly representable);
 * element 1 is carried over from __A. */
static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
{
  __A[0] = __B;
  return __A;
}
#endif
9157 
/* Convert unsigned 32-bit B to float in element 0 with explicit
 * rounding R (VCVTUSI2SS); elements 1..3 come from A. */
#define _mm_cvt_roundu32_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                      (int)(R)))

/* Convert unsigned 32-bit __B to float in element 0 using the current
 * rounding mode; elements 1..3 carried over from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_cvtu32_ss (__m128 __A, unsigned __B)
{
  __A[0] = __B;
  return __A;
}
9168 
#ifdef __x86_64__
/* Convert unsigned 64-bit B to float in element 0 with explicit
 * rounding R (VCVTUSI2SS); elements 1..3 come from A. */
#define _mm_cvt_roundu64_ss(A, B, R) \
  ((__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                      (unsigned long long)(B), (int)(R)))

/* Convert unsigned 64-bit __B to float in element 0 using the current
 * rounding mode; elements 1..3 carried over from __A. */
static __inline__ __m128 __DEFAULT_FN_ATTRS128
_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
{
  __A[0] = __B;
  return __A;
}
#endif
9181 
/* Broadcast __A to the elements selected by __M; other elements keep
 * their value from __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
{
  return (__m512i) __builtin_ia32_selectd_512(__M,
                                              (__v16si) _mm512_set1_epi32(__A),
                                              (__v16si) __O);
}

/* 64-bit masked broadcast; unselected elements come from __O. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
{
  return (__m512i) __builtin_ia32_selectq_512(__M,
                                              (__v8di) _mm512_set1_epi64(__A),
                                              (__v8di) __O);
}
9197 
/* Build a 512-bit vector from 64 bytes.  Arguments are given highest
 * element first (__e63..__e0), but the initializer lists them lowest
 * first, matching vector element order. */
static  __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
    char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
    char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
    char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
    char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
    char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
    char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
    char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
    char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
    char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
    char __e4, char __e3, char __e2, char __e1, char __e0) {

  return __extension__ (__m512i)(__v64qi)
    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
     __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
     __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
     __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
     __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
}
9221 
/* Construct a 512-bit vector of [32 x i16]. Arguments are in
 * highest-to-lowest element order (__e31 -> element 31, __e0 -> element 0). */
static  __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
    short __e27, short __e26, short __e25, short __e24, short __e23,
    short __e22, short __e21, short __e20, short __e19, short __e18,
    short __e17, short __e16, short __e15, short __e14, short __e13,
    short __e12, short __e11, short __e10, short __e9, short __e8,
    short __e7, short __e6, short __e5, short __e4, short __e3,
    short __e2, short __e1, short __e0) {
  return __extension__ (__m512i)(__v32hi)
    {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
     __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
     __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
     __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
}
9236 
/* Construct a 512-bit vector of [16 x i32]. The first argument __A becomes
 * element 15 (most significant) and __P becomes element 0, hence the
 * reversed initializer order. */
static __inline __m512i __DEFAULT_FN_ATTRS512
_mm512_set_epi32 (int __A, int __B, int __C, int __D,
     int __E, int __F, int __G, int __H,
     int __I, int __J, int __K, int __L,
     int __M, int __N, int __O, int __P)
{
  return __extension__ (__m512i)(__v16si)
  { __P, __O, __N, __M, __L, __K, __J, __I,
    __H, __G, __F, __E, __D, __C, __B, __A };
}
9247 
/* Same as _mm512_set_epi32 but with the arguments in ascending (memory)
 * element order: e0 -> element 0, e15 -> element 15. */
#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,           \
       e8,e9,e10,e11,e12,e13,e14,e15)          \
  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
                   (e5),(e4),(e3),(e2),(e1),(e0))
9252 
/* Construct a 512-bit vector of [8 x i64]. __A becomes element 7 (most
 * significant) and __H becomes element 0. */
static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_set_epi64 (long long __A, long long __B, long long __C,
     long long __D, long long __E, long long __F,
     long long __G, long long __H)
{
  return __extension__ (__m512i) (__v8di)
  { __H, __G, __F, __E, __D, __C, __B, __A };
}
9261 
/* Same as _mm512_set_epi64 but with the arguments in ascending (memory)
 * element order: e0 -> element 0, e7 -> element 7. */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)           \
  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9264 
/* Construct a 512-bit vector of [8 x double]. __A becomes element 7 (most
 * significant) and __H becomes element 0. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_set_pd (double __A, double __B, double __C, double __D,
        double __E, double __F, double __G, double __H)
{
  return __extension__ (__m512d)
  { __H, __G, __F, __E, __D, __C, __B, __A };
}
9272 
/* Same as _mm512_set_pd but with the arguments in ascending (memory)
 * element order: e0 -> element 0, e7 -> element 7. */
#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)              \
  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9275 
/* Construct a 512-bit vector of [16 x float]. __A becomes element 15 (most
 * significant) and __P becomes element 0, hence the reversed initializer. */
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_set_ps (float __A, float __B, float __C, float __D,
        float __E, float __F, float __G, float __H,
        float __I, float __J, float __K, float __L,
        float __M, float __N, float __O, float __P)
{
  return __extension__ (__m512)
  { __P, __O, __N, __M, __L, __K, __J, __I,
    __H, __G, __F, __E, __D, __C, __B, __A };
}
9286 
/* Same as _mm512_set_ps but with the arguments in ascending (memory)
 * element order: e0 -> element 0, e15 -> element 15. */
#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
                (e4),(e3),(e2),(e1),(e0))
9290 
9291 static __inline__ __m512 __DEFAULT_FN_ATTRS512
9292 _mm512_abs_ps(__m512 __A)
9293 {
9294   return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9295 }
9296 
9297 static __inline__ __m512 __DEFAULT_FN_ATTRS512
9298 _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
9299 {
9300   return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9301 }
9302 
9303 static __inline__ __m512d __DEFAULT_FN_ATTRS512
9304 _mm512_abs_pd(__m512d __A)
9305 {
9306   return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
9307 }
9308 
9309 static __inline__ __m512d __DEFAULT_FN_ATTRS512
9310 _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
9311 {
9312   return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
9313 }
9314 
/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
 * outputs. This class of vector operation forms the basis of many scientific
 * computations. In vector-reduction arithmetic, the evaluation order is
 * independent of the order of the input elements of V.
 *
 * For floating-point intrinsics:
 * 1. When using fadd/fmul intrinsics, the order of operations within the
 * vector is unspecified (associative math).
 * 2. When using fmin/fmax intrinsics, NaN or -0.0 elements within the vector
 * produce unspecified results.
 *
 * We use the bisection method: at each step, the vector from the previous
 * step is partitioned in half, and the operation is performed on its two
 * halves. This takes log2(n) steps where n is the number of elements.
 */
9330 
/* Horizontal add of all eight signed 64-bit elements of __W. */
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) {
  return __builtin_reduce_add((__v8di)__W);
}

/* Horizontal multiply of all eight signed 64-bit elements of __W. */
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) {
  return __builtin_reduce_mul((__v8di)__W);
}

/* Bitwise AND of all eight 64-bit elements of __W. */
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) {
  return __builtin_reduce_and((__v8di)__W);
}

/* Bitwise OR of all eight 64-bit elements of __W. */
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) {
  return __builtin_reduce_or((__v8di)__W);
}
9346 
/* Masked 64-bit reductions: elements whose mask bit is clear are first
 * replaced with the operation's identity value (0 for add/or, 1 for mul,
 * all-ones for and) so they do not affect the result. */
static __inline__ long long __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
  __W = _mm512_maskz_mov_epi64(__M, __W);  /* inactive lanes -> 0 */
  return __builtin_reduce_add((__v8di)__W);
}

static __inline__ long long __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
  __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W);  /* inactive lanes -> 1 */
  return __builtin_reduce_mul((__v8di)__W);
}

static __inline__ long long __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
  __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __W);  /* inactive lanes -> all-ones */
  return __builtin_reduce_and((__v8di)__W);
}

static __inline__ long long __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
  __W = _mm512_maskz_mov_epi64(__M, __W);  /* inactive lanes -> 0 */
  return __builtin_reduce_or((__v8di)__W);
}
9370 
// -0.0 is used to ignore the start value since it is the neutral value of
// floating point addition. For more information, please refer to
// https://llvm.org/docs/LangRef.html#llvm-vector-reduce-fadd-intrinsic
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
  return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
}

/* Horizontal multiply of all eight doubles; 1.0 is the neutral start value. */
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
  return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
}

/* Masked variants: inactive lanes are first set to the operation's neutral
 * value (0.0 for add via maskz, 1.0 for mul). */
static __inline__ double __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
  __W = _mm512_maskz_mov_pd(__M, __W);
  return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
}

static __inline__ double __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
  __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
  return __builtin_ia32_reduce_fmul_pd512(1.0, __W);
}
9393 
/* Horizontal add of all sixteen signed 32-bit elements of __W. */
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_reduce_add_epi32(__m512i __W) {
  return __builtin_reduce_add((__v16si)__W);
}

/* Horizontal multiply of all sixteen signed 32-bit elements of __W. */
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_reduce_mul_epi32(__m512i __W) {
  return __builtin_reduce_mul((__v16si)__W);
}

/* Bitwise AND of all sixteen 32-bit elements of __W. */
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_reduce_and_epi32(__m512i __W) {
  return __builtin_reduce_and((__v16si)__W);
}

/* Bitwise OR of all sixteen 32-bit elements of __W. */
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_reduce_or_epi32(__m512i __W) {
  return __builtin_reduce_or((__v16si)__W);
}
9413 
/* Masked 32-bit reductions: inactive lanes are first replaced with the
 * operation's identity (0 for add/or, 1 for mul, all-ones for and). */
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
  __W = _mm512_maskz_mov_epi32(__M, __W);  /* inactive lanes -> 0 */
  return __builtin_reduce_add((__v16si)__W);
}

static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
  __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W);  /* inactive lanes -> 1 */
  return __builtin_reduce_mul((__v16si)__W);
}

static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
  __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __W);  /* inactive lanes -> all-ones */
  return __builtin_reduce_and((__v16si)__W);
}

static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
  __W = _mm512_maskz_mov_epi32(__M, __W);  /* inactive lanes -> 0 */
  return __builtin_reduce_or((__v16si)__W);
}
9437 
/* Horizontal add of all sixteen floats; -0.0f is the neutral start value
 * for floating-point addition (see the fadd note above _mm512_reduce_add_pd
 * in this header). */
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_reduce_add_ps(__m512 __W) {
  return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
}

/* Horizontal multiply of all sixteen floats; 1.0f is the neutral start. */
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_reduce_mul_ps(__m512 __W) {
  return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
}

/* Masked variants: inactive lanes are first set to the neutral value. */
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
  __W = _mm512_maskz_mov_ps(__M, __W);
  return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
}

static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
  __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W);
  return __builtin_ia32_reduce_fmul_ps512(1.0f, __W);
}
9459 
/* Maximum of the eight signed 64-bit elements of __V. */
static __inline__ long long __DEFAULT_FN_ATTRS512
_mm512_reduce_max_epi64(__m512i __V) {
  return __builtin_reduce_max((__v8di)__V);
}

/* Maximum of the eight unsigned 64-bit elements of __V. */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
_mm512_reduce_max_epu64(__m512i __V) {
  return __builtin_reduce_max((__v8du)__V);
}

/* Minimum of the eight signed 64-bit elements of __V. */
static __inline__ long long __DEFAULT_FN_ATTRS512
_mm512_reduce_min_epi64(__m512i __V) {
  return __builtin_reduce_min((__v8di)__V);
}

/* Minimum of the eight unsigned 64-bit elements of __V. */
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
_mm512_reduce_min_epu64(__m512i __V) {
  return __builtin_reduce_min((__v8du)__V);
}
9479 
/* Masked 64-bit min/max reductions: inactive lanes are first replaced with
 * the identity for the comparison (LLONG_MIN for signed max, 0 for unsigned
 * max, LLONG_MAX for signed min, all-ones = UINT64_MAX for unsigned min). */
static __inline__ long long __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
  return __builtin_reduce_max((__v8di)__V);
}

static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
  __V = _mm512_maskz_mov_epi64(__M, __V);  /* inactive lanes -> 0 */
  return __builtin_reduce_max((__v8du)__V);
}

static __inline__ long long __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
  return __builtin_reduce_min((__v8di)__V);
}

static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
  __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-1LL), __M, __V);  /* all-ones */
  return __builtin_reduce_min((__v8du)__V);
}
/* Maximum of the sixteen signed 32-bit elements of __V. */
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_reduce_max_epi32(__m512i __V) {
  return __builtin_reduce_max((__v16si)__V);
}

/* Maximum of the sixteen unsigned 32-bit elements of __V. */
static __inline__ unsigned int __DEFAULT_FN_ATTRS512
_mm512_reduce_max_epu32(__m512i __V) {
  return __builtin_reduce_max((__v16su)__V);
}

/* Minimum of the sixteen signed 32-bit elements of __V. */
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_reduce_min_epi32(__m512i __V) {
  return __builtin_reduce_min((__v16si)__V);
}

/* Minimum of the sixteen unsigned 32-bit elements of __V. */
static __inline__ unsigned int __DEFAULT_FN_ATTRS512
_mm512_reduce_min_epu32(__m512i __V) {
  return __builtin_reduce_min((__v16su)__V);
}
9522 
/* Masked 32-bit min/max reductions: inactive lanes are first replaced with
 * the identity for the comparison (INT_MIN for signed max, 0 for unsigned
 * max, INT_MAX for signed min, all-ones = UINT32_MAX for unsigned min). */
static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
  return __builtin_reduce_max((__v16si)__V);
}

static __inline__ unsigned int __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
  __V = _mm512_maskz_mov_epi32(__M, __V);  /* inactive lanes -> 0 */
  return __builtin_reduce_max((__v16su)__V);
}

static __inline__ int __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
  return __builtin_reduce_min((__v16si)__V);
}

static __inline__ unsigned int __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
  __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-1), __M, __V);  /* all-ones */
  return __builtin_reduce_min((__v16su)__V);
}
9546 
/* Maximum of the eight double-precision elements of __V. NaN/-0.0 inputs
 * give unspecified results (see the reduction note earlier in this header). */
static __inline__ double __DEFAULT_FN_ATTRS512
_mm512_reduce_max_pd(__m512d __V) {
  return __builtin_ia32_reduce_fmax_pd512(__V);
}

/* Minimum of the eight double-precision elements of __V. */
static __inline__ double __DEFAULT_FN_ATTRS512
_mm512_reduce_min_pd(__m512d __V) {
  return __builtin_ia32_reduce_fmin_pd512(__V);
}

/* Masked variants: inactive lanes are first set to -infinity (for max) or
 * +infinity (for min) so they cannot win the comparison. */
static __inline__ double __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
  __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
  return __builtin_ia32_reduce_fmax_pd512(__V);
}

static __inline__ double __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
  __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
  return __builtin_ia32_reduce_fmin_pd512(__V);
}
9568 
/* Maximum of the sixteen single-precision elements of __V. NaN/-0.0 inputs
 * give unspecified results (see the reduction note earlier in this header). */
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_reduce_max_ps(__m512 __V) {
  return __builtin_ia32_reduce_fmax_ps512(__V);
}

/* Minimum of the sixteen single-precision elements of __V. */
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_reduce_min_ps(__m512 __V) {
  return __builtin_ia32_reduce_fmin_ps512(__V);
}

/* Masked variants: inactive lanes are first set to -infinity (for max) or
 * +infinity (for min) so they cannot win the comparison. */
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
  __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
  return __builtin_ia32_reduce_fmax_ps512(__V);
}

static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
  __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
  return __builtin_ia32_reduce_fmin_ps512(__V);
}
9590 
9591 /// Moves the least significant 32 bits of a vector of [16 x i32] to a
9592 ///    32-bit signed integer value.
9593 ///
9594 /// \headerfile <x86intrin.h>
9595 ///
9596 /// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
9597 ///
9598 /// \param __A
9599 ///    A vector of [16 x i32]. The least significant 32 bits are moved to the
9600 ///    destination.
9601 /// \returns A 32-bit signed integer containing the moved value.
9602 static __inline__ int __DEFAULT_FN_ATTRS512
9603 _mm512_cvtsi512_si32(__m512i __A) {
9604   __v16si __b = (__v16si)__A;
9605   return __b[0];
9606 }
9607 
/// Loads 8 double-precision (64-bit) floating-point elements stored at memory
/// locations starting at location \a base_addr at packed 32-bit integer indices
/// stored in the lower half of \a vindex scaled by \a scale, and stores them
/// in dst.
///
/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   dst[i+63:i] := MEM[addr+63:addr]
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_i32logather_pd(vindex, base_addr, scale)                        \
  _mm512_i32gather_pd(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9625 
/// Loads 8 double-precision (64-bit) floating-point elements from memory
/// starting at location \a base_addr at packed 32-bit integer indices stored in
/// the lower half of \a vindex scaled by \a scale into dst using writemask
/// \a mask (elements are copied from \a src when the corresponding mask bit is
/// not set).
///
/// This intrinsic corresponds to the <c> VGATHERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     dst[i+63:i] := MEM[addr+63:addr]
///   ELSE
///     dst[i+63:i] := src[i+63:i]
///   FI
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_mask_i32logather_pd(src, mask, vindex, base_addr, scale)        \
  _mm512_mask_i32gather_pd((src), (mask), _mm512_castsi512_si256(vindex),      \
                           (base_addr), (scale))
9650 
/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
/// at packed 32-bit integer indices stored in the lower half of \a vindex
/// scaled by \a scale and stores them in dst.
///
/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   dst[i+63:i] := MEM[addr+63:addr]
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_i32logather_epi64(vindex, base_addr, scale)                     \
  _mm512_i32gather_epi64(_mm512_castsi512_si256(vindex), (base_addr), (scale))
9668 
/// Loads 8 64-bit integer elements from memory starting at location \a base_addr
/// at packed 32-bit integer indices stored in the lower half of \a vindex
/// scaled by \a scale and stores them in dst using writemask \a mask (elements
/// are copied from \a src when the corresponding mask bit is not set).
///
/// This intrinsic corresponds to the <c> VPGATHERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     dst[i+63:i] := MEM[addr+63:addr]
///   ELSE
///     dst[i+63:i] := src[i+63:i]
///   FI
/// ENDFOR
/// dst[MAX:512] := 0
/// \endcode
#define _mm512_mask_i32logather_epi64(src, mask, vindex, base_addr, scale)     \
  _mm512_mask_i32gather_epi64((src), (mask), _mm512_castsi512_si256(vindex),   \
                              (base_addr), (scale))
9692 
/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
/// to memory locations starting at location \a base_addr at packed 32-bit
/// integer indices stored in \a vindex scaled by \a scale.
///
/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   MEM[addr+63:addr] := v1[i+63:i]
/// ENDFOR
/// \endcode
#define _mm512_i32loscatter_pd(base_addr, vindex, v1, scale)                   \
  _mm512_i32scatter_pd((base_addr), _mm512_castsi512_si256(vindex), (v1), (scale))
9709 
/// Stores 8 packed double-precision (64-bit) floating-point elements in \a v1
/// to memory locations starting at location \a base_addr at packed 32-bit
/// integer indices stored in \a vindex scaled by \a scale. Only those elements
/// whose corresponding mask bit is set in writemask \a mask are written to
/// memory.
///
/// This intrinsic corresponds to the <c> VSCATTERDPD </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     MEM[addr+63:addr] := v1[i+63:i]
///   FI
/// ENDFOR
/// \endcode
#define _mm512_mask_i32loscatter_pd(base_addr, mask, vindex, v1, scale)        \
  _mm512_mask_i32scatter_pd((base_addr), (mask),                               \
                            _mm512_castsi512_si256(vindex), (v1), (scale))
9731 
/// Stores 8 packed 64-bit integer elements located in \a v1 to memory
/// locations starting at location \a base_addr at packed 32-bit integer
/// indices stored in \a vindex scaled by \a scale.
///
/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///   MEM[addr+63:addr] := v1[i+63:i]
/// ENDFOR
/// \endcode
#define _mm512_i32loscatter_epi64(base_addr, vindex, v1, scale)                \
  _mm512_i32scatter_epi64((base_addr),                                         \
                          _mm512_castsi512_si256(vindex), (v1), (scale))
9749 
/// Stores 8 packed 64-bit integer elements located in \a v1 to memory
/// locations starting at location \a base_addr at packed 32-bit integer
/// indices stored in \a vindex scaled by \a scale using writemask \a mask
/// (elements whose corresponding mask bit is not set are not written to
/// memory).
///
/// This intrinsic corresponds to the <c> VPSCATTERDQ </c> instructions.
///
/// \code{.operation}
/// FOR j := 0 to 7
///   i := j*64
///   m := j*32
///   IF mask[j]
///     addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
///     MEM[addr+63:addr] := v1[i+63:i]
///   FI
/// ENDFOR
/// \endcode
#define _mm512_mask_i32loscatter_epi64(base_addr, mask, vindex, v1, scale)     \
  _mm512_mask_i32scatter_epi64((base_addr), (mask),                            \
                               _mm512_castsi512_si256(vindex), (v1), (scale))
9770 
9771 #undef __DEFAULT_FN_ATTRS512
9772 #undef __DEFAULT_FN_ATTRS128
9773 #undef __DEFAULT_FN_ATTRS
9774 
9775 #endif /* __AVX512FINTRIN_H */
9776