1 /*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 
10 #ifndef __IMMINTRIN_H
11 #error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
12 #endif
13 
14 #ifndef __AVX512DQINTRIN_H
15 #define __AVX512DQINTRIN_H
16 
/* Default attribute sets for the functions in this file: always-inline,
 * no debug info, and the "avx512dq" target feature.  The 512 variant also
 * declares a minimum vector width of 512. */
#define __DEFAULT_FN_ATTRS512                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"),       \
                 __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
20 
21 static __inline __mmask8 __DEFAULT_FN_ATTRS
_knot_mask8(__mmask8 __M)22 _knot_mask8(__mmask8 __M)
23 {
24   return __builtin_ia32_knotqi(__M);
25 }
26 
27 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kand_mask8(__mmask8 __A,__mmask8 __B)28 _kand_mask8(__mmask8 __A, __mmask8 __B)
29 {
30   return (__mmask8)__builtin_ia32_kandqi((__mmask8)__A, (__mmask8)__B);
31 }
32 
33 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kandn_mask8(__mmask8 __A,__mmask8 __B)34 _kandn_mask8(__mmask8 __A, __mmask8 __B)
35 {
36   return (__mmask8)__builtin_ia32_kandnqi((__mmask8)__A, (__mmask8)__B);
37 }
38 
39 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kor_mask8(__mmask8 __A,__mmask8 __B)40 _kor_mask8(__mmask8 __A, __mmask8 __B)
41 {
42   return (__mmask8)__builtin_ia32_korqi((__mmask8)__A, (__mmask8)__B);
43 }
44 
45 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kxnor_mask8(__mmask8 __A,__mmask8 __B)46 _kxnor_mask8(__mmask8 __A, __mmask8 __B)
47 {
48   return (__mmask8)__builtin_ia32_kxnorqi((__mmask8)__A, (__mmask8)__B);
49 }
50 
51 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kxor_mask8(__mmask8 __A,__mmask8 __B)52 _kxor_mask8(__mmask8 __A, __mmask8 __B)
53 {
54   return (__mmask8)__builtin_ia32_kxorqi((__mmask8)__A, (__mmask8)__B);
55 }
56 
57 static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestc_mask8_u8(__mmask8 __A,__mmask8 __B)58 _kortestc_mask8_u8(__mmask8 __A, __mmask8 __B)
59 {
60   return (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
61 }
62 
63 static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestz_mask8_u8(__mmask8 __A,__mmask8 __B)64 _kortestz_mask8_u8(__mmask8 __A, __mmask8 __B)
65 {
66   return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
67 }
68 
69 static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortest_mask8_u8(__mmask8 __A,__mmask8 __B,unsigned char * __C)70 _kortest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
71   *__C = (unsigned char)__builtin_ia32_kortestcqi(__A, __B);
72   return (unsigned char)__builtin_ia32_kortestzqi(__A, __B);
73 }
74 
75 static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask8_u8(__mmask8 __A,__mmask8 __B)76 _ktestc_mask8_u8(__mmask8 __A, __mmask8 __B)
77 {
78   return (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
79 }
80 
81 static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask8_u8(__mmask8 __A,__mmask8 __B)82 _ktestz_mask8_u8(__mmask8 __A, __mmask8 __B)
83 {
84   return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
85 }
86 
87 static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktest_mask8_u8(__mmask8 __A,__mmask8 __B,unsigned char * __C)88 _ktest_mask8_u8(__mmask8 __A, __mmask8 __B, unsigned char *__C) {
89   *__C = (unsigned char)__builtin_ia32_ktestcqi(__A, __B);
90   return (unsigned char)__builtin_ia32_ktestzqi(__A, __B);
91 }
92 
93 static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask16_u8(__mmask16 __A,__mmask16 __B)94 _ktestc_mask16_u8(__mmask16 __A, __mmask16 __B)
95 {
96   return (unsigned char)__builtin_ia32_ktestchi(__A, __B);
97 }
98 
99 static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask16_u8(__mmask16 __A,__mmask16 __B)100 _ktestz_mask16_u8(__mmask16 __A, __mmask16 __B)
101 {
102   return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
103 }
104 
105 static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktest_mask16_u8(__mmask16 __A,__mmask16 __B,unsigned char * __C)106 _ktest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
107   *__C = (unsigned char)__builtin_ia32_ktestchi(__A, __B);
108   return (unsigned char)__builtin_ia32_ktestzhi(__A, __B);
109 }
110 
111 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_kadd_mask8(__mmask8 __A,__mmask8 __B)112 _kadd_mask8(__mmask8 __A, __mmask8 __B)
113 {
114   return (__mmask8)__builtin_ia32_kaddqi((__mmask8)__A, (__mmask8)__B);
115 }
116 
117 static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_kadd_mask16(__mmask16 __A,__mmask16 __B)118 _kadd_mask16(__mmask16 __A, __mmask16 __B)
119 {
120   return (__mmask16)__builtin_ia32_kaddhi((__mmask16)__A, (__mmask16)__B);
121 }
122 
/* Expands to __builtin_ia32_kshiftliqi applied to mask A (cast to __mmask8)
 * and count I (cast to unsigned int).  The whole expansion is parenthesized
 * so the result cast cannot be split from the call by operators at the use
 * site. */
#define _kshiftli_mask8(A, I) \
  ((__mmask8)__builtin_ia32_kshiftliqi((__mmask8)(A), (unsigned int)(I)))

/* Same shape as _kshiftli_mask8, but forwards to
 * __builtin_ia32_kshiftriqi.  Fully parenthesized for the same reason. */
#define _kshiftri_mask8(A, I) \
  ((__mmask8)__builtin_ia32_kshiftriqi((__mmask8)(A), (unsigned int)(I)))
128 
129 static __inline__ unsigned int __DEFAULT_FN_ATTRS
_cvtmask8_u32(__mmask8 __A)130 _cvtmask8_u32(__mmask8 __A) {
131   return (unsigned int)__builtin_ia32_kmovb((__mmask8)__A);
132 }
133 
134 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_cvtu32_mask8(unsigned int __A)135 _cvtu32_mask8(unsigned int __A) {
136   return (__mmask8)__builtin_ia32_kmovb((__mmask8)__A);
137 }
138 
139 static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_load_mask8(__mmask8 * __A)140 _load_mask8(__mmask8 *__A) {
141   return (__mmask8)__builtin_ia32_kmovb(*(__mmask8 *)__A);
142 }
143 
144 static __inline__ void __DEFAULT_FN_ATTRS
_store_mask8(__mmask8 * __A,__mmask8 __B)145 _store_mask8(__mmask8 *__A, __mmask8 __B) {
146   *(__mmask8 *)__A = __builtin_ia32_kmovb((__mmask8)__B);
147 }
148 
149 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mullo_epi64(__m512i __A,__m512i __B)150 _mm512_mullo_epi64 (__m512i __A, __m512i __B) {
151   return (__m512i) ((__v8du) __A * (__v8du) __B);
152 }
153 
154 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mullo_epi64(__m512i __W,__mmask8 __U,__m512i __A,__m512i __B)155 _mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
156   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
157                                              (__v8di)_mm512_mullo_epi64(__A, __B),
158                                              (__v8di)__W);
159 }
160 
161 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mullo_epi64(__mmask8 __U,__m512i __A,__m512i __B)162 _mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
163   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
164                                              (__v8di)_mm512_mullo_epi64(__A, __B),
165                                              (__v8di)_mm512_setzero_si512());
166 }
167 
168 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_xor_pd(__m512d __A,__m512d __B)169 _mm512_xor_pd(__m512d __A, __m512d __B) {
170   return (__m512d)((__v8du)__A ^ (__v8du)__B);
171 }
172 
173 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_xor_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)174 _mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
175   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
176                                               (__v8df)_mm512_xor_pd(__A, __B),
177                                               (__v8df)__W);
178 }
179 
180 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_xor_pd(__mmask8 __U,__m512d __A,__m512d __B)181 _mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) {
182   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
183                                               (__v8df)_mm512_xor_pd(__A, __B),
184                                               (__v8df)_mm512_setzero_pd());
185 }
186 
187 static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_xor_ps(__m512 __A,__m512 __B)188 _mm512_xor_ps (__m512 __A, __m512 __B) {
189   return (__m512)((__v16su)__A ^ (__v16su)__B);
190 }
191 
192 static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_xor_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)193 _mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
194   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
195                                              (__v16sf)_mm512_xor_ps(__A, __B),
196                                              (__v16sf)__W);
197 }
198 
199 static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_xor_ps(__mmask16 __U,__m512 __A,__m512 __B)200 _mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) {
201   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
202                                              (__v16sf)_mm512_xor_ps(__A, __B),
203                                              (__v16sf)_mm512_setzero_ps());
204 }
205 
206 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_or_pd(__m512d __A,__m512d __B)207 _mm512_or_pd(__m512d __A, __m512d __B) {
208   return (__m512d)((__v8du)__A | (__v8du)__B);
209 }
210 
211 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_or_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)212 _mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
213   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
214                                               (__v8df)_mm512_or_pd(__A, __B),
215                                               (__v8df)__W);
216 }
217 
218 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_or_pd(__mmask8 __U,__m512d __A,__m512d __B)219 _mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) {
220   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
221                                               (__v8df)_mm512_or_pd(__A, __B),
222                                               (__v8df)_mm512_setzero_pd());
223 }
224 
225 static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_or_ps(__m512 __A,__m512 __B)226 _mm512_or_ps(__m512 __A, __m512 __B) {
227   return (__m512)((__v16su)__A | (__v16su)__B);
228 }
229 
230 static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_or_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)231 _mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
232   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
233                                              (__v16sf)_mm512_or_ps(__A, __B),
234                                              (__v16sf)__W);
235 }
236 
237 static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_or_ps(__mmask16 __U,__m512 __A,__m512 __B)238 _mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) {
239   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
240                                              (__v16sf)_mm512_or_ps(__A, __B),
241                                              (__v16sf)_mm512_setzero_ps());
242 }
243 
244 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_and_pd(__m512d __A,__m512d __B)245 _mm512_and_pd(__m512d __A, __m512d __B) {
246   return (__m512d)((__v8du)__A & (__v8du)__B);
247 }
248 
249 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_and_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)250 _mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
251   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
252                                               (__v8df)_mm512_and_pd(__A, __B),
253                                               (__v8df)__W);
254 }
255 
256 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_and_pd(__mmask8 __U,__m512d __A,__m512d __B)257 _mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) {
258   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
259                                               (__v8df)_mm512_and_pd(__A, __B),
260                                               (__v8df)_mm512_setzero_pd());
261 }
262 
263 static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_and_ps(__m512 __A,__m512 __B)264 _mm512_and_ps(__m512 __A, __m512 __B) {
265   return (__m512)((__v16su)__A & (__v16su)__B);
266 }
267 
268 static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_and_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)269 _mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
270   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
271                                              (__v16sf)_mm512_and_ps(__A, __B),
272                                              (__v16sf)__W);
273 }
274 
275 static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_and_ps(__mmask16 __U,__m512 __A,__m512 __B)276 _mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) {
277   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
278                                              (__v16sf)_mm512_and_ps(__A, __B),
279                                              (__v16sf)_mm512_setzero_ps());
280 }
281 
282 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_andnot_pd(__m512d __A,__m512d __B)283 _mm512_andnot_pd(__m512d __A, __m512d __B) {
284   return (__m512d)(~(__v8du)__A & (__v8du)__B);
285 }
286 
287 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_pd(__m512d __W,__mmask8 __U,__m512d __A,__m512d __B)288 _mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
289   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
290                                               (__v8df)_mm512_andnot_pd(__A, __B),
291                                               (__v8df)__W);
292 }
293 
294 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_andnot_pd(__mmask8 __U,__m512d __A,__m512d __B)295 _mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) {
296   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
297                                               (__v8df)_mm512_andnot_pd(__A, __B),
298                                               (__v8df)_mm512_setzero_pd());
299 }
300 
301 static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_andnot_ps(__m512 __A,__m512 __B)302 _mm512_andnot_ps(__m512 __A, __m512 __B) {
303   return (__m512)(~(__v16su)__A & (__v16su)__B);
304 }
305 
306 static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_andnot_ps(__m512 __W,__mmask16 __U,__m512 __A,__m512 __B)307 _mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
308   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
309                                              (__v16sf)_mm512_andnot_ps(__A, __B),
310                                              (__v16sf)__W);
311 }
312 
313 static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_andnot_ps(__mmask16 __U,__m512 __A,__m512 __B)314 _mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) {
315   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
316                                              (__v16sf)_mm512_andnot_ps(__A, __B),
317                                              (__v16sf)_mm512_setzero_ps());
318 }
319 
320 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epi64(__m512d __A)321 _mm512_cvtpd_epi64 (__m512d __A) {
322   return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
323                 (__v8di) _mm512_setzero_si512(),
324                 (__mmask8) -1,
325                 _MM_FROUND_CUR_DIRECTION);
326 }
327 
328 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_epi64(__m512i __W,__mmask8 __U,__m512d __A)329 _mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
330   return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
331                 (__v8di) __W,
332                 (__mmask8) __U,
333                 _MM_FROUND_CUR_DIRECTION);
334 }
335 
336 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_epi64(__mmask8 __U,__m512d __A)337 _mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
338   return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
339                 (__v8di) _mm512_setzero_si512(),
340                 (__mmask8) __U,
341                 _MM_FROUND_CUR_DIRECTION);
342 }
343 
/* Explicit-rounding forms of the cvtpd_epi64 family.  Each expands to
 * __builtin_ia32_cvtpd2qq512_mask with R forwarded as the final (int)
 * argument.  Expansions are fully parenthesized so the result cast cannot be
 * split from the call by operators at the use site. */

/* Unmasked: all-zero second operand, all-ones mask. */
#define _mm512_cvt_roundpd_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)))

/* Masked: W is the second operand, U the mask. */
#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked: all-zero second operand, U the mask. */
#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)))
358 
359 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtpd_epu64(__m512d __A)360 _mm512_cvtpd_epu64 (__m512d __A) {
361   return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
362                  (__v8di) _mm512_setzero_si512(),
363                  (__mmask8) -1,
364                  _MM_FROUND_CUR_DIRECTION);
365 }
366 
367 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_epu64(__m512i __W,__mmask8 __U,__m512d __A)368 _mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
369   return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
370                  (__v8di) __W,
371                  (__mmask8) __U,
372                  _MM_FROUND_CUR_DIRECTION);
373 }
374 
375 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_epu64(__mmask8 __U,__m512d __A)376 _mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
377   return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
378                  (__v8di) _mm512_setzero_si512(),
379                  (__mmask8) __U,
380                  _MM_FROUND_CUR_DIRECTION);
381 }
382 
/* Explicit-rounding forms of the cvtpd_epu64 family.  Each expands to
 * __builtin_ia32_cvtpd2uqq512_mask with R forwarded as the final (int)
 * argument.  Expansions are fully parenthesized so the result cast cannot be
 * split from the call by operators at the use site. */

/* Unmasked: all-zero second operand, all-ones mask. */
#define _mm512_cvt_roundpd_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)))

/* Masked: W is the second operand, U the mask. */
#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)))

/* Zero-masked: all-zero second operand, U the mask. */
#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)))
397 
398 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epi64(__m256 __A)399 _mm512_cvtps_epi64 (__m256 __A) {
400   return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
401                 (__v8di) _mm512_setzero_si512(),
402                 (__mmask8) -1,
403                 _MM_FROUND_CUR_DIRECTION);
404 }
405 
406 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtps_epi64(__m512i __W,__mmask8 __U,__m256 __A)407 _mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
408   return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
409                 (__v8di) __W,
410                 (__mmask8) __U,
411                 _MM_FROUND_CUR_DIRECTION);
412 }
413 
414 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtps_epi64(__mmask8 __U,__m256 __A)415 _mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
416   return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
417                 (__v8di) _mm512_setzero_si512(),
418                 (__mmask8) __U,
419                 _MM_FROUND_CUR_DIRECTION);
420 }
421 
/* Explicit-rounding forms of the cvtps_epi64 family.  Each expands to
 * __builtin_ia32_cvtps2qq512_mask with R forwarded as the final (int)
 * argument.  Expansions are fully parenthesized so the result cast cannot be
 * split from the call by operators at the use site. */

/* Unmasked: all-zero second operand, all-ones mask. */
#define _mm512_cvt_roundps_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)))

/* Masked: W is the second operand, U the mask. */
#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked: all-zero second operand, U the mask. */
#define _mm512_maskz_cvt_roundps_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)))
436 
437 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtps_epu64(__m256 __A)438 _mm512_cvtps_epu64 (__m256 __A) {
439   return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
440                  (__v8di) _mm512_setzero_si512(),
441                  (__mmask8) -1,
442                  _MM_FROUND_CUR_DIRECTION);
443 }
444 
445 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtps_epu64(__m512i __W,__mmask8 __U,__m256 __A)446 _mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
447   return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
448                  (__v8di) __W,
449                  (__mmask8) __U,
450                  _MM_FROUND_CUR_DIRECTION);
451 }
452 
453 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtps_epu64(__mmask8 __U,__m256 __A)454 _mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
455   return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
456                  (__v8di) _mm512_setzero_si512(),
457                  (__mmask8) __U,
458                  _MM_FROUND_CUR_DIRECTION);
459 }
460 
/* Explicit-rounding forms of the cvtps_epu64 family.  Each expands to
 * __builtin_ia32_cvtps2uqq512_mask with R forwarded as the final (int)
 * argument.  Expansions are fully parenthesized so the result cast cannot be
 * split from the call by operators at the use site. */

/* Unmasked: all-zero second operand, all-ones mask. */
#define _mm512_cvt_roundps_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)))

/* Masked: W is the second operand, U the mask. */
#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)))

/* Zero-masked: all-zero second operand, U the mask. */
#define _mm512_maskz_cvt_roundps_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)))
475 
476 
477 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_pd(__m512i __A)478 _mm512_cvtepi64_pd (__m512i __A) {
479   return (__m512d)__builtin_convertvector((__v8di)__A, __v8df);
480 }
481 
482 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_pd(__m512d __W,__mmask8 __U,__m512i __A)483 _mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
484   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
485                                               (__v8df)_mm512_cvtepi64_pd(__A),
486                                               (__v8df)__W);
487 }
488 
489 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_pd(__mmask8 __U,__m512i __A)490 _mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
491   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
492                                               (__v8df)_mm512_cvtepi64_pd(__A),
493                                               (__v8df)_mm512_setzero_pd());
494 }
495 
/* Explicit-rounding forms of the cvtepi64_pd family.  Each expands to
 * __builtin_ia32_cvtqq2pd512_mask with R forwarded as the final (int)
 * argument.  Expansions are fully parenthesized so the result cast cannot be
 * split from the call by operators at the use site. */

/* Unmasked: all-zero second operand, all-ones mask. */
#define _mm512_cvt_roundepi64_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/* Masked: W is the second operand, U the mask. */
#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

/* Zero-masked: all-zero second operand, U the mask. */
#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
510 
511 static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_cvtepi64_ps(__m512i __A)512 _mm512_cvtepi64_ps (__m512i __A) {
513   return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
514                (__v8sf) _mm256_setzero_ps(),
515                (__mmask8) -1,
516                _MM_FROUND_CUR_DIRECTION);
517 }
518 
519 static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepi64_ps(__m256 __W,__mmask8 __U,__m512i __A)520 _mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
521   return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
522                (__v8sf) __W,
523                (__mmask8) __U,
524                _MM_FROUND_CUR_DIRECTION);
525 }
526 
527 static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepi64_ps(__mmask8 __U,__m512i __A)528 _mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
529   return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
530                (__v8sf) _mm256_setzero_ps(),
531                (__mmask8) __U,
532                _MM_FROUND_CUR_DIRECTION);
533 }
534 
/* Explicit-rounding forms of the cvtepi64_ps family.  Each expands to
 * __builtin_ia32_cvtqq2ps512_mask with R forwarded as the final (int)
 * argument.  Expansions are fully parenthesized so the result cast cannot be
 * split from the call by operators at the use site. */

/* Unmasked: all-zero second operand, all-ones mask. */
#define _mm512_cvt_roundepi64_ps(A, R) \
  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/* Masked: W is the second operand, U the mask. */
#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R)))

/* Zero-masked: all-zero second operand, U the mask. */
#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
549 
550 
551 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttpd_epi64(__m512d __A)552 _mm512_cvttpd_epi64 (__m512d __A) {
553   return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
554                  (__v8di) _mm512_setzero_si512(),
555                  (__mmask8) -1,
556                  _MM_FROUND_CUR_DIRECTION);
557 }
558 
559 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttpd_epi64(__m512i __W,__mmask8 __U,__m512d __A)560 _mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
561   return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
562                  (__v8di) __W,
563                  (__mmask8) __U,
564                  _MM_FROUND_CUR_DIRECTION);
565 }
566 
567 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttpd_epi64(__mmask8 __U,__m512d __A)568 _mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
569   return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
570                  (__v8di) _mm512_setzero_si512(),
571                  (__mmask8) __U,
572                  _MM_FROUND_CUR_DIRECTION);
573 }
574 
/* Explicit-R forms of the cvttpd_epi64 family.  Each expands to
 * __builtin_ia32_cvttpd2qq512_mask with R forwarded as the final (int)
 * argument.  Expansions are fully parenthesized so the result cast cannot be
 * split from the call by operators at the use site. */

/* Unmasked: all-zero second operand, all-ones mask. */
#define _mm512_cvtt_roundpd_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)))

/* Masked: W is the second operand, U the mask. */
#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)))

/* Zero-masked: all-zero second operand, U the mask. */
#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)))
589 
590 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttpd_epu64(__m512d __A)591 _mm512_cvttpd_epu64 (__m512d __A) {
592   return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
593                   (__v8di) _mm512_setzero_si512(),
594                   (__mmask8) -1,
595                   _MM_FROUND_CUR_DIRECTION);
596 }
597 
598 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttpd_epu64(__m512i __W,__mmask8 __U,__m512d __A)599 _mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
600   return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
601                   (__v8di) __W,
602                   (__mmask8) __U,
603                   _MM_FROUND_CUR_DIRECTION);
604 }
605 
606 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttpd_epu64(__mmask8 __U,__m512d __A)607 _mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
608   return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
609                   (__v8di) _mm512_setzero_si512(),
610                   (__mmask8) __U,
611                   _MM_FROUND_CUR_DIRECTION);
612 }
613 
/* Explicit-R forms of the cvttpd_epu64 family.  Each expands to
 * __builtin_ia32_cvttpd2uqq512_mask with R forwarded as the final (int)
 * argument.  Expansions are fully parenthesized so the result cast cannot be
 * split from the call by operators at the use site. */

/* Unmasked: all-zero second operand, all-ones mask. */
#define _mm512_cvtt_roundpd_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                              (__v8di)_mm512_setzero_si512(), \
                                              (__mmask8)-1, (int)(R)))

/* Masked: W is the second operand, U the mask. */
#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                              (__v8di)(__m512i)(W), \
                                              (__mmask8)(U), (int)(R)))

/* Zero-masked: all-zero second operand, U the mask. */
#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                              (__v8di)_mm512_setzero_si512(), \
                                              (__mmask8)(U), (int)(R)))
628 
629 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttps_epi64(__m256 __A)630 _mm512_cvttps_epi64 (__m256 __A) {
631   return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
632                  (__v8di) _mm512_setzero_si512(),
633                  (__mmask8) -1,
634                  _MM_FROUND_CUR_DIRECTION);
635 }
636 
637 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttps_epi64(__m512i __W,__mmask8 __U,__m256 __A)638 _mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
639   return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
640                  (__v8di) __W,
641                  (__mmask8) __U,
642                  _MM_FROUND_CUR_DIRECTION);
643 }
644 
645 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttps_epi64(__mmask8 __U,__m256 __A)646 _mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
647   return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
648                  (__v8di) _mm512_setzero_si512(),
649                  (__mmask8) __U,
650                  _MM_FROUND_CUR_DIRECTION);
651 }
652 
653 #define _mm512_cvtt_roundps_epi64(A, R) \
654   (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
655                                             (__v8di)_mm512_setzero_si512(), \
656                                             (__mmask8)-1, (int)(R))
657 
/* Forwards A, the vector W and mask U to __builtin_ia32_cvttps2qq512_mask,
 * with R as the final (int) argument.  The expansion is parenthesized as a
 * whole so the result cast applies before any operator written after it. */
#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)))
662 
/* Forwards A, a zero vector and mask U to __builtin_ia32_cvttps2qq512_mask,
 * with R as the final (int) argument.  The expansion is parenthesized as a
 * whole so the result cast applies before any operator written after it. */
#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)))
667 
668 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttps_epu64(__m256 __A)669 _mm512_cvttps_epu64 (__m256 __A) {
670   return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
671                   (__v8di) _mm512_setzero_si512(),
672                   (__mmask8) -1,
673                   _MM_FROUND_CUR_DIRECTION);
674 }
675 
676 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttps_epu64(__m512i __W,__mmask8 __U,__m256 __A)677 _mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
678   return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
679                   (__v8di) __W,
680                   (__mmask8) __U,
681                   _MM_FROUND_CUR_DIRECTION);
682 }
683 
684 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttps_epu64(__mmask8 __U,__m256 __A)685 _mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
686   return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
687                   (__v8di) _mm512_setzero_si512(),
688                   (__mmask8) __U,
689                   _MM_FROUND_CUR_DIRECTION);
690 }
691 
/* Forwards A, a zero vector and an all-ones mask to
 * __builtin_ia32_cvttps2uqq512_mask, with R as the final (int) argument.
 * The expansion is parenthesized as a whole so the result cast applies
 * before any operator written after it. */
#define _mm512_cvtt_roundps_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                              (__v8di)_mm512_setzero_si512(), \
                                              (__mmask8)-1, (int)(R)))
696 
/* Forwards A, the vector W and mask U to __builtin_ia32_cvttps2uqq512_mask,
 * with R as the final (int) argument.  The expansion is parenthesized as a
 * whole so the result cast applies before any operator written after it. */
#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                              (__v8di)(__m512i)(W), \
                                              (__mmask8)(U), (int)(R)))
701 
/* Forwards A, a zero vector and mask U to __builtin_ia32_cvttps2uqq512_mask,
 * with R as the final (int) argument.  The expansion is parenthesized as a
 * whole so the result cast applies before any operator written after it. */
#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                              (__v8di)_mm512_setzero_si512(), \
                                              (__mmask8)(U), (int)(R)))
706 
707 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_cvtepu64_pd(__m512i __A)708 _mm512_cvtepu64_pd (__m512i __A) {
709   return (__m512d)__builtin_convertvector((__v8du)__A, __v8df);
710 }
711 
712 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu64_pd(__m512d __W,__mmask8 __U,__m512i __A)713 _mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
714   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
715                                               (__v8df)_mm512_cvtepu64_pd(__A),
716                                               (__v8df)__W);
717 }
718 
719 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu64_pd(__mmask8 __U,__m512i __A)720 _mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
721   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
722                                               (__v8df)_mm512_cvtepu64_pd(__A),
723                                               (__v8df)_mm512_setzero_pd());
724 }
725 
/* Forwards A, a zero vector and an all-ones mask to
 * __builtin_ia32_cvtuqq2pd512_mask, with R as the final (int) argument.
 * The expansion is parenthesized as a whole so the result cast applies
 * before any operator written after it. */
#define _mm512_cvt_roundepu64_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)-1, (int)(R)))
730 
/* Forwards A, the vector W and mask U to __builtin_ia32_cvtuqq2pd512_mask,
 * with R as the final (int) argument.  The expansion is parenthesized as a
 * whole so the result cast applies before any operator written after it. */
#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)))
735 
736 
/* Forwards A, a zero vector and mask U to __builtin_ia32_cvtuqq2pd512_mask,
 * with R as the final (int) argument.  The expansion is parenthesized as a
 * whole so the result cast applies before any operator written after it. */
#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))
741 
742 
743 static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_cvtepu64_ps(__m512i __A)744 _mm512_cvtepu64_ps (__m512i __A) {
745   return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
746                 (__v8sf) _mm256_setzero_ps(),
747                 (__mmask8) -1,
748                 _MM_FROUND_CUR_DIRECTION);
749 }
750 
751 static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtepu64_ps(__m256 __W,__mmask8 __U,__m512i __A)752 _mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
753   return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
754                 (__v8sf) __W,
755                 (__mmask8) __U,
756                 _MM_FROUND_CUR_DIRECTION);
757 }
758 
759 static __inline__ __m256 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtepu64_ps(__mmask8 __U,__m512i __A)760 _mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
761   return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
762                 (__v8sf) _mm256_setzero_ps(),
763                 (__mmask8) __U,
764                 _MM_FROUND_CUR_DIRECTION);
765 }
766 
/* Forwards A, a zero vector and an all-ones mask to
 * __builtin_ia32_cvtuqq2ps512_mask, with R as the final (int) argument.
 * The expansion is parenthesized as a whole so the result cast applies
 * before any operator written after it. */
#define _mm512_cvt_roundepu64_ps(A, R) \
  ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)-1, (int)(R)))
771 
/* Forwards A, the vector W and mask U to __builtin_ia32_cvtuqq2ps512_mask,
 * with R as the final (int) argument.  The expansion is parenthesized as a
 * whole so the result cast applies before any operator written after it. */
#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                            (__v8sf)(__m256)(W), (__mmask8)(U), \
                                            (int)(R)))
776 
/* Forwards A, a zero vector and mask U to __builtin_ia32_cvtuqq2ps512_mask,
 * with R as the final (int) argument.  The expansion is parenthesized as a
 * whole so the result cast applies before any operator written after it. */
#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
781 
/* Forwards A, B and the (int) control value C to
 * __builtin_ia32_rangepd512_mask with a zero vector, an all-ones mask and
 * _MM_FROUND_CUR_DIRECTION.  The expansion is parenthesized as a whole so
 * the result cast applies before any operator written after it. */
#define _mm512_range_pd(A, B, C) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))
788 
/* Forwards A, B and the (int) control value C to
 * __builtin_ia32_rangepd512_mask with vector W, mask U and
 * _MM_FROUND_CUR_DIRECTION.  The expansion is parenthesized as a whole so
 * the result cast applies before any operator written after it. */
#define _mm512_mask_range_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)(__m512d)(W), (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))
794 
/* Forwards A, B and the (int) control value C to
 * __builtin_ia32_rangepd512_mask with a zero vector, mask U and
 * _MM_FROUND_CUR_DIRECTION.  The expansion is parenthesized as a whole so
 * the result cast applies before any operator written after it. */
#define _mm512_maskz_range_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))
801 
/* Forwards A, B and the (int) control value C to
 * __builtin_ia32_rangepd512_mask with a zero vector, an all-ones mask and R
 * as the final (int) argument.  The expansion is parenthesized as a whole so
 * the result cast applies before any operator written after it. */
#define _mm512_range_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)))
807 
/* Forwards A, B and the (int) control value C to
 * __builtin_ia32_rangepd512_mask with vector W, mask U and R as the final
 * (int) argument.  The expansion is parenthesized as a whole so the result
 * cast applies before any operator written after it. */
#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)(__m512d)(W), (__mmask8)(U), \
                                           (int)(R)))
813 
/* Forwards A, B and the (int) control value C to
 * __builtin_ia32_rangepd512_mask with a zero vector, mask U and R as the
 * final (int) argument.  The expansion is parenthesized as a whole so the
 * result cast applies before any operator written after it. */
#define _mm512_maskz_range_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)))
819 
/* Forwards A, B and the (int) control value C to
 * __builtin_ia32_rangeps512_mask with a zero vector, an all-ones 16-bit mask
 * and _MM_FROUND_CUR_DIRECTION.  The expansion is parenthesized as a whole
 * so the result cast applies before any operator written after it. */
#define _mm512_range_ps(A, B, C) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION))
826 
/* Forwards A, B and the (int) control value C to
 * __builtin_ia32_rangeps512_mask with vector W, 16-bit mask U and
 * _MM_FROUND_CUR_DIRECTION.  The expansion is parenthesized as a whole so
 * the result cast applies before any operator written after it. */
#define _mm512_mask_range_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)(__m512)(W), (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION))
832 
/* Forwards A, B and the (int) control value C to
 * __builtin_ia32_rangeps512_mask with a zero vector, 16-bit mask U and
 * _MM_FROUND_CUR_DIRECTION.  The expansion is parenthesized as a whole so
 * the result cast applies before any operator written after it. */
#define _mm512_maskz_range_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION))
839 
/* Forwards A, B and the (int) control value C to
 * __builtin_ia32_rangeps512_mask with a zero vector, an all-ones 16-bit mask
 * and R as the final (int) argument.  The expansion is parenthesized as a
 * whole so the result cast applies before any operator written after it. */
#define _mm512_range_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R)))
845 
/* Forwards A, B and the (int) control value C to
 * __builtin_ia32_rangeps512_mask with vector W, 16-bit mask U and R as the
 * final (int) argument.  The expansion is parenthesized as a whole so the
 * result cast applies before any operator written after it. */
#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)(__m512)(W), (__mmask16)(U), \
                                          (int)(R)))
851 
/* Forwards A, B and the (int) control value C to
 * __builtin_ia32_rangeps512_mask with a zero vector, 16-bit mask U and R as
 * the final (int) argument.  The expansion is parenthesized as a whole so
 * the result cast applies before any operator written after it. */
#define _mm512_maskz_range_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)))
857 
/* Forwards A and B to __builtin_ia32_rangess128_round_mask with a zero
 * vector, an all-ones mask, the (int) control value C and R as the final
 * (int) argument.  The expansion is parenthesized as a whole so the result
 * cast applies before any operator written after it. */
#define _mm_range_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(C), \
                                                (int)(R)))
864 
/* Shorthand for _mm_range_round_ss with _MM_FROUND_CUR_DIRECTION as the last
 * argument.  Wrapped in parentheses so the expansion is a single operand
 * wherever the macro is used. */
#define _mm_range_ss(A, B, C) \
  (_mm_range_round_ss(A, B, C, _MM_FROUND_CUR_DIRECTION))
866 
/* Forwards A and B to __builtin_ia32_rangess128_round_mask with vector W,
 * mask U, the (int) control value C and R as the final (int) argument.  The
 * expansion is parenthesized as a whole so the result cast applies before
 * any operator written after it. */
#define _mm_mask_range_round_ss(W, U, A, B, C, R) \
  ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(C), \
                                                (int)(R)))
873 
/* Shorthand for _mm_mask_range_round_ss with _MM_FROUND_CUR_DIRECTION as the
 * last argument.  Wrapped in parentheses so the expansion is a single
 * operand wherever the macro is used. */
#define _mm_mask_range_ss(W, U, A, B, C) \
  (_mm_mask_range_round_ss(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION))
875 
/* Forwards A and B to __builtin_ia32_rangess128_round_mask with a zero
 * vector, mask U, the (int) control value C and R as the final (int)
 * argument.  The expansion is parenthesized as a whole so the result cast
 * applies before any operator written after it. */
#define _mm_maskz_range_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(C), \
                                                (int)(R)))
882 
/* Shorthand for _mm_maskz_range_round_ss with _MM_FROUND_CUR_DIRECTION as
 * the last argument.  Wrapped in parentheses so the expansion is a single
 * operand wherever the macro is used. */
#define _mm_maskz_range_ss(U, A, B, C) \
  (_mm_maskz_range_round_ss(U, A, B, C, _MM_FROUND_CUR_DIRECTION))
884 
/* Forwards A and B to __builtin_ia32_rangesd128_round_mask with a zero
 * vector, an all-ones mask, the (int) control value C and R as the final
 * (int) argument.  The expansion is parenthesized as a whole so the result
 * cast applies before any operator written after it. */
#define _mm_range_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(C), \
                                                 (int)(R)))
891 
/* Shorthand for _mm_range_round_sd with _MM_FROUND_CUR_DIRECTION as the last
 * argument.  Wrapped in parentheses so the expansion is a single operand
 * wherever the macro is used. */
#define _mm_range_sd(A, B, C) \
  (_mm_range_round_sd(A, B, C, _MM_FROUND_CUR_DIRECTION))
893 
/* Forwards A and B to __builtin_ia32_rangesd128_round_mask with vector W,
 * mask U, the (int) control value C and R as the final (int) argument.  The
 * expansion is parenthesized as a whole so the result cast applies before
 * any operator written after it. */
#define _mm_mask_range_round_sd(W, U, A, B, C, R) \
  ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(C), \
                                                 (int)(R)))
900 
/* Shorthand for _mm_mask_range_round_sd with _MM_FROUND_CUR_DIRECTION as the
 * last argument.  Wrapped in parentheses so the expansion is a single
 * operand wherever the macro is used. */
#define _mm_mask_range_sd(W, U, A, B, C) \
  (_mm_mask_range_round_sd(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION))
902 
/* Forwards A and B to __builtin_ia32_rangesd128_round_mask with a zero
 * vector, mask U, the (int) control value C and R as the final (int)
 * argument.  The expansion is parenthesized as a whole so the result cast
 * applies before any operator written after it. */
#define _mm_maskz_range_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(C), \
                                                 (int)(R)))
909 
/* Shorthand for _mm_maskz_range_round_sd with _MM_FROUND_CUR_DIRECTION as
 * the last argument.  Wrapped in parentheses so the expansion is a single
 * operand wherever the macro is used. */
#define _mm_maskz_range_sd(U, A, B, C) \
  (_mm_maskz_range_round_sd(U, A, B, C, _MM_FROUND_CUR_DIRECTION))
911 
/* Forwards A and the (int) control value B to
 * __builtin_ia32_reducepd512_mask with a zero vector, an all-ones mask and
 * _MM_FROUND_CUR_DIRECTION.  The expansion is parenthesized as a whole so
 * the result cast applies before any operator written after it. */
#define _mm512_reduce_pd(A, B) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, \
                                            _MM_FROUND_CUR_DIRECTION))
917 
/* Forwards A and the (int) control value B to
 * __builtin_ia32_reducepd512_mask with vector W, mask U and
 * _MM_FROUND_CUR_DIRECTION.  The expansion is parenthesized as a whole so
 * the result cast applies before any operator written after it. */
#define _mm512_mask_reduce_pd(W, U, A, B) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
923 
/* Forwards A and the (int) control value B to
 * __builtin_ia32_reducepd512_mask with a zero vector, mask U and
 * _MM_FROUND_CUR_DIRECTION.  The expansion is parenthesized as a whole so
 * the result cast applies before any operator written after it. */
#define _mm512_maskz_reduce_pd(U, A, B) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))
929 
/* Forwards A and the (int) control value B to
 * __builtin_ia32_reduceps512_mask with a zero vector, an all-ones 16-bit
 * mask and _MM_FROUND_CUR_DIRECTION.  The expansion is parenthesized as a
 * whole so the result cast applies before any operator written after it. */
#define _mm512_reduce_ps(A, B) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, \
                                           _MM_FROUND_CUR_DIRECTION))
935 
/* Forwards A and the (int) control value B to
 * __builtin_ia32_reduceps512_mask with vector W, 16-bit mask U and
 * _MM_FROUND_CUR_DIRECTION.  The expansion is parenthesized as a whole so
 * the result cast applies before any operator written after it. */
#define _mm512_mask_reduce_ps(W, U, A, B) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION))
941 
/* Forwards A and the (int) control value B to
 * __builtin_ia32_reduceps512_mask with a zero vector, 16-bit mask U and
 * _MM_FROUND_CUR_DIRECTION.  The expansion is parenthesized as a whole so
 * the result cast applies before any operator written after it. */
#define _mm512_maskz_reduce_ps(U, A, B) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION))
947 
/* Forwards A and the (int) control value B to
 * __builtin_ia32_reducepd512_mask with a zero vector, an all-ones mask and R
 * as the final (int) argument.  The expansion is parenthesized as a whole so
 * the result cast applies before any operator written after it. */
#define _mm512_reduce_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
952 
/* Forwards A and the (int) control value B to
 * __builtin_ia32_reducepd512_mask with vector W, mask U and R as the final
 * (int) argument.  The expansion is parenthesized as a whole so the result
 * cast applies before any operator written after it. */
#define _mm512_mask_reduce_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))
957 
/* Forwards A and the (int) control value B to
 * __builtin_ia32_reducepd512_mask with a zero vector, mask U and R as the
 * final (int) argument.  The expansion is parenthesized as a whole so the
 * result cast applies before any operator written after it. */
#define _mm512_maskz_reduce_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
962 
/* Forwards A and the (int) control value B to
 * __builtin_ia32_reduceps512_mask with a zero vector, an all-ones 16-bit
 * mask and R as the final (int) argument.  The expansion is parenthesized as
 * a whole so the result cast applies before any operator written after it. */
#define _mm512_reduce_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)))
967 
/* Forwards A and the (int) control value B to
 * __builtin_ia32_reduceps512_mask with vector W, 16-bit mask U and R as the
 * final (int) argument.  The expansion is parenthesized as a whole so the
 * result cast applies before any operator written after it. */
#define _mm512_mask_reduce_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))
972 
/* Forwards A and the (int) control value B to
 * __builtin_ia32_reduceps512_mask with a zero vector, 16-bit mask U and R as
 * the final (int) argument.  The expansion is parenthesized as a whole so
 * the result cast applies before any operator written after it. */
#define _mm512_maskz_reduce_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
977 
/* Forwards A and B to __builtin_ia32_reducess_mask with a zero vector, an
 * all-ones mask, the (int) control value C and _MM_FROUND_CUR_DIRECTION.
 * The expansion is parenthesized as a whole so the result cast applies
 * before any operator written after it. */
#define _mm_reduce_ss(A, B, C) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                        (int)(C), _MM_FROUND_CUR_DIRECTION))
983 
/* Forwards A and B to __builtin_ia32_reducess_mask with vector W, mask U,
 * the (int) control value C and _MM_FROUND_CUR_DIRECTION.  The expansion is
 * parenthesized as a whole so the result cast applies before any operator
 * written after it. */
#define _mm_mask_reduce_ss(W, U, A, B, C) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)(__m128)(W), (__mmask8)(U), \
                                        (int)(C), _MM_FROUND_CUR_DIRECTION))
989 
/* Forwards A and B to __builtin_ia32_reducess_mask with a zero vector, mask
 * U, the (int) control value C and _MM_FROUND_CUR_DIRECTION.  The expansion
 * is parenthesized as a whole so the result cast applies before any operator
 * written after it. */
#define _mm_maskz_reduce_ss(U, A, B, C) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)_mm_setzero_ps(), \
                                        (__mmask8)(U), (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION))
996 
/* Forwards A and B to __builtin_ia32_reducess_mask with a zero vector, an
 * all-ones mask, the (int) control value C and R as the final (int)
 * argument.  The expansion is parenthesized as a whole so the result cast
 * applies before any operator written after it. */
#define _mm_reduce_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                        (int)(C), (int)(R)))
1002 
/* Forwards A and B to __builtin_ia32_reducess_mask with vector W, mask U,
 * the (int) control value C and R as the final (int) argument.  The
 * expansion is parenthesized as a whole so the result cast applies before
 * any operator written after it. */
#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)(__m128)(W), (__mmask8)(U), \
                                        (int)(C), (int)(R)))
1008 
/* Forwards A and B to __builtin_ia32_reducess_mask with a zero vector, mask
 * U, the (int) control value C and R as the final (int) argument.  The
 * expansion is parenthesized as a whole so the result cast applies before
 * any operator written after it. */
#define _mm_maskz_reduce_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)_mm_setzero_ps(), \
                                        (__mmask8)(U), (int)(C), (int)(R)))
1014 
/* Forwards A and B to __builtin_ia32_reducesd_mask with a zero vector, an
 * all-ones mask, the (int) control value C and _MM_FROUND_CUR_DIRECTION.
 * The expansion is parenthesized as a whole so the result cast applies
 * before any operator written after it. */
#define _mm_reduce_sd(A, B, C) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)_mm_setzero_pd(), \
                                         (__mmask8)-1, (int)(C), \
                                         _MM_FROUND_CUR_DIRECTION))
1021 
/* Forwards A and B to __builtin_ia32_reducesd_mask with vector W, mask U,
 * the (int) control value C and _MM_FROUND_CUR_DIRECTION.  The expansion is
 * parenthesized as a whole so the result cast applies before any operator
 * written after it. */
#define _mm_mask_reduce_sd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)(__m128d)(W), (__mmask8)(U), \
                                         (int)(C), _MM_FROUND_CUR_DIRECTION))
1027 
/* Forwards A and B to __builtin_ia32_reducesd_mask with a zero vector, mask
 * U, the (int) control value C and _MM_FROUND_CUR_DIRECTION.  The expansion
 * is parenthesized as a whole so the result cast applies before any operator
 * written after it. */
#define _mm_maskz_reduce_sd(U, A, B, C) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)_mm_setzero_pd(), \
                                         (__mmask8)(U), (int)(C), \
                                         _MM_FROUND_CUR_DIRECTION))
1034 
/* Forwards A and B to __builtin_ia32_reducesd_mask with a zero vector, an
 * all-ones mask, the (int) control value C and R as the final (int)
 * argument.  The expansion is parenthesized as a whole so the result cast
 * applies before any operator written after it. */
#define _mm_reduce_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)_mm_setzero_pd(), \
                                         (__mmask8)-1, (int)(C), (int)(R)))
1040 
/* Forwards A and B to __builtin_ia32_reducesd_mask with vector W, mask U,
 * the (int) control value C and R as the final (int) argument.  The
 * expansion is parenthesized as a whole so the result cast applies before
 * any operator written after it. */
#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)(__m128d)(W), (__mmask8)(U), \
                                         (int)(C), (int)(R)))
1046 
/* Forwards A and B to __builtin_ia32_reducesd_mask with a zero vector, mask
 * U, the (int) control value C and R as the final (int) argument.  The
 * expansion is parenthesized as a whole so the result cast applies before
 * any operator written after it. */
#define _mm_maskz_reduce_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)_mm_setzero_pd(), \
                                         (__mmask8)(U), (int)(C), (int)(R)))
1052 
1053 static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
_mm512_movepi32_mask(__m512i __A)1054 _mm512_movepi32_mask (__m512i __A)
1055 {
1056   return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
1057 }
1058 
1059 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_movm_epi32(__mmask16 __A)1060 _mm512_movm_epi32 (__mmask16 __A)
1061 {
1062   return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
1063 }
1064 
1065 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_movm_epi64(__mmask8 __A)1066 _mm512_movm_epi64 (__mmask8 __A)
1067 {
1068   return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
1069 }
1070 
1071 static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
_mm512_movepi64_mask(__m512i __A)1072 _mm512_movepi64_mask (__m512i __A)
1073 {
1074   return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
1075 }
1076 
1077 
1078 static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_broadcast_f32x2(__m128 __A)1079 _mm512_broadcast_f32x2 (__m128 __A)
1080 {
1081   return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
1082                                          0, 1, 0, 1, 0, 1, 0, 1,
1083                                          0, 1, 0, 1, 0, 1, 0, 1);
1084 }
1085 
1086 static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f32x2(__m512 __O,__mmask16 __M,__m128 __A)1087 _mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
1088 {
1089   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
1090                                              (__v16sf)_mm512_broadcast_f32x2(__A),
1091                                              (__v16sf)__O);
1092 }
1093 
1094 static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f32x2(__mmask16 __M,__m128 __A)1095 _mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
1096 {
1097   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
1098                                              (__v16sf)_mm512_broadcast_f32x2(__A),
1099                                              (__v16sf)_mm512_setzero_ps());
1100 }
1101 
1102 static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_broadcast_f32x8(__m256 __A)1103 _mm512_broadcast_f32x8(__m256 __A)
1104 {
1105   return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A,
1106                                          0, 1, 2, 3, 4, 5, 6, 7,
1107                                          0, 1, 2, 3, 4, 5, 6, 7);
1108 }
1109 
1110 static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f32x8(__m512 __O,__mmask16 __M,__m256 __A)1111 _mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
1112 {
1113   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
1114                                            (__v16sf)_mm512_broadcast_f32x8(__A),
1115                                            (__v16sf)__O);
1116 }
1117 
1118 static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f32x8(__mmask16 __M,__m256 __A)1119 _mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
1120 {
1121   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
1122                                            (__v16sf)_mm512_broadcast_f32x8(__A),
1123                                            (__v16sf)_mm512_setzero_ps());
1124 }
1125 
1126 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_broadcast_f64x2(__m128d __A)1127 _mm512_broadcast_f64x2(__m128d __A)
1128 {
1129   return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
1130                                           0, 1, 0, 1, 0, 1, 0, 1);
1131 }
1132 
1133 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_f64x2(__m512d __O,__mmask8 __M,__m128d __A)1134 _mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
1135 {
1136   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
1137                                             (__v8df)_mm512_broadcast_f64x2(__A),
1138                                             (__v8df)__O);
1139 }
1140 
1141 static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_f64x2(__mmask8 __M,__m128d __A)1142 _mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
1143 {
1144   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
1145                                             (__v8df)_mm512_broadcast_f64x2(__A),
1146                                             (__v8df)_mm512_setzero_pd());
1147 }
1148 
1149 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i32x2(__m128i __A)1150 _mm512_broadcast_i32x2 (__m128i __A)
1151 {
1152   return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
1153                                           0, 1, 0, 1, 0, 1, 0, 1,
1154                                           0, 1, 0, 1, 0, 1, 0, 1);
1155 }
1156 
1157 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i32x2(__m512i __O,__mmask16 __M,__m128i __A)1158 _mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
1159 {
1160   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1161                                              (__v16si)_mm512_broadcast_i32x2(__A),
1162                                              (__v16si)__O);
1163 }
1164 
1165 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i32x2(__mmask16 __M,__m128i __A)1166 _mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
1167 {
1168   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1169                                              (__v16si)_mm512_broadcast_i32x2(__A),
1170                                              (__v16si)_mm512_setzero_si512());
1171 }
1172 
1173 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i32x8(__m256i __A)1174 _mm512_broadcast_i32x8(__m256i __A)
1175 {
1176   return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A,
1177                                           0, 1, 2, 3, 4, 5, 6, 7,
1178                                           0, 1, 2, 3, 4, 5, 6, 7);
1179 }
1180 
1181 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i32x8(__m512i __O,__mmask16 __M,__m256i __A)1182 _mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
1183 {
1184   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1185                                            (__v16si)_mm512_broadcast_i32x8(__A),
1186                                            (__v16si)__O);
1187 }
1188 
1189 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i32x8(__mmask16 __M,__m256i __A)1190 _mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
1191 {
1192   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1193                                            (__v16si)_mm512_broadcast_i32x8(__A),
1194                                            (__v16si)_mm512_setzero_si512());
1195 }
1196 
1197 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_broadcast_i64x2(__m128i __A)1198 _mm512_broadcast_i64x2(__m128i __A)
1199 {
1200   return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
1201                                           0, 1, 0, 1, 0, 1, 0, 1);
1202 }
1203 
1204 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_broadcast_i64x2(__m512i __O,__mmask8 __M,__m128i __A)1205 _mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
1206 {
1207   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1208                                             (__v8di)_mm512_broadcast_i64x2(__A),
1209                                             (__v8di)__O);
1210 }
1211 
1212 static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_broadcast_i64x2(__mmask8 __M,__m128i __A)1213 _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
1214 {
1215   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1216                                             (__v8di)_mm512_broadcast_i64x2(__A),
1217                                             (__v8di)_mm512_setzero_si512());
1218 }
1219 
/* Forwards A and the (int) selector imm to __builtin_ia32_extractf32x8_mask
 * with an undefined vector from _mm256_undefined_ps() and an all-ones mask.
 * The expansion is parenthesized as a whole so the result cast applies
 * before any operator written after it. */
#define _mm512_extractf32x8_ps(A, imm) \
  ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v8sf)_mm256_undefined_ps(), \
                                            (__mmask8)-1))
1224 
/* Forwards A and the (int) selector imm to __builtin_ia32_extractf32x8_mask
 * with vector W and mask U.  The expansion is parenthesized as a whole so
 * the result cast applies before any operator written after it. */
#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
  ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v8sf)(__m256)(W), \
                                            (__mmask8)(U)))
1229 
/* Forwards A and the (int) selector imm to __builtin_ia32_extractf32x8_mask
 * with a zero vector and mask U.  The expansion is parenthesized as a whole
 * so the result cast applies before any operator written after it. */
#define _mm512_maskz_extractf32x8_ps(U, A, imm) \
  ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)(U)))
1234 
/* Forwards A and the (int) selector imm to
 * __builtin_ia32_extractf64x2_512_mask with an undefined vector from
 * _mm_undefined_pd() and an all-ones mask.  The expansion is parenthesized
 * as a whole so the result cast applies before any operator written after
 * it. */
#define _mm512_extractf64x2_pd(A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)_mm_undefined_pd(), \
                                                 (__mmask8)-1))
1240 
/* Forwards A and the (int) selector imm to
 * __builtin_ia32_extractf64x2_512_mask with vector W and mask U.  The
 * expansion is parenthesized as a whole so the result cast applies before
 * any operator written after it. */
#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U)))
1246 
/* Forwards A and the (int) selector imm to
 * __builtin_ia32_extractf64x2_512_mask with a zero vector and mask U.  The
 * expansion is parenthesized as a whole so the result cast applies before
 * any operator written after it. */
#define _mm512_maskz_extractf64x2_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U)))
1252 
/* Forwards A and the (int) selector imm to __builtin_ia32_extracti32x8_mask
 * with an undefined vector from _mm256_undefined_si256() and an all-ones
 * mask.  The expansion is parenthesized as a whole so the result cast
 * applies before any operator written after it. */
#define _mm512_extracti32x8_epi32(A, imm) \
  ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v8si)_mm256_undefined_si256(), \
                                             (__mmask8)-1))
1257 
/* Masked form: expands to __builtin_ia32_extracti32x8_mask on A (viewed as
 * __v16si) with selector imm, passing W (as __v8si) as the third operand and
 * U (as __mmask8) as the mask; the result is cast to __m256i.
 * NOTE(review): W is presumably the source for elements whose mask bit is
 * clear -- confirm against the builtin's definition.
 * The whole expansion is parenthesized so that a postfix operator or sizeof
 * at the use site applies to the complete result. */
#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U)))
1262 
/* Zero-masked form: expands to __builtin_ia32_extracti32x8_mask on A (viewed
 * as __v16si) with selector imm, passing _mm256_setzero_si256() as the third
 * operand and U (as __mmask8) as the mask; the result is cast to __m256i.
 * The whole expansion is parenthesized so that a postfix operator or sizeof
 * at the use site applies to the complete result. */
#define _mm512_maskz_extracti32x8_epi32(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U)))
1267 
/* Expands to a __builtin_ia32_extracti64x2_512_mask call on A (viewed as
 * __v8di) with selector imm, passing _mm_undefined_si128() as the third
 * operand and an all-ones mask ((__mmask8)-1); the result is cast to
 * __m128i.
 * The whole expansion is parenthesized so that a postfix operator or sizeof
 * at the use site applies to the complete result instead of separating the
 * leading cast from the builtin call. */
#define _mm512_extracti64x2_epi64(A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)_mm_undefined_si128(), \
                                                 (__mmask8)-1))
1273 
/* Masked form: expands to __builtin_ia32_extracti64x2_512_mask on A (viewed
 * as __v8di) with selector imm, passing W (as __v2di) as the third operand
 * and U (as __mmask8) as the mask; the result is cast to __m128i.
 * NOTE(review): W is presumably the source for elements whose mask bit is
 * clear -- confirm against the builtin's definition.
 * The whole expansion is parenthesized so that a postfix operator or sizeof
 * at the use site applies to the complete result. */
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)(__m128i)(W), \
                                                 (__mmask8)(U)))
1279 
/* Zero-masked form: expands to __builtin_ia32_extracti64x2_512_mask on A
 * (viewed as __v8di) with selector imm, passing _mm_setzero_si128() as the
 * third operand and U (as __mmask8) as the mask; the result is cast to
 * __m128i.
 * The whole expansion is parenthesized so that a postfix operator or sizeof
 * at the use site applies to the complete result. */
#define _mm512_maskz_extracti64x2_epi64(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)_mm_setzero_si128(), \
                                                 (__mmask8)(U)))
1285 
/* Expands to a __builtin_ia32_insertf32x8 call with A (viewed as __v16sf),
 * B (viewed as __v8sf) and position selector imm; the result is cast to
 * __m512.
 * The whole expansion is parenthesized so that a postfix operator or sizeof
 * at the use site applies to the complete result instead of separating the
 * leading cast from the builtin call. */
#define _mm512_insertf32x8(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x8((__v16sf)(__m512)(A), \
                                      (__v8sf)(__m256)(B), (int)(imm)))
1289 
/* Masked form: evaluates _mm512_insertf32x8(A, B, imm) and passes it,
 * together with W (as __v16sf), to __builtin_ia32_selectps_512 under the
 * 16-bit mask U; the result is cast to __m512.
 * NOTE(review): presumably elements whose mask bit is set come from the
 * insert result and the remaining ones from W -- confirm against the select
 * builtin's definition.
 * The whole expansion is parenthesized so that a postfix operator or sizeof
 * at the use site applies to the complete result. */
#define _mm512_mask_insertf32x8(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                 (__v16sf)(__m512)(W)))
1294 
/* Zero-masked form: evaluates _mm512_insertf32x8(A, B, imm) and passes it,
 * together with _mm512_setzero_ps(), to __builtin_ia32_selectps_512 under
 * the 16-bit mask U; the result is cast to __m512.
 * NOTE(review): presumably elements whose mask bit is set come from the
 * insert result and the remaining ones are zero -- confirm against the
 * select builtin's definition.
 * The whole expansion is parenthesized so that a postfix operator or sizeof
 * at the use site applies to the complete result. */
#define _mm512_maskz_insertf32x8(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                 (__v16sf)_mm512_setzero_ps()))
1299 
/* Expands to a __builtin_ia32_insertf64x2_512 call with A (viewed as
 * __v8df), B (viewed as __v2df) and position selector imm; the result is
 * cast to __m512d.
 * The whole expansion is parenthesized so that a postfix operator or sizeof
 * at the use site applies to the complete result instead of separating the
 * leading cast from the builtin call. */
#define _mm512_insertf64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x2_512((__v8df)(__m512d)(A), \
                                           (__v2df)(__m128d)(B), (int)(imm)))
1303 
/* Masked form: evaluates _mm512_insertf64x2(A, B, imm) and passes it,
 * together with W (as __v8df), to __builtin_ia32_selectpd_512 under the
 * 8-bit mask U; the result is cast to __m512d.
 * NOTE(review): presumably elements whose mask bit is set come from the
 * insert result and the remaining ones from W -- confirm against the select
 * builtin's definition.
 * The whole expansion is parenthesized so that a postfix operator or sizeof
 * at the use site applies to the complete result. */
#define _mm512_mask_insertf64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                  (__v8df)(__m512d)(W)))
1308 
/* Zero-masked form: evaluates _mm512_insertf64x2(A, B, imm) and passes it,
 * together with _mm512_setzero_pd(), to __builtin_ia32_selectpd_512 under
 * the 8-bit mask U; the result is cast to __m512d.
 * NOTE(review): presumably elements whose mask bit is set come from the
 * insert result and the remaining ones are zero -- confirm against the
 * select builtin's definition.
 * The whole expansion is parenthesized so that a postfix operator or sizeof
 * at the use site applies to the complete result. */
#define _mm512_maskz_insertf64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                  (__v8df)_mm512_setzero_pd()))
1313 
/* Expands to a __builtin_ia32_inserti32x8 call with A (viewed as __v16si),
 * B (viewed as __v8si) and position selector imm; the result is cast to
 * __m512i.
 * The whole expansion is parenthesized so that a postfix operator or sizeof
 * at the use site applies to the complete result instead of separating the
 * leading cast from the builtin call. */
#define _mm512_inserti32x8(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x8((__v16si)(__m512i)(A), \
                                       (__v8si)(__m256i)(B), (int)(imm)))
1317 
/* Masked form: evaluates _mm512_inserti32x8(A, B, imm) and passes it,
 * together with W (as __v16si), to __builtin_ia32_selectd_512 under the
 * 16-bit mask U; the result is cast to __m512i.
 * NOTE(review): presumably elements whose mask bit is set come from the
 * insert result and the remaining ones from W -- confirm against the select
 * builtin's definition.
 * The whole expansion is parenthesized so that a postfix operator or sizeof
 * at the use site applies to the complete result. */
#define _mm512_mask_inserti32x8(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                 (__v16si)(__m512i)(W)))
1322 
/* Zero-masked form: evaluates _mm512_inserti32x8(A, B, imm) and passes it,
 * together with _mm512_setzero_si512(), to __builtin_ia32_selectd_512 under
 * the 16-bit mask U; the result is cast to __m512i.
 * NOTE(review): presumably elements whose mask bit is set come from the
 * insert result and the remaining ones are zero -- confirm against the
 * select builtin's definition.
 * The whole expansion is parenthesized so that a postfix operator or sizeof
 * at the use site applies to the complete result. */
#define _mm512_maskz_inserti32x8(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512()))
1327 
/* Expands to a __builtin_ia32_inserti64x2_512 call with A (viewed as
 * __v8di), B (viewed as __v2di) and position selector imm; the result is
 * cast to __m512i.
 * The whole expansion is parenthesized so that a postfix operator or sizeof
 * at the use site applies to the complete result instead of separating the
 * leading cast from the builtin call. */
#define _mm512_inserti64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x2_512((__v8di)(__m512i)(A), \
                                           (__v2di)(__m128i)(B), (int)(imm)))
1331 
/* Masked form: evaluates _mm512_inserti64x2(A, B, imm) and passes it,
 * together with W (as __v8di), to __builtin_ia32_selectq_512 under the
 * 8-bit mask U; the result is cast to __m512i.
 * NOTE(review): presumably elements whose mask bit is set come from the
 * insert result and the remaining ones from W -- confirm against the select
 * builtin's definition.
 * The whole expansion is parenthesized so that a postfix operator or sizeof
 * at the use site applies to the complete result. */
#define _mm512_mask_inserti64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                  (__v8di)(__m512i)(W)))
1336 
/* Zero-masked form: evaluates _mm512_inserti64x2(A, B, imm) and passes it,
 * together with _mm512_setzero_si512(), to __builtin_ia32_selectq_512 under
 * the 8-bit mask U; the result is cast to __m512i.
 * NOTE(review): presumably elements whose mask bit is set come from the
 * insert result and the remaining ones are zero -- confirm against the
 * select builtin's definition.
 * The whole expansion is parenthesized so that a postfix operator or sizeof
 * at the use site applies to the complete result. */
#define _mm512_maskz_inserti64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                  (__v8di)_mm512_setzero_si512()))
1341 
/* Masked form: expands to __builtin_ia32_fpclassps512_mask on A (viewed as
 * __v16sf) with category selector imm and U (as __mmask16) as the input
 * mask; the result is cast to __mmask16.
 * The whole expansion is parenthesized so that an operator at the use site
 * (e.g. sizeof) applies to the complete result instead of separating the
 * leading cast from the builtin call. */
#define _mm512_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                               (int)(imm), (__mmask16)(U)))
1345 
/* Expands to __builtin_ia32_fpclassps512_mask on A (viewed as __v16sf) with
 * category selector imm and an all-ones input mask ((__mmask16)-1); the
 * result is cast to __mmask16.
 * The whole expansion is parenthesized so that an operator at the use site
 * (e.g. sizeof) applies to the complete result instead of separating the
 * leading cast from the builtin call. */
#define _mm512_fpclass_ps_mask(A, imm) \
  ((__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                               (int)(imm), (__mmask16)-1))
1349 
/* Masked form: expands to __builtin_ia32_fpclasspd512_mask on A (viewed as
 * __v8df) with category selector imm and U (as __mmask8) as the input mask;
 * the result is cast to __mmask8.
 * The whole expansion is parenthesized so that an operator at the use site
 * (e.g. sizeof) applies to the complete result instead of separating the
 * leading cast from the builtin call. */
#define _mm512_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                              (__mmask8)(U)))
1353 
/* Expands to __builtin_ia32_fpclasspd512_mask on A (viewed as __v8df) with
 * category selector imm and an all-ones input mask ((__mmask8)-1); the
 * result is cast to __mmask8.
 * The whole expansion is parenthesized so that an operator at the use site
 * (e.g. sizeof) applies to the complete result instead of separating the
 * leading cast from the builtin call. */
#define _mm512_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                              (__mmask8)-1))
1357 
/* Expands to __builtin_ia32_fpclasssd_mask on A (viewed as __v2df) with
 * category selector imm and an all-ones input mask ((__mmask8)-1); the
 * result is cast to __mmask8.
 * The whole expansion is parenthesized so that an operator at the use site
 * (e.g. sizeof) applies to the complete result instead of separating the
 * leading cast from the builtin call. */
#define _mm_fpclass_sd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                           (__mmask8)-1))
1361 
/* Masked form: expands to __builtin_ia32_fpclasssd_mask on A (viewed as
 * __v2df) with category selector imm and U (as __mmask8) as the input mask;
 * the result is cast to __mmask8.
 * The whole expansion is parenthesized so that an operator at the use site
 * (e.g. sizeof) applies to the complete result instead of separating the
 * leading cast from the builtin call. */
#define _mm_mask_fpclass_sd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                           (__mmask8)(U)))
1365 
/* Expands to __builtin_ia32_fpclassss_mask on A (viewed as __v4sf) with
 * category selector imm and an all-ones input mask ((__mmask8)-1); the
 * result is cast to __mmask8.
 * The whole expansion is parenthesized so that an operator at the use site
 * (e.g. sizeof) applies to the complete result instead of separating the
 * leading cast from the builtin call. */
#define _mm_fpclass_ss_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                           (__mmask8)-1))
1369 
/* Masked form: expands to __builtin_ia32_fpclassss_mask on A (viewed as
 * __v4sf) with category selector imm and U (as __mmask8) as the input mask;
 * the result is cast to __mmask8.
 * The whole expansion is parenthesized so that an operator at the use site
 * (e.g. sizeof) applies to the complete result instead of separating the
 * leading cast from the builtin call. */
#define _mm_mask_fpclass_ss_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                           (__mmask8)(U)))
1373 
/* Remove the function-attribute helper macros defined at the top of this
 * header so they do not remain defined once the header has been included. */
#undef __DEFAULT_FN_ATTRS512
#undef __DEFAULT_FN_ATTRS
1376 
1377 #endif
1378