/*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512DQINTRIN_H
#define __AVX512DQINTRIN_H

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
  return (__m512i) ((__v8du) __A * (__v8du) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_mullo_epi64(__A, __B),
                                             (__v8di)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                             (__v8di)_mm512_mullo_epi64(__A, __B),
                                             (__v8di)_mm512_setzero_si512());
}
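
/* Example (illustrative; the variable names below are arbitrary): the masked
 * form keeps the corresponding lane of __W wherever a mask bit is zero.
 *
 *   __m512i a        = _mm512_set1_epi64(3);
 *   __m512i b        = _mm512_set1_epi64(7);
 *   __m512i fallback = _mm512_setzero_si512();
 *   __m512i r        = _mm512_mask_mullo_epi64(fallback, 0x0F, a, b);
 *   // lanes 0-3 hold 21, lanes 4-7 keep the 0 taken from fallback
 */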

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_xor_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A ^ (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_xor_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_xor_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_xor_ps (__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A ^ (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_xor_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_xor_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_or_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A | (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_or_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_or_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_or_ps(__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A | (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_or_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_or_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_and_pd(__m512d __A, __m512d __B) {
  return (__m512d)((__v8du)__A & (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_and_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_and_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_and_ps(__m512 __A, __m512 __B) {
  return (__m512)((__v16su)__A & (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_and_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_and_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_andnot_pd(__m512d __A, __m512d __B) {
  return (__m512d)(~(__v8du)__A & (__v8du)__B);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_andnot_pd(__A, __B),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_andnot_pd(__A, __B),
                                              (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_andnot_ps(__m512 __A, __m512 __B) {
  return (__m512)(~(__v16su)__A & (__v16su)__B);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_andnot_ps(__A, __B),
                                             (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) {
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                             (__v16sf)_mm512_andnot_ps(__A, __B),
                                             (__v16sf)_mm512_setzero_ps());
}
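
/* Example (illustrative; the variable names are arbitrary): because the
 * and-not forms compute ~A & B, masking off the sign bit of every element
 * gives a vectorized absolute value:
 *
 *   __m512d x       = _mm512_set1_pd(-2.5);
 *   __m512d signbit = _mm512_set1_pd(-0.0);          // only the sign bit set
 *   __m512d absx    = _mm512_andnot_pd(signbit, x);  // ~signbit & x == 2.5
 */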

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epi64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                (__v8di) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundpd_epi64(A, R) \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)(__m512i)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) \
  (__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)(U), (int)(R))

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtpd_epu64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                 (__v8di) _mm512_setzero_si512(),
                 (__mmask8) -1,
                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                 (__v8di) __W,
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
                 (__v8di) _mm512_setzero_si512(),
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundpd_epu64(A, R) \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) \
  (__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R))
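
/* Example (illustrative; the variable names are arbitrary): the _cvt_round*
 * macros take an explicit rounding mode instead of the current MXCSR one,
 * e.g. round-to-nearest with floating-point exceptions suppressed:
 *
 *   __m512d v = _mm512_set1_pd(2.5);
 *   __m512i q = _mm512_cvt_roundpd_epi64(v, _MM_FROUND_TO_NEAREST_INT |
 *                                           _MM_FROUND_NO_EXC);
 *   // each lane holds 2 (ties round to even)
 */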

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtps_epi64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                (__v8di) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
                (__v8di) _mm512_setzero_si512(),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundps_epi64(A, R) \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)(__m512i)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundps_epi64(U, A, R) \
  (__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                           (__v8di)_mm512_setzero_si512(), \
                                           (__mmask8)(U), (int)(R))

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvtps_epu64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                 (__v8di) _mm512_setzero_si512(),
                 (__mmask8) -1,
                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                 (__v8di) __W,
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
                 (__v8di) _mm512_setzero_si512(),
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundps_epu64(A, R) \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundps_epu64(U, A, R) \
  (__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R))


static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepi64_pd (__m512i __A) {
  return (__m512d)__builtin_convertvector((__v8di)__A, __v8df);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtepi64_pd(__A),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtepi64_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}

#define _mm512_cvt_roundepi64_pd(A, R) \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) \
  (__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_cvtepi64_ps (__m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
               (__v8sf) _mm256_setzero_ps(),
               (__mmask8) -1,
               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
               (__v8sf) __W,
               (__mmask8) __U,
               _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
               (__v8sf) _mm256_setzero_ps(),
               (__mmask8) __U,
               _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepi64_ps(A, R) \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) \
  (__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
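
/* Example (illustrative; the variable names are arbitrary): converting eight
 * signed 64-bit integers to doubles while leaving unselected lanes untouched:
 *
 *   __m512i counts = _mm512_set1_epi64(42);
 *   __m512d prev   = _mm512_set1_pd(1.0);
 *   __m512d mixed  = _mm512_mask_cvtepi64_pd(prev, 0xAA, counts);
 *   // odd lanes hold 42.0, even lanes keep 1.0 from prev
 */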


static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epi64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                 (__v8di) _mm512_setzero_si512(),
                 (__mmask8) -1,
                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                 (__v8di) __W,
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
                 (__v8di) _mm512_setzero_si512(),
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundpd_epi64(A, R) \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) \
  (__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R))

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttpd_epu64 (__m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                  (__v8di) _mm512_setzero_si512(),
                  (__mmask8) -1,
                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                  (__v8di) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
                  (__v8di) _mm512_setzero_si512(),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundpd_epu64(A, R) \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) \
  (__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R))

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epi64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                 (__v8di) _mm512_setzero_si512(),
                 (__mmask8) -1,
                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                 (__v8di) __W,
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
                 (__v8di) _mm512_setzero_si512(),
                 (__mmask8) __U,
                 _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundps_epi64(A, R) \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) \
  (__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R))

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_cvttps_epu64 (__m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                  (__v8di) _mm512_setzero_si512(),
                  (__mmask8) -1,
                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                  (__v8di) __W,
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
                  (__v8di) _mm512_setzero_si512(),
                  (__mmask8) __U,
                  _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvtt_roundps_epu64(A, R) \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) \
  (__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R))
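
/* Example (illustrative; the variable names are arbitrary): the cvtt* forms
 * truncate toward zero rather than using the current rounding mode:
 *
 *   __m512d v = _mm512_set1_pd(-1.7);
 *   __m512i t = _mm512_cvttpd_epi64(v);   // each lane holds -1
 *   __m512i r = _mm512_cvtpd_epi64(v);    // -2 under round-to-nearest
 */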

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_cvtepu64_pd (__m512i __A) {
  return (__m512d)__builtin_convertvector((__v8du)__A, __v8df);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtepu64_pd(__A),
                                              (__v8df)__W);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                              (__v8df)_mm512_cvtepu64_pd(__A),
                                              (__v8df)_mm512_setzero_pd());
}

#define _mm512_cvt_roundepu64_pd(A, R) \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R))


#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) \
  (__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R))


static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_cvtepu64_ps (__m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                (__v8sf) _mm256_setzero_ps(),
                (__mmask8) -1,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                (__v8sf) __W,
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m256 __DEFAULT_FN_ATTRS
_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
                (__v8sf) _mm256_setzero_ps(),
                (__mmask8) __U,
                _MM_FROUND_CUR_DIRECTION);
}

#define _mm512_cvt_roundepu64_ps(A, R) \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R))

#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) \
  (__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R))
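
/* Example (illustrative; the variable names are arbitrary): the epu64 forms
 * treat each 64-bit lane as unsigned, so a lane of all ones converts to
 * 2^64 - 1 rather than -1:
 *
 *   __m512i ones = _mm512_set1_epi64(-1);
 *   __m512d u    = _mm512_cvtepu64_pd(ones);   // about 1.8446744e19 per lane
 *   __m512d s    = _mm512_cvtepi64_pd(ones);   // -1.0 per lane
 */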

#define _mm512_range_pd(A, B, C) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_range_pd(W, U, A, B, C) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_range_pd(U, A, B, C) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_range_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, (int)(R))

#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)(__m512d)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm512_maskz_range_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(C), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(U), (int)(R))

#define _mm512_range_ps(A, B, C) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_range_ps(W, U, A, B, C) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_range_ps(U, A, B, C) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_range_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, (int)(R))

#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)(__m512)(W), (__mmask16)(U), \
                                         (int)(R))

#define _mm512_maskz_range_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(B), (int)(C), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(U), (int)(R))

#define _mm_range_round_ss(A, B, C, R) \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(C), \
                                               (int)(R))

#define _mm_range_ss(A, B, C) _mm_range_round_ss(A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_round_ss(W, U, A, B, C, R) \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(C), \
                                               (int)(R))

#define _mm_mask_range_ss(W, U, A, B, C) _mm_mask_range_round_ss(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_round_ss(U, A, B, C, R) \
  (__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(C), \
                                               (int)(R))

#define _mm_maskz_range_ss(U, A, B, C) _mm_maskz_range_round_ss(U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_range_round_sd(A, B, C, R) \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(C), \
                                                (int)(R))

#define _mm_range_sd(A, B, C) _mm_range_round_sd(A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_round_sd(W, U, A, B, C, R) \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(C), \
                                                (int)(R))

#define _mm_mask_range_sd(W, U, A, B, C) _mm_mask_range_round_sd(W, U, A, B, C, _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_round_sd(U, A, B, C, R) \
  (__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(C), \
                                                (int)(R))

#define _mm_maskz_range_sd(U, A, B, C) _mm_maskz_range_round_sd(U, A, B, C, _MM_FROUND_CUR_DIRECTION)
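
/* Example (illustrative; the variable names are arbitrary, and the immediate
 * follows the VRANGEPD/VRANGEPS encoding in the Intel SDM, where bits [1:0]
 * select min/max/abs-min/abs-max and bits [3:2] control the sign of the
 * result):
 *
 *   __m512d a  = _mm512_set1_pd(3.0);
 *   __m512d b  = _mm512_set1_pd(5.0);
 *   __m512d lo = _mm512_range_pd(a, b, 0x0);   // per-lane minimum: 3.0
 *   __m512d hi = _mm512_range_pd(a, b, 0x1);   // per-lane maximum: 5.0
 */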

#define _mm512_reduce_pd(A, B) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_reduce_pd(W, U, A, B) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_reduce_pd(U, A, B) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION)

#define _mm512_reduce_ps(A, B) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_reduce_ps(W, U, A, B) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_reduce_ps(U, A, B) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_reduce_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_reduce_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_reduce_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_reduce_round_ps(A, B, R) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_reduce_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_maskz_reduce_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R))

#define _mm_reduce_ss(A, B, C) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                       (int)(C), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_reduce_ss(W, U, A, B, C) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
                                       (int)(C), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_reduce_ss(U, A, B, C) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), \
                                       _MM_FROUND_CUR_DIRECTION)

#define _mm_reduce_round_ss(A, B, C, R) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                       (int)(C), (int)(R))

#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)(__m128)(W), (__mmask8)(U), \
                                       (int)(C), (int)(R))

#define _mm_maskz_reduce_round_ss(U, A, B, C, R) \
  (__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                       (__v4sf)(__m128)(B), \
                                       (__v4sf)_mm_setzero_ps(), \
                                       (__mmask8)(U), (int)(C), (int)(R))

#define _mm_reduce_sd(A, B, C) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_reduce_sd(W, U, A, B, C) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)(__m128d)(W), (__mmask8)(U), \
                                        (int)(C), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_reduce_sd(U, A, B, C) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)(U), (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION)

#define _mm_reduce_round_sd(A, B, C, R) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)-1, (int)(C), (int)(R))

#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)(__m128d)(W), (__mmask8)(U), \
                                        (int)(C), (int)(R))

#define _mm_maskz_reduce_round_sd(U, A, B, C, R) \
  (__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                        (__v2df)(__m128d)(B), \
                                        (__v2df)_mm_setzero_pd(), \
                                        (__mmask8)(U), (int)(C), (int)(R))
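
/* Example (illustrative; the variable names are arbitrary, and the immediate
 * follows the VREDUCEPD/VREDUCEPS encoding, where bits [7:4] give the number
 * of fraction bits kept and bits [1:0] pick the rounding used for the part
 * that is subtracted):
 *
 *   __m512d v    = _mm512_set1_pd(2.75);
 *   __m512d frac = _mm512_reduce_pd(v, 0x01);   // v - floor(v) == 0.75
 */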

static __inline__ __mmask16 __DEFAULT_FN_ATTRS
_mm512_movepi32_mask (__m512i __A)
{
  return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_movm_epi32 (__mmask16 __A)
{
  return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_movm_epi64 (__mmask8 __A)
{
  return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
}

static __inline__ __mmask8 __DEFAULT_FN_ATTRS
_mm512_movepi64_mask (__m512i __A)
{
  return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
}
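
/* Example (illustrative; the variable names are arbitrary): movepi32_mask
 * packs the sign bit of each 32-bit lane into a mask, and movm_epi32 does the
 * reverse, broadcasting each mask bit across its whole lane:
 *
 *   __m512i   x = _mm512_set1_epi32(-1);
 *   __mmask16 m = _mm512_movepi32_mask(x);    // 0xFFFF: every sign bit is set
 *   __m512i   y = _mm512_movm_epi32(0x0001);  // lane 0 = all ones, rest = 0
 */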


static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcast_f32x2 (__m128 __A)
{
  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                         0, 1, 0, 1, 0, 1, 0, 1,
                                         0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x2(__A),
                                             (__v16sf)__O);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                             (__v16sf)_mm512_broadcast_f32x2(__A),
                                             (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_broadcast_f32x8(__m256 __A)
{
  return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A,
                                         0, 1, 2, 3, 4, 5, 6, 7,
                                         0, 1, 2, 3, 4, 5, 6, 7);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                           (__v16sf)_mm512_broadcast_f32x8(__A),
                                           (__v16sf)__O);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
                                           (__v16sf)_mm512_broadcast_f32x8(__A),
                                           (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_broadcast_f64x2(__m128d __A)
{
  return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                            (__v8df)_mm512_broadcast_f64x2(__A),
                                            (__v8df)__O);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
                                            (__v8df)_mm512_broadcast_f64x2(__A),
                                            (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i32x2 (__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1,
                                          0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x2(__A),
                                             (__v16si)__O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                             (__v16si)_mm512_broadcast_i32x2(__A),
                                             (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i32x8(__m256i __A)
{
  return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A,
                                          0, 1, 2, 3, 4, 5, 6, 7,
                                          0, 1, 2, 3, 4, 5, 6, 7);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                           (__v16si)_mm512_broadcast_i32x8(__A),
                                           (__v16si)__O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                           (__v16si)_mm512_broadcast_i32x8(__A),
                                           (__v16si)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_broadcast_i64x2(__m128i __A)
{
  return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                            (__v8di)_mm512_broadcast_i64x2(__A),
                                            (__v8di)__O);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                            (__v8di)_mm512_broadcast_i64x2(__A),
                                            (__v8di)_mm512_setzero_si512());
}
1104 
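/* The extract operations below are macros rather than inline functions because
 * the lane selector is encoded as an immediate and must therefore be an
 * integer constant expression. _mm512_extractf32x8_ps and
 * _mm512_extracti32x8_epi32 pick one of the two 256-bit halves (imm is 0 or 1);
 * _mm512_extractf64x2_pd and _mm512_extracti64x2_epi64 pick one of the four
 * 128-bit lanes (imm is 0..3). Illustrative use (variable names here are
 * hypothetical):
 *
 *   __m256 hi = _mm512_extractf32x8_ps(v, 1);   // upper eight floats of v
 *   __m128d lo = _mm512_extractf64x2_pd(w, 0);  // lowest two doubles of w
 */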
#define _mm512_extractf32x8_ps(A, imm) \
  (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v8sf)_mm256_undefined_ps(), \
                                           (__mmask8)-1)

#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
  (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v8sf)(__m256)(W), \
                                           (__mmask8)(U))

#define _mm512_maskz_extractf32x8_ps(U, A, imm) \
  (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U))

#define _mm512_extractf64x2_pd(A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                (int)(imm), \
                                                (__v2df)_mm_undefined_pd(), \
                                                (__mmask8)-1)

#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                (int)(imm), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U))

#define _mm512_maskz_extractf64x2_pd(U, A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                (int)(imm), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U))

#define _mm512_extracti32x8_epi32(A, imm) \
  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v8si)_mm256_undefined_si256(), \
                                            (__mmask8)-1)

#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U))

#define _mm512_maskz_extracti32x8_epi32(U, A, imm) \
  (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U))

#define _mm512_extracti64x2_epi64(A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_undefined_si128(), \
                                                (__mmask8)-1)

#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                (int)(imm), \
                                                (__v2di)(__m128i)(W), \
                                                (__mmask8)(U))

#define _mm512_maskz_extracti64x2_epi64(U, A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_setzero_si128(), \
                                                (__mmask8)(U))

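/* The insert operations are the converse of the extracts: they copy A and
 * overwrite the 256-bit half (insertf32x8/inserti32x8, imm 0 or 1) or the
 * 128-bit lane (insertf64x2/inserti64x2, imm 0..3) selected by imm with B.
 * The _mask/_maskz forms then blend the result with W, or with zero, under
 * the write mask. */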
#define _mm512_insertf32x8(A, B, imm) \
  (__m512)__builtin_ia32_insertf32x8((__v16sf)(__m512)(A), \
                                     (__v8sf)(__m256)(B), (int)(imm))

#define _mm512_mask_insertf32x8(W, U, A, B, imm) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                 (__v16sf)(__m512)(W))

#define _mm512_maskz_insertf32x8(U, A, B, imm) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                 (__v16sf)_mm512_setzero_ps())

#define _mm512_insertf64x2(A, B, imm) \
  (__m512d)__builtin_ia32_insertf64x2_512((__v8df)(__m512d)(A), \
                                          (__v2df)(__m128d)(B), (int)(imm))

#define _mm512_mask_insertf64x2(W, U, A, B, imm) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                  (__v8df)(__m512d)(W))

#define _mm512_maskz_insertf64x2(U, A, B, imm) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                  (__v8df)_mm512_setzero_pd())

#define _mm512_inserti32x8(A, B, imm) \
  (__m512i)__builtin_ia32_inserti32x8((__v16si)(__m512i)(A), \
                                      (__v8si)(__m256i)(B), (int)(imm))

#define _mm512_mask_inserti32x8(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                 (__v16si)(__m512i)(W))

#define _mm512_maskz_inserti32x8(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512())

#define _mm512_inserti64x2(A, B, imm) \
  (__m512i)__builtin_ia32_inserti64x2_512((__v8di)(__m512i)(A), \
                                          (__v2di)(__m128i)(B), (int)(imm))

#define _mm512_mask_inserti64x2(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                  (__v8di)(__m512i)(W))

#define _mm512_maskz_inserti64x2(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                  (__v8di)_mm512_setzero_si512())

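/* The fpclass operations test each element (or only the low element for the
 * _sd/_ss forms) against the categories enabled in imm and set the matching
 * mask bits. The imm bits follow the VFPCLASS encoding:
 *   0x01 QNaN        0x02 +0         0x04 -0        0x08 +infinity
 *   0x10 -infinity   0x20 denormal   0x40 negative  0x80 SNaN
 * For example, _mm512_fpclass_ps_mask(v, 0x01 | 0x80) would flag every NaN
 * element of a hypothetical vector v. */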
#define _mm512_mask_fpclass_ps_mask(U, A, imm) \
  (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                              (int)(imm), (__mmask16)(U))

#define _mm512_fpclass_ps_mask(A, imm) \
  (__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                              (int)(imm), (__mmask16)-1)

#define _mm512_mask_fpclass_pd_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__mmask8)(U))

#define _mm512_fpclass_pd_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__mmask8)-1)

#define _mm_fpclass_sd_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                          (__mmask8)-1)

#define _mm_mask_fpclass_sd_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                          (__mmask8)(U))

#define _mm_fpclass_ss_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                          (__mmask8)-1)

#define _mm_mask_fpclass_ss_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                          (__mmask8)(U))

#undef __DEFAULT_FN_ATTRS

#endif