1 /* Copyright (C) 2013-2021 Free Software Foundation, Inc.
2
3 This file is part of GCC.
4
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
9
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
18
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
23
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
26 #endif
27
28 #ifndef _AVX512ERINTRIN_H_INCLUDED
29 #define _AVX512ERINTRIN_H_INCLUDED
30
31 #ifndef __AVX512ER__
32 #pragma GCC push_options
33 #pragma GCC target("avx512er")
34 #define __DISABLE_AVX512ER__
35 #endif /* __AVX512ER__ */
36
37 /* Internal data types for implementing the intrinsics. */
38 typedef double __v8df __attribute__ ((__vector_size__ (64)));
39 typedef float __v16sf __attribute__ ((__vector_size__ (64)));
40
41 /* The Intel API is flexible enough that we must allow aliasing with other
42 vector types, and their scalar components. */
43 typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
44 typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
45
46 typedef unsigned char __mmask8;
47 typedef unsigned short __mmask16;
48
49 #ifdef __OPTIMIZE__
50 extern __inline __m512d
51 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_exp2a23_round_pd(__m512d __A,int __R)52 _mm512_exp2a23_round_pd (__m512d __A, int __R)
53 {
54 __m512d __W;
55 return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
56 (__v8df) __W,
57 (__mmask8) -1, __R);
58 }
59
/* 2^x (23-bit accuracy) on packed doubles; result elements whose bit in
   __U is clear are copied from __W (merge masking).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
					       (__v8df) __W,
					       (__mmask8) __U, __R);
}

/* 2^x (23-bit accuracy) on packed doubles; result elements whose bit in
   __U is clear are zeroed (zero masking).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
					       (__v8df) _mm512_setzero_pd (),
					       (__mmask8) __U, __R);
}
77
78 extern __inline __m512
79 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_exp2a23_round_ps(__m512 __A,int __R)80 _mm512_exp2a23_round_ps (__m512 __A, int __R)
81 {
82 __m512 __W;
83 return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
84 (__v16sf) __W,
85 (__mmask16) -1, __R);
86 }
87
/* 2^x (23-bit accuracy) on packed floats; result elements whose bit in
   __U is clear are copied from __W (merge masking).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
					      (__v16sf) __W,
					      (__mmask16) __U, __R);
}

/* 2^x (23-bit accuracy) on packed floats; result elements whose bit in
   __U is clear are zeroed (zero masking).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
					      (__v16sf) _mm512_setzero_ps (),
					      (__mmask16) __U, __R);
}
105
106 extern __inline __m512d
107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_pd(__m512d __A,int __R)108 _mm512_rcp28_round_pd (__m512d __A, int __R)
109 {
110 __m512d __W;
111 return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
112 (__v8df) __W,
113 (__mmask8) -1, __R);
114 }
115
/* Reciprocal approximation (2^-28 accuracy) on packed doubles; result
   elements whose bit in __U is clear are copied from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
						(__v8df) __W,
						(__mmask8) __U, __R);
}

/* Reciprocal approximation (2^-28 accuracy) on packed doubles; result
   elements whose bit in __U is clear are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
						(__v8df) _mm512_setzero_pd (),
						(__mmask8) __U, __R);
}
133
134 extern __inline __m512
135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_ps(__m512 __A,int __R)136 _mm512_rcp28_round_ps (__m512 __A, int __R)
137 {
138 __m512 __W;
139 return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
140 (__v16sf) __W,
141 (__mmask16) -1, __R);
142 }
143
/* Reciprocal approximation (2^-28 accuracy) on packed floats; result
   elements whose bit in __U is clear are copied from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
					       (__v16sf) __W,
					       (__mmask16) __U, __R);
}

/* Reciprocal approximation (2^-28 accuracy) on packed floats; result
   elements whose bit in __U is clear are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
					       (__v16sf) _mm512_setzero_ps (),
					       (__mmask16) __U, __R);
}
161
/* Reciprocal approximation (2^-28 accuracy) of the low double of __B,
   upper element taken from __A.  Note the builtin takes its vector
   operands in (__B, __A) order.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
						 (__v2df) __A,
						 __R);
}

/* As above, but the low result element is taken from __W when the low
   bit of __U is clear (merge masking).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
			 __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
						      (__v2df) __A,
						      (__v2df) __W,
						      __U,
						      __R);
}

/* As above, but the low result element is zeroed when the low bit of
   __U is clear (zero masking).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
						      (__v2df) __A,
						      (__v2df)
						      _mm_setzero_pd (),
						      __U,
						      __R);
}
194
/* Reciprocal approximation (2^-28 accuracy) of the low float of __B,
   upper elements taken from __A.  Note the builtin takes its vector
   operands in (__B, __A) order.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
						(__v4sf) __A,
						__R);
}

/* As above, but the low result element is taken from __W when the low
   bit of __U is clear (merge masking).  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
			 __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
						     (__v4sf) __A,
						     (__v4sf) __W,
						     __U,
						     __R);
}

/* As above, but the low result element is zeroed when the low bit of
   __U is clear (zero masking).  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
						     (__v4sf) __A,
						     (__v4sf)
						     _mm_setzero_ps (),
						     __U,
						     __R);
}
227
228 extern __inline __m512d
229 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_pd(__m512d __A,int __R)230 _mm512_rsqrt28_round_pd (__m512d __A, int __R)
231 {
232 __m512d __W;
233 return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
234 (__v8df) __W,
235 (__mmask8) -1, __R);
236 }
237
/* Reciprocal-sqrt approximation (2^-28 accuracy) on packed doubles;
   result elements whose bit in __U is clear are copied from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
						  (__v8df) __W,
						  (__mmask8) __U, __R);
}

/* Reciprocal-sqrt approximation (2^-28 accuracy) on packed doubles;
   result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
						  (__v8df) _mm512_setzero_pd (),
						  (__mmask8) __U, __R);
}
255
256 extern __inline __m512
257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_ps(__m512 __A,int __R)258 _mm512_rsqrt28_round_ps (__m512 __A, int __R)
259 {
260 __m512 __W;
261 return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
262 (__v16sf) __W,
263 (__mmask16) -1, __R);
264 }
265
/* Reciprocal-sqrt approximation (2^-28 accuracy) on packed floats;
   result elements whose bit in __U is clear are copied from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
						 (__v16sf) __W,
						 (__mmask16) __U, __R);
}

/* Reciprocal-sqrt approximation (2^-28 accuracy) on packed floats;
   result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
						 (__v16sf) _mm512_setzero_ps (),
						 (__mmask16) __U, __R);
}
283
/* Reciprocal-sqrt approximation (2^-28 accuracy) of the low double of
   __B, upper element taken from __A.  Note the builtin takes its vector
   operands in (__B, __A) order.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
						   (__v2df) __A,
						   __R);
}

/* As above, but the low result element is taken from __W when the low
   bit of __U is clear (merge masking).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
			   __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
							(__v2df) __A,
							(__v2df) __W,
							__U,
							__R);
}

/* As above, but the low result element is zeroed when the low bit of
   __U is clear (zero masking).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
							(__v2df) __A,
							(__v2df)
							_mm_setzero_pd (),
							__U,
							__R);
}
316
/* Reciprocal-sqrt approximation (2^-28 accuracy) of the low float of
   __B, upper elements taken from __A.  Note the builtin takes its
   vector operands in (__B, __A) order.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
						  (__v4sf) __A,
						  __R);
}

/* As above, but the low result element is taken from __W when the low
   bit of __U is clear (merge masking).  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
			   __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
						       (__v4sf) __A,
						       (__v4sf) __W,
						       __U,
						       __R);
}

/* As above, but the low result element is zeroed when the low bit of
   __U is clear (zero masking).  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
						       (__v4sf) __A,
						       (__v4sf)
						       _mm_setzero_ps (),
						       __U,
						       __R);
}
349
#else
/* Without __OPTIMIZE__ the always-inline functions above cannot be
   relied on to fold the rounding-mode argument into the immediate the
   builtins require, so macro forms are provided instead.
   NOTE(review): unlike the inline forms, most of these macros do not
   parenthesize their arguments; pass simple expressions.  */
#define _mm512_exp2a23_round_pd(A, C)            \
  __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
  __builtin_ia32_exp2pd_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_pd(U, A, C)   \
  __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_exp2a23_round_ps(A, C)            \
  __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
  __builtin_ia32_exp2ps_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_ps(U, A, C)   \
  __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rcp28_round_pd(A, C)            \
  __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
  __builtin_ia32_rcp28pd_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_pd(U, A, C)   \
  __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rcp28_round_ps(A, C)            \
  __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
  __builtin_ia32_rcp28ps_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_ps(U, A, C)   \
  __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rsqrt28_round_pd(A, C)            \
  __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
  __builtin_ia32_rsqrt28pd_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_pd(U, A, C)   \
  __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rsqrt28_round_ps(A, C)            \
  __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
  __builtin_ia32_rsqrt28ps_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_ps(U, A, C)   \
  __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

/* Scalar forms: the builtins take the operands in (B, A) order; A
   supplies the upper elements of the result.  */
#define _mm_rcp28_round_sd(A, B, R)	\
  __builtin_ia32_rcp28sd_round(A, B, R)

#define _mm_mask_rcp28_round_sd(W, U, A, B, R)	\
  __builtin_ia32_rcp28sd_mask_round ((A), (B), (W), (U), (R))

#define _mm_maskz_rcp28_round_sd(U, A, B, R)	\
  __builtin_ia32_rcp28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (), \
				     (U), (R))

#define _mm_rcp28_round_ss(A, B, R)	\
  __builtin_ia32_rcp28ss_round(A, B, R)

#define _mm_mask_rcp28_round_ss(W, U, A, B, R)	\
  __builtin_ia32_rcp28ss_mask_round ((A), (B), (W), (U), (R))

#define _mm_maskz_rcp28_round_ss(U, A, B, R)	\
  __builtin_ia32_rcp28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (), \
				     (U), (R))

#define _mm_rsqrt28_round_sd(A, B, R)	\
  __builtin_ia32_rsqrt28sd_round(A, B, R)

#define _mm_mask_rsqrt28_round_sd(W, U, A, B, R)	\
  __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (W), (U), (R))

#define _mm_maskz_rsqrt28_round_sd(U, A, B, R)	\
  __builtin_ia32_rsqrt28sd_mask_round ((A), (B), (__v2df) _mm_setzero_pd (),\
				       (U), (R))

#define _mm_rsqrt28_round_ss(A, B, R)	\
  __builtin_ia32_rsqrt28ss_round(A, B, R)

#define _mm_mask_rsqrt28_round_ss(W, U, A, B, R)	\
  __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (W), (U), (R))

#define _mm_maskz_rsqrt28_round_ss(U, A, B, R)	\
  __builtin_ia32_rsqrt28ss_mask_round ((A), (B), (__v4sf) _mm_setzero_ps (),\
				       (U), (R))

#endif
446
/* Convenience wrappers that use the current rounding direction
   (_MM_FROUND_CUR_DIRECTION); defined for both the __OPTIMIZE__ and
   non-__OPTIMIZE__ branches above.  */
#define _mm_mask_rcp28_sd(W, U, A, B)\
    _mm_mask_rcp28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_sd(U, A, B)\
    _mm_maskz_rcp28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_ss(W, U, A, B)\
    _mm_mask_rcp28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_ss(U, A, B)\
    _mm_maskz_rcp28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_sd(W, U, A, B)\
    _mm_mask_rsqrt28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_sd(U, A, B)\
    _mm_maskz_rsqrt28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_ss(W, U, A, B)\
    _mm_mask_rsqrt28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_ss(U, A, B)\
    _mm_maskz_rsqrt28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_pd(A) \
    _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(W, U, A) \
    _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(U, A) \
    _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_ps(A) \
    _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(W, U, A) \
    _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(U, A) \
    _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_pd(A) \
    _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(W, U, A) \
    _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(U, A) \
    _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_ps(A) \
    _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(W, U, A) \
    _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(U, A) \
    _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_pd(A) \
    _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(W, U, A) \
    _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(U, A) \
    _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_ps(A) \
    _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(W, U, A) \
    _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(U, A) \
    _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

/* Unmasked scalar forms: these call the builtins directly, so the
   operands are swapped here to the builtins' (B, A) order.  */
#define _mm_rcp28_sd(A, B) \
    __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_ss(A, B) \
    __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_sd(A, B) \
    __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_ss(A, B) \
    __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
536
537 #ifdef __DISABLE_AVX512ER__
538 #undef __DISABLE_AVX512ER__
539 #pragma GCC pop_options
540 #endif /* __DISABLE_AVX512ER__ */
541
542 #endif /* _AVX512ERINTRIN_H_INCLUDED */
543