/* Copyright (C) 2013-2021 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef _AVX512ERINTRIN_H_INCLUDED
#define _AVX512ERINTRIN_H_INCLUDED

#ifndef __AVX512ER__
#pragma GCC push_options
#pragma GCC target("avx512er")
#define __DISABLE_AVX512ER__
#endif /* __AVX512ER__ */
/* Internal data types for implementing the intrinsics.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_exp2a23_round_pd (__m512d __A, int __R)
{
  __m512d __W;		/* Intentionally left uninitialized: the all-ones
			   mask overwrites every element, so the merge
			   source is a don't-care.  The same idiom recurs
			   in the other unmasked intrinsics below.  */
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
					       (__v8df) __W,
					       (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
					       (__v8df) __W,
					       (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
					       (__v8df) _mm512_setzero_pd (),
					       (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_exp2a23_round_ps (__m512 __A, int __R)
{
  __m512 __W;
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
					      (__v16sf) __W,
					      (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
					      (__v16sf) __W,
					      (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
					      (__v16sf) _mm512_setzero_ps (),
					      (__mmask16) __U, __R);
}
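
/* Usage sketch (illustrative, not part of this header): VEXP2PD/VEXP2PS
   approximate 2^x with a maximum relative error of 2^-23, the "a23" in
   the intrinsic names.  The rounding argument must be a compile-time
   constant such as _MM_FROUND_CUR_DIRECTION or _MM_FROUND_NO_EXC.

     #include <immintrin.h>

     __m512
     pow2_approx (__m512 __x)
     {
       return _mm512_exp2a23_round_ps (__x, _MM_FROUND_CUR_DIRECTION);
     }
*/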

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_pd (__m512d __A, int __R)
{
  __m512d __W;
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
						(__v8df) __W,
						(__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
						(__v8df) __W,
						(__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
						(__v8df) _mm512_setzero_pd (),
						(__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_ps (__m512 __A, int __R)
{
  __m512 __W;
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
					       (__v16sf) __W,
					       (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
					       (__v16sf) __W,
					       (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
					       (__v16sf) _mm512_setzero_ps (),
					       (__mmask16) __U, __R);
}
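
/* Usage sketch (illustrative, not part of this header): VRCP28PD/VRCP28PS
   return an approximate reciprocal with a maximum relative error of
   2^-28, the "28" in the intrinsic names, which is already finer than
   single precision can represent.

     #include <immintrin.h>

     __m512d
     recip_approx (__m512d __a)
     {
       return _mm512_rcp28_round_pd (__a, _MM_FROUND_CUR_DIRECTION);
     }
*/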

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
						 (__v2df) __A,
						 __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
			 __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
						      (__v2df) __A,
						      (__v2df) __W,
						      __U,
						      __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rcp28sd_mask_round ((__v2df) __B,
						      (__v2df) __A,
						      (__v2df)
						      _mm_setzero_pd (),
						      __U,
						      __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
						(__v4sf) __A,
						__R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
			 __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
						     (__v4sf) __A,
						     (__v4sf) __W,
						     __U,
						     __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rcp28ss_mask_round ((__v4sf) __B,
						     (__v4sf) __A,
						     (__v4sf)
						     _mm_setzero_ps (),
						     __U,
						     __R);
}
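
/* Usage sketch (illustrative, not part of this header): in the scalar
   variants the low element of the result comes from the low element of
   __B, while the remaining upper elements are copied from __A; the
   inline bodies above pass __B before __A to the builtins for exactly
   this reason.

     #include <immintrin.h>

     __m128d
     recip_low (__m128d __upper, __m128d __val)
     {
       // result[0] = rcp28 (__val[0]); result[1] = __upper[1].
       return _mm_rcp28_round_sd (__upper, __val, _MM_FROUND_CUR_DIRECTION);
     }
*/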

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_pd (__m512d __A, int __R)
{
  __m512d __W;
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
						  (__v8df) __W,
						  (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
						  (__v8df) __W,
						  (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
						  (__v8df) _mm512_setzero_pd (),
						  (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_ps (__m512 __A, int __R)
{
  __m512 __W;
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
						 (__v16sf) __W,
						 (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
						 (__v16sf) __W,
						 (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
						 (__v16sf) _mm512_setzero_ps (),
						 (__mmask16) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
						   (__v2df) __A,
						   __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt28_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
			   __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
							(__v2df) __A,
							(__v2df) __W,
							__U,
							__R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt28_round_sd (__mmask8 __U, __m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rsqrt28sd_mask_round ((__v2df) __B,
							(__v2df) __A,
							(__v2df)
							_mm_setzero_pd (),
							__U,
							__R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
						  (__v4sf) __A,
						  __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt28_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
			   __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
						       (__v4sf) __A,
						       (__v4sf) __W,
						       __U,
						       __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt28_round_ss (__mmask8 __U, __m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rsqrt28ss_mask_round ((__v4sf) __B,
						       (__v4sf) __A,
						       (__v4sf)
						       _mm_setzero_ps (),
						       __U,
						       __R);
}
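
/* Usage sketch (illustrative, not part of this header): VRSQRT28
   approximates 1/sqrt(x) to within a relative error of 2^-28, so for
   single precision the result is usually usable directly, e.g. when
   normalising vectors:

     #include <immintrin.h>

     __m512
     scale_by_inv_norm (__m512 __v, __m512 __sumsq)
     {
       __m512 __r = _mm512_rsqrt28_round_ps (__sumsq,
					     _MM_FROUND_CUR_DIRECTION);
       return _mm512_mul_ps (__v, __r);
     }
*/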

#else
#define _mm512_exp2a23_round_pd(A, C)            \
    __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
    __builtin_ia32_exp2pd_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_pd(U, A, C)   \
    __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_exp2a23_round_ps(A, C)            \
    __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
    __builtin_ia32_exp2ps_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_ps(U, A, C)   \
    __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rcp28_round_pd(A, C)            \
    __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
    __builtin_ia32_rcp28pd_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_pd(U, A, C)   \
    __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rcp28_round_ps(A, C)            \
    __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
    __builtin_ia32_rcp28ps_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_ps(U, A, C)   \
    __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rsqrt28_round_pd(A, C)            \
    __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
    __builtin_ia32_rsqrt28pd_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_pd(U, A, C)   \
    __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rsqrt28_round_ps(A, C)            \
    __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
    __builtin_ia32_rsqrt28ps_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_ps(U, A, C)   \
    __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

/* As in the inline definitions above, the scalar builtins take the
   operand providing the low element (B) first and the operand providing
   the upper elements (A) second.  */
#define _mm_rcp28_round_sd(A, B, R)	\
    __builtin_ia32_rcp28sd_round(B, A, R)

#define _mm_mask_rcp28_round_sd(W, U, A, B, R)	\
    __builtin_ia32_rcp28sd_mask_round ((B), (A), (W), (U), (R))

#define _mm_maskz_rcp28_round_sd(U, A, B, R)	\
    __builtin_ia32_rcp28sd_mask_round ((B), (A), (__v2df) _mm_setzero_pd (), \
				       (U), (R))

#define _mm_rcp28_round_ss(A, B, R)	\
    __builtin_ia32_rcp28ss_round(B, A, R)

#define _mm_mask_rcp28_round_ss(W, U, A, B, R)	\
    __builtin_ia32_rcp28ss_mask_round ((B), (A), (W), (U), (R))

#define _mm_maskz_rcp28_round_ss(U, A, B, R)	\
    __builtin_ia32_rcp28ss_mask_round ((B), (A), (__v4sf) _mm_setzero_ps (), \
				       (U), (R))

#define _mm_rsqrt28_round_sd(A, B, R)	\
    __builtin_ia32_rsqrt28sd_round(B, A, R)

#define _mm_mask_rsqrt28_round_sd(W, U, A, B, R)	\
    __builtin_ia32_rsqrt28sd_mask_round ((B), (A), (W), (U), (R))

#define _mm_maskz_rsqrt28_round_sd(U, A, B, R)	\
    __builtin_ia32_rsqrt28sd_mask_round ((B), (A), (__v2df) _mm_setzero_pd (),\
					 (U), (R))

#define _mm_rsqrt28_round_ss(A, B, R)	\
    __builtin_ia32_rsqrt28ss_round(B, A, R)

#define _mm_mask_rsqrt28_round_ss(W, U, A, B, R)	\
    __builtin_ia32_rsqrt28ss_mask_round ((B), (A), (W), (U), (R))

#define _mm_maskz_rsqrt28_round_ss(U, A, B, R)	\
    __builtin_ia32_rsqrt28ss_mask_round ((B), (A), (__v4sf) _mm_setzero_ps (),\
					 (U), (R))

#endif

#define _mm_mask_rcp28_sd(W, U, A, B)\
    _mm_mask_rcp28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_sd(U, A, B)\
    _mm_maskz_rcp28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_ss(W, U, A, B)\
    _mm_mask_rcp28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_ss(U, A, B)\
    _mm_maskz_rcp28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_sd(W, U, A, B)\
    _mm_mask_rsqrt28_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_sd(U, A, B)\
    _mm_maskz_rsqrt28_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_ss(W, U, A, B)\
    _mm_mask_rsqrt28_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_ss(U, A, B)\
    _mm_maskz_rsqrt28_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_pd(A)                    \
    _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(W, U, A)   \
    _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(U, A)     \
    _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_ps(A)                    \
    _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(W, U, A)   \
    _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(U, A)     \
    _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_pd(A)                    \
    _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(W, U, A)   \
    _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(U, A)     \
    _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_ps(A)                    \
    _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(W, U, A)   \
    _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(U, A)     \
    _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_pd(A)                    \
    _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(W, U, A)   \
    _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(U, A)     \
    _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_ps(A)                    \
    _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(W, U, A)   \
    _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(U, A)     \
    _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_sd(A, B)	\
    __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_ss(A, B)	\
    __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_sd(A, B)	\
    __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_ss(A, B)	\
    __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
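
/* Usage sketch (illustrative, not part of this header): the no-suffix
   convenience forms above simply fix the rounding argument to
   _MM_FROUND_CUR_DIRECTION, so __x and __y below are computed
   identically:

     #include <immintrin.h>

     void
     equivalent_calls (__m512d __a, __m512d *__out)
     {
       __m512d __x = _mm512_rsqrt28_pd (__a);
       __m512d __y = _mm512_rsqrt28_round_pd (__a, _MM_FROUND_CUR_DIRECTION);
       __out[0] = __x;
       __out[1] = __y;
     }
*/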

#ifdef __DISABLE_AVX512ER__
#undef __DISABLE_AVX512ER__
#pragma GCC pop_options
#endif /* __DISABLE_AVX512ER__ */

#endif /* _AVX512ERINTRIN_H_INCLUDED */