1 /* Copyright (C) 2013-2018 Free Software Foundation, Inc.
2 
3    This file is part of GCC.
4 
5    GCC is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 3, or (at your option)
8    any later version.
9 
10    GCC is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    Under Section 7 of GPL version 3, you are granted additional
16    permissions described in the GCC Runtime Library Exception, version
17    3.1, as published by the Free Software Foundation.
18 
19    You should have received a copy of the GNU General Public License and
20    a copy of the GCC Runtime Library Exception along with this program;
21    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22    <http://www.gnu.org/licenses/>.  */
23 
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
26 #endif
27 
28 #ifndef _AVX512ERINTRIN_H_INCLUDED
29 #define _AVX512ERINTRIN_H_INCLUDED
30 
31 #ifndef __AVX512ER__
32 #pragma GCC push_options
33 #pragma GCC target("avx512er")
34 #define __DISABLE_AVX512ER__
35 #endif /* __AVX512ER__ */
36 
/* Internal data types for implementing the intrinsics.  These are not
   part of the public API; user code should use __m512/__m512d.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

/* Write masks: one bit per vector lane (8 doubles / 16 floats).  */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
48 
49 #ifdef __OPTIMIZE__
50 extern __inline __m512d
51 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
52 _mm512_exp2a23_round_pd (__m512d __A, int __R)
53 {
54   __m512d __W;
55   return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
56 					       (__v8df) __W,
57 					       (__mmask8) -1, __R);
58 }
59 
60 extern __inline __m512d
61 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
62 _mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
63 {
64   return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
65 					       (__v8df) __W,
66 					       (__mmask8) __U, __R);
67 }
68 
69 extern __inline __m512d
70 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
71 _mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
72 {
73   return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
74 					       (__v8df) _mm512_setzero_pd (),
75 					       (__mmask8) __U, __R);
76 }
77 
78 extern __inline __m512
79 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
80 _mm512_exp2a23_round_ps (__m512 __A, int __R)
81 {
82   __m512 __W;
83   return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
84 					      (__v16sf) __W,
85 					      (__mmask16) -1, __R);
86 }
87 
88 extern __inline __m512
89 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
90 _mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
91 {
92   return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
93 					      (__v16sf) __W,
94 					      (__mmask16) __U, __R);
95 }
96 
97 extern __inline __m512
98 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
99 _mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
100 {
101   return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
102 					      (__v16sf) _mm512_setzero_ps (),
103 					      (__mmask16) __U, __R);
104 }
105 
106 extern __inline __m512d
107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
108 _mm512_rcp28_round_pd (__m512d __A, int __R)
109 {
110   __m512d __W;
111   return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
112 						(__v8df) __W,
113 						(__mmask8) -1, __R);
114 }
115 
116 extern __inline __m512d
117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
118 _mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
119 {
120   return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
121 						(__v8df) __W,
122 						(__mmask8) __U, __R);
123 }
124 
125 extern __inline __m512d
126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
127 _mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
128 {
129   return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
130 						(__v8df) _mm512_setzero_pd (),
131 						(__mmask8) __U, __R);
132 }
133 
134 extern __inline __m512
135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136 _mm512_rcp28_round_ps (__m512 __A, int __R)
137 {
138   __m512 __W;
139   return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
140 					       (__v16sf) __W,
141 					       (__mmask16) -1, __R);
142 }
143 
144 extern __inline __m512
145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
146 _mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
147 {
148   return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
149 					       (__v16sf) __W,
150 					       (__mmask16) __U, __R);
151 }
152 
153 extern __inline __m512
154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
155 _mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
156 {
157   return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
158 					       (__v16sf) _mm512_setzero_ps (),
159 					       (__mmask16) __U, __R);
160 }
161 
162 extern __inline __m128d
163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
164 _mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
165 {
166   return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
167 						 (__v2df) __A,
168 						 __R);
169 }
170 
171 extern __inline __m128
172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
173 _mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
174 {
175   return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
176 						(__v4sf) __A,
177 						__R);
178 }
179 
180 extern __inline __m512d
181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
182 _mm512_rsqrt28_round_pd (__m512d __A, int __R)
183 {
184   __m512d __W;
185   return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
186 						  (__v8df) __W,
187 						  (__mmask8) -1, __R);
188 }
189 
190 extern __inline __m512d
191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
192 _mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
193 {
194   return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
195 						  (__v8df) __W,
196 						  (__mmask8) __U, __R);
197 }
198 
199 extern __inline __m512d
200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
201 _mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
202 {
203   return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
204 						  (__v8df) _mm512_setzero_pd (),
205 						  (__mmask8) __U, __R);
206 }
207 
208 extern __inline __m512
209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
210 _mm512_rsqrt28_round_ps (__m512 __A, int __R)
211 {
212   __m512 __W;
213   return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
214 						 (__v16sf) __W,
215 						 (__mmask16) -1, __R);
216 }
217 
218 extern __inline __m512
219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
220 _mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
221 {
222   return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
223 						 (__v16sf) __W,
224 						 (__mmask16) __U, __R);
225 }
226 
227 extern __inline __m512
228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
229 _mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
230 {
231   return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
232 						 (__v16sf) _mm512_setzero_ps (),
233 						 (__mmask16) __U, __R);
234 }
235 
236 extern __inline __m128d
237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
238 _mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
239 {
240   return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
241 						   (__v2df) __A,
242 						   __R);
243 }
244 
245 extern __inline __m128
246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
247 _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
248 {
249   return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
250 						  (__v4sf) __A,
251 						  __R);
252 }
253 
254 #else
/* Macro forms used when __OPTIMIZE__ is not defined: the rounding-mode
   argument C must then be a literal compile-time constant, since the
   builtins require an immediate operand.  These mirror the inline
   definitions above.  */
#define _mm512_exp2a23_round_pd(A, C)            \
    __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
    __builtin_ia32_exp2pd_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_pd(U, A, C)   \
    __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_exp2a23_round_ps(A, C)            \
    __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
    __builtin_ia32_exp2ps_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_ps(U, A, C)   \
    __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rcp28_round_pd(A, C)            \
    __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
    __builtin_ia32_rcp28pd_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_pd(U, A, C)   \
    __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rcp28_round_ps(A, C)            \
    __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
    __builtin_ia32_rcp28ps_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_ps(U, A, C)   \
    __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rsqrt28_round_pd(A, C)            \
    __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
    __builtin_ia32_rsqrt28pd_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_pd(U, A, C)   \
    __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rsqrt28_round_ps(A, C)            \
    __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
    __builtin_ia32_rsqrt28ps_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_ps(U, A, C)   \
    __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
308 
/* Scalar reciprocal/rsqrt with explicit rounding.  The builtins take
   the source of the operation first and the pass-through (upper
   elements) operand second, i.e. (B, A) — matching the inline
   definitions above and the _MM_FROUND_CUR_DIRECTION macros below.
   The previous expansion passed (A, B), so the non-__OPTIMIZE__ build
   computed the approximation of the wrong operand.  */
#define _mm_rcp28_round_sd(A, B, R)	\
    __builtin_ia32_rcp28sd_round(B, A, R)

#define _mm_rcp28_round_ss(A, B, R)	\
    __builtin_ia32_rcp28ss_round(B, A, R)

#define _mm_rsqrt28_round_sd(A, B, R)	\
    __builtin_ia32_rsqrt28sd_round(B, A, R)

#define _mm_rsqrt28_round_ss(A, B, R)	\
    __builtin_ia32_rsqrt28ss_round(B, A, R)
320 
321 #endif
322 
/* Convenience forms that use the current rounding direction
   (_MM_FROUND_CUR_DIRECTION) instead of an explicit rounding-mode
   argument.  */
#define _mm512_exp2a23_pd(A)                    \
    _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(W, U, A)   \
    _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(U, A)     \
    _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_ps(A)                    \
    _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(W, U, A)   \
    _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(U, A)     \
    _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_pd(A)                    \
    _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(W, U, A)   \
    _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(U, A)     \
    _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_ps(A)                    \
    _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(W, U, A)   \
    _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(U, A)     \
    _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_pd(A)                    \
    _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(W, U, A)   \
    _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(U, A)     \
    _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_ps(A)                    \
    _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(W, U, A)   \
    _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(U, A)     \
    _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

/* Scalar forms: the builtin operand order is (source, pass-through),
   hence (B, A) — the operation applies to B's low element and the
   upper elements come from A.  */
#define _mm_rcp28_sd(A, B)	\
    __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_ss(A, B)	\
    __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_sd(A, B)	\
    __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_ss(A, B)	\
    __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
388 
389 #ifdef __DISABLE_AVX512ER__
390 #undef __DISABLE_AVX512ER__
391 #pragma GCC pop_options
392 #endif /* __DISABLE_AVX512ER__ */
393 
394 #endif /* _AVX512ERINTRIN_H_INCLUDED */
395