/* Copyright (C) 2017-2019 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <gfniintrin.h> directly; include <immintrin.h> instead."
26 #endif
27 
28 #ifndef _GFNIINTRIN_H_INCLUDED
29 #define _GFNIINTRIN_H_INCLUDED
30 
31 #if !defined(__GFNI__) || !defined(__SSE2__)
32 #pragma GCC push_options
33 #pragma GCC target("gfni,sse2")
34 #define __DISABLE_GFNI__
35 #endif /* __GFNI__ */
36 
37 extern __inline __m128i
38 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_gf2p8mul_epi8(__m128i __A,__m128i __B)39 _mm_gf2p8mul_epi8 (__m128i __A, __m128i __B)
40 {
41   return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,
42 						   (__v16qi) __B);
43 }
44 
#ifdef __OPTIMIZE__
/* Affine transformation in GF(2^8) with inversion: each byte of __A is
   first inverted in GF(2^8), then multiplied (as an 8-bit vector) by
   the 8x8 bit matrix held in the corresponding 64-bit lane of __B, and
   finally XORed with the immediate byte __C (GF2P8AFFINEINVQB).  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_gf2p8affineinv_epi64_epi8 (__m128i __A, __m128i __B, const int __C)
{
  return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi ((__v16qi) __A,
							   (__v16qi) __B,
							    __C);
}

/* As above, but without the preliminary GF(2^8) inversion of __A's
   bytes (GF2P8AFFINEQB).  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_gf2p8affine_epi64_epi8 (__m128i __A, __m128i __B, const int __C)
{
  return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi ((__v16qi) __A,
							(__v16qi) __B, __C);
}
#else
/* Without optimization the inline wrappers cannot guarantee that the
   immediate stays a compile-time constant, so macro forms are used.  */
#define _mm_gf2p8affineinv_epi64_epi8(A, B, C)				   \
  ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \
					   (__v16qi)(__m128i)(B), (int)(C)))
#define _mm_gf2p8affine_epi64_epi8(A, B, C)				   \
  ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi ((__v16qi)(__m128i)(A),   \
					   (__v16qi)(__m128i)(B), (int)(C)))
#endif

#ifdef __DISABLE_GFNI__
#undef __DISABLE_GFNI__
#pragma GCC pop_options
#endif /* __DISABLE_GFNI__ */

/* 256-bit variants require AVX in addition to GFNI.  */
#if !defined(__GFNI__) || !defined(__AVX__)
#pragma GCC push_options
#pragma GCC target("gfni,avx")
#define __DISABLE_GFNIAVX__
#endif /* __GFNIAVX__ */

82 extern __inline __m256i
83 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_gf2p8mul_epi8(__m256i __A,__m256i __B)84 _mm256_gf2p8mul_epi8 (__m256i __A, __m256i __B)
85 {
86   return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi ((__v32qi) __A,
87 						    (__v32qi) __B);
88 }
89 
#ifdef __OPTIMIZE__
/* 256-bit GF(2^8) affine transformation with inversion: each byte of
   __A is inverted in GF(2^8), multiplied by the 8x8 bit matrix in the
   matching 64-bit lane of __B and XORed with immediate byte __C.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_gf2p8affineinv_epi64_epi8 (__m256i __A, __m256i __B, const int __C)
{
  return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi ((__v32qi) __A,
							   (__v32qi) __B,
							    __C);
}

/* As above, but without the preliminary GF(2^8) inversion.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_gf2p8affine_epi64_epi8 (__m256i __A, __m256i __B, const int __C)
{
  return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi ((__v32qi) __A,
							(__v32qi) __B, __C);
}
#else
/* Macro forms for -O0; the immediate must remain a literal.  */
#define _mm256_gf2p8affineinv_epi64_epi8(A, B, C)			   \
  ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \
						    (__v32qi)(__m256i)(B), \
						    (int)(C)))
#define _mm256_gf2p8affine_epi64_epi8(A, B, C)				   \
  ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi ((__v32qi)(__m256i)(A),   \
					(__v32qi)(__m256i)(B), (int)(C)))
#endif

#ifdef __DISABLE_GFNIAVX__
#undef __DISABLE_GFNIAVX__
#pragma GCC pop_options
#endif /* __GFNIAVX__ */

/* Masked 128-bit variants require AVX512VL in addition to GFNI.  */
#if !defined(__GFNI__) || !defined(__AVX512VL__)
#pragma GCC push_options
#pragma GCC target("gfni,avx512vl")
#define __DISABLE_GFNIAVX512VL__
#endif /* __GFNIAVX512VL__ */

128 extern __inline __m128i
129 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_gf2p8mul_epi8(__m128i __A,__mmask16 __B,__m128i __C,__m128i __D)130 _mm_mask_gf2p8mul_epi8 (__m128i __A, __mmask16 __B, __m128i __C, __m128i __D)
131 {
132   return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __C,
133 							 (__v16qi) __D,
134 							 (__v16qi)__A, __B);
135 }
136 
137 extern __inline __m128i
138 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_gf2p8mul_epi8(__mmask16 __A,__m128i __B,__m128i __C)139 _mm_maskz_gf2p8mul_epi8 (__mmask16 __A, __m128i __B, __m128i __C)
140 {
141   return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __B,
142 			(__v16qi) __C, (__v16qi) _mm_setzero_si128 (), __A);
143 }
144 
#ifdef __OPTIMIZE__
/* Merge-masked GF(2^8) affine transformation with inversion: each byte
   of __C is inverted in GF(2^8), multiplied by the 8x8 bit matrix in
   the matching 64-bit lane of __D and XORed with immediate __E; result
   bytes whose bit in mask __B is clear are copied from __A.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_gf2p8affineinv_epi64_epi8 (__m128i __A, __mmask16 __B, __m128i __C,
				    __m128i __D, const int __E)
{
  return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __C,
								(__v16qi) __D,
								 __E,
								(__v16qi)__A,
								 __B);
}

/* Zero-masking form: result bytes whose bit in __A is clear are 0.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_gf2p8affineinv_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
				     const int __D)
{
  return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __B,
						(__v16qi) __C, __D,
						(__v16qi) _mm_setzero_si128 (),
						 __A);
}

/* Merge-masked GF(2^8) affine transformation without inversion.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_gf2p8affine_epi64_epi8 (__m128i __A, __mmask16 __B, __m128i __C,
				 __m128i __D, const int __E)
{
  return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __C,
					(__v16qi) __D, __E, (__v16qi)__A, __B);
}

/* Zero-masked GF(2^8) affine transformation without inversion.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_gf2p8affine_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
				  const int __D)
{
  return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __B,
		     (__v16qi) __C, __D, (__v16qi) _mm_setzero_si128 (), __A);
}
#else
/* Macro forms for -O0; the immediate must remain a literal.  */
#define _mm_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E)		   \
  ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask(		   \
			(__v16qi)(__m128i)(C), (__v16qi)(__m128i)(D),      \
			(int)(E), (__v16qi)(__m128i)(A), (__mmask16)(B)))
#define _mm_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \
  ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask(		   \
			(__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C),	   \
			(int)(D), (__v16qi)(__m128i) _mm_setzero_si128 (), \
			(__mmask16)(A)))
#define _mm_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \
  ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask((__v16qi)(__m128i)(C),\
      (__v16qi)(__m128i)(D), (int)(E), (__v16qi)(__m128i)(A), (__mmask16)(B)))
#define _mm_maskz_gf2p8affine_epi64_epi8(A, B, C, D)			    \
  ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask((__v16qi)(__m128i)(B),\
		(__v16qi)(__m128i)(C), (int)(D),			    \
		(__v16qi)(__m128i) _mm_setzero_si128 (), (__mmask16)(A)))
#endif

#ifdef __DISABLE_GFNIAVX512VL__
#undef __DISABLE_GFNIAVX512VL__
#pragma GCC pop_options
#endif /* __GFNIAVX512VL__ */

/* Masked 256-bit variants require AVX512VL and AVX512BW.  */
#if !defined(__GFNI__) || !defined(__AVX512VL__) || !defined(__AVX512BW__)
#pragma GCC push_options
#pragma GCC target("gfni,avx512vl,avx512bw")
#define __DISABLE_GFNIAVX512VLBW__
#endif /* __GFNIAVX512VLBW__ */

216 extern __inline __m256i
217 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_gf2p8mul_epi8(__m256i __A,__mmask32 __B,__m256i __C,__m256i __D)218 _mm256_mask_gf2p8mul_epi8 (__m256i __A, __mmask32 __B, __m256i __C,
219 			   __m256i __D)
220 {
221   return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __C,
222 							 (__v32qi) __D,
223 							 (__v32qi)__A, __B);
224 }
225 
226 extern __inline __m256i
227 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_gf2p8mul_epi8(__mmask32 __A,__m256i __B,__m256i __C)228 _mm256_maskz_gf2p8mul_epi8 (__mmask32 __A, __m256i __B, __m256i __C)
229 {
230   return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __B,
231 			(__v32qi) __C, (__v32qi) _mm256_setzero_si256 (), __A);
232 }
233 
#ifdef __OPTIMIZE__
/* Merge-masked 256-bit GF(2^8) affine transformation with inversion:
   each byte of __C is inverted in GF(2^8), multiplied by the 8x8 bit
   matrix in the matching 64-bit lane of __D and XORed with immediate
   __E; result bytes whose bit in mask __B is clear come from __A.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_gf2p8affineinv_epi64_epi8 (__m256i __A, __mmask32 __B,
				       __m256i __C, __m256i __D, const int __E)
{
  return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __C,
								(__v32qi) __D,
								 __E,
								(__v32qi)__A,
								 __B);
}

/* Zero-masking form: result bytes whose bit in __A is clear are 0.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_gf2p8affineinv_epi64_epi8 (__mmask32 __A, __m256i __B,
					__m256i __C, const int __D)
{
  return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __B,
				      (__v32qi) __C, __D,
				      (__v32qi) _mm256_setzero_si256 (), __A);
}

/* Merge-masked 256-bit GF(2^8) affine transform, no inversion.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_gf2p8affine_epi64_epi8 (__m256i __A, __mmask32 __B, __m256i __C,
				    __m256i __D, const int __E)
{
  return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __C,
							     (__v32qi) __D,
							      __E,
							     (__v32qi)__A,
							      __B);
}

/* Zero-masked 256-bit GF(2^8) affine transform, no inversion.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_gf2p8affine_epi64_epi8 (__mmask32 __A, __m256i __B,
				     __m256i __C, const int __D)
{
  return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __B,
		(__v32qi) __C, __D, (__v32qi)_mm256_setzero_si256 (), __A);
}
#else
/* Macro forms for -O0; the immediate must remain a literal.  */
#define _mm256_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E)		\
  ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask(		\
	(__v32qi)(__m256i)(C), (__v32qi)(__m256i)(D), (int)(E),		\
	(__v32qi)(__m256i)(A), (__mmask32)(B)))
#define _mm256_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D)		\
  ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask(		\
	(__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D),		\
	(__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A)))
#define _mm256_mask_gf2p8affine_epi64_epi8(A, B, C, D, E)		    \
  ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask((__v32qi)(__m256i)(C),\
	(__v32qi)(__m256i)(D), (int)(E), (__v32qi)(__m256i)(A), (__mmask32)(B)))
#define _mm256_maskz_gf2p8affine_epi64_epi8(A, B, C, D)			    \
  ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask((__v32qi)(__m256i)(B),\
	 (__v32qi)(__m256i)(C), (int)(D),				    \
	 (__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A)))
#endif

#ifdef __DISABLE_GFNIAVX512VLBW__
#undef __DISABLE_GFNIAVX512VLBW__
#pragma GCC pop_options
#endif /* __GFNIAVX512VLBW__ */

/* 512-bit variants require AVX512F and AVX512BW.  */
#if !defined(__GFNI__) || !defined(__AVX512F__) || !defined(__AVX512BW__)
#pragma GCC push_options
#pragma GCC target("gfni,avx512f,avx512bw")
#define __DISABLE_GFNIAVX512FBW__
#endif /* __GFNIAVX512FBW__ */

306 extern __inline __m512i
307 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_gf2p8mul_epi8(__m512i __A,__mmask64 __B,__m512i __C,__m512i __D)308 _mm512_mask_gf2p8mul_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
309 			   __m512i __D)
310 {
311   return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __C,
312 					(__v64qi) __D, (__v64qi)__A, __B);
313 }
314 
315 extern __inline __m512i
316 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_gf2p8mul_epi8(__mmask64 __A,__m512i __B,__m512i __C)317 _mm512_maskz_gf2p8mul_epi8 (__mmask64 __A, __m512i __B, __m512i __C)
318 {
319   return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __B,
320 			(__v64qi) __C, (__v64qi) _mm512_setzero_si512 (), __A);
321 }
322 extern __inline __m512i
323 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8mul_epi8(__m512i __A,__m512i __B)324 _mm512_gf2p8mul_epi8 (__m512i __A, __m512i __B)
325 {
326   return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi ((__v64qi) __A,
327 						    (__v64qi) __B);
328 }
329 
#ifdef __OPTIMIZE__
/* Merge-masked 512-bit GF(2^8) affine transformation with inversion:
   each byte of __C is inverted in GF(2^8), multiplied by the 8x8 bit
   matrix in the matching 64-bit lane of __D and XORed with immediate
   __E; result bytes whose bit in mask __B is clear come from __A.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_gf2p8affineinv_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
				       __m512i __D, const int __E)
{
  return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask ((__v64qi) __C,
								(__v64qi) __D,
								 __E,
								(__v64qi)__A,
								 __B);
}

/* Zero-masking form: result bytes whose bit in __A is clear are 0.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_gf2p8affineinv_epi64_epi8 (__mmask64 __A, __m512i __B,
					__m512i __C, const int __D)
{
  return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask ((__v64qi) __B,
				(__v64qi) __C, __D,
				(__v64qi) _mm512_setzero_si512 (), __A);
}

/* Unmasked 512-bit GF(2^8) affine transformation with inversion.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8affineinv_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
{
  return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ((__v64qi) __A,
							   (__v64qi) __B, __C);
}

/* Merge-masked 512-bit GF(2^8) affine transform, no inversion.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_gf2p8affine_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
				    __m512i __D, const int __E)
{
  return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __C,
					(__v64qi) __D, __E, (__v64qi)__A, __B);
}

/* Zero-masked 512-bit GF(2^8) affine transform, no inversion.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_gf2p8affine_epi64_epi8 (__mmask64 __A, __m512i __B, __m512i __C,
				     const int __D)
{
  return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __B,
		  (__v64qi) __C, __D, (__v64qi) _mm512_setzero_si512 (), __A);
}

/* Unmasked 512-bit GF(2^8) affine transform, no inversion.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
{
  return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi) __A,
							(__v64qi) __B, __C);
}
#else
/* Macro forms for -O0; the immediate must remain a literal.  */
#define _mm512_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E)		\
  ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask(		\
	(__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E),		\
	(__v64qi)(__m512i)(A), (__mmask64)(B)))
#define _mm512_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D)		\
  ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask(		\
	(__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D),		\
	(__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
#define _mm512_gf2p8affineinv_epi64_epi8(A, B, C)			\
  ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi (			\
	(__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
#define _mm512_mask_gf2p8affine_epi64_epi8(A, B, C, D, E)		    \
  ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(C),\
     (__v64qi)(__m512i)(D), (int)(E), (__v64qi)(__m512i)(A), (__mmask64)(B)))
#define _mm512_maskz_gf2p8affine_epi64_epi8(A, B, C, D)			    \
  ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(B),\
	 (__v64qi)(__m512i)(C), (int)(D),				    \
	 (__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
#define _mm512_gf2p8affine_epi64_epi8(A, B, C)				    \
  ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi)(__m512i)(A),    \
	 (__v64qi)(__m512i)(B), (int)(C)))
#endif

409 #ifdef __DISABLE_GFNIAVX512FBW__
410 #undef __DISABLE_GFNIAVX512FBW__
411 #pragma GCC pop_options
412 #endif /* __GFNIAVX512FBW__ */
413 
414 #endif /* _GFNIINTRIN_H_INCLUDED */
415