1 /* Copyright (C) 2017-2018 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 GCC is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24 #ifndef _IMMINTRIN_H_INCLUDED 25 #error "Never use <gfniintrin.h> directly; include <immintrin.h> instead." 26 #endif 27 28 #ifndef _GFNIINTRIN_H_INCLUDED 29 #define _GFNIINTRIN_H_INCLUDED 30 31 #if !defined(__GFNI__) || !defined(__SSE2__) 32 #pragma GCC push_options 33 #pragma GCC target("gfni,sse2") 34 #define __DISABLE_GFNI__ 35 #endif /* __GFNI__ */ 36 37 extern __inline __m128i 38 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 39 _mm_gf2p8mul_epi8 (__m128i __A, __m128i __B) 40 { 41 return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A, 42 (__v16qi) __B); 43 } 44 45 #ifdef __OPTIMIZE__ 46 extern __inline __m128i 47 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 48 _mm_gf2p8affineinv_epi64_epi8 (__m128i __A, __m128i __B, const int __C) 49 { 50 return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi ((__v16qi) __A, 51 (__v16qi) __B, 52 __C); 53 } 54 55 extern __inline __m128i 56 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 57 _mm_gf2p8affine_epi64_epi8 (__m128i __A, __m128i __B, const int __C) 58 { 59 return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi ((__v16qi) __A, 60 (__v16qi) __B, __C); 61 } 62 #else 63 #define _mm_gf2p8affineinv_epi64_epi8(A, B, C) \ 64 ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \ 65 (__v16qi)(__m128i)(B), (int)(C))) 66 #define _mm_gf2p8affine_epi64_epi8(A, B, C) \ 67 ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi ((__v16qi)(__m128i)(A), \ 68 (__v16qi)(__m128i)(B), (int)(C))) 69 #endif 70 71 #ifdef __DISABLE_GFNI__ 72 #undef __DISABLE_GFNI__ 73 #pragma GCC pop_options 74 #endif /* __DISABLE_GFNI__ */ 75 76 #if !defined(__GFNI__) || !defined(__AVX__) 77 #pragma GCC push_options 78 #pragma GCC target("gfni,avx") 79 #define __DISABLE_GFNIAVX__ 80 #endif /* __GFNIAVX__ */ 81 82 extern __inline __m256i 83 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 84 _mm256_gf2p8mul_epi8 (__m256i __A, __m256i __B) 85 { 86 return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi ((__v32qi) __A, 87 (__v32qi) __B); 88 } 89 90 #ifdef __OPTIMIZE__ 91 extern __inline __m256i 92 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 93 _mm256_gf2p8affineinv_epi64_epi8 (__m256i __A, __m256i __B, const int __C) 94 { 95 return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi ((__v32qi) __A, 96 (__v32qi) __B, 97 __C); 98 } 99 100 extern __inline __m256i 101 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 102 _mm256_gf2p8affine_epi64_epi8 (__m256i __A, __m256i __B, const int __C) 103 { 104 return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi ((__v32qi) __A, 105 (__v32qi) __B, __C); 106 } 107 #else 108 #define _mm256_gf2p8affineinv_epi64_epi8(A, B, C) \ 109 ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \ 110 (__v32qi)(__m256i)(B), \ 111 (int)(C))) 112 #define _mm256_gf2p8affine_epi64_epi8(A, B, C) \ 113 ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi ((__v32qi)(__m256i)(A), \ 114 ( __v32qi)(__m256i)(B), (int)(C))) 115 #endif 116 117 #ifdef __DISABLE_GFNIAVX__ 118 #undef __DISABLE_GFNIAVX__ 119 #pragma GCC pop_options 120 #endif /* __GFNIAVX__ */ 121 122 #if !defined(__GFNI__) || !defined(__AVX512VL__) 123 #pragma GCC push_options 124 #pragma GCC target("gfni,avx512vl") 125 #define __DISABLE_GFNIAVX512VL__ 126 #endif /* __GFNIAVX512VL__ */ 127 128 extern __inline __m128i 129 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 130 _mm_mask_gf2p8mul_epi8 (__m128i __A, __mmask16 __B, __m128i __C, __m128i __D) 131 { 132 return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __C, 133 (__v16qi) __D, 134 (__v16qi)__A, __B); 135 } 136 137 extern __inline __m128i 138 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 139 _mm_maskz_gf2p8mul_epi8 (__mmask16 __A, __m128i __B, __m128i __C) 140 { 141 return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __B, 142 (__v16qi) __C, (__v16qi) _mm_setzero_si128 (), __A); 143 } 144 145 #ifdef __OPTIMIZE__ 146 extern __inline __m128i 147 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 148 _mm_mask_gf2p8affineinv_epi64_epi8 (__m128i __A, __mmask16 __B, __m128i __C, 149 __m128i __D, const int __E) 150 { 151 return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __C, 152 (__v16qi) __D, 153 __E, 154 (__v16qi)__A, 155 __B); 156 } 157 158 extern __inline __m128i 159 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 160 _mm_maskz_gf2p8affineinv_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C, 161 const int __D) 162 { 163 return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __B, 164 (__v16qi) __C, __D, 165 (__v16qi) _mm_setzero_si128 (), 166 __A); 167 } 168 169 extern __inline __m128i 170 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 171 _mm_mask_gf2p8affine_epi64_epi8 (__m128i __A, __mmask16 __B, __m128i __C, 172 __m128i __D, const int __E) 173 { 174 return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __C, 175 (__v16qi) __D, __E, (__v16qi)__A, __B); 176 } 177 178 extern __inline __m128i 179 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 180 _mm_maskz_gf2p8affine_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C, 181 const int __D) 182 { 183 return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __B, 184 (__v16qi) __C, __D, (__v16qi) _mm_setzero_si128 (), __A); 185 } 186 #else 187 #define _mm_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \ 188 ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask( \ 189 (__v16qi)(__m128i)(C), (__v16qi)(__m128i)(D), \ 190 (int)(E), (__v16qi)(__m128i)(A), (__mmask16)(B))) 191 #define _mm_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \ 192 ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask( \ 193 (__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C), \ 194 (int)(D), (__v16qi)(__m128i) _mm_setzero_si128 (), \ 195 (__mmask16)(A))) 196 #define _mm_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \ 197 ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask((__v16qi)(__m128i)(C),\ 198 (__v16qi)(__m128i)(D), (int)(E), (__v16qi)(__m128i)(A), (__mmask16)(B))) 199 #define _mm_maskz_gf2p8affine_epi64_epi8(A, B, C, D) \ 200 ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask((__v16qi)(__m128i)(B),\ 201 (__v16qi)(__m128i)(C), (int)(D), \ 202 (__v16qi)(__m128i) _mm_setzero_si128 (), (__mmask16)(A))) 203 #endif 204 205 #ifdef __DISABLE_GFNIAVX512VL__ 206 #undef __DISABLE_GFNIAVX512VL__ 207 #pragma GCC pop_options 208 #endif /* __GFNIAVX512VL__ */ 209 210 #if !defined(__GFNI__) || !defined(__AVX512VL__) || !defined(__AVX512BW__) 211 #pragma GCC push_options 212 #pragma GCC target("gfni,avx512vl,avx512bw") 213 #define __DISABLE_GFNIAVX512VLBW__ 214 #endif /* __GFNIAVX512VLBW__ */ 215 216 extern __inline __m256i 217 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 218 _mm256_mask_gf2p8mul_epi8 (__m256i __A, __mmask32 __B, __m256i __C, 219 __m256i __D) 220 { 221 return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __C, 222 (__v32qi) __D, 223 (__v32qi)__A, __B); 224 } 225 226 extern __inline __m256i 227 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 228 _mm256_maskz_gf2p8mul_epi8 (__mmask32 __A, __m256i __B, __m256i __C) 229 { 230 return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __B, 231 (__v32qi) __C, (__v32qi) _mm256_setzero_si256 (), __A); 232 } 233 234 #ifdef __OPTIMIZE__ 235 extern __inline __m256i 236 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 237 _mm256_mask_gf2p8affineinv_epi64_epi8 (__m256i __A, __mmask32 __B, 238 __m256i __C, __m256i __D, const int __E) 239 { 240 return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __C, 241 (__v32qi) __D, 242 __E, 243 (__v32qi)__A, 244 __B); 245 } 246 247 extern __inline __m256i 248 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 249 _mm256_maskz_gf2p8affineinv_epi64_epi8 (__mmask32 __A, __m256i __B, 250 __m256i __C, const int __D) 251 { 252 return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __B, 253 (__v32qi) __C, __D, 254 (__v32qi) _mm256_setzero_si256 (), __A); 255 } 256 257 extern __inline __m256i 258 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 259 _mm256_mask_gf2p8affine_epi64_epi8 (__m256i __A, __mmask32 __B, __m256i __C, 260 __m256i __D, const int __E) 261 { 262 return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __C, 263 (__v32qi) __D, 264 __E, 265 (__v32qi)__A, 266 __B); 267 } 268 269 extern __inline __m256i 270 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 271 _mm256_maskz_gf2p8affine_epi64_epi8 (__mmask32 __A, __m256i __B, 272 __m256i __C, const int __D) 273 { 274 return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __B, 275 (__v32qi) __C, __D, (__v32qi)_mm256_setzero_si256 (), __A); 276 } 277 #else 278 #define _mm256_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \ 279 ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask( \ 280 (__v32qi)(__m256i)(C), (__v32qi)(__m256i)(D), (int)(E), \ 281 (__v32qi)(__m256i)(A), (__mmask32)(B))) 282 #define _mm256_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \ 283 ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask( \ 284 (__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D), \ 285 (__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A))) 286 #define _mm256_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \ 287 ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask((__v32qi)(__m256i)(C),\ 288 (__v32qi)(__m256i)(D), (int)(E), (__v32qi)(__m256i)(A), (__mmask32)(B))) 289 #define _mm256_maskz_gf2p8affine_epi64_epi8(A, B, C, D) \ 290 ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask((__v32qi)(__m256i)(B),\ 291 (__v32qi)(__m256i)(C), (int)(D), \ 292 (__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A))) 293 #endif 294 295 #ifdef __DISABLE_GFNIAVX512VLBW__ 296 #undef __DISABLE_GFNIAVX512VLBW__ 297 #pragma GCC pop_options 298 #endif /* __GFNIAVX512VLBW__ */ 299 300 #if !defined(__GFNI__) || !defined(__AVX512F__) || !defined(__AVX512BW__) 301 #pragma GCC push_options 302 #pragma GCC target("gfni,avx512f,avx512bw") 303 #define __DISABLE_GFNIAVX512FBW__ 304 #endif /* __GFNIAVX512FBW__ */ 305 306 extern __inline __m512i 307 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 308 _mm512_mask_gf2p8mul_epi8 (__m512i __A, __mmask64 __B, __m512i __C, 309 __m512i __D) 310 { 311 return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __C, 312 (__v64qi) __D, (__v64qi)__A, __B); 313 } 314 315 extern __inline __m512i 316 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 317 _mm512_maskz_gf2p8mul_epi8 (__mmask64 __A, __m512i __B, __m512i __C) 318 { 319 return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __B, 320 (__v64qi) __C, (__v64qi) _mm512_setzero_si512 (), __A); 321 } 322 extern __inline __m512i 323 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 324 _mm512_gf2p8mul_epi8 (__m512i __A, __m512i __B) 325 { 326 return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi ((__v64qi) __A, 327 (__v64qi) __B); 328 } 329 330 #ifdef __OPTIMIZE__ 331 extern __inline __m512i 332 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 333 _mm512_mask_gf2p8affineinv_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C, 334 __m512i __D, const int __E) 335 { 336 return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask ((__v64qi) __C, 337 (__v64qi) __D, 338 __E, 339 (__v64qi)__A, 340 __B); 341 } 342 343 extern __inline __m512i 344 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 345 _mm512_maskz_gf2p8affineinv_epi64_epi8 (__mmask64 __A, __m512i __B, 346 __m512i __C, const int __D) 347 { 348 return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask ((__v64qi) __B, 349 (__v64qi) __C, __D, 350 (__v64qi) _mm512_setzero_si512 (), __A); 351 } 352 353 extern __inline __m512i 354 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 355 _mm512_gf2p8affineinv_epi64_epi8 (__m512i __A, __m512i __B, const int __C) 356 { 357 return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ((__v64qi) __A, 358 (__v64qi) __B, __C); 359 } 360 361 extern __inline __m512i 362 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 363 _mm512_mask_gf2p8affine_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C, 364 __m512i __D, const int __E) 365 { 366 return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __C, 367 (__v64qi) __D, __E, (__v64qi)__A, __B); 368 } 369 370 extern __inline __m512i 371 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 372 _mm512_maskz_gf2p8affine_epi64_epi8 (__mmask64 __A, __m512i __B, __m512i __C, 373 const int __D) 374 { 375 return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __B, 376 (__v64qi) __C, __D, (__v64qi) _mm512_setzero_si512 (), __A); 377 } 378 extern __inline __m512i 379 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 380 _mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C) 381 { 382 return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi) __A, 383 (__v64qi) __B, __C); 384 } 385 #else 386 #define _mm512_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E) \ 387 ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask( \ 388 (__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E), \ 389 (__v64qi)(__m512i)(A), (__mmask64)(B))) 390 #define _mm512_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D) \ 391 ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask( \ 392 (__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D), \ 393 (__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A))) 394 #define _mm512_gf2p8affineinv_epi64_epi8(A, B, C) \ 395 ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ( \ 396 (__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C))) 397 #define _mm512_mask_gf2p8affine_epi64_epi8(A, B, C, D, E) \ 398 ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(C),\ 399 (__v64qi)(__m512i)(D), (int)(E), (__v64qi)(__m512i)(A), (__mmask64)(B))) 400 #define _mm512_maskz_gf2p8affine_epi64_epi8(A, B, C, D) \ 401 ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask((__v64qi)(__m512i)(B),\ 402 (__v64qi)(__m512i)(C), (int)(D), \ 403 (__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A))) 404 #define _mm512_gf2p8affine_epi64_epi8(A, B, C) \ 405 ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi)(__m512i)(A), \ 406 (__v64qi)(__m512i)(B), (int)(C))) 407 #endif 408 409 #ifdef __DISABLE_GFNIAVX512FBW__ 410 #undef __DISABLE_GFNIAVX512FBW__ 411 #pragma GCC pop_options 412 #endif /* __GFNIAVX512FBW__ */ 413 414 #endif /* _GFNIINTRIN_H_INCLUDED */ 415