1 /* Copyright (C) 2013-2018 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 GCC is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24 #ifndef _IMMINTRIN_H_INCLUDED 25 #error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead." 26 #endif 27 28 #ifndef _AVX512ERINTRIN_H_INCLUDED 29 #define _AVX512ERINTRIN_H_INCLUDED 30 31 #ifndef __AVX512ER__ 32 #pragma GCC push_options 33 #pragma GCC target("avx512er") 34 #define __DISABLE_AVX512ER__ 35 #endif /* __AVX512ER__ */ 36 37 /* Internal data types for implementing the intrinsics. */ 38 typedef double __v8df __attribute__ ((__vector_size__ (64))); 39 typedef float __v16sf __attribute__ ((__vector_size__ (64))); 40 41 /* The Intel API is flexible enough that we must allow aliasing with other 42 vector types, and their scalar components. */ 43 typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__)); 44 typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); 45 46 typedef unsigned char __mmask8; 47 typedef unsigned short __mmask16; 48 49 #ifdef __OPTIMIZE__ 50 extern __inline __m512d 51 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 52 _mm512_exp2a23_round_pd (__m512d __A, int __R) 53 { 54 __m512d __W; 55 return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A, 56 (__v8df) __W, 57 (__mmask8) -1, __R); 58 } 59 60 extern __inline __m512d 61 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 62 _mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R) 63 { 64 return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A, 65 (__v8df) __W, 66 (__mmask8) __U, __R); 67 } 68 69 extern __inline __m512d 70 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 71 _mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R) 72 { 73 return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A, 74 (__v8df) _mm512_setzero_pd (), 75 (__mmask8) __U, __R); 76 } 77 78 extern __inline __m512 79 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 80 _mm512_exp2a23_round_ps (__m512 __A, int __R) 81 { 82 __m512 __W; 83 return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A, 84 (__v16sf) __W, 85 (__mmask16) -1, __R); 86 } 87 88 extern __inline __m512 89 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 90 _mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R) 91 { 92 return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A, 93 (__v16sf) __W, 94 (__mmask16) __U, __R); 95 } 96 97 extern __inline __m512 98 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 99 _mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R) 100 { 101 return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A, 102 (__v16sf) _mm512_setzero_ps (), 103 (__mmask16) __U, __R); 104 } 105 106 extern __inline __m512d 107 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 108 _mm512_rcp28_round_pd (__m512d __A, int __R) 109 { 110 __m512d __W; 111 return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A, 112 (__v8df) __W, 113 (__mmask8) -1, __R); 114 } 115 116 extern __inline __m512d 117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 118 _mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R) 119 { 120 return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A, 121 (__v8df) __W, 122 (__mmask8) __U, __R); 123 } 124 125 extern __inline __m512d 126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 127 _mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R) 128 { 129 return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A, 130 (__v8df) _mm512_setzero_pd (), 131 (__mmask8) __U, __R); 132 } 133 134 extern __inline __m512 135 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 136 _mm512_rcp28_round_ps (__m512 __A, int __R) 137 { 138 __m512 __W; 139 return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A, 140 (__v16sf) __W, 141 (__mmask16) -1, __R); 142 } 143 144 extern __inline __m512 145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 146 _mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R) 147 { 148 return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A, 149 (__v16sf) __W, 150 (__mmask16) __U, __R); 151 } 152 153 extern __inline __m512 154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 155 _mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R) 156 { 157 return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A, 158 (__v16sf) _mm512_setzero_ps (), 159 (__mmask16) __U, __R); 160 } 161 162 extern __inline __m128d 163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 164 _mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R) 165 { 166 return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B, 167 (__v2df) __A, 168 __R); 169 } 170 171 extern __inline __m128 172 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 173 _mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R) 174 { 175 return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B, 176 (__v4sf) __A, 177 __R); 178 } 179 180 extern __inline __m512d 181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 182 _mm512_rsqrt28_round_pd (__m512d __A, int __R) 183 { 184 __m512d __W; 185 return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A, 186 (__v8df) __W, 187 (__mmask8) -1, __R); 188 } 189 190 extern __inline __m512d 191 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 192 _mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R) 193 { 194 return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A, 195 (__v8df) __W, 196 (__mmask8) __U, __R); 197 } 198 199 extern __inline __m512d 200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 201 _mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R) 202 { 203 return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A, 204 (__v8df) _mm512_setzero_pd (), 205 (__mmask8) __U, __R); 206 } 207 208 extern __inline __m512 209 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 210 _mm512_rsqrt28_round_ps (__m512 __A, int __R) 211 { 212 __m512 __W; 213 return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A, 214 (__v16sf) __W, 215 (__mmask16) -1, __R); 216 } 217 218 extern __inline __m512 219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 220 _mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R) 221 { 222 return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A, 223 (__v16sf) __W, 224 (__mmask16) __U, __R); 225 } 226 227 extern __inline __m512 228 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 229 _mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R) 230 { 231 return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A, 232 (__v16sf) _mm512_setzero_ps (), 233 (__mmask16) __U, __R); 234 } 235 236 extern __inline __m128d 237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 238 _mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R) 239 { 240 return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B, 241 (__v2df) __A, 242 __R); 243 } 244 245 extern __inline __m128 246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 247 _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R) 248 { 249 return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B, 250 (__v4sf) __A, 251 __R); 252 } 253 254 #else 255 #define _mm512_exp2a23_round_pd(A, C) \ 256 __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C) 257 258 #define _mm512_mask_exp2a23_round_pd(W, U, A, C) \ 259 __builtin_ia32_exp2pd_mask(A, W, U, C) 260 261 #define _mm512_maskz_exp2a23_round_pd(U, A, C) \ 262 __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C) 263 264 #define _mm512_exp2a23_round_ps(A, C) \ 265 __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C) 266 267 #define _mm512_mask_exp2a23_round_ps(W, U, A, C) \ 268 __builtin_ia32_exp2ps_mask(A, W, U, C) 269 270 #define _mm512_maskz_exp2a23_round_ps(U, A, C) \ 271 __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C) 272 273 #define _mm512_rcp28_round_pd(A, C) \ 274 __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C) 275 276 #define _mm512_mask_rcp28_round_pd(W, U, A, C) \ 277 __builtin_ia32_rcp28pd_mask(A, W, U, C) 278 279 #define _mm512_maskz_rcp28_round_pd(U, A, C) \ 280 __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C) 281 282 #define _mm512_rcp28_round_ps(A, C) \ 283 __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C) 284 285 #define _mm512_mask_rcp28_round_ps(W, U, A, C) \ 286 __builtin_ia32_rcp28ps_mask(A, W, U, C) 287 288 #define _mm512_maskz_rcp28_round_ps(U, A, C) \ 289 __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C) 290 291 #define _mm512_rsqrt28_round_pd(A, C) \ 292 __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C) 293 294 #define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \ 295 __builtin_ia32_rsqrt28pd_mask(A, W, U, C) 296 297 #define _mm512_maskz_rsqrt28_round_pd(U, A, C) \ 298 __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C) 299 300 #define _mm512_rsqrt28_round_ps(A, C) \ 301 __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C) 302 303 #define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \ 304 __builtin_ia32_rsqrt28ps_mask(A, W, U, C) 305 306 #define _mm512_maskz_rsqrt28_round_ps(U, A, C) \ 307 __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C) 308 309 #define _mm_rcp28_round_sd(A, B, R) \ 310 __builtin_ia32_rcp28sd_round(A, B, R) 311 312 #define _mm_rcp28_round_ss(A, B, R) \ 313 __builtin_ia32_rcp28ss_round(A, B, R) 314 315 #define _mm_rsqrt28_round_sd(A, B, R) \ 316 __builtin_ia32_rsqrt28sd_round(A, B, R) 317 318 #define _mm_rsqrt28_round_ss(A, B, R) \ 319 __builtin_ia32_rsqrt28ss_round(A, B, R) 320 321 #endif 322 323 #define _mm512_exp2a23_pd(A) \ 324 _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION) 325 326 #define _mm512_mask_exp2a23_pd(W, U, A) \ 327 _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION) 328 329 #define _mm512_maskz_exp2a23_pd(U, A) \ 330 _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION) 331 332 #define _mm512_exp2a23_ps(A) \ 333 _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION) 334 335 #define _mm512_mask_exp2a23_ps(W, U, A) \ 336 _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION) 337 338 #define _mm512_maskz_exp2a23_ps(U, A) \ 339 _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION) 340 341 #define _mm512_rcp28_pd(A) \ 342 _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION) 343 344 #define _mm512_mask_rcp28_pd(W, U, A) \ 345 _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION) 346 347 #define _mm512_maskz_rcp28_pd(U, A) \ 348 _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION) 349 350 #define _mm512_rcp28_ps(A) \ 351 _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION) 352 353 #define _mm512_mask_rcp28_ps(W, U, A) \ 354 _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION) 355 356 #define _mm512_maskz_rcp28_ps(U, A) \ 357 _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION) 358 359 #define _mm512_rsqrt28_pd(A) \ 360 _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION) 361 362 #define _mm512_mask_rsqrt28_pd(W, U, A) \ 363 _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION) 364 365 #define _mm512_maskz_rsqrt28_pd(U, A) \ 366 _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION) 367 368 #define _mm512_rsqrt28_ps(A) \ 369 _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION) 370 371 #define _mm512_mask_rsqrt28_ps(W, U, A) \ 372 _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION) 373 374 #define _mm512_maskz_rsqrt28_ps(U, A) \ 375 _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION) 376 377 #define _mm_rcp28_sd(A, B) \ 378 __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION) 379 380 #define _mm_rcp28_ss(A, B) \ 381 __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION) 382 383 #define _mm_rsqrt28_sd(A, B) \ 384 __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION) 385 386 #define _mm_rsqrt28_ss(A, B) \ 387 __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION) 388 389 #ifdef __DISABLE_AVX512ER__ 390 #undef __DISABLE_AVX512ER__ 391 #pragma GCC pop_options 392 #endif /* __DISABLE_AVX512ER__ */ 393 394 #endif /* _AVX512ERINTRIN_H_INCLUDED */ 395