/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512ERINTRIN_H
#define __AVX512ERINTRIN_H

/*
 * Every intrinsic below is a function-like macro that forwards its arguments
 * to a __builtin_ia32_* builtin.  Each operation comes in the same six shapes:
 *
 *   <op>_round_*(A[, B], R)             source operand(s), a zero vector, an
 *                                       all-ones mask ((__mmaskN)-1), and R
 *                                       cast to int.
 *   mask_<op>_round_*(S, M, A[, B], R)  same, but S replaces the zero vector
 *                                       and M replaces the all-ones mask.
 *   maskz_<op>_round_*(M, A[, B], R)    same as the first form, but M replaces
 *                                       the all-ones mask.
 *   <op>_*, mask_<op>_*, maskz_<op>_*   the matching *_round_* macro with R
 *                                       fixed to _MM_FROUND_CUR_DIRECTION.
 *
 * The 512-bit (_mm512_*) forms take one source vector A; the scalar (_mm_*_ss
 * and _mm_*_sd) forms take two, A and B.
 *
 * Every wrapper parenthesizes each argument it forwards, so an argument that
 * is itself a macro cannot alter the argument list of the inner macro.
 *
 * NOTE(review): how the builtins combine S / the zero vector with the mask,
 * and which values of R they accept, is defined by the compiler builtins and
 * is not visible in this header -- confirm against the intrinsics reference.
 */

/* exp2a23 */
#define _mm512_exp2a23_round_pd(A, R) \
  ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)_mm512_setzero_pd(), \
                                       (__mmask8)-1, (int)(R)))

#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
  ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(S), (__mmask8)(M), \
                                       (int)(R)))

#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
  ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)_mm512_setzero_pd(), \
                                       (__mmask8)(M), (int)(R)))

#define _mm512_exp2a23_pd(A) \
  _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(S, M, A) \
  _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(M, A) \
  _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_round_ps(A, R) \
  ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)_mm512_setzero_ps(), \
                                      (__mmask16)-1, (int)(R)))

#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
  ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(S), (__mmask16)(M), \
                                      (int)(R)))

#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
  ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)_mm512_setzero_ps(), \
                                      (__mmask16)(M), (int)(R)))

#define _mm512_exp2a23_ps(A) \
  _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(S, M, A) \
  _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(M, A) \
  _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)

/* rsqrt28 */
#define _mm512_rsqrt28_round_pd(A, R) \
  ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)-1, (int)(R)))

#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
  ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(S), (__mmask8)(M), \
                                          (int)(R)))

#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
  ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(M), (int)(R)))

#define _mm512_rsqrt28_pd(A) \
  _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(S, M, A) \
  _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(M, A) \
  _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_round_ps(A, R) \
  ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)-1, (int)(R)))

#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
  ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)(__m512)(S), (__mmask16)(M), \
                                         (int)(R)))

#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
  ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(M), (int)(R)))

#define _mm512_rsqrt28_ps(A) \
  _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)

/* A is forwarded as (A), like every other wrapper in this header. */
#define _mm512_mask_rsqrt28_ps(S, M, A) \
  _mm512_mask_rsqrt28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(M, A) \
  _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(R)))

#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
  ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(S), \
                                               (__mmask8)(M), (int)(R)))

#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
  ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(M), (int)(R)))

#define _mm_rsqrt28_ss(A, B) \
  _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_ss(S, M, A, B) \
  _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_ss(M, A, B) \
  _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(R)))

#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
  ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(S), \
                                                (__mmask8)(M), (int)(R)))

#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
  ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(M), (int)(R)))

#define _mm_rsqrt28_sd(A, B) \
  _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_sd(S, M, A, B) \
  _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_sd(M, A, B) \
  _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

/* rcp28 */
#define _mm512_rcp28_round_pd(A, R) \
  ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)-1, (int)(R)))

#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
  ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                        (__v8df)(__m512d)(S), (__mmask8)(M), \
                                        (int)(R)))

#define _mm512_maskz_rcp28_round_pd(M, A, R) \
  ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                        (__v8df)_mm512_setzero_pd(), \
                                        (__mmask8)(M), (int)(R)))

#define _mm512_rcp28_pd(A) \
  _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(S, M, A) \
  _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(M, A) \
  _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_round_ps(A, R) \
  ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)-1, (int)(R)))

#define _mm512_mask_rcp28_round_ps(S, M, A, R) \
  ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)(__m512)(S), (__mmask16)(M), \
                                       (int)(R)))

#define _mm512_maskz_rcp28_round_ps(M, A, R) \
  ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                       (__v16sf)_mm512_setzero_ps(), \
                                       (__mmask16)(M), (int)(R)))

#define _mm512_rcp28_ps(A) \
  _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(S, M, A) \
  _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(M, A) \
  _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)-1, (int)(R)))

#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \
  ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)(__m128)(S), \
                                             (__mmask8)(M), (int)(R)))

#define _mm_maskz_rcp28_round_ss(M, A, B, R) \
  ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(M), (int)(R)))

#define _mm_rcp28_ss(A, B) \
  _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_ss(S, M, A, B) \
  _mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_ss(M, A, B) \
  _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)-1, (int)(R)))

#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \
  ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)(__m128d)(S), \
                                              (__mmask8)(M), (int)(R)))

#define _mm_maskz_rcp28_round_sd(M, A, B, R) \
  ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(M), (int)(R)))

#define _mm_rcp28_sd(A, B) \
  _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_sd(S, M, A, B) \
  _mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_sd(M, A, B) \
  _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#endif /* __AVX512ERINTRIN_H */