1 // Tencent is pleased to support the open source community by making ncnn available. 2 // 3 // Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. 4 // 5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except 6 // in compliance with the License. You may obtain a copy of the License at 7 // 8 // https://opensource.org/licenses/BSD-3-Clause 9 // 10 // Unless required by applicable law or agreed to in writing, software distributed 11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 // specific language governing permissions and limitations under the License. 14 15 #ifndef RVV_MATHFUN_H 16 #define RVV_MATHFUN_H 17 18 #ifdef RVV_SPEC_0_7 19 #include "riscv_v_071_fix.h" 20 #else 21 #include <riscv_vector.h> 22 #endif 23 24 #define c_inv_mant_mask ~0x7f800000u 25 #define c_cephes_SQRTHF 0.707106781186547524 26 #define c_cephes_log_p0 7.0376836292E-2 27 #define c_cephes_log_p1 -1.1514610310E-1 28 #define c_cephes_log_p2 1.1676998740E-1 29 #define c_cephes_log_p3 -1.2420140846E-1 30 #define c_cephes_log_p4 +1.4249322787E-1 31 #define c_cephes_log_p5 -1.6668057665E-1 32 #define c_cephes_log_p6 +2.0000714765E-1 33 #define c_cephes_log_p7 -2.4999993993E-1 34 #define c_cephes_log_p8 +3.3333331174E-1 35 #define c_cephes_log_q1 -2.12194440e-4 36 #define c_cephes_log_q2 0.693359375 37 38 #define _RVV_FLOAT32_LOG_OP(LMUL, MLEN) \ 39 static inline vfloat32m##LMUL##_t log_ps(vfloat32m##LMUL##_t x, word_type vl) \ 40 { \ 41 x = vfmax_vf_f32m##LMUL(x, 0.f, vl); /* force flush to zero on denormal values */ \ 42 vbool##MLEN##_t invalid_mask = vmfle_vf_f32m##LMUL##_b##MLEN(x, 0.f, vl); \ 43 \ 44 vint32m##LMUL##_t ux = vreinterpret_v_f32m##LMUL##_i32m##LMUL(x); \ 45 \ 46 vint32m##LMUL##_t emm0 = vsra_vx_i32m##LMUL(ux, 23, vl); \ 47 \ 48 /* keep only the fractional part */ \ 49 ux = vand_vx_i32m##LMUL(ux, c_inv_mant_mask, vl); \ 50 ux = vor_vx_i32m##LMUL(ux, 1056964608 /* reinterpret_cast<int>(0.5) */, vl); \ 51 x = vreinterpret_v_i32m##LMUL##_f32m##LMUL(ux); \ 52 \ 53 emm0 = vsub_vx_i32m##LMUL(emm0, 0x7f, vl); \ 54 vfloat32m##LMUL##_t e = vfcvt_f_x_v_f32m##LMUL(emm0, vl); \ 55 \ 56 e = vfadd_vf_f32m##LMUL(e, 1.f, vl); \ 57 \ 58 /* part2: */ \ 59 /* if( x < SQRTHF ) { */ \ 60 /* e -= 1; */ \ 61 /* x = x + x - 1.0; */ \ 62 /* } else { x = x - 1.0; } */ \ 63 vbool##MLEN##_t mask = vmflt_vf_f32m##LMUL##_b##MLEN(x, c_cephes_SQRTHF, vl); \ 64 x = vfadd_vv_f32m##LMUL##_m(mask, x, x, x, vl); \ 65 x = vfsub_vf_f32m##LMUL(x, 1.f, vl); \ 66 e = vfsub_vf_f32m##LMUL##_m(mask, e, e, 1.f, vl); \ 67 \ 68 vfloat32m##LMUL##_t z = vfmul_vv_f32m##LMUL(x, x, vl); \ 69 \ 70 vfloat32m##LMUL##_t y = vfmul_vf_f32m##LMUL(x, c_cephes_log_p0, vl); \ 71 y = vfadd_vf_f32m##LMUL(y, c_cephes_log_p1, vl); \ 72 y = vfmul_vv_f32m##LMUL(y, x, vl); \ 73 y = vfadd_vf_f32m##LMUL(y, c_cephes_log_p2, vl); \ 74 y = vfmul_vv_f32m##LMUL(y, x, vl); \ 75 y = vfadd_vf_f32m##LMUL(y, c_cephes_log_p3, vl); \ 76 y = vfmul_vv_f32m##LMUL(y, x, vl); \ 77 y = vfadd_vf_f32m##LMUL(y, c_cephes_log_p4, vl); \ 78 y = vfmul_vv_f32m##LMUL(y, x, vl); \ 79 y = vfadd_vf_f32m##LMUL(y, c_cephes_log_p5, vl); \ 80 y = vfmul_vv_f32m##LMUL(y, x, vl); \ 81 y = vfadd_vf_f32m##LMUL(y, c_cephes_log_p6, vl); \ 82 y = vfmul_vv_f32m##LMUL(y, x, vl); \ 83 y = vfadd_vf_f32m##LMUL(y, c_cephes_log_p7, vl); \ 84 y = vfmul_vv_f32m##LMUL(y, x, vl); \ 85 y = vfadd_vf_f32m##LMUL(y, c_cephes_log_p8, vl); \ 86 y = vfmul_vv_f32m##LMUL(y, x, vl); \ 87 \ 88 y = vfmul_vv_f32m##LMUL(y, z, vl); \ 89 \ 90 vfloat32m##LMUL##_t tmp = vfmul_vf_f32m##LMUL(e, c_cephes_log_q1, vl); \ 91 y = vfadd_vv_f32m##LMUL(y, tmp, vl); \ 92 \ 93 tmp = vfmul_vf_f32m##LMUL(z, 0.5f, vl); \ 94 y = vfsub_vv_f32m##LMUL(y, tmp, vl); \ 95 \ 96 tmp = vfmul_vf_f32m##LMUL(e, c_cephes_log_q2, vl); \ 97 x = vfadd_vv_f32m##LMUL(x, y, vl); \ 98 x = vfadd_vv_f32m##LMUL(x, tmp, vl); \ 99 /* negative arg will be NAN */ \ 100 vuint32m##LMUL##_t xtmp = vreinterpret_v_f32m##LMUL##_u32m##LMUL(x); \ 101 x = vreinterpret_v_u32m##LMUL##_f32m##LMUL(vor_vx_u32m##LMUL##_m(invalid_mask, xtmp, xtmp, 0xffffffff, vl)); \ 102 return x; \ 103 } 104 105 _RVV_FLOAT32_LOG_OP(1, 32) 106 _RVV_FLOAT32_LOG_OP(2, 16) 107 _RVV_FLOAT32_LOG_OP(4, 8) 108 _RVV_FLOAT32_LOG_OP(8, 4) 109 110 #define c_exp_hi 88.3762626647949f 111 #define c_exp_lo -88.3762626647949f 112 113 #define c_cephes_LOG2EF 1.44269504088896341 114 #define c_cephes_exp_C1 0.693359375 115 #define c_cephes_exp_C2 -2.12194440e-4 116 117 #define c_cephes_exp_p0 1.9875691500E-4 118 #define c_cephes_exp_p1 1.3981999507E-3 119 #define c_cephes_exp_p2 8.3334519073E-3 120 #define c_cephes_exp_p3 4.1665795894E-2 121 #define c_cephes_exp_p4 1.6666665459E-1 122 #define c_cephes_exp_p5 5.0000001201E-1 123 124 #define _RVV_FLOAT32_EXP_OP(LMUL, MLEN) \ 125 static inline vfloat32m##LMUL##_t exp_ps(vfloat32m##LMUL##_t x, word_type vl) \ 126 { \ 127 vfloat32m##LMUL##_t tmp, fx; \ 128 \ 129 x = vfmin_vf_f32m##LMUL(x, c_exp_hi, vl); \ 130 x = vfmax_vf_f32m##LMUL(x, c_exp_lo, vl); \ 131 \ 132 /* express exp(x) as exp(g + n*log(2)) */ \ 133 fx = vfmacc_vf_f32m##LMUL(vfmv_v_f_f32m##LMUL(0.5f, vl), c_cephes_LOG2EF, x, vl); \ 134 \ 135 /* perform a floorf */ \ 136 tmp = vfcvt_f_x_v_f32m##LMUL(vfcvt_x_f_v_i32m##LMUL(fx, vl), vl); \ 137 \ 138 /* if greater, substract 1 */ \ 139 vbool##MLEN##_t mask = vmfgt_vv_f32m##LMUL##_b##MLEN(tmp, fx, vl); \ 140 fx = vfsub_vf_f32m##LMUL##_m(mask, tmp, tmp, 1.f, vl); \ 141 \ 142 tmp = vfmul_vf_f32m##LMUL(fx, c_cephes_exp_C1, vl); \ 143 vfloat32m##LMUL##_t z = vfmul_vf_f32m##LMUL(fx, c_cephes_exp_C2, vl); \ 144 x = vfsub_vv_f32m##LMUL(x, tmp, vl); \ 145 x = vfsub_vv_f32m##LMUL(x, z, vl); \ 146 \ 147 vfloat32m##LMUL##_t y = vfmul_vf_f32m##LMUL(x, c_cephes_exp_p0, vl); \ 148 z = vfmul_vv_f32m##LMUL(x, x, vl); \ 149 \ 150 y = vfadd_vf_f32m##LMUL(y, c_cephes_exp_p1, vl); \ 151 y = vfmul_vv_f32m##LMUL(y, x, vl); \ 152 y = vfadd_vf_f32m##LMUL(y, c_cephes_exp_p2, vl); \ 153 y = vfmul_vv_f32m##LMUL(y, x, vl); \ 154 y = vfadd_vf_f32m##LMUL(y, c_cephes_exp_p3, vl); \ 155 y = vfmul_vv_f32m##LMUL(y, x, vl); \ 156 y = vfadd_vf_f32m##LMUL(y, c_cephes_exp_p4, vl); \ 157 y = vfmul_vv_f32m##LMUL(y, x, vl); \ 158 y = vfadd_vf_f32m##LMUL(y, c_cephes_exp_p5, vl); \ 159 \ 160 y = vfmul_vv_f32m##LMUL(y, z, vl); \ 161 y = vfadd_vv_f32m##LMUL(y, x, vl); \ 162 y = vfadd_vf_f32m##LMUL(y, 1.f, vl); \ 163 \ 164 /* build 2^n */ \ 165 vint32m##LMUL##_t mm = vfcvt_x_f_v_i32m##LMUL(fx, vl); \ 166 mm = vadd_vx_i32m##LMUL(mm, 0x7f, vl); \ 167 mm = vsll_vx_i32m##LMUL(mm, 23, vl); \ 168 vfloat32m##LMUL##_t pow2n = vreinterpret_v_i32m##LMUL##_f32m##LMUL(mm); \ 169 \ 170 y = vfmul_vv_f32m##LMUL(y, pow2n, vl); \ 171 return y; \ 172 } 173 174 _RVV_FLOAT32_EXP_OP(1, 32) 175 _RVV_FLOAT32_EXP_OP(2, 16) 176 _RVV_FLOAT32_EXP_OP(4, 8) 177 _RVV_FLOAT32_EXP_OP(8, 4) 178 179 #define c_minus_cephes_DP1 -0.78515625 180 #define c_minus_cephes_DP2 -2.4187564849853515625e-4 181 #define c_minus_cephes_DP3 -3.77489497744594108e-8 182 #define c_sincof_p0 -1.9515295891E-4 183 #define c_sincof_p1 8.3321608736E-3 184 #define c_sincof_p2 -1.6666654611E-1 185 #define c_coscof_p0 2.443315711809948E-005 186 #define c_coscof_p1 -1.388731625493765E-003 187 #define c_coscof_p2 4.166664568298827E-002 188 #define c_cephes_FOPI 1.27323954473516 // 4 / M_PI 189 190 #define _RVV_FLOAT32_SINCOS_OP(LMUL, MLEN) \ 191 static inline void sincos_ps(vfloat32m##LMUL##_t x, vfloat32m##LMUL##_t* ysin, vfloat32m##LMUL##_t* ycos, word_type vl) \ 192 { \ 193 /* any x */ \ 194 vfloat32m##LMUL##_t xmm1, xmm2, xmm3, y; \ 195 \ 196 vuint32m##LMUL##_t emm2; \ 197 \ 198 vbool##MLEN##_t sign_mask_sin, sign_mask_cos; \ 199 sign_mask_sin = vmflt_vf_f32m##LMUL##_b##MLEN(x, 0.f, vl); \ 200 x = vfsgnj_vf_f32m##LMUL(x, 1.f, vl); \ 201 \ 202 /* scale by 4/Pi */ \ 203 y = vfmul_vf_f32m##LMUL(x, c_cephes_FOPI, vl); \ 204 \ 205 /* store the integer part of y in mm0 */ \ 206 emm2 = vfcvt_xu_f_v_u32m##LMUL(y, vl); \ 207 /* j=(j+1) & (~1) (see the cephes sources) */ \ 208 emm2 = vadd_vx_u32m##LMUL(emm2, 1, vl); \ 209 emm2 = vand_vx_u32m##LMUL(emm2, ~1, vl); \ 210 y = vfcvt_f_xu_v_f32m##LMUL(emm2, vl); \ 211 \ 212 /* get the polynom selection mask */ \ 213 /* there is one polynom for 0 <= x <= Pi/4 */ \ 214 /* and another one for Pi/4<x<=Pi/2 */ \ 215 /* */ \ 216 /* Both branches will be computed. */ \ 217 vbool##MLEN##_t poly_mask = vmsne_vx_u32m##LMUL##_b##MLEN(vand_vx_u32m##LMUL(emm2, 2, vl), 0, vl); \ 218 \ 219 /* The magic pass: "Extended precision modular arithmetic" */ \ 220 /* x = ((x - y * DP1) - y * DP2) - y * DP3; */ \ 221 xmm1 = vfmul_vf_f32m##LMUL(y, c_minus_cephes_DP1, vl); \ 222 xmm2 = vfmul_vf_f32m##LMUL(y, c_minus_cephes_DP2, vl); \ 223 xmm3 = vfmul_vf_f32m##LMUL(y, c_minus_cephes_DP3, vl); \ 224 x = vfadd_vv_f32m##LMUL(x, xmm1, vl); \ 225 x = vfadd_vv_f32m##LMUL(x, xmm2, vl); \ 226 x = vfadd_vv_f32m##LMUL(x, xmm3, vl); \ 227 \ 228 sign_mask_sin = vmxor_mm_b##MLEN(sign_mask_sin, vmsne_vx_u32m##LMUL##_b##MLEN(vand_vx_u32m##LMUL(emm2, 4, vl), 0, vl), vl); \ 229 sign_mask_cos = vmsne_vx_u32m##LMUL##_b##MLEN(vand_vx_u32m##LMUL(vsub_vx_u32m##LMUL(emm2, 2, vl), 4, vl), 0, vl); \ 230 \ 231 /* Evaluate the first polynom (0 <= x <= Pi/4) in y1, */ \ 232 /* and the second polynom (Pi/4 <= x <= 0) in y2 */ \ 233 vfloat32m##LMUL##_t z = vfmul_vv_f32m##LMUL(x, x, vl); \ 234 vfloat32m##LMUL##_t y1, y2; \ 235 \ 236 y1 = vfmul_vf_f32m##LMUL(z, c_coscof_p0, vl); \ 237 y2 = vfmul_vf_f32m##LMUL(z, c_sincof_p0, vl); \ 238 y1 = vfadd_vf_f32m##LMUL(y1, c_coscof_p1, vl); \ 239 y2 = vfadd_vf_f32m##LMUL(y2, c_sincof_p1, vl); \ 240 y1 = vfmul_vv_f32m##LMUL(y1, z, vl); \ 241 y2 = vfmul_vv_f32m##LMUL(y2, z, vl); \ 242 y1 = vfadd_vf_f32m##LMUL(y1, c_coscof_p2, vl); \ 243 y2 = vfadd_vf_f32m##LMUL(y2, c_sincof_p2, vl); \ 244 y1 = vfmul_vv_f32m##LMUL(y1, z, vl); \ 245 y2 = vfmul_vv_f32m##LMUL(y2, z, vl); \ 246 y1 = vfmul_vv_f32m##LMUL(y1, z, vl); \ 247 y2 = vfmul_vv_f32m##LMUL(y2, x, vl); \ 248 y1 = vfsub_vv_f32m##LMUL(y1, vfmul_vf_f32m##LMUL(z, 0.5f, vl), vl); \ 249 y2 = vfadd_vv_f32m##LMUL(y2, x, vl); \ 250 y1 = vfadd_vf_f32m##LMUL(y1, 1.f, vl); \ 251 \ 252 /* select the correct result from the two polynoms */ \ 253 vfloat32m##LMUL##_t ys = vmerge_vvm_f32m##LMUL(poly_mask, y2, y1, vl); \ 254 vfloat32m##LMUL##_t yc = vmerge_vvm_f32m##LMUL(poly_mask, y1, y2, vl); \ 255 *ysin = vmerge_vvm_f32m##LMUL(sign_mask_sin, ys, vfneg_v_f32m##LMUL(ys, vl), vl); \ 256 *ycos = vmerge_vvm_f32m##LMUL(sign_mask_cos, vfneg_v_f32m##LMUL(yc, vl), yc, vl); \ 257 } 258 259 _RVV_FLOAT32_SINCOS_OP(1, 32) 260 _RVV_FLOAT32_SINCOS_OP(2, 16) 261 _RVV_FLOAT32_SINCOS_OP(4, 8) 262 _RVV_FLOAT32_SINCOS_OP(8, 4) 263 264 #define _RVV_FLOAT32_SIN_OP(LMUL, MLEN) \ 265 static inline vfloat32m##LMUL##_t sin_ps(vfloat32m##LMUL##_t x, word_type vl) \ 266 { \ 267 vfloat32m##LMUL##_t ysin, ycos; \ 268 sincos_ps(x, &ysin, &ycos, vl); \ 269 return ysin; \ 270 } 271 272 _RVV_FLOAT32_SIN_OP(1, 32) 273 _RVV_FLOAT32_SIN_OP(2, 16) 274 _RVV_FLOAT32_SIN_OP(4, 8) 275 _RVV_FLOAT32_SIN_OP(8, 4) 276 277 #define _RVV_FLOAT32_COS_OP(LMUL, MLEN) \ 278 static inline vfloat32m##LMUL##_t cos_ps(vfloat32m##LMUL##_t x, word_type vl) \ 279 { \ 280 vfloat32m##LMUL##_t ysin, ycos; \ 281 sincos_ps(x, &ysin, &ycos, vl); \ 282 return ycos; \ 283 } 284 285 _RVV_FLOAT32_COS_OP(1, 32) 286 _RVV_FLOAT32_COS_OP(2, 16) 287 _RVV_FLOAT32_COS_OP(4, 8) 288 _RVV_FLOAT32_COS_OP(8, 4) 289 290 #define c_cephes_HALFMAXLOGF 44.014845935754205f 291 #define c_cephes_tanh_C1 0.625f 292 293 #define c_cephes_tanh_p0 -5.70498872745E-3 294 #define c_cephes_tanh_p1 +2.06390887954E-2 295 #define c_cephes_tanh_p2 -5.37397155531E-2 296 #define c_cephes_tanh_p3 +1.33314422036E-1 297 #define c_cephes_tanh_p4 -3.33332819422E-1 298 299 #define _RVV_FLOAT32_TANH_OP(LMUL, MLEN) \ 300 static inline vfloat32m##LMUL##_t tanh_ps(vfloat32m##LMUL##_t x, word_type vl) \ 301 { \ 302 vfloat32m##LMUL##_t x2 = vfsgnj_vf_f32m##LMUL(x, 1.f, vl); \ 303 \ 304 vbool##MLEN##_t mask_l = vmfge_vf_f32m##LMUL##_b##MLEN(x2, c_cephes_tanh_C1, vl); \ 305 vbool##MLEN##_t mask_l2 = vmfgt_vf_f32m##LMUL##_b##MLEN(x2, c_cephes_HALFMAXLOGF, vl); \ 306 \ 307 /* abs(x) >= 0.625 */ \ 308 vfloat32m##LMUL##_t exp_x_x = exp_ps(vfadd_vv_f32m##LMUL(x, x, vl), vl); \ 309 vfloat32m##LMUL##_t y0 = vfrsub_vf_f32m##LMUL(vfrdiv_vf_f32m##LMUL(vfadd_vf_f32m##LMUL(exp_x_x, 1.f, vl), 2.f, vl), 1.f, vl); \ 310 \ 311 /* abs(x) < 0.625 */ \ 312 /* z = x2 * x2; */ \ 313 /* z = */ \ 314 /* (((( -5.70498872745E-3 * z */ \ 315 /* + 2.06390887954E-2) * z */ \ 316 /* - 5.37397155531E-2) * z */ \ 317 /* + 1.33314422036E-1) * z */ \ 318 /* - 3.33332819422E-1) * z * x */ \ 319 /* + x; */ \ 320 vfloat32m##LMUL##_t z = vfmul_vv_f32m##LMUL(x, x, vl); \ 321 \ 322 vfloat32m##LMUL##_t y = vfmul_vf_f32m##LMUL(z, c_cephes_tanh_p0, vl); \ 323 y = vfadd_vf_f32m##LMUL(y, c_cephes_tanh_p1, vl); \ 324 y = vfmul_vv_f32m##LMUL(y, z, vl); \ 325 y = vfadd_vf_f32m##LMUL(y, c_cephes_tanh_p2, vl); \ 326 y = vfmul_vv_f32m##LMUL(y, z, vl); \ 327 y = vfadd_vf_f32m##LMUL(y, c_cephes_tanh_p3, vl); \ 328 y = vfmul_vv_f32m##LMUL(y, z, vl); \ 329 y = vfadd_vf_f32m##LMUL(y, c_cephes_tanh_p4, vl); \ 330 \ 331 y = vfmul_vv_f32m##LMUL(y, z, vl); \ 332 y = vfmul_vv_f32m##LMUL(y, x, vl); \ 333 y = vfadd_vv_f32m##LMUL(y, x, vl); \ 334 \ 335 /* abs(x) > HALFMAXLOGF */ \ 336 vfloat32m##LMUL##_t y1 = vfsgnj_vv_f32m##LMUL(vfmv_v_f_f32m##LMUL(1.f, vl), x, vl); \ 337 \ 338 y = vmerge_vvm_f32m##LMUL(mask_l, y, y0, vl); \ 339 y = vmerge_vvm_f32m##LMUL(mask_l2, y, y1, vl); \ 340 return y; \ 341 } 342 343 _RVV_FLOAT32_TANH_OP(1, 32) 344 _RVV_FLOAT32_TANH_OP(2, 16) 345 _RVV_FLOAT32_TANH_OP(4, 8) 346 _RVV_FLOAT32_TANH_OP(8, 4) 347 348 #define _RVV_FLOAT32_POW_OP(LMUL, MLEN) \ 349 static inline vfloat32m##LMUL##_t pow_ps(vfloat32m##LMUL##_t a, vfloat32m##LMUL##_t b, word_type vl) \ 350 { \ 351 /* pow(x, m) = exp(m * log(x)) */ \ 352 return exp_ps(vfmul_vv_f32m##LMUL(b, log_ps(a, vl), vl), vl); \ 353 } 354 355 _RVV_FLOAT32_POW_OP(1, 32) 356 _RVV_FLOAT32_POW_OP(2, 16) 357 _RVV_FLOAT32_POW_OP(4, 8) 358 _RVV_FLOAT32_POW_OP(8, 4) 359 360 #define _RVV_FLOAT32_SIGMOID_OP(LMUL, MLEN) \ 361 static inline vfloat32m##LMUL##_t sigmoid_ps(vfloat32m##LMUL##_t _v, word_type vl) \ 362 { \ 363 _v = vfneg_v_f32m##LMUL(_v, vl); \ 364 _v = exp_ps(_v, vl); \ 365 _v = vfadd_vf_f32m##LMUL(_v, 1.f, vl); \ 366 vfloat32m##LMUL##_t _reciprocal = vfrec7_v_f32m##LMUL(_v, vl); \ 367 _reciprocal = vfmul_vv_f32m##LMUL(vfrsub_vf_f32m##LMUL(vfmul_vv_f32m##LMUL(_v, _reciprocal, vl), 2.f, vl), _reciprocal, vl); \ 368 /* _reciprocal = vfmul_vv_f32m##LMUL(vfrsub_vf_f32m##LMUL(vfmul_vv_f32m##LMUL(_v, _reciprocal, vl), 2.f, vl), _reciprocal, vl); */ \ 369 return _reciprocal; \ 370 } 371 372 _RVV_FLOAT32_SIGMOID_OP(1, 32) 373 _RVV_FLOAT32_SIGMOID_OP(2, 16) 374 _RVV_FLOAT32_SIGMOID_OP(4, 8) 375 _RVV_FLOAT32_SIGMOID_OP(8, 4) 376 377 #endif // RVV_MATHFUN_H 378