/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/ |
H A D | convolution_pack4.h | 127 float32x4_t _sum = vdupq_n_f32(0.f); in convolution_pack4_neon() local 152 _sum = vmlaq_laneq_f32(_sum, _w0, _val, 0); in convolution_pack4_neon() 153 _sum = vmlaq_laneq_f32(_sum, _w1, _val, 1); in convolution_pack4_neon() 154 _sum = vmlaq_laneq_f32(_sum, _w2, _val, 2); in convolution_pack4_neon() 155 _sum = vmlaq_laneq_f32(_sum, _w3, _val, 3); in convolution_pack4_neon() 157 _sum = vmlaq_lane_f32(_sum, _w0, vget_low_f32(_val), 0); in convolution_pack4_neon() 158 _sum = vmlaq_lane_f32(_sum, _w1, vget_low_f32(_val), 1); in convolution_pack4_neon() 159 _sum = vmlaq_lane_f32(_sum, _w2, vget_high_f32(_val), 0); in convolution_pack4_neon() 160 _sum = vmlaq_lane_f32(_sum, _w3, vget_high_f32(_val), 1); in convolution_pack4_neon() 167 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack4_neon() [all …]
|
H A D | convolution_pack4_bf16s.h | 128 float32x4_t _sum = vdupq_n_f32(0.f); in convolution_pack4_bf16s_neon() local 132 _sum = vld1q_f32(bias_data_ptr + p * 4); in convolution_pack4_bf16s_neon() 153 _sum = vmlaq_laneq_f32(_sum, _w0, _val, 0); in convolution_pack4_bf16s_neon() 154 _sum = vmlaq_laneq_f32(_sum, _w1, _val, 1); in convolution_pack4_bf16s_neon() 155 _sum = vmlaq_laneq_f32(_sum, _w2, _val, 2); in convolution_pack4_bf16s_neon() 156 _sum = vmlaq_laneq_f32(_sum, _w3, _val, 3); in convolution_pack4_bf16s_neon() 158 _sum = vmlaq_lane_f32(_sum, _w0, vget_low_f32(_val), 0); in convolution_pack4_bf16s_neon() 159 _sum = vmlaq_lane_f32(_sum, _w1, vget_low_f32(_val), 1); in convolution_pack4_bf16s_neon() 160 _sum = vmlaq_lane_f32(_sum, _w2, vget_high_f32(_val), 0); in convolution_pack4_bf16s_neon() 161 _sum = vmlaq_lane_f32(_sum, _w3, vget_high_f32(_val), 1); in convolution_pack4_bf16s_neon() [all …]
|
H A D | convolution_pack4to8_fp16s.h | 57 float16x8_t _sum = vdupq_n_f16((__fp16)0.f); in convolution_pack4to8_fp16sa_neon() local 61 _sum = vld1q_f16(bias_data_ptr + p * 8); in convolution_pack4to8_fp16sa_neon() 81 _sum = vfmaq_lane_f16(_sum, _w0, _val, 0); in convolution_pack4to8_fp16sa_neon() 82 _sum = vfmaq_lane_f16(_sum, _w1, _val, 1); in convolution_pack4to8_fp16sa_neon() 83 _sum = vfmaq_lane_f16(_sum, _w2, _val, 2); in convolution_pack4to8_fp16sa_neon() 84 _sum = vfmaq_lane_f16(_sum, _w3, _val, 3); in convolution_pack4to8_fp16sa_neon() 90 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack4to8_fp16sa_neon() 92 vst1q_f16(outptr + j * 8, _sum); in convolution_pack4to8_fp16sa_neon()
|
H A D | convolutiondepthwise_3x3.h | 440 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s1_neon() 441 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s1_neon() 447 _sum = vsetq_lane_f32(bias0, _sum, 3); in convdw3x3s1_neon() 453 float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); in convdw3x3s1_neon() 788 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s1_neon() 789 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s1_neon() 791 _sum = vsetq_lane_f32(bias0, _sum, 3); in convdw3x3s1_neon() 795 float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); in convdw3x3s1_neon() 1038 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s2_neon() 1039 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s2_neon() [all …]
|
H A D | convolution_5x5.h | 483 _sum = vmlaq_f32(_sum, _r2, _k2); in conv5x5s1_neon() 488 _sum = vmlaq_f32(_sum, _r3, _k3); in conv5x5s1_neon() 492 _sum = vmlaq_f32(_sum, _r4, _k20212223); in conv5x5s1_neon() 496 _sum = vmlaq_f32(_sum, _r0, _k0123); in conv5x5s1_neon() 513 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in conv5x5s1_neon() 890 _sum = vmlaq_f32(_sum, _r1, vld1q_f32(k1)); in conv5x5s1_neon() 893 _sum = vmlaq_f32(_sum, _r2, vld1q_f32(k2)); in conv5x5s1_neon() 896 _sum = vmlaq_f32(_sum, _r3, vld1q_f32(k3)); in conv5x5s1_neon() 899 _sum = vmlaq_f32(_sum, _r4, _k20212223); in conv5x5s1_neon() 914 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in conv5x5s1_neon() [all …]
|
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/ |
H A D | convolution_pack4.h | 127 float32x4_t _sum = vdupq_n_f32(0.f); in convolution_pack4_neon() local 152 _sum = vmlaq_laneq_f32(_sum, _w0, _val, 0); in convolution_pack4_neon() 153 _sum = vmlaq_laneq_f32(_sum, _w1, _val, 1); in convolution_pack4_neon() 154 _sum = vmlaq_laneq_f32(_sum, _w2, _val, 2); in convolution_pack4_neon() 155 _sum = vmlaq_laneq_f32(_sum, _w3, _val, 3); in convolution_pack4_neon() 157 _sum = vmlaq_lane_f32(_sum, _w0, vget_low_f32(_val), 0); in convolution_pack4_neon() 158 _sum = vmlaq_lane_f32(_sum, _w1, vget_low_f32(_val), 1); in convolution_pack4_neon() 159 _sum = vmlaq_lane_f32(_sum, _w2, vget_high_f32(_val), 0); in convolution_pack4_neon() 160 _sum = vmlaq_lane_f32(_sum, _w3, vget_high_f32(_val), 1); in convolution_pack4_neon() 167 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack4_neon() [all …]
|
H A D | convolution_pack4_bf16s.h | 128 float32x4_t _sum = vdupq_n_f32(0.f); in convolution_pack4_bf16s_neon() local 132 _sum = vld1q_f32(bias_data_ptr + p * 4); in convolution_pack4_bf16s_neon() 153 _sum = vmlaq_laneq_f32(_sum, _w0, _val, 0); in convolution_pack4_bf16s_neon() 154 _sum = vmlaq_laneq_f32(_sum, _w1, _val, 1); in convolution_pack4_bf16s_neon() 155 _sum = vmlaq_laneq_f32(_sum, _w2, _val, 2); in convolution_pack4_bf16s_neon() 156 _sum = vmlaq_laneq_f32(_sum, _w3, _val, 3); in convolution_pack4_bf16s_neon() 158 _sum = vmlaq_lane_f32(_sum, _w0, vget_low_f32(_val), 0); in convolution_pack4_bf16s_neon() 159 _sum = vmlaq_lane_f32(_sum, _w1, vget_low_f32(_val), 1); in convolution_pack4_bf16s_neon() 160 _sum = vmlaq_lane_f32(_sum, _w2, vget_high_f32(_val), 0); in convolution_pack4_bf16s_neon() 161 _sum = vmlaq_lane_f32(_sum, _w3, vget_high_f32(_val), 1); in convolution_pack4_bf16s_neon() [all …]
|
H A D | convolution_pack4to8_fp16s.h | 57 float16x8_t _sum = vdupq_n_f16((__fp16)0.f); in convolution_pack4to8_fp16sa_neon() local 61 _sum = vld1q_f16(bias_data_ptr + p * 8); in convolution_pack4to8_fp16sa_neon() 81 _sum = vfmaq_lane_f16(_sum, _w0, _val, 0); in convolution_pack4to8_fp16sa_neon() 82 _sum = vfmaq_lane_f16(_sum, _w1, _val, 1); in convolution_pack4to8_fp16sa_neon() 83 _sum = vfmaq_lane_f16(_sum, _w2, _val, 2); in convolution_pack4to8_fp16sa_neon() 84 _sum = vfmaq_lane_f16(_sum, _w3, _val, 3); in convolution_pack4to8_fp16sa_neon() 90 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack4to8_fp16sa_neon() 92 vst1q_f16(outptr + j * 8, _sum); in convolution_pack4to8_fp16sa_neon()
|
H A D | convolutiondepthwise_3x3.h | 440 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s1_neon() 441 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s1_neon() 447 _sum = vsetq_lane_f32(bias0, _sum, 3); in convdw3x3s1_neon() 453 float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); in convdw3x3s1_neon() 788 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s1_neon() 789 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s1_neon() 791 _sum = vsetq_lane_f32(bias0, _sum, 3); in convdw3x3s1_neon() 795 float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); in convdw3x3s1_neon() 1038 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s2_neon() 1039 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s2_neon() [all …]
|
H A D | convolution_5x5.h | 483 _sum = vmlaq_f32(_sum, _r2, _k2); in conv5x5s1_neon() 488 _sum = vmlaq_f32(_sum, _r3, _k3); in conv5x5s1_neon() 492 _sum = vmlaq_f32(_sum, _r4, _k20212223); in conv5x5s1_neon() 496 _sum = vmlaq_f32(_sum, _r0, _k0123); in conv5x5s1_neon() 513 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in conv5x5s1_neon() 890 _sum = vmlaq_f32(_sum, _r1, vld1q_f32(k1)); in conv5x5s1_neon() 893 _sum = vmlaq_f32(_sum, _r2, vld1q_f32(k2)); in conv5x5s1_neon() 896 _sum = vmlaq_f32(_sum, _r3, vld1q_f32(k3)); in conv5x5s1_neon() 899 _sum = vmlaq_f32(_sum, _r4, _k20212223); in conv5x5s1_neon() 914 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in conv5x5s1_neon() [all …]
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/ |
H A D | convolution_pack4.h | 127 float32x4_t _sum = vdupq_n_f32(0.f); in convolution_pack4_neon() local 152 _sum = vmlaq_laneq_f32(_sum, _w0, _val, 0); in convolution_pack4_neon() 153 _sum = vmlaq_laneq_f32(_sum, _w1, _val, 1); in convolution_pack4_neon() 154 _sum = vmlaq_laneq_f32(_sum, _w2, _val, 2); in convolution_pack4_neon() 155 _sum = vmlaq_laneq_f32(_sum, _w3, _val, 3); in convolution_pack4_neon() 157 _sum = vmlaq_lane_f32(_sum, _w0, vget_low_f32(_val), 0); in convolution_pack4_neon() 158 _sum = vmlaq_lane_f32(_sum, _w1, vget_low_f32(_val), 1); in convolution_pack4_neon() 159 _sum = vmlaq_lane_f32(_sum, _w2, vget_high_f32(_val), 0); in convolution_pack4_neon() 160 _sum = vmlaq_lane_f32(_sum, _w3, vget_high_f32(_val), 1); in convolution_pack4_neon() 167 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack4_neon() [all …]
|
H A D | convolution_pack4to8_fp16s.h | 57 float16x8_t _sum = vdupq_n_f16((__fp16)0.f); in convolution_pack4to8_fp16sa_neon() local 61 _sum = vld1q_f16(bias_data_ptr + p * 8); in convolution_pack4to8_fp16sa_neon() 81 _sum = vfmaq_lane_f16(_sum, _w0, _val, 0); in convolution_pack4to8_fp16sa_neon() 82 _sum = vfmaq_lane_f16(_sum, _w1, _val, 1); in convolution_pack4to8_fp16sa_neon() 83 _sum = vfmaq_lane_f16(_sum, _w2, _val, 2); in convolution_pack4to8_fp16sa_neon() 84 _sum = vfmaq_lane_f16(_sum, _w3, _val, 3); in convolution_pack4to8_fp16sa_neon() 90 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack4to8_fp16sa_neon() 92 vst1q_f16(outptr + j * 8, _sum); in convolution_pack4to8_fp16sa_neon()
|
H A D | convolutiondepthwise_3x3.h | 440 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s1_neon() 441 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s1_neon() 447 _sum = vsetq_lane_f32(bias0, _sum, 3); in convdw3x3s1_neon() 453 float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); in convdw3x3s1_neon() 788 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s1_neon() 789 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s1_neon() 791 _sum = vsetq_lane_f32(bias0, _sum, 3); in convdw3x3s1_neon() 795 float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); in convdw3x3s1_neon() 1038 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s2_neon() 1039 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s2_neon() [all …]
|
H A D | convolution_5x5.h | 483 _sum = vmlaq_f32(_sum, _r2, _k2); in conv5x5s1_neon() 488 _sum = vmlaq_f32(_sum, _r3, _k3); in conv5x5s1_neon() 492 _sum = vmlaq_f32(_sum, _r4, _k20212223); in conv5x5s1_neon() 496 _sum = vmlaq_f32(_sum, _r0, _k0123); in conv5x5s1_neon() 513 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in conv5x5s1_neon() 890 _sum = vmlaq_f32(_sum, _r1, vld1q_f32(k1)); in conv5x5s1_neon() 893 _sum = vmlaq_f32(_sum, _r2, vld1q_f32(k2)); in conv5x5s1_neon() 896 _sum = vmlaq_f32(_sum, _r3, vld1q_f32(k3)); in conv5x5s1_neon() 899 _sum = vmlaq_f32(_sum, _r4, _k20212223); in conv5x5s1_neon() 914 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in conv5x5s1_neon() [all …]
|
/dports/misc/ncnn/ncnn-20211208/src/layer/mips/ |
H A D | convolution1d_mips.cpp | 132 v4f32 _sum = (v4f32)__msa_fill_w(0); in forward() local 157 _sum = __msa_fmadd_w(_sum, _val0, _w0); in forward() 158 _sum = __msa_fmadd_w(_sum, _val1, _w1); in forward() 159 _sum = __msa_fmadd_w(_sum, _val2, _w2); in forward() 160 _sum = __msa_fmadd_w(_sum, _val3, _w3); in forward() 167 _sum = activation_ps(_sum, activation_type, activation_params); in forward() 169 __msa_st_w((v4i32)_sum, outptr, 0); in forward() 203 _sum = __msa_fmadd_w(_sum, _val, _w); in forward() 210 _sum = activation_ps(_sum, activation_type, activation_params); in forward() 248 _sum = __msa_fmadd_w(_sum, _val, _w); in forward() [all …]
|
H A D | convolution_pack4.h | 56 v4f32 _sum = (v4f32)__msa_fill_w(0); in convolution_pack4_msa() local 60 _sum = (v4f32)__msa_ld_w(bias_data_ptr + p * 4, 0); in convolution_pack4_msa() 85 _sum = __msa_fmadd_w(_sum, _val0, _w0); in convolution_pack4_msa() 86 _sum = __msa_fmadd_w(_sum, _val1, _w1); in convolution_pack4_msa() 87 _sum = __msa_fmadd_w(_sum, _val2, _w2); in convolution_pack4_msa() 88 _sum = __msa_fmadd_w(_sum, _val3, _w3); in convolution_pack4_msa() 94 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack4_msa() 96 __msa_st_w((v4i32)_sum, outptr + j * 4, 0); in convolution_pack4_msa()
|
H A D | deconvolution_pack4.h | 42 v4f32 _sum = (v4f32)__msa_fill_w(0); in deconvolution_pack4_msa() local 46 _sum = (v4f32)__msa_ld_w((const float*)bias_data_ptr + p * 4, 0); in deconvolution_pack4_msa() 88 _sum = __msa_fmadd_w(_sum, _val0, _w0); in deconvolution_pack4_msa() 89 _sum = __msa_fmadd_w(_sum, _val1, _w1); in deconvolution_pack4_msa() 90 _sum = __msa_fmadd_w(_sum, _val2, _w2); in deconvolution_pack4_msa() 91 _sum = __msa_fmadd_w(_sum, _val3, _w3); in deconvolution_pack4_msa() 98 _sum = activation_ps(_sum, activation_type, activation_params); in deconvolution_pack4_msa() 100 __msa_st_w((v4i32)_sum, outptr + j * 4, 0); in deconvolution_pack4_msa()
|
/dports/misc/ncnn/ncnn-20211208/src/layer/riscv/ |
H A D | convolution1d_riscv.cpp | 190 _sum = vfmacc_vf_f32m1(_sum, val, _w0, vl); in forward() 197 _sum = activation_ps(_sum, activation_type, activation_params, vl); in forward() 233 _sum = vfmacc_vf_f32m1(_sum, val, _w, vl); in forward() 240 _sum = activation_ps(_sum, activation_type, activation_params, vl); in forward() 278 _sum = vfmacc_vv_f32m1(_sum, _val, _w, vl); in forward() 456 _sum = vfwmacc_vf_f32m2(_sum, val, _w0, vl); in forward_fp16s() 499 _sum = vfwmacc_vf_f32m2(_sum, val, _w, vl); in forward_fp16s() 544 _sum = vfwmacc_vv_f32m2(_sum, _val, _w, vl); in forward_fp16s() 677 _sum = vfmacc_vf_f16m1(_sum, val, _w0, vl); in forward_fp16sa() 720 _sum = vfmacc_vf_f16m1(_sum, val, _w, vl); in forward_fp16sa() [all …]
|
/dports/science/code_saturne/code_saturne-7.1.0/src/alge/ |
H A D | cs_sles_it_priv.h | 270 double _sum; in _dot_product() local 272 s = _sum; in _dot_product() 302 double _sum; in _dot_product_xx() local 304 s = _sum; in _dot_product_xx() 337 double _sum[2]; in _dot_products_xx_xy() local 339 s[0] = _sum[0]; in _dot_products_xx_xy() 340 s[1] = _sum[1]; in _dot_products_xx_xy() 376 double _sum[2]; in _dot_products_xy_yz() local 378 s[0] = _sum[0]; in _dot_products_xy_yz() 379 s[1] = _sum[1]; in _dot_products_xy_yz() [all …]
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/ |
H A D | convolution_pack4to8_fp16s.h | 57 float16x8_t _sum = vdupq_n_f16((__fp16)0.f); in convolution_pack4to8_fp16sa_neon() local 61 _sum = vld1q_f16(bias_data_ptr + p * 8); in convolution_pack4to8_fp16sa_neon() 81 _sum = vfmaq_lane_f16(_sum, _w0, _val, 0); in convolution_pack4to8_fp16sa_neon() 82 _sum = vfmaq_lane_f16(_sum, _w1, _val, 1); in convolution_pack4to8_fp16sa_neon() 83 _sum = vfmaq_lane_f16(_sum, _w2, _val, 2); in convolution_pack4to8_fp16sa_neon() 84 _sum = vfmaq_lane_f16(_sum, _w3, _val, 3); in convolution_pack4to8_fp16sa_neon() 90 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack4to8_fp16sa_neon() 92 vst1q_f16(outptr + j * 8, _sum); in convolution_pack4to8_fp16sa_neon()
|
H A D | convolutiondepthwise_3x3.h | 440 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s1_neon() 441 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s1_neon() 447 _sum = vsetq_lane_f32(bias0, _sum, 3); in convdw3x3s1_neon() 453 float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); in convdw3x3s1_neon() 788 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s1_neon() 789 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s1_neon() 791 _sum = vsetq_lane_f32(bias0, _sum, 3); in convdw3x3s1_neon() 795 float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); in convdw3x3s1_neon() 1038 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s2_neon() 1039 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s2_neon() [all …]
|
H A D | convolution_5x5.h | 483 _sum = vmlaq_f32(_sum, _r2, _k2); in conv5x5s1_neon() 488 _sum = vmlaq_f32(_sum, _r3, _k3); in conv5x5s1_neon() 492 _sum = vmlaq_f32(_sum, _r4, _k20212223); in conv5x5s1_neon() 496 _sum = vmlaq_f32(_sum, _r0, _k0123); in conv5x5s1_neon() 513 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in conv5x5s1_neon() 890 _sum = vmlaq_f32(_sum, _r1, vld1q_f32(k1)); in conv5x5s1_neon() 893 _sum = vmlaq_f32(_sum, _r2, vld1q_f32(k2)); in conv5x5s1_neon() 896 _sum = vmlaq_f32(_sum, _r3, vld1q_f32(k3)); in conv5x5s1_neon() 899 _sum = vmlaq_f32(_sum, _r4, _k20212223); in conv5x5s1_neon() 914 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in conv5x5s1_neon() [all …]
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/ |
H A D | convolutiondepthwise_3x3.h | 440 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s1_neon() 441 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s1_neon() 447 _sum = vsetq_lane_f32(bias0, _sum, 3); in convdw3x3s1_neon() 453 float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); in convdw3x3s1_neon() 788 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s1_neon() 789 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s1_neon() 791 _sum = vsetq_lane_f32(bias0, _sum, 3); in convdw3x3s1_neon() 795 float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); in convdw3x3s1_neon() 1038 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s2_neon() 1039 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s2_neon() [all …]
|
H A D | convolution_5x5.h | 483 _sum = vmlaq_f32(_sum, _r2, _k2); in conv5x5s1_neon() 488 _sum = vmlaq_f32(_sum, _r3, _k3); in conv5x5s1_neon() 492 _sum = vmlaq_f32(_sum, _r4, _k20212223); in conv5x5s1_neon() 496 _sum = vmlaq_f32(_sum, _r0, _k0123); in conv5x5s1_neon() 513 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in conv5x5s1_neon() 890 _sum = vmlaq_f32(_sum, _r1, vld1q_f32(k1)); in conv5x5s1_neon() 893 _sum = vmlaq_f32(_sum, _r2, vld1q_f32(k2)); in conv5x5s1_neon() 896 _sum = vmlaq_f32(_sum, _r3, vld1q_f32(k3)); in conv5x5s1_neon() 899 _sum = vmlaq_f32(_sum, _r4, _k20212223); in conv5x5s1_neon() 914 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in conv5x5s1_neon() [all …]
|
/dports/databases/mongodb36/mongodb-src-r3.6.23/src/mongo/util/ |
H A D | summation.h | 58 std::tie(_sum, x) = _2Sum(_sum, x); // Compensated add: x maybe larger than _sum in addDouble() 79 return std::isnan(_sum) ? _special : _sum; in getDouble() 87 return std::isnan(_sum) ? DoubleDouble{_special, 0.0} : DoubleDouble{_sum, _addend}; in getDoubleDouble() 95 return !std::isfinite(_sum) ? Decimal128(_special, Decimal128::kRoundTo34Digits) in getDecimal() 96 : Decimal128(_sum, Decimal128::kRoundTo34Digits) in getDecimal() 109 return std::trunc(_sum) == _sum && std::trunc(_addend) == _addend; in isInteger() 149 double _sum = 0.0; variable
|