Home
last modified time | relevance | path

Searched refs:_sum (Results 51 – 75 of 932) sorted by relevance

12345678910>>...38

/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Dconvolution_pack4.h127 float32x4_t _sum = vdupq_n_f32(0.f); in convolution_pack4_neon() local
152 _sum = vmlaq_laneq_f32(_sum, _w0, _val, 0); in convolution_pack4_neon()
153 _sum = vmlaq_laneq_f32(_sum, _w1, _val, 1); in convolution_pack4_neon()
154 _sum = vmlaq_laneq_f32(_sum, _w2, _val, 2); in convolution_pack4_neon()
155 _sum = vmlaq_laneq_f32(_sum, _w3, _val, 3); in convolution_pack4_neon()
157 _sum = vmlaq_lane_f32(_sum, _w0, vget_low_f32(_val), 0); in convolution_pack4_neon()
158 _sum = vmlaq_lane_f32(_sum, _w1, vget_low_f32(_val), 1); in convolution_pack4_neon()
159 _sum = vmlaq_lane_f32(_sum, _w2, vget_high_f32(_val), 0); in convolution_pack4_neon()
160 _sum = vmlaq_lane_f32(_sum, _w3, vget_high_f32(_val), 1); in convolution_pack4_neon()
167 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack4_neon()
[all …]
H A Dconvolution_pack4_bf16s.h128 float32x4_t _sum = vdupq_n_f32(0.f); in convolution_pack4_bf16s_neon() local
132 _sum = vld1q_f32(bias_data_ptr + p * 4); in convolution_pack4_bf16s_neon()
153 _sum = vmlaq_laneq_f32(_sum, _w0, _val, 0); in convolution_pack4_bf16s_neon()
154 _sum = vmlaq_laneq_f32(_sum, _w1, _val, 1); in convolution_pack4_bf16s_neon()
155 _sum = vmlaq_laneq_f32(_sum, _w2, _val, 2); in convolution_pack4_bf16s_neon()
156 _sum = vmlaq_laneq_f32(_sum, _w3, _val, 3); in convolution_pack4_bf16s_neon()
158 _sum = vmlaq_lane_f32(_sum, _w0, vget_low_f32(_val), 0); in convolution_pack4_bf16s_neon()
159 _sum = vmlaq_lane_f32(_sum, _w1, vget_low_f32(_val), 1); in convolution_pack4_bf16s_neon()
160 _sum = vmlaq_lane_f32(_sum, _w2, vget_high_f32(_val), 0); in convolution_pack4_bf16s_neon()
161 _sum = vmlaq_lane_f32(_sum, _w3, vget_high_f32(_val), 1); in convolution_pack4_bf16s_neon()
[all …]
H A Dconvolution_pack4to8_fp16s.h57 float16x8_t _sum = vdupq_n_f16((__fp16)0.f); in convolution_pack4to8_fp16sa_neon() local
61 _sum = vld1q_f16(bias_data_ptr + p * 8); in convolution_pack4to8_fp16sa_neon()
81 _sum = vfmaq_lane_f16(_sum, _w0, _val, 0); in convolution_pack4to8_fp16sa_neon()
82 _sum = vfmaq_lane_f16(_sum, _w1, _val, 1); in convolution_pack4to8_fp16sa_neon()
83 _sum = vfmaq_lane_f16(_sum, _w2, _val, 2); in convolution_pack4to8_fp16sa_neon()
84 _sum = vfmaq_lane_f16(_sum, _w3, _val, 3); in convolution_pack4to8_fp16sa_neon()
90 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack4to8_fp16sa_neon()
92 vst1q_f16(outptr + j * 8, _sum); in convolution_pack4to8_fp16sa_neon()
H A Dconvolutiondepthwise_3x3.h440 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s1_neon()
441 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s1_neon()
447 _sum = vsetq_lane_f32(bias0, _sum, 3); in convdw3x3s1_neon()
453 float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); in convdw3x3s1_neon()
788 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s1_neon()
789 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s1_neon()
791 _sum = vsetq_lane_f32(bias0, _sum, 3); in convdw3x3s1_neon()
795 float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); in convdw3x3s1_neon()
1038 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s2_neon()
1039 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s2_neon()
[all …]
H A Dconvolution_5x5.h483 _sum = vmlaq_f32(_sum, _r2, _k2); in conv5x5s1_neon()
488 _sum = vmlaq_f32(_sum, _r3, _k3); in conv5x5s1_neon()
492 _sum = vmlaq_f32(_sum, _r4, _k20212223); in conv5x5s1_neon()
496 _sum = vmlaq_f32(_sum, _r0, _k0123); in conv5x5s1_neon()
513 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in conv5x5s1_neon()
890 _sum = vmlaq_f32(_sum, _r1, vld1q_f32(k1)); in conv5x5s1_neon()
893 _sum = vmlaq_f32(_sum, _r2, vld1q_f32(k2)); in conv5x5s1_neon()
896 _sum = vmlaq_f32(_sum, _r3, vld1q_f32(k3)); in conv5x5s1_neon()
899 _sum = vmlaq_f32(_sum, _r4, _k20212223); in conv5x5s1_neon()
914 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in conv5x5s1_neon()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Dconvolution_pack4.h127 float32x4_t _sum = vdupq_n_f32(0.f); in convolution_pack4_neon() local
152 _sum = vmlaq_laneq_f32(_sum, _w0, _val, 0); in convolution_pack4_neon()
153 _sum = vmlaq_laneq_f32(_sum, _w1, _val, 1); in convolution_pack4_neon()
154 _sum = vmlaq_laneq_f32(_sum, _w2, _val, 2); in convolution_pack4_neon()
155 _sum = vmlaq_laneq_f32(_sum, _w3, _val, 3); in convolution_pack4_neon()
157 _sum = vmlaq_lane_f32(_sum, _w0, vget_low_f32(_val), 0); in convolution_pack4_neon()
158 _sum = vmlaq_lane_f32(_sum, _w1, vget_low_f32(_val), 1); in convolution_pack4_neon()
159 _sum = vmlaq_lane_f32(_sum, _w2, vget_high_f32(_val), 0); in convolution_pack4_neon()
160 _sum = vmlaq_lane_f32(_sum, _w3, vget_high_f32(_val), 1); in convolution_pack4_neon()
167 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack4_neon()
[all …]
H A Dconvolution_pack4_bf16s.h128 float32x4_t _sum = vdupq_n_f32(0.f); in convolution_pack4_bf16s_neon() local
132 _sum = vld1q_f32(bias_data_ptr + p * 4); in convolution_pack4_bf16s_neon()
153 _sum = vmlaq_laneq_f32(_sum, _w0, _val, 0); in convolution_pack4_bf16s_neon()
154 _sum = vmlaq_laneq_f32(_sum, _w1, _val, 1); in convolution_pack4_bf16s_neon()
155 _sum = vmlaq_laneq_f32(_sum, _w2, _val, 2); in convolution_pack4_bf16s_neon()
156 _sum = vmlaq_laneq_f32(_sum, _w3, _val, 3); in convolution_pack4_bf16s_neon()
158 _sum = vmlaq_lane_f32(_sum, _w0, vget_low_f32(_val), 0); in convolution_pack4_bf16s_neon()
159 _sum = vmlaq_lane_f32(_sum, _w1, vget_low_f32(_val), 1); in convolution_pack4_bf16s_neon()
160 _sum = vmlaq_lane_f32(_sum, _w2, vget_high_f32(_val), 0); in convolution_pack4_bf16s_neon()
161 _sum = vmlaq_lane_f32(_sum, _w3, vget_high_f32(_val), 1); in convolution_pack4_bf16s_neon()
[all …]
H A Dconvolution_pack4to8_fp16s.h57 float16x8_t _sum = vdupq_n_f16((__fp16)0.f); in convolution_pack4to8_fp16sa_neon() local
61 _sum = vld1q_f16(bias_data_ptr + p * 8); in convolution_pack4to8_fp16sa_neon()
81 _sum = vfmaq_lane_f16(_sum, _w0, _val, 0); in convolution_pack4to8_fp16sa_neon()
82 _sum = vfmaq_lane_f16(_sum, _w1, _val, 1); in convolution_pack4to8_fp16sa_neon()
83 _sum = vfmaq_lane_f16(_sum, _w2, _val, 2); in convolution_pack4to8_fp16sa_neon()
84 _sum = vfmaq_lane_f16(_sum, _w3, _val, 3); in convolution_pack4to8_fp16sa_neon()
90 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack4to8_fp16sa_neon()
92 vst1q_f16(outptr + j * 8, _sum); in convolution_pack4to8_fp16sa_neon()
H A Dconvolutiondepthwise_3x3.h440 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s1_neon()
441 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s1_neon()
447 _sum = vsetq_lane_f32(bias0, _sum, 3); in convdw3x3s1_neon()
453 float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); in convdw3x3s1_neon()
788 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s1_neon()
789 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s1_neon()
791 _sum = vsetq_lane_f32(bias0, _sum, 3); in convdw3x3s1_neon()
795 float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); in convdw3x3s1_neon()
1038 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s2_neon()
1039 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s2_neon()
[all …]
H A Dconvolution_5x5.h483 _sum = vmlaq_f32(_sum, _r2, _k2); in conv5x5s1_neon()
488 _sum = vmlaq_f32(_sum, _r3, _k3); in conv5x5s1_neon()
492 _sum = vmlaq_f32(_sum, _r4, _k20212223); in conv5x5s1_neon()
496 _sum = vmlaq_f32(_sum, _r0, _k0123); in conv5x5s1_neon()
513 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in conv5x5s1_neon()
890 _sum = vmlaq_f32(_sum, _r1, vld1q_f32(k1)); in conv5x5s1_neon()
893 _sum = vmlaq_f32(_sum, _r2, vld1q_f32(k2)); in conv5x5s1_neon()
896 _sum = vmlaq_f32(_sum, _r3, vld1q_f32(k3)); in conv5x5s1_neon()
899 _sum = vmlaq_f32(_sum, _r4, _k20212223); in conv5x5s1_neon()
914 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in conv5x5s1_neon()
[all …]
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Dconvolution_pack4.h127 float32x4_t _sum = vdupq_n_f32(0.f); in convolution_pack4_neon() local
152 _sum = vmlaq_laneq_f32(_sum, _w0, _val, 0); in convolution_pack4_neon()
153 _sum = vmlaq_laneq_f32(_sum, _w1, _val, 1); in convolution_pack4_neon()
154 _sum = vmlaq_laneq_f32(_sum, _w2, _val, 2); in convolution_pack4_neon()
155 _sum = vmlaq_laneq_f32(_sum, _w3, _val, 3); in convolution_pack4_neon()
157 _sum = vmlaq_lane_f32(_sum, _w0, vget_low_f32(_val), 0); in convolution_pack4_neon()
158 _sum = vmlaq_lane_f32(_sum, _w1, vget_low_f32(_val), 1); in convolution_pack4_neon()
159 _sum = vmlaq_lane_f32(_sum, _w2, vget_high_f32(_val), 0); in convolution_pack4_neon()
160 _sum = vmlaq_lane_f32(_sum, _w3, vget_high_f32(_val), 1); in convolution_pack4_neon()
167 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack4_neon()
[all …]
H A Dconvolution_pack4to8_fp16s.h57 float16x8_t _sum = vdupq_n_f16((__fp16)0.f); in convolution_pack4to8_fp16sa_neon() local
61 _sum = vld1q_f16(bias_data_ptr + p * 8); in convolution_pack4to8_fp16sa_neon()
81 _sum = vfmaq_lane_f16(_sum, _w0, _val, 0); in convolution_pack4to8_fp16sa_neon()
82 _sum = vfmaq_lane_f16(_sum, _w1, _val, 1); in convolution_pack4to8_fp16sa_neon()
83 _sum = vfmaq_lane_f16(_sum, _w2, _val, 2); in convolution_pack4to8_fp16sa_neon()
84 _sum = vfmaq_lane_f16(_sum, _w3, _val, 3); in convolution_pack4to8_fp16sa_neon()
90 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack4to8_fp16sa_neon()
92 vst1q_f16(outptr + j * 8, _sum); in convolution_pack4to8_fp16sa_neon()
H A Dconvolutiondepthwise_3x3.h440 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s1_neon()
441 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s1_neon()
447 _sum = vsetq_lane_f32(bias0, _sum, 3); in convdw3x3s1_neon()
453 float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); in convdw3x3s1_neon()
788 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s1_neon()
789 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s1_neon()
791 _sum = vsetq_lane_f32(bias0, _sum, 3); in convdw3x3s1_neon()
795 float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); in convdw3x3s1_neon()
1038 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s2_neon()
1039 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s2_neon()
[all …]
H A Dconvolution_5x5.h483 _sum = vmlaq_f32(_sum, _r2, _k2); in conv5x5s1_neon()
488 _sum = vmlaq_f32(_sum, _r3, _k3); in conv5x5s1_neon()
492 _sum = vmlaq_f32(_sum, _r4, _k20212223); in conv5x5s1_neon()
496 _sum = vmlaq_f32(_sum, _r0, _k0123); in conv5x5s1_neon()
513 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in conv5x5s1_neon()
890 _sum = vmlaq_f32(_sum, _r1, vld1q_f32(k1)); in conv5x5s1_neon()
893 _sum = vmlaq_f32(_sum, _r2, vld1q_f32(k2)); in conv5x5s1_neon()
896 _sum = vmlaq_f32(_sum, _r3, vld1q_f32(k3)); in conv5x5s1_neon()
899 _sum = vmlaq_f32(_sum, _r4, _k20212223); in conv5x5s1_neon()
914 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in conv5x5s1_neon()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/mips/
H A Dconvolution1d_mips.cpp132 v4f32 _sum = (v4f32)__msa_fill_w(0); in forward() local
157 _sum = __msa_fmadd_w(_sum, _val0, _w0); in forward()
158 _sum = __msa_fmadd_w(_sum, _val1, _w1); in forward()
159 _sum = __msa_fmadd_w(_sum, _val2, _w2); in forward()
160 _sum = __msa_fmadd_w(_sum, _val3, _w3); in forward()
167 _sum = activation_ps(_sum, activation_type, activation_params); in forward()
169 __msa_st_w((v4i32)_sum, outptr, 0); in forward()
203 _sum = __msa_fmadd_w(_sum, _val, _w); in forward()
210 _sum = activation_ps(_sum, activation_type, activation_params); in forward()
248 _sum = __msa_fmadd_w(_sum, _val, _w); in forward()
[all …]
H A Dconvolution_pack4.h56 v4f32 _sum = (v4f32)__msa_fill_w(0); in convolution_pack4_msa() local
60 _sum = (v4f32)__msa_ld_w(bias_data_ptr + p * 4, 0); in convolution_pack4_msa()
85 _sum = __msa_fmadd_w(_sum, _val0, _w0); in convolution_pack4_msa()
86 _sum = __msa_fmadd_w(_sum, _val1, _w1); in convolution_pack4_msa()
87 _sum = __msa_fmadd_w(_sum, _val2, _w2); in convolution_pack4_msa()
88 _sum = __msa_fmadd_w(_sum, _val3, _w3); in convolution_pack4_msa()
94 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack4_msa()
96 __msa_st_w((v4i32)_sum, outptr + j * 4, 0); in convolution_pack4_msa()
H A Ddeconvolution_pack4.h42 v4f32 _sum = (v4f32)__msa_fill_w(0); in deconvolution_pack4_msa() local
46 _sum = (v4f32)__msa_ld_w((const float*)bias_data_ptr + p * 4, 0); in deconvolution_pack4_msa()
88 _sum = __msa_fmadd_w(_sum, _val0, _w0); in deconvolution_pack4_msa()
89 _sum = __msa_fmadd_w(_sum, _val1, _w1); in deconvolution_pack4_msa()
90 _sum = __msa_fmadd_w(_sum, _val2, _w2); in deconvolution_pack4_msa()
91 _sum = __msa_fmadd_w(_sum, _val3, _w3); in deconvolution_pack4_msa()
98 _sum = activation_ps(_sum, activation_type, activation_params); in deconvolution_pack4_msa()
100 __msa_st_w((v4i32)_sum, outptr + j * 4, 0); in deconvolution_pack4_msa()
/dports/misc/ncnn/ncnn-20211208/src/layer/riscv/
H A Dconvolution1d_riscv.cpp190 _sum = vfmacc_vf_f32m1(_sum, val, _w0, vl); in forward()
197 _sum = activation_ps(_sum, activation_type, activation_params, vl); in forward()
233 _sum = vfmacc_vf_f32m1(_sum, val, _w, vl); in forward()
240 _sum = activation_ps(_sum, activation_type, activation_params, vl); in forward()
278 _sum = vfmacc_vv_f32m1(_sum, _val, _w, vl); in forward()
456 _sum = vfwmacc_vf_f32m2(_sum, val, _w0, vl); in forward_fp16s()
499 _sum = vfwmacc_vf_f32m2(_sum, val, _w, vl); in forward_fp16s()
544 _sum = vfwmacc_vv_f32m2(_sum, _val, _w, vl); in forward_fp16s()
677 _sum = vfmacc_vf_f16m1(_sum, val, _w0, vl); in forward_fp16sa()
720 _sum = vfmacc_vf_f16m1(_sum, val, _w, vl); in forward_fp16sa()
[all …]
/dports/science/code_saturne/code_saturne-7.1.0/src/alge/
H A Dcs_sles_it_priv.h270 double _sum; in _dot_product() local
272 s = _sum; in _dot_product()
302 double _sum; in _dot_product_xx() local
304 s = _sum; in _dot_product_xx()
337 double _sum[2]; in _dot_products_xx_xy() local
339 s[0] = _sum[0]; in _dot_products_xx_xy()
340 s[1] = _sum[1]; in _dot_products_xx_xy()
376 double _sum[2]; in _dot_products_xy_yz() local
378 s[0] = _sum[0]; in _dot_products_xy_yz()
379 s[1] = _sum[1]; in _dot_products_xy_yz()
[all …]
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Dconvolution_pack4to8_fp16s.h57 float16x8_t _sum = vdupq_n_f16((__fp16)0.f); in convolution_pack4to8_fp16sa_neon() local
61 _sum = vld1q_f16(bias_data_ptr + p * 8); in convolution_pack4to8_fp16sa_neon()
81 _sum = vfmaq_lane_f16(_sum, _w0, _val, 0); in convolution_pack4to8_fp16sa_neon()
82 _sum = vfmaq_lane_f16(_sum, _w1, _val, 1); in convolution_pack4to8_fp16sa_neon()
83 _sum = vfmaq_lane_f16(_sum, _w2, _val, 2); in convolution_pack4to8_fp16sa_neon()
84 _sum = vfmaq_lane_f16(_sum, _w3, _val, 3); in convolution_pack4to8_fp16sa_neon()
90 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack4to8_fp16sa_neon()
92 vst1q_f16(outptr + j * 8, _sum); in convolution_pack4to8_fp16sa_neon()
H A Dconvolutiondepthwise_3x3.h440 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s1_neon()
441 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s1_neon()
447 _sum = vsetq_lane_f32(bias0, _sum, 3); in convdw3x3s1_neon()
453 float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); in convdw3x3s1_neon()
788 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s1_neon()
789 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s1_neon()
791 _sum = vsetq_lane_f32(bias0, _sum, 3); in convdw3x3s1_neon()
795 float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); in convdw3x3s1_neon()
1038 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s2_neon()
1039 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s2_neon()
[all …]
H A Dconvolution_5x5.h483 _sum = vmlaq_f32(_sum, _r2, _k2); in conv5x5s1_neon()
488 _sum = vmlaq_f32(_sum, _r3, _k3); in conv5x5s1_neon()
492 _sum = vmlaq_f32(_sum, _r4, _k20212223); in conv5x5s1_neon()
496 _sum = vmlaq_f32(_sum, _r0, _k0123); in conv5x5s1_neon()
513 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in conv5x5s1_neon()
890 _sum = vmlaq_f32(_sum, _r1, vld1q_f32(k1)); in conv5x5s1_neon()
893 _sum = vmlaq_f32(_sum, _r2, vld1q_f32(k2)); in conv5x5s1_neon()
896 _sum = vmlaq_f32(_sum, _r3, vld1q_f32(k3)); in conv5x5s1_neon()
899 _sum = vmlaq_f32(_sum, _r4, _k20212223); in conv5x5s1_neon()
914 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in conv5x5s1_neon()
[all …]
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Dconvolutiondepthwise_3x3.h440 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s1_neon()
441 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s1_neon()
447 _sum = vsetq_lane_f32(bias0, _sum, 3); in convdw3x3s1_neon()
453 float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); in convdw3x3s1_neon()
788 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s1_neon()
789 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s1_neon()
791 _sum = vsetq_lane_f32(bias0, _sum, 3); in convdw3x3s1_neon()
795 float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); in convdw3x3s1_neon()
1038 _sum = vmlaq_f32(_sum, _r10, _k345x); in convdw3x3s2_neon()
1039 _sum = vmlaq_f32(_sum, _r20, _k678x); in convdw3x3s2_neon()
[all …]
H A Dconvolution_5x5.h483 _sum = vmlaq_f32(_sum, _r2, _k2); in conv5x5s1_neon()
488 _sum = vmlaq_f32(_sum, _r3, _k3); in conv5x5s1_neon()
492 _sum = vmlaq_f32(_sum, _r4, _k20212223); in conv5x5s1_neon()
496 _sum = vmlaq_f32(_sum, _r0, _k0123); in conv5x5s1_neon()
513 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in conv5x5s1_neon()
890 _sum = vmlaq_f32(_sum, _r1, vld1q_f32(k1)); in conv5x5s1_neon()
893 _sum = vmlaq_f32(_sum, _r2, vld1q_f32(k2)); in conv5x5s1_neon()
896 _sum = vmlaq_f32(_sum, _r3, vld1q_f32(k3)); in conv5x5s1_neon()
899 _sum = vmlaq_f32(_sum, _r4, _k20212223); in conv5x5s1_neon()
914 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in conv5x5s1_neon()
[all …]
/dports/databases/mongodb36/mongodb-src-r3.6.23/src/mongo/util/
H A Dsummation.h58 std::tie(_sum, x) = _2Sum(_sum, x); // Compensated add: x maybe larger than _sum in addDouble()
79 return std::isnan(_sum) ? _special : _sum; in getDouble()
87 return std::isnan(_sum) ? DoubleDouble{_special, 0.0} : DoubleDouble{_sum, _addend}; in getDoubleDouble()
95 return !std::isfinite(_sum) ? Decimal128(_special, Decimal128::kRoundTo34Digits) in getDecimal()
96 : Decimal128(_sum, Decimal128::kRoundTo34Digits) in getDecimal()
109 return std::trunc(_sum) == _sum && std::trunc(_addend) == _addend; in isInteger()
149 double _sum = 0.0; variable

12345678910>>...38