Home
last modified time | relevance | path

Searched refs:_sum (Results 76 – 100 of 932) sorted by relevance

12345678910>>...38

/dports/devel/spark/spark-2.1.1/core/src/main/scala/org/apache/spark/util/
H A DAccumulatorV2.scala299 newAcc._sum = this._sum
304 _sum = 0L
313 _sum += v
322 _sum += v
336 def sum: Long = _sum
346 _sum += o.sum
374 newAcc._sum = this._sum
379 _sum = 0.0
388 _sum += v
397 _sum += v
[all …]
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Dconvolution_arm.cpp668 _sum = vmlaq_laneq_f32(_sum, _w0, _val, 0); in forward()
669 _sum = vmlaq_laneq_f32(_sum, _w1, _val, 1); in forward()
670 _sum = vmlaq_laneq_f32(_sum, _w2, _val, 2); in forward()
671 _sum = vmlaq_laneq_f32(_sum, _w3, _val, 3); in forward()
754 _sum = vmlaq_f32(_sum, _val, _w); in forward()
1356 _sum = vfmaq_f32(_sum, _val, _w); in forward_fp16s()
1725 _sum = vfmaq_f16(_sum, _val, _w); in forward_fp16sa()
2065 _sum = vfma_lane_f16(_sum, _w0, _val, 0); in forward_fp16sa()
2066 _sum = vfma_lane_f16(_sum, _w1, _val, 1); in forward_fp16sa()
2117 _sum = vfma_f16(_sum, _val, _w); in forward_fp16sa()
[all …]
H A Dinnerproduct_arm.cpp258 _sum = vmlaq_f32(_sum, _val, _k); in forward()
605 _sum = vaddq_f32(_sum, _sum2); in forward()
753 _sum = vfmaq_f32(_sum, _val, _k); in forward_fp16s()
786 _sum = vfmaq_f32(_sum, _val, _k); in forward_fp16s()
915 _sum = vfmaq_f32(_sum, _val, _w); in forward_fp16s()
950 _sum = vfmaq_f32(_sum, _m, _w); in forward_fp16s()
1090 _sum = vfmaq_f16(_sum, _val, _k); in forward_fp16sa()
1320 _sum = vfma_f16(_sum, _val, _k); in forward_fp16sa()
1353 _sum = vfma_f16(_sum, _val, _k); in forward_fp16sa()
1713 _sum = vfmaq_f16(_sum, _m, _w); in forward_fp16sa()
[all …]
H A Dconvolutiondepthwise_5x5.h797 _sum = vmlaq_f32(_sum, _r2, _k2); in convdw5x5s1_neon()
802 _sum = vmlaq_f32(_sum, _r3, _k3); in convdw5x5s1_neon()
806 _sum = vmlaq_f32(_sum, _r4, _k20212223); in convdw5x5s1_neon()
810 _sum = vmlaq_f32(_sum, _r0, _k0123); in convdw5x5s1_neon()
827 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in convdw5x5s1_neon()
1486 _sum = vmlaq_f32(_sum, _r1, vld1q_f32(k1)); in convdw5x5s1_neon()
1489 _sum = vmlaq_f32(_sum, _r2, vld1q_f32(k2)); in convdw5x5s1_neon()
1492 _sum = vmlaq_f32(_sum, _r3, vld1q_f32(k3)); in convdw5x5s1_neon()
1495 _sum = vmlaq_f32(_sum, _r4, _k20212223); in convdw5x5s1_neon()
1510 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in convdw5x5s1_neon()
[all …]
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Dinnerproduct_arm.cpp160 _sum = vmlaq_f32(_sum, _val, _k); in forward()
446 _sum = vaddq_f32(_sum, _sum2); in forward()
594 _sum = vfmaq_f32(_sum, _val, _k); in forward_fp16s()
627 _sum = vfmaq_f32(_sum, _val, _k); in forward_fp16s()
733 _sum = vfmaq_f32(_sum, _val, _w); in forward_fp16s()
768 _sum = vfmaq_f32(_sum, _m, _w); in forward_fp16s()
908 _sum = vfmaq_f16(_sum, _val, _k); in forward_fp16sa()
1138 _sum = vfma_f16(_sum, _val, _k); in forward_fp16sa()
1171 _sum = vfma_f16(_sum, _val, _k); in forward_fp16sa()
1508 _sum = vfmaq_f16(_sum, _m, _w); in forward_fp16sa()
[all …]
H A Dconvolution_pack1to4_fp16s.h57 float32x4_t _sum = vdupq_n_f32(0.f); in convolution_pack1to4_fp16s_neon() local
61 _sum = vld1q_f32(bias_data_ptr + p * 4); in convolution_pack1to4_fp16s_neon()
76 _sum = vfmaq_f32(_sum, _val, _w); in convolution_pack1to4_fp16s_neon()
82 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack1to4_fp16s_neon()
84 vst1_f16(outptr + j * 4, vcvt_f16_f32(_sum)); in convolution_pack1to4_fp16s_neon()
134 float16x4_t _sum = vdup_n_f16((__fp16)0.f); in convolution_pack1to4_fp16sa_neon() local
138 _sum = vld1_f16(bias_data_ptr + p * 4); in convolution_pack1to4_fp16sa_neon()
153 _sum = vfma_f16(_sum, _val, _w); in convolution_pack1to4_fp16sa_neon()
159 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack1to4_fp16sa_neon()
161 vst1_f16(outptr + j * 4, _sum); in convolution_pack1to4_fp16sa_neon()
H A Dconvolutiondepthwise_5x5.h797 _sum = vmlaq_f32(_sum, _r2, _k2); in convdw5x5s1_neon()
802 _sum = vmlaq_f32(_sum, _r3, _k3); in convdw5x5s1_neon()
806 _sum = vmlaq_f32(_sum, _r4, _k20212223); in convdw5x5s1_neon()
810 _sum = vmlaq_f32(_sum, _r0, _k0123); in convdw5x5s1_neon()
827 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in convdw5x5s1_neon()
1486 _sum = vmlaq_f32(_sum, _r1, vld1q_f32(k1)); in convdw5x5s1_neon()
1489 _sum = vmlaq_f32(_sum, _r2, vld1q_f32(k2)); in convdw5x5s1_neon()
1492 _sum = vmlaq_f32(_sum, _r3, vld1q_f32(k3)); in convdw5x5s1_neon()
1495 _sum = vmlaq_f32(_sum, _r4, _k20212223); in convdw5x5s1_neon()
1510 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in convdw5x5s1_neon()
[all …]
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Dinnerproduct_arm.cpp160 _sum = vmlaq_f32(_sum, _val, _k); in forward()
446 _sum = vaddq_f32(_sum, _sum2); in forward()
594 _sum = vfmaq_f32(_sum, _val, _k); in forward_fp16s()
627 _sum = vfmaq_f32(_sum, _val, _k); in forward_fp16s()
733 _sum = vfmaq_f32(_sum, _val, _w); in forward_fp16s()
768 _sum = vfmaq_f32(_sum, _m, _w); in forward_fp16s()
908 _sum = vfmaq_f16(_sum, _val, _k); in forward_fp16sa()
1138 _sum = vfma_f16(_sum, _val, _k); in forward_fp16sa()
1171 _sum = vfma_f16(_sum, _val, _k); in forward_fp16sa()
1508 _sum = vfmaq_f16(_sum, _m, _w); in forward_fp16sa()
[all …]
H A Dconvolution_pack1to4_fp16s.h57 float32x4_t _sum = vdupq_n_f32(0.f); in convolution_pack1to4_fp16s_neon() local
61 _sum = vld1q_f32(bias_data_ptr + p * 4); in convolution_pack1to4_fp16s_neon()
76 _sum = vfmaq_f32(_sum, _val, _w); in convolution_pack1to4_fp16s_neon()
82 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack1to4_fp16s_neon()
84 vst1_f16(outptr + j * 4, vcvt_f16_f32(_sum)); in convolution_pack1to4_fp16s_neon()
134 float16x4_t _sum = vdup_n_f16((__fp16)0.f); in convolution_pack1to4_fp16sa_neon() local
138 _sum = vld1_f16(bias_data_ptr + p * 4); in convolution_pack1to4_fp16sa_neon()
153 _sum = vfma_f16(_sum, _val, _w); in convolution_pack1to4_fp16sa_neon()
159 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack1to4_fp16sa_neon()
161 vst1_f16(outptr + j * 4, _sum); in convolution_pack1to4_fp16sa_neon()
H A Dconvolutiondepthwise_5x5.h797 _sum = vmlaq_f32(_sum, _r2, _k2); in convdw5x5s1_neon()
802 _sum = vmlaq_f32(_sum, _r3, _k3); in convdw5x5s1_neon()
806 _sum = vmlaq_f32(_sum, _r4, _k20212223); in convdw5x5s1_neon()
810 _sum = vmlaq_f32(_sum, _r0, _k0123); in convdw5x5s1_neon()
827 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in convdw5x5s1_neon()
1486 _sum = vmlaq_f32(_sum, _r1, vld1q_f32(k1)); in convdw5x5s1_neon()
1489 _sum = vmlaq_f32(_sum, _r2, vld1q_f32(k2)); in convdw5x5s1_neon()
1492 _sum = vmlaq_f32(_sum, _r3, vld1q_f32(k3)); in convdw5x5s1_neon()
1495 _sum = vmlaq_f32(_sum, _r4, _k20212223); in convdw5x5s1_neon()
1510 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in convdw5x5s1_neon()
[all …]
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Dinnerproduct_arm.cpp160 _sum = vmlaq_f32(_sum, _val, _k); in forward()
446 _sum = vaddq_f32(_sum, _sum2); in forward()
594 _sum = vfmaq_f32(_sum, _val, _k); in forward_fp16s()
627 _sum = vfmaq_f32(_sum, _val, _k); in forward_fp16s()
733 _sum = vfmaq_f32(_sum, _val, _w); in forward_fp16s()
768 _sum = vfmaq_f32(_sum, _m, _w); in forward_fp16s()
908 _sum = vfmaq_f16(_sum, _val, _k); in forward_fp16sa()
1138 _sum = vfma_f16(_sum, _val, _k); in forward_fp16sa()
1171 _sum = vfma_f16(_sum, _val, _k); in forward_fp16sa()
1508 _sum = vfmaq_f16(_sum, _m, _w); in forward_fp16sa()
[all …]
H A Dconvolution_pack1to4_fp16s.h57 float32x4_t _sum = vdupq_n_f32(0.f); in convolution_pack1to4_fp16s_neon() local
61 _sum = vld1q_f32(bias_data_ptr + p * 4); in convolution_pack1to4_fp16s_neon()
76 _sum = vfmaq_f32(_sum, _val, _w); in convolution_pack1to4_fp16s_neon()
82 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack1to4_fp16s_neon()
84 vst1_f16(outptr + j * 4, vcvt_f16_f32(_sum)); in convolution_pack1to4_fp16s_neon()
134 float16x4_t _sum = vdup_n_f16((__fp16)0.f); in convolution_pack1to4_fp16sa_neon() local
138 _sum = vld1_f16(bias_data_ptr + p * 4); in convolution_pack1to4_fp16sa_neon()
153 _sum = vfma_f16(_sum, _val, _w); in convolution_pack1to4_fp16sa_neon()
159 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack1to4_fp16sa_neon()
161 vst1_f16(outptr + j * 4, _sum); in convolution_pack1to4_fp16sa_neon()
H A Dconvolutiondepthwise_5x5.h797 _sum = vmlaq_f32(_sum, _r2, _k2); in convdw5x5s1_neon()
802 _sum = vmlaq_f32(_sum, _r3, _k3); in convdw5x5s1_neon()
806 _sum = vmlaq_f32(_sum, _r4, _k20212223); in convdw5x5s1_neon()
810 _sum = vmlaq_f32(_sum, _r0, _k0123); in convdw5x5s1_neon()
827 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in convdw5x5s1_neon()
1486 _sum = vmlaq_f32(_sum, _r1, vld1q_f32(k1)); in convdw5x5s1_neon()
1489 _sum = vmlaq_f32(_sum, _r2, vld1q_f32(k2)); in convdw5x5s1_neon()
1492 _sum = vmlaq_f32(_sum, _r3, vld1q_f32(k3)); in convdw5x5s1_neon()
1495 _sum = vmlaq_f32(_sum, _r4, _k20212223); in convdw5x5s1_neon()
1510 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in convdw5x5s1_neon()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Dconvolution_pack1to4_fp16s.h57 float32x4_t _sum = vdupq_n_f32(0.f); in convolution_pack1to4_fp16s_neon() local
61 _sum = vld1q_f32(bias_data_ptr + p * 4); in convolution_pack1to4_fp16s_neon()
76 _sum = vfmaq_f32(_sum, _val, _w); in convolution_pack1to4_fp16s_neon()
82 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack1to4_fp16s_neon()
84 vst1_f16(outptr + j * 4, vcvt_f16_f32(_sum)); in convolution_pack1to4_fp16s_neon()
134 float16x4_t _sum = vdup_n_f16((__fp16)0.f); in convolution_pack1to4_fp16sa_neon() local
138 _sum = vld1_f16(bias_data_ptr + p * 4); in convolution_pack1to4_fp16sa_neon()
153 _sum = vfma_f16(_sum, _val, _w); in convolution_pack1to4_fp16sa_neon()
159 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack1to4_fp16sa_neon()
161 vst1_f16(outptr + j * 4, _sum); in convolution_pack1to4_fp16sa_neon()
H A Dconvolutiondepthwise_5x5.h797 _sum = vmlaq_f32(_sum, _r2, _k2); in convdw5x5s1_neon()
802 _sum = vmlaq_f32(_sum, _r3, _k3); in convdw5x5s1_neon()
806 _sum = vmlaq_f32(_sum, _r4, _k20212223); in convdw5x5s1_neon()
810 _sum = vmlaq_f32(_sum, _r0, _k0123); in convdw5x5s1_neon()
827 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in convdw5x5s1_neon()
1486 _sum = vmlaq_f32(_sum, _r1, vld1q_f32(k1)); in convdw5x5s1_neon()
1489 _sum = vmlaq_f32(_sum, _r2, vld1q_f32(k2)); in convdw5x5s1_neon()
1492 _sum = vmlaq_f32(_sum, _r3, vld1q_f32(k3)); in convdw5x5s1_neon()
1495 _sum = vmlaq_f32(_sum, _r4, _k20212223); in convdw5x5s1_neon()
1510 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in convdw5x5s1_neon()
[all …]
H A Dinnerproduct_arm.cpp163 _sum = vmlaq_f32(_sum, _val, _k); in forward()
449 _sum = vaddq_f32(_sum, _sum2); in forward()
597 _sum = vfmaq_f32(_sum, _val, _k); in forward_fp16s()
630 _sum = vfmaq_f32(_sum, _val, _k); in forward_fp16s()
736 _sum = vfmaq_f32(_sum, _val, _w); in forward_fp16s()
771 _sum = vfmaq_f32(_sum, _m, _w); in forward_fp16s()
911 _sum = vfmaq_f16(_sum, _val, _k); in forward_fp16sa()
1141 _sum = vfma_f16(_sum, _val, _k); in forward_fp16sa()
1174 _sum = vfma_f16(_sum, _val, _k); in forward_fp16sa()
1511 _sum = vfmaq_f16(_sum, _m, _w); in forward_fp16sa()
[all …]
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/
H A Dconvolution_x86.cpp530 _sum = _mm256_add_ps(_sum_all, _sum); in forward()
598 _sum = _mm256_fmadd_ps(_val, _w, _sum); in forward()
651 _sum = _mm256_fmadd_ps(_val0, _w0, _sum); in forward()
773 _sum = _mm_fmadd_ps(_val0, _w0, _sum); in forward()
775 _sum = _mm_fmadd_ps(_val1, _w1, _sum); in forward()
777 _sum = _mm_fmadd_ps(_val2, _w2, _sum); in forward()
779 _sum = _mm_fmadd_ps(_val3, _w3, _sum); in forward()
781 _sum = _mm_fmadd_ps(_val4, _w4, _sum); in forward()
783 _sum = _mm_fmadd_ps(_val5, _w5, _sum); in forward()
785 _sum = _mm_fmadd_ps(_val6, _w6, _sum); in forward()
[all …]
/dports/databases/mongodb36/mongodb-src-r3.6.23/src/mongo/util/
H A Dsummation.cpp56 if (_sum > limits::min() && _sum < limits::max()) in fitsLong()
65 if (_sum == limits::max()) in fitsLong()
70 if (_sum == limits::min()) in fitsLong()
82 if (_sum == std::numeric_limits<long long>::max()) { in getLong()
84 dassert(_addend < -0.5 && -_sum == std::numeric_limits<long long>::min()); in getLong()
87 long long sum = llround(_sum); in getLong()
88 sum += llround((_sum - sum) + _addend); in getLong()
/dports/misc/ncnn/ncnn-20211208/src/layer/riscv/
H A Dconvolution_pack1ton_fp16s.h60 vfloat32m2_t _sum = vfmv_v_f_f32m2(0.f, vl); in convolution_pack1ton_fp16s_rvv() local
64 _sum = vle32_v_f32m2(bias_data_ptr + p * packn, vl); in convolution_pack1ton_fp16s_rvv()
79 _sum = vfwmacc_vf_f32m2(_sum, val, _w, vl); in convolution_pack1ton_fp16s_rvv()
85 _sum = activation_ps(_sum, activation_type, activation_params, vl); in convolution_pack1ton_fp16s_rvv()
87 vse16_v_f16m1(outptr + j * packn, vfncvt_f_f_w_f16m1(_sum, vl), vl); in convolution_pack1ton_fp16s_rvv()
140 vfloat16m1_t _sum = vfmv_v_f_f16m1(0.f, vl); in convolution_pack1ton_fp16sa_rvv() local
144 _sum = vle16_v_f16m1(bias_data_ptr + p * packn, vl); in convolution_pack1ton_fp16sa_rvv()
159 _sum = vfmacc_vf_f16m1(_sum, val, _w, vl); in convolution_pack1ton_fp16sa_rvv()
165 _sum = activation_ps(_sum, activation_type, activation_params, vl); in convolution_pack1ton_fp16sa_rvv()
167 vse16_v_f16m1(outptr + j * packn, _sum, vl); in convolution_pack1ton_fp16sa_rvv()
H A Ddeconvolution_pack1ton_fp16s.h45 vfloat32m2_t _sum = vfmv_v_f_f32m2(0.f, vl); in deconvolution_pack1ton_fp16s_rvv() local
49 _sum = vle32_v_f32m2(bias_data_ptr + p * packn, vl); in deconvolution_pack1ton_fp16s_rvv()
86 _sum = vfwmacc_vf_f32m2(_sum, val, _w, vl); in deconvolution_pack1ton_fp16s_rvv()
93 _sum = activation_ps(_sum, activation_type, activation_params, vl); in deconvolution_pack1ton_fp16s_rvv()
95 vse16_v_f16m1(outptr + j * packn, vfncvt_f_f_w_f16m1(_sum, vl), vl); in deconvolution_pack1ton_fp16s_rvv()
133 vfloat16m1_t _sum = vfmv_v_f_f16m1(0.f, vl); in deconvolution_pack1ton_fp16sa_rvv() local
137 _sum = vle16_v_f16m1(bias_data_ptr + p * packn, vl); in deconvolution_pack1ton_fp16sa_rvv()
174 _sum = vfmacc_vf_f16m1(_sum, val, _w, vl); in deconvolution_pack1ton_fp16sa_rvv()
181 _sum = activation_ps(_sum, activation_type, activation_params, vl); in deconvolution_pack1ton_fp16sa_rvv()
183 vse16_v_f16m1(outptr + j * packn, _sum, vl); in deconvolution_pack1ton_fp16sa_rvv()
H A Dconvolution_packn_fp16s.h60 vfloat32m2_t _sum = vfmv_v_f_f32m2(0.f, vl); in convolution_packn_fp16s_rvv() local
64 _sum = vle32_v_f32m2(bias_data_ptr + p * packn, vl); in convolution_packn_fp16s_rvv()
83 _sum = vfwmacc_vf_f32m2(_sum, val, _w0, vl); in convolution_packn_fp16s_rvv()
90 _sum = activation_ps(_sum, activation_type, activation_params, vl); in convolution_packn_fp16s_rvv()
92 vse16_v_f16m1(outptr + j * packn, vfncvt_f_f_w_f16m1(_sum, vl), vl); in convolution_packn_fp16s_rvv()
145 vfloat16m1_t _sum = vfmv_v_f_f16m1(0.f, vl); in convolution_packn_fp16sa_rvv() local
149 _sum = vle16_v_f16m1(bias_data_ptr + p * packn, vl); in convolution_packn_fp16sa_rvv()
168 _sum = vfmacc_vf_f16m1(_sum, val, _w0, vl); in convolution_packn_fp16sa_rvv()
175 _sum = activation_ps(_sum, activation_type, activation_params, vl); in convolution_packn_fp16sa_rvv()
177 vse16_v_f16m1(outptr + j * packn, _sum, vl); in convolution_packn_fp16sa_rvv()
H A Ddeconvolution_packn_fp16s.h45 vfloat32m2_t _sum = vfmv_v_f_f32m2(0.f, vl); in deconvolution_packn_fp16s_rvv() local
49 _sum = vle32_v_f32m2(bias_data_ptr + p * packn, vl); in deconvolution_packn_fp16s_rvv()
87 _sum = vfwmacc_vf_f32m2(_sum, val, _w0, vl); in deconvolution_packn_fp16s_rvv()
95 _sum = activation_ps(_sum, activation_type, activation_params, vl); in deconvolution_packn_fp16s_rvv()
97 vse16_v_f16m1(outptr + j * packn, vfncvt_f_f_w_f16m1(_sum, vl), vl); in deconvolution_packn_fp16s_rvv()
135 vfloat16m1_t _sum = vfmv_v_f_f16m1(0.f, vl); in deconvolution_packn_fp16sa_rvv() local
139 _sum = vle16_v_f16m1(bias_data_ptr + p * packn, vl); in deconvolution_packn_fp16sa_rvv()
177 _sum = vfmacc_vf_f16m1(_sum, val, _w0, vl); in deconvolution_packn_fp16sa_rvv()
185 _sum = activation_ps(_sum, activation_type, activation_params, vl); in deconvolution_packn_fp16sa_rvv()
187 vse16_v_f16m1(outptr + j * packn, _sum, vl); in deconvolution_packn_fp16sa_rvv()
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/
H A Dconvolution_x86.cpp539 _sum = _mm256_add_ps(_sum_all, _sum); in forward()
607 _sum = _mm256_fmadd_ps(_val, _w, _sum); in forward()
660 _sum = _mm256_fmadd_ps(_val0, _w0, _sum); in forward()
783 _sum = _mm_fmadd_ps(_val0, _w0, _sum); in forward()
785 _sum = _mm_fmadd_ps(_val1, _w1, _sum); in forward()
787 _sum = _mm_fmadd_ps(_val2, _w2, _sum); in forward()
789 _sum = _mm_fmadd_ps(_val3, _w3, _sum); in forward()
791 _sum = _mm_fmadd_ps(_val4, _w4, _sum); in forward()
793 _sum = _mm_fmadd_ps(_val5, _w5, _sum); in forward()
795 _sum = _mm_fmadd_ps(_val6, _w6, _sum); in forward()
[all …]
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/
H A Dconvolution_x86.cpp539 _sum = _mm256_add_ps(_sum_all, _sum); in forward()
607 _sum = _mm256_fmadd_ps(_val, _w, _sum); in forward()
660 _sum = _mm256_fmadd_ps(_val0, _w0, _sum); in forward()
783 _sum = _mm_fmadd_ps(_val0, _w0, _sum); in forward()
785 _sum = _mm_fmadd_ps(_val1, _w1, _sum); in forward()
787 _sum = _mm_fmadd_ps(_val2, _w2, _sum); in forward()
789 _sum = _mm_fmadd_ps(_val3, _w3, _sum); in forward()
791 _sum = _mm_fmadd_ps(_val4, _w4, _sum); in forward()
793 _sum = _mm_fmadd_ps(_val5, _w5, _sum); in forward()
795 _sum = _mm_fmadd_ps(_val6, _w6, _sum); in forward()
[all …]
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/
H A Dconvolution_x86.cpp539 _sum = _mm256_add_ps(_sum_all, _sum); in forward()
607 _sum = _mm256_fmadd_ps(_val, _w, _sum); in forward()
660 _sum = _mm256_fmadd_ps(_val0, _w0, _sum); in forward()
783 _sum = _mm_fmadd_ps(_val0, _w0, _sum); in forward()
785 _sum = _mm_fmadd_ps(_val1, _w1, _sum); in forward()
787 _sum = _mm_fmadd_ps(_val2, _w2, _sum); in forward()
789 _sum = _mm_fmadd_ps(_val3, _w3, _sum); in forward()
791 _sum = _mm_fmadd_ps(_val4, _w4, _sum); in forward()
793 _sum = _mm_fmadd_ps(_val5, _w5, _sum); in forward()
795 _sum = _mm_fmadd_ps(_val6, _w6, _sum); in forward()
[all …]

12345678910>>...38