/dports/devel/spark/spark-2.1.1/core/src/main/scala/org/apache/spark/util/ |
H A D | AccumulatorV2.scala | 299 newAcc._sum = this._sum 304 _sum = 0L 313 _sum += v 322 _sum += v 336 def sum: Long = _sum 346 _sum += o.sum 374 newAcc._sum = this._sum 379 _sum = 0.0 388 _sum += v 397 _sum += v [all …]
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/ |
H A D | convolution_arm.cpp | 668 _sum = vmlaq_laneq_f32(_sum, _w0, _val, 0); in forward() 669 _sum = vmlaq_laneq_f32(_sum, _w1, _val, 1); in forward() 670 _sum = vmlaq_laneq_f32(_sum, _w2, _val, 2); in forward() 671 _sum = vmlaq_laneq_f32(_sum, _w3, _val, 3); in forward() 754 _sum = vmlaq_f32(_sum, _val, _w); in forward() 1356 _sum = vfmaq_f32(_sum, _val, _w); in forward_fp16s() 1725 _sum = vfmaq_f16(_sum, _val, _w); in forward_fp16sa() 2065 _sum = vfma_lane_f16(_sum, _w0, _val, 0); in forward_fp16sa() 2066 _sum = vfma_lane_f16(_sum, _w1, _val, 1); in forward_fp16sa() 2117 _sum = vfma_f16(_sum, _val, _w); in forward_fp16sa() [all …]
|
H A D | innerproduct_arm.cpp | 258 _sum = vmlaq_f32(_sum, _val, _k); in forward() 605 _sum = vaddq_f32(_sum, _sum2); in forward() 753 _sum = vfmaq_f32(_sum, _val, _k); in forward_fp16s() 786 _sum = vfmaq_f32(_sum, _val, _k); in forward_fp16s() 915 _sum = vfmaq_f32(_sum, _val, _w); in forward_fp16s() 950 _sum = vfmaq_f32(_sum, _m, _w); in forward_fp16s() 1090 _sum = vfmaq_f16(_sum, _val, _k); in forward_fp16sa() 1320 _sum = vfma_f16(_sum, _val, _k); in forward_fp16sa() 1353 _sum = vfma_f16(_sum, _val, _k); in forward_fp16sa() 1713 _sum = vfmaq_f16(_sum, _m, _w); in forward_fp16sa() [all …]
|
H A D | convolutiondepthwise_5x5.h | 797 _sum = vmlaq_f32(_sum, _r2, _k2); in convdw5x5s1_neon() 802 _sum = vmlaq_f32(_sum, _r3, _k3); in convdw5x5s1_neon() 806 _sum = vmlaq_f32(_sum, _r4, _k20212223); in convdw5x5s1_neon() 810 _sum = vmlaq_f32(_sum, _r0, _k0123); in convdw5x5s1_neon() 827 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in convdw5x5s1_neon() 1486 _sum = vmlaq_f32(_sum, _r1, vld1q_f32(k1)); in convdw5x5s1_neon() 1489 _sum = vmlaq_f32(_sum, _r2, vld1q_f32(k2)); in convdw5x5s1_neon() 1492 _sum = vmlaq_f32(_sum, _r3, vld1q_f32(k3)); in convdw5x5s1_neon() 1495 _sum = vmlaq_f32(_sum, _r4, _k20212223); in convdw5x5s1_neon() 1510 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in convdw5x5s1_neon() [all …]
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/ |
H A D | innerproduct_arm.cpp | 160 _sum = vmlaq_f32(_sum, _val, _k); in forward() 446 _sum = vaddq_f32(_sum, _sum2); in forward() 594 _sum = vfmaq_f32(_sum, _val, _k); in forward_fp16s() 627 _sum = vfmaq_f32(_sum, _val, _k); in forward_fp16s() 733 _sum = vfmaq_f32(_sum, _val, _w); in forward_fp16s() 768 _sum = vfmaq_f32(_sum, _m, _w); in forward_fp16s() 908 _sum = vfmaq_f16(_sum, _val, _k); in forward_fp16sa() 1138 _sum = vfma_f16(_sum, _val, _k); in forward_fp16sa() 1171 _sum = vfma_f16(_sum, _val, _k); in forward_fp16sa() 1508 _sum = vfmaq_f16(_sum, _m, _w); in forward_fp16sa() [all …]
|
H A D | convolution_pack1to4_fp16s.h | 57 float32x4_t _sum = vdupq_n_f32(0.f); in convolution_pack1to4_fp16s_neon() local 61 _sum = vld1q_f32(bias_data_ptr + p * 4); in convolution_pack1to4_fp16s_neon() 76 _sum = vfmaq_f32(_sum, _val, _w); in convolution_pack1to4_fp16s_neon() 82 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack1to4_fp16s_neon() 84 vst1_f16(outptr + j * 4, vcvt_f16_f32(_sum)); in convolution_pack1to4_fp16s_neon() 134 float16x4_t _sum = vdup_n_f16((__fp16)0.f); in convolution_pack1to4_fp16sa_neon() local 138 _sum = vld1_f16(bias_data_ptr + p * 4); in convolution_pack1to4_fp16sa_neon() 153 _sum = vfma_f16(_sum, _val, _w); in convolution_pack1to4_fp16sa_neon() 159 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack1to4_fp16sa_neon() 161 vst1_f16(outptr + j * 4, _sum); in convolution_pack1to4_fp16sa_neon()
|
H A D | convolutiondepthwise_5x5.h | 797 _sum = vmlaq_f32(_sum, _r2, _k2); in convdw5x5s1_neon() 802 _sum = vmlaq_f32(_sum, _r3, _k3); in convdw5x5s1_neon() 806 _sum = vmlaq_f32(_sum, _r4, _k20212223); in convdw5x5s1_neon() 810 _sum = vmlaq_f32(_sum, _r0, _k0123); in convdw5x5s1_neon() 827 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in convdw5x5s1_neon() 1486 _sum = vmlaq_f32(_sum, _r1, vld1q_f32(k1)); in convdw5x5s1_neon() 1489 _sum = vmlaq_f32(_sum, _r2, vld1q_f32(k2)); in convdw5x5s1_neon() 1492 _sum = vmlaq_f32(_sum, _r3, vld1q_f32(k3)); in convdw5x5s1_neon() 1495 _sum = vmlaq_f32(_sum, _r4, _k20212223); in convdw5x5s1_neon() 1510 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in convdw5x5s1_neon() [all …]
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/ |
H A D | innerproduct_arm.cpp | 160 _sum = vmlaq_f32(_sum, _val, _k); in forward() 446 _sum = vaddq_f32(_sum, _sum2); in forward() 594 _sum = vfmaq_f32(_sum, _val, _k); in forward_fp16s() 627 _sum = vfmaq_f32(_sum, _val, _k); in forward_fp16s() 733 _sum = vfmaq_f32(_sum, _val, _w); in forward_fp16s() 768 _sum = vfmaq_f32(_sum, _m, _w); in forward_fp16s() 908 _sum = vfmaq_f16(_sum, _val, _k); in forward_fp16sa() 1138 _sum = vfma_f16(_sum, _val, _k); in forward_fp16sa() 1171 _sum = vfma_f16(_sum, _val, _k); in forward_fp16sa() 1508 _sum = vfmaq_f16(_sum, _m, _w); in forward_fp16sa() [all …]
|
H A D | convolution_pack1to4_fp16s.h | 57 float32x4_t _sum = vdupq_n_f32(0.f); in convolution_pack1to4_fp16s_neon() local 61 _sum = vld1q_f32(bias_data_ptr + p * 4); in convolution_pack1to4_fp16s_neon() 76 _sum = vfmaq_f32(_sum, _val, _w); in convolution_pack1to4_fp16s_neon() 82 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack1to4_fp16s_neon() 84 vst1_f16(outptr + j * 4, vcvt_f16_f32(_sum)); in convolution_pack1to4_fp16s_neon() 134 float16x4_t _sum = vdup_n_f16((__fp16)0.f); in convolution_pack1to4_fp16sa_neon() local 138 _sum = vld1_f16(bias_data_ptr + p * 4); in convolution_pack1to4_fp16sa_neon() 153 _sum = vfma_f16(_sum, _val, _w); in convolution_pack1to4_fp16sa_neon() 159 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack1to4_fp16sa_neon() 161 vst1_f16(outptr + j * 4, _sum); in convolution_pack1to4_fp16sa_neon()
|
H A D | convolutiondepthwise_5x5.h | 797 _sum = vmlaq_f32(_sum, _r2, _k2); in convdw5x5s1_neon() 802 _sum = vmlaq_f32(_sum, _r3, _k3); in convdw5x5s1_neon() 806 _sum = vmlaq_f32(_sum, _r4, _k20212223); in convdw5x5s1_neon() 810 _sum = vmlaq_f32(_sum, _r0, _k0123); in convdw5x5s1_neon() 827 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in convdw5x5s1_neon() 1486 _sum = vmlaq_f32(_sum, _r1, vld1q_f32(k1)); in convdw5x5s1_neon() 1489 _sum = vmlaq_f32(_sum, _r2, vld1q_f32(k2)); in convdw5x5s1_neon() 1492 _sum = vmlaq_f32(_sum, _r3, vld1q_f32(k3)); in convdw5x5s1_neon() 1495 _sum = vmlaq_f32(_sum, _r4, _k20212223); in convdw5x5s1_neon() 1510 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in convdw5x5s1_neon() [all …]
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/ |
H A D | innerproduct_arm.cpp | 160 _sum = vmlaq_f32(_sum, _val, _k); in forward() 446 _sum = vaddq_f32(_sum, _sum2); in forward() 594 _sum = vfmaq_f32(_sum, _val, _k); in forward_fp16s() 627 _sum = vfmaq_f32(_sum, _val, _k); in forward_fp16s() 733 _sum = vfmaq_f32(_sum, _val, _w); in forward_fp16s() 768 _sum = vfmaq_f32(_sum, _m, _w); in forward_fp16s() 908 _sum = vfmaq_f16(_sum, _val, _k); in forward_fp16sa() 1138 _sum = vfma_f16(_sum, _val, _k); in forward_fp16sa() 1171 _sum = vfma_f16(_sum, _val, _k); in forward_fp16sa() 1508 _sum = vfmaq_f16(_sum, _m, _w); in forward_fp16sa() [all …]
|
H A D | convolution_pack1to4_fp16s.h | 57 float32x4_t _sum = vdupq_n_f32(0.f); in convolution_pack1to4_fp16s_neon() local 61 _sum = vld1q_f32(bias_data_ptr + p * 4); in convolution_pack1to4_fp16s_neon() 76 _sum = vfmaq_f32(_sum, _val, _w); in convolution_pack1to4_fp16s_neon() 82 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack1to4_fp16s_neon() 84 vst1_f16(outptr + j * 4, vcvt_f16_f32(_sum)); in convolution_pack1to4_fp16s_neon() 134 float16x4_t _sum = vdup_n_f16((__fp16)0.f); in convolution_pack1to4_fp16sa_neon() local 138 _sum = vld1_f16(bias_data_ptr + p * 4); in convolution_pack1to4_fp16sa_neon() 153 _sum = vfma_f16(_sum, _val, _w); in convolution_pack1to4_fp16sa_neon() 159 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack1to4_fp16sa_neon() 161 vst1_f16(outptr + j * 4, _sum); in convolution_pack1to4_fp16sa_neon()
|
H A D | convolutiondepthwise_5x5.h | 797 _sum = vmlaq_f32(_sum, _r2, _k2); in convdw5x5s1_neon() 802 _sum = vmlaq_f32(_sum, _r3, _k3); in convdw5x5s1_neon() 806 _sum = vmlaq_f32(_sum, _r4, _k20212223); in convdw5x5s1_neon() 810 _sum = vmlaq_f32(_sum, _r0, _k0123); in convdw5x5s1_neon() 827 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in convdw5x5s1_neon() 1486 _sum = vmlaq_f32(_sum, _r1, vld1q_f32(k1)); in convdw5x5s1_neon() 1489 _sum = vmlaq_f32(_sum, _r2, vld1q_f32(k2)); in convdw5x5s1_neon() 1492 _sum = vmlaq_f32(_sum, _r3, vld1q_f32(k3)); in convdw5x5s1_neon() 1495 _sum = vmlaq_f32(_sum, _r4, _k20212223); in convdw5x5s1_neon() 1510 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in convdw5x5s1_neon() [all …]
|
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/ |
H A D | convolution_pack1to4_fp16s.h | 57 float32x4_t _sum = vdupq_n_f32(0.f); in convolution_pack1to4_fp16s_neon() local 61 _sum = vld1q_f32(bias_data_ptr + p * 4); in convolution_pack1to4_fp16s_neon() 76 _sum = vfmaq_f32(_sum, _val, _w); in convolution_pack1to4_fp16s_neon() 82 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack1to4_fp16s_neon() 84 vst1_f16(outptr + j * 4, vcvt_f16_f32(_sum)); in convolution_pack1to4_fp16s_neon() 134 float16x4_t _sum = vdup_n_f16((__fp16)0.f); in convolution_pack1to4_fp16sa_neon() local 138 _sum = vld1_f16(bias_data_ptr + p * 4); in convolution_pack1to4_fp16sa_neon() 153 _sum = vfma_f16(_sum, _val, _w); in convolution_pack1to4_fp16sa_neon() 159 _sum = activation_ps(_sum, activation_type, activation_params); in convolution_pack1to4_fp16sa_neon() 161 vst1_f16(outptr + j * 4, _sum); in convolution_pack1to4_fp16sa_neon()
|
H A D | convolutiondepthwise_5x5.h | 797 _sum = vmlaq_f32(_sum, _r2, _k2); in convdw5x5s1_neon() 802 _sum = vmlaq_f32(_sum, _r3, _k3); in convdw5x5s1_neon() 806 _sum = vmlaq_f32(_sum, _r4, _k20212223); in convdw5x5s1_neon() 810 _sum = vmlaq_f32(_sum, _r0, _k0123); in convdw5x5s1_neon() 827 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in convdw5x5s1_neon() 1486 _sum = vmlaq_f32(_sum, _r1, vld1q_f32(k1)); in convdw5x5s1_neon() 1489 _sum = vmlaq_f32(_sum, _r2, vld1q_f32(k2)); in convdw5x5s1_neon() 1492 _sum = vmlaq_f32(_sum, _r3, vld1q_f32(k3)); in convdw5x5s1_neon() 1495 _sum = vmlaq_f32(_sum, _r4, _k20212223); in convdw5x5s1_neon() 1510 _sum = vmlaq_f32(_sum, _r_t4, _k_t4); in convdw5x5s1_neon() [all …]
|
H A D | innerproduct_arm.cpp | 163 _sum = vmlaq_f32(_sum, _val, _k); in forward() 449 _sum = vaddq_f32(_sum, _sum2); in forward() 597 _sum = vfmaq_f32(_sum, _val, _k); in forward_fp16s() 630 _sum = vfmaq_f32(_sum, _val, _k); in forward_fp16s() 736 _sum = vfmaq_f32(_sum, _val, _w); in forward_fp16s() 771 _sum = vfmaq_f32(_sum, _m, _w); in forward_fp16s() 911 _sum = vfmaq_f16(_sum, _val, _k); in forward_fp16sa() 1141 _sum = vfma_f16(_sum, _val, _k); in forward_fp16sa() 1174 _sum = vfma_f16(_sum, _val, _k); in forward_fp16sa() 1511 _sum = vfmaq_f16(_sum, _m, _w); in forward_fp16sa() [all …]
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/ |
H A D | convolution_x86.cpp | 530 _sum = _mm256_add_ps(_sum_all, _sum); in forward() 598 _sum = _mm256_fmadd_ps(_val, _w, _sum); in forward() 651 _sum = _mm256_fmadd_ps(_val0, _w0, _sum); in forward() 773 _sum = _mm_fmadd_ps(_val0, _w0, _sum); in forward() 775 _sum = _mm_fmadd_ps(_val1, _w1, _sum); in forward() 777 _sum = _mm_fmadd_ps(_val2, _w2, _sum); in forward() 779 _sum = _mm_fmadd_ps(_val3, _w3, _sum); in forward() 781 _sum = _mm_fmadd_ps(_val4, _w4, _sum); in forward() 783 _sum = _mm_fmadd_ps(_val5, _w5, _sum); in forward() 785 _sum = _mm_fmadd_ps(_val6, _w6, _sum); in forward() [all …]
|
/dports/databases/mongodb36/mongodb-src-r3.6.23/src/mongo/util/ |
H A D | summation.cpp | 56 if (_sum > limits::min() && _sum < limits::max()) in fitsLong() 65 if (_sum == limits::max()) in fitsLong() 70 if (_sum == limits::min()) in fitsLong() 82 if (_sum == std::numeric_limits<long long>::max()) { in getLong() 84 dassert(_addend < -0.5 && -_sum == std::numeric_limits<long long>::min()); in getLong() 87 long long sum = llround(_sum); in getLong() 88 sum += llround((_sum - sum) + _addend); in getLong()
|
/dports/misc/ncnn/ncnn-20211208/src/layer/riscv/ |
H A D | convolution_pack1ton_fp16s.h | 60 vfloat32m2_t _sum = vfmv_v_f_f32m2(0.f, vl); in convolution_pack1ton_fp16s_rvv() local 64 _sum = vle32_v_f32m2(bias_data_ptr + p * packn, vl); in convolution_pack1ton_fp16s_rvv() 79 _sum = vfwmacc_vf_f32m2(_sum, val, _w, vl); in convolution_pack1ton_fp16s_rvv() 85 _sum = activation_ps(_sum, activation_type, activation_params, vl); in convolution_pack1ton_fp16s_rvv() 87 vse16_v_f16m1(outptr + j * packn, vfncvt_f_f_w_f16m1(_sum, vl), vl); in convolution_pack1ton_fp16s_rvv() 140 vfloat16m1_t _sum = vfmv_v_f_f16m1(0.f, vl); in convolution_pack1ton_fp16sa_rvv() local 144 _sum = vle16_v_f16m1(bias_data_ptr + p * packn, vl); in convolution_pack1ton_fp16sa_rvv() 159 _sum = vfmacc_vf_f16m1(_sum, val, _w, vl); in convolution_pack1ton_fp16sa_rvv() 165 _sum = activation_ps(_sum, activation_type, activation_params, vl); in convolution_pack1ton_fp16sa_rvv() 167 vse16_v_f16m1(outptr + j * packn, _sum, vl); in convolution_pack1ton_fp16sa_rvv()
|
H A D | deconvolution_pack1ton_fp16s.h | 45 vfloat32m2_t _sum = vfmv_v_f_f32m2(0.f, vl); in deconvolution_pack1ton_fp16s_rvv() local 49 _sum = vle32_v_f32m2(bias_data_ptr + p * packn, vl); in deconvolution_pack1ton_fp16s_rvv() 86 _sum = vfwmacc_vf_f32m2(_sum, val, _w, vl); in deconvolution_pack1ton_fp16s_rvv() 93 _sum = activation_ps(_sum, activation_type, activation_params, vl); in deconvolution_pack1ton_fp16s_rvv() 95 vse16_v_f16m1(outptr + j * packn, vfncvt_f_f_w_f16m1(_sum, vl), vl); in deconvolution_pack1ton_fp16s_rvv() 133 vfloat16m1_t _sum = vfmv_v_f_f16m1(0.f, vl); in deconvolution_pack1ton_fp16sa_rvv() local 137 _sum = vle16_v_f16m1(bias_data_ptr + p * packn, vl); in deconvolution_pack1ton_fp16sa_rvv() 174 _sum = vfmacc_vf_f16m1(_sum, val, _w, vl); in deconvolution_pack1ton_fp16sa_rvv() 181 _sum = activation_ps(_sum, activation_type, activation_params, vl); in deconvolution_pack1ton_fp16sa_rvv() 183 vse16_v_f16m1(outptr + j * packn, _sum, vl); in deconvolution_pack1ton_fp16sa_rvv()
|
H A D | convolution_packn_fp16s.h | 60 vfloat32m2_t _sum = vfmv_v_f_f32m2(0.f, vl); in convolution_packn_fp16s_rvv() local 64 _sum = vle32_v_f32m2(bias_data_ptr + p * packn, vl); in convolution_packn_fp16s_rvv() 83 _sum = vfwmacc_vf_f32m2(_sum, val, _w0, vl); in convolution_packn_fp16s_rvv() 90 _sum = activation_ps(_sum, activation_type, activation_params, vl); in convolution_packn_fp16s_rvv() 92 vse16_v_f16m1(outptr + j * packn, vfncvt_f_f_w_f16m1(_sum, vl), vl); in convolution_packn_fp16s_rvv() 145 vfloat16m1_t _sum = vfmv_v_f_f16m1(0.f, vl); in convolution_packn_fp16sa_rvv() local 149 _sum = vle16_v_f16m1(bias_data_ptr + p * packn, vl); in convolution_packn_fp16sa_rvv() 168 _sum = vfmacc_vf_f16m1(_sum, val, _w0, vl); in convolution_packn_fp16sa_rvv() 175 _sum = activation_ps(_sum, activation_type, activation_params, vl); in convolution_packn_fp16sa_rvv() 177 vse16_v_f16m1(outptr + j * packn, _sum, vl); in convolution_packn_fp16sa_rvv()
|
H A D | deconvolution_packn_fp16s.h | 45 vfloat32m2_t _sum = vfmv_v_f_f32m2(0.f, vl); in deconvolution_packn_fp16s_rvv() local 49 _sum = vle32_v_f32m2(bias_data_ptr + p * packn, vl); in deconvolution_packn_fp16s_rvv() 87 _sum = vfwmacc_vf_f32m2(_sum, val, _w0, vl); in deconvolution_packn_fp16s_rvv() 95 _sum = activation_ps(_sum, activation_type, activation_params, vl); in deconvolution_packn_fp16s_rvv() 97 vse16_v_f16m1(outptr + j * packn, vfncvt_f_f_w_f16m1(_sum, vl), vl); in deconvolution_packn_fp16s_rvv() 135 vfloat16m1_t _sum = vfmv_v_f_f16m1(0.f, vl); in deconvolution_packn_fp16sa_rvv() local 139 _sum = vle16_v_f16m1(bias_data_ptr + p * packn, vl); in deconvolution_packn_fp16sa_rvv() 177 _sum = vfmacc_vf_f16m1(_sum, val, _w0, vl); in deconvolution_packn_fp16sa_rvv() 185 _sum = activation_ps(_sum, activation_type, activation_params, vl); in deconvolution_packn_fp16sa_rvv() 187 vse16_v_f16m1(outptr + j * packn, _sum, vl); in deconvolution_packn_fp16sa_rvv()
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/ |
H A D | convolution_x86.cpp | 539 _sum = _mm256_add_ps(_sum_all, _sum); in forward() 607 _sum = _mm256_fmadd_ps(_val, _w, _sum); in forward() 660 _sum = _mm256_fmadd_ps(_val0, _w0, _sum); in forward() 783 _sum = _mm_fmadd_ps(_val0, _w0, _sum); in forward() 785 _sum = _mm_fmadd_ps(_val1, _w1, _sum); in forward() 787 _sum = _mm_fmadd_ps(_val2, _w2, _sum); in forward() 789 _sum = _mm_fmadd_ps(_val3, _w3, _sum); in forward() 791 _sum = _mm_fmadd_ps(_val4, _w4, _sum); in forward() 793 _sum = _mm_fmadd_ps(_val5, _w5, _sum); in forward() 795 _sum = _mm_fmadd_ps(_val6, _w6, _sum); in forward() [all …]
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/ |
H A D | convolution_x86.cpp | 539 _sum = _mm256_add_ps(_sum_all, _sum); in forward() 607 _sum = _mm256_fmadd_ps(_val, _w, _sum); in forward() 660 _sum = _mm256_fmadd_ps(_val0, _w0, _sum); in forward() 783 _sum = _mm_fmadd_ps(_val0, _w0, _sum); in forward() 785 _sum = _mm_fmadd_ps(_val1, _w1, _sum); in forward() 787 _sum = _mm_fmadd_ps(_val2, _w2, _sum); in forward() 789 _sum = _mm_fmadd_ps(_val3, _w3, _sum); in forward() 791 _sum = _mm_fmadd_ps(_val4, _w4, _sum); in forward() 793 _sum = _mm_fmadd_ps(_val5, _w5, _sum); in forward() 795 _sum = _mm_fmadd_ps(_val6, _w6, _sum); in forward() [all …]
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/ |
H A D | convolution_x86.cpp | 539 _sum = _mm256_add_ps(_sum_all, _sum); in forward() 607 _sum = _mm256_fmadd_ps(_val, _w, _sum); in forward() 660 _sum = _mm256_fmadd_ps(_val0, _w0, _sum); in forward() 783 _sum = _mm_fmadd_ps(_val0, _w0, _sum); in forward() 785 _sum = _mm_fmadd_ps(_val1, _w1, _sum); in forward() 787 _sum = _mm_fmadd_ps(_val2, _w2, _sum); in forward() 789 _sum = _mm_fmadd_ps(_val3, _w3, _sum); in forward() 791 _sum = _mm_fmadd_ps(_val4, _w4, _sum); in forward() 793 _sum = _mm_fmadd_ps(_val5, _w5, _sum); in forward() 795 _sum = _mm_fmadd_ps(_val6, _w6, _sum); in forward() [all …]
|