Home
last modified time | relevance | path

Searched refs:_sum5 (Results 1 – 25 of 97) sorted by relevance

1234

/dports/misc/ncnn/ncnn-20211208/src/layer/riscv/
H A Dconvolution_7x7_pack1ton.h86 _sum5 = vfmacc_vf_f32m1(_sum5, r0[10], _k00, vl); in conv7x7s2_pack1ton_rvv()
94 _sum5 = vfmacc_vf_f32m1(_sum5, r0[11], _k01, vl); in conv7x7s2_pack1ton_rvv()
102 _sum5 = vfmacc_vf_f32m1(_sum5, r0[12], _k02, vl); in conv7x7s2_pack1ton_rvv()
110 _sum5 = vfmacc_vf_f32m1(_sum5, r0[13], _k03, vl); in conv7x7s2_pack1ton_rvv()
118 _sum5 = vfmacc_vf_f32m1(_sum5, r0[14], _k04, vl); in conv7x7s2_pack1ton_rvv()
126 _sum5 = vfmacc_vf_f32m1(_sum5, r0[15], _k05, vl); in conv7x7s2_pack1ton_rvv()
134 _sum5 = vfmacc_vf_f32m1(_sum5, r0[16], _k06, vl); in conv7x7s2_pack1ton_rvv()
153 _sum5 = vfmacc_vf_f32m1(_sum5, r1[10], _k10, vl); in conv7x7s2_pack1ton_rvv()
161 _sum5 = vfmacc_vf_f32m1(_sum5, r1[11], _k11, vl); in conv7x7s2_pack1ton_rvv()
169 _sum5 = vfmacc_vf_f32m1(_sum5, r1[12], _k12, vl); in conv7x7s2_pack1ton_rvv()
[all …]
H A Dconvolution_7x7_pack1ton_fp16s.h86 _sum5 = vfmacc_vf_f16m1(_sum5, r0[10], _k00, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
94 _sum5 = vfmacc_vf_f16m1(_sum5, r0[11], _k01, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
102 _sum5 = vfmacc_vf_f16m1(_sum5, r0[12], _k02, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
110 _sum5 = vfmacc_vf_f16m1(_sum5, r0[13], _k03, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
118 _sum5 = vfmacc_vf_f16m1(_sum5, r0[14], _k04, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
126 _sum5 = vfmacc_vf_f16m1(_sum5, r0[15], _k05, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
134 _sum5 = vfmacc_vf_f16m1(_sum5, r0[16], _k06, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
153 _sum5 = vfmacc_vf_f16m1(_sum5, r1[10], _k10, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
161 _sum5 = vfmacc_vf_f16m1(_sum5, r1[11], _k11, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
169 _sum5 = vfmacc_vf_f16m1(_sum5, r1[12], _k12, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
[all …]
H A Dconvolution_3x3_pack1ton.h78 _sum5 = vfmacc_vf_f32m1(_sum5, r0[5], _k00, vl); in conv3x3s1_pack1ton_rvv()
86 _sum5 = vfmacc_vf_f32m1(_sum5, r0[6], _k01, vl); in conv3x3s1_pack1ton_rvv()
94 _sum5 = vfmacc_vf_f32m1(_sum5, r0[7], _k02, vl); in conv3x3s1_pack1ton_rvv()
103 _sum5 = vfmacc_vf_f32m1(_sum5, r1[5], _k10, vl); in conv3x3s1_pack1ton_rvv()
111 _sum5 = vfmacc_vf_f32m1(_sum5, r1[6], _k11, vl); in conv3x3s1_pack1ton_rvv()
119 _sum5 = vfmacc_vf_f32m1(_sum5, r1[7], _k12, vl); in conv3x3s1_pack1ton_rvv()
128 _sum5 = vfmacc_vf_f32m1(_sum5, r2[5], _k20, vl); in conv3x3s1_pack1ton_rvv()
136 _sum5 = vfmacc_vf_f32m1(_sum5, r2[6], _k21, vl); in conv3x3s1_pack1ton_rvv()
144 _sum5 = vfmacc_vf_f32m1(_sum5, r2[7], _k22, vl); in conv3x3s1_pack1ton_rvv()
356 _sum5 = vfmacc_vf_f32m1(_sum5, r0[10], _k00, vl); in conv3x3s2_pack1ton_rvv()
[all …]
H A Dconvolution_3x3_pack1ton_fp16s.h78 _sum5 = vfmacc_vf_f16m1(_sum5, r0[5], _k00, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
86 _sum5 = vfmacc_vf_f16m1(_sum5, r0[6], _k01, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
94 _sum5 = vfmacc_vf_f16m1(_sum5, r0[7], _k02, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
103 _sum5 = vfmacc_vf_f16m1(_sum5, r1[5], _k10, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
111 _sum5 = vfmacc_vf_f16m1(_sum5, r1[6], _k11, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
119 _sum5 = vfmacc_vf_f16m1(_sum5, r1[7], _k12, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
128 _sum5 = vfmacc_vf_f16m1(_sum5, r2[5], _k20, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
136 _sum5 = vfmacc_vf_f16m1(_sum5, r2[6], _k21, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
144 _sum5 = vfmacc_vf_f16m1(_sum5, r2[7], _k22, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
356 _sum5 = vfmacc_vf_f16m1(_sum5, r0[10], _k00, vl); in conv3x3s2_pack1ton_fp16sa_rvv()
[all …]
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Dconvolution_1x1_fp16s.h165 float16x8_t _sum5 = vdupq_laneq_f16(_bias0, 5); in conv1x1s1_sgemm_fp16sa_neon() local
194 _sum5 = vfmaq_laneq_f16(_sum5, _p0, _k0, 5); in conv1x1s1_sgemm_fp16sa_neon()
203 _sum5 = vfmaq_laneq_f16(_sum5, _p1, _k1, 5); in conv1x1s1_sgemm_fp16sa_neon()
212 _sum5 = vfmaq_laneq_f16(_sum5, _p2, _k2, 5); in conv1x1s1_sgemm_fp16sa_neon()
221 _sum5 = vfmaq_laneq_f16(_sum5, _p3, _k3, 5); in conv1x1s1_sgemm_fp16sa_neon()
230 _sum5 = vfmaq_laneq_f16(_sum5, _p4, _k4, 5); in conv1x1s1_sgemm_fp16sa_neon()
239 _sum5 = vfmaq_laneq_f16(_sum5, _p5, _k5, 5); in conv1x1s1_sgemm_fp16sa_neon()
248 _sum5 = vfmaq_laneq_f16(_sum5, _p6, _k6, 5); in conv1x1s1_sgemm_fp16sa_neon()
257 _sum5 = vfmaq_laneq_f16(_sum5, _p7, _k7, 5); in conv1x1s1_sgemm_fp16sa_neon()
276 _sum5 = vfmaq_laneq_f16(_sum5, _p0, _k0, 5); in conv1x1s1_sgemm_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack8to4_int8.h935 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w0), vget_low_s16(_val0), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
954 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w1), vget_low_s16(_val1), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
973 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w2), vget_low_s16(_val2), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
992 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w3), vget_low_s16(_val3), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
1011 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w4), vget_low_s16(_val4), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
1030 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w5), vget_low_s16(_val5), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
1049 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w6), vget_low_s16(_val6), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
1068 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w7), vget_low_s16(_val7), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
1139 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w0), vget_low_s16(_val2), 0); in conv3x3s1_winograd42_pack8to4_int8_neon()
1150 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w1), vget_low_s16(_val2), 1); in conv3x3s1_winograd42_pack8to4_int8_neon()
[all …]
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Dconvolution_1x1_fp16s.h165 float16x8_t _sum5 = vdupq_laneq_f16(_bias0, 5); in conv1x1s1_sgemm_fp16sa_neon() local
194 _sum5 = vfmaq_laneq_f16(_sum5, _p0, _k0, 5); in conv1x1s1_sgemm_fp16sa_neon()
203 _sum5 = vfmaq_laneq_f16(_sum5, _p1, _k1, 5); in conv1x1s1_sgemm_fp16sa_neon()
212 _sum5 = vfmaq_laneq_f16(_sum5, _p2, _k2, 5); in conv1x1s1_sgemm_fp16sa_neon()
221 _sum5 = vfmaq_laneq_f16(_sum5, _p3, _k3, 5); in conv1x1s1_sgemm_fp16sa_neon()
230 _sum5 = vfmaq_laneq_f16(_sum5, _p4, _k4, 5); in conv1x1s1_sgemm_fp16sa_neon()
239 _sum5 = vfmaq_laneq_f16(_sum5, _p5, _k5, 5); in conv1x1s1_sgemm_fp16sa_neon()
248 _sum5 = vfmaq_laneq_f16(_sum5, _p6, _k6, 5); in conv1x1s1_sgemm_fp16sa_neon()
257 _sum5 = vfmaq_laneq_f16(_sum5, _p7, _k7, 5); in conv1x1s1_sgemm_fp16sa_neon()
276 _sum5 = vfmaq_laneq_f16(_sum5, _p0, _k0, 5); in conv1x1s1_sgemm_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack8to4_int8.h935 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w0), vget_low_s16(_val0), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
954 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w1), vget_low_s16(_val1), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
973 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w2), vget_low_s16(_val2), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
992 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w3), vget_low_s16(_val3), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
1011 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w4), vget_low_s16(_val4), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
1030 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w5), vget_low_s16(_val5), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
1049 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w6), vget_low_s16(_val6), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
1068 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w7), vget_low_s16(_val7), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
1139 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w0), vget_low_s16(_val2), 0); in conv3x3s1_winograd42_pack8to4_int8_neon()
1150 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w1), vget_low_s16(_val2), 1); in conv3x3s1_winograd42_pack8to4_int8_neon()
[all …]
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Dconvolution_1x1_fp16s.h165 float16x8_t _sum5 = vdupq_laneq_f16(_bias0, 5); in conv1x1s1_sgemm_fp16sa_neon() local
194 _sum5 = vfmaq_laneq_f16(_sum5, _p0, _k0, 5); in conv1x1s1_sgemm_fp16sa_neon()
203 _sum5 = vfmaq_laneq_f16(_sum5, _p1, _k1, 5); in conv1x1s1_sgemm_fp16sa_neon()
212 _sum5 = vfmaq_laneq_f16(_sum5, _p2, _k2, 5); in conv1x1s1_sgemm_fp16sa_neon()
221 _sum5 = vfmaq_laneq_f16(_sum5, _p3, _k3, 5); in conv1x1s1_sgemm_fp16sa_neon()
230 _sum5 = vfmaq_laneq_f16(_sum5, _p4, _k4, 5); in conv1x1s1_sgemm_fp16sa_neon()
239 _sum5 = vfmaq_laneq_f16(_sum5, _p5, _k5, 5); in conv1x1s1_sgemm_fp16sa_neon()
248 _sum5 = vfmaq_laneq_f16(_sum5, _p6, _k6, 5); in conv1x1s1_sgemm_fp16sa_neon()
257 _sum5 = vfmaq_laneq_f16(_sum5, _p7, _k7, 5); in conv1x1s1_sgemm_fp16sa_neon()
276 _sum5 = vfmaq_laneq_f16(_sum5, _p0, _k0, 5); in conv1x1s1_sgemm_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack8to4_int8.h935 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w0), vget_low_s16(_val0), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
954 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w1), vget_low_s16(_val1), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
973 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w2), vget_low_s16(_val2), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
992 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w3), vget_low_s16(_val3), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
1011 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w4), vget_low_s16(_val4), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
1030 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w5), vget_low_s16(_val5), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
1049 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w6), vget_low_s16(_val6), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
1068 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w7), vget_low_s16(_val7), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
1139 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w0), vget_low_s16(_val2), 0); in conv3x3s1_winograd42_pack8to4_int8_neon()
1150 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w1), vget_low_s16(_val2), 1); in conv3x3s1_winograd42_pack8to4_int8_neon()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Dconvolution_1x1_fp16s.h165 float16x8_t _sum5 = vdupq_laneq_f16(_bias0, 5); in conv1x1s1_sgemm_fp16sa_neon() local
194 _sum5 = vfmaq_laneq_f16(_sum5, _p0, _k0, 5); in conv1x1s1_sgemm_fp16sa_neon()
203 _sum5 = vfmaq_laneq_f16(_sum5, _p1, _k1, 5); in conv1x1s1_sgemm_fp16sa_neon()
212 _sum5 = vfmaq_laneq_f16(_sum5, _p2, _k2, 5); in conv1x1s1_sgemm_fp16sa_neon()
221 _sum5 = vfmaq_laneq_f16(_sum5, _p3, _k3, 5); in conv1x1s1_sgemm_fp16sa_neon()
230 _sum5 = vfmaq_laneq_f16(_sum5, _p4, _k4, 5); in conv1x1s1_sgemm_fp16sa_neon()
239 _sum5 = vfmaq_laneq_f16(_sum5, _p5, _k5, 5); in conv1x1s1_sgemm_fp16sa_neon()
248 _sum5 = vfmaq_laneq_f16(_sum5, _p6, _k6, 5); in conv1x1s1_sgemm_fp16sa_neon()
257 _sum5 = vfmaq_laneq_f16(_sum5, _p7, _k7, 5); in conv1x1s1_sgemm_fp16sa_neon()
276 _sum5 = vfmaq_laneq_f16(_sum5, _p0, _k0, 5); in conv1x1s1_sgemm_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack8to4_int8.h935 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w0), vget_low_s16(_val0), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
954 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w1), vget_low_s16(_val1), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
973 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w2), vget_low_s16(_val2), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
992 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w3), vget_low_s16(_val3), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
1011 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w4), vget_low_s16(_val4), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
1030 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w5), vget_low_s16(_val5), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
1049 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w6), vget_low_s16(_val6), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
1068 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w7), vget_low_s16(_val7), 2); in conv3x3s1_winograd42_pack8to4_int8_neon()
1139 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w0), vget_low_s16(_val2), 0); in conv3x3s1_winograd42_pack8to4_int8_neon()
1150 _sum5 = vmlal_lane_s16(_sum5, vget_high_s16(_w1), vget_low_s16(_val2), 1); in conv3x3s1_winograd42_pack8to4_int8_neon()
[all …]
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Dconvolution_1x1_fp16s.h165 float16x8_t _sum5 = vdupq_laneq_f16(_bias0, 5); in conv1x1s1_sgemm_fp16sa_neon() local
194 _sum5 = vfmaq_laneq_f16(_sum5, _p0, _k0, 5); in conv1x1s1_sgemm_fp16sa_neon()
203 _sum5 = vfmaq_laneq_f16(_sum5, _p1, _k1, 5); in conv1x1s1_sgemm_fp16sa_neon()
212 _sum5 = vfmaq_laneq_f16(_sum5, _p2, _k2, 5); in conv1x1s1_sgemm_fp16sa_neon()
221 _sum5 = vfmaq_laneq_f16(_sum5, _p3, _k3, 5); in conv1x1s1_sgemm_fp16sa_neon()
230 _sum5 = vfmaq_laneq_f16(_sum5, _p4, _k4, 5); in conv1x1s1_sgemm_fp16sa_neon()
239 _sum5 = vfmaq_laneq_f16(_sum5, _p5, _k5, 5); in conv1x1s1_sgemm_fp16sa_neon()
248 _sum5 = vfmaq_laneq_f16(_sum5, _p6, _k6, 5); in conv1x1s1_sgemm_fp16sa_neon()
257 _sum5 = vfmaq_laneq_f16(_sum5, _p7, _k7, 5); in conv1x1s1_sgemm_fp16sa_neon()
276 _sum5 = vfmaq_laneq_f16(_sum5, _p0, _k0, 5); in conv1x1s1_sgemm_fp16sa_neon()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/mips/
H A Dconvolution_3x3_pack1to4.h90 _sum5 = __msa_fmadd_w(_sum5, _r05, _k00); in conv3x3s1_pack1to4_msa()
98 _sum5 = __msa_fmadd_w(_sum5, _r06, _k01); in conv3x3s1_pack1to4_msa()
106 _sum5 = __msa_fmadd_w(_sum5, _r07, _k02); in conv3x3s1_pack1to4_msa()
130 _sum5 = __msa_fmadd_w(_sum5, _r15, _k10); in conv3x3s1_pack1to4_msa()
138 _sum5 = __msa_fmadd_w(_sum5, _r16, _k11); in conv3x3s1_pack1to4_msa()
146 _sum5 = __msa_fmadd_w(_sum5, _r17, _k12); in conv3x3s1_pack1to4_msa()
170 _sum5 = __msa_fmadd_w(_sum5, _r25, _k20); in conv3x3s1_pack1to4_msa()
178 _sum5 = __msa_fmadd_w(_sum5, _r26, _k21); in conv3x3s1_pack1to4_msa()
186 _sum5 = __msa_fmadd_w(_sum5, _r27, _k22); in conv3x3s1_pack1to4_msa()
481 _sum5 = __msa_fmadd_w(_sum5, _r0a, _k00); in conv3x3s2_pack1to4_msa()
[all …]
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/
H A Dconvolution_1x1_pack8.h485 _sum5 = _mm256_fmadd_ps(_w0, _val50, _sum5); in conv1x1s1_sgemm_pack8_avx()
486 _sum5 = _mm256_fmadd_ps(_w1, _val51, _sum5); in conv1x1s1_sgemm_pack8_avx()
487 _sum5 = _mm256_fmadd_ps(_w2, _val52, _sum5); in conv1x1s1_sgemm_pack8_avx()
488 _sum5 = _mm256_fmadd_ps(_w3, _val53, _sum5); in conv1x1s1_sgemm_pack8_avx()
489 _sum5 = _mm256_fmadd_ps(_w4, _val54, _sum5); in conv1x1s1_sgemm_pack8_avx()
490 _sum5 = _mm256_fmadd_ps(_w5, _val55, _sum5); in conv1x1s1_sgemm_pack8_avx()
491 _sum5 = _mm256_fmadd_ps(_w6, _val56, _sum5); in conv1x1s1_sgemm_pack8_avx()
492 _sum5 = _mm256_fmadd_ps(_w7, _val57, _sum5); in conv1x1s1_sgemm_pack8_avx()
733 _sum5 = _mm256_fmadd_ps(_w0, _val50, _sum5); in conv1x1s1_sgemm_pack8_avx()
734 _sum5 = _mm256_fmadd_ps(_w1, _val51, _sum5); in conv1x1s1_sgemm_pack8_avx()
[all …]
H A Dconvolution_1x1_pack8_fp16.h485 _sum5 = _mm256_fmadd_ps(_w0, _val50, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
486 _sum5 = _mm256_fmadd_ps(_w1, _val51, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
487 _sum5 = _mm256_fmadd_ps(_w2, _val52, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
488 _sum5 = _mm256_fmadd_ps(_w3, _val53, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
489 _sum5 = _mm256_fmadd_ps(_w4, _val54, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
490 _sum5 = _mm256_fmadd_ps(_w5, _val55, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
491 _sum5 = _mm256_fmadd_ps(_w6, _val56, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
492 _sum5 = _mm256_fmadd_ps(_w7, _val57, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
733 _sum5 = _mm256_fmadd_ps(_w0, _val50, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
734 _sum5 = _mm256_fmadd_ps(_w1, _val51, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
[all …]
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/
H A Dconvolution_1x1_pack8_fp16.h485 _sum5 = _mm256_fmadd_ps(_w0, _val50, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
486 _sum5 = _mm256_fmadd_ps(_w1, _val51, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
487 _sum5 = _mm256_fmadd_ps(_w2, _val52, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
488 _sum5 = _mm256_fmadd_ps(_w3, _val53, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
489 _sum5 = _mm256_fmadd_ps(_w4, _val54, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
490 _sum5 = _mm256_fmadd_ps(_w5, _val55, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
491 _sum5 = _mm256_fmadd_ps(_w6, _val56, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
492 _sum5 = _mm256_fmadd_ps(_w7, _val57, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
733 _sum5 = _mm256_fmadd_ps(_w0, _val50, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
734 _sum5 = _mm256_fmadd_ps(_w1, _val51, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
[all …]
H A Dconvolutiondepthwise_3x3_pack8.h144 __m256 _sum5 = _bias0; in convdw3x3s1_pack8_avx() local
150 _sum5 = _mm256_fmadd_ps(_k00, _r05, _sum5); in convdw3x3s1_pack8_avx()
151 _sum5 = _mm256_fmadd_ps(_k01, _r06, _sum5); in convdw3x3s1_pack8_avx()
152 _sum5 = _mm256_fmadd_ps(_k02, _r07, _sum5); in convdw3x3s1_pack8_avx()
153 _sum5 = _mm256_fmadd_ps(_k10, _r15, _sum5); in convdw3x3s1_pack8_avx()
154 _sum5 = _mm256_fmadd_ps(_k11, _r16, _sum5); in convdw3x3s1_pack8_avx()
155 _sum5 = _mm256_fmadd_ps(_k12, _r17, _sum5); in convdw3x3s1_pack8_avx()
156 _sum5 = _mm256_fmadd_ps(_k20, _r25, _sum5); in convdw3x3s1_pack8_avx()
157 _sum5 = _mm256_fmadd_ps(_k21, _r26, _sum5); in convdw3x3s1_pack8_avx()
158 _sum5 = _mm256_fmadd_ps(_k22, _r27, _sum5); in convdw3x3s1_pack8_avx()
[all …]
H A Dconvolutiondepthwise_3x3_pack8_fp16.h144 __m256 _sum5 = _bias0; in convdw3x3s1_fp16_pack8_avx() local
150 _sum5 = _mm256_fmadd_ps(_k00, _r05, _sum5); in convdw3x3s1_fp16_pack8_avx()
151 _sum5 = _mm256_fmadd_ps(_k01, _r06, _sum5); in convdw3x3s1_fp16_pack8_avx()
152 _sum5 = _mm256_fmadd_ps(_k02, _r07, _sum5); in convdw3x3s1_fp16_pack8_avx()
153 _sum5 = _mm256_fmadd_ps(_k10, _r15, _sum5); in convdw3x3s1_fp16_pack8_avx()
154 _sum5 = _mm256_fmadd_ps(_k11, _r16, _sum5); in convdw3x3s1_fp16_pack8_avx()
155 _sum5 = _mm256_fmadd_ps(_k12, _r17, _sum5); in convdw3x3s1_fp16_pack8_avx()
156 _sum5 = _mm256_fmadd_ps(_k20, _r25, _sum5); in convdw3x3s1_fp16_pack8_avx()
157 _sum5 = _mm256_fmadd_ps(_k21, _r26, _sum5); in convdw3x3s1_fp16_pack8_avx()
158 _sum5 = _mm256_fmadd_ps(_k22, _r27, _sum5); in convdw3x3s1_fp16_pack8_avx()
[all …]
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/
H A Dconvolution_1x1_pack8_fp16.h485 _sum5 = _mm256_fmadd_ps(_w0, _val50, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
486 _sum5 = _mm256_fmadd_ps(_w1, _val51, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
487 _sum5 = _mm256_fmadd_ps(_w2, _val52, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
488 _sum5 = _mm256_fmadd_ps(_w3, _val53, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
489 _sum5 = _mm256_fmadd_ps(_w4, _val54, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
490 _sum5 = _mm256_fmadd_ps(_w5, _val55, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
491 _sum5 = _mm256_fmadd_ps(_w6, _val56, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
492 _sum5 = _mm256_fmadd_ps(_w7, _val57, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
733 _sum5 = _mm256_fmadd_ps(_w0, _val50, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
734 _sum5 = _mm256_fmadd_ps(_w1, _val51, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
[all …]
H A Dconvolutiondepthwise_3x3_pack8_fp16.h144 __m256 _sum5 = _bias0; in convdw3x3s1_fp16_pack8_avx() local
150 _sum5 = _mm256_fmadd_ps(_k00, _r05, _sum5); in convdw3x3s1_fp16_pack8_avx()
151 _sum5 = _mm256_fmadd_ps(_k01, _r06, _sum5); in convdw3x3s1_fp16_pack8_avx()
152 _sum5 = _mm256_fmadd_ps(_k02, _r07, _sum5); in convdw3x3s1_fp16_pack8_avx()
153 _sum5 = _mm256_fmadd_ps(_k10, _r15, _sum5); in convdw3x3s1_fp16_pack8_avx()
154 _sum5 = _mm256_fmadd_ps(_k11, _r16, _sum5); in convdw3x3s1_fp16_pack8_avx()
155 _sum5 = _mm256_fmadd_ps(_k12, _r17, _sum5); in convdw3x3s1_fp16_pack8_avx()
156 _sum5 = _mm256_fmadd_ps(_k20, _r25, _sum5); in convdw3x3s1_fp16_pack8_avx()
157 _sum5 = _mm256_fmadd_ps(_k21, _r26, _sum5); in convdw3x3s1_fp16_pack8_avx()
158 _sum5 = _mm256_fmadd_ps(_k22, _r27, _sum5); in convdw3x3s1_fp16_pack8_avx()
[all …]
H A Dconvolutiondepthwise_3x3_pack8.h144 __m256 _sum5 = _bias0; in convdw3x3s1_pack8_avx() local
150 _sum5 = _mm256_fmadd_ps(_k00, _r05, _sum5); in convdw3x3s1_pack8_avx()
151 _sum5 = _mm256_fmadd_ps(_k01, _r06, _sum5); in convdw3x3s1_pack8_avx()
152 _sum5 = _mm256_fmadd_ps(_k02, _r07, _sum5); in convdw3x3s1_pack8_avx()
153 _sum5 = _mm256_fmadd_ps(_k10, _r15, _sum5); in convdw3x3s1_pack8_avx()
154 _sum5 = _mm256_fmadd_ps(_k11, _r16, _sum5); in convdw3x3s1_pack8_avx()
155 _sum5 = _mm256_fmadd_ps(_k12, _r17, _sum5); in convdw3x3s1_pack8_avx()
156 _sum5 = _mm256_fmadd_ps(_k20, _r25, _sum5); in convdw3x3s1_pack8_avx()
157 _sum5 = _mm256_fmadd_ps(_k21, _r26, _sum5); in convdw3x3s1_pack8_avx()
158 _sum5 = _mm256_fmadd_ps(_k22, _r27, _sum5); in convdw3x3s1_pack8_avx()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/x86/
H A Dconvolution_1x1_pack8_fp16.h485 _sum5 = _mm256_comp_fmadd_ps(_w0, _val50, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
486 _sum5 = _mm256_comp_fmadd_ps(_w1, _val51, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
487 _sum5 = _mm256_comp_fmadd_ps(_w2, _val52, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
488 _sum5 = _mm256_comp_fmadd_ps(_w3, _val53, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
489 _sum5 = _mm256_comp_fmadd_ps(_w4, _val54, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
490 _sum5 = _mm256_comp_fmadd_ps(_w5, _val55, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
491 _sum5 = _mm256_comp_fmadd_ps(_w6, _val56, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
492 _sum5 = _mm256_comp_fmadd_ps(_w7, _val57, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
733 _sum5 = _mm256_comp_fmadd_ps(_w0, _val50, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
734 _sum5 = _mm256_comp_fmadd_ps(_w1, _val51, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
[all …]
H A Dconvolutiondepthwise_3x3_pack4.h144 __m128 _sum5 = _bias0; in convdw3x3s1_pack4_sse() local
150 _sum5 = _mm_comp_fmadd_ps(_k00, _r05, _sum5); in convdw3x3s1_pack4_sse()
151 _sum5 = _mm_comp_fmadd_ps(_k01, _r06, _sum5); in convdw3x3s1_pack4_sse()
152 _sum5 = _mm_comp_fmadd_ps(_k02, _r07, _sum5); in convdw3x3s1_pack4_sse()
153 _sum5 = _mm_comp_fmadd_ps(_k10, _r15, _sum5); in convdw3x3s1_pack4_sse()
154 _sum5 = _mm_comp_fmadd_ps(_k11, _r16, _sum5); in convdw3x3s1_pack4_sse()
155 _sum5 = _mm_comp_fmadd_ps(_k12, _r17, _sum5); in convdw3x3s1_pack4_sse()
156 _sum5 = _mm_comp_fmadd_ps(_k20, _r25, _sum5); in convdw3x3s1_pack4_sse()
157 _sum5 = _mm_comp_fmadd_ps(_k21, _r26, _sum5); in convdw3x3s1_pack4_sse()
158 _sum5 = _mm_comp_fmadd_ps(_k22, _r27, _sum5); in convdw3x3s1_pack4_sse()
[all …]
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/
H A Dconvolution_1x1_pack8_fp16.h485 _sum5 = _mm256_fmadd_ps(_w0, _val50, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
486 _sum5 = _mm256_fmadd_ps(_w1, _val51, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
487 _sum5 = _mm256_fmadd_ps(_w2, _val52, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
488 _sum5 = _mm256_fmadd_ps(_w3, _val53, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
489 _sum5 = _mm256_fmadd_ps(_w4, _val54, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
490 _sum5 = _mm256_fmadd_ps(_w5, _val55, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
491 _sum5 = _mm256_fmadd_ps(_w6, _val56, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
492 _sum5 = _mm256_fmadd_ps(_w7, _val57, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
733 _sum5 = _mm256_fmadd_ps(_w0, _val50, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
734 _sum5 = _mm256_fmadd_ps(_w1, _val51, _sum5); in conv1x1s1_sgemm_fp16_pack8_avx()
[all …]

1234