Home
last modified time | relevance | path

Searched refs:_sum6 (Results 1 – 25 of 97) sorted by relevance

1234

/dports/misc/ncnn/ncnn-20211208/src/layer/riscv/
H A Dconvolution_7x7_pack1ton.h87 _sum6 = vfmacc_vf_f32m1(_sum6, r0[12], _k00, vl); in conv7x7s2_pack1ton_rvv()
95 _sum6 = vfmacc_vf_f32m1(_sum6, r0[13], _k01, vl); in conv7x7s2_pack1ton_rvv()
103 _sum6 = vfmacc_vf_f32m1(_sum6, r0[14], _k02, vl); in conv7x7s2_pack1ton_rvv()
111 _sum6 = vfmacc_vf_f32m1(_sum6, r0[15], _k03, vl); in conv7x7s2_pack1ton_rvv()
119 _sum6 = vfmacc_vf_f32m1(_sum6, r0[16], _k04, vl); in conv7x7s2_pack1ton_rvv()
127 _sum6 = vfmacc_vf_f32m1(_sum6, r0[17], _k05, vl); in conv7x7s2_pack1ton_rvv()
135 _sum6 = vfmacc_vf_f32m1(_sum6, r0[18], _k06, vl); in conv7x7s2_pack1ton_rvv()
154 _sum6 = vfmacc_vf_f32m1(_sum6, r1[12], _k10, vl); in conv7x7s2_pack1ton_rvv()
162 _sum6 = vfmacc_vf_f32m1(_sum6, r1[13], _k11, vl); in conv7x7s2_pack1ton_rvv()
170 _sum6 = vfmacc_vf_f32m1(_sum6, r1[14], _k12, vl); in conv7x7s2_pack1ton_rvv()
[all …]
H A Dconvolution_7x7_pack1ton_fp16s.h87 _sum6 = vfmacc_vf_f16m1(_sum6, r0[12], _k00, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
95 _sum6 = vfmacc_vf_f16m1(_sum6, r0[13], _k01, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
103 _sum6 = vfmacc_vf_f16m1(_sum6, r0[14], _k02, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
111 _sum6 = vfmacc_vf_f16m1(_sum6, r0[15], _k03, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
119 _sum6 = vfmacc_vf_f16m1(_sum6, r0[16], _k04, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
127 _sum6 = vfmacc_vf_f16m1(_sum6, r0[17], _k05, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
135 _sum6 = vfmacc_vf_f16m1(_sum6, r0[18], _k06, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
154 _sum6 = vfmacc_vf_f16m1(_sum6, r1[12], _k10, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
162 _sum6 = vfmacc_vf_f16m1(_sum6, r1[13], _k11, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
170 _sum6 = vfmacc_vf_f16m1(_sum6, r1[14], _k12, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
[all …]
H A Dconvolution_3x3_pack1ton.h79 _sum6 = vfmacc_vf_f32m1(_sum6, r0[6], _k00, vl); in conv3x3s1_pack1ton_rvv()
87 _sum6 = vfmacc_vf_f32m1(_sum6, r0[7], _k01, vl); in conv3x3s1_pack1ton_rvv()
95 _sum6 = vfmacc_vf_f32m1(_sum6, r0[8], _k02, vl); in conv3x3s1_pack1ton_rvv()
104 _sum6 = vfmacc_vf_f32m1(_sum6, r1[6], _k10, vl); in conv3x3s1_pack1ton_rvv()
112 _sum6 = vfmacc_vf_f32m1(_sum6, r1[7], _k11, vl); in conv3x3s1_pack1ton_rvv()
120 _sum6 = vfmacc_vf_f32m1(_sum6, r1[8], _k12, vl); in conv3x3s1_pack1ton_rvv()
129 _sum6 = vfmacc_vf_f32m1(_sum6, r2[6], _k20, vl); in conv3x3s1_pack1ton_rvv()
137 _sum6 = vfmacc_vf_f32m1(_sum6, r2[7], _k21, vl); in conv3x3s1_pack1ton_rvv()
145 _sum6 = vfmacc_vf_f32m1(_sum6, r2[8], _k22, vl); in conv3x3s1_pack1ton_rvv()
357 _sum6 = vfmacc_vf_f32m1(_sum6, r0[12], _k00, vl); in conv3x3s2_pack1ton_rvv()
[all …]
H A Dconvolution_3x3_pack1ton_fp16s.h79 _sum6 = vfmacc_vf_f16m1(_sum6, r0[6], _k00, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
87 _sum6 = vfmacc_vf_f16m1(_sum6, r0[7], _k01, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
95 _sum6 = vfmacc_vf_f16m1(_sum6, r0[8], _k02, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
104 _sum6 = vfmacc_vf_f16m1(_sum6, r1[6], _k10, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
112 _sum6 = vfmacc_vf_f16m1(_sum6, r1[7], _k11, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
120 _sum6 = vfmacc_vf_f16m1(_sum6, r1[8], _k12, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
129 _sum6 = vfmacc_vf_f16m1(_sum6, r2[6], _k20, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
137 _sum6 = vfmacc_vf_f16m1(_sum6, r2[7], _k21, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
145 _sum6 = vfmacc_vf_f16m1(_sum6, r2[8], _k22, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
357 _sum6 = vfmacc_vf_f16m1(_sum6, r0[12], _k00, vl); in conv3x3s2_pack1ton_fp16sa_rvv()
[all …]
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Dconvolution_1x1_fp16s.h166 float16x8_t _sum6 = vdupq_laneq_f16(_bias0, 6); in conv1x1s1_sgemm_fp16sa_neon() local
195 _sum6 = vfmaq_laneq_f16(_sum6, _p0, _k0, 6); in conv1x1s1_sgemm_fp16sa_neon()
204 _sum6 = vfmaq_laneq_f16(_sum6, _p1, _k1, 6); in conv1x1s1_sgemm_fp16sa_neon()
213 _sum6 = vfmaq_laneq_f16(_sum6, _p2, _k2, 6); in conv1x1s1_sgemm_fp16sa_neon()
222 _sum6 = vfmaq_laneq_f16(_sum6, _p3, _k3, 6); in conv1x1s1_sgemm_fp16sa_neon()
231 _sum6 = vfmaq_laneq_f16(_sum6, _p4, _k4, 6); in conv1x1s1_sgemm_fp16sa_neon()
240 _sum6 = vfmaq_laneq_f16(_sum6, _p5, _k5, 6); in conv1x1s1_sgemm_fp16sa_neon()
249 _sum6 = vfmaq_laneq_f16(_sum6, _p6, _k6, 6); in conv1x1s1_sgemm_fp16sa_neon()
258 _sum6 = vfmaq_laneq_f16(_sum6, _p7, _k7, 6); in conv1x1s1_sgemm_fp16sa_neon()
277 _sum6 = vfmaq_laneq_f16(_sum6, _p0, _k0, 6); in conv1x1s1_sgemm_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack8to4_int8.h936 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w0), vget_low_s16(_val0), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
955 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w1), vget_low_s16(_val1), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
974 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w2), vget_low_s16(_val2), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
993 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w3), vget_low_s16(_val3), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
1012 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w4), vget_low_s16(_val4), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
1031 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w5), vget_low_s16(_val5), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
1050 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w6), vget_low_s16(_val6), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
1069 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w7), vget_low_s16(_val7), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
1140 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w0), vget_low_s16(_val3), 0); in conv3x3s1_winograd42_pack8to4_int8_neon()
1151 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w1), vget_low_s16(_val3), 1); in conv3x3s1_winograd42_pack8to4_int8_neon()
[all …]
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Dconvolution_1x1_fp16s.h166 float16x8_t _sum6 = vdupq_laneq_f16(_bias0, 6); in conv1x1s1_sgemm_fp16sa_neon() local
195 _sum6 = vfmaq_laneq_f16(_sum6, _p0, _k0, 6); in conv1x1s1_sgemm_fp16sa_neon()
204 _sum6 = vfmaq_laneq_f16(_sum6, _p1, _k1, 6); in conv1x1s1_sgemm_fp16sa_neon()
213 _sum6 = vfmaq_laneq_f16(_sum6, _p2, _k2, 6); in conv1x1s1_sgemm_fp16sa_neon()
222 _sum6 = vfmaq_laneq_f16(_sum6, _p3, _k3, 6); in conv1x1s1_sgemm_fp16sa_neon()
231 _sum6 = vfmaq_laneq_f16(_sum6, _p4, _k4, 6); in conv1x1s1_sgemm_fp16sa_neon()
240 _sum6 = vfmaq_laneq_f16(_sum6, _p5, _k5, 6); in conv1x1s1_sgemm_fp16sa_neon()
249 _sum6 = vfmaq_laneq_f16(_sum6, _p6, _k6, 6); in conv1x1s1_sgemm_fp16sa_neon()
258 _sum6 = vfmaq_laneq_f16(_sum6, _p7, _k7, 6); in conv1x1s1_sgemm_fp16sa_neon()
277 _sum6 = vfmaq_laneq_f16(_sum6, _p0, _k0, 6); in conv1x1s1_sgemm_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack8to4_int8.h936 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w0), vget_low_s16(_val0), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
955 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w1), vget_low_s16(_val1), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
974 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w2), vget_low_s16(_val2), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
993 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w3), vget_low_s16(_val3), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
1012 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w4), vget_low_s16(_val4), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
1031 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w5), vget_low_s16(_val5), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
1050 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w6), vget_low_s16(_val6), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
1069 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w7), vget_low_s16(_val7), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
1140 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w0), vget_low_s16(_val3), 0); in conv3x3s1_winograd42_pack8to4_int8_neon()
1151 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w1), vget_low_s16(_val3), 1); in conv3x3s1_winograd42_pack8to4_int8_neon()
[all …]
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Dconvolution_1x1_fp16s.h166 float16x8_t _sum6 = vdupq_laneq_f16(_bias0, 6); in conv1x1s1_sgemm_fp16sa_neon() local
195 _sum6 = vfmaq_laneq_f16(_sum6, _p0, _k0, 6); in conv1x1s1_sgemm_fp16sa_neon()
204 _sum6 = vfmaq_laneq_f16(_sum6, _p1, _k1, 6); in conv1x1s1_sgemm_fp16sa_neon()
213 _sum6 = vfmaq_laneq_f16(_sum6, _p2, _k2, 6); in conv1x1s1_sgemm_fp16sa_neon()
222 _sum6 = vfmaq_laneq_f16(_sum6, _p3, _k3, 6); in conv1x1s1_sgemm_fp16sa_neon()
231 _sum6 = vfmaq_laneq_f16(_sum6, _p4, _k4, 6); in conv1x1s1_sgemm_fp16sa_neon()
240 _sum6 = vfmaq_laneq_f16(_sum6, _p5, _k5, 6); in conv1x1s1_sgemm_fp16sa_neon()
249 _sum6 = vfmaq_laneq_f16(_sum6, _p6, _k6, 6); in conv1x1s1_sgemm_fp16sa_neon()
258 _sum6 = vfmaq_laneq_f16(_sum6, _p7, _k7, 6); in conv1x1s1_sgemm_fp16sa_neon()
277 _sum6 = vfmaq_laneq_f16(_sum6, _p0, _k0, 6); in conv1x1s1_sgemm_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack8to4_int8.h936 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w0), vget_low_s16(_val0), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
955 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w1), vget_low_s16(_val1), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
974 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w2), vget_low_s16(_val2), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
993 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w3), vget_low_s16(_val3), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
1012 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w4), vget_low_s16(_val4), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
1031 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w5), vget_low_s16(_val5), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
1050 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w6), vget_low_s16(_val6), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
1069 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w7), vget_low_s16(_val7), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
1140 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w0), vget_low_s16(_val3), 0); in conv3x3s1_winograd42_pack8to4_int8_neon()
1151 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w1), vget_low_s16(_val3), 1); in conv3x3s1_winograd42_pack8to4_int8_neon()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Dconvolution_1x1_fp16s.h166 float16x8_t _sum6 = vdupq_laneq_f16(_bias0, 6); in conv1x1s1_sgemm_fp16sa_neon() local
195 _sum6 = vfmaq_laneq_f16(_sum6, _p0, _k0, 6); in conv1x1s1_sgemm_fp16sa_neon()
204 _sum6 = vfmaq_laneq_f16(_sum6, _p1, _k1, 6); in conv1x1s1_sgemm_fp16sa_neon()
213 _sum6 = vfmaq_laneq_f16(_sum6, _p2, _k2, 6); in conv1x1s1_sgemm_fp16sa_neon()
222 _sum6 = vfmaq_laneq_f16(_sum6, _p3, _k3, 6); in conv1x1s1_sgemm_fp16sa_neon()
231 _sum6 = vfmaq_laneq_f16(_sum6, _p4, _k4, 6); in conv1x1s1_sgemm_fp16sa_neon()
240 _sum6 = vfmaq_laneq_f16(_sum6, _p5, _k5, 6); in conv1x1s1_sgemm_fp16sa_neon()
249 _sum6 = vfmaq_laneq_f16(_sum6, _p6, _k6, 6); in conv1x1s1_sgemm_fp16sa_neon()
258 _sum6 = vfmaq_laneq_f16(_sum6, _p7, _k7, 6); in conv1x1s1_sgemm_fp16sa_neon()
277 _sum6 = vfmaq_laneq_f16(_sum6, _p0, _k0, 6); in conv1x1s1_sgemm_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack8to4_int8.h936 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w0), vget_low_s16(_val0), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
955 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w1), vget_low_s16(_val1), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
974 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w2), vget_low_s16(_val2), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
993 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w3), vget_low_s16(_val3), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
1012 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w4), vget_low_s16(_val4), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
1031 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w5), vget_low_s16(_val5), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
1050 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w6), vget_low_s16(_val6), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
1069 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w7), vget_low_s16(_val7), 3); in conv3x3s1_winograd42_pack8to4_int8_neon()
1140 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w0), vget_low_s16(_val3), 0); in conv3x3s1_winograd42_pack8to4_int8_neon()
1151 _sum6 = vmlal_lane_s16(_sum6, vget_low_s16(_w1), vget_low_s16(_val3), 1); in conv3x3s1_winograd42_pack8to4_int8_neon()
[all …]
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Dconvolution_1x1_fp16s.h166 float16x8_t _sum6 = vdupq_laneq_f16(_bias0, 6); in conv1x1s1_sgemm_fp16sa_neon() local
195 _sum6 = vfmaq_laneq_f16(_sum6, _p0, _k0, 6); in conv1x1s1_sgemm_fp16sa_neon()
204 _sum6 = vfmaq_laneq_f16(_sum6, _p1, _k1, 6); in conv1x1s1_sgemm_fp16sa_neon()
213 _sum6 = vfmaq_laneq_f16(_sum6, _p2, _k2, 6); in conv1x1s1_sgemm_fp16sa_neon()
222 _sum6 = vfmaq_laneq_f16(_sum6, _p3, _k3, 6); in conv1x1s1_sgemm_fp16sa_neon()
231 _sum6 = vfmaq_laneq_f16(_sum6, _p4, _k4, 6); in conv1x1s1_sgemm_fp16sa_neon()
240 _sum6 = vfmaq_laneq_f16(_sum6, _p5, _k5, 6); in conv1x1s1_sgemm_fp16sa_neon()
249 _sum6 = vfmaq_laneq_f16(_sum6, _p6, _k6, 6); in conv1x1s1_sgemm_fp16sa_neon()
258 _sum6 = vfmaq_laneq_f16(_sum6, _p7, _k7, 6); in conv1x1s1_sgemm_fp16sa_neon()
277 _sum6 = vfmaq_laneq_f16(_sum6, _p0, _k0, 6); in conv1x1s1_sgemm_fp16sa_neon()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/mips/
H A Dconvolution_3x3_pack1to4.h91 _sum6 = __msa_fmadd_w(_sum6, _r06, _k00); in conv3x3s1_pack1to4_msa()
99 _sum6 = __msa_fmadd_w(_sum6, _r07, _k01); in conv3x3s1_pack1to4_msa()
107 _sum6 = __msa_fmadd_w(_sum6, _r08, _k02); in conv3x3s1_pack1to4_msa()
131 _sum6 = __msa_fmadd_w(_sum6, _r16, _k10); in conv3x3s1_pack1to4_msa()
139 _sum6 = __msa_fmadd_w(_sum6, _r17, _k11); in conv3x3s1_pack1to4_msa()
147 _sum6 = __msa_fmadd_w(_sum6, _r18, _k12); in conv3x3s1_pack1to4_msa()
171 _sum6 = __msa_fmadd_w(_sum6, _r26, _k20); in conv3x3s1_pack1to4_msa()
179 _sum6 = __msa_fmadd_w(_sum6, _r27, _k21); in conv3x3s1_pack1to4_msa()
187 _sum6 = __msa_fmadd_w(_sum6, _r28, _k22); in conv3x3s1_pack1to4_msa()
482 _sum6 = __msa_fmadd_w(_sum6, _r0c, _k00); in conv3x3s2_pack1to4_msa()
[all …]
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/
H A Dconvolution_1x1_pack8.h511 _sum6 = _mm256_fmadd_ps(_w0, _val60, _sum6); in conv1x1s1_sgemm_pack8_avx()
512 _sum6 = _mm256_fmadd_ps(_w1, _val61, _sum6); in conv1x1s1_sgemm_pack8_avx()
513 _sum6 = _mm256_fmadd_ps(_w2, _val62, _sum6); in conv1x1s1_sgemm_pack8_avx()
514 _sum6 = _mm256_fmadd_ps(_w3, _val63, _sum6); in conv1x1s1_sgemm_pack8_avx()
515 _sum6 = _mm256_fmadd_ps(_w4, _val64, _sum6); in conv1x1s1_sgemm_pack8_avx()
516 _sum6 = _mm256_fmadd_ps(_w5, _val65, _sum6); in conv1x1s1_sgemm_pack8_avx()
517 _sum6 = _mm256_fmadd_ps(_w6, _val66, _sum6); in conv1x1s1_sgemm_pack8_avx()
518 _sum6 = _mm256_fmadd_ps(_w7, _val67, _sum6); in conv1x1s1_sgemm_pack8_avx()
759 _sum6 = _mm256_fmadd_ps(_w0, _val60, _sum6); in conv1x1s1_sgemm_pack8_avx()
760 _sum6 = _mm256_fmadd_ps(_w1, _val61, _sum6); in conv1x1s1_sgemm_pack8_avx()
[all …]
H A Dconvolution_1x1_pack8_fp16.h511 _sum6 = _mm256_fmadd_ps(_w0, _val60, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
512 _sum6 = _mm256_fmadd_ps(_w1, _val61, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
513 _sum6 = _mm256_fmadd_ps(_w2, _val62, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
514 _sum6 = _mm256_fmadd_ps(_w3, _val63, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
515 _sum6 = _mm256_fmadd_ps(_w4, _val64, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
516 _sum6 = _mm256_fmadd_ps(_w5, _val65, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
517 _sum6 = _mm256_fmadd_ps(_w6, _val66, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
518 _sum6 = _mm256_fmadd_ps(_w7, _val67, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
759 _sum6 = _mm256_fmadd_ps(_w0, _val60, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
760 _sum6 = _mm256_fmadd_ps(_w1, _val61, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
[all …]
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/
H A Dconvolution_1x1_pack8_fp16.h511 _sum6 = _mm256_fmadd_ps(_w0, _val60, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
512 _sum6 = _mm256_fmadd_ps(_w1, _val61, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
513 _sum6 = _mm256_fmadd_ps(_w2, _val62, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
514 _sum6 = _mm256_fmadd_ps(_w3, _val63, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
515 _sum6 = _mm256_fmadd_ps(_w4, _val64, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
516 _sum6 = _mm256_fmadd_ps(_w5, _val65, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
517 _sum6 = _mm256_fmadd_ps(_w6, _val66, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
518 _sum6 = _mm256_fmadd_ps(_w7, _val67, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
759 _sum6 = _mm256_fmadd_ps(_w0, _val60, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
760 _sum6 = _mm256_fmadd_ps(_w1, _val61, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
[all …]
H A Dconvolutiondepthwise_3x3_pack8.h160 __m256 _sum6 = _bias0; in convdw3x3s1_pack8_avx() local
166 _sum6 = _mm256_fmadd_ps(_k00, _r06, _sum6); in convdw3x3s1_pack8_avx()
167 _sum6 = _mm256_fmadd_ps(_k01, _r07, _sum6); in convdw3x3s1_pack8_avx()
168 _sum6 = _mm256_fmadd_ps(_k02, _r08, _sum6); in convdw3x3s1_pack8_avx()
169 _sum6 = _mm256_fmadd_ps(_k10, _r16, _sum6); in convdw3x3s1_pack8_avx()
170 _sum6 = _mm256_fmadd_ps(_k11, _r17, _sum6); in convdw3x3s1_pack8_avx()
171 _sum6 = _mm256_fmadd_ps(_k12, _r18, _sum6); in convdw3x3s1_pack8_avx()
172 _sum6 = _mm256_fmadd_ps(_k20, _r26, _sum6); in convdw3x3s1_pack8_avx()
173 _sum6 = _mm256_fmadd_ps(_k21, _r27, _sum6); in convdw3x3s1_pack8_avx()
174 _sum6 = _mm256_fmadd_ps(_k22, _r28, _sum6); in convdw3x3s1_pack8_avx()
[all …]
H A Dconvolutiondepthwise_3x3_pack8_fp16.h160 __m256 _sum6 = _bias0; in convdw3x3s1_fp16_pack8_avx() local
166 _sum6 = _mm256_fmadd_ps(_k00, _r06, _sum6); in convdw3x3s1_fp16_pack8_avx()
167 _sum6 = _mm256_fmadd_ps(_k01, _r07, _sum6); in convdw3x3s1_fp16_pack8_avx()
168 _sum6 = _mm256_fmadd_ps(_k02, _r08, _sum6); in convdw3x3s1_fp16_pack8_avx()
169 _sum6 = _mm256_fmadd_ps(_k10, _r16, _sum6); in convdw3x3s1_fp16_pack8_avx()
170 _sum6 = _mm256_fmadd_ps(_k11, _r17, _sum6); in convdw3x3s1_fp16_pack8_avx()
171 _sum6 = _mm256_fmadd_ps(_k12, _r18, _sum6); in convdw3x3s1_fp16_pack8_avx()
172 _sum6 = _mm256_fmadd_ps(_k20, _r26, _sum6); in convdw3x3s1_fp16_pack8_avx()
173 _sum6 = _mm256_fmadd_ps(_k21, _r27, _sum6); in convdw3x3s1_fp16_pack8_avx()
174 _sum6 = _mm256_fmadd_ps(_k22, _r28, _sum6); in convdw3x3s1_fp16_pack8_avx()
[all …]
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/
H A Dconvolution_1x1_pack8_fp16.h511 _sum6 = _mm256_fmadd_ps(_w0, _val60, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
512 _sum6 = _mm256_fmadd_ps(_w1, _val61, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
513 _sum6 = _mm256_fmadd_ps(_w2, _val62, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
514 _sum6 = _mm256_fmadd_ps(_w3, _val63, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
515 _sum6 = _mm256_fmadd_ps(_w4, _val64, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
516 _sum6 = _mm256_fmadd_ps(_w5, _val65, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
517 _sum6 = _mm256_fmadd_ps(_w6, _val66, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
518 _sum6 = _mm256_fmadd_ps(_w7, _val67, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
759 _sum6 = _mm256_fmadd_ps(_w0, _val60, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
760 _sum6 = _mm256_fmadd_ps(_w1, _val61, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
[all …]
H A Dconvolutiondepthwise_3x3_pack8_fp16.h160 __m256 _sum6 = _bias0; in convdw3x3s1_fp16_pack8_avx() local
166 _sum6 = _mm256_fmadd_ps(_k00, _r06, _sum6); in convdw3x3s1_fp16_pack8_avx()
167 _sum6 = _mm256_fmadd_ps(_k01, _r07, _sum6); in convdw3x3s1_fp16_pack8_avx()
168 _sum6 = _mm256_fmadd_ps(_k02, _r08, _sum6); in convdw3x3s1_fp16_pack8_avx()
169 _sum6 = _mm256_fmadd_ps(_k10, _r16, _sum6); in convdw3x3s1_fp16_pack8_avx()
170 _sum6 = _mm256_fmadd_ps(_k11, _r17, _sum6); in convdw3x3s1_fp16_pack8_avx()
171 _sum6 = _mm256_fmadd_ps(_k12, _r18, _sum6); in convdw3x3s1_fp16_pack8_avx()
172 _sum6 = _mm256_fmadd_ps(_k20, _r26, _sum6); in convdw3x3s1_fp16_pack8_avx()
173 _sum6 = _mm256_fmadd_ps(_k21, _r27, _sum6); in convdw3x3s1_fp16_pack8_avx()
174 _sum6 = _mm256_fmadd_ps(_k22, _r28, _sum6); in convdw3x3s1_fp16_pack8_avx()
[all …]
H A Dconvolutiondepthwise_3x3_pack8.h160 __m256 _sum6 = _bias0; in convdw3x3s1_pack8_avx() local
166 _sum6 = _mm256_fmadd_ps(_k00, _r06, _sum6); in convdw3x3s1_pack8_avx()
167 _sum6 = _mm256_fmadd_ps(_k01, _r07, _sum6); in convdw3x3s1_pack8_avx()
168 _sum6 = _mm256_fmadd_ps(_k02, _r08, _sum6); in convdw3x3s1_pack8_avx()
169 _sum6 = _mm256_fmadd_ps(_k10, _r16, _sum6); in convdw3x3s1_pack8_avx()
170 _sum6 = _mm256_fmadd_ps(_k11, _r17, _sum6); in convdw3x3s1_pack8_avx()
171 _sum6 = _mm256_fmadd_ps(_k12, _r18, _sum6); in convdw3x3s1_pack8_avx()
172 _sum6 = _mm256_fmadd_ps(_k20, _r26, _sum6); in convdw3x3s1_pack8_avx()
173 _sum6 = _mm256_fmadd_ps(_k21, _r27, _sum6); in convdw3x3s1_pack8_avx()
174 _sum6 = _mm256_fmadd_ps(_k22, _r28, _sum6); in convdw3x3s1_pack8_avx()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/x86/
H A Dconvolution_1x1_pack8_fp16.h511 _sum6 = _mm256_comp_fmadd_ps(_w0, _val60, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
512 _sum6 = _mm256_comp_fmadd_ps(_w1, _val61, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
513 _sum6 = _mm256_comp_fmadd_ps(_w2, _val62, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
514 _sum6 = _mm256_comp_fmadd_ps(_w3, _val63, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
515 _sum6 = _mm256_comp_fmadd_ps(_w4, _val64, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
516 _sum6 = _mm256_comp_fmadd_ps(_w5, _val65, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
517 _sum6 = _mm256_comp_fmadd_ps(_w6, _val66, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
518 _sum6 = _mm256_comp_fmadd_ps(_w7, _val67, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
759 _sum6 = _mm256_comp_fmadd_ps(_w0, _val60, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
760 _sum6 = _mm256_comp_fmadd_ps(_w1, _val61, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
[all …]
H A Dconvolutiondepthwise_3x3_pack4.h160 __m128 _sum6 = _bias0; in convdw3x3s1_pack4_sse() local
166 _sum6 = _mm_comp_fmadd_ps(_k00, _r06, _sum6); in convdw3x3s1_pack4_sse()
167 _sum6 = _mm_comp_fmadd_ps(_k01, _r07, _sum6); in convdw3x3s1_pack4_sse()
168 _sum6 = _mm_comp_fmadd_ps(_k02, _r08, _sum6); in convdw3x3s1_pack4_sse()
169 _sum6 = _mm_comp_fmadd_ps(_k10, _r16, _sum6); in convdw3x3s1_pack4_sse()
170 _sum6 = _mm_comp_fmadd_ps(_k11, _r17, _sum6); in convdw3x3s1_pack4_sse()
171 _sum6 = _mm_comp_fmadd_ps(_k12, _r18, _sum6); in convdw3x3s1_pack4_sse()
172 _sum6 = _mm_comp_fmadd_ps(_k20, _r26, _sum6); in convdw3x3s1_pack4_sse()
173 _sum6 = _mm_comp_fmadd_ps(_k21, _r27, _sum6); in convdw3x3s1_pack4_sse()
174 _sum6 = _mm_comp_fmadd_ps(_k22, _r28, _sum6); in convdw3x3s1_pack4_sse()
[all …]
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/
H A Dconvolution_1x1_pack8_fp16.h511 _sum6 = _mm256_fmadd_ps(_w0, _val60, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
512 _sum6 = _mm256_fmadd_ps(_w1, _val61, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
513 _sum6 = _mm256_fmadd_ps(_w2, _val62, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
514 _sum6 = _mm256_fmadd_ps(_w3, _val63, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
515 _sum6 = _mm256_fmadd_ps(_w4, _val64, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
516 _sum6 = _mm256_fmadd_ps(_w5, _val65, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
517 _sum6 = _mm256_fmadd_ps(_w6, _val66, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
518 _sum6 = _mm256_fmadd_ps(_w7, _val67, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
759 _sum6 = _mm256_fmadd_ps(_w0, _val60, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
760 _sum6 = _mm256_fmadd_ps(_w1, _val61, _sum6); in conv1x1s1_sgemm_fp16_pack8_avx()
[all …]

1234