Home
last modified time | relevance | path

Searched refs:_sum1 (Results 1 – 25 of 216) sorted by relevance

123456789

/dports/misc/ncnn/ncnn-20211208/src/layer/riscv/
H A Dconvolution_3x3_pack1ton.h74 _sum1 = vfmacc_vf_f32m1(_sum1, r0[1], _k00, vl); in conv3x3s1_pack1ton_rvv()
82 _sum1 = vfmacc_vf_f32m1(_sum1, r0[2], _k01, vl); in conv3x3s1_pack1ton_rvv()
90 _sum1 = vfmacc_vf_f32m1(_sum1, r0[3], _k02, vl); in conv3x3s1_pack1ton_rvv()
99 _sum1 = vfmacc_vf_f32m1(_sum1, r1[1], _k10, vl); in conv3x3s1_pack1ton_rvv()
107 _sum1 = vfmacc_vf_f32m1(_sum1, r1[2], _k11, vl); in conv3x3s1_pack1ton_rvv()
115 _sum1 = vfmacc_vf_f32m1(_sum1, r1[3], _k12, vl); in conv3x3s1_pack1ton_rvv()
124 _sum1 = vfmacc_vf_f32m1(_sum1, r2[1], _k20, vl); in conv3x3s1_pack1ton_rvv()
132 _sum1 = vfmacc_vf_f32m1(_sum1, r2[2], _k21, vl); in conv3x3s1_pack1ton_rvv()
140 _sum1 = vfmacc_vf_f32m1(_sum1, r2[3], _k22, vl); in conv3x3s1_pack1ton_rvv()
171 _sum1 = vfmacc_vf_f32m1(_sum1, r0[1], _k00, vl); in conv3x3s1_pack1ton_rvv()
[all …]
H A Dconvolution_3x3_pack1ton_fp16s.h74 _sum1 = vfmacc_vf_f16m1(_sum1, r0[1], _k00, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
82 _sum1 = vfmacc_vf_f16m1(_sum1, r0[2], _k01, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
90 _sum1 = vfmacc_vf_f16m1(_sum1, r0[3], _k02, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
99 _sum1 = vfmacc_vf_f16m1(_sum1, r1[1], _k10, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
107 _sum1 = vfmacc_vf_f16m1(_sum1, r1[2], _k11, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
115 _sum1 = vfmacc_vf_f16m1(_sum1, r1[3], _k12, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
124 _sum1 = vfmacc_vf_f16m1(_sum1, r2[1], _k20, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
132 _sum1 = vfmacc_vf_f16m1(_sum1, r2[2], _k21, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
140 _sum1 = vfmacc_vf_f16m1(_sum1, r2[3], _k22, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
171 _sum1 = vfmacc_vf_f16m1(_sum1, r0[1], _k00, vl); in conv3x3s1_pack1ton_fp16sa_rvv()
[all …]
H A Dconvolution_7x7_pack1ton.h82 _sum1 = vfmacc_vf_f32m1(_sum1, r0[2], _k00, vl); in conv7x7s2_pack1ton_rvv()
90 _sum1 = vfmacc_vf_f32m1(_sum1, r0[3], _k01, vl); in conv7x7s2_pack1ton_rvv()
98 _sum1 = vfmacc_vf_f32m1(_sum1, r0[4], _k02, vl); in conv7x7s2_pack1ton_rvv()
106 _sum1 = vfmacc_vf_f32m1(_sum1, r0[5], _k03, vl); in conv7x7s2_pack1ton_rvv()
114 _sum1 = vfmacc_vf_f32m1(_sum1, r0[6], _k04, vl); in conv7x7s2_pack1ton_rvv()
122 _sum1 = vfmacc_vf_f32m1(_sum1, r0[7], _k05, vl); in conv7x7s2_pack1ton_rvv()
130 _sum1 = vfmacc_vf_f32m1(_sum1, r0[8], _k06, vl); in conv7x7s2_pack1ton_rvv()
149 _sum1 = vfmacc_vf_f32m1(_sum1, r1[2], _k10, vl); in conv7x7s2_pack1ton_rvv()
157 _sum1 = vfmacc_vf_f32m1(_sum1, r1[3], _k11, vl); in conv7x7s2_pack1ton_rvv()
165 _sum1 = vfmacc_vf_f32m1(_sum1, r1[4], _k12, vl); in conv7x7s2_pack1ton_rvv()
[all …]
H A Dconvolution_7x7_pack1ton_fp16s.h82 _sum1 = vfmacc_vf_f16m1(_sum1, r0[2], _k00, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
90 _sum1 = vfmacc_vf_f16m1(_sum1, r0[3], _k01, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
98 _sum1 = vfmacc_vf_f16m1(_sum1, r0[4], _k02, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
106 _sum1 = vfmacc_vf_f16m1(_sum1, r0[5], _k03, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
114 _sum1 = vfmacc_vf_f16m1(_sum1, r0[6], _k04, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
122 _sum1 = vfmacc_vf_f16m1(_sum1, r0[7], _k05, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
130 _sum1 = vfmacc_vf_f16m1(_sum1, r0[8], _k06, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
149 _sum1 = vfmacc_vf_f16m1(_sum1, r1[2], _k10, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
157 _sum1 = vfmacc_vf_f16m1(_sum1, r1[3], _k11, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
165 _sum1 = vfmacc_vf_f16m1(_sum1, r1[4], _k12, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
[all …]
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/
H A Dconvolution_2x2_pack8.h97 _sum1 = _mm256_fmadd_ps(_k00, _r00, _sum1); in conv2x2s1_pack8_avx()
98 _sum1 = _mm256_fmadd_ps(_k01, _r01, _sum1); in conv2x2s1_pack8_avx()
99 _sum1 = _mm256_fmadd_ps(_k02, _r02, _sum1); in conv2x2s1_pack8_avx()
100 _sum1 = _mm256_fmadd_ps(_k03, _r03, _sum1); in conv2x2s1_pack8_avx()
101 _sum1 = _mm256_fmadd_ps(_k04, _r04, _sum1); in conv2x2s1_pack8_avx()
102 _sum1 = _mm256_fmadd_ps(_k05, _r05, _sum1); in conv2x2s1_pack8_avx()
103 _sum1 = _mm256_fmadd_ps(_k06, _r06, _sum1); in conv2x2s1_pack8_avx()
104 _sum1 = _mm256_fmadd_ps(_k07, _r07, _sum1); in conv2x2s1_pack8_avx()
137 _sum1 = _mm256_fmadd_ps(_k00, _r00, _sum1); in conv2x2s1_pack8_avx()
138 _sum1 = _mm256_fmadd_ps(_k01, _r01, _sum1); in conv2x2s1_pack8_avx()
[all …]
H A Dconvolutiondepthwise_3x3_pack8.h86 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack8_avx()
87 _sum1 = _mm256_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_pack8_avx()
88 _sum1 = _mm256_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_pack8_avx()
89 _sum1 = _mm256_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_pack8_avx()
90 _sum1 = _mm256_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_pack8_avx()
91 _sum1 = _mm256_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_pack8_avx()
92 _sum1 = _mm256_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_pack8_avx()
93 _sum1 = _mm256_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_pack8_avx()
94 _sum1 = _mm256_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_pack8_avx()
228 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack8_avx()
[all …]
H A Dconvolutiondepthwise_3x3_pack8_fp16.h86 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_fp16_pack8_avx()
87 _sum1 = _mm256_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_fp16_pack8_avx()
88 _sum1 = _mm256_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_fp16_pack8_avx()
89 _sum1 = _mm256_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_fp16_pack8_avx()
90 _sum1 = _mm256_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_fp16_pack8_avx()
91 _sum1 = _mm256_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_fp16_pack8_avx()
92 _sum1 = _mm256_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_fp16_pack8_avx()
93 _sum1 = _mm256_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_fp16_pack8_avx()
94 _sum1 = _mm256_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_fp16_pack8_avx()
228 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_fp16_pack8_avx()
[all …]
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/
H A Dconvolution_2x2_pack8.h97 _sum1 = _mm256_fmadd_ps(_k00, _r00, _sum1); in conv2x2s1_pack8_avx()
98 _sum1 = _mm256_fmadd_ps(_k01, _r01, _sum1); in conv2x2s1_pack8_avx()
99 _sum1 = _mm256_fmadd_ps(_k02, _r02, _sum1); in conv2x2s1_pack8_avx()
100 _sum1 = _mm256_fmadd_ps(_k03, _r03, _sum1); in conv2x2s1_pack8_avx()
101 _sum1 = _mm256_fmadd_ps(_k04, _r04, _sum1); in conv2x2s1_pack8_avx()
102 _sum1 = _mm256_fmadd_ps(_k05, _r05, _sum1); in conv2x2s1_pack8_avx()
103 _sum1 = _mm256_fmadd_ps(_k06, _r06, _sum1); in conv2x2s1_pack8_avx()
104 _sum1 = _mm256_fmadd_ps(_k07, _r07, _sum1); in conv2x2s1_pack8_avx()
137 _sum1 = _mm256_fmadd_ps(_k00, _r00, _sum1); in conv2x2s1_pack8_avx()
138 _sum1 = _mm256_fmadd_ps(_k01, _r01, _sum1); in conv2x2s1_pack8_avx()
[all …]
H A Dconvolutiondepthwise_3x3_pack8_fp16.h86 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_fp16_pack8_avx()
87 _sum1 = _mm256_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_fp16_pack8_avx()
88 _sum1 = _mm256_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_fp16_pack8_avx()
89 _sum1 = _mm256_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_fp16_pack8_avx()
90 _sum1 = _mm256_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_fp16_pack8_avx()
91 _sum1 = _mm256_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_fp16_pack8_avx()
92 _sum1 = _mm256_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_fp16_pack8_avx()
93 _sum1 = _mm256_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_fp16_pack8_avx()
94 _sum1 = _mm256_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_fp16_pack8_avx()
228 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_fp16_pack8_avx()
[all …]
H A Dconvolutiondepthwise_3x3_pack8.h86 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack8_avx()
87 _sum1 = _mm256_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_pack8_avx()
88 _sum1 = _mm256_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_pack8_avx()
89 _sum1 = _mm256_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_pack8_avx()
90 _sum1 = _mm256_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_pack8_avx()
91 _sum1 = _mm256_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_pack8_avx()
92 _sum1 = _mm256_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_pack8_avx()
93 _sum1 = _mm256_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_pack8_avx()
94 _sum1 = _mm256_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_pack8_avx()
228 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack8_avx()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/x86/
H A Dconvolution_2x2_pack8.h97 _sum1 = _mm256_comp_fmadd_ps(_k00, _r00, _sum1); in conv2x2s1_pack8_avx()
98 _sum1 = _mm256_comp_fmadd_ps(_k01, _r01, _sum1); in conv2x2s1_pack8_avx()
99 _sum1 = _mm256_comp_fmadd_ps(_k02, _r02, _sum1); in conv2x2s1_pack8_avx()
100 _sum1 = _mm256_comp_fmadd_ps(_k03, _r03, _sum1); in conv2x2s1_pack8_avx()
101 _sum1 = _mm256_comp_fmadd_ps(_k04, _r04, _sum1); in conv2x2s1_pack8_avx()
102 _sum1 = _mm256_comp_fmadd_ps(_k05, _r05, _sum1); in conv2x2s1_pack8_avx()
103 _sum1 = _mm256_comp_fmadd_ps(_k06, _r06, _sum1); in conv2x2s1_pack8_avx()
104 _sum1 = _mm256_comp_fmadd_ps(_k07, _r07, _sum1); in conv2x2s1_pack8_avx()
137 _sum1 = _mm256_comp_fmadd_ps(_k00, _r00, _sum1); in conv2x2s1_pack8_avx()
138 _sum1 = _mm256_comp_fmadd_ps(_k01, _r01, _sum1); in conv2x2s1_pack8_avx()
[all …]
H A Dconvolutiondepthwise_3x3_pack4.h86 _sum1 = _mm_comp_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack4_sse()
87 _sum1 = _mm_comp_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_pack4_sse()
88 _sum1 = _mm_comp_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_pack4_sse()
89 _sum1 = _mm_comp_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_pack4_sse()
90 _sum1 = _mm_comp_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_pack4_sse()
91 _sum1 = _mm_comp_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_pack4_sse()
92 _sum1 = _mm_comp_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_pack4_sse()
93 _sum1 = _mm_comp_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_pack4_sse()
94 _sum1 = _mm_comp_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_pack4_sse()
228 _sum1 = _mm_comp_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack4_sse()
[all …]
H A Dconvolutiondepthwise_3x3_pack8_fp16.h86 _sum1 = _mm256_comp_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_fp16_pack8_avx()
87 _sum1 = _mm256_comp_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_fp16_pack8_avx()
88 _sum1 = _mm256_comp_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_fp16_pack8_avx()
89 _sum1 = _mm256_comp_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_fp16_pack8_avx()
90 _sum1 = _mm256_comp_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_fp16_pack8_avx()
91 _sum1 = _mm256_comp_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_fp16_pack8_avx()
92 _sum1 = _mm256_comp_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_fp16_pack8_avx()
93 _sum1 = _mm256_comp_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_fp16_pack8_avx()
94 _sum1 = _mm256_comp_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_fp16_pack8_avx()
228 _sum1 = _mm256_comp_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_fp16_pack8_avx()
[all …]
H A Dconvolutiondepthwise_3x3_pack8.h86 _sum1 = _mm256_comp_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack8_avx()
87 _sum1 = _mm256_comp_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_pack8_avx()
88 _sum1 = _mm256_comp_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_pack8_avx()
89 _sum1 = _mm256_comp_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_pack8_avx()
90 _sum1 = _mm256_comp_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_pack8_avx()
91 _sum1 = _mm256_comp_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_pack8_avx()
92 _sum1 = _mm256_comp_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_pack8_avx()
93 _sum1 = _mm256_comp_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_pack8_avx()
94 _sum1 = _mm256_comp_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_pack8_avx()
228 _sum1 = _mm256_comp_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack8_avx()
[all …]
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/
H A Dconvolution_2x2_pack8.h97 _sum1 = _mm256_fmadd_ps(_k00, _r00, _sum1); in conv2x2s1_pack8_avx()
98 _sum1 = _mm256_fmadd_ps(_k01, _r01, _sum1); in conv2x2s1_pack8_avx()
99 _sum1 = _mm256_fmadd_ps(_k02, _r02, _sum1); in conv2x2s1_pack8_avx()
100 _sum1 = _mm256_fmadd_ps(_k03, _r03, _sum1); in conv2x2s1_pack8_avx()
101 _sum1 = _mm256_fmadd_ps(_k04, _r04, _sum1); in conv2x2s1_pack8_avx()
102 _sum1 = _mm256_fmadd_ps(_k05, _r05, _sum1); in conv2x2s1_pack8_avx()
103 _sum1 = _mm256_fmadd_ps(_k06, _r06, _sum1); in conv2x2s1_pack8_avx()
104 _sum1 = _mm256_fmadd_ps(_k07, _r07, _sum1); in conv2x2s1_pack8_avx()
137 _sum1 = _mm256_fmadd_ps(_k00, _r00, _sum1); in conv2x2s1_pack8_avx()
138 _sum1 = _mm256_fmadd_ps(_k01, _r01, _sum1); in conv2x2s1_pack8_avx()
[all …]
H A Dconvolutiondepthwise_3x3_pack8.h86 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack8_avx()
87 _sum1 = _mm256_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_pack8_avx()
88 _sum1 = _mm256_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_pack8_avx()
89 _sum1 = _mm256_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_pack8_avx()
90 _sum1 = _mm256_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_pack8_avx()
91 _sum1 = _mm256_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_pack8_avx()
92 _sum1 = _mm256_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_pack8_avx()
93 _sum1 = _mm256_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_pack8_avx()
94 _sum1 = _mm256_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_pack8_avx()
228 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack8_avx()
[all …]
H A Dconvolutiondepthwise_3x3_pack8_fp16.h86 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_fp16_pack8_avx()
87 _sum1 = _mm256_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_fp16_pack8_avx()
88 _sum1 = _mm256_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_fp16_pack8_avx()
89 _sum1 = _mm256_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_fp16_pack8_avx()
90 _sum1 = _mm256_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_fp16_pack8_avx()
91 _sum1 = _mm256_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_fp16_pack8_avx()
92 _sum1 = _mm256_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_fp16_pack8_avx()
93 _sum1 = _mm256_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_fp16_pack8_avx()
94 _sum1 = _mm256_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_fp16_pack8_avx()
228 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_fp16_pack8_avx()
[all …]
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/
H A Dconvolution_2x2_pack8.h97 _sum1 = _mm256_fmadd_ps(_k00, _r00, _sum1); in conv2x2s1_pack8_avx()
98 _sum1 = _mm256_fmadd_ps(_k01, _r01, _sum1); in conv2x2s1_pack8_avx()
99 _sum1 = _mm256_fmadd_ps(_k02, _r02, _sum1); in conv2x2s1_pack8_avx()
100 _sum1 = _mm256_fmadd_ps(_k03, _r03, _sum1); in conv2x2s1_pack8_avx()
101 _sum1 = _mm256_fmadd_ps(_k04, _r04, _sum1); in conv2x2s1_pack8_avx()
102 _sum1 = _mm256_fmadd_ps(_k05, _r05, _sum1); in conv2x2s1_pack8_avx()
103 _sum1 = _mm256_fmadd_ps(_k06, _r06, _sum1); in conv2x2s1_pack8_avx()
104 _sum1 = _mm256_fmadd_ps(_k07, _r07, _sum1); in conv2x2s1_pack8_avx()
137 _sum1 = _mm256_fmadd_ps(_k00, _r00, _sum1); in conv2x2s1_pack8_avx()
138 _sum1 = _mm256_fmadd_ps(_k01, _r01, _sum1); in conv2x2s1_pack8_avx()
[all …]
H A Dconvolutiondepthwise_3x3_pack8_fp16.h86 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_fp16_pack8_avx()
87 _sum1 = _mm256_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_fp16_pack8_avx()
88 _sum1 = _mm256_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_fp16_pack8_avx()
89 _sum1 = _mm256_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_fp16_pack8_avx()
90 _sum1 = _mm256_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_fp16_pack8_avx()
91 _sum1 = _mm256_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_fp16_pack8_avx()
92 _sum1 = _mm256_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_fp16_pack8_avx()
93 _sum1 = _mm256_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_fp16_pack8_avx()
94 _sum1 = _mm256_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_fp16_pack8_avx()
228 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_fp16_pack8_avx()
[all …]
H A Dconvolutiondepthwise_3x3_pack8.h86 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack8_avx()
87 _sum1 = _mm256_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_pack8_avx()
88 _sum1 = _mm256_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_pack8_avx()
89 _sum1 = _mm256_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_pack8_avx()
90 _sum1 = _mm256_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_pack8_avx()
91 _sum1 = _mm256_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_pack8_avx()
92 _sum1 = _mm256_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_pack8_avx()
93 _sum1 = _mm256_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_pack8_avx()
94 _sum1 = _mm256_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_pack8_avx()
228 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack8_avx()
[all …]
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Dconvolutiondepthwise_5x5_pack4.h630 _sum1 = vmlaq_f32(_sum1, _k00, _r10); in convdw5x5s1_pack4_neon()
631 _sum1 = vmlaq_f32(_sum1, _k01, _r11); in convdw5x5s1_pack4_neon()
632 _sum1 = vmlaq_f32(_sum1, _k02, _r12); in convdw5x5s1_pack4_neon()
633 _sum1 = vmlaq_f32(_sum1, _k03, _r13); in convdw5x5s1_pack4_neon()
634 _sum1 = vmlaq_f32(_sum1, _k04, _r14); in convdw5x5s1_pack4_neon()
655 _sum1 = vmlaq_f32(_sum1, _k10, _r20); in convdw5x5s1_pack4_neon()
656 _sum1 = vmlaq_f32(_sum1, _k11, _r21); in convdw5x5s1_pack4_neon()
657 _sum1 = vmlaq_f32(_sum1, _k12, _r22); in convdw5x5s1_pack4_neon()
658 _sum1 = vmlaq_f32(_sum1, _k13, _r23); in convdw5x5s1_pack4_neon()
659 _sum1 = vmlaq_f32(_sum1, _k14, _r24); in convdw5x5s1_pack4_neon()
[all …]
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Dconvolutiondepthwise_5x5_pack4.h630 _sum1 = vmlaq_f32(_sum1, _k00, _r10); in convdw5x5s1_pack4_neon()
631 _sum1 = vmlaq_f32(_sum1, _k01, _r11); in convdw5x5s1_pack4_neon()
632 _sum1 = vmlaq_f32(_sum1, _k02, _r12); in convdw5x5s1_pack4_neon()
633 _sum1 = vmlaq_f32(_sum1, _k03, _r13); in convdw5x5s1_pack4_neon()
634 _sum1 = vmlaq_f32(_sum1, _k04, _r14); in convdw5x5s1_pack4_neon()
655 _sum1 = vmlaq_f32(_sum1, _k10, _r20); in convdw5x5s1_pack4_neon()
656 _sum1 = vmlaq_f32(_sum1, _k11, _r21); in convdw5x5s1_pack4_neon()
657 _sum1 = vmlaq_f32(_sum1, _k12, _r22); in convdw5x5s1_pack4_neon()
658 _sum1 = vmlaq_f32(_sum1, _k13, _r23); in convdw5x5s1_pack4_neon()
659 _sum1 = vmlaq_f32(_sum1, _k14, _r24); in convdw5x5s1_pack4_neon()
[all …]
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Dconvolutiondepthwise_5x5_pack4.h630 _sum1 = vmlaq_f32(_sum1, _k00, _r10); in convdw5x5s1_pack4_neon()
631 _sum1 = vmlaq_f32(_sum1, _k01, _r11); in convdw5x5s1_pack4_neon()
632 _sum1 = vmlaq_f32(_sum1, _k02, _r12); in convdw5x5s1_pack4_neon()
633 _sum1 = vmlaq_f32(_sum1, _k03, _r13); in convdw5x5s1_pack4_neon()
634 _sum1 = vmlaq_f32(_sum1, _k04, _r14); in convdw5x5s1_pack4_neon()
655 _sum1 = vmlaq_f32(_sum1, _k10, _r20); in convdw5x5s1_pack4_neon()
656 _sum1 = vmlaq_f32(_sum1, _k11, _r21); in convdw5x5s1_pack4_neon()
657 _sum1 = vmlaq_f32(_sum1, _k12, _r22); in convdw5x5s1_pack4_neon()
658 _sum1 = vmlaq_f32(_sum1, _k13, _r23); in convdw5x5s1_pack4_neon()
659 _sum1 = vmlaq_f32(_sum1, _k14, _r24); in convdw5x5s1_pack4_neon()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Dconvolutiondepthwise_5x5_pack4.h630 _sum1 = vmlaq_f32(_sum1, _k00, _r10); in convdw5x5s1_pack4_neon()
631 _sum1 = vmlaq_f32(_sum1, _k01, _r11); in convdw5x5s1_pack4_neon()
632 _sum1 = vmlaq_f32(_sum1, _k02, _r12); in convdw5x5s1_pack4_neon()
633 _sum1 = vmlaq_f32(_sum1, _k03, _r13); in convdw5x5s1_pack4_neon()
634 _sum1 = vmlaq_f32(_sum1, _k04, _r14); in convdw5x5s1_pack4_neon()
655 _sum1 = vmlaq_f32(_sum1, _k10, _r20); in convdw5x5s1_pack4_neon()
656 _sum1 = vmlaq_f32(_sum1, _k11, _r21); in convdw5x5s1_pack4_neon()
657 _sum1 = vmlaq_f32(_sum1, _k12, _r22); in convdw5x5s1_pack4_neon()
658 _sum1 = vmlaq_f32(_sum1, _k13, _r23); in convdw5x5s1_pack4_neon()
659 _sum1 = vmlaq_f32(_sum1, _k14, _r24); in convdw5x5s1_pack4_neon()
[all …]
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Dconvolutiondepthwise_5x5_pack4.h630 _sum1 = vmlaq_f32(_sum1, _k00, _r10); in convdw5x5s1_pack4_neon()
631 _sum1 = vmlaq_f32(_sum1, _k01, _r11); in convdw5x5s1_pack4_neon()
632 _sum1 = vmlaq_f32(_sum1, _k02, _r12); in convdw5x5s1_pack4_neon()
633 _sum1 = vmlaq_f32(_sum1, _k03, _r13); in convdw5x5s1_pack4_neon()
634 _sum1 = vmlaq_f32(_sum1, _k04, _r14); in convdw5x5s1_pack4_neon()
655 _sum1 = vmlaq_f32(_sum1, _k10, _r20); in convdw5x5s1_pack4_neon()
656 _sum1 = vmlaq_f32(_sum1, _k11, _r21); in convdw5x5s1_pack4_neon()
657 _sum1 = vmlaq_f32(_sum1, _k12, _r22); in convdw5x5s1_pack4_neon()
658 _sum1 = vmlaq_f32(_sum1, _k13, _r23); in convdw5x5s1_pack4_neon()
659 _sum1 = vmlaq_f32(_sum1, _k14, _r24); in convdw5x5s1_pack4_neon()
[all …]

123456789