/dports/misc/ncnn/ncnn-20211208/src/layer/riscv/ |
H A D | convolution_3x3_pack1ton.h | 74 _sum1 = vfmacc_vf_f32m1(_sum1, r0[1], _k00, vl); in conv3x3s1_pack1ton_rvv() 82 _sum1 = vfmacc_vf_f32m1(_sum1, r0[2], _k01, vl); in conv3x3s1_pack1ton_rvv() 90 _sum1 = vfmacc_vf_f32m1(_sum1, r0[3], _k02, vl); in conv3x3s1_pack1ton_rvv() 99 _sum1 = vfmacc_vf_f32m1(_sum1, r1[1], _k10, vl); in conv3x3s1_pack1ton_rvv() 107 _sum1 = vfmacc_vf_f32m1(_sum1, r1[2], _k11, vl); in conv3x3s1_pack1ton_rvv() 115 _sum1 = vfmacc_vf_f32m1(_sum1, r1[3], _k12, vl); in conv3x3s1_pack1ton_rvv() 124 _sum1 = vfmacc_vf_f32m1(_sum1, r2[1], _k20, vl); in conv3x3s1_pack1ton_rvv() 132 _sum1 = vfmacc_vf_f32m1(_sum1, r2[2], _k21, vl); in conv3x3s1_pack1ton_rvv() 140 _sum1 = vfmacc_vf_f32m1(_sum1, r2[3], _k22, vl); in conv3x3s1_pack1ton_rvv() 171 _sum1 = vfmacc_vf_f32m1(_sum1, r0[1], _k00, vl); in conv3x3s1_pack1ton_rvv() [all …]
|
H A D | convolution_3x3_pack1ton_fp16s.h | 74 _sum1 = vfmacc_vf_f16m1(_sum1, r0[1], _k00, vl); in conv3x3s1_pack1ton_fp16sa_rvv() 82 _sum1 = vfmacc_vf_f16m1(_sum1, r0[2], _k01, vl); in conv3x3s1_pack1ton_fp16sa_rvv() 90 _sum1 = vfmacc_vf_f16m1(_sum1, r0[3], _k02, vl); in conv3x3s1_pack1ton_fp16sa_rvv() 99 _sum1 = vfmacc_vf_f16m1(_sum1, r1[1], _k10, vl); in conv3x3s1_pack1ton_fp16sa_rvv() 107 _sum1 = vfmacc_vf_f16m1(_sum1, r1[2], _k11, vl); in conv3x3s1_pack1ton_fp16sa_rvv() 115 _sum1 = vfmacc_vf_f16m1(_sum1, r1[3], _k12, vl); in conv3x3s1_pack1ton_fp16sa_rvv() 124 _sum1 = vfmacc_vf_f16m1(_sum1, r2[1], _k20, vl); in conv3x3s1_pack1ton_fp16sa_rvv() 132 _sum1 = vfmacc_vf_f16m1(_sum1, r2[2], _k21, vl); in conv3x3s1_pack1ton_fp16sa_rvv() 140 _sum1 = vfmacc_vf_f16m1(_sum1, r2[3], _k22, vl); in conv3x3s1_pack1ton_fp16sa_rvv() 171 _sum1 = vfmacc_vf_f16m1(_sum1, r0[1], _k00, vl); in conv3x3s1_pack1ton_fp16sa_rvv() [all …]
|
H A D | convolution_7x7_pack1ton.h | 82 _sum1 = vfmacc_vf_f32m1(_sum1, r0[2], _k00, vl); in conv7x7s2_pack1ton_rvv() 90 _sum1 = vfmacc_vf_f32m1(_sum1, r0[3], _k01, vl); in conv7x7s2_pack1ton_rvv() 98 _sum1 = vfmacc_vf_f32m1(_sum1, r0[4], _k02, vl); in conv7x7s2_pack1ton_rvv() 106 _sum1 = vfmacc_vf_f32m1(_sum1, r0[5], _k03, vl); in conv7x7s2_pack1ton_rvv() 114 _sum1 = vfmacc_vf_f32m1(_sum1, r0[6], _k04, vl); in conv7x7s2_pack1ton_rvv() 122 _sum1 = vfmacc_vf_f32m1(_sum1, r0[7], _k05, vl); in conv7x7s2_pack1ton_rvv() 130 _sum1 = vfmacc_vf_f32m1(_sum1, r0[8], _k06, vl); in conv7x7s2_pack1ton_rvv() 149 _sum1 = vfmacc_vf_f32m1(_sum1, r1[2], _k10, vl); in conv7x7s2_pack1ton_rvv() 157 _sum1 = vfmacc_vf_f32m1(_sum1, r1[3], _k11, vl); in conv7x7s2_pack1ton_rvv() 165 _sum1 = vfmacc_vf_f32m1(_sum1, r1[4], _k12, vl); in conv7x7s2_pack1ton_rvv() [all …]
|
H A D | convolution_7x7_pack1ton_fp16s.h | 82 _sum1 = vfmacc_vf_f16m1(_sum1, r0[2], _k00, vl); in conv7x7s2_pack1ton_fp16sa_rvv() 90 _sum1 = vfmacc_vf_f16m1(_sum1, r0[3], _k01, vl); in conv7x7s2_pack1ton_fp16sa_rvv() 98 _sum1 = vfmacc_vf_f16m1(_sum1, r0[4], _k02, vl); in conv7x7s2_pack1ton_fp16sa_rvv() 106 _sum1 = vfmacc_vf_f16m1(_sum1, r0[5], _k03, vl); in conv7x7s2_pack1ton_fp16sa_rvv() 114 _sum1 = vfmacc_vf_f16m1(_sum1, r0[6], _k04, vl); in conv7x7s2_pack1ton_fp16sa_rvv() 122 _sum1 = vfmacc_vf_f16m1(_sum1, r0[7], _k05, vl); in conv7x7s2_pack1ton_fp16sa_rvv() 130 _sum1 = vfmacc_vf_f16m1(_sum1, r0[8], _k06, vl); in conv7x7s2_pack1ton_fp16sa_rvv() 149 _sum1 = vfmacc_vf_f16m1(_sum1, r1[2], _k10, vl); in conv7x7s2_pack1ton_fp16sa_rvv() 157 _sum1 = vfmacc_vf_f16m1(_sum1, r1[3], _k11, vl); in conv7x7s2_pack1ton_fp16sa_rvv() 165 _sum1 = vfmacc_vf_f16m1(_sum1, r1[4], _k12, vl); in conv7x7s2_pack1ton_fp16sa_rvv() [all …]
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/ |
H A D | convolution_2x2_pack8.h | 97 _sum1 = _mm256_fmadd_ps(_k00, _r00, _sum1); in conv2x2s1_pack8_avx() 98 _sum1 = _mm256_fmadd_ps(_k01, _r01, _sum1); in conv2x2s1_pack8_avx() 99 _sum1 = _mm256_fmadd_ps(_k02, _r02, _sum1); in conv2x2s1_pack8_avx() 100 _sum1 = _mm256_fmadd_ps(_k03, _r03, _sum1); in conv2x2s1_pack8_avx() 101 _sum1 = _mm256_fmadd_ps(_k04, _r04, _sum1); in conv2x2s1_pack8_avx() 102 _sum1 = _mm256_fmadd_ps(_k05, _r05, _sum1); in conv2x2s1_pack8_avx() 103 _sum1 = _mm256_fmadd_ps(_k06, _r06, _sum1); in conv2x2s1_pack8_avx() 104 _sum1 = _mm256_fmadd_ps(_k07, _r07, _sum1); in conv2x2s1_pack8_avx() 137 _sum1 = _mm256_fmadd_ps(_k00, _r00, _sum1); in conv2x2s1_pack8_avx() 138 _sum1 = _mm256_fmadd_ps(_k01, _r01, _sum1); in conv2x2s1_pack8_avx() [all …]
|
H A D | convolutiondepthwise_3x3_pack8.h | 86 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack8_avx() 87 _sum1 = _mm256_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_pack8_avx() 88 _sum1 = _mm256_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_pack8_avx() 89 _sum1 = _mm256_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_pack8_avx() 90 _sum1 = _mm256_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_pack8_avx() 91 _sum1 = _mm256_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_pack8_avx() 92 _sum1 = _mm256_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_pack8_avx() 93 _sum1 = _mm256_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_pack8_avx() 94 _sum1 = _mm256_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_pack8_avx() 228 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack8_avx() [all …]
|
H A D | convolutiondepthwise_3x3_pack8_fp16.h | 86 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_fp16_pack8_avx() 87 _sum1 = _mm256_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_fp16_pack8_avx() 88 _sum1 = _mm256_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_fp16_pack8_avx() 89 _sum1 = _mm256_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_fp16_pack8_avx() 90 _sum1 = _mm256_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_fp16_pack8_avx() 91 _sum1 = _mm256_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_fp16_pack8_avx() 92 _sum1 = _mm256_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_fp16_pack8_avx() 93 _sum1 = _mm256_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_fp16_pack8_avx() 94 _sum1 = _mm256_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_fp16_pack8_avx() 228 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_fp16_pack8_avx() [all …]
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/ |
H A D | convolution_2x2_pack8.h | 97 _sum1 = _mm256_fmadd_ps(_k00, _r00, _sum1); in conv2x2s1_pack8_avx() 98 _sum1 = _mm256_fmadd_ps(_k01, _r01, _sum1); in conv2x2s1_pack8_avx() 99 _sum1 = _mm256_fmadd_ps(_k02, _r02, _sum1); in conv2x2s1_pack8_avx() 100 _sum1 = _mm256_fmadd_ps(_k03, _r03, _sum1); in conv2x2s1_pack8_avx() 101 _sum1 = _mm256_fmadd_ps(_k04, _r04, _sum1); in conv2x2s1_pack8_avx() 102 _sum1 = _mm256_fmadd_ps(_k05, _r05, _sum1); in conv2x2s1_pack8_avx() 103 _sum1 = _mm256_fmadd_ps(_k06, _r06, _sum1); in conv2x2s1_pack8_avx() 104 _sum1 = _mm256_fmadd_ps(_k07, _r07, _sum1); in conv2x2s1_pack8_avx() 137 _sum1 = _mm256_fmadd_ps(_k00, _r00, _sum1); in conv2x2s1_pack8_avx() 138 _sum1 = _mm256_fmadd_ps(_k01, _r01, _sum1); in conv2x2s1_pack8_avx() [all …]
|
H A D | convolutiondepthwise_3x3_pack8_fp16.h | 86 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_fp16_pack8_avx() 87 _sum1 = _mm256_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_fp16_pack8_avx() 88 _sum1 = _mm256_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_fp16_pack8_avx() 89 _sum1 = _mm256_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_fp16_pack8_avx() 90 _sum1 = _mm256_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_fp16_pack8_avx() 91 _sum1 = _mm256_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_fp16_pack8_avx() 92 _sum1 = _mm256_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_fp16_pack8_avx() 93 _sum1 = _mm256_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_fp16_pack8_avx() 94 _sum1 = _mm256_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_fp16_pack8_avx() 228 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_fp16_pack8_avx() [all …]
|
H A D | convolutiondepthwise_3x3_pack8.h | 86 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack8_avx() 87 _sum1 = _mm256_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_pack8_avx() 88 _sum1 = _mm256_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_pack8_avx() 89 _sum1 = _mm256_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_pack8_avx() 90 _sum1 = _mm256_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_pack8_avx() 91 _sum1 = _mm256_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_pack8_avx() 92 _sum1 = _mm256_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_pack8_avx() 93 _sum1 = _mm256_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_pack8_avx() 94 _sum1 = _mm256_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_pack8_avx() 228 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack8_avx() [all …]
|
/dports/misc/ncnn/ncnn-20211208/src/layer/x86/ |
H A D | convolution_2x2_pack8.h | 97 _sum1 = _mm256_comp_fmadd_ps(_k00, _r00, _sum1); in conv2x2s1_pack8_avx() 98 _sum1 = _mm256_comp_fmadd_ps(_k01, _r01, _sum1); in conv2x2s1_pack8_avx() 99 _sum1 = _mm256_comp_fmadd_ps(_k02, _r02, _sum1); in conv2x2s1_pack8_avx() 100 _sum1 = _mm256_comp_fmadd_ps(_k03, _r03, _sum1); in conv2x2s1_pack8_avx() 101 _sum1 = _mm256_comp_fmadd_ps(_k04, _r04, _sum1); in conv2x2s1_pack8_avx() 102 _sum1 = _mm256_comp_fmadd_ps(_k05, _r05, _sum1); in conv2x2s1_pack8_avx() 103 _sum1 = _mm256_comp_fmadd_ps(_k06, _r06, _sum1); in conv2x2s1_pack8_avx() 104 _sum1 = _mm256_comp_fmadd_ps(_k07, _r07, _sum1); in conv2x2s1_pack8_avx() 137 _sum1 = _mm256_comp_fmadd_ps(_k00, _r00, _sum1); in conv2x2s1_pack8_avx() 138 _sum1 = _mm256_comp_fmadd_ps(_k01, _r01, _sum1); in conv2x2s1_pack8_avx() [all …]
|
H A D | convolutiondepthwise_3x3_pack4.h | 86 _sum1 = _mm_comp_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack4_sse() 87 _sum1 = _mm_comp_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_pack4_sse() 88 _sum1 = _mm_comp_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_pack4_sse() 89 _sum1 = _mm_comp_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_pack4_sse() 90 _sum1 = _mm_comp_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_pack4_sse() 91 _sum1 = _mm_comp_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_pack4_sse() 92 _sum1 = _mm_comp_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_pack4_sse() 93 _sum1 = _mm_comp_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_pack4_sse() 94 _sum1 = _mm_comp_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_pack4_sse() 228 _sum1 = _mm_comp_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack4_sse() [all …]
|
H A D | convolutiondepthwise_3x3_pack8_fp16.h | 86 _sum1 = _mm256_comp_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_fp16_pack8_avx() 87 _sum1 = _mm256_comp_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_fp16_pack8_avx() 88 _sum1 = _mm256_comp_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_fp16_pack8_avx() 89 _sum1 = _mm256_comp_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_fp16_pack8_avx() 90 _sum1 = _mm256_comp_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_fp16_pack8_avx() 91 _sum1 = _mm256_comp_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_fp16_pack8_avx() 92 _sum1 = _mm256_comp_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_fp16_pack8_avx() 93 _sum1 = _mm256_comp_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_fp16_pack8_avx() 94 _sum1 = _mm256_comp_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_fp16_pack8_avx() 228 _sum1 = _mm256_comp_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_fp16_pack8_avx() [all …]
|
H A D | convolutiondepthwise_3x3_pack8.h | 86 _sum1 = _mm256_comp_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack8_avx() 87 _sum1 = _mm256_comp_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_pack8_avx() 88 _sum1 = _mm256_comp_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_pack8_avx() 89 _sum1 = _mm256_comp_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_pack8_avx() 90 _sum1 = _mm256_comp_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_pack8_avx() 91 _sum1 = _mm256_comp_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_pack8_avx() 92 _sum1 = _mm256_comp_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_pack8_avx() 93 _sum1 = _mm256_comp_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_pack8_avx() 94 _sum1 = _mm256_comp_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_pack8_avx() 228 _sum1 = _mm256_comp_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack8_avx() [all …]
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/ |
H A D | convolution_2x2_pack8.h | 97 _sum1 = _mm256_fmadd_ps(_k00, _r00, _sum1); in conv2x2s1_pack8_avx() 98 _sum1 = _mm256_fmadd_ps(_k01, _r01, _sum1); in conv2x2s1_pack8_avx() 99 _sum1 = _mm256_fmadd_ps(_k02, _r02, _sum1); in conv2x2s1_pack8_avx() 100 _sum1 = _mm256_fmadd_ps(_k03, _r03, _sum1); in conv2x2s1_pack8_avx() 101 _sum1 = _mm256_fmadd_ps(_k04, _r04, _sum1); in conv2x2s1_pack8_avx() 102 _sum1 = _mm256_fmadd_ps(_k05, _r05, _sum1); in conv2x2s1_pack8_avx() 103 _sum1 = _mm256_fmadd_ps(_k06, _r06, _sum1); in conv2x2s1_pack8_avx() 104 _sum1 = _mm256_fmadd_ps(_k07, _r07, _sum1); in conv2x2s1_pack8_avx() 137 _sum1 = _mm256_fmadd_ps(_k00, _r00, _sum1); in conv2x2s1_pack8_avx() 138 _sum1 = _mm256_fmadd_ps(_k01, _r01, _sum1); in conv2x2s1_pack8_avx() [all …]
|
H A D | convolutiondepthwise_3x3_pack8.h | 86 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack8_avx() 87 _sum1 = _mm256_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_pack8_avx() 88 _sum1 = _mm256_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_pack8_avx() 89 _sum1 = _mm256_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_pack8_avx() 90 _sum1 = _mm256_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_pack8_avx() 91 _sum1 = _mm256_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_pack8_avx() 92 _sum1 = _mm256_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_pack8_avx() 93 _sum1 = _mm256_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_pack8_avx() 94 _sum1 = _mm256_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_pack8_avx() 228 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack8_avx() [all …]
|
H A D | convolutiondepthwise_3x3_pack8_fp16.h | 86 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_fp16_pack8_avx() 87 _sum1 = _mm256_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_fp16_pack8_avx() 88 _sum1 = _mm256_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_fp16_pack8_avx() 89 _sum1 = _mm256_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_fp16_pack8_avx() 90 _sum1 = _mm256_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_fp16_pack8_avx() 91 _sum1 = _mm256_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_fp16_pack8_avx() 92 _sum1 = _mm256_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_fp16_pack8_avx() 93 _sum1 = _mm256_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_fp16_pack8_avx() 94 _sum1 = _mm256_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_fp16_pack8_avx() 228 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_fp16_pack8_avx() [all …]
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/ |
H A D | convolution_2x2_pack8.h | 97 _sum1 = _mm256_fmadd_ps(_k00, _r00, _sum1); in conv2x2s1_pack8_avx() 98 _sum1 = _mm256_fmadd_ps(_k01, _r01, _sum1); in conv2x2s1_pack8_avx() 99 _sum1 = _mm256_fmadd_ps(_k02, _r02, _sum1); in conv2x2s1_pack8_avx() 100 _sum1 = _mm256_fmadd_ps(_k03, _r03, _sum1); in conv2x2s1_pack8_avx() 101 _sum1 = _mm256_fmadd_ps(_k04, _r04, _sum1); in conv2x2s1_pack8_avx() 102 _sum1 = _mm256_fmadd_ps(_k05, _r05, _sum1); in conv2x2s1_pack8_avx() 103 _sum1 = _mm256_fmadd_ps(_k06, _r06, _sum1); in conv2x2s1_pack8_avx() 104 _sum1 = _mm256_fmadd_ps(_k07, _r07, _sum1); in conv2x2s1_pack8_avx() 137 _sum1 = _mm256_fmadd_ps(_k00, _r00, _sum1); in conv2x2s1_pack8_avx() 138 _sum1 = _mm256_fmadd_ps(_k01, _r01, _sum1); in conv2x2s1_pack8_avx() [all …]
|
H A D | convolutiondepthwise_3x3_pack8_fp16.h | 86 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_fp16_pack8_avx() 87 _sum1 = _mm256_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_fp16_pack8_avx() 88 _sum1 = _mm256_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_fp16_pack8_avx() 89 _sum1 = _mm256_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_fp16_pack8_avx() 90 _sum1 = _mm256_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_fp16_pack8_avx() 91 _sum1 = _mm256_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_fp16_pack8_avx() 92 _sum1 = _mm256_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_fp16_pack8_avx() 93 _sum1 = _mm256_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_fp16_pack8_avx() 94 _sum1 = _mm256_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_fp16_pack8_avx() 228 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_fp16_pack8_avx() [all …]
|
H A D | convolutiondepthwise_3x3_pack8.h | 86 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack8_avx() 87 _sum1 = _mm256_fmadd_ps(_k01, _r02, _sum1); in convdw3x3s1_pack8_avx() 88 _sum1 = _mm256_fmadd_ps(_k02, _r03, _sum1); in convdw3x3s1_pack8_avx() 89 _sum1 = _mm256_fmadd_ps(_k10, _r11, _sum1); in convdw3x3s1_pack8_avx() 90 _sum1 = _mm256_fmadd_ps(_k11, _r12, _sum1); in convdw3x3s1_pack8_avx() 91 _sum1 = _mm256_fmadd_ps(_k12, _r13, _sum1); in convdw3x3s1_pack8_avx() 92 _sum1 = _mm256_fmadd_ps(_k20, _r21, _sum1); in convdw3x3s1_pack8_avx() 93 _sum1 = _mm256_fmadd_ps(_k21, _r22, _sum1); in convdw3x3s1_pack8_avx() 94 _sum1 = _mm256_fmadd_ps(_k22, _r23, _sum1); in convdw3x3s1_pack8_avx() 228 _sum1 = _mm256_fmadd_ps(_k00, _r01, _sum1); in convdw3x3s1_pack8_avx() [all …]
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/ |
H A D | convolutiondepthwise_5x5_pack4.h | 630 _sum1 = vmlaq_f32(_sum1, _k00, _r10); in convdw5x5s1_pack4_neon() 631 _sum1 = vmlaq_f32(_sum1, _k01, _r11); in convdw5x5s1_pack4_neon() 632 _sum1 = vmlaq_f32(_sum1, _k02, _r12); in convdw5x5s1_pack4_neon() 633 _sum1 = vmlaq_f32(_sum1, _k03, _r13); in convdw5x5s1_pack4_neon() 634 _sum1 = vmlaq_f32(_sum1, _k04, _r14); in convdw5x5s1_pack4_neon() 655 _sum1 = vmlaq_f32(_sum1, _k10, _r20); in convdw5x5s1_pack4_neon() 656 _sum1 = vmlaq_f32(_sum1, _k11, _r21); in convdw5x5s1_pack4_neon() 657 _sum1 = vmlaq_f32(_sum1, _k12, _r22); in convdw5x5s1_pack4_neon() 658 _sum1 = vmlaq_f32(_sum1, _k13, _r23); in convdw5x5s1_pack4_neon() 659 _sum1 = vmlaq_f32(_sum1, _k14, _r24); in convdw5x5s1_pack4_neon() [all …]
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/ |
H A D | convolutiondepthwise_5x5_pack4.h | 630 _sum1 = vmlaq_f32(_sum1, _k00, _r10); in convdw5x5s1_pack4_neon() 631 _sum1 = vmlaq_f32(_sum1, _k01, _r11); in convdw5x5s1_pack4_neon() 632 _sum1 = vmlaq_f32(_sum1, _k02, _r12); in convdw5x5s1_pack4_neon() 633 _sum1 = vmlaq_f32(_sum1, _k03, _r13); in convdw5x5s1_pack4_neon() 634 _sum1 = vmlaq_f32(_sum1, _k04, _r14); in convdw5x5s1_pack4_neon() 655 _sum1 = vmlaq_f32(_sum1, _k10, _r20); in convdw5x5s1_pack4_neon() 656 _sum1 = vmlaq_f32(_sum1, _k11, _r21); in convdw5x5s1_pack4_neon() 657 _sum1 = vmlaq_f32(_sum1, _k12, _r22); in convdw5x5s1_pack4_neon() 658 _sum1 = vmlaq_f32(_sum1, _k13, _r23); in convdw5x5s1_pack4_neon() 659 _sum1 = vmlaq_f32(_sum1, _k14, _r24); in convdw5x5s1_pack4_neon() [all …]
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/ |
H A D | convolutiondepthwise_5x5_pack4.h | 630 _sum1 = vmlaq_f32(_sum1, _k00, _r10); in convdw5x5s1_pack4_neon() 631 _sum1 = vmlaq_f32(_sum1, _k01, _r11); in convdw5x5s1_pack4_neon() 632 _sum1 = vmlaq_f32(_sum1, _k02, _r12); in convdw5x5s1_pack4_neon() 633 _sum1 = vmlaq_f32(_sum1, _k03, _r13); in convdw5x5s1_pack4_neon() 634 _sum1 = vmlaq_f32(_sum1, _k04, _r14); in convdw5x5s1_pack4_neon() 655 _sum1 = vmlaq_f32(_sum1, _k10, _r20); in convdw5x5s1_pack4_neon() 656 _sum1 = vmlaq_f32(_sum1, _k11, _r21); in convdw5x5s1_pack4_neon() 657 _sum1 = vmlaq_f32(_sum1, _k12, _r22); in convdw5x5s1_pack4_neon() 658 _sum1 = vmlaq_f32(_sum1, _k13, _r23); in convdw5x5s1_pack4_neon() 659 _sum1 = vmlaq_f32(_sum1, _k14, _r24); in convdw5x5s1_pack4_neon() [all …]
|
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/ |
H A D | convolutiondepthwise_5x5_pack4.h | 630 _sum1 = vmlaq_f32(_sum1, _k00, _r10); in convdw5x5s1_pack4_neon() 631 _sum1 = vmlaq_f32(_sum1, _k01, _r11); in convdw5x5s1_pack4_neon() 632 _sum1 = vmlaq_f32(_sum1, _k02, _r12); in convdw5x5s1_pack4_neon() 633 _sum1 = vmlaq_f32(_sum1, _k03, _r13); in convdw5x5s1_pack4_neon() 634 _sum1 = vmlaq_f32(_sum1, _k04, _r14); in convdw5x5s1_pack4_neon() 655 _sum1 = vmlaq_f32(_sum1, _k10, _r20); in convdw5x5s1_pack4_neon() 656 _sum1 = vmlaq_f32(_sum1, _k11, _r21); in convdw5x5s1_pack4_neon() 657 _sum1 = vmlaq_f32(_sum1, _k12, _r22); in convdw5x5s1_pack4_neon() 658 _sum1 = vmlaq_f32(_sum1, _k13, _r23); in convdw5x5s1_pack4_neon() 659 _sum1 = vmlaq_f32(_sum1, _k14, _r24); in convdw5x5s1_pack4_neon() [all …]
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/ |
H A D | convolutiondepthwise_5x5_pack4.h | 630 _sum1 = vmlaq_f32(_sum1, _k00, _r10); in convdw5x5s1_pack4_neon() 631 _sum1 = vmlaq_f32(_sum1, _k01, _r11); in convdw5x5s1_pack4_neon() 632 _sum1 = vmlaq_f32(_sum1, _k02, _r12); in convdw5x5s1_pack4_neon() 633 _sum1 = vmlaq_f32(_sum1, _k03, _r13); in convdw5x5s1_pack4_neon() 634 _sum1 = vmlaq_f32(_sum1, _k04, _r14); in convdw5x5s1_pack4_neon() 655 _sum1 = vmlaq_f32(_sum1, _k10, _r20); in convdw5x5s1_pack4_neon() 656 _sum1 = vmlaq_f32(_sum1, _k11, _r21); in convdw5x5s1_pack4_neon() 657 _sum1 = vmlaq_f32(_sum1, _k12, _r22); in convdw5x5s1_pack4_neon() 658 _sum1 = vmlaq_f32(_sum1, _k13, _r23); in convdw5x5s1_pack4_neon() 659 _sum1 = vmlaq_f32(_sum1, _k14, _r24); in convdw5x5s1_pack4_neon() [all …]
|