Home
last modified time | relevance | path

Searched refs:_k30 (Results 1 – 25 of 25) sorted by relevance

/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Dconvolutiondepthwise_5x5_pack4.h230 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
496 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
673 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
686 _sum0 = vmlaq_f32(_sum0, _k30, _r30); in convdw5x5s1_pack4_neon()
705 _sum1 = vmlaq_f32(_sum1, _k30, _r40); in convdw5x5s1_pack4_neon()
884 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
1048 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
1168 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
1396 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s2_pack4_neon() local
1567 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s2_pack4_neon() local
[all …]
H A Dconvolutiondepthwise_5x5_pack8_fp16s.h1376 float16x8_t _k30 = vld1q_f16(k0); in convdw5x5s2_pack8_fp16sa_neon() local
1383 _sum0 = vfmaq_f16(_sum0, _k30, _r30); in convdw5x5s2_pack8_fp16sa_neon()
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Dconvolutiondepthwise_5x5_pack4.h230 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
496 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
673 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
686 _sum0 = vmlaq_f32(_sum0, _k30, _r30); in convdw5x5s1_pack4_neon()
705 _sum1 = vmlaq_f32(_sum1, _k30, _r40); in convdw5x5s1_pack4_neon()
884 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
1048 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
1168 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
1396 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s2_pack4_neon() local
1567 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s2_pack4_neon() local
[all …]
H A Dconvolutiondepthwise_5x5_pack8_fp16s.h1376 float16x8_t _k30 = vld1q_f16(k0); in convdw5x5s2_pack8_fp16sa_neon() local
1383 _sum0 = vfmaq_f16(_sum0, _k30, _r30); in convdw5x5s2_pack8_fp16sa_neon()
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Dconvolutiondepthwise_5x5_pack4.h230 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
496 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
673 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
686 _sum0 = vmlaq_f32(_sum0, _k30, _r30); in convdw5x5s1_pack4_neon()
705 _sum1 = vmlaq_f32(_sum1, _k30, _r40); in convdw5x5s1_pack4_neon()
884 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
1048 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
1168 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
1396 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s2_pack4_neon() local
1567 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s2_pack4_neon() local
[all …]
H A Dconvolutiondepthwise_5x5_pack8_fp16s.h1376 float16x8_t _k30 = vld1q_f16(k0); in convdw5x5s2_pack8_fp16sa_neon() local
1383 _sum0 = vfmaq_f16(_sum0, _k30, _r30); in convdw5x5s2_pack8_fp16sa_neon()
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Dconvolutiondepthwise_5x5_pack4.h230 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
496 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
673 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
686 _sum0 = vmlaq_f32(_sum0, _k30, _r30); in convdw5x5s1_pack4_neon()
705 _sum1 = vmlaq_f32(_sum1, _k30, _r40); in convdw5x5s1_pack4_neon()
884 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
1048 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
1168 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
1396 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s2_pack4_neon() local
1567 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s2_pack4_neon() local
[all …]
H A Dconvolutiondepthwise_5x5_pack8_fp16s.h1376 float16x8_t _k30 = vld1q_f16(k0); in convdw5x5s2_pack8_fp16sa_neon() local
1383 _sum0 = vfmaq_f16(_sum0, _k30, _r30); in convdw5x5s2_pack8_fp16sa_neon()
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Dconvolutiondepthwise_5x5_pack4.h230 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
496 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
673 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
686 _sum0 = vmlaq_f32(_sum0, _k30, _r30); in convdw5x5s1_pack4_neon()
705 _sum1 = vmlaq_f32(_sum1, _k30, _r40); in convdw5x5s1_pack4_neon()
884 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
1048 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
1168 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s1_pack4_neon() local
1396 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s2_pack4_neon() local
1567 float32x4_t _k30 = vld1q_f32(k0); in convdw5x5s2_pack4_neon() local
[all …]
H A Dconvolutiondepthwise_5x5_pack8_fp16s.h1376 float16x8_t _k30 = vld1q_f16(k0); in convdw5x5s2_pack8_fp16sa_neon() local
1383 _sum0 = vfmaq_f16(_sum0, _k30, _r30); in convdw5x5s2_pack8_fp16sa_neon()
/dports/misc/ncnn/ncnn-20211208/src/layer/mips/
H A Dconvolutiondepthwise_5x5_pack4.h146 v4f32 _k30 = (v4f32)__msa_ld_w(k0, 0); in convdw5x5s1_pack4_msa() local
153 _sum0 = __msa_fmadd_w(_sum0, _k30, _r30); in convdw5x5s1_pack4_msa()
165 _sum1 = __msa_fmadd_w(_sum1, _k30, _r40); in convdw5x5s1_pack4_msa()
298 v4f32 _k30 = (v4f32)__msa_ld_w(k0, 0); in convdw5x5s1_pack4_msa() local
305 _sum0 = __msa_fmadd_w(_sum0, _k30, _r30); in convdw5x5s1_pack4_msa()
461 v4f32 _k30 = (v4f32)__msa_ld_w(k0, 0); in convdw5x5s2_pack4_msa() local
468 _sum0 = __msa_fmadd_w(_sum0, _k30, _r30); in convdw5x5s2_pack4_msa()
H A Dconvolution_7x7_pack1to4.h235 v4f32 _k30 = (v4f32)__msa_ld_w(kptr, 0); in conv7x7s2_pack1to4_msa() local
263 _sum0 = __msa_fmadd_w(_sum0, _r30, _k30); in conv7x7s2_pack1to4_msa()
264 _sum1 = __msa_fmadd_w(_sum1, _r32, _k30); in conv7x7s2_pack1to4_msa()
265 _sum2 = __msa_fmadd_w(_sum2, _r34, _k30); in conv7x7s2_pack1to4_msa()
266 _sum3 = __msa_fmadd_w(_sum3, _r36, _k30); in conv7x7s2_pack1to4_msa()
545 v4f32 _k30 = (v4f32)__msa_ld_w(kptr, 0); in conv7x7s2_pack1to4_msa() local
558 _sum0 = __msa_fmadd_w(_sum0, (v4f32)__msa_splati_w(_r3, 0), _k30); in conv7x7s2_pack1to4_msa()
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/
H A Dconvolutiondepthwise_5x5_pack8.h114 __m256 _k30 = _mm256_loadu_ps(k0); in convdw5x5s1_pack8_avx() local
121 _sum0 = _mm256_fmadd_ps(_k30, _r30, _sum0); in convdw5x5s1_pack8_avx()
268 __m256 _k30 = _mm256_loadu_ps(k0); in convdw5x5s2_pack8_avx() local
275 _sum0 = _mm256_fmadd_ps(_k30, _r30, _sum0); in convdw5x5s2_pack8_avx()
H A Dconvolution_3x3_pack8.h188 __m256 _k30 = _mm256_loadu_ps(kptr); in conv3x3s1_pack8_avx() local
199 _sum00 = _mm256_fmadd_ps(_r100, _k30, _sum00); in conv3x3s1_pack8_avx()
217 _sum10 = _mm256_fmadd_ps(_r110, _k30, _sum10); in conv3x3s1_pack8_avx()
537 __m256 _k30 = _mm256_loadu_ps(kptr); in conv3x3s1_pack8_avx() local
548 _sum0 = _mm256_fmadd_ps(_r100, _k30, _sum0); in conv3x3s1_pack8_avx()
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/
H A Dconvolutiondepthwise_5x5_pack8.h114 __m256 _k30 = _mm256_loadu_ps(k0); in convdw5x5s1_pack8_avx() local
121 _sum0 = _mm256_fmadd_ps(_k30, _r30, _sum0); in convdw5x5s1_pack8_avx()
268 __m256 _k30 = _mm256_loadu_ps(k0); in convdw5x5s2_pack8_avx() local
275 _sum0 = _mm256_fmadd_ps(_k30, _r30, _sum0); in convdw5x5s2_pack8_avx()
H A Dconvolution_3x3_pack8.h188 __m256 _k30 = _mm256_loadu_ps(kptr); in conv3x3s1_pack8_avx() local
199 _sum00 = _mm256_fmadd_ps(_r100, _k30, _sum00); in conv3x3s1_pack8_avx()
217 _sum10 = _mm256_fmadd_ps(_r110, _k30, _sum10); in conv3x3s1_pack8_avx()
537 __m256 _k30 = _mm256_loadu_ps(kptr); in conv3x3s1_pack8_avx() local
548 _sum0 = _mm256_fmadd_ps(_r100, _k30, _sum0); in conv3x3s1_pack8_avx()
/dports/misc/ncnn/ncnn-20211208/src/layer/x86/
H A Dconvolutiondepthwise_5x5_pack8.h114 __m256 _k30 = _mm256_loadu_ps(k0); in convdw5x5s1_pack8_avx() local
121 _sum0 = _mm256_comp_fmadd_ps(_k30, _r30, _sum0); in convdw5x5s1_pack8_avx()
268 __m256 _k30 = _mm256_loadu_ps(k0); in convdw5x5s2_pack8_avx() local
275 _sum0 = _mm256_comp_fmadd_ps(_k30, _r30, _sum0); in convdw5x5s2_pack8_avx()
H A Dconvolution_3x3_pack8.h188 __m256 _k30 = _mm256_loadu_ps(kptr); in conv3x3s1_pack8_avx() local
199 _sum00 = _mm256_comp_fmadd_ps(_r100, _k30, _sum00); in conv3x3s1_pack8_avx()
217 _sum10 = _mm256_comp_fmadd_ps(_r110, _k30, _sum10); in conv3x3s1_pack8_avx()
537 __m256 _k30 = _mm256_loadu_ps(kptr); in conv3x3s1_pack8_avx() local
548 _sum0 = _mm256_comp_fmadd_ps(_r100, _k30, _sum0); in conv3x3s1_pack8_avx()
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/
H A Dconvolutiondepthwise_5x5_pack8.h114 __m256 _k30 = _mm256_loadu_ps(k0); in convdw5x5s1_pack8_avx() local
121 _sum0 = _mm256_fmadd_ps(_k30, _r30, _sum0); in convdw5x5s1_pack8_avx()
268 __m256 _k30 = _mm256_loadu_ps(k0); in convdw5x5s2_pack8_avx() local
275 _sum0 = _mm256_fmadd_ps(_k30, _r30, _sum0); in convdw5x5s2_pack8_avx()
H A Dconvolution_3x3_pack8.h188 __m256 _k30 = _mm256_loadu_ps(kptr); in conv3x3s1_pack8_avx() local
199 _sum00 = _mm256_fmadd_ps(_r100, _k30, _sum00); in conv3x3s1_pack8_avx()
217 _sum10 = _mm256_fmadd_ps(_r110, _k30, _sum10); in conv3x3s1_pack8_avx()
537 __m256 _k30 = _mm256_loadu_ps(kptr); in conv3x3s1_pack8_avx() local
548 _sum0 = _mm256_fmadd_ps(_r100, _k30, _sum0); in conv3x3s1_pack8_avx()
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/
H A Dconvolutiondepthwise_5x5_pack8.h114 __m256 _k30 = _mm256_loadu_ps(k0); in convdw5x5s1_pack8_avx() local
121 _sum0 = _mm256_fmadd_ps(_k30, _r30, _sum0); in convdw5x5s1_pack8_avx()
268 __m256 _k30 = _mm256_loadu_ps(k0); in convdw5x5s2_pack8_avx() local
275 _sum0 = _mm256_fmadd_ps(_k30, _r30, _sum0); in convdw5x5s2_pack8_avx()
/dports/misc/ncnn/ncnn-20211208/src/layer/riscv/
H A Dconvolutiondepthwise_5x5_packn.h140 vfloat32m1_t _k30 = vle32_v_f32m1(k0, vl); in convdw5x5s1_packn_rvv() local
147 _sum0 = vfmacc_vv_f32m1(_sum0, _k30, _r30, vl); in convdw5x5s1_packn_rvv()
159 _sum1 = vfmacc_vv_f32m1(_sum1, _k30, _r40, vl); in convdw5x5s1_packn_rvv()
284 vfloat32m1_t _k30 = vle32_v_f32m1(k0, vl); in convdw5x5s1_packn_rvv() local
291 _sum0 = vfmacc_vv_f32m1(_sum0, _k30, _r30, vl); in convdw5x5s1_packn_rvv()
442 vfloat32m1_t _k30 = vle32_v_f32m1(k0, vl); in convdw5x5s2_packn_rvv() local
449 _sum0 = vfmacc_vv_f32m1(_sum0, _k30, _r30, vl); in convdw5x5s2_packn_rvv()
H A Dconvolutiondepthwise_5x5_packn_fp16s.h140 vfloat16m1_t _k30 = vle16_v_f16m1(k0, vl); in convdw5x5s1_packn_fp16sa_rvv() local
147 _sum0 = vfmacc_vv_f16m1(_sum0, _k30, _r30, vl); in convdw5x5s1_packn_fp16sa_rvv()
159 _sum1 = vfmacc_vv_f16m1(_sum1, _k30, _r40, vl); in convdw5x5s1_packn_fp16sa_rvv()
284 vfloat16m1_t _k30 = vle16_v_f16m1(k0, vl); in convdw5x5s1_packn_fp16sa_rvv() local
291 _sum0 = vfmacc_vv_f16m1(_sum0, _k30, _r30, vl); in convdw5x5s1_packn_fp16sa_rvv()
442 vfloat16m1_t _k30 = vle16_v_f16m1(k0, vl); in convdw5x5s2_packn_fp16sa_rvv() local
449 _sum0 = vfmacc_vv_f16m1(_sum0, _k30, _r30, vl); in convdw5x5s2_packn_fp16sa_rvv()
H A Dconvolution_7x7_pack1ton.h272 vfloat32m1_t _k30 = vle32_v_f32m1(kptr, vl); in conv7x7s2_pack1ton_rvv() local
282 _sum0 = vfmacc_vf_f32m1(_sum0, r3[0], _k30, vl); in conv7x7s2_pack1ton_rvv()
283 _sum1 = vfmacc_vf_f32m1(_sum1, r3[2], _k30, vl); in conv7x7s2_pack1ton_rvv()
284 _sum2 = vfmacc_vf_f32m1(_sum2, r3[4], _k30, vl); in conv7x7s2_pack1ton_rvv()
285 _sum3 = vfmacc_vf_f32m1(_sum3, r3[6], _k30, vl); in conv7x7s2_pack1ton_rvv()
286 _sum4 = vfmacc_vf_f32m1(_sum4, r3[8], _k30, vl); in conv7x7s2_pack1ton_rvv()
287 _sum5 = vfmacc_vf_f32m1(_sum5, r3[10], _k30, vl); in conv7x7s2_pack1ton_rvv()
288 _sum6 = vfmacc_vf_f32m1(_sum6, r3[12], _k30, vl); in conv7x7s2_pack1ton_rvv()
289 _sum7 = vfmacc_vf_f32m1(_sum7, r3[14], _k30, vl); in conv7x7s2_pack1ton_rvv()
683 vfloat32m1_t _k30 = vle32_v_f32m1(kptr, vl); in conv7x7s2_pack1ton_rvv() local
[all …]
H A Dconvolution_7x7_pack1ton_fp16s.h272 vfloat16m1_t _k30 = vle16_v_f16m1(kptr, vl); in conv7x7s2_pack1ton_fp16sa_rvv() local
282 _sum0 = vfmacc_vf_f16m1(_sum0, r3[0], _k30, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
283 _sum1 = vfmacc_vf_f16m1(_sum1, r3[2], _k30, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
284 _sum2 = vfmacc_vf_f16m1(_sum2, r3[4], _k30, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
285 _sum3 = vfmacc_vf_f16m1(_sum3, r3[6], _k30, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
286 _sum4 = vfmacc_vf_f16m1(_sum4, r3[8], _k30, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
287 _sum5 = vfmacc_vf_f16m1(_sum5, r3[10], _k30, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
288 _sum6 = vfmacc_vf_f16m1(_sum6, r3[12], _k30, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
289 _sum7 = vfmacc_vf_f16m1(_sum7, r3[14], _k30, vl); in conv7x7s2_pack1ton_fp16sa_rvv()
683 vfloat16m1_t _k30 = vle16_v_f16m1(kptr, vl); in conv7x7s2_pack1ton_fp16sa_rvv() local
[all …]