/dports/misc/ncnn/ncnn-20211208/src/layer/riscv/ |
H A D | convolution_3x3_pack1ton.h | 64 vfloat32m1_t _sum0 = vle32_v_f32m1(outptr0, vl); in conv3x3s1_pack1ton_rvv() local 165 vfloat32m1_t _sum0 = vle32_v_f32m1(outptr0, vl); in conv3x3s1_pack1ton_rvv() local 222 vfloat32m1_t _sum0 = vle32_v_f32m1(outptr0, vl); in conv3x3s1_pack1ton_rvv() local 257 vfloat32m1_t _sum0 = vle32_v_f32m1(outptr0, vl); in conv3x3s1_pack1ton_rvv() local 342 vfloat32m1_t _sum0 = vle32_v_f32m1(outptr0, vl); in conv3x3s2_pack1ton_rvv() local 443 vfloat32m1_t _sum0 = vle32_v_f32m1(outptr0, vl); in conv3x3s2_pack1ton_rvv() local 500 vfloat32m1_t _sum0 = vle32_v_f32m1(outptr0, vl); in conv3x3s2_pack1ton_rvv() local 535 vfloat32m1_t _sum0 = vle32_v_f32m1(outptr0, vl); in conv3x3s2_pack1ton_rvv() local
|
H A D | convolution_3x3_pack1ton_fp16s.h | 64 vfloat16m1_t _sum0 = vle16_v_f16m1(outptr0, vl); in conv3x3s1_pack1ton_fp16sa_rvv() local 165 vfloat16m1_t _sum0 = vle16_v_f16m1(outptr0, vl); in conv3x3s1_pack1ton_fp16sa_rvv() local 222 vfloat16m1_t _sum0 = vle16_v_f16m1(outptr0, vl); in conv3x3s1_pack1ton_fp16sa_rvv() local 257 vfloat16m1_t _sum0 = vle16_v_f16m1(outptr0, vl); in conv3x3s1_pack1ton_fp16sa_rvv() local 342 vfloat16m1_t _sum0 = vle16_v_f16m1(outptr0, vl); in conv3x3s2_pack1ton_fp16sa_rvv() local 443 vfloat16m1_t _sum0 = vle16_v_f16m1(outptr0, vl); in conv3x3s2_pack1ton_fp16sa_rvv() local 500 vfloat16m1_t _sum0 = vle16_v_f16m1(outptr0, vl); in conv3x3s2_pack1ton_fp16sa_rvv() local 535 vfloat16m1_t _sum0 = vle16_v_f16m1(outptr0, vl); in conv3x3s2_pack1ton_fp16sa_rvv() local
|
H A D | convolution_sgemm_packnto1.h | 211 vfloat32m1_t _sum0 = vle32_v_f32m1(biasptr, vl); in im2col_sgemm_packnto1_rvv() local 264 vfloat32m1_t _sum0 = vle32_v_f32m1(biasptr, vl); in im2col_sgemm_packnto1_rvv() local 301 vfloat32m1_t _sum0 = vle32_v_f32m1(biasptr, vl); in im2col_sgemm_packnto1_rvv() local 371 vfloat32m1_t _sum0 = vfmv_v_f_f32m1(0.f, vl); in im2col_sgemm_packnto1_rvv() local 459 vfloat32m1_t _sum0 = vfmv_v_f_f32m1(0.f, vl); in im2col_sgemm_packnto1_rvv() local 517 vfloat32m1_t _sum0 = vfmv_v_f_f32m1(0.f, vl); in im2col_sgemm_packnto1_rvv() local 560 vfloat32m1_t _sum0 = vfmv_v_f_f32m1(0.f, vl); in im2col_sgemm_packnto1_rvv() local
|
H A D | convolution_sgemm_packnto1_fp16s.h | 211 vfloat16m1_t _sum0 = vle16_v_f16m1(biasptr, vl); in im2col_sgemm_packnto1_fp16sa_rvv() local 264 vfloat16m1_t _sum0 = vle16_v_f16m1(biasptr, vl); in im2col_sgemm_packnto1_fp16sa_rvv() local 301 vfloat16m1_t _sum0 = vle16_v_f16m1(biasptr, vl); in im2col_sgemm_packnto1_fp16sa_rvv() local 371 vfloat16m1_t _sum0 = vfmv_v_f_f16m1(0.f, vl); in im2col_sgemm_packnto1_fp16sa_rvv() local 459 vfloat16m1_t _sum0 = vfmv_v_f_f16m1(0.f, vl); in im2col_sgemm_packnto1_fp16sa_rvv() local 517 vfloat16m1_t _sum0 = vfmv_v_f_f16m1(0.f, vl); in im2col_sgemm_packnto1_fp16sa_rvv() local 560 vfloat16m1_t _sum0 = vfmv_v_f_f16m1(0.f, vl); in im2col_sgemm_packnto1_fp16sa_rvv() local
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/ |
H A D | convolution_sgemm_pack8to1_int8.h | 482 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 565 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 831 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1131 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1158 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1256 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1300 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1343 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1365 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1463 int32x2_t _sum0 = vdup_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local [all …]
|
H A D | convolutiondepthwise_3x3_fp16s.h | 82 float16x8_t _sum0 = _bias0; in convdw3x3s1_fp16sa_neon() local 138 float16x4_t _sum0 = vget_low_f16(_bias0); in convdw3x3s1_fp16sa_neon() local 179 float16x4_t _sum0 = vmul_f16(_r0, _k012x); in convdw3x3s1_fp16sa_neon() local 230 float16x8_t _sum0 = _bias0; in convdw3x3s1_fp16sa_neon() local 269 float16x4_t _sum0 = vget_low_f16(_bias0); in convdw3x3s1_fp16sa_neon() local
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/ |
H A D | convolution_sgemm_pack8to1_int8.h | 482 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 565 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 831 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1131 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1158 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1256 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1300 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1343 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1365 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1463 int32x2_t _sum0 = vdup_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local [all …]
|
H A D | convolutiondepthwise_3x3_fp16s.h | 82 float16x8_t _sum0 = _bias0; in convdw3x3s1_fp16sa_neon() local 138 float16x4_t _sum0 = vget_low_f16(_bias0); in convdw3x3s1_fp16sa_neon() local 179 float16x4_t _sum0 = vmul_f16(_r0, _k012x); in convdw3x3s1_fp16sa_neon() local 230 float16x8_t _sum0 = _bias0; in convdw3x3s1_fp16sa_neon() local 269 float16x4_t _sum0 = vget_low_f16(_bias0); in convdw3x3s1_fp16sa_neon() local
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/ |
H A D | convolution_sgemm_pack8to1_int8.h | 482 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 565 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 831 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1131 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1158 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1256 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1300 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1343 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1365 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1463 int32x2_t _sum0 = vdup_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local [all …]
|
H A D | convolutiondepthwise_3x3_fp16s.h | 82 float16x8_t _sum0 = _bias0; in convdw3x3s1_fp16sa_neon() local 138 float16x4_t _sum0 = vget_low_f16(_bias0); in convdw3x3s1_fp16sa_neon() local 179 float16x4_t _sum0 = vmul_f16(_r0, _k012x); in convdw3x3s1_fp16sa_neon() local 230 float16x8_t _sum0 = _bias0; in convdw3x3s1_fp16sa_neon() local 269 float16x4_t _sum0 = vget_low_f16(_bias0); in convdw3x3s1_fp16sa_neon() local
|
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/ |
H A D | convolution_sgemm_pack8to1_int8.h | 482 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 565 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 831 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1131 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1158 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1256 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1300 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1343 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1365 int32x4_t _sum0 = vdupq_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local 1463 int32x2_t _sum0 = vdup_n_s32(0); in im2col_sgemm_pack8to1_int8_neon() local [all …]
|
H A D | convolutiondepthwise_3x3_fp16s.h | 82 float16x8_t _sum0 = _bias0; in convdw3x3s1_fp16sa_neon() local 138 float16x4_t _sum0 = vget_low_f16(_bias0); in convdw3x3s1_fp16sa_neon() local 179 float16x4_t _sum0 = vmul_f16(_r0, _k012x); in convdw3x3s1_fp16sa_neon() local 230 float16x8_t _sum0 = _bias0; in convdw3x3s1_fp16sa_neon() local 269 float16x4_t _sum0 = vget_low_f16(_bias0); in convdw3x3s1_fp16sa_neon() local
|
H A D | innerproduct_arm.cpp | 274 float32x4_t _sum0 = vdupq_n_f32(0.f); in forward() local 538 float32x4_t _sum0 = vdupq_n_f32(0.f); in forward_fp16s() local 933 float16x4_t _sum0 = vdup_n_f16(0.f); in forward_fp16sa() local 1082 float16x4_t _sum0 = vdup_n_f16(0.f); in forward_fp16sa() local 1249 float16x8_t _sum0 = vdupq_n_f16(0.f); in forward_fp16sa() local 1372 float16x4_t _sum0 = vdup_n_f16(0.f); in forward_fp16sa() local 1780 float32x4_t _sum0 = vdupq_n_f32(0.f); in forward_bf16s() local 2004 int32x4_t _sum0 = vdupq_n_s32(0); in forward_int8_arm() local 2110 int32x4_t _sum0 = vdupq_n_s32(0); in forward_int8_arm() local 2207 int32x4_t _sum0 = vdupq_n_s32(0); in forward_int8_arm() local [all …]
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/ |
H A D | convolutiondepthwise_3x3_pack8.h | 58 __m256 _sum0 = _bias0; in convdw3x3s1_pack8_avx() local 200 __m256 _sum0 = _bias0; in convdw3x3s1_pack8_avx() local 279 __m256 _sum0 = _bias0; in convdw3x3s1_pack8_avx() local 326 __m256 _sum0 = _bias0; in convdw3x3s1_pack8_avx() local 410 __m256 _sum0 = _bias0; in convdw3x3s2_pack8_avx() local 498 __m256 _sum0 = _bias0; in convdw3x3s2_pack8_avx() local 548 __m256 _sum0 = _bias0; in convdw3x3s2_pack8_avx() local
|
H A D | convolutiondepthwise_3x3_pack8_fp16.h | 58 __m256 _sum0 = _bias0; in convdw3x3s1_fp16_pack8_avx() local 200 __m256 _sum0 = _bias0; in convdw3x3s1_fp16_pack8_avx() local 279 __m256 _sum0 = _bias0; in convdw3x3s1_fp16_pack8_avx() local 326 __m256 _sum0 = _bias0; in convdw3x3s1_fp16_pack8_avx() local 410 __m256 _sum0 = _bias0; in convdw3x3s2_fp16_pack8_avx() local 498 __m256 _sum0 = _bias0; in convdw3x3s2_fp16_pack8_avx() local 548 __m256 _sum0 = _bias0; in convdw3x3s2_fp16_pack8_avx() local
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/ |
H A D | convolutiondepthwise_3x3_pack8_fp16.h | 58 __m256 _sum0 = _bias0; in convdw3x3s1_fp16_pack8_avx() local 200 __m256 _sum0 = _bias0; in convdw3x3s1_fp16_pack8_avx() local 279 __m256 _sum0 = _bias0; in convdw3x3s1_fp16_pack8_avx() local 326 __m256 _sum0 = _bias0; in convdw3x3s1_fp16_pack8_avx() local 410 __m256 _sum0 = _bias0; in convdw3x3s2_fp16_pack8_avx() local 498 __m256 _sum0 = _bias0; in convdw3x3s2_fp16_pack8_avx() local 548 __m256 _sum0 = _bias0; in convdw3x3s2_fp16_pack8_avx() local
|
H A D | convolutiondepthwise_3x3_pack8.h | 58 __m256 _sum0 = _bias0; in convdw3x3s1_pack8_avx() local 200 __m256 _sum0 = _bias0; in convdw3x3s1_pack8_avx() local 279 __m256 _sum0 = _bias0; in convdw3x3s1_pack8_avx() local 326 __m256 _sum0 = _bias0; in convdw3x3s1_pack8_avx() local 410 __m256 _sum0 = _bias0; in convdw3x3s2_pack8_avx() local 498 __m256 _sum0 = _bias0; in convdw3x3s2_pack8_avx() local 548 __m256 _sum0 = _bias0; in convdw3x3s2_pack8_avx() local
|
/dports/misc/ncnn/ncnn-20211208/src/layer/x86/ |
H A D | convolutiondepthwise_3x3_pack4.h | 58 __m128 _sum0 = _bias0; in convdw3x3s1_pack4_sse() local 200 __m128 _sum0 = _bias0; in convdw3x3s1_pack4_sse() local 279 __m128 _sum0 = _bias0; in convdw3x3s1_pack4_sse() local 326 __m128 _sum0 = _bias0; in convdw3x3s1_pack4_sse() local 410 __m128 _sum0 = _bias0; in convdw3x3s2_pack4_sse() local 498 __m128 _sum0 = _bias0; in convdw3x3s2_pack4_sse() local 548 __m128 _sum0 = _bias0; in convdw3x3s2_pack4_sse() local
|
H A D | convolutiondepthwise_3x3_pack8_fp16.h | 58 __m256 _sum0 = _bias0; in convdw3x3s1_fp16_pack8_avx() local 200 __m256 _sum0 = _bias0; in convdw3x3s1_fp16_pack8_avx() local 279 __m256 _sum0 = _bias0; in convdw3x3s1_fp16_pack8_avx() local 326 __m256 _sum0 = _bias0; in convdw3x3s1_fp16_pack8_avx() local 410 __m256 _sum0 = _bias0; in convdw3x3s2_fp16_pack8_avx() local 498 __m256 _sum0 = _bias0; in convdw3x3s2_fp16_pack8_avx() local 548 __m256 _sum0 = _bias0; in convdw3x3s2_fp16_pack8_avx() local
|
H A D | convolutiondepthwise_3x3_pack8.h | 58 __m256 _sum0 = _bias0; in convdw3x3s1_pack8_avx() local 200 __m256 _sum0 = _bias0; in convdw3x3s1_pack8_avx() local 279 __m256 _sum0 = _bias0; in convdw3x3s1_pack8_avx() local 326 __m256 _sum0 = _bias0; in convdw3x3s1_pack8_avx() local 410 __m256 _sum0 = _bias0; in convdw3x3s2_pack8_avx() local 498 __m256 _sum0 = _bias0; in convdw3x3s2_pack8_avx() local 548 __m256 _sum0 = _bias0; in convdw3x3s2_pack8_avx() local
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/ |
H A D | convolutiondepthwise_3x3_pack8.h | 58 __m256 _sum0 = _bias0; in convdw3x3s1_pack8_avx() local 200 __m256 _sum0 = _bias0; in convdw3x3s1_pack8_avx() local 279 __m256 _sum0 = _bias0; in convdw3x3s1_pack8_avx() local 326 __m256 _sum0 = _bias0; in convdw3x3s1_pack8_avx() local 410 __m256 _sum0 = _bias0; in convdw3x3s2_pack8_avx() local 498 __m256 _sum0 = _bias0; in convdw3x3s2_pack8_avx() local 548 __m256 _sum0 = _bias0; in convdw3x3s2_pack8_avx() local
|
H A D | convolutiondepthwise_3x3_pack8_fp16.h | 58 __m256 _sum0 = _bias0; in convdw3x3s1_fp16_pack8_avx() local 200 __m256 _sum0 = _bias0; in convdw3x3s1_fp16_pack8_avx() local 279 __m256 _sum0 = _bias0; in convdw3x3s1_fp16_pack8_avx() local 326 __m256 _sum0 = _bias0; in convdw3x3s1_fp16_pack8_avx() local 410 __m256 _sum0 = _bias0; in convdw3x3s2_fp16_pack8_avx() local 498 __m256 _sum0 = _bias0; in convdw3x3s2_fp16_pack8_avx() local 548 __m256 _sum0 = _bias0; in convdw3x3s2_fp16_pack8_avx() local
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/ |
H A D | convolutiondepthwise_3x3_pack8_fp16.h | 58 __m256 _sum0 = _bias0; in convdw3x3s1_fp16_pack8_avx() local 200 __m256 _sum0 = _bias0; in convdw3x3s1_fp16_pack8_avx() local 279 __m256 _sum0 = _bias0; in convdw3x3s1_fp16_pack8_avx() local 326 __m256 _sum0 = _bias0; in convdw3x3s1_fp16_pack8_avx() local 410 __m256 _sum0 = _bias0; in convdw3x3s2_fp16_pack8_avx() local 498 __m256 _sum0 = _bias0; in convdw3x3s2_fp16_pack8_avx() local 548 __m256 _sum0 = _bias0; in convdw3x3s2_fp16_pack8_avx() local
|
H A D | convolutiondepthwise_3x3_pack8.h | 58 __m256 _sum0 = _bias0; in convdw3x3s1_pack8_avx() local 200 __m256 _sum0 = _bias0; in convdw3x3s1_pack8_avx() local 279 __m256 _sum0 = _bias0; in convdw3x3s1_pack8_avx() local 326 __m256 _sum0 = _bias0; in convdw3x3s1_pack8_avx() local 410 __m256 _sum0 = _bias0; in convdw3x3s2_pack8_avx() local 498 __m256 _sum0 = _bias0; in convdw3x3s2_pack8_avx() local 548 __m256 _sum0 = _bias0; in convdw3x3s2_pack8_avx() local
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/ |
H A D | convolutiondepthwise_3x3_fp16s.h | 82 float16x8_t _sum0 = _bias0; in convdw3x3s1_fp16sa_neon() local 138 float16x4_t _sum0 = vget_low_f16(_bias0); in convdw3x3s1_fp16sa_neon() local 179 float16x4_t _sum0 = vmul_f16(_r0, _k012x); in convdw3x3s1_fp16sa_neon() local 230 float16x8_t _sum0 = _bias0; in convdw3x3s1_fp16sa_neon() local 269 float16x4_t _sum0 = vget_low_f16(_bias0); in convdw3x3s1_fp16sa_neon() local
|