Home
last modified time | relevance | path

Searched refs:_k01 (Results 1 – 25 of 107) sorted by relevance

12345

/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/
H A Dconvolution_3x3_pack8.h687 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx()
715 _k01 = _mm256_loadu_ps(k01 + 16); in conv3x3s1_winograd64_pack8_avx()
742 _k01 = _mm256_loadu_ps(k01 + 24); in conv3x3s1_winograd64_pack8_avx()
771 _k01 = _mm256_loadu_ps(k01 + 32); in conv3x3s1_winograd64_pack8_avx()
800 _k01 = _mm256_loadu_ps(k01 + 40); in conv3x3s1_winograd64_pack8_avx()
829 _k01 = _mm256_loadu_ps(k01 + 48); in conv3x3s1_winograd64_pack8_avx()
857 _k01 = _mm256_loadu_ps(k01 + 56); in conv3x3s1_winograd64_pack8_avx()
937 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx()
1107 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx()
1203 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx()
[all …]
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/
H A Dconvolution_3x3_pack8.h66 __m256 _k01 = _mm256_loadu_ps(kptr + 8); in conv3x3s1_pack8_avx() local
1392 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx()
1420 _k01 = _mm256_loadu_ps(k01 + 16); in conv3x3s1_winograd64_pack8_avx()
1447 _k01 = _mm256_loadu_ps(k01 + 24); in conv3x3s1_winograd64_pack8_avx()
1476 _k01 = _mm256_loadu_ps(k01 + 32); in conv3x3s1_winograd64_pack8_avx()
1505 _k01 = _mm256_loadu_ps(k01 + 40); in conv3x3s1_winograd64_pack8_avx()
1534 _k01 = _mm256_loadu_ps(k01 + 48); in conv3x3s1_winograd64_pack8_avx()
1642 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx()
1812 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx()
1908 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx()
[all …]
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/
H A Dconvolution_3x3_pack8.h66 __m256 _k01 = _mm256_loadu_ps(kptr + 8); in conv3x3s1_pack8_avx() local
1392 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx()
1420 _k01 = _mm256_loadu_ps(k01 + 16); in conv3x3s1_winograd64_pack8_avx()
1447 _k01 = _mm256_loadu_ps(k01 + 24); in conv3x3s1_winograd64_pack8_avx()
1476 _k01 = _mm256_loadu_ps(k01 + 32); in conv3x3s1_winograd64_pack8_avx()
1505 _k01 = _mm256_loadu_ps(k01 + 40); in conv3x3s1_winograd64_pack8_avx()
1534 _k01 = _mm256_loadu_ps(k01 + 48); in conv3x3s1_winograd64_pack8_avx()
1642 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx()
1812 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx()
1908 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx()
[all …]
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/
H A Dconvolution_3x3_pack8.h66 __m256 _k01 = _mm256_loadu_ps(kptr + 8); in conv3x3s1_pack8_avx() local
1392 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx()
1420 _k01 = _mm256_loadu_ps(k01 + 16); in conv3x3s1_winograd64_pack8_avx()
1447 _k01 = _mm256_loadu_ps(k01 + 24); in conv3x3s1_winograd64_pack8_avx()
1476 _k01 = _mm256_loadu_ps(k01 + 32); in conv3x3s1_winograd64_pack8_avx()
1505 _k01 = _mm256_loadu_ps(k01 + 40); in conv3x3s1_winograd64_pack8_avx()
1534 _k01 = _mm256_loadu_ps(k01 + 48); in conv3x3s1_winograd64_pack8_avx()
1642 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx()
1812 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx()
1908 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/x86/
H A Dconvolution_3x3_pack8.h66 __m256 _k01 = _mm256_loadu_ps(kptr + 8); in conv3x3s1_pack8_avx() local
1392 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx()
1420 _k01 = _mm256_loadu_ps(k01 + 16); in conv3x3s1_winograd64_pack8_avx()
1447 _k01 = _mm256_loadu_ps(k01 + 24); in conv3x3s1_winograd64_pack8_avx()
1476 _k01 = _mm256_loadu_ps(k01 + 32); in conv3x3s1_winograd64_pack8_avx()
1505 _k01 = _mm256_loadu_ps(k01 + 40); in conv3x3s1_winograd64_pack8_avx()
1534 _k01 = _mm256_loadu_ps(k01 + 48); in conv3x3s1_winograd64_pack8_avx()
1642 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx()
1812 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx()
1908 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx()
[all …]
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Dconvolutiondepthwise_3x3_pack4.h207 "w"(_k01), // %13 in convdw3x3s1_pack4_neon()
307 "w"(_k01), // %13 in convdw3x3s1_pack4_neon()
387 "w"(_k01), // %13 in convdw3x3s1_pack4_neon()
501 "w"(_k01), // %9 in convdw3x3s1_pack4_neon()
602 "w"(_k01), // %9 in convdw3x3s1_pack4_neon()
680 "w"(_k01), // %9 in convdw3x3s1_pack4_neon()
753 "w"(_k01), // %9 in convdw3x3s1_pack4_neon()
942 "w"(_k01), // %9 in convdw3x3s2_pack4_neon()
1077 "w"(_k01), // %9 in convdw3x3s2_pack4_neon()
1158 "w"(_k01), // %9 in convdw3x3s2_pack4_neon()
[all …]
H A Dconvolutiondepthwise_3x3_pack4_bf16s.h46 float32x4_t _k01 = vcvt_f32_bf16(vld1_u16(k0 + 4)); in convdw3x3s1_pack4_bf16s_neon() local
279 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon()
425 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon()
530 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon()
682 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon()
815 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon()
911 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon()
1002 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon()
1244 "w"(_k01), // %9 in convdw3x3s2_pack4_bf16s_neon()
1352 "w"(_k01), // %9 in convdw3x3s2_pack4_bf16s_neon()
[all …]
H A Dconvolutiondepthwise_3x3_pack8_fp16s.h46 float16x8_t _k01 = vld1q_f16(k0 + 8); in convdw3x3s1_pack8_fp16sa_neon() local
203 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon()
298 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon()
377 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon()
482 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon()
549 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon()
606 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon()
656 float16x8_t _k01 = vld1q_f16(k0 + 8); in convdw3x3s2_pack8_fp16sa_neon() local
760 "w"(_k01), // %9 in convdw3x3s2_pack8_fp16sa_neon()
835 "w"(_k01), // %9 in convdw3x3s2_pack8_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack1to4_fp16s.h46 float16x4_t _k01 = vld1_f16(k0 + 4); in conv3x3s1_pack1to4_fp16sa_neon() local
175 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon()
260 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon()
321 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon()
373 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon()
428 float16x4_t _k01 = vld1_f16(k0 + 4); in conv3x3s2_pack1to4_fp16sa_neon() local
515 "w"(_k01), // %9 in conv3x3s2_pack1to4_fp16sa_neon()
575 "w"(_k01), // %9 in conv3x3s2_pack1to4_fp16sa_neon()
627 "w"(_k01), // %9 in conv3x3s2_pack1to4_fp16sa_neon()
H A Dconvolution_3x3_pack1to8_fp16s.h46 float16x8_t _k01 = vld1q_f16(k0 + 8); in conv3x3s1_pack1to8_fp16sa_neon() local
175 "w"(_k01), // %9 in conv3x3s1_pack1to8_fp16sa_neon()
260 "w"(_k01), // %9 in conv3x3s1_pack1to8_fp16sa_neon()
321 "w"(_k01), // %9 in conv3x3s1_pack1to8_fp16sa_neon()
373 "w"(_k01), // %9 in conv3x3s1_pack1to8_fp16sa_neon()
428 float16x8_t _k01 = vld1q_f16(k0 + 8); in conv3x3s2_pack1to8_fp16sa_neon() local
515 "w"(_k01), // %9 in conv3x3s2_pack1to8_fp16sa_neon()
575 "w"(_k01), // %9 in conv3x3s2_pack1to8_fp16sa_neon()
627 "w"(_k01), // %9 in conv3x3s2_pack1to8_fp16sa_neon()
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Dconvolutiondepthwise_3x3_pack4.h207 "w"(_k01), // %13 in convdw3x3s1_pack4_neon()
307 "w"(_k01), // %13 in convdw3x3s1_pack4_neon()
387 "w"(_k01), // %13 in convdw3x3s1_pack4_neon()
501 "w"(_k01), // %9 in convdw3x3s1_pack4_neon()
602 "w"(_k01), // %9 in convdw3x3s1_pack4_neon()
680 "w"(_k01), // %9 in convdw3x3s1_pack4_neon()
753 "w"(_k01), // %9 in convdw3x3s1_pack4_neon()
942 "w"(_k01), // %9 in convdw3x3s2_pack4_neon()
1077 "w"(_k01), // %9 in convdw3x3s2_pack4_neon()
1158 "w"(_k01), // %9 in convdw3x3s2_pack4_neon()
[all …]
H A Dconvolutiondepthwise_3x3_pack4_bf16s.h46 float32x4_t _k01 = vcvt_f32_bf16(vld1_u16(k0 + 4)); in convdw3x3s1_pack4_bf16s_neon() local
279 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon()
425 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon()
530 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon()
682 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon()
815 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon()
911 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon()
1002 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon()
1244 "w"(_k01), // %9 in convdw3x3s2_pack4_bf16s_neon()
1352 "w"(_k01), // %9 in convdw3x3s2_pack4_bf16s_neon()
[all …]
H A Dconvolutiondepthwise_3x3_pack8_fp16s.h46 float16x8_t _k01 = vld1q_f16(k0 + 8); in convdw3x3s1_pack8_fp16sa_neon() local
203 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon()
298 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon()
377 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon()
482 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon()
549 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon()
606 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon()
656 float16x8_t _k01 = vld1q_f16(k0 + 8); in convdw3x3s2_pack8_fp16sa_neon() local
760 "w"(_k01), // %9 in convdw3x3s2_pack8_fp16sa_neon()
835 "w"(_k01), // %9 in convdw3x3s2_pack8_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack1to8_fp16s.h46 float16x8_t _k01 = vld1q_f16(k0 + 8); in conv3x3s1_pack1to8_fp16sa_neon() local
175 "w"(_k01), // %9 in conv3x3s1_pack1to8_fp16sa_neon()
260 "w"(_k01), // %9 in conv3x3s1_pack1to8_fp16sa_neon()
321 "w"(_k01), // %9 in conv3x3s1_pack1to8_fp16sa_neon()
373 "w"(_k01), // %9 in conv3x3s1_pack1to8_fp16sa_neon()
428 float16x8_t _k01 = vld1q_f16(k0 + 8); in conv3x3s2_pack1to8_fp16sa_neon() local
515 "w"(_k01), // %9 in conv3x3s2_pack1to8_fp16sa_neon()
575 "w"(_k01), // %9 in conv3x3s2_pack1to8_fp16sa_neon()
627 "w"(_k01), // %9 in conv3x3s2_pack1to8_fp16sa_neon()
H A Dconvolution_3x3_pack1to4_fp16s.h46 float16x4_t _k01 = vld1_f16(k0 + 4); in conv3x3s1_pack1to4_fp16sa_neon() local
175 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon()
260 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon()
321 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon()
373 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon()
428 float16x4_t _k01 = vld1_f16(k0 + 4); in conv3x3s2_pack1to4_fp16sa_neon() local
515 "w"(_k01), // %9 in conv3x3s2_pack1to4_fp16sa_neon()
575 "w"(_k01), // %9 in conv3x3s2_pack1to4_fp16sa_neon()
627 "w"(_k01), // %9 in conv3x3s2_pack1to4_fp16sa_neon()
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Dconvolutiondepthwise_3x3_pack4.h207 "w"(_k01), // %13 in convdw3x3s1_pack4_neon()
307 "w"(_k01), // %13 in convdw3x3s1_pack4_neon()
387 "w"(_k01), // %13 in convdw3x3s1_pack4_neon()
501 "w"(_k01), // %9 in convdw3x3s1_pack4_neon()
602 "w"(_k01), // %9 in convdw3x3s1_pack4_neon()
680 "w"(_k01), // %9 in convdw3x3s1_pack4_neon()
753 "w"(_k01), // %9 in convdw3x3s1_pack4_neon()
942 "w"(_k01), // %9 in convdw3x3s2_pack4_neon()
1077 "w"(_k01), // %9 in convdw3x3s2_pack4_neon()
1158 "w"(_k01), // %9 in convdw3x3s2_pack4_neon()
[all …]
H A Dconvolutiondepthwise_3x3_pack4_bf16s.h46 float32x4_t _k01 = vcvt_f32_bf16(vld1_u16(k0 + 4)); in convdw3x3s1_pack4_bf16s_neon() local
279 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon()
425 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon()
530 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon()
682 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon()
815 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon()
911 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon()
1002 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon()
1244 "w"(_k01), // %9 in convdw3x3s2_pack4_bf16s_neon()
1352 "w"(_k01), // %9 in convdw3x3s2_pack4_bf16s_neon()
[all …]
H A Dconvolutiondepthwise_3x3_pack8_fp16s.h46 float16x8_t _k01 = vld1q_f16(k0 + 8); in convdw3x3s1_pack8_fp16sa_neon() local
203 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon()
298 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon()
377 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon()
482 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon()
549 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon()
606 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon()
656 float16x8_t _k01 = vld1q_f16(k0 + 8); in convdw3x3s2_pack8_fp16sa_neon() local
760 "w"(_k01), // %9 in convdw3x3s2_pack8_fp16sa_neon()
835 "w"(_k01), // %9 in convdw3x3s2_pack8_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack1to4_fp16s.h46 float16x4_t _k01 = vld1_f16(k0 + 4); in conv3x3s1_pack1to4_fp16sa_neon() local
175 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon()
260 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon()
321 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon()
373 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon()
428 float16x4_t _k01 = vld1_f16(k0 + 4); in conv3x3s2_pack1to4_fp16sa_neon() local
515 "w"(_k01), // %9 in conv3x3s2_pack1to4_fp16sa_neon()
575 "w"(_k01), // %9 in conv3x3s2_pack1to4_fp16sa_neon()
627 "w"(_k01), // %9 in conv3x3s2_pack1to4_fp16sa_neon()
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Dconvolutiondepthwise_3x3_pack4.h207 "w"(_k01), // %13 in convdw3x3s1_pack4_neon()
307 "w"(_k01), // %13 in convdw3x3s1_pack4_neon()
387 "w"(_k01), // %13 in convdw3x3s1_pack4_neon()
501 "w"(_k01), // %9 in convdw3x3s1_pack4_neon()
602 "w"(_k01), // %9 in convdw3x3s1_pack4_neon()
680 "w"(_k01), // %9 in convdw3x3s1_pack4_neon()
753 "w"(_k01), // %9 in convdw3x3s1_pack4_neon()
942 "w"(_k01), // %9 in convdw3x3s2_pack4_neon()
1077 "w"(_k01), // %9 in convdw3x3s2_pack4_neon()
1158 "w"(_k01), // %9 in convdw3x3s2_pack4_neon()
[all …]
H A Dconvolutiondepthwise_3x3_pack4_bf16s.h46 float32x4_t _k01 = vcvt_f32_bf16(vld1_u16(k0 + 4)); in convdw3x3s1_pack4_bf16s_neon() local
279 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon()
425 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon()
530 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon()
682 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon()
815 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon()
911 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon()
1002 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon()
1244 "w"(_k01), // %9 in convdw3x3s2_pack4_bf16s_neon()
1352 "w"(_k01), // %9 in convdw3x3s2_pack4_bf16s_neon()
[all …]
H A Dconvolutiondepthwise_3x3_pack8_fp16s.h46 float16x8_t _k01 = vld1q_f16(k0 + 8); in convdw3x3s1_pack8_fp16sa_neon() local
203 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon()
298 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon()
377 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon()
482 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon()
549 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon()
606 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon()
656 float16x8_t _k01 = vld1q_f16(k0 + 8); in convdw3x3s2_pack8_fp16sa_neon() local
760 "w"(_k01), // %9 in convdw3x3s2_pack8_fp16sa_neon()
835 "w"(_k01), // %9 in convdw3x3s2_pack8_fp16sa_neon()
[all …]
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Dconvolutiondepthwise_3x3_pack4.h207 "w"(_k01), // %13 in convdw3x3s1_pack4_neon()
307 "w"(_k01), // %13 in convdw3x3s1_pack4_neon()
387 "w"(_k01), // %13 in convdw3x3s1_pack4_neon()
501 "w"(_k01), // %9 in convdw3x3s1_pack4_neon()
602 "w"(_k01), // %9 in convdw3x3s1_pack4_neon()
680 "w"(_k01), // %9 in convdw3x3s1_pack4_neon()
753 "w"(_k01), // %9 in convdw3x3s1_pack4_neon()
942 "w"(_k01), // %9 in convdw3x3s2_pack4_neon()
1077 "w"(_k01), // %9 in convdw3x3s2_pack4_neon()
1158 "w"(_k01), // %9 in convdw3x3s2_pack4_neon()
[all …]
H A Dconvolutiondepthwise_3x3_pack4_bf16s.h46 float32x4_t _k01 = vcvt_f32_bf16(vld1_u16(k0 + 4)); in convdw3x3s1_pack4_bf16s_neon() local
279 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon()
425 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon()
530 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon()
682 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon()
815 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon()
911 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon()
1002 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon()
1244 "w"(_k01), // %9 in convdw3x3s2_pack4_bf16s_neon()
1352 "w"(_k01), // %9 in convdw3x3s2_pack4_bf16s_neon()
[all …]
H A Dconvolutiondepthwise_3x3_pack8_fp16s.h46 float16x8_t _k01 = vld1q_f16(k0 + 8); in convdw3x3s1_pack8_fp16sa_neon() local
203 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon()
298 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon()
377 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon()
482 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon()
549 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon()
606 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon()
656 float16x8_t _k01 = vld1q_f16(k0 + 8); in convdw3x3s2_pack8_fp16sa_neon() local
760 "w"(_k01), // %9 in convdw3x3s2_pack8_fp16sa_neon()
835 "w"(_k01), // %9 in convdw3x3s2_pack8_fp16sa_neon()
[all …]

12345