/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/ |
H A D | convolution_3x3_pack8.h | 687 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx() 715 _k01 = _mm256_loadu_ps(k01 + 16); in conv3x3s1_winograd64_pack8_avx() 742 _k01 = _mm256_loadu_ps(k01 + 24); in conv3x3s1_winograd64_pack8_avx() 771 _k01 = _mm256_loadu_ps(k01 + 32); in conv3x3s1_winograd64_pack8_avx() 800 _k01 = _mm256_loadu_ps(k01 + 40); in conv3x3s1_winograd64_pack8_avx() 829 _k01 = _mm256_loadu_ps(k01 + 48); in conv3x3s1_winograd64_pack8_avx() 857 _k01 = _mm256_loadu_ps(k01 + 56); in conv3x3s1_winograd64_pack8_avx() 937 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx() 1107 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx() 1203 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx() [all …]
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/ |
H A D | convolution_3x3_pack8.h | 66 __m256 _k01 = _mm256_loadu_ps(kptr + 8); in conv3x3s1_pack8_avx() local 1392 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx() 1420 _k01 = _mm256_loadu_ps(k01 + 16); in conv3x3s1_winograd64_pack8_avx() 1447 _k01 = _mm256_loadu_ps(k01 + 24); in conv3x3s1_winograd64_pack8_avx() 1476 _k01 = _mm256_loadu_ps(k01 + 32); in conv3x3s1_winograd64_pack8_avx() 1505 _k01 = _mm256_loadu_ps(k01 + 40); in conv3x3s1_winograd64_pack8_avx() 1534 _k01 = _mm256_loadu_ps(k01 + 48); in conv3x3s1_winograd64_pack8_avx() 1642 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx() 1812 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx() 1908 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx() [all …]
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/ |
H A D | convolution_3x3_pack8.h | 66 __m256 _k01 = _mm256_loadu_ps(kptr + 8); in conv3x3s1_pack8_avx() local 1392 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx() 1420 _k01 = _mm256_loadu_ps(k01 + 16); in conv3x3s1_winograd64_pack8_avx() 1447 _k01 = _mm256_loadu_ps(k01 + 24); in conv3x3s1_winograd64_pack8_avx() 1476 _k01 = _mm256_loadu_ps(k01 + 32); in conv3x3s1_winograd64_pack8_avx() 1505 _k01 = _mm256_loadu_ps(k01 + 40); in conv3x3s1_winograd64_pack8_avx() 1534 _k01 = _mm256_loadu_ps(k01 + 48); in conv3x3s1_winograd64_pack8_avx() 1642 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx() 1812 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx() 1908 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx() [all …]
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/ |
H A D | convolution_3x3_pack8.h | 66 __m256 _k01 = _mm256_loadu_ps(kptr + 8); in conv3x3s1_pack8_avx() local 1392 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx() 1420 _k01 = _mm256_loadu_ps(k01 + 16); in conv3x3s1_winograd64_pack8_avx() 1447 _k01 = _mm256_loadu_ps(k01 + 24); in conv3x3s1_winograd64_pack8_avx() 1476 _k01 = _mm256_loadu_ps(k01 + 32); in conv3x3s1_winograd64_pack8_avx() 1505 _k01 = _mm256_loadu_ps(k01 + 40); in conv3x3s1_winograd64_pack8_avx() 1534 _k01 = _mm256_loadu_ps(k01 + 48); in conv3x3s1_winograd64_pack8_avx() 1642 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx() 1812 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx() 1908 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx() [all …]
|
/dports/misc/ncnn/ncnn-20211208/src/layer/x86/ |
H A D | convolution_3x3_pack8.h | 66 __m256 _k01 = _mm256_loadu_ps(kptr + 8); in conv3x3s1_pack8_avx() local 1392 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx() 1420 _k01 = _mm256_loadu_ps(k01 + 16); in conv3x3s1_winograd64_pack8_avx() 1447 _k01 = _mm256_loadu_ps(k01 + 24); in conv3x3s1_winograd64_pack8_avx() 1476 _k01 = _mm256_loadu_ps(k01 + 32); in conv3x3s1_winograd64_pack8_avx() 1505 _k01 = _mm256_loadu_ps(k01 + 40); in conv3x3s1_winograd64_pack8_avx() 1534 _k01 = _mm256_loadu_ps(k01 + 48); in conv3x3s1_winograd64_pack8_avx() 1642 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx() 1812 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx() 1908 _k01 = _mm256_loadu_ps(k01 + 8); in conv3x3s1_winograd64_pack8_avx() [all …]
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/ |
H A D | convolutiondepthwise_3x3_pack4.h | 207 "w"(_k01), // %13 in convdw3x3s1_pack4_neon() 307 "w"(_k01), // %13 in convdw3x3s1_pack4_neon() 387 "w"(_k01), // %13 in convdw3x3s1_pack4_neon() 501 "w"(_k01), // %9 in convdw3x3s1_pack4_neon() 602 "w"(_k01), // %9 in convdw3x3s1_pack4_neon() 680 "w"(_k01), // %9 in convdw3x3s1_pack4_neon() 753 "w"(_k01), // %9 in convdw3x3s1_pack4_neon() 942 "w"(_k01), // %9 in convdw3x3s2_pack4_neon() 1077 "w"(_k01), // %9 in convdw3x3s2_pack4_neon() 1158 "w"(_k01), // %9 in convdw3x3s2_pack4_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack4_bf16s.h | 46 float32x4_t _k01 = vcvt_f32_bf16(vld1_u16(k0 + 4)); in convdw3x3s1_pack4_bf16s_neon() local 279 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon() 425 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon() 530 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon() 682 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon() 815 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon() 911 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon() 1002 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon() 1244 "w"(_k01), // %9 in convdw3x3s2_pack4_bf16s_neon() 1352 "w"(_k01), // %9 in convdw3x3s2_pack4_bf16s_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack8_fp16s.h | 46 float16x8_t _k01 = vld1q_f16(k0 + 8); in convdw3x3s1_pack8_fp16sa_neon() local 203 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon() 298 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon() 377 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon() 482 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon() 549 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon() 606 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon() 656 float16x8_t _k01 = vld1q_f16(k0 + 8); in convdw3x3s2_pack8_fp16sa_neon() local 760 "w"(_k01), // %9 in convdw3x3s2_pack8_fp16sa_neon() 835 "w"(_k01), // %9 in convdw3x3s2_pack8_fp16sa_neon() [all …]
|
H A D | convolution_3x3_pack1to4_fp16s.h | 46 float16x4_t _k01 = vld1_f16(k0 + 4); in conv3x3s1_pack1to4_fp16sa_neon() local 175 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon() 260 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon() 321 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon() 373 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon() 428 float16x4_t _k01 = vld1_f16(k0 + 4); in conv3x3s2_pack1to4_fp16sa_neon() local 515 "w"(_k01), // %9 in conv3x3s2_pack1to4_fp16sa_neon() 575 "w"(_k01), // %9 in conv3x3s2_pack1to4_fp16sa_neon() 627 "w"(_k01), // %9 in conv3x3s2_pack1to4_fp16sa_neon()
|
H A D | convolution_3x3_pack1to8_fp16s.h | 46 float16x8_t _k01 = vld1q_f16(k0 + 8); in conv3x3s1_pack1to8_fp16sa_neon() local 175 "w"(_k01), // %9 in conv3x3s1_pack1to8_fp16sa_neon() 260 "w"(_k01), // %9 in conv3x3s1_pack1to8_fp16sa_neon() 321 "w"(_k01), // %9 in conv3x3s1_pack1to8_fp16sa_neon() 373 "w"(_k01), // %9 in conv3x3s1_pack1to8_fp16sa_neon() 428 float16x8_t _k01 = vld1q_f16(k0 + 8); in conv3x3s2_pack1to8_fp16sa_neon() local 515 "w"(_k01), // %9 in conv3x3s2_pack1to8_fp16sa_neon() 575 "w"(_k01), // %9 in conv3x3s2_pack1to8_fp16sa_neon() 627 "w"(_k01), // %9 in conv3x3s2_pack1to8_fp16sa_neon()
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/ |
H A D | convolutiondepthwise_3x3_pack4.h | 207 "w"(_k01), // %13 in convdw3x3s1_pack4_neon() 307 "w"(_k01), // %13 in convdw3x3s1_pack4_neon() 387 "w"(_k01), // %13 in convdw3x3s1_pack4_neon() 501 "w"(_k01), // %9 in convdw3x3s1_pack4_neon() 602 "w"(_k01), // %9 in convdw3x3s1_pack4_neon() 680 "w"(_k01), // %9 in convdw3x3s1_pack4_neon() 753 "w"(_k01), // %9 in convdw3x3s1_pack4_neon() 942 "w"(_k01), // %9 in convdw3x3s2_pack4_neon() 1077 "w"(_k01), // %9 in convdw3x3s2_pack4_neon() 1158 "w"(_k01), // %9 in convdw3x3s2_pack4_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack4_bf16s.h | 46 float32x4_t _k01 = vcvt_f32_bf16(vld1_u16(k0 + 4)); in convdw3x3s1_pack4_bf16s_neon() local 279 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon() 425 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon() 530 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon() 682 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon() 815 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon() 911 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon() 1002 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon() 1244 "w"(_k01), // %9 in convdw3x3s2_pack4_bf16s_neon() 1352 "w"(_k01), // %9 in convdw3x3s2_pack4_bf16s_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack8_fp16s.h | 46 float16x8_t _k01 = vld1q_f16(k0 + 8); in convdw3x3s1_pack8_fp16sa_neon() local 203 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon() 298 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon() 377 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon() 482 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon() 549 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon() 606 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon() 656 float16x8_t _k01 = vld1q_f16(k0 + 8); in convdw3x3s2_pack8_fp16sa_neon() local 760 "w"(_k01), // %9 in convdw3x3s2_pack8_fp16sa_neon() 835 "w"(_k01), // %9 in convdw3x3s2_pack8_fp16sa_neon() [all …]
|
H A D | convolution_3x3_pack1to8_fp16s.h | 46 float16x8_t _k01 = vld1q_f16(k0 + 8); in conv3x3s1_pack1to8_fp16sa_neon() local 175 "w"(_k01), // %9 in conv3x3s1_pack1to8_fp16sa_neon() 260 "w"(_k01), // %9 in conv3x3s1_pack1to8_fp16sa_neon() 321 "w"(_k01), // %9 in conv3x3s1_pack1to8_fp16sa_neon() 373 "w"(_k01), // %9 in conv3x3s1_pack1to8_fp16sa_neon() 428 float16x8_t _k01 = vld1q_f16(k0 + 8); in conv3x3s2_pack1to8_fp16sa_neon() local 515 "w"(_k01), // %9 in conv3x3s2_pack1to8_fp16sa_neon() 575 "w"(_k01), // %9 in conv3x3s2_pack1to8_fp16sa_neon() 627 "w"(_k01), // %9 in conv3x3s2_pack1to8_fp16sa_neon()
|
H A D | convolution_3x3_pack1to4_fp16s.h | 46 float16x4_t _k01 = vld1_f16(k0 + 4); in conv3x3s1_pack1to4_fp16sa_neon() local 175 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon() 260 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon() 321 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon() 373 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon() 428 float16x4_t _k01 = vld1_f16(k0 + 4); in conv3x3s2_pack1to4_fp16sa_neon() local 515 "w"(_k01), // %9 in conv3x3s2_pack1to4_fp16sa_neon() 575 "w"(_k01), // %9 in conv3x3s2_pack1to4_fp16sa_neon() 627 "w"(_k01), // %9 in conv3x3s2_pack1to4_fp16sa_neon()
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/ |
H A D | convolutiondepthwise_3x3_pack4.h | 207 "w"(_k01), // %13 in convdw3x3s1_pack4_neon() 307 "w"(_k01), // %13 in convdw3x3s1_pack4_neon() 387 "w"(_k01), // %13 in convdw3x3s1_pack4_neon() 501 "w"(_k01), // %9 in convdw3x3s1_pack4_neon() 602 "w"(_k01), // %9 in convdw3x3s1_pack4_neon() 680 "w"(_k01), // %9 in convdw3x3s1_pack4_neon() 753 "w"(_k01), // %9 in convdw3x3s1_pack4_neon() 942 "w"(_k01), // %9 in convdw3x3s2_pack4_neon() 1077 "w"(_k01), // %9 in convdw3x3s2_pack4_neon() 1158 "w"(_k01), // %9 in convdw3x3s2_pack4_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack4_bf16s.h | 46 float32x4_t _k01 = vcvt_f32_bf16(vld1_u16(k0 + 4)); in convdw3x3s1_pack4_bf16s_neon() local 279 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon() 425 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon() 530 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon() 682 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon() 815 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon() 911 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon() 1002 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon() 1244 "w"(_k01), // %9 in convdw3x3s2_pack4_bf16s_neon() 1352 "w"(_k01), // %9 in convdw3x3s2_pack4_bf16s_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack8_fp16s.h | 46 float16x8_t _k01 = vld1q_f16(k0 + 8); in convdw3x3s1_pack8_fp16sa_neon() local 203 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon() 298 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon() 377 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon() 482 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon() 549 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon() 606 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon() 656 float16x8_t _k01 = vld1q_f16(k0 + 8); in convdw3x3s2_pack8_fp16sa_neon() local 760 "w"(_k01), // %9 in convdw3x3s2_pack8_fp16sa_neon() 835 "w"(_k01), // %9 in convdw3x3s2_pack8_fp16sa_neon() [all …]
|
H A D | convolution_3x3_pack1to4_fp16s.h | 46 float16x4_t _k01 = vld1_f16(k0 + 4); in conv3x3s1_pack1to4_fp16sa_neon() local 175 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon() 260 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon() 321 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon() 373 "w"(_k01), // %9 in conv3x3s1_pack1to4_fp16sa_neon() 428 float16x4_t _k01 = vld1_f16(k0 + 4); in conv3x3s2_pack1to4_fp16sa_neon() local 515 "w"(_k01), // %9 in conv3x3s2_pack1to4_fp16sa_neon() 575 "w"(_k01), // %9 in conv3x3s2_pack1to4_fp16sa_neon() 627 "w"(_k01), // %9 in conv3x3s2_pack1to4_fp16sa_neon()
|
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/ |
H A D | convolutiondepthwise_3x3_pack4.h | 207 "w"(_k01), // %13 in convdw3x3s1_pack4_neon() 307 "w"(_k01), // %13 in convdw3x3s1_pack4_neon() 387 "w"(_k01), // %13 in convdw3x3s1_pack4_neon() 501 "w"(_k01), // %9 in convdw3x3s1_pack4_neon() 602 "w"(_k01), // %9 in convdw3x3s1_pack4_neon() 680 "w"(_k01), // %9 in convdw3x3s1_pack4_neon() 753 "w"(_k01), // %9 in convdw3x3s1_pack4_neon() 942 "w"(_k01), // %9 in convdw3x3s2_pack4_neon() 1077 "w"(_k01), // %9 in convdw3x3s2_pack4_neon() 1158 "w"(_k01), // %9 in convdw3x3s2_pack4_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack4_bf16s.h | 46 float32x4_t _k01 = vcvt_f32_bf16(vld1_u16(k0 + 4)); in convdw3x3s1_pack4_bf16s_neon() local 279 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon() 425 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon() 530 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon() 682 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon() 815 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon() 911 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon() 1002 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon() 1244 "w"(_k01), // %9 in convdw3x3s2_pack4_bf16s_neon() 1352 "w"(_k01), // %9 in convdw3x3s2_pack4_bf16s_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack8_fp16s.h | 46 float16x8_t _k01 = vld1q_f16(k0 + 8); in convdw3x3s1_pack8_fp16sa_neon() local 203 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon() 298 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon() 377 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon() 482 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon() 549 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon() 606 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon() 656 float16x8_t _k01 = vld1q_f16(k0 + 8); in convdw3x3s2_pack8_fp16sa_neon() local 760 "w"(_k01), // %9 in convdw3x3s2_pack8_fp16sa_neon() 835 "w"(_k01), // %9 in convdw3x3s2_pack8_fp16sa_neon() [all …]
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/ |
H A D | convolutiondepthwise_3x3_pack4.h | 207 "w"(_k01), // %13 in convdw3x3s1_pack4_neon() 307 "w"(_k01), // %13 in convdw3x3s1_pack4_neon() 387 "w"(_k01), // %13 in convdw3x3s1_pack4_neon() 501 "w"(_k01), // %9 in convdw3x3s1_pack4_neon() 602 "w"(_k01), // %9 in convdw3x3s1_pack4_neon() 680 "w"(_k01), // %9 in convdw3x3s1_pack4_neon() 753 "w"(_k01), // %9 in convdw3x3s1_pack4_neon() 942 "w"(_k01), // %9 in convdw3x3s2_pack4_neon() 1077 "w"(_k01), // %9 in convdw3x3s2_pack4_neon() 1158 "w"(_k01), // %9 in convdw3x3s2_pack4_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack4_bf16s.h | 46 float32x4_t _k01 = vcvt_f32_bf16(vld1_u16(k0 + 4)); in convdw3x3s1_pack4_bf16s_neon() local 279 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon() 425 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon() 530 "w"(_k01), // %13 in convdw3x3s1_pack4_bf16s_neon() 682 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon() 815 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon() 911 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon() 1002 "w"(_k01), // %9 in convdw3x3s1_pack4_bf16s_neon() 1244 "w"(_k01), // %9 in convdw3x3s2_pack4_bf16s_neon() 1352 "w"(_k01), // %9 in convdw3x3s2_pack4_bf16s_neon() [all …]
|
H A D | convolutiondepthwise_3x3_pack8_fp16s.h | 46 float16x8_t _k01 = vld1q_f16(k0 + 8); in convdw3x3s1_pack8_fp16sa_neon() local 203 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon() 298 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon() 377 "w"(_k01), // %13 in convdw3x3s1_pack8_fp16sa_neon() 482 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon() 549 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon() 606 "w"(_k01), // %9 in convdw3x3s1_pack8_fp16sa_neon() 656 float16x8_t _k01 = vld1q_f16(k0 + 8); in convdw3x3s2_pack8_fp16sa_neon() local 760 "w"(_k01), // %9 in convdw3x3s2_pack8_fp16sa_neon() 835 "w"(_k01), // %9 in convdw3x3s2_pack8_fp16sa_neon() [all …]
|