/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/ |
H A D | convolution_1x1_bf16s.h | 509 "r"(biasptr), // %20 in conv1x1s1_sgemm_bf16s_neon() 675 "r"(biasptr), // %20 in conv1x1s1_sgemm_bf16s_neon() 809 "r"(biasptr), // %20 in conv1x1s1_sgemm_bf16s_neon() 992 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1144 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1387 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1497 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1674 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1766 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1771 float sum0 = biasptr[0]; in conv1x1s1_sgemm_bf16s_neon() [all …]
|
H A D | convolution_sgemm_pack4.h | 505 "r"(biasptr) // %10 in im2col_sgemm_pack4_neon() 640 "r"(biasptr) // %10 in im2col_sgemm_pack4_neon() 967 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1058 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1134 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1200 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1252 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1308 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1350 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1398 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() [all …]
|
H A D | convolution_sgemm_pack4_bf16s.h | 540 "r"(biasptr) // %10 in im2col_sgemm_pack4_bf16s_neon() 715 "r"(biasptr) // %10 in im2col_sgemm_pack4_bf16s_neon() 1135 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1251 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1351 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1432 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1499 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1566 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1619 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1676 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() [all …]
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/ |
H A D | convolution_1x1_bf16s.h | 509 "r"(biasptr), // %20 in conv1x1s1_sgemm_bf16s_neon() 675 "r"(biasptr), // %20 in conv1x1s1_sgemm_bf16s_neon() 809 "r"(biasptr), // %20 in conv1x1s1_sgemm_bf16s_neon() 992 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1144 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1387 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1497 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1674 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1766 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1771 float sum0 = biasptr[0]; in conv1x1s1_sgemm_bf16s_neon() [all …]
|
H A D | convolution_sgemm_pack4.h | 505 "r"(biasptr) // %10 in im2col_sgemm_pack4_neon() 640 "r"(biasptr) // %10 in im2col_sgemm_pack4_neon() 967 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1058 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1134 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1200 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1252 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1308 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1350 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1398 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() [all …]
|
H A D | convolution_sgemm_pack4_bf16s.h | 540 "r"(biasptr) // %10 in im2col_sgemm_pack4_bf16s_neon() 715 "r"(biasptr) // %10 in im2col_sgemm_pack4_bf16s_neon() 1135 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1251 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1351 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1432 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1499 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1566 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1619 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1676 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() [all …]
|
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/ |
H A D | convolution_1x1_bf16s.h | 509 "r"(biasptr), // %20 in conv1x1s1_sgemm_bf16s_neon() 675 "r"(biasptr), // %20 in conv1x1s1_sgemm_bf16s_neon() 809 "r"(biasptr), // %20 in conv1x1s1_sgemm_bf16s_neon() 992 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1144 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1387 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1497 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1674 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1766 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1771 float sum0 = biasptr[0]; in conv1x1s1_sgemm_bf16s_neon() [all …]
|
H A D | convolution_sgemm_pack4.h | 505 "r"(biasptr) // %10 in im2col_sgemm_pack4_neon() 640 "r"(biasptr) // %10 in im2col_sgemm_pack4_neon() 967 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1058 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1134 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1200 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1252 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1308 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1350 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1398 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() [all …]
|
H A D | convolution_sgemm_pack4_bf16s.h | 540 "r"(biasptr) // %10 in im2col_sgemm_pack4_bf16s_neon() 715 "r"(biasptr) // %10 in im2col_sgemm_pack4_bf16s_neon() 1135 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1251 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1351 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1432 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1499 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1566 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1619 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1676 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() [all …]
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/ |
H A D | convolution_1x1_bf16s.h | 509 "r"(biasptr), // %20 in conv1x1s1_sgemm_bf16s_neon() 675 "r"(biasptr), // %20 in conv1x1s1_sgemm_bf16s_neon() 809 "r"(biasptr), // %20 in conv1x1s1_sgemm_bf16s_neon() 992 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1144 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1387 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1497 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1674 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1766 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1771 float sum0 = biasptr[0]; in conv1x1s1_sgemm_bf16s_neon() [all …]
|
H A D | convolution_sgemm_pack4.h | 505 "r"(biasptr) // %10 in im2col_sgemm_pack4_neon() 640 "r"(biasptr) // %10 in im2col_sgemm_pack4_neon() 967 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1058 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1134 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1200 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1252 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1308 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1350 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() 1398 "r"(biasptr) // %8 in im2col_sgemm_pack4_neon() [all …]
|
H A D | convolution_sgemm_pack4_bf16s.h | 540 "r"(biasptr) // %10 in im2col_sgemm_pack4_bf16s_neon() 715 "r"(biasptr) // %10 in im2col_sgemm_pack4_bf16s_neon() 1135 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1251 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1351 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1432 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1499 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1566 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1619 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() 1676 "r"(biasptr) // %8 in im2col_sgemm_pack4_bf16s_neon() [all …]
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/ |
H A D | convolution_1x1_bf16s.h | 509 "r"(biasptr), // %20 in conv1x1s1_sgemm_bf16s_neon() 675 "r"(biasptr), // %20 in conv1x1s1_sgemm_bf16s_neon() 809 "r"(biasptr), // %20 in conv1x1s1_sgemm_bf16s_neon() 992 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1144 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1387 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1497 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1674 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1766 "r"(biasptr), // %12 in conv1x1s1_sgemm_bf16s_neon() 1771 float sum0 = biasptr[0]; in conv1x1s1_sgemm_bf16s_neon() [all …]
|
H A D | convolution_sgemm.h | 455 "r"(biasptr) // %21 in conv_im2col_sgemm_neon() 703 "r"(biasptr) // %21 in conv_im2col_sgemm_neon() 706 float sum0 = biasptr[0]; in conv_im2col_sgemm_neon() 707 float sum1 = biasptr[1]; in conv_im2col_sgemm_neon() 708 float sum2 = biasptr[2]; in conv_im2col_sgemm_neon() 709 float sum3 = biasptr[3]; in conv_im2col_sgemm_neon() 710 float sum4 = biasptr[4]; in conv_im2col_sgemm_neon() 889 "r"(biasptr) // %13 in conv_im2col_sgemm_neon() 999 "r"(biasptr) // %13 in conv_im2col_sgemm_neon() 1177 "r"(biasptr) // %13 in conv_im2col_sgemm_neon() [all …]
|
H A D | convolution_1x1.h | 464 "r"(biasptr), // %20 in conv1x1s1_sgemm_neon() 601 "r"(biasptr), // %20 in conv1x1s1_sgemm_neon() 715 "r"(biasptr), // %20 in conv1x1s1_sgemm_neon() 869 "r"(biasptr), // %12 in conv1x1s1_sgemm_neon() 998 "r"(biasptr), // %12 in conv1x1s1_sgemm_neon() 1222 "r"(biasptr), // %12 in conv1x1s1_sgemm_neon() 1317 "r"(biasptr), // %12 in conv1x1s1_sgemm_neon() 1481 "r"(biasptr), // %12 in conv1x1s1_sgemm_neon() 1562 "r"(biasptr), // %12 in conv1x1s1_sgemm_neon() 1567 float sum0 = biasptr[0]; in conv1x1s1_sgemm_neon() [all …]
|
H A D | convolution_1x1_pack4.h | 606 "r"(biasptr) // %10 in conv1x1s1_sgemm_pack4_neon() 742 "r"(biasptr) // %10 in conv1x1s1_sgemm_pack4_neon() 1074 "r"(biasptr) // %8 in conv1x1s1_sgemm_pack4_neon() 1165 "r"(biasptr) // %8 in conv1x1s1_sgemm_pack4_neon() 1241 "r"(biasptr) // %8 in conv1x1s1_sgemm_pack4_neon() 1307 "r"(biasptr) // %8 in conv1x1s1_sgemm_pack4_neon() 1359 "r"(biasptr) // %8 in conv1x1s1_sgemm_pack4_neon() 1415 "r"(biasptr) // %8 in conv1x1s1_sgemm_pack4_neon() 1457 "r"(biasptr) // %8 in conv1x1s1_sgemm_pack4_neon() 1505 "r"(biasptr) // %8 in conv1x1s1_sgemm_pack4_neon() [all …]
|
H A D | convolution_1x1_pack4_bf16s.h | 658 "r"(biasptr) // %10 in conv1x1s1_sgemm_pack4_bf16s_neon() 834 "r"(biasptr) // %10 in conv1x1s1_sgemm_pack4_bf16s_neon() 1259 "r"(biasptr) // %8 in conv1x1s1_sgemm_pack4_bf16s_neon() 1375 "r"(biasptr) // %8 in conv1x1s1_sgemm_pack4_bf16s_neon() 1475 "r"(biasptr) // %8 in conv1x1s1_sgemm_pack4_bf16s_neon() 1556 "r"(biasptr) // %8 in conv1x1s1_sgemm_pack4_bf16s_neon() 1623 "r"(biasptr) // %8 in conv1x1s1_sgemm_pack4_bf16s_neon() 1690 "r"(biasptr) // %8 in conv1x1s1_sgemm_pack4_bf16s_neon() 1743 "r"(biasptr) // %8 in conv1x1s1_sgemm_pack4_bf16s_neon() 1800 "r"(biasptr) // %8 in conv1x1s1_sgemm_pack4_bf16s_neon() [all …]
|
/dports/misc/ncnn/ncnn-20211208/src/layer/mips/ |
H A D | convolution_sgemm.h | 183 float sum0 = biasptr[0]; in im2col_sgemm_msa() 184 float sum1 = biasptr[1]; in im2col_sgemm_msa() 185 float sum2 = biasptr[2]; in im2col_sgemm_msa() 186 float sum3 = biasptr[3]; in im2col_sgemm_msa() 187 float sum4 = biasptr[4]; in im2col_sgemm_msa() 188 float sum5 = biasptr[5]; in im2col_sgemm_msa() 189 float sum6 = biasptr[6]; in im2col_sgemm_msa() 190 float sum7 = biasptr[7]; in im2col_sgemm_msa() 286 float sum0 = biasptr[0]; in im2col_sgemm_msa() 287 float sum1 = biasptr[1]; in im2col_sgemm_msa() [all …]
|
/dports/misc/ncnn/ncnn-20211208/src/layer/riscv/ |
H A D | convolution_sgemm.h | 183 float sum0 = biasptr[0]; in im2col_sgemm_rvv() 184 float sum1 = biasptr[1]; in im2col_sgemm_rvv() 185 float sum2 = biasptr[2]; in im2col_sgemm_rvv() 186 float sum3 = biasptr[3]; in im2col_sgemm_rvv() 187 float sum4 = biasptr[4]; in im2col_sgemm_rvv() 188 float sum5 = biasptr[5]; in im2col_sgemm_rvv() 189 float sum6 = biasptr[6]; in im2col_sgemm_rvv() 190 float sum7 = biasptr[7]; in im2col_sgemm_rvv() 282 float sum0 = biasptr[0]; in im2col_sgemm_rvv() 283 float sum1 = biasptr[1]; in im2col_sgemm_rvv() [all …]
|
H A D | convolution_sgemm_fp16s.h | 183 __fp16 sum0 = biasptr[0]; in im2col_sgemm_fp16sa_rvv() 184 __fp16 sum1 = biasptr[1]; in im2col_sgemm_fp16sa_rvv() 185 __fp16 sum2 = biasptr[2]; in im2col_sgemm_fp16sa_rvv() 186 __fp16 sum3 = biasptr[3]; in im2col_sgemm_fp16sa_rvv() 187 __fp16 sum4 = biasptr[4]; in im2col_sgemm_fp16sa_rvv() 188 __fp16 sum5 = biasptr[5]; in im2col_sgemm_fp16sa_rvv() 189 __fp16 sum6 = biasptr[6]; in im2col_sgemm_fp16sa_rvv() 190 __fp16 sum7 = biasptr[7]; in im2col_sgemm_fp16sa_rvv() 282 __fp16 sum0 = biasptr[0]; in im2col_sgemm_fp16sa_rvv() 283 __fp16 sum1 = biasptr[1]; in im2col_sgemm_fp16sa_rvv() [all …]
|
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/ |
H A D | convolution_sgemm.h | 579 float sum0 = biasptr[0]; in conv_im2col_sgemm_sse() 580 float sum1 = biasptr[1]; in conv_im2col_sgemm_sse() 581 float sum2 = biasptr[2]; in conv_im2col_sgemm_sse() 582 float sum3 = biasptr[3]; in conv_im2col_sgemm_sse() 583 float sum4 = biasptr[4]; in conv_im2col_sgemm_sse() 584 float sum5 = biasptr[5]; in conv_im2col_sgemm_sse() 585 float sum6 = biasptr[6]; in conv_im2col_sgemm_sse() 586 float sum7 = biasptr[7]; in conv_im2col_sgemm_sse() 874 float sum0 = biasptr[0]; in conv_im2col_sgemm_sse() 875 float sum1 = biasptr[1]; in conv_im2col_sgemm_sse() [all …]
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/ |
H A D | convolution_sgemm.h | 579 float sum0 = biasptr[0]; in conv_im2col_sgemm_sse() 580 float sum1 = biasptr[1]; in conv_im2col_sgemm_sse() 581 float sum2 = biasptr[2]; in conv_im2col_sgemm_sse() 582 float sum3 = biasptr[3]; in conv_im2col_sgemm_sse() 583 float sum4 = biasptr[4]; in conv_im2col_sgemm_sse() 584 float sum5 = biasptr[5]; in conv_im2col_sgemm_sse() 585 float sum6 = biasptr[6]; in conv_im2col_sgemm_sse() 586 float sum7 = biasptr[7]; in conv_im2col_sgemm_sse() 874 float sum0 = biasptr[0]; in conv_im2col_sgemm_sse() 875 float sum1 = biasptr[1]; in conv_im2col_sgemm_sse() [all …]
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/ |
H A D | convolution_sgemm.h | 579 float sum0 = biasptr[0]; in conv_im2col_sgemm_sse() 580 float sum1 = biasptr[1]; in conv_im2col_sgemm_sse() 581 float sum2 = biasptr[2]; in conv_im2col_sgemm_sse() 582 float sum3 = biasptr[3]; in conv_im2col_sgemm_sse() 583 float sum4 = biasptr[4]; in conv_im2col_sgemm_sse() 584 float sum5 = biasptr[5]; in conv_im2col_sgemm_sse() 585 float sum6 = biasptr[6]; in conv_im2col_sgemm_sse() 586 float sum7 = biasptr[7]; in conv_im2col_sgemm_sse() 874 float sum0 = biasptr[0]; in conv_im2col_sgemm_sse() 875 float sum1 = biasptr[1]; in conv_im2col_sgemm_sse() [all …]
|
/dports/misc/ncnn/ncnn-20211208/src/layer/x86/ |
H A D | convolution_sgemm.h | 579 float sum0 = biasptr[0]; in conv_im2col_sgemm_sse() 580 float sum1 = biasptr[1]; in conv_im2col_sgemm_sse() 581 float sum2 = biasptr[2]; in conv_im2col_sgemm_sse() 582 float sum3 = biasptr[3]; in conv_im2col_sgemm_sse() 583 float sum4 = biasptr[4]; in conv_im2col_sgemm_sse() 584 float sum5 = biasptr[5]; in conv_im2col_sgemm_sse() 585 float sum6 = biasptr[6]; in conv_im2col_sgemm_sse() 586 float sum7 = biasptr[7]; in conv_im2col_sgemm_sse() 874 float sum0 = biasptr[0]; in conv_im2col_sgemm_sse() 875 float sum1 = biasptr[1]; in conv_im2col_sgemm_sse() [all …]
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/ |
H A D | convolution_sgemm.h | 579 float sum0 = biasptr[0]; in conv_im2col_sgemm_sse() 580 float sum1 = biasptr[1]; in conv_im2col_sgemm_sse() 581 float sum2 = biasptr[2]; in conv_im2col_sgemm_sse() 582 float sum3 = biasptr[3]; in conv_im2col_sgemm_sse() 583 float sum4 = biasptr[4]; in conv_im2col_sgemm_sse() 584 float sum5 = biasptr[5]; in conv_im2col_sgemm_sse() 585 float sum6 = biasptr[6]; in conv_im2col_sgemm_sse() 586 float sum7 = biasptr[7]; in conv_im2col_sgemm_sse() 874 float sum0 = biasptr[0]; in conv_im2col_sgemm_sse() 875 float sum1 = biasptr[1]; in conv_im2col_sgemm_sse() [all …]
|