/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack4to1_bf16s.h | 691 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 836 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 938 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1013 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1161 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1257 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1339 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1412 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1471 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1533 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() [all …]
|
H A D | convolution_3x3_pack4to1.h | 948 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1093 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1195 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1270 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1418 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1514 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1596 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1669 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1728 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1790 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() [all …]
|
H A D | convolution_3x3_int8.h | 465 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd23_int8_neon() 656 output2_tm += 16; in conv3x3s1_winograd23_int8_neon() 678 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd23_int8_neon() 805 output2_tm += 16; in conv3x3s1_winograd23_int8_neon() 1654 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd43_int8_neon() 1845 output2_tm += 36; in conv3x3s1_winograd43_int8_neon() 1867 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd43_int8_neon() 1994 output2_tm += 36; in conv3x3s1_winograd43_int8_neon() 2796 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd43_dequant_int8_neon() 2987 output2_tm += 36; in conv3x3s1_winograd43_dequant_int8_neon() [all …]
|
H A D | convolution_3x3.h | 3111 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3392 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3657 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3686 output2_tm += 4; in conv3x3s1_winograd64_neon4() 3771 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3843 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3866 output2_tm += 4; in conv3x3s1_winograd64_neon4() 6014 output2_tm += 1; in conv3x3s1_winograd64_neon5() 6415 output2_tm += 8; in conv3x3s1_winograd64_neon5() 6675 output2_tm += 4; in conv3x3s1_winograd64_neon5() [all …]
|
H A D | convolution_3x3_pack8to1_fp16s.h | 489 __fp16* output2_tm = top_blob_tm.channel(p + 2); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 623 "=r"(output2_tm), // %3 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 634 "3"(output2_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 764 "=r"(output2_tm), // %3 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 775 "3"(output2_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 834 "=r"(output2_tm), // %3 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 845 "3"(output2_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack4to1_bf16s.h | 691 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 836 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 938 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1013 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1161 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1257 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1339 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1412 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1471 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1533 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() [all …]
|
H A D | convolution_3x3_pack4to1.h | 948 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1093 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1195 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1270 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1418 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1514 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1596 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1669 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1728 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1790 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() [all …]
|
H A D | convolution_3x3_int8.h | 465 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd23_int8_neon() 656 output2_tm += 16; in conv3x3s1_winograd23_int8_neon() 678 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd23_int8_neon() 805 output2_tm += 16; in conv3x3s1_winograd23_int8_neon() 1654 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd43_int8_neon() 1845 output2_tm += 36; in conv3x3s1_winograd43_int8_neon() 1867 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd43_int8_neon() 1994 output2_tm += 36; in conv3x3s1_winograd43_int8_neon() 2796 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd43_dequant_int8_neon() 2987 output2_tm += 36; in conv3x3s1_winograd43_dequant_int8_neon() [all …]
|
H A D | convolution_3x3.h | 3111 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3392 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3657 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3686 output2_tm += 4; in conv3x3s1_winograd64_neon4() 3771 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3843 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3866 output2_tm += 4; in conv3x3s1_winograd64_neon4() 6014 output2_tm += 1; in conv3x3s1_winograd64_neon5() 6415 output2_tm += 8; in conv3x3s1_winograd64_neon5() 6675 output2_tm += 4; in conv3x3s1_winograd64_neon5() [all …]
|
H A D | convolution_3x3_pack8to1_fp16s.h | 489 __fp16* output2_tm = top_blob_tm.channel(p + 2); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 623 "=r"(output2_tm), // %3 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 634 "3"(output2_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 764 "=r"(output2_tm), // %3 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 775 "3"(output2_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 834 "=r"(output2_tm), // %3 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 845 "3"(output2_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack4to1_bf16s.h | 691 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 836 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 938 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1013 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1161 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1257 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1339 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1412 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1471 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1533 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() [all …]
|
H A D | convolution_3x3_pack4to1.h | 948 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1093 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1195 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1270 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1418 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1514 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1596 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1669 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1728 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1790 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() [all …]
|
H A D | convolution_3x3_int8.h | 465 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd23_int8_neon() 656 output2_tm += 16; in conv3x3s1_winograd23_int8_neon() 678 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd23_int8_neon() 805 output2_tm += 16; in conv3x3s1_winograd23_int8_neon() 1654 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd43_int8_neon() 1845 output2_tm += 36; in conv3x3s1_winograd43_int8_neon() 1867 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd43_int8_neon() 1994 output2_tm += 36; in conv3x3s1_winograd43_int8_neon() 2796 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd43_dequant_int8_neon() 2987 output2_tm += 36; in conv3x3s1_winograd43_dequant_int8_neon() [all …]
|
H A D | convolution_3x3.h | 3111 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3392 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3657 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3686 output2_tm += 4; in conv3x3s1_winograd64_neon4() 3771 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3843 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3866 output2_tm += 4; in conv3x3s1_winograd64_neon4() 6014 output2_tm += 1; in conv3x3s1_winograd64_neon5() 6415 output2_tm += 8; in conv3x3s1_winograd64_neon5() 6675 output2_tm += 4; in conv3x3s1_winograd64_neon5() [all …]
|
H A D | convolution_3x3_pack8to1_fp16s.h | 489 __fp16* output2_tm = top_blob_tm.channel(p + 2); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 623 "=r"(output2_tm), // %3 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 634 "3"(output2_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 764 "=r"(output2_tm), // %3 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 775 "3"(output2_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 834 "=r"(output2_tm), // %3 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 845 "3"(output2_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/ |
H A D | convolution_3x3_pack4to1_bf16s.h | 691 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 836 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 938 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1013 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1161 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1257 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1339 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1412 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1471 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1533 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() [all …]
|
H A D | convolution_3x3_pack4to1.h | 948 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1093 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1195 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1270 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1418 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1514 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1596 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1669 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1728 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1790 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() [all …]
|
H A D | convolution_3x3_int8.h | 465 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd23_int8_neon() 656 output2_tm += 16; in conv3x3s1_winograd23_int8_neon() 678 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd23_int8_neon() 805 output2_tm += 16; in conv3x3s1_winograd23_int8_neon() 1654 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd43_int8_neon() 1845 output2_tm += 36; in conv3x3s1_winograd43_int8_neon() 1867 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd43_int8_neon() 1994 output2_tm += 36; in conv3x3s1_winograd43_int8_neon() 2796 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd43_dequant_int8_neon() 2987 output2_tm += 36; in conv3x3s1_winograd43_dequant_int8_neon() [all …]
|
H A D | convolution_3x3.h | 3111 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3392 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3657 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3686 output2_tm += 4; in conv3x3s1_winograd64_neon4() 3771 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3843 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3866 output2_tm += 4; in conv3x3s1_winograd64_neon4() 6014 output2_tm += 1; in conv3x3s1_winograd64_neon5() 6415 output2_tm += 8; in conv3x3s1_winograd64_neon5() 6675 output2_tm += 4; in conv3x3s1_winograd64_neon5() [all …]
|
H A D | convolution_3x3_pack8to1_fp16s.h | 489 __fp16* output2_tm = top_blob_tm.channel(p + 2); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 623 "=r"(output2_tm), // %3 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 634 "3"(output2_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 764 "=r"(output2_tm), // %3 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 775 "3"(output2_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 834 "=r"(output2_tm), // %3 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 845 "3"(output2_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack4to1_bf16s.h | 691 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 836 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 938 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1013 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1161 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1257 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1339 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1412 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1471 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1533 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() [all …]
|
H A D | convolution_3x3_pack4to1.h | 948 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1093 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1195 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1270 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1418 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1514 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1596 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1669 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1728 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() 1790 "3"(output2_tm), in conv3x3s1_winograd64_pack4to1_neon() [all …]
|
H A D | convolution_3x3_int8.h | 465 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd23_int8_neon() 656 output2_tm += 16; in conv3x3s1_winograd23_int8_neon() 678 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd23_int8_neon() 805 output2_tm += 16; in conv3x3s1_winograd23_int8_neon() 1654 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd43_int8_neon() 1845 output2_tm += 36; in conv3x3s1_winograd43_int8_neon() 1867 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd43_int8_neon() 1994 output2_tm += 36; in conv3x3s1_winograd43_int8_neon() 2796 output2_tm = output2_tm + r * 4; in conv3x3s1_winograd43_dequant_int8_neon() 2987 output2_tm += 36; in conv3x3s1_winograd43_dequant_int8_neon() [all …]
|
H A D | convolution_3x3.h | 3111 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3392 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3657 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3686 output2_tm += 4; in conv3x3s1_winograd64_neon4() 3771 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3843 "2"(output2_tm), in conv3x3s1_winograd64_neon4() 3866 output2_tm += 4; in conv3x3s1_winograd64_neon4() 6014 output2_tm += 1; in conv3x3s1_winograd64_neon5() 6415 output2_tm += 8; in conv3x3s1_winograd64_neon5() 6675 output2_tm += 4; in conv3x3s1_winograd64_neon5() [all …]
|
H A D | convolution_3x3_pack8to1_fp16s.h | 489 __fp16* output2_tm = top_blob_tm.channel(p + 2); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 623 "=r"(output2_tm), // %3 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 634 "3"(output2_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 764 "=r"(output2_tm), // %3 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 775 "3"(output2_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 834 "=r"(output2_tm), // %3 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 845 "3"(output2_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|