/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack4to1_bf16s.h | 692 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 837 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 939 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1014 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1162 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1258 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1340 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1413 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1472 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1534 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() [all …]
|
H A D | convolution_3x3_pack4to1.h | 949 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1094 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1196 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1271 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1419 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1515 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1597 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1670 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1729 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1791 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() [all …]
|
H A D | convolution_3x3_int8.h | 466 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd23_int8_neon() 657 output3_tm += 16; in conv3x3s1_winograd23_int8_neon() 679 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd23_int8_neon() 806 output3_tm += 16; in conv3x3s1_winograd23_int8_neon() 1655 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_int8_neon() 1846 output3_tm += 36; in conv3x3s1_winograd43_int8_neon() 1868 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_int8_neon() 1995 output3_tm += 36; in conv3x3s1_winograd43_int8_neon() 2797 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_dequant_int8_neon() 2988 output3_tm += 36; in conv3x3s1_winograd43_dequant_int8_neon() [all …]
|
H A D | convolution_3x3.h | 3112 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3393 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3658 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3687 output3_tm += 4; in conv3x3s1_winograd64_neon4() 3772 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3844 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3867 output3_tm += 4; in conv3x3s1_winograd64_neon4() 6015 output3_tm += 1; in conv3x3s1_winograd64_neon5() 6416 output3_tm += 8; in conv3x3s1_winograd64_neon5() 6676 output3_tm += 4; in conv3x3s1_winograd64_neon5() [all …]
|
H A D | convolution_3x3_pack8to1_fp16s.h | 490 __fp16* output3_tm = top_blob_tm.channel(p + 3); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 624 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 635 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 765 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 776 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 835 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 846 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack4to1_bf16s.h | 692 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 837 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 939 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1014 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1162 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1258 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1340 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1413 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1472 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1534 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() [all …]
|
H A D | convolution_3x3_pack4to1.h | 949 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1094 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1196 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1271 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1419 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1515 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1597 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1670 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1729 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1791 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() [all …]
|
H A D | convolution_3x3_int8.h | 466 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd23_int8_neon() 657 output3_tm += 16; in conv3x3s1_winograd23_int8_neon() 679 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd23_int8_neon() 806 output3_tm += 16; in conv3x3s1_winograd23_int8_neon() 1655 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_int8_neon() 1846 output3_tm += 36; in conv3x3s1_winograd43_int8_neon() 1868 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_int8_neon() 1995 output3_tm += 36; in conv3x3s1_winograd43_int8_neon() 2797 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_dequant_int8_neon() 2988 output3_tm += 36; in conv3x3s1_winograd43_dequant_int8_neon() [all …]
|
H A D | convolution_3x3.h | 3112 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3393 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3658 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3687 output3_tm += 4; in conv3x3s1_winograd64_neon4() 3772 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3844 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3867 output3_tm += 4; in conv3x3s1_winograd64_neon4() 6015 output3_tm += 1; in conv3x3s1_winograd64_neon5() 6416 output3_tm += 8; in conv3x3s1_winograd64_neon5() 6676 output3_tm += 4; in conv3x3s1_winograd64_neon5() [all …]
|
H A D | convolution_3x3_pack8to1_fp16s.h | 490 __fp16* output3_tm = top_blob_tm.channel(p + 3); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 624 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 635 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 765 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 776 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 835 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 846 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack4to1_bf16s.h | 692 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 837 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 939 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1014 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1162 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1258 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1340 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1413 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1472 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1534 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() [all …]
|
H A D | convolution_3x3_pack4to1.h | 949 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1094 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1196 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1271 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1419 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1515 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1597 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1670 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1729 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1791 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() [all …]
|
H A D | convolution_3x3_int8.h | 466 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd23_int8_neon() 657 output3_tm += 16; in conv3x3s1_winograd23_int8_neon() 679 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd23_int8_neon() 806 output3_tm += 16; in conv3x3s1_winograd23_int8_neon() 1655 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_int8_neon() 1846 output3_tm += 36; in conv3x3s1_winograd43_int8_neon() 1868 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_int8_neon() 1995 output3_tm += 36; in conv3x3s1_winograd43_int8_neon() 2797 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_dequant_int8_neon() 2988 output3_tm += 36; in conv3x3s1_winograd43_dequant_int8_neon() [all …]
|
H A D | convolution_3x3.h | 3112 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3393 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3658 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3687 output3_tm += 4; in conv3x3s1_winograd64_neon4() 3772 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3844 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3867 output3_tm += 4; in conv3x3s1_winograd64_neon4() 6015 output3_tm += 1; in conv3x3s1_winograd64_neon5() 6416 output3_tm += 8; in conv3x3s1_winograd64_neon5() 6676 output3_tm += 4; in conv3x3s1_winograd64_neon5() [all …]
|
H A D | convolution_3x3_pack8to1_fp16s.h | 490 __fp16* output3_tm = top_blob_tm.channel(p + 3); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 624 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 635 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 765 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 776 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 835 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 846 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/ |
H A D | convolution_3x3_pack4to1_bf16s.h | 692 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 837 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 939 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1014 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1162 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1258 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1340 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1413 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1472 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1534 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() [all …]
|
H A D | convolution_3x3_pack4to1.h | 949 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1094 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1196 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1271 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1419 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1515 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1597 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1670 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1729 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1791 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() [all …]
|
H A D | convolution_3x3_int8.h | 466 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd23_int8_neon() 657 output3_tm += 16; in conv3x3s1_winograd23_int8_neon() 679 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd23_int8_neon() 806 output3_tm += 16; in conv3x3s1_winograd23_int8_neon() 1655 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_int8_neon() 1846 output3_tm += 36; in conv3x3s1_winograd43_int8_neon() 1868 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_int8_neon() 1995 output3_tm += 36; in conv3x3s1_winograd43_int8_neon() 2797 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_dequant_int8_neon() 2988 output3_tm += 36; in conv3x3s1_winograd43_dequant_int8_neon() [all …]
|
H A D | convolution_3x3.h | 3112 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3393 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3658 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3687 output3_tm += 4; in conv3x3s1_winograd64_neon4() 3772 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3844 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3867 output3_tm += 4; in conv3x3s1_winograd64_neon4() 6015 output3_tm += 1; in conv3x3s1_winograd64_neon5() 6416 output3_tm += 8; in conv3x3s1_winograd64_neon5() 6676 output3_tm += 4; in conv3x3s1_winograd64_neon5() [all …]
|
H A D | convolution_3x3_pack8to1_fp16s.h | 490 __fp16* output3_tm = top_blob_tm.channel(p + 3); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 624 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 635 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 765 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 776 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 835 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 846 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack4to1_bf16s.h | 692 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 837 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 939 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1014 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1162 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1258 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1340 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1413 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1472 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1534 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon() [all …]
|
H A D | convolution_3x3_pack4to1.h | 949 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1094 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1196 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1271 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1419 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1515 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1597 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1670 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1729 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() 1791 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon() [all …]
|
H A D | convolution_3x3_int8.h | 466 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd23_int8_neon() 657 output3_tm += 16; in conv3x3s1_winograd23_int8_neon() 679 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd23_int8_neon() 806 output3_tm += 16; in conv3x3s1_winograd23_int8_neon() 1655 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_int8_neon() 1846 output3_tm += 36; in conv3x3s1_winograd43_int8_neon() 1868 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_int8_neon() 1995 output3_tm += 36; in conv3x3s1_winograd43_int8_neon() 2797 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_dequant_int8_neon() 2988 output3_tm += 36; in conv3x3s1_winograd43_dequant_int8_neon() [all …]
|
H A D | convolution_3x3.h | 3112 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3393 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3658 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3687 output3_tm += 4; in conv3x3s1_winograd64_neon4() 3772 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3844 "3"(output3_tm), in conv3x3s1_winograd64_neon4() 3867 output3_tm += 4; in conv3x3s1_winograd64_neon4() 6015 output3_tm += 1; in conv3x3s1_winograd64_neon5() 6416 output3_tm += 8; in conv3x3s1_winograd64_neon5() 6676 output3_tm += 4; in conv3x3s1_winograd64_neon5() [all …]
|
H A D | convolution_3x3_pack8to1_fp16s.h | 490 __fp16* output3_tm = top_blob_tm.channel(p + 3); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 624 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 635 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 765 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 776 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 835 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 846 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|