Home
last modified time | relevance | path

Searched refs:output3_tm (Results 1 – 25 of 39) sorted by relevance

12

/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack4to1_bf16s.h692 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
837 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
939 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1014 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1162 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1258 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1340 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1413 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1472 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1534 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack4to1.h949 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1094 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1196 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1271 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1419 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1515 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1597 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1670 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1729 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1791 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
[all …]
H A Dconvolution_3x3_int8.h466 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd23_int8_neon()
657 output3_tm += 16; in conv3x3s1_winograd23_int8_neon()
679 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd23_int8_neon()
806 output3_tm += 16; in conv3x3s1_winograd23_int8_neon()
1655 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_int8_neon()
1846 output3_tm += 36; in conv3x3s1_winograd43_int8_neon()
1868 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_int8_neon()
1995 output3_tm += 36; in conv3x3s1_winograd43_int8_neon()
2797 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_dequant_int8_neon()
2988 output3_tm += 36; in conv3x3s1_winograd43_dequant_int8_neon()
[all …]
H A Dconvolution_3x3.h3112 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3393 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3658 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3687 output3_tm += 4; in conv3x3s1_winograd64_neon4()
3772 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3844 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3867 output3_tm += 4; in conv3x3s1_winograd64_neon4()
6015 output3_tm += 1; in conv3x3s1_winograd64_neon5()
6416 output3_tm += 8; in conv3x3s1_winograd64_neon5()
6676 output3_tm += 4; in conv3x3s1_winograd64_neon5()
[all …]
H A Dconvolution_3x3_pack8to1_fp16s.h490 __fp16* output3_tm = top_blob_tm.channel(p + 3); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
624 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
635 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
765 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
776 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
835 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
846 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack4to1_bf16s.h692 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
837 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
939 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1014 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1162 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1258 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1340 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1413 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1472 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1534 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack4to1.h949 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1094 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1196 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1271 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1419 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1515 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1597 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1670 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1729 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1791 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
[all …]
H A Dconvolution_3x3_int8.h466 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd23_int8_neon()
657 output3_tm += 16; in conv3x3s1_winograd23_int8_neon()
679 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd23_int8_neon()
806 output3_tm += 16; in conv3x3s1_winograd23_int8_neon()
1655 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_int8_neon()
1846 output3_tm += 36; in conv3x3s1_winograd43_int8_neon()
1868 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_int8_neon()
1995 output3_tm += 36; in conv3x3s1_winograd43_int8_neon()
2797 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_dequant_int8_neon()
2988 output3_tm += 36; in conv3x3s1_winograd43_dequant_int8_neon()
[all …]
H A Dconvolution_3x3.h3112 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3393 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3658 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3687 output3_tm += 4; in conv3x3s1_winograd64_neon4()
3772 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3844 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3867 output3_tm += 4; in conv3x3s1_winograd64_neon4()
6015 output3_tm += 1; in conv3x3s1_winograd64_neon5()
6416 output3_tm += 8; in conv3x3s1_winograd64_neon5()
6676 output3_tm += 4; in conv3x3s1_winograd64_neon5()
[all …]
H A Dconvolution_3x3_pack8to1_fp16s.h490 __fp16* output3_tm = top_blob_tm.channel(p + 3); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
624 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
635 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
765 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
776 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
835 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
846 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack4to1_bf16s.h692 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
837 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
939 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1014 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1162 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1258 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1340 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1413 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1472 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1534 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack4to1.h949 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1094 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1196 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1271 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1419 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1515 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1597 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1670 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1729 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1791 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
[all …]
H A Dconvolution_3x3_int8.h466 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd23_int8_neon()
657 output3_tm += 16; in conv3x3s1_winograd23_int8_neon()
679 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd23_int8_neon()
806 output3_tm += 16; in conv3x3s1_winograd23_int8_neon()
1655 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_int8_neon()
1846 output3_tm += 36; in conv3x3s1_winograd43_int8_neon()
1868 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_int8_neon()
1995 output3_tm += 36; in conv3x3s1_winograd43_int8_neon()
2797 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_dequant_int8_neon()
2988 output3_tm += 36; in conv3x3s1_winograd43_dequant_int8_neon()
[all …]
H A Dconvolution_3x3.h3112 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3393 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3658 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3687 output3_tm += 4; in conv3x3s1_winograd64_neon4()
3772 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3844 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3867 output3_tm += 4; in conv3x3s1_winograd64_neon4()
6015 output3_tm += 1; in conv3x3s1_winograd64_neon5()
6416 output3_tm += 8; in conv3x3s1_winograd64_neon5()
6676 output3_tm += 4; in conv3x3s1_winograd64_neon5()
[all …]
H A Dconvolution_3x3_pack8to1_fp16s.h490 __fp16* output3_tm = top_blob_tm.channel(p + 3); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
624 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
635 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
765 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
776 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
835 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
846 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Dconvolution_3x3_pack4to1_bf16s.h692 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
837 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
939 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1014 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1162 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1258 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1340 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1413 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1472 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1534 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack4to1.h949 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1094 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1196 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1271 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1419 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1515 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1597 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1670 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1729 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1791 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
[all …]
H A Dconvolution_3x3_int8.h466 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd23_int8_neon()
657 output3_tm += 16; in conv3x3s1_winograd23_int8_neon()
679 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd23_int8_neon()
806 output3_tm += 16; in conv3x3s1_winograd23_int8_neon()
1655 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_int8_neon()
1846 output3_tm += 36; in conv3x3s1_winograd43_int8_neon()
1868 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_int8_neon()
1995 output3_tm += 36; in conv3x3s1_winograd43_int8_neon()
2797 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_dequant_int8_neon()
2988 output3_tm += 36; in conv3x3s1_winograd43_dequant_int8_neon()
[all …]
H A Dconvolution_3x3.h3112 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3393 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3658 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3687 output3_tm += 4; in conv3x3s1_winograd64_neon4()
3772 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3844 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3867 output3_tm += 4; in conv3x3s1_winograd64_neon4()
6015 output3_tm += 1; in conv3x3s1_winograd64_neon5()
6416 output3_tm += 8; in conv3x3s1_winograd64_neon5()
6676 output3_tm += 4; in conv3x3s1_winograd64_neon5()
[all …]
H A Dconvolution_3x3_pack8to1_fp16s.h490 __fp16* output3_tm = top_blob_tm.channel(p + 3); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
624 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
635 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
765 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
776 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
835 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
846 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack4to1_bf16s.h692 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
837 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
939 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1014 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1162 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1258 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1340 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1413 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1472 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1534 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack4to1.h949 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1094 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1196 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1271 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1419 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1515 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1597 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1670 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1729 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
1791 "4"(output3_tm), in conv3x3s1_winograd64_pack4to1_neon()
[all …]
H A Dconvolution_3x3_int8.h466 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd23_int8_neon()
657 output3_tm += 16; in conv3x3s1_winograd23_int8_neon()
679 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd23_int8_neon()
806 output3_tm += 16; in conv3x3s1_winograd23_int8_neon()
1655 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_int8_neon()
1846 output3_tm += 36; in conv3x3s1_winograd43_int8_neon()
1868 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_int8_neon()
1995 output3_tm += 36; in conv3x3s1_winograd43_int8_neon()
2797 output3_tm = output3_tm + r * 4; in conv3x3s1_winograd43_dequant_int8_neon()
2988 output3_tm += 36; in conv3x3s1_winograd43_dequant_int8_neon()
[all …]
H A Dconvolution_3x3.h3112 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3393 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3658 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3687 output3_tm += 4; in conv3x3s1_winograd64_neon4()
3772 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3844 "3"(output3_tm), in conv3x3s1_winograd64_neon4()
3867 output3_tm += 4; in conv3x3s1_winograd64_neon4()
6015 output3_tm += 1; in conv3x3s1_winograd64_neon5()
6416 output3_tm += 8; in conv3x3s1_winograd64_neon5()
6676 output3_tm += 4; in conv3x3s1_winograd64_neon5()
[all …]
H A Dconvolution_3x3_pack8to1_fp16s.h490 __fp16* output3_tm = top_blob_tm.channel(p + 3); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
624 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
635 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
765 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
776 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
835 "=r"(output3_tm), // %4 in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
846 "4"(output3_tm), in conv3x3s1_winograd64_pack8to1_fp16sa_neon()

12