Home
last modified time | relevance | path

Searched refs:tmp135c (Results 1 – 20 of 20) sorted by relevance

/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to1_fp16s.h1070 __fp16 tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1076 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1077 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1078 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1103 __fp16 tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1109 output0[1] = bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1110 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1111 output0[5] = bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
H A Dconvolution_3x3_pack4to1_bf16s.h1985 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
1991 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1992 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1993 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2018 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
2024 … output0[1] = float32_to_bfloat16(bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2025 … output0[3] = float32_to_bfloat16(bias0 + tmp135a + tmp135b * 8 + tmp135c * 4); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2026 … output0[5] = float32_to_bfloat16(bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
H A Dconvolution_3x3_pack4to1.h2242 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack4to1_neon() local
2248 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_neon()
2249 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_neon()
2250 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_neon()
2275 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack4to1_neon() local
2281 output0[1] = bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_neon()
2282 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_neon()
2283 output0[5] = bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_neon()
H A Dconvolution_3x3.h4544 float tmp135c = output0_tm_4[1] - output0_tm_4[2]; in conv3x3s1_winograd64_neon4()
4550 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_neon4()
4551 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon4()
4552 tmp[5][m] = output0_tm_4[3] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_neon4()
4571 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_neon4()
4578 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon4()
7894 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_neon5()
7900 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_neon5()
7901 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon5()
7927 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_neon5()
[all …]
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to1_fp16s.h1070 __fp16 tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1076 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1077 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1078 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1103 __fp16 tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1109 output0[1] = bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1110 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1111 output0[5] = bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
H A Dconvolution_3x3_pack4to1_bf16s.h1985 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
1991 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1992 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1993 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2018 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
2024 … output0[1] = float32_to_bfloat16(bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2025 … output0[3] = float32_to_bfloat16(bias0 + tmp135a + tmp135b * 8 + tmp135c * 4); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2026 … output0[5] = float32_to_bfloat16(bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
H A Dconvolution_3x3_pack4to1.h2242 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack4to1_neon() local
2248 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_neon()
2249 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_neon()
2250 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_neon()
2275 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack4to1_neon() local
2281 output0[1] = bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_neon()
2282 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_neon()
2283 output0[5] = bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_neon()
H A Dconvolution_3x3.h4544 float tmp135c = output0_tm_4[1] - output0_tm_4[2]; in conv3x3s1_winograd64_neon4()
4550 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_neon4()
4551 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon4()
4552 tmp[5][m] = output0_tm_4[3] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_neon4()
4571 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_neon4()
4578 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon4()
7894 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_neon5()
7900 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_neon5()
7901 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon5()
7927 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_neon5()
[all …]
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to1_fp16s.h1070 __fp16 tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1076 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1077 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1078 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1103 __fp16 tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1109 output0[1] = bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1110 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1111 output0[5] = bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
H A Dconvolution_3x3_pack4to1_bf16s.h1985 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
1991 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1992 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1993 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2018 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
2024 … output0[1] = float32_to_bfloat16(bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2025 … output0[3] = float32_to_bfloat16(bias0 + tmp135a + tmp135b * 8 + tmp135c * 4); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2026 … output0[5] = float32_to_bfloat16(bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
H A Dconvolution_3x3_pack4to1.h2242 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack4to1_neon() local
2248 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_neon()
2249 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_neon()
2250 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_neon()
2275 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack4to1_neon() local
2281 output0[1] = bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_neon()
2282 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_neon()
2283 output0[5] = bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_neon()
H A Dconvolution_3x3.h4544 float tmp135c = output0_tm_4[1] - output0_tm_4[2]; in conv3x3s1_winograd64_neon4()
4550 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_neon4()
4551 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon4()
4552 tmp[5][m] = output0_tm_4[3] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_neon4()
4571 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_neon4()
4578 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon4()
7894 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_neon5()
7900 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_neon5()
7901 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon5()
7927 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_neon5()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Dconvolution_3x3_pack8to1_fp16s.h1070 __fp16 tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1076 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1077 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1078 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1103 __fp16 tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1109 output0[1] = bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1110 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1111 output0[5] = bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
H A Dconvolution_3x3_pack4to1_bf16s.h1985 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
1991 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1992 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1993 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2018 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
2024 … output0[1] = float32_to_bfloat16(bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2025 … output0[3] = float32_to_bfloat16(bias0 + tmp135a + tmp135b * 8 + tmp135c * 4); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2026 … output0[5] = float32_to_bfloat16(bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
H A Dconvolution_3x3_pack4to1.h2242 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack4to1_neon() local
2248 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_neon()
2249 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_neon()
2250 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_neon()
2275 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack4to1_neon() local
2281 output0[1] = bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_neon()
2282 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_neon()
2283 output0[5] = bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_neon()
H A Dconvolution_3x3.h4544 float tmp135c = output0_tm_4[1] - output0_tm_4[2]; in conv3x3s1_winograd64_neon4()
4550 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_neon4()
4551 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon4()
4552 tmp[5][m] = output0_tm_4[3] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_neon4()
4571 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_neon4()
4578 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon4()
7894 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_neon5()
7900 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_neon5()
7901 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon5()
7927 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_neon5()
[all …]
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to1_fp16s.h1070 __fp16 tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1076 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1077 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1078 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1103 __fp16 tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1109 output0[1] = bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1110 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1111 output0[5] = bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
H A Dconvolution_3x3_pack4to1_bf16s.h1985 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
1991 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1992 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1993 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2018 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
2024 … output0[1] = float32_to_bfloat16(bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2025 … output0[3] = float32_to_bfloat16(bias0 + tmp135a + tmp135b * 8 + tmp135c * 4); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2026 … output0[5] = float32_to_bfloat16(bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
H A Dconvolution_3x3_pack4to1.h2242 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack4to1_neon() local
2248 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_neon()
2249 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_neon()
2250 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_neon()
2275 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack4to1_neon() local
2281 output0[1] = bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_neon()
2282 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_neon()
2283 output0[5] = bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_neon()
H A Dconvolution_3x3.h4544 float tmp135c = output0_tm_4[1] - output0_tm_4[2]; in conv3x3s1_winograd64_neon4()
4550 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_neon4()
4551 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon4()
4552 tmp[5][m] = output0_tm_4[3] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_neon4()
4571 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_neon4()
4578 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon4()
7894 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_neon5()
7900 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_neon5()
7901 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon5()
7927 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_neon5()
[all …]