Home
last modified time | relevance | path

Searched refs:tmp024b (Results 1 – 20 of 20) sorted by relevance

/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to1_fp16s.h1066 __fp16 tmp024b = output0_tm_3[0] + output0_tm_4[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1072 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1073 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1074 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1099 __fp16 tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1105 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1106 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1107 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
H A Dconvolution_3x3_pack4to1_bf16s.h1981 float tmp024b = output0_tm_3[0] + output0_tm_4[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
1987 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1988 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1989 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2014 float tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
2020 … output0[0] = float32_to_bfloat16(bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2021 … output0[2] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 4 + tmp024c * 8); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2022 … output0[4] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
H A Dconvolution_3x3_pack4to1.h2238 float tmp024b = output0_tm_3[0] + output0_tm_4[0]; in conv3x3s1_winograd64_pack4to1_neon() local
2244 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon()
2245 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon()
2246 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
2271 float tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_pack4to1_neon() local
2277 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon()
2278 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon()
2279 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
H A Dconvolution_3x3.h4540 float tmp024b = output0_tm_0[3] + output0_tm_4[0]; in conv3x3s1_winograd64_neon4()
4546 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_neon4()
4547 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4()
4548 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon4()
4567 float tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_neon4()
4574 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4()
7890 float tmp024b = output0_tm_3[0] + output0_tm_4[0]; in conv3x3s1_winograd64_neon5()
7897 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon5()
7898 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon5()
7923 float tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_neon5()
[all …]
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to1_fp16s.h1066 __fp16 tmp024b = output0_tm_3[0] + output0_tm_4[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1072 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1073 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1074 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1099 __fp16 tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1105 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1106 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1107 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
H A Dconvolution_3x3_pack4to1_bf16s.h1981 float tmp024b = output0_tm_3[0] + output0_tm_4[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
1987 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1988 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1989 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2014 float tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
2020 … output0[0] = float32_to_bfloat16(bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2021 … output0[2] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 4 + tmp024c * 8); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2022 … output0[4] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
H A Dconvolution_3x3_pack4to1.h2238 float tmp024b = output0_tm_3[0] + output0_tm_4[0]; in conv3x3s1_winograd64_pack4to1_neon() local
2244 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon()
2245 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon()
2246 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
2271 float tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_pack4to1_neon() local
2277 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon()
2278 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon()
2279 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
H A Dconvolution_3x3.h4540 float tmp024b = output0_tm_0[3] + output0_tm_4[0]; in conv3x3s1_winograd64_neon4()
4546 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_neon4()
4547 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4()
4548 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon4()
4567 float tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_neon4()
4574 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4()
7890 float tmp024b = output0_tm_3[0] + output0_tm_4[0]; in conv3x3s1_winograd64_neon5()
7897 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon5()
7898 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon5()
7923 float tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_neon5()
[all …]
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to1_fp16s.h1066 __fp16 tmp024b = output0_tm_3[0] + output0_tm_4[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1072 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1073 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1074 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1099 __fp16 tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1105 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1106 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1107 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
H A Dconvolution_3x3_pack4to1_bf16s.h1981 float tmp024b = output0_tm_3[0] + output0_tm_4[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
1987 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1988 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1989 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2014 float tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
2020 … output0[0] = float32_to_bfloat16(bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2021 … output0[2] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 4 + tmp024c * 8); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2022 … output0[4] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
H A Dconvolution_3x3_pack4to1.h2238 float tmp024b = output0_tm_3[0] + output0_tm_4[0]; in conv3x3s1_winograd64_pack4to1_neon() local
2244 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon()
2245 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon()
2246 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
2271 float tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_pack4to1_neon() local
2277 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon()
2278 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon()
2279 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
H A Dconvolution_3x3.h4540 float tmp024b = output0_tm_0[3] + output0_tm_4[0]; in conv3x3s1_winograd64_neon4()
4546 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_neon4()
4547 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4()
4548 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon4()
4567 float tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_neon4()
4574 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4()
7890 float tmp024b = output0_tm_3[0] + output0_tm_4[0]; in conv3x3s1_winograd64_neon5()
7897 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon5()
7898 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon5()
7923 float tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_neon5()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Dconvolution_3x3_pack8to1_fp16s.h1066 __fp16 tmp024b = output0_tm_3[0] + output0_tm_4[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1072 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1073 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1074 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1099 __fp16 tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1105 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1106 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1107 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
H A Dconvolution_3x3_pack4to1_bf16s.h1981 float tmp024b = output0_tm_3[0] + output0_tm_4[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
1987 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1988 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1989 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2014 float tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
2020 … output0[0] = float32_to_bfloat16(bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2021 … output0[2] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 4 + tmp024c * 8); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2022 … output0[4] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
H A Dconvolution_3x3_pack4to1.h2238 float tmp024b = output0_tm_3[0] + output0_tm_4[0]; in conv3x3s1_winograd64_pack4to1_neon() local
2244 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon()
2245 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon()
2246 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
2271 float tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_pack4to1_neon() local
2277 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon()
2278 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon()
2279 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
H A Dconvolution_3x3.h4540 float tmp024b = output0_tm_0[3] + output0_tm_4[0]; in conv3x3s1_winograd64_neon4()
4546 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_neon4()
4547 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4()
4548 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon4()
4567 float tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_neon4()
4574 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4()
7890 float tmp024b = output0_tm_3[0] + output0_tm_4[0]; in conv3x3s1_winograd64_neon5()
7897 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon5()
7898 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon5()
7923 float tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_neon5()
[all …]
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to1_fp16s.h1066 __fp16 tmp024b = output0_tm_3[0] + output0_tm_4[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1072 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1073 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1074 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1099 __fp16 tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1105 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1106 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1107 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
H A Dconvolution_3x3_pack4to1_bf16s.h1981 float tmp024b = output0_tm_3[0] + output0_tm_4[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
1987 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1988 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1989 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2014 float tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
2020 … output0[0] = float32_to_bfloat16(bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2021 … output0[2] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 4 + tmp024c * 8); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2022 … output0[4] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
H A Dconvolution_3x3_pack4to1.h2238 float tmp024b = output0_tm_3[0] + output0_tm_4[0]; in conv3x3s1_winograd64_pack4to1_neon() local
2244 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon()
2245 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon()
2246 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
2271 float tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_pack4to1_neon() local
2277 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon()
2278 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon()
2279 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
H A Dconvolution_3x3.h4540 float tmp024b = output0_tm_0[3] + output0_tm_4[0]; in conv3x3s1_winograd64_neon4()
4546 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_neon4()
4547 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4()
4548 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon4()
4567 float tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_neon4()
4574 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4()
7890 float tmp024b = output0_tm_3[0] + output0_tm_4[0]; in conv3x3s1_winograd64_neon5()
7897 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon5()
7898 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon5()
7923 float tmp024b = tmp0[3] + tmp0[4]; in conv3x3s1_winograd64_neon5()
[all …]