Home
last modified time | relevance | path

Searched refs:tmp024a (Results 1 – 20 of 20) sorted by relevance

/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to1_fp16s.h1063 __fp16 tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1072 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1073 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1074 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1096 __fp16 tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1105 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1106 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1107 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
H A Dconvolution_3x3_pack4to1_bf16s.h1978 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
1987 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1988 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1989 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2011 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
2020 … output0[0] = float32_to_bfloat16(bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2021 … output0[2] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 4 + tmp024c * 8); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2022 … output0[4] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
H A Dconvolution_3x3_pack4to1.h2235 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack4to1_neon() local
2244 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon()
2245 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon()
2246 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
2268 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack4to1_neon() local
2277 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon()
2278 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon()
2279 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
H A Dconvolution_3x3.h4537 float tmp024a = output0_tm_0[1] + output0_tm_0[2]; in conv3x3s1_winograd64_neon4()
4546 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_neon4()
4547 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4()
4548 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon4()
4564 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_neon4()
4574 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4()
7887 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_neon5()
7897 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon5()
7898 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon5()
7920 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_neon5()
[all …]
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to1_fp16s.h1063 __fp16 tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1072 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1073 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1074 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1096 __fp16 tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1105 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1106 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1107 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
H A Dconvolution_3x3_pack4to1_bf16s.h1978 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
1987 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1988 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1989 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2011 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
2020 … output0[0] = float32_to_bfloat16(bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2021 … output0[2] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 4 + tmp024c * 8); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2022 … output0[4] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
H A Dconvolution_3x3_pack4to1.h2235 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack4to1_neon() local
2244 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon()
2245 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon()
2246 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
2268 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack4to1_neon() local
2277 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon()
2278 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon()
2279 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
H A Dconvolution_3x3.h4537 float tmp024a = output0_tm_0[1] + output0_tm_0[2]; in conv3x3s1_winograd64_neon4()
4546 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_neon4()
4547 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4()
4548 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon4()
4564 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_neon4()
4574 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4()
7887 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_neon5()
7897 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon5()
7898 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon5()
7920 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_neon5()
[all …]
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to1_fp16s.h1063 __fp16 tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1072 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1073 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1074 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1096 __fp16 tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1105 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1106 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1107 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
H A Dconvolution_3x3_pack4to1_bf16s.h1978 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
1987 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1988 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1989 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2011 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
2020 … output0[0] = float32_to_bfloat16(bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2021 … output0[2] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 4 + tmp024c * 8); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2022 … output0[4] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
H A Dconvolution_3x3_pack4to1.h2235 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack4to1_neon() local
2244 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon()
2245 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon()
2246 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
2268 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack4to1_neon() local
2277 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon()
2278 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon()
2279 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
H A Dconvolution_3x3.h4537 float tmp024a = output0_tm_0[1] + output0_tm_0[2]; in conv3x3s1_winograd64_neon4()
4546 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_neon4()
4547 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4()
4548 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon4()
4564 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_neon4()
4574 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4()
7887 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_neon5()
7897 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon5()
7898 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon5()
7920 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_neon5()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Dconvolution_3x3_pack8to1_fp16s.h1063 __fp16 tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1072 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1073 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1074 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1096 __fp16 tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1105 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1106 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1107 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
H A Dconvolution_3x3_pack4to1_bf16s.h1978 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
1987 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1988 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1989 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2011 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
2020 … output0[0] = float32_to_bfloat16(bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2021 … output0[2] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 4 + tmp024c * 8); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2022 … output0[4] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
H A Dconvolution_3x3_pack4to1.h2235 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack4to1_neon() local
2244 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon()
2245 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon()
2246 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
2268 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack4to1_neon() local
2277 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon()
2278 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon()
2279 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
H A Dconvolution_3x3.h4537 float tmp024a = output0_tm_0[1] + output0_tm_0[2]; in conv3x3s1_winograd64_neon4()
4546 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_neon4()
4547 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4()
4548 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon4()
4564 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_neon4()
4574 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4()
7887 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_neon5()
7897 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon5()
7898 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon5()
7920 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_neon5()
[all …]
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to1_fp16s.h1063 __fp16 tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1072 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1073 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1074 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1096 __fp16 tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
1105 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1106 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
1107 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
H A Dconvolution_3x3_pack4to1_bf16s.h1978 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
1987 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1988 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
1989 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2011 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
2020 … output0[0] = float32_to_bfloat16(bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2021 … output0[2] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 4 + tmp024c * 8); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
2022 … output0[4] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
H A Dconvolution_3x3_pack4to1.h2235 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack4to1_neon() local
2244 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon()
2245 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon()
2246 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
2268 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack4to1_neon() local
2277 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon()
2278 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon()
2279 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
H A Dconvolution_3x3.h4537 float tmp024a = output0_tm_0[1] + output0_tm_0[2]; in conv3x3s1_winograd64_neon4()
4546 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_neon4()
4547 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4()
4548 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon4()
4564 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_neon4()
4574 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4()
7887 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_neon5()
7897 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon5()
7898 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon5()
7920 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_neon5()
[all …]