/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack8to1_fp16s.h | 1063 __fp16 tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 1072 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1073 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1074 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1096 __fp16 tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 1105 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1106 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1107 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
H A D | convolution_3x3_pack4to1_bf16s.h | 1978 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 1987 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1988 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1989 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2011 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 2020 … output0[0] = float32_to_bfloat16(bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2021 … output0[2] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 4 + tmp024c * 8); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2022 … output0[4] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
|
H A D | convolution_3x3_pack4to1.h | 2235 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack4to1_neon() local 2244 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon() 2245 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon() 2246 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon() 2268 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack4to1_neon() local 2277 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon() 2278 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon() 2279 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
|
H A D | convolution_3x3.h | 4537 float tmp024a = output0_tm_0[1] + output0_tm_0[2]; in conv3x3s1_winograd64_neon4() 4546 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_neon4() 4547 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4() 4548 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon4() 4564 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_neon4() 4574 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4() 7887 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_neon5() 7897 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon5() 7898 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon5() 7920 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_neon5() [all …]
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack8to1_fp16s.h | 1063 __fp16 tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 1072 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1073 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1074 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1096 __fp16 tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 1105 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1106 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1107 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
H A D | convolution_3x3_pack4to1_bf16s.h | 1978 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 1987 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1988 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1989 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2011 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 2020 … output0[0] = float32_to_bfloat16(bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2021 … output0[2] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 4 + tmp024c * 8); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2022 … output0[4] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
|
H A D | convolution_3x3_pack4to1.h | 2235 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack4to1_neon() local 2244 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon() 2245 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon() 2246 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon() 2268 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack4to1_neon() local 2277 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon() 2278 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon() 2279 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
|
H A D | convolution_3x3.h | 4537 float tmp024a = output0_tm_0[1] + output0_tm_0[2]; in conv3x3s1_winograd64_neon4() 4546 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_neon4() 4547 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4() 4548 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon4() 4564 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_neon4() 4574 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4() 7887 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_neon5() 7897 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon5() 7898 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon5() 7920 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_neon5() [all …]
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack8to1_fp16s.h | 1063 __fp16 tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 1072 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1073 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1074 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1096 __fp16 tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 1105 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1106 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1107 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
H A D | convolution_3x3_pack4to1_bf16s.h | 1978 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 1987 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1988 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1989 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2011 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 2020 … output0[0] = float32_to_bfloat16(bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2021 … output0[2] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 4 + tmp024c * 8); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2022 … output0[4] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
|
H A D | convolution_3x3_pack4to1.h | 2235 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack4to1_neon() local 2244 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon() 2245 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon() 2246 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon() 2268 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack4to1_neon() local 2277 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon() 2278 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon() 2279 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
|
H A D | convolution_3x3.h | 4537 float tmp024a = output0_tm_0[1] + output0_tm_0[2]; in conv3x3s1_winograd64_neon4() 4546 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_neon4() 4547 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4() 4548 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon4() 4564 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_neon4() 4574 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4() 7887 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_neon5() 7897 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon5() 7898 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon5() 7920 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_neon5() [all …]
|
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/ |
H A D | convolution_3x3_pack8to1_fp16s.h | 1063 __fp16 tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 1072 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1073 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1074 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1096 __fp16 tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 1105 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1106 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1107 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
H A D | convolution_3x3_pack4to1_bf16s.h | 1978 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 1987 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1988 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1989 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2011 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 2020 … output0[0] = float32_to_bfloat16(bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2021 … output0[2] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 4 + tmp024c * 8); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2022 … output0[4] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
|
H A D | convolution_3x3_pack4to1.h | 2235 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack4to1_neon() local 2244 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon() 2245 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon() 2246 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon() 2268 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack4to1_neon() local 2277 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon() 2278 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon() 2279 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
|
H A D | convolution_3x3.h | 4537 float tmp024a = output0_tm_0[1] + output0_tm_0[2]; in conv3x3s1_winograd64_neon4() 4546 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_neon4() 4547 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4() 4548 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon4() 4564 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_neon4() 4574 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4() 7887 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_neon5() 7897 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon5() 7898 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon5() 7920 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_neon5() [all …]
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack8to1_fp16s.h | 1063 __fp16 tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 1072 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1073 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1074 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1096 __fp16 tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 1105 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1106 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1107 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
H A D | convolution_3x3_pack4to1_bf16s.h | 1978 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 1987 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1988 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1989 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2011 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 2020 … output0[0] = float32_to_bfloat16(bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2021 … output0[2] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 4 + tmp024c * 8); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2022 … output0[4] = float32_to_bfloat16(bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
|
H A D | convolution_3x3_pack4to1.h | 2235 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_pack4to1_neon() local 2244 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon() 2245 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon() 2246 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon() 2268 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_pack4to1_neon() local 2277 output0[0] = bias0 + tmp0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_pack4to1_neon() 2278 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_pack4to1_neon() 2279 output0[4] = bias0 + tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_pack4to1_neon()
|
H A D | convolution_3x3.h | 4537 float tmp024a = output0_tm_0[1] + output0_tm_0[2]; in conv3x3s1_winograd64_neon4() 4546 tmp[0][m] = output0_tm_0[0] + tmp024a + tmp024b + tmp024c * 32; in conv3x3s1_winograd64_neon4() 4547 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4() 4548 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon4() 4564 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_neon4() 4574 output0[2] = bias0 + tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon4() 7887 float tmp024a = output0_tm_1[0] + output0_tm_2[0]; in conv3x3s1_winograd64_neon5() 7897 tmp[2][m] = tmp024a + tmp024b * 4 + tmp024c * 8; in conv3x3s1_winograd64_neon5() 7898 tmp[4][m] = tmp024a + tmp024b * 16 + tmp024c + tmp024c; in conv3x3s1_winograd64_neon5() 7920 float tmp024a = tmp0[1] + tmp0[2]; in conv3x3s1_winograd64_neon5() [all …]
|