/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack8to1_fp16s.h | 1070 __fp16 tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 1076 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1077 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1078 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1103 __fp16 tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 1109 output0[1] = bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1110 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1111 output0[5] = bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
H A D | convolution_3x3_pack4to1_bf16s.h | 1985 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 1991 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1992 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1993 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2018 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 2024 … output0[1] = float32_to_bfloat16(bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2025 … output0[3] = float32_to_bfloat16(bias0 + tmp135a + tmp135b * 8 + tmp135c * 4); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2026 … output0[5] = float32_to_bfloat16(bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
|
H A D | convolution_3x3_pack4to1.h | 2242 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack4to1_neon() local 2248 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_neon() 2249 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_neon() 2250 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_neon() 2275 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack4to1_neon() local 2281 output0[1] = bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_neon() 2282 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_neon() 2283 output0[5] = bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_neon()
|
H A D | convolution_3x3.h | 4544 float tmp135c = output0_tm_4[1] - output0_tm_4[2]; in conv3x3s1_winograd64_neon4() 4550 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_neon4() 4551 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon4() 4552 tmp[5][m] = output0_tm_4[3] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_neon4() 4571 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_neon4() 4578 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon4() 7894 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_neon5() 7900 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_neon5() 7901 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon5() 7927 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_neon5() [all …]
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack8to1_fp16s.h | 1070 __fp16 tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 1076 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1077 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1078 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1103 __fp16 tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 1109 output0[1] = bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1110 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1111 output0[5] = bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
H A D | convolution_3x3_pack4to1_bf16s.h | 1985 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 1991 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1992 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1993 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2018 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 2024 … output0[1] = float32_to_bfloat16(bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2025 … output0[3] = float32_to_bfloat16(bias0 + tmp135a + tmp135b * 8 + tmp135c * 4); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2026 … output0[5] = float32_to_bfloat16(bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
|
H A D | convolution_3x3_pack4to1.h | 2242 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack4to1_neon() local 2248 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_neon() 2249 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_neon() 2250 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_neon() 2275 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack4to1_neon() local 2281 output0[1] = bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_neon() 2282 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_neon() 2283 output0[5] = bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_neon()
|
H A D | convolution_3x3.h | 4544 float tmp135c = output0_tm_4[1] - output0_tm_4[2]; in conv3x3s1_winograd64_neon4() 4550 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_neon4() 4551 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon4() 4552 tmp[5][m] = output0_tm_4[3] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_neon4() 4571 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_neon4() 4578 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon4() 7894 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_neon5() 7900 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_neon5() 7901 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon5() 7927 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_neon5() [all …]
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack8to1_fp16s.h | 1070 __fp16 tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 1076 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1077 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1078 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1103 __fp16 tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 1109 output0[1] = bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1110 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1111 output0[5] = bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
H A D | convolution_3x3_pack4to1_bf16s.h | 1985 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 1991 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1992 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1993 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2018 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 2024 … output0[1] = float32_to_bfloat16(bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2025 … output0[3] = float32_to_bfloat16(bias0 + tmp135a + tmp135b * 8 + tmp135c * 4); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2026 … output0[5] = float32_to_bfloat16(bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
|
H A D | convolution_3x3_pack4to1.h | 2242 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack4to1_neon() local 2248 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_neon() 2249 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_neon() 2250 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_neon() 2275 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack4to1_neon() local 2281 output0[1] = bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_neon() 2282 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_neon() 2283 output0[5] = bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_neon()
|
H A D | convolution_3x3.h | 4544 float tmp135c = output0_tm_4[1] - output0_tm_4[2]; in conv3x3s1_winograd64_neon4() 4550 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_neon4() 4551 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon4() 4552 tmp[5][m] = output0_tm_4[3] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_neon4() 4571 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_neon4() 4578 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon4() 7894 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_neon5() 7900 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_neon5() 7901 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon5() 7927 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_neon5() [all …]
|
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/ |
H A D | convolution_3x3_pack8to1_fp16s.h | 1070 __fp16 tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 1076 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1077 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1078 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1103 __fp16 tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 1109 output0[1] = bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1110 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1111 output0[5] = bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
H A D | convolution_3x3_pack4to1_bf16s.h | 1985 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 1991 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1992 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1993 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2018 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 2024 … output0[1] = float32_to_bfloat16(bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2025 … output0[3] = float32_to_bfloat16(bias0 + tmp135a + tmp135b * 8 + tmp135c * 4); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2026 … output0[5] = float32_to_bfloat16(bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
|
H A D | convolution_3x3_pack4to1.h | 2242 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack4to1_neon() local 2248 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_neon() 2249 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_neon() 2250 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_neon() 2275 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack4to1_neon() local 2281 output0[1] = bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_neon() 2282 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_neon() 2283 output0[5] = bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_neon()
|
H A D | convolution_3x3.h | 4544 float tmp135c = output0_tm_4[1] - output0_tm_4[2]; in conv3x3s1_winograd64_neon4() 4550 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_neon4() 4551 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon4() 4552 tmp[5][m] = output0_tm_4[3] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_neon4() 4571 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_neon4() 4578 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon4() 7894 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_neon5() 7900 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_neon5() 7901 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon5() 7927 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_neon5() [all …]
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack8to1_fp16s.h | 1070 __fp16 tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 1076 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1077 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1078 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1103 __fp16 tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 1109 output0[1] = bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1110 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 1111 output0[5] = bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
H A D | convolution_3x3_pack4to1_bf16s.h | 1985 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 1991 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1992 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 1993 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2018 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 2024 … output0[1] = float32_to_bfloat16(bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2025 … output0[3] = float32_to_bfloat16(bias0 + tmp135a + tmp135b * 8 + tmp135c * 4); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 2026 … output0[5] = float32_to_bfloat16(bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
|
H A D | convolution_3x3_pack4to1.h | 2242 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_pack4to1_neon() local 2248 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_neon() 2249 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_neon() 2250 tmp[5][m] = output0_tm_7[0] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_neon() 2275 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_pack4to1_neon() local 2281 output0[1] = bias0 + tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_pack4to1_neon() 2282 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_pack4to1_neon() 2283 output0[5] = bias0 + tmp0[7] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_pack4to1_neon()
|
H A D | convolution_3x3.h | 4544 float tmp135c = output0_tm_4[1] - output0_tm_4[2]; in conv3x3s1_winograd64_neon4() 4550 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_neon4() 4551 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon4() 4552 tmp[5][m] = output0_tm_4[3] + tmp135a + tmp135b * 32 + tmp135c; in conv3x3s1_winograd64_neon4() 4571 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_neon4() 4578 output0[3] = bias0 + tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon4() 7894 float tmp135c = output0_tm_5[0] - output0_tm_6[0]; in conv3x3s1_winograd64_neon5() 7900 tmp[1][m] = tmp135a + tmp135b + tmp135b + tmp135c * 16; in conv3x3s1_winograd64_neon5() 7901 tmp[3][m] = tmp135a + tmp135b * 8 + tmp135c * 4; in conv3x3s1_winograd64_neon5() 7927 float tmp135c = tmp0[5] - tmp0[6]; in conv3x3s1_winograd64_neon5() [all …]
|