Home
last modified time | relevance | path

Searched refs:_tmp04 (Results 1 – 25 of 56) sorted by relevance

123

/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8_fp16s.h277 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
288 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
312 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1153 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
1164 float16x8_t _tmp024b = vaddq_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1165 float16x8_t _tmp135b = vsubq_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1415 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
1418 … float16x8_t _r0tm0 = vfmsq_n_f16(vfmaq_n_f16(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack8_fp16sa_neon()
2216 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
2222 float16x8_t _tmp02b = vaddq_f16(_tmp03, _tmp04); in conv3x3s1_winograd42_pack8_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack4_bf16s.h173 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon()
1763 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
1774 float32x4_t _tmp024b = vaddq_f32(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_bf16s_neon()
1775 float32x4_t _tmp135b = vsubq_f32(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_bf16s_neon()
1924 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
1927 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
3439 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
3445 float32x4_t _tmp02b = vaddq_f32(_tmp03, _tmp04); in conv3x3s1_winograd42_pack4_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack8to4_fp16s.h295 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
300 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
306 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
318 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
330 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1155 float16x4_t _tmp04 = vld1_f16(tmp[m][4]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
1166 float16x4_t _tmp024b = vadd_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1167 float16x4_t _tmp135b = vsub_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
H A Dconvolution_3x3_pack4.h404 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd64_pack4_neon() local
415 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_neon()
439 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon()
1994 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd64_pack4_neon() local
2005 float32x4_t _tmp024b = vaddq_f32(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_neon()
2006 float32x4_t _tmp135b = vsubq_f32(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_neon()
2384 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd42_pack4_neon() local
2387 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_neon()
3899 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd42_pack4_neon() local
3905 float32x4_t _tmp02b = vaddq_f32(_tmp03, _tmp04); in conv3x3s1_winograd42_pack4_neon()
[all …]
H A Dconvolution_3x3_pack4_fp16s.h395 float16x4_t _tmp04 = vld1_f16(tmp[m][4]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
400 … float16x4_t _r0tm0 = vfma_n_f16(vsub_f16(_tmp00, _tmp06), vsub_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
406 float16x4_t _tmp12a = vfms_n_f16(vadd_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
418 … float16x4_t _tmp34a = vfms_n_f16(vfma_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
430 … float16x4_t _tmp56a = vfma_n_f16(_tmp06, vfms_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1099 float16x4_t _tmp04 = vld1_f16(tmp[m][4]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
1110 float16x4_t _tmp024b = vadd_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1111 float16x4_t _tmp135b = vsub_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_fp16sa_neon()
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8_fp16s.h277 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
288 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
312 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1153 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
1164 float16x8_t _tmp024b = vaddq_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1165 float16x8_t _tmp135b = vsubq_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1415 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
1418 … float16x8_t _r0tm0 = vfmsq_n_f16(vfmaq_n_f16(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack8_fp16sa_neon()
2216 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
2222 float16x8_t _tmp02b = vaddq_f16(_tmp03, _tmp04); in conv3x3s1_winograd42_pack8_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack4_bf16s.h173 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon()
1763 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
1774 float32x4_t _tmp024b = vaddq_f32(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_bf16s_neon()
1775 float32x4_t _tmp135b = vsubq_f32(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_bf16s_neon()
1924 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
1927 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
3439 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
3445 float32x4_t _tmp02b = vaddq_f32(_tmp03, _tmp04); in conv3x3s1_winograd42_pack4_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack8to4_fp16s.h295 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
300 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
306 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
318 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
330 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1155 float16x4_t _tmp04 = vld1_f16(tmp[m][4]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
1166 float16x4_t _tmp024b = vadd_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1167 float16x4_t _tmp135b = vsub_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
H A Dconvolution_3x3_pack4.h404 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd64_pack4_neon() local
415 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_neon()
439 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon()
1994 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd64_pack4_neon() local
2005 float32x4_t _tmp024b = vaddq_f32(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_neon()
2006 float32x4_t _tmp135b = vsubq_f32(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_neon()
2384 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd42_pack4_neon() local
2387 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_neon()
3899 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd42_pack4_neon() local
3905 float32x4_t _tmp02b = vaddq_f32(_tmp03, _tmp04); in conv3x3s1_winograd42_pack4_neon()
[all …]
H A Dconvolution_3x3_pack4_fp16s.h395 float16x4_t _tmp04 = vld1_f16(tmp[m][4]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
400 … float16x4_t _r0tm0 = vfma_n_f16(vsub_f16(_tmp00, _tmp06), vsub_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
406 float16x4_t _tmp12a = vfms_n_f16(vadd_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
418 … float16x4_t _tmp34a = vfms_n_f16(vfma_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
430 … float16x4_t _tmp56a = vfma_n_f16(_tmp06, vfms_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1099 float16x4_t _tmp04 = vld1_f16(tmp[m][4]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
1110 float16x4_t _tmp024b = vadd_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1111 float16x4_t _tmp135b = vsub_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_fp16sa_neon()
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8_fp16s.h277 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
288 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
312 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1153 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
1164 float16x8_t _tmp024b = vaddq_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1165 float16x8_t _tmp135b = vsubq_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1415 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
1418 … float16x8_t _r0tm0 = vfmsq_n_f16(vfmaq_n_f16(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack8_fp16sa_neon()
2216 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
2222 float16x8_t _tmp02b = vaddq_f16(_tmp03, _tmp04); in conv3x3s1_winograd42_pack8_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack4_bf16s.h173 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon()
1763 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
1774 float32x4_t _tmp024b = vaddq_f32(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_bf16s_neon()
1775 float32x4_t _tmp135b = vsubq_f32(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_bf16s_neon()
1924 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
1927 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
3439 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
3445 float32x4_t _tmp02b = vaddq_f32(_tmp03, _tmp04); in conv3x3s1_winograd42_pack4_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack8to4_fp16s.h295 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
300 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
306 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
318 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
330 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1155 float16x4_t _tmp04 = vld1_f16(tmp[m][4]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
1166 float16x4_t _tmp024b = vadd_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1167 float16x4_t _tmp135b = vsub_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
H A Dconvolution_3x3_pack4.h404 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd64_pack4_neon() local
415 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_neon()
439 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon()
1994 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd64_pack4_neon() local
2005 float32x4_t _tmp024b = vaddq_f32(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_neon()
2006 float32x4_t _tmp135b = vsubq_f32(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_neon()
2384 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd42_pack4_neon() local
2387 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_neon()
3899 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd42_pack4_neon() local
3905 float32x4_t _tmp02b = vaddq_f32(_tmp03, _tmp04); in conv3x3s1_winograd42_pack4_neon()
[all …]
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8_fp16s.h277 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
288 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
312 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1153 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
1164 float16x8_t _tmp024b = vaddq_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1165 float16x8_t _tmp135b = vsubq_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1415 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
1418 … float16x8_t _r0tm0 = vfmsq_n_f16(vfmaq_n_f16(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack8_fp16sa_neon()
2216 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
2222 float16x8_t _tmp02b = vaddq_f16(_tmp03, _tmp04); in conv3x3s1_winograd42_pack8_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack4_bf16s.h173 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon()
1763 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
1774 float32x4_t _tmp024b = vaddq_f32(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_bf16s_neon()
1775 float32x4_t _tmp135b = vsubq_f32(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_bf16s_neon()
1924 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
1927 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
3439 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
3445 float32x4_t _tmp02b = vaddq_f32(_tmp03, _tmp04); in conv3x3s1_winograd42_pack4_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack8to4_fp16s.h295 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
300 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
306 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
318 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
330 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1155 float16x4_t _tmp04 = vld1_f16(tmp[m][4]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
1166 float16x4_t _tmp024b = vadd_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1167 float16x4_t _tmp135b = vsub_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
H A Dconvolution_3x3_pack4.h404 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd64_pack4_neon() local
415 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_neon()
439 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon()
1994 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd64_pack4_neon() local
2005 float32x4_t _tmp024b = vaddq_f32(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_neon()
2006 float32x4_t _tmp135b = vsubq_f32(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_neon()
2384 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd42_pack4_neon() local
2387 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_neon()
3899 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd42_pack4_neon() local
3905 float32x4_t _tmp02b = vaddq_f32(_tmp03, _tmp04); in conv3x3s1_winograd42_pack4_neon()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Dconvolution_3x3_pack8_fp16s.h277 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
288 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
312 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1153 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
1164 float16x8_t _tmp024b = vaddq_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1165 float16x8_t _tmp135b = vsubq_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1415 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
1418 … float16x8_t _r0tm0 = vfmsq_n_f16(vfmaq_n_f16(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack8_fp16sa_neon()
2216 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
2222 float16x8_t _tmp02b = vaddq_f16(_tmp03, _tmp04); in conv3x3s1_winograd42_pack8_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack4_bf16s.h173 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon()
1763 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
1774 float32x4_t _tmp024b = vaddq_f32(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_bf16s_neon()
1775 float32x4_t _tmp135b = vsubq_f32(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_bf16s_neon()
1924 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
1927 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
3439 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
3445 float32x4_t _tmp02b = vaddq_f32(_tmp03, _tmp04); in conv3x3s1_winograd42_pack4_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack8to4_fp16s.h295 float16x8_t _tmp04 = vld1q_f16(tmp[m][4]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
300 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
306 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
318 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
330 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1155 float16x4_t _tmp04 = vld1_f16(tmp[m][4]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
1166 float16x4_t _tmp024b = vadd_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1167 float16x4_t _tmp135b = vsub_f16(_tmp03, _tmp04); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
H A Dconvolution_3x3_pack4.h404 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd64_pack4_neon() local
415 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_neon()
439 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon()
1994 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd64_pack4_neon() local
2005 float32x4_t _tmp024b = vaddq_f32(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_neon()
2006 float32x4_t _tmp135b = vsubq_f32(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_neon()
2384 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd42_pack4_neon() local
2387 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_neon()
3899 float32x4_t _tmp04 = vld1q_f32(tmp[m][4]); in conv3x3s1_winograd42_pack4_neon() local
3905 float32x4_t _tmp02b = vaddq_f32(_tmp03, _tmp04); in conv3x3s1_winograd42_pack4_neon()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/riscv/
H A Dconvolution_3x3_packn.h239 vfloat32m1_t _tmp04 = vle32_v_f32m1(tmp[m][4], vl); in conv3x3s1_winograd64_packn_rvv() local
247 … vfloat32m1_t _tmp12a = vfmacc_vf_f32m1(vfadd_vv_f32m1(_tmp02, _tmp06, vl), -4.25f, _tmp04, vl); in conv3x3s1_winograd64_packn_rvv()
259 …loat32m1_t _tmp56a = vfmacc_vf_f32m1(_tmp06, 4.f, vfmacc_vf_f32m1(_tmp02, -1.25f, _tmp04, vl), vl); in conv3x3s1_winograd64_packn_rvv()
706 vfloat32m1_t _tmp04 = vle32_v_f32m1(tmp[m][4], vl); in conv3x3s1_winograd64_packn_rvv() local
714 vfloat32m1_t _tmp024b = vfadd_vv_f32m1(_tmp03, _tmp04, vl); in conv3x3s1_winograd64_packn_rvv()
715 vfloat32m1_t _tmp135b = vfsub_vv_f32m1(_tmp03, _tmp04, vl); in conv3x3s1_winograd64_packn_rvv()
934 vfloat32m1_t _tmp04 = vle32_v_f32m1(tmp[m][4], vl); in conv3x3s1_winograd42_packn_rvv() local
937 … vfloat32m1_t _r0tm0 = vfmacc_vf_f32m1(vfmacc_vf_f32m1(_tmp04, 4.f, _tmp00, vl), -5.f, _tmp02, vl); in conv3x3s1_winograd42_packn_rvv()
1364 vfloat32m1_t _tmp04 = vle32_v_f32m1(tmp[m][4], vl); in conv3x3s1_winograd42_packn_rvv() local
1370 vfloat32m1_t _tmp02b = vfadd_vv_f32m1(_tmp03, _tmp04, vl); in conv3x3s1_winograd42_packn_rvv()
[all …]
H A Dconvolution_3x3_packn_fp16s.h239 vfloat16m1_t _tmp04 = vle16_v_f16m1(tmp[m][4], vl); in conv3x3s1_winograd64_packn_fp16sa_rvv() local
247 … vfloat16m1_t _tmp12a = vfmacc_vf_f16m1(vfadd_vv_f16m1(_tmp02, _tmp06, vl), -4.25f, _tmp04, vl); in conv3x3s1_winograd64_packn_fp16sa_rvv()
259 …loat16m1_t _tmp56a = vfmacc_vf_f16m1(_tmp06, 4.f, vfmacc_vf_f16m1(_tmp02, -1.25f, _tmp04, vl), vl); in conv3x3s1_winograd64_packn_fp16sa_rvv()
706 vfloat16m1_t _tmp04 = vle16_v_f16m1(tmp[m][4], vl); in conv3x3s1_winograd64_packn_fp16sa_rvv() local
714 vfloat16m1_t _tmp024b = vfadd_vv_f16m1(_tmp03, _tmp04, vl); in conv3x3s1_winograd64_packn_fp16sa_rvv()
715 vfloat16m1_t _tmp135b = vfsub_vv_f16m1(_tmp03, _tmp04, vl); in conv3x3s1_winograd64_packn_fp16sa_rvv()
934 vfloat16m1_t _tmp04 = vle16_v_f16m1(tmp[m][4], vl); in conv3x3s1_winograd42_packn_fp16sa_rvv() local
937 … vfloat16m1_t _r0tm0 = vfmacc_vf_f16m1(vfmacc_vf_f16m1(_tmp04, 4.f, _tmp00, vl), -5.f, _tmp02, vl); in conv3x3s1_winograd42_packn_fp16sa_rvv()
1364 vfloat16m1_t _tmp04 = vle16_v_f16m1(tmp[m][4], vl); in conv3x3s1_winograd42_packn_fp16sa_rvv() local
1370 vfloat16m1_t _tmp02b = vfadd_vv_f16m1(_tmp03, _tmp04, vl); in conv3x3s1_winograd42_packn_fp16sa_rvv()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/mips/
H A Dconvolution_3x3_pack4.h242 v4f32 _tmp04 = (v4f32)__msa_ld_w(tmp[m][4], 0); in conv3x3s1_winograd64_pack4_msa() local
250 … v4f32 _tmp12a = __msa_fmadd_w(__msa_fadd_w(_tmp02, _tmp06), _vm4_25, _tmp04); in conv3x3s1_winograd64_pack4_msa()
262 … v4f32 _tmp56a = __msa_fmadd_w(_tmp06, _v4, __msa_fmadd_w(_tmp02, _vm1_25, _tmp04)); in conv3x3s1_winograd64_pack4_msa()
848 v4f32 _tmp04 = (v4f32)__msa_ld_w(tmp[m][4], 0); in conv3x3s1_winograd64_pack4_msa() local
856 v4f32 _tmp024b = __msa_fadd_w(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_msa()
857 v4f32 _tmp135b = __msa_fsub_w(_tmp03, _tmp04); in conv3x3s1_winograd64_pack4_msa()
1076 v4f32 _tmp04 = (v4f32)__msa_ld_w(tmp[m][4], 0); in conv3x3s1_winograd42_pack4_msa() local
1079 … v4f32 _r0tm0 = __msa_fmadd_w(__msa_fmadd_w(_tmp04, _v4, _tmp00), _vm5, _tmp02); in conv3x3s1_winograd42_pack4_msa()
1643 v4f32 _tmp04 = (v4f32)__msa_ld_w(tmp[m][4], 0); in conv3x3s1_winograd42_pack4_msa() local
1649 v4f32 _tmp02b = __msa_fadd_w(_tmp03, _tmp04); in conv3x3s1_winograd42_pack4_msa()
[all …]

123