Home
last modified time | relevance | path

Searched refs:_tmp05 (Results 1 – 25 of 56) sorted by relevance

123

/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to4_fp16s.h296 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
301 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
307 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
319 …oat16x8_t _tmp34b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
331 …oat16x8_t _tmp56b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1156 float16x4_t _tmp05 = vld1_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
1172 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1173 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
H A Dconvolution_3x3_pack8_fp16s.h278 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
283 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
289 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
301 …oat16x8_t _tmp34b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1154 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
1170 float16x8_t _tmp024c = vaddq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1171 float16x8_t _tmp135c = vsubq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1416 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
1423 … float16x8_t _r0tm5 = vfmsq_n_f16(vfmaq_n_f16(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack8_fp16sa_neon()
2217 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
[all …]
H A Dconvolution_3x3_pack4_fp16s.h396 float16x4_t _tmp05 = vld1_f16(tmp[m][5]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
401 … float16x4_t _r0tm7 = vfma_n_f16(vsub_f16(_tmp07, _tmp01), vsub_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
407 float16x4_t _tmp12b = vfms_n_f16(vadd_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
419 … float16x4_t _tmp34b = vfma_n_f16(vfms_n_f16(vmul_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
431 … float16x4_t _tmp56b = vfma_n_f16(vfms_n_f16(vmul_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1100 float16x4_t _tmp05 = vld1_f16(tmp[m][5]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
1116 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1117 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
H A Dconvolution_3x3_pack4_bf16s.h174 float32x4_t _tmp05 = vld1q_f32(tmp[m][5]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
179 … float32x4_t _r0tm7 = vmlaq_n_f32(vsubq_f32(_tmp07, _tmp01), vsubq_f32(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
197 …oat32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack4_bf16s_neon()
1764 float32x4_t _tmp05 = vld1q_f32(tmp[m][5]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
1780 float32x4_t _tmp024c = vaddq_f32(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_bf16s_neon()
1781 float32x4_t _tmp135c = vsubq_f32(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_bf16s_neon()
1925 float32x4_t _tmp05 = vld1q_f32(tmp[m][5]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
1932 … float32x4_t _r0tm5 = vmlsq_n_f32(vmlaq_n_f32(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
3440 float32x4_t _tmp05 = vld1q_f32(tmp[m][5]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
[all …]
H A Dconvolution_3x3_pack8to1_fp16s.h290 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
295 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
301 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
313 …oat16x8_t _tmp34b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
325 …oat16x8_t _tmp56b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to4_fp16s.h296 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
301 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
307 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
319 …oat16x8_t _tmp34b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
331 …oat16x8_t _tmp56b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1156 float16x4_t _tmp05 = vld1_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
1172 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1173 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
H A Dconvolution_3x3_pack8_fp16s.h278 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
283 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
289 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
301 …oat16x8_t _tmp34b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1154 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
1170 float16x8_t _tmp024c = vaddq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1171 float16x8_t _tmp135c = vsubq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1416 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
1423 … float16x8_t _r0tm5 = vfmsq_n_f16(vfmaq_n_f16(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack8_fp16sa_neon()
2217 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
[all …]
H A Dconvolution_3x3_pack4_fp16s.h396 float16x4_t _tmp05 = vld1_f16(tmp[m][5]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
401 … float16x4_t _r0tm7 = vfma_n_f16(vsub_f16(_tmp07, _tmp01), vsub_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
407 float16x4_t _tmp12b = vfms_n_f16(vadd_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
419 … float16x4_t _tmp34b = vfma_n_f16(vfms_n_f16(vmul_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
431 … float16x4_t _tmp56b = vfma_n_f16(vfms_n_f16(vmul_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1100 float16x4_t _tmp05 = vld1_f16(tmp[m][5]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
1116 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1117 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
H A Dconvolution_3x3_pack4_bf16s.h174 float32x4_t _tmp05 = vld1q_f32(tmp[m][5]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
179 … float32x4_t _r0tm7 = vmlaq_n_f32(vsubq_f32(_tmp07, _tmp01), vsubq_f32(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
197 …oat32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack4_bf16s_neon()
1764 float32x4_t _tmp05 = vld1q_f32(tmp[m][5]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
1780 float32x4_t _tmp024c = vaddq_f32(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_bf16s_neon()
1781 float32x4_t _tmp135c = vsubq_f32(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_bf16s_neon()
1925 float32x4_t _tmp05 = vld1q_f32(tmp[m][5]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
1932 … float32x4_t _r0tm5 = vmlsq_n_f32(vmlaq_n_f32(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
3440 float32x4_t _tmp05 = vld1q_f32(tmp[m][5]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
[all …]
H A Dconvolution_3x3_pack8to1_fp16s.h290 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
295 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
301 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
313 …oat16x8_t _tmp34b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
325 …oat16x8_t _tmp56b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to4_fp16s.h296 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
301 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
307 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
319 …oat16x8_t _tmp34b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
331 …oat16x8_t _tmp56b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1156 float16x4_t _tmp05 = vld1_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
1172 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1173 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
H A Dconvolution_3x3_pack8_fp16s.h278 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
283 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
289 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
301 …oat16x8_t _tmp34b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1154 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
1170 float16x8_t _tmp024c = vaddq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1171 float16x8_t _tmp135c = vsubq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1416 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
1423 … float16x8_t _r0tm5 = vfmsq_n_f16(vfmaq_n_f16(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack8_fp16sa_neon()
2217 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
[all …]
H A Dconvolution_3x3_pack4_fp16s.h396 float16x4_t _tmp05 = vld1_f16(tmp[m][5]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
401 … float16x4_t _r0tm7 = vfma_n_f16(vsub_f16(_tmp07, _tmp01), vsub_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
407 float16x4_t _tmp12b = vfms_n_f16(vadd_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
419 … float16x4_t _tmp34b = vfma_n_f16(vfms_n_f16(vmul_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
431 … float16x4_t _tmp56b = vfma_n_f16(vfms_n_f16(vmul_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1100 float16x4_t _tmp05 = vld1_f16(tmp[m][5]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
1116 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1117 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
H A Dconvolution_3x3_pack4_bf16s.h174 float32x4_t _tmp05 = vld1q_f32(tmp[m][5]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
179 … float32x4_t _r0tm7 = vmlaq_n_f32(vsubq_f32(_tmp07, _tmp01), vsubq_f32(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
197 …oat32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack4_bf16s_neon()
1764 float32x4_t _tmp05 = vld1q_f32(tmp[m][5]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
1780 float32x4_t _tmp024c = vaddq_f32(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_bf16s_neon()
1781 float32x4_t _tmp135c = vsubq_f32(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_bf16s_neon()
1925 float32x4_t _tmp05 = vld1q_f32(tmp[m][5]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
1932 … float32x4_t _r0tm5 = vmlsq_n_f32(vmlaq_n_f32(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
3440 float32x4_t _tmp05 = vld1q_f32(tmp[m][5]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
[all …]
H A Dconvolution_3x3_pack8to1_fp16s.h290 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
295 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
301 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
313 …oat16x8_t _tmp34b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
325 …oat16x8_t _tmp56b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Dconvolution_3x3_pack8to4_fp16s.h296 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
301 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
307 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
319 …oat16x8_t _tmp34b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
331 …oat16x8_t _tmp56b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1156 float16x4_t _tmp05 = vld1_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
1172 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1173 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
H A Dconvolution_3x3_pack8_fp16s.h278 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
283 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
289 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
301 …oat16x8_t _tmp34b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1154 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
1170 float16x8_t _tmp024c = vaddq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1171 float16x8_t _tmp135c = vsubq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1416 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
1423 … float16x8_t _r0tm5 = vfmsq_n_f16(vfmaq_n_f16(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack8_fp16sa_neon()
2217 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
[all …]
H A Dconvolution_3x3_pack4_fp16s.h396 float16x4_t _tmp05 = vld1_f16(tmp[m][5]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
401 … float16x4_t _r0tm7 = vfma_n_f16(vsub_f16(_tmp07, _tmp01), vsub_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
407 float16x4_t _tmp12b = vfms_n_f16(vadd_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
419 … float16x4_t _tmp34b = vfma_n_f16(vfms_n_f16(vmul_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
431 … float16x4_t _tmp56b = vfma_n_f16(vfms_n_f16(vmul_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1100 float16x4_t _tmp05 = vld1_f16(tmp[m][5]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
1116 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1117 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
H A Dconvolution_3x3_pack4_bf16s.h174 float32x4_t _tmp05 = vld1q_f32(tmp[m][5]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
179 … float32x4_t _r0tm7 = vmlaq_n_f32(vsubq_f32(_tmp07, _tmp01), vsubq_f32(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
197 …oat32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack4_bf16s_neon()
1764 float32x4_t _tmp05 = vld1q_f32(tmp[m][5]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
1780 float32x4_t _tmp024c = vaddq_f32(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_bf16s_neon()
1781 float32x4_t _tmp135c = vsubq_f32(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_bf16s_neon()
1925 float32x4_t _tmp05 = vld1q_f32(tmp[m][5]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
1932 … float32x4_t _r0tm5 = vmlsq_n_f32(vmlaq_n_f32(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
3440 float32x4_t _tmp05 = vld1q_f32(tmp[m][5]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
[all …]
H A Dconvolution_3x3_pack8to1_fp16s.h290 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
295 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
301 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
313 …oat16x8_t _tmp34b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
325 …oat16x8_t _tmp56b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to4_fp16s.h296 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
301 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
307 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
319 …oat16x8_t _tmp34b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
331 …oat16x8_t _tmp56b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1156 float16x4_t _tmp05 = vld1_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
1172 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1173 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
H A Dconvolution_3x3_pack8_fp16s.h278 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
283 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
289 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
301 …oat16x8_t _tmp34b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1154 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
1170 float16x8_t _tmp024c = vaddq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1171 float16x8_t _tmp135c = vsubq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1416 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
1423 … float16x8_t _r0tm5 = vfmsq_n_f16(vfmaq_n_f16(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack8_fp16sa_neon()
2217 float16x8_t _tmp05 = vld1q_f16(tmp[m][5]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
[all …]
H A Dconvolution_3x3_pack4_fp16s.h396 float16x4_t _tmp05 = vld1_f16(tmp[m][5]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
401 … float16x4_t _r0tm7 = vfma_n_f16(vsub_f16(_tmp07, _tmp01), vsub_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
407 float16x4_t _tmp12b = vfms_n_f16(vadd_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
419 … float16x4_t _tmp34b = vfma_n_f16(vfms_n_f16(vmul_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
431 … float16x4_t _tmp56b = vfma_n_f16(vfms_n_f16(vmul_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1100 float16x4_t _tmp05 = vld1_f16(tmp[m][5]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
1116 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1117 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
H A Dconvolution_3x3_pack4_bf16s.h174 float32x4_t _tmp05 = vld1q_f32(tmp[m][5]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
179 … float32x4_t _r0tm7 = vmlaq_n_f32(vsubq_f32(_tmp07, _tmp01), vsubq_f32(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
197 …oat32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack4_bf16s_neon()
1764 float32x4_t _tmp05 = vld1q_f32(tmp[m][5]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
1780 float32x4_t _tmp024c = vaddq_f32(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_bf16s_neon()
1781 float32x4_t _tmp135c = vsubq_f32(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_bf16s_neon()
1925 float32x4_t _tmp05 = vld1q_f32(tmp[m][5]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
1932 … float32x4_t _r0tm5 = vmlsq_n_f32(vmlaq_n_f32(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
3440 float32x4_t _tmp05 = vld1q_f32(tmp[m][5]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/riscv/
H A Dconvolution_3x3_packn.h240 vfloat32m1_t _tmp05 = vle32_v_f32m1(tmp[m][5], vl); in conv3x3s1_winograd64_packn_rvv() local
245 …vfmacc_vf_f32m1(vfsub_vv_f32m1(_tmp07, _tmp01, vl), 5.25f, vfsub_vv_f32m1(_tmp03, _tmp05, vl), vl); in conv3x3s1_winograd64_packn_rvv()
248 … vfloat32m1_t _tmp12b = vfmacc_vf_f32m1(vfadd_vv_f32m1(_tmp01, _tmp05, vl), -4.25f, _tmp03, vl); in conv3x3s1_winograd64_packn_rvv()
254 …cc_vf_f32m1(vfmacc_vf_f32m1(vfmul_vf_f32m1(_tmp01, 0.5f, vl), -2.5f, _tmp03, vl), 2.f, _tmp05, vl); in conv3x3s1_winograd64_packn_rvv()
707 vfloat32m1_t _tmp05 = vle32_v_f32m1(tmp[m][5], vl); in conv3x3s1_winograd64_packn_rvv() local
717 vfloat32m1_t _tmp024c = vfadd_vv_f32m1(_tmp05, _tmp06, vl); in conv3x3s1_winograd64_packn_rvv()
718 vfloat32m1_t _tmp135c = vfsub_vv_f32m1(_tmp05, _tmp06, vl); in conv3x3s1_winograd64_packn_rvv()
935 vfloat32m1_t _tmp05 = vle32_v_f32m1(tmp[m][5], vl); in conv3x3s1_winograd42_packn_rvv() local
942 … vfloat32m1_t _r0tm5 = vfmacc_vf_f32m1(vfmacc_vf_f32m1(_tmp05, 4.f, _tmp01, vl), -5.f, _tmp03, vl); in conv3x3s1_winograd42_packn_rvv()
1365 vfloat32m1_t _tmp05 = vle32_v_f32m1(tmp[m][5], vl); in conv3x3s1_winograd42_packn_rvv() local
[all …]

123