Home
last modified time | relevance | path

Searched refs:_tmp01 (Results 1 – 25 of 56) sorted by relevance

123

/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8_fp16s.h274 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
283 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
289 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1150 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
1158 float16x8_t _tmp024a = vaddq_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1159 float16x8_t _tmp135a = vsubq_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1412 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
1423 … float16x8_t _r0tm5 = vfmsq_n_f16(vfmaq_n_f16(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack8_fp16sa_neon()
2213 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
2219 float16x8_t _tmp02a = vaddq_f16(_tmp01, _tmp02); in conv3x3s1_winograd42_pack8_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack4_bf16s.h170 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
179 … float32x4_t _r0tm7 = vmlaq_n_f32(vsubq_f32(_tmp07, _tmp01), vsubq_f32(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
1760 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
1768 float32x4_t _tmp024a = vaddq_f32(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_bf16s_neon()
1769 float32x4_t _tmp135a = vsubq_f32(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_bf16s_neon()
1921 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
1932 … float32x4_t _r0tm5 = vmlsq_n_f32(vmlaq_n_f32(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
3436 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
3442 float32x4_t _tmp02a = vaddq_f32(_tmp01, _tmp02); in conv3x3s1_winograd42_pack4_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack8to4_fp16s.h292 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
301 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
307 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
319 …float16x8_t _tmp34b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
331 …float16x8_t _tmp56b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5… in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1152 float16x4_t _tmp01 = vld1_f16(tmp[m][1]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
1160 float16x4_t _tmp024a = vadd_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1161 float16x4_t _tmp135a = vsub_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
H A Dconvolution_3x3_pack4.h401 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd64_pack4_neon() local
410 … float32x4_t _r0tm7 = vmlaq_n_f32(vsubq_f32(_tmp07, _tmp01), vsubq_f32(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_neon()
416 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_neon()
1991 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd64_pack4_neon() local
1999 float32x4_t _tmp024a = vaddq_f32(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_neon()
2000 float32x4_t _tmp135a = vsubq_f32(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_neon()
2381 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd42_pack4_neon() local
2392 … float32x4_t _r0tm5 = vmlsq_n_f32(vmlaq_n_f32(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack4_neon()
3896 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd42_pack4_neon() local
3902 float32x4_t _tmp02a = vaddq_f32(_tmp01, _tmp02); in conv3x3s1_winograd42_pack4_neon()
[all …]
H A Dconvolution_3x3_pack4_fp16s.h392 float16x4_t _tmp01 = vld1_f16(tmp[m][1]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
401 … float16x4_t _r0tm7 = vfma_n_f16(vsub_f16(_tmp07, _tmp01), vsub_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
407 float16x4_t _tmp12b = vfms_n_f16(vadd_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
419 … float16x4_t _tmp34b = vfma_n_f16(vfms_n_f16(vmul_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
431 … float16x4_t _tmp56b = vfma_n_f16(vfms_n_f16(vmul_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1096 float16x4_t _tmp01 = vld1_f16(tmp[m][1]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
1104 float16x4_t _tmp024a = vadd_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1105 float16x4_t _tmp135a = vsub_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_fp16sa_neon()
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8_fp16s.h274 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
283 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
289 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1150 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
1158 float16x8_t _tmp024a = vaddq_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1159 float16x8_t _tmp135a = vsubq_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1412 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
1423 … float16x8_t _r0tm5 = vfmsq_n_f16(vfmaq_n_f16(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack8_fp16sa_neon()
2213 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
2219 float16x8_t _tmp02a = vaddq_f16(_tmp01, _tmp02); in conv3x3s1_winograd42_pack8_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack4_bf16s.h170 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
179 … float32x4_t _r0tm7 = vmlaq_n_f32(vsubq_f32(_tmp07, _tmp01), vsubq_f32(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
1760 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
1768 float32x4_t _tmp024a = vaddq_f32(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_bf16s_neon()
1769 float32x4_t _tmp135a = vsubq_f32(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_bf16s_neon()
1921 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
1932 … float32x4_t _r0tm5 = vmlsq_n_f32(vmlaq_n_f32(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
3436 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
3442 float32x4_t _tmp02a = vaddq_f32(_tmp01, _tmp02); in conv3x3s1_winograd42_pack4_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack8to4_fp16s.h292 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
301 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
307 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
319 …float16x8_t _tmp34b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
331 …float16x8_t _tmp56b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5… in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1152 float16x4_t _tmp01 = vld1_f16(tmp[m][1]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
1160 float16x4_t _tmp024a = vadd_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1161 float16x4_t _tmp135a = vsub_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
H A Dconvolution_3x3_pack4.h401 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd64_pack4_neon() local
410 … float32x4_t _r0tm7 = vmlaq_n_f32(vsubq_f32(_tmp07, _tmp01), vsubq_f32(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_neon()
416 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_neon()
1991 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd64_pack4_neon() local
1999 float32x4_t _tmp024a = vaddq_f32(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_neon()
2000 float32x4_t _tmp135a = vsubq_f32(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_neon()
2381 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd42_pack4_neon() local
2392 … float32x4_t _r0tm5 = vmlsq_n_f32(vmlaq_n_f32(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack4_neon()
3896 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd42_pack4_neon() local
3902 float32x4_t _tmp02a = vaddq_f32(_tmp01, _tmp02); in conv3x3s1_winograd42_pack4_neon()
[all …]
H A Dconvolution_3x3_pack4_fp16s.h392 float16x4_t _tmp01 = vld1_f16(tmp[m][1]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
401 … float16x4_t _r0tm7 = vfma_n_f16(vsub_f16(_tmp07, _tmp01), vsub_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
407 float16x4_t _tmp12b = vfms_n_f16(vadd_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
419 … float16x4_t _tmp34b = vfma_n_f16(vfms_n_f16(vmul_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
431 … float16x4_t _tmp56b = vfma_n_f16(vfms_n_f16(vmul_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1096 float16x4_t _tmp01 = vld1_f16(tmp[m][1]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
1104 float16x4_t _tmp024a = vadd_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1105 float16x4_t _tmp135a = vsub_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_fp16sa_neon()
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8_fp16s.h274 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
283 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
289 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1150 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
1158 float16x8_t _tmp024a = vaddq_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1159 float16x8_t _tmp135a = vsubq_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1412 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
1423 … float16x8_t _r0tm5 = vfmsq_n_f16(vfmaq_n_f16(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack8_fp16sa_neon()
2213 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
2219 float16x8_t _tmp02a = vaddq_f16(_tmp01, _tmp02); in conv3x3s1_winograd42_pack8_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack4_bf16s.h170 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
179 … float32x4_t _r0tm7 = vmlaq_n_f32(vsubq_f32(_tmp07, _tmp01), vsubq_f32(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
1760 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
1768 float32x4_t _tmp024a = vaddq_f32(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_bf16s_neon()
1769 float32x4_t _tmp135a = vsubq_f32(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_bf16s_neon()
1921 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
1932 … float32x4_t _r0tm5 = vmlsq_n_f32(vmlaq_n_f32(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
3436 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
3442 float32x4_t _tmp02a = vaddq_f32(_tmp01, _tmp02); in conv3x3s1_winograd42_pack4_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack8to4_fp16s.h292 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
301 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
307 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
319 …float16x8_t _tmp34b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
331 …float16x8_t _tmp56b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5… in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1152 float16x4_t _tmp01 = vld1_f16(tmp[m][1]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
1160 float16x4_t _tmp024a = vadd_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1161 float16x4_t _tmp135a = vsub_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
H A Dconvolution_3x3_pack4.h401 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd64_pack4_neon() local
410 … float32x4_t _r0tm7 = vmlaq_n_f32(vsubq_f32(_tmp07, _tmp01), vsubq_f32(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_neon()
416 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_neon()
1991 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd64_pack4_neon() local
1999 float32x4_t _tmp024a = vaddq_f32(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_neon()
2000 float32x4_t _tmp135a = vsubq_f32(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_neon()
2381 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd42_pack4_neon() local
2392 … float32x4_t _r0tm5 = vmlsq_n_f32(vmlaq_n_f32(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack4_neon()
3896 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd42_pack4_neon() local
3902 float32x4_t _tmp02a = vaddq_f32(_tmp01, _tmp02); in conv3x3s1_winograd42_pack4_neon()
[all …]
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8_fp16s.h274 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
283 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
289 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1150 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
1158 float16x8_t _tmp024a = vaddq_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1159 float16x8_t _tmp135a = vsubq_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1412 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
1423 … float16x8_t _r0tm5 = vfmsq_n_f16(vfmaq_n_f16(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack8_fp16sa_neon()
2213 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
2219 float16x8_t _tmp02a = vaddq_f16(_tmp01, _tmp02); in conv3x3s1_winograd42_pack8_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack4_bf16s.h170 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
179 … float32x4_t _r0tm7 = vmlaq_n_f32(vsubq_f32(_tmp07, _tmp01), vsubq_f32(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
1760 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
1768 float32x4_t _tmp024a = vaddq_f32(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_bf16s_neon()
1769 float32x4_t _tmp135a = vsubq_f32(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_bf16s_neon()
1921 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
1932 … float32x4_t _r0tm5 = vmlsq_n_f32(vmlaq_n_f32(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
3436 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
3442 float32x4_t _tmp02a = vaddq_f32(_tmp01, _tmp02); in conv3x3s1_winograd42_pack4_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack8to4_fp16s.h292 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
301 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
307 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
319 …float16x8_t _tmp34b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
331 …float16x8_t _tmp56b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5… in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1152 float16x4_t _tmp01 = vld1_f16(tmp[m][1]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
1160 float16x4_t _tmp024a = vadd_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1161 float16x4_t _tmp135a = vsub_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
H A Dconvolution_3x3_pack4.h401 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd64_pack4_neon() local
410 … float32x4_t _r0tm7 = vmlaq_n_f32(vsubq_f32(_tmp07, _tmp01), vsubq_f32(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_neon()
416 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_neon()
1991 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd64_pack4_neon() local
1999 float32x4_t _tmp024a = vaddq_f32(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_neon()
2000 float32x4_t _tmp135a = vsubq_f32(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_neon()
2381 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd42_pack4_neon() local
2392 … float32x4_t _r0tm5 = vmlsq_n_f32(vmlaq_n_f32(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack4_neon()
3896 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd42_pack4_neon() local
3902 float32x4_t _tmp02a = vaddq_f32(_tmp01, _tmp02); in conv3x3s1_winograd42_pack4_neon()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Dconvolution_3x3_pack8_fp16s.h274 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
283 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
289 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1150 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
1158 float16x8_t _tmp024a = vaddq_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1159 float16x8_t _tmp135a = vsubq_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1412 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
1423 … float16x8_t _r0tm5 = vfmsq_n_f16(vfmaq_n_f16(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack8_fp16sa_neon()
2213 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd42_pack8_fp16sa_neon() local
2219 float16x8_t _tmp02a = vaddq_f16(_tmp01, _tmp02); in conv3x3s1_winograd42_pack8_fp16sa_neon()
[all …]
H A Dconvolution_3x3_pack4_bf16s.h170 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
179 … float32x4_t _r0tm7 = vmlaq_n_f32(vsubq_f32(_tmp07, _tmp01), vsubq_f32(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
1760 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd64_pack4_bf16s_neon() local
1768 float32x4_t _tmp024a = vaddq_f32(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_bf16s_neon()
1769 float32x4_t _tmp135a = vsubq_f32(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_bf16s_neon()
1921 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
1932 … float32x4_t _r0tm5 = vmlsq_n_f32(vmlaq_n_f32(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
3436 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd42_pack4_bf16s_neon() local
3442 float32x4_t _tmp02a = vaddq_f32(_tmp01, _tmp02); in conv3x3s1_winograd42_pack4_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack8to4_fp16s.h292 float16x8_t _tmp01 = vld1q_f16(tmp[m][1]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
301 … float16x8_t _r0tm7 = vfmaq_n_f16(vsubq_f16(_tmp07, _tmp01), vsubq_f16(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
307 float16x8_t _tmp12b = vfmsq_n_f16(vaddq_f16(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
319 …float16x8_t _tmp34b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
331 …float16x8_t _tmp56b = vfmaq_n_f16(vfmsq_n_f16(vmulq_n_f16(_tmp01, 2.f), _tmp03, 2.5f), _tmp05, 0.5… in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1152 float16x4_t _tmp01 = vld1_f16(tmp[m][1]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
1160 float16x4_t _tmp024a = vadd_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1161 float16x4_t _tmp135a = vsub_f16(_tmp01, _tmp02); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
H A Dconvolution_3x3_pack4.h401 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd64_pack4_neon() local
410 … float32x4_t _r0tm7 = vmlaq_n_f32(vsubq_f32(_tmp07, _tmp01), vsubq_f32(_tmp03, _tmp05), 5.25f); in conv3x3s1_winograd64_pack4_neon()
416 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_neon()
1991 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd64_pack4_neon() local
1999 float32x4_t _tmp024a = vaddq_f32(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_neon()
2000 float32x4_t _tmp135a = vsubq_f32(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_neon()
2381 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd42_pack4_neon() local
2392 … float32x4_t _r0tm5 = vmlsq_n_f32(vmlaq_n_f32(_tmp05, _tmp01, 4.f), _tmp03, 5.f); in conv3x3s1_winograd42_pack4_neon()
3896 float32x4_t _tmp01 = vld1q_f32(tmp[m][1]); in conv3x3s1_winograd42_pack4_neon() local
3902 float32x4_t _tmp02a = vaddq_f32(_tmp01, _tmp02); in conv3x3s1_winograd42_pack4_neon()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/riscv/
H A Dconvolution_3x3_packn.h236 vfloat32m1_t _tmp01 = vle32_v_f32m1(tmp[m][1], vl); in conv3x3s1_winograd64_packn_rvv() local
245 …vfloat32m1_t _r0tm7 = vfmacc_vf_f32m1(vfsub_vv_f32m1(_tmp07, _tmp01, vl), 5.25f, vfsub_vv_f32m1(_t… in conv3x3s1_winograd64_packn_rvv()
248 … vfloat32m1_t _tmp12b = vfmacc_vf_f32m1(vfadd_vv_f32m1(_tmp01, _tmp05, vl), -4.25f, _tmp03, vl); in conv3x3s1_winograd64_packn_rvv()
703 vfloat32m1_t _tmp01 = vle32_v_f32m1(tmp[m][1], vl); in conv3x3s1_winograd64_packn_rvv() local
711 vfloat32m1_t _tmp024a = vfadd_vv_f32m1(_tmp01, _tmp02, vl); in conv3x3s1_winograd64_packn_rvv()
712 vfloat32m1_t _tmp135a = vfsub_vv_f32m1(_tmp01, _tmp02, vl); in conv3x3s1_winograd64_packn_rvv()
931 vfloat32m1_t _tmp01 = vle32_v_f32m1(tmp[m][1], vl); in conv3x3s1_winograd42_packn_rvv() local
942 … vfloat32m1_t _r0tm5 = vfmacc_vf_f32m1(vfmacc_vf_f32m1(_tmp05, 4.f, _tmp01, vl), -5.f, _tmp03, vl); in conv3x3s1_winograd42_packn_rvv()
1361 vfloat32m1_t _tmp01 = vle32_v_f32m1(tmp[m][1], vl); in conv3x3s1_winograd42_packn_rvv() local
1367 vfloat32m1_t _tmp02a = vfadd_vv_f32m1(_tmp01, _tmp02, vl); in conv3x3s1_winograd42_packn_rvv()
[all …]
H A Dconvolution_3x3_packn_fp16s.h236 vfloat16m1_t _tmp01 = vle16_v_f16m1(tmp[m][1], vl); in conv3x3s1_winograd64_packn_fp16sa_rvv() local
245 …vfloat16m1_t _r0tm7 = vfmacc_vf_f16m1(vfsub_vv_f16m1(_tmp07, _tmp01, vl), 5.25f, vfsub_vv_f16m1(_t… in conv3x3s1_winograd64_packn_fp16sa_rvv()
248 … vfloat16m1_t _tmp12b = vfmacc_vf_f16m1(vfadd_vv_f16m1(_tmp01, _tmp05, vl), -4.25f, _tmp03, vl); in conv3x3s1_winograd64_packn_fp16sa_rvv()
703 vfloat16m1_t _tmp01 = vle16_v_f16m1(tmp[m][1], vl); in conv3x3s1_winograd64_packn_fp16sa_rvv() local
711 vfloat16m1_t _tmp024a = vfadd_vv_f16m1(_tmp01, _tmp02, vl); in conv3x3s1_winograd64_packn_fp16sa_rvv()
712 vfloat16m1_t _tmp135a = vfsub_vv_f16m1(_tmp01, _tmp02, vl); in conv3x3s1_winograd64_packn_fp16sa_rvv()
931 vfloat16m1_t _tmp01 = vle16_v_f16m1(tmp[m][1], vl); in conv3x3s1_winograd42_packn_fp16sa_rvv() local
942 … vfloat16m1_t _r0tm5 = vfmacc_vf_f16m1(vfmacc_vf_f16m1(_tmp05, 4.f, _tmp01, vl), -5.f, _tmp03, vl); in conv3x3s1_winograd42_packn_fp16sa_rvv()
1361 vfloat16m1_t _tmp01 = vle16_v_f16m1(tmp[m][1], vl); in conv3x3s1_winograd42_packn_fp16sa_rvv() local
1367 vfloat16m1_t _tmp02a = vfadd_vv_f16m1(_tmp01, _tmp02, vl); in conv3x3s1_winograd42_packn_fp16sa_rvv()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/mips/
H A Dconvolution_3x3_pack4.h239 v4f32 _tmp01 = (v4f32)__msa_ld_w(tmp[m][1], 0); in conv3x3s1_winograd64_pack4_msa() local
248 … v4f32 _r0tm7 = __msa_fmadd_w(__msa_fsub_w(_tmp07, _tmp01), _v5_25, __msa_fsub_w(_tmp03, _tmp05)); in conv3x3s1_winograd64_pack4_msa()
251 … v4f32 _tmp12b = __msa_fmadd_w(__msa_fadd_w(_tmp01, _tmp05), _vm4_25, _tmp03); in conv3x3s1_winograd64_pack4_msa()
845 v4f32 _tmp01 = (v4f32)__msa_ld_w(tmp[m][1], 0); in conv3x3s1_winograd64_pack4_msa() local
853 v4f32 _tmp024a = __msa_fadd_w(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_msa()
854 v4f32 _tmp135a = __msa_fsub_w(_tmp01, _tmp02); in conv3x3s1_winograd64_pack4_msa()
1073 v4f32 _tmp01 = (v4f32)__msa_ld_w(tmp[m][1], 0); in conv3x3s1_winograd42_pack4_msa() local
1084 … v4f32 _r0tm5 = __msa_fmadd_w(__msa_fmadd_w(_tmp05, _v4, _tmp01), _vm5, _tmp03); in conv3x3s1_winograd42_pack4_msa()
1640 v4f32 _tmp01 = (v4f32)__msa_ld_w(tmp[m][1], 0); in conv3x3s1_winograd42_pack4_msa() local
1646 v4f32 _tmp02a = __msa_fadd_w(_tmp01, _tmp02); in conv3x3s1_winograd42_pack4_msa()
[all …]

123