Home
last modified time | relevance | path

Searched refs:_tmp06 (Results 1 – 25 of 48) sorted by relevance

12

/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to4_fp16s.h297 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
300 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
306 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
318 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
330 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1157 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
1172 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1173 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
H A Dconvolution_3x3_pack4_fp16s.h397 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
400 … float16x4_t _r0tm0 = vfma_n_f16(vsub_f16(_tmp00, _tmp06), vsub_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
406 float16x4_t _tmp12a = vfms_n_f16(vadd_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
418 … float16x4_t _tmp34a = vfms_n_f16(vfma_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
430 … float16x4_t _tmp56a = vfma_n_f16(_tmp06, vfms_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1101 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
1116 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1117 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
H A Dconvolution_3x3_pack8to1_fp16s.h291 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
294 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
300 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
312 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
324 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
H A Dconvolution_3x3_pack4to1_bf16s.h175 float32x4_t _tmp06 = vld1q_f32(tmp[m][6]); in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
178 … float32x4_t _r0tm0 = vmlaq_n_f32(vsubq_f32(_tmp00, _tmp06), vsubq_f32(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
196 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
H A Dconvolution_3x3_pack8_fp16s.h279 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
282 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
288 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
300 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
312 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1155 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
1170 float16x8_t _tmp024c = vaddq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1171 float16x8_t _tmp135c = vsubq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to4_fp16s.h297 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
300 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
306 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
318 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
330 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1157 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
1172 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1173 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
H A Dconvolution_3x3_pack4_fp16s.h397 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
400 … float16x4_t _r0tm0 = vfma_n_f16(vsub_f16(_tmp00, _tmp06), vsub_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
406 float16x4_t _tmp12a = vfms_n_f16(vadd_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
418 … float16x4_t _tmp34a = vfms_n_f16(vfma_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
430 … float16x4_t _tmp56a = vfma_n_f16(_tmp06, vfms_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1101 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
1116 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1117 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
H A Dconvolution_3x3_pack8to1_fp16s.h291 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
294 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
300 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
312 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
324 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
H A Dconvolution_3x3_pack4to1_bf16s.h175 float32x4_t _tmp06 = vld1q_f32(tmp[m][6]); in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
178 … float32x4_t _r0tm0 = vmlaq_n_f32(vsubq_f32(_tmp00, _tmp06), vsubq_f32(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
196 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
H A Dconvolution_3x3_pack8_fp16s.h279 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
282 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
288 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
300 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
312 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1155 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
1170 float16x8_t _tmp024c = vaddq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1171 float16x8_t _tmp135c = vsubq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to4_fp16s.h297 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
300 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
306 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
318 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
330 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1157 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
1172 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1173 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
H A Dconvolution_3x3_pack4_fp16s.h397 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
400 … float16x4_t _r0tm0 = vfma_n_f16(vsub_f16(_tmp00, _tmp06), vsub_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
406 float16x4_t _tmp12a = vfms_n_f16(vadd_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
418 … float16x4_t _tmp34a = vfms_n_f16(vfma_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
430 … float16x4_t _tmp56a = vfma_n_f16(_tmp06, vfms_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1101 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
1116 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1117 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
H A Dconvolution_3x3_pack8to1_fp16s.h291 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
294 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
300 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
312 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
324 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
H A Dconvolution_3x3_pack4to1_bf16s.h175 float32x4_t _tmp06 = vld1q_f32(tmp[m][6]); in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
178 … float32x4_t _r0tm0 = vmlaq_n_f32(vsubq_f32(_tmp00, _tmp06), vsubq_f32(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
196 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
H A Dconvolution_3x3_pack8_fp16s.h279 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
282 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
288 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
300 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
312 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1155 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
1170 float16x8_t _tmp024c = vaddq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1171 float16x8_t _tmp135c = vsubq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Dconvolution_3x3_pack8to4_fp16s.h297 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
300 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
306 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
318 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
330 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1157 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
1172 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1173 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
H A Dconvolution_3x3_pack4_fp16s.h397 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
400 … float16x4_t _r0tm0 = vfma_n_f16(vsub_f16(_tmp00, _tmp06), vsub_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
406 float16x4_t _tmp12a = vfms_n_f16(vadd_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
418 … float16x4_t _tmp34a = vfms_n_f16(vfma_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
430 … float16x4_t _tmp56a = vfma_n_f16(_tmp06, vfms_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1101 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
1116 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1117 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
H A Dconvolution_3x3_pack8to1_fp16s.h291 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
294 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
300 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
312 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
324 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
H A Dconvolution_3x3_pack4to1_bf16s.h175 float32x4_t _tmp06 = vld1q_f32(tmp[m][6]); in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
178 … float32x4_t _r0tm0 = vmlaq_n_f32(vsubq_f32(_tmp00, _tmp06), vsubq_f32(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
196 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
H A Dconvolution_3x3_pack8_fp16s.h279 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
282 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
288 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
300 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
312 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1155 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
1170 float16x8_t _tmp024c = vaddq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1171 float16x8_t _tmp135c = vsubq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack8to4_fp16s.h297 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
300 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
306 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
318 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
330 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1157 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local
1172 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
1173 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
H A Dconvolution_3x3_pack4_fp16s.h397 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
400 … float16x4_t _r0tm0 = vfma_n_f16(vsub_f16(_tmp00, _tmp06), vsub_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
406 float16x4_t _tmp12a = vfms_n_f16(vadd_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
418 … float16x4_t _tmp34a = vfms_n_f16(vfma_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
430 … float16x4_t _tmp56a = vfma_n_f16(_tmp06, vfms_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1101 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local
1116 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
1117 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
H A Dconvolution_3x3_pack8to1_fp16s.h291 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local
294 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
300 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
312 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
324 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
H A Dconvolution_3x3_pack4to1_bf16s.h175 float32x4_t _tmp06 = vld1q_f32(tmp[m][6]); in conv3x3s1_winograd64_pack4to1_bf16s_neon() local
178 … float32x4_t _r0tm0 = vmlaq_n_f32(vsubq_f32(_tmp00, _tmp06), vsubq_f32(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
196 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
H A Dconvolution_3x3_pack8_fp16s.h279 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
282 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
288 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
300 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
312 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1155 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local
1170 float16x8_t _tmp024c = vaddq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
1171 float16x8_t _tmp135c = vsubq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()

12