Home
last modified time | relevance | path

Searched refs:vmlsq_n_f32 (Results 1 – 25 of 138) sorted by relevance

123456

/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack4to1_bf16s.h113 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
114 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
127 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
128 … float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 0.5f), _r03, 2.5f), _r05, 2.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
141 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
142 … float32x4_t _tmp56b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 2.f), _r03, 2.5f), _r05, 0.5f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
196 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
197 …float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack4to1_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack4_bf16s.h113 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
114 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
127 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
141 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon()
184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon()
1894 float32x4_t _tmp0m = vmlsq_n_f32(vmlaq_n_f32(_r04, _r00, 4.f), _r02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
1899 float32x4_t _tmp5m = vmlsq_n_f32(vmlaq_n_f32(_r05, _r01, 4.f), _r03, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
1927 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack4.h344 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4_neon()
345 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4_neon()
358 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4_neon()
372 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon()
415 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_neon()
416 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_neon()
439 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon()
2354 float32x4_t _tmp0m = vmlsq_n_f32(vmlaq_n_f32(_r04, _r00, 4.f), _r02, 5.f); in conv3x3s1_winograd42_pack4_neon()
2359 float32x4_t _tmp5m = vmlsq_n_f32(vmlaq_n_f32(_r05, _r01, 4.f), _r03, 5.f); in conv3x3s1_winograd42_pack4_neon()
2387 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_neon()
[all …]
H A Dconvolution_3x3_pack4to1.h370 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4to1_neon()
371 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4to1_neon()
384 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4to1_neon()
385 … float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 0.5f), _r03, 2.5f), _r05, 2.f); in conv3x3s1_winograd64_pack4to1_neon()
398 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_neon()
399 … float32x4_t _tmp56b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 2.f), _r03, 2.5f), _r05, 0.5f); in conv3x3s1_winograd64_pack4to1_neon()
441 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_neon()
442 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4to1_neon()
453 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_neon()
454 …float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack4to1_neon()
[all …]
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack4to1_bf16s.h113 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
114 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
127 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
128 … float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 0.5f), _r03, 2.5f), _r05, 2.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
141 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
142 … float32x4_t _tmp56b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 2.f), _r03, 2.5f), _r05, 0.5f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
196 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
197 …float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack4to1_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack4_bf16s.h113 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
114 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
127 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
141 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon()
184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon()
1894 float32x4_t _tmp0m = vmlsq_n_f32(vmlaq_n_f32(_r04, _r00, 4.f), _r02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
1899 float32x4_t _tmp5m = vmlsq_n_f32(vmlaq_n_f32(_r05, _r01, 4.f), _r03, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
1927 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack4.h344 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4_neon()
345 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4_neon()
358 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4_neon()
372 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon()
415 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_neon()
416 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_neon()
439 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon()
2354 float32x4_t _tmp0m = vmlsq_n_f32(vmlaq_n_f32(_r04, _r00, 4.f), _r02, 5.f); in conv3x3s1_winograd42_pack4_neon()
2359 float32x4_t _tmp5m = vmlsq_n_f32(vmlaq_n_f32(_r05, _r01, 4.f), _r03, 5.f); in conv3x3s1_winograd42_pack4_neon()
2387 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_neon()
[all …]
H A Dconvolution_3x3_pack4to1.h370 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4to1_neon()
371 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4to1_neon()
384 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4to1_neon()
385 … float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 0.5f), _r03, 2.5f), _r05, 2.f); in conv3x3s1_winograd64_pack4to1_neon()
398 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_neon()
399 … float32x4_t _tmp56b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 2.f), _r03, 2.5f), _r05, 0.5f); in conv3x3s1_winograd64_pack4to1_neon()
441 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_neon()
442 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4to1_neon()
453 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_neon()
454 …float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack4to1_neon()
[all …]
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack4to1_bf16s.h113 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
114 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
127 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
128 … float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 0.5f), _r03, 2.5f), _r05, 2.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
141 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
142 … float32x4_t _tmp56b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 2.f), _r03, 2.5f), _r05, 0.5f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
196 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
197 …float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack4to1_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack4_bf16s.h113 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
114 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
127 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
141 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon()
184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon()
1894 float32x4_t _tmp0m = vmlsq_n_f32(vmlaq_n_f32(_r04, _r00, 4.f), _r02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
1899 float32x4_t _tmp5m = vmlsq_n_f32(vmlaq_n_f32(_r05, _r01, 4.f), _r03, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
1927 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack4.h344 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4_neon()
345 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4_neon()
358 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4_neon()
372 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon()
415 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_neon()
416 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_neon()
439 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon()
2354 float32x4_t _tmp0m = vmlsq_n_f32(vmlaq_n_f32(_r04, _r00, 4.f), _r02, 5.f); in conv3x3s1_winograd42_pack4_neon()
2359 float32x4_t _tmp5m = vmlsq_n_f32(vmlaq_n_f32(_r05, _r01, 4.f), _r03, 5.f); in conv3x3s1_winograd42_pack4_neon()
2387 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_neon()
[all …]
H A Dconvolution_3x3_pack4to1.h370 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4to1_neon()
371 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4to1_neon()
384 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4to1_neon()
385 … float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 0.5f), _r03, 2.5f), _r05, 2.f); in conv3x3s1_winograd64_pack4to1_neon()
398 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_neon()
399 … float32x4_t _tmp56b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 2.f), _r03, 2.5f), _r05, 0.5f); in conv3x3s1_winograd64_pack4to1_neon()
441 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_neon()
442 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4to1_neon()
453 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_neon()
454 …float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack4to1_neon()
[all …]
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Dconvolution_3x3_pack4to1_bf16s.h113 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
114 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
127 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
128 … float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 0.5f), _r03, 2.5f), _r05, 2.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
141 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
142 … float32x4_t _tmp56b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 2.f), _r03, 2.5f), _r05, 0.5f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
196 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
197 …float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack4to1_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack4_bf16s.h113 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
114 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
127 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
141 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon()
184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon()
1894 float32x4_t _tmp0m = vmlsq_n_f32(vmlaq_n_f32(_r04, _r00, 4.f), _r02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
1899 float32x4_t _tmp5m = vmlsq_n_f32(vmlaq_n_f32(_r05, _r01, 4.f), _r03, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
1927 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack4.h344 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4_neon()
345 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4_neon()
358 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4_neon()
372 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon()
415 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_neon()
416 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_neon()
439 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon()
2354 float32x4_t _tmp0m = vmlsq_n_f32(vmlaq_n_f32(_r04, _r00, 4.f), _r02, 5.f); in conv3x3s1_winograd42_pack4_neon()
2359 float32x4_t _tmp5m = vmlsq_n_f32(vmlaq_n_f32(_r05, _r01, 4.f), _r03, 5.f); in conv3x3s1_winograd42_pack4_neon()
2387 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_neon()
[all …]
H A Dconvolution_3x3_pack4to1.h370 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4to1_neon()
371 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4to1_neon()
384 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4to1_neon()
385 … float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 0.5f), _r03, 2.5f), _r05, 2.f); in conv3x3s1_winograd64_pack4to1_neon()
398 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_neon()
399 … float32x4_t _tmp56b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 2.f), _r03, 2.5f), _r05, 0.5f); in conv3x3s1_winograd64_pack4to1_neon()
441 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_neon()
442 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4to1_neon()
453 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_neon()
454 …float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack4to1_neon()
[all …]
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Dconvolution_3x3_pack4to1_bf16s.h113 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
114 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
127 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
128 … float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 0.5f), _r03, 2.5f), _r05, 2.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
141 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
142 … float32x4_t _tmp56b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 2.f), _r03, 2.5f), _r05, 0.5f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
196 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
197 …float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack4to1_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack4_bf16s.h113 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
114 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
127 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
141 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon()
184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon()
208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon()
1894 float32x4_t _tmp0m = vmlsq_n_f32(vmlaq_n_f32(_r04, _r00, 4.f), _r02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
1899 float32x4_t _tmp5m = vmlsq_n_f32(vmlaq_n_f32(_r05, _r01, 4.f), _r03, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
1927 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon()
[all …]
H A Dconvolution_3x3_pack4.h344 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4_neon()
345 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4_neon()
358 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4_neon()
372 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon()
415 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_neon()
416 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_neon()
439 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon()
2354 float32x4_t _tmp0m = vmlsq_n_f32(vmlaq_n_f32(_r04, _r00, 4.f), _r02, 5.f); in conv3x3s1_winograd42_pack4_neon()
2359 float32x4_t _tmp5m = vmlsq_n_f32(vmlaq_n_f32(_r05, _r01, 4.f), _r03, 5.f); in conv3x3s1_winograd42_pack4_neon()
2387 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_neon()
[all …]
H A Dconvolution_3x3_pack4to1.h370 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4to1_neon()
371 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4to1_neon()
384 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4to1_neon()
385 … float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 0.5f), _r03, 2.5f), _r05, 2.f); in conv3x3s1_winograd64_pack4to1_neon()
398 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_neon()
399 … float32x4_t _tmp56b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 2.f), _r03, 2.5f), _r05, 0.5f); in conv3x3s1_winograd64_pack4to1_neon()
441 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_neon()
442 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4to1_neon()
453 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_neon()
454 …float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack4to1_neon()
[all …]
/dports/lang/gcc6-aux/gcc-6-20180516/gcc/testsuite/gcc.target/arm/neon/
H A DvmlsQ_nf32.c18 out_float32x4_t = vmlsq_n_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32_t); in test_vmlsQ_nf32()
/dports/lang/gcc48/gcc-4.8.5/gcc/testsuite/gcc.target/arm/neon/
H A DvmlsQ_nf32.c18 out_float32x4_t = vmlsq_n_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32_t); in test_vmlsQ_nf32()
/dports/devel/arm-none-eabi-gcc492/gcc-4.9.2/gcc/testsuite/gcc.target/arm/neon/
H A DvmlsQ_nf32.c18 out_float32x4_t = vmlsq_n_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32_t); in test_vmlsQ_nf32()
/dports/lang/gnat_util/gcc-6-20180516/gcc/testsuite/gcc.target/arm/neon/
H A DvmlsQ_nf32.c18 out_float32x4_t = vmlsq_n_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32_t); in test_vmlsQ_nf32()
/dports/devel/mingw32-gcc/gcc-4.8.1/gcc/testsuite/gcc.target/arm/neon/
H A DvmlsQ_nf32.c18 out_float32x4_t = vmlsq_n_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32_t); in test_vmlsQ_nf32()

123456