/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack4to1_bf16s.h | 113 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 114 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 127 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 128 … float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 0.5f), _r03, 2.5f), _r05, 2.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 141 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 142 … float32x4_t _tmp56b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 2.f), _r03, 2.5f), _r05, 0.5f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 196 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 197 …float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack4to1_bf16s_neon() [all …]
|
H A D | convolution_3x3_pack4_bf16s.h | 113 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 114 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 127 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 141 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon() 184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon() 1894 float32x4_t _tmp0m = vmlsq_n_f32(vmlaq_n_f32(_r04, _r00, 4.f), _r02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon() 1899 float32x4_t _tmp5m = vmlsq_n_f32(vmlaq_n_f32(_r05, _r01, 4.f), _r03, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon() 1927 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon() [all …]
|
H A D | convolution_3x3_pack4.h | 344 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4_neon() 345 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4_neon() 358 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4_neon() 372 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon() 415 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_neon() 416 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_neon() 439 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon() 2354 float32x4_t _tmp0m = vmlsq_n_f32(vmlaq_n_f32(_r04, _r00, 4.f), _r02, 5.f); in conv3x3s1_winograd42_pack4_neon() 2359 float32x4_t _tmp5m = vmlsq_n_f32(vmlaq_n_f32(_r05, _r01, 4.f), _r03, 5.f); in conv3x3s1_winograd42_pack4_neon() 2387 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_neon() [all …]
|
H A D | convolution_3x3_pack4to1.h | 370 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4to1_neon() 371 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4to1_neon() 384 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4to1_neon() 385 … float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 0.5f), _r03, 2.5f), _r05, 2.f); in conv3x3s1_winograd64_pack4to1_neon() 398 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_neon() 399 … float32x4_t _tmp56b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 2.f), _r03, 2.5f), _r05, 0.5f); in conv3x3s1_winograd64_pack4to1_neon() 441 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_neon() 442 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4to1_neon() 453 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_neon() 454 …float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack4to1_neon() [all …]
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack4to1_bf16s.h | 113 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 114 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 127 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 128 … float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 0.5f), _r03, 2.5f), _r05, 2.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 141 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 142 … float32x4_t _tmp56b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 2.f), _r03, 2.5f), _r05, 0.5f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 196 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 197 …float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack4to1_bf16s_neon() [all …]
|
H A D | convolution_3x3_pack4_bf16s.h | 113 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 114 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 127 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 141 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon() 184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon() 1894 float32x4_t _tmp0m = vmlsq_n_f32(vmlaq_n_f32(_r04, _r00, 4.f), _r02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon() 1899 float32x4_t _tmp5m = vmlsq_n_f32(vmlaq_n_f32(_r05, _r01, 4.f), _r03, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon() 1927 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon() [all …]
|
H A D | convolution_3x3_pack4.h | 344 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4_neon() 345 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4_neon() 358 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4_neon() 372 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon() 415 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_neon() 416 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_neon() 439 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon() 2354 float32x4_t _tmp0m = vmlsq_n_f32(vmlaq_n_f32(_r04, _r00, 4.f), _r02, 5.f); in conv3x3s1_winograd42_pack4_neon() 2359 float32x4_t _tmp5m = vmlsq_n_f32(vmlaq_n_f32(_r05, _r01, 4.f), _r03, 5.f); in conv3x3s1_winograd42_pack4_neon() 2387 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_neon() [all …]
|
H A D | convolution_3x3_pack4to1.h | 370 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4to1_neon() 371 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4to1_neon() 384 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4to1_neon() 385 … float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 0.5f), _r03, 2.5f), _r05, 2.f); in conv3x3s1_winograd64_pack4to1_neon() 398 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_neon() 399 … float32x4_t _tmp56b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 2.f), _r03, 2.5f), _r05, 0.5f); in conv3x3s1_winograd64_pack4to1_neon() 441 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_neon() 442 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4to1_neon() 453 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_neon() 454 …float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack4to1_neon() [all …]
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack4to1_bf16s.h | 113 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 114 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 127 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 128 … float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 0.5f), _r03, 2.5f), _r05, 2.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 141 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 142 … float32x4_t _tmp56b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 2.f), _r03, 2.5f), _r05, 0.5f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 196 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 197 …float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack4to1_bf16s_neon() [all …]
|
H A D | convolution_3x3_pack4_bf16s.h | 113 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 114 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 127 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 141 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon() 184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon() 1894 float32x4_t _tmp0m = vmlsq_n_f32(vmlaq_n_f32(_r04, _r00, 4.f), _r02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon() 1899 float32x4_t _tmp5m = vmlsq_n_f32(vmlaq_n_f32(_r05, _r01, 4.f), _r03, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon() 1927 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon() [all …]
|
H A D | convolution_3x3_pack4.h | 344 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4_neon() 345 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4_neon() 358 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4_neon() 372 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon() 415 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_neon() 416 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_neon() 439 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon() 2354 float32x4_t _tmp0m = vmlsq_n_f32(vmlaq_n_f32(_r04, _r00, 4.f), _r02, 5.f); in conv3x3s1_winograd42_pack4_neon() 2359 float32x4_t _tmp5m = vmlsq_n_f32(vmlaq_n_f32(_r05, _r01, 4.f), _r03, 5.f); in conv3x3s1_winograd42_pack4_neon() 2387 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_neon() [all …]
|
H A D | convolution_3x3_pack4to1.h | 370 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4to1_neon() 371 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4to1_neon() 384 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4to1_neon() 385 … float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 0.5f), _r03, 2.5f), _r05, 2.f); in conv3x3s1_winograd64_pack4to1_neon() 398 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_neon() 399 … float32x4_t _tmp56b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 2.f), _r03, 2.5f), _r05, 0.5f); in conv3x3s1_winograd64_pack4to1_neon() 441 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_neon() 442 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4to1_neon() 453 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_neon() 454 …float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack4to1_neon() [all …]
|
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/ |
H A D | convolution_3x3_pack4to1_bf16s.h | 113 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 114 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 127 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 128 … float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 0.5f), _r03, 2.5f), _r05, 2.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 141 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 142 … float32x4_t _tmp56b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 2.f), _r03, 2.5f), _r05, 0.5f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 196 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 197 …float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack4to1_bf16s_neon() [all …]
|
H A D | convolution_3x3_pack4_bf16s.h | 113 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 114 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 127 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 141 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon() 184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon() 1894 float32x4_t _tmp0m = vmlsq_n_f32(vmlaq_n_f32(_r04, _r00, 4.f), _r02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon() 1899 float32x4_t _tmp5m = vmlsq_n_f32(vmlaq_n_f32(_r05, _r01, 4.f), _r03, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon() 1927 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon() [all …]
|
H A D | convolution_3x3_pack4.h | 344 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4_neon() 345 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4_neon() 358 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4_neon() 372 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon() 415 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_neon() 416 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_neon() 439 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon() 2354 float32x4_t _tmp0m = vmlsq_n_f32(vmlaq_n_f32(_r04, _r00, 4.f), _r02, 5.f); in conv3x3s1_winograd42_pack4_neon() 2359 float32x4_t _tmp5m = vmlsq_n_f32(vmlaq_n_f32(_r05, _r01, 4.f), _r03, 5.f); in conv3x3s1_winograd42_pack4_neon() 2387 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_neon() [all …]
|
H A D | convolution_3x3_pack4to1.h | 370 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4to1_neon() 371 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4to1_neon() 384 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4to1_neon() 385 … float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 0.5f), _r03, 2.5f), _r05, 2.f); in conv3x3s1_winograd64_pack4to1_neon() 398 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_neon() 399 … float32x4_t _tmp56b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 2.f), _r03, 2.5f), _r05, 0.5f); in conv3x3s1_winograd64_pack4to1_neon() 441 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_neon() 442 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4to1_neon() 453 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_neon() 454 …float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack4to1_neon() [all …]
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack4to1_bf16s.h | 113 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 114 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 127 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 128 … float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 0.5f), _r03, 2.5f), _r05, 2.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 141 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 142 … float32x4_t _tmp56b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 2.f), _r03, 2.5f), _r05, 0.5f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 196 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 197 …float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack4to1_bf16s_neon() [all …]
|
H A D | convolution_3x3_pack4_bf16s.h | 113 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 114 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 127 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 141 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon() 184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 185 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_bf16s_neon() 208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_bf16s_neon() 1894 float32x4_t _tmp0m = vmlsq_n_f32(vmlaq_n_f32(_r04, _r00, 4.f), _r02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon() 1899 float32x4_t _tmp5m = vmlsq_n_f32(vmlaq_n_f32(_r05, _r01, 4.f), _r03, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon() 1927 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_bf16s_neon() [all …]
|
H A D | convolution_3x3_pack4.h | 344 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4_neon() 345 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4_neon() 358 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4_neon() 372 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon() 415 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_neon() 416 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4_neon() 439 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_neon() 2354 float32x4_t _tmp0m = vmlsq_n_f32(vmlaq_n_f32(_r04, _r00, 4.f), _r02, 5.f); in conv3x3s1_winograd42_pack4_neon() 2359 float32x4_t _tmp5m = vmlsq_n_f32(vmlaq_n_f32(_r05, _r01, 4.f), _r03, 5.f); in conv3x3s1_winograd42_pack4_neon() 2387 … float32x4_t _r0tm0 = vmlsq_n_f32(vmlaq_n_f32(_tmp04, _tmp00, 4.f), _tmp02, 5.f); in conv3x3s1_winograd42_pack4_neon() [all …]
|
H A D | convolution_3x3_pack4to1.h | 370 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_r02, _r06), _r04, 4.25f); in conv3x3s1_winograd64_pack4to1_neon() 371 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_r01, _r05), _r03, 4.25f); in conv3x3s1_winograd64_pack4to1_neon() 384 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_r06, _r02, 0.25f), _r04, 1.25f); in conv3x3s1_winograd64_pack4to1_neon() 385 … float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 0.5f), _r03, 2.5f), _r05, 2.f); in conv3x3s1_winograd64_pack4to1_neon() 398 … float32x4_t _tmp56a = vmlaq_n_f32(_r06, vmlsq_n_f32(_r02, _r04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_neon() 399 … float32x4_t _tmp56b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_r01, 2.f), _r03, 2.5f), _r05, 0.5f); in conv3x3s1_winograd64_pack4to1_neon() 441 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_neon() 442 float32x4_t _tmp12b = vmlsq_n_f32(vaddq_f32(_tmp01, _tmp05), _tmp03, 4.25f); in conv3x3s1_winograd64_pack4to1_neon() 453 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_neon() 454 …float32x4_t _tmp34b = vmlaq_n_f32(vmlsq_n_f32(vmulq_n_f32(_tmp01, 0.5f), _tmp03, 2.5f), _tmp05, 2.… in conv3x3s1_winograd64_pack4to1_neon() [all …]
|
/dports/lang/gcc6-aux/gcc-6-20180516/gcc/testsuite/gcc.target/arm/neon/ |
H A D | vmlsQ_nf32.c | 18 out_float32x4_t = vmlsq_n_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32_t); in test_vmlsQ_nf32()
|
/dports/lang/gcc48/gcc-4.8.5/gcc/testsuite/gcc.target/arm/neon/ |
H A D | vmlsQ_nf32.c | 18 out_float32x4_t = vmlsq_n_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32_t); in test_vmlsQ_nf32()
|
/dports/devel/arm-none-eabi-gcc492/gcc-4.9.2/gcc/testsuite/gcc.target/arm/neon/ |
H A D | vmlsQ_nf32.c | 18 out_float32x4_t = vmlsq_n_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32_t); in test_vmlsQ_nf32()
|
/dports/lang/gnat_util/gcc-6-20180516/gcc/testsuite/gcc.target/arm/neon/ |
H A D | vmlsQ_nf32.c | 18 out_float32x4_t = vmlsq_n_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32_t); in test_vmlsQ_nf32()
|
/dports/devel/mingw32-gcc/gcc-4.8.1/gcc/testsuite/gcc.target/arm/neon/ |
H A D | vmlsQ_nf32.c | 18 out_float32x4_t = vmlsq_n_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32_t); in test_vmlsQ_nf32()
|