/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack8to4_fp16s.h | 297 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local 300 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 306 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 318 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 330 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 1157 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local 1172 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 1173 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
|
H A D | convolution_3x3_pack4_fp16s.h | 397 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local 400 … float16x4_t _r0tm0 = vfma_n_f16(vsub_f16(_tmp00, _tmp06), vsub_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon() 406 float16x4_t _tmp12a = vfms_n_f16(vadd_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon() 418 … float16x4_t _tmp34a = vfms_n_f16(vfma_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon() 430 … float16x4_t _tmp56a = vfma_n_f16(_tmp06, vfms_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_fp16sa_neon() 1101 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local 1116 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon() 1117 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
|
H A D | convolution_3x3_pack8to1_fp16s.h | 291 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 294 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 300 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 312 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 324 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
H A D | convolution_3x3_pack4to1_bf16s.h | 175 float32x4_t _tmp06 = vld1q_f32(tmp[m][6]); in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 178 … float32x4_t _r0tm0 = vmlaq_n_f32(vsubq_f32(_tmp00, _tmp06), vsubq_f32(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 196 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
|
H A D | convolution_3x3_pack8_fp16s.h | 279 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local 282 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon() 288 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon() 300 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon() 312 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8_fp16sa_neon() 1155 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local 1170 float16x8_t _tmp024c = vaddq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon() 1171 float16x8_t _tmp135c = vsubq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
|
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack8to4_fp16s.h | 297 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local 300 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 306 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 318 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 330 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 1157 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local 1172 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 1173 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
|
H A D | convolution_3x3_pack4_fp16s.h | 397 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local 400 … float16x4_t _r0tm0 = vfma_n_f16(vsub_f16(_tmp00, _tmp06), vsub_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon() 406 float16x4_t _tmp12a = vfms_n_f16(vadd_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon() 418 … float16x4_t _tmp34a = vfms_n_f16(vfma_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon() 430 … float16x4_t _tmp56a = vfma_n_f16(_tmp06, vfms_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_fp16sa_neon() 1101 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local 1116 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon() 1117 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
|
H A D | convolution_3x3_pack8to1_fp16s.h | 291 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 294 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 300 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 312 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 324 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
H A D | convolution_3x3_pack4to1_bf16s.h | 175 float32x4_t _tmp06 = vld1q_f32(tmp[m][6]); in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 178 … float32x4_t _r0tm0 = vmlaq_n_f32(vsubq_f32(_tmp00, _tmp06), vsubq_f32(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 196 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
|
H A D | convolution_3x3_pack8_fp16s.h | 279 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local 282 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon() 288 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon() 300 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon() 312 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8_fp16sa_neon() 1155 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local 1170 float16x8_t _tmp024c = vaddq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon() 1171 float16x8_t _tmp135c = vsubq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
|
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack8to4_fp16s.h | 297 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local 300 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 306 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 318 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 330 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 1157 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local 1172 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 1173 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
|
H A D | convolution_3x3_pack4_fp16s.h | 397 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local 400 … float16x4_t _r0tm0 = vfma_n_f16(vsub_f16(_tmp00, _tmp06), vsub_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon() 406 float16x4_t _tmp12a = vfms_n_f16(vadd_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon() 418 … float16x4_t _tmp34a = vfms_n_f16(vfma_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon() 430 … float16x4_t _tmp56a = vfma_n_f16(_tmp06, vfms_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_fp16sa_neon() 1101 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local 1116 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon() 1117 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
|
H A D | convolution_3x3_pack8to1_fp16s.h | 291 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 294 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 300 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 312 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 324 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
H A D | convolution_3x3_pack4to1_bf16s.h | 175 float32x4_t _tmp06 = vld1q_f32(tmp[m][6]); in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 178 … float32x4_t _r0tm0 = vmlaq_n_f32(vsubq_f32(_tmp00, _tmp06), vsubq_f32(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 196 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
|
H A D | convolution_3x3_pack8_fp16s.h | 279 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local 282 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon() 288 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon() 300 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon() 312 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8_fp16sa_neon() 1155 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local 1170 float16x8_t _tmp024c = vaddq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon() 1171 float16x8_t _tmp135c = vsubq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
|
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/ |
H A D | convolution_3x3_pack8to4_fp16s.h | 297 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local 300 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 306 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 318 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 330 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 1157 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local 1172 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 1173 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
|
H A D | convolution_3x3_pack4_fp16s.h | 397 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local 400 … float16x4_t _r0tm0 = vfma_n_f16(vsub_f16(_tmp00, _tmp06), vsub_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon() 406 float16x4_t _tmp12a = vfms_n_f16(vadd_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon() 418 … float16x4_t _tmp34a = vfms_n_f16(vfma_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon() 430 … float16x4_t _tmp56a = vfma_n_f16(_tmp06, vfms_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_fp16sa_neon() 1101 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local 1116 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon() 1117 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
|
H A D | convolution_3x3_pack8to1_fp16s.h | 291 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 294 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 300 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 312 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 324 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
H A D | convolution_3x3_pack4to1_bf16s.h | 175 float32x4_t _tmp06 = vld1q_f32(tmp[m][6]); in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 178 … float32x4_t _r0tm0 = vmlaq_n_f32(vsubq_f32(_tmp00, _tmp06), vsubq_f32(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 196 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
|
H A D | convolution_3x3_pack8_fp16s.h | 279 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local 282 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon() 288 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon() 300 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon() 312 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8_fp16sa_neon() 1155 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local 1170 float16x8_t _tmp024c = vaddq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon() 1171 float16x8_t _tmp135c = vsubq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
|
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/ |
H A D | convolution_3x3_pack8to4_fp16s.h | 297 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local 300 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 306 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 318 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 330 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 1157 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() local 1172 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon() 1173 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8to4_fp16sa_neon()
|
H A D | convolution_3x3_pack4_fp16s.h | 397 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local 400 … float16x4_t _r0tm0 = vfma_n_f16(vsub_f16(_tmp00, _tmp06), vsub_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon() 406 float16x4_t _tmp12a = vfms_n_f16(vadd_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon() 418 … float16x4_t _tmp34a = vfms_n_f16(vfma_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4_fp16sa_neon() 430 … float16x4_t _tmp56a = vfma_n_f16(_tmp06, vfms_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4_fp16sa_neon() 1101 float16x4_t _tmp06 = vld1_f16(tmp[m][6]); in conv3x3s1_winograd64_pack4_fp16sa_neon() local 1116 float16x4_t _tmp024c = vadd_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon() 1117 float16x4_t _tmp135c = vsub_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack4_fp16sa_neon()
|
H A D | convolution_3x3_pack8to1_fp16s.h | 291 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() local 294 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 300 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 312 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon() 324 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8to1_fp16sa_neon()
|
H A D | convolution_3x3_pack4to1_bf16s.h | 175 float32x4_t _tmp06 = vld1q_f32(tmp[m][6]); in conv3x3s1_winograd64_pack4to1_bf16s_neon() local 178 … float32x4_t _r0tm0 = vmlaq_n_f32(vsubq_f32(_tmp00, _tmp06), vsubq_f32(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 184 float32x4_t _tmp12a = vmlsq_n_f32(vaddq_f32(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 196 … float32x4_t _tmp34a = vmlsq_n_f32(vmlaq_n_f32(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack4to1_bf16s_neon() 208 … float32x4_t _tmp56a = vmlaq_n_f32(_tmp06, vmlsq_n_f32(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack4to1_bf16s_neon()
|
H A D | convolution_3x3_pack8_fp16s.h | 279 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local 282 … float16x8_t _r0tm0 = vfmaq_n_f16(vsubq_f16(_tmp00, _tmp06), vsubq_f16(_tmp04, _tmp02), 5.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon() 288 float16x8_t _tmp12a = vfmsq_n_f16(vaddq_f16(_tmp02, _tmp06), _tmp04, 4.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon() 300 … float16x8_t _tmp34a = vfmsq_n_f16(vfmaq_n_f16(_tmp06, _tmp02, 0.25f), _tmp04, 1.25f); in conv3x3s1_winograd64_pack8_fp16sa_neon() 312 … float16x8_t _tmp56a = vfmaq_n_f16(_tmp06, vfmsq_n_f16(_tmp02, _tmp04, 1.25f), 4.f); in conv3x3s1_winograd64_pack8_fp16sa_neon() 1155 float16x8_t _tmp06 = vld1q_f16(tmp[m][6]); in conv3x3s1_winograd64_pack8_fp16sa_neon() local 1170 float16x8_t _tmp024c = vaddq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon() 1171 float16x8_t _tmp135c = vsubq_f16(_tmp05, _tmp06); in conv3x3s1_winograd64_pack8_fp16sa_neon()
|