Home
last modified time | relevance | path

Searched refs:_sum1n (Results 1 – 15 of 15) sorted by relevance

/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/x86/
H A Dconvolution_3x3.h389 __m256 _sum1n = _mm256_broadcast_ss(&zero_val); in conv3x3s1_winograd23_sse() local
423 _sum1n = _mm256_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
443 _sum1n = _mm256_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
462 _sum1n = _mm256_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
481 _sum1n = _mm256_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
511 _sum1n = _mm256_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
521 _mm256_storeu_ps(output1_tm + 8, _sum1n); in conv3x3s1_winograd23_sse()
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/x86/
H A Dconvolution_3x3.h389 __m256 _sum1n = _mm256_broadcast_ss(&zero_val); in conv3x3s1_winograd23_sse() local
423 _sum1n = _mm256_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
443 _sum1n = _mm256_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
462 _sum1n = _mm256_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
481 _sum1n = _mm256_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
511 _sum1n = _mm256_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
521 _mm256_storeu_ps(output1_tm + 8, _sum1n); in conv3x3s1_winograd23_sse()
/dports/misc/ncnn/ncnn-20211208/src/layer/x86/
H A Dconvolution_3x3.h389 __m256 _sum1n = _mm256_broadcast_ss(&zero_val); in conv3x3s1_winograd23_sse() local
423 _sum1n = _mm256_comp_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
443 _sum1n = _mm256_comp_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
462 _sum1n = _mm256_comp_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
481 _sum1n = _mm256_comp_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
511 _sum1n = _mm256_comp_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
521 _mm256_storeu_ps(output1_tm + 8, _sum1n); in conv3x3s1_winograd23_sse()
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/x86/
H A Dconvolution_3x3.h389 __m256 _sum1n = _mm256_broadcast_ss(&zero_val); in conv3x3s1_winograd23_sse() local
423 _sum1n = _mm256_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
443 _sum1n = _mm256_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
462 _sum1n = _mm256_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
481 _sum1n = _mm256_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
511 _sum1n = _mm256_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
521 _mm256_storeu_ps(output1_tm + 8, _sum1n); in conv3x3s1_winograd23_sse()
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/x86/
H A Dconvolution_3x3.h389 __m256 _sum1n = _mm256_broadcast_ss(&zero_val); in conv3x3s1_winograd23_sse() local
423 _sum1n = _mm256_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
443 _sum1n = _mm256_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
462 _sum1n = _mm256_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
481 _sum1n = _mm256_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
511 _sum1n = _mm256_fmadd_ps(_r0n, _k1n, _sum1n); in conv3x3s1_winograd23_sse()
521 _mm256_storeu_ps(output1_tm + 8, _sum1n); in conv3x3s1_winograd23_sse()
/dports/graphics/waifu2x-ncnn-vulkan/waifu2x-ncnn-vulkan-20210521/src/ncnn/src/layer/arm/
H A Dconvolution_3x3.h388 float32x4_t _sum1n = vmulq_f32(_r10, _k10); in conv3x3s1_neon() local
390 _sum1n = vmlaq_f32(_sum1n, _r20, _k13); in conv3x3s1_neon()
392 _sum1n = vmlaq_f32(_sum1n, _r30, _k16); in conv3x3s1_neon()
397 _sum1n = vsetq_lane_f32(*outptr1n, _sum1n, 3); in conv3x3s1_neon()
402 *outptr1n = vaddvq_f32(_sum1n); in conv3x3s1_neon()
407 float32x2_t _ss1n = vadd_f32(vget_low_f32(_sum1n), vget_high_f32(_sum1n)); in conv3x3s1_neon()
H A Dconvolution_3x3_int8.h4162 int32x4_t _sum1n = vmull_laneq_s16(vget_high_s16(_k1), _r0, 1); in conv3x3s2_packed_int8_neon() local
4178 _sum1n = vmlal_laneq_s16(_sum1n, vget_high_s16(_k1), _r1, 1); in conv3x3s2_packed_int8_neon()
4194 _sum1n = vmlal_laneq_s16(_sum1n, vget_high_s16(_k1), _r2, 1); in conv3x3s2_packed_int8_neon()
4199 _sum0n = vaddq_s32(_sum0n, _sum1n); in conv3x3s2_packed_int8_neon()
/dports/benchmarks/vkpeak/vkpeak-20210430/ncnn/src/layer/arm/
H A Dconvolution_3x3.h388 float32x4_t _sum1n = vmulq_f32(_r10, _k10); in conv3x3s1_neon() local
390 _sum1n = vmlaq_f32(_sum1n, _r20, _k13); in conv3x3s1_neon()
392 _sum1n = vmlaq_f32(_sum1n, _r30, _k16); in conv3x3s1_neon()
397 _sum1n = vsetq_lane_f32(*outptr1n, _sum1n, 3); in conv3x3s1_neon()
402 *outptr1n = vaddvq_f32(_sum1n); in conv3x3s1_neon()
407 float32x2_t _ss1n = vadd_f32(vget_low_f32(_sum1n), vget_high_f32(_sum1n)); in conv3x3s1_neon()
H A Dconvolution_3x3_int8.h4162 int32x4_t _sum1n = vmull_laneq_s16(vget_high_s16(_k1), _r0, 1); in conv3x3s2_packed_int8_neon() local
4178 _sum1n = vmlal_laneq_s16(_sum1n, vget_high_s16(_k1), _r1, 1); in conv3x3s2_packed_int8_neon()
4194 _sum1n = vmlal_laneq_s16(_sum1n, vget_high_s16(_k1), _r2, 1); in conv3x3s2_packed_int8_neon()
4199 _sum0n = vaddq_s32(_sum0n, _sum1n); in conv3x3s2_packed_int8_neon()
/dports/misc/ncnn/ncnn-20211208/src/layer/arm/
H A Dconvolution_3x3.h388 float32x4_t _sum1n = vmulq_f32(_r10, _k10); in conv3x3s1_neon() local
390 _sum1n = vmlaq_f32(_sum1n, _r20, _k13); in conv3x3s1_neon()
392 _sum1n = vmlaq_f32(_sum1n, _r30, _k16); in conv3x3s1_neon()
397 _sum1n = vsetq_lane_f32(*outptr1n, _sum1n, 3); in conv3x3s1_neon()
402 *outptr1n = vaddvq_f32(_sum1n); in conv3x3s1_neon()
407 float32x2_t _ss1n = vadd_f32(vget_low_f32(_sum1n), vget_high_f32(_sum1n)); in conv3x3s1_neon()
H A Dconvolution_3x3_int8.h4162 int32x4_t _sum1n = vmull_laneq_s16(vget_high_s16(_k1), _r0, 1); in conv3x3s2_packed_int8_neon() local
4178 _sum1n = vmlal_laneq_s16(_sum1n, vget_high_s16(_k1), _r1, 1); in conv3x3s2_packed_int8_neon()
4194 _sum1n = vmlal_laneq_s16(_sum1n, vget_high_s16(_k1), _r2, 1); in conv3x3s2_packed_int8_neon()
4199 _sum0n = vaddq_s32(_sum0n, _sum1n); in conv3x3s2_packed_int8_neon()
/dports/graphics/vapoursynth-waifu2x-ncnn-vulkan/vapoursynth-waifu2x-ncnn-vulkan-r4/deps/ncnn/src/layer/arm/
H A Dconvolution_3x3.h388 float32x4_t _sum1n = vmulq_f32(_r10, _k10); in conv3x3s1_neon() local
390 _sum1n = vmlaq_f32(_sum1n, _r20, _k13); in conv3x3s1_neon()
392 _sum1n = vmlaq_f32(_sum1n, _r30, _k16); in conv3x3s1_neon()
397 _sum1n = vsetq_lane_f32(*outptr1n, _sum1n, 3); in conv3x3s1_neon()
402 *outptr1n = vaddvq_f32(_sum1n); in conv3x3s1_neon()
407 float32x2_t _ss1n = vadd_f32(vget_low_f32(_sum1n), vget_high_f32(_sum1n)); in conv3x3s1_neon()
H A Dconvolution_3x3_int8.h4162 int32x4_t _sum1n = vmull_laneq_s16(vget_high_s16(_k1), _r0, 1); in conv3x3s2_packed_int8_neon() local
4178 _sum1n = vmlal_laneq_s16(_sum1n, vget_high_s16(_k1), _r1, 1); in conv3x3s2_packed_int8_neon()
4194 _sum1n = vmlal_laneq_s16(_sum1n, vget_high_s16(_k1), _r2, 1); in conv3x3s2_packed_int8_neon()
4199 _sum0n = vaddq_s32(_sum0n, _sum1n); in conv3x3s2_packed_int8_neon()
/dports/graphics/realsr-ncnn-vulkan/realsr-ncnn-vulkan-20210210/src/ncnn/src/layer/arm/
H A Dconvolution_3x3.h388 float32x4_t _sum1n = vmulq_f32(_r10, _k10); in conv3x3s1_neon() local
390 _sum1n = vmlaq_f32(_sum1n, _r20, _k13); in conv3x3s1_neon()
392 _sum1n = vmlaq_f32(_sum1n, _r30, _k16); in conv3x3s1_neon()
397 _sum1n = vsetq_lane_f32(*outptr1n, _sum1n, 3); in conv3x3s1_neon()
402 *outptr1n = vaddvq_f32(_sum1n); in conv3x3s1_neon()
407 float32x2_t _ss1n = vadd_f32(vget_low_f32(_sum1n), vget_high_f32(_sum1n)); in conv3x3s1_neon()
H A Dconvolution_3x3_int8.h4162 int32x4_t _sum1n = vmull_laneq_s16(vget_high_s16(_k1), _r0, 1); in conv3x3s2_packed_int8_neon() local
4178 _sum1n = vmlal_laneq_s16(_sum1n, vget_high_s16(_k1), _r1, 1); in conv3x3s2_packed_int8_neon()
4194 _sum1n = vmlal_laneq_s16(_sum1n, vget_high_s16(_k1), _r2, 1); in conv3x3s2_packed_int8_neon()
4199 _sum0n = vaddq_s32(_sum0n, _sum1n); in conv3x3s2_packed_int8_neon()