/dports/graphics/opencv/opencv-4.5.3/3rdparty/carotene/src/ |
H A D | convolution.cpp | 209 int32x4_t v_dst0 = v_zero_s32, v_dst1 = v_zero_s32; in convolution() local 221 v_dst0 = vmlal_n_s16(v_dst0, vget_low_s16(t0_16s), kernelBase[8]); in convolution() 222 v_dst0 = vmlal_n_s16(v_dst0, vget_low_s16(t1_16s), kernelBase[7]); in convolution() 223 v_dst0 = vmlal_n_s16(v_dst0, vget_low_s16(t2_16s), kernelBase[6]); in convolution() 240 v_dst0 = vmlal_n_s16(v_dst0, vget_low_s16(t0_16s), kernelBase[5]); in convolution() 241 v_dst0 = vmlal_n_s16(v_dst0, vget_low_s16(t1_16s), kernelBase[4]); in convolution() 242 v_dst0 = vmlal_n_s16(v_dst0, vget_low_s16(t2_16s), kernelBase[3]); in convolution() 259 v_dst0 = vmlal_n_s16(v_dst0, vget_low_s16(t0_16s), kernelBase[2]); in convolution() 260 v_dst0 = vmlal_n_s16(v_dst0, vget_low_s16(t1_16s), kernelBase[1]); in convolution() 261 v_dst0 = vmlal_n_s16(v_dst0, vget_low_s16(t2_16s), kernelBase[0]); in convolution() [all …]
|
H A D | accumulate.cpp | 68 int16x8_t v_dst0 = vld1q_s16(dst + j); in accumulate() local 72 v_dst0 = vqaddq_s16(v_dst0, v_src0); in accumulate() 74 vst1q_s16(dst + j, v_dst0); in accumulate() 121 int16x8_t v_dst0 = vld1q_s16(dst + j), v_dst1 = vld1q_s16(dst + j + 8); in accumulateSquareConst() local 126 …v_dst0 = vcombine_s16(vqmovn_s32(vaddw_s16(vshrq_n_s32(vmull_s16(v_srclo, v_srclo), shift), vget_l… in accumulateSquareConst() 134 vst1q_s16(dst + j, v_dst0); in accumulateSquareConst() 174 int16x8_t v_dst0 = vld1q_s16(dst + j), v_dst1 = vld1q_s16(dst + j + 8); in accumulateSquareConst() local 179 … v_dst0 = vcombine_s16(vqmovn_s32(vaddw_s16(vmull_s16(v_srclo, v_srclo), vget_low_s16(v_dst0))), in accumulateSquareConst() 187 vst1q_s16(dst + j, v_dst0); in accumulateSquareConst() 318 uint16x8_t v_dst0 = vcombine_u16(vmovn_u32(vcvtq_u32_f32(v_dst0f)), in operator ()() local [all …]
|
H A D | sub.cpp | 280 int16x8_t v_dst0 = vqsubq_s16(v_src00, v_src10); in sub() local 282 vst1q_s16(dst + j, v_dst0); in sub() 306 int16x8_t v_dst0 = vsubq_s16(v_src00, v_src10); in sub() local 308 vst1q_s16(dst + j, v_dst0); in sub() 363 int16x8_t v_dst0 = vqsubq_s16(v_src00, v_src10); in sub() local 365 vst1q_s16(dst + j, v_dst0); in sub() 389 int16x8_t v_dst0 = vsubq_s16(v_src00, v_src10); in sub() local 391 vst1q_s16(dst + j, v_dst0); in sub()
|
H A D | mul.cpp | 84 uint16x8_t v_dst0 = vmull_u8(vget_low_u8(v_src0), vget_low_u8(v_src1)); in mulu8() local 87 v_dst0 = vshrq_n_u16(v_dst0, shift); in mulu8() 90 vst1q_u8(dst + j, vcombine_u8(vqmovn_u16(v_dst0), vqmovn_u16(v_dst1))); in mulu8() 112 uint16x8_t v_dst0 = vmull_u8(vget_low_u8(v_src0), vget_low_u8(v_src1)); in mulu8() local 115 v_dst0 = vshrq_n_u16(v_dst0, shift); in mulu8() 118 vst1q_u8(dst + j, vcombine_u8(vmovn_u16(v_dst0), vmovn_u16(v_dst1))); in mulu8() 165 v_dst0 = vshrq_n_u16(v_dst0, shift); in muls16() 168 vst1q_s16(dst + j, vreinterpretq_s16_u16(vminq_u16(v_32767, v_dst0))); in muls16() 195 v_dst0 = vshrq_n_u16(v_dst0, shift); in muls16() 198 vst1q_s16(dst + j, vreinterpretq_s16_u16(v_dst0)); in muls16() [all …]
|
H A D | convert_depth.cpp | 68 int16x8_t v_dst0 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_src))); in lshiftConst() local 71 vst1q_s16(dst + j, vshlq_n_s16(v_dst0, shift)); in lshiftConst() 105 int16x8_t v_dst0 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_src))); in lshiftConst() local 108 vst1q_s16(dst + j, v_dst0); in lshiftConst()
|
H A D | bitwise.cpp | 134 uint8x16_t v_dst0 = vmvnq_u8(v_src0), v_dst1 = vmvnq_u8(v_src1); in bitwiseNot() local 135 vst1q_u8(dst + j, v_dst0); in bitwiseNot()
|
H A D | add.cpp | 253 int16x8_t v_dst0 = vqaddq_s16(v_src00, v_src10); in add() local 255 vst1q_s16(dst + j, v_dst0); in add() 279 int16x8_t v_dst0 = vaddq_s16(v_src00, v_src10); in add() local 281 vst1q_s16(dst + j, v_dst0); in add()
|
H A D | cmp.cpp | 162 uvec128 v_dst0; in vcompare() local 165 op(v_src00, v_src10, v_dst0); in vcompare() 168 vnst(dst + x, v_dst0, v_dst1); in vcompare()
|
H A D | colorconvert.cpp | 71 uint32x4_t v_dst0 = vmull_u16(vget_low_u16(v_g), v_g2y); in convertToGray() local 74 v_dst0 = vmlal_u16(v_dst0, vget_low_u16(v_r), v_r2y); in convertToGray() 77 v_dst0 = vmlal_u16(v_dst0, vget_low_u16(v_b), v_b2y); in convertToGray() 80 uint8x8_t v_gray = vqmovn_u16(vcombine_u16(vrshrn_n_u32(v_dst0, SHIFT), in convertToGray() 679 union { uint8x16x4_t v4; uint8x16x3_t v3; } v_dst0; in rgb2rgbx() local 680 v_dst0.v4.val[3] = vdupq_n_u8(255); in rgb2rgbx() 709 v_dst0.v3 = vld3q_u8(src + sj); in rgb2rgbx() 710 vst4q_u8(dst + dj, v_dst0.v4); in rgb2rgbx() 747 union { uint8x16x4_t v4; uint8x16x3_t v3; } v_dst0; in rgbx2rgb() local 774 v_dst0.v4 = vld4q_u8(src + sj); in rgbx2rgb() [all …]
|
H A D | remap.cpp | 149 uint16x4_t v_dst0 = vmovn_u32(vcvtq_u32_f32(v_dst)); in remapLinearReplicate() local 165 vst1_u8(dst_row + x, vmovn_u16(vcombine_u16(v_dst0, v_dst1))); in remapLinearReplicate() 247 uint16x4_t v_dst0 = vmovn_u32(vcvtq_u32_f32(v_dst)); in remapLinearConst() local 263 vst1_u8(dst_row + x, vmovn_u16(vcombine_u16(v_dst0, v_dst1))); in remapLinearConst()
|
H A D | threshold.cpp | 156 uint8x16_t v_dst0 = vandq_u8(vcgeq_u8(v_src0, v_lower), vcleq_u8(v_src0, v_upper)); in thresholdRange() local 158 vst1q_u8(dst + j, v_dst0); in thresholdRange() 192 uint8x16_t v_dst0 = vandq_u8(vcgeq_u8(v_src0, v_lower), vcleq_u8(v_src0, v_upper)); in thresholdRange() local 194 v_dst0 = vbslq_u8(v_dst0, vtrue_value, vfalse_value); in thresholdRange() 196 vst1q_u8(dst + j, v_dst0); in thresholdRange()
|
H A D | resize.cpp | 784 uint8x8x3_t v_dst0, v_dst1; in resizeAreaRounding() local 793 v_dst0.val[0] = areaDownsamplingDivision<opencv_like,2>(v_el0); in resizeAreaRounding() 794 v_dst0.val[1] = areaDownsamplingDivision<opencv_like,2>(v_el1); in resizeAreaRounding() 795 v_dst0.val[2] = areaDownsamplingDivision<opencv_like,2>(v_el2); in resizeAreaRounding() 814 v_dst.val[0] = vcombine_u8(v_dst0.val[0], v_dst1.val[0]); in resizeAreaRounding() 815 v_dst.val[1] = vcombine_u8(v_dst0.val[1], v_dst1.val[1]); in resizeAreaRounding() 816 v_dst.val[2] = vcombine_u8(v_dst0.val[2], v_dst1.val[2]); in resizeAreaRounding()
|
/dports/graphics/opencv/opencv-4.5.3/modules/imgproc/src/ |
H A D | resize.sse4_1.cpp | 201 __m128 v_dst0 = _mm_mul_ps(v_b0, _mm_loadu_ps(S0 + x)); in VResizeLanczos4Vec_32f16u_SSE41() local 202 v_dst0 = _mm_add_ps(v_dst0, _mm_mul_ps(v_b1, _mm_loadu_ps(S1 + x))); in VResizeLanczos4Vec_32f16u_SSE41() 203 v_dst0 = _mm_add_ps(v_dst0, _mm_mul_ps(v_b2, _mm_loadu_ps(S2 + x))); in VResizeLanczos4Vec_32f16u_SSE41() 204 v_dst0 = _mm_add_ps(v_dst0, _mm_mul_ps(v_b3, _mm_loadu_ps(S3 + x))); in VResizeLanczos4Vec_32f16u_SSE41() 205 v_dst0 = _mm_add_ps(v_dst0, _mm_mul_ps(v_b4, _mm_loadu_ps(S4 + x))); in VResizeLanczos4Vec_32f16u_SSE41() 206 v_dst0 = _mm_add_ps(v_dst0, _mm_mul_ps(v_b5, _mm_loadu_ps(S5 + x))); in VResizeLanczos4Vec_32f16u_SSE41() 207 v_dst0 = _mm_add_ps(v_dst0, _mm_mul_ps(v_b6, _mm_loadu_ps(S6 + x))); in VResizeLanczos4Vec_32f16u_SSE41() 208 v_dst0 = _mm_add_ps(v_dst0, _mm_mul_ps(v_b7, _mm_loadu_ps(S7 + x))); in VResizeLanczos4Vec_32f16u_SSE41() 219 __m128i v_dsti0 = _mm_cvtps_epi32(v_dst0); in VResizeLanczos4Vec_32f16u_SSE41()
|
H A D | blend.cpp | 120 store_pack_f32tou8(dst + x, v_dst0, v_dst1, v_dst2, v_dst3); in blendLinearSimd() 191 v_store_interleave(dst + x, v_dst0, v_dst1, v_dst2); in blendLinearSimd() 212 v_float32 v_dst0, v_dst1, v_dst2, v_dst3; in blendLinearSimd() local 213 v_dst0 = blend(v_src10, v_src20, v_w10, v_w20); in blendLinearSimd() 218 store_pack_f32tou8(dst + x, v_dst0, v_dst1, v_dst2, v_dst3); in blendLinearSimd() 253 v_float32 v_dst0 = blend(v_src10, v_src20, v_w1, v_w2); in blendLinearSimd() local 256 v_store_interleave(dst + x, v_dst0, v_dst1); in blendLinearSimd() 268 v_float32 v_dst0 = blend(v_src10, v_src20, v_w1, v_w2); in blendLinearSimd() local 272 v_store_interleave(dst + x, v_dst0, v_dst1, v_dst2); in blendLinearSimd() 284 v_float32 v_dst0 = blend(v_src10, v_src20, v_w1, v_w2); in blendLinearSimd() local [all …]
|
H A D | accum.simd.hpp | 589 v_dst0 = v_dst0 + v_src0; in acc_simd_() 644 v_dst0 = v_dst0 + v_src0; in acc_simd_() 770 v_dst0 = v_dst0 + v_src0; in acc_simd_() 805 v_dst0 = v_dst0 + v_src0; in acc_simd_() 880 v_dst0 = _mm256_add_pd(v_src0, v_dst0); in acc_simd_() 1374 v_dst0 = v_fma(v_src0, v_src0, v_dst0); in accSqr_simd_() 1410 v_dst0 = v_fma(v_src0, v_src0, v_dst0); in accSqr_simd_() 1515 v_dst0 = v_fma(v_src0, v_src0, v_dst0); in accSqr_simd_() 1553 v_dst0 = v_fma(v_src0, v_src0, v_dst0); in accSqr_simd_() 1645 v_dst0 = _mm256_add_pd(v_src0, v_dst0); in accSqr_simd_() [all …]
|
H A D | corner.avx.cpp | 171 __m256 v_dst0, v_dst1, v_dst2; in cornerEigenValsVecsLine_AVX() local 172 v_dst0 = _mm256_mul_ps(v_dx, v_dx); in cornerEigenValsVecsLine_AVX() 176 store_interleave(cov_data + j * 3, v_dst0, v_dst1, v_dst2); in cornerEigenValsVecsLine_AVX()
|
H A D | resize.cpp | 2240 … v_dst0 = vaddq_u16(v_dst0, vaddl_u8(vget_low_u8(v_row1.val[0]), vget_low_u8(v_row1.val[1]))); in operator ()() 2241 v_dst0 = vshrq_n_u16(vaddq_u16(v_dst0, v_2), 2); in operator ()() 2247 vst1q_u8(D, vcombine_u8(vmovn_u16(v_dst0), vmovn_u16(v_dst1))); in operator ()() 2300 … v_dst0 = vaddq_u32(v_dst0, vaddl_u16(vget_low_u16(v_row1.val[0]), vget_low_u16(v_row1.val[1]))); in operator ()() 2301 v_dst0 = vshrq_n_u32(vaddq_u32(v_dst0, v_2), 2); in operator ()() 2307 vst1q_u16(D, vcombine_u16(vmovn_u32(v_dst0), vmovn_u32(v_dst1))); in operator ()() 2350 … v_dst0 = vaddq_s32(v_dst0, vaddl_s16(vget_low_s16(v_row1.val[0]), vget_low_s16(v_row1.val[1]))); in operator ()() 2351 v_dst0 = vshrq_n_s32(vaddq_s32(v_dst0, v_2), 2); in operator ()() 2402 float32x4_t v_dst0 = vaddq_f32(v_row0.val[0], v_row0.val[1]); in operator ()() local 2405 vst1q_f32(D, vmulq_f32(vaddq_f32(v_dst0, v_dst1), v_025)); in operator ()() [all …]
|
H A D | corner.cpp | 292 v_float32x4 v_dst0, v_dst1, v_dst2; in cornerEigenValsVecs() local 293 v_dst0 = v_dx * v_dx; in cornerEigenValsVecs() 297 v_store_interleave(cov_data + j * 3, v_dst0, v_dst1, v_dst2); in cornerEigenValsVecs()
|
H A D | imgwarp.sse4_1.cpp | 63 __m128i v_dst0 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_loadu_ps(src1f + x)), in convertMaps_nninterpolate32f1c16s_SSE41() local 73 _mm_interleave_epi16(v_dst0, v_dst1, v_dst2, v_dst3); in convertMaps_nninterpolate32f1c16s_SSE41() 75 _mm_storeu_si128((__m128i *)(dst1 + x * 2), v_dst0); in convertMaps_nninterpolate32f1c16s_SSE41()
|
H A D | imgwarp.cpp | 1969 … v_int32x4 v_dst0 = v_muladd(v_scale3, (v_iy0 & v_mask), (v_ix0 & v_mask)); in convertMaps() local 1971 v_store(dst2 + x, v_pack_u(v_dst0, v_dst1)); in convertMaps()
|