/dports/graphics/opencv/opencv-4.5.3/3rdparty/carotene/src/ |
H A D | convolution.cpp | 209 int32x4_t v_dst0 = v_zero_s32, v_dst1 = v_zero_s32; in convolution() local 225 v_dst1 = vmlal_n_s16(v_dst1, vget_high_s16(t0_16s), kernelBase[8]); in convolution() 226 v_dst1 = vmlal_n_s16(v_dst1, vget_high_s16(t1_16s), kernelBase[7]); in convolution() 227 v_dst1 = vmlal_n_s16(v_dst1, vget_high_s16(t2_16s), kernelBase[6]); in convolution() 244 v_dst1 = vmlal_n_s16(v_dst1, vget_high_s16(t0_16s), kernelBase[5]); in convolution() 245 v_dst1 = vmlal_n_s16(v_dst1, vget_high_s16(t1_16s), kernelBase[4]); in convolution() 246 v_dst1 = vmlal_n_s16(v_dst1, vget_high_s16(t2_16s), kernelBase[3]); in convolution() 263 v_dst1 = vmlal_n_s16(v_dst1, vget_high_s16(t0_16s), kernelBase[2]); in convolution() 264 v_dst1 = vmlal_n_s16(v_dst1, vget_high_s16(t1_16s), kernelBase[1]); in convolution() 265 v_dst1 = vmlal_n_s16(v_dst1, vget_high_s16(t2_16s), kernelBase[0]); in convolution() [all …]
|
H A D | accumulate.cpp | 69 int16x8_t v_dst1 = vld1q_s16(dst + j + 8); in accumulate() local 73 v_dst1 = vqaddq_s16(v_dst1, v_src1); in accumulate() 75 vst1q_s16(dst + j + 8, v_dst1); in accumulate() 121 int16x8_t v_dst0 = vld1q_s16(dst + j), v_dst1 = vld1q_s16(dst + j + 8); in accumulateSquareConst() local 131 …v_dst1 = vcombine_s16(vqmovn_s32(vaddw_s16(vshrq_n_s32(vmull_s16(v_srclo, v_srclo), shift), vget_l… in accumulateSquareConst() 135 vst1q_s16(dst + j + 8, v_dst1); in accumulateSquareConst() 174 int16x8_t v_dst0 = vld1q_s16(dst + j), v_dst1 = vld1q_s16(dst + j + 8); in accumulateSquareConst() local 184 … v_dst1 = vcombine_s16(vqmovn_s32(vaddw_s16(vmull_s16(v_srclo, v_srclo), vget_low_s16(v_dst1))), in accumulateSquareConst() 188 vst1q_s16(dst + j + 8, v_dst1); in accumulateSquareConst() 327 uint16x8_t v_dst1 = vcombine_u16(vmovn_u32(vcvtq_u32_f32(v_dst0f)), in operator ()() local [all …]
|
H A D | sub.cpp | 281 int16x8_t v_dst1 = vqsubq_s16(v_src01, v_src11); in sub() local 283 vst1q_s16(dst + j + 8, v_dst1); in sub() 307 int16x8_t v_dst1 = vsubq_s16(v_src01, v_src11); in sub() local 309 vst1q_s16(dst + j + 8, v_dst1); in sub() 364 int16x8_t v_dst1 = vqsubq_s16(v_src01, v_src11); in sub() local 366 vst1q_s16(dst + j + 8, v_dst1); in sub() 390 int16x8_t v_dst1 = vsubq_s16(v_src01, v_src11); in sub() local 392 vst1q_s16(dst + j + 8, v_dst1); in sub()
|
H A D | mul.cpp | 85 uint16x8_t v_dst1 = vmull_u8(vget_high_u8(v_src0), vget_high_u8(v_src1)); in mulu8() local 88 v_dst1 = vshrq_n_u16(v_dst1, shift); in mulu8() 90 vst1q_u8(dst + j, vcombine_u8(vqmovn_u16(v_dst0), vqmovn_u16(v_dst1))); in mulu8() 113 uint16x8_t v_dst1 = vmull_u8(vget_high_u8(v_src0), vget_high_u8(v_src1)); in mulu8() local 116 v_dst1 = vshrq_n_u16(v_dst1, shift); in mulu8() 118 vst1q_u8(dst + j, vcombine_u8(vmovn_u16(v_dst0), vmovn_u16(v_dst1))); in mulu8() 163 uint16x8_t v_dst1 = vmull_u8(vget_high_u8(v_src0), vget_high_u8(v_src1)); in muls16() local 166 v_dst1 = vshrq_n_u16(v_dst1, shift); in muls16() 196 v_dst1 = vshrq_n_u16(v_dst1, shift); in muls16() 199 vst1q_s16(dst + j + 8, vreinterpretq_s16_u16(v_dst1)); in muls16() [all …]
|
H A D | convert_depth.cpp | 69 int16x8_t v_dst1 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v_src))); in lshiftConst() local 72 vst1q_s16(dst + j + 8, vshlq_n_s16(v_dst1, shift)); in lshiftConst() 106 int16x8_t v_dst1 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v_src))); in lshiftConst() local 109 vst1q_s16(dst + j + 8, v_dst1); in lshiftConst()
|
H A D | bitwise.cpp | 134 uint8x16_t v_dst0 = vmvnq_u8(v_src0), v_dst1 = vmvnq_u8(v_src1); in bitwiseNot() local 136 vst1q_u8(dst + j + 16, v_dst1); in bitwiseNot()
|
H A D | add.cpp | 254 int16x8_t v_dst1 = vqaddq_s16(v_src01, v_src11); in add() local 256 vst1q_s16(dst + j + 8, v_dst1); in add() 280 int16x8_t v_dst1 = vaddq_s16(v_src01, v_src11); in add() local 282 vst1q_s16(dst + j + 8, v_dst1); in add()
|
H A D | cmp.cpp | 163 uvec128 v_dst1; in vcompare() local 166 op(v_src01, v_src11, v_dst1); in vcompare() 168 vnst(dst + x, v_dst0, v_dst1); in vcompare()
|
H A D | remap.cpp | 162 uint16x4_t v_dst1 = vmovn_u32(vcvtq_u32_f32(v_dst)); in remapLinearReplicate() local 165 vst1_u8(dst_row + x, vmovn_u16(vcombine_u16(v_dst0, v_dst1))); in remapLinearReplicate() 260 uint16x4_t v_dst1 = vmovn_u32(vcvtq_u32_f32(v_dst)); in remapLinearConst() local 263 vst1_u8(dst_row + x, vmovn_u16(vcombine_u16(v_dst0, v_dst1))); in remapLinearConst()
|
H A D | threshold.cpp | 157 uint8x16_t v_dst1 = vandq_u8(vcgeq_u8(v_src1, v_lower), vcleq_u8(v_src1, v_upper)); in thresholdRange() local 159 vst1q_u8(dst + j + 16, v_dst1); in thresholdRange() 193 uint8x16_t v_dst1 = vandq_u8(vcgeq_u8(v_src1, v_lower), vcleq_u8(v_src1, v_upper)); in thresholdRange() local 195 v_dst1 = vbslq_u8(v_dst1, vtrue_value, vfalse_value); in thresholdRange() 197 vst1q_u8(dst + j + 16, v_dst1); in thresholdRange()
|
H A D | resize.cpp | 784 uint8x8x3_t v_dst0, v_dst1; in resizeAreaRounding() local 808 v_dst1.val[0] = areaDownsamplingDivision<opencv_like,2>(v_el0); in resizeAreaRounding() 809 v_dst1.val[1] = areaDownsamplingDivision<opencv_like,2>(v_el1); in resizeAreaRounding() 810 v_dst1.val[2] = areaDownsamplingDivision<opencv_like,2>(v_el2); in resizeAreaRounding() 814 v_dst.val[0] = vcombine_u8(v_dst0.val[0], v_dst1.val[0]); in resizeAreaRounding() 815 v_dst.val[1] = vcombine_u8(v_dst0.val[1], v_dst1.val[1]); in resizeAreaRounding() 816 v_dst.val[2] = vcombine_u8(v_dst0.val[2], v_dst1.val[2]); in resizeAreaRounding()
|
H A D | colorconvert.cpp | 72 uint32x4_t v_dst1 = vmull_u16(vget_high_u16(v_g), v_g2y); in convertToGray() local 75 v_dst1 = vmlal_u16(v_dst1, vget_high_u16(v_r), v_r2y); in convertToGray() 78 v_dst1 = vmlal_u16(v_dst1, vget_high_u16(v_b), v_b2y); in convertToGray() 81 vrshrn_n_u32(v_dst1, SHIFT))); in convertToGray()
|
/dports/graphics/opencv/opencv-4.5.3/modules/imgproc/src/ |
H A D | resize.sse4_1.cpp | 210 __m128 v_dst1 = _mm_mul_ps(v_b0, _mm_loadu_ps(S0 + x + 4)); in VResizeLanczos4Vec_32f16u_SSE41() local 211 v_dst1 = _mm_add_ps(v_dst1, _mm_mul_ps(v_b1, _mm_loadu_ps(S1 + x + 4))); in VResizeLanczos4Vec_32f16u_SSE41() 212 v_dst1 = _mm_add_ps(v_dst1, _mm_mul_ps(v_b2, _mm_loadu_ps(S2 + x + 4))); in VResizeLanczos4Vec_32f16u_SSE41() 213 v_dst1 = _mm_add_ps(v_dst1, _mm_mul_ps(v_b3, _mm_loadu_ps(S3 + x + 4))); in VResizeLanczos4Vec_32f16u_SSE41() 214 v_dst1 = _mm_add_ps(v_dst1, _mm_mul_ps(v_b4, _mm_loadu_ps(S4 + x + 4))); in VResizeLanczos4Vec_32f16u_SSE41() 215 v_dst1 = _mm_add_ps(v_dst1, _mm_mul_ps(v_b5, _mm_loadu_ps(S5 + x + 4))); in VResizeLanczos4Vec_32f16u_SSE41() 216 v_dst1 = _mm_add_ps(v_dst1, _mm_mul_ps(v_b6, _mm_loadu_ps(S6 + x + 4))); in VResizeLanczos4Vec_32f16u_SSE41() 217 v_dst1 = _mm_add_ps(v_dst1, _mm_mul_ps(v_b7, _mm_loadu_ps(S7 + x + 4))); in VResizeLanczos4Vec_32f16u_SSE41() 220 __m128i v_dsti1 = _mm_cvtps_epi32(v_dst1); in VResizeLanczos4Vec_32f16u_SSE41()
|
H A D | blend.cpp | 120 store_pack_f32tou8(dst + x, v_dst0, v_dst1, v_dst2, v_dst3); in blendLinearSimd() 191 v_store_interleave(dst + x, v_dst0, v_dst1, v_dst2); in blendLinearSimd() 212 v_float32 v_dst0, v_dst1, v_dst2, v_dst3; in blendLinearSimd() local 214 v_dst1 = blend(v_src11, v_src21, v_w11, v_w21); in blendLinearSimd() 218 store_pack_f32tou8(dst + x, v_dst0, v_dst1, v_dst2, v_dst3); in blendLinearSimd() 254 v_float32 v_dst1 = blend(v_src11, v_src21, v_w1, v_w2); in blendLinearSimd() local 256 v_store_interleave(dst + x, v_dst0, v_dst1); in blendLinearSimd() 269 v_float32 v_dst1 = blend(v_src11, v_src21, v_w1, v_w2); in blendLinearSimd() local 272 v_store_interleave(dst + x, v_dst0, v_dst1, v_dst2); in blendLinearSimd() 285 v_float32 v_dst1 = blend(v_src11, v_src21, v_w1, v_w2); in blendLinearSimd() local [all …]
|
H A D | accum.simd.hpp | 590 v_dst1 = v_dst1 + v_src1; in acc_simd_() 645 v_dst1 = v_dst1 + v_src1; in acc_simd_() 771 v_dst1 = v_dst1 + v_src1; in acc_simd_() 806 v_dst1 = v_dst1 + v_src1; in acc_simd_() 881 v_dst1 = _mm256_add_pd(v_src1, v_dst1); in acc_simd_() 1375 v_dst1 = v_fma(v_src1, v_src1, v_dst1); in accSqr_simd_() 1411 v_dst1 = v_fma(v_src1, v_src1, v_dst1); in accSqr_simd_() 1516 v_dst1 = v_fma(v_src1, v_src1, v_dst1); in accSqr_simd_() 1554 v_dst1 = v_fma(v_src1, v_src1, v_dst1); in accSqr_simd_() 1646 v_dst1 = _mm256_add_pd(v_src1, v_dst1); in accSqr_simd_() [all …]
|
H A D | corner.avx.cpp | 171 __m256 v_dst0, v_dst1, v_dst2; in cornerEigenValsVecsLine_AVX() local 173 v_dst1 = _mm256_mul_ps(v_dx, v_dy); in cornerEigenValsVecsLine_AVX() 176 store_interleave(cov_data + j * 3, v_dst0, v_dst1, v_dst2); in cornerEigenValsVecsLine_AVX()
|
H A D | imgwarp.sse4_1.cpp | 65 __m128i v_dst1 = _mm_packs_epi32(_mm_cvtps_epi32(_mm_loadu_ps(src1f + x + 8)), in convertMaps_nninterpolate32f1c16s_SSE41() local 73 _mm_interleave_epi16(v_dst0, v_dst1, v_dst2, v_dst3); in convertMaps_nninterpolate32f1c16s_SSE41() 76 _mm_storeu_si128((__m128i *)(dst1 + x * 2 + 8), v_dst1); in convertMaps_nninterpolate32f1c16s_SSE41() 155 __m128i v_dst1 = _mm_packs_epi32(_mm_srai_epi32(v_src0, INTER_BITS), in convertMaps_32f2c16s_SSE41() local 157 _mm_storeu_si128((__m128i *)(dst1 + x * 2), v_dst1); in convertMaps_32f2c16s_SSE41()
|
H A D | resize.cpp | 2244 … v_dst1 = vaddq_u16(v_dst1, vaddl_u8(vget_high_u8(v_row1.val[0]), vget_high_u8(v_row1.val[1]))); in operator ()() 2245 v_dst1 = vshrq_n_u16(vaddq_u16(v_dst1, v_2), 2); in operator ()() 2247 vst1q_u8(D, vcombine_u8(vmovn_u16(v_dst0), vmovn_u16(v_dst1))); in operator ()() 2304 … v_dst1 = vaddq_u32(v_dst1, vaddl_u16(vget_high_u16(v_row1.val[0]), vget_high_u16(v_row1.val[1]))); in operator ()() 2305 v_dst1 = vshrq_n_u32(vaddq_u32(v_dst1, v_2), 2); in operator ()() 2307 vst1q_u16(D, vcombine_u16(vmovn_u32(v_dst0), vmovn_u32(v_dst1))); in operator ()() 2354 … v_dst1 = vaddq_s32(v_dst1, vaddl_s16(vget_high_s16(v_row1.val[0]), vget_high_s16(v_row1.val[1]))); in operator ()() 2355 v_dst1 = vshrq_n_s32(vaddq_s32(v_dst1, v_2), 2); in operator ()() 2403 float32x4_t v_dst1 = vaddq_f32(v_row1.val[0], v_row1.val[1]); in operator ()() local 2405 vst1q_f32(D, vmulq_f32(vaddq_f32(v_dst0, v_dst1), v_025)); in operator ()() [all …]
|
H A D | corner.cpp | 292 v_float32x4 v_dst0, v_dst1, v_dst2; in cornerEigenValsVecs() local 294 v_dst1 = v_dx * v_dy; in cornerEigenValsVecs() 297 v_store_interleave(cov_data + j * 3, v_dst0, v_dst1, v_dst2); in cornerEigenValsVecs()
|
H A D | imgwarp.cpp | 1970 … v_int32x4 v_dst1 = v_muladd(v_scale3, (v_iy1 & v_mask), (v_ix1 & v_mask)); in convertMaps() local 1971 v_store(dst2 + x, v_pack_u(v_dst0, v_dst1)); in convertMaps() 2078 v_float32x4 v_dst1 = CV_COMPUTE_MAP_X(v_src0[0], v_fxy1); in convertMaps() local 2080 v_store(dst1f + x, v_dst1); in convertMaps() 2083 v_dst1 = CV_COMPUTE_MAP_X(v_src0[1], v_fxy2); in convertMaps() 2085 v_store(dst1f + x + span, v_dst1); in convertMaps()
|
/dports/graphics/opencv/opencv-4.5.3/modules/core/src/ |
H A D | copy.cpp | 198 v_dst1 = vx_load(dst + x), v_dst2 = vx_load(dst + x + v_uint16::nlanes); in copyMask_() local 204 v_dst1 = v_select(v_reinterpret_as_u16(v_nmask1), v_dst1, v_src1); in copyMask_() 206 v_store(dst + x, v_dst1); in copyMask_()
|