1 /* 2 * By downloading, copying, installing or using the software you agree to this license. 3 * If you do not agree to this license, do not download, install, 4 * copy or use the software. 5 * 6 * 7 * License Agreement 8 * For Open Source Computer Vision Library 9 * (3-clause BSD License) 10 * 11 * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved. 12 * Third party copyrights are property of their respective owners. 13 * 14 * Redistribution and use in source and binary forms, with or without modification, 15 * are permitted provided that the following conditions are met: 16 * 17 * * Redistributions of source code must retain the above copyright notice, 18 * this list of conditions and the following disclaimer. 19 * 20 * * Redistributions in binary form must reproduce the above copyright notice, 21 * this list of conditions and the following disclaimer in the documentation 22 * and/or other materials provided with the distribution. 23 * 24 * * Neither the names of the copyright holders nor the names of the contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * This software is provided by the copyright holders and contributors "as is" and 29 * any express or implied warranties, including, but not limited to, the implied 30 * warranties of merchantability and fitness for a particular purpose are disclaimed. 31 * In no event shall copyright holders or contributors be liable for any direct, 32 * indirect, incidental, special, exemplary, or consequential damages 33 * (including, but not limited to, procurement of substitute goods or services; 34 * loss of use, data, or profits; or business interruption) however caused 35 * and on any theory of liability, whether in contract, strict liability, 36 * or tort (including negligence or otherwise) arising in any way out of 37 * the use of this software, even if advised of the possibility of such damage. 38 */ 39 40 #include "common.hpp" 41 #include "vtransform.hpp" 42 43 namespace CAROTENE_NS { 44 45 #ifdef CAROTENE_NEON 46 47 namespace { 48 49 using namespace internal; 50 51 template <typename T> struct TypeTraits; 52 template <> struct TypeTraits< u8> { typedef u16 wide; typedef u8 unsign; typedef uint8x16_t vec128; }; 53 template <> struct TypeTraits< s8> { typedef s16 wide; typedef u8 unsign; typedef int8x16_t vec128; }; 54 template <> struct TypeTraits<u16> { typedef u32 wide; typedef u8 narrow; typedef u16 unsign; typedef uint16x8_t vec128; }; 55 template <> struct TypeTraits<s16> { typedef s32 wide; typedef s8 narrow; typedef u16 unsign; typedef int16x8_t vec128; }; 56 template <> struct TypeTraits<u32> { typedef u64 wide; typedef u16 narrow; typedef u32 unsign; typedef uint32x4_t vec128; }; 57 template <> struct TypeTraits<s32> { typedef s64 wide; typedef s16 narrow; typedef u32 unsign; typedef int32x4_t vec128; }; 58 template <> struct TypeTraits<f32> { typedef f64 wide; typedef float32x4_t vec128; }; 59 60 template <typename T> struct wAdd 61 { 62 typedef T type; 63 64 f32 alpha, beta, gamma; 65 typedef typename TypeTraits<T>::wide wtype; 66 wAdd<wtype> wideAdd; wAddCAROTENE_NS::__anone3ae816c0111::wAdd67 wAdd(f32 _alpha, f32 _beta, f32 _gamma): 68 alpha(_alpha), beta(_beta), gamma(_gamma), 69 wideAdd(_alpha, _beta, _gamma) {} 70 operator ()CAROTENE_NS::__anone3ae816c0111::wAdd71 void operator() (const typename VecTraits<T>::vec128 & v_src0, 72 const typename VecTraits<T>::vec128 & v_src1, 73 typename VecTraits<T>::vec128 & v_dst) const 74 { 75 typename VecTraits<wtype>::vec128 vrl, vrh; 76 wideAdd(vmovl( vget_low(v_src0)), vmovl( vget_low(v_src1)), vrl); 77 wideAdd(vmovl(vget_high(v_src0)), vmovl(vget_high(v_src1)), vrh); 78 79 v_dst = vcombine(vqmovn(vrl), vqmovn(vrh)); 80 } 81 operator ()CAROTENE_NS::__anone3ae816c0111::wAdd82 void operator() (const typename VecTraits<T>::vec64 & v_src0, 83 const typename VecTraits<T>::vec64 & v_src1, 84 typename VecTraits<T>::vec64 & v_dst) const 85 { 86 typename VecTraits<wtype>::vec128 vr; 87 wideAdd(vmovl(v_src0), vmovl(v_src1), vr); 88 89 v_dst = vqmovn(vr); 90 } 91 operator ()CAROTENE_NS::__anone3ae816c0111::wAdd92 void operator() (const T * src0, const T * src1, T * dst) const 93 { 94 dst[0] = saturate_cast<T>(alpha*src0[0] + beta*src1[0] + gamma); 95 } 96 }; 97 98 template <> struct wAdd<s32> 99 { 100 typedef s32 type; 101 102 f32 alpha, beta, gamma; 103 float32x4_t valpha, vbeta, vgamma; wAddCAROTENE_NS::__anone3ae816c0111::wAdd104 wAdd(f32 _alpha, f32 _beta, f32 _gamma): 105 alpha(_alpha), beta(_beta), gamma(_gamma) 106 { 107 valpha = vdupq_n_f32(_alpha); 108 vbeta = vdupq_n_f32(_beta); 109 vgamma = vdupq_n_f32(_gamma + 0.5); 110 } 111 operator ()CAROTENE_NS::__anone3ae816c0111::wAdd112 void operator() (const typename VecTraits<s32>::vec128 & v_src0, 113 const typename VecTraits<s32>::vec128 & v_src1, 114 typename VecTraits<s32>::vec128 & v_dst) const 115 { 116 float32x4_t vs1 = vcvtq_f32_s32(v_src0); 117 float32x4_t vs2 = vcvtq_f32_s32(v_src1); 118 119 vs1 = vmlaq_f32(vgamma, vs1, valpha); 120 vs1 = vmlaq_f32(vs1, vs2, vbeta); 121 v_dst = vcvtq_s32_f32(vs1); 122 } 123 operator ()CAROTENE_NS::__anone3ae816c0111::wAdd124 void operator() (const typename VecTraits<s32>::vec64 & v_src0, 125 const typename VecTraits<s32>::vec64 & v_src1, 126 typename VecTraits<s32>::vec64 & v_dst) const 127 { 128 float32x2_t vs1 = vcvt_f32_s32(v_src0); 129 float32x2_t vs2 = vcvt_f32_s32(v_src1); 130 131 vs1 = vmla_f32(vget_low(vgamma), vs1, vget_low(valpha)); 132 vs1 = vmla_f32(vs1, vs2, vget_low(vbeta)); 133 v_dst = vcvt_s32_f32(vs1); 134 } 135 operator ()CAROTENE_NS::__anone3ae816c0111::wAdd136 void operator() (const s32 * src0, const s32 * src1, s32 * dst) const 137 { 138 dst[0] = saturate_cast<s32>(alpha*src0[0] + beta*src1[0] + gamma); 139 } 140 }; 141 142 template <> struct wAdd<u32> 143 { 144 typedef u32 type; 145 146 f32 alpha, beta, gamma; 147 float32x4_t valpha, vbeta, vgamma; wAddCAROTENE_NS::__anone3ae816c0111::wAdd148 wAdd(f32 _alpha, f32 _beta, f32 _gamma): 149 alpha(_alpha), beta(_beta), gamma(_gamma) 150 { 151 valpha = vdupq_n_f32(_alpha); 152 vbeta = vdupq_n_f32(_beta); 153 vgamma = vdupq_n_f32(_gamma + 0.5); 154 } 155 operator ()CAROTENE_NS::__anone3ae816c0111::wAdd156 void operator() (const typename VecTraits<u32>::vec128 & v_src0, 157 const typename VecTraits<u32>::vec128 & v_src1, 158 typename VecTraits<u32>::vec128 & v_dst) const 159 { 160 float32x4_t vs1 = vcvtq_f32_u32(v_src0); 161 float32x4_t vs2 = vcvtq_f32_u32(v_src1); 162 163 vs1 = vmlaq_f32(vgamma, vs1, valpha); 164 vs1 = vmlaq_f32(vs1, vs2, vbeta); 165 v_dst = vcvtq_u32_f32(vs1); 166 } 167 operator ()CAROTENE_NS::__anone3ae816c0111::wAdd168 void operator() (const typename VecTraits<u32>::vec64 & v_src0, 169 const typename VecTraits<u32>::vec64 & v_src1, 170 typename VecTraits<u32>::vec64 & v_dst) const 171 { 172 float32x2_t vs1 = vcvt_f32_u32(v_src0); 173 float32x2_t vs2 = vcvt_f32_u32(v_src1); 174 175 vs1 = vmla_f32(vget_low(vgamma), vs1, vget_low(valpha)); 176 vs1 = vmla_f32(vs1, vs2, vget_low(vbeta)); 177 v_dst = vcvt_u32_f32(vs1); 178 } 179 operator ()CAROTENE_NS::__anone3ae816c0111::wAdd180 void operator() (const u32 * src0, const u32 * src1, u32 * dst) const 181 { 182 dst[0] = saturate_cast<u32>(alpha*src0[0] + beta*src1[0] + gamma); 183 } 184 }; 185 186 template <> struct wAdd<f32> 187 { 188 typedef f32 type; 189 190 f32 alpha, beta, gamma; 191 float32x4_t valpha, vbeta, vgamma; wAddCAROTENE_NS::__anone3ae816c0111::wAdd192 wAdd(f32 _alpha, f32 _beta, f32 _gamma): 193 alpha(_alpha), beta(_beta), gamma(_gamma) 194 { 195 valpha = vdupq_n_f32(_alpha); 196 vbeta = vdupq_n_f32(_beta); 197 vgamma = vdupq_n_f32(_gamma + 0.5); 198 } 199 operator ()CAROTENE_NS::__anone3ae816c0111::wAdd200 void operator() (const typename VecTraits<f32>::vec128 & v_src0, 201 const typename VecTraits<f32>::vec128 & v_src1, 202 typename VecTraits<f32>::vec128 & v_dst) const 203 { 204 float32x4_t vs1 = vmlaq_f32(vgamma, v_src0, valpha); 205 v_dst = vmlaq_f32(vs1, v_src1, vbeta); 206 } 207 operator ()CAROTENE_NS::__anone3ae816c0111::wAdd208 void operator() (const typename VecTraits<f32>::vec64 & v_src0, 209 const typename VecTraits<f32>::vec64 & v_src1, 210 typename VecTraits<f32>::vec64 & v_dst) const 211 { 212 float32x2_t vs1 = vmla_f32(vget_low(vgamma), v_src0, vget_low(valpha)); 213 v_dst = vmla_f32(vs1, v_src1, vget_low(vbeta)); 214 215 } 216 operator ()CAROTENE_NS::__anone3ae816c0111::wAdd217 void operator() (const f32 * src0, const f32 * src1, f32 * dst) const 218 { 219 dst[0] = alpha*src0[0] + beta*src1[0] + gamma; 220 } 221 }; 222 223 } // namespace 224 225 #define IMPL_ADDWEIGHTED(type) \ 226 void addWeighted(const Size2D &size, \ 227 const type * src0Base, ptrdiff_t src0Stride, \ 228 const type * src1Base, ptrdiff_t src1Stride, \ 229 type * dstBase, ptrdiff_t dstStride, \ 230 f32 alpha, f32 beta, f32 gamma) \ 231 { \ 232 internal::assertSupportedConfiguration(); \ 233 wAdd<type> wgtAdd(alpha, \ 234 beta, \ 235 gamma); \ 236 internal::vtransform(size, \ 237 src0Base, src0Stride, \ 238 src1Base, src1Stride, \ 239 dstBase, dstStride, \ 240 wgtAdd); \ 241 } 242 243 #else 244 245 #define IMPL_ADDWEIGHTED(type) \ 246 void addWeighted(const Size2D &, \ 247 const type *, ptrdiff_t, \ 248 const type *, ptrdiff_t, \ 249 type *, ptrdiff_t, \ 250 f32, f32, f32) \ 251 { \ 252 internal::assertSupportedConfiguration(); \ 253 } 254 255 #endif 256 257 IMPL_ADDWEIGHTED(u8) 258 IMPL_ADDWEIGHTED(s8) 259 IMPL_ADDWEIGHTED(u16) 260 IMPL_ADDWEIGHTED(s16) 261 IMPL_ADDWEIGHTED(u32) 262 IMPL_ADDWEIGHTED(s32) 263 IMPL_ADDWEIGHTED(f32) 264 265 } // namespace CAROTENE_NS 266