1 //////////////////////////////////////////////////////////////////////////// 2 // File: SparseBundleCPU.h 3 // Author: Changchang Wu (ccwu@cs.washington.edu) 4 // Description : interface of the CPU-version of multi-core bundle adjustment 5 // 6 // Copyright (c) 2011 Changchang Wu (ccwu@cs.washington.edu) 7 // and the University of Washington at Seattle 8 // 9 // This library is free software; you can redistribute it and/or 10 // modify it under the terms of the GNU General Public 11 // License as published by the Free Software Foundation; either 12 // Version 3 of the License, or (at your option) any later version. 13 // 14 // This library is distributed in the hope that it will be useful, 15 // but WITHOUT ANY WARRANTY; without even the implied warranty of 16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 // General Public License for more details. 18 // 19 //////////////////////////////////////////////////////////////////////////////// 20 21 #if !defined(SPARSE_BUNDLE_CPU_H) 22 #define SPARSE_BUNDLE_CPU_H 23 24 // BYTE-ALIGNMENT for data allocation (16 required for SSE, 32 required for AVX) 25 // PREVIOUS version uses only SSE. The new version will include AVX. 26 // SO the alignment is increased from 16 to 32 27 #define VECTOR_ALIGNMENT 32 28 #define FLOAT_ALIGN 8 29 #define VECTOR_ALIGNMENT_MASK (VECTOR_ALIGNMENT - 1) 30 #define ALIGN_PTR(p) \ 31 ((((size_t)p) + VECTOR_ALIGNMENT_MASK) & (~VECTOR_ALIGNMENT_MASK)) 32 33 namespace pba { 34 35 template <class Float> 36 class avec { 37 bool _owner; 38 Float* _data; 39 Float* _last; 40 size_t _size; 41 size_t _capacity; 42 43 public: allocate(size_t count)44 static Float* allocate(size_t count) { 45 size_t size = count * sizeof(Float); 46 #ifdef _MSC_VER 47 Float* p = (Float*)_aligned_malloc(size, VECTOR_ALIGNMENT); 48 if (p == NULL) throw std::bad_alloc(); 49 return p; 50 #else 51 char* p = (char*)malloc(size + VECTOR_ALIGNMENT + 4); 52 if (p == NULL) throw std::bad_alloc(); 53 char* p1 = p + 1; 54 char* p2 = 55 (char*)ALIGN_PTR(p1); //(char*) (((((size_t)p1) + 15) >> 4) << 4); 56 char* p3 = (p2 - 1); 57 p3[0] = (p2 - p); 58 return (Float*)p2; 59 #endif 60 } deallocate(void * p)61 static void deallocate(void* p) { 62 #ifdef _MSC_VER 63 _aligned_free(p); 64 #else 65 char* p3 = ((char*)p) - 1; 66 free(((char*)p) - p3[0]); 67 #endif 68 } 69 70 public: avec()71 avec() { 72 _owner = true; 73 _last = _data = NULL; 74 _size = _capacity = 0; 75 } avec(size_t count)76 avec(size_t count) { 77 _data = allocate(count); 78 _size = _capacity = count; 79 _last = _data + count; 80 _owner = true; 81 } ~avec()82 ~avec() { 83 if (_data && _owner) deallocate(_data); 84 } 85 resize(size_t newcount)86 inline void resize(size_t newcount) { 87 if (!_owner) { 88 _data = _last = NULL; 89 _capacity = _size = 0; 90 _owner = true; 91 } 92 if (newcount <= _capacity) { 93 _size = newcount; 94 _last = _data + newcount; 95 } else { 96 if (_data && _owner) deallocate(_data); 97 _data = allocate(newcount); 98 _size = _capacity = newcount; 99 _last = _data + newcount; 100 } 101 } 102 set(Float * data,size_t count)103 inline void set(Float* data, size_t count) { 104 if (_data && _owner) deallocate(_data); 105 _data = data; 106 _owner = false; 107 _size = count; 108 _last = _data + _size; 109 _capacity = count; 110 } swap(avec<Float> & next)111 inline void swap(avec<Float>& next) { 112 bool _owner_bak = _owner; 113 Float* _data_bak = _data; 114 Float* _last_bak = _last; 115 size_t _size_bak = _size; 116 size_t _capa_bak = _capacity; 117 118 _owner = next._owner; 119 _data = next._data; 120 _last = next._last; 121 _size = next._size; 122 _capacity = next._capacity; 123 124 next._owner = _owner_bak; 125 next._data = _data_bak; 126 next._last = _last_bak; 127 next._size = _size_bak; 128 next._capacity = _capa_bak; 129 } 130 131 inline operator Float*() { return _size ? _data : NULL; } 132 inline operator Float* const() const { return _data; } begin()133 inline Float* begin() { return _size ? _data : NULL; } data()134 inline Float* data() { return _size ? _data : NULL; } end()135 inline Float* end() { return _last; } begin()136 inline const Float* begin() const { return _size ? _data : NULL; } end()137 inline const Float* end() const { return _last; } size()138 inline size_t size() const { return _size; } IsValid()139 inline size_t IsValid() const { return _size; } 140 void SaveToFile(const char* name); 141 }; 142 143 template <class Float> 144 class SparseBundleCPU : public ParallelBA, public ConfigBA { 145 public: 146 SparseBundleCPU(const int num_threads); 147 148 typedef avec<Float> VectorF; 149 typedef std::vector<int> VectorI; 150 typedef float float_t; 151 152 protected: // cpu data 153 int _num_camera; 154 int _num_point; 155 int _num_imgpt; 156 CameraT* _camera_data; 157 float* _point_data; 158 159 //////////////////////////////// 160 const float* _imgpt_data; 161 const int* _camera_idx; 162 const int* _point_idx; 163 const int* _focal_mask; 164 165 ///////////sumed square error 166 float _projection_sse; 167 168 protected: // cuda data 169 VectorF _cuCameraData; 170 VectorF _cuCameraDataEX; 171 VectorF _cuPointData; 172 VectorF _cuPointDataEX; 173 VectorF _cuMeasurements; 174 VectorF _cuImageProj; 175 VectorF _cuJacobianCamera; 176 VectorF _cuJacobianPoint; 177 VectorF _cuJacobianCameraT; 178 VectorI _cuProjectionMap; 179 VectorI _cuPointMeasurementMap; 180 VectorI _cuCameraMeasurementMap; 181 VectorI _cuCameraMeasurementList; 182 VectorI _cuCameraMeasurementListT; 183 184 ////////////////////////// 185 VectorF _cuBlockPC; 186 VectorF _cuVectorSJ; 187 188 /// LM normal equation 189 VectorF _cuVectorJtE; 190 VectorF _cuVectorJJ; 191 VectorF _cuVectorJX; 192 VectorF _cuVectorXK; 193 VectorF _cuVectorPK; 194 VectorF _cuVectorZK; 195 VectorF _cuVectorRK; 196 197 ////////////////////////////////// 198 protected: 199 int _num_imgpt_q; 200 float _weight_q; 201 VectorI _cuCameraQList; 202 VectorI _cuCameraQMap; 203 VectorF _cuCameraQMapW; 204 VectorF _cuCameraQListW; 205 206 protected: 207 bool ProcessIndexCameraQ(std::vector<int>& qmap, std::vector<int>& qlist); 208 void ProcessWeightCameraQ(std::vector<int>& cpnum, std::vector<int>& qmap, 209 Float* qmapw, Float* qlistw); 210 211 protected: // internal functions 212 int ValidateInputData(); 213 int InitializeBundle(); 214 int GetParameterLength(); 215 void BundleAdjustment(); 216 void NormalizeData(); 217 void TransferDataToHost(); 218 void DenormalizeData(); 219 void NormalizeDataF(); 220 void NormalizeDataD(); 221 bool InitializeStorageForSFM(); 222 bool InitializeStorageForCG(); 223 224 void SaveBundleRecord(int iter, float res, float damping, float& g_norm, 225 float& g_inf); 226 227 protected: 228 void PrepareJacobianNormalization(); 229 void EvaluateJacobians(); 230 void ComputeJtE(VectorF& E, VectorF& JtE, int mode = 0); 231 void ComputeJX(VectorF& X, VectorF& JX, int mode = 0); 232 void ComputeDiagonal(VectorF& JJI); 233 void ComputeBlockPC(float lambda, bool dampd); 234 void ApplyBlockPC(VectorF& v, VectorF& pv, int mode = 0); 235 float UpdateCameraPoint(VectorF& dx, VectorF& cuImageTempProj); 236 float EvaluateProjection(VectorF& cam, VectorF& point, VectorF& proj); 237 float EvaluateProjectionX(VectorF& cam, VectorF& point, VectorF& proj); 238 float SaveUpdatedSystem(float residual_reduction, float dx_sqnorm, 239 float damping); 240 float EvaluateDeltaNorm(); 241 int SolveNormalEquationPCGB(float lambda); 242 int SolveNormalEquationPCGX(float lambda); 243 int SolveNormalEquation(float lambda); 244 void NonlinearOptimizeLM(); 245 void AdjustBundleAdjsutmentMode(); 246 void RunProfileSteps(); 247 void RunTestIterationLM(bool reduced); 248 void DumpCooJacobian(); 249 250 private: 251 static int FindProcessorCoreNum(); 252 253 public: AbortBundleAdjustment()254 virtual void AbortBundleAdjustment() { __abort_flag = true; } GetCurrentIteration()255 virtual int GetCurrentIteration() { return __current_iteration; } SetNextTimeBudget(int seconds)256 virtual void SetNextTimeBudget(int seconds) { 257 __bundle_time_budget = seconds; 258 } SetNextBundleMode(BundleModeT mode)259 virtual void SetNextBundleMode(BundleModeT mode) { 260 __bundle_mode_next = mode; 261 } SetFixedIntrinsics(bool fixed)262 virtual void SetFixedIntrinsics(bool fixed) { __fixed_intrinsics = fixed; } EnableRadialDistortion(DistortionT type)263 virtual void EnableRadialDistortion(DistortionT type) { 264 __use_radial_distortion = type; 265 } ParseParam(int narg,char ** argv)266 virtual void ParseParam(int narg, char** argv) { 267 ConfigBA::ParseParam(narg, argv); 268 } GetInternalConfig()269 virtual ConfigBA* GetInternalConfig() { return this; } 270 271 public: 272 SparseBundleCPU(); 273 virtual void SetCameraData(size_t ncam, CameraT* cams); 274 virtual void SetPointData(size_t npoint, Point3D* pts); 275 virtual void SetProjection(size_t nproj, const Point2D* imgpts, 276 const int* point_idx, const int* cam_idx); 277 virtual void SetFocalMask(const int* fmask, float weight); 278 virtual float GetMeanSquaredError(); 279 virtual int RunBundleAdjustment(); 280 }; 281 282 ParallelBA* NewSparseBundleCPU(bool dp, const int num_threads); 283 284 } // namespace pba 285 286 #endif 287