1 // Copyright (c) 2007, 2008, 2009 libmv authors. 2 // 3 // Permission is hereby granted, free of charge, to any person obtaining a copy 4 // of this software and associated documentation files (the "Software"), to 5 // deal in the Software without restriction, including without limitation the 6 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 // sell copies of the Software, and to permit persons to whom the Software is 8 // furnished to do so, subject to the following conditions: 9 // 10 // The above copyright notice and this permission notice shall be included in 11 // all copies or substantial portions of the Software. 12 // 13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 // IN THE SOFTWARE. 20 // 21 // A simple implementation of Powell's dogleg nonlinear minimization. 22 // 23 // [1] K. Madsen, H. Nielsen, O. Tingleoff. Methods for Non-linear Least 24 // Squares Problems. 25 // http://www2.imm.dtu.dk/pubdb/views/edoc_download.php/3215/pdf/imm3215.pdf 26 // 27 // TODO(keir): Cite the Lourakis' dogleg paper. 28 29 #ifndef LIBMV_NUMERIC_DOGLEG_H 30 #define LIBMV_NUMERIC_DOGLEG_H 31 32 #include <cmath> 33 #include <cstdio> 34 35 #include "libmv/numeric/numeric.h" 36 #include "libmv/numeric/function_derivative.h" 37 #include "libmv/logging/logging.h" 38 39 namespace libmv { 40 41 template<typename Function, 42 typename Jacobian = NumericJacobian<Function>, 43 typename Solver = Eigen::PartialPivLU< 44 Matrix<typename Function::FMatrixType::RealScalar, 45 Function::XMatrixType::RowsAtCompileTime, 46 Function::XMatrixType::RowsAtCompileTime> > > 47 class Dogleg { 48 public: 49 typedef typename Function::XMatrixType::RealScalar Scalar; 50 typedef typename Function::FMatrixType FVec; 51 typedef typename Function::XMatrixType Parameters; 52 typedef Matrix<typename Function::FMatrixType::RealScalar, 53 Function::FMatrixType::RowsAtCompileTime, 54 Function::XMatrixType::RowsAtCompileTime> JMatrixType; 55 typedef Matrix<typename JMatrixType::RealScalar, 56 JMatrixType::ColsAtCompileTime, 57 JMatrixType::ColsAtCompileTime> AMatrixType; 58 59 enum Status { 60 RUNNING, 61 GRADIENT_TOO_SMALL, // eps > max(J'*f(x)) 62 RELATIVE_STEP_SIZE_TOO_SMALL, // eps > ||dx|| / ||x|| 63 TRUST_REGION_TOO_SMALL, // eps > radius / ||x|| 64 ERROR_TOO_SMALL, // eps > ||f(x)|| 65 HIT_MAX_ITERATIONS, 66 }; 67 68 enum Step { 69 DOGLEG, 70 GAUSS_NEWTON, 71 STEEPEST_DESCENT, 72 }; 73 Dogleg(const Function & f)74 Dogleg(const Function &f) 75 : f_(f), df_(f) {} 76 77 struct SolverParameters { SolverParametersSolverParameters78 SolverParameters() 79 : gradient_threshold(1e-16), 80 relative_step_threshold(1e-16), 81 error_threshold(1e-16), 82 initial_trust_radius(1e0), 83 max_iterations(500) {} 84 Scalar gradient_threshold; // eps > max(J'*f(x)) 85 Scalar relative_step_threshold; // eps > ||dx|| / ||x|| 86 Scalar error_threshold; // eps > ||f(x)|| 87 Scalar initial_trust_radius; // Initial u for solving normal equations. 88 int max_iterations; // Maximum number of solver iterations. 89 }; 90 91 struct Results { 92 Scalar error_magnitude; // ||f(x)|| 93 Scalar gradient_magnitude; // ||J'f(x)|| 94 int iterations; 95 Status status; 96 }; 97 Update(const Parameters & x,const SolverParameters & params,JMatrixType * J,AMatrixType * A,FVec * error,Parameters * g)98 Status Update(const Parameters &x, const SolverParameters ¶ms, 99 JMatrixType *J, AMatrixType *A, FVec *error, Parameters *g) { 100 *J = df_(x); 101 // TODO(keir): In the case of m = n, avoid computing A and just do J^-1 directly. 102 *A = (*J).transpose() * (*J); 103 *error = f_(x); 104 *g = (*J).transpose() * *error; 105 if (g->array().abs().maxCoeff() < params.gradient_threshold) { 106 return GRADIENT_TOO_SMALL; 107 } else if (error->array().abs().maxCoeff() < params.error_threshold) { 108 return ERROR_TOO_SMALL; 109 } 110 return RUNNING; 111 } 112 SolveDoglegDirection(const Parameters & dx_sd,const Parameters & dx_gn,Scalar radius,Scalar alpha,Parameters * dx_dl,Scalar * beta)113 Step SolveDoglegDirection(const Parameters &dx_sd, 114 const Parameters &dx_gn, 115 Scalar radius, 116 Scalar alpha, 117 Parameters *dx_dl, 118 Scalar *beta) { 119 Parameters a, b_minus_a; 120 // Solve for Dogleg step dx_dl. 121 if (dx_gn.norm() < radius) { 122 *dx_dl = dx_gn; 123 return GAUSS_NEWTON; 124 125 } else if (alpha * dx_sd.norm() > radius) { 126 *dx_dl = (radius / dx_sd.norm()) * dx_sd; 127 return STEEPEST_DESCENT; 128 129 } else { 130 Parameters a = alpha * dx_sd; 131 const Parameters &b = dx_gn; 132 b_minus_a = a - b; 133 Scalar Mbma2 = b_minus_a.squaredNorm(); 134 Scalar Ma2 = a.squaredNorm(); 135 Scalar c = a.dot(b_minus_a); 136 Scalar radius2 = radius*radius; 137 if (c <= 0) { 138 *beta = (-c + sqrt(c*c + Mbma2*(radius2 - Ma2)))/(Mbma2); 139 } else { 140 *beta = (radius2 - Ma2) / 141 (c + sqrt(c*c + Mbma2*(radius2 - Ma2))); 142 } 143 *dx_dl = alpha * dx_sd + (*beta) * (dx_gn - alpha*dx_sd); 144 return DOGLEG; 145 } 146 } 147 minimize(Parameters * x_and_min)148 Results minimize(Parameters *x_and_min) { 149 SolverParameters params; 150 return minimize(params, x_and_min); 151 } 152 minimize(const SolverParameters & params,Parameters * x_and_min)153 Results minimize(const SolverParameters ¶ms, Parameters *x_and_min) { 154 Parameters &x = *x_and_min; 155 JMatrixType J; 156 AMatrixType A; 157 FVec error; 158 Parameters g; 159 160 Results results; 161 results.status = Update(x, params, &J, &A, &error, &g); 162 163 Scalar radius = params.initial_trust_radius; 164 bool x_updated = true; 165 166 Parameters x_new; 167 Parameters dx_sd; // Steepest descent step. 168 Parameters dx_dl; // Dogleg step. 169 Parameters dx_gn; // Gauss-Newton step. 170 printf("iteration ||f(x)|| max(g) radius\n"); 171 int i = 0; 172 for (; results.status == RUNNING && i < params.max_iterations; ++i) { 173 printf("%9d %12g %12g %12g", 174 i, f_(x).norm(), g.array().abs().maxCoeff(), radius); 175 176 //LG << "iteration: " << i; 177 //LG << "||f(x)||: " << f_(x).norm(); 178 //LG << "max(g): " << g.cwise().abs().maxCoeff(); 179 //LG << "radius: " << radius; 180 // Eqn 3.19 from [1] 181 Scalar alpha = g.squaredNorm() / (J*g).squaredNorm(); 182 183 // Solve for steepest descent direction dx_sd. 184 dx_sd = -g; 185 186 // Solve for Gauss-Newton direction dx_gn. 187 if (x_updated) { 188 // TODO(keir): See Appendix B of [1] for discussion of when A is 189 // singular and there are many solutions. Solving that involves the SVD 190 // and is slower, but should still work. 191 Solver solver(A); 192 dx_gn = solver.solve(-g); 193 if (!(A * dx_gn).isApprox(-g)) { 194 LOG(ERROR) << "Failed to solve normal eqns. TODO: Solve via SVD."; 195 return results; 196 } 197 x_updated = false; 198 } 199 200 // Solve for dogleg direction dx_dl. 201 Scalar beta = 0; 202 Step step = SolveDoglegDirection(dx_sd, dx_gn, radius, alpha, 203 &dx_dl, &beta); 204 205 Scalar e3 = params.relative_step_threshold; 206 if (dx_dl.norm() < e3*(x.norm() + e3)) { 207 results.status = RELATIVE_STEP_SIZE_TOO_SMALL; 208 break; 209 } 210 211 x_new = x + dx_dl; 212 Scalar actual = f_(x).squaredNorm() - f_(x_new).squaredNorm(); 213 Scalar predicted = 0; 214 if (step == GAUSS_NEWTON) { 215 predicted = f_(x).squaredNorm(); 216 } else if (step == STEEPEST_DESCENT) { 217 predicted = radius * (2*alpha*g.norm() - radius) / 2 / alpha; 218 } else if (step == DOGLEG) { 219 predicted = 0.5 * alpha * (1-beta)*(1-beta)*g.squaredNorm() + 220 beta*(2-beta)*f_(x).squaredNorm(); 221 } 222 Scalar rho = actual / predicted; 223 224 if (step == GAUSS_NEWTON) printf(" GAUSS"); 225 if (step == STEEPEST_DESCENT) printf(" STEE"); 226 if (step == DOGLEG) printf(" DOGL"); 227 228 printf(" %12g %12g %12g\n", rho, actual, predicted); 229 230 if (rho > 0) { 231 // Accept update because the linear model is a good fit. 232 x = x_new; 233 results.status = Update(x, params, &J, &A, &error, &g); 234 x_updated = true; 235 } 236 if (rho > 0.75) { 237 radius = std::max(radius, 3*dx_dl.norm()); 238 } else if (rho < 0.25) { 239 radius /= 2; 240 if (radius < e3 * (x.norm() + e3)) { 241 results.status = TRUST_REGION_TOO_SMALL; 242 } 243 } 244 } 245 if (results.status == RUNNING) { 246 results.status = HIT_MAX_ITERATIONS; 247 } 248 results.error_magnitude = error.norm(); 249 results.gradient_magnitude = g.norm(); 250 results.iterations = i; 251 return results; 252 } 253 254 private: 255 const Function &f_; 256 Jacobian df_; 257 }; 258 259 } // namespace mv 260 261 #endif // LIBMV_NUMERIC_DOGLEG_H 262