1 // Copyright (c) 2007, 2008, 2009 libmv authors.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to
5 // deal in the Software without restriction, including without limitation the
6 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7 // sell copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19 // IN THE SOFTWARE.
20 //
21 // A simple implementation of Powell's dogleg nonlinear minimization.
22 //
// [1] K. Madsen, H. Nielsen, O. Tingleff. Methods for Non-linear Least
// Squares Problems.
// http://www2.imm.dtu.dk/pubdb/views/edoc_download.php/3215/pdf/imm3215.pdf
26 //
// TODO(keir): Cite Lourakis' dogleg paper.
28 
29 #ifndef LIBMV_NUMERIC_DOGLEG_H
30 #define LIBMV_NUMERIC_DOGLEG_H
31 
#include <algorithm>
#include <cmath>
#include <cstdio>

#include "libmv/numeric/numeric.h"
#include "libmv/numeric/function_derivative.h"
#include "libmv/logging/logging.h"
38 
39 namespace libmv {
40 
41 template<typename Function,
42          typename Jacobian = NumericJacobian<Function>,
43          typename Solver = Eigen::PartialPivLU<
44            Matrix<typename Function::FMatrixType::RealScalar,
45                   Function::XMatrixType::RowsAtCompileTime,
46                   Function::XMatrixType::RowsAtCompileTime> > >
47 class Dogleg {
48  public:
49   typedef typename Function::XMatrixType::RealScalar Scalar;
50   typedef typename Function::FMatrixType FVec;
51   typedef typename Function::XMatrixType Parameters;
52   typedef Matrix<typename Function::FMatrixType::RealScalar,
53                  Function::FMatrixType::RowsAtCompileTime,
54                  Function::XMatrixType::RowsAtCompileTime> JMatrixType;
55   typedef Matrix<typename JMatrixType::RealScalar,
56                  JMatrixType::ColsAtCompileTime,
57                  JMatrixType::ColsAtCompileTime> AMatrixType;
58 
59   enum Status {
60     RUNNING,
61     GRADIENT_TOO_SMALL,            // eps > max(J'*f(x))
62     RELATIVE_STEP_SIZE_TOO_SMALL,  // eps > ||dx|| / ||x||
63     TRUST_REGION_TOO_SMALL,        // eps > radius / ||x||
64     ERROR_TOO_SMALL,               // eps > ||f(x)||
65     HIT_MAX_ITERATIONS,
66   };
67 
68   enum Step {
69     DOGLEG,
70     GAUSS_NEWTON,
71     STEEPEST_DESCENT,
72   };
73 
Dogleg(const Function & f)74   Dogleg(const Function &f)
75       : f_(f), df_(f) {}
76 
77   struct SolverParameters {
SolverParametersSolverParameters78     SolverParameters()
79        : gradient_threshold(1e-16),
80          relative_step_threshold(1e-16),
81          error_threshold(1e-16),
82          initial_trust_radius(1e0),
83          max_iterations(500) {}
84     Scalar gradient_threshold;       // eps > max(J'*f(x))
85     Scalar relative_step_threshold;  // eps > ||dx|| / ||x||
86     Scalar error_threshold;          // eps > ||f(x)||
87     Scalar initial_trust_radius;     // Initial u for solving normal equations.
88     int    max_iterations;           // Maximum number of solver iterations.
89   };
90 
91   struct Results {
92     Scalar error_magnitude;     // ||f(x)||
93     Scalar gradient_magnitude;  // ||J'f(x)||
94     int    iterations;
95     Status status;
96   };
97 
Update(const Parameters & x,const SolverParameters & params,JMatrixType * J,AMatrixType * A,FVec * error,Parameters * g)98   Status Update(const Parameters &x, const SolverParameters &params,
99                 JMatrixType *J, AMatrixType *A, FVec *error, Parameters *g) {
100     *J = df_(x);
101     // TODO(keir): In the case of m = n, avoid computing A and just do J^-1 directly.
102     *A = (*J).transpose() * (*J);
103     *error = f_(x);
104     *g = (*J).transpose() * *error;
105     if (g->array().abs().maxCoeff() < params.gradient_threshold) {
106       return GRADIENT_TOO_SMALL;
107     } else if (error->array().abs().maxCoeff() < params.error_threshold) {
108       return ERROR_TOO_SMALL;
109     }
110     return RUNNING;
111   }
112 
SolveDoglegDirection(const Parameters & dx_sd,const Parameters & dx_gn,Scalar radius,Scalar alpha,Parameters * dx_dl,Scalar * beta)113   Step SolveDoglegDirection(const Parameters &dx_sd,
114                             const Parameters &dx_gn,
115                             Scalar radius,
116                             Scalar alpha,
117                             Parameters *dx_dl,
118                             Scalar *beta) {
119     Parameters a, b_minus_a;
120     // Solve for Dogleg step dx_dl.
121     if (dx_gn.norm() < radius) {
122       *dx_dl = dx_gn;
123       return GAUSS_NEWTON;
124 
125     } else if (alpha * dx_sd.norm() > radius) {
126       *dx_dl = (radius / dx_sd.norm()) * dx_sd;
127       return STEEPEST_DESCENT;
128 
129     } else {
130       Parameters a = alpha * dx_sd;
131       const Parameters &b = dx_gn;
132       b_minus_a = a - b;
133       Scalar Mbma2 = b_minus_a.squaredNorm();
134       Scalar Ma2 = a.squaredNorm();
135       Scalar c = a.dot(b_minus_a);
136       Scalar radius2 = radius*radius;
137       if (c <= 0) {
138         *beta = (-c + sqrt(c*c + Mbma2*(radius2 - Ma2)))/(Mbma2);
139       } else {
140         *beta = (radius2 - Ma2) /
141                (c + sqrt(c*c + Mbma2*(radius2 - Ma2)));
142       }
143       *dx_dl = alpha * dx_sd + (*beta) * (dx_gn - alpha*dx_sd);
144       return DOGLEG;
145     }
146   }
147 
minimize(Parameters * x_and_min)148   Results minimize(Parameters *x_and_min) {
149     SolverParameters params;
150     return minimize(params, x_and_min);
151   }
152 
minimize(const SolverParameters & params,Parameters * x_and_min)153   Results minimize(const SolverParameters &params, Parameters *x_and_min) {
154     Parameters &x = *x_and_min;
155     JMatrixType J;
156     AMatrixType A;
157     FVec error;
158     Parameters g;
159 
160     Results results;
161     results.status = Update(x, params, &J, &A, &error, &g);
162 
163     Scalar radius = params.initial_trust_radius;
164     bool x_updated = true;
165 
166     Parameters x_new;
167     Parameters dx_sd;  // Steepest descent step.
168     Parameters dx_dl;  // Dogleg step.
169     Parameters dx_gn;  // Gauss-Newton step.
170       printf("iteration     ||f(x)||      max(g)       radius\n");
171     int i = 0;
172     for (; results.status == RUNNING && i < params.max_iterations; ++i) {
173       printf("%9d %12g %12g %12g",
174           i, f_(x).norm(), g.array().abs().maxCoeff(), radius);
175 
176       //LG << "iteration: " << i;
177       //LG << "||f(x)||: " << f_(x).norm();
178       //LG << "max(g): " << g.cwise().abs().maxCoeff();
179       //LG << "radius: " << radius;
180       // Eqn 3.19 from [1]
181       Scalar alpha = g.squaredNorm() / (J*g).squaredNorm();
182 
183       // Solve for steepest descent direction dx_sd.
184       dx_sd = -g;
185 
186       // Solve for Gauss-Newton direction dx_gn.
187       if (x_updated) {
188         // TODO(keir): See Appendix B of [1] for discussion of when A is
189         // singular and there are many solutions. Solving that involves the SVD
190         // and is slower, but should still work.
191         Solver solver(A);
192         dx_gn = solver.solve(-g);
193         if (!(A * dx_gn).isApprox(-g)) {
194           LOG(ERROR) << "Failed to solve normal eqns. TODO: Solve via SVD.";
195           return results;
196         }
197         x_updated = false;
198       }
199 
200       // Solve for dogleg direction dx_dl.
201       Scalar beta = 0;
202       Step step = SolveDoglegDirection(dx_sd, dx_gn, radius, alpha,
203                                        &dx_dl, &beta);
204 
205       Scalar e3 = params.relative_step_threshold;
206       if (dx_dl.norm() < e3*(x.norm() + e3)) {
207         results.status = RELATIVE_STEP_SIZE_TOO_SMALL;
208         break;
209       }
210 
211       x_new = x + dx_dl;
212       Scalar actual = f_(x).squaredNorm() - f_(x_new).squaredNorm();
213       Scalar predicted = 0;
214       if (step == GAUSS_NEWTON) {
215         predicted = f_(x).squaredNorm();
216       } else if (step == STEEPEST_DESCENT) {
217         predicted = radius * (2*alpha*g.norm() - radius) / 2 / alpha;
218       } else if (step == DOGLEG) {
219         predicted = 0.5 * alpha * (1-beta)*(1-beta)*g.squaredNorm() +
220                     beta*(2-beta)*f_(x).squaredNorm();
221       }
222       Scalar rho = actual / predicted;
223 
224       if (step == GAUSS_NEWTON) printf("  GAUSS");
225       if (step == STEEPEST_DESCENT) printf("   STEE");
226       if (step == DOGLEG) printf("   DOGL");
227 
228       printf(" %12g %12g %12g\n", rho, actual, predicted);
229 
230       if (rho > 0) {
231         // Accept update because the linear model is a good fit.
232         x = x_new;
233         results.status = Update(x, params, &J, &A, &error, &g);
234         x_updated = true;
235       }
236       if (rho > 0.75) {
237         radius = std::max(radius, 3*dx_dl.norm());
238       } else if (rho < 0.25) {
239         radius /= 2;
240         if (radius < e3 * (x.norm() + e3)) {
241           results.status = TRUST_REGION_TOO_SMALL;
242         }
243       }
244     }
245     if (results.status == RUNNING) {
246       results.status = HIT_MAX_ITERATIONS;
247     }
248     results.error_magnitude = error.norm();
249     results.gradient_magnitude = g.norm();
250     results.iterations = i;
251     return results;
252   }
253 
254  private:
255   const Function &f_;
256   Jacobian df_;
257 };
258 
}  // namespace libmv
260 
261 #endif  // LIBMV_NUMERIC_DOGLEG_H
262