// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2019 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)

#ifndef CERES_PUBLIC_COVARIANCE_H_
#define CERES_PUBLIC_COVARIANCE_H_

#include <memory>
#include <utility>
#include <vector>

#include "ceres/internal/disable_warnings.h"
#include "ceres/internal/port.h"
#include "ceres/types.h"

namespace ceres {

class Problem;

namespace internal {
class CovarianceImpl;
}  // namespace internal

// WARNING
// =======
// It is very easy to use this class incorrectly without understanding
// the underlying mathematics. Please read and understand the
// documentation completely before attempting to use it.
//
// This class allows the user to evaluate the covariance for a
// non-linear least squares problem and provides random access to its
// blocks.
//
// Background
// ==========
// One way to assess the quality of the solution returned by a
// non-linear least squares solver is to analyze the covariance of the
// solution.
//
// Let us consider the non-linear regression problem
//
//   y = f(x) + N(0, I)
//
// i.e., the observation y is a random non-linear function of the
// independent variable x with mean f(x) and identity covariance. Then
// the maximum likelihood estimate of x given observations y is the
// solution to the non-linear least squares problem:
//
//  x* = arg min_x |f(x) - y|^2
//
// And the covariance of x* is given by
//
//  C(x*) = inverse[J'(x*)J(x*)]
//
// Here J(x*) is the Jacobian of f at x*. The above formula assumes
// that J(x*) has full column rank.
//
// If J(x*) is rank deficient, then the covariance matrix C(x*) is
// also rank deficient and is given by
//
//  C(x*) = pseudoinverse[J'(x*)J(x*)]
//
// Note that in the above, we assumed that the covariance matrix for y
// was identity. This is an important assumption. If this is not the
// case and we have
//
//  y = f(x) + N(0, S)
//
// where S is a positive semi-definite matrix denoting the covariance
// of y, then the maximum likelihood problem to be solved is
//
//  x* = arg min_x f'(x) inverse[S] f(x)
//
// and the corresponding covariance estimate of x* is given by
//
//  C(x*) = inverse[J'(x*) inverse[S] J(x*)]
//
// So, if the observations being fitted have a covariance matrix not
// equal to identity, then it is the user's responsibility to ensure
// that the corresponding cost functions are correctly scaled, e.g.,
// in the above case the cost function for this problem should
// evaluate S^{-1/2} f(x) instead of just f(x), where S^{-1/2} is the
// inverse square root of the covariance matrix S.
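//
// For instance, a minimal sketch of such a scaling, where the functor
// name, ComputeF, and the fixed size 2 are hypothetical placeholders:
//
//   // Wraps a 2-dimensional residual f(x) and returns S^{-1/2} f(x).
//   struct ScaledResidual {
//     template <typename T>
//     bool operator()(const T* x, T* residual) const {
//       T f[2];
//       ComputeF(x, f);  // evaluates the unscaled residual f(x)
//       for (int i = 0; i < 2; ++i) {
//         residual[i] = T(0.0);
//         for (int j = 0; j < 2; ++j) {
//           residual[i] += sqrt_inv_S[i][j] * f[j];
//         }
//       }
//       return true;
//     }
//     double sqrt_inv_S[2][2];  // S^{-1/2}, computed ahead of time
//   };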
//
// This class allows the user to evaluate the covariance for a
// non-linear least squares problem and provides random access to its
// blocks. The computation assumes that the CostFunctions compute
// residuals such that their covariance is identity.
//
// Since the computation of the covariance matrix requires computing
// the inverse of a potentially large matrix, this can involve a
// rather large amount of time and memory. However, it is usually the
// case that the user is only interested in a small part of the
// covariance matrix, quite often just the block diagonal. This class
// allows the user to specify the parts of the covariance matrix that
// she is interested in and then uses this information to only compute
// and store those parts of the covariance matrix.
//
// Rank of the Jacobian
// --------------------
// As we noted above, if the Jacobian is rank deficient, then the
// inverse of J'J is not defined and instead a pseudo inverse needs to
// be computed.
//
// The rank deficiency in J can be structural, i.e., columns which are
// always known to be zero, or numerical, i.e., dependent on the exact
// values in the Jacobian.
//
// Structural rank deficiency occurs when the problem contains
// parameter blocks that are constant. This class correctly handles
// structural rank deficiency like that.
//
// Numerical rank deficiency, where the rank of the matrix cannot be
// predicted by its sparsity structure and requires looking at its
// numerical values, is more complicated. Here again there are two
// cases:
//
//   a. The rank deficiency arises from overparameterization, e.g., a
//   four dimensional quaternion used to parameterize SO(3), which is
//   a three dimensional manifold. In cases like this, the user should
//   use an appropriate LocalParameterization (see the sketch after
//   this list). Not only will this lead to better numerical behaviour
//   of the Solver, it will also expose the rank deficiency to the
//   Covariance object so that it can handle it correctly.
//
//   b. More general numerical rank deficiency in the Jacobian
//   requires the computation of the so called Singular Value
//   Decomposition (SVD) of J'J. We do not know how to do this for
//   large sparse matrices efficiently. For small and moderate sized
//   problems this is done using dense linear algebra.
//
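// As a sketch of case (a), assuming a quaternion parameter block q of
// size 4 in a problem that has already been built:
//
//   double q[4];
//   problem.AddParameterBlock(q, 4, new ceres::QuaternionParameterization);
//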
// Gauge Invariance
// ----------------
// In structure from motion (3D reconstruction) problems, the
// reconstruction is ambiguous up to a similarity transform. This is
// known as a Gauge Ambiguity. Handling Gauges correctly requires the
// use of SVD or custom inversion algorithms. For small problems the
// user can use the dense algorithm. For more details see
//
// Ken-ichi Kanatani, Daniel D. Morris: Gauges and gauge
// transformations for uncertainty description of geometric structure
// with indeterminacy. IEEE Transactions on Information Theory 47(5):
// 2017-2028 (2001)
//
// Example Usage
// =============
//
//  double x[3];
//  double y[2];
//
//  Problem problem;
//  problem.AddParameterBlock(x, 3);
//  problem.AddParameterBlock(y, 2);
//  <Build Problem>
//  <Solve Problem>
//
//  Covariance::Options options;
//  Covariance covariance(options);
//
//  std::vector<std::pair<const double*, const double*>> covariance_blocks;
//  covariance_blocks.push_back(std::make_pair(x, x));
//  covariance_blocks.push_back(std::make_pair(y, y));
//  covariance_blocks.push_back(std::make_pair(x, y));
//
//  CHECK(covariance.Compute(covariance_blocks, &problem));
//
//  double covariance_xx[3 * 3];
//  double covariance_yy[2 * 2];
//  double covariance_xy[3 * 2];
//  covariance.GetCovarianceBlock(x, x, covariance_xx);
//  covariance.GetCovarianceBlock(y, y, covariance_yy);
//  covariance.GetCovarianceBlock(x, y, covariance_xy);
//
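// The returned covariance blocks are row-major arrays. A convenient
// way to view them, for instance, is with Eigen (on which Ceres
// already depends):
//
//  Eigen::Map<const Eigen::Matrix<double, 3, 3, Eigen::RowMajor>>
//      cov_xx(covariance_xx);
//  Eigen::Map<const Eigen::Matrix<double, 3, 2, Eigen::RowMajor>>
//      cov_xy(covariance_xy);
//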
class CERES_EXPORT Covariance {
 public:
  struct CERES_EXPORT Options {
    // Sparse linear algebra library to use when a sparse matrix
    // factorization is being used to compute the covariance matrix.
    //
    // Currently this only applies to SPARSE_QR.
    SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type =
#if !defined(CERES_NO_SUITESPARSE)
        SUITE_SPARSE;
#else
        // Eigen's QR factorization is always available.
        EIGEN_SPARSE;
#endif

    // Ceres supports two different algorithms for covariance
    // estimation, which represent different tradeoffs in speed,
    // accuracy and reliability.
    //
    // 1. DENSE_SVD uses Eigen's JacobiSVD to perform the
    //    computations. It computes the singular value decomposition
    //
    //      U * D * V' = J
    //
    //    and then uses it to compute the pseudo inverse of J'J as
    //
    //      pseudoinverse[J'J] = V * pseudoinverse[D^2] * V'
    //
    //    It is an accurate but slow method and should only be used
    //    for small to moderate sized problems. It can handle
    //    full-rank as well as rank deficient Jacobians.
    //
    // 2. SPARSE_QR uses the sparse QR factorization algorithm
    //    to compute the decomposition
    //
    //      Q * R = J
    //
    //    Since Q has orthonormal columns, J'J = R'R, and the inverse
    //    is computed as
    //
    //      [J'J]^-1 = [R'R]^-1
    //
    //    SPARSE_QR is not capable of computing the covariance if the
    //    Jacobian is rank deficient. Depending on the value of
    //    Covariance::Options::sparse_linear_algebra_library_type,
    //    either Eigen's sparse QR factorization or SuiteSparse's high
    //    performance SuiteSparseQR algorithm is used.
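    //
    // For instance, a hypothetical configuration that selects the
    // sparse algorithm with Eigen's QR factorization:
    //
    //   Covariance::Options options;
    //   options.algorithm_type = ceres::SPARSE_QR;
    //   options.sparse_linear_algebra_library_type = ceres::EIGEN_SPARSE;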
    CovarianceAlgorithmType algorithm_type = SPARSE_QR;

    // If the Jacobian matrix is near singular, then inverting J'J
    // will result in unreliable results, e.g., if
    //
    //   J = [1.0 1.0         ]
    //       [1.0 1.0000001   ]
    //
    // which is essentially a rank deficient matrix, we have
    //
    //   inv(J'J) = [ 2.0471e+14  -2.0471e+14]
    //              [-2.0471e+14   2.0471e+14]
    //
    // This is not a useful result. Therefore, by default
    // Covariance::Compute will return false if a rank deficient
    // Jacobian is encountered. How rank deficiency is detected
    // depends on the algorithm being used.
    //
    // 1. DENSE_SVD
    //
    //      min_sigma / max_sigma < sqrt(min_reciprocal_condition_number)
    //
    //    where min_sigma and max_sigma are the minimum and maximum
    //    singular values of J respectively.
    //
    // 2. SPARSE_QR
    //
    //      rank(J) < num_col(J)
    //
    //    Here rank(J) is the estimate of the rank of J returned by
    //    the sparse QR factorization algorithm. It is a fairly
    //    reliable indication of rank deficiency.
    //
    double min_reciprocal_condition_number = 1e-14;
    // When using DENSE_SVD, the user has more control in dealing with
    // singular and near singular covariance matrices.
    //
    // As mentioned above, when the covariance matrix is near
    // singular, instead of computing the inverse of J'J, the
    // Moore-Penrose pseudoinverse of J'J should be computed.
    //
    // If J'J has the eigen decomposition (lambda_i, e_i), where
    // lambda_i is the i^th eigenvalue and e_i is the corresponding
    // eigenvector, then the inverse of J'J is
    //
    //   inverse[J'J] = sum_i e_i e_i' / lambda_i
    //
    // and computing the pseudo inverse involves dropping terms from
    // this sum that correspond to small eigenvalues.
    //
    // How terms are dropped is controlled by
    // min_reciprocal_condition_number and null_space_rank.
    //
    // If null_space_rank is non-negative, then the smallest
    // null_space_rank eigenvalue/eigenvectors are dropped
    // irrespective of the magnitude of lambda_i. If the ratio of the
    // smallest non-zero eigenvalue to the largest eigenvalue in the
    // truncated matrix is still below
    // min_reciprocal_condition_number, then Covariance::Compute()
    // will fail and return false.
    //
    // Setting null_space_rank = -1 drops all terms for which
    //
    //   lambda_i / lambda_max < min_reciprocal_condition_number.
    //
    // This option has no effect on the SPARSE_QR algorithm.
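    //
    // For instance, a hypothetical configuration for a problem whose
    // Jacobian may be rank deficient (e.g., due to a gauge ambiguity):
    //
    //   Covariance::Options options;
    //   options.algorithm_type = ceres::DENSE_SVD;
    //   options.null_space_rank = -1;  // drop all small eigenvalues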
    int null_space_rank = 0;

    // Number of threads to be used for evaluating the Jacobian and
    // estimating the covariance.
    int num_threads = 1;

    // Even though the residual blocks in the problem may contain loss
    // functions, setting apply_loss_function to false will turn off
    // the application of the loss function to the output of the cost
    // function and in turn its effect on the covariance.
    //
    // TODO(sameeragarwal): Expand this based on Jim's experiments.
    bool apply_loss_function = true;
  };

  explicit Covariance(const Options& options);
  ~Covariance();

  // Compute a part of the covariance matrix.
  //
  // The vector covariance_blocks indexes into the covariance matrix
  // block-wise using pairs of parameter blocks. This allows the
  // covariance estimation algorithm to only compute and store these
  // blocks.
  //
  // Since the covariance matrix is symmetric, if the user passes
  // (block1, block2), then GetCovarianceBlock can be called with
  // block1, block2 as well as block2, block1.
  //
  // covariance_blocks cannot contain duplicates. Bad things will
  // happen if they do.
  //
  // Note that the list of covariance_blocks is only used to determine
  // which parts of the covariance matrix are computed. The full
  // Jacobian is used to do the computation, i.e., the list has no
  // impact on what part of the Jacobian is used.
  //
  // The return value indicates the success or failure of the
  // covariance computation. Please see the documentation for
  // Covariance::Options for more on the conditions under which this
  // function returns false.
  bool Compute(const std::vector<std::pair<const double*, const double*>>&
                   covariance_blocks,
               Problem* problem);

  // Compute a part of the covariance matrix.
  //
  // The vector parameter_blocks contains the parameter blocks that
  // are used for computing the covariance matrix. From this vector
  // all covariance pairs are generated. This allows the covariance
  // estimation algorithm to only compute and store these blocks.
  //
  // parameter_blocks cannot contain duplicates. Bad things will
  // happen if they do.
  //
  // Note that the list of parameter_blocks is only used to determine
  // which parts of the covariance matrix are computed. The full
  // Jacobian is used to do the computation, i.e., the list has no
  // impact on what part of the Jacobian is used.
  //
  // The return value indicates the success or failure of the
  // covariance computation. Please see the documentation for
  // Covariance::Options for more on the conditions under which this
  // function returns false.
  bool Compute(const std::vector<const double*>& parameter_blocks,
               Problem* problem);

  // Return the block of the cross-covariance matrix corresponding to
  // parameter_block1 and parameter_block2.
  //
  // Compute must be called before the first call to
  // GetCovarianceBlock, and the pair <parameter_block1,
  // parameter_block2> OR the pair <parameter_block2,
  // parameter_block1> must have been present in the vector
  // covariance_blocks when Compute was called. Otherwise
  // GetCovarianceBlock will return false.
  //
  // covariance_block must point to a memory location that can store a
  // parameter_block1_size x parameter_block2_size matrix. The
  // returned covariance will be a row-major matrix.
  bool GetCovarianceBlock(const double* parameter_block1,
                          const double* parameter_block2,
                          double* covariance_block) const;

  // Return the block of the cross-covariance matrix corresponding to
  // parameter_block1 and parameter_block2.
  // Returns cross-covariance in the tangent space if a local
  // parameterization is associated with either parameter block;
  // else returns cross-covariance in the ambient space.
  //
  // Compute must be called before the first call to
  // GetCovarianceBlockInTangentSpace, and the pair <parameter_block1,
  // parameter_block2> OR the pair <parameter_block2,
  // parameter_block1> must have been present in the vector
  // covariance_blocks when Compute was called. Otherwise
  // GetCovarianceBlockInTangentSpace will return false.
  //
  // covariance_block must point to a memory location that can store a
  // parameter_block1_local_size x parameter_block2_local_size matrix.
  // The returned covariance will be a row-major matrix.
  bool GetCovarianceBlockInTangentSpace(const double* parameter_block1,
                                        const double* parameter_block2,
                                        double* covariance_block) const;

  // Return the covariance matrix corresponding to all parameter_blocks.
  //
  // Compute must be called before calling GetCovarianceMatrix, and all
  // parameter_blocks must have been present in the vector
  // parameter_blocks when Compute was called. Otherwise
  // GetCovarianceMatrix returns false.
  //
  // covariance_matrix must point to a memory location that can store
  // the covariance matrix. The covariance matrix will be a square
  // matrix whose row and column count is equal to the sum of the
  // sizes of the individual parameter blocks. The covariance matrix
  // will be a row-major matrix.
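  //
  // A minimal sketch, reusing x and y from the example above:
  //
  //   std::vector<const double*> blocks = {x, y};
  //   CHECK(covariance.Compute(blocks, &problem));
  //   double covariance_matrix[5 * 5];  // (3 + 2) x (3 + 2)
  //   CHECK(covariance.GetCovarianceMatrix(blocks, covariance_matrix));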
  bool GetCovarianceMatrix(const std::vector<const double*>& parameter_blocks,
                           double* covariance_matrix) const;

  // Return the covariance matrix corresponding to parameter_blocks
  // in the tangent space if a local parameterization is associated
  // with any of the parameter blocks; otherwise it returns the
  // covariance matrix in the ambient space.
  //
  // Compute must be called before calling
  // GetCovarianceMatrixInTangentSpace, and all parameter_blocks must
  // have been present in the vector parameter_blocks when Compute was
  // called. Otherwise GetCovarianceMatrixInTangentSpace returns false.
  //
  // covariance_matrix must point to a memory location that can store
  // the covariance matrix. The covariance matrix will be a square
  // matrix whose row and column count is equal to the sum of the
  // sizes of the tangent spaces of the individual parameter blocks.
  // The covariance matrix will be a row-major matrix.
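  //
  // For instance, with the quaternion block q from the earlier sketch
  // (ambient size 4, tangent size 3), the matrix for {q} is 3 x 3
  // rather than 4 x 4:
  //
  //   std::vector<const double*> blocks = {q};
  //   CHECK(covariance.Compute(blocks, &problem));
  //   double tangent_cov[3 * 3];
  //   CHECK(covariance.GetCovarianceMatrixInTangentSpace(blocks,
  //                                                      tangent_cov));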
  bool GetCovarianceMatrixInTangentSpace(
      const std::vector<const double*>& parameter_blocks,
      double* covariance_matrix) const;

 private:
  std::unique_ptr<internal::CovarianceImpl> impl_;
};

}  // namespace ceres

#include "ceres/internal/reenable_warnings.h"

#endif  // CERES_PUBLIC_COVARIANCE_H_