1 // Ceres Solver - A fast non-linear least squares minimizer
2 // Copyright 2015 Google Inc. All rights reserved.
3 // http://ceres-solver.org/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are met:
7 //
8 // * Redistributions of source code must retain the above copyright notice,
9 //   this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright notice,
11 //   this list of conditions and the following disclaimer in the documentation
12 //   and/or other materials provided with the distribution.
13 // * Neither the name of Google Inc. nor the names of its contributors may be
14 //   used to endorse or promote products derived from this software without
15 //   specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 // POSSIBILITY OF SUCH DAMAGE.
28 //
29 // Author: sameeragarwal@google.com (Sameer Agarwal)
30 
31 #include <algorithm>
32 #include <cstring>
33 #include <vector>
34 
35 #include "ceres/block_sparse_matrix.h"
36 #include "ceres/block_structure.h"
37 #include "ceres/internal/eigen.h"
38 #include "ceres/partitioned_matrix_view.h"
39 #include "ceres/small_blas.h"
40 #include "glog/logging.h"
41 
42 namespace ceres {
43 namespace internal {
44 
45 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
46 PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
PartitionedMatrixView(const BlockSparseMatrix & matrix,int num_col_blocks_e)47     PartitionedMatrixView(const BlockSparseMatrix& matrix, int num_col_blocks_e)
48     : matrix_(matrix), num_col_blocks_e_(num_col_blocks_e) {
49   const CompressedRowBlockStructure* bs = matrix_.block_structure();
50   CHECK(bs != nullptr);
51 
52   num_col_blocks_f_ = bs->cols.size() - num_col_blocks_e_;
53 
54   // Compute the number of row blocks in E. The number of row blocks
55   // in E maybe less than the number of row blocks in the input matrix
56   // as some of the row blocks at the bottom may not have any
57   // e_blocks. For a definition of what an e_block is, please see
58   // explicit_schur_complement_solver.h
59   num_row_blocks_e_ = 0;
60   for (int r = 0; r < bs->rows.size(); ++r) {
61     const std::vector<Cell>& cells = bs->rows[r].cells;
62     if (cells[0].block_id < num_col_blocks_e_) {
63       ++num_row_blocks_e_;
64     }
65   }
66 
67   // Compute the number of columns in E and F.
68   num_cols_e_ = 0;
69   num_cols_f_ = 0;
70 
71   for (int c = 0; c < bs->cols.size(); ++c) {
72     const Block& block = bs->cols[c];
73     if (c < num_col_blocks_e_) {
74       num_cols_e_ += block.size;
75     } else {
76       num_cols_f_ += block.size;
77     }
78   }
79 
80   CHECK_EQ(num_cols_e_ + num_cols_f_, matrix_.num_cols());
81 }
82 
83 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
84 PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
~PartitionedMatrixView()85     ~PartitionedMatrixView() {}
86 
87 // The next four methods don't seem to be particularly cache
88 // friendly. This is an artifact of how the BlockStructure of the
89 // input matrix is constructed. These methods will benefit from
90 // multithreading as well as improved data layout.
91 
92 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
93 void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
RightMultiplyE(const double * x,double * y)94     RightMultiplyE(const double* x, double* y) const {
95   const CompressedRowBlockStructure* bs = matrix_.block_structure();
96 
97   // Iterate over the first num_row_blocks_e_ row blocks, and multiply
98   // by the first cell in each row block.
99   const double* values = matrix_.values();
100   for (int r = 0; r < num_row_blocks_e_; ++r) {
101     const Cell& cell = bs->rows[r].cells[0];
102     const int row_block_pos = bs->rows[r].block.position;
103     const int row_block_size = bs->rows[r].block.size;
104     const int col_block_id = cell.block_id;
105     const int col_block_pos = bs->cols[col_block_id].position;
106     const int col_block_size = bs->cols[col_block_id].size;
107     // clang-format off
108     MatrixVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
109         values + cell.position, row_block_size, col_block_size,
110         x + col_block_pos,
111         y + row_block_pos);
112     // clang-format on
113   }
114 }
115 
116 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
117 void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
RightMultiplyF(const double * x,double * y)118     RightMultiplyF(const double* x, double* y) const {
119   const CompressedRowBlockStructure* bs = matrix_.block_structure();
120 
121   // Iterate over row blocks, and if the row block is in E, then
122   // multiply by all the cells except the first one which is of type
123   // E. If the row block is not in E (i.e its in the bottom
124   // num_row_blocks - num_row_blocks_e row blocks), then all the cells
125   // are of type F and multiply by them all.
126   const double* values = matrix_.values();
127   for (int r = 0; r < num_row_blocks_e_; ++r) {
128     const int row_block_pos = bs->rows[r].block.position;
129     const int row_block_size = bs->rows[r].block.size;
130     const std::vector<Cell>& cells = bs->rows[r].cells;
131     for (int c = 1; c < cells.size(); ++c) {
132       const int col_block_id = cells[c].block_id;
133       const int col_block_pos = bs->cols[col_block_id].position;
134       const int col_block_size = bs->cols[col_block_id].size;
135       // clang-format off
136       MatrixVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
137           values + cells[c].position, row_block_size, col_block_size,
138           x + col_block_pos - num_cols_e_,
139           y + row_block_pos);
140       // clang-format on
141     }
142   }
143 
144   for (int r = num_row_blocks_e_; r < bs->rows.size(); ++r) {
145     const int row_block_pos = bs->rows[r].block.position;
146     const int row_block_size = bs->rows[r].block.size;
147     const std::vector<Cell>& cells = bs->rows[r].cells;
148     for (int c = 0; c < cells.size(); ++c) {
149       const int col_block_id = cells[c].block_id;
150       const int col_block_pos = bs->cols[col_block_id].position;
151       const int col_block_size = bs->cols[col_block_id].size;
152       // clang-format off
153       MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
154           values + cells[c].position, row_block_size, col_block_size,
155           x + col_block_pos - num_cols_e_,
156           y + row_block_pos);
157       // clang-format on
158     }
159   }
160 }
161 
162 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
163 void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
LeftMultiplyE(const double * x,double * y)164     LeftMultiplyE(const double* x, double* y) const {
165   const CompressedRowBlockStructure* bs = matrix_.block_structure();
166 
167   // Iterate over the first num_row_blocks_e_ row blocks, and multiply
168   // by the first cell in each row block.
169   const double* values = matrix_.values();
170   for (int r = 0; r < num_row_blocks_e_; ++r) {
171     const Cell& cell = bs->rows[r].cells[0];
172     const int row_block_pos = bs->rows[r].block.position;
173     const int row_block_size = bs->rows[r].block.size;
174     const int col_block_id = cell.block_id;
175     const int col_block_pos = bs->cols[col_block_id].position;
176     const int col_block_size = bs->cols[col_block_id].size;
177     // clang-format off
178     MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
179         values + cell.position, row_block_size, col_block_size,
180         x + row_block_pos,
181         y + col_block_pos);
182     // clang-format on
183   }
184 }
185 
186 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
187 void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
LeftMultiplyF(const double * x,double * y)188     LeftMultiplyF(const double* x, double* y) const {
189   const CompressedRowBlockStructure* bs = matrix_.block_structure();
190 
191   // Iterate over row blocks, and if the row block is in E, then
192   // multiply by all the cells except the first one which is of type
193   // E. If the row block is not in E (i.e its in the bottom
194   // num_row_blocks - num_row_blocks_e row blocks), then all the cells
195   // are of type F and multiply by them all.
196   const double* values = matrix_.values();
197   for (int r = 0; r < num_row_blocks_e_; ++r) {
198     const int row_block_pos = bs->rows[r].block.position;
199     const int row_block_size = bs->rows[r].block.size;
200     const std::vector<Cell>& cells = bs->rows[r].cells;
201     for (int c = 1; c < cells.size(); ++c) {
202       const int col_block_id = cells[c].block_id;
203       const int col_block_pos = bs->cols[col_block_id].position;
204       const int col_block_size = bs->cols[col_block_id].size;
205       // clang-format off
206       MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
207         values + cells[c].position, row_block_size, col_block_size,
208         x + row_block_pos,
209         y + col_block_pos - num_cols_e_);
210       // clang-format on
211     }
212   }
213 
214   for (int r = num_row_blocks_e_; r < bs->rows.size(); ++r) {
215     const int row_block_pos = bs->rows[r].block.position;
216     const int row_block_size = bs->rows[r].block.size;
217     const std::vector<Cell>& cells = bs->rows[r].cells;
218     for (int c = 0; c < cells.size(); ++c) {
219       const int col_block_id = cells[c].block_id;
220       const int col_block_pos = bs->cols[col_block_id].position;
221       const int col_block_size = bs->cols[col_block_id].size;
222       // clang-format off
223       MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
224         values + cells[c].position, row_block_size, col_block_size,
225         x + row_block_pos,
226         y + col_block_pos - num_cols_e_);
227       // clang-format on
228     }
229   }
230 }
231 
232 // Given a range of columns blocks of a matrix m, compute the block
233 // structure of the block diagonal of the matrix m(:,
234 // start_col_block:end_col_block)'m(:, start_col_block:end_col_block)
235 // and return a BlockSparseMatrix with the this block structure. The
236 // caller owns the result.
237 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
238 BlockSparseMatrix*
239 PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
CreateBlockDiagonalMatrixLayout(int start_col_block,int end_col_block)240     CreateBlockDiagonalMatrixLayout(int start_col_block,
241                                     int end_col_block) const {
242   const CompressedRowBlockStructure* bs = matrix_.block_structure();
243   CompressedRowBlockStructure* block_diagonal_structure =
244       new CompressedRowBlockStructure;
245 
246   int block_position = 0;
247   int diagonal_cell_position = 0;
248 
249   // Iterate over the column blocks, creating a new diagonal block for
250   // each column block.
251   for (int c = start_col_block; c < end_col_block; ++c) {
252     const Block& block = bs->cols[c];
253     block_diagonal_structure->cols.push_back(Block());
254     Block& diagonal_block = block_diagonal_structure->cols.back();
255     diagonal_block.size = block.size;
256     diagonal_block.position = block_position;
257 
258     block_diagonal_structure->rows.push_back(CompressedRow());
259     CompressedRow& row = block_diagonal_structure->rows.back();
260     row.block = diagonal_block;
261 
262     row.cells.push_back(Cell());
263     Cell& cell = row.cells.back();
264     cell.block_id = c - start_col_block;
265     cell.position = diagonal_cell_position;
266 
267     block_position += block.size;
268     diagonal_cell_position += block.size * block.size;
269   }
270 
271   // Build a BlockSparseMatrix with the just computed block
272   // structure.
273   return new BlockSparseMatrix(block_diagonal_structure);
274 }
275 
276 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
277 BlockSparseMatrix* PartitionedMatrixView<kRowBlockSize,
278                                          kEBlockSize,
CreateBlockDiagonalEtE()279                                          kFBlockSize>::CreateBlockDiagonalEtE()
280     const {
281   BlockSparseMatrix* block_diagonal =
282       CreateBlockDiagonalMatrixLayout(0, num_col_blocks_e_);
283   UpdateBlockDiagonalEtE(block_diagonal);
284   return block_diagonal;
285 }
286 
287 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
288 BlockSparseMatrix* PartitionedMatrixView<kRowBlockSize,
289                                          kEBlockSize,
CreateBlockDiagonalFtF()290                                          kFBlockSize>::CreateBlockDiagonalFtF()
291     const {
292   BlockSparseMatrix* block_diagonal = CreateBlockDiagonalMatrixLayout(
293       num_col_blocks_e_, num_col_blocks_e_ + num_col_blocks_f_);
294   UpdateBlockDiagonalFtF(block_diagonal);
295   return block_diagonal;
296 }
297 
298 // Similar to the code in RightMultiplyE, except instead of the matrix
299 // vector multiply its an outer product.
300 //
301 //    block_diagonal = block_diagonal(E'E)
302 //
303 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
304 void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
UpdateBlockDiagonalEtE(BlockSparseMatrix * block_diagonal)305     UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const {
306   const CompressedRowBlockStructure* bs = matrix_.block_structure();
307   const CompressedRowBlockStructure* block_diagonal_structure =
308       block_diagonal->block_structure();
309 
310   block_diagonal->SetZero();
311   const double* values = matrix_.values();
312   for (int r = 0; r < num_row_blocks_e_; ++r) {
313     const Cell& cell = bs->rows[r].cells[0];
314     const int row_block_size = bs->rows[r].block.size;
315     const int block_id = cell.block_id;
316     const int col_block_size = bs->cols[block_id].size;
317     const int cell_position =
318         block_diagonal_structure->rows[block_id].cells[0].position;
319 
320     // clang-format off
321     MatrixTransposeMatrixMultiply
322         <kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>(
323             values + cell.position, row_block_size, col_block_size,
324             values + cell.position, row_block_size, col_block_size,
325             block_diagonal->mutable_values() + cell_position,
326             0, 0, col_block_size, col_block_size);
327     // clang-format on
328   }
329 }
330 
331 // Similar to the code in RightMultiplyF, except instead of the matrix
332 // vector multiply its an outer product.
333 //
334 //   block_diagonal = block_diagonal(F'F)
335 //
336 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
337 void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
UpdateBlockDiagonalFtF(BlockSparseMatrix * block_diagonal)338     UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const {
339   const CompressedRowBlockStructure* bs = matrix_.block_structure();
340   const CompressedRowBlockStructure* block_diagonal_structure =
341       block_diagonal->block_structure();
342 
343   block_diagonal->SetZero();
344   const double* values = matrix_.values();
345   for (int r = 0; r < num_row_blocks_e_; ++r) {
346     const int row_block_size = bs->rows[r].block.size;
347     const std::vector<Cell>& cells = bs->rows[r].cells;
348     for (int c = 1; c < cells.size(); ++c) {
349       const int col_block_id = cells[c].block_id;
350       const int col_block_size = bs->cols[col_block_id].size;
351       const int diagonal_block_id = col_block_id - num_col_blocks_e_;
352       const int cell_position =
353           block_diagonal_structure->rows[diagonal_block_id].cells[0].position;
354 
355       // clang-format off
356       MatrixTransposeMatrixMultiply
357           <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
358               values + cells[c].position, row_block_size, col_block_size,
359               values + cells[c].position, row_block_size, col_block_size,
360               block_diagonal->mutable_values() + cell_position,
361               0, 0, col_block_size, col_block_size);
362       // clang-format on
363     }
364   }
365 
366   for (int r = num_row_blocks_e_; r < bs->rows.size(); ++r) {
367     const int row_block_size = bs->rows[r].block.size;
368     const std::vector<Cell>& cells = bs->rows[r].cells;
369     for (int c = 0; c < cells.size(); ++c) {
370       const int col_block_id = cells[c].block_id;
371       const int col_block_size = bs->cols[col_block_id].size;
372       const int diagonal_block_id = col_block_id - num_col_blocks_e_;
373       const int cell_position =
374           block_diagonal_structure->rows[diagonal_block_id].cells[0].position;
375 
376       // clang-format off
377       MatrixTransposeMatrixMultiply
378           <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
379               values + cells[c].position, row_block_size, col_block_size,
380               values + cells[c].position, row_block_size, col_block_size,
381               block_diagonal->mutable_values() + cell_position,
382               0, 0, col_block_size, col_block_size);
383       // clang-format on
384     }
385   }
386 }
387 
388 }  // namespace internal
389 }  // namespace ceres
390