1 // Ceres Solver - A fast non-linear least squares minimizer 2 // Copyright 2015 Google Inc. All rights reserved. 3 // http://ceres-solver.org/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are met: 7 // 8 // * Redistributions of source code must retain the above copyright notice, 9 // this list of conditions and the following disclaimer. 10 // * Redistributions in binary form must reproduce the above copyright notice, 11 // this list of conditions and the following disclaimer in the documentation 12 // and/or other materials provided with the distribution. 13 // * Neither the name of Google Inc. nor the names of its contributors may be 14 // used to endorse or promote products derived from this software without 15 // specific prior written permission. 16 // 17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 // POSSIBILITY OF SUCH DAMAGE. 28 // 29 // Author: sameeragarwal@google.com (Sameer Agarwal) 30 31 #include <algorithm> 32 #include <cstring> 33 #include <vector> 34 35 #include "ceres/block_sparse_matrix.h" 36 #include "ceres/block_structure.h" 37 #include "ceres/internal/eigen.h" 38 #include "ceres/partitioned_matrix_view.h" 39 #include "ceres/small_blas.h" 40 #include "glog/logging.h" 41 42 namespace ceres { 43 namespace internal { 44 45 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 46 PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: PartitionedMatrixView(const BlockSparseMatrix & matrix,int num_col_blocks_e)47 PartitionedMatrixView(const BlockSparseMatrix& matrix, int num_col_blocks_e) 48 : matrix_(matrix), num_col_blocks_e_(num_col_blocks_e) { 49 const CompressedRowBlockStructure* bs = matrix_.block_structure(); 50 CHECK(bs != nullptr); 51 52 num_col_blocks_f_ = bs->cols.size() - num_col_blocks_e_; 53 54 // Compute the number of row blocks in E. The number of row blocks 55 // in E maybe less than the number of row blocks in the input matrix 56 // as some of the row blocks at the bottom may not have any 57 // e_blocks. For a definition of what an e_block is, please see 58 // explicit_schur_complement_solver.h 59 num_row_blocks_e_ = 0; 60 for (int r = 0; r < bs->rows.size(); ++r) { 61 const std::vector<Cell>& cells = bs->rows[r].cells; 62 if (cells[0].block_id < num_col_blocks_e_) { 63 ++num_row_blocks_e_; 64 } 65 } 66 67 // Compute the number of columns in E and F. 68 num_cols_e_ = 0; 69 num_cols_f_ = 0; 70 71 for (int c = 0; c < bs->cols.size(); ++c) { 72 const Block& block = bs->cols[c]; 73 if (c < num_col_blocks_e_) { 74 num_cols_e_ += block.size; 75 } else { 76 num_cols_f_ += block.size; 77 } 78 } 79 80 CHECK_EQ(num_cols_e_ + num_cols_f_, matrix_.num_cols()); 81 } 82 83 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 84 PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: ~PartitionedMatrixView()85 ~PartitionedMatrixView() {} 86 87 // The next four methods don't seem to be particularly cache 88 // friendly. This is an artifact of how the BlockStructure of the 89 // input matrix is constructed. These methods will benefit from 90 // multithreading as well as improved data layout. 91 92 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 93 void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: RightMultiplyE(const double * x,double * y)94 RightMultiplyE(const double* x, double* y) const { 95 const CompressedRowBlockStructure* bs = matrix_.block_structure(); 96 97 // Iterate over the first num_row_blocks_e_ row blocks, and multiply 98 // by the first cell in each row block. 99 const double* values = matrix_.values(); 100 for (int r = 0; r < num_row_blocks_e_; ++r) { 101 const Cell& cell = bs->rows[r].cells[0]; 102 const int row_block_pos = bs->rows[r].block.position; 103 const int row_block_size = bs->rows[r].block.size; 104 const int col_block_id = cell.block_id; 105 const int col_block_pos = bs->cols[col_block_id].position; 106 const int col_block_size = bs->cols[col_block_id].size; 107 // clang-format off 108 MatrixVectorMultiply<kRowBlockSize, kEBlockSize, 1>( 109 values + cell.position, row_block_size, col_block_size, 110 x + col_block_pos, 111 y + row_block_pos); 112 // clang-format on 113 } 114 } 115 116 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 117 void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: RightMultiplyF(const double * x,double * y)118 RightMultiplyF(const double* x, double* y) const { 119 const CompressedRowBlockStructure* bs = matrix_.block_structure(); 120 121 // Iterate over row blocks, and if the row block is in E, then 122 // multiply by all the cells except the first one which is of type 123 // E. If the row block is not in E (i.e its in the bottom 124 // num_row_blocks - num_row_blocks_e row blocks), then all the cells 125 // are of type F and multiply by them all. 126 const double* values = matrix_.values(); 127 for (int r = 0; r < num_row_blocks_e_; ++r) { 128 const int row_block_pos = bs->rows[r].block.position; 129 const int row_block_size = bs->rows[r].block.size; 130 const std::vector<Cell>& cells = bs->rows[r].cells; 131 for (int c = 1; c < cells.size(); ++c) { 132 const int col_block_id = cells[c].block_id; 133 const int col_block_pos = bs->cols[col_block_id].position; 134 const int col_block_size = bs->cols[col_block_id].size; 135 // clang-format off 136 MatrixVectorMultiply<kRowBlockSize, kFBlockSize, 1>( 137 values + cells[c].position, row_block_size, col_block_size, 138 x + col_block_pos - num_cols_e_, 139 y + row_block_pos); 140 // clang-format on 141 } 142 } 143 144 for (int r = num_row_blocks_e_; r < bs->rows.size(); ++r) { 145 const int row_block_pos = bs->rows[r].block.position; 146 const int row_block_size = bs->rows[r].block.size; 147 const std::vector<Cell>& cells = bs->rows[r].cells; 148 for (int c = 0; c < cells.size(); ++c) { 149 const int col_block_id = cells[c].block_id; 150 const int col_block_pos = bs->cols[col_block_id].position; 151 const int col_block_size = bs->cols[col_block_id].size; 152 // clang-format off 153 MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( 154 values + cells[c].position, row_block_size, col_block_size, 155 x + col_block_pos - num_cols_e_, 156 y + row_block_pos); 157 // clang-format on 158 } 159 } 160 } 161 162 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 163 void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: LeftMultiplyE(const double * x,double * y)164 LeftMultiplyE(const double* x, double* y) const { 165 const CompressedRowBlockStructure* bs = matrix_.block_structure(); 166 167 // Iterate over the first num_row_blocks_e_ row blocks, and multiply 168 // by the first cell in each row block. 169 const double* values = matrix_.values(); 170 for (int r = 0; r < num_row_blocks_e_; ++r) { 171 const Cell& cell = bs->rows[r].cells[0]; 172 const int row_block_pos = bs->rows[r].block.position; 173 const int row_block_size = bs->rows[r].block.size; 174 const int col_block_id = cell.block_id; 175 const int col_block_pos = bs->cols[col_block_id].position; 176 const int col_block_size = bs->cols[col_block_id].size; 177 // clang-format off 178 MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>( 179 values + cell.position, row_block_size, col_block_size, 180 x + row_block_pos, 181 y + col_block_pos); 182 // clang-format on 183 } 184 } 185 186 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 187 void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: LeftMultiplyF(const double * x,double * y)188 LeftMultiplyF(const double* x, double* y) const { 189 const CompressedRowBlockStructure* bs = matrix_.block_structure(); 190 191 // Iterate over row blocks, and if the row block is in E, then 192 // multiply by all the cells except the first one which is of type 193 // E. If the row block is not in E (i.e its in the bottom 194 // num_row_blocks - num_row_blocks_e row blocks), then all the cells 195 // are of type F and multiply by them all. 196 const double* values = matrix_.values(); 197 for (int r = 0; r < num_row_blocks_e_; ++r) { 198 const int row_block_pos = bs->rows[r].block.position; 199 const int row_block_size = bs->rows[r].block.size; 200 const std::vector<Cell>& cells = bs->rows[r].cells; 201 for (int c = 1; c < cells.size(); ++c) { 202 const int col_block_id = cells[c].block_id; 203 const int col_block_pos = bs->cols[col_block_id].position; 204 const int col_block_size = bs->cols[col_block_id].size; 205 // clang-format off 206 MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>( 207 values + cells[c].position, row_block_size, col_block_size, 208 x + row_block_pos, 209 y + col_block_pos - num_cols_e_); 210 // clang-format on 211 } 212 } 213 214 for (int r = num_row_blocks_e_; r < bs->rows.size(); ++r) { 215 const int row_block_pos = bs->rows[r].block.position; 216 const int row_block_size = bs->rows[r].block.size; 217 const std::vector<Cell>& cells = bs->rows[r].cells; 218 for (int c = 0; c < cells.size(); ++c) { 219 const int col_block_id = cells[c].block_id; 220 const int col_block_pos = bs->cols[col_block_id].position; 221 const int col_block_size = bs->cols[col_block_id].size; 222 // clang-format off 223 MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( 224 values + cells[c].position, row_block_size, col_block_size, 225 x + row_block_pos, 226 y + col_block_pos - num_cols_e_); 227 // clang-format on 228 } 229 } 230 } 231 232 // Given a range of columns blocks of a matrix m, compute the block 233 // structure of the block diagonal of the matrix m(:, 234 // start_col_block:end_col_block)'m(:, start_col_block:end_col_block) 235 // and return a BlockSparseMatrix with the this block structure. The 236 // caller owns the result. 237 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 238 BlockSparseMatrix* 239 PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: CreateBlockDiagonalMatrixLayout(int start_col_block,int end_col_block)240 CreateBlockDiagonalMatrixLayout(int start_col_block, 241 int end_col_block) const { 242 const CompressedRowBlockStructure* bs = matrix_.block_structure(); 243 CompressedRowBlockStructure* block_diagonal_structure = 244 new CompressedRowBlockStructure; 245 246 int block_position = 0; 247 int diagonal_cell_position = 0; 248 249 // Iterate over the column blocks, creating a new diagonal block for 250 // each column block. 251 for (int c = start_col_block; c < end_col_block; ++c) { 252 const Block& block = bs->cols[c]; 253 block_diagonal_structure->cols.push_back(Block()); 254 Block& diagonal_block = block_diagonal_structure->cols.back(); 255 diagonal_block.size = block.size; 256 diagonal_block.position = block_position; 257 258 block_diagonal_structure->rows.push_back(CompressedRow()); 259 CompressedRow& row = block_diagonal_structure->rows.back(); 260 row.block = diagonal_block; 261 262 row.cells.push_back(Cell()); 263 Cell& cell = row.cells.back(); 264 cell.block_id = c - start_col_block; 265 cell.position = diagonal_cell_position; 266 267 block_position += block.size; 268 diagonal_cell_position += block.size * block.size; 269 } 270 271 // Build a BlockSparseMatrix with the just computed block 272 // structure. 273 return new BlockSparseMatrix(block_diagonal_structure); 274 } 275 276 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 277 BlockSparseMatrix* PartitionedMatrixView<kRowBlockSize, 278 kEBlockSize, CreateBlockDiagonalEtE()279 kFBlockSize>::CreateBlockDiagonalEtE() 280 const { 281 BlockSparseMatrix* block_diagonal = 282 CreateBlockDiagonalMatrixLayout(0, num_col_blocks_e_); 283 UpdateBlockDiagonalEtE(block_diagonal); 284 return block_diagonal; 285 } 286 287 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 288 BlockSparseMatrix* PartitionedMatrixView<kRowBlockSize, 289 kEBlockSize, CreateBlockDiagonalFtF()290 kFBlockSize>::CreateBlockDiagonalFtF() 291 const { 292 BlockSparseMatrix* block_diagonal = CreateBlockDiagonalMatrixLayout( 293 num_col_blocks_e_, num_col_blocks_e_ + num_col_blocks_f_); 294 UpdateBlockDiagonalFtF(block_diagonal); 295 return block_diagonal; 296 } 297 298 // Similar to the code in RightMultiplyE, except instead of the matrix 299 // vector multiply its an outer product. 300 // 301 // block_diagonal = block_diagonal(E'E) 302 // 303 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 304 void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: UpdateBlockDiagonalEtE(BlockSparseMatrix * block_diagonal)305 UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const { 306 const CompressedRowBlockStructure* bs = matrix_.block_structure(); 307 const CompressedRowBlockStructure* block_diagonal_structure = 308 block_diagonal->block_structure(); 309 310 block_diagonal->SetZero(); 311 const double* values = matrix_.values(); 312 for (int r = 0; r < num_row_blocks_e_; ++r) { 313 const Cell& cell = bs->rows[r].cells[0]; 314 const int row_block_size = bs->rows[r].block.size; 315 const int block_id = cell.block_id; 316 const int col_block_size = bs->cols[block_id].size; 317 const int cell_position = 318 block_diagonal_structure->rows[block_id].cells[0].position; 319 320 // clang-format off 321 MatrixTransposeMatrixMultiply 322 <kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>( 323 values + cell.position, row_block_size, col_block_size, 324 values + cell.position, row_block_size, col_block_size, 325 block_diagonal->mutable_values() + cell_position, 326 0, 0, col_block_size, col_block_size); 327 // clang-format on 328 } 329 } 330 331 // Similar to the code in RightMultiplyF, except instead of the matrix 332 // vector multiply its an outer product. 333 // 334 // block_diagonal = block_diagonal(F'F) 335 // 336 template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> 337 void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>:: UpdateBlockDiagonalFtF(BlockSparseMatrix * block_diagonal)338 UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const { 339 const CompressedRowBlockStructure* bs = matrix_.block_structure(); 340 const CompressedRowBlockStructure* block_diagonal_structure = 341 block_diagonal->block_structure(); 342 343 block_diagonal->SetZero(); 344 const double* values = matrix_.values(); 345 for (int r = 0; r < num_row_blocks_e_; ++r) { 346 const int row_block_size = bs->rows[r].block.size; 347 const std::vector<Cell>& cells = bs->rows[r].cells; 348 for (int c = 1; c < cells.size(); ++c) { 349 const int col_block_id = cells[c].block_id; 350 const int col_block_size = bs->cols[col_block_id].size; 351 const int diagonal_block_id = col_block_id - num_col_blocks_e_; 352 const int cell_position = 353 block_diagonal_structure->rows[diagonal_block_id].cells[0].position; 354 355 // clang-format off 356 MatrixTransposeMatrixMultiply 357 <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>( 358 values + cells[c].position, row_block_size, col_block_size, 359 values + cells[c].position, row_block_size, col_block_size, 360 block_diagonal->mutable_values() + cell_position, 361 0, 0, col_block_size, col_block_size); 362 // clang-format on 363 } 364 } 365 366 for (int r = num_row_blocks_e_; r < bs->rows.size(); ++r) { 367 const int row_block_size = bs->rows[r].block.size; 368 const std::vector<Cell>& cells = bs->rows[r].cells; 369 for (int c = 0; c < cells.size(); ++c) { 370 const int col_block_id = cells[c].block_id; 371 const int col_block_size = bs->cols[col_block_id].size; 372 const int diagonal_block_id = col_block_id - num_col_blocks_e_; 373 const int cell_position = 374 block_diagonal_structure->rows[diagonal_block_id].cells[0].position; 375 376 // clang-format off 377 MatrixTransposeMatrixMultiply 378 <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>( 379 values + cells[c].position, row_block_size, col_block_size, 380 values + cells[c].position, row_block_size, col_block_size, 381 block_diagonal->mutable_values() + cell_position, 382 0, 0, col_block_size, col_block_size); 383 // clang-format on 384 } 385 } 386 } 387 388 } // namespace internal 389 } // namespace ceres 390