1 // This is core/vnl/algo/vnl_sparse_lm.cxx
2 //:
3 // \file
4 // \author Matt Leotta (Brown)
5 // \date April 14, 2005
6 //
7 //-----------------------------------------------------------------------------
8
9 #include <iostream>
10 #include <iomanip>
11 #include <algorithm>
12 #include "vnl_sparse_lm.h"
13 #include <vnl/vnl_fastops.h>
14 #include <vnl/vnl_vector_ref.h>
15 #include <vnl/vnl_crs_index.h>
16 #include <vnl/vnl_sparse_lst_sqr_function.h>
17
18 #include <vnl/algo/vnl_cholesky.h>
19 #include <vnl/algo/vnl_svd.h>
20
21
22 //: Initialize with the function object that is to be minimized.
vnl_sparse_lm(vnl_sparse_lst_sqr_function & f)23 vnl_sparse_lm::vnl_sparse_lm(vnl_sparse_lst_sqr_function& f)
24 : num_a_(f.number_of_a()),
25 num_b_(f.number_of_b()),
26 num_e_(f.number_of_e()),
27 num_nz_(f.residual_indices().num_non_zero()),
28 size_a_(f.index_a(num_a_)),
29 size_b_(f.index_b(num_b_)),
30 size_c_(f.number_of_params_c()),
31 size_e_(f.index_e(num_e_)),
32 A_(num_nz_),
33 B_(num_nz_),
34 C_(num_nz_),
35 U_(num_a_),
36 V_(num_b_),
37 T_(size_c_, size_c_),
38 W_(num_nz_),
39 R_(num_b_),
40 Q_(num_a_),
41 ea_(size_a_),
42 eb_(size_b_),
43 ec_(size_c_),
44 e_(size_e_),
45 weights_(f.has_weights() ? num_e_ : 0, 1.0),
46 inv_V_(num_b_),
47 Y_(num_nz_),
48 Z_(num_a_),
49 Ma_(num_a_),
50 Mb_(num_b_)
51 {
52 init(&f);
53 }
54
55
56 // ctor
init(vnl_sparse_lst_sqr_function * f)57 void vnl_sparse_lm::init(vnl_sparse_lst_sqr_function* f)
58 {
59 f_ = f;
60
61 // If changing these defaults, check the help comments in vnl_sparse_lm.h,
62 // and MAKE SURE they're consistent.
63 xtol = 1e-15; // Termination tolerance on X (solution vector)
64 maxfev = 1000; // Termination maximum number of iterations.
65 ftol = 1e-15; // Termination tolerance on F (sum of squared residuals)
66 gtol = 1e-15; // Termination tolerance on Grad(F)' * F = 0
67 epsfcn = 0.001; // Step length for FD Jacobian
68
69 tau_ = 0.001;
70
71 allocate_matrices();
72 }
73
74 vnl_sparse_lm::~vnl_sparse_lm() = default;
75
76
77 //: Minimize the function supplied in the constructor until convergence or failure.
78 // On return, a, b, and c are such that f(a,b,c) is the lowest value achieved.
79 // Returns true for convergence, false for failure.
80 // If use_gradient is set to false, a finite difference approximation will be used,
81 // even if the Jacobian functions have been provided.
82 // If use_weights is set to false, weights will not be computed even if a
83 // weighting function has been provided.
minimize(vnl_vector<double> & a,vnl_vector<double> & b,vnl_vector<double> & c,bool use_gradient,bool use_weights)84 bool vnl_sparse_lm::minimize(vnl_vector<double>& a,
85 vnl_vector<double>& b,
86 vnl_vector<double>& c,
87 bool use_gradient,
88 bool use_weights)
89 {
90 // verify that the vectors are of the correct size
91 if (!check_vector_sizes(a,b,c))
92 return false;
93
94 //: Systems to solve will be Sc*dc=sec and Sa*da=sea
95 vnl_matrix<double> Sc(size_c_,size_c_), Sa(size_a_, size_a_);
96 vnl_vector<double> sec(size_c_), sea(size_a_);
97 // update vectors
98 vnl_vector<double> da(size_a_), db(size_b_), dc(size_c_);
99
100
101 // mu is initialized now because the compiler produces warnings -MM
102 double mu=0; // damping term (initialized later)
103 double nu=2.0;
104 // compute the initial residual
105 f_->f(a,b,c,e_);
106 num_evaluations_ = 1;
107
108 // Compute and apply the weights if applicable
109 if (use_weights && f_->has_weights())
110 {
111 f_->compute_weights(a,b,c,e_,weights_);
112 f_->apply_weights(weights_, e_);
113 }
114
115 double sqr_error = e_.squared_magnitude();
116 start_error_ = std::sqrt(sqr_error/e_.size()); // RMS error
117
118 for (num_iterations_=0; num_iterations_<(unsigned int)maxfev; ++num_iterations_)
119 {
120 if (verbose_)
121 std::cout << "iteration "<<std::setw(4)<<num_iterations_
122 << " RMS error = "<< std::setprecision(6)<< std::setw(12)<<std::sqrt(sqr_error/e_.size())
123 << " mu = "<<std::setprecision(6)<< std::setw(12) <<mu<< " nu = "<< nu << std::endl;
124 if (trace)
125 f_->trace(num_iterations_,a,b,c,e_);
126
127 // Compute the Jacobian in block form J = [A|B|C]
128 // where A, B, and C are sparse and contain subblocks Aij, Bij, and Cij
129 if (use_gradient && f_->has_gradient())
130 f_->jac_blocks(a,b,c,A_,B_,C_);
131 else
132 f_->fd_jac_blocks(a,b,c,A_,B_,C_,epsfcn); // use finite differences
133
134 // Apply the weights if applicable
135 if (use_weights && f_->has_weights())
136 {
137 f_->apply_weights(weights_, A_,B_,C_);
138 }
139
140 compute_normal_equations();
141
142 // check for convergence in gradient
143 if (std::max(std::max(ea_.inf_norm(),eb_.inf_norm()),ec_.inf_norm()) <= gtol)
144 {
145 failure_code_ = CONVERGED_GTOL;
146 break;
147 }
148
149
150 double sqr_params = a.squared_magnitude();
151 sqr_params += b.squared_magnitude();
152 sqr_params += c.squared_magnitude();
153
154 // Extract the diagonal of J^T*J as a vector
155 vnl_vector<double> diag_UVT = extract_diagonal();
156
157 // initialize mu if this is the first iteration
158 // proportional to the diagonal entry with the largest magnitude
159 if (num_iterations_==0)
160 mu = tau_*diag_UVT.inf_norm();
161
162 // Re-solve the system while adapting mu until we decrease error or converge
163 while (true)
164 {
165 // augment the diagonals with damping term mu
166 set_diagonal(diag_UVT + mu);
167
168 // compute inv(Vj) and Yij
169 compute_invV_Y();
170
171 if ( size_c_ > 0 )
172 {
173 // compute Z = RYt-Q and Sa
174 compute_Z_Sa(Sa);
175
176 // this large inverse is the bottle neck of this algorithm
177 vnl_matrix<double> H;
178 vnl_cholesky Sa_cholesky(Sa,vnl_cholesky::quiet);
179 vnl_svd<double> *Sa_svd = nullptr;
180 // use SVD as a backup if Cholesky is deficient
181 if ( Sa_cholesky.rank_deficiency() > 0 )
182 {
183 Sa_svd = new vnl_svd<double>(Sa);
184 H = Sa_svd->inverse();
185 }
186 else
187 H = Sa_cholesky.inverse();
188
189 // construct the Ma = ZH
190 compute_Ma(H);
191 // construct Mb = (R+MaW)inv(V)
192 compute_Mb();
193
194 // use Ma and Mb to solve for dc
195 solve_dc(dc);
196
197 // compute sea from ea, Z, dc, Y, and eb
198 compute_sea(dc,sea);
199
200
201 if ( Sa_svd )
202 da = Sa_svd->solve(sea);
203 else
204 da = Sa_cholesky.solve(sea);
205 delete Sa_svd;
206 }
207 else // size_c_ == 0
208 {
209 // |I -W*inv(V)| * |U W| * |da| = |I -W*inv(V)| * |ea|
210 // |0 I | |Wt V| |db| |0 I | |eb|
211 //
212 // premultiplying as shown above gives:
213 // |Sa 0| * |da| = |sea|
214 // |Wt V| |db| |eb |
215 //
216 // so we can first solve Sa*da = sea and then substitute to find db
217
218 // compute Sa and sea
219 compute_Sa_sea(Sa,sea);
220
221 #ifdef DEBUG
222 std::cout << "singular values = "<< vnl_svd<double>(Sa).W() <<std::endl;
223 #endif
224 // We could use a faster solver here, maybe conjugate gradients?
225 // Solve the system Sa*da = sea for da
226
227 vnl_cholesky Sa_cholesky(Sa,vnl_cholesky::quiet);
228 // use SVD as a backup if Cholesky is deficient
229 if ( Sa_cholesky.rank_deficiency() > 0 )
230 {
231 vnl_svd<double> Sa_svd(Sa);
232 da = Sa_svd.solve(sea);
233 }
234 else
235 da = Sa_cholesky.solve(sea);
236 }
237
238 // substitute da and dc to compute db
239 backsolve_db(da, dc, db);
240
241 // check for convergence in parameters
242 // (change in parameters is below a tolerance)
243 double sqr_delta = da.squared_magnitude();
244 sqr_delta += db.squared_magnitude();
245 sqr_delta += dc.squared_magnitude();
246 if (sqr_delta < xtol*xtol*sqr_params) {
247 failure_code_ = CONVERGED_XTOL;
248 break;
249 }
250
251 // compute updated parameters and residuals of the new parameters
252 vnl_vector<double> new_a(a-da), new_b(b-db), new_c(c-dc);
253 vnl_vector<double> new_e(e_.size()), new_weights(weights_.size());
254 f_->f(new_a,new_b,new_c,new_e); // compute the new residual vector
255 ++num_evaluations_;
256
257 // Compute and apply the weights if applicable
258 if (use_weights && f_->has_weights())
259 {
260 f_->compute_weights(new_a,new_b,new_c,new_e,new_weights);
261 f_->apply_weights(new_weights, new_e);
262 }
263
264 double new_sqr_error = new_e.squared_magnitude();
265
266 double dF = sqr_error - new_sqr_error;
267 double dL = dot_product(da,(mu*da+ea_))
268 +dot_product(db,(mu*db+eb_))
269 +dot_product(dc,(mu*dc+ec_));
270 if (dF>0.0 && dL>0.0) {
271 double tmp = 2.0*dF/dL-1.0;
272 mu *= std::max(1.0/3.0, 1.0 - tmp*tmp*tmp);
273 nu = 2.0;
274 a.swap(new_a);
275 b.swap(new_b);
276 c.swap(new_c);
277 e_.swap(new_e);
278 weights_.swap(new_weights);
279 sqr_error = new_sqr_error;
280 break;
281 }
282
283 mu *= nu;
284 nu *= 2.0;
285
286 if (verbose_)
287 std::cout <<" RMS error = "<< std::setprecision(6)
288 << std::setw(12) << std::sqrt(sqr_error/e_.size())
289 << " mu = " << std::setprecision(6) << std::setw(12) << mu
290 << " nu = " << nu << std::endl;
291 }
292 }
293
294
295 end_error_ = std::sqrt(sqr_error/e_.size()); // RMS error
296
297 if ((int)num_iterations_ >= maxfev) {
298 failure_code_ = TOO_MANY_ITERATIONS;
299 }
300
301 // Translate status code
302 switch (failure_code_) {
303 case CONVERGED_FTOL:
304 case CONVERGED_XTOL:
305 case CONVERGED_XFTOL:
306 case CONVERGED_GTOL:
307 return true;
308 default:
309 diagnose_outcome();
310 return false;
311 }
312 }
313
314
315 //: check vector sizes and verify that they match the problem size
check_vector_sizes(vnl_vector<double> const & a,vnl_vector<double> const & b,vnl_vector<double> const & c)316 bool vnl_sparse_lm::check_vector_sizes(vnl_vector<double> const& a,
317 vnl_vector<double> const& b,
318 vnl_vector<double> const& c)
319 {
320 if (size_a_+size_b_ > size_e_) {
321 std::cerr << "vnl_sparse_lm: Number of unknowns("<<size_a_+size_b_<<')'
322 << " greater than number of data ("<<size_e_<<")\n";
323 failure_code_ = ERROR_DODGY_INPUT;
324 return false;
325 }
326
327 if (int(a.size()) != size_a_) {
328 std::cerr << "vnl_sparse_lm: Input vector \"a\" length ("<<a.size()<<')'
329 << " not equal to num parameters in \"a\" ("<<size_a_<<")\n";
330 failure_code_ = ERROR_DODGY_INPUT;
331 return false;
332 }
333
334 if (int(b.size()) != size_b_) {
335 std::cerr << "vnl_sparse_lm: Input vector \"b\" length ("<<b.size()<<')'
336 << " not equal to num parameters in \"b\" ("<<size_b_<<")\n";
337 failure_code_ = ERROR_DODGY_INPUT;
338 return false;
339 }
340
341 if (int(c.size()) != size_c_) {
342 std::cerr << "vnl_sparse_lm: Input vector \"c\" length ("<<c.size()<<')'
343 << " not equal to num parameters in \"c\" ("<<size_c_<<")\n";
344 failure_code_ = ERROR_DODGY_INPUT;
345 return false;
346 }
347
348 return true;
349 }
350
351
352 //: allocate matrix memory by setting all the matrix sizes
allocate_matrices()353 void vnl_sparse_lm::allocate_matrices()
354 {
355 // CRS matrix of indices into e, A, B, C, W, Y
356 const vnl_crs_index& crs = f_->residual_indices();
357
358 // Iterate through all i and j to set the size of the matrices and vectors defined above
359 for (int i=0; i<num_a_; ++i)
360 {
361 const unsigned int ai_size = f_->number_of_params_a(i);
362 U_[i].set_size(ai_size,ai_size);
363 Q_[i].set_size(size_c_, ai_size);
364 Z_[i].set_size(size_c_, ai_size);
365 Ma_[i].set_size(size_c_, ai_size);
366
367 vnl_crs_index::sparse_vector row = crs.sparse_row(i);
368 for (auto & r_itr : row)
369 {
370 const unsigned int j = r_itr.second;
371 const unsigned int k = r_itr.first;
372 const unsigned int bj_size = f_->number_of_params_b(j);
373 const unsigned int eij_size = f_->number_of_residuals(k);
374 A_[k].set_size(eij_size, ai_size);
375 B_[k].set_size(eij_size, bj_size);
376 C_[k].set_size(eij_size, size_c_);
377 W_[k].set_size(ai_size, bj_size);
378 Y_[k].set_size(ai_size, bj_size);
379 }
380 }
381 for (int j=0; j<num_b_; ++j)
382 {
383 const unsigned int bj_size = f_->number_of_params_b(j);
384 V_[j].set_size(bj_size,bj_size);
385 R_[j].set_size(size_c_, bj_size);
386 Mb_[j].set_size(size_c_, bj_size);
387 inv_V_[j].set_size(bj_size,bj_size);
388 }
389 }
390
391
392 //: compute the blocks making up the the normal equations: Jt J d = Jt e
compute_normal_equations()393 void vnl_sparse_lm::compute_normal_equations()
394 {
395 // CRS matrix of indices into e, A, B, C, W, Y
396 const vnl_crs_index& crs = f_->residual_indices();
397 // sparse vector iterator
398
399 // clear the ea and eb for summation
400 ea_.fill(0.0);
401 eb_.fill(0.0);
402 ec_.fill(0.0);
403 // clear the V for summation
404 for (unsigned int j=0; j<f_->number_of_b(); ++j)
405 {
406 V_[j].fill(0.0);
407 R_[j].fill(0.0);
408 }
409 T_.fill(0.0);
410 // compute blocks T, Q, R, U, V, W, ea, eb, and ec
411 // JtJ = |T Q R|
412 // |Qt U W| with U and V block diagonal
413 // |Rt Wt V| and W with same sparsity as residuals
414 for (unsigned int i=0; i<f_->number_of_a(); ++i)
415 {
416 vnl_matrix<double>& Ui = U_[i];
417 Ui.fill(0.0);
418 vnl_matrix<double>& Qi = Q_[i];
419 Qi.fill(0.0);
420 unsigned int ai_size = f_->number_of_params_a(i);
421 vnl_vector_ref<double> eai(ai_size, ea_.data_block()+f_->index_a(i));
422
423 vnl_crs_index::sparse_vector row = crs.sparse_row(i);
424 for (auto & r_itr : row)
425 {
426 unsigned int j = r_itr.second;
427 unsigned int k = r_itr.first;;
428 vnl_matrix<double>& Aij = A_[k];
429 vnl_matrix<double>& Bij = B_[k];
430 vnl_matrix<double>& Cij = C_[k];
431 vnl_matrix<double>& Vj = V_[j];
432 vnl_matrix<double>& Rj = R_[j];
433 vnl_vector_ref<double> ebj(Bij.cols(), eb_.data_block()+f_->index_b(j));
434
435 vnl_fastops::inc_X_by_AtA(T_, Cij); // T = C^T * C
436 vnl_fastops::inc_X_by_AtA(Ui,Aij); // Ui += A_ij^T * A_ij
437 vnl_fastops::inc_X_by_AtA(Vj,Bij); // Vj += B_ij^T * B_ij
438 vnl_fastops::AtB(W_[k],Aij,Bij); // Wij = A_ij^T * B_ij
439 vnl_fastops::inc_X_by_AtB(Qi,Cij,Aij); // Qi += C_ij^T * A_ij
440 vnl_fastops::inc_X_by_AtB(Rj,Cij,Bij); // Rj += C_ij^T * B_ij
441
442 vnl_vector_ref<double> eij(f_->number_of_residuals(k), e_.data_block()+f_->index_e(k));
443 vnl_fastops::inc_X_by_AtB(eai,Aij,eij); // e_a_i += A_ij^T * e_ij
444 vnl_fastops::inc_X_by_AtB(ebj,Bij,eij); // e_b_j += B_ij^T * e_ij
445 vnl_fastops::inc_X_by_AtB(ec_,Cij,eij); // e_c += C_ij^T * e_ij
446 }
447 }
448 }
449
450
451 //: extract the vector on the diagonal of Jt J
extract_diagonal() const452 vnl_vector<double> vnl_sparse_lm::extract_diagonal() const
453 {
454 // Extract the diagonal of J^T*J as a vector
455 vnl_vector<double> diag_UVT(size_a_+size_b_+size_c_);
456 int z = 0;
457 for (int i=0; i<num_a_; ++i) {
458 const vnl_matrix<double>& Ui = U_[i];
459 for (unsigned int ii=0; ii<Ui.rows(); ++ii)
460 diag_UVT[z++] = Ui(ii,ii);
461 }
462 for (int j=0; j<num_b_; ++j) {
463 const vnl_matrix<double>& Vj = V_[j];
464 for (unsigned int ii=0; ii<Vj.rows(); ++ii)
465 diag_UVT[z++] = Vj(ii,ii);
466 }
467 for (int ii=0; ii<size_c_; ++ii)
468 diag_UVT[z++] = T_(ii,ii);
469
470 return diag_UVT;
471 }
472
473
474 //: set the vector on the diagonal of Jt J
set_diagonal(const vnl_vector<double> & diag)475 void vnl_sparse_lm::set_diagonal(const vnl_vector<double>& diag)
476 {
477 int z=0;
478 for (int i=0; i<num_a_; ++i) {
479 vnl_matrix<double>& Ui = U_[i];
480 for (unsigned int ii=0; ii<Ui.rows(); ++ii)
481 Ui(ii,ii) = diag[z++];
482 }
483 for (int j=0; j<num_b_; ++j) {
484 vnl_matrix<double>& Vj = V_[j];
485 for (unsigned int ii=0; ii<Vj.rows(); ++ii)
486 Vj(ii,ii) = diag[z++];
487 }
488 for (int ii=0; ii<size_c_; ++ii)
489 T_(ii,ii) = diag[z++];
490 }
491
492
493 //: compute all inv(Vi) and Yij
compute_invV_Y()494 void vnl_sparse_lm::compute_invV_Y()
495 {
496 // CRS matrix of indices into e, A, B, C, W, Y
497 const vnl_crs_index& crs = f_->residual_indices();
498
499 for (int j=0; j<num_b_; ++j) {
500 vnl_matrix<double>& inv_Vj = inv_V_[j];
501 vnl_cholesky Vj_cholesky(V_[j],vnl_cholesky::quiet);
502 // use SVD as a backup if Cholesky is deficient
503 if ( Vj_cholesky.rank_deficiency() > 0 )
504 {
505 vnl_svd<double> Vj_svd(V_[j]);
506 inv_Vj = Vj_svd.inverse();
507 }
508 else
509 inv_Vj = Vj_cholesky.inverse();
510
511 vnl_crs_index::sparse_vector col = crs.sparse_col(j);
512 for (auto & c_itr : col)
513 {
514 unsigned int k = c_itr.first;
515 Y_[k] = W_[k]*inv_Vj; // Y_ij = W_ij * inv(V_j)
516 }
517 }
518 }
519
520
521 // compute Z and Sa
compute_Z_Sa(vnl_matrix<double> & Sa)522 void vnl_sparse_lm::compute_Z_Sa(vnl_matrix<double>& Sa)
523 {
524 // CRS matrix of indices into e, A, B, C, W, Y
525 const vnl_crs_index& crs = f_->residual_indices();
526 // sparse vector iterator
527
528 // compute Z = RYt-Q and Sa
529 for (int i=0; i<num_a_; ++i)
530 {
531 vnl_crs_index::sparse_vector row_i = crs.sparse_row(i);
532 vnl_matrix<double>& Zi = Z_[i];
533 Zi.fill(0.0);
534 Zi -= Q_[i];
535
536 // handle the diagonal blocks separately
537 vnl_matrix<double> Sii(U_[i]); // copy Ui to initialize Sii
538 for (auto & ri : row_i)
539 {
540 unsigned int j = ri.second;
541 unsigned int k = ri.first;
542 vnl_matrix<double>& Yij = Y_[k];
543 vnl_fastops::dec_X_by_ABt(Sii,Yij,W_[k]); // S_ii -= Y_ij * W_ij^T
544 vnl_fastops::inc_X_by_ABt(Zi,R_[j],Yij); // Z_i += R_j * Y_ij^T
545 }
546 Sa.update(Sii,f_->index_a(i),f_->index_a(i));
547
548 // handle the (symmetric) off diagonal blocks
549 for (int h=i+1; h<num_a_; ++h)
550 {
551 vnl_crs_index::sparse_vector row_h = crs.sparse_row(h);
552 vnl_matrix<double> Sih(f_->number_of_params_a(i),
553 f_->number_of_params_a(h), 0.0);
554
555 // iterate through both sparse rows finding matching columns
556 bool row_done = false;
557 for (auto ri = row_i.begin(), rh = row_h.begin();
558 ri != row_i.end() && rh != row_h.end(); ++ri, ++rh)
559 {
560 while (!row_done && ri->second != rh->second)
561 {
562 while (!row_done && ri->second < rh->second)
563 row_done = (++ri == row_i.end());
564 while (!row_done && rh->second < ri->second)
565 row_done = (++rh == row_h.end());
566 }
567 if (row_done)
568 break;
569 // S_ih -= Y_ij * W_hj^T
570 vnl_fastops::dec_X_by_ABt(Sih,Y_[ri->first],W_[rh->first]);
571 }
572 // this should also be a symmetric matrix
573 Sa.update(Sih,f_->index_a(i),f_->index_a(h));
574 Sa.update(Sih.transpose(),f_->index_a(h),f_->index_a(i));
575 }
576 }
577 }
578
579
580 //: compute Ma
compute_Ma(const vnl_matrix<double> & H)581 void vnl_sparse_lm::compute_Ma(const vnl_matrix<double>& H)
582 {
583 // construct Ma = ZH
584 vnl_matrix<double> Hik;
585 for (int i=0; i<num_a_; ++i)
586 {
587 vnl_matrix<double>& Mai = Ma_[i];
588 Mai.fill(0.0);
589
590 for (int k=0; k<num_a_; ++k)
591 {
592 Hik.set_size(f_->number_of_params_a(i), f_->number_of_params_a(k));
593 H.extract(Hik,f_->index_a(i), f_->index_a(k));
594 vnl_fastops::inc_X_by_AB(Mai, Z_[k], Hik);
595 }
596 }
597 }
598
599
600 //: compute Mb
compute_Mb()601 void vnl_sparse_lm::compute_Mb()
602 {
603 // CRS matrix of indices into e, A, B, C, W, Y
604 const vnl_crs_index& crs = f_->residual_indices();
605
606 vnl_matrix<double> temp;
607 // construct Mb = (-R-MaW)inv(V)
608 for (int j=0; j<num_b_; ++j)
609 {
610 temp.set_size(size_c_,f_->number_of_params_b(j));
611 temp.fill(0.0);
612 temp -= R_[j];
613
614 vnl_crs_index::sparse_vector col = crs.sparse_col(j);
615 for (auto & c_itr : col)
616 {
617 unsigned int k = c_itr.first;
618 unsigned int i = c_itr.second;
619 vnl_fastops::dec_X_by_AB(temp,Ma_[i],W_[k]);
620 }
621 vnl_fastops::AB(Mb_[j],temp,inv_V_[j]);
622 }
623 }
624
625
626 //: solve for dc
solve_dc(vnl_vector<double> & dc)627 void vnl_sparse_lm::solve_dc(vnl_vector<double>& dc)
628 {
629 vnl_matrix<double> Sc(T_); // start with a copy of T
630 vnl_vector<double> sec(ec_); // start with a copy of ec
631
632 for (int i=0; i<num_a_; ++i)
633 {
634 const vnl_vector_ref<double> eai(f_->number_of_params_a(i),
635 const_cast<double*>(ea_.data_block()+f_->index_a(i)));
636 vnl_fastops::inc_X_by_ABt(Sc,Ma_[i],Q_[i]);
637 sec += Ma_[i] * eai;
638 }
639 for (int j=0; j<num_b_; ++j)
640 {
641 const vnl_vector_ref<double> ebi(f_->number_of_params_b(j),
642 const_cast<double*>(eb_.data_block()+f_->index_b(j)));
643 vnl_fastops::inc_X_by_ABt(Sc,Mb_[j],R_[j]);
644 sec += Mb_[j] * ebi;
645 }
646
647 if (size_c_ == 1)
648 {
649 dc[0] = sec[0] / Sc(0,0);
650 }
651 else
652 {
653 // Solve Sc*dc = sec for dc
654 vnl_cholesky Sc_cholesky(Sc,vnl_cholesky::quiet);
655 // use SVD as a backup if Cholesky is deficient
656 if ( Sc_cholesky.rank_deficiency() > 0 )
657 {
658 vnl_svd<double> Sc_svd(Sc);
659 dc = Sc_svd.solve(sec);
660 }
661 else
662 dc = Sc_cholesky.solve(sec);
663 }
664 }
665
666
667 //: compute sea using ea, Z, dc, Y, and eb
compute_sea(vnl_vector<double> const & dc,vnl_vector<double> & sea)668 void vnl_sparse_lm::compute_sea(vnl_vector<double> const& dc,
669 vnl_vector<double>& sea)
670 {
671 // CRS matrix of indices into e, A, B, C, W, Y
672 const vnl_crs_index& crs = f_->residual_indices();
673 // sparse vector iterator
674
675 sea = ea_; // initialize se to ea_
676 for (int i=0; i<num_a_; ++i)
677 {
678 vnl_vector_ref<double> sei(f_->number_of_params_a(i),sea.data_block()+f_->index_a(i));
679 vnl_crs_index::sparse_vector row_i = crs.sparse_row(i);
680
681 vnl_fastops::inc_X_by_AtB(sei,Z_[i],dc);
682
683 for (auto & ri : row_i)
684 {
685 unsigned int k = ri.first;
686 vnl_matrix<double>& Yij = Y_[k];
687 vnl_vector_ref<double> ebj(Yij.cols(), eb_.data_block()+f_->index_b(ri.second));
688 sei -= Yij*ebj; // se_i -= Y_ij * e_b_j
689 }
690 }
691 }
692
693
694 //: compute Sa and sea
695 // only used when size_c_ == 0
compute_Sa_sea(vnl_matrix<double> & Sa,vnl_vector<double> & sea)696 void vnl_sparse_lm::compute_Sa_sea(vnl_matrix<double>& Sa,
697 vnl_vector<double>& sea)
698 {
699 // CRS matrix of indices into e, A, B, C, W, Y
700 const vnl_crs_index& crs = f_->residual_indices();
701
702 sea = ea_; // initialize se to ea_
703 for (int i=0; i<num_a_; ++i)
704 {
705 vnl_vector_ref<double> sei(f_->number_of_params_a(i),sea.data_block()+f_->index_a(i));
706 vnl_crs_index::sparse_vector row_i = crs.sparse_row(i);
707
708 // handle the diagonal blocks and computation of se separately
709 vnl_matrix<double> Sii(U_[i]); // copy Ui to initialize Sii
710 for (auto & ri : row_i)
711 {
712 unsigned int k = ri.first;
713 vnl_matrix<double>& Yij = Y_[k];
714 vnl_fastops::dec_X_by_ABt(Sii,Yij,W_[k]); // S_ii -= Y_ij * W_ij^T
715 vnl_vector_ref<double> ebj(Yij.cols(), eb_.data_block()+f_->index_b(ri.second));
716 sei -= Yij*ebj; // se_i -= Y_ij * e_b_j
717 }
718 Sa.update(Sii,f_->index_a(i),f_->index_a(i));
719
720 // handle the (symmetric) off diagonal blocks
721 for (int h=i+1; h<num_a_; ++h)
722 {
723 vnl_crs_index::sparse_vector row_h = crs.sparse_row(h);
724 vnl_matrix<double> Sih(f_->number_of_params_a(i),f_->number_of_params_a(h),0.0);
725
726 // iterate through both sparse rows finding matching columns
727 bool row_done = false;
728 for (auto ri = row_i.begin(), rh = row_h.begin();
729 ri != row_i.end() && rh != row_h.end(); ++ri, ++rh)
730 {
731 while (!row_done && ri->second != rh->second)
732 {
733 while (!row_done && ri->second < rh->second)
734 row_done = (++ri == row_i.end());
735 while (!row_done && rh->second < ri->second)
736 row_done = (++rh == row_h.end());
737 }
738 if (row_done)
739 break;
740 // S_ih -= Y_ij * W_hj^T
741 vnl_fastops::dec_X_by_ABt(Sih,Y_[ri->first],W_[rh->first]);
742 }
743 // this should also be a symmetric matrix
744 Sa.update(Sih,f_->index_a(i),f_->index_a(h));
745 Sa.update(Sih.transpose(),f_->index_a(h),f_->index_a(i));
746 }
747 }
748 }
749
750
751 //: back solve to find db using da and dc
backsolve_db(vnl_vector<double> const & da,vnl_vector<double> const & dc,vnl_vector<double> & db)752 void vnl_sparse_lm::backsolve_db(vnl_vector<double> const& da,
753 vnl_vector<double> const& dc,
754 vnl_vector<double>& db)
755 {
756 // CRS matrix of indices into e, A, B, C, W, Y
757 const vnl_crs_index& crs = f_->residual_indices();
758 // sparse vector iterator
759
760 for (int j=0; j<num_b_; ++j)
761 {
762 vnl_vector<double> seb(eb_.data_block()+f_->index_b(j),f_->number_of_params_b(j));
763 vnl_crs_index::sparse_vector col = crs.sparse_col(j);
764 if ( size_c_ > 0 )
765 {
766 vnl_fastops::dec_X_by_AtB(seb,R_[j],dc);
767 }
768 for (auto & c_itr : col)
769 {
770 unsigned int k = c_itr.first;
771 unsigned int i = c_itr.second;
772 const vnl_vector_ref<double> dai(f_->number_of_params_a(i),
773 const_cast<double*>(da.data_block()+f_->index_a(i)));
774 vnl_fastops::dec_X_by_AtB(seb,W_[k],dai);
775 }
776 vnl_vector_ref<double> dbi(f_->number_of_params_b(j),db.data_block()+f_->index_b(j));
777 vnl_fastops::Ab(dbi,inv_V_[j],seb);
778 }
779 }
780
781 //------------------------------------------------------------------------------
782
diagnose_outcome() const783 void vnl_sparse_lm::diagnose_outcome() const
784 {
785 diagnose_outcome(std::cerr);
786 }
787
788 // fsm: should this function be a method on vnl_nonlinear_minimizer?
789 // if not, the return codes should be moved into LM.
diagnose_outcome(std::ostream & s) const790 void vnl_sparse_lm::diagnose_outcome(std::ostream& s) const
791 {
792 #define whoami "vnl_sparse_lm"
793 //if (!verbose_) return;
794 switch (failure_code_)
795 {
796 case ERROR_FAILURE:
797 s << (whoami ": OIOIOI -- failure in leastsquares function\n");
798 break;
799 case ERROR_DODGY_INPUT:
800 s << (whoami ": OIOIOI -- lmdif dodgy input\n");
801 break;
802 case CONVERGED_FTOL:
803 s << (whoami ": converged to ftol\n");
804 break;
805 case CONVERGED_XTOL:
806 s << (whoami ": converged to xtol\n");
807 break;
808 case CONVERGED_XFTOL:
809 s << (whoami ": converged nicely\n");
810 break;
811 case CONVERGED_GTOL:
812 s << (whoami ": converged via gtol\n");
813 break;
814 case TOO_MANY_ITERATIONS:
815 s << (whoami ": too many iterations\n");
816 break;
817 case FAILED_FTOL_TOO_SMALL:
818 s << (whoami ": ftol is too small. no further reduction in the sum of squares is possible.\n");
819 break;
820 case FAILED_XTOL_TOO_SMALL:
821 s << (whoami ": xtol is too small. no further improvement in the approximate solution x is possible.\n");
822 break;
823 case FAILED_GTOL_TOO_SMALL:
824 s << (whoami ": gtol is too small. f(a,b) is orthogonal to the columns of the jacobian to machine precision.\n");
825 break;
826 default:
827 s << (whoami ": OIOIOI: unkown info code from lmder.\n");
828 break;
829 }
830 unsigned int num_e = f_->number_of_e();
831 s << whoami ": " << num_iterations_ << " iterations, "
832 << num_evaluations_ << " evaluations, "<< num_e <<" residuals. RMS error start/end "
833 << get_start_error() << '/' << get_end_error() << std::endl;
834 #undef whoami
835 }
836
837
get_JtJ()838 vnl_matrix<double> const& vnl_sparse_lm::get_JtJ()
839 {
840 return inv_covar_;
841 }
842