1 // This is core/vnl/algo/vnl_sparse_lm.cxx
2 //:
3 // \file
4 // \author Matt Leotta (Brown)
5 // \date   April 14, 2005
6 //
7 //-----------------------------------------------------------------------------
8 
9 #include <iostream>
10 #include <iomanip>
11 #include <algorithm>
12 #include "vnl_sparse_lm.h"
13 #include <vnl/vnl_fastops.h>
14 #include <vnl/vnl_vector_ref.h>
15 #include <vnl/vnl_crs_index.h>
16 #include <vnl/vnl_sparse_lst_sqr_function.h>
17 
18 #include <vnl/algo/vnl_cholesky.h>
19 #include <vnl/algo/vnl_svd.h>
20 
21 
22 //: Initialize with the function object that is to be minimized.
vnl_sparse_lm(vnl_sparse_lst_sqr_function & f)23 vnl_sparse_lm::vnl_sparse_lm(vnl_sparse_lst_sqr_function& f)
24  : num_a_(f.number_of_a()),
25    num_b_(f.number_of_b()),
26    num_e_(f.number_of_e()),
27    num_nz_(f.residual_indices().num_non_zero()),
28    size_a_(f.index_a(num_a_)),
29    size_b_(f.index_b(num_b_)),
30    size_c_(f.number_of_params_c()),
31    size_e_(f.index_e(num_e_)),
32    A_(num_nz_),
33    B_(num_nz_),
34    C_(num_nz_),
35    U_(num_a_),
36    V_(num_b_),
37    T_(size_c_, size_c_),
38    W_(num_nz_),
39    R_(num_b_),
40    Q_(num_a_),
41    ea_(size_a_),
42    eb_(size_b_),
43    ec_(size_c_),
44    e_(size_e_),
45    weights_(f.has_weights() ? num_e_ : 0, 1.0),
46    inv_V_(num_b_),
47    Y_(num_nz_),
48    Z_(num_a_),
49    Ma_(num_a_),
50    Mb_(num_b_)
51 {
52   init(&f);
53 }
54 
55 
56 // ctor
init(vnl_sparse_lst_sqr_function * f)57 void vnl_sparse_lm::init(vnl_sparse_lst_sqr_function* f)
58 {
59   f_ = f;
60 
61   // If changing these defaults, check the help comments in vnl_sparse_lm.h,
62   // and MAKE SURE they're consistent.
63   xtol = 1e-15;          // Termination tolerance on X (solution vector)
64   maxfev = 1000;         // Termination maximum number of iterations.
65   ftol = 1e-15;          // Termination tolerance on F (sum of squared residuals)
66   gtol = 1e-15;          // Termination tolerance on Grad(F)' * F = 0
67   epsfcn = 0.001;        // Step length for FD Jacobian
68 
69   tau_ = 0.001;
70 
71   allocate_matrices();
72 }
73 
74 vnl_sparse_lm::~vnl_sparse_lm() = default;
75 
76 
77 //: Minimize the function supplied in the constructor until convergence or failure.
78 //  On return, a, b, and c are such that f(a,b,c) is the lowest value achieved.
79 //  Returns true for convergence, false for failure.
80 //  If use_gradient is set to false, a finite difference approximation will be used,
81 //  even if the Jacobian functions have been provided.
82 //  If use_weights is set to false, weights will not be computed even if a
83 //  weighting function has been provided.
minimize(vnl_vector<double> & a,vnl_vector<double> & b,vnl_vector<double> & c,bool use_gradient,bool use_weights)84 bool vnl_sparse_lm::minimize(vnl_vector<double>& a,
85                              vnl_vector<double>& b,
86                              vnl_vector<double>& c,
87                              bool use_gradient,
88                              bool use_weights)
89 {
90   // verify that the vectors are of the correct size
91   if (!check_vector_sizes(a,b,c))
92     return false;
93 
94   //: Systems to solve will be Sc*dc=sec and Sa*da=sea
95   vnl_matrix<double> Sc(size_c_,size_c_), Sa(size_a_, size_a_);
96   vnl_vector<double> sec(size_c_), sea(size_a_);
97   // update vectors
98   vnl_vector<double> da(size_a_), db(size_b_), dc(size_c_);
99 
100 
101   // mu is initialized now because the compiler produces warnings -MM
102   double mu=0; // damping term (initialized later)
103   double nu=2.0;
104   // compute the initial residual
105   f_->f(a,b,c,e_);
106   num_evaluations_ = 1;
107 
108   // Compute and apply the weights if applicable
109   if (use_weights && f_->has_weights())
110   {
111     f_->compute_weights(a,b,c,e_,weights_);
112     f_->apply_weights(weights_, e_);
113   }
114 
115   double sqr_error = e_.squared_magnitude();
116   start_error_ = std::sqrt(sqr_error/e_.size()); // RMS error
117 
118   for (num_iterations_=0; num_iterations_<(unsigned int)maxfev; ++num_iterations_)
119   {
120     if (verbose_)
121       std::cout << "iteration "<<std::setw(4)<<num_iterations_
122                << " RMS error = "<< std::setprecision(6)<< std::setw(12)<<std::sqrt(sqr_error/e_.size())
123                << " mu = "<<std::setprecision(6)<< std::setw(12) <<mu<< " nu = "<< nu << std::endl;
124     if (trace)
125       f_->trace(num_iterations_,a,b,c,e_);
126 
127     // Compute the Jacobian in block form J = [A|B|C]
128     // where A, B, and C are sparse and contain subblocks Aij, Bij, and Cij
129     if (use_gradient && f_->has_gradient())
130       f_->jac_blocks(a,b,c,A_,B_,C_);
131     else
132       f_->fd_jac_blocks(a,b,c,A_,B_,C_,epsfcn); // use finite differences
133 
134     // Apply the weights if applicable
135     if (use_weights && f_->has_weights())
136     {
137       f_->apply_weights(weights_, A_,B_,C_);
138     }
139 
140     compute_normal_equations();
141 
142     // check for convergence in gradient
143     if (std::max(std::max(ea_.inf_norm(),eb_.inf_norm()),ec_.inf_norm()) <= gtol)
144     {
145       failure_code_ = CONVERGED_GTOL;
146       break;
147     }
148 
149 
150     double sqr_params = a.squared_magnitude();
151     sqr_params += b.squared_magnitude();
152     sqr_params += c.squared_magnitude();
153 
154     // Extract the diagonal of J^T*J as a vector
155     vnl_vector<double> diag_UVT = extract_diagonal();
156 
157     // initialize mu if this is the first iteration
158     // proportional to the diagonal entry with the largest magnitude
159     if (num_iterations_==0)
160       mu = tau_*diag_UVT.inf_norm();
161 
162     // Re-solve the system while adapting mu until we decrease error or converge
163     while (true)
164     {
165       // augment the diagonals with damping term mu
166       set_diagonal(diag_UVT + mu);
167 
168       // compute inv(Vj) and Yij
169       compute_invV_Y();
170 
171       if ( size_c_ > 0 )
172       {
173         // compute Z = RYt-Q and Sa
174         compute_Z_Sa(Sa);
175 
176         // this large inverse is the bottle neck of this algorithm
177         vnl_matrix<double> H;
178         vnl_cholesky Sa_cholesky(Sa,vnl_cholesky::quiet);
179         vnl_svd<double> *Sa_svd = nullptr;
180         // use SVD as a backup if Cholesky is deficient
181         if ( Sa_cholesky.rank_deficiency() > 0 )
182         {
183           Sa_svd = new vnl_svd<double>(Sa);
184           H = Sa_svd->inverse();
185         }
186         else
187           H = Sa_cholesky.inverse();
188 
189         // construct the Ma = ZH
190         compute_Ma(H);
191         // construct Mb = (R+MaW)inv(V)
192         compute_Mb();
193 
194         // use Ma and Mb to solve for dc
195         solve_dc(dc);
196 
197         // compute sea from ea, Z, dc, Y, and eb
198         compute_sea(dc,sea);
199 
200 
201         if ( Sa_svd )
202           da = Sa_svd->solve(sea);
203         else
204           da = Sa_cholesky.solve(sea);
205         delete Sa_svd;
206       }
207       else // size_c_ == 0
208       {
209         // |I -W*inv(V)| * |U  W| * |da| = |I -W*inv(V)| * |ea|
210         // |0     I    |   |Wt V|   |db|   |0     I    |   |eb|
211         //
212         // premultiplying as shown above gives:
213         // |Sa 0| * |da| = |sea|
214         // |Wt V|   |db|   |eb |
215         //
216         // so we can first solve  Sa*da = sea  and then substitute to find db
217 
218         // compute Sa and sea
219         compute_Sa_sea(Sa,sea);
220 
221 #ifdef DEBUG
222         std::cout << "singular values = "<< vnl_svd<double>(Sa).W() <<std::endl;
223 #endif
224         // We could use a faster solver here, maybe conjugate gradients?
225         // Solve the system  Sa*da = sea  for da
226 
227         vnl_cholesky Sa_cholesky(Sa,vnl_cholesky::quiet);
228         // use SVD as a backup if Cholesky is deficient
229         if ( Sa_cholesky.rank_deficiency() > 0 )
230         {
231           vnl_svd<double> Sa_svd(Sa);
232           da = Sa_svd.solve(sea);
233         }
234         else
235           da = Sa_cholesky.solve(sea);
236       }
237 
238       // substitute da and dc to compute db
239       backsolve_db(da, dc, db);
240 
241       // check for convergence in parameters
242       // (change in parameters is below a tolerance)
243       double sqr_delta = da.squared_magnitude();
244       sqr_delta += db.squared_magnitude();
245       sqr_delta += dc.squared_magnitude();
246       if (sqr_delta < xtol*xtol*sqr_params) {
247         failure_code_ = CONVERGED_XTOL;
248         break;
249       }
250 
251       // compute updated parameters and residuals of the new parameters
252       vnl_vector<double> new_a(a-da), new_b(b-db), new_c(c-dc);
253       vnl_vector<double> new_e(e_.size()), new_weights(weights_.size());
254       f_->f(new_a,new_b,new_c,new_e); // compute the new residual vector
255       ++num_evaluations_;
256 
257       // Compute and apply the weights if applicable
258       if (use_weights && f_->has_weights())
259       {
260         f_->compute_weights(new_a,new_b,new_c,new_e,new_weights);
261         f_->apply_weights(new_weights, new_e);
262       }
263 
264       double new_sqr_error = new_e.squared_magnitude();
265 
266       double dF = sqr_error - new_sqr_error;
267       double dL = dot_product(da,(mu*da+ea_))
268                  +dot_product(db,(mu*db+eb_))
269                  +dot_product(dc,(mu*dc+ec_));
270       if (dF>0.0 && dL>0.0) {
271         double tmp = 2.0*dF/dL-1.0;
272         mu *= std::max(1.0/3.0, 1.0 - tmp*tmp*tmp);
273         nu = 2.0;
274         a.swap(new_a);
275         b.swap(new_b);
276         c.swap(new_c);
277         e_.swap(new_e);
278         weights_.swap(new_weights);
279         sqr_error = new_sqr_error;
280         break;
281       }
282 
283       mu *= nu;
284       nu *= 2.0;
285 
286       if (verbose_)
287         std::cout <<"               RMS error = "<< std::setprecision(6)
288                  << std::setw(12) << std::sqrt(sqr_error/e_.size())
289                  << " mu = " << std::setprecision(6) << std::setw(12) << mu
290                  << " nu = " << nu << std::endl;
291     }
292   }
293 
294 
295   end_error_ = std::sqrt(sqr_error/e_.size()); // RMS error
296 
297   if ((int)num_iterations_ >= maxfev) {
298     failure_code_ = TOO_MANY_ITERATIONS;
299   }
300 
301   // Translate status code
302   switch (failure_code_) {
303    case CONVERGED_FTOL:
304    case CONVERGED_XTOL:
305    case CONVERGED_XFTOL:
306    case CONVERGED_GTOL:
307     return true;
308    default:
309     diagnose_outcome();
310     return false;
311   }
312 }
313 
314 
315 //: check vector sizes and verify that they match the problem size
check_vector_sizes(vnl_vector<double> const & a,vnl_vector<double> const & b,vnl_vector<double> const & c)316 bool vnl_sparse_lm::check_vector_sizes(vnl_vector<double> const& a,
317                                        vnl_vector<double> const& b,
318                                        vnl_vector<double> const& c)
319 {
320   if (size_a_+size_b_ > size_e_) {
321     std::cerr << "vnl_sparse_lm: Number of unknowns("<<size_a_+size_b_<<')'
322              << " greater than number of data ("<<size_e_<<")\n";
323     failure_code_ = ERROR_DODGY_INPUT;
324     return false;
325   }
326 
327   if (int(a.size()) != size_a_) {
328     std::cerr << "vnl_sparse_lm: Input vector \"a\" length ("<<a.size()<<')'
329              << " not equal to num parameters in \"a\" ("<<size_a_<<")\n";
330     failure_code_ = ERROR_DODGY_INPUT;
331     return false;
332   }
333 
334   if (int(b.size()) != size_b_) {
335     std::cerr << "vnl_sparse_lm: Input vector \"b\" length ("<<b.size()<<')'
336              << " not equal to num parameters in \"b\" ("<<size_b_<<")\n";
337     failure_code_ = ERROR_DODGY_INPUT;
338     return false;
339   }
340 
341   if (int(c.size()) != size_c_) {
342     std::cerr << "vnl_sparse_lm: Input vector \"c\" length ("<<c.size()<<')'
343              << " not equal to num parameters in \"c\" ("<<size_c_<<")\n";
344     failure_code_ = ERROR_DODGY_INPUT;
345     return false;
346   }
347 
348   return true;
349 }
350 
351 
352 //: allocate matrix memory by setting all the matrix sizes
allocate_matrices()353 void vnl_sparse_lm::allocate_matrices()
354 {
355   // CRS matrix of indices into e, A, B, C, W, Y
356   const vnl_crs_index& crs = f_->residual_indices();
357 
358   // Iterate through all i and j to set the size of the matrices and vectors defined above
359   for (int i=0; i<num_a_; ++i)
360   {
361     const unsigned int ai_size = f_->number_of_params_a(i);
362     U_[i].set_size(ai_size,ai_size);
363     Q_[i].set_size(size_c_, ai_size);
364     Z_[i].set_size(size_c_, ai_size);
365     Ma_[i].set_size(size_c_, ai_size);
366 
367     vnl_crs_index::sparse_vector row = crs.sparse_row(i);
368     for (auto & r_itr : row)
369     {
370       const unsigned int j = r_itr.second;
371       const unsigned int k = r_itr.first;
372       const unsigned int bj_size = f_->number_of_params_b(j);
373       const unsigned int eij_size = f_->number_of_residuals(k);
374       A_[k].set_size(eij_size, ai_size);
375       B_[k].set_size(eij_size, bj_size);
376       C_[k].set_size(eij_size, size_c_);
377       W_[k].set_size(ai_size, bj_size);
378       Y_[k].set_size(ai_size, bj_size);
379     }
380   }
381   for (int j=0; j<num_b_; ++j)
382   {
383     const unsigned int bj_size = f_->number_of_params_b(j);
384     V_[j].set_size(bj_size,bj_size);
385     R_[j].set_size(size_c_, bj_size);
386     Mb_[j].set_size(size_c_, bj_size);
387     inv_V_[j].set_size(bj_size,bj_size);
388   }
389 }
390 
391 
392 //: compute the blocks making up the the normal equations: Jt J d = Jt e
compute_normal_equations()393 void vnl_sparse_lm::compute_normal_equations()
394 {
395   // CRS matrix of indices into e, A, B, C, W, Y
396   const vnl_crs_index& crs = f_->residual_indices();
397   // sparse vector iterator
398 
399   // clear the ea and eb for summation
400   ea_.fill(0.0);
401   eb_.fill(0.0);
402   ec_.fill(0.0);
403   // clear the V for summation
404   for (unsigned int j=0; j<f_->number_of_b(); ++j)
405   {
406     V_[j].fill(0.0);
407     R_[j].fill(0.0);
408   }
409   T_.fill(0.0);
410   // compute blocks T, Q, R, U, V, W, ea, eb, and ec
411   // JtJ = |T  Q  R|
412   //       |Qt U  W|  with U and V block diagonal
413   //       |Rt Wt V|  and W with same sparsity as residuals
414   for (unsigned int i=0; i<f_->number_of_a(); ++i)
415   {
416     vnl_matrix<double>& Ui = U_[i];
417     Ui.fill(0.0);
418     vnl_matrix<double>& Qi = Q_[i];
419     Qi.fill(0.0);
420     unsigned int ai_size = f_->number_of_params_a(i);
421     vnl_vector_ref<double> eai(ai_size, ea_.data_block()+f_->index_a(i));
422 
423     vnl_crs_index::sparse_vector row = crs.sparse_row(i);
424     for (auto & r_itr : row)
425     {
426       unsigned int j = r_itr.second;
427       unsigned int k = r_itr.first;;
428       vnl_matrix<double>& Aij = A_[k];
429       vnl_matrix<double>& Bij = B_[k];
430       vnl_matrix<double>& Cij = C_[k];
431       vnl_matrix<double>& Vj = V_[j];
432       vnl_matrix<double>& Rj = R_[j];
433       vnl_vector_ref<double> ebj(Bij.cols(), eb_.data_block()+f_->index_b(j));
434 
435       vnl_fastops::inc_X_by_AtA(T_, Cij);       // T = C^T * C
436       vnl_fastops::inc_X_by_AtA(Ui,Aij);       // Ui += A_ij^T * A_ij
437       vnl_fastops::inc_X_by_AtA(Vj,Bij);       // Vj += B_ij^T * B_ij
438       vnl_fastops::AtB(W_[k],Aij,Bij);          // Wij = A_ij^T * B_ij
439       vnl_fastops::inc_X_by_AtB(Qi,Cij,Aij);   // Qi += C_ij^T * A_ij
440       vnl_fastops::inc_X_by_AtB(Rj,Cij,Bij);   // Rj += C_ij^T * B_ij
441 
442       vnl_vector_ref<double> eij(f_->number_of_residuals(k), e_.data_block()+f_->index_e(k));
443       vnl_fastops::inc_X_by_AtB(eai,Aij,eij);  // e_a_i += A_ij^T * e_ij
444       vnl_fastops::inc_X_by_AtB(ebj,Bij,eij);  // e_b_j += B_ij^T * e_ij
445       vnl_fastops::inc_X_by_AtB(ec_,Cij,eij);   // e_c   += C_ij^T * e_ij
446     }
447   }
448 }
449 
450 
451 //: extract the vector on the diagonal of Jt J
extract_diagonal() const452 vnl_vector<double> vnl_sparse_lm::extract_diagonal() const
453 {
454   // Extract the diagonal of J^T*J as a vector
455   vnl_vector<double> diag_UVT(size_a_+size_b_+size_c_);
456   int z = 0;
457   for (int i=0; i<num_a_; ++i) {
458     const vnl_matrix<double>& Ui = U_[i];
459     for (unsigned int ii=0; ii<Ui.rows(); ++ii)
460       diag_UVT[z++] = Ui(ii,ii);
461   }
462   for (int j=0; j<num_b_; ++j) {
463     const vnl_matrix<double>& Vj = V_[j];
464     for (unsigned int ii=0; ii<Vj.rows(); ++ii)
465       diag_UVT[z++] = Vj(ii,ii);
466   }
467   for (int ii=0; ii<size_c_; ++ii)
468     diag_UVT[z++] = T_(ii,ii);
469 
470   return diag_UVT;
471 }
472 
473 
474 //: set the vector on the diagonal of Jt J
set_diagonal(const vnl_vector<double> & diag)475 void vnl_sparse_lm::set_diagonal(const vnl_vector<double>& diag)
476 {
477   int z=0;
478   for (int i=0; i<num_a_; ++i) {
479     vnl_matrix<double>& Ui = U_[i];
480     for (unsigned int ii=0; ii<Ui.rows(); ++ii)
481       Ui(ii,ii) = diag[z++];
482   }
483   for (int j=0; j<num_b_; ++j) {
484     vnl_matrix<double>& Vj = V_[j];
485     for (unsigned int ii=0; ii<Vj.rows(); ++ii)
486       Vj(ii,ii) = diag[z++];
487   }
488   for (int ii=0; ii<size_c_; ++ii)
489     T_(ii,ii) = diag[z++];
490 }
491 
492 
493 //: compute all inv(Vi) and Yij
compute_invV_Y()494 void vnl_sparse_lm::compute_invV_Y()
495 {
496   // CRS matrix of indices into e, A, B, C, W, Y
497   const vnl_crs_index& crs = f_->residual_indices();
498 
499   for (int j=0; j<num_b_; ++j) {
500     vnl_matrix<double>& inv_Vj = inv_V_[j];
501     vnl_cholesky Vj_cholesky(V_[j],vnl_cholesky::quiet);
502     // use SVD as a backup if Cholesky is deficient
503     if ( Vj_cholesky.rank_deficiency() > 0 )
504     {
505       vnl_svd<double> Vj_svd(V_[j]);
506       inv_Vj = Vj_svd.inverse();
507     }
508     else
509       inv_Vj = Vj_cholesky.inverse();
510 
511     vnl_crs_index::sparse_vector col = crs.sparse_col(j);
512     for (auto & c_itr : col)
513     {
514       unsigned int k = c_itr.first;
515       Y_[k] = W_[k]*inv_Vj;  // Y_ij = W_ij * inv(V_j)
516     }
517   }
518 }
519 
520 
521 // compute Z and Sa
compute_Z_Sa(vnl_matrix<double> & Sa)522 void vnl_sparse_lm::compute_Z_Sa(vnl_matrix<double>& Sa)
523 {
524   // CRS matrix of indices into e, A, B, C, W, Y
525   const vnl_crs_index& crs = f_->residual_indices();
526   // sparse vector iterator
527 
528   // compute Z = RYt-Q and Sa
529   for (int i=0; i<num_a_; ++i)
530   {
531     vnl_crs_index::sparse_vector row_i = crs.sparse_row(i);
532     vnl_matrix<double>& Zi = Z_[i];
533     Zi.fill(0.0);
534     Zi -= Q_[i];
535 
536     // handle the diagonal blocks separately
537     vnl_matrix<double> Sii(U_[i]); // copy Ui to initialize Sii
538     for (auto & ri : row_i)
539     {
540       unsigned int j = ri.second;
541       unsigned int k = ri.first;
542       vnl_matrix<double>& Yij = Y_[k];
543       vnl_fastops::dec_X_by_ABt(Sii,Yij,W_[k]); // S_ii -= Y_ij * W_ij^T
544       vnl_fastops::inc_X_by_ABt(Zi,R_[j],Yij);  // Z_i  += R_j * Y_ij^T
545     }
546     Sa.update(Sii,f_->index_a(i),f_->index_a(i));
547 
548     // handle the (symmetric) off diagonal blocks
549     for (int h=i+1; h<num_a_; ++h)
550     {
551       vnl_crs_index::sparse_vector row_h = crs.sparse_row(h);
552       vnl_matrix<double> Sih(f_->number_of_params_a(i),
553                              f_->number_of_params_a(h), 0.0);
554 
555       // iterate through both sparse rows finding matching columns
556       bool row_done = false;
557       for (auto ri = row_i.begin(), rh = row_h.begin();
558            ri != row_i.end() && rh != row_h.end();  ++ri, ++rh)
559       {
560         while (!row_done && ri->second != rh->second)
561         {
562           while (!row_done && ri->second < rh->second)
563             row_done = (++ri == row_i.end());
564           while (!row_done && rh->second < ri->second)
565             row_done = (++rh == row_h.end());
566         }
567         if (row_done)
568           break;
569         // S_ih -= Y_ij * W_hj^T
570         vnl_fastops::dec_X_by_ABt(Sih,Y_[ri->first],W_[rh->first]);
571       }
572       // this should also be a symmetric matrix
573       Sa.update(Sih,f_->index_a(i),f_->index_a(h));
574       Sa.update(Sih.transpose(),f_->index_a(h),f_->index_a(i));
575     }
576   }
577 }
578 
579 
580 //: compute Ma
compute_Ma(const vnl_matrix<double> & H)581 void vnl_sparse_lm::compute_Ma(const vnl_matrix<double>& H)
582 {
583   // construct Ma = ZH
584   vnl_matrix<double> Hik;
585   for (int i=0; i<num_a_; ++i)
586   {
587     vnl_matrix<double>& Mai = Ma_[i];
588     Mai.fill(0.0);
589 
590     for (int k=0; k<num_a_; ++k)
591     {
592       Hik.set_size(f_->number_of_params_a(i), f_->number_of_params_a(k));
593       H.extract(Hik,f_->index_a(i), f_->index_a(k));
594       vnl_fastops::inc_X_by_AB(Mai, Z_[k], Hik);
595     }
596   }
597 }
598 
599 
600 //: compute Mb
compute_Mb()601 void vnl_sparse_lm::compute_Mb()
602 {
603   // CRS matrix of indices into e, A, B, C, W, Y
604   const vnl_crs_index& crs = f_->residual_indices();
605 
606   vnl_matrix<double> temp;
607   // construct Mb = (-R-MaW)inv(V)
608   for (int j=0; j<num_b_; ++j)
609   {
610     temp.set_size(size_c_,f_->number_of_params_b(j));
611     temp.fill(0.0);
612     temp -= R_[j];
613 
614     vnl_crs_index::sparse_vector col = crs.sparse_col(j);
615     for (auto & c_itr : col)
616     {
617       unsigned int k = c_itr.first;
618       unsigned int i = c_itr.second;
619       vnl_fastops::dec_X_by_AB(temp,Ma_[i],W_[k]);
620     }
621     vnl_fastops::AB(Mb_[j],temp,inv_V_[j]);
622   }
623 }
624 
625 
626 //: solve for dc
solve_dc(vnl_vector<double> & dc)627 void vnl_sparse_lm::solve_dc(vnl_vector<double>& dc)
628 {
629   vnl_matrix<double> Sc(T_); // start with a copy of T
630   vnl_vector<double> sec(ec_); // start with a copy of ec
631 
632   for (int i=0; i<num_a_; ++i)
633   {
634     const vnl_vector_ref<double> eai(f_->number_of_params_a(i),
635                                      const_cast<double*>(ea_.data_block()+f_->index_a(i)));
636     vnl_fastops::inc_X_by_ABt(Sc,Ma_[i],Q_[i]);
637     sec += Ma_[i] * eai;
638   }
639   for (int j=0; j<num_b_; ++j)
640   {
641     const vnl_vector_ref<double> ebi(f_->number_of_params_b(j),
642                                      const_cast<double*>(eb_.data_block()+f_->index_b(j)));
643     vnl_fastops::inc_X_by_ABt(Sc,Mb_[j],R_[j]);
644     sec += Mb_[j] * ebi;
645   }
646 
647   if (size_c_ == 1)
648   {
649     dc[0] = sec[0] / Sc(0,0);
650   }
651   else
652   {
653     // Solve Sc*dc = sec for dc
654     vnl_cholesky Sc_cholesky(Sc,vnl_cholesky::quiet);
655     // use SVD as a backup if Cholesky is deficient
656     if ( Sc_cholesky.rank_deficiency() > 0 )
657     {
658       vnl_svd<double> Sc_svd(Sc);
659       dc = Sc_svd.solve(sec);
660     }
661     else
662       dc = Sc_cholesky.solve(sec);
663   }
664 }
665 
666 
667 //: compute sea using ea, Z, dc, Y, and eb
compute_sea(vnl_vector<double> const & dc,vnl_vector<double> & sea)668 void vnl_sparse_lm::compute_sea(vnl_vector<double> const& dc,
669                                 vnl_vector<double>& sea)
670 {
671   // CRS matrix of indices into e, A, B, C, W, Y
672   const vnl_crs_index& crs = f_->residual_indices();
673   // sparse vector iterator
674 
675   sea = ea_; // initialize se to ea_
676   for (int i=0; i<num_a_; ++i)
677   {
678     vnl_vector_ref<double> sei(f_->number_of_params_a(i),sea.data_block()+f_->index_a(i));
679     vnl_crs_index::sparse_vector row_i = crs.sparse_row(i);
680 
681     vnl_fastops::inc_X_by_AtB(sei,Z_[i],dc);
682 
683     for (auto & ri : row_i)
684     {
685       unsigned int k = ri.first;
686       vnl_matrix<double>& Yij = Y_[k];
687       vnl_vector_ref<double> ebj(Yij.cols(), eb_.data_block()+f_->index_b(ri.second));
688       sei -= Yij*ebj;  // se_i -= Y_ij * e_b_j
689     }
690   }
691 }
692 
693 
694 //: compute Sa and sea
695 // only used when size_c_ == 0
compute_Sa_sea(vnl_matrix<double> & Sa,vnl_vector<double> & sea)696 void vnl_sparse_lm::compute_Sa_sea(vnl_matrix<double>& Sa,
697                                    vnl_vector<double>& sea)
698 {
699   // CRS matrix of indices into e, A, B, C, W, Y
700   const vnl_crs_index& crs = f_->residual_indices();
701 
702   sea = ea_; // initialize se to ea_
703   for (int i=0; i<num_a_; ++i)
704   {
705     vnl_vector_ref<double> sei(f_->number_of_params_a(i),sea.data_block()+f_->index_a(i));
706     vnl_crs_index::sparse_vector row_i = crs.sparse_row(i);
707 
708     // handle the diagonal blocks and computation of se separately
709     vnl_matrix<double> Sii(U_[i]); // copy Ui to initialize Sii
710     for (auto & ri : row_i)
711     {
712       unsigned int k = ri.first;
713       vnl_matrix<double>& Yij = Y_[k];
714       vnl_fastops::dec_X_by_ABt(Sii,Yij,W_[k]); // S_ii -= Y_ij * W_ij^T
715       vnl_vector_ref<double> ebj(Yij.cols(), eb_.data_block()+f_->index_b(ri.second));
716       sei -= Yij*ebj;  // se_i -= Y_ij * e_b_j
717     }
718     Sa.update(Sii,f_->index_a(i),f_->index_a(i));
719 
720     // handle the (symmetric) off diagonal blocks
721     for (int h=i+1; h<num_a_; ++h)
722     {
723       vnl_crs_index::sparse_vector row_h = crs.sparse_row(h);
724       vnl_matrix<double> Sih(f_->number_of_params_a(i),f_->number_of_params_a(h),0.0);
725 
726       // iterate through both sparse rows finding matching columns
727       bool row_done = false;
728       for (auto ri = row_i.begin(), rh = row_h.begin();
729            ri != row_i.end() && rh != row_h.end();  ++ri, ++rh)
730       {
731         while (!row_done && ri->second != rh->second)
732         {
733           while (!row_done && ri->second < rh->second)
734             row_done = (++ri == row_i.end());
735           while (!row_done && rh->second < ri->second)
736             row_done = (++rh == row_h.end());
737         }
738         if (row_done)
739           break;
740         // S_ih -= Y_ij * W_hj^T
741         vnl_fastops::dec_X_by_ABt(Sih,Y_[ri->first],W_[rh->first]);
742       }
743       // this should also be a symmetric matrix
744       Sa.update(Sih,f_->index_a(i),f_->index_a(h));
745       Sa.update(Sih.transpose(),f_->index_a(h),f_->index_a(i));
746     }
747   }
748 }
749 
750 
751 //: back solve to find db using da and dc
backsolve_db(vnl_vector<double> const & da,vnl_vector<double> const & dc,vnl_vector<double> & db)752 void vnl_sparse_lm::backsolve_db(vnl_vector<double> const& da,
753                                  vnl_vector<double> const& dc,
754                                  vnl_vector<double>& db)
755 {
756   // CRS matrix of indices into e, A, B, C, W, Y
757   const vnl_crs_index& crs = f_->residual_indices();
758   // sparse vector iterator
759 
760   for (int j=0; j<num_b_; ++j)
761   {
762     vnl_vector<double> seb(eb_.data_block()+f_->index_b(j),f_->number_of_params_b(j));
763     vnl_crs_index::sparse_vector col = crs.sparse_col(j);
764     if ( size_c_ > 0 )
765     {
766       vnl_fastops::dec_X_by_AtB(seb,R_[j],dc);
767     }
768     for (auto & c_itr : col)
769     {
770       unsigned int k = c_itr.first;
771       unsigned int i = c_itr.second;
772       const vnl_vector_ref<double> dai(f_->number_of_params_a(i),
773                                        const_cast<double*>(da.data_block()+f_->index_a(i)));
774       vnl_fastops::dec_X_by_AtB(seb,W_[k],dai);
775     }
776     vnl_vector_ref<double> dbi(f_->number_of_params_b(j),db.data_block()+f_->index_b(j));
777     vnl_fastops::Ab(dbi,inv_V_[j],seb);
778   }
779 }
780 
781 //------------------------------------------------------------------------------
782 
diagnose_outcome() const783 void vnl_sparse_lm::diagnose_outcome() const
784 {
785   diagnose_outcome(std::cerr);
786 }
787 
788 // fsm: should this function be a method on vnl_nonlinear_minimizer?
789 // if not, the return codes should be moved into LM.
diagnose_outcome(std::ostream & s) const790 void vnl_sparse_lm::diagnose_outcome(std::ostream& s) const
791 {
792 #define whoami "vnl_sparse_lm"
793   //if (!verbose_) return;
794   switch (failure_code_)
795   {
796    case ERROR_FAILURE:
797     s << (whoami ": OIOIOI -- failure in leastsquares function\n");
798     break;
799    case ERROR_DODGY_INPUT:
800     s << (whoami ": OIOIOI -- lmdif dodgy input\n");
801     break;
802    case CONVERGED_FTOL:
803     s << (whoami ": converged to ftol\n");
804     break;
805    case CONVERGED_XTOL:
806     s << (whoami ": converged to xtol\n");
807     break;
808    case CONVERGED_XFTOL:
809     s << (whoami ": converged nicely\n");
810     break;
811    case CONVERGED_GTOL:
812     s << (whoami ": converged via gtol\n");
813     break;
814    case TOO_MANY_ITERATIONS:
815     s << (whoami ": too many iterations\n");
816     break;
817    case FAILED_FTOL_TOO_SMALL:
818     s << (whoami ": ftol is too small. no further reduction in the sum of squares is possible.\n");
819     break;
820    case FAILED_XTOL_TOO_SMALL:
821     s << (whoami ": xtol is too small. no further improvement in the approximate solution x is possible.\n");
822     break;
823    case FAILED_GTOL_TOO_SMALL:
824     s << (whoami ": gtol is too small. f(a,b) is orthogonal to the columns of the jacobian to machine precision.\n");
825     break;
826    default:
827     s << (whoami ": OIOIOI: unkown info code from lmder.\n");
828     break;
829   }
830   unsigned int num_e = f_->number_of_e();
831   s << whoami ": " << num_iterations_ << " iterations, "
832     << num_evaluations_ << " evaluations, "<< num_e <<" residuals.  RMS error start/end "
833     << get_start_error() << '/' << get_end_error() << std::endl;
834 #undef whoami
835 }
836 
837 
get_JtJ()838 vnl_matrix<double> const& vnl_sparse_lm::get_JtJ()
839 {
840   return inv_covar_;
841 }
842