1 /*=========================================================================
2  *
3  *  Copyright Insight Software Consortium
4  *
5  *  Licensed under the Apache License, Version 2.0 (the "License");
6  *  you may not use this file except in compliance with the License.
7  *  You may obtain a copy of the License at
8  *
9  *         http://www.apache.org/licenses/LICENSE-2.0.txt
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  *
17  *=========================================================================*/
18 #ifndef LSQR_lsmr_h
19 #define LSQR_lsmr_h
20 
21 #include <iosfwd>
22 
23 /** \class lsmrBase
24  *
25  *  \brief LSMR solves Ax = b or min ||Ax - b|| with or without damping,
26  * using the iterative algorithm of David Fong and Michael Saunders:
27  *     http://www.stanford.edu/group/SOL/software/lsmr.html
28  *
29  * The original fortran code is maintained by
30  *     David Fong       <clfong@stanford.edu>
31  *     Michael Saunders <saunders@stanford.edu>
32  *     Systems Optimization Laboratory (SOL)
33  *     Stanford University
34  *     Stanford, CA 94305-4026, USA
35  *
36  * 17 Jul 2010: F90 LSMR derived from F90 LSQR and lsqr.m.
37  * 07 Sep 2010: Local reorthogonalization now works (localSize > 0).
38  *
39  *
40  * LSMR  finds a solution x to the following problems:
41  *
42  * 1. Unsymmetric equations:    Solve  A*x = b
43  *
44  * 2. Linear least squares:     Solve  A*x = b
45  *                              in the least-squares sense
46  *
47  * 3. Damped least squares:     Solve  (   A    )*x = ( b )
48  *                                     ( damp*I )     ( 0 )
49  *                              in the least-squares sense
50  *
51  * where A is a matrix with m rows and n columns, b is an m-vector,
52  * and damp is a scalar.  (All quantities are real.)
53  * The matrix A is treated as a linear operator.  It is accessed
54  * by means of subroutine calls with the following purpose:
55  *
56  * call Aprod1(m,n,x,y)  must compute y = y + A*x  without altering x.
57  * call Aprod2(m,n,x,y)  must compute x = x + A'*y without altering y.
58  *
59  * LSMR uses an iterative method to approximate the solution.
60  * The number of iterations required to reach a certain accuracy
61  * depends strongly on the scaling of the problem.  Poor scaling of
62  * the rows or columns of A should therefore be avoided where
63  * possible.
64  *
65  * For example, in problem 1 the solution is unaltered by
66  * row-scaling.  If a row of A is very small or large compared to
67  * the other rows of A, the corresponding row of ( A  b ) should be
68  * scaled up or down.
69  *
70  * In problems 1 and 2, the solution x is easily recovered
71  * following column-scaling.  Unless better information is known,
72  * the nonzero columns of A should be scaled so that they all have
73  * the same Euclidean norm (e.g., 1.0).
74  *
75  * In problem 3, there is no freedom to re-scale if damp is
76  * nonzero.  However, the value of damp should be assigned only
77  * after attention has been paid to the scaling of A.
78  *
79  * The parameter damp is intended to help regularize
80  * ill-conditioned systems, by preventing the true solution from
81  * being very large.  Another aid to regularization is provided by
82  * the parameter condA, which may be used to terminate iterations
83  * before the computed solution becomes very large.
84  *
85  * Note that x is not an input parameter.
86  * If some initial estimate x0 is known and if damp = 0,
87  * one could proceed as follows:
88  *
89  * 1. Compute a residual vector     r0 = b - A*x0.
90  * 2. Use LSMR to solve the system  A*dx = r0.
91  * 3. Add the correction dx to obtain a final solution x = x0 + dx.
92  *
93  * This requires that x0 be available before and after the call
94  * to LSMR.  To judge the benefits, suppose LSMR takes k1 iterations
95  * to solve A*x = b and k2 iterations to solve A*dx = r0.
96  * If x0 is "good", norm(r0) will be smaller than norm(b).
97  * If the same stopping tolerances atol and btol are used for each
98  * system, k1 and k2 will be similar, but the final solution x0 + dx
99  * should be more accurate.  The only way to reduce the total work
100  * is to use a larger stopping tolerance for the second system.
101  * If some value btol is suitable for A*x = b, the larger value
102  * btol*norm(b)/norm(r0)  should be suitable for A*dx = r0.
103  *
104  * Preconditioning is another way to reduce the number of iterations.
105  * If it is possible to solve a related system M*x = b efficiently,
106  * where M approximates A in some helpful way
107  * (e.g. M - A has low rank or its elements are small relative to
108  * those of A), LSMR may converge more rapidly on the system
109  *       A*M(inverse)*z = b,
110  * after which x can be recovered by solving M*x = z.
111  *
112  * NOTE: If A is symmetric, LSMR should not be used*
113  * Alternatives are the symmetric conjugate-gradient method (CG)
114  * and/or SYMMLQ.
115  * SYMMLQ is an implementation of symmetric CG that applies to
116  * any symmetric A and will converge more rapidly than LSMR.
117  * If A is positive definite, there are other implementations of
118  * symmetric CG that require slightly less work per iteration
119  * than SYMMLQ (but will take the same number of iterations).
120  *
121  *
122  * Notation
123  * --------
124  * The following quantities are used in discussing the subroutine
125  * parameters:
126  *
127  * Abar   =  (  A   ),        bbar  =  (b)
128  *           (damp*I)                  (0)
129  *
130  * r      =  b - A*x,         rbar  =  bbar - Abar*x
131  *
132  * normr  =  sqrt( norm(r)**2  +  damp**2 * norm(x)**2 )
133  *        =  norm( rbar )
134  *
135  * eps    =  the relative precision of floating-point arithmetic.
136  *           On most machines, eps is about 1.0e-7 and 1.0e-16
137  *           in single and double precision respectively.
138  *           We expect eps to be about 1e-16 always.
139  *
140  * LSMR  minimizes the function normr with respect to x.
141  *
142  *
143  * Parameters
144  * ----------
145  * m       input      m, the number of rows in A.
146  *
147  * n       input      n, the number of columns in A.
148  *
149  * Aprod1, Aprod2     See above.
150  *
151  * damp    input      The damping parameter for problem 3 above.
152  *                    (damp should be 0.0 for problems 1 and 2.)
153  *                    If the system A*x = b is incompatible, values
154  *                    of damp in the range 0 to sqrt(eps)*norm(A)
155  *                    will probably have a negligible effect.
156  *                    Larger values of damp will tend to decrease
157  *                    the norm of x and reduce the number of
158  *                    iterations required by LSMR.
159  *
160  *                    The work per iteration and the storage needed
161  *                    by LSMR are the same for all values of damp.
162  *
163  * b(m)    input      The rhs vector b.
164  *
165  * x(n)    output     Returns the computed solution x.
166  *
167  * atol    input      An estimate of the relative error in the data
168  *                    defining the matrix A.  For example, if A is
169  *                    accurate to about 6 digits, set atol = 1.0e-6.
170  *
171  * btol    input      An estimate of the relative error in the data
172  *                    defining the rhs b.  For example, if b is
173  *                    accurate to about 6 digits, set btol = 1.0e-6.
174  *
175  * conlim  input      An upper limit on cond(Abar), the apparent
176  *                    condition number of the matrix Abar.
177  *                    Iterations will be terminated if a computed
178  *                    estimate of cond(Abar) exceeds conlim.
179  *                    This is intended to prevent certain small or
180  *                    zero singular values of A or Abar from
181  *                    coming into effect and causing unwanted growth
182  *                    in the computed solution.
183  *
184  *                    conlim and damp may be used separately or
185  *                    together to regularize ill-conditioned systems.
186  *
187  *                    Normally, conlim should be in the range
188  *                    1000 to 1/eps.
189  *                    Suggested value:
190  *                    conlim = 1/(100*eps)  for compatible systems,
191  *                    conlim = 1/(10*sqrt(eps)) for least squares.
192  *
193  *         Note: Any or all of atol, btol, conlim may be set to zero.
194  *         The effect will be the same as the values eps, eps, 1/eps.
195  *
196  * itnlim  input      An upper limit on the number of iterations.
197  *                    Suggested value:
198  *                    itnlim = n/2   for well-conditioned systems
199  *                                   with clustered singular values,
200  *                    itnlim = 4*n   otherwise.
201  *
202  * localSize input    No. of vectors for local reorthogonalization.
203  *            0       No reorthogonalization is performed.
204  *           >0       This many n-vectors "v" (the most recent ones)
205  *                    are saved for reorthogonalizing the next v.
206  *                    localSize need not be more than min(m,n).
207  *                    At most min(m,n) vectors will be allocated.
208  *
209  * nout    input      File number for printed output.  If positive,
210  *                    a summary will be printed on file nout.
211  *
212  * istop   output     An integer giving the reason for termination:
213  *
214  *            0       x = 0  is the exact solution.
215  *                    No iterations were performed.
216  *
217  *            1       The equations A*x = b are probably compatible.
218  *                    Norm(A*x - b) is sufficiently small, given the
219  *                    values of atol and btol.
220  *
221  *            2       damp is zero.  The system A*x = b is probably
222  *                    not compatible.  A least-squares solution has
223  *                    been obtained that is sufficiently accurate,
224  *                    given the value of atol.
225  *
226  *            3       damp is nonzero.  A damped least-squares
227  *                    solution has been obtained that is sufficiently
228  *                    accurate, given the value of atol.
229  *
230  *            4       An estimate of cond(Abar) has exceeded conlim.
231  *                    The system A*x = b appears to be ill-conditioned,
232  *                    or there could be an error in Aprod1 or Aprod2.
233  *
234  *            5       The iteration limit itnlim was reached.
235  *
236  * itn     output     The number of iterations performed.
237  *
238  * normA   output     An estimate of the Frobenius norm of Abar.
239  *                    This is the square-root of the sum of squares
240  *                    of the elements of Abar.
241  *                    If damp is small and the columns of A
242  *                    have all been scaled to have length 1.0,
243  *                    normA should increase to roughly sqrt(n).
244  *                    A radically different value for normA may
245  *                    indicate an error in Aprod1 or Aprod2.
246  *
247  * condA   output     An estimate of cond(Abar), the condition
248  *                    number of Abar.  A very high value of condA
249  *                    may again indicate an error in Aprod1 or Aprod2.
250  *
251  * normr   output     An estimate of the final value of norm(rbar),
252  *                    the function being minimized (see notation
253  *                    above).  This will be small if A*x = b has
254  *                    a solution.
255  *
256  * normAr  output     An estimate of the final value of
257  *                    norm( Abar'*rbar ), the norm of
258  *                    the residual for the normal equations.
259  *                    This should be small in all cases.  (normAr
260  *                    will often be smaller than the true value
261  *                    computed from the output vector x.)
262  *
263  * normx   output     An estimate of norm(x) for the final solution x.
264  *
265  * Precision
266  * ---------
267  * The number of iterations required by LSMR will decrease
268  * if the computation is performed in higher precision.
269  * At least 15-digit arithmetic should normally be used.
270  * "real(dp)" declarations should normally be 8-byte words.
271  * If this ever changes, the BLAS routines  dnrm2, dscal
272  * (Lawson, et al., 1979) will also need to be changed.
273  *
274  *
275  * Reference
276  * ---------
277  * http://www.stanford.edu/group/SOL/software/lsmr.html
278  * ------------------------------------------------------------------
279  *
280  * LSMR development:
281  * 21 Sep 2007: Fortran 90 version of LSQR implemented.
282  *              Aprod1, Aprod2 implemented via f90 interface.
283  * 17 Jul 2010: LSMR derived from LSQR and lsmr.m.
284  * 07 Sep 2010: Local reorthogonalization now working.
285  *-------------------------------------------------------------------
286  */
287 class lsmrBase
288 {
289 public:
290 
291   lsmrBase();
292   virtual ~lsmrBase();
293 
294   /**
295    * computes y = y + A*x without altering x,
296    * where A is a matrix of dimensions A[m][n].
297    * The size of the vector x is n.
298    * The size of the vector y is m.
299    */
300   virtual void Aprod1(unsigned int m, unsigned int n, const double * x, double * y ) const = 0;
301 
302   /**
303    * computes x = x + A'*y without altering y,
304    * where A is a matrix of dimensions A[m][n].
305    * The size of the vector x is n.
306    * The size of the vector y is m.
307    */
308   virtual void Aprod2(unsigned int m, unsigned int n, double * x, const double * y ) const = 0;
309 
310   /**
311    * returns sqrt( a**2 + b**2 )
312    * with precautions to avoid overflow.
313    */
314   double D2Norm( double a, double b ) const;
315 
316   /**
317    * returns sqrt( x' * x )
318    * with precautions to avoid overflow.
319    */
320   double Dnrm2( unsigned int n, const double *x ) const;
321 
322   /**
323    * Scale a vector by multiplying with a constant
324    */
325   void Scale( unsigned int n, double factor, double *x ) const;
326 
327   /**  No. of vectors for local reorthogonalization.
328    * n=0  No reorthogonalization is performed.
329    * n>0  This many n-vectors "v" (the most recent ones)
330    *      are saved for reorthogonalizing the next v.
331    *      localSize need not be more than min(m,n).
332    *      At most min(m,n) vectors will be allocated.
333    */
334   void SetLocalSize( unsigned int n );
335 
336   /** An estimate of the relative error in the data
337    *  defining the matrix A.  For example, if A is
338    *  accurate to about 6 digits, set atol = 1.0e-6.
339    */
340   void SetToleranceA( double );
341 
342   /** An estimate of the relative error in the data
343    *  defining the rhs b.  For example, if b is
344    *  accurate to about 6 digits, set btol = 1.0e-6.
345    */
346   void SetToleranceB( double );
347 
348   /** An upper limit on cond(Abar), the apparent
349    *  condition number of the matrix Abar.
350    *  Iterations will be terminated if a computed
351    *  estimate of cond(Abar) exceeds conlim.
352    *  This is intended to prevent certain small or
353    *  zero singular values of A or Abar from
354    *  coming into effect and causing unwanted growth
355    *  in the computed solution.
356    *
357    *  conlim and damp may be used separately or
358    *  together to regularize ill-conditioned systems.
359    *
360    *  Normally, conlim should be in the range
361    *  1000 to 1/eps.
362    *  Suggested value:
363    *  conlim = 1/(100*eps)  for compatible systems,
364    *  conlim = 1/(10*sqrt(eps)) for least squares.
365    *
366    * Note: Any or all of atol, btol, conlim may be set to zero.
367    * The effect will be the same as the values eps, eps, 1/eps.
368    *
369    */
370   void SetUpperLimitOnConditional( double );
371 
372   /**  the relative precision of floating-point arithmetic.
373    *   On most machines, eps is about 1.0e-7 and 1.0e-16
374    *   in single and double precision respectively.
375    *   We expect eps to be about 1e-16 always.
376    */
377   void SetEpsilon( double );
378 
379   /**
380    *   The damping parameter for problem 3 above.
381    *   (damp should be 0.0 for problems 1 and 2.)
382    *   If the system A*x = b is incompatible, values
383    *   of damp in the range 0 to sqrt(eps)*norm(A)
384    *   will probably have a negligible effect.
385    *   Larger values of damp will tend to decrease
386    *   the norm of x and reduce the number of
387    *   iterations required by LSMR.
388    *
389    *   The work per iteration and the storage needed
390    *   by LSMR are the same for all values of damp.
391    *
392    */
393   void SetDamp( double );
394 
395   /**  An upper limit on the number of iterations.
396    *   Suggested value:
397    *   itnlim = n/2   for well-conditioned systems
398    *                  with clustered singular values,
399    *   itnlim = 4*n   otherwise.
400    */
401   void SetMaximumNumberOfIterations( unsigned int );
402 
403   /**
404    * If provided, a summary will be printed out to this stream during
405    * the execution of the Solve function.
406    */
407   void SetOutputStream( std::ostream & os );
408 
409   /**
410    *   Returns an integer giving the reason for termination:
411    *
412    *     0       x = 0  is the exact solution.
413    *             No iterations were performed.
414    *
415    *     1       The equations A*x = b are probably compatible.
416    *             Norm(A*x - b) is sufficiently small, given the
417    *             values of atol and btol.
418    *
419    *     2       damp is zero.  The system A*x = b is probably
420    *             not compatible.  A least-squares solution has
421    *             been obtained that is sufficiently accurate,
422    *             given the value of atol.
423    *
424    *     3       damp is nonzero.  A damped least-squares
425    *             solution has been obtained that is sufficiently
426    *             accurate, given the value of atol.
427    *
428    *     4       An estimate of cond(Abar) has exceeded conlim.
429    *             The system A*x = b appears to be ill-conditioned,
430    *             or there could be an error in Aprod1 or Aprod2.
431    *
432    *     5       The iteration limit itnlim was reached.
433    *
434    */
435   unsigned int GetStoppingReason() const;
436 
437 
438   /** Returns the actual number of iterations performed. */
439   unsigned int GetNumberOfIterationsPerformed() const;
440 
441 
442   /**
443    *   An estimate of the Frobenius norm of Abar.
444    *   This is the square-root of the sum of squares
445    *   of the elements of Abar.
446    *   If damp is small and the columns of A
447    *   have all been scaled to have length 1.0,
448    *   Anorm should increase to roughly sqrt(n).
449    *   A radically different value for Anorm may
450    *   indicate an error in Aprod1 or Aprod2.
451    */
452   double GetFrobeniusNormEstimateOfAbar() const;
453 
454 
455   /**
456    *   An estimate of cond(Abar), the condition
457    *   number of Abar.  A very high value of Acond
458    *   may again indicate an error in Aprod1 or Aprod2.
459    */
460   double GetConditionNumberEstimateOfAbar() const;
461 
462 
463   /** An estimate of the final value of norm(rbar),
464    *  the function being minimized (see notation
465    *  above).  This will be small if A*x = b has
466    *  a solution.
467    */
468   double GetFinalEstimateOfNormRbar() const;
469 
470 
471   /** An estimate of the final value of
472    *  norm( Abar(transpose)*rbar ), the norm of
473    *  the residual for the normal equations.
474    *  This should be small in all cases.  (Arnorm
475    *  will often be smaller than the true value
476    *  computed from the output vector x.)
477    */
478   double GetFinalEstimateOfNormOfResiduals() const;
479 
480 
481   /**
482    * An estimate of norm(x) for the final solution x.
483    */
484   double GetFinalEstimateOfNormOfX() const;
485 
486 
487   /**
488    *    Execute the solver
489    *
490    *    solves Ax = b or min ||Ax - b|| with or without damping,
491    *
492    *    m is the size of the input  vector b
493    *    n is the size of the output vector x
494    */
495   void Solve( unsigned int m, unsigned int n, const double * b, double * x );
496 
497 private:
498 
499   void TerminationPrintOut();
500 
501   double normA;
502   double condA;
503   double normb;
504   double normr;
505   double normAr;
506   double normx;
507   double dxmax;
508 
509   double atol;
510   double btol;
511   double conlim;
512 
513   double eps;
514   double damp;
515   bool   damped;
516 
517   unsigned int itnlim;
518   unsigned int itn;
519 
520   unsigned int istop;
521 
522   unsigned int maxdx;
523   unsigned int localSize;
524   std::ostream * nout;
525 };
526 
527 #endif
528