1 // @HEADER
2 // ************************************************************************
3 //
4 //               Rapid Optimization Library (ROL) Package
5 //                 Copyright (2014) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact lead developers:
38 //              Drew Kouri   (dpkouri@sandia.gov) and
39 //              Denis Ridzal (dridzal@sandia.gov)
40 //
41 // ************************************************************************
42 // @HEADER
43 
44 #ifndef ROL_LINESEARCHSTEP_H
45 #define ROL_LINESEARCHSTEP_H
46 
47 #include "ROL_Types.hpp"
48 #include "ROL_Step.hpp"
49 #include "ROL_LineSearch.hpp"
50 
51 // Unconstrained Methods
52 #include "ROL_GradientStep.hpp"
53 #include "ROL_NonlinearCGStep.hpp"
54 #include "ROL_SecantStep.hpp"
55 #include "ROL_NewtonStep.hpp"
56 #include "ROL_NewtonKrylovStep.hpp"
57 
58 // Bound Constrained Methods
59 #include "ROL_ProjectedSecantStep.hpp"
60 #include "ROL_ProjectedNewtonStep.hpp"
61 #include "ROL_ProjectedNewtonKrylovStep.hpp"
62 
#include <algorithm>
#include <iomanip>
#include <sstream>
65 
66 /** @ingroup step_group
67     \class ROL::LineSearchStep
68     \brief Provides the interface to compute optimization steps
69            with line search.
70 
71     Suppose \f$\mathcal{X}\f$ is a Hilbert space of
72     functions mapping \f$\Xi\f$ to \f$\mathbb{R}\f$.  For example,
73     \f$\Xi\subset\mathbb{R}^n\f$ and \f$\mathcal{X}=L^2(\Xi)\f$ or
74     \f$\Xi = \{1,\ldots,n\}\f$ and \f$\mathcal{X}=\mathbb{R}^n\f$. We
75     assume \f$f:\mathcal{X}\to\mathbb{R}\f$ is twice-continuously Fr&eacute;chet
76     differentiable and \f$a,\,b\in\mathcal{X}\f$ with \f$a\le b\f$ almost
77     everywhere in \f$\Xi\f$.  Note that these line-search algorithms will also work
78     with secant approximations of the Hessian.
79     This step applies to unconstrained and bound constrained optimization problems,
80     \f[
81         \min_x\quad f(x) \qquad\text{and}\qquad \min_x\quad f(x)\quad\text{s.t.}\quad a\le x\le b,
82     \f]
83     respectively.
84 
85     For unconstrained problems, given the \f$k\f$-th iterate \f$x_k\f$ and a descent direction
86     \f$s_k\f$, the line search approximately minimizes the 1D objective function
    \f$\phi_k(t) = f(x_k + t s_k)\f$.  The approximate minimizer \f$t\f$ must satisfy
    sufficient decrease and curvature conditions to guarantee global convergence.  The
89     sufficient decrease condition (often called the Armijo condition) is
90     \f[
91        \phi_k(t) \le \phi_k(0) + c_1 t \phi_k'(0) \qquad\iff\qquad
92        f(x_k+ts_k) \le f(x_k) + c_1 t \langle \nabla f(x_k),s_k\rangle_{\mathcal{X}}
93     \f]
94     where \f$0 < c_1 < 1\f$.  The curvature conditions implemented in ROL include:
95 
96     <CENTER>
97     | Name              | Condition                                                     | Parameters |
98     | :---------------- | :-----------------------------------------------------------: | :---------------------: |
99     | Wolfe             | \f$\phi_k'(t) \ge c_2\phi_k'(0)\f$                            | \f$c_1<c_2<1\f$         |
100     | Strong Wolfe      | \f$\left|\phi_k'(t)\right| \le c_2 \left|\phi_k'(0)\right|\f$ | \f$c_1<c_2<1\f$         |
101     | Generalized Wolfe | \f$c_2\phi_k'(0)\le \phi_k'(t)\le-c_3\phi_k'(0)\f$            | \f$0<c_3<1\f$           |
102     | Approximate Wolfe | \f$c_2\phi_k'(0)\le \phi_k'(t)\le(2c_1-1)\phi_k'(0)\f$        | \f$c_1<c_2<1\f$         |
103     | Goldstein         | \f$\phi_k(0)+(1-c_1)t\phi_k'(0)\le \phi_k(t)\f$               | \f$0<c_1<\frac{1}{2}\f$ |
104     </CENTER>
105 
106     Note that \f$\phi_k'(t) = \langle \nabla f(x_k+ts_k),s_k\rangle_{\mathcal{X}}\f$.
107 
108     For bound constrained problems, the line-search step performs a projected search.  That is,
109     the line search approximately minimizes the 1D objective function
110     \f$\phi_k(t) = f(P_{[a,b]}(x_k+ts_k))\f$ where \f$P_{[a,b]}\f$ denotes the projection onto
111     the upper and lower bounds.  Such line-search algorithms correspond to projected gradient
112     and projected Newton-type algorithms.
113 
114     For projected methods, we require the notion of an active set of an iterate \f$x_k\f$,
    \f[
       \mathcal{A}_k = \{\, \xi\in\Xi\,:\,x_k(\xi) = a(\xi)\,\}\cup
                       \{\, \xi\in\Xi\,:\,x_k(\xi) = b(\xi)\,\}.
    \f]
119     Given \f$\mathcal{A}_k\f$ and a search direction \f$s_k\f$, we define the binding set as
    \f[
       \mathcal{B}_k = \{\, \xi\in\Xi\,:\,x_k(\xi) = a(\xi) \;\text{and}\; s_k(\xi) < 0 \,\}\cup
                       \{\, \xi\in\Xi\,:\,x_k(\xi) = b(\xi) \;\text{and}\; s_k(\xi) > 0 \,\}.
    \f]
    The binding set contains the values of \f$\xi\in\Xi\f$ such that if \f$x_k(\xi)\f$ is on a
    bound, then \f$(x_k+s_k)(\xi)\f$ will violate the bound.  Using these definitions, we can
126     redefine the sufficient decrease and curvature conditions from the unconstrained case to
127     the case of bound constraints.
128 
129     LineSearchStep implements a number of algorithms for both bound constrained and unconstrained
130     optimization.  These algorithms are: Steepest descent; Nonlinear CG; Quasi-Newton methods;
131     Inexact Newton methods; Newton's method. These methods are chosen through the EDescent enum.
132 */
133 
134 
135 namespace ROL {
136 
137 template <class Real>
138 class LineSearchStep : public Step<Real> {
139 private:
140 
141   ROL::Ptr<Step<Real> >        desc_;       ///< Unglobalized step object
142   ROL::Ptr<Secant<Real> >      secant_;     ///< Secant object (used for quasi-Newton)
143   ROL::Ptr<Krylov<Real> >      krylov_;     ///< Krylov solver object (used for inexact Newton)
144   ROL::Ptr<NonlinearCG<Real> > nlcg_;       ///< Nonlinear CG object (used for nonlinear CG)
145   ROL::Ptr<LineSearch<Real> >  lineSearch_; ///< Line-search object
146 
147   ROL::Ptr<Vector<Real> > d_;
148 
149   ELineSearch         els_;   ///< enum determines type of line search
150   ECurvatureCondition econd_; ///< enum determines type of curvature condition
151 
152   bool acceptLastAlpha_;  ///< For backwards compatibility. When max function evaluations are reached take last step
153 
154   bool usePreviousAlpha_; ///< If true, use the previously accepted step length (if any) as the new initial step length
155 
156   int verbosity_;
157   bool computeObj_;
158   Real fval_;
159 
160   ROL::ParameterList parlist_;
161 
162   std::string lineSearchName_;
163 
GradDotStep(const Vector<Real> & g,const Vector<Real> & s,const Vector<Real> & x,BoundConstraint<Real> & bnd,Real eps=0)164   Real GradDotStep(const Vector<Real> &g, const Vector<Real> &s,
165                    const Vector<Real> &x,
166                    BoundConstraint<Real> &bnd, Real eps = 0) {
167     Real gs(0), one(1);
168     if (!bnd.isActivated()) {
169       gs = s.dot(g.dual());
170     }
171     else {
172       d_->set(s);
173       bnd.pruneActive(*d_,g,x,eps);
174       gs = d_->dot(g.dual());
175       d_->set(x);
176       d_->axpy(-one,g.dual());
177       bnd.project(*d_);
178       d_->scale(-one);
179       d_->plus(x);
180       bnd.pruneInactive(*d_,g,x,eps);
181       gs -= d_->dot(g.dual());
182     }
183     return gs;
184   }
185 
186 public:
187 
188   using Step<Real>::initialize;
189   using Step<Real>::compute;
190   using Step<Real>::update;
191 
192   /** \brief Constructor.
193 
194       Standard constructor to build a LineSearchStep object.  Algorithmic
195       specifications are passed in through a ROL::ParameterList.  The
196       line-search type, secant type, Krylov type, or nonlinear CG type can
197       be set using user-defined objects.
198 
199       @param[in]     parlist    is a parameter list containing algorithmic specifications
200       @param[in]     lineSearch is a user-defined line search object
201       @param[in]     secant     is a user-defined secant object
202       @param[in]     krylov     is a user-defined Krylov object
203       @param[in]     nlcg       is a user-defined Nonlinear CG object
204   */
LineSearchStep(ROL::ParameterList & parlist,const ROL::Ptr<LineSearch<Real>> & lineSearch=ROL::nullPtr,const ROL::Ptr<Secant<Real>> & secant=ROL::nullPtr,const ROL::Ptr<Krylov<Real>> & krylov=ROL::nullPtr,const ROL::Ptr<NonlinearCG<Real>> & nlcg=ROL::nullPtr)205   LineSearchStep( ROL::ParameterList &parlist,
206                   const ROL::Ptr<LineSearch<Real> > &lineSearch = ROL::nullPtr,
207                   const ROL::Ptr<Secant<Real> > &secant = ROL::nullPtr,
208                   const ROL::Ptr<Krylov<Real> > &krylov = ROL::nullPtr,
209                   const ROL::Ptr<NonlinearCG<Real> > &nlcg = ROL::nullPtr )
210     : Step<Real>(), desc_(ROL::nullPtr), secant_(secant),
211       krylov_(krylov), nlcg_(nlcg), lineSearch_(lineSearch),
212       els_(LINESEARCH_USERDEFINED), econd_(CURVATURECONDITION_WOLFE),
213       verbosity_(0), computeObj_(true), fval_(0), parlist_(parlist) {
214     // Parse parameter list
215     ROL::ParameterList& Llist = parlist.sublist("Step").sublist("Line Search");
216     ROL::ParameterList& Glist = parlist.sublist("General");
217     econd_ = StringToECurvatureCondition(Llist.sublist("Curvature Condition").get("Type","Strong Wolfe Conditions") );
218     acceptLastAlpha_ = Llist.get("Accept Last Alpha", false);
219     verbosity_ = Glist.get("Print Verbosity",0);
220     computeObj_ = Glist.get("Recompute Objective Function",false);
221     // Initialize Line Search
222     if (lineSearch_ == ROL::nullPtr) {
223       lineSearchName_ = Llist.sublist("Line-Search Method").get("Type","Cubic Interpolation");
224       els_ = StringToELineSearch(lineSearchName_);
225       lineSearch_ = LineSearchFactory<Real>(parlist);
226     }
227     else { // User-defined linesearch provided
228       lineSearchName_ = Llist.sublist("Line-Search Method").get("User Defined Line-Search Name",
229                                                                 "Unspecified User Defined Line-Search");
230     }
231 
232   }
233 
initialize(Vector<Real> & x,const Vector<Real> & s,const Vector<Real> & g,Objective<Real> & obj,BoundConstraint<Real> & bnd,AlgorithmState<Real> & algo_state)234   void initialize( Vector<Real> &x, const Vector<Real> &s, const Vector<Real> &g,
235                    Objective<Real> &obj, BoundConstraint<Real> &bnd,
236                    AlgorithmState<Real> &algo_state ) {
237     d_ = x.clone();
238 
239     // Initialize unglobalized step
240     ROL::ParameterList& list
241       = parlist_.sublist("Step").sublist("Line Search").sublist("Descent Method");
242     EDescent edesc = StringToEDescent(list.get("Type","Quasi-Newton Method"));
243     if (bnd.isActivated()) {
244       switch(edesc) {
245         case DESCENT_STEEPEST: {
246           desc_ = ROL::makePtr<GradientStep<Real>>(parlist_,computeObj_);
247           break;
248         }
249         case DESCENT_NONLINEARCG: {
250           desc_ = ROL::makePtr<NonlinearCGStep<Real>>(parlist_,nlcg_,computeObj_);
251           break;
252         }
253         case DESCENT_SECANT: {
254           desc_ = ROL::makePtr<ProjectedSecantStep<Real>>(parlist_,secant_,computeObj_);
255           break;
256         }
257         case DESCENT_NEWTON: {
258           desc_ = ROL::makePtr<ProjectedNewtonStep<Real>>(parlist_,computeObj_);
259           break;
260         }
261         case DESCENT_NEWTONKRYLOV: {
262           desc_ = ROL::makePtr<ProjectedNewtonKrylovStep<Real>>(parlist_,krylov_,secant_,computeObj_);
263           break;
264         }
265         default:
266           ROL_TEST_FOR_EXCEPTION(true,std::invalid_argument,
267             ">>> (LineSearchStep::Initialize): Undefined descent type!");
268       }
269     }
270     else {
271       switch(edesc) {
272         case DESCENT_STEEPEST: {
273           desc_ = ROL::makePtr<GradientStep<Real>>(parlist_,computeObj_);
274           break;
275         }
276         case DESCENT_NONLINEARCG: {
277           desc_ = ROL::makePtr<NonlinearCGStep<Real>>(parlist_,nlcg_,computeObj_);
278           break;
279         }
280         case DESCENT_SECANT: {
281           desc_ = ROL::makePtr<SecantStep<Real>>(parlist_,secant_,computeObj_);
282           break;
283         }
284         case DESCENT_NEWTON: {
285           desc_ = ROL::makePtr<NewtonStep<Real>>(parlist_,computeObj_);
286           break;
287         }
288         case DESCENT_NEWTONKRYLOV: {
289           desc_ = ROL::makePtr<NewtonKrylovStep<Real>>(parlist_,krylov_,secant_,computeObj_);
290           break;
291         }
292         default:
293           ROL_TEST_FOR_EXCEPTION(true,std::invalid_argument,
294             ">>> (LineSearchStep::Initialize): Undefined descent type!");
295       }
296     }
297     desc_->initialize(x,s,g,obj,bnd,algo_state);
298 
299     // Initialize line search
300     lineSearch_->initialize(x,s,g,obj,bnd);
301     //const ROL::Ptr<const StepState<Real> > desc_state = desc_->getStepState();
302     //lineSearch_->initialize(x,s,*(desc_state->gradientVec),obj,bnd);
303   }
304 
305   /** \brief Compute step.
306 
307       Computes a trial step, \f$s_k\f$ as defined by the enum EDescent.  Once the
308       trial step is determined, this function determines an approximate minimizer
309       of the 1D function \f$\phi_k(t) = f(x_k+ts_k)\f$.  This approximate
310       minimizer must satisfy sufficient decrease and curvature conditions.
311 
312       @param[out]      s          is the computed trial step
313       @param[in]       x          is the current iterate
314       @param[in]       obj        is the objective function
315       @param[in]       bnd        are the bound constraints
316       @param[in]       algo_state contains the current state of the algorithm
317   */
compute(Vector<Real> & s,const Vector<Real> & x,Objective<Real> & obj,BoundConstraint<Real> & bnd,AlgorithmState<Real> & algo_state)318   void compute( Vector<Real> &s, const Vector<Real> &x,
319                 Objective<Real> &obj, BoundConstraint<Real> &bnd,
320                 AlgorithmState<Real> &algo_state ) {
321     Real zero(0), one(1);
322     // Compute unglobalized step
323     desc_->compute(s,x,obj,bnd,algo_state);
324 
325     // Ensure that s is a descent direction
326     // ---> If not, then default to steepest descent
327     const ROL::Ptr<const StepState<Real> > desc_state = desc_->getStepState();
328     Real gs = GradDotStep(*(desc_state->gradientVec),s,x,bnd,algo_state.gnorm);
329     if (gs >= zero) {
330       s.set((desc_state->gradientVec)->dual());
331       s.scale(-one);
332       gs = GradDotStep(*(desc_state->gradientVec),s,x,bnd,algo_state.gnorm);
333     }
334 
335     // Perform line search
336     ROL::Ptr<StepState<Real> > step_state = Step<Real>::getState();
337     fval_ = algo_state.value;
338     step_state->nfval = 0; step_state->ngrad = 0;
339     lineSearch_->setData(algo_state.gnorm,*(desc_state->gradientVec));
340     lineSearch_->run(step_state->searchSize,fval_,step_state->nfval,step_state->ngrad,gs,s,x,obj,bnd);
341 
342     // Make correction if maximum function evaluations reached
343     if(!acceptLastAlpha_) {
344       lineSearch_->setMaxitUpdate(step_state->searchSize,fval_,algo_state.value);
345     }
346 
347     // Compute scaled descent direction
348     s.scale(step_state->searchSize);
349     if ( bnd.isActivated() ) {
350       s.plus(x);
351       bnd.project(s);
352       s.axpy(static_cast<Real>(-1),x);
353     }
354   }
355 
356   /** \brief Update step, if successful.
357 
358       Given a trial step, \f$s_k\f$, this function updates \f$x_{k+1}=x_k+s_k\f$.
359       This function also updates the secant approximation.
360 
361       @param[in,out]   x          is the updated iterate
362       @param[in]       s          is the computed trial step
363       @param[in]       obj        is the objective function
364       @param[in]       con        are the bound constraints
365       @param[in]       algo_state contains the current state of the algorithm
366   */
update(Vector<Real> & x,const Vector<Real> & s,Objective<Real> & obj,BoundConstraint<Real> & bnd,AlgorithmState<Real> & algo_state)367   void update( Vector<Real> &x, const Vector<Real> &s,
368                Objective<Real> &obj, BoundConstraint<Real> &bnd,
369                AlgorithmState<Real> &algo_state ) {
370     ROL::Ptr<StepState<Real> > step_state = Step<Real>::getState();
371     algo_state.nfval += step_state->nfval;
372     algo_state.ngrad += step_state->ngrad;
373     desc_->update(x,s,obj,bnd,algo_state);
374     step_state->flag = desc_->getStepState()->flag;
375     step_state->SPiter = desc_->getStepState()->SPiter;
376     step_state->SPflag = desc_->getStepState()->SPflag;
377     if ( !computeObj_ ) {
378       algo_state.value = fval_;
379     }
380   }
381 
382   /** \brief Print iterate header.
383 
384       This function produces a string containing header information.
385   */
printHeader(void) const386   std::string printHeader( void ) const {
387     std::string head = desc_->printHeader();
388     head.erase(std::remove(head.end()-3,head.end(),'\n'), head.end());
389     std::stringstream hist;
390     hist.write(head.c_str(),head.length());
391     hist << std::setw(10) << std::left << "ls_#fval";
392     hist << std::setw(10) << std::left << "ls_#grad";
393     hist << "\n";
394     return hist.str();
395   }
396 
397   /** \brief Print step name.
398 
399       This function produces a string containing the algorithmic step information.
400   */
printName(void) const401   std::string printName( void ) const {
402     std::string name = desc_->printName();
403     std::stringstream hist;
404     hist << name;
405     hist << "Line Search: " << lineSearchName_;
406     hist << " satisfying " << ECurvatureConditionToString(econd_) << "\n";
407     return hist.str();
408   }
409 
410   /** \brief Print iterate status.
411 
412       This function prints the iteration status.
413 
414       @param[in]     algo_state    is the current state of the algorithm
415       @param[in]     printHeader   if set to true will print the header at each iteration
416   */
print(AlgorithmState<Real> & algo_state,bool print_header=false) const417   std::string print( AlgorithmState<Real> & algo_state, bool print_header = false ) const  {
418     const ROL::Ptr<const StepState<Real> > step_state = Step<Real>::getStepState();
419     std::string desc = desc_->print(algo_state,false);
420     desc.erase(std::remove(desc.end()-3,desc.end(),'\n'), desc.end());
421     std::string name = desc_->printName();
422     size_t pos = desc.find(name);
423     if ( pos != std::string::npos ) {
424       desc.erase(pos, name.length());
425     }
426 
427     std::stringstream hist;
428     if ( algo_state.iter == 0 ) {
429       hist << printName();
430     }
431     if ( print_header ) {
432       hist << printHeader();
433     }
434     hist << desc;
435     if ( algo_state.iter == 0 ) {
436       hist << "\n";
437     }
438     else {
439       hist << std::setw(10) << std::left << step_state->nfval;
440       hist << std::setw(10) << std::left << step_state->ngrad;
441       hist << "\n";
442     }
443     return hist.str();
444   }
445 }; // class LineSearchStep
446 
447 } // namespace ROL
448 
449 #endif
450