1 // @HEADER 2 // ************************************************************************ 3 // 4 // Rapid Optimization Library (ROL) Package 5 // Copyright (2014) Sandia Corporation 6 // 7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 8 // license for use of this work by or on behalf of the U.S. Government. 9 // 10 // Redistribution and use in source and binary forms, with or without 11 // modification, are permitted provided that the following conditions are 12 // met: 13 // 14 // 1. Redistributions of source code must retain the above copyright 15 // notice, this list of conditions and the following disclaimer. 16 // 17 // 2. Redistributions in binary form must reproduce the above copyright 18 // notice, this list of conditions and the following disclaimer in the 19 // documentation and/or other materials provided with the distribution. 20 // 21 // 3. Neither the name of the Corporation nor the names of the 22 // contributors may be used to endorse or promote products derived from 23 // this software without specific prior written permission. 24 // 25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 // 37 // Questions? Contact lead developers: 38 // Drew Kouri (dpkouri@sandia.gov) and 39 // Denis Ridzal (dridzal@sandia.gov) 40 // 41 // ************************************************************************ 42 // @HEADER 43 44 #ifndef ROL_LINESEARCHSTEP_H 45 #define ROL_LINESEARCHSTEP_H 46 47 #include "ROL_Types.hpp" 48 #include "ROL_Step.hpp" 49 #include "ROL_LineSearch.hpp" 50 51 // Unconstrained Methods 52 #include "ROL_GradientStep.hpp" 53 #include "ROL_NonlinearCGStep.hpp" 54 #include "ROL_SecantStep.hpp" 55 #include "ROL_NewtonStep.hpp" 56 #include "ROL_NewtonKrylovStep.hpp" 57 58 // Bound Constrained Methods 59 #include "ROL_ProjectedSecantStep.hpp" 60 #include "ROL_ProjectedNewtonStep.hpp" 61 #include "ROL_ProjectedNewtonKrylovStep.hpp" 62 63 #include <sstream> 64 #include <iomanip> 65 66 /** @ingroup step_group 67 \class ROL::LineSearchStep 68 \brief Provides the interface to compute optimization steps 69 with line search. 70 71 Suppose \f$\mathcal{X}\f$ is a Hilbert space of 72 functions mapping \f$\Xi\f$ to \f$\mathbb{R}\f$. For example, 73 \f$\Xi\subset\mathbb{R}^n\f$ and \f$\mathcal{X}=L^2(\Xi)\f$ or 74 \f$\Xi = \{1,\ldots,n\}\f$ and \f$\mathcal{X}=\mathbb{R}^n\f$. We 75 assume \f$f:\mathcal{X}\to\mathbb{R}\f$ is twice-continuously Fréchet 76 differentiable and \f$a,\,b\in\mathcal{X}\f$ with \f$a\le b\f$ almost 77 everywhere in \f$\Xi\f$. Note that these line-search algorithms will also work 78 with secant approximations of the Hessian. 79 This step applies to unconstrained and bound constrained optimization problems, 80 \f[ 81 \min_x\quad f(x) \qquad\text{and}\qquad \min_x\quad f(x)\quad\text{s.t.}\quad a\le x\le b, 82 \f] 83 respectively. 84 85 For unconstrained problems, given the \f$k\f$-th iterate \f$x_k\f$ and a descent direction 86 \f$s_k\f$, the line search approximately minimizes the 1D objective function 87 \f$\phi_k(t) = f(x_k + t s_k)\f$. The approximate minimizer \f$t\f$ must satisfy 88 sufficient decrease and curvature conditions into guarantee global convergence. The 89 sufficient decrease condition (often called the Armijo condition) is 90 \f[ 91 \phi_k(t) \le \phi_k(0) + c_1 t \phi_k'(0) \qquad\iff\qquad 92 f(x_k+ts_k) \le f(x_k) + c_1 t \langle \nabla f(x_k),s_k\rangle_{\mathcal{X}} 93 \f] 94 where \f$0 < c_1 < 1\f$. The curvature conditions implemented in ROL include: 95 96 <CENTER> 97 | Name | Condition | Parameters | 98 | :---------------- | :-----------------------------------------------------------: | :---------------------: | 99 | Wolfe | \f$\phi_k'(t) \ge c_2\phi_k'(0)\f$ | \f$c_1<c_2<1\f$ | 100 | Strong Wolfe | \f$\left|\phi_k'(t)\right| \le c_2 \left|\phi_k'(0)\right|\f$ | \f$c_1<c_2<1\f$ | 101 | Generalized Wolfe | \f$c_2\phi_k'(0)\le \phi_k'(t)\le-c_3\phi_k'(0)\f$ | \f$0<c_3<1\f$ | 102 | Approximate Wolfe | \f$c_2\phi_k'(0)\le \phi_k'(t)\le(2c_1-1)\phi_k'(0)\f$ | \f$c_1<c_2<1\f$ | 103 | Goldstein | \f$\phi_k(0)+(1-c_1)t\phi_k'(0)\le \phi_k(t)\f$ | \f$0<c_1<\frac{1}{2}\f$ | 104 </CENTER> 105 106 Note that \f$\phi_k'(t) = \langle \nabla f(x_k+ts_k),s_k\rangle_{\mathcal{X}}\f$. 107 108 For bound constrained problems, the line-search step performs a projected search. That is, 109 the line search approximately minimizes the 1D objective function 110 \f$\phi_k(t) = f(P_{[a,b]}(x_k+ts_k))\f$ where \f$P_{[a,b]}\f$ denotes the projection onto 111 the upper and lower bounds. Such line-search algorithms correspond to projected gradient 112 and projected Newton-type algorithms. 113 114 For projected methods, we require the notion of an active set of an iterate \f$x_k\f$, 115 \f[ 116 \mathcal{A}_k = \{\, \xi\in\Xi\,:\,x_k(\xi) = a(\xi)\,\}\cap 117 \{\, \xi\in\Xi\,:\,x_k(\xi) = b(\xi)\,\}. 118 \f] 119 Given \f$\mathcal{A}_k\f$ and a search direction \f$s_k\f$, we define the binding set as 120 \f[ 121 \mathcal{B}_k = \{\, \xi\in\Xi\,:\,x_k(\xi) = a(\xi) \;\text{and}\; s_k(\xi) < 0 \,\}\cap 122 \{\, \xi\in\Xi\,:\,x_k(\xi) = b(\xi) \;\text{and}\; s_k(\xi) > 0 \,\}. 123 \f] 124 The binding set contains the values of \f$\xi\in\Xi\f$ such that if \f$x_k(\xi)\f$ is on a 125 bound, then \f$(x_k+s_k)(\xi)\f$ will violate bound. Using these definitions, we can 126 redefine the sufficient decrease and curvature conditions from the unconstrained case to 127 the case of bound constraints. 128 129 LineSearchStep implements a number of algorithms for both bound constrained and unconstrained 130 optimization. These algorithms are: Steepest descent; Nonlinear CG; Quasi-Newton methods; 131 Inexact Newton methods; Newton's method. These methods are chosen through the EDescent enum. 132 */ 133 134 135 namespace ROL { 136 137 template <class Real> 138 class LineSearchStep : public Step<Real> { 139 private: 140 141 ROL::Ptr<Step<Real> > desc_; ///< Unglobalized step object 142 ROL::Ptr<Secant<Real> > secant_; ///< Secant object (used for quasi-Newton) 143 ROL::Ptr<Krylov<Real> > krylov_; ///< Krylov solver object (used for inexact Newton) 144 ROL::Ptr<NonlinearCG<Real> > nlcg_; ///< Nonlinear CG object (used for nonlinear CG) 145 ROL::Ptr<LineSearch<Real> > lineSearch_; ///< Line-search object 146 147 ROL::Ptr<Vector<Real> > d_; 148 149 ELineSearch els_; ///< enum determines type of line search 150 ECurvatureCondition econd_; ///< enum determines type of curvature condition 151 152 bool acceptLastAlpha_; ///< For backwards compatibility. When max function evaluations are reached take last step 153 154 bool usePreviousAlpha_; ///< If true, use the previously accepted step length (if any) as the new initial step length 155 156 int verbosity_; 157 bool computeObj_; 158 Real fval_; 159 160 ROL::ParameterList parlist_; 161 162 std::string lineSearchName_; 163 GradDotStep(const Vector<Real> & g,const Vector<Real> & s,const Vector<Real> & x,BoundConstraint<Real> & bnd,Real eps=0)164 Real GradDotStep(const Vector<Real> &g, const Vector<Real> &s, 165 const Vector<Real> &x, 166 BoundConstraint<Real> &bnd, Real eps = 0) { 167 Real gs(0), one(1); 168 if (!bnd.isActivated()) { 169 gs = s.dot(g.dual()); 170 } 171 else { 172 d_->set(s); 173 bnd.pruneActive(*d_,g,x,eps); 174 gs = d_->dot(g.dual()); 175 d_->set(x); 176 d_->axpy(-one,g.dual()); 177 bnd.project(*d_); 178 d_->scale(-one); 179 d_->plus(x); 180 bnd.pruneInactive(*d_,g,x,eps); 181 gs -= d_->dot(g.dual()); 182 } 183 return gs; 184 } 185 186 public: 187 188 using Step<Real>::initialize; 189 using Step<Real>::compute; 190 using Step<Real>::update; 191 192 /** \brief Constructor. 193 194 Standard constructor to build a LineSearchStep object. Algorithmic 195 specifications are passed in through a ROL::ParameterList. The 196 line-search type, secant type, Krylov type, or nonlinear CG type can 197 be set using user-defined objects. 198 199 @param[in] parlist is a parameter list containing algorithmic specifications 200 @param[in] lineSearch is a user-defined line search object 201 @param[in] secant is a user-defined secant object 202 @param[in] krylov is a user-defined Krylov object 203 @param[in] nlcg is a user-defined Nonlinear CG object 204 */ LineSearchStep(ROL::ParameterList & parlist,const ROL::Ptr<LineSearch<Real>> & lineSearch=ROL::nullPtr,const ROL::Ptr<Secant<Real>> & secant=ROL::nullPtr,const ROL::Ptr<Krylov<Real>> & krylov=ROL::nullPtr,const ROL::Ptr<NonlinearCG<Real>> & nlcg=ROL::nullPtr)205 LineSearchStep( ROL::ParameterList &parlist, 206 const ROL::Ptr<LineSearch<Real> > &lineSearch = ROL::nullPtr, 207 const ROL::Ptr<Secant<Real> > &secant = ROL::nullPtr, 208 const ROL::Ptr<Krylov<Real> > &krylov = ROL::nullPtr, 209 const ROL::Ptr<NonlinearCG<Real> > &nlcg = ROL::nullPtr ) 210 : Step<Real>(), desc_(ROL::nullPtr), secant_(secant), 211 krylov_(krylov), nlcg_(nlcg), lineSearch_(lineSearch), 212 els_(LINESEARCH_USERDEFINED), econd_(CURVATURECONDITION_WOLFE), 213 verbosity_(0), computeObj_(true), fval_(0), parlist_(parlist) { 214 // Parse parameter list 215 ROL::ParameterList& Llist = parlist.sublist("Step").sublist("Line Search"); 216 ROL::ParameterList& Glist = parlist.sublist("General"); 217 econd_ = StringToECurvatureCondition(Llist.sublist("Curvature Condition").get("Type","Strong Wolfe Conditions") ); 218 acceptLastAlpha_ = Llist.get("Accept Last Alpha", false); 219 verbosity_ = Glist.get("Print Verbosity",0); 220 computeObj_ = Glist.get("Recompute Objective Function",false); 221 // Initialize Line Search 222 if (lineSearch_ == ROL::nullPtr) { 223 lineSearchName_ = Llist.sublist("Line-Search Method").get("Type","Cubic Interpolation"); 224 els_ = StringToELineSearch(lineSearchName_); 225 lineSearch_ = LineSearchFactory<Real>(parlist); 226 } 227 else { // User-defined linesearch provided 228 lineSearchName_ = Llist.sublist("Line-Search Method").get("User Defined Line-Search Name", 229 "Unspecified User Defined Line-Search"); 230 } 231 232 } 233 initialize(Vector<Real> & x,const Vector<Real> & s,const Vector<Real> & g,Objective<Real> & obj,BoundConstraint<Real> & bnd,AlgorithmState<Real> & algo_state)234 void initialize( Vector<Real> &x, const Vector<Real> &s, const Vector<Real> &g, 235 Objective<Real> &obj, BoundConstraint<Real> &bnd, 236 AlgorithmState<Real> &algo_state ) { 237 d_ = x.clone(); 238 239 // Initialize unglobalized step 240 ROL::ParameterList& list 241 = parlist_.sublist("Step").sublist("Line Search").sublist("Descent Method"); 242 EDescent edesc = StringToEDescent(list.get("Type","Quasi-Newton Method")); 243 if (bnd.isActivated()) { 244 switch(edesc) { 245 case DESCENT_STEEPEST: { 246 desc_ = ROL::makePtr<GradientStep<Real>>(parlist_,computeObj_); 247 break; 248 } 249 case DESCENT_NONLINEARCG: { 250 desc_ = ROL::makePtr<NonlinearCGStep<Real>>(parlist_,nlcg_,computeObj_); 251 break; 252 } 253 case DESCENT_SECANT: { 254 desc_ = ROL::makePtr<ProjectedSecantStep<Real>>(parlist_,secant_,computeObj_); 255 break; 256 } 257 case DESCENT_NEWTON: { 258 desc_ = ROL::makePtr<ProjectedNewtonStep<Real>>(parlist_,computeObj_); 259 break; 260 } 261 case DESCENT_NEWTONKRYLOV: { 262 desc_ = ROL::makePtr<ProjectedNewtonKrylovStep<Real>>(parlist_,krylov_,secant_,computeObj_); 263 break; 264 } 265 default: 266 ROL_TEST_FOR_EXCEPTION(true,std::invalid_argument, 267 ">>> (LineSearchStep::Initialize): Undefined descent type!"); 268 } 269 } 270 else { 271 switch(edesc) { 272 case DESCENT_STEEPEST: { 273 desc_ = ROL::makePtr<GradientStep<Real>>(parlist_,computeObj_); 274 break; 275 } 276 case DESCENT_NONLINEARCG: { 277 desc_ = ROL::makePtr<NonlinearCGStep<Real>>(parlist_,nlcg_,computeObj_); 278 break; 279 } 280 case DESCENT_SECANT: { 281 desc_ = ROL::makePtr<SecantStep<Real>>(parlist_,secant_,computeObj_); 282 break; 283 } 284 case DESCENT_NEWTON: { 285 desc_ = ROL::makePtr<NewtonStep<Real>>(parlist_,computeObj_); 286 break; 287 } 288 case DESCENT_NEWTONKRYLOV: { 289 desc_ = ROL::makePtr<NewtonKrylovStep<Real>>(parlist_,krylov_,secant_,computeObj_); 290 break; 291 } 292 default: 293 ROL_TEST_FOR_EXCEPTION(true,std::invalid_argument, 294 ">>> (LineSearchStep::Initialize): Undefined descent type!"); 295 } 296 } 297 desc_->initialize(x,s,g,obj,bnd,algo_state); 298 299 // Initialize line search 300 lineSearch_->initialize(x,s,g,obj,bnd); 301 //const ROL::Ptr<const StepState<Real> > desc_state = desc_->getStepState(); 302 //lineSearch_->initialize(x,s,*(desc_state->gradientVec),obj,bnd); 303 } 304 305 /** \brief Compute step. 306 307 Computes a trial step, \f$s_k\f$ as defined by the enum EDescent. Once the 308 trial step is determined, this function determines an approximate minimizer 309 of the 1D function \f$\phi_k(t) = f(x_k+ts_k)\f$. This approximate 310 minimizer must satisfy sufficient decrease and curvature conditions. 311 312 @param[out] s is the computed trial step 313 @param[in] x is the current iterate 314 @param[in] obj is the objective function 315 @param[in] bnd are the bound constraints 316 @param[in] algo_state contains the current state of the algorithm 317 */ compute(Vector<Real> & s,const Vector<Real> & x,Objective<Real> & obj,BoundConstraint<Real> & bnd,AlgorithmState<Real> & algo_state)318 void compute( Vector<Real> &s, const Vector<Real> &x, 319 Objective<Real> &obj, BoundConstraint<Real> &bnd, 320 AlgorithmState<Real> &algo_state ) { 321 Real zero(0), one(1); 322 // Compute unglobalized step 323 desc_->compute(s,x,obj,bnd,algo_state); 324 325 // Ensure that s is a descent direction 326 // ---> If not, then default to steepest descent 327 const ROL::Ptr<const StepState<Real> > desc_state = desc_->getStepState(); 328 Real gs = GradDotStep(*(desc_state->gradientVec),s,x,bnd,algo_state.gnorm); 329 if (gs >= zero) { 330 s.set((desc_state->gradientVec)->dual()); 331 s.scale(-one); 332 gs = GradDotStep(*(desc_state->gradientVec),s,x,bnd,algo_state.gnorm); 333 } 334 335 // Perform line search 336 ROL::Ptr<StepState<Real> > step_state = Step<Real>::getState(); 337 fval_ = algo_state.value; 338 step_state->nfval = 0; step_state->ngrad = 0; 339 lineSearch_->setData(algo_state.gnorm,*(desc_state->gradientVec)); 340 lineSearch_->run(step_state->searchSize,fval_,step_state->nfval,step_state->ngrad,gs,s,x,obj,bnd); 341 342 // Make correction if maximum function evaluations reached 343 if(!acceptLastAlpha_) { 344 lineSearch_->setMaxitUpdate(step_state->searchSize,fval_,algo_state.value); 345 } 346 347 // Compute scaled descent direction 348 s.scale(step_state->searchSize); 349 if ( bnd.isActivated() ) { 350 s.plus(x); 351 bnd.project(s); 352 s.axpy(static_cast<Real>(-1),x); 353 } 354 } 355 356 /** \brief Update step, if successful. 357 358 Given a trial step, \f$s_k\f$, this function updates \f$x_{k+1}=x_k+s_k\f$. 359 This function also updates the secant approximation. 360 361 @param[in,out] x is the updated iterate 362 @param[in] s is the computed trial step 363 @param[in] obj is the objective function 364 @param[in] con are the bound constraints 365 @param[in] algo_state contains the current state of the algorithm 366 */ update(Vector<Real> & x,const Vector<Real> & s,Objective<Real> & obj,BoundConstraint<Real> & bnd,AlgorithmState<Real> & algo_state)367 void update( Vector<Real> &x, const Vector<Real> &s, 368 Objective<Real> &obj, BoundConstraint<Real> &bnd, 369 AlgorithmState<Real> &algo_state ) { 370 ROL::Ptr<StepState<Real> > step_state = Step<Real>::getState(); 371 algo_state.nfval += step_state->nfval; 372 algo_state.ngrad += step_state->ngrad; 373 desc_->update(x,s,obj,bnd,algo_state); 374 step_state->flag = desc_->getStepState()->flag; 375 step_state->SPiter = desc_->getStepState()->SPiter; 376 step_state->SPflag = desc_->getStepState()->SPflag; 377 if ( !computeObj_ ) { 378 algo_state.value = fval_; 379 } 380 } 381 382 /** \brief Print iterate header. 383 384 This function produces a string containing header information. 385 */ printHeader(void) const386 std::string printHeader( void ) const { 387 std::string head = desc_->printHeader(); 388 head.erase(std::remove(head.end()-3,head.end(),'\n'), head.end()); 389 std::stringstream hist; 390 hist.write(head.c_str(),head.length()); 391 hist << std::setw(10) << std::left << "ls_#fval"; 392 hist << std::setw(10) << std::left << "ls_#grad"; 393 hist << "\n"; 394 return hist.str(); 395 } 396 397 /** \brief Print step name. 398 399 This function produces a string containing the algorithmic step information. 400 */ printName(void) const401 std::string printName( void ) const { 402 std::string name = desc_->printName(); 403 std::stringstream hist; 404 hist << name; 405 hist << "Line Search: " << lineSearchName_; 406 hist << " satisfying " << ECurvatureConditionToString(econd_) << "\n"; 407 return hist.str(); 408 } 409 410 /** \brief Print iterate status. 411 412 This function prints the iteration status. 413 414 @param[in] algo_state is the current state of the algorithm 415 @param[in] printHeader if set to true will print the header at each iteration 416 */ print(AlgorithmState<Real> & algo_state,bool print_header=false) const417 std::string print( AlgorithmState<Real> & algo_state, bool print_header = false ) const { 418 const ROL::Ptr<const StepState<Real> > step_state = Step<Real>::getStepState(); 419 std::string desc = desc_->print(algo_state,false); 420 desc.erase(std::remove(desc.end()-3,desc.end(),'\n'), desc.end()); 421 std::string name = desc_->printName(); 422 size_t pos = desc.find(name); 423 if ( pos != std::string::npos ) { 424 desc.erase(pos, name.length()); 425 } 426 427 std::stringstream hist; 428 if ( algo_state.iter == 0 ) { 429 hist << printName(); 430 } 431 if ( print_header ) { 432 hist << printHeader(); 433 } 434 hist << desc; 435 if ( algo_state.iter == 0 ) { 436 hist << "\n"; 437 } 438 else { 439 hist << std::setw(10) << std::left << step_state->nfval; 440 hist << std::setw(10) << std::left << step_state->ngrad; 441 hist << "\n"; 442 } 443 return hist.str(); 444 } 445 }; // class LineSearchStep 446 447 } // namespace ROL 448 449 #endif 450