1 // @HEADER 2 // ************************************************************************ 3 // 4 // Rapid Optimization Library (ROL) Package 5 // Copyright (2014) Sandia Corporation 6 // 7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 8 // license for use of this work by or on behalf of the U.S. Government. 9 // 10 // Redistribution and use in source and binary forms, with or without 11 // modification, are permitted provided that the following conditions are 12 // met: 13 // 14 // 1. Redistributions of source code must retain the above copyright 15 // notice, this list of conditions and the following disclaimer. 16 // 17 // 2. Redistributions in binary form must reproduce the above copyright 18 // notice, this list of conditions and the following disclaimer in the 19 // documentation and/or other materials provided with the distribution. 20 // 21 // 3. Neither the name of the Corporation nor the names of the 22 // contributors may be used to endorse or promote products derived from 23 // this software without specific prior written permission. 24 // 25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 // 37 // Questions? Contact lead developers: 38 // Drew Kouri (dpkouri@sandia.gov) and 39 // Denis Ridzal (dridzal@sandia.gov) 40 // 41 // ************************************************************************ 42 // @HEADER 43 44 #ifndef ROL_BUNDLE_H 45 #define ROL_BUNDLE_H 46 47 #include "ROL_Types.hpp" 48 #include "ROL_Vector.hpp" 49 50 #include "ROL_Ptr.hpp" 51 52 #include <vector> 53 #include <set> 54 55 /** \class ROL::Bundle 56 \brief Provides the interface for and implements a bundle. 57 */ 58 59 namespace ROL { 60 61 template<class Real> 62 class Bundle { 63 /***********************************************************************************************/ 64 /***************** BUNDLE STORAGE **************************************************************/ 65 /***********************************************************************************************/ 66 private: 67 std::vector<ROL::Ptr<Vector<Real> > > subgradients_; 68 std::vector<Real> linearizationErrors_; 69 std::vector<Real> distanceMeasures_; 70 71 std::vector<Real> dualVariables_; 72 73 ROL::Ptr<Vector<Real> > tG_; 74 ROL::Ptr<Vector<Real> > eG_; 75 ROL::Ptr<Vector<Real> > yG_; 76 ROL::Ptr<Vector<Real> > gx_; 77 ROL::Ptr<Vector<Real> > ge_; 78 79 unsigned size_; 80 81 unsigned maxSize_; 82 unsigned remSize_; 83 Real coeff_; 84 Real omega_; 85 86 bool isInitialized_; 87 remove(const std::vector<unsigned> & ind)88 void remove(const std::vector<unsigned> &ind) { 89 Real zero(0); 90 for (unsigned j = ind.back()+1; j < size_; ++j) { 91 (subgradients_[j-1])->set(*(subgradients_[j])); 92 linearizationErrors_[j-1] = linearizationErrors_[j]; 93 distanceMeasures_[j-1] = distanceMeasures_[j]; 94 dualVariables_[j-1] = dualVariables_[j]; 95 } 96 (subgradients_[size_-1])->zero(); 97 linearizationErrors_[size_-1] = ROL_OVERFLOW<Real>(); 98 distanceMeasures_[size_-1] = ROL_OVERFLOW<Real>(); 99 dualVariables_[size_-1] = zero; 100 for (unsigned i = ind.size()-1; i > 0; --i) { 101 for (unsigned j = ind[i-1]+1; j < size_; ++j) { 102 (subgradients_[j-1])->set(*(subgradients_[j])); 103 linearizationErrors_[j-1] = linearizationErrors_[j]; 104 distanceMeasures_[j-1] = distanceMeasures_[j]; 105 dualVariables_[j-1] = dualVariables_[j]; 106 } 107 } 108 size_ -= ind.size(); 109 } 110 add(const Vector<Real> & g,const Real le,const Real dm)111 void add(const Vector<Real> &g, const Real le, const Real dm) { 112 Real zero(0); 113 (subgradients_[size_])->set(g); 114 linearizationErrors_[size_] = le; 115 distanceMeasures_[size_] = dm; 116 dualVariables_[size_] = zero; 117 size_++; 118 } 119 120 /***********************************************************************************************/ 121 /***************** BUNDLE MODIFICATION AND ACCESS ROUTINES *************************************/ 122 /***********************************************************************************************/ 123 public: ~Bundle(void)124 virtual ~Bundle(void) {} 125 Bundle(const unsigned maxSize=10,const Real coeff=0.0,const Real omega=2.0,const unsigned remSize=2)126 Bundle(const unsigned maxSize = 10, 127 const Real coeff = 0.0, 128 const Real omega = 2.0, 129 const unsigned remSize = 2) 130 : size_(0), maxSize_(maxSize), remSize_(remSize), 131 coeff_(coeff), omega_(omega), isInitialized_(false) { 132 Real zero(0); 133 remSize_ = ((remSize_ < 2) ? 2 : ((remSize_ > maxSize_-1) ? maxSize_-1 : remSize_)); 134 coeff_ = std::max(static_cast<Real>(0),coeff_); 135 omega_ = std::max(static_cast<Real>(1),omega_); 136 subgradients_.clear(); 137 subgradients_.assign(maxSize,ROL::nullPtr); 138 linearizationErrors_.clear(); 139 linearizationErrors_.assign(maxSize_,ROL_OVERFLOW<Real>()); 140 distanceMeasures_.clear(); 141 distanceMeasures_.assign(maxSize_,ROL_OVERFLOW<Real>()); 142 dualVariables_.clear(); 143 dualVariables_.assign(maxSize_,zero); 144 } 145 initialize(const Vector<Real> & g)146 virtual void initialize(const Vector<Real> &g) { 147 if ( !isInitialized_ ) { 148 Real zero(0), one(1); 149 for (unsigned i = 0; i < maxSize_; ++i) { 150 subgradients_[i] = g.clone(); 151 } 152 (subgradients_[0])->set(g); 153 linearizationErrors_[0] = zero; 154 distanceMeasures_[0] = zero; 155 dualVariables_[0] = one; 156 size_++; 157 isInitialized_ = true; 158 tG_ = g.clone(); 159 yG_ = g.clone(); 160 eG_ = g.clone(); 161 gx_ = g.clone(); 162 ge_ = g.clone(); 163 } 164 } 165 166 virtual unsigned solveDual(const Real t, const unsigned maxit = 1000, const Real tol = 1.e-8) = 0; 167 linearizationError(const unsigned i) const168 const Real linearizationError(const unsigned i) const { 169 return linearizationErrors_[i]; 170 } 171 distanceMeasure(const unsigned i) const172 const Real distanceMeasure(const unsigned i) const { 173 return distanceMeasures_[i]; 174 } 175 subgradient(const unsigned i) const176 const Vector<Real> & subgradient(const unsigned i) const { 177 return *(subgradients_[i]); 178 } 179 getDualVariable(const unsigned i) const180 const Real getDualVariable(const unsigned i) const { 181 return dualVariables_[i]; 182 } 183 setDualVariable(const unsigned i,const Real val)184 void setDualVariable(const unsigned i, const Real val) { 185 dualVariables_[i] = val; 186 } 187 resetDualVariables(void)188 void resetDualVariables(void) { 189 const Real zero(0); 190 dualVariables_.assign(size_,zero); 191 } 192 computeAlpha(const Real dm,const Real le) const193 const Real computeAlpha(const Real dm, const Real le) const { 194 Real alpha = le; 195 if ( coeff_ > ROL_EPSILON<Real>() ) { 196 alpha = std::max(coeff_*std::pow(dm,omega_),le); 197 } 198 return alpha; 199 } 200 alpha(const unsigned i) const201 const Real alpha(const unsigned i) const { 202 return computeAlpha(distanceMeasures_[i],linearizationErrors_[i]); 203 } 204 size(void) const205 unsigned size(void) const { 206 return size_; 207 } 208 aggregate(Vector<Real> & aggSubGrad,Real & aggLinErr,Real & aggDistMeas) const209 void aggregate(Vector<Real> &aggSubGrad, Real &aggLinErr, Real &aggDistMeas) const { 210 Real zero(0), one(1); 211 aggSubGrad.zero(); aggLinErr = zero; aggDistMeas = zero; eG_->zero(); 212 Real eLE(0), eDM(0), yLE(0), yDM(0), tLE(0), tDM(0); 213 for (unsigned i = 0; i < size_; ++i) { 214 // Compute aggregate subgradient using Kahan's compensated sum 215 //aggSubGrad.axpy(dualVariables_[i],*subgradients_[i]); 216 yG_->set(*subgradients_[i]); yG_->scale(dualVariables_[i]); yG_->axpy(-one,*eG_); 217 tG_->set(aggSubGrad); tG_->plus(*yG_); 218 eG_->set(*tG_); eG_->axpy(-one,aggSubGrad); eG_->axpy(-one,*yG_); 219 aggSubGrad.set(*tG_); 220 // Compute aggregate linearization error using Kahan's compensated sum 221 //aggLinErr += dualVariables_[i]*linearizationErrors_[i]; 222 yLE = dualVariables_[i]*linearizationErrors_[i] - eLE; 223 tLE = aggLinErr + yLE; 224 eLE = (tLE - aggLinErr) - yLE; 225 aggLinErr = tLE; 226 // Compute aggregate distance measure using Kahan's compensated sum 227 //aggDistMeas += dualVariables_[i]*distanceMeasures_[i]; 228 yDM = dualVariables_[i]*distanceMeasures_[i] - eDM; 229 tDM = aggDistMeas + yDM; 230 eDM = (tDM - aggDistMeas) - yDM; 231 aggDistMeas = tDM; 232 } 233 } 234 reset(const Vector<Real> & g,const Real le,const Real dm)235 void reset(const Vector<Real> &g, const Real le, const Real dm) { 236 if (size_ == maxSize_) { 237 // Find indices to remove 238 unsigned loc = size_, cnt = 0; 239 std::vector<unsigned> ind(remSize_,0); 240 for (unsigned i = size_; i > 0; --i) { 241 if ( std::abs(linearizationErrors_[i-1]) < ROL_EPSILON<Real>() ) { 242 loc = i-1; 243 break; 244 } 245 } 246 for (unsigned i = 0; i < size_; ++i) { 247 if ( i != loc ) { 248 ind[cnt] = i; 249 cnt++; 250 } 251 if (cnt == remSize_) { 252 break; 253 } 254 } 255 // Remove indices 256 remove(ind); 257 // Add aggregate subgradient 258 add(g,le,dm); 259 } 260 } 261 update(const bool flag,const Real linErr,const Real distMeas,const Vector<Real> & g,const Vector<Real> & s)262 void update(const bool flag, const Real linErr, const Real distMeas, 263 const Vector<Real> &g, const Vector<Real> &s) { 264 Real zero(0); 265 if ( flag ) { 266 // Serious step taken: Update linearlization errors and distance measures 267 for (unsigned i = 0; i < size_; ++i) { 268 linearizationErrors_[i] += linErr - subgradients_[i]->dot(s.dual()); 269 distanceMeasures_[i] += distMeas; 270 } 271 linearizationErrors_[size_] = zero; 272 distanceMeasures_[size_] = zero; 273 } 274 else { 275 // Null step taken: 276 linearizationErrors_[size_] = linErr; 277 distanceMeasures_[size_] = distMeas; 278 } 279 // Update (sub)gradient bundle 280 (subgradients_[size_])->set(g); 281 // Update dual variables 282 dualVariables_[size_] = zero; 283 // Update bundle size 284 size_++; 285 } 286 287 protected: GiGj(const unsigned i,const unsigned j) const288 const Real GiGj(const unsigned i, const unsigned j) const { 289 return subgradient(i).dot(subgradient(j)); 290 } 291 dotGi(const unsigned i,const Vector<Real> & x) const292 const Real dotGi(const unsigned i, const Vector<Real> &x) const { 293 return x.dot(subgradient(i)); 294 } 295 addGi(const unsigned i,const Real a,Vector<Real> & x) const296 void addGi(const unsigned i, const Real a, Vector<Real> &x) const { 297 x.axpy(a,subgradient(i)); 298 } 299 evaluateObjective(std::vector<Real> & g,const std::vector<Real> & x,const Real t) const300 Real evaluateObjective(std::vector<Real> &g, const std::vector<Real> &x, const Real t) const { 301 Real one(1), half(0.5); 302 gx_->zero(); eG_->zero(); 303 for (unsigned i = 0; i < Bundle<Real>::size(); ++i) { 304 // Compute Gx using Kahan's compensated sum 305 //gx_->axpy(x[i],*Bundle<Real>::subgradients_[i]); 306 yG_->set(subgradient(i)); yG_->scale(x[i]); yG_->axpy(-one,*eG_); 307 tG_->set(*gx_); tG_->plus(*yG_); 308 eG_->set(*tG_); eG_->axpy(-one,*gx_); eG_->axpy(-one,*yG_); 309 gx_->set(*tG_); 310 } 311 Real Hx(0), val(0), err(0), tmp(0), y(0); 312 for (unsigned i = 0; i < size(); ++i) { 313 // Compute < g_i, Gx > 314 Hx = dotGi(i,*gx_); 315 // Add to the objective function value using Kahan's compensated sum 316 //val += x[i]*(half*Hx + Bundle<Real>::alpha(i)/t); 317 y = x[i]*(half*Hx + alpha(i)/t) - err; 318 tmp = val + y; 319 err = (tmp - val) - y; 320 val = tmp; 321 // Add gradient component 322 g[i] = Hx + alpha(i)/t; 323 } 324 return val; 325 } 326 solveDual_dim1(const Real t,const unsigned maxit=1000,const Real tol=1.e-8)327 unsigned solveDual_dim1(const Real t, const unsigned maxit = 1000, const Real tol = 1.e-8) { 328 setDualVariable(0,static_cast<Real>(1)); 329 //std::cout << "dim = " << Bundle<Real>::size() << " iter = " << 0 << " CONVERGED!\n"; 330 return 0; 331 } 332 solveDual_dim2(const Real t,const unsigned maxit=1000,const Real tol=1.e-8)333 unsigned solveDual_dim2(const Real t, const unsigned maxit = 1000, const Real tol = 1.e-8) { 334 Real diffg = gx_->dot(*gx_), zero(0), one(1), half(0.5); 335 gx_->set(subgradient(0)); addGi(1,-one,*gx_); 336 if ( std::abs(diffg) > ROL_EPSILON<Real>() ) { 337 Real diffa = (alpha(0)-alpha(1))/t; 338 Real gdiffg = dotGi(1,*gx_); 339 setDualVariable(0,std::min(one,std::max(zero,-(gdiffg+diffa)/diffg))); 340 setDualVariable(1,one-getDualVariable(0)); 341 } 342 else { 343 if ( std::abs(alpha(0)-alpha(1)) > ROL_EPSILON<Real>() ) { 344 if ( alpha(0) < alpha(1) ) { 345 setDualVariable(0,one); setDualVariable(1,zero); 346 } 347 else if ( alpha(0) > alpha(1) ) { 348 setDualVariable(0,zero); setDualVariable(1,one); 349 } 350 } 351 else { 352 setDualVariable(0,half); setDualVariable(1,half); 353 } 354 } 355 //std::cout << "dim = " << Bundle<Real>::size() << " iter = " << 0 << " CONVERGED!\n"; 356 return 0; 357 } 358 359 }; // class Bundle 360 361 } // namespace ROL 362 363 #endif 364 365 // void aggregate(Vector<Real> &aggSubGrad, Real &aggLinErr, Real &aggDistMeas) const { 366 // aggSubGrad.zero(); aggLinErr = 0.0; aggDistMeas = 0.0; 367 // for (unsigned i = 0; i < size_; ++i) { 368 // //if ( dualVariables_[i] > ROL_EPSILON<Real>() ) { 369 // aggSubGrad.axpy(dualVariables_[i],*(subgradients_[i])); 370 // aggLinErr += dualVariables_[i]*linearizationErrors_[i]; 371 // aggDistMeas += dualVariables_[i]*distanceMeasures_[i]; 372 // //} 373 // } 374 // } 375