1 // @HEADER
2 // ************************************************************************
3 //
4 //               Rapid Optimization Library (ROL) Package
5 //                 Copyright (2014) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact lead developers:
38 //              Drew Kouri   (dpkouri@sandia.gov) and
39 //              Denis Ridzal (dridzal@sandia.gov)
40 //
41 // ************************************************************************
42 // @HEADER
43 
44 #ifndef ROL_BUNDLE_H
45 #define ROL_BUNDLE_H
46 
47 #include "ROL_Types.hpp"
48 #include "ROL_Vector.hpp"
49 
50 #include "ROL_Ptr.hpp"
51 
52 #include <vector>
53 #include <set>
54 
55 /** \class ROL::Bundle
56     \brief Provides the interface for and implements a bundle.
57 */
58 
59 namespace ROL {
60 
61 template<class Real>
62 class Bundle {
63 /***********************************************************************************************/
64 /***************** BUNDLE STORAGE **************************************************************/
65 /***********************************************************************************************/
66 private:
67   std::vector<ROL::Ptr<Vector<Real> > > subgradients_;
68   std::vector<Real> linearizationErrors_;
69   std::vector<Real> distanceMeasures_;
70 
71   std::vector<Real> dualVariables_;
72 
73   ROL::Ptr<Vector<Real> > tG_;
74   ROL::Ptr<Vector<Real> > eG_;
75   ROL::Ptr<Vector<Real> > yG_;
76   ROL::Ptr<Vector<Real> > gx_;
77   ROL::Ptr<Vector<Real> > ge_;
78 
79   unsigned size_;
80 
81   unsigned maxSize_;
82   unsigned remSize_;
83   Real coeff_;
84   Real omega_;
85 
86   bool isInitialized_;
87 
remove(const std::vector<unsigned> & ind)88   void remove(const std::vector<unsigned> &ind) {
89     Real zero(0);
90     for (unsigned j = ind.back()+1; j < size_; ++j) {
91       (subgradients_[j-1])->set(*(subgradients_[j]));
92       linearizationErrors_[j-1] = linearizationErrors_[j];
93       distanceMeasures_[j-1]    = distanceMeasures_[j];
94       dualVariables_[j-1]       = dualVariables_[j];
95     }
96     (subgradients_[size_-1])->zero();
97     linearizationErrors_[size_-1] = ROL_OVERFLOW<Real>();
98     distanceMeasures_[size_-1]    = ROL_OVERFLOW<Real>();
99     dualVariables_[size_-1]       = zero;
100     for (unsigned i = ind.size()-1; i > 0; --i) {
101       for (unsigned j = ind[i-1]+1; j < size_; ++j) {
102         (subgradients_[j-1])->set(*(subgradients_[j]));
103         linearizationErrors_[j-1] = linearizationErrors_[j];
104         distanceMeasures_[j-1]    = distanceMeasures_[j];
105         dualVariables_[j-1]       = dualVariables_[j];
106       }
107     }
108     size_ -= ind.size();
109   }
110 
add(const Vector<Real> & g,const Real le,const Real dm)111   void add(const Vector<Real> &g, const Real le, const Real dm) {
112     Real zero(0);
113     (subgradients_[size_])->set(g);
114     linearizationErrors_[size_] = le;
115     distanceMeasures_[size_]    = dm;
116     dualVariables_[size_]       = zero;
117     size_++;
118   }
119 
120 /***********************************************************************************************/
121 /***************** BUNDLE MODIFICATION AND ACCESS ROUTINES *************************************/
122 /***********************************************************************************************/
123 public:
~Bundle(void)124   virtual ~Bundle(void) {}
125 
Bundle(const unsigned maxSize=10,const Real coeff=0.0,const Real omega=2.0,const unsigned remSize=2)126   Bundle(const unsigned maxSize = 10,
127          const Real coeff = 0.0,
128          const Real omega = 2.0,
129          const unsigned remSize = 2)
130     : size_(0), maxSize_(maxSize), remSize_(remSize),
131       coeff_(coeff), omega_(omega), isInitialized_(false) {
132     Real zero(0);
133     remSize_ = ((remSize_ < 2) ? 2 : ((remSize_ > maxSize_-1) ? maxSize_-1 : remSize_));
134     coeff_ = std::max(static_cast<Real>(0),coeff_);
135     omega_ = std::max(static_cast<Real>(1),omega_);
136     subgradients_.clear();
137     subgradients_.assign(maxSize,ROL::nullPtr);
138     linearizationErrors_.clear();
139     linearizationErrors_.assign(maxSize_,ROL_OVERFLOW<Real>());
140     distanceMeasures_.clear();
141     distanceMeasures_.assign(maxSize_,ROL_OVERFLOW<Real>());
142     dualVariables_.clear();
143     dualVariables_.assign(maxSize_,zero);
144   }
145 
initialize(const Vector<Real> & g)146   virtual void initialize(const Vector<Real> &g) {
147     if ( !isInitialized_ ) {
148       Real zero(0), one(1);
149       for (unsigned i = 0; i < maxSize_; ++i) {
150         subgradients_[i] = g.clone();
151       }
152       (subgradients_[0])->set(g);
153       linearizationErrors_[0] = zero;
154       distanceMeasures_[0]    = zero;
155       dualVariables_[0]       = one;
156       size_++;
157       isInitialized_ = true;
158       tG_ = g.clone();
159       yG_ = g.clone();
160       eG_ = g.clone();
161       gx_ = g.clone();
162       ge_ = g.clone();
163     }
164   }
165 
166   virtual unsigned solveDual(const Real t, const unsigned maxit = 1000, const Real tol = 1.e-8) = 0;
167 
linearizationError(const unsigned i) const168   const Real linearizationError(const unsigned i) const {
169     return linearizationErrors_[i];
170   }
171 
distanceMeasure(const unsigned i) const172   const Real distanceMeasure(const unsigned i) const {
173     return distanceMeasures_[i];
174   }
175 
subgradient(const unsigned i) const176   const Vector<Real> & subgradient(const unsigned i) const {
177     return *(subgradients_[i]);
178   }
179 
getDualVariable(const unsigned i) const180   const Real getDualVariable(const unsigned i) const {
181     return dualVariables_[i];
182   }
183 
setDualVariable(const unsigned i,const Real val)184   void setDualVariable(const unsigned i, const Real val) {
185     dualVariables_[i] = val;
186   }
187 
resetDualVariables(void)188   void resetDualVariables(void) {
189     const Real zero(0);
190     dualVariables_.assign(size_,zero);
191   }
192 
computeAlpha(const Real dm,const Real le) const193   const Real computeAlpha(const Real dm, const Real le) const {
194     Real alpha = le;
195     if ( coeff_ > ROL_EPSILON<Real>() ) {
196       alpha = std::max(coeff_*std::pow(dm,omega_),le);
197     }
198     return alpha;
199   }
200 
alpha(const unsigned i) const201   const Real alpha(const unsigned i) const {
202     return computeAlpha(distanceMeasures_[i],linearizationErrors_[i]);
203   }
204 
size(void) const205   unsigned size(void) const {
206     return size_;
207   }
208 
aggregate(Vector<Real> & aggSubGrad,Real & aggLinErr,Real & aggDistMeas) const209   void aggregate(Vector<Real> &aggSubGrad, Real &aggLinErr, Real &aggDistMeas) const {
210     Real zero(0), one(1);
211     aggSubGrad.zero(); aggLinErr = zero; aggDistMeas = zero; eG_->zero();
212     Real eLE(0), eDM(0), yLE(0), yDM(0), tLE(0), tDM(0);
213     for (unsigned i = 0; i < size_; ++i) {
214       // Compute aggregate subgradient using Kahan's compensated sum
215       //aggSubGrad.axpy(dualVariables_[i],*subgradients_[i]);
216       yG_->set(*subgradients_[i]); yG_->scale(dualVariables_[i]); yG_->axpy(-one,*eG_);
217       tG_->set(aggSubGrad); tG_->plus(*yG_);
218       eG_->set(*tG_); eG_->axpy(-one,aggSubGrad); eG_->axpy(-one,*yG_);
219       aggSubGrad.set(*tG_);
220       // Compute aggregate linearization error using Kahan's compensated sum
221       //aggLinErr += dualVariables_[i]*linearizationErrors_[i];
222       yLE = dualVariables_[i]*linearizationErrors_[i] - eLE;
223       tLE = aggLinErr + yLE;
224       eLE = (tLE - aggLinErr) - yLE;
225       aggLinErr = tLE;
226       // Compute aggregate distance measure using Kahan's compensated sum
227       //aggDistMeas += dualVariables_[i]*distanceMeasures_[i];
228       yDM = dualVariables_[i]*distanceMeasures_[i] - eDM;
229       tDM = aggDistMeas + yDM;
230       eDM = (tDM - aggDistMeas) - yDM;
231       aggDistMeas = tDM;
232     }
233   }
234 
reset(const Vector<Real> & g,const Real le,const Real dm)235   void reset(const Vector<Real> &g, const Real le, const Real dm) {
236     if (size_ == maxSize_) {
237       // Find indices to remove
238       unsigned loc = size_, cnt = 0;
239       std::vector<unsigned> ind(remSize_,0);
240       for (unsigned i = size_; i > 0; --i) {
241         if ( std::abs(linearizationErrors_[i-1]) < ROL_EPSILON<Real>() ) {
242           loc = i-1;
243           break;
244         }
245       }
246       for (unsigned i = 0; i < size_; ++i) {
247         if ( i != loc ) {
248           ind[cnt] = i;
249           cnt++;
250         }
251         if (cnt == remSize_) {
252           break;
253         }
254       }
255       // Remove indices
256       remove(ind);
257       // Add aggregate subgradient
258       add(g,le,dm);
259     }
260   }
261 
update(const bool flag,const Real linErr,const Real distMeas,const Vector<Real> & g,const Vector<Real> & s)262   void update(const bool flag, const Real linErr, const Real distMeas,
263               const Vector<Real> &g, const Vector<Real> &s) {
264     Real zero(0);
265     if ( flag ) {
266       // Serious step taken: Update linearlization errors and distance measures
267       for (unsigned i = 0; i < size_; ++i) {
268         linearizationErrors_[i] += linErr - subgradients_[i]->dot(s.dual());
269         distanceMeasures_[i]    += distMeas;
270       }
271       linearizationErrors_[size_] = zero;
272       distanceMeasures_[size_]    = zero;
273     }
274     else {
275       // Null step taken:
276       linearizationErrors_[size_] = linErr;
277       distanceMeasures_[size_]    = distMeas;
278     }
279     // Update (sub)gradient bundle
280     (subgradients_[size_])->set(g);
281     // Update dual variables
282     dualVariables_[size_] = zero;
283     // Update bundle size
284     size_++;
285   }
286 
287 protected:
GiGj(const unsigned i,const unsigned j) const288   const Real GiGj(const unsigned i, const unsigned j) const {
289     return subgradient(i).dot(subgradient(j));
290   }
291 
dotGi(const unsigned i,const Vector<Real> & x) const292   const Real dotGi(const unsigned i, const Vector<Real> &x) const {
293     return x.dot(subgradient(i));
294   }
295 
addGi(const unsigned i,const Real a,Vector<Real> & x) const296   void addGi(const unsigned i, const Real a, Vector<Real> &x) const {
297     x.axpy(a,subgradient(i));
298   }
299 
evaluateObjective(std::vector<Real> & g,const std::vector<Real> & x,const Real t) const300   Real evaluateObjective(std::vector<Real> &g, const std::vector<Real> &x, const Real t) const {
301     Real one(1), half(0.5);
302     gx_->zero(); eG_->zero();
303     for (unsigned i = 0; i < Bundle<Real>::size(); ++i) {
304       // Compute Gx using Kahan's compensated sum
305       //gx_->axpy(x[i],*Bundle<Real>::subgradients_[i]);
306       yG_->set(subgradient(i)); yG_->scale(x[i]); yG_->axpy(-one,*eG_);
307       tG_->set(*gx_); tG_->plus(*yG_);
308       eG_->set(*tG_); eG_->axpy(-one,*gx_); eG_->axpy(-one,*yG_);
309       gx_->set(*tG_);
310     }
311     Real Hx(0), val(0), err(0), tmp(0), y(0);
312     for (unsigned i = 0; i < size(); ++i) {
313       // Compute < g_i, Gx >
314       Hx   = dotGi(i,*gx_);
315       // Add to the objective function value using Kahan's compensated sum
316       //val += x[i]*(half*Hx + Bundle<Real>::alpha(i)/t);
317       y    = x[i]*(half*Hx + alpha(i)/t) - err;
318       tmp  = val + y;
319       err  = (tmp - val) - y;
320       val  = tmp;
321       // Add gradient component
322       g[i] = Hx + alpha(i)/t;
323     }
324     return val;
325   }
326 
solveDual_dim1(const Real t,const unsigned maxit=1000,const Real tol=1.e-8)327   unsigned solveDual_dim1(const Real t, const unsigned maxit = 1000, const Real tol = 1.e-8) {
328     setDualVariable(0,static_cast<Real>(1));
329     //std::cout << "dim = " << Bundle<Real>::size() << "  iter = " << 0 << "  CONVERGED!\n";
330     return 0;
331   }
332 
solveDual_dim2(const Real t,const unsigned maxit=1000,const Real tol=1.e-8)333   unsigned solveDual_dim2(const Real t, const unsigned maxit = 1000, const Real tol = 1.e-8) {
334     Real diffg  = gx_->dot(*gx_), zero(0), one(1), half(0.5);
335     gx_->set(subgradient(0)); addGi(1,-one,*gx_);
336     if ( std::abs(diffg) > ROL_EPSILON<Real>() ) {
337       Real diffa  = (alpha(0)-alpha(1))/t;
338       Real gdiffg = dotGi(1,*gx_);
339       setDualVariable(0,std::min(one,std::max(zero,-(gdiffg+diffa)/diffg)));
340       setDualVariable(1,one-getDualVariable(0));
341     }
342     else {
343       if ( std::abs(alpha(0)-alpha(1)) > ROL_EPSILON<Real>() ) {
344         if ( alpha(0) < alpha(1) ) {
345           setDualVariable(0,one); setDualVariable(1,zero);
346         }
347         else if ( alpha(0) > alpha(1) ) {
348           setDualVariable(0,zero); setDualVariable(1,one);
349         }
350       }
351       else {
352         setDualVariable(0,half); setDualVariable(1,half);
353       }
354     }
355     //std::cout << "dim = " << Bundle<Real>::size() << "  iter = " << 0 << "  CONVERGED!\n";
356     return 0;
357   }
358 
359 }; // class Bundle
360 
361 } // namespace ROL
362 
363 #endif
364 
365 //  void aggregate(Vector<Real> &aggSubGrad, Real &aggLinErr, Real &aggDistMeas) const {
366 //    aggSubGrad.zero(); aggLinErr = 0.0; aggDistMeas = 0.0;
367 //    for (unsigned i = 0; i < size_; ++i) {
368 //      //if ( dualVariables_[i] > ROL_EPSILON<Real>() ) {
369 //        aggSubGrad.axpy(dualVariables_[i],*(subgradients_[i]));
370 //        aggLinErr   += dualVariables_[i]*linearizationErrors_[i];
371 //        aggDistMeas += dualVariables_[i]*distanceMeasures_[i];
372 //      //}
373 //    }
374 //  }
375