1 # ifndef CPPAD_EXAMPLE_ABS_NORMAL_ABS_MIN_QUAD_HPP
2 # define CPPAD_EXAMPLE_ABS_NORMAL_ABS_MIN_QUAD_HPP
3 /* --------------------------------------------------------------------------
4 CppAD: C++ Algorithmic Differentiation: Copyright (C) 2003-20 Bradley M. Bell
5 
6 CppAD is distributed under the terms of the
7              Eclipse Public License Version 2.0.
8 
9 This Source Code may also be made available under the following
10 Secondary License when the conditions for such availability set forth
11 in the Eclipse Public License, Version 2.0 are satisfied:
12       GNU General Public License, Version 2.0 or later.
13 ---------------------------------------------------------------------------- */
14 /*
15 $begin abs_min_quad$$
16 $spell
17     hpp
18     qp
19     jac
20     Jacobian
21     maxitr
22 $$
23 $section abs_normal: Minimize a Linear Abs-normal Approximation$$
24 
25 $head Syntax$$
26 $icode%ok% = abs_min_quad(
27     %level%, %n%, %m%, %s%,
28     %g_hat%, %g_jac%, %hessian%, %bound%, %epsilon%, %maxitr%, %delta_x%
29 )%$$
30 
31 $head Prototype$$
32 $srcthisfile%
33     0%// BEGIN PROTOTYPE%// END PROTOTYPE%
34 1%$$
35 
36 $head Source$$
37 This following is a link to the source code for this example:
38 $cref/abs_min_quad.hpp/abs_min_quad.hpp/$$.
39 
40 $head Purpose$$
41 We are given a point $latex \hat{x} \in \B{R}^n$$ and
42 use the notation $latex \tilde{f} (x)$$ for the abs-normal
43 $cref/approximation for f(x)
44     /abs_normal_fun
45     /Abs-normal Approximation
46     /Approximating f(x)
47 /$$
48 near $latex \hat{x}$$.
49 We are also given a vector $latex b \in \B{R}_+^n$$
50 and a positive definite matrix $latex H \in \B{R}^{n \times n}$$.
51 This routine solves the problem
52 $latex \[
53 \begin{array}{lll}
54 \R{minimize} &
55     \Delta x^T H \Delta x / 2 + \tilde{f}( \hat{x} + \Delta x ) &
56     \R{w.r.t} \; \Delta x \in \B{R}^n
57 \\
58 \R{subject \; to} & | \Delta x_j | \leq b_j & j = 0 , \ldots , n-1
59 \end{array}
60 \] $$
61 
62 $head DblVector$$
63 is a $cref SimpleVector$$ class with elements of type $code double$$.
64 
65 $head SizeVector$$
66 is a $cref SimpleVector$$ class with elements of type $code size_t$$.
67 
68 $head f$$
69 We use the notation $icode f$$ for the original function; see
70 $cref/f/abs_normal_fun/f/$$.
71 
72 $head level$$
73 This value is less that or equal 3.
74 If $icode%level% == 0%$$,
75 no tracing of the optimization is printed.
76 If $icode%level% >= 1%$$,
77 a trace of each iteration of $code abs_min_quad$$ is printed.
78 If $icode%level% >= 2%$$,
79 a trace of the $cref qp_box$$ sub-problem is printed.
80 If $icode%level% >= 3%$$,
81 a trace of the $cref qp_interior$$ sub-problem is printed.
82 
83 $head n$$
84 This is the dimension of the domain space for $icode f$$; see
85 $cref/n/abs_normal_fun/f/n/$$.
86 
87 $head m$$
88 This is the dimension of the range space for $icode f$$; see
89 $cref/m/abs_normal_fun/f/m/$$. This must be one so that $latex f$$
90 is an objective function.
91 
92 $head s$$
93 This is the number of absolute value terms in $icode f$$; see
94 $cref/s/abs_normal_fun/f/s/$$.
95 
96 $head g$$
97 We use the notation $icode g$$ for the abs-normal representation of $icode f$$;
98 see $cref/g/abs_normal_fun/g/$$.
99 
100 $head g_hat$$
101 This vector has size $icode%m% + %s%$$ and is the value of
102 $icode g(x, u)$$ at $latex x = \hat{x}$$ and $latex u = a( \hat{x} )$$.
103 
104 $head g_jac$$
105 This vector has size $codei%(%m% + %s%) * (%n% + %s%)%$$ and is the Jacobian of
106 $latex g(x, u)$$ at $latex x = \hat{x}$$ and $latex u = a( \hat{x} )$$.
107 
108 $head hessian$$
109 This vector has size $icode%n% * %n%$$.
110 It is a $cref/row-major/glossary/Row-major Representation/$$ representation
111 of the matrix $latex H \in \B{R}^{n \times n}$$.
112 
113 $head bound$$
114 This vector has size $icode n$$ and is the vector $latex b \in \B{R}^n$$.
115 The trust region is defined as the set of $latex \Delta x$$ such that
116 $latex \[
117     | \Delta x | \leq b_j
118 \]$$
119 for $latex j = 0 , \ldots , n-1$$.
120 
121 $head epsilon$$
122 The value $icode%epsilon%[0]%$$ is convergence criteria in terms
123 of the infinity norm of the difference of $icode delta_x$$
124 between iterations.
125 The value $icode%epsilon%[1]%$$ is convergence criteria in terms
126 of the derivative of the objective; i.e.
127 $latex \[
128     \Delta x^T H \Delta x / 2 + \tilde{f}( \hat{x} + \Delta x)
129 \] $$
130 
131 $head maxitr$$
132 This is a vector with size 2.
133 The value $icode%maxitr%[0]%$$ is the maximum number of
134 $code abs_min_quad$$ iterations to try before giving up on convergence.
135 The value $icode%maxitr%[1]%$$ is the maximum number of iterations in
136 the $cref/qp_interior/qp_interior/maxitr/$$ sub-problems.
137 
138 $head delta_x$$
139 This vector $latex \Delta x$$ has size $icode n$$.
140 The input value of its elements does not matter.
141 Upon return,
142 the approximate minimizer of the objective with respect to the trust region.
143 
144 $head Method$$
145 
146 $subhead sigma$$
147 We use the notation
148 $latex \[
149     \sigma (x) = \R{sign} ( z[ x , a(x) ] )
150 \] $$
151 where
152 $cref/a(x)/abs_normal_fun/a/a(x)/$$ and
153 $cref/z(x, u)/abs_normal_fun/g/z(x, u)/$$
154 are as defined in the abs-normal representation of $latex f(x)$$.
155 
156 $subhead Cutting Planes$$
157 At each iteration,
158 we are given affine functions $latex p_k (x)$$
159 such that $latex p_k ( x_k ) = \tilde{f}( x_k )$$  and
160 $latex p_k^{(1)} ( x_k )$$ is the derivative $latex \tilde{f}^{(1)} ( x_k )$$
161 corresponding to $latex \sigma ( x_k )$$.
162 
163 $subhead Iteration$$
164 At iteration $latex k$$, we solve the problem
165 $latex \[
166 \begin{array}{lll}
167 \R{minimize}
168 & \Delta x^T H \Delta x / 2 +
169     \max \{ p_k ( \hat{x} + \Delta x) \W{:} k = 0 , \ldots , K-1 \}
170 & \R{w.r.t} \; \Delta x
171 \\
172 \R{subject \; to} & - b \leq \Delta x \leq + b
173 \end{array}
174 \] $$
175 The solution is the new point $latex x_K$$
176 at which the new affine approximation
177 $latex p_K (x)$$ is constructed.
178 This process is iterated until the difference
179 $latex x_K - x_{K-1}$$ is small enough.
180 
181 
182 $children%example/abs_normal/abs_min_quad.cpp
183     %example/abs_normal/abs_min_quad.omh
184 %$$
185 $head Example$$
186 The file $cref abs_min_quad.cpp$$ contains an example and test of
187 $code abs_min_quad$$.
188 
189 $end
190 -----------------------------------------------------------------------------
191 */
192 # include <cppad/cppad.hpp>
193 # include "qp_box.hpp"
194 # include "abs_eval.hpp"
195 
196 // BEGIN C++
197 namespace CppAD { // BEGIN_CPPAD_NAMESPACE
198 
199 // BEGIN PROTOTYPE
200 template <class DblVector, class SizeVector>
abs_min_quad(size_t level,size_t n,size_t m,size_t s,const DblVector & g_hat,const DblVector & g_jac,const DblVector & hessian,const DblVector & bound,const DblVector & epsilon,const SizeVector & maxitr,DblVector & delta_x)201 bool abs_min_quad(
202     size_t            level   ,
203     size_t            n       ,
204     size_t            m       ,
205     size_t            s       ,
206     const DblVector&  g_hat   ,
207     const DblVector&  g_jac   ,
208     const DblVector&  hessian ,
209     const DblVector&  bound   ,
210     const DblVector&  epsilon ,
211     const SizeVector& maxitr  ,
212     DblVector&        delta_x )
213 // END PROTOTYPE
214 {   using std::fabs;
215     bool ok    = true;
216     double inf = std::numeric_limits<double>::infinity();
217     //
218     CPPAD_ASSERT_KNOWN(
219         level <= 4,
220         "abs_min_quad: level is not less that or equal 3"
221     );
222     CPPAD_ASSERT_KNOWN(
223         size_t(epsilon.size()) == 2,
224         "abs_min_quad: size of epsilon not equal to 2"
225     );
226     CPPAD_ASSERT_KNOWN(
227         size_t(maxitr.size()) == 2,
228         "abs_min_quad: size of maxitr not equal to 2"
229     );
230     CPPAD_ASSERT_KNOWN(
231         m == 1,
232         "abs_min_quad: m is not equal to 1"
233     );
234     CPPAD_ASSERT_KNOWN(
235         size_t(delta_x.size()) == n,
236         "abs_min_quad: size of delta_x not equal to n"
237     );
238     CPPAD_ASSERT_KNOWN(
239         size_t(bound.size()) == n,
240         "abs_min_quad: size of bound not equal to n"
241     );
242     CPPAD_ASSERT_KNOWN(
243         size_t(g_hat.size()) == m + s,
244         "abs_min_quad: size of g_hat not equal to m + s"
245     );
246     CPPAD_ASSERT_KNOWN(
247         size_t(g_jac.size()) == (m + s) * (n + s),
248         "abs_min_quad: size of g_jac not equal to (m + s)*(n + s)"
249     );
250     CPPAD_ASSERT_KNOWN(
251         size_t(hessian.size()) == n * n,
252         "abs_min_quad: size of hessian not equal to n * n"
253     );
254     CPPAD_ASSERT_KNOWN(
255         size_t(bound.size()) == n,
256         "abs_min_quad: size of bound is not equal to n"
257     );
258     if( level > 0 )
259     {   std::cout << "start abs_min_quad\n";
260         CppAD::abs_print_mat("g_hat", m + s, 1, g_hat);
261         CppAD::abs_print_mat("g_jac", m + s, n + s, g_jac);
262         CppAD::abs_print_mat("hessian", n, n, hessian);
263         CppAD::abs_print_mat("bound", n, 1, bound);
264     }
265     // partial y(x, u) w.r.t x (J in reference)
266     DblVector py_px(n);
267     for(size_t j = 0; j < n; j++)
268         py_px[ j ] = g_jac[ j ];
269     //
270     // partial y(x, u) w.r.t u (Y in reference)
271     DblVector py_pu(s);
272     for(size_t j = 0; j < s; j++)
273         py_pu[ j ] = g_jac[ n + j ];
274     //
275     // partial z(x, u) w.r.t x (Z in reference)
276     DblVector pz_px(s * n);
277     for(size_t i = 0; i < s; i++)
278     {   for(size_t j = 0; j < n; j++)
279         {   pz_px[ i * n + j ] = g_jac[ (n + s) * (i + m) + j ];
280         }
281     }
282     // partial z(x, u) w.r.t u (L in reference)
283     DblVector pz_pu(s * s);
284     for(size_t i = 0; i < s; i++)
285     {   for(size_t j = 0; j < s; j++)
286         {   pz_pu[ i * s + j ] = g_jac[ (n + s) * (i + m) + n + j ];
287         }
288     }
289     // initailize delta_x
290     for(size_t j = 0; j < n; j++)
291         delta_x[j] = 0.0;
292     //
293     // current set of cutting planes
294     DblVector C(maxitr[0] * n), c(maxitr[0]);
295     //
296     // value of abs-normal approximation at x_hat + delta_x
297     DblVector g_tilde = CppAD::abs_eval(n, m, s, g_hat, g_jac, delta_x);
298     //
299     // value of sigma at delta_x = 0; i.e., sign( z(x, u) )
300     CppAD::vector<double> sigma(s);
301     for(size_t i = 0; i < s; i++)
302         sigma[i] = CppAD::sign( g_tilde[m + i] );
303     //
304     // initial value of the objective
305     double obj_cur =  g_tilde[0];
306     //
307     // initial number of cutting planes
308     size_t n_plane = 0;
309     //
310     if( level > 0 )
311     {   std::cout << "obj = " << obj_cur << "\n";
312         CppAD::abs_print_mat("delta_x", n, 1, delta_x);
313     }
314     for(size_t itr = 0; itr < maxitr[0]; itr++)
315     {
316         // Equation (5), Propostion 3.1 of reference
317         // dy_dx = py_px + py_pu * Sigma * (I - pz_pu * Sigma)^-1 * pz_px
318         //
319         // tmp_ss = I - pz_pu * Sigma
320         DblVector tmp_ss(s * s);
321         for(size_t i = 0; i < s; i++)
322         {   for(size_t j = 0; j < s; j++)
323                 tmp_ss[i * s + j] = - pz_pu[i * s + j] * sigma[j];
324             tmp_ss[i * s + i] += 1.0;
325         }
326         // tmp_sn = (I - pz_pu * Sigma)^-1 * pz_px
327         double logdet;
328         DblVector tmp_sn(s * n);
329         LuSolve(s, n, tmp_ss, pz_px, tmp_sn, logdet);
330         //
331         // tmp_sn = Sigma * (I - pz_pu * Sigma)^-1 * pz_px
332         for(size_t i = 0; i < s; i++)
333         {   for(size_t j = 0; j < n; j++)
334                 tmp_sn[i * n + j] *= sigma[i];
335         }
336         // dy_dx = py_px + py_pu * Sigma * (I - pz_pu * Sigma)^-1 * pz_px
337         DblVector dy_dx(n);
338         for(size_t j = 0; j < n; j++)
339         {   dy_dx[j] = py_px[j];
340             for(size_t k = 0; k < s; k++)
341                 dy_dx[j] += py_pu[k] * tmp_sn[ k * n + j];
342         }
343         //
344         // compute derivative of the quadratic term
345         DblVector dq_dx(n);
346         for(size_t j = 0; j < n; j++)
347         {   dq_dx[j] = 0.0;
348             for(size_t i = 0; i < n; i++)
349                 dq_dx[j] += delta_x[i] * hessian[i * n + j];
350         }
351         //
352         // check for case where derivative of objective is zero
353         // (in convex case, this is the minimizer)
354         bool near_zero = true;
355         for(size_t j = 0; j < n; j++)
356             near_zero &= std::fabs( dq_dx[j] + dy_dx[j] ) < epsilon[1];
357         if( near_zero )
358         {   if( level > 0 )
359                 std::cout << "end abs_min_quad: local derivative near zero\n";
360             return true;
361         }
362         // value of hyperplane at delta_x
363         double plane_at_zero = g_tilde[0];
364         //
365         // value of hyperplane at 0
366         for(size_t j = 0; j < n; j++)
367             plane_at_zero -= dy_dx[j] * delta_x[j];
368         //
369         // add a cutting plane with value g_tilde[0] at delta_x
370         // and derivative dy_dx
371         c[n_plane] = plane_at_zero;
372         for(size_t j = 0; j < n; j++)
373             C[n_plane * n + j] = dy_dx[j];
374         ++n_plane;
375         //
376         // variables for cutting plane problem are (dx, w)
377         // c[i] + C[i,:] * dx <= w
378         DblVector c_box(n_plane), C_box(n_plane * (n + 1));
379         for(size_t i = 0; i < n_plane; i++)
380         {   c_box[i] = c[i];
381             for(size_t j = 0; j < n; j++)
382                 C_box[i * (n+1) + j] = C[i * n + j];
383             C_box[i * (n+1) + n] = -1.0;
384         }
385         //
386         // w is the objective
387         DblVector g_box(n + 1);
388         for(size_t i = 0; i < size_t(c_box.size()); i++)
389             g_box[i] = 0.0;
390         g_box[n] = 1.0;
391         //
392         // a_box, b_box
393         DblVector a_box(n+1), b_box(n+1);
394         for(size_t j = 0; j < n; j++)
395         {   a_box[j] = - bound[j];
396             b_box[j] = + bound[j];
397         }
398         a_box[n] = - inf;
399         b_box[n] = + inf;
400         //
401         // initial delta_x in qp_box is zero
402         DblVector xin_box(n + 1);
403         for(size_t j = 0; j < n; j++)
404             xin_box[j] = 0.0;
405         // initial w in qp_box is 1 + max_i c[i]
406         xin_box[n] = 1.0 + c_box[0];
407         for(size_t i = 1; i < n_plane; i++)
408             xin_box[n] = std::max( xin_box[n], 1.0 + c_box[i] );
409         //
410         DblVector hessian_box( (n+1) * (n+1) );
411         for(size_t i = 0; i < n+1; i++)
412         {   for(size_t j = 0; j < n+1; j++)
413             {   if( i == n || j == n )
414                     hessian_box[i * (n+1) + j] = 0.0;
415                 else
416                     hessian_box[i * (n+1) + j] = hessian[i * n + j];
417             }
418         }
419         //
420         // solve the cutting plane problem
421         DblVector xout_box(n + 1);
422         size_t level_box = 0;
423         if( level > 0 )
424             level_box = level - 1;
425         ok &= CppAD::qp_box(
426             level_box,
427             a_box,
428             b_box,
429             c_box,
430             C_box,
431             g_box,
432             hessian_box,
433             epsilon[1],
434             maxitr[1],
435             xin_box,
436             xout_box
437         );
438         if( ! ok )
439         {   if( level > 0 )
440             {   CppAD::abs_print_mat("delta_x", n, 1, delta_x);
441                 std::cout << "end abs_min_quad: qp_box failed\n";
442             }
443             return false;
444         }
445         DblVector delta_new(n);
446         for(size_t j = 0; j < n; j++)
447             delta_new[j] = xout_box[j];
448         //
449         // check for convergence
450         double max_diff = 0.0;
451         for(size_t j = 0; j < n; j++)
452         {   double diff = delta_x[j] - delta_new[j];
453             max_diff    = std::max( max_diff, std::fabs(diff) );
454         }
455         //
456         // new value of the objective
457         DblVector g_new   = CppAD::abs_eval(n, m, s, g_hat, g_jac, delta_new);
458         double    obj_new = g_new[0];
459         for(size_t i = 0; i < n; i++)
460         {   for(size_t j = 0; j < n; j++)
461                 obj_new += delta_new[i] * hessian[i * n + j] * delta_new[j];
462         }
463         g_tilde = g_new;
464         obj_cur = obj_new;
465         delta_x = delta_new;
466         //
467         if( level > 0 )
468         {   std::cout << "itr = " << itr << ", max_diff = " << max_diff
469                 << ", obj_cur = " << obj_cur << "\n";
470             CppAD::abs_print_mat("delta_x", n, 1, delta_x);
471         }
472         //
473         // value of sigma at new delta_x; i.e., sign( z(x, u) )
474         for(size_t i = 0; i < s; i++)
475             sigma[i] = CppAD::sign( g_tilde[m + i] );
476         //
477         if( max_diff < epsilon[0] )
478         {   if( level > 0 )
479                 std::cout << "end abs_min_quad: change in delta_x near zero\n";
480             return true;
481         }
482     }
483     if( level > 0 )
484         std::cout << "end abs_min_quad: maximum number of iterations exceeded\n";
485     return false;
486 }
487 } // END_CPPAD_NAMESPACE
488 // END C++
489 
490 # endif
491