1 # ifndef CPPAD_EXAMPLE_ABS_NORMAL_ABS_MIN_QUAD_HPP
2 # define CPPAD_EXAMPLE_ABS_NORMAL_ABS_MIN_QUAD_HPP
3 /* --------------------------------------------------------------------------
4 CppAD: C++ Algorithmic Differentiation: Copyright (C) 2003-20 Bradley M. Bell
5
6 CppAD is distributed under the terms of the
7 Eclipse Public License Version 2.0.
8
9 This Source Code may also be made available under the following
10 Secondary License when the conditions for such availability set forth
11 in the Eclipse Public License, Version 2.0 are satisfied:
12 GNU General Public License, Version 2.0 or later.
13 ---------------------------------------------------------------------------- */
14 /*
15 $begin abs_min_quad$$
16 $spell
17 hpp
18 qp
19 jac
20 Jacobian
21 maxitr
22 $$
23 $section abs_normal: Minimize a Linear Abs-normal Approximation$$
24
25 $head Syntax$$
26 $icode%ok% = abs_min_quad(
27 %level%, %n%, %m%, %s%,
28 %g_hat%, %g_jac%, %hessian%, %bound%, %epsilon%, %maxitr%, %delta_x%
29 )%$$
30
31 $head Prototype$$
32 $srcthisfile%
33 0%// BEGIN PROTOTYPE%// END PROTOTYPE%
34 1%$$
35
36 $head Source$$
37 This following is a link to the source code for this example:
38 $cref/abs_min_quad.hpp/abs_min_quad.hpp/$$.
39
40 $head Purpose$$
41 We are given a point $latex \hat{x} \in \B{R}^n$$ and
42 use the notation $latex \tilde{f} (x)$$ for the abs-normal
43 $cref/approximation for f(x)
44 /abs_normal_fun
45 /Abs-normal Approximation
46 /Approximating f(x)
47 /$$
48 near $latex \hat{x}$$.
49 We are also given a vector $latex b \in \B{R}_+^n$$
50 and a positive definite matrix $latex H \in \B{R}^{n \times n}$$.
51 This routine solves the problem
52 $latex \[
53 \begin{array}{lll}
54 \R{minimize} &
55 \Delta x^T H \Delta x / 2 + \tilde{f}( \hat{x} + \Delta x ) &
56 \R{w.r.t} \; \Delta x \in \B{R}^n
57 \\
58 \R{subject \; to} & | \Delta x_j | \leq b_j & j = 0 , \ldots , n-1
59 \end{array}
60 \] $$
61
62 $head DblVector$$
63 is a $cref SimpleVector$$ class with elements of type $code double$$.
64
65 $head SizeVector$$
66 is a $cref SimpleVector$$ class with elements of type $code size_t$$.
67
68 $head f$$
69 We use the notation $icode f$$ for the original function; see
70 $cref/f/abs_normal_fun/f/$$.
71
72 $head level$$
73 This value is less that or equal 3.
74 If $icode%level% == 0%$$,
75 no tracing of the optimization is printed.
76 If $icode%level% >= 1%$$,
77 a trace of each iteration of $code abs_min_quad$$ is printed.
78 If $icode%level% >= 2%$$,
79 a trace of the $cref qp_box$$ sub-problem is printed.
80 If $icode%level% >= 3%$$,
81 a trace of the $cref qp_interior$$ sub-problem is printed.
82
83 $head n$$
84 This is the dimension of the domain space for $icode f$$; see
85 $cref/n/abs_normal_fun/f/n/$$.
86
87 $head m$$
88 This is the dimension of the range space for $icode f$$; see
89 $cref/m/abs_normal_fun/f/m/$$. This must be one so that $latex f$$
90 is an objective function.
91
92 $head s$$
93 This is the number of absolute value terms in $icode f$$; see
94 $cref/s/abs_normal_fun/f/s/$$.
95
96 $head g$$
97 We use the notation $icode g$$ for the abs-normal representation of $icode f$$;
98 see $cref/g/abs_normal_fun/g/$$.
99
100 $head g_hat$$
101 This vector has size $icode%m% + %s%$$ and is the value of
102 $icode g(x, u)$$ at $latex x = \hat{x}$$ and $latex u = a( \hat{x} )$$.
103
104 $head g_jac$$
105 This vector has size $codei%(%m% + %s%) * (%n% + %s%)%$$ and is the Jacobian of
106 $latex g(x, u)$$ at $latex x = \hat{x}$$ and $latex u = a( \hat{x} )$$.
107
108 $head hessian$$
109 This vector has size $icode%n% * %n%$$.
110 It is a $cref/row-major/glossary/Row-major Representation/$$ representation
111 of the matrix $latex H \in \B{R}^{n \times n}$$.
112
113 $head bound$$
114 This vector has size $icode n$$ and is the vector $latex b \in \B{R}^n$$.
115 The trust region is defined as the set of $latex \Delta x$$ such that
116 $latex \[
117 | \Delta x | \leq b_j
118 \]$$
119 for $latex j = 0 , \ldots , n-1$$.
120
121 $head epsilon$$
122 The value $icode%epsilon%[0]%$$ is convergence criteria in terms
123 of the infinity norm of the difference of $icode delta_x$$
124 between iterations.
125 The value $icode%epsilon%[1]%$$ is convergence criteria in terms
126 of the derivative of the objective; i.e.
127 $latex \[
128 \Delta x^T H \Delta x / 2 + \tilde{f}( \hat{x} + \Delta x)
129 \] $$
130
131 $head maxitr$$
132 This is a vector with size 2.
133 The value $icode%maxitr%[0]%$$ is the maximum number of
134 $code abs_min_quad$$ iterations to try before giving up on convergence.
135 The value $icode%maxitr%[1]%$$ is the maximum number of iterations in
136 the $cref/qp_interior/qp_interior/maxitr/$$ sub-problems.
137
138 $head delta_x$$
139 This vector $latex \Delta x$$ has size $icode n$$.
140 The input value of its elements does not matter.
141 Upon return,
142 the approximate minimizer of the objective with respect to the trust region.
143
144 $head Method$$
145
146 $subhead sigma$$
147 We use the notation
148 $latex \[
149 \sigma (x) = \R{sign} ( z[ x , a(x) ] )
150 \] $$
151 where
152 $cref/a(x)/abs_normal_fun/a/a(x)/$$ and
153 $cref/z(x, u)/abs_normal_fun/g/z(x, u)/$$
154 are as defined in the abs-normal representation of $latex f(x)$$.
155
156 $subhead Cutting Planes$$
157 At each iteration,
158 we are given affine functions $latex p_k (x)$$
159 such that $latex p_k ( x_k ) = \tilde{f}( x_k )$$ and
160 $latex p_k^{(1)} ( x_k )$$ is the derivative $latex \tilde{f}^{(1)} ( x_k )$$
161 corresponding to $latex \sigma ( x_k )$$.
162
163 $subhead Iteration$$
164 At iteration $latex k$$, we solve the problem
165 $latex \[
166 \begin{array}{lll}
167 \R{minimize}
168 & \Delta x^T H \Delta x / 2 +
169 \max \{ p_k ( \hat{x} + \Delta x) \W{:} k = 0 , \ldots , K-1 \}
170 & \R{w.r.t} \; \Delta x
171 \\
172 \R{subject \; to} & - b \leq \Delta x \leq + b
173 \end{array}
174 \] $$
175 The solution is the new point $latex x_K$$
176 at which the new affine approximation
177 $latex p_K (x)$$ is constructed.
178 This process is iterated until the difference
179 $latex x_K - x_{K-1}$$ is small enough.
180
181
182 $children%example/abs_normal/abs_min_quad.cpp
183 %example/abs_normal/abs_min_quad.omh
184 %$$
185 $head Example$$
186 The file $cref abs_min_quad.cpp$$ contains an example and test of
187 $code abs_min_quad$$.
188
189 $end
190 -----------------------------------------------------------------------------
191 */
192 # include <cppad/cppad.hpp>
193 # include "qp_box.hpp"
194 # include "abs_eval.hpp"
195
196 // BEGIN C++
197 namespace CppAD { // BEGIN_CPPAD_NAMESPACE
198
199 // BEGIN PROTOTYPE
200 template <class DblVector, class SizeVector>
abs_min_quad(size_t level,size_t n,size_t m,size_t s,const DblVector & g_hat,const DblVector & g_jac,const DblVector & hessian,const DblVector & bound,const DblVector & epsilon,const SizeVector & maxitr,DblVector & delta_x)201 bool abs_min_quad(
202 size_t level ,
203 size_t n ,
204 size_t m ,
205 size_t s ,
206 const DblVector& g_hat ,
207 const DblVector& g_jac ,
208 const DblVector& hessian ,
209 const DblVector& bound ,
210 const DblVector& epsilon ,
211 const SizeVector& maxitr ,
212 DblVector& delta_x )
213 // END PROTOTYPE
214 { using std::fabs;
215 bool ok = true;
216 double inf = std::numeric_limits<double>::infinity();
217 //
218 CPPAD_ASSERT_KNOWN(
219 level <= 4,
220 "abs_min_quad: level is not less that or equal 3"
221 );
222 CPPAD_ASSERT_KNOWN(
223 size_t(epsilon.size()) == 2,
224 "abs_min_quad: size of epsilon not equal to 2"
225 );
226 CPPAD_ASSERT_KNOWN(
227 size_t(maxitr.size()) == 2,
228 "abs_min_quad: size of maxitr not equal to 2"
229 );
230 CPPAD_ASSERT_KNOWN(
231 m == 1,
232 "abs_min_quad: m is not equal to 1"
233 );
234 CPPAD_ASSERT_KNOWN(
235 size_t(delta_x.size()) == n,
236 "abs_min_quad: size of delta_x not equal to n"
237 );
238 CPPAD_ASSERT_KNOWN(
239 size_t(bound.size()) == n,
240 "abs_min_quad: size of bound not equal to n"
241 );
242 CPPAD_ASSERT_KNOWN(
243 size_t(g_hat.size()) == m + s,
244 "abs_min_quad: size of g_hat not equal to m + s"
245 );
246 CPPAD_ASSERT_KNOWN(
247 size_t(g_jac.size()) == (m + s) * (n + s),
248 "abs_min_quad: size of g_jac not equal to (m + s)*(n + s)"
249 );
250 CPPAD_ASSERT_KNOWN(
251 size_t(hessian.size()) == n * n,
252 "abs_min_quad: size of hessian not equal to n * n"
253 );
254 CPPAD_ASSERT_KNOWN(
255 size_t(bound.size()) == n,
256 "abs_min_quad: size of bound is not equal to n"
257 );
258 if( level > 0 )
259 { std::cout << "start abs_min_quad\n";
260 CppAD::abs_print_mat("g_hat", m + s, 1, g_hat);
261 CppAD::abs_print_mat("g_jac", m + s, n + s, g_jac);
262 CppAD::abs_print_mat("hessian", n, n, hessian);
263 CppAD::abs_print_mat("bound", n, 1, bound);
264 }
265 // partial y(x, u) w.r.t x (J in reference)
266 DblVector py_px(n);
267 for(size_t j = 0; j < n; j++)
268 py_px[ j ] = g_jac[ j ];
269 //
270 // partial y(x, u) w.r.t u (Y in reference)
271 DblVector py_pu(s);
272 for(size_t j = 0; j < s; j++)
273 py_pu[ j ] = g_jac[ n + j ];
274 //
275 // partial z(x, u) w.r.t x (Z in reference)
276 DblVector pz_px(s * n);
277 for(size_t i = 0; i < s; i++)
278 { for(size_t j = 0; j < n; j++)
279 { pz_px[ i * n + j ] = g_jac[ (n + s) * (i + m) + j ];
280 }
281 }
282 // partial z(x, u) w.r.t u (L in reference)
283 DblVector pz_pu(s * s);
284 for(size_t i = 0; i < s; i++)
285 { for(size_t j = 0; j < s; j++)
286 { pz_pu[ i * s + j ] = g_jac[ (n + s) * (i + m) + n + j ];
287 }
288 }
289 // initailize delta_x
290 for(size_t j = 0; j < n; j++)
291 delta_x[j] = 0.0;
292 //
293 // current set of cutting planes
294 DblVector C(maxitr[0] * n), c(maxitr[0]);
295 //
296 // value of abs-normal approximation at x_hat + delta_x
297 DblVector g_tilde = CppAD::abs_eval(n, m, s, g_hat, g_jac, delta_x);
298 //
299 // value of sigma at delta_x = 0; i.e., sign( z(x, u) )
300 CppAD::vector<double> sigma(s);
301 for(size_t i = 0; i < s; i++)
302 sigma[i] = CppAD::sign( g_tilde[m + i] );
303 //
304 // initial value of the objective
305 double obj_cur = g_tilde[0];
306 //
307 // initial number of cutting planes
308 size_t n_plane = 0;
309 //
310 if( level > 0 )
311 { std::cout << "obj = " << obj_cur << "\n";
312 CppAD::abs_print_mat("delta_x", n, 1, delta_x);
313 }
314 for(size_t itr = 0; itr < maxitr[0]; itr++)
315 {
316 // Equation (5), Propostion 3.1 of reference
317 // dy_dx = py_px + py_pu * Sigma * (I - pz_pu * Sigma)^-1 * pz_px
318 //
319 // tmp_ss = I - pz_pu * Sigma
320 DblVector tmp_ss(s * s);
321 for(size_t i = 0; i < s; i++)
322 { for(size_t j = 0; j < s; j++)
323 tmp_ss[i * s + j] = - pz_pu[i * s + j] * sigma[j];
324 tmp_ss[i * s + i] += 1.0;
325 }
326 // tmp_sn = (I - pz_pu * Sigma)^-1 * pz_px
327 double logdet;
328 DblVector tmp_sn(s * n);
329 LuSolve(s, n, tmp_ss, pz_px, tmp_sn, logdet);
330 //
331 // tmp_sn = Sigma * (I - pz_pu * Sigma)^-1 * pz_px
332 for(size_t i = 0; i < s; i++)
333 { for(size_t j = 0; j < n; j++)
334 tmp_sn[i * n + j] *= sigma[i];
335 }
336 // dy_dx = py_px + py_pu * Sigma * (I - pz_pu * Sigma)^-1 * pz_px
337 DblVector dy_dx(n);
338 for(size_t j = 0; j < n; j++)
339 { dy_dx[j] = py_px[j];
340 for(size_t k = 0; k < s; k++)
341 dy_dx[j] += py_pu[k] * tmp_sn[ k * n + j];
342 }
343 //
344 // compute derivative of the quadratic term
345 DblVector dq_dx(n);
346 for(size_t j = 0; j < n; j++)
347 { dq_dx[j] = 0.0;
348 for(size_t i = 0; i < n; i++)
349 dq_dx[j] += delta_x[i] * hessian[i * n + j];
350 }
351 //
352 // check for case where derivative of objective is zero
353 // (in convex case, this is the minimizer)
354 bool near_zero = true;
355 for(size_t j = 0; j < n; j++)
356 near_zero &= std::fabs( dq_dx[j] + dy_dx[j] ) < epsilon[1];
357 if( near_zero )
358 { if( level > 0 )
359 std::cout << "end abs_min_quad: local derivative near zero\n";
360 return true;
361 }
362 // value of hyperplane at delta_x
363 double plane_at_zero = g_tilde[0];
364 //
365 // value of hyperplane at 0
366 for(size_t j = 0; j < n; j++)
367 plane_at_zero -= dy_dx[j] * delta_x[j];
368 //
369 // add a cutting plane with value g_tilde[0] at delta_x
370 // and derivative dy_dx
371 c[n_plane] = plane_at_zero;
372 for(size_t j = 0; j < n; j++)
373 C[n_plane * n + j] = dy_dx[j];
374 ++n_plane;
375 //
376 // variables for cutting plane problem are (dx, w)
377 // c[i] + C[i,:] * dx <= w
378 DblVector c_box(n_plane), C_box(n_plane * (n + 1));
379 for(size_t i = 0; i < n_plane; i++)
380 { c_box[i] = c[i];
381 for(size_t j = 0; j < n; j++)
382 C_box[i * (n+1) + j] = C[i * n + j];
383 C_box[i * (n+1) + n] = -1.0;
384 }
385 //
386 // w is the objective
387 DblVector g_box(n + 1);
388 for(size_t i = 0; i < size_t(c_box.size()); i++)
389 g_box[i] = 0.0;
390 g_box[n] = 1.0;
391 //
392 // a_box, b_box
393 DblVector a_box(n+1), b_box(n+1);
394 for(size_t j = 0; j < n; j++)
395 { a_box[j] = - bound[j];
396 b_box[j] = + bound[j];
397 }
398 a_box[n] = - inf;
399 b_box[n] = + inf;
400 //
401 // initial delta_x in qp_box is zero
402 DblVector xin_box(n + 1);
403 for(size_t j = 0; j < n; j++)
404 xin_box[j] = 0.0;
405 // initial w in qp_box is 1 + max_i c[i]
406 xin_box[n] = 1.0 + c_box[0];
407 for(size_t i = 1; i < n_plane; i++)
408 xin_box[n] = std::max( xin_box[n], 1.0 + c_box[i] );
409 //
410 DblVector hessian_box( (n+1) * (n+1) );
411 for(size_t i = 0; i < n+1; i++)
412 { for(size_t j = 0; j < n+1; j++)
413 { if( i == n || j == n )
414 hessian_box[i * (n+1) + j] = 0.0;
415 else
416 hessian_box[i * (n+1) + j] = hessian[i * n + j];
417 }
418 }
419 //
420 // solve the cutting plane problem
421 DblVector xout_box(n + 1);
422 size_t level_box = 0;
423 if( level > 0 )
424 level_box = level - 1;
425 ok &= CppAD::qp_box(
426 level_box,
427 a_box,
428 b_box,
429 c_box,
430 C_box,
431 g_box,
432 hessian_box,
433 epsilon[1],
434 maxitr[1],
435 xin_box,
436 xout_box
437 );
438 if( ! ok )
439 { if( level > 0 )
440 { CppAD::abs_print_mat("delta_x", n, 1, delta_x);
441 std::cout << "end abs_min_quad: qp_box failed\n";
442 }
443 return false;
444 }
445 DblVector delta_new(n);
446 for(size_t j = 0; j < n; j++)
447 delta_new[j] = xout_box[j];
448 //
449 // check for convergence
450 double max_diff = 0.0;
451 for(size_t j = 0; j < n; j++)
452 { double diff = delta_x[j] - delta_new[j];
453 max_diff = std::max( max_diff, std::fabs(diff) );
454 }
455 //
456 // new value of the objective
457 DblVector g_new = CppAD::abs_eval(n, m, s, g_hat, g_jac, delta_new);
458 double obj_new = g_new[0];
459 for(size_t i = 0; i < n; i++)
460 { for(size_t j = 0; j < n; j++)
461 obj_new += delta_new[i] * hessian[i * n + j] * delta_new[j];
462 }
463 g_tilde = g_new;
464 obj_cur = obj_new;
465 delta_x = delta_new;
466 //
467 if( level > 0 )
468 { std::cout << "itr = " << itr << ", max_diff = " << max_diff
469 << ", obj_cur = " << obj_cur << "\n";
470 CppAD::abs_print_mat("delta_x", n, 1, delta_x);
471 }
472 //
473 // value of sigma at new delta_x; i.e., sign( z(x, u) )
474 for(size_t i = 0; i < s; i++)
475 sigma[i] = CppAD::sign( g_tilde[m + i] );
476 //
477 if( max_diff < epsilon[0] )
478 { if( level > 0 )
479 std::cout << "end abs_min_quad: change in delta_x near zero\n";
480 return true;
481 }
482 }
483 if( level > 0 )
484 std::cout << "end abs_min_quad: maximum number of iterations exceeded\n";
485 return false;
486 }
487 } // END_CPPAD_NAMESPACE
488 // END C++
489
490 # endif
491