1 # ifndef CPPAD_CORE_SPARSE_HES_HPP
2 # define CPPAD_CORE_SPARSE_HES_HPP
3 /* --------------------------------------------------------------------------
4 CppAD: C++ Algorithmic Differentiation: Copyright (C) 2003-19 Bradley M. Bell
5
6 CppAD is distributed under the terms of the
7 Eclipse Public License Version 2.0.
8
9 This Source Code may also be made available under the following
10 Secondary License when the conditions for such availability set forth
11 in the Eclipse Public License, Version 2.0 are satisfied:
12 GNU General Public License, Version 2.0 or later.
13 ---------------------------------------------------------------------------- */
14
15 /*
16 $begin sparse_hes$$
17 $spell
18 const
19 Taylor
20 rc
21 rcv
22 nr
23 nc
24 hes
25 std
26 cppad
27 colpack
28 cmake
29 Jacobian
30 $$
31
32 $section Computing Sparse Hessians$$
33
34 $head Syntax$$
35 $icode%n_sweep% = %f%.sparse_hes(
36 %x%, %w%, %subset%, %pattern%, %coloring%, %work%
37 )%$$
38
39 $head Purpose$$
40 We use $latex F : \B{R}^n \rightarrow \B{R}^m$$ to denote the
41 function corresponding to $icode f$$.
42 Here $icode n$$ is the $cref/domain/seq_property/Domain/$$ size,
43 and $icode m$$ is the $cref/range/seq_property/Range/$$ size, or $icode f$$.
44 The syntax above takes advantage of sparsity when computing the Hessian
45 $latex \[
46 H(x) = \dpow{2}{x} \sum_{i=0}^{m-1} w_i F_i (x)
47 \] $$
48 In the sparse case, this should be faster and take less memory than
49 $cref Hessian$$.
50 The matrix element $latex H_{i,j} (x)$$ is the second partial of
51 $latex w^\R{T} F (x)$$ with respect to $latex x_i$$ and $latex x_j$$.
52
53 $head SizeVector$$
54 The type $icode SizeVector$$ is a $cref SimpleVector$$ class with
55 $cref/elements of type/SimpleVector/Elements of Specified Type/$$
56 $code size_t$$.
57
58 $head BaseVector$$
59 The type $icode BaseVector$$ is a $cref SimpleVector$$ class with
60 $cref/elements of type/SimpleVector/Elements of Specified Type/$$
61 $code size_t$$.
62
63 $head f$$
64 This object has prototype
65 $codei%
66 ADFun<%Base%> %f%
67 %$$
68 Note that the Taylor coefficients stored in $icode f$$ are affected
69 by this operation; see
70 $cref/uses forward/sparse_hes/Uses Forward/$$ below.
71
72 $head x$$
73 This argument has prototype
74 $codei%
75 const %BaseVector%& %x%
76 %$$
77 and its size is $icode n$$.
78 It specifies the point at which to evaluate the Hessian
79 $latex H(x)$$.
80
81 $head w$$
82 This argument has prototype
83 $codei%
84 const %BaseVector%& %w%
85 %$$
86 and its size is $icode m$$.
87 It specifies the weight for each of the components of $latex F(x)$$;
88 i.e. $latex w_i$$ is the weight for $latex F_i (x)$$.
89
90 $head subset$$
91 This argument has prototype
92 $codei%
93 sparse_rcv<%SizeVector%, %BaseVector%>& %subset%
94 %$$
95 Its row size and column size is $icode n$$; i.e.,
96 $icode%subset%.nr() == %n%$$ and $icode%subset%.nc() == %n%$$.
97 It specifies which elements of the Hessian are computed.
98 $list number$$
99 The input value of its value vector
100 $icode%subset%.val()%$$ does not matter.
101 Upon return it contains the value of the corresponding elements
102 of the Hessian.
103 $lnext
104 All of the row, column pairs in $icode subset$$ must also appear in
105 $icode pattern$$; i.e., they must be possibly non-zero.
106 $lnext
107 The Hessian is symmetric, so one has a choice as to which off diagonal
108 elements to put in $icode subset$$.
109 It will probably be more efficient if one makes this choice so that
110 the there are more entries in each non-zero column of $icode subset$$;
111 see $cref/n_sweep/sparse_hes/n_sweep/$$ below.
112 $lend
113
114 $head pattern$$
115 This argument has prototype
116 $codei%
117 const sparse_rc<%SizeVector%>& %pattern%
118 %$$
119 Its row size and column size is $icode n$$; i.e.,
120 $icode%pattern%.nr() == %n%$$ and $icode%pattern%.nc() == %n%$$.
121 It is a sparsity pattern for the Hessian $latex H(x)$$.
122 If the $th i$$ row ($th j$$ column) does not appear in $icode subset$$,
123 the $th i$$ row ($th j$$ column) of $icode pattern$$ does not matter
124 and need not be computed.
125 This argument is not used (and need not satisfy any conditions),
126 when $cref/work/sparse_hes/work/$$ is non-empty.
127
128 $subhead subset$$
129 If the $th i$$ row and $th i$$ column do not appear in $icode subset$$,
130 the $th i$$ row and column of $icode pattern$$ do not matter.
131 In this case the $th i-th$$ row and column may have no entries in
132 $icode pattern$$ even though they are possibly non-zero in $latex H(x)$$.
133 (This can be used to reduce the amount of computation required to find
134 $icode pattern$$.)
135
136 $head coloring$$
137 The coloring algorithm determines which rows and columns
138 can be computed during the same sweep.
139 This field has prototype
140 $codei%
141 const std::string& %coloring%
142 %$$
143 This value only matters when work is empty; i.e.,
144 after the $icode work$$ constructor or $icode%work%.clear()%$$.
145
146 $subhead cppad.symmetric$$
147 This coloring takes advantage of the fact that the Hessian matrix
148 is symmetric when find a coloring that requires fewer
149 $cref/sweeps/sparse_hes/n_sweep/$$.
150
151 $subhead cppad.general$$
152 This is the same as the sparse Jacobian
153 $cref/cppad/sparse_jac/coloring/cppad/$$ method
154 which does not take advantage of symmetry.
155
156 $subhead colpack.symmetric$$
157 If $cref colpack_prefix$$ was specified on the
158 $cref/cmake command/cmake/CMake Command/$$ line,
159 you can set $icode coloring$$ to $code colpack.symmetric$$.
160 This also takes advantage of the fact that the Hessian matrix is symmetric.
161
162 $subhead colpack.general$$
163 If $cref colpack_prefix$$ was specified on the
164 $cref/cmake command/cmake/CMake Command/$$ line,
165 you can set $icode coloring$$ to $code colpack.general$$.
166 This is the same as the sparse Jacobian
167 $cref/colpack/sparse_jac/coloring/colpack/$$ method
168 which does not take advantage of symmetry.
169
170 $subhead colpack.star Deprecated 2017-06-01$$
171 The $code colpack.star$$ method is deprecated.
172 It is the same as the $code colpack.symmetric$$ method
173 which should be used instead.
174
175
176 $head work$$
177 This argument has prototype
178 $codei%
179 sparse_hes_work& %work%
180 %$$
181 We refer to its initial value,
182 and its value after $icode%work%.clear()%$$, as empty.
183 If it is empty, information is stored in $icode work$$.
184 This can be used to reduce computation when
185 a future call is for the same object $icode f$$,
186 and the same subset of the Hessian.
187 In fact, it can be used with a different $icode f$$
188 and a different $icode subset$$ provided that Hessian sparsity pattern
189 for $icode f$$ and the sparsity pattern in $icode subset$$ are the same.
190 If either of these values change, use $icode%work%.clear()%$$ to
191 empty this structure.
192
193 $head n_sweep$$
194 The return value $icode n_sweep$$ has prototype
195 $codei%
196 size_t %n_sweep%
197 %$$
198 It is the number of first order forward sweeps
199 used to compute the requested Hessian values.
200 Each first forward sweep is followed by a second order reverse sweep
201 so it is also the number of reverse sweeps.
202 It is also the number of colors determined by the coloring method
203 mentioned above.
204 This is proportional to the total computational work,
205 not counting the zero order forward sweep,
206 or combining multiple columns and rows into a single sweep.
207
208 $head Uses Forward$$
209 After each call to $cref Forward$$,
210 the object $icode f$$ contains the corresponding
211 $cref/Taylor coefficients/glossary/Taylor Coefficient/$$.
212 After a call to $code sparse_hes$$
213 the zero order coefficients correspond to
214 $codei%
215 %f%.Forward(0, %x%)
216 %$$
217 All the other forward mode coefficients are unspecified.
218
219 $head Example$$
220 $children%
221 example/sparse/sparse_hes.cpp
222 %$$
223 The files $cref sparse_hes.cpp$$
224 is an example and test of $code sparse_hes$$.
225 It returns $code true$$, if it succeeds, and $code false$$ otherwise.
226
227 $head Subset Hessian$$
228 The routine
229 $cref sparse_sub_hes.cpp$$
230 is an example and test that compute a subset of a sparse Hessian.
231 It returns $code true$$, for success, and $code false$$ otherwise.
232
233 $end
234 */
235 # include <cppad/core/cppad_assert.hpp>
236 # include <cppad/local/sparse/internal.hpp>
237 # include <cppad/local/color_general.hpp>
238 # include <cppad/local/color_symmetric.hpp>
239
240 /*!
241 \file sparse_hes.hpp
242 Sparse Hessian calculation routines.
243 */
244 namespace CppAD { // BEGIN_CPPAD_NAMESPACE
245
246 /*!
247 Class used to hold information used by Sparse Hessian routine in this file,
248 so it does not need to be recomputed every time.
249 */
250 class sparse_hes_work {
251 public:
252 /// row and column indicies for return values
253 /// (some may be reflected by symmetric coloring algorithms)
254 CppAD::vector<size_t> row;
255 CppAD::vector<size_t> col;
256 /// indices that sort the row and col arrays by color
257 CppAD::vector<size_t> order;
258 /// results of the coloring algorithm
259 CppAD::vector<size_t> color;
260
261 /// constructor
sparse_hes_work(void)262 sparse_hes_work(void)
263 { }
264 /// inform CppAD that this information needs to be recomputed
clear(void)265 void clear(void)
266 {
267 row.clear();
268 col.clear();
269 order.clear();
270 color.clear();
271 }
272 };
273 // ----------------------------------------------------------------------------
274 /*!
275 Calculate sparse Hessians using forward mode
276
277 \tparam Base
278 the base type for the recording that is stored in the ADFun object.
279
280 \tparam SizeVector
281 a simple vector class with elements of type size_t.
282
283 \tparam BaseVector
284 a simple vector class with elements of type Base.
285
286 \param x
287 a vector of length n, the number of independent variables in f
288 (this ADFun object).
289
290 \param w
291 a vector of length m, the number of dependent variables in f
292 (this ADFun object).
293
294 \param subset
295 specifices the subset of the sparsity pattern where the Hessian is evaluated.
296 subset.nr() == n,
297 subset.nc() == n.
298
299 \param pattern
300 is a sparsity pattern for the Hessian of w^T * f;
301 pattern.nr() == n,
302 pattern.nc() == n,
303 where m is number of dependent variables in f.
304
305 \param coloring
306 determines which coloring algorithm is used.
307 This must be cppad.symmetric, cppad.general, colpack.symmetic,
308 or colpack.star.
309
310 \param work
311 this structure must be empty, or contain the information stored
312 by a previous call to sparse_hes.
313 The previous call must be for the same ADFun object f
314 and the same subset.
315
316 \return
317 This is the number of first order forward
318 (and second order reverse) sweeps used to compute thhe Hessian.
319 */
320 template <class Base, class RecBase>
321 template <class SizeVector, class BaseVector>
sparse_hes(const BaseVector & x,const BaseVector & w,sparse_rcv<SizeVector,BaseVector> & subset,const sparse_rc<SizeVector> & pattern,const std::string & coloring,sparse_hes_work & work)322 size_t ADFun<Base,RecBase>::sparse_hes(
323 const BaseVector& x ,
324 const BaseVector& w ,
325 sparse_rcv<SizeVector , BaseVector>& subset ,
326 const sparse_rc<SizeVector>& pattern ,
327 const std::string& coloring ,
328 sparse_hes_work& work )
329 { size_t n = Domain();
330 //
331 CPPAD_ASSERT_KNOWN(
332 subset.nr() == n,
333 "sparse_hes: subset.nr() not equal domain dimension for f"
334 );
335 CPPAD_ASSERT_KNOWN(
336 subset.nc() == n,
337 "sparse_hes: subset.nc() not equal domain dimension for f"
338 );
339 CPPAD_ASSERT_KNOWN(
340 size_t( x.size() ) == n,
341 "sparse_hes: x.size() not equal domain dimension for f"
342 );
343 CPPAD_ASSERT_KNOWN(
344 size_t( w.size() ) == Range(),
345 "sparse_hes: w.size() not equal range dimension for f"
346 );
347 //
348 // work information
349 vector<size_t>& row(work.row);
350 vector<size_t>& col(work.col);
351 vector<size_t>& color(work.color);
352 vector<size_t>& order(work.order);
353 //
354 // subset information
355 const SizeVector& subset_row( subset.row() );
356 const SizeVector& subset_col( subset.col() );
357 //
358 // point at which we are evaluationg the Hessian
359 Forward(0, x);
360 //
361 // number of elements in the subset
362 size_t K = subset.nnz();
363 //
364 // check for case were there is nothing to do
365 // (except for call to Forward(0, x)
366 if( K == 0 )
367 return 0;
368 //
369 # ifndef NDEBUG
370 if( color.size() != 0 )
371 { CPPAD_ASSERT_KNOWN(
372 color.size() == n,
373 "sparse_hes: work is non-empty and conditions have changed"
374 );
375 CPPAD_ASSERT_KNOWN(
376 row.size() == K,
377 "sparse_hes: work is non-empty and conditions have changed"
378 );
379 CPPAD_ASSERT_KNOWN(
380 col.size() == K,
381 "sparse_hes: work is non-empty and conditions have changed"
382 );
383 //
384 for(size_t k = 0; k < K; k++)
385 { bool ok = row[k] == subset_row[k] && col[k] == subset_col[k];
386 ok |= row[k] == subset_col[k] && col[k] == subset_row[k];
387 CPPAD_ASSERT_KNOWN(
388 ok,
389 "sparse_hes: work is non-empty and conditions have changed"
390 );
391 }
392 }
393 # endif
394 //
395 // check for case where input work is empty
396 if( color.size() == 0 )
397 { // compute work color and order vectors
398 CPPAD_ASSERT_KNOWN(
399 pattern.nr() == n,
400 "sparse_hes: pattern.nr() not equal domain dimension for f"
401 );
402 CPPAD_ASSERT_KNOWN(
403 pattern.nc() == n,
404 "sparse_hes: pattern.nc() not equal domain dimension for f"
405 );
406 //
407 // initialize work row, col to be same as subset row, col
408 row.resize(K);
409 col.resize(K);
410 // cannot assign vectors becasue may be of different types
411 // (SizeVector and CppAD::vector<size_t>)
412 for(size_t k = 0; k < K; k++)
413 { row[k] = subset_row[k];
414 col[k] = subset_col[k];
415 }
416 //
417 // convert pattern to an internal version of its transpose
418 local::pod_vector<size_t> internal_index(n);
419 for(size_t j = 0; j < n; j++)
420 internal_index[j] = j;
421 bool transpose = true;
422 bool zero_empty = false;
423 bool input_empty = true;
424 local::sparse::list_setvec internal_pattern;
425 internal_pattern.resize(n, n);
426 local::sparse::set_internal_pattern(zero_empty, input_empty,
427 transpose, internal_index, internal_pattern, pattern
428 );
429 //
430 // execute coloring algorithm
431 // (we are using transpose becasue coloring groups rows, not columns)
432 color.resize(n);
433 if( coloring == "cppad.general" )
434 local::color_general_cppad(internal_pattern, col, row, color);
435 else if( coloring == "cppad.symmetric" )
436 local::color_symmetric_cppad(internal_pattern, col, row, color);
437 else if( coloring == "colpack.general" )
438 {
439 # if CPPAD_HAS_COLPACK
440 local::color_general_colpack(internal_pattern, col, row, color);
441 # else
442 CPPAD_ASSERT_KNOWN(
443 false,
444 "sparse_hes: coloring = colpack.star "
445 "and colpack_prefix not in cmake command line."
446 );
447 # endif
448 }
449 else if(
450 coloring == "colpack.symmetric" ||
451 coloring == "colpack.star"
452 )
453 {
454 # if CPPAD_HAS_COLPACK
455 local::color_symmetric_colpack(internal_pattern, col, row, color);
456 # else
457 CPPAD_ASSERT_KNOWN(
458 false,
459 "sparse_hes: coloring = colpack.symmetic or colpack.star "
460 "and colpack_prefix not in cmake command line."
461 );
462 # endif
463 }
464 else CPPAD_ASSERT_KNOWN(
465 false,
466 "sparse_hes: coloring is not valid."
467 );
468 //
469 // put sorting indices in color order
470 SizeVector key(K);
471 order.resize(K);
472 for(size_t k = 0; k < K; k++)
473 key[k] = color[ col[k] ];
474 index_sort(key, order);
475 }
476 // Base versions of zero and one
477 Base one(1.0);
478 Base zero(0.0);
479 //
480 size_t n_color = 1;
481 for(size_t j = 0; j < n; j++) if( color[j] < n )
482 n_color = std::max<size_t>(n_color, color[j] + 1);
483 //
484 // initialize the return Hessian values as zero
485 for(size_t k = 0; k < K; k++)
486 subset.set(k, zero);
487 //
488 // direction vector for calls to first order forward
489 BaseVector dx(n);
490 //
491 // return values for calls to second order reverse
492 BaseVector ddw(2 * n);
493 //
494 // loop over colors
495 size_t k = 0;
496 for(size_t ell = 0; ell < n_color; ell++)
497 if( k == K )
498 { // kludge because colpack returns colors that are not used
499 // (it does not know about the subset corresponding to row, col)
500 CPPAD_ASSERT_UNKNOWN(
501 coloring == "colpack.general" ||
502 coloring == "colpack.symmetric" ||
503 coloring == "colpack.star"
504 );
505 }
506 else if( color[ col[ order[k] ] ] != ell )
507 { // kludge because colpack returns colors that are not used
508 // (it does not know about the subset corresponding to row, col)
509 CPPAD_ASSERT_UNKNOWN(
510 coloring == "colpack.general" ||
511 coloring == "colpack.symmetic" ||
512 coloring == "colpack.star"
513 );
514 }
515 else
516 { CPPAD_ASSERT_UNKNOWN( color[ col[ order[k] ] ] == ell );
517 //
518 // combine all columns with this color
519 for(size_t j = 0; j < n; j++)
520 { dx[j] = zero;
521 if( color[j] == ell )
522 dx[j] = one;
523 }
524 // call forward mode for all these rows at once
525 Forward(1, dx);
526 //
527 // evaluate derivative of w^T * F'(x) * dx
528 ddw = Reverse(2, w);
529 //
530 // set the corresponding components of the result
531 while( k < K && color[ col[order[k]] ] == ell )
532 { size_t index = row[ order[k] ] * 2 + 1;
533 subset.set(order[k], ddw[index] );
534 k++;
535 }
536 }
537 // check that all the required entries have been set
538 CPPAD_ASSERT_UNKNOWN( k == K );
539 return n_color;
540 }
541
542 } // END_CPPAD_NAMESPACE
543
544 # endif
545