1 # ifndef CPPAD_CORE_SPARSE_HES_HPP
2 # define CPPAD_CORE_SPARSE_HES_HPP
3 /* --------------------------------------------------------------------------
4 CppAD: C++ Algorithmic Differentiation: Copyright (C) 2003-19 Bradley M. Bell
5 
6 CppAD is distributed under the terms of the
7              Eclipse Public License Version 2.0.
8 
9 This Source Code may also be made available under the following
10 Secondary License when the conditions for such availability set forth
11 in the Eclipse Public License, Version 2.0 are satisfied:
12       GNU General Public License, Version 2.0 or later.
13 ---------------------------------------------------------------------------- */
14 
15 /*
16 $begin sparse_hes$$
17 $spell
18     const
19     Taylor
20     rc
21     rcv
22     nr
23     nc
24     hes
25     std
26     cppad
27     colpack
28     cmake
29     Jacobian
30 $$
31 
32 $section Computing Sparse Hessians$$
33 
34 $head Syntax$$
35 $icode%n_sweep% = %f%.sparse_hes(
36     %x%, %w%, %subset%, %pattern%, %coloring%, %work%
37 )%$$
38 
39 $head Purpose$$
40 We use $latex F : \B{R}^n \rightarrow \B{R}^m$$ to denote the
41 function corresponding to $icode f$$.
42 Here $icode n$$ is the $cref/domain/seq_property/Domain/$$ size,
43 and $icode m$$ is the $cref/range/seq_property/Range/$$ size, or $icode f$$.
44 The syntax above takes advantage of sparsity when computing the Hessian
45 $latex \[
46     H(x) = \dpow{2}{x} \sum_{i=0}^{m-1} w_i F_i (x)
47 \] $$
48 In the sparse case, this should be faster and take less memory than
49 $cref Hessian$$.
50 The matrix element $latex H_{i,j} (x)$$ is the second partial of
51 $latex w^\R{T} F (x)$$ with respect to $latex x_i$$ and $latex x_j$$.
52 
53 $head SizeVector$$
54 The type $icode SizeVector$$ is a $cref SimpleVector$$ class with
55 $cref/elements of type/SimpleVector/Elements of Specified Type/$$
56 $code size_t$$.
57 
58 $head BaseVector$$
59 The type $icode BaseVector$$ is a $cref SimpleVector$$ class with
60 $cref/elements of type/SimpleVector/Elements of Specified Type/$$
61 $code size_t$$.
62 
63 $head f$$
64 This object has prototype
65 $codei%
66     ADFun<%Base%> %f%
67 %$$
68 Note that the Taylor coefficients stored in $icode f$$ are affected
69 by this operation; see
70 $cref/uses forward/sparse_hes/Uses Forward/$$ below.
71 
72 $head x$$
73 This argument has prototype
74 $codei%
75     const %BaseVector%& %x%
76 %$$
77 and its size is $icode n$$.
78 It specifies the point at which to evaluate the Hessian
79 $latex H(x)$$.
80 
81 $head w$$
82 This argument has prototype
83 $codei%
84     const %BaseVector%& %w%
85 %$$
86 and its size is $icode m$$.
87 It specifies the weight for each of the components of $latex F(x)$$;
88 i.e. $latex w_i$$ is the weight for $latex F_i (x)$$.
89 
90 $head subset$$
91 This argument has prototype
92 $codei%
93     sparse_rcv<%SizeVector%, %BaseVector%>& %subset%
94 %$$
95 Its row size and column size is $icode n$$; i.e.,
96 $icode%subset%.nr() == %n%$$ and $icode%subset%.nc() == %n%$$.
97 It specifies which elements of the Hessian are computed.
98 $list number$$
99 The input value of its value vector
100 $icode%subset%.val()%$$ does not matter.
101 Upon return it contains the value of the corresponding elements
102 of the Hessian.
103 $lnext
104 All of the row, column pairs in $icode subset$$ must also appear in
105 $icode pattern$$; i.e., they must be possibly non-zero.
106 $lnext
107 The Hessian is symmetric, so one has a choice as to which off diagonal
108 elements to put in $icode subset$$.
109 It will probably be more efficient if one makes this choice so that
110 the there are more entries in each non-zero column of $icode subset$$;
111 see $cref/n_sweep/sparse_hes/n_sweep/$$ below.
112 $lend
113 
114 $head pattern$$
115 This argument has prototype
116 $codei%
117     const sparse_rc<%SizeVector%>& %pattern%
118 %$$
119 Its row size and column size is $icode n$$; i.e.,
120 $icode%pattern%.nr() == %n%$$ and $icode%pattern%.nc() == %n%$$.
121 It is a sparsity pattern for the Hessian $latex H(x)$$.
122 If the $th i$$ row ($th j$$ column) does not appear in $icode subset$$,
123 the $th i$$ row ($th j$$ column) of $icode pattern$$ does not matter
124 and need not be computed.
125 This argument is not used (and need not satisfy any conditions),
126 when $cref/work/sparse_hes/work/$$ is non-empty.
127 
128 $subhead subset$$
129 If the $th i$$ row and $th i$$ column do not appear in $icode subset$$,
130 the $th i$$ row and column of $icode pattern$$ do not matter.
131 In this case the $th i-th$$ row and column may have no entries in
132 $icode pattern$$ even though they are possibly non-zero in $latex H(x)$$.
133 (This can be used to reduce the amount of computation required to find
134 $icode pattern$$.)
135 
136 $head coloring$$
137 The coloring algorithm determines which rows and columns
138 can be computed during the same sweep.
139 This field has prototype
140 $codei%
141     const std::string& %coloring%
142 %$$
143 This value only matters when work is empty; i.e.,
144 after the $icode work$$ constructor or $icode%work%.clear()%$$.
145 
146 $subhead cppad.symmetric$$
147 This coloring takes advantage of the fact that the Hessian matrix
148 is symmetric when find a coloring that requires fewer
149 $cref/sweeps/sparse_hes/n_sweep/$$.
150 
151 $subhead cppad.general$$
152 This is the same as the sparse Jacobian
153 $cref/cppad/sparse_jac/coloring/cppad/$$ method
154 which does not take advantage of symmetry.
155 
156 $subhead colpack.symmetric$$
157 If $cref colpack_prefix$$ was specified on the
158 $cref/cmake command/cmake/CMake Command/$$ line,
159 you can set $icode coloring$$ to $code colpack.symmetric$$.
160 This also takes advantage of the fact that the Hessian matrix is symmetric.
161 
162 $subhead colpack.general$$
163 If $cref colpack_prefix$$ was specified on the
164 $cref/cmake command/cmake/CMake Command/$$ line,
165 you can set $icode coloring$$ to $code colpack.general$$.
166 This is the same as the sparse Jacobian
167 $cref/colpack/sparse_jac/coloring/colpack/$$ method
168 which does not take advantage of symmetry.
169 
170 $subhead colpack.star Deprecated 2017-06-01$$
171 The $code colpack.star$$ method is deprecated.
172 It is the same as the $code colpack.symmetric$$ method
173 which should be used instead.
174 
175 
176 $head work$$
177 This argument has prototype
178 $codei%
179     sparse_hes_work& %work%
180 %$$
181 We refer to its initial value,
182 and its value after $icode%work%.clear()%$$, as empty.
183 If it is empty, information is stored in $icode work$$.
184 This can be used to reduce computation when
185 a future call is for the same object $icode f$$,
186 and the same subset of the Hessian.
187 In fact, it can be used with a different $icode f$$
188 and a different $icode subset$$ provided that Hessian sparsity pattern
189 for $icode f$$ and the sparsity pattern in $icode subset$$ are the same.
190 If either of these values change, use $icode%work%.clear()%$$ to
191 empty this structure.
192 
193 $head n_sweep$$
194 The return value $icode n_sweep$$ has prototype
195 $codei%
196     size_t %n_sweep%
197 %$$
198 It is the number of first order forward sweeps
199 used to compute the requested Hessian values.
200 Each first forward sweep is followed by a second order reverse sweep
201 so it is also the number of reverse sweeps.
202 It is also the number of colors determined by the coloring method
203 mentioned above.
204 This is proportional to the total computational work,
205 not counting the zero order forward sweep,
206 or combining multiple columns and rows into a single sweep.
207 
208 $head Uses Forward$$
209 After each call to $cref Forward$$,
210 the object $icode f$$ contains the corresponding
211 $cref/Taylor coefficients/glossary/Taylor Coefficient/$$.
212 After a call to $code sparse_hes$$
213 the zero order coefficients correspond to
214 $codei%
215     %f%.Forward(0, %x%)
216 %$$
217 All the other forward mode coefficients are unspecified.
218 
219 $head Example$$
220 $children%
221     example/sparse/sparse_hes.cpp
222 %$$
223 The files $cref sparse_hes.cpp$$
224 is an example and test of $code sparse_hes$$.
225 It returns $code true$$, if it succeeds, and $code false$$ otherwise.
226 
227 $head Subset Hessian$$
228 The routine
229 $cref sparse_sub_hes.cpp$$
230 is an example and test that compute a subset of a sparse Hessian.
231 It returns $code true$$, for success, and $code false$$ otherwise.
232 
233 $end
234 */
235 # include <cppad/core/cppad_assert.hpp>
236 # include <cppad/local/sparse/internal.hpp>
237 # include <cppad/local/color_general.hpp>
238 # include <cppad/local/color_symmetric.hpp>
239 
240 /*!
241 \file sparse_hes.hpp
242 Sparse Hessian calculation routines.
243 */
244 namespace CppAD { // BEGIN_CPPAD_NAMESPACE
245 
246 /*!
247 Class used to hold information used by Sparse Hessian routine in this file,
248 so it does not need to be recomputed every time.
249 */
250 class sparse_hes_work {
251     public:
252         /// row and column indicies for return values
253         /// (some may be reflected by symmetric coloring algorithms)
254         CppAD::vector<size_t> row;
255         CppAD::vector<size_t> col;
256         /// indices that sort the row and col arrays by color
257         CppAD::vector<size_t> order;
258         /// results of the coloring algorithm
259         CppAD::vector<size_t> color;
260 
261         /// constructor
sparse_hes_work(void)262         sparse_hes_work(void)
263         { }
264         /// inform CppAD that this information needs to be recomputed
clear(void)265         void clear(void)
266         {
267             row.clear();
268             col.clear();
269             order.clear();
270             color.clear();
271         }
272 };
273 // ----------------------------------------------------------------------------
274 /*!
275 Calculate sparse Hessians using forward mode
276 
277 \tparam Base
278 the base type for the recording that is stored in the ADFun object.
279 
280 \tparam SizeVector
281 a simple vector class with elements of type size_t.
282 
283 \tparam BaseVector
284 a simple vector class with elements of type Base.
285 
286 \param x
287 a vector of length n, the number of independent variables in f
288 (this ADFun object).
289 
290 \param w
291 a vector of length m, the number of dependent variables in f
292 (this ADFun object).
293 
294 \param subset
295 specifices the subset of the sparsity pattern where the Hessian is evaluated.
296 subset.nr() == n,
297 subset.nc() == n.
298 
299 \param pattern
300 is a sparsity pattern for the Hessian of w^T * f;
301 pattern.nr() == n,
302 pattern.nc() == n,
303 where m is number of dependent variables in f.
304 
305 \param coloring
306 determines which coloring algorithm is used.
307 This must be cppad.symmetric, cppad.general, colpack.symmetic,
308 or colpack.star.
309 
310 \param work
311 this structure must be empty, or contain the information stored
312 by a previous call to sparse_hes.
313 The previous call must be for the same ADFun object f
314 and the same subset.
315 
316 \return
317 This is the number of first order forward
318 (and second order reverse) sweeps used to compute thhe Hessian.
319 */
320 template <class Base, class RecBase>
321 template <class SizeVector, class BaseVector>
sparse_hes(const BaseVector & x,const BaseVector & w,sparse_rcv<SizeVector,BaseVector> & subset,const sparse_rc<SizeVector> & pattern,const std::string & coloring,sparse_hes_work & work)322 size_t ADFun<Base,RecBase>::sparse_hes(
323     const BaseVector&                    x        ,
324     const BaseVector&                    w        ,
325     sparse_rcv<SizeVector , BaseVector>& subset   ,
326     const sparse_rc<SizeVector>&         pattern  ,
327     const std::string&                   coloring ,
328     sparse_hes_work&                     work     )
329 {   size_t n = Domain();
330     //
331     CPPAD_ASSERT_KNOWN(
332         subset.nr() == n,
333         "sparse_hes: subset.nr() not equal domain dimension for f"
334     );
335     CPPAD_ASSERT_KNOWN(
336         subset.nc() == n,
337         "sparse_hes: subset.nc() not equal domain dimension for f"
338     );
339     CPPAD_ASSERT_KNOWN(
340         size_t( x.size() ) == n,
341         "sparse_hes: x.size() not equal domain dimension for f"
342     );
343     CPPAD_ASSERT_KNOWN(
344         size_t( w.size() ) == Range(),
345         "sparse_hes: w.size() not equal range dimension for f"
346     );
347     //
348     // work information
349     vector<size_t>& row(work.row);
350     vector<size_t>& col(work.col);
351     vector<size_t>& color(work.color);
352     vector<size_t>& order(work.order);
353     //
354     // subset information
355     const SizeVector& subset_row( subset.row() );
356     const SizeVector& subset_col( subset.col() );
357     //
358     // point at which we are evaluationg the Hessian
359     Forward(0, x);
360     //
361     // number of elements in the subset
362     size_t K = subset.nnz();
363     //
364     // check for case were there is nothing to do
365     // (except for call to Forward(0, x)
366     if( K == 0 )
367         return 0;
368     //
369 # ifndef NDEBUG
370     if( color.size() != 0 )
371     {   CPPAD_ASSERT_KNOWN(
372             color.size() == n,
373             "sparse_hes: work is non-empty and conditions have changed"
374         );
375         CPPAD_ASSERT_KNOWN(
376             row.size() == K,
377             "sparse_hes: work is non-empty and conditions have changed"
378         );
379         CPPAD_ASSERT_KNOWN(
380             col.size() == K,
381             "sparse_hes: work is non-empty and conditions have changed"
382         );
383         //
384         for(size_t k = 0; k < K; k++)
385         {   bool ok = row[k] == subset_row[k] && col[k] == subset_col[k];
386             ok     |= row[k] == subset_col[k] && col[k] == subset_row[k];
387             CPPAD_ASSERT_KNOWN(
388                 ok,
389                 "sparse_hes: work is non-empty and conditions have changed"
390             );
391         }
392     }
393 # endif
394     //
395     // check for case where input work is empty
396     if( color.size() == 0 )
397     {   // compute work color and order vectors
398         CPPAD_ASSERT_KNOWN(
399             pattern.nr() == n,
400             "sparse_hes: pattern.nr() not equal domain dimension for f"
401         );
402         CPPAD_ASSERT_KNOWN(
403             pattern.nc() == n,
404             "sparse_hes: pattern.nc() not equal domain dimension for f"
405         );
406         //
407         // initialize work row, col to be same as subset row, col
408         row.resize(K);
409         col.resize(K);
410         // cannot assign vectors becasue may be of different types
411         // (SizeVector and CppAD::vector<size_t>)
412         for(size_t k = 0; k < K; k++)
413         {   row[k] = subset_row[k];
414             col[k] = subset_col[k];
415         }
416         //
417         // convert pattern to an internal version of its transpose
418         local::pod_vector<size_t> internal_index(n);
419         for(size_t j = 0; j < n; j++)
420             internal_index[j] = j;
421         bool transpose   = true;
422         bool zero_empty  = false;
423         bool input_empty = true;
424         local::sparse::list_setvec internal_pattern;
425         internal_pattern.resize(n, n);
426         local::sparse::set_internal_pattern(zero_empty, input_empty,
427             transpose, internal_index, internal_pattern, pattern
428         );
429         //
430         // execute coloring algorithm
431         // (we are using transpose becasue coloring groups rows, not columns)
432         color.resize(n);
433         if( coloring == "cppad.general" )
434             local::color_general_cppad(internal_pattern, col, row, color);
435         else if( coloring == "cppad.symmetric" )
436             local::color_symmetric_cppad(internal_pattern, col, row, color);
437         else if( coloring == "colpack.general" )
438         {
439 # if CPPAD_HAS_COLPACK
440             local::color_general_colpack(internal_pattern, col, row, color);
441 # else
442             CPPAD_ASSERT_KNOWN(
443                 false,
444                 "sparse_hes: coloring = colpack.star "
445                 "and colpack_prefix not in cmake command line."
446             );
447 # endif
448         }
449         else if(
450             coloring == "colpack.symmetric" ||
451             coloring == "colpack.star"
452         )
453         {
454 # if CPPAD_HAS_COLPACK
455             local::color_symmetric_colpack(internal_pattern, col, row, color);
456 # else
457             CPPAD_ASSERT_KNOWN(
458                 false,
459                 "sparse_hes: coloring = colpack.symmetic or colpack.star "
460                 "and colpack_prefix not in cmake command line."
461             );
462 # endif
463         }
464         else CPPAD_ASSERT_KNOWN(
465             false,
466             "sparse_hes: coloring is not valid."
467         );
468         //
469         // put sorting indices in color order
470         SizeVector key(K);
471         order.resize(K);
472         for(size_t k = 0; k < K; k++)
473             key[k] = color[ col[k] ];
474         index_sort(key, order);
475     }
476     // Base versions of zero and one
477     Base one(1.0);
478     Base zero(0.0);
479     //
480     size_t n_color = 1;
481     for(size_t j = 0; j < n; j++) if( color[j] < n )
482         n_color = std::max<size_t>(n_color, color[j] + 1);
483     //
484     // initialize the return Hessian values as zero
485     for(size_t k = 0; k < K; k++)
486         subset.set(k, zero);
487     //
488     // direction vector for calls to first order forward
489     BaseVector dx(n);
490     //
491     // return values for calls to second order reverse
492     BaseVector ddw(2 * n);
493     //
494     // loop over colors
495     size_t k = 0;
496     for(size_t ell = 0; ell < n_color; ell++)
497     if( k  == K )
498     {   // kludge because colpack returns colors that are not used
499         // (it does not know about the subset corresponding to row, col)
500         CPPAD_ASSERT_UNKNOWN(
501             coloring == "colpack.general" ||
502             coloring == "colpack.symmetric" ||
503             coloring == "colpack.star"
504         );
505     }
506     else if( color[ col[ order[k] ] ] != ell )
507     {   // kludge because colpack returns colors that are not used
508         // (it does not know about the subset corresponding to row, col)
509         CPPAD_ASSERT_UNKNOWN(
510             coloring == "colpack.general" ||
511             coloring == "colpack.symmetic" ||
512             coloring == "colpack.star"
513         );
514     }
515     else
516     {   CPPAD_ASSERT_UNKNOWN( color[ col[ order[k] ] ] == ell );
517         //
518         // combine all columns with this color
519         for(size_t j = 0; j < n; j++)
520         {   dx[j] = zero;
521             if( color[j] == ell )
522                 dx[j] = one;
523         }
524         // call forward mode for all these rows at once
525         Forward(1, dx);
526         //
527         // evaluate derivative of w^T * F'(x) * dx
528         ddw = Reverse(2, w);
529         //
530         // set the corresponding components of the result
531         while( k < K && color[ col[order[k]] ] == ell )
532         {   size_t index = row[ order[k] ] * 2 + 1;
533             subset.set(order[k], ddw[index] );
534             k++;
535         }
536     }
537     // check that all the required entries have been set
538     CPPAD_ASSERT_UNKNOWN( k == K );
539     return n_color;
540 }
541 
542 } // END_CPPAD_NAMESPACE
543 
544 # endif
545