1 # ifndef CPPAD_CORE_OPTIMIZE_HPP
2 # define CPPAD_CORE_OPTIMIZE_HPP
3 /* --------------------------------------------------------------------------
4 CppAD: C++ Algorithmic Differentiation: Copyright (C) 2003-20 Bradley M. Bell
5 
6 CppAD is distributed under the terms of the
7              Eclipse Public License Version 2.0.
8 
9 This Source Code may also be made available under the following
10 Secondary License when the conditions for such availability set forth
11 in the Eclipse Public License, Version 2.0 are satisfied:
12       GNU General Public License, Version 2.0 or later.
13 ---------------------------------------------------------------------------- */
14 
15 # define CPPAD_CORE_OPTIMIZE_PRINT_RESULT 0
16 
17 /*
18 $begin optimize$$
19 $spell
20     enum
21     jac
22     bool
23     Taylor
24     CppAD
25     cppad
26     std
27     const
28     onetape
29     op
30     optimizer
31 $$
32 
33 $section Optimize an ADFun Object Tape$$
34 
35 
36 $head Syntax$$
37 $icode%f%.optimize()
38 %$$
39 $icode%f%.optimize(%options%)
40 %$$
41 $icode%flag% = %f%.exceed_collision_limit()
42 %$$
43 
44 $head Purpose$$
45 The operation sequence corresponding to an $cref ADFun$$ object can
46 be very large and involve many operations; see the
47 size functions in $cref seq_property$$.
48 The $icode%f%.optimize%$$ procedure reduces the number of operations,
49 and thereby the time and the memory, required to
50 compute function and derivative values.
51 
52 $head f$$
53 The object $icode f$$ has prototype
54 $codei%
55     ADFun<%Base%> %f%
56 %$$
57 
58 $head options$$
59 This argument has prototype
60 $codei%
61     const std::string& %options%
62 %$$
63 The default for $icode options$$ is the empty string.
64 If it is present, it must consist of one or more of the options below
65 separated by a single space character.
66 
67 $subhead no_conditional_skip$$
68 The $code optimize$$ function can create conditional skip operators
69 to improve the speed of conditional expressions; see
70 $cref/optimize/CondExp/Optimize/$$.
71 If the sub-string $code no_conditional_skip$$ appears in $icode options$$,
72 conditional skip operations are not be generated.
73 This may make the optimize routine use significantly less memory
74 and take less time to optimize $icode f$$.
75 If conditional skip operations are generated,
76 it may save a significant amount of time when
77 using $icode f$$ for $cref forward$$ or $cref reverse$$ mode calculations;
78 see $cref number_skip$$.
79 
80 $subhead no_compare_op$$
81 If the sub-string $code no_compare_op$$ appears in $icode options$$,
82 comparison operators will be removed from the optimized function.
83 These operators are necessary for the
84 $cref compare_change$$ functions to be meaningful.
85 On the other hand, they are not necessary, and take extra time,
86 when the compare_change functions are not used.
87 
88 $subhead no_print_for_op$$
89 If the sub-string $code no_compare_op$$ appears in $icode options$$,
90 $cref PrintFor$$ operations will be removed form the optimized function.
91 These operators are useful for reporting problems evaluating derivatives
92 at independent variable values different from those used to record a function.
93 
94 $subhead no_cumulative_sum_op$$
95 If this sub-string appears,
96 no cumulative sum operations will be generated during the optimization; see
97 $cref optimize_cumulative_sum.cpp$$.
98 
99 $subhead collision_limit=value$$
100 If this substring appears,
101 where $icode value$$ is a sequence of decimal digits,
102 the optimizer's hash code collision limit will be set to $icode value$$.
103 When the collision limit is reached, the expressions with that hash code
104 are removed and a new lists of expressions with that has code is started.
105 The larger $icode value$$, the more identical expressions the optimizer
106 can recognize, but the slower the optimizer may run.
107 The default for $icode value$$ is $code 10$$.
108 
109 $head Re-Optimize$$
110 Before 2019-06-28, optimizing twice was not supported and would fail
111 if cumulative sum operators were present after the first optimization.
112 This is now supported but it is not expected to have much benefit.
113 If you find a case where it does have a benefit, please inform the CppAD
114 developers of this.
115 
116 $head Efficiency$$
117 If a $cref/zero order forward/forward_zero/$$ calculation is done during
118 the construction of $icode f$$, it will require more memory
119 and time than required after the optimization procedure.
120 In addition, it will need to be redone.
121 For this reason, it is more efficient to use
122 $codei%
123     ADFun<%Base%> %f%;
124     %f%.Dependent(%x%, %y%);
125     %f%.optimize();
126 %$$
127 instead of
128 $codei%
129     ADFun<%Base%> %f%(%x%, %y%)
130     %f%.optimize();
131 %$$
132 See the discussion about
133 $cref/sequence constructors/FunConstruct/Sequence Constructor/$$.
134 
135 $head Taylor Coefficients$$
136 Any Taylor coefficients in the function object are lost; i.e.,
137 $cref/f.size_order()/size_order/$$ after the optimization is zero.
138 (See the discussion about efficiency above.)
139 
140 $head Speed Testing$$
141 You can run the CppAD $cref/speed/speed_main/$$ tests and see
142 the corresponding changes in number of variables and execution time.
143 Note that there is an interaction between using
144 $cref/optimize/speed_main/Global Options/optimize/$$ and
145 $cref/onetape/speed_main/Global Options/onetape/$$.
146 If $icode onetape$$ is true and $icode optimize$$ is true,
147 the optimized tape will be reused many times.
148 If $icode onetape$$ is false and $icode optimize$$ is true,
149 the tape will be re-optimized for each test.
150 
151 $head Atomic Functions$$
152 There are some subtitle issue with optimized $cref atomic$$ functions
153 $latex v = g(u)$$:
154 
155 $subhead rev_sparse_jac$$
156 The $cref atomic_two_rev_sparse_jac$$ function is be used to determine
157 which components of $icode u$$ affect the dependent variables of $icode f$$.
158 For each atomic operation, the current
159 $cref/atomic_sparsity/atomic_two_option/atomic_sparsity/$$ setting is used
160 to determine if $code pack_sparsity_enum$$, $code bool_sparsity_enum$$,
161 or $code set_sparsity_enum$$ is used to determine dependency relations
162 between argument and result variables.
163 
164 $subhead nan$$
165 If $icode%u%[%i%]%$$ does not affect the value of
166 the dependent variables for $icode f$$,
167 the value of $icode%u%[%i%]%$$ is set to $cref nan$$.
168 
169 $head Checking Optimization$$
170 If $cref/NDEBUG/Faq/Speed/NDEBUG/$$ is not defined,
171 and $cref/f.size_order()/size_order/$$ is greater than zero,
172 a $cref forward_zero$$ calculation is done using the optimized version
173 of $icode f$$ and the results are checked to see that they are
174 the same as before.
175 If they are not the same, the
176 $cref ErrorHandler$$ is called with a known error message
177 related to $icode%f%.optimize()%$$.
178 
179 $head exceed_collision_limit$$
180 If the return value $icode flag$$ is true (false),
181 the previous call to $icode%f%.optimize%$$ exceed the
182 $cref/collision_limit/optimize/options/collision_limit=value/$$.
183 
184 $head Examples$$
185 $comment childtable without Example instead of Contents for header$$
186 $children%
187     example/optimize/optimize_twice.cpp
188     %example/optimize/forward_active.cpp
189     %example/optimize/reverse_active.cpp
190     %example/optimize/compare_op.cpp
191     %example/optimize/print_for.cpp
192     %example/optimize/conditional_skip.cpp
193     %example/optimize/nest_conditional.cpp
194     %example/optimize/cumulative_sum.cpp
195 %$$
196 $table
197 $rref optimize_twice.cpp$$
198 $rref optimize_forward_active.cpp$$
199 $rref optimize_reverse_active.cpp$$
200 $rref optimize_compare_op.cpp$$
201 $rref optimize_print_for.cpp$$
202 $rref optimize_conditional_skip.cpp$$
203 $rref optimize_nest_conditional.cpp$$
204 $rref optimize_cumulative_sum.cpp$$
205 $tend
206 
207 $end
208 -----------------------------------------------------------------------------
209 */
210 # include <cppad/local/optimize/optimize_run.hpp>
211 /*!
212 \file optimize.hpp
213 Optimize a player object operation sequence
214 */
215 namespace CppAD { // BEGIN_CPPAD_NAMESPACE
216 /*!
217 Optimize a player object operation sequence
218 
219 The operation sequence for this object is replaced by one with fewer operations
220 but the same funcition and derivative values.
221 
222 \tparam Base
223 base type for the operator; i.e., this operation was recorded
224 using AD<Base> and computations by this routine are done using type
225  Base.
226 
227 \param options
228 \li
229 If the sub-string "no_conditional_skip" appears,
230 conditional skip operations will not be generated.
231 This may make the optimize routine use significantly less memory
232 and take significantly less time.
233 \li
234 If the sub-string "no_compare_op" appears,
235 then comparison operators will be removed from the optimized tape.
236 These operators are necessary for the compare_change function to be
237 be meaningful in the resulting recording.
238 On the other hand, they are not necessary and take extra time
239 when compare_change is not used.
240 */
241 template <class Base, class RecBase>
optimize(const std::string & options)242 void ADFun<Base,RecBase>::optimize(const std::string& options)
243 {
244 # if CPPAD_CORE_OPTIMIZE_PRINT_RESULT
245     // size of operation sequence before optimizatiton
246     size_t size_op_before = size_op();
247 # endif
248 
249     // place to store the optimized version of the recording
250     local::recorder<Base> rec;
251 
252     // number of independent variables
253     size_t n = ind_taddr_.size();
254 
255 # ifndef NDEBUG
256     size_t i, j, m = dep_taddr_.size();
257     CppAD::vector<Base> x(n), y(m), check(m);
258     Base max_taylor(0);
259     bool check_zero_order = num_order_taylor_ > 0;
260     if( check_zero_order )
261     {   // zero order coefficients for independent vars
262         for(j = 0; j < n; j++)
263         {   CPPAD_ASSERT_UNKNOWN( play_.GetOp(j+1) == local::InvOp );
264             CPPAD_ASSERT_UNKNOWN( ind_taddr_[j]    == j+1   );
265             x[j] = taylor_[ ind_taddr_[j] * cap_order_taylor_ + 0];
266         }
267         // zero order coefficients for dependent vars
268         for(i = 0; i < m; i++)
269         {   CPPAD_ASSERT_UNKNOWN( dep_taddr_[i] < num_var_tape_  );
270             y[i] = taylor_[ dep_taddr_[i] * cap_order_taylor_ + 0];
271         }
272         // maximum zero order coefficient not counting BeginOp at beginning
273         // (which is correpsonds to uninitialized memory).
274         for(i = 1; i < num_var_tape_; i++)
275         {   if(  abs_geq(taylor_[i*cap_order_taylor_+0] , max_taylor) )
276                 max_taylor = taylor_[i*cap_order_taylor_+0];
277         }
278     }
279 # endif
280 
281     // create the optimized recording
282     size_t exceed = false;
283     switch( play_.address_type() )
284     {
285         case local::play::unsigned_short_enum:
286         exceed = local::optimize::optimize_run<unsigned short>(
287             options, n, dep_taddr_, &play_, &rec
288         );
289         break;
290 
291         case local::play::unsigned_int_enum:
292         exceed = local::optimize::optimize_run<unsigned int>(
293             options, n, dep_taddr_, &play_, &rec
294         );
295         break;
296 
297         case local::play::size_t_enum:
298         exceed = local::optimize::optimize_run<size_t>(
299             options, n, dep_taddr_, &play_, &rec
300         );
301         break;
302 
303         default:
304         CPPAD_ASSERT_UNKNOWN(false);
305     }
306     exceed_collision_limit_ = exceed;
307 
308     // number of variables in the recording
309     num_var_tape_  = rec.num_var_rec();
310 
311     // now replace the recording
312     play_.get_recording(rec, n);
313 
314     // set flag so this function knows it has been optimized
315     has_been_optimized_ = true;
316 
317     // free memory allocated for sparse Jacobian calculation
318     // (the results are no longer valid)
319     for_jac_sparse_pack_.resize(0, 0);
320     for_jac_sparse_set_.resize(0,0);
321 
322     // free old Taylor coefficient memory
323     taylor_.clear();
324     num_order_taylor_     = 0;
325     cap_order_taylor_     = 0;
326 
327     // resize and initilaize conditional skip vector
328     // (must use player size because it now has the recoreder information)
329     cskip_op_.resize( play_.num_op_rec() );
330 
331     // resize subgraph_info_
332     subgraph_info_.resize(
333         ind_taddr_.size(),    // n_ind
334         dep_taddr_.size(),    // n_dep
335         play_.num_op_rec(),   // n_op
336         play_.num_var_rec()   // n_var
337     );
338 
339 # ifndef NDEBUG
340     if( check_zero_order )
341     {   std::stringstream s;
342         //
343         // zero order forward calculation using new operation sequence
344         check = Forward(0, x, s);
345 
346         // check results
347         Base eps99 = Base(99) * CppAD::numeric_limits<Base>::epsilon();
348         for(i = 0; i < m; i++)
349         if( ! abs_geq( eps99 * max_taylor , check[i] - y[i] ) )
350         {   std::string msg = "Error during check of f.optimize().";
351             msg += "\neps99 * max_taylor = " + to_string(eps99 * max_taylor);
352             msg += "\ncheck[i] = " + to_string(check[i]);
353             msg += "\ny[i]     = " + to_string(y[i]);
354             CPPAD_ASSERT_KNOWN(
355                 abs_geq( eps99 * max_taylor , check[i] - y[i] ) ,
356                 msg.c_str()
357             );
358         }
359 
360         // Erase memory that this calculation was done so NDEBUG gives
361         // same final state for this object (from users perspective)
362         num_order_taylor_     = 0;
363     }
364 # endif
365 # if CPPAD_CORE_OPTIMIZE_PRINT_RESULT
366     // size of operation sequence after optimizatiton
367     size_t size_op_after = size_op();
368     std::cout << "optimize: size_op:  before = " <<
369     size_op_before << ", after = " << size_op_after << "\n";
370 # endif
371 }
372 
373 } // END_CPPAD_NAMESPACE
374 
375 # undef CPPAD_CORE_OPTIMIZE_PRINT_RESULT
376 # endif
377