1 # ifndef CPPAD_CORE_OPTIMIZE_HPP
2 # define CPPAD_CORE_OPTIMIZE_HPP
3 /* --------------------------------------------------------------------------
4 CppAD: C++ Algorithmic Differentiation: Copyright (C) 2003-20 Bradley M. Bell
5
6 CppAD is distributed under the terms of the
7 Eclipse Public License Version 2.0.
8
9 This Source Code may also be made available under the following
10 Secondary License when the conditions for such availability set forth
11 in the Eclipse Public License, Version 2.0 are satisfied:
12 GNU General Public License, Version 2.0 or later.
13 ---------------------------------------------------------------------------- */
14
15 # define CPPAD_CORE_OPTIMIZE_PRINT_RESULT 0
16
17 /*
18 $begin optimize$$
19 $spell
20 enum
21 jac
22 bool
23 Taylor
24 CppAD
25 cppad
26 std
27 const
28 onetape
29 op
30 optimizer
31 $$
32
33 $section Optimize an ADFun Object Tape$$
34
35
36 $head Syntax$$
37 $icode%f%.optimize()
38 %$$
39 $icode%f%.optimize(%options%)
40 %$$
41 $icode%flag% = %f%.exceed_collision_limit()
42 %$$
43
44 $head Purpose$$
45 The operation sequence corresponding to an $cref ADFun$$ object can
46 be very large and involve many operations; see the
47 size functions in $cref seq_property$$.
48 The $icode%f%.optimize%$$ procedure reduces the number of operations,
49 and thereby the time and the memory, required to
50 compute function and derivative values.
51
52 $head f$$
53 The object $icode f$$ has prototype
54 $codei%
55 ADFun<%Base%> %f%
56 %$$
57
58 $head options$$
59 This argument has prototype
60 $codei%
61 const std::string& %options%
62 %$$
63 The default for $icode options$$ is the empty string.
64 If it is present, it must consist of one or more of the options below
65 separated by a single space character.
66
67 $subhead no_conditional_skip$$
68 The $code optimize$$ function can create conditional skip operators
69 to improve the speed of conditional expressions; see
70 $cref/optimize/CondExp/Optimize/$$.
71 If the sub-string $code no_conditional_skip$$ appears in $icode options$$,
72 conditional skip operations are not be generated.
73 This may make the optimize routine use significantly less memory
74 and take less time to optimize $icode f$$.
75 If conditional skip operations are generated,
76 it may save a significant amount of time when
77 using $icode f$$ for $cref forward$$ or $cref reverse$$ mode calculations;
78 see $cref number_skip$$.
79
80 $subhead no_compare_op$$
81 If the sub-string $code no_compare_op$$ appears in $icode options$$,
82 comparison operators will be removed from the optimized function.
83 These operators are necessary for the
84 $cref compare_change$$ functions to be meaningful.
85 On the other hand, they are not necessary, and take extra time,
86 when the compare_change functions are not used.
87
88 $subhead no_print_for_op$$
89 If the sub-string $code no_compare_op$$ appears in $icode options$$,
90 $cref PrintFor$$ operations will be removed form the optimized function.
91 These operators are useful for reporting problems evaluating derivatives
92 at independent variable values different from those used to record a function.
93
94 $subhead no_cumulative_sum_op$$
95 If this sub-string appears,
96 no cumulative sum operations will be generated during the optimization; see
97 $cref optimize_cumulative_sum.cpp$$.
98
99 $subhead collision_limit=value$$
100 If this substring appears,
101 where $icode value$$ is a sequence of decimal digits,
102 the optimizer's hash code collision limit will be set to $icode value$$.
103 When the collision limit is reached, the expressions with that hash code
104 are removed and a new lists of expressions with that has code is started.
105 The larger $icode value$$, the more identical expressions the optimizer
106 can recognize, but the slower the optimizer may run.
107 The default for $icode value$$ is $code 10$$.
108
109 $head Re-Optimize$$
110 Before 2019-06-28, optimizing twice was not supported and would fail
111 if cumulative sum operators were present after the first optimization.
112 This is now supported but it is not expected to have much benefit.
113 If you find a case where it does have a benefit, please inform the CppAD
114 developers of this.
115
116 $head Efficiency$$
117 If a $cref/zero order forward/forward_zero/$$ calculation is done during
118 the construction of $icode f$$, it will require more memory
119 and time than required after the optimization procedure.
120 In addition, it will need to be redone.
121 For this reason, it is more efficient to use
122 $codei%
123 ADFun<%Base%> %f%;
124 %f%.Dependent(%x%, %y%);
125 %f%.optimize();
126 %$$
127 instead of
128 $codei%
129 ADFun<%Base%> %f%(%x%, %y%)
130 %f%.optimize();
131 %$$
132 See the discussion about
133 $cref/sequence constructors/FunConstruct/Sequence Constructor/$$.
134
135 $head Taylor Coefficients$$
136 Any Taylor coefficients in the function object are lost; i.e.,
137 $cref/f.size_order()/size_order/$$ after the optimization is zero.
138 (See the discussion about efficiency above.)
139
140 $head Speed Testing$$
141 You can run the CppAD $cref/speed/speed_main/$$ tests and see
142 the corresponding changes in number of variables and execution time.
143 Note that there is an interaction between using
144 $cref/optimize/speed_main/Global Options/optimize/$$ and
145 $cref/onetape/speed_main/Global Options/onetape/$$.
146 If $icode onetape$$ is true and $icode optimize$$ is true,
147 the optimized tape will be reused many times.
148 If $icode onetape$$ is false and $icode optimize$$ is true,
149 the tape will be re-optimized for each test.
150
151 $head Atomic Functions$$
152 There are some subtitle issue with optimized $cref atomic$$ functions
153 $latex v = g(u)$$:
154
155 $subhead rev_sparse_jac$$
156 The $cref atomic_two_rev_sparse_jac$$ function is be used to determine
157 which components of $icode u$$ affect the dependent variables of $icode f$$.
158 For each atomic operation, the current
159 $cref/atomic_sparsity/atomic_two_option/atomic_sparsity/$$ setting is used
160 to determine if $code pack_sparsity_enum$$, $code bool_sparsity_enum$$,
161 or $code set_sparsity_enum$$ is used to determine dependency relations
162 between argument and result variables.
163
164 $subhead nan$$
165 If $icode%u%[%i%]%$$ does not affect the value of
166 the dependent variables for $icode f$$,
167 the value of $icode%u%[%i%]%$$ is set to $cref nan$$.
168
169 $head Checking Optimization$$
170 If $cref/NDEBUG/Faq/Speed/NDEBUG/$$ is not defined,
171 and $cref/f.size_order()/size_order/$$ is greater than zero,
172 a $cref forward_zero$$ calculation is done using the optimized version
173 of $icode f$$ and the results are checked to see that they are
174 the same as before.
175 If they are not the same, the
176 $cref ErrorHandler$$ is called with a known error message
177 related to $icode%f%.optimize()%$$.
178
179 $head exceed_collision_limit$$
180 If the return value $icode flag$$ is true (false),
181 the previous call to $icode%f%.optimize%$$ exceed the
182 $cref/collision_limit/optimize/options/collision_limit=value/$$.
183
184 $head Examples$$
185 $comment childtable without Example instead of Contents for header$$
186 $children%
187 example/optimize/optimize_twice.cpp
188 %example/optimize/forward_active.cpp
189 %example/optimize/reverse_active.cpp
190 %example/optimize/compare_op.cpp
191 %example/optimize/print_for.cpp
192 %example/optimize/conditional_skip.cpp
193 %example/optimize/nest_conditional.cpp
194 %example/optimize/cumulative_sum.cpp
195 %$$
196 $table
197 $rref optimize_twice.cpp$$
198 $rref optimize_forward_active.cpp$$
199 $rref optimize_reverse_active.cpp$$
200 $rref optimize_compare_op.cpp$$
201 $rref optimize_print_for.cpp$$
202 $rref optimize_conditional_skip.cpp$$
203 $rref optimize_nest_conditional.cpp$$
204 $rref optimize_cumulative_sum.cpp$$
205 $tend
206
207 $end
208 -----------------------------------------------------------------------------
209 */
210 # include <cppad/local/optimize/optimize_run.hpp>
211 /*!
212 \file optimize.hpp
213 Optimize a player object operation sequence
214 */
215 namespace CppAD { // BEGIN_CPPAD_NAMESPACE
216 /*!
217 Optimize a player object operation sequence
218
219 The operation sequence for this object is replaced by one with fewer operations
220 but the same funcition and derivative values.
221
222 \tparam Base
223 base type for the operator; i.e., this operation was recorded
224 using AD<Base> and computations by this routine are done using type
225 Base.
226
227 \param options
228 \li
229 If the sub-string "no_conditional_skip" appears,
230 conditional skip operations will not be generated.
231 This may make the optimize routine use significantly less memory
232 and take significantly less time.
233 \li
234 If the sub-string "no_compare_op" appears,
235 then comparison operators will be removed from the optimized tape.
236 These operators are necessary for the compare_change function to be
237 be meaningful in the resulting recording.
238 On the other hand, they are not necessary and take extra time
239 when compare_change is not used.
240 */
241 template <class Base, class RecBase>
optimize(const std::string & options)242 void ADFun<Base,RecBase>::optimize(const std::string& options)
243 {
244 # if CPPAD_CORE_OPTIMIZE_PRINT_RESULT
245 // size of operation sequence before optimizatiton
246 size_t size_op_before = size_op();
247 # endif
248
249 // place to store the optimized version of the recording
250 local::recorder<Base> rec;
251
252 // number of independent variables
253 size_t n = ind_taddr_.size();
254
255 # ifndef NDEBUG
256 size_t i, j, m = dep_taddr_.size();
257 CppAD::vector<Base> x(n), y(m), check(m);
258 Base max_taylor(0);
259 bool check_zero_order = num_order_taylor_ > 0;
260 if( check_zero_order )
261 { // zero order coefficients for independent vars
262 for(j = 0; j < n; j++)
263 { CPPAD_ASSERT_UNKNOWN( play_.GetOp(j+1) == local::InvOp );
264 CPPAD_ASSERT_UNKNOWN( ind_taddr_[j] == j+1 );
265 x[j] = taylor_[ ind_taddr_[j] * cap_order_taylor_ + 0];
266 }
267 // zero order coefficients for dependent vars
268 for(i = 0; i < m; i++)
269 { CPPAD_ASSERT_UNKNOWN( dep_taddr_[i] < num_var_tape_ );
270 y[i] = taylor_[ dep_taddr_[i] * cap_order_taylor_ + 0];
271 }
272 // maximum zero order coefficient not counting BeginOp at beginning
273 // (which is correpsonds to uninitialized memory).
274 for(i = 1; i < num_var_tape_; i++)
275 { if( abs_geq(taylor_[i*cap_order_taylor_+0] , max_taylor) )
276 max_taylor = taylor_[i*cap_order_taylor_+0];
277 }
278 }
279 # endif
280
281 // create the optimized recording
282 size_t exceed = false;
283 switch( play_.address_type() )
284 {
285 case local::play::unsigned_short_enum:
286 exceed = local::optimize::optimize_run<unsigned short>(
287 options, n, dep_taddr_, &play_, &rec
288 );
289 break;
290
291 case local::play::unsigned_int_enum:
292 exceed = local::optimize::optimize_run<unsigned int>(
293 options, n, dep_taddr_, &play_, &rec
294 );
295 break;
296
297 case local::play::size_t_enum:
298 exceed = local::optimize::optimize_run<size_t>(
299 options, n, dep_taddr_, &play_, &rec
300 );
301 break;
302
303 default:
304 CPPAD_ASSERT_UNKNOWN(false);
305 }
306 exceed_collision_limit_ = exceed;
307
308 // number of variables in the recording
309 num_var_tape_ = rec.num_var_rec();
310
311 // now replace the recording
312 play_.get_recording(rec, n);
313
314 // set flag so this function knows it has been optimized
315 has_been_optimized_ = true;
316
317 // free memory allocated for sparse Jacobian calculation
318 // (the results are no longer valid)
319 for_jac_sparse_pack_.resize(0, 0);
320 for_jac_sparse_set_.resize(0,0);
321
322 // free old Taylor coefficient memory
323 taylor_.clear();
324 num_order_taylor_ = 0;
325 cap_order_taylor_ = 0;
326
327 // resize and initilaize conditional skip vector
328 // (must use player size because it now has the recoreder information)
329 cskip_op_.resize( play_.num_op_rec() );
330
331 // resize subgraph_info_
332 subgraph_info_.resize(
333 ind_taddr_.size(), // n_ind
334 dep_taddr_.size(), // n_dep
335 play_.num_op_rec(), // n_op
336 play_.num_var_rec() // n_var
337 );
338
339 # ifndef NDEBUG
340 if( check_zero_order )
341 { std::stringstream s;
342 //
343 // zero order forward calculation using new operation sequence
344 check = Forward(0, x, s);
345
346 // check results
347 Base eps99 = Base(99) * CppAD::numeric_limits<Base>::epsilon();
348 for(i = 0; i < m; i++)
349 if( ! abs_geq( eps99 * max_taylor , check[i] - y[i] ) )
350 { std::string msg = "Error during check of f.optimize().";
351 msg += "\neps99 * max_taylor = " + to_string(eps99 * max_taylor);
352 msg += "\ncheck[i] = " + to_string(check[i]);
353 msg += "\ny[i] = " + to_string(y[i]);
354 CPPAD_ASSERT_KNOWN(
355 abs_geq( eps99 * max_taylor , check[i] - y[i] ) ,
356 msg.c_str()
357 );
358 }
359
360 // Erase memory that this calculation was done so NDEBUG gives
361 // same final state for this object (from users perspective)
362 num_order_taylor_ = 0;
363 }
364 # endif
365 # if CPPAD_CORE_OPTIMIZE_PRINT_RESULT
366 // size of operation sequence after optimizatiton
367 size_t size_op_after = size_op();
368 std::cout << "optimize: size_op: before = " <<
369 size_op_before << ", after = " << size_op_after << "\n";
370 # endif
371 }
372
373 } // END_CPPAD_NAMESPACE
374
375 # undef CPPAD_CORE_OPTIMIZE_PRINT_RESULT
376 # endif
377