1 // Copyright (C) 2017  Davis E. King (davis@dlib.net)
2 // License: Boost Software License   See LICENSE.txt for the full license.
3 #ifndef DLIB_FiND_GLOBAL_MAXIMUM_hH_
4 #define DLIB_FiND_GLOBAL_MAXIMUM_hH_
5 
6 #include "find_max_global_abstract.h"
7 #include "global_function_search.h"
8 #include "../metaprogramming.h"
9 #include <utility>
10 #include <chrono>
11 #include <memory>
12 #include <thread>
13 #include <functional>
14 #include "../threads/thread_pool_extension.h"
15 #include "../statistics/statistics.h"
16 #include "../enable_if.h"
17 
18 namespace dlib
19 {
20     namespace gopt_impl
21     {
22 
23     // ----------------------------------------------------------------------------------------
24 
25         class disable_decay_to_scalar
26         {
27             const matrix<double,0,1>& a;
28         public:
disable_decay_to_scalar(const matrix<double,0,1> & a)29             disable_decay_to_scalar(const matrix<double,0,1>& a) : a(a){}
30             operator const matrix<double,0,1>&() const { return a;}
31         };
32 
33 
34         template <typename T, size_t... indices>
35         auto _cwv (
36             T&& f,
37             const matrix<double,0,1>& a,
38             compile_time_integer_list<indices...>
39         ) -> decltype(f(a(indices-1)...))
40         {
41             DLIB_CASSERT(a.size() == sizeof...(indices),
42                 "You invoked dlib::call_function_and_expand_args(f,a) but the number of arguments expected by f() doesn't match the size of 'a'. "
43                 << "Expected " << sizeof...(indices) << " arguments but got " << a.size() << "."
44             );
45             return f(a(indices-1)...);
46         }
47 
48         // Visual studio, as of November 2017, doesn't support C++11 and can't compile this code.
49         // So we write the terrible garbage in the #else for visual studio.  When Visual Studio supports C++11 I'll update this #ifdef to use the C++11 code.
50 #ifndef _MSC_VER
51         template <size_t max_unpack>
52         struct call_function_and_expand_args
53         {
54             template <typename T>
55             static auto go(T&& f, const matrix<double,0,1>& a) -> decltype(_cwv(std::forward<T>(f),a,typename make_compile_time_integer_range<max_unpack>::type()))
56             {
57                 return _cwv(std::forward<T>(f),a,typename make_compile_time_integer_range<max_unpack>::type());
58             }
59 
60             template <typename T>
61             static auto go(T&& f, const matrix<double,0,1>& a) -> decltype(call_function_and_expand_args<max_unpack-1>::template go(std::forward<T>(f),a))
62             {
63                 return call_function_and_expand_args<max_unpack-1>::go(std::forward<T>(f),a);
64             }
65         };
66 
67         template <>
68         struct call_function_and_expand_args<0>
69         {
70             template <typename T>
71             static auto go(T&& f, const matrix<double,0,1>& a) -> decltype(f(disable_decay_to_scalar(a)))
72             {
73                 return f(disable_decay_to_scalar(a));
74             }
75         };
76 #else
77         template <size_t max_unpack>
78         struct call_function_and_expand_args
79         {
80 template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(disable_decay_to_scalar(a)))  {return f(disable_decay_to_scalar(a));   }
81 template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0))) { DLIB_CASSERT(a.size() == 1); return f(a(0)); }
82 template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0),a(1))) { DLIB_CASSERT(a.size() == 2); return f(a(0),a(1)); }
83 template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0), a(1), a(2))) { DLIB_CASSERT(a.size() == 3); return f(a(0), a(1),a(2)); }
84 template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0), a(1), a(2), a(3))) { DLIB_CASSERT(a.size() == 4); return f(a(0), a(1), a(2), a(3)); }
85 template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0), a(1), a(2), a(3), a(4))) { DLIB_CASSERT(a.size() == 5); return f(a(0), a(1), a(2), a(3), a(4)); }
86 template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0), a(1), a(2), a(3), a(4), a(5))) { DLIB_CASSERT(a.size() == 6); return f(a(0), a(1), a(2), a(3), a(4), a(5)); }
87 template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0), a(1), a(2), a(3), a(4), a(5), a(6))) { DLIB_CASSERT(a.size() == 7); return f(a(0), a(1), a(2), a(3), a(4), a(5), a(6)); }
88         };
89 #endif
90     }
91 
92 // ----------------------------------------------------------------------------------------
93 // ----------------------------------------------------------------------------------------
94 
95     template <typename T>
96     auto call_function_and_expand_args(
97         T&& f,
98         const matrix<double,0,1>& a
99     ) -> decltype(gopt_impl::call_function_and_expand_args<40>::go(f,a))
100     {
101         // unpack up to 40 parameters when calling f()
102         return gopt_impl::call_function_and_expand_args<40>::go(std::forward<T>(f),a);
103     }
104 
105 // ----------------------------------------------------------------------------------------
106 // ----------------------------------------------------------------------------------------
107 
108     struct max_function_calls
109     {
110         max_function_calls() = default;
111         explicit max_function_calls(size_t max_calls) : max_calls(max_calls) {}
112         size_t max_calls = std::numeric_limits<size_t>::max();
113     };
114 
115 // ----------------------------------------------------------------------------------------
116 
117     const auto FOREVER = std::chrono::hours(24*365*290); // 290 years
118     using stop_condition = std::function<bool(double)>;
119     const stop_condition never_stop_early = [](double) { return false; };
120 
121 // ----------------------------------------------------------------------------------------
122 
123     namespace impl
124     {
125         template <
126             typename funct
127             >
128         std::pair<size_t,function_evaluation> find_max_global (
129             double ymult,
130             thread_pool& tp,
131             std::vector<funct>& functions,
132             std::vector<function_spec> specs,
133             const max_function_calls num,
134             const std::chrono::nanoseconds max_runtime = FOREVER,
135             double solver_epsilon = 0,
136             std::vector<std::vector<function_evaluation>> initial_function_evals = {},
137             stop_condition should_stop = never_stop_early
138         )
139         {
140             // Decide which parameters should be searched on a log scale.  Basically, it's
141             // common for machine learning models to have parameters that should be searched on
142             // a log scale (e.g. SVM C).  These parameters are usually identifiable because
143             // they have bounds like [1e-5 1e10], that is, they span a very large range of
144             // magnitudes from really small to really big.  So there we are going to check for
145             // that and if we find parameters with that kind of bound constraints we will
146             // transform them to a log scale automatically.
147             std::vector<std::vector<bool>> log_scale(specs.size());
148             for (size_t i = 0; i < specs.size(); ++i)
149             {
150                 for (long j = 0; j < specs[i].lower.size(); ++j)
151                 {
152                     if (!specs[i].is_integer_variable[j] && specs[i].lower(j) > 0 && specs[i].upper(j)/specs[i].lower(j) >= 1000)
153                     {
154                         log_scale[i].push_back(true);
155                         specs[i].lower(j) = std::log(specs[i].lower(j));
156                         specs[i].upper(j) = std::log(specs[i].upper(j));
157                     }
158                     else
159                     {
160                         log_scale[i].push_back(false);
161                     }
162                 }
163             }
164 
165             if (initial_function_evals.empty())
166             {
167                 initial_function_evals.resize(specs.size());
168             }
169             for (auto& evals : initial_function_evals) {
170                 for (auto& eval : evals) {
171                     eval.y *= ymult;
172                 }
173             }
174 
175             global_function_search opt(specs, {initial_function_evals});
176             opt.set_solver_epsilon(solver_epsilon);
177 
178             running_stats_decayed<double> objective_funct_eval_time(functions.size()*5);
179             std::mutex eval_time_mutex;
180             using namespace std::chrono;
181 
182             const auto time_to_stop = steady_clock::now() + max_runtime;
183             //atomic<bool> doesn't support .fetch_or, use std::atomic<int> instead
184             std::atomic<int> this_should_stop{false};
185 
186             double max_solver_overhead_time = 0;
187 
188             // Now run the main solver loop.
189             for (size_t i = 0; i < num.max_calls && steady_clock::now() < time_to_stop && !this_should_stop.load(); ++i)
190             {
191                 const auto get_next_x_start_time = steady_clock::now();
192                 auto next = std::make_shared<function_evaluation_request>(opt.get_next_x());
193                 const auto get_next_x_runtime = steady_clock::now() - get_next_x_start_time;
194 
195                 auto execute_call = [&functions,&ymult,&log_scale,&eval_time_mutex,&objective_funct_eval_time,next,&should_stop,&this_should_stop]() {
196                     matrix<double,0,1> x = next->x();
197                     // Undo any log-scaling that was applied to the variables before we pass them
198                     // to the functions being optimized.
199                     for (long j = 0; j < x.size(); ++j)
200                     {
201                         if (log_scale[next->function_idx()][j])
202                             x(j) = std::exp(x(j));
203                     }
204                     const auto funct_eval_start = steady_clock::now();
205                     double y = ymult*call_function_and_expand_args(functions[next->function_idx()], x);
206                     const double funct_eval_runtime = duration_cast<nanoseconds>(steady_clock::now() - funct_eval_start).count();
207                     this_should_stop.fetch_or(should_stop(y*ymult));
208                     next->set(y);
209 
210                     std::lock_guard<std::mutex> lock(eval_time_mutex);
211                     objective_funct_eval_time.add(funct_eval_runtime);
212                 };
213 
214                 tp.add_task_by_value(execute_call);
215 
216                 std::lock_guard<std::mutex> lock(eval_time_mutex);
217                 const double obj_funct_time = objective_funct_eval_time.mean()/std::max(1ul,tp.num_threads_in_pool());
218                 const double solver_overhead_time = duration_cast<nanoseconds>(get_next_x_runtime).count();
219                 max_solver_overhead_time = std::max(max_solver_overhead_time, solver_overhead_time);
220                 // Don't start thinking about the logic below until we have at least 5 objective
221                 // function samples for each objective function.  This way we have a decent idea how
222                 // fast these things are.  The solver overhead is really small initially so none of
223                 // the stuff below really matters in the beginning anyway.
224                 if (objective_funct_eval_time.current_n() > functions.size()*5)
225                 {
226                     // If calling opt.get_next_x() is taking a long time relative to how long it takes
227                     // to evaluate the objective function then we should spend less time grinding on the
228                     // internal details of the optimizer and more time running the actual objective
229                     // function.  E.g. if we could just run 2x more objective function calls in the same
230                     // amount of time then we should just do that.  The main slowness in the solver is
231                     // from the Monte Carlo sampling, which we can turn down if the objective function
232                     // is really fast to evaluate.  This is because the point of the Monte Carlo part is
233                     // to try really hard to avoid calls to really expensive objective functions.  But
234                     // if the objective function is not expensive then we should just call it.
235                     if (obj_funct_time < solver_overhead_time)
236                     {
237                         // Reduce the amount of Monte Carlo sampling we do.  If it goes low enough
238                         // we will disable it altogether.
239                         const size_t new_val = static_cast<size_t>(std::floor(opt.get_monte_carlo_upper_bound_sample_num()*0.8));
240                         opt.set_monte_carlo_upper_bound_sample_num(std::max<size_t>(1, new_val));
241                         // At this point just disable the upper bounding Monte Carlo search stuff and
242                         // use only pure random search since the objective function is super cheap to
243                         // evaluate, making this more fancy search a waste of time.
244                         if (opt.get_monte_carlo_upper_bound_sample_num() == 1)
245                         {
246                             opt.set_pure_random_search_probability(1);
247                         }
248                     } else if (obj_funct_time > 1.5*max_solver_overhead_time) // Consider reenabling
249                     {
250                         // The Monte Carlo overhead grows over time as the solver accumulates more
251                         // information about the objective function.  So we only want to reenable it
252                         // or make it bigger if the objective function really is more expensive.  So
253                         // we compare to the max solver runtime we have seen so far. If the
254                         // objective function has suddenly gotten more expensive then we start to
255                         // turn the Monte Carlo modeling back on.
256                         const size_t new_val = static_cast<size_t>(std::ceil(opt.get_monte_carlo_upper_bound_sample_num()*1.28));
257                         opt.set_monte_carlo_upper_bound_sample_num(std::min<size_t>(5000, new_val));
258                         // Set this back to its default value.
259                         opt.set_pure_random_search_probability(0.02);
260                     }
261                 }
262             }
263             tp.wait_for_all_tasks();
264 
265 
266             matrix<double,0,1> x;
267             double y;
268             size_t function_idx;
269             opt.get_best_function_eval(x,y,function_idx);
270             // Undo any log-scaling that was applied to the variables before we output them.
271             for (long j = 0; j < x.size(); ++j)
272             {
273                 if (log_scale[function_idx][j])
274                     x(j) = std::exp(x(j));
275             }
276             return std::make_pair(function_idx, function_evaluation(x,y/ymult));
277         }
278 
279         // This overload allows the order of max_runtime and num to be reversed.
280         template <
281             typename funct,
282             typename ...Args
283             >
284         std::pair<size_t,function_evaluation> find_max_global (
285             double ymult,
286             thread_pool& tp,
287             std::vector<funct>& functions,
288             std::vector<function_spec> specs,
289             const std::chrono::nanoseconds max_runtime,
290             const max_function_calls num,
291             double solver_epsilon = 0,
292             Args&& ...args
293         )
294         {
295             return find_max_global(ymult, tp, functions, std::move(specs), num, max_runtime, solver_epsilon, std::forward<Args>(args)...);
296         }
297 
298         // This overload allows the num argument to be skipped.
299         template <
300             typename funct,
301             typename ...Args
302             >
303         std::pair<size_t,function_evaluation> find_max_global (
304             double ymult,
305             thread_pool& tp,
306             std::vector<funct>& functions,
307             std::vector<function_spec> specs,
308             const std::chrono::nanoseconds max_runtime,
309             double solver_epsilon = 0,
310             Args&& ...args
311         )
312         {
313             return find_max_global(ymult, tp, functions, std::move(specs), max_function_calls(), max_runtime, solver_epsilon, std::forward<Args>(args)...);
314         }
315 
316         // This overload allows the max_runtime argument to be skipped.
317         template <
318             typename funct,
319             typename ...Args
320             >
321         std::pair<size_t,function_evaluation> find_max_global (
322             double ymult,
323             thread_pool& tp,
324             std::vector<funct>& functions,
325             std::vector<function_spec> specs,
326             const max_function_calls num,
327             double solver_epsilon,
328             Args&& ...args
329         )
330         {
331             return find_max_global(ymult, tp, functions, std::move(specs), num, FOREVER, solver_epsilon, std::forward<Args>(args)...);
332         }
333 
334         // This overload makes the thread_pool argument optional.
335         template <
336             typename funct,
337             typename ...Args
338             >
339         std::pair<size_t,function_evaluation> find_max_global (
340             double ymult,
341             std::vector<funct>& functions,
342             Args&& ...args
343         )
344         {
345             // disabled, don't use any threads
346             thread_pool tp(0);
347 
348             return find_max_global(ymult, tp, functions, std::forward<Args>(args)...);
349         }
350 
351         // The point of normalize() is to handle some of the overloaded argument types in
352         // find_max_global() instances below and turn them into the argument types expected by
353         // find_max_global() above.
354         template <typename T>
355         const T& normalize(const T& item)
356         {
357             return item;
358         }
359 
360         inline std::vector<std::vector<function_evaluation>> normalize(
361             const std::vector<function_evaluation>& initial_function_evals
362         )
363         {
364             return {initial_function_evals};
365         }
366     }
367 
368 // ----------------------------------------------------------------------------------------
369 
370     template <
371         typename funct,
372         typename ...Args
373         >
374     std::pair<size_t,function_evaluation> find_max_global (
375         std::vector<funct>& functions,
376         std::vector<function_spec> specs,
377         Args&& ...args
378     )
379     {
380         return impl::find_max_global(+1, functions, std::move(specs), std::forward<Args>(args)...);
381     }
382 
383     template <
384         typename funct,
385         typename ...Args
386         >
387     std::pair<size_t,function_evaluation> find_min_global (
388         std::vector<funct>& functions,
389         std::vector<function_spec> specs,
390         Args&& ...args
391     )
392     {
393         return impl::find_max_global(-1, functions, std::move(specs), std::forward<Args>(args)...);
394     }
395 
396     template <
397         typename funct,
398         typename ...Args
399         >
400     std::pair<size_t,function_evaluation> find_max_global (
401         thread_pool& tp,
402         std::vector<funct>& functions,
403         std::vector<function_spec> specs,
404         Args&& ...args
405     )
406     {
407         return impl::find_max_global(+1, tp, functions, std::move(specs), std::forward<Args>(args)...);
408     }
409 
410     template <
411         typename funct,
412         typename ...Args
413         >
414     std::pair<size_t,function_evaluation> find_min_global (
415         thread_pool& tp,
416         std::vector<funct>& functions,
417         std::vector<function_spec> specs,
418         Args&& ...args
419     )
420     {
421         return impl::find_max_global(-1, tp, functions, std::move(specs), std::forward<Args>(args)...);
422     }
423 
424 // ----------------------------------------------------------------------------------------
425 
426 // Overloads that take function objects and simple matrix bounds instead of function_specs.
427     template <
428         typename funct,
429         typename ...Args
430         >
431     function_evaluation find_max_global (
432         funct f,
433         const matrix<double,0,1>& bound1,
434         const matrix<double,0,1>& bound2,
435         const std::vector<bool>& is_integer_variable,
436         Args&& ...args
437     )
438     {
439         std::vector<funct> functions(1,std::move(f));
440         std::vector<function_spec> specs(1, function_spec(bound1, bound2, is_integer_variable));
441         return find_max_global(functions, std::move(specs), impl::normalize(args)...).second;
442     }
443 
444     template <
445         typename funct,
446         typename ...Args
447         >
448     function_evaluation find_min_global (
449         funct f,
450         const matrix<double,0,1>& bound1,
451         const matrix<double,0,1>& bound2,
452         const std::vector<bool>& is_integer_variable,
453         Args&& ...args
454     )
455     {
456         std::vector<funct> functions(1,std::move(f));
457         std::vector<function_spec> specs(1, function_spec(bound1, bound2, is_integer_variable));
458         return find_min_global(functions, std::move(specs), impl::normalize(args)...).second;
459     }
460 
461     template <
462         typename funct,
463         typename ...Args
464         >
465     function_evaluation find_max_global (
466         thread_pool& tp,
467         funct f,
468         const matrix<double,0,1>& bound1,
469         const matrix<double,0,1>& bound2,
470         const std::vector<bool>& is_integer_variable,
471         Args&& ...args
472     )
473     {
474         std::vector<funct> functions(1,std::move(f));
475         std::vector<function_spec> specs(1, function_spec(bound1, bound2, is_integer_variable));
476         return find_max_global(tp, functions, std::move(specs), impl::normalize(args)...).second;
477     }
478 
479     template <
480         typename funct,
481         typename ...Args
482         >
483     function_evaluation find_min_global (
484         thread_pool& tp,
485         funct f,
486         const matrix<double,0,1>& bound1,
487         const matrix<double,0,1>& bound2,
488         const std::vector<bool>& is_integer_variable,
489         Args&& ...args
490     )
491     {
492         std::vector<funct> functions(1,std::move(f));
493         std::vector<function_spec> specs(1, function_spec(bound1, bound2, is_integer_variable));
494         return find_min_global(tp, functions, std::move(specs), impl::normalize(args)...).second;
495     }
496 
497 // ----------------------------------------------------------------------------------------
498 
499 // overloads that are the same as above, but is_integer_variable defaulted to false for all parameters.
500     template <
501         typename funct,
502         typename T,
503         typename ...Args
504         >
505     typename disable_if<std::is_same<T,std::vector<bool>>, function_evaluation>::type
506     find_max_global (
507         funct f,
508         const matrix<double,0,1>& bound1,
509         const matrix<double,0,1>& bound2,
510         const T& arg,
511         Args&& ...args
512     )
513     {
514         const std::vector<bool> is_integer_variable(bound1.size(),false);
515         return find_max_global(std::move(f), bound1, bound2, is_integer_variable, arg, impl::normalize(args)...);
516     }
517 
518     template <
519         typename funct,
520         typename T,
521         typename ...Args
522         >
523     typename disable_if<std::is_same<T,std::vector<bool>>, function_evaluation>::type
524     find_min_global (
525         funct f,
526         const matrix<double,0,1>& bound1,
527         const matrix<double,0,1>& bound2,
528         const T& arg,
529         Args&& ...args
530     )
531     {
532         const std::vector<bool> is_integer_variable(bound1.size(),false);
533         return find_min_global(std::move(f), bound1, bound2, is_integer_variable, arg, impl::normalize(args)...);
534     }
535 
536     template <
537         typename funct,
538         typename T,
539         typename ...Args
540         >
541     typename disable_if<std::is_same<T,std::vector<bool>>, function_evaluation>::type
542     find_max_global (
543         thread_pool& tp,
544         funct f,
545         const matrix<double,0,1>& bound1,
546         const matrix<double,0,1>& bound2,
547         const T& arg,
548         Args&& ...args
549     )
550     {
551         const std::vector<bool> is_integer_variable(bound1.size(),false);
552         return find_max_global(tp, std::move(f), bound1, bound2, is_integer_variable, arg, impl::normalize(args)...);
553     }
554 
555     template <
556         typename funct,
557         typename T,
558         typename ...Args
559         >
560     typename disable_if<std::is_same<T,std::vector<bool>>, function_evaluation>::type
561     find_min_global (
562         thread_pool& tp,
563         funct f,
564         const matrix<double,0,1>& bound1,
565         const matrix<double,0,1>& bound2,
566         const T& arg,
567         Args&& ...args
568     )
569     {
570         const std::vector<bool> is_integer_variable(bound1.size(),false);
571         return find_min_global(tp, std::move(f), bound1, bound2, is_integer_variable, arg, impl::normalize(args)...);
572     }
573 
574 // ----------------------------------------------------------------------------------------
575 
576 // overloads for a function taking a single scalar.
577     template <
578         typename funct,
579         typename T,
580         typename ...Args
581         >
582     function_evaluation find_max_global (
583         funct f,
584         const double bound1,
585         const double bound2,
586         const T& arg,
587         Args&& ...args
588     )
589     {
590         return find_max_global(std::move(f), matrix<double,0,1>({bound1}), matrix<double,0,1>({bound2}), arg, impl::normalize(args)...);
591     }
592 
593     template <
594         typename funct,
595         typename T,
596         typename ...Args
597         >
598     function_evaluation find_min_global (
599         funct f,
600         const double bound1,
601         const double bound2,
602         const T& arg,
603         Args&& ...args
604     )
605     {
606         return find_min_global(std::move(f), matrix<double,0,1>({bound1}), matrix<double,0,1>({bound2}), arg, impl::normalize(args)...);
607     }
608 
609     template <
610         typename funct,
611         typename T,
612         typename ...Args
613         >
614     function_evaluation find_max_global (
615         thread_pool& tp,
616         funct f,
617         const double bound1,
618         const double bound2,
619         const T& arg,
620         Args&& ...args
621     )
622     {
623         return find_max_global(tp, std::move(f), matrix<double,0,1>({bound1}), matrix<double,0,1>({bound2}), arg, impl::normalize(args)...);
624     }
625 
626     template <
627         typename funct,
628         typename T,
629         typename ...Args
630         >
631     function_evaluation find_min_global (
632         thread_pool& tp,
633         funct f,
634         const double bound1,
635         const double bound2,
636         const T& arg,
637         Args&& ...args
638     )
639     {
640         return find_min_global(tp, std::move(f), matrix<double,0,1>({bound1}), matrix<double,0,1>({bound2}), arg, impl::normalize(args)...);
641     }
642 
643 // ----------------------------------------------------------------------------------------
644 
645 }
646 
647 #endif // DLIB_FiND_GLOBAL_MAXIMUM_hH_
648 
649