1 // Copyright (C) 2017 Davis E. King (davis@dlib.net) 2 // License: Boost Software License See LICENSE.txt for the full license. 3 #ifndef DLIB_FiND_GLOBAL_MAXIMUM_hH_ 4 #define DLIB_FiND_GLOBAL_MAXIMUM_hH_ 5 6 #include "find_max_global_abstract.h" 7 #include "global_function_search.h" 8 #include "../metaprogramming.h" 9 #include <utility> 10 #include <chrono> 11 #include <memory> 12 #include <thread> 13 #include <functional> 14 #include "../threads/thread_pool_extension.h" 15 #include "../statistics/statistics.h" 16 #include "../enable_if.h" 17 18 namespace dlib 19 { 20 namespace gopt_impl 21 { 22 23 // ---------------------------------------------------------------------------------------- 24 25 class disable_decay_to_scalar 26 { 27 const matrix<double,0,1>& a; 28 public: disable_decay_to_scalar(const matrix<double,0,1> & a)29 disable_decay_to_scalar(const matrix<double,0,1>& a) : a(a){} 30 operator const matrix<double,0,1>&() const { return a;} 31 }; 32 33 34 template <typename T, size_t... indices> 35 auto _cwv ( 36 T&& f, 37 const matrix<double,0,1>& a, 38 compile_time_integer_list<indices...> 39 ) -> decltype(f(a(indices-1)...)) 40 { 41 DLIB_CASSERT(a.size() == sizeof...(indices), 42 "You invoked dlib::call_function_and_expand_args(f,a) but the number of arguments expected by f() doesn't match the size of 'a'. " 43 << "Expected " << sizeof...(indices) << " arguments but got " << a.size() << "." 44 ); 45 return f(a(indices-1)...); 46 } 47 48 // Visual studio, as of November 2017, doesn't support C++11 and can't compile this code. 49 // So we write the terrible garbage in the #else for visual studio. When Visual Studio supports C++11 I'll update this #ifdef to use the C++11 code. 50 #ifndef _MSC_VER 51 template <size_t max_unpack> 52 struct call_function_and_expand_args 53 { 54 template <typename T> 55 static auto go(T&& f, const matrix<double,0,1>& a) -> decltype(_cwv(std::forward<T>(f),a,typename make_compile_time_integer_range<max_unpack>::type())) 56 { 57 return _cwv(std::forward<T>(f),a,typename make_compile_time_integer_range<max_unpack>::type()); 58 } 59 60 template <typename T> 61 static auto go(T&& f, const matrix<double,0,1>& a) -> decltype(call_function_and_expand_args<max_unpack-1>::template go(std::forward<T>(f),a)) 62 { 63 return call_function_and_expand_args<max_unpack-1>::go(std::forward<T>(f),a); 64 } 65 }; 66 67 template <> 68 struct call_function_and_expand_args<0> 69 { 70 template <typename T> 71 static auto go(T&& f, const matrix<double,0,1>& a) -> decltype(f(disable_decay_to_scalar(a))) 72 { 73 return f(disable_decay_to_scalar(a)); 74 } 75 }; 76 #else 77 template <size_t max_unpack> 78 struct call_function_and_expand_args 79 { 80 template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(disable_decay_to_scalar(a))) {return f(disable_decay_to_scalar(a)); } 81 template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0))) { DLIB_CASSERT(a.size() == 1); return f(a(0)); } 82 template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0),a(1))) { DLIB_CASSERT(a.size() == 2); return f(a(0),a(1)); } 83 template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0), a(1), a(2))) { DLIB_CASSERT(a.size() == 3); return f(a(0), a(1),a(2)); } 84 template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0), a(1), a(2), a(3))) { DLIB_CASSERT(a.size() == 4); return f(a(0), a(1), a(2), a(3)); } 85 template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0), a(1), a(2), a(3), a(4))) { DLIB_CASSERT(a.size() == 5); return f(a(0), a(1), a(2), a(3), a(4)); } 86 template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0), a(1), a(2), a(3), a(4), a(5))) { DLIB_CASSERT(a.size() == 6); return f(a(0), a(1), a(2), a(3), a(4), a(5)); } 87 template <typename T> static auto go(T&& f, const matrix<double, 0, 1>& a) -> decltype(f(a(0), a(1), a(2), a(3), a(4), a(5), a(6))) { DLIB_CASSERT(a.size() == 7); return f(a(0), a(1), a(2), a(3), a(4), a(5), a(6)); } 88 }; 89 #endif 90 } 91 92 // ---------------------------------------------------------------------------------------- 93 // ---------------------------------------------------------------------------------------- 94 95 template <typename T> 96 auto call_function_and_expand_args( 97 T&& f, 98 const matrix<double,0,1>& a 99 ) -> decltype(gopt_impl::call_function_and_expand_args<40>::go(f,a)) 100 { 101 // unpack up to 40 parameters when calling f() 102 return gopt_impl::call_function_and_expand_args<40>::go(std::forward<T>(f),a); 103 } 104 105 // ---------------------------------------------------------------------------------------- 106 // ---------------------------------------------------------------------------------------- 107 108 struct max_function_calls 109 { 110 max_function_calls() = default; 111 explicit max_function_calls(size_t max_calls) : max_calls(max_calls) {} 112 size_t max_calls = std::numeric_limits<size_t>::max(); 113 }; 114 115 // ---------------------------------------------------------------------------------------- 116 117 const auto FOREVER = std::chrono::hours(24*365*290); // 290 years 118 using stop_condition = std::function<bool(double)>; 119 const stop_condition never_stop_early = [](double) { return false; }; 120 121 // ---------------------------------------------------------------------------------------- 122 123 namespace impl 124 { 125 template < 126 typename funct 127 > 128 std::pair<size_t,function_evaluation> find_max_global ( 129 double ymult, 130 thread_pool& tp, 131 std::vector<funct>& functions, 132 std::vector<function_spec> specs, 133 const max_function_calls num, 134 const std::chrono::nanoseconds max_runtime = FOREVER, 135 double solver_epsilon = 0, 136 std::vector<std::vector<function_evaluation>> initial_function_evals = {}, 137 stop_condition should_stop = never_stop_early 138 ) 139 { 140 // Decide which parameters should be searched on a log scale. Basically, it's 141 // common for machine learning models to have parameters that should be searched on 142 // a log scale (e.g. SVM C). These parameters are usually identifiable because 143 // they have bounds like [1e-5 1e10], that is, they span a very large range of 144 // magnitudes from really small to really big. So there we are going to check for 145 // that and if we find parameters with that kind of bound constraints we will 146 // transform them to a log scale automatically. 147 std::vector<std::vector<bool>> log_scale(specs.size()); 148 for (size_t i = 0; i < specs.size(); ++i) 149 { 150 for (long j = 0; j < specs[i].lower.size(); ++j) 151 { 152 if (!specs[i].is_integer_variable[j] && specs[i].lower(j) > 0 && specs[i].upper(j)/specs[i].lower(j) >= 1000) 153 { 154 log_scale[i].push_back(true); 155 specs[i].lower(j) = std::log(specs[i].lower(j)); 156 specs[i].upper(j) = std::log(specs[i].upper(j)); 157 } 158 else 159 { 160 log_scale[i].push_back(false); 161 } 162 } 163 } 164 165 if (initial_function_evals.empty()) 166 { 167 initial_function_evals.resize(specs.size()); 168 } 169 for (auto& evals : initial_function_evals) { 170 for (auto& eval : evals) { 171 eval.y *= ymult; 172 } 173 } 174 175 global_function_search opt(specs, {initial_function_evals}); 176 opt.set_solver_epsilon(solver_epsilon); 177 178 running_stats_decayed<double> objective_funct_eval_time(functions.size()*5); 179 std::mutex eval_time_mutex; 180 using namespace std::chrono; 181 182 const auto time_to_stop = steady_clock::now() + max_runtime; 183 //atomic<bool> doesn't support .fetch_or, use std::atomic<int> instead 184 std::atomic<int> this_should_stop{false}; 185 186 double max_solver_overhead_time = 0; 187 188 // Now run the main solver loop. 189 for (size_t i = 0; i < num.max_calls && steady_clock::now() < time_to_stop && !this_should_stop.load(); ++i) 190 { 191 const auto get_next_x_start_time = steady_clock::now(); 192 auto next = std::make_shared<function_evaluation_request>(opt.get_next_x()); 193 const auto get_next_x_runtime = steady_clock::now() - get_next_x_start_time; 194 195 auto execute_call = [&functions,&ymult,&log_scale,&eval_time_mutex,&objective_funct_eval_time,next,&should_stop,&this_should_stop]() { 196 matrix<double,0,1> x = next->x(); 197 // Undo any log-scaling that was applied to the variables before we pass them 198 // to the functions being optimized. 199 for (long j = 0; j < x.size(); ++j) 200 { 201 if (log_scale[next->function_idx()][j]) 202 x(j) = std::exp(x(j)); 203 } 204 const auto funct_eval_start = steady_clock::now(); 205 double y = ymult*call_function_and_expand_args(functions[next->function_idx()], x); 206 const double funct_eval_runtime = duration_cast<nanoseconds>(steady_clock::now() - funct_eval_start).count(); 207 this_should_stop.fetch_or(should_stop(y*ymult)); 208 next->set(y); 209 210 std::lock_guard<std::mutex> lock(eval_time_mutex); 211 objective_funct_eval_time.add(funct_eval_runtime); 212 }; 213 214 tp.add_task_by_value(execute_call); 215 216 std::lock_guard<std::mutex> lock(eval_time_mutex); 217 const double obj_funct_time = objective_funct_eval_time.mean()/std::max(1ul,tp.num_threads_in_pool()); 218 const double solver_overhead_time = duration_cast<nanoseconds>(get_next_x_runtime).count(); 219 max_solver_overhead_time = std::max(max_solver_overhead_time, solver_overhead_time); 220 // Don't start thinking about the logic below until we have at least 5 objective 221 // function samples for each objective function. This way we have a decent idea how 222 // fast these things are. The solver overhead is really small initially so none of 223 // the stuff below really matters in the beginning anyway. 224 if (objective_funct_eval_time.current_n() > functions.size()*5) 225 { 226 // If calling opt.get_next_x() is taking a long time relative to how long it takes 227 // to evaluate the objective function then we should spend less time grinding on the 228 // internal details of the optimizer and more time running the actual objective 229 // function. E.g. if we could just run 2x more objective function calls in the same 230 // amount of time then we should just do that. The main slowness in the solver is 231 // from the Monte Carlo sampling, which we can turn down if the objective function 232 // is really fast to evaluate. This is because the point of the Monte Carlo part is 233 // to try really hard to avoid calls to really expensive objective functions. But 234 // if the objective function is not expensive then we should just call it. 235 if (obj_funct_time < solver_overhead_time) 236 { 237 // Reduce the amount of Monte Carlo sampling we do. If it goes low enough 238 // we will disable it altogether. 239 const size_t new_val = static_cast<size_t>(std::floor(opt.get_monte_carlo_upper_bound_sample_num()*0.8)); 240 opt.set_monte_carlo_upper_bound_sample_num(std::max<size_t>(1, new_val)); 241 // At this point just disable the upper bounding Monte Carlo search stuff and 242 // use only pure random search since the objective function is super cheap to 243 // evaluate, making this more fancy search a waste of time. 244 if (opt.get_monte_carlo_upper_bound_sample_num() == 1) 245 { 246 opt.set_pure_random_search_probability(1); 247 } 248 } else if (obj_funct_time > 1.5*max_solver_overhead_time) // Consider reenabling 249 { 250 // The Monte Carlo overhead grows over time as the solver accumulates more 251 // information about the objective function. So we only want to reenable it 252 // or make it bigger if the objective function really is more expensive. So 253 // we compare to the max solver runtime we have seen so far. If the 254 // objective function has suddenly gotten more expensive then we start to 255 // turn the Monte Carlo modeling back on. 256 const size_t new_val = static_cast<size_t>(std::ceil(opt.get_monte_carlo_upper_bound_sample_num()*1.28)); 257 opt.set_monte_carlo_upper_bound_sample_num(std::min<size_t>(5000, new_val)); 258 // Set this back to its default value. 259 opt.set_pure_random_search_probability(0.02); 260 } 261 } 262 } 263 tp.wait_for_all_tasks(); 264 265 266 matrix<double,0,1> x; 267 double y; 268 size_t function_idx; 269 opt.get_best_function_eval(x,y,function_idx); 270 // Undo any log-scaling that was applied to the variables before we output them. 271 for (long j = 0; j < x.size(); ++j) 272 { 273 if (log_scale[function_idx][j]) 274 x(j) = std::exp(x(j)); 275 } 276 return std::make_pair(function_idx, function_evaluation(x,y/ymult)); 277 } 278 279 // This overload allows the order of max_runtime and num to be reversed. 280 template < 281 typename funct, 282 typename ...Args 283 > 284 std::pair<size_t,function_evaluation> find_max_global ( 285 double ymult, 286 thread_pool& tp, 287 std::vector<funct>& functions, 288 std::vector<function_spec> specs, 289 const std::chrono::nanoseconds max_runtime, 290 const max_function_calls num, 291 double solver_epsilon = 0, 292 Args&& ...args 293 ) 294 { 295 return find_max_global(ymult, tp, functions, std::move(specs), num, max_runtime, solver_epsilon, std::forward<Args>(args)...); 296 } 297 298 // This overload allows the num argument to be skipped. 299 template < 300 typename funct, 301 typename ...Args 302 > 303 std::pair<size_t,function_evaluation> find_max_global ( 304 double ymult, 305 thread_pool& tp, 306 std::vector<funct>& functions, 307 std::vector<function_spec> specs, 308 const std::chrono::nanoseconds max_runtime, 309 double solver_epsilon = 0, 310 Args&& ...args 311 ) 312 { 313 return find_max_global(ymult, tp, functions, std::move(specs), max_function_calls(), max_runtime, solver_epsilon, std::forward<Args>(args)...); 314 } 315 316 // This overload allows the max_runtime argument to be skipped. 317 template < 318 typename funct, 319 typename ...Args 320 > 321 std::pair<size_t,function_evaluation> find_max_global ( 322 double ymult, 323 thread_pool& tp, 324 std::vector<funct>& functions, 325 std::vector<function_spec> specs, 326 const max_function_calls num, 327 double solver_epsilon, 328 Args&& ...args 329 ) 330 { 331 return find_max_global(ymult, tp, functions, std::move(specs), num, FOREVER, solver_epsilon, std::forward<Args>(args)...); 332 } 333 334 // This overload makes the thread_pool argument optional. 335 template < 336 typename funct, 337 typename ...Args 338 > 339 std::pair<size_t,function_evaluation> find_max_global ( 340 double ymult, 341 std::vector<funct>& functions, 342 Args&& ...args 343 ) 344 { 345 // disabled, don't use any threads 346 thread_pool tp(0); 347 348 return find_max_global(ymult, tp, functions, std::forward<Args>(args)...); 349 } 350 351 // The point of normalize() is to handle some of the overloaded argument types in 352 // find_max_global() instances below and turn them into the argument types expected by 353 // find_max_global() above. 354 template <typename T> 355 const T& normalize(const T& item) 356 { 357 return item; 358 } 359 360 inline std::vector<std::vector<function_evaluation>> normalize( 361 const std::vector<function_evaluation>& initial_function_evals 362 ) 363 { 364 return {initial_function_evals}; 365 } 366 } 367 368 // ---------------------------------------------------------------------------------------- 369 370 template < 371 typename funct, 372 typename ...Args 373 > 374 std::pair<size_t,function_evaluation> find_max_global ( 375 std::vector<funct>& functions, 376 std::vector<function_spec> specs, 377 Args&& ...args 378 ) 379 { 380 return impl::find_max_global(+1, functions, std::move(specs), std::forward<Args>(args)...); 381 } 382 383 template < 384 typename funct, 385 typename ...Args 386 > 387 std::pair<size_t,function_evaluation> find_min_global ( 388 std::vector<funct>& functions, 389 std::vector<function_spec> specs, 390 Args&& ...args 391 ) 392 { 393 return impl::find_max_global(-1, functions, std::move(specs), std::forward<Args>(args)...); 394 } 395 396 template < 397 typename funct, 398 typename ...Args 399 > 400 std::pair<size_t,function_evaluation> find_max_global ( 401 thread_pool& tp, 402 std::vector<funct>& functions, 403 std::vector<function_spec> specs, 404 Args&& ...args 405 ) 406 { 407 return impl::find_max_global(+1, tp, functions, std::move(specs), std::forward<Args>(args)...); 408 } 409 410 template < 411 typename funct, 412 typename ...Args 413 > 414 std::pair<size_t,function_evaluation> find_min_global ( 415 thread_pool& tp, 416 std::vector<funct>& functions, 417 std::vector<function_spec> specs, 418 Args&& ...args 419 ) 420 { 421 return impl::find_max_global(-1, tp, functions, std::move(specs), std::forward<Args>(args)...); 422 } 423 424 // ---------------------------------------------------------------------------------------- 425 426 // Overloads that take function objects and simple matrix bounds instead of function_specs. 427 template < 428 typename funct, 429 typename ...Args 430 > 431 function_evaluation find_max_global ( 432 funct f, 433 const matrix<double,0,1>& bound1, 434 const matrix<double,0,1>& bound2, 435 const std::vector<bool>& is_integer_variable, 436 Args&& ...args 437 ) 438 { 439 std::vector<funct> functions(1,std::move(f)); 440 std::vector<function_spec> specs(1, function_spec(bound1, bound2, is_integer_variable)); 441 return find_max_global(functions, std::move(specs), impl::normalize(args)...).second; 442 } 443 444 template < 445 typename funct, 446 typename ...Args 447 > 448 function_evaluation find_min_global ( 449 funct f, 450 const matrix<double,0,1>& bound1, 451 const matrix<double,0,1>& bound2, 452 const std::vector<bool>& is_integer_variable, 453 Args&& ...args 454 ) 455 { 456 std::vector<funct> functions(1,std::move(f)); 457 std::vector<function_spec> specs(1, function_spec(bound1, bound2, is_integer_variable)); 458 return find_min_global(functions, std::move(specs), impl::normalize(args)...).second; 459 } 460 461 template < 462 typename funct, 463 typename ...Args 464 > 465 function_evaluation find_max_global ( 466 thread_pool& tp, 467 funct f, 468 const matrix<double,0,1>& bound1, 469 const matrix<double,0,1>& bound2, 470 const std::vector<bool>& is_integer_variable, 471 Args&& ...args 472 ) 473 { 474 std::vector<funct> functions(1,std::move(f)); 475 std::vector<function_spec> specs(1, function_spec(bound1, bound2, is_integer_variable)); 476 return find_max_global(tp, functions, std::move(specs), impl::normalize(args)...).second; 477 } 478 479 template < 480 typename funct, 481 typename ...Args 482 > 483 function_evaluation find_min_global ( 484 thread_pool& tp, 485 funct f, 486 const matrix<double,0,1>& bound1, 487 const matrix<double,0,1>& bound2, 488 const std::vector<bool>& is_integer_variable, 489 Args&& ...args 490 ) 491 { 492 std::vector<funct> functions(1,std::move(f)); 493 std::vector<function_spec> specs(1, function_spec(bound1, bound2, is_integer_variable)); 494 return find_min_global(tp, functions, std::move(specs), impl::normalize(args)...).second; 495 } 496 497 // ---------------------------------------------------------------------------------------- 498 499 // overloads that are the same as above, but is_integer_variable defaulted to false for all parameters. 500 template < 501 typename funct, 502 typename T, 503 typename ...Args 504 > 505 typename disable_if<std::is_same<T,std::vector<bool>>, function_evaluation>::type 506 find_max_global ( 507 funct f, 508 const matrix<double,0,1>& bound1, 509 const matrix<double,0,1>& bound2, 510 const T& arg, 511 Args&& ...args 512 ) 513 { 514 const std::vector<bool> is_integer_variable(bound1.size(),false); 515 return find_max_global(std::move(f), bound1, bound2, is_integer_variable, arg, impl::normalize(args)...); 516 } 517 518 template < 519 typename funct, 520 typename T, 521 typename ...Args 522 > 523 typename disable_if<std::is_same<T,std::vector<bool>>, function_evaluation>::type 524 find_min_global ( 525 funct f, 526 const matrix<double,0,1>& bound1, 527 const matrix<double,0,1>& bound2, 528 const T& arg, 529 Args&& ...args 530 ) 531 { 532 const std::vector<bool> is_integer_variable(bound1.size(),false); 533 return find_min_global(std::move(f), bound1, bound2, is_integer_variable, arg, impl::normalize(args)...); 534 } 535 536 template < 537 typename funct, 538 typename T, 539 typename ...Args 540 > 541 typename disable_if<std::is_same<T,std::vector<bool>>, function_evaluation>::type 542 find_max_global ( 543 thread_pool& tp, 544 funct f, 545 const matrix<double,0,1>& bound1, 546 const matrix<double,0,1>& bound2, 547 const T& arg, 548 Args&& ...args 549 ) 550 { 551 const std::vector<bool> is_integer_variable(bound1.size(),false); 552 return find_max_global(tp, std::move(f), bound1, bound2, is_integer_variable, arg, impl::normalize(args)...); 553 } 554 555 template < 556 typename funct, 557 typename T, 558 typename ...Args 559 > 560 typename disable_if<std::is_same<T,std::vector<bool>>, function_evaluation>::type 561 find_min_global ( 562 thread_pool& tp, 563 funct f, 564 const matrix<double,0,1>& bound1, 565 const matrix<double,0,1>& bound2, 566 const T& arg, 567 Args&& ...args 568 ) 569 { 570 const std::vector<bool> is_integer_variable(bound1.size(),false); 571 return find_min_global(tp, std::move(f), bound1, bound2, is_integer_variable, arg, impl::normalize(args)...); 572 } 573 574 // ---------------------------------------------------------------------------------------- 575 576 // overloads for a function taking a single scalar. 577 template < 578 typename funct, 579 typename T, 580 typename ...Args 581 > 582 function_evaluation find_max_global ( 583 funct f, 584 const double bound1, 585 const double bound2, 586 const T& arg, 587 Args&& ...args 588 ) 589 { 590 return find_max_global(std::move(f), matrix<double,0,1>({bound1}), matrix<double,0,1>({bound2}), arg, impl::normalize(args)...); 591 } 592 593 template < 594 typename funct, 595 typename T, 596 typename ...Args 597 > 598 function_evaluation find_min_global ( 599 funct f, 600 const double bound1, 601 const double bound2, 602 const T& arg, 603 Args&& ...args 604 ) 605 { 606 return find_min_global(std::move(f), matrix<double,0,1>({bound1}), matrix<double,0,1>({bound2}), arg, impl::normalize(args)...); 607 } 608 609 template < 610 typename funct, 611 typename T, 612 typename ...Args 613 > 614 function_evaluation find_max_global ( 615 thread_pool& tp, 616 funct f, 617 const double bound1, 618 const double bound2, 619 const T& arg, 620 Args&& ...args 621 ) 622 { 623 return find_max_global(tp, std::move(f), matrix<double,0,1>({bound1}), matrix<double,0,1>({bound2}), arg, impl::normalize(args)...); 624 } 625 626 template < 627 typename funct, 628 typename T, 629 typename ...Args 630 > 631 function_evaluation find_min_global ( 632 thread_pool& tp, 633 funct f, 634 const double bound1, 635 const double bound2, 636 const T& arg, 637 Args&& ...args 638 ) 639 { 640 return find_min_global(tp, std::move(f), matrix<double,0,1>({bound1}), matrix<double,0,1>({bound2}), arg, impl::normalize(args)...); 641 } 642 643 // ---------------------------------------------------------------------------------------- 644 645 } 646 647 #endif // DLIB_FiND_GLOBAL_MAXIMUM_hH_ 648 649