1 //----------------------------------------------------------- 2 // 3 // Copyright (C) 2018 - 2020 by the deal.II authors 4 // 5 // This file is part of the deal.II library. 6 // 7 // The deal.II library is free software; you can use it, redistribute 8 // it, and/or modify it under the terms of the GNU Lesser General 9 // Public License as published by the Free Software Foundation; either 10 // version 2.1 of the License, or (at your option) any later version. 11 // The full text of the license can be found in the file LICENSE.md at 12 // the top level directory of deal.II. 13 // 14 //--------------------------------------------------------------- 15 16 #ifndef dealii_line_minimization_h 17 #define dealii_line_minimization_h 18 19 #include <deal.II/base/config.h> 20 21 #include <deal.II/base/exceptions.h> 22 #include <deal.II/base/logstream.h> 23 #include <deal.II/base/numbers.h> 24 #include <deal.II/base/std_cxx17/optional.h> 25 #include <deal.II/base/utilities.h> 26 27 #include <deal.II/numerics/history.h> 28 29 #include <fstream> 30 #include <string> 31 32 33 DEAL_II_NAMESPACE_OPEN 34 35 /** 36 * A namespace for various algorithms related to minimization a over line. 37 */ 38 namespace LineMinimization 39 { 40 /** 41 * Given $x\_low$ and $x\_hi$ together with values of function 42 * $f(x\_low)$ and $f(x\_hi)$ and the gradient $g(x\_low)$, return the local 43 * minimizer of the quadratic interpolation function. 44 * 45 * The return type is optional to fit with similar functions that may 46 * not have a solution for given parameters. 47 */ 48 template <typename NumberType> 49 std_cxx17::optional<NumberType> 50 quadratic_fit(const NumberType x_low, 51 const NumberType f_low, 52 const NumberType g_low, 53 const NumberType x_hi, 54 const NumberType f_hi); 55 56 /** 57 * Given $x\_low$ and $x\_hi$ together with values of function 58 * $f(x\_low)$ and $f(x\_hi)$ and its gradients ($g(x\_low)*g(x\_hi) < 0$) at 59 * those points, return the local minimizer of the cubic interpolation 60 * function (that is, the location where the cubic interpolation function 61 * attains its minimum value). 62 * 63 * The return type is optional as the real-valued solution might not exist. 64 */ 65 template <typename NumberType> 66 std_cxx17::optional<NumberType> 67 cubic_fit(const NumberType x_low, 68 const NumberType f_low, 69 const NumberType g_low, 70 const NumberType x_hi, 71 const NumberType f_hi, 72 const NumberType g_hi); 73 74 /** 75 * Find the minimizer of a cubic polynomial that goes through the 76 * points $f\_low=f(x\_low)$, $f\_hi=f(x\_hi)$ and $f\_rec(x\_rec)$ 77 * and has derivatve $g\_low$ at $x\_low$. 78 * 79 * The return type is optional as the real-valued solution might not exist. 80 */ 81 template <typename NumberType> 82 std_cxx17::optional<NumberType> 83 cubic_fit_three_points(const NumberType x_low, 84 const NumberType f_low, 85 const NumberType g_low, 86 const NumberType x_hi, 87 const NumberType f_hi, 88 const NumberType x_rec, 89 const NumberType f_rec); 90 91 /** 92 * Return the minimizer of a polynomial using function values @p f_low , 93 * @p f_hi , and @p f_rec[0] at three points @p x_low , @p x_hi , and 94 * @p x_rec[0] as well as the derivatives at two points @p g_low and @p g_hi. 95 * The returned point should be within the bounds @p bounds . 96 * 97 * This function will first try to perform a cubic_fit(). If its unsuccessful, 98 * or if the minimum is not within the provided @p bounds, a quadratic_fit() 99 * will be performed. The function will fallback to a bisection method if 100 * quadratic_fit() fails as well. 101 */ 102 template <typename NumberType> 103 NumberType 104 poly_fit(const NumberType x_low, 105 const NumberType f_low, 106 const NumberType g_low, 107 const NumberType x_hi, 108 const NumberType f_hi, 109 const NumberType g_hi, 110 const FiniteSizeHistory<NumberType> & x_rec, 111 const FiniteSizeHistory<NumberType> & f_rec, 112 const FiniteSizeHistory<NumberType> & g_rec, 113 const std::pair<NumberType, NumberType> bounds); 114 115 /** 116 * Same as poly_fit(), but performing a cubic fit with three points (see 117 * cubic_fit_three_points() ). 118 */ 119 template <typename NumberType> 120 NumberType 121 poly_fit_three_points(const NumberType x_low, 122 const NumberType f_low, 123 const NumberType g_low, 124 const NumberType x_hi, 125 const NumberType f_hi, 126 const NumberType g_hi, 127 const FiniteSizeHistory<NumberType> & x_rec, 128 const FiniteSizeHistory<NumberType> & f_rec, 129 const FiniteSizeHistory<NumberType> & g_rec, 130 const std::pair<NumberType, NumberType> bounds); 131 132 133 /** 134 * Perform a line search in $(0,max]$ with strong Wolfe conditions 135 * \f[ 136 * f(\alpha) \le f(0) + \alpha \mu f'(0) \\ 137 * |f'(\alpha)| \le \eta |f'(0)| 138 * \f] 139 * using the one dimensional function @p func in conjunction with a function @p interpolate 140 * to choose a new point from the interval based on the function values and 141 * derivatives at its ends. 142 * The parameter @p a1 is a trial estimate of the first step. 143 * Interpolation can be done using either poly_fit() or 144 * poly_fit_three_points(), or any other function that has a similar 145 * signature. 146 * 147 * The function implements Algorithms 2.6.2 and 2.6.4 on pages 34-35 in 148 * @code{.bib} 149 * @book{Fletcher2013, 150 * title = {Practical methods of optimization}, 151 * publisher = {John Wiley \& Sons}, 152 * year = {2013}, 153 * author = {Fletcher, Roger}, 154 * isbn = {978-0-471-49463-8}, 155 * doi = {10.1002/9781118723203}, 156 * } 157 * @endcode 158 * These are minor variations of Algorithms 3.5 and 3.6 on pages 60-61 in 159 * @code{.bib} 160 * @book{Nocedal2006, 161 * title = {Numerical Optimization}, 162 * publisher = {Springer New York}, 163 * year = {2006}, 164 * author = {Jorge Nocedal and S. Wright}, 165 * address = {233 Spring Street, New York, NY 10013, USA}, 166 * doi = {10.1007/978-0-387-40065-5}, 167 * } 168 * @endcode 169 * It consists of a bracketing phase and a zoom phase, where @p interpolate is used. 170 * 171 * Two examples of use might be as follows: 172 * In the first example, we wish to find the minimum of the function 173 * $100 * x^4 + (1-x)^2$. To find the approximate solution using line search 174 * with a polynomial fit to the curve one would perform the following steps: 175 * 176 * @code 177 * auto func = [](const double x) 178 * { 179 * const double f = 100. * std::pow(x, 4) + std::pow(1. - x, 2); // Value 180 * const double g = 400. * std::pow(x, 3) - 2. * (1. - x); // Gradient 181 * return std::make_pair(f, g); 182 * }; 183 * 184 * const auto fg0 = func(0); 185 * const auto res = LineMinimization::line_search<double>( 186 * func, 187 * fg0.first, fg0.second, 188 * LineMinimization::poly_fit<double>, 189 * 0.1, 0.1, 0.01, 100, 20); 190 * 191 * const double approx_solution = res.first; 192 * @endcode 193 * 194 * In the second example, we wish to perform line search in the context of a 195 * non-linear finite element problem. What follows below is a non-optimized 196 * implementation of the back-tracking algorithm, which may be useful when 197 * the load-step size is too large. The following illustrates the basic steps 198 * necessary to utilize the scheme within the context of a global nonlinear 199 * solver: 200 * 201 * @code 202 * // Solve some incremental linear system 203 * const Vector<double> newton_update = solver_linear_system(...); 204 * 205 * // Now we check to see if the suggested Newton update is a good one. 206 * // First we define what it means to perform linesearch in the context of 207 * // this incremental nonlinear finite element problem. 208 * auto ls_minimization_function = [&](const double step_size) 209 * { 210 * // Scale the full Newton update by the proposed line search step size. 211 * Vector<double> newton_update_trial(newton_update); 212 * newton_update_trial *= step_size; 213 * // Ensure that the Dirichlet constraints are correctly applied, 214 * // irrespective of the step size 215 * constraints.distribute(newton_update_trial); 216 * // Now add the constribution from the previously accepted solution 217 * // history. 218 * const Vector<double> solution_total_trial = 219 * get_solution_total(newton_update_trial); 220 * 221 * // Recompute the linear system based on the trial newton update 222 * Vector<double> system_rhs (...); 223 * SparseMatrix<double> tangent_matrix (...); 224 * assemble_linear_system( 225 * tangent_matrix, system_rhs, solution_total_trial); 226 * Vector<double> residual_trial (system_rhs); 227 * residual_trial *= -1.0; // Residual = -RHS 228 * 229 * // Negelect the constrained entries in the consideration 230 * // of the function (value and gradient) to be minimized. 231 * constraints.set_zero(residual_trial); 232 * 233 * // Here we compute the function value according to the text given in 234 * // section 5.1.4 of Wriggers, P., "Nonlinear finite element methods", 235 * // 2008. 236 * // The function value correspeonds to equ. 5.11 on p159. 237 * const double f = 0.5 * (residual_trial * residual_trial); // Value 238 * 239 * // However, the corresponding gradient given in eq 5.14 is wrong. The 240 * // suggested result 241 * // const double g = -(residual_0*residual_trial); 242 * // should actually be 243 * // g = G(V + alpha*delta)*[ K(V + alpha*delta)*delta. 244 * Vector<double> tmp; 245 * tmp.reinit(newton_update); 246 * tangent_matrix.vmult(tmp, newton_update); 247 * const double g = tmp * residual_trial; // Gradient 248 * 249 * return std::make_pair(f, g); 250 * }; 251 * 252 * // Next we can write a function to determine if taking the full Newton 253 * // step is a good idea or not (i.e. if it offers good convergence 254 * // characterisics). This function calls the one we defined above, 255 * // and actually only performs the line search if an early exit 256 * // criterion is not met. 257 * auto perform_linesearch = [&]() 258 * { 259 * const auto res_0 = ls_minimization_function(0.0); 260 * Assert(res_0.second < 0.0, 261 * ExcMessage("Gradient should be negative. Current value: " + 262 * std::to_string(res_0.second))); 263 * const auto res_1 = ls_minimization_function(1.0); 264 * 265 * // Check to see if the minimum lies in the interval [0,1] through the 266 * // values of the gradients at the limit points. 267 * // If it does not, then the full step is accepted. This is discussed by 268 * // Wriggers in the paragraph after equ. 5.14. 269 * if (res_0.second * res_1.second > 0.0) 270 * return 1.0; 271 * 272 * // The values for eta, mu are chosen such that more strict convergence 273 * // conditions are enforced. 274 * // They should be adjusted according to the problem requirements. 275 * const double a1 = 1.0; 276 * const double eta = 0.5; 277 * const double mu = 0.49; 278 * const double a_max = 1.25; 279 * const double max_evals = 20; 280 * const auto res = LineMinimization::line_search<double>( 281 * ls_minimization_function, 282 * res_0.first, res_0.second, 283 * LineMinimization::poly_fit<double>, 284 * a1, eta, mu, a_max, max_evals)); 285 * 286 * return res.first; // Final stepsize 287 * }; 288 * 289 * // Finally, we can perform the line search and adjust the Newton update 290 * // accordingly. 291 * const double linesearch_step_size = perform_linesearch(); 292 * if (linesearch_step_size != 1.0) 293 * { 294 * newton_update *= linesearch_step_size; 295 * constraints.distribute(newton_update); 296 * } 297 * @endcode 298 * 299 * 300 * @param func A one dimensional function which returns value and derivative 301 * at the given point. 302 * @param f0 The function value at the origin. 303 * @param g0 The function derivative at the origin. 304 * @param interpolate A function which determines how interpolation is done 305 * during the zoom phase. It takes values and derivatives at the current 306 * interval/bracket ($f\_low$, $f\_hi$) as well as up to 5 values and 307 * derivatives at previous steps. The returned value is to be provided within 308 * the given bounds. 309 * @param a1 Initial trial step for the bracketing phase. 310 * @param eta A parameter in the second Wolfe condition (curvature condition). 311 * @param mu A parameter in the first Wolfe condition (sufficient decrease). 312 * @param a_max The maximum allowed step size. 313 * @param max_evaluations The maximum allowed number of function evaluations. 314 * @param debug_output A flag to output extra debug information into the 315 * <code>deallog</code> static object. 316 * @return The function returns the step size and the number of times function 317 * @p func was called. 318 */ 319 template <typename NumberType> 320 std::pair<NumberType, unsigned int> 321 line_search( 322 const std::function<std::pair<NumberType, NumberType>(const NumberType x)> 323 & func, 324 const NumberType f0, 325 const NumberType g0, 326 const std::function< 327 NumberType(const NumberType x_low, 328 const NumberType f_low, 329 const NumberType g_low, 330 const NumberType x_hi, 331 const NumberType f_hi, 332 const NumberType g_hi, 333 const FiniteSizeHistory<NumberType> & x_rec, 334 const FiniteSizeHistory<NumberType> & f_rec, 335 const FiniteSizeHistory<NumberType> & g_rec, 336 const std::pair<NumberType, NumberType> bounds)> &interpolate, 337 const NumberType a1, 338 const NumberType eta = 0.9, 339 const NumberType mu = 0.01, 340 const NumberType a_max = std::numeric_limits<NumberType>::max(), 341 const unsigned int max_evaluations = 20, 342 const bool debug_output = false); 343 344 345 // ------------------- inline and template functions ---------------- 346 347 348 #ifndef DOXYGEN 349 350 351 template <typename NumberType> 352 std_cxx17::optional<NumberType> quadratic_fit(const NumberType x1,const NumberType f1,const NumberType g1,const NumberType x2,const NumberType f2)353 quadratic_fit(const NumberType x1, 354 const NumberType f1, 355 const NumberType g1, 356 const NumberType x2, 357 const NumberType f2) 358 { 359 Assert(x1 != x2, ExcMessage("Point are the same")); 360 const NumberType denom = (2. * g1 * x2 - 2. * g1 * x1 - 2. * f2 + 2. * f1); 361 if (denom == 0) 362 return {}; 363 else 364 return (g1 * (x2 * x2 - x1 * x1) + 2. * (f1 - f2) * x1) / denom; 365 } 366 367 368 369 template <typename NumberType> 370 std_cxx17::optional<NumberType> cubic_fit(const NumberType x1,const NumberType f1,const NumberType g1,const NumberType x2,const NumberType f2,const NumberType g2)371 cubic_fit(const NumberType x1, 372 const NumberType f1, 373 const NumberType g1, 374 const NumberType x2, 375 const NumberType f2, 376 const NumberType g2) 377 { 378 Assert(x1 != x2, ExcMessage("Points are the same")); 379 const NumberType beta1 = g1 + g2 - 3. * (f1 - f2) / (x1 - x2); 380 const NumberType s = beta1 * beta1 - g1 * g2; 381 if (s < 0) 382 return {}; 383 384 const NumberType beta2 = std::sqrt(s); 385 const NumberType denom = 386 x1 < x2 ? g2 - g1 + 2. * beta2 : g1 - g2 + 2. * beta2; 387 if (denom == 0.) 388 return {}; 389 390 return x1 < x2 ? x2 - (x2 - x1) * (g2 + beta2 - beta1) / denom : 391 x1 - (x1 - x2) * (g1 + beta2 - beta1) / denom; 392 } 393 394 395 396 template <typename NumberType> 397 std_cxx17::optional<NumberType> cubic_fit_three_points(const NumberType x1,const NumberType f1,const NumberType g1,const NumberType x2,const NumberType f2,const NumberType x3,const NumberType f3)398 cubic_fit_three_points(const NumberType x1, 399 const NumberType f1, 400 const NumberType g1, 401 const NumberType x2, 402 const NumberType f2, 403 const NumberType x3, 404 const NumberType f3) 405 { 406 Assert(x1 != x2, ExcMessage("Points are the same")); 407 Assert(x1 != x3, ExcMessage("Points are the same")); 408 // f(x) = A *(x-x1)^3 + B*(x-x1)^2 + C*(x-x1) + D 409 // => 410 // D = f1 411 // C = g1 412 413 // the rest is a system of 2 equations: 414 415 const NumberType x2_shift = x2 - x1; 416 const NumberType x3_shift = x3 - x1; 417 const NumberType r1 = f2 - f1 - g1 * x2_shift; 418 const NumberType r2 = f3 - f1 - g1 * x3_shift; 419 const NumberType denom = 420 std::pow(x2_shift * x3_shift, 2) * (x2_shift - x3_shift); 421 if (denom == 0.) 422 return {}; 423 424 const NumberType A = 425 (r1 * std::pow(x3_shift, 2) - r2 * std::pow(x2_shift, 2)) / denom; 426 const NumberType B = 427 (r2 * std::pow(x2_shift, 3) - r1 * std::pow(x3_shift, 3)) / denom; 428 const NumberType &C = g1; 429 430 // now get the minimizer: 431 const NumberType radical = B * B - A * C * 3; 432 if (radical < 0) 433 return {}; 434 435 return x1 + (-B + std::sqrt(radical)) / (A * 3); 436 } 437 438 439 440 template <typename NumberType> 441 NumberType poly_fit(const NumberType x1,const NumberType f1,const NumberType g1,const NumberType x2,const NumberType f2,const NumberType g2,const FiniteSizeHistory<NumberType> &,const FiniteSizeHistory<NumberType> &,const FiniteSizeHistory<NumberType> &,const std::pair<NumberType,NumberType> bounds)442 poly_fit(const NumberType x1, 443 const NumberType f1, 444 const NumberType g1, 445 const NumberType x2, 446 const NumberType f2, 447 const NumberType g2, 448 const FiniteSizeHistory<NumberType> &, 449 const FiniteSizeHistory<NumberType> &, 450 const FiniteSizeHistory<NumberType> &, 451 const std::pair<NumberType, NumberType> bounds) 452 { 453 Assert(bounds.first < bounds.second, ExcMessage("Incorrect bounds")); 454 455 // Similar to scipy implementation but we fit based on two points 456 // with their gradients and do bisection on bounds. 457 // https://github.com/scipy/scipy/blob/v1.0.0/scipy/optimize/linesearch.py#L555-L563 458 459 // First try cubic interpolation 460 std_cxx17::optional<NumberType> res = cubic_fit(x1, f1, g1, x2, f2, g2); 461 if (res && *res >= bounds.first && *res <= bounds.second) 462 return *res; 463 464 // cubic either fails or outside of safe region, do quadratic: 465 res = quadratic_fit(x1, f1, g1, x2, f2); 466 if (res && *res >= bounds.first && *res <= bounds.second) 467 return *res; 468 469 // quadratic either failed or outside of safe region. Do bisection 470 // on safe region 471 return (bounds.first + bounds.second) * 0.5; 472 } 473 474 475 476 template <typename NumberType> 477 NumberType poly_fit_three_points(const NumberType x1,const NumberType f1,const NumberType g1,const NumberType x2,const NumberType f2,const NumberType g2,const FiniteSizeHistory<NumberType> & x_rec,const FiniteSizeHistory<NumberType> & f_rec,const FiniteSizeHistory<NumberType> &,const std::pair<NumberType,NumberType> bounds)478 poly_fit_three_points(const NumberType x1, 479 const NumberType f1, 480 const NumberType g1, 481 const NumberType x2, 482 const NumberType f2, 483 const NumberType g2, 484 const FiniteSizeHistory<NumberType> &x_rec, 485 const FiniteSizeHistory<NumberType> &f_rec, 486 const FiniteSizeHistory<NumberType> & /*g_rec*/, 487 const std::pair<NumberType, NumberType> bounds) 488 { 489 Assert(bounds.first < bounds.second, ExcMessage("Incorrect bounds")); 490 AssertDimension(x_rec.size(), f_rec.size()); 491 492 // Same as scipy implementation where cubic fit is using 3 points 493 // https://github.com/scipy/scipy/blob/v1.0.0/scipy/optimize/linesearch.py#L555-L563 494 495 // First try cubic interpolation after first iteration 496 std_cxx17::optional<NumberType> res = 497 x_rec.size() > 0 ? 498 cubic_fit_three_points(x1, f1, g1, x2, f2, x_rec[0], f_rec[0]) : 499 std_cxx17::optional<NumberType>{}; 500 if (res && *res >= bounds.first && *res <= bounds.second) 501 return *res; 502 503 // cubic either fails or outside of safe region, do quadratic: 504 res = quadratic_fit(x1, f1, g1, x2, f2); 505 if (res && *res >= bounds.first && *res <= bounds.second) 506 return *res; 507 508 // quadratic either failed or outside of safe region. Do bisection 509 // on safe region 510 return (bounds.first + bounds.second) * 0.5; 511 } 512 513 514 515 template <typename NumberType> 516 std::pair<NumberType, unsigned int> line_search(const std::function<std::pair<NumberType,NumberType> (const NumberType x)> & func,const NumberType f0,const NumberType g0,const std::function<NumberType (const NumberType x_low,const NumberType f_low,const NumberType g_low,const NumberType x_hi,const NumberType f_hi,const NumberType g_hi,const FiniteSizeHistory<NumberType> & x_rec,const FiniteSizeHistory<NumberType> & f_rec,const FiniteSizeHistory<NumberType> & g_rec,const std::pair<NumberType,NumberType> bounds)> & choose,const NumberType a1,const NumberType eta,const NumberType mu,const NumberType a_max,const unsigned int max_evaluations,const bool debug_output)517 line_search( 518 const std::function<std::pair<NumberType, NumberType>(const NumberType x)> 519 & func, 520 const NumberType f0, 521 const NumberType g0, 522 const std::function< 523 NumberType(const NumberType x_low, 524 const NumberType f_low, 525 const NumberType g_low, 526 const NumberType x_hi, 527 const NumberType f_hi, 528 const NumberType g_hi, 529 const FiniteSizeHistory<NumberType> & x_rec, 530 const FiniteSizeHistory<NumberType> & f_rec, 531 const FiniteSizeHistory<NumberType> & g_rec, 532 const std::pair<NumberType, NumberType> bounds)> &choose, 533 const NumberType a1, 534 const NumberType eta, 535 const NumberType mu, 536 const NumberType a_max, 537 const unsigned int max_evaluations, 538 const bool debug_output) 539 { 540 // Note that scipy use dcsrch() from Minpack2 Fortran lib for line search 541 Assert(mu < 0.5 && mu > 0, ExcMessage("mu is not in (0,1/2).")); 542 Assert(eta < 1. && eta > mu, ExcMessage("eta is not in (mu,1).")); 543 Assert(a_max > 0, ExcMessage("max is not positive.")); 544 Assert(a1 > 0 && a1 <= a_max, ExcMessage("a1 is not in (0,max].")); 545 Assert(g0 < 0, ExcMessage("Initial slope is not negative")); 546 547 // Growth parameter for bracketing phase: 548 // 1 < tau1 549 const NumberType tau1 = 9.; 550 // shrink parameters for sectioning phase to prevent ai from being 551 // arbitrary close to the extremes of the interval. 552 // 0 < tau2 < tau3 <= 1/2 553 // tau2 <= eta is advisable 554 const NumberType tau2 = 0.1; // bound for closeness to a_lo 555 const NumberType tau3 = 0.5; // bound for closeness to a_hi 556 557 const NumberType g0_abs = std::abs(g0); 558 const NumberType f_min = f0 + a_max * mu * g0; 559 560 // return True if the first Wolfe condition (sufficient decrease) is 561 // satisfied 562 const auto w1 = [&](const NumberType a, const NumberType f) { 563 return f <= f0 + a * mu * g0; 564 }; 565 566 // return True if the second Wolfe condition (curvature condition) is 567 // satisfied 568 const auto w2 = [&](const NumberType g) { 569 return std::abs(g) <= eta * g0_abs; 570 }; 571 572 // Bracketing phase (Algorithm 2.6.2): look for a non-trivial interval 573 // which is known to contain an interval of acceptable points. 574 // We adopt notation of Noceal. 575 const NumberType x = std::numeric_limits<NumberType>::signaling_NaN(); 576 NumberType a_lo = x, f_lo = x, g_lo = x; 577 NumberType a_hi = x, f_hi = x, g_hi = x; 578 NumberType ai = x, fi = x, gi = x; 579 580 // count function calls in i: 581 unsigned int i = 0; 582 { 583 NumberType f_prev, g_prev, a_prev; 584 ai = a1; 585 f_prev = f0; 586 g_prev = g0; 587 a_prev = 0; 588 589 while (i < max_evaluations) 590 { 591 const auto fgi = func(ai); 592 fi = fgi.first; 593 gi = fgi.second; 594 i++; 595 596 if (debug_output) 597 deallog << "Bracketing phase: " << i << std::endl 598 << ai << " " << fi << " " << gi << " " << w1(ai, fi) << " " 599 << w2(gi) << " " << f_min << std::endl; 600 601 // first check if we can stop bracketing or the whole line search: 602 if (fi <= f_min || ai == a_max) 603 { 604 if (debug_output) 605 deallog << "Reached the maximum step size." << std::endl; 606 return std::make_pair(ai, i); 607 } 608 609 if (!w1(ai, fi) || 610 (fi >= f_prev && i > 1)) // violate first Wolfe or not descending 611 { 612 a_lo = a_prev; 613 f_lo = f_prev; 614 g_lo = g_prev; 615 616 a_hi = ai; 617 f_hi = fi; 618 g_hi = gi; 619 break; // end bracketing 620 } 621 622 if (w2(gi)) // satisfies both Wolfe conditions 623 { 624 if (debug_output) 625 deallog << "Satisfied both Wolfe conditions during Bracketing." 626 << std::endl; 627 628 Assert(w1(ai, fi), ExcInternalError()); 629 return std::make_pair(ai, i); 630 } 631 632 if (gi >= 0) // not descending 633 { 634 a_lo = ai; 635 f_lo = fi; 636 g_lo = gi; 637 638 a_hi = a_prev; 639 f_hi = f_prev; 640 g_hi = g_prev; 641 break; // end bracketing 642 } 643 644 // extrapolation step with the bounds 645 const auto bounds = 646 std::make_pair(2. * ai - a_prev, 647 std::min(a_max, ai + tau1 * (ai - a_prev))); 648 649 a_prev = ai; 650 f_prev = fi; 651 g_prev = gi; 652 653 // NOTE: Fletcher's 2.6.2 includes optional extrapolation, we 654 // simply take the upper bound 655 // Scipy increases by factor of two: 656 // https://github.com/scipy/scipy/blob/v1.0.0/scipy/optimize/linesearch.py#L447 657 ai = bounds.second; 658 } 659 } 660 661 AssertThrow( 662 i < max_evaluations, 663 ExcMessage( 664 "Could not find the initial bracket within the given number of iterations.")); 665 666 // Check properties of the bracket (Theorem 3.2 in More and Thuente, 94 667 // and Eq. 2.6.3 in Fletcher 2013 668 669 // FIXME: these conditions are actually violated for Fig3 and a1=10^3 in 670 // More and Thorenton, 94. 671 672 /* 673 Assert((f_lo < f_hi) && w1(a_lo, f_lo), ExcInternalError()); 674 Assert(((a_hi - a_lo) * g_lo < 0) && !w2(g_lo), ExcInternalError()); 675 Assert((w1(a_hi, f_hi) || f_hi >= f_lo), ExcInternalError()); 676 */ 677 678 // keep short history of last points to improve interpolation 679 FiniteSizeHistory<NumberType> a_rec(5), f_rec(5), g_rec(5); 680 // if neither a_lo nor a_hi are zero: 681 if (std::abs(a_lo) > std::numeric_limits<NumberType>::epsilon() && 682 std::abs(a_hi) > std::numeric_limits<NumberType>::epsilon()) 683 { 684 a_rec.add(0); 685 f_rec.add(f0); 686 g_rec.add(g0); 687 } 688 689 // Now sectioning phase: we allow both [a_lo, a_hi] and [a_hi, a_lo] 690 while (i < max_evaluations) 691 { 692 const NumberType a_lo_safe = a_lo + tau2 * (a_hi - a_lo); 693 const NumberType a_hi_safe = a_hi - tau3 * (a_hi - a_lo); 694 const auto bounds = std::minmax(a_lo_safe, a_hi_safe); 695 696 ai = choose( 697 a_lo, f_lo, g_lo, a_hi, f_hi, g_hi, a_rec, f_rec, g_rec, bounds); 698 699 const std::pair<NumberType, NumberType> fgi = func(ai); 700 fi = fgi.first; 701 gi = fgi.second; 702 i++; 703 704 if (debug_output) 705 deallog << "Sectioning phase: " << i << std::endl 706 << a_lo << " " << f_lo << " " << g_lo << " " << w1(a_lo, f_lo) 707 << " " << w2(g_lo) << std::endl 708 << a_hi << " " << f_hi << " " << g_hi << " " << w1(a_hi, f_hi) 709 << " " << w2(g_hi) << std::endl 710 << ai << " " << fi << " " << gi << " " << w1(ai, fi) << " " 711 << w2(gi) << std::endl; 712 713 if (!w1(ai, fi) || fi >= f_lo) 714 // take [a_lo, ai] 715 { 716 a_rec.add(a_hi); 717 f_rec.add(f_hi); 718 g_rec.add(g_hi); 719 720 a_hi = ai; 721 f_hi = fi; 722 g_hi = gi; 723 } 724 else 725 { 726 if (w2(gi)) // satisfies both wolf 727 { 728 if (debug_output) 729 deallog << "Satisfied both Wolfe conditions." << std::endl; 730 Assert(w1(ai, fi), ExcInternalError()); 731 return std::make_pair(ai, i); 732 } 733 734 if (gi * (a_hi - a_lo) >= 0) 735 // take [ai, a_lo] 736 { 737 a_rec.add(a_hi); 738 f_rec.add(f_hi); 739 g_rec.add(g_hi); 740 741 a_hi = a_lo; 742 f_hi = f_lo; 743 g_hi = g_lo; 744 } 745 else 746 // take [ai, a_hi] 747 { 748 a_rec.add(a_lo); 749 f_rec.add(f_lo); 750 g_rec.add(g_lo); 751 } 752 753 a_lo = ai; 754 f_lo = fi; 755 g_lo = gi; 756 } 757 } 758 759 // if we got here, we could not find the solution 760 AssertThrow( 761 false, 762 ExcMessage( 763 "Could not could complete the sectioning phase within the given number of iterations.")); 764 return std::make_pair(std::numeric_limits<NumberType>::signaling_NaN(), i); 765 } 766 767 #endif // DOXYGEN 768 769 } // namespace LineMinimization 770 771 DEAL_II_NAMESPACE_CLOSE 772 773 #endif // dealii_line_minimization_h 774