1 //-----------------------------------------------------------
2 //
3 //    Copyright (C) 2018 - 2020 by the deal.II authors
4 //
5 //    This file is part of the deal.II library.
6 //
7 //    The deal.II library is free software; you can use it, redistribute
8 //    it, and/or modify it under the terms of the GNU Lesser General
9 //    Public License as published by the Free Software Foundation; either
10 //    version 2.1 of the License, or (at your option) any later version.
11 //    The full text of the license can be found in the file LICENSE.md at
12 //    the top level directory of deal.II.
13 //
14 //---------------------------------------------------------------
15 
16 #ifndef dealii_line_minimization_h
17 #define dealii_line_minimization_h
18 
19 #include <deal.II/base/config.h>
20 
21 #include <deal.II/base/exceptions.h>
22 #include <deal.II/base/logstream.h>
23 #include <deal.II/base/numbers.h>
24 #include <deal.II/base/std_cxx17/optional.h>
25 #include <deal.II/base/utilities.h>
26 
27 #include <deal.II/numerics/history.h>
28 
29 #include <fstream>
30 #include <string>
31 
32 
33 DEAL_II_NAMESPACE_OPEN
34 
35 /**
36  * A namespace for various algorithms related to minimization a over line.
37  */
38 namespace LineMinimization
39 {
40   /**
41    * Given $x\_low$ and $x\_hi$ together with values of function
42    * $f(x\_low)$ and $f(x\_hi)$ and the gradient $g(x\_low)$, return the local
43    * minimizer of the quadratic interpolation function.
44    *
45    * The return type is optional to fit with similar functions that may
46    * not have a solution for given parameters.
47    */
48   template <typename NumberType>
49   std_cxx17::optional<NumberType>
50   quadratic_fit(const NumberType x_low,
51                 const NumberType f_low,
52                 const NumberType g_low,
53                 const NumberType x_hi,
54                 const NumberType f_hi);
55 
56   /**
57    * Given $x\_low$ and $x\_hi$ together with values of function
58    * $f(x\_low)$ and $f(x\_hi)$ and its gradients ($g(x\_low)*g(x\_hi) < 0$) at
59    * those points, return the local minimizer of the cubic interpolation
60    * function (that is, the location where the cubic interpolation function
61    * attains its minimum value).
62    *
63    * The return type is optional as the real-valued solution might not exist.
64    */
65   template <typename NumberType>
66   std_cxx17::optional<NumberType>
67   cubic_fit(const NumberType x_low,
68             const NumberType f_low,
69             const NumberType g_low,
70             const NumberType x_hi,
71             const NumberType f_hi,
72             const NumberType g_hi);
73 
74   /**
75    * Find the minimizer of a cubic polynomial that goes through the
76    * points $f\_low=f(x\_low)$, $f\_hi=f(x\_hi)$ and $f\_rec(x\_rec)$
77    * and has derivatve $g\_low$ at $x\_low$.
78    *
79    * The return type is optional as the real-valued solution might not exist.
80    */
81   template <typename NumberType>
82   std_cxx17::optional<NumberType>
83   cubic_fit_three_points(const NumberType x_low,
84                          const NumberType f_low,
85                          const NumberType g_low,
86                          const NumberType x_hi,
87                          const NumberType f_hi,
88                          const NumberType x_rec,
89                          const NumberType f_rec);
90 
91   /**
92    * Return the minimizer of a polynomial using function values @p f_low ,
93    * @p f_hi , and @p f_rec[0] at three points @p x_low , @p x_hi , and
94    * @p x_rec[0] as well as the derivatives at two points @p g_low and @p g_hi.
95    * The returned point should be within the bounds @p bounds .
96    *
97    * This function will first try to perform a cubic_fit(). If its unsuccessful,
98    * or if the minimum is not within the provided @p bounds, a quadratic_fit()
99    * will be performed. The function will fallback to a bisection method if
100    * quadratic_fit() fails as well.
101    */
102   template <typename NumberType>
103   NumberType
104   poly_fit(const NumberType                        x_low,
105            const NumberType                        f_low,
106            const NumberType                        g_low,
107            const NumberType                        x_hi,
108            const NumberType                        f_hi,
109            const NumberType                        g_hi,
110            const FiniteSizeHistory<NumberType> &   x_rec,
111            const FiniteSizeHistory<NumberType> &   f_rec,
112            const FiniteSizeHistory<NumberType> &   g_rec,
113            const std::pair<NumberType, NumberType> bounds);
114 
115   /**
116    * Same as poly_fit(), but performing a cubic fit with three points (see
117    * cubic_fit_three_points() ).
118    */
119   template <typename NumberType>
120   NumberType
121   poly_fit_three_points(const NumberType                        x_low,
122                         const NumberType                        f_low,
123                         const NumberType                        g_low,
124                         const NumberType                        x_hi,
125                         const NumberType                        f_hi,
126                         const NumberType                        g_hi,
127                         const FiniteSizeHistory<NumberType> &   x_rec,
128                         const FiniteSizeHistory<NumberType> &   f_rec,
129                         const FiniteSizeHistory<NumberType> &   g_rec,
130                         const std::pair<NumberType, NumberType> bounds);
131 
132 
133   /**
134    * Perform a line search in $(0,max]$ with strong Wolfe conditions
135    * \f[
136    * f(\alpha) \le f(0) + \alpha \mu f'(0) \\
137    * |f'(\alpha)| \le \eta |f'(0)|
138    * \f]
139    * using the one dimensional function @p func in conjunction with a function @p interpolate
140    * to choose a new point from the interval based on the function values and
141    * derivatives at its ends.
142    * The parameter @p a1 is a trial estimate of the first step.
143    * Interpolation can be done using either poly_fit() or
144    * poly_fit_three_points(), or any other function that has a similar
145    * signature.
146    *
147    * The function implements Algorithms 2.6.2 and 2.6.4 on pages 34-35 in
148    * @code{.bib}
149    *   @book{Fletcher2013,
150    *   title     = {Practical methods of optimization},
151    *   publisher = {John Wiley \& Sons},
152    *   year      = {2013},
153    *   author    = {Fletcher, Roger},
154    *   isbn      = {978-0-471-49463-8},
155    *   doi       = {10.1002/9781118723203},
156    *   }
157    * @endcode
158    * These are minor variations of  Algorithms 3.5 and 3.6 on pages 60-61 in
159    * @code{.bib}
160    *   @book{Nocedal2006,
161    *   title     = {Numerical Optimization},
162    *   publisher = {Springer New York},
163    *   year      = {2006},
164    *   author    = {Jorge Nocedal and S. Wright},
165    *   address   = {233 Spring Street, New York, NY 10013, USA},
166    *   doi       = {10.1007/978-0-387-40065-5},
167    *   }
168    * @endcode
169    * It consists of a bracketing phase and a zoom phase, where @p interpolate is used.
170    *
171    * Two examples of use might be as follows:
172    * In the first example, we wish to find the minimum of the function
173    * $100 * x^4 + (1-x)^2$. To find the approximate solution using line search
174    * with a polynomial fit to the curve one would perform the following steps:
175    *
176    * @code
177    *   auto func = [](const double x)
178    *   {
179    *     const double f = 100. * std::pow(x, 4) + std::pow(1. - x, 2); // Value
180    *     const double g = 400. * std::pow(x, 3) - 2. * (1. - x); // Gradient
181    *     return std::make_pair(f, g);
182    *   };
183    *
184    *   const auto fg0 = func(0);
185    *   const auto res = LineMinimization::line_search<double>(
186    *     func,
187    *     fg0.first, fg0.second,
188    *     LineMinimization::poly_fit<double>,
189    *     0.1, 0.1, 0.01, 100, 20);
190    *
191    *   const double approx_solution = res.first;
192    * @endcode
193    *
194    * In the second example, we wish to perform line search in the context of a
195    * non-linear finite element problem. What follows below is a non-optimized
196    * implementation of the back-tracking algorithm, which may be useful when
197    * the load-step size is too large. The following illustrates the basic steps
198    * necessary to utilize the scheme within the context of a global nonlinear
199    * solver:
200    *
201    * @code
202    *   // Solve some incremental linear system
203    *   const Vector<double> newton_update = solver_linear_system(...);
204    *
205    *   // Now we check to see if the suggested Newton update is a good one.
206    *   // First we define what it means to perform linesearch in the context of
207    *   // this incremental nonlinear finite element problem.
208    *   auto ls_minimization_function = [&](const double step_size)
209    *   {
210    *     // Scale the full Newton update by the proposed line search step size.
211    *     Vector<double> newton_update_trial(newton_update);
212    *     newton_update_trial *= step_size;
213    *     // Ensure that the Dirichlet constraints are correctly applied,
214    *     // irrespective of the step size
215    *     constraints.distribute(newton_update_trial);
216    *     // Now add the constribution from the previously accepted solution
217    *     // history.
218    *     const Vector<double> solution_total_trial =
219    *       get_solution_total(newton_update_trial);
220    *
221    *     // Recompute the linear system based on the trial newton update
222    *     Vector<double> system_rhs (...);
223    *     SparseMatrix<double> tangent_matrix (...);
224    *     assemble_linear_system(
225    *       tangent_matrix, system_rhs, solution_total_trial);
226    *     Vector<double> residual_trial (system_rhs);
227    *     residual_trial *= -1.0; // Residual = -RHS
228    *
229    *     // Negelect the constrained entries in the consideration
230    *     // of the function (value and gradient) to be minimized.
231    *     constraints.set_zero(residual_trial);
232    *
233    *     // Here we compute the function value according to the text given in
234    *     // section 5.1.4 of Wriggers, P., "Nonlinear finite element methods",
235    *     // 2008.
236    *     // The function value correspeonds to equ. 5.11 on p159.
237    *     const double f = 0.5 * (residual_trial * residual_trial); // Value
238    *
239    *     // However, the corresponding gradient given in eq 5.14 is wrong. The
240    *     // suggested result
241    *     // const double g = -(residual_0*residual_trial);
242    *     // should actually be
243    *     // g = G(V + alpha*delta)*[ K(V + alpha*delta)*delta.
244    *     Vector<double> tmp;
245    *     tmp.reinit(newton_update);
246    *     tangent_matrix.vmult(tmp, newton_update);
247    *     const double g = tmp * residual_trial; // Gradient
248    *
249    *     return std::make_pair(f, g);
250    *   };
251    *
252    *   // Next we can write a function to determine if taking the full Newton
253    *   // step is a good idea or not (i.e. if it offers good convergence
254    *   // characterisics). This function calls the one we defined above,
255    *   // and actually only performs the line search if an early exit
256    *   // criterion is not met.
257    *   auto perform_linesearch = [&]()
258    *   {
259    *     const auto res_0 = ls_minimization_function(0.0);
260    *     Assert(res_0.second < 0.0,
261    *            ExcMessage("Gradient should be negative. Current value: " +
262    *                        std::to_string(res_0.second)));
263    *     const auto res_1 = ls_minimization_function(1.0);
264    *
265    *     // Check to see if the minimum lies in the interval [0,1] through the
266    *     // values of the gradients at the limit points.
267    *     // If it does not, then the full step is accepted. This is discussed by
268    *     // Wriggers in the paragraph after equ. 5.14.
269    *     if (res_0.second * res_1.second > 0.0)
270    *       return 1.0;
271    *
272    *     // The values for eta, mu are chosen such that more strict convergence
273    *     // conditions are enforced.
274    *     // They should be adjusted according to the problem requirements.
275    *     const double a1        = 1.0;
276    *     const double eta       = 0.5;
277    *     const double mu        = 0.49;
278    *     const double a_max     = 1.25;
279    *     const double max_evals = 20;
280    *     const auto   res = LineMinimization::line_search<double>(
281    *       ls_minimization_function,
282    *       res_0.first, res_0.second,
283    *       LineMinimization::poly_fit<double>,
284    *       a1, eta, mu, a_max, max_evals));
285    *
286    *     return res.first; // Final stepsize
287    *   };
288    *
289    *   // Finally, we can perform the line search and adjust the Newton update
290    *   // accordingly.
291    *   const double linesearch_step_size = perform_linesearch();
292    *   if (linesearch_step_size != 1.0)
293    *   {
294    *     newton_update *= linesearch_step_size;
295    *     constraints.distribute(newton_update);
296    *   }
297    * @endcode
298    *
299    *
300    * @param func A one dimensional function which returns value and derivative
301    * at the given point.
302    * @param f0 The function value at the origin.
303    * @param g0 The function derivative at the origin.
304    * @param interpolate A function which determines how interpolation is done
305    * during the zoom phase. It takes values and derivatives at the current
306    * interval/bracket ($f\_low$, $f\_hi$) as well as up to 5 values and
307    * derivatives at previous steps. The returned value is to be provided within
308    * the given bounds.
309    * @param a1 Initial trial step for the bracketing phase.
310    * @param eta A parameter in the second Wolfe condition (curvature condition).
311    * @param mu A parameter in the first Wolfe condition (sufficient decrease).
312    * @param a_max The maximum allowed step size.
313    * @param max_evaluations The maximum allowed number of function evaluations.
314    * @param debug_output A flag to output extra debug information into the
315    * <code>deallog</code> static object.
316    * @return The function returns the step size and the number of times function
317    * @p func was called.
318    */
319   template <typename NumberType>
320   std::pair<NumberType, unsigned int>
321   line_search(
322     const std::function<std::pair<NumberType, NumberType>(const NumberType x)>
323       &              func,
324     const NumberType f0,
325     const NumberType g0,
326     const std::function<
327       NumberType(const NumberType                        x_low,
328                  const NumberType                        f_low,
329                  const NumberType                        g_low,
330                  const NumberType                        x_hi,
331                  const NumberType                        f_hi,
332                  const NumberType                        g_hi,
333                  const FiniteSizeHistory<NumberType> &   x_rec,
334                  const FiniteSizeHistory<NumberType> &   f_rec,
335                  const FiniteSizeHistory<NumberType> &   g_rec,
336                  const std::pair<NumberType, NumberType> bounds)> &interpolate,
337     const NumberType                                               a1,
338     const NumberType                                               eta = 0.9,
339     const NumberType                                               mu  = 0.01,
340     const NumberType   a_max           = std::numeric_limits<NumberType>::max(),
341     const unsigned int max_evaluations = 20,
342     const bool         debug_output    = false);
343 
344 
345   // -------------------  inline and template functions ----------------
346 
347 
348 #ifndef DOXYGEN
349 
350 
351   template <typename NumberType>
352   std_cxx17::optional<NumberType>
quadratic_fit(const NumberType x1,const NumberType f1,const NumberType g1,const NumberType x2,const NumberType f2)353   quadratic_fit(const NumberType x1,
354                 const NumberType f1,
355                 const NumberType g1,
356                 const NumberType x2,
357                 const NumberType f2)
358   {
359     Assert(x1 != x2, ExcMessage("Point are the same"));
360     const NumberType denom = (2. * g1 * x2 - 2. * g1 * x1 - 2. * f2 + 2. * f1);
361     if (denom == 0)
362       return {};
363     else
364       return (g1 * (x2 * x2 - x1 * x1) + 2. * (f1 - f2) * x1) / denom;
365   }
366 
367 
368 
369   template <typename NumberType>
370   std_cxx17::optional<NumberType>
cubic_fit(const NumberType x1,const NumberType f1,const NumberType g1,const NumberType x2,const NumberType f2,const NumberType g2)371   cubic_fit(const NumberType x1,
372             const NumberType f1,
373             const NumberType g1,
374             const NumberType x2,
375             const NumberType f2,
376             const NumberType g2)
377   {
378     Assert(x1 != x2, ExcMessage("Points are the same"));
379     const NumberType beta1 = g1 + g2 - 3. * (f1 - f2) / (x1 - x2);
380     const NumberType s     = beta1 * beta1 - g1 * g2;
381     if (s < 0)
382       return {};
383 
384     const NumberType beta2 = std::sqrt(s);
385     const NumberType denom =
386       x1 < x2 ? g2 - g1 + 2. * beta2 : g1 - g2 + 2. * beta2;
387     if (denom == 0.)
388       return {};
389 
390     return x1 < x2 ? x2 - (x2 - x1) * (g2 + beta2 - beta1) / denom :
391                      x1 - (x1 - x2) * (g1 + beta2 - beta1) / denom;
392   }
393 
394 
395 
396   template <typename NumberType>
397   std_cxx17::optional<NumberType>
cubic_fit_three_points(const NumberType x1,const NumberType f1,const NumberType g1,const NumberType x2,const NumberType f2,const NumberType x3,const NumberType f3)398   cubic_fit_three_points(const NumberType x1,
399                          const NumberType f1,
400                          const NumberType g1,
401                          const NumberType x2,
402                          const NumberType f2,
403                          const NumberType x3,
404                          const NumberType f3)
405   {
406     Assert(x1 != x2, ExcMessage("Points are the same"));
407     Assert(x1 != x3, ExcMessage("Points are the same"));
408     // f(x) = A *(x-x1)^3 + B*(x-x1)^2 + C*(x-x1) + D
409     // =>
410     // D = f1
411     // C = g1
412 
413     // the rest is a system of 2 equations:
414 
415     const NumberType x2_shift = x2 - x1;
416     const NumberType x3_shift = x3 - x1;
417     const NumberType r1       = f2 - f1 - g1 * x2_shift;
418     const NumberType r2       = f3 - f1 - g1 * x3_shift;
419     const NumberType denom =
420       std::pow(x2_shift * x3_shift, 2) * (x2_shift - x3_shift);
421     if (denom == 0.)
422       return {};
423 
424     const NumberType A =
425       (r1 * std::pow(x3_shift, 2) - r2 * std::pow(x2_shift, 2)) / denom;
426     const NumberType B =
427       (r2 * std::pow(x2_shift, 3) - r1 * std::pow(x3_shift, 3)) / denom;
428     const NumberType &C = g1;
429 
430     // now get the minimizer:
431     const NumberType radical = B * B - A * C * 3;
432     if (radical < 0)
433       return {};
434 
435     return x1 + (-B + std::sqrt(radical)) / (A * 3);
436   }
437 
438 
439 
440   template <typename NumberType>
441   NumberType
poly_fit(const NumberType x1,const NumberType f1,const NumberType g1,const NumberType x2,const NumberType f2,const NumberType g2,const FiniteSizeHistory<NumberType> &,const FiniteSizeHistory<NumberType> &,const FiniteSizeHistory<NumberType> &,const std::pair<NumberType,NumberType> bounds)442   poly_fit(const NumberType x1,
443            const NumberType f1,
444            const NumberType g1,
445            const NumberType x2,
446            const NumberType f2,
447            const NumberType g2,
448            const FiniteSizeHistory<NumberType> &,
449            const FiniteSizeHistory<NumberType> &,
450            const FiniteSizeHistory<NumberType> &,
451            const std::pair<NumberType, NumberType> bounds)
452   {
453     Assert(bounds.first < bounds.second, ExcMessage("Incorrect bounds"));
454 
455     // Similar to scipy implementation but we fit based on two points
456     // with their gradients and do bisection on bounds.
457     // https://github.com/scipy/scipy/blob/v1.0.0/scipy/optimize/linesearch.py#L555-L563
458 
459     // First try cubic interpolation
460     std_cxx17::optional<NumberType> res = cubic_fit(x1, f1, g1, x2, f2, g2);
461     if (res && *res >= bounds.first && *res <= bounds.second)
462       return *res;
463 
464     // cubic either fails or outside of safe region, do quadratic:
465     res = quadratic_fit(x1, f1, g1, x2, f2);
466     if (res && *res >= bounds.first && *res <= bounds.second)
467       return *res;
468 
469     // quadratic either failed or outside of safe region. Do bisection
470     // on safe region
471     return (bounds.first + bounds.second) * 0.5;
472   }
473 
474 
475 
476   template <typename NumberType>
477   NumberType
poly_fit_three_points(const NumberType x1,const NumberType f1,const NumberType g1,const NumberType x2,const NumberType f2,const NumberType g2,const FiniteSizeHistory<NumberType> & x_rec,const FiniteSizeHistory<NumberType> & f_rec,const FiniteSizeHistory<NumberType> &,const std::pair<NumberType,NumberType> bounds)478   poly_fit_three_points(const NumberType                     x1,
479                         const NumberType                     f1,
480                         const NumberType                     g1,
481                         const NumberType                     x2,
482                         const NumberType                     f2,
483                         const NumberType                     g2,
484                         const FiniteSizeHistory<NumberType> &x_rec,
485                         const FiniteSizeHistory<NumberType> &f_rec,
486                         const FiniteSizeHistory<NumberType> & /*g_rec*/,
487                         const std::pair<NumberType, NumberType> bounds)
488   {
489     Assert(bounds.first < bounds.second, ExcMessage("Incorrect bounds"));
490     AssertDimension(x_rec.size(), f_rec.size());
491 
492     // Same as scipy implementation where cubic fit is using 3 points
493     // https://github.com/scipy/scipy/blob/v1.0.0/scipy/optimize/linesearch.py#L555-L563
494 
495     // First try cubic interpolation after first iteration
496     std_cxx17::optional<NumberType> res =
497       x_rec.size() > 0 ?
498         cubic_fit_three_points(x1, f1, g1, x2, f2, x_rec[0], f_rec[0]) :
499         std_cxx17::optional<NumberType>{};
500     if (res && *res >= bounds.first && *res <= bounds.second)
501       return *res;
502 
503     // cubic either fails or outside of safe region, do quadratic:
504     res = quadratic_fit(x1, f1, g1, x2, f2);
505     if (res && *res >= bounds.first && *res <= bounds.second)
506       return *res;
507 
508     // quadratic either failed or outside of safe region. Do bisection
509     // on safe region
510     return (bounds.first + bounds.second) * 0.5;
511   }
512 
513 
514 
515   template <typename NumberType>
516   std::pair<NumberType, unsigned int>
line_search(const std::function<std::pair<NumberType,NumberType> (const NumberType x)> & func,const NumberType f0,const NumberType g0,const std::function<NumberType (const NumberType x_low,const NumberType f_low,const NumberType g_low,const NumberType x_hi,const NumberType f_hi,const NumberType g_hi,const FiniteSizeHistory<NumberType> & x_rec,const FiniteSizeHistory<NumberType> & f_rec,const FiniteSizeHistory<NumberType> & g_rec,const std::pair<NumberType,NumberType> bounds)> & choose,const NumberType a1,const NumberType eta,const NumberType mu,const NumberType a_max,const unsigned int max_evaluations,const bool debug_output)517   line_search(
518     const std::function<std::pair<NumberType, NumberType>(const NumberType x)>
519       &              func,
520     const NumberType f0,
521     const NumberType g0,
522     const std::function<
523       NumberType(const NumberType                        x_low,
524                  const NumberType                        f_low,
525                  const NumberType                        g_low,
526                  const NumberType                        x_hi,
527                  const NumberType                        f_hi,
528                  const NumberType                        g_hi,
529                  const FiniteSizeHistory<NumberType> &   x_rec,
530                  const FiniteSizeHistory<NumberType> &   f_rec,
531                  const FiniteSizeHistory<NumberType> &   g_rec,
532                  const std::pair<NumberType, NumberType> bounds)> &choose,
533     const NumberType                                               a1,
534     const NumberType                                               eta,
535     const NumberType                                               mu,
536     const NumberType                                               a_max,
537     const unsigned int max_evaluations,
538     const bool         debug_output)
539   {
540     // Note that scipy use dcsrch() from Minpack2 Fortran lib for line search
541     Assert(mu < 0.5 && mu > 0, ExcMessage("mu is not in (0,1/2)."));
542     Assert(eta < 1. && eta > mu, ExcMessage("eta is not in (mu,1)."));
543     Assert(a_max > 0, ExcMessage("max is not positive."));
544     Assert(a1 > 0 && a1 <= a_max, ExcMessage("a1 is not in (0,max]."));
545     Assert(g0 < 0, ExcMessage("Initial slope is not negative"));
546 
547     // Growth parameter for bracketing phase:
548     // 1 < tau1
549     const NumberType tau1 = 9.;
550     // shrink parameters for sectioning phase to prevent ai from being
551     // arbitrary close to the extremes of the interval.
552     // 0 < tau2 < tau3 <= 1/2
553     // tau2 <= eta is advisable
554     const NumberType tau2 = 0.1; // bound for closeness to a_lo
555     const NumberType tau3 = 0.5; // bound for closeness to a_hi
556 
557     const NumberType g0_abs = std::abs(g0);
558     const NumberType f_min  = f0 + a_max * mu * g0;
559 
560     // return True if the first Wolfe condition (sufficient decrease) is
561     // satisfied
562     const auto w1 = [&](const NumberType a, const NumberType f) {
563       return f <= f0 + a * mu * g0;
564     };
565 
566     // return True if the second Wolfe condition (curvature condition) is
567     // satisfied
568     const auto w2 = [&](const NumberType g) {
569       return std::abs(g) <= eta * g0_abs;
570     };
571 
572     // Bracketing phase (Algorithm 2.6.2): look for a non-trivial interval
573     // which is known to contain an interval of acceptable points.
574     // We adopt notation of Noceal.
575     const NumberType x    = std::numeric_limits<NumberType>::signaling_NaN();
576     NumberType       a_lo = x, f_lo = x, g_lo = x;
577     NumberType       a_hi = x, f_hi = x, g_hi = x;
578     NumberType       ai = x, fi = x, gi = x;
579 
580     // count function calls in i:
581     unsigned int i = 0;
582     {
583       NumberType f_prev, g_prev, a_prev;
584       ai     = a1;
585       f_prev = f0;
586       g_prev = g0;
587       a_prev = 0;
588 
589       while (i < max_evaluations)
590         {
591           const auto fgi = func(ai);
592           fi             = fgi.first;
593           gi             = fgi.second;
594           i++;
595 
596           if (debug_output)
597             deallog << "Bracketing phase: " << i << std::endl
598                     << ai << " " << fi << " " << gi << " " << w1(ai, fi) << " "
599                     << w2(gi) << " " << f_min << std::endl;
600 
601           // first check if we can stop bracketing or the whole line search:
602           if (fi <= f_min || ai == a_max)
603             {
604               if (debug_output)
605                 deallog << "Reached the maximum step size." << std::endl;
606               return std::make_pair(ai, i);
607             }
608 
609           if (!w1(ai, fi) ||
610               (fi >= f_prev && i > 1)) // violate first Wolfe or not descending
611             {
612               a_lo = a_prev;
613               f_lo = f_prev;
614               g_lo = g_prev;
615 
616               a_hi = ai;
617               f_hi = fi;
618               g_hi = gi;
619               break; // end bracketing
620             }
621 
622           if (w2(gi)) // satisfies both Wolfe conditions
623             {
624               if (debug_output)
625                 deallog << "Satisfied both Wolfe conditions during Bracketing."
626                         << std::endl;
627 
628               Assert(w1(ai, fi), ExcInternalError());
629               return std::make_pair(ai, i);
630             }
631 
632           if (gi >= 0) // not descending
633             {
634               a_lo = ai;
635               f_lo = fi;
636               g_lo = gi;
637 
638               a_hi = a_prev;
639               f_hi = f_prev;
640               g_hi = g_prev;
641               break; // end bracketing
642             }
643 
644           // extrapolation step with the bounds
645           const auto bounds =
646             std::make_pair(2. * ai - a_prev,
647                            std::min(a_max, ai + tau1 * (ai - a_prev)));
648 
649           a_prev = ai;
650           f_prev = fi;
651           g_prev = gi;
652 
653           // NOTE: Fletcher's 2.6.2 includes optional extrapolation, we
654           // simply take the upper bound
655           // Scipy increases by factor of two:
656           // https://github.com/scipy/scipy/blob/v1.0.0/scipy/optimize/linesearch.py#L447
657           ai = bounds.second;
658         }
659     }
660 
661     AssertThrow(
662       i < max_evaluations,
663       ExcMessage(
664         "Could not find the initial bracket within the given number of iterations."));
665 
666     // Check properties of the bracket (Theorem 3.2 in More and Thuente, 94
667     // and Eq. 2.6.3 in Fletcher 2013
668 
669     // FIXME: these conditions are actually violated for Fig3 and a1=10^3 in
670     // More and Thorenton, 94.
671 
672     /*
673     Assert((f_lo < f_hi) && w1(a_lo, f_lo), ExcInternalError());
674     Assert(((a_hi - a_lo) * g_lo < 0) && !w2(g_lo), ExcInternalError());
675     Assert((w1(a_hi, f_hi) || f_hi >= f_lo), ExcInternalError());
676     */
677 
678     // keep short history of last points to improve interpolation
679     FiniteSizeHistory<NumberType> a_rec(5), f_rec(5), g_rec(5);
680     // if neither a_lo nor a_hi are zero:
681     if (std::abs(a_lo) > std::numeric_limits<NumberType>::epsilon() &&
682         std::abs(a_hi) > std::numeric_limits<NumberType>::epsilon())
683       {
684         a_rec.add(0);
685         f_rec.add(f0);
686         g_rec.add(g0);
687       }
688 
689     // Now sectioning phase: we allow both [a_lo, a_hi] and [a_hi, a_lo]
690     while (i < max_evaluations)
691       {
692         const NumberType a_lo_safe = a_lo + tau2 * (a_hi - a_lo);
693         const NumberType a_hi_safe = a_hi - tau3 * (a_hi - a_lo);
694         const auto       bounds    = std::minmax(a_lo_safe, a_hi_safe);
695 
696         ai = choose(
697           a_lo, f_lo, g_lo, a_hi, f_hi, g_hi, a_rec, f_rec, g_rec, bounds);
698 
699         const std::pair<NumberType, NumberType> fgi = func(ai);
700         fi                                          = fgi.first;
701         gi                                          = fgi.second;
702         i++;
703 
704         if (debug_output)
705           deallog << "Sectioning phase: " << i << std::endl
706                   << a_lo << " " << f_lo << " " << g_lo << " " << w1(a_lo, f_lo)
707                   << " " << w2(g_lo) << std::endl
708                   << a_hi << " " << f_hi << " " << g_hi << " " << w1(a_hi, f_hi)
709                   << " " << w2(g_hi) << std::endl
710                   << ai << " " << fi << " " << gi << " " << w1(ai, fi) << " "
711                   << w2(gi) << std::endl;
712 
713         if (!w1(ai, fi) || fi >= f_lo)
714           // take [a_lo, ai]
715           {
716             a_rec.add(a_hi);
717             f_rec.add(f_hi);
718             g_rec.add(g_hi);
719 
720             a_hi = ai;
721             f_hi = fi;
722             g_hi = gi;
723           }
724         else
725           {
726             if (w2(gi)) // satisfies both wolf
727               {
728                 if (debug_output)
729                   deallog << "Satisfied both Wolfe conditions." << std::endl;
730                 Assert(w1(ai, fi), ExcInternalError());
731                 return std::make_pair(ai, i);
732               }
733 
734             if (gi * (a_hi - a_lo) >= 0)
735               // take [ai, a_lo]
736               {
737                 a_rec.add(a_hi);
738                 f_rec.add(f_hi);
739                 g_rec.add(g_hi);
740 
741                 a_hi = a_lo;
742                 f_hi = f_lo;
743                 g_hi = g_lo;
744               }
745             else
746               // take [ai, a_hi]
747               {
748                 a_rec.add(a_lo);
749                 f_rec.add(f_lo);
750                 g_rec.add(g_lo);
751               }
752 
753             a_lo = ai;
754             f_lo = fi;
755             g_lo = gi;
756           }
757       }
758 
759     // if we got here, we could not find the solution
760     AssertThrow(
761       false,
762       ExcMessage(
763         "Could not could complete the sectioning phase within the given number of iterations."));
764     return std::make_pair(std::numeric_limits<NumberType>::signaling_NaN(), i);
765   }
766 
767 #endif // DOXYGEN
768 
769 } // namespace LineMinimization
770 
771 DEAL_II_NAMESPACE_CLOSE
772 
773 #endif // dealii_line_minimization_h
774