1"""
2Functions
3---------
4.. autosummary::
5   :toctree: generated/
6
7    line_search_armijo
8    line_search_wolfe1
9    line_search_wolfe2
10    scalar_search_wolfe1
11    scalar_search_wolfe2
12
13"""
from warnings import warn

from scipy.optimize import minpack2
import numpy as np

__all__ = ['LineSearchWarning', 'line_search_wolfe1', 'line_search_wolfe2',
           'scalar_search_wolfe1', 'scalar_search_wolfe2',
           'line_search_armijo']


class LineSearchWarning(RuntimeWarning):
    pass


#------------------------------------------------------------------------------
# Minpack's Wolfe line and scalar searches
#------------------------------------------------------------------------------

def line_search_wolfe1(f, fprime, xk, pk, gfk=None,
                       old_fval=None, old_old_fval=None,
                       args=(), c1=1e-4, c2=0.9, amax=50, amin=1e-8,
                       xtol=1e-14):
35    """
36    As `scalar_search_wolfe1` but do a line search to direction `pk`
37
38    Parameters
39    ----------
40    f : callable
41        Function `f(x)`
42    fprime : callable
43        Gradient of `f`
44    xk : array_like
45        Current point
46    pk : array_like
47        Search direction
48
49    gfk : array_like, optional
50        Gradient of `f` at point `xk`
51    old_fval : float, optional
52        Value of `f` at point `xk`
53    old_old_fval : float, optional
54        Value of `f` at point preceding `xk`
55
56    The rest of the parameters are the same as for `scalar_search_wolfe1`.
57
58    Returns
59    -------
60    stp, f_count, g_count, fval, old_fval
61        As in `line_search_wolfe1`
62    gval : array
63        Gradient of `f` at the final point
64
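    Examples
    --------
    A minimal sketch (assuming this module is importable as
    ``scipy.optimize.linesearch``); the step size and evaluation counts
    depend on the MINPACK2 routine, so no output is shown:

    >>> import numpy as np
    >>> from scipy.optimize.linesearch import line_search_wolfe1
    >>> def f(x):
    ...     return float(np.dot(x, x))
    >>> def fprime(x):
    ...     return 2 * x
    >>> xk = np.array([1.8, 1.7])
    >>> pk = -fprime(xk)  # steepest-descent direction
    >>> stp, fc, gc, fval, old_fval, gval = line_search_wolfe1(
    ...     f, fprime, xk, pk)
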
65    """
66    if gfk is None:
67        gfk = fprime(xk)
68
69    if isinstance(fprime, tuple):
70        eps = fprime[1]
71        fprime = fprime[0]
72        newargs = (f, eps) + args
73        gradient = False
74    else:
75        newargs = args
76        gradient = True
77
78    gval = [gfk]
79    gc = [0]
80    fc = [0]
81
82    def phi(s):
83        fc[0] += 1
84        return f(xk + s*pk, *args)
85
86    def derphi(s):
87        gval[0] = fprime(xk + s*pk, *newargs)
88        if gradient:
89            gc[0] += 1
90        else:
91            fc[0] += len(xk) + 1
92        return np.dot(gval[0], pk)
93
94    derphi0 = np.dot(gfk, pk)
95
96    stp, fval, old_fval = scalar_search_wolfe1(
97            phi, derphi, old_fval, old_old_fval, derphi0,
98            c1=c1, c2=c2, amax=amax, amin=amin, xtol=xtol)
99
100    return stp, fc[0], gc[0], fval, old_fval, gval[0]
101
102
def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None,
                         c1=1e-4, c2=0.9,
                         amax=50, amin=1e-8, xtol=1e-14):
    """
    Scalar line search for an alpha that satisfies the strong Wolfe
    conditions.

    The search direction is assumed to be a descent direction, i.e.,
    ``derphi(0) < 0``.

    Parameters
    ----------
    phi : callable phi(alpha)
        Function at point `alpha`.
    derphi : callable phi'(alpha)
        Objective function derivative. Returns a scalar.
    phi0 : float, optional
        Value of `phi` at 0.
    old_phi0 : float, optional
        Value of `phi` at the previous point.
    derphi0 : float, optional
        Value of `derphi` at 0.
    c1 : float, optional
        Parameter for the Armijo condition rule.
    c2 : float, optional
        Parameter for the curvature condition rule.
    amax, amin : float, optional
        Maximum and minimum step sizes.
    xtol : float, optional
        Relative tolerance for an acceptable step.

    Returns
    -------
    alpha : float
        Step size, or None if no suitable step was found.
    phi : float
        Value of `phi` at the new point `alpha`.
    phi0 : float
        Value of `phi` at `alpha=0`.

    Notes
    -----
    Uses routine DCSRCH from MINPACK.

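    Examples
    --------
    A minimal sketch on the quadratic ``phi(alpha) = (alpha - 1)**2``
    (assuming this module is importable as ``scipy.optimize.linesearch``);
    the accepted step depends on the DCSRCH routine, so no output is shown:

    >>> from scipy.optimize.linesearch import scalar_search_wolfe1
    >>> phi = lambda alpha: (alpha - 1.0)**2
    >>> derphi = lambda alpha: 2.0 * (alpha - 1.0)
    >>> stp, phi1, phi0 = scalar_search_wolfe1(phi, derphi)
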
145    """
146
147    if phi0 is None:
148        phi0 = phi(0.)
149    if derphi0 is None:
150        derphi0 = derphi(0.)
151
152    if old_phi0 is not None and derphi0 != 0:
153        alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
154        if alpha1 < 0:
155            alpha1 = 1.0
156    else:
157        alpha1 = 1.0
158
159    phi1 = phi0
160    derphi1 = derphi0
161    isave = np.zeros((2,), np.intc)
162    dsave = np.zeros((13,), float)
163    task = b'START'
164
165    maxiter = 100
166    for i in range(maxiter):
167        stp, phi1, derphi1, task = minpack2.dcsrch(alpha1, phi1, derphi1,
168                                                   c1, c2, xtol, task,
169                                                   amin, amax, isave, dsave)
170        if task[:2] == b'FG':
171            alpha1 = stp
172            phi1 = phi(stp)
173            derphi1 = derphi(stp)
174        else:
175            break
176    else:
177        # maxiter reached, the line search did not converge
178        stp = None
179
180    if task[:5] == b'ERROR' or task[:4] == b'WARN':
181        stp = None  # failed
182
183    return stp, phi1, phi0
184
185
line_search = line_search_wolfe1


#------------------------------------------------------------------------------
# Pure-Python Wolfe line and scalar searches
#------------------------------------------------------------------------------

def line_search_wolfe2(f, myfprime, xk, pk, gfk=None, old_fval=None,
                       old_old_fval=None, args=(), c1=1e-4, c2=0.9, amax=None,
                       extra_condition=None, maxiter=10):
    """Find alpha that satisfies the strong Wolfe conditions.

    Parameters
    ----------
    f : callable f(x,*args)
        Objective function.
    myfprime : callable f'(x,*args)
        Objective function gradient.
    xk : ndarray
        Starting point.
    pk : ndarray
        Search direction.
    gfk : ndarray, optional
        Gradient value for x=xk (xk being the current parameter
        estimate). Will be recomputed if omitted.
    old_fval : float, optional
        Function value for x=xk. Will be recomputed if omitted.
    old_old_fval : float, optional
        Function value for the point preceding x=xk.
    args : tuple, optional
        Additional arguments passed to the objective function.
    c1 : float, optional
        Parameter for the Armijo condition rule.
    c2 : float, optional
        Parameter for the curvature condition rule.
    amax : float, optional
        Maximum step size.
    extra_condition : callable, optional
        A callable of the form ``extra_condition(alpha, x, f, g)``
        returning a boolean. Arguments are the proposed step ``alpha``
        and the corresponding ``x``, ``f`` and ``g`` values. The line
        search accepts the value of ``alpha`` only if this callable
        returns ``True``. If the callable returns ``False`` for the
        step length, the algorithm will continue with new iterates.
        The callable is only called for iterates satisfying the
        strong Wolfe conditions.
    maxiter : int, optional
        Maximum number of iterations to perform.

    Returns
    -------
    alpha : float or None
        Alpha for which ``x_new = x0 + alpha * pk``,
        or None if the line search algorithm did not converge.
    fc : int
        Number of function evaluations made.
    gc : int
        Number of gradient evaluations made.
    new_fval : float or None
        New function value ``f(x_new)=f(x0+alpha*pk)``,
        or None if the line search algorithm did not converge.
    old_fval : float
        Old function value ``f(x0)``.
    new_slope : float or None
        The local slope along the search direction at the
        new value ``<myfprime(x_new), pk>``,
        or None if the line search algorithm did not converge.

    Notes
    -----
    Uses the line search algorithm to enforce strong Wolfe
    conditions. See Wright and Nocedal, 'Numerical Optimization',
    1999, pp. 59-61.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.optimize import line_search

    An objective function and its gradient are defined.

    >>> def obj_func(x):
    ...     return (x[0])**2 + (x[1])**2
    >>> def obj_grad(x):
    ...     return [2*x[0], 2*x[1]]

    We can find alpha that satisfies the strong Wolfe conditions.

    >>> start_point = np.array([1.8, 1.7])
    >>> search_gradient = np.array([-1.0, -1.0])
    >>> line_search(obj_func, obj_grad, start_point, search_gradient)
    (1.0, 2, 1, 1.1300000000000001, 6.13, [1.6, 1.4])

    """
    fc = [0]
    gc = [0]
    gval = [None]
    gval_alpha = [None]

    def phi(alpha):
        fc[0] += 1
        return f(xk + alpha * pk, *args)

    if isinstance(myfprime, tuple):
        def derphi(alpha):
            fc[0] += len(xk) + 1
            eps = myfprime[1]
            fprime = myfprime[0]
            newargs = (f, eps) + args
            gval[0] = fprime(xk + alpha * pk, *newargs)  # store for later use
            gval_alpha[0] = alpha
            return np.dot(gval[0], pk)
    else:
        fprime = myfprime

        def derphi(alpha):
            gc[0] += 1
            gval[0] = fprime(xk + alpha * pk, *args)  # store for later use
            gval_alpha[0] = alpha
            return np.dot(gval[0], pk)

    if gfk is None:
        gfk = fprime(xk, *args)
    derphi0 = np.dot(gfk, pk)

    if extra_condition is not None:
        # Add the current gradient as argument, to avoid needless
        # re-evaluation
        def extra_condition2(alpha, phi):
            if gval_alpha[0] != alpha:
                derphi(alpha)
            x = xk + alpha * pk
            return extra_condition(alpha, x, phi, gval[0])
    else:
        extra_condition2 = None

    alpha_star, phi_star, old_fval, derphi_star = scalar_search_wolfe2(
            phi, derphi, old_fval, old_old_fval, derphi0, c1, c2, amax,
            extra_condition2, maxiter=maxiter)

    if derphi_star is None:
        warn('The line search algorithm did not converge', LineSearchWarning)
    else:
        # derphi_star is a number (the slope derphi = <gfk, pk>), so
        # replace it with the most recently computed gradient, which was
        # used in calculating it. This is the gradient at the new point,
        # so there is no need to compute it again in the outer loop.
        derphi_star = gval[0]

    return alpha_star, fc[0], gc[0], phi_star, old_fval, derphi_star


def scalar_search_wolfe2(phi, derphi, phi0=None,
                         old_phi0=None, derphi0=None,
                         c1=1e-4, c2=0.9, amax=None,
                         extra_condition=None, maxiter=10):
    """Find alpha that satisfies the strong Wolfe conditions.

    The search direction is assumed to be a descent direction, i.e.,
    ``derphi(0) < 0``.

    Parameters
    ----------
    phi : callable phi(alpha)
        Objective scalar function.
    derphi : callable phi'(alpha)
        Objective function derivative. Returns a scalar.
    phi0 : float, optional
        Value of `phi` at 0.
    old_phi0 : float, optional
        Value of `phi` at the previous point.
    derphi0 : float, optional
        Value of `derphi` at 0.
    c1 : float, optional
        Parameter for the Armijo condition rule.
    c2 : float, optional
        Parameter for the curvature condition rule.
    amax : float, optional
        Maximum step size.
    extra_condition : callable, optional
        A callable of the form ``extra_condition(alpha, phi_value)``
        returning a boolean. The line search accepts the value
        of ``alpha`` only if this callable returns ``True``.
        If the callable returns ``False`` for the step length,
        the algorithm will continue with new iterates.
        The callable is only called for iterates satisfying
        the strong Wolfe conditions.
    maxiter : int, optional
        Maximum number of iterations to perform.

    Returns
    -------
    alpha_star : float or None
        Best alpha, or None if the line search algorithm did not converge.
    phi_star : float
        phi at alpha_star.
    phi0 : float
        phi at 0.
    derphi_star : float or None
        derphi at alpha_star, or None if the line search algorithm
        did not converge.

    Notes
    -----
    Uses the line search algorithm to enforce strong Wolfe
    conditions. See Wright and Nocedal, 'Numerical Optimization',
    1999, pp. 59-61.

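    Examples
    --------
    A minimal sketch on the quadratic ``phi(alpha) = (alpha - 1)**2``
    (assuming this module is importable as ``scipy.optimize.linesearch``).
    The quadratic is minimized exactly at ``alpha = 1``, where the strong
    Wolfe conditions hold:

    >>> from scipy.optimize.linesearch import scalar_search_wolfe2
    >>> phi = lambda alpha: (alpha - 1.0)**2
    >>> derphi = lambda alpha: 2.0 * (alpha - 1.0)
    >>> scalar_search_wolfe2(phi, derphi)
    (1.0, 0.0, 1.0, 0.0)
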
393    """
394
395    if phi0 is None:
396        phi0 = phi(0.)
397
398    if derphi0 is None:
399        derphi0 = derphi(0.)
400
401    alpha0 = 0
402    if old_phi0 is not None and derphi0 != 0:
403        alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
404    else:
405        alpha1 = 1.0
406
407    if alpha1 < 0:
408        alpha1 = 1.0
409
410    if amax is not None:
411        alpha1 = min(alpha1, amax)
412
413    phi_a1 = phi(alpha1)
414    #derphi_a1 = derphi(alpha1) evaluated below
415
416    phi_a0 = phi0
417    derphi_a0 = derphi0
418
419    if extra_condition is None:
420        extra_condition = lambda alpha, phi: True
421
422    for i in range(maxiter):
423        if alpha1 == 0 or (amax is not None and alpha0 == amax):
424            # alpha1 == 0: This shouldn't happen. Perhaps the increment has
425            # slipped below machine precision?
426            alpha_star = None
427            phi_star = phi0
428            phi0 = old_phi0
429            derphi_star = None
430
431            if alpha1 == 0:
432                msg = 'Rounding errors prevent the line search from converging'
433            else:
434                msg = "The line search algorithm could not find a solution " + \
435                      "less than or equal to amax: %s" % amax
436
437            warn(msg, LineSearchWarning)
438            break
439
440        not_first_iteration = i > 0
441        if (phi_a1 > phi0 + c1 * alpha1 * derphi0) or \
442           ((phi_a1 >= phi_a0) and not_first_iteration):
443            alpha_star, phi_star, derphi_star = \
444                        _zoom(alpha0, alpha1, phi_a0,
445                              phi_a1, derphi_a0, phi, derphi,
446                              phi0, derphi0, c1, c2, extra_condition)
447            break
448
449        derphi_a1 = derphi(alpha1)
450        if (abs(derphi_a1) <= -c2*derphi0):
451            if extra_condition(alpha1, phi_a1):
452                alpha_star = alpha1
453                phi_star = phi_a1
454                derphi_star = derphi_a1
455                break
456
457        if (derphi_a1 >= 0):
458            alpha_star, phi_star, derphi_star = \
459                        _zoom(alpha1, alpha0, phi_a1,
460                              phi_a0, derphi_a1, phi, derphi,
461                              phi0, derphi0, c1, c2, extra_condition)
462            break
463
464        alpha2 = 2 * alpha1  # increase by factor of two on each iteration
465        if amax is not None:
466            alpha2 = min(alpha2, amax)
467        alpha0 = alpha1
468        alpha1 = alpha2
469        phi_a0 = phi_a1
470        phi_a1 = phi(alpha1)
471        derphi_a0 = derphi_a1
472
473    else:
474        # stopping test maxiter reached
475        alpha_star = alpha1
476        phi_star = phi_a1
477        derphi_star = None
478        warn('The line search algorithm did not converge', LineSearchWarning)
479
480    return alpha_star, phi_star, phi0, derphi_star
481
482
def _cubicmin(a, fa, fpa, b, fb, c, fc):
    """
    Finds the minimizer for a cubic polynomial that goes through the
    points (a, fa), (b, fb), and (c, fc) with derivative at a of fpa.

    If no minimizer can be found, return None.

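    Examples
    --------
    For example, fitting the cubic ``f(x) = x**3 - 3*x`` exactly recovers
    its local minimizer at ``x = 1`` (assuming this module is importable
    as ``scipy.optimize.linesearch``):

    >>> from scipy.optimize.linesearch import _cubicmin
    >>> _cubicmin(0.0, 0.0, -3.0, 2.0, 2.0, 3.0, 18.0)
    1.0
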
490    """
491    # f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D
492
493    with np.errstate(divide='raise', over='raise', invalid='raise'):
494        try:
495            C = fpa
496            db = b - a
497            dc = c - a
498            denom = (db * dc) ** 2 * (db - dc)
499            d1 = np.empty((2, 2))
500            d1[0, 0] = dc ** 2
501            d1[0, 1] = -db ** 2
502            d1[1, 0] = -dc ** 3
503            d1[1, 1] = db ** 3
504            [A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
505                                            fc - fa - C * dc]).flatten())
506            A /= denom
507            B /= denom
508            radical = B * B - 3 * A * C
509            xmin = a + (-B + np.sqrt(radical)) / (3 * A)
510        except ArithmeticError:
511            return None
512    if not np.isfinite(xmin):
513        return None
514    return xmin
515
516
def _quadmin(a, fa, fpa, b, fb):
    """
    Finds the minimizer for a quadratic polynomial that goes through
    the points (a, fa), (b, fb) with derivative at a of fpa.

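    Examples
    --------
    For example, fitting the quadratic ``f(x) = (x - 1)**2`` exactly
    recovers its minimizer at ``x = 1`` (assuming this module is
    importable as ``scipy.optimize.linesearch``):

    >>> from scipy.optimize.linesearch import _quadmin
    >>> _quadmin(0.0, 1.0, -2.0, 2.0, 1.0)
    1.0
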
522    """
523    # f(x) = B*(x-a)^2 + C*(x-a) + D
524    with np.errstate(divide='raise', over='raise', invalid='raise'):
525        try:
526            D = fa
527            C = fpa
528            db = b - a * 1.0
529            B = (fb - D - C * db) / (db * db)
530            xmin = a - C / (2.0 * B)
531        except ArithmeticError:
532            return None
533    if not np.isfinite(xmin):
534        return None
535    return xmin
536
537
def _zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
          phi, derphi, phi0, derphi0, c1, c2, extra_condition):
    """Zoom stage of the approximate line search satisfying strong Wolfe conditions.

    Part of the optimization algorithm in `scalar_search_wolfe2`.

    Notes
    -----
    Implements Algorithm 3.6 (zoom) in Wright and Nocedal,
    'Numerical Optimization', 1999, p. 61.

    """

    maxiter = 10
    i = 0
    delta1 = 0.2  # cubic interpolant check
    delta2 = 0.1  # quadratic interpolant check
    phi_rec = phi0
    a_rec = 0
    while True:
        # Interpolate to find a trial step length between a_lo and a_hi.
        # The interpolation scheme is chosen as follows: try cubic
        # interpolation first; if the result is within delta * dalpha of
        # the interval endpoints, or outside the interval bounded by a_lo
        # and a_hi, fall back to quadratic interpolation; if that result
        # is still too close, use bisection.

        dalpha = a_hi - a_lo
        if dalpha < 0:
            a, b = a_hi, a_lo
        else:
            a, b = a_lo, a_hi

        # Minimizer of the cubic interpolant
        # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi).
        #
        # If the result is too close to the endpoints (or outside the
        # interval), use quadratic interpolation with phi_lo, derphi_lo
        # and phi_hi; if that result is still too close to the endpoints
        # (or outside the interval), use bisection.

        if (i > 0):
            cchk = delta1 * dalpha
            a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi,
                            a_rec, phi_rec)
        if (i == 0) or (a_j is None) or (a_j > b - cchk) or (a_j < a + cchk):
            qchk = delta2 * dalpha
            a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
            if (a_j is None) or (a_j > b - qchk) or (a_j < a + qchk):
                a_j = a_lo + 0.5*dalpha

        # Check the new value of a_j

        phi_aj = phi(a_j)
        if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo):
            phi_rec = phi_hi
            a_rec = a_hi
            a_hi = a_j
            phi_hi = phi_aj
        else:
            derphi_aj = derphi(a_j)
            if abs(derphi_aj) <= -c2*derphi0 and extra_condition(a_j, phi_aj):
                a_star = a_j
                val_star = phi_aj
                valprime_star = derphi_aj
                break
            if derphi_aj*(a_hi - a_lo) >= 0:
                phi_rec = phi_hi
                a_rec = a_hi
                a_hi = a_lo
                phi_hi = phi_lo
            else:
                phi_rec = phi_lo
                a_rec = a_lo
            a_lo = a_j
            phi_lo = phi_aj
            derphi_lo = derphi_aj
        i += 1
        if (i > maxiter):
            # Failed to find a conforming step size
            a_star = None
            val_star = None
            valprime_star = None
            break
    return a_star, val_star, valprime_star


#------------------------------------------------------------------------------
# Armijo line and scalar searches
#------------------------------------------------------------------------------

def line_search_armijo(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
    """Minimize over alpha, the function ``f(xk + alpha*pk)``.

    Parameters
    ----------
    f : callable
        Function to be minimized.
    xk : array_like
        Current point.
    pk : array_like
        Search direction.
    gfk : array_like
        Gradient of `f` at point `xk`.
    old_fval : float
        Value of `f` at point `xk`.
    args : tuple, optional
        Optional arguments.
    c1 : float, optional
        Value to control the stopping criterion.
    alpha0 : scalar, optional
        Value of `alpha` at the start of the optimization.

    Returns
    -------
    alpha : float or None
        Step length, or None if no suitable step was found.
    f_count : int
        Number of function evaluations made.
    f_val_at_alpha : float
        Value of `f` at the new point ``xk + alpha*pk``.

    Notes
    -----
    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57.

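    Examples
    --------
    A minimal sketch (assuming this module is importable as
    ``scipy.optimize.linesearch``). For a quadratic bowl, the full step
    ``alpha0 = 1`` along the steepest-descent direction already satisfies
    the Armijo condition, so it is accepted after a single evaluation:

    >>> import numpy as np
    >>> from scipy.optimize.linesearch import line_search_armijo
    >>> def f(x):
    ...     return float(np.dot(x, x))
    >>> xk = np.array([1.0, 1.0])
    >>> pk = np.array([-1.0, -1.0])
    >>> gfk = 2 * xk
    >>> line_search_armijo(f, xk, pk, gfk, old_fval=f(xk))
    (1, 1, 0.0)
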
662    """
663    xk = np.atleast_1d(xk)
664    fc = [0]
665
666    def phi(alpha1):
667        fc[0] += 1
668        return f(xk + alpha1*pk, *args)
669
670    if old_fval is None:
671        phi0 = phi(0.)
672    else:
673        phi0 = old_fval  # compute f(xk) -- done in past loop
674
675    derphi0 = np.dot(gfk, pk)
676    alpha, phi1 = scalar_search_armijo(phi, phi0, derphi0, c1=c1,
677                                       alpha0=alpha0)
678    return alpha, fc[0], phi1
679
680
681def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
682    """
683    Compatibility wrapper for `line_search_armijo`
684    """
685    r = line_search_armijo(f, xk, pk, gfk, old_fval, args=args, c1=c1,
686                           alpha0=alpha0)
687    return r[0], r[1], 0, r[2]
688
689
def scalar_search_armijo(phi, phi0, derphi0, c1=1e-4, alpha0=1, amin=0):
    """Minimize over alpha, the function ``phi(alpha)``.

    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57.

    The search direction is assumed to be a descent direction, i.e.,
    ``derphi0 < 0``.

    Returns
    -------
    alpha : float or None
        Step length, or None if no suitable step was found.
    phi1 : float
        Value of `phi` at the new point `alpha`.

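    Examples
    --------
    A minimal sketch on the quadratic ``phi(alpha) = (alpha - 1)**2``
    (assuming this module is importable as ``scipy.optimize.linesearch``).
    The default first trial ``alpha0 = 1`` lands on the minimizer and is
    accepted immediately:

    >>> from scipy.optimize.linesearch import scalar_search_armijo
    >>> phi = lambda alpha: (alpha - 1.0)**2
    >>> scalar_search_armijo(phi, phi0=phi(0.0), derphi0=-2.0)
    (1, 0.0)
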
703    """
704    phi_a0 = phi(alpha0)
705    if phi_a0 <= phi0 + c1*alpha0*derphi0:
706        return alpha0, phi_a0
707
708    # Otherwise, compute the minimizer of a quadratic interpolant:
709
710    alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
711    phi_a1 = phi(alpha1)
712
713    if (phi_a1 <= phi0 + c1*alpha1*derphi0):
714        return alpha1, phi_a1
715
716    # Otherwise, loop with cubic interpolation until we find an alpha which
717    # satisfies the first Wolfe condition (since we are backtracking, we will
718    # assume that the value of alpha is not too small and satisfies the second
719    # condition.
720
721    while alpha1 > amin:       # we are assuming alpha>0 is a descent direction
722        factor = alpha0**2 * alpha1**2 * (alpha1-alpha0)
723        a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \
724            alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0)
725        a = a / factor
726        b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \
727            alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0)
728        b = b / factor
729
730        alpha2 = (-b + np.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0*a)
731        phi_a2 = phi(alpha2)
732
733        if (phi_a2 <= phi0 + c1*alpha2*derphi0):
734            return alpha2, phi_a2
735
736        if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96:
737            alpha2 = alpha1 / 2.0
738
739        alpha0 = alpha1
740        alpha1 = alpha2
741        phi_a0 = phi_a1
742        phi_a1 = phi_a2
743
744    # Failed to find a suitable step length
745    return None, phi_a1
746
747
748#------------------------------------------------------------------------------
749# Non-monotone line search for DF-SANE
750#------------------------------------------------------------------------------
751
def _nonmonotone_line_search_cruz(f, x_k, d, prev_fs, eta,
                                  gamma=1e-4, tau_min=0.1, tau_max=0.5):
    """
    Nonmonotone backtracking line search as described in [1]_.

    Parameters
    ----------
    f : callable
        Function returning a tuple ``(f, F)`` where ``f`` is the value
        of a merit function and ``F`` the residual.
    x_k : ndarray
        Initial position.
    d : ndarray
        Search direction.
    prev_fs : sequence of float
        List of previous merit function values. Should have
        ``len(prev_fs) <= M`` where ``M`` is the nonmonotonicity window
        parameter.
    eta : float
        Allowed merit function increase, see [1]_.
    gamma, tau_min, tau_max : float, optional
        Search parameters, see [1]_.

    Returns
    -------
    alpha : float
        Step length.
    xp : ndarray
        Next position.
    fp : float
        Merit function value at the next position.
    Fp : ndarray
        Residual at the next position.

    References
    ----------
    .. [1] "Spectral residual method without gradient information for solving
           large-scale nonlinear systems of equations." W. La Cruz,
           J.M. Martinez, M. Raydan. Math. Comp. **75**, 1429 (2006).

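    Examples
    --------
    A minimal sketch on a hypothetical one-dimensional residual
    ``F(x) = x - 1`` with the squared-norm merit function (the values
    returned depend on the backtracking path, so no output is shown):

    >>> import numpy as np
    >>> from scipy.optimize.linesearch import _nonmonotone_line_search_cruz
    >>> def f(x):
    ...     F = x - 1.0                      # residual (hypothetical system)
    ...     return float(np.dot(F, F)), F    # (merit value, residual)
    >>> x_k = np.array([0.5])
    >>> d = np.array([0.5])                  # step towards the root at x = 1
    >>> f_k, _ = f(x_k)
    >>> alpha, xp, fp, Fp = _nonmonotone_line_search_cruz(
    ...     f, x_k, d, prev_fs=[f_k], eta=0.1)
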
791    """
792    f_k = prev_fs[-1]
793    f_bar = max(prev_fs)
794
795    alpha_p = 1
796    alpha_m = 1
797    alpha = 1
798
799    while True:
800        xp = x_k + alpha_p * d
801        fp, Fp = f(xp)
802
803        if fp <= f_bar + eta - gamma * alpha_p**2 * f_k:
804            alpha = alpha_p
805            break
806
807        alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)
808
809        xp = x_k - alpha_m * d
810        fp, Fp = f(xp)
811
812        if fp <= f_bar + eta - gamma * alpha_m**2 * f_k:
813            alpha = -alpha_m
814            break
815
816        alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)
817
818        alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
819        alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)
820
821    return alpha, xp, fp, Fp
822
823
def _nonmonotone_line_search_cheng(f, x_k, d, f_k, C, Q, eta,
                                   gamma=1e-4, tau_min=0.1, tau_max=0.5,
                                   nu=0.85):
    """
    Nonmonotone line search from [1]_.

    Parameters
    ----------
    f : callable
        Function returning a tuple ``(f, F)`` where ``f`` is the value
        of a merit function and ``F`` the residual.
    x_k : ndarray
        Initial position.
    d : ndarray
        Search direction.
    f_k : float
        Initial merit function value.
    C, Q : float
        Control parameters. On the first iteration, give the values
        Q=1.0, C=f_k.
    eta : float
        Allowed merit function increase, see [1]_.
    nu, gamma, tau_min, tau_max : float, optional
        Search parameters, see [1]_.

    Returns
    -------
    alpha : float
        Step length.
    xp : ndarray
        Next position.
    fp : float
        Merit function value at the next position.
    Fp : ndarray
        Residual at the next position.
    C : float
        New value for the control parameter C.
    Q : float
        New value for the control parameter Q.

    References
    ----------
    .. [1] W. Cheng & D.-H. Li, ''A derivative-free nonmonotone line
           search and its application to the spectral residual
           method'', IMA J. Numer. Anal. 29, 814 (2009).

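    Examples
    --------
    A minimal sketch on a hypothetical one-dimensional residual
    ``F(x) = x - 1`` with the squared-norm merit function, starting from
    the recommended first-iteration values ``Q=1.0, C=f_k`` (the values
    returned depend on the backtracking path, so no output is shown):

    >>> import numpy as np
    >>> from scipy.optimize.linesearch import _nonmonotone_line_search_cheng
    >>> def f(x):
    ...     F = x - 1.0                      # residual (hypothetical system)
    ...     return float(np.dot(F, F)), F    # (merit value, residual)
    >>> x_k = np.array([0.5])
    >>> d = np.array([0.5])                  # step towards the root at x = 1
    >>> f_k, _ = f(x_k)
    >>> alpha, xp, fp, Fp, C, Q = _nonmonotone_line_search_cheng(
    ...     f, x_k, d, f_k, C=f_k, Q=1.0, eta=0.1)
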
870    """
871    alpha_p = 1
872    alpha_m = 1
873    alpha = 1
874
875    while True:
876        xp = x_k + alpha_p * d
877        fp, Fp = f(xp)
878
879        if fp <= C + eta - gamma * alpha_p**2 * f_k:
880            alpha = alpha_p
881            break
882
883        alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)
884
885        xp = x_k - alpha_m * d
886        fp, Fp = f(xp)
887
888        if fp <= C + eta - gamma * alpha_m**2 * f_k:
889            alpha = -alpha_m
890            break
891
892        alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)
893
894        alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
895        alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)
896
897    # Update C and Q
898    Q_next = nu * Q + 1
899    C = (nu * Q * (C + eta) + fp) / Q_next
900    Q = Q_next
901
902    return alpha, xp, fp, Fp, C, Q
903