1""" 2Functions 3--------- 4.. autosummary:: 5 :toctree: generated/ 6 7 line_search_armijo 8 line_search_wolfe1 9 line_search_wolfe2 10 scalar_search_wolfe1 11 scalar_search_wolfe2 12 13""" 14from warnings import warn 15 16from scipy.optimize import minpack2 17import numpy as np 18 19__all__ = ['LineSearchWarning', 'line_search_wolfe1', 'line_search_wolfe2', 20 'scalar_search_wolfe1', 'scalar_search_wolfe2', 21 'line_search_armijo'] 22 23class LineSearchWarning(RuntimeWarning): 24 pass 25 26 27#------------------------------------------------------------------------------ 28# Minpack's Wolfe line and scalar searches 29#------------------------------------------------------------------------------ 30 31def line_search_wolfe1(f, fprime, xk, pk, gfk=None, 32 old_fval=None, old_old_fval=None, 33 args=(), c1=1e-4, c2=0.9, amax=50, amin=1e-8, 34 xtol=1e-14): 35 """ 36 As `scalar_search_wolfe1` but do a line search to direction `pk` 37 38 Parameters 39 ---------- 40 f : callable 41 Function `f(x)` 42 fprime : callable 43 Gradient of `f` 44 xk : array_like 45 Current point 46 pk : array_like 47 Search direction 48 49 gfk : array_like, optional 50 Gradient of `f` at point `xk` 51 old_fval : float, optional 52 Value of `f` at point `xk` 53 old_old_fval : float, optional 54 Value of `f` at point preceding `xk` 55 56 The rest of the parameters are the same as for `scalar_search_wolfe1`. 57 58 Returns 59 ------- 60 stp, f_count, g_count, fval, old_fval 61 As in `line_search_wolfe1` 62 gval : array 63 Gradient of `f` at the final point 64 65 """ 66 if gfk is None: 67 gfk = fprime(xk) 68 69 if isinstance(fprime, tuple): 70 eps = fprime[1] 71 fprime = fprime[0] 72 newargs = (f, eps) + args 73 gradient = False 74 else: 75 newargs = args 76 gradient = True 77 78 gval = [gfk] 79 gc = [0] 80 fc = [0] 81 82 def phi(s): 83 fc[0] += 1 84 return f(xk + s*pk, *args) 85 86 def derphi(s): 87 gval[0] = fprime(xk + s*pk, *newargs) 88 if gradient: 89 gc[0] += 1 90 else: 91 fc[0] += len(xk) + 1 92 return np.dot(gval[0], pk) 93 94 derphi0 = np.dot(gfk, pk) 95 96 stp, fval, old_fval = scalar_search_wolfe1( 97 phi, derphi, old_fval, old_old_fval, derphi0, 98 c1=c1, c2=c2, amax=amax, amin=amin, xtol=xtol) 99 100 return stp, fc[0], gc[0], fval, old_fval, gval[0] 101 102 103def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None, 104 c1=1e-4, c2=0.9, 105 amax=50, amin=1e-8, xtol=1e-14): 106 """ 107 Scalar function search for alpha that satisfies strong Wolfe conditions 108 109 alpha > 0 is assumed to be a descent direction. 110 111 Parameters 112 ---------- 113 phi : callable phi(alpha) 114 Function at point `alpha` 115 derphi : callable phi'(alpha) 116 Objective function derivative. Returns a scalar. 117 phi0 : float, optional 118 Value of phi at 0 119 old_phi0 : float, optional 120 Value of phi at previous point 121 derphi0 : float, optional 122 Value derphi at 0 123 c1 : float, optional 124 Parameter for Armijo condition rule. 125 c2 : float, optional 126 Parameter for curvature condition rule. 127 amax, amin : float, optional 128 Maximum and minimum step size 129 xtol : float, optional 130 Relative tolerance for an acceptable step. 131 132 Returns 133 ------- 134 alpha : float 135 Step size, or None if no suitable step was found 136 phi : float 137 Value of `phi` at the new point `alpha` 138 phi0 : float 139 Value of `phi` at `alpha=0` 140 141 Notes 142 ----- 143 Uses routine DCSRCH from MINPACK. 144 145 """ 146 147 if phi0 is None: 148 phi0 = phi(0.) 149 if derphi0 is None: 150 derphi0 = derphi(0.) 151 152 if old_phi0 is not None and derphi0 != 0: 153 alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0) 154 if alpha1 < 0: 155 alpha1 = 1.0 156 else: 157 alpha1 = 1.0 158 159 phi1 = phi0 160 derphi1 = derphi0 161 isave = np.zeros((2,), np.intc) 162 dsave = np.zeros((13,), float) 163 task = b'START' 164 165 maxiter = 100 166 for i in range(maxiter): 167 stp, phi1, derphi1, task = minpack2.dcsrch(alpha1, phi1, derphi1, 168 c1, c2, xtol, task, 169 amin, amax, isave, dsave) 170 if task[:2] == b'FG': 171 alpha1 = stp 172 phi1 = phi(stp) 173 derphi1 = derphi(stp) 174 else: 175 break 176 else: 177 # maxiter reached, the line search did not converge 178 stp = None 179 180 if task[:5] == b'ERROR' or task[:4] == b'WARN': 181 stp = None # failed 182 183 return stp, phi1, phi0 184 185 186line_search = line_search_wolfe1 187 188 189#------------------------------------------------------------------------------ 190# Pure-Python Wolfe line and scalar searches 191#------------------------------------------------------------------------------ 192 193def line_search_wolfe2(f, myfprime, xk, pk, gfk=None, old_fval=None, 194 old_old_fval=None, args=(), c1=1e-4, c2=0.9, amax=None, 195 extra_condition=None, maxiter=10): 196 """Find alpha that satisfies strong Wolfe conditions. 197 198 Parameters 199 ---------- 200 f : callable f(x,*args) 201 Objective function. 202 myfprime : callable f'(x,*args) 203 Objective function gradient. 204 xk : ndarray 205 Starting point. 206 pk : ndarray 207 Search direction. 208 gfk : ndarray, optional 209 Gradient value for x=xk (xk being the current parameter 210 estimate). Will be recomputed if omitted. 211 old_fval : float, optional 212 Function value for x=xk. Will be recomputed if omitted. 213 old_old_fval : float, optional 214 Function value for the point preceding x=xk. 215 args : tuple, optional 216 Additional arguments passed to objective function. 217 c1 : float, optional 218 Parameter for Armijo condition rule. 219 c2 : float, optional 220 Parameter for curvature condition rule. 221 amax : float, optional 222 Maximum step size 223 extra_condition : callable, optional 224 A callable of the form ``extra_condition(alpha, x, f, g)`` 225 returning a boolean. Arguments are the proposed step ``alpha`` 226 and the corresponding ``x``, ``f`` and ``g`` values. The line search 227 accepts the value of ``alpha`` only if this 228 callable returns ``True``. If the callable returns ``False`` 229 for the step length, the algorithm will continue with 230 new iterates. The callable is only called for iterates 231 satisfying the strong Wolfe conditions. 232 maxiter : int, optional 233 Maximum number of iterations to perform. 234 235 Returns 236 ------- 237 alpha : float or None 238 Alpha for which ``x_new = x0 + alpha * pk``, 239 or None if the line search algorithm did not converge. 240 fc : int 241 Number of function evaluations made. 242 gc : int 243 Number of gradient evaluations made. 244 new_fval : float or None 245 New function value ``f(x_new)=f(x0+alpha*pk)``, 246 or None if the line search algorithm did not converge. 247 old_fval : float 248 Old function value ``f(x0)``. 249 new_slope : float or None 250 The local slope along the search direction at the 251 new value ``<myfprime(x_new), pk>``, 252 or None if the line search algorithm did not converge. 253 254 255 Notes 256 ----- 257 Uses the line search algorithm to enforce strong Wolfe 258 conditions. See Wright and Nocedal, 'Numerical Optimization', 259 1999, pp. 59-61. 260 261 Examples 262 -------- 263 >>> from scipy.optimize import line_search 264 265 A objective function and its gradient are defined. 266 267 >>> def obj_func(x): 268 ... return (x[0])**2+(x[1])**2 269 >>> def obj_grad(x): 270 ... return [2*x[0], 2*x[1]] 271 272 We can find alpha that satisfies strong Wolfe conditions. 273 274 >>> start_point = np.array([1.8, 1.7]) 275 >>> search_gradient = np.array([-1.0, -1.0]) 276 >>> line_search(obj_func, obj_grad, start_point, search_gradient) 277 (1.0, 2, 1, 1.1300000000000001, 6.13, [1.6, 1.4]) 278 279 """ 280 fc = [0] 281 gc = [0] 282 gval = [None] 283 gval_alpha = [None] 284 285 def phi(alpha): 286 fc[0] += 1 287 return f(xk + alpha * pk, *args) 288 289 if isinstance(myfprime, tuple): 290 def derphi(alpha): 291 fc[0] += len(xk) + 1 292 eps = myfprime[1] 293 fprime = myfprime[0] 294 newargs = (f, eps) + args 295 gval[0] = fprime(xk + alpha * pk, *newargs) # store for later use 296 gval_alpha[0] = alpha 297 return np.dot(gval[0], pk) 298 else: 299 fprime = myfprime 300 301 def derphi(alpha): 302 gc[0] += 1 303 gval[0] = fprime(xk + alpha * pk, *args) # store for later use 304 gval_alpha[0] = alpha 305 return np.dot(gval[0], pk) 306 307 if gfk is None: 308 gfk = fprime(xk, *args) 309 derphi0 = np.dot(gfk, pk) 310 311 if extra_condition is not None: 312 # Add the current gradient as argument, to avoid needless 313 # re-evaluation 314 def extra_condition2(alpha, phi): 315 if gval_alpha[0] != alpha: 316 derphi(alpha) 317 x = xk + alpha * pk 318 return extra_condition(alpha, x, phi, gval[0]) 319 else: 320 extra_condition2 = None 321 322 alpha_star, phi_star, old_fval, derphi_star = scalar_search_wolfe2( 323 phi, derphi, old_fval, old_old_fval, derphi0, c1, c2, amax, 324 extra_condition2, maxiter=maxiter) 325 326 if derphi_star is None: 327 warn('The line search algorithm did not converge', LineSearchWarning) 328 else: 329 # derphi_star is a number (derphi) -- so use the most recently 330 # calculated gradient used in computing it derphi = gfk*pk 331 # this is the gradient at the next step no need to compute it 332 # again in the outer loop. 333 derphi_star = gval[0] 334 335 return alpha_star, fc[0], gc[0], phi_star, old_fval, derphi_star 336 337 338def scalar_search_wolfe2(phi, derphi, phi0=None, 339 old_phi0=None, derphi0=None, 340 c1=1e-4, c2=0.9, amax=None, 341 extra_condition=None, maxiter=10): 342 """Find alpha that satisfies strong Wolfe conditions. 343 344 alpha > 0 is assumed to be a descent direction. 345 346 Parameters 347 ---------- 348 phi : callable phi(alpha) 349 Objective scalar function. 350 derphi : callable phi'(alpha) 351 Objective function derivative. Returns a scalar. 352 phi0 : float, optional 353 Value of phi at 0. 354 old_phi0 : float, optional 355 Value of phi at previous point. 356 derphi0 : float, optional 357 Value of derphi at 0 358 c1 : float, optional 359 Parameter for Armijo condition rule. 360 c2 : float, optional 361 Parameter for curvature condition rule. 362 amax : float, optional 363 Maximum step size. 364 extra_condition : callable, optional 365 A callable of the form ``extra_condition(alpha, phi_value)`` 366 returning a boolean. The line search accepts the value 367 of ``alpha`` only if this callable returns ``True``. 368 If the callable returns ``False`` for the step length, 369 the algorithm will continue with new iterates. 370 The callable is only called for iterates satisfying 371 the strong Wolfe conditions. 372 maxiter : int, optional 373 Maximum number of iterations to perform. 374 375 Returns 376 ------- 377 alpha_star : float or None 378 Best alpha, or None if the line search algorithm did not converge. 379 phi_star : float 380 phi at alpha_star. 381 phi0 : float 382 phi at 0. 383 derphi_star : float or None 384 derphi at alpha_star, or None if the line search algorithm 385 did not converge. 386 387 Notes 388 ----- 389 Uses the line search algorithm to enforce strong Wolfe 390 conditions. See Wright and Nocedal, 'Numerical Optimization', 391 1999, pp. 59-61. 392 393 """ 394 395 if phi0 is None: 396 phi0 = phi(0.) 397 398 if derphi0 is None: 399 derphi0 = derphi(0.) 400 401 alpha0 = 0 402 if old_phi0 is not None and derphi0 != 0: 403 alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0) 404 else: 405 alpha1 = 1.0 406 407 if alpha1 < 0: 408 alpha1 = 1.0 409 410 if amax is not None: 411 alpha1 = min(alpha1, amax) 412 413 phi_a1 = phi(alpha1) 414 #derphi_a1 = derphi(alpha1) evaluated below 415 416 phi_a0 = phi0 417 derphi_a0 = derphi0 418 419 if extra_condition is None: 420 extra_condition = lambda alpha, phi: True 421 422 for i in range(maxiter): 423 if alpha1 == 0 or (amax is not None and alpha0 == amax): 424 # alpha1 == 0: This shouldn't happen. Perhaps the increment has 425 # slipped below machine precision? 426 alpha_star = None 427 phi_star = phi0 428 phi0 = old_phi0 429 derphi_star = None 430 431 if alpha1 == 0: 432 msg = 'Rounding errors prevent the line search from converging' 433 else: 434 msg = "The line search algorithm could not find a solution " + \ 435 "less than or equal to amax: %s" % amax 436 437 warn(msg, LineSearchWarning) 438 break 439 440 not_first_iteration = i > 0 441 if (phi_a1 > phi0 + c1 * alpha1 * derphi0) or \ 442 ((phi_a1 >= phi_a0) and not_first_iteration): 443 alpha_star, phi_star, derphi_star = \ 444 _zoom(alpha0, alpha1, phi_a0, 445 phi_a1, derphi_a0, phi, derphi, 446 phi0, derphi0, c1, c2, extra_condition) 447 break 448 449 derphi_a1 = derphi(alpha1) 450 if (abs(derphi_a1) <= -c2*derphi0): 451 if extra_condition(alpha1, phi_a1): 452 alpha_star = alpha1 453 phi_star = phi_a1 454 derphi_star = derphi_a1 455 break 456 457 if (derphi_a1 >= 0): 458 alpha_star, phi_star, derphi_star = \ 459 _zoom(alpha1, alpha0, phi_a1, 460 phi_a0, derphi_a1, phi, derphi, 461 phi0, derphi0, c1, c2, extra_condition) 462 break 463 464 alpha2 = 2 * alpha1 # increase by factor of two on each iteration 465 if amax is not None: 466 alpha2 = min(alpha2, amax) 467 alpha0 = alpha1 468 alpha1 = alpha2 469 phi_a0 = phi_a1 470 phi_a1 = phi(alpha1) 471 derphi_a0 = derphi_a1 472 473 else: 474 # stopping test maxiter reached 475 alpha_star = alpha1 476 phi_star = phi_a1 477 derphi_star = None 478 warn('The line search algorithm did not converge', LineSearchWarning) 479 480 return alpha_star, phi_star, phi0, derphi_star 481 482 483def _cubicmin(a, fa, fpa, b, fb, c, fc): 484 """ 485 Finds the minimizer for a cubic polynomial that goes through the 486 points (a,fa), (b,fb), and (c,fc) with derivative at a of fpa. 487 488 If no minimizer can be found, return None. 489 490 """ 491 # f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D 492 493 with np.errstate(divide='raise', over='raise', invalid='raise'): 494 try: 495 C = fpa 496 db = b - a 497 dc = c - a 498 denom = (db * dc) ** 2 * (db - dc) 499 d1 = np.empty((2, 2)) 500 d1[0, 0] = dc ** 2 501 d1[0, 1] = -db ** 2 502 d1[1, 0] = -dc ** 3 503 d1[1, 1] = db ** 3 504 [A, B] = np.dot(d1, np.asarray([fb - fa - C * db, 505 fc - fa - C * dc]).flatten()) 506 A /= denom 507 B /= denom 508 radical = B * B - 3 * A * C 509 xmin = a + (-B + np.sqrt(radical)) / (3 * A) 510 except ArithmeticError: 511 return None 512 if not np.isfinite(xmin): 513 return None 514 return xmin 515 516 517def _quadmin(a, fa, fpa, b, fb): 518 """ 519 Finds the minimizer for a quadratic polynomial that goes through 520 the points (a,fa), (b,fb) with derivative at a of fpa. 521 522 """ 523 # f(x) = B*(x-a)^2 + C*(x-a) + D 524 with np.errstate(divide='raise', over='raise', invalid='raise'): 525 try: 526 D = fa 527 C = fpa 528 db = b - a * 1.0 529 B = (fb - D - C * db) / (db * db) 530 xmin = a - C / (2.0 * B) 531 except ArithmeticError: 532 return None 533 if not np.isfinite(xmin): 534 return None 535 return xmin 536 537 538def _zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo, 539 phi, derphi, phi0, derphi0, c1, c2, extra_condition): 540 """Zoom stage of approximate linesearch satisfying strong Wolfe conditions. 541 542 Part of the optimization algorithm in `scalar_search_wolfe2`. 543 544 Notes 545 ----- 546 Implements Algorithm 3.6 (zoom) in Wright and Nocedal, 547 'Numerical Optimization', 1999, pp. 61. 548 549 """ 550 551 maxiter = 10 552 i = 0 553 delta1 = 0.2 # cubic interpolant check 554 delta2 = 0.1 # quadratic interpolant check 555 phi_rec = phi0 556 a_rec = 0 557 while True: 558 # interpolate to find a trial step length between a_lo and 559 # a_hi Need to choose interpolation here. Use cubic 560 # interpolation and then if the result is within delta * 561 # dalpha or outside of the interval bounded by a_lo or a_hi 562 # then use quadratic interpolation, if the result is still too 563 # close, then use bisection 564 565 dalpha = a_hi - a_lo 566 if dalpha < 0: 567 a, b = a_hi, a_lo 568 else: 569 a, b = a_lo, a_hi 570 571 # minimizer of cubic interpolant 572 # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi) 573 # 574 # if the result is too close to the end points (or out of the 575 # interval), then use quadratic interpolation with phi_lo, 576 # derphi_lo and phi_hi if the result is still too close to the 577 # end points (or out of the interval) then use bisection 578 579 if (i > 0): 580 cchk = delta1 * dalpha 581 a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi, 582 a_rec, phi_rec) 583 if (i == 0) or (a_j is None) or (a_j > b - cchk) or (a_j < a + cchk): 584 qchk = delta2 * dalpha 585 a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi) 586 if (a_j is None) or (a_j > b-qchk) or (a_j < a+qchk): 587 a_j = a_lo + 0.5*dalpha 588 589 # Check new value of a_j 590 591 phi_aj = phi(a_j) 592 if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo): 593 phi_rec = phi_hi 594 a_rec = a_hi 595 a_hi = a_j 596 phi_hi = phi_aj 597 else: 598 derphi_aj = derphi(a_j) 599 if abs(derphi_aj) <= -c2*derphi0 and extra_condition(a_j, phi_aj): 600 a_star = a_j 601 val_star = phi_aj 602 valprime_star = derphi_aj 603 break 604 if derphi_aj*(a_hi - a_lo) >= 0: 605 phi_rec = phi_hi 606 a_rec = a_hi 607 a_hi = a_lo 608 phi_hi = phi_lo 609 else: 610 phi_rec = phi_lo 611 a_rec = a_lo 612 a_lo = a_j 613 phi_lo = phi_aj 614 derphi_lo = derphi_aj 615 i += 1 616 if (i > maxiter): 617 # Failed to find a conforming step size 618 a_star = None 619 val_star = None 620 valprime_star = None 621 break 622 return a_star, val_star, valprime_star 623 624 625#------------------------------------------------------------------------------ 626# Armijo line and scalar searches 627#------------------------------------------------------------------------------ 628 629def line_search_armijo(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1): 630 """Minimize over alpha, the function ``f(xk+alpha pk)``. 631 632 Parameters 633 ---------- 634 f : callable 635 Function to be minimized. 636 xk : array_like 637 Current point. 638 pk : array_like 639 Search direction. 640 gfk : array_like 641 Gradient of `f` at point `xk`. 642 old_fval : float 643 Value of `f` at point `xk`. 644 args : tuple, optional 645 Optional arguments. 646 c1 : float, optional 647 Value to control stopping criterion. 648 alpha0 : scalar, optional 649 Value of `alpha` at start of the optimization. 650 651 Returns 652 ------- 653 alpha 654 f_count 655 f_val_at_alpha 656 657 Notes 658 ----- 659 Uses the interpolation algorithm (Armijo backtracking) as suggested by 660 Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57 661 662 """ 663 xk = np.atleast_1d(xk) 664 fc = [0] 665 666 def phi(alpha1): 667 fc[0] += 1 668 return f(xk + alpha1*pk, *args) 669 670 if old_fval is None: 671 phi0 = phi(0.) 672 else: 673 phi0 = old_fval # compute f(xk) -- done in past loop 674 675 derphi0 = np.dot(gfk, pk) 676 alpha, phi1 = scalar_search_armijo(phi, phi0, derphi0, c1=c1, 677 alpha0=alpha0) 678 return alpha, fc[0], phi1 679 680 681def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1): 682 """ 683 Compatibility wrapper for `line_search_armijo` 684 """ 685 r = line_search_armijo(f, xk, pk, gfk, old_fval, args=args, c1=c1, 686 alpha0=alpha0) 687 return r[0], r[1], 0, r[2] 688 689 690def scalar_search_armijo(phi, phi0, derphi0, c1=1e-4, alpha0=1, amin=0): 691 """Minimize over alpha, the function ``phi(alpha)``. 692 693 Uses the interpolation algorithm (Armijo backtracking) as suggested by 694 Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57 695 696 alpha > 0 is assumed to be a descent direction. 697 698 Returns 699 ------- 700 alpha 701 phi1 702 703 """ 704 phi_a0 = phi(alpha0) 705 if phi_a0 <= phi0 + c1*alpha0*derphi0: 706 return alpha0, phi_a0 707 708 # Otherwise, compute the minimizer of a quadratic interpolant: 709 710 alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0) 711 phi_a1 = phi(alpha1) 712 713 if (phi_a1 <= phi0 + c1*alpha1*derphi0): 714 return alpha1, phi_a1 715 716 # Otherwise, loop with cubic interpolation until we find an alpha which 717 # satisfies the first Wolfe condition (since we are backtracking, we will 718 # assume that the value of alpha is not too small and satisfies the second 719 # condition. 720 721 while alpha1 > amin: # we are assuming alpha>0 is a descent direction 722 factor = alpha0**2 * alpha1**2 * (alpha1-alpha0) 723 a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \ 724 alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0) 725 a = a / factor 726 b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \ 727 alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0) 728 b = b / factor 729 730 alpha2 = (-b + np.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0*a) 731 phi_a2 = phi(alpha2) 732 733 if (phi_a2 <= phi0 + c1*alpha2*derphi0): 734 return alpha2, phi_a2 735 736 if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96: 737 alpha2 = alpha1 / 2.0 738 739 alpha0 = alpha1 740 alpha1 = alpha2 741 phi_a0 = phi_a1 742 phi_a1 = phi_a2 743 744 # Failed to find a suitable step length 745 return None, phi_a1 746 747 748#------------------------------------------------------------------------------ 749# Non-monotone line search for DF-SANE 750#------------------------------------------------------------------------------ 751 752def _nonmonotone_line_search_cruz(f, x_k, d, prev_fs, eta, 753 gamma=1e-4, tau_min=0.1, tau_max=0.5): 754 """ 755 Nonmonotone backtracking line search as described in [1]_ 756 757 Parameters 758 ---------- 759 f : callable 760 Function returning a tuple ``(f, F)`` where ``f`` is the value 761 of a merit function and ``F`` the residual. 762 x_k : ndarray 763 Initial position. 764 d : ndarray 765 Search direction. 766 prev_fs : float 767 List of previous merit function values. Should have ``len(prev_fs) <= M`` 768 where ``M`` is the nonmonotonicity window parameter. 769 eta : float 770 Allowed merit function increase, see [1]_ 771 gamma, tau_min, tau_max : float, optional 772 Search parameters, see [1]_ 773 774 Returns 775 ------- 776 alpha : float 777 Step length 778 xp : ndarray 779 Next position 780 fp : float 781 Merit function value at next position 782 Fp : ndarray 783 Residual at next position 784 785 References 786 ---------- 787 [1] "Spectral residual method without gradient information for solving 788 large-scale nonlinear systems of equations." W. La Cruz, 789 J.M. Martinez, M. Raydan. Math. Comp. **75**, 1429 (2006). 790 791 """ 792 f_k = prev_fs[-1] 793 f_bar = max(prev_fs) 794 795 alpha_p = 1 796 alpha_m = 1 797 alpha = 1 798 799 while True: 800 xp = x_k + alpha_p * d 801 fp, Fp = f(xp) 802 803 if fp <= f_bar + eta - gamma * alpha_p**2 * f_k: 804 alpha = alpha_p 805 break 806 807 alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k) 808 809 xp = x_k - alpha_m * d 810 fp, Fp = f(xp) 811 812 if fp <= f_bar + eta - gamma * alpha_m**2 * f_k: 813 alpha = -alpha_m 814 break 815 816 alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k) 817 818 alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p) 819 alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m) 820 821 return alpha, xp, fp, Fp 822 823 824def _nonmonotone_line_search_cheng(f, x_k, d, f_k, C, Q, eta, 825 gamma=1e-4, tau_min=0.1, tau_max=0.5, 826 nu=0.85): 827 """ 828 Nonmonotone line search from [1] 829 830 Parameters 831 ---------- 832 f : callable 833 Function returning a tuple ``(f, F)`` where ``f`` is the value 834 of a merit function and ``F`` the residual. 835 x_k : ndarray 836 Initial position. 837 d : ndarray 838 Search direction. 839 f_k : float 840 Initial merit function value. 841 C, Q : float 842 Control parameters. On the first iteration, give values 843 Q=1.0, C=f_k 844 eta : float 845 Allowed merit function increase, see [1]_ 846 nu, gamma, tau_min, tau_max : float, optional 847 Search parameters, see [1]_ 848 849 Returns 850 ------- 851 alpha : float 852 Step length 853 xp : ndarray 854 Next position 855 fp : float 856 Merit function value at next position 857 Fp : ndarray 858 Residual at next position 859 C : float 860 New value for the control parameter C 861 Q : float 862 New value for the control parameter Q 863 864 References 865 ---------- 866 .. [1] W. Cheng & D.-H. Li, ''A derivative-free nonmonotone line 867 search and its application to the spectral residual 868 method'', IMA J. Numer. Anal. 29, 814 (2009). 869 870 """ 871 alpha_p = 1 872 alpha_m = 1 873 alpha = 1 874 875 while True: 876 xp = x_k + alpha_p * d 877 fp, Fp = f(xp) 878 879 if fp <= C + eta - gamma * alpha_p**2 * f_k: 880 alpha = alpha_p 881 break 882 883 alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k) 884 885 xp = x_k - alpha_m * d 886 fp, Fp = f(xp) 887 888 if fp <= C + eta - gamma * alpha_m**2 * f_k: 889 alpha = -alpha_m 890 break 891 892 alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k) 893 894 alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p) 895 alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m) 896 897 # Update C and Q 898 Q_next = nu * Q + 1 899 C = (nu * Q * (C + eta) + fp) / Q_next 900 Q = Q_next 901 902 return alpha, xp, fp, Fp, C, Q 903