1#__docformat__ = "restructuredtext en" 2# ******NOTICE*************** 3# optimize.py module by Travis E. Oliphant 4# 5# You may copy and use this module as you see fit with no 6# guarantee implied provided you keep this notice in all copies. 7# *****END NOTICE************ 8 9# A collection of optimization algorithms. Version 0.5 10# CHANGES 11# Added fminbound (July 2001) 12# Added brute (Aug. 2002) 13# Finished line search satisfying strong Wolfe conditions (Mar. 2004) 14# Updated strong Wolfe conditions line search to use 15# cubic-interpolation (Mar. 2004) 16 17 18# Minimization routines 19 20__all__ = ['fmin', 'fmin_powell', 'fmin_bfgs', 'fmin_ncg', 'fmin_cg', 21 'fminbound', 'brent', 'golden', 'bracket', 'rosen', 'rosen_der', 22 'rosen_hess', 'rosen_hess_prod', 'brute', 'approx_fprime', 23 'line_search', 'check_grad', 'OptimizeResult', 'show_options', 24 'OptimizeWarning'] 25 26__docformat__ = "restructuredtext en" 27 28import warnings 29import sys 30from numpy import (atleast_1d, eye, argmin, zeros, shape, squeeze, 31 asarray, sqrt, Inf, asfarray, isinf) 32import numpy as np 33from .linesearch import (line_search_wolfe1, line_search_wolfe2, 34 line_search_wolfe2 as line_search, 35 LineSearchWarning) 36from ._numdiff import approx_derivative 37from scipy._lib._util import getfullargspec_no_self as _getfullargspec 38from scipy._lib._util import MapWrapper 39from scipy.optimize._differentiable_functions import ScalarFunction, FD_METHODS 40 41 42# standard status messages of optimizers 43_status_message = {'success': 'Optimization terminated successfully.', 44 'maxfev': 'Maximum number of function evaluations has ' 45 'been exceeded.', 46 'maxiter': 'Maximum number of iterations has been ' 47 'exceeded.', 48 'pr_loss': 'Desired error not necessarily achieved due ' 49 'to precision loss.', 50 'nan': 'NaN result encountered.', 51 'out_of_bounds': 'The result is outside of the provided ' 52 'bounds.'} 53 54 55class MemoizeJac: 56 """ Decorator that caches the return values of a function returning `(fun, grad)` 57 each time it is called. """ 58 59 def __init__(self, fun): 60 self.fun = fun 61 self.jac = None 62 self._value = None 63 self.x = None 64 65 def _compute_if_needed(self, x, *args): 66 if not np.all(x == self.x) or self._value is None or self.jac is None: 67 self.x = np.asarray(x).copy() 68 fg = self.fun(x, *args) 69 self.jac = fg[1] 70 self._value = fg[0] 71 72 def __call__(self, x, *args): 73 """ returns the the function value """ 74 self._compute_if_needed(x, *args) 75 return self._value 76 77 def derivative(self, x, *args): 78 self._compute_if_needed(x, *args) 79 return self.jac 80 81 82class OptimizeResult(dict): 83 """ Represents the optimization result. 84 85 Attributes 86 ---------- 87 x : ndarray 88 The solution of the optimization. 89 success : bool 90 Whether or not the optimizer exited successfully. 91 status : int 92 Termination status of the optimizer. Its value depends on the 93 underlying solver. Refer to `message` for details. 94 message : str 95 Description of the cause of the termination. 96 fun, jac, hess: ndarray 97 Values of objective function, its Jacobian and its Hessian (if 98 available). The Hessians may be approximations, see the documentation 99 of the function in question. 100 hess_inv : object 101 Inverse of the objective function's Hessian; may be an approximation. 102 Not available for all solvers. The type of this attribute may be 103 either np.ndarray or scipy.sparse.linalg.LinearOperator. 104 nfev, njev, nhev : int 105 Number of evaluations of the objective functions and of its 106 Jacobian and Hessian. 107 nit : int 108 Number of iterations performed by the optimizer. 109 maxcv : float 110 The maximum constraint violation. 111 112 Notes 113 ----- 114 There may be additional attributes not listed above depending of the 115 specific solver. Since this class is essentially a subclass of dict 116 with attribute accessors, one can see which attributes are available 117 using the `keys()` method. 118 """ 119 120 def __getattr__(self, name): 121 try: 122 return self[name] 123 except KeyError as e: 124 raise AttributeError(name) from e 125 126 __setattr__ = dict.__setitem__ 127 __delattr__ = dict.__delitem__ 128 129 def __repr__(self): 130 if self.keys(): 131 m = max(map(len, list(self.keys()))) + 1 132 return '\n'.join([k.rjust(m) + ': ' + repr(v) 133 for k, v in sorted(self.items())]) 134 else: 135 return self.__class__.__name__ + "()" 136 137 def __dir__(self): 138 return list(self.keys()) 139 140 141class OptimizeWarning(UserWarning): 142 pass 143 144 145def _check_unknown_options(unknown_options): 146 if unknown_options: 147 msg = ", ".join(map(str, unknown_options.keys())) 148 # Stack level 4: this is called from _minimize_*, which is 149 # called from another function in SciPy. Level 4 is the first 150 # level in user code. 151 warnings.warn("Unknown solver options: %s" % msg, OptimizeWarning, 4) 152 153 154def is_array_scalar(x): 155 """Test whether `x` is either a scalar or an array scalar. 156 157 """ 158 return np.size(x) == 1 159 160 161_epsilon = sqrt(np.finfo(float).eps) 162 163 164def vecnorm(x, ord=2): 165 if ord == Inf: 166 return np.amax(np.abs(x)) 167 elif ord == -Inf: 168 return np.amin(np.abs(x)) 169 else: 170 return np.sum(np.abs(x)**ord, axis=0)**(1.0 / ord) 171 172 173def _prepare_scalar_function(fun, x0, jac=None, args=(), bounds=None, 174 epsilon=None, finite_diff_rel_step=None, 175 hess=None): 176 """ 177 Creates a ScalarFunction object for use with scalar minimizers 178 (BFGS/LBFGSB/SLSQP/TNC/CG/etc). 179 180 Parameters 181 ---------- 182 fun : callable 183 The objective function to be minimized. 184 185 ``fun(x, *args) -> float`` 186 187 where ``x`` is an 1-D array with shape (n,) and ``args`` 188 is a tuple of the fixed parameters needed to completely 189 specify the function. 190 x0 : ndarray, shape (n,) 191 Initial guess. Array of real elements of size (n,), 192 where 'n' is the number of independent variables. 193 jac : {callable, '2-point', '3-point', 'cs', None}, optional 194 Method for computing the gradient vector. If it is a callable, it 195 should be a function that returns the gradient vector: 196 197 ``jac(x, *args) -> array_like, shape (n,)`` 198 199 If one of `{'2-point', '3-point', 'cs'}` is selected then the gradient 200 is calculated with a relative step for finite differences. If `None`, 201 then two-point finite differences with an absolute step is used. 202 args : tuple, optional 203 Extra arguments passed to the objective function and its 204 derivatives (`fun`, `jac` functions). 205 bounds : sequence, optional 206 Bounds on variables. 'new-style' bounds are required. 207 eps : float or ndarray 208 If `jac is None` the absolute step size used for numerical 209 approximation of the jacobian via forward differences. 210 finite_diff_rel_step : None or array_like, optional 211 If `jac in ['2-point', '3-point', 'cs']` the relative step size to 212 use for numerical approximation of the jacobian. The absolute step 213 size is computed as ``h = rel_step * sign(x0) * max(1, abs(x0))``, 214 possibly adjusted to fit into the bounds. For ``method='3-point'`` 215 the sign of `h` is ignored. If None (default) then step is selected 216 automatically. 217 hess : {callable, '2-point', '3-point', 'cs', None} 218 Computes the Hessian matrix. If it is callable, it should return the 219 Hessian matrix: 220 221 ``hess(x, *args) -> {LinearOperator, spmatrix, array}, (n, n)`` 222 223 Alternatively, the keywords {'2-point', '3-point', 'cs'} select a 224 finite difference scheme for numerical estimation. 225 Whenever the gradient is estimated via finite-differences, the Hessian 226 cannot be estimated with options {'2-point', '3-point', 'cs'} and needs 227 to be estimated using one of the quasi-Newton strategies. 228 229 Returns 230 ------- 231 sf : ScalarFunction 232 """ 233 if callable(jac): 234 grad = jac 235 elif jac in FD_METHODS: 236 # epsilon is set to None so that ScalarFunction is made to use 237 # rel_step 238 epsilon = None 239 grad = jac 240 else: 241 # default (jac is None) is to do 2-point finite differences with 242 # absolute step size. ScalarFunction has to be provided an 243 # epsilon value that is not None to use absolute steps. This is 244 # normally the case from most _minimize* methods. 245 grad = '2-point' 246 epsilon = epsilon 247 248 if hess is None: 249 # ScalarFunction requires something for hess, so we give a dummy 250 # implementation here if nothing is provided, return a value of None 251 # so that downstream minimisers halt. The results of `fun.hess` 252 # should not be used. 253 def hess(x, *args): 254 return None 255 256 if bounds is None: 257 bounds = (-np.inf, np.inf) 258 259 # ScalarFunction caches. Reuse of fun(x) during grad 260 # calculation reduces overall function evaluations. 261 sf = ScalarFunction(fun, x0, args, grad, hess, 262 finite_diff_rel_step, bounds, epsilon=epsilon) 263 264 return sf 265 266 267def _clip_x_for_func(func, bounds): 268 # ensures that x values sent to func are clipped to bounds 269 270 # this is used as a mitigation for gh11403, slsqp/tnc sometimes 271 # suggest a move that is outside the limits by 1 or 2 ULP. This 272 # unclean fix makes sure x is strictly within bounds. 273 def eval(x): 274 x = _check_clip_x(x, bounds) 275 return func(x) 276 277 return eval 278 279 280def _check_clip_x(x, bounds): 281 if (x < bounds[0]).any() or (x > bounds[1]).any(): 282 warnings.warn("Values in x were outside bounds during a " 283 "minimize step, clipping to bounds", RuntimeWarning) 284 x = np.clip(x, bounds[0], bounds[1]) 285 return x 286 287 return x 288 289 290def rosen(x): 291 """ 292 The Rosenbrock function. 293 294 The function computed is:: 295 296 sum(100.0*(x[1:] - x[:-1]**2.0)**2.0 + (1 - x[:-1])**2.0) 297 298 Parameters 299 ---------- 300 x : array_like 301 1-D array of points at which the Rosenbrock function is to be computed. 302 303 Returns 304 ------- 305 f : float 306 The value of the Rosenbrock function. 307 308 See Also 309 -------- 310 rosen_der, rosen_hess, rosen_hess_prod 311 312 Examples 313 -------- 314 >>> from scipy.optimize import rosen 315 >>> X = 0.1 * np.arange(10) 316 >>> rosen(X) 317 76.56 318 319 For higher-dimensional input ``rosen`` broadcasts. 320 In the following example, we use this to plot a 2D landscape. 321 Note that ``rosen_hess`` does not broadcast in this manner. 322 323 >>> import matplotlib.pyplot as plt 324 >>> from mpl_toolkits.mplot3d import Axes3D 325 >>> x = np.linspace(-1, 1, 50) 326 >>> X, Y = np.meshgrid(x, x) 327 >>> ax = plt.subplot(111, projection='3d') 328 >>> ax.plot_surface(X, Y, rosen([X, Y])) 329 >>> plt.show() 330 """ 331 x = asarray(x) 332 r = np.sum(100.0 * (x[1:] - x[:-1]**2.0)**2.0 + (1 - x[:-1])**2.0, 333 axis=0) 334 return r 335 336 337def rosen_der(x): 338 """ 339 The derivative (i.e. gradient) of the Rosenbrock function. 340 341 Parameters 342 ---------- 343 x : array_like 344 1-D array of points at which the derivative is to be computed. 345 346 Returns 347 ------- 348 rosen_der : (N,) ndarray 349 The gradient of the Rosenbrock function at `x`. 350 351 See Also 352 -------- 353 rosen, rosen_hess, rosen_hess_prod 354 355 Examples 356 -------- 357 >>> from scipy.optimize import rosen_der 358 >>> X = 0.1 * np.arange(9) 359 >>> rosen_der(X) 360 array([ -2. , 10.6, 15.6, 13.4, 6.4, -3. , -12.4, -19.4, 62. ]) 361 362 """ 363 x = asarray(x) 364 xm = x[1:-1] 365 xm_m1 = x[:-2] 366 xm_p1 = x[2:] 367 der = np.zeros_like(x) 368 der[1:-1] = (200 * (xm - xm_m1**2) - 369 400 * (xm_p1 - xm**2) * xm - 2 * (1 - xm)) 370 der[0] = -400 * x[0] * (x[1] - x[0]**2) - 2 * (1 - x[0]) 371 der[-1] = 200 * (x[-1] - x[-2]**2) 372 return der 373 374 375def rosen_hess(x): 376 """ 377 The Hessian matrix of the Rosenbrock function. 378 379 Parameters 380 ---------- 381 x : array_like 382 1-D array of points at which the Hessian matrix is to be computed. 383 384 Returns 385 ------- 386 rosen_hess : ndarray 387 The Hessian matrix of the Rosenbrock function at `x`. 388 389 See Also 390 -------- 391 rosen, rosen_der, rosen_hess_prod 392 393 Examples 394 -------- 395 >>> from scipy.optimize import rosen_hess 396 >>> X = 0.1 * np.arange(4) 397 >>> rosen_hess(X) 398 array([[-38., 0., 0., 0.], 399 [ 0., 134., -40., 0.], 400 [ 0., -40., 130., -80.], 401 [ 0., 0., -80., 200.]]) 402 403 """ 404 x = atleast_1d(x) 405 H = np.diag(-400 * x[:-1], 1) - np.diag(400 * x[:-1], -1) 406 diagonal = np.zeros(len(x), dtype=x.dtype) 407 diagonal[0] = 1200 * x[0]**2 - 400 * x[1] + 2 408 diagonal[-1] = 200 409 diagonal[1:-1] = 202 + 1200 * x[1:-1]**2 - 400 * x[2:] 410 H = H + np.diag(diagonal) 411 return H 412 413 414def rosen_hess_prod(x, p): 415 """ 416 Product of the Hessian matrix of the Rosenbrock function with a vector. 417 418 Parameters 419 ---------- 420 x : array_like 421 1-D array of points at which the Hessian matrix is to be computed. 422 p : array_like 423 1-D array, the vector to be multiplied by the Hessian matrix. 424 425 Returns 426 ------- 427 rosen_hess_prod : ndarray 428 The Hessian matrix of the Rosenbrock function at `x` multiplied 429 by the vector `p`. 430 431 See Also 432 -------- 433 rosen, rosen_der, rosen_hess 434 435 Examples 436 -------- 437 >>> from scipy.optimize import rosen_hess_prod 438 >>> X = 0.1 * np.arange(9) 439 >>> p = 0.5 * np.arange(9) 440 >>> rosen_hess_prod(X, p) 441 array([ -0., 27., -10., -95., -192., -265., -278., -195., -180.]) 442 443 """ 444 x = atleast_1d(x) 445 Hp = np.zeros(len(x), dtype=x.dtype) 446 Hp[0] = (1200 * x[0]**2 - 400 * x[1] + 2) * p[0] - 400 * x[0] * p[1] 447 Hp[1:-1] = (-400 * x[:-2] * p[:-2] + 448 (202 + 1200 * x[1:-1]**2 - 400 * x[2:]) * p[1:-1] - 449 400 * x[1:-1] * p[2:]) 450 Hp[-1] = -400 * x[-2] * p[-2] + 200*p[-1] 451 return Hp 452 453 454def _wrap_function(function, args): 455 # wraps a minimizer function to count number of evaluations 456 # and to easily provide an args kwd. 457 # A copy of x is sent to the user function (gh13740) 458 ncalls = [0] 459 if function is None: 460 return ncalls, None 461 462 def function_wrapper(x, *wrapper_args): 463 ncalls[0] += 1 464 return function(np.copy(x), *(wrapper_args + args)) 465 466 return ncalls, function_wrapper 467 468 469def fmin(func, x0, args=(), xtol=1e-4, ftol=1e-4, maxiter=None, maxfun=None, 470 full_output=0, disp=1, retall=0, callback=None, initial_simplex=None): 471 """ 472 Minimize a function using the downhill simplex algorithm. 473 474 This algorithm only uses function values, not derivatives or second 475 derivatives. 476 477 Parameters 478 ---------- 479 func : callable func(x,*args) 480 The objective function to be minimized. 481 x0 : ndarray 482 Initial guess. 483 args : tuple, optional 484 Extra arguments passed to func, i.e., ``f(x,*args)``. 485 xtol : float, optional 486 Absolute error in xopt between iterations that is acceptable for 487 convergence. 488 ftol : number, optional 489 Absolute error in func(xopt) between iterations that is acceptable for 490 convergence. 491 maxiter : int, optional 492 Maximum number of iterations to perform. 493 maxfun : number, optional 494 Maximum number of function evaluations to make. 495 full_output : bool, optional 496 Set to True if fopt and warnflag outputs are desired. 497 disp : bool, optional 498 Set to True to print convergence messages. 499 retall : bool, optional 500 Set to True to return list of solutions at each iteration. 501 callback : callable, optional 502 Called after each iteration, as callback(xk), where xk is the 503 current parameter vector. 504 initial_simplex : array_like of shape (N + 1, N), optional 505 Initial simplex. If given, overrides `x0`. 506 ``initial_simplex[j,:]`` should contain the coordinates of 507 the jth vertex of the ``N+1`` vertices in the simplex, where 508 ``N`` is the dimension. 509 510 Returns 511 ------- 512 xopt : ndarray 513 Parameter that minimizes function. 514 fopt : float 515 Value of function at minimum: ``fopt = func(xopt)``. 516 iter : int 517 Number of iterations performed. 518 funcalls : int 519 Number of function calls made. 520 warnflag : int 521 1 : Maximum number of function evaluations made. 522 2 : Maximum number of iterations reached. 523 allvecs : list 524 Solution at each iteration. 525 526 See also 527 -------- 528 minimize: Interface to minimization algorithms for multivariate 529 functions. See the 'Nelder-Mead' `method` in particular. 530 531 Notes 532 ----- 533 Uses a Nelder-Mead simplex algorithm to find the minimum of function of 534 one or more variables. 535 536 This algorithm has a long history of successful use in applications. 537 But it will usually be slower than an algorithm that uses first or 538 second derivative information. In practice, it can have poor 539 performance in high-dimensional problems and is not robust to 540 minimizing complicated functions. Additionally, there currently is no 541 complete theory describing when the algorithm will successfully 542 converge to the minimum, or how fast it will if it does. Both the ftol and 543 xtol criteria must be met for convergence. 544 545 Examples 546 -------- 547 >>> def f(x): 548 ... return x**2 549 550 >>> from scipy import optimize 551 552 >>> minimum = optimize.fmin(f, 1) 553 Optimization terminated successfully. 554 Current function value: 0.000000 555 Iterations: 17 556 Function evaluations: 34 557 >>> minimum[0] 558 -8.8817841970012523e-16 559 560 References 561 ---------- 562 .. [1] Nelder, J.A. and Mead, R. (1965), "A simplex method for function 563 minimization", The Computer Journal, 7, pp. 308-313 564 565 .. [2] Wright, M.H. (1996), "Direct Search Methods: Once Scorned, Now 566 Respectable", in Numerical Analysis 1995, Proceedings of the 567 1995 Dundee Biennial Conference in Numerical Analysis, D.F. 568 Griffiths and G.A. Watson (Eds.), Addison Wesley Longman, 569 Harlow, UK, pp. 191-208. 570 571 """ 572 opts = {'xatol': xtol, 573 'fatol': ftol, 574 'maxiter': maxiter, 575 'maxfev': maxfun, 576 'disp': disp, 577 'return_all': retall, 578 'initial_simplex': initial_simplex} 579 580 res = _minimize_neldermead(func, x0, args, callback=callback, **opts) 581 if full_output: 582 retlist = res['x'], res['fun'], res['nit'], res['nfev'], res['status'] 583 if retall: 584 retlist += (res['allvecs'], ) 585 return retlist 586 else: 587 if retall: 588 return res['x'], res['allvecs'] 589 else: 590 return res['x'] 591 592 593def _minimize_neldermead(func, x0, args=(), callback=None, 594 maxiter=None, maxfev=None, disp=False, 595 return_all=False, initial_simplex=None, 596 xatol=1e-4, fatol=1e-4, adaptive=False, bounds=None, 597 **unknown_options): 598 """ 599 Minimization of scalar function of one or more variables using the 600 Nelder-Mead algorithm. 601 602 Options 603 ------- 604 disp : bool 605 Set to True to print convergence messages. 606 maxiter, maxfev : int 607 Maximum allowed number of iterations and function evaluations. 608 Will default to ``N*200``, where ``N`` is the number of 609 variables, if neither `maxiter` or `maxfev` is set. If both 610 `maxiter` and `maxfev` are set, minimization will stop at the 611 first reached. 612 return_all : bool, optional 613 Set to True to return a list of the best solution at each of the 614 iterations. 615 initial_simplex : array_like of shape (N + 1, N) 616 Initial simplex. If given, overrides `x0`. 617 ``initial_simplex[j,:]`` should contain the coordinates of 618 the jth vertex of the ``N+1`` vertices in the simplex, where 619 ``N`` is the dimension. 620 xatol : float, optional 621 Absolute error in xopt between iterations that is acceptable for 622 convergence. 623 fatol : number, optional 624 Absolute error in func(xopt) between iterations that is acceptable for 625 convergence. 626 adaptive : bool, optional 627 Adapt algorithm parameters to dimensionality of problem. Useful for 628 high-dimensional minimization [1]_. 629 bounds : sequence or `Bounds`, optional 630 Bounds on variables. There are two ways to specify the bounds: 631 632 1. Instance of `Bounds` class. 633 2. Sequence of ``(min, max)`` pairs for each element in `x`. None 634 is used to specify no bound. 635 636 Note that this just clips all vertices in simplex based on 637 the bounds. 638 639 References 640 ---------- 641 .. [1] Gao, F. and Han, L. 642 Implementing the Nelder-Mead simplex algorithm with adaptive 643 parameters. 2012. Computational Optimization and Applications. 644 51:1, pp. 259-277 645 646 """ 647 if 'ftol' in unknown_options: 648 warnings.warn("ftol is deprecated for Nelder-Mead," 649 " use fatol instead. If you specified both, only" 650 " fatol is used.", 651 DeprecationWarning) 652 if (np.isclose(fatol, 1e-4) and 653 not np.isclose(unknown_options['ftol'], 1e-4)): 654 # only ftol was probably specified, use it. 655 fatol = unknown_options['ftol'] 656 unknown_options.pop('ftol') 657 if 'xtol' in unknown_options: 658 warnings.warn("xtol is deprecated for Nelder-Mead," 659 " use xatol instead. If you specified both, only" 660 " xatol is used.", 661 DeprecationWarning) 662 if (np.isclose(xatol, 1e-4) and 663 not np.isclose(unknown_options['xtol'], 1e-4)): 664 # only xtol was probably specified, use it. 665 xatol = unknown_options['xtol'] 666 unknown_options.pop('xtol') 667 668 _check_unknown_options(unknown_options) 669 maxfun = maxfev 670 retall = return_all 671 672 fcalls, func = _wrap_function(func, args) 673 674 if adaptive: 675 dim = float(len(x0)) 676 rho = 1 677 chi = 1 + 2/dim 678 psi = 0.75 - 1/(2*dim) 679 sigma = 1 - 1/dim 680 else: 681 rho = 1 682 chi = 2 683 psi = 0.5 684 sigma = 0.5 685 686 nonzdelt = 0.05 687 zdelt = 0.00025 688 689 x0 = asfarray(x0).flatten() 690 691 if bounds is not None: 692 lower_bound, upper_bound = bounds.lb, bounds.ub 693 # check bounds 694 if (lower_bound > upper_bound).any(): 695 raise ValueError("Nelder Mead - one of the lower bounds is greater than an upper bound.") 696 if np.any(lower_bound > x0) or np.any(x0 > upper_bound): 697 warnings.warn("Initial guess is not within the specified bounds", 698 OptimizeWarning, 3) 699 700 if bounds is not None: 701 x0 = np.clip(x0, lower_bound, upper_bound) 702 703 if initial_simplex is None: 704 N = len(x0) 705 706 sim = np.empty((N + 1, N), dtype=x0.dtype) 707 sim[0] = x0 708 for k in range(N): 709 y = np.array(x0, copy=True) 710 if y[k] != 0: 711 y[k] = (1 + nonzdelt)*y[k] 712 else: 713 y[k] = zdelt 714 sim[k + 1] = y 715 else: 716 sim = np.asfarray(initial_simplex).copy() 717 if sim.ndim != 2 or sim.shape[0] != sim.shape[1] + 1: 718 raise ValueError("`initial_simplex` should be an array of shape (N+1,N)") 719 if len(x0) != sim.shape[1]: 720 raise ValueError("Size of `initial_simplex` is not consistent with `x0`") 721 N = sim.shape[1] 722 723 if retall: 724 allvecs = [sim[0]] 725 726 # If neither are set, then set both to default 727 if maxiter is None and maxfun is None: 728 maxiter = N * 200 729 maxfun = N * 200 730 elif maxiter is None: 731 # Convert remaining Nones, to np.inf, unless the other is np.inf, in 732 # which case use the default to avoid unbounded iteration 733 if maxfun == np.inf: 734 maxiter = N * 200 735 else: 736 maxiter = np.inf 737 elif maxfun is None: 738 if maxiter == np.inf: 739 maxfun = N * 200 740 else: 741 maxfun = np.inf 742 743 if bounds is not None: 744 sim = np.clip(sim, lower_bound, upper_bound) 745 746 one2np1 = list(range(1, N + 1)) 747 fsim = np.empty((N + 1,), float) 748 749 for k in range(N + 1): 750 fsim[k] = func(sim[k]) 751 752 ind = np.argsort(fsim) 753 fsim = np.take(fsim, ind, 0) 754 # sort so sim[0,:] has the lowest function value 755 sim = np.take(sim, ind, 0) 756 757 iterations = 1 758 759 while (fcalls[0] < maxfun and iterations < maxiter): 760 if (np.max(np.ravel(np.abs(sim[1:] - sim[0]))) <= xatol and 761 np.max(np.abs(fsim[0] - fsim[1:])) <= fatol): 762 break 763 764 xbar = np.add.reduce(sim[:-1], 0) / N 765 xr = (1 + rho) * xbar - rho * sim[-1] 766 if bounds is not None: 767 xr = np.clip(xr, lower_bound, upper_bound) 768 fxr = func(xr) 769 doshrink = 0 770 771 if fxr < fsim[0]: 772 xe = (1 + rho * chi) * xbar - rho * chi * sim[-1] 773 if bounds is not None: 774 xe = np.clip(xe, lower_bound, upper_bound) 775 fxe = func(xe) 776 777 if fxe < fxr: 778 sim[-1] = xe 779 fsim[-1] = fxe 780 else: 781 sim[-1] = xr 782 fsim[-1] = fxr 783 else: # fsim[0] <= fxr 784 if fxr < fsim[-2]: 785 sim[-1] = xr 786 fsim[-1] = fxr 787 else: # fxr >= fsim[-2] 788 # Perform contraction 789 if fxr < fsim[-1]: 790 xc = (1 + psi * rho) * xbar - psi * rho * sim[-1] 791 if bounds is not None: 792 xc = np.clip(xc, lower_bound, upper_bound) 793 fxc = func(xc) 794 795 if fxc <= fxr: 796 sim[-1] = xc 797 fsim[-1] = fxc 798 else: 799 doshrink = 1 800 else: 801 # Perform an inside contraction 802 xcc = (1 - psi) * xbar + psi * sim[-1] 803 if bounds is not None: 804 xcc = np.clip(xcc, lower_bound, upper_bound) 805 fxcc = func(xcc) 806 807 if fxcc < fsim[-1]: 808 sim[-1] = xcc 809 fsim[-1] = fxcc 810 else: 811 doshrink = 1 812 813 if doshrink: 814 for j in one2np1: 815 sim[j] = sim[0] + sigma * (sim[j] - sim[0]) 816 if bounds is not None: 817 sim[j] = np.clip(sim[j], lower_bound, upper_bound) 818 fsim[j] = func(sim[j]) 819 820 ind = np.argsort(fsim) 821 sim = np.take(sim, ind, 0) 822 fsim = np.take(fsim, ind, 0) 823 if callback is not None: 824 callback(sim[0]) 825 iterations += 1 826 if retall: 827 allvecs.append(sim[0]) 828 829 x = sim[0] 830 fval = np.min(fsim) 831 warnflag = 0 832 833 if fcalls[0] >= maxfun: 834 warnflag = 1 835 msg = _status_message['maxfev'] 836 if disp: 837 print('Warning: ' + msg) 838 elif iterations >= maxiter: 839 warnflag = 2 840 msg = _status_message['maxiter'] 841 if disp: 842 print('Warning: ' + msg) 843 else: 844 msg = _status_message['success'] 845 if disp: 846 print(msg) 847 print(" Current function value: %f" % fval) 848 print(" Iterations: %d" % iterations) 849 print(" Function evaluations: %d" % fcalls[0]) 850 851 result = OptimizeResult(fun=fval, nit=iterations, nfev=fcalls[0], 852 status=warnflag, success=(warnflag == 0), 853 message=msg, x=x, final_simplex=(sim, fsim)) 854 if retall: 855 result['allvecs'] = allvecs 856 return result 857 858 859def approx_fprime(xk, f, epsilon, *args): 860 """Finite-difference approximation of the gradient of a scalar function. 861 862 Parameters 863 ---------- 864 xk : array_like 865 The coordinate vector at which to determine the gradient of `f`. 866 f : callable 867 The function of which to determine the gradient (partial derivatives). 868 Should take `xk` as first argument, other arguments to `f` can be 869 supplied in ``*args``. Should return a scalar, the value of the 870 function at `xk`. 871 epsilon : array_like 872 Increment to `xk` to use for determining the function gradient. 873 If a scalar, uses the same finite difference delta for all partial 874 derivatives. If an array, should contain one value per element of 875 `xk`. 876 \\*args : args, optional 877 Any other arguments that are to be passed to `f`. 878 879 Returns 880 ------- 881 grad : ndarray 882 The partial derivatives of `f` to `xk`. 883 884 See Also 885 -------- 886 check_grad : Check correctness of gradient function against approx_fprime. 887 888 Notes 889 ----- 890 The function gradient is determined by the forward finite difference 891 formula:: 892 893 f(xk[i] + epsilon[i]) - f(xk[i]) 894 f'[i] = --------------------------------- 895 epsilon[i] 896 897 The main use of `approx_fprime` is in scalar function optimizers like 898 `fmin_bfgs`, to determine numerically the Jacobian of a function. 899 900 Examples 901 -------- 902 >>> from scipy import optimize 903 >>> def func(x, c0, c1): 904 ... "Coordinate vector `x` should be an array of size two." 905 ... return c0 * x[0]**2 + c1*x[1]**2 906 907 >>> x = np.ones(2) 908 >>> c0, c1 = (1, 200) 909 >>> eps = np.sqrt(np.finfo(float).eps) 910 >>> optimize.approx_fprime(x, func, [eps, np.sqrt(200) * eps], c0, c1) 911 array([ 2. , 400.00004198]) 912 913 """ 914 xk = np.asarray(xk, float) 915 916 f0 = f(xk, *args) 917 if not np.isscalar(f0): 918 try: 919 f0 = f0.item() 920 except (ValueError, AttributeError) as e: 921 raise ValueError("The user-provided " 922 "objective function must " 923 "return a scalar value.") from e 924 925 return approx_derivative(f, xk, method='2-point', abs_step=epsilon, 926 args=args, f0=f0) 927 928 929def check_grad(func, grad, x0, *args, **kwargs): 930 """Check the correctness of a gradient function by comparing it against a 931 (forward) finite-difference approximation of the gradient. 932 933 Parameters 934 ---------- 935 func : callable ``func(x0, *args)`` 936 Function whose derivative is to be checked. 937 grad : callable ``grad(x0, *args)`` 938 Gradient of `func`. 939 x0 : ndarray 940 Points to check `grad` against forward difference approximation of grad 941 using `func`. 942 args : \\*args, optional 943 Extra arguments passed to `func` and `grad`. 944 epsilon : float, optional 945 Step size used for the finite difference approximation. It defaults to 946 ``sqrt(np.finfo(float).eps)``, which is approximately 1.49e-08. 947 948 Returns 949 ------- 950 err : float 951 The square root of the sum of squares (i.e., the 2-norm) of the 952 difference between ``grad(x0, *args)`` and the finite difference 953 approximation of `grad` using func at the points `x0`. 954 955 See Also 956 -------- 957 approx_fprime 958 959 Examples 960 -------- 961 >>> def func(x): 962 ... return x[0]**2 - 0.5 * x[1]**3 963 >>> def grad(x): 964 ... return [2 * x[0], -1.5 * x[1]**2] 965 >>> from scipy.optimize import check_grad 966 >>> check_grad(func, grad, [1.5, -1.5]) 967 2.9802322387695312e-08 968 969 """ 970 step = kwargs.pop('epsilon', _epsilon) 971 if kwargs: 972 raise ValueError("Unknown keyword arguments: %r" % 973 (list(kwargs.keys()),)) 974 return sqrt(sum((grad(x0, *args) - 975 approx_fprime(x0, func, step, *args))**2)) 976 977 978def approx_fhess_p(x0, p, fprime, epsilon, *args): 979 # calculate fprime(x0) first, as this may be cached by ScalarFunction 980 f1 = fprime(*((x0,) + args)) 981 f2 = fprime(*((x0 + epsilon*p,) + args)) 982 return (f2 - f1) / epsilon 983 984 985class _LineSearchError(RuntimeError): 986 pass 987 988 989def _line_search_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval, 990 **kwargs): 991 """ 992 Same as line_search_wolfe1, but fall back to line_search_wolfe2 if 993 suitable step length is not found, and raise an exception if a 994 suitable step length is not found. 995 996 Raises 997 ------ 998 _LineSearchError 999 If no suitable step size is found 1000 1001 """ 1002 1003 extra_condition = kwargs.pop('extra_condition', None) 1004 1005 ret = line_search_wolfe1(f, fprime, xk, pk, gfk, 1006 old_fval, old_old_fval, 1007 **kwargs) 1008 1009 if ret[0] is not None and extra_condition is not None: 1010 xp1 = xk + ret[0] * pk 1011 if not extra_condition(ret[0], xp1, ret[3], ret[5]): 1012 # Reject step if extra_condition fails 1013 ret = (None,) 1014 1015 if ret[0] is None: 1016 # line search failed: try different one. 1017 with warnings.catch_warnings(): 1018 warnings.simplefilter('ignore', LineSearchWarning) 1019 kwargs2 = {} 1020 for key in ('c1', 'c2', 'amax'): 1021 if key in kwargs: 1022 kwargs2[key] = kwargs[key] 1023 ret = line_search_wolfe2(f, fprime, xk, pk, gfk, 1024 old_fval, old_old_fval, 1025 extra_condition=extra_condition, 1026 **kwargs2) 1027 1028 if ret[0] is None: 1029 raise _LineSearchError() 1030 1031 return ret 1032 1033 1034def fmin_bfgs(f, x0, fprime=None, args=(), gtol=1e-5, norm=Inf, 1035 epsilon=_epsilon, maxiter=None, full_output=0, disp=1, 1036 retall=0, callback=None): 1037 """ 1038 Minimize a function using the BFGS algorithm. 1039 1040 Parameters 1041 ---------- 1042 f : callable ``f(x,*args)`` 1043 Objective function to be minimized. 1044 x0 : ndarray 1045 Initial guess. 1046 fprime : callable ``f'(x,*args)``, optional 1047 Gradient of f. 1048 args : tuple, optional 1049 Extra arguments passed to f and fprime. 1050 gtol : float, optional 1051 Gradient norm must be less than `gtol` before successful termination. 1052 norm : float, optional 1053 Order of norm (Inf is max, -Inf is min) 1054 epsilon : int or ndarray, optional 1055 If `fprime` is approximated, use this value for the step size. 1056 callback : callable, optional 1057 An optional user-supplied function to call after each 1058 iteration. Called as ``callback(xk)``, where ``xk`` is the 1059 current parameter vector. 1060 maxiter : int, optional 1061 Maximum number of iterations to perform. 1062 full_output : bool, optional 1063 If True, return ``fopt``, ``func_calls``, ``grad_calls``, and 1064 ``warnflag`` in addition to ``xopt``. 1065 disp : bool, optional 1066 Print convergence message if True. 1067 retall : bool, optional 1068 Return a list of results at each iteration if True. 1069 1070 Returns 1071 ------- 1072 xopt : ndarray 1073 Parameters which minimize f, i.e., ``f(xopt) == fopt``. 1074 fopt : float 1075 Minimum value. 1076 gopt : ndarray 1077 Value of gradient at minimum, f'(xopt), which should be near 0. 1078 Bopt : ndarray 1079 Value of 1/f''(xopt), i.e., the inverse Hessian matrix. 1080 func_calls : int 1081 Number of function_calls made. 1082 grad_calls : int 1083 Number of gradient calls made. 1084 warnflag : integer 1085 1 : Maximum number of iterations exceeded. 1086 2 : Gradient and/or function calls not changing. 1087 3 : NaN result encountered. 1088 allvecs : list 1089 The value of `xopt` at each iteration. Only returned if `retall` is 1090 True. 1091 1092 Notes 1093 ----- 1094 Optimize the function, `f`, whose gradient is given by `fprime` 1095 using the quasi-Newton method of Broyden, Fletcher, Goldfarb, 1096 and Shanno (BFGS). 1097 1098 See Also 1099 -------- 1100 minimize: Interface to minimization algorithms for multivariate 1101 functions. See ``method='BFGS'`` in particular. 1102 1103 References 1104 ---------- 1105 Wright, and Nocedal 'Numerical Optimization', 1999, p. 198. 1106 1107 Examples 1108 -------- 1109 >>> from scipy.optimize import fmin_bfgs 1110 >>> def quadratic_cost(x, Q): 1111 ... return x @ Q @ x 1112 ... 1113 >>> x0 = np.array([-3, -4]) 1114 >>> cost_weight = np.diag([1., 10.]) 1115 >>> # Note that a trailing comma is necessary for a tuple with single element 1116 >>> fmin_bfgs(quadratic_cost, x0, args=(cost_weight,)) 1117 Optimization terminated successfully. 1118 Current function value: 0.000000 1119 Iterations: 7 # may vary 1120 Function evaluations: 24 # may vary 1121 Gradient evaluations: 8 # may vary 1122 array([ 2.85169950e-06, -4.61820139e-07]) 1123 1124 >>> def quadratic_cost_grad(x, Q): 1125 ... return 2 * Q @ x 1126 ... 1127 >>> fmin_bfgs(quadratic_cost, x0, quadratic_cost_grad, args=(cost_weight,)) 1128 Optimization terminated successfully. 1129 Current function value: 0.000000 1130 Iterations: 7 1131 Function evaluations: 8 1132 Gradient evaluations: 8 1133 array([ 2.85916637e-06, -4.54371951e-07]) 1134 1135 """ 1136 opts = {'gtol': gtol, 1137 'norm': norm, 1138 'eps': epsilon, 1139 'disp': disp, 1140 'maxiter': maxiter, 1141 'return_all': retall} 1142 1143 res = _minimize_bfgs(f, x0, args, fprime, callback=callback, **opts) 1144 1145 if full_output: 1146 retlist = (res['x'], res['fun'], res['jac'], res['hess_inv'], 1147 res['nfev'], res['njev'], res['status']) 1148 if retall: 1149 retlist += (res['allvecs'], ) 1150 return retlist 1151 else: 1152 if retall: 1153 return res['x'], res['allvecs'] 1154 else: 1155 return res['x'] 1156 1157 1158def _minimize_bfgs(fun, x0, args=(), jac=None, callback=None, 1159 gtol=1e-5, norm=Inf, eps=_epsilon, maxiter=None, 1160 disp=False, return_all=False, finite_diff_rel_step=None, 1161 **unknown_options): 1162 """ 1163 Minimization of scalar function of one or more variables using the 1164 BFGS algorithm. 1165 1166 Options 1167 ------- 1168 disp : bool 1169 Set to True to print convergence messages. 1170 maxiter : int 1171 Maximum number of iterations to perform. 1172 gtol : float 1173 Gradient norm must be less than `gtol` before successful 1174 termination. 1175 norm : float 1176 Order of norm (Inf is max, -Inf is min). 1177 eps : float or ndarray 1178 If `jac is None` the absolute step size used for numerical 1179 approximation of the jacobian via forward differences. 1180 return_all : bool, optional 1181 Set to True to return a list of the best solution at each of the 1182 iterations. 1183 finite_diff_rel_step : None or array_like, optional 1184 If `jac in ['2-point', '3-point', 'cs']` the relative step size to 1185 use for numerical approximation of the jacobian. The absolute step 1186 size is computed as ``h = rel_step * sign(x0) * max(1, abs(x0))``, 1187 possibly adjusted to fit into the bounds. For ``method='3-point'`` 1188 the sign of `h` is ignored. If None (default) then step is selected 1189 automatically. 1190 1191 """ 1192 _check_unknown_options(unknown_options) 1193 retall = return_all 1194 1195 x0 = asarray(x0).flatten() 1196 if x0.ndim == 0: 1197 x0.shape = (1,) 1198 if maxiter is None: 1199 maxiter = len(x0) * 200 1200 1201 sf = _prepare_scalar_function(fun, x0, jac, args=args, epsilon=eps, 1202 finite_diff_rel_step=finite_diff_rel_step) 1203 1204 f = sf.fun 1205 myfprime = sf.grad 1206 1207 old_fval = f(x0) 1208 gfk = myfprime(x0) 1209 1210 if not np.isscalar(old_fval): 1211 try: 1212 old_fval = old_fval.item() 1213 except (ValueError, AttributeError) as e: 1214 raise ValueError("The user-provided " 1215 "objective function must " 1216 "return a scalar value.") from e 1217 1218 k = 0 1219 N = len(x0) 1220 I = np.eye(N, dtype=int) 1221 Hk = I 1222 1223 # Sets the initial step guess to dx ~ 1 1224 old_old_fval = old_fval + np.linalg.norm(gfk) / 2 1225 1226 xk = x0 1227 if retall: 1228 allvecs = [x0] 1229 warnflag = 0 1230 gnorm = vecnorm(gfk, ord=norm) 1231 while (gnorm > gtol) and (k < maxiter): 1232 pk = -np.dot(Hk, gfk) 1233 try: 1234 alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \ 1235 _line_search_wolfe12(f, myfprime, xk, pk, gfk, 1236 old_fval, old_old_fval, amin=1e-100, amax=1e100) 1237 except _LineSearchError: 1238 # Line search failed to find a better solution. 1239 warnflag = 2 1240 break 1241 1242 xkp1 = xk + alpha_k * pk 1243 if retall: 1244 allvecs.append(xkp1) 1245 sk = xkp1 - xk 1246 xk = xkp1 1247 if gfkp1 is None: 1248 gfkp1 = myfprime(xkp1) 1249 1250 yk = gfkp1 - gfk 1251 gfk = gfkp1 1252 if callback is not None: 1253 callback(xk) 1254 k += 1 1255 gnorm = vecnorm(gfk, ord=norm) 1256 if (gnorm <= gtol): 1257 break 1258 1259 if not np.isfinite(old_fval): 1260 # We correctly found +-Inf as optimal value, or something went 1261 # wrong. 1262 warnflag = 2 1263 break 1264 1265 rhok_inv = np.dot(yk, sk) 1266 # this was handled in numeric, let it remaines for more safety 1267 if rhok_inv == 0.: 1268 rhok = 1000.0 1269 if disp: 1270 print("Divide-by-zero encountered: rhok assumed large") 1271 else: 1272 rhok = 1. / rhok_inv 1273 1274 A1 = I - sk[:, np.newaxis] * yk[np.newaxis, :] * rhok 1275 A2 = I - yk[:, np.newaxis] * sk[np.newaxis, :] * rhok 1276 Hk = np.dot(A1, np.dot(Hk, A2)) + (rhok * sk[:, np.newaxis] * 1277 sk[np.newaxis, :]) 1278 1279 fval = old_fval 1280 1281 if warnflag == 2: 1282 msg = _status_message['pr_loss'] 1283 elif k >= maxiter: 1284 warnflag = 1 1285 msg = _status_message['maxiter'] 1286 elif np.isnan(gnorm) or np.isnan(fval) or np.isnan(xk).any(): 1287 warnflag = 3 1288 msg = _status_message['nan'] 1289 else: 1290 msg = _status_message['success'] 1291 1292 if disp: 1293 print("%s%s" % ("Warning: " if warnflag != 0 else "", msg)) 1294 print(" Current function value: %f" % fval) 1295 print(" Iterations: %d" % k) 1296 print(" Function evaluations: %d" % sf.nfev) 1297 print(" Gradient evaluations: %d" % sf.ngev) 1298 1299 result = OptimizeResult(fun=fval, jac=gfk, hess_inv=Hk, nfev=sf.nfev, 1300 njev=sf.ngev, status=warnflag, 1301 success=(warnflag == 0), message=msg, x=xk, 1302 nit=k) 1303 if retall: 1304 result['allvecs'] = allvecs 1305 return result 1306 1307 1308def fmin_cg(f, x0, fprime=None, args=(), gtol=1e-5, norm=Inf, epsilon=_epsilon, 1309 maxiter=None, full_output=0, disp=1, retall=0, callback=None): 1310 """ 1311 Minimize a function using a nonlinear conjugate gradient algorithm. 1312 1313 Parameters 1314 ---------- 1315 f : callable, ``f(x, *args)`` 1316 Objective function to be minimized. Here `x` must be a 1-D array of 1317 the variables that are to be changed in the search for a minimum, and 1318 `args` are the other (fixed) parameters of `f`. 1319 x0 : ndarray 1320 A user-supplied initial estimate of `xopt`, the optimal value of `x`. 1321 It must be a 1-D array of values. 1322 fprime : callable, ``fprime(x, *args)``, optional 1323 A function that returns the gradient of `f` at `x`. Here `x` and `args` 1324 are as described above for `f`. The returned value must be a 1-D array. 1325 Defaults to None, in which case the gradient is approximated 1326 numerically (see `epsilon`, below). 1327 args : tuple, optional 1328 Parameter values passed to `f` and `fprime`. Must be supplied whenever 1329 additional fixed parameters are needed to completely specify the 1330 functions `f` and `fprime`. 1331 gtol : float, optional 1332 Stop when the norm of the gradient is less than `gtol`. 1333 norm : float, optional 1334 Order to use for the norm of the gradient 1335 (``-np.Inf`` is min, ``np.Inf`` is max). 1336 epsilon : float or ndarray, optional 1337 Step size(s) to use when `fprime` is approximated numerically. Can be a 1338 scalar or a 1-D array. Defaults to ``sqrt(eps)``, with eps the 1339 floating point machine precision. Usually ``sqrt(eps)`` is about 1340 1.5e-8. 1341 maxiter : int, optional 1342 Maximum number of iterations to perform. Default is ``200 * len(x0)``. 1343 full_output : bool, optional 1344 If True, return `fopt`, `func_calls`, `grad_calls`, and `warnflag` in 1345 addition to `xopt`. See the Returns section below for additional 1346 information on optional return values. 1347 disp : bool, optional 1348 If True, return a convergence message, followed by `xopt`. 1349 retall : bool, optional 1350 If True, add to the returned values the results of each iteration. 1351 callback : callable, optional 1352 An optional user-supplied function, called after each iteration. 1353 Called as ``callback(xk)``, where ``xk`` is the current value of `x0`. 1354 1355 Returns 1356 ------- 1357 xopt : ndarray 1358 Parameters which minimize f, i.e., ``f(xopt) == fopt``. 1359 fopt : float, optional 1360 Minimum value found, f(xopt). Only returned if `full_output` is True. 1361 func_calls : int, optional 1362 The number of function_calls made. Only returned if `full_output` 1363 is True. 1364 grad_calls : int, optional 1365 The number of gradient calls made. Only returned if `full_output` is 1366 True. 1367 warnflag : int, optional 1368 Integer value with warning status, only returned if `full_output` is 1369 True. 1370 1371 0 : Success. 1372 1373 1 : The maximum number of iterations was exceeded. 1374 1375 2 : Gradient and/or function calls were not changing. May indicate 1376 that precision was lost, i.e., the routine did not converge. 1377 1378 3 : NaN result encountered. 1379 1380 allvecs : list of ndarray, optional 1381 List of arrays, containing the results at each iteration. 1382 Only returned if `retall` is True. 1383 1384 See Also 1385 -------- 1386 minimize : common interface to all `scipy.optimize` algorithms for 1387 unconstrained and constrained minimization of multivariate 1388 functions. It provides an alternative way to call 1389 ``fmin_cg``, by specifying ``method='CG'``. 1390 1391 Notes 1392 ----- 1393 This conjugate gradient algorithm is based on that of Polak and Ribiere 1394 [1]_. 1395 1396 Conjugate gradient methods tend to work better when: 1397 1398 1. `f` has a unique global minimizing point, and no local minima or 1399 other stationary points, 1400 2. `f` is, at least locally, reasonably well approximated by a 1401 quadratic function of the variables, 1402 3. `f` is continuous and has a continuous gradient, 1403 4. `fprime` is not too large, e.g., has a norm less than 1000, 1404 5. The initial guess, `x0`, is reasonably close to `f` 's global 1405 minimizing point, `xopt`. 1406 1407 References 1408 ---------- 1409 .. [1] Wright & Nocedal, "Numerical Optimization", 1999, pp. 120-122. 1410 1411 Examples 1412 -------- 1413 Example 1: seek the minimum value of the expression 1414 ``a*u**2 + b*u*v + c*v**2 + d*u + e*v + f`` for given values 1415 of the parameters and an initial guess ``(u, v) = (0, 0)``. 1416 1417 >>> args = (2, 3, 7, 8, 9, 10) # parameter values 1418 >>> def f(x, *args): 1419 ... u, v = x 1420 ... a, b, c, d, e, f = args 1421 ... return a*u**2 + b*u*v + c*v**2 + d*u + e*v + f 1422 >>> def gradf(x, *args): 1423 ... u, v = x 1424 ... a, b, c, d, e, f = args 1425 ... gu = 2*a*u + b*v + d # u-component of the gradient 1426 ... gv = b*u + 2*c*v + e # v-component of the gradient 1427 ... return np.asarray((gu, gv)) 1428 >>> x0 = np.asarray((0, 0)) # Initial guess. 1429 >>> from scipy import optimize 1430 >>> res1 = optimize.fmin_cg(f, x0, fprime=gradf, args=args) 1431 Optimization terminated successfully. 1432 Current function value: 1.617021 1433 Iterations: 4 1434 Function evaluations: 8 1435 Gradient evaluations: 8 1436 >>> res1 1437 array([-1.80851064, -0.25531915]) 1438 1439 Example 2: solve the same problem using the `minimize` function. 1440 (This `myopts` dictionary shows all of the available options, 1441 although in practice only non-default values would be needed. 1442 The returned value will be a dictionary.) 1443 1444 >>> opts = {'maxiter' : None, # default value. 1445 ... 'disp' : True, # non-default value. 1446 ... 'gtol' : 1e-5, # default value. 1447 ... 'norm' : np.inf, # default value. 1448 ... 'eps' : 1.4901161193847656e-08} # default value. 1449 >>> res2 = optimize.minimize(f, x0, jac=gradf, args=args, 1450 ... method='CG', options=opts) 1451 Optimization terminated successfully. 1452 Current function value: 1.617021 1453 Iterations: 4 1454 Function evaluations: 8 1455 Gradient evaluations: 8 1456 >>> res2.x # minimum found 1457 array([-1.80851064, -0.25531915]) 1458 1459 """ 1460 opts = {'gtol': gtol, 1461 'norm': norm, 1462 'eps': epsilon, 1463 'disp': disp, 1464 'maxiter': maxiter, 1465 'return_all': retall} 1466 1467 res = _minimize_cg(f, x0, args, fprime, callback=callback, **opts) 1468 1469 if full_output: 1470 retlist = res['x'], res['fun'], res['nfev'], res['njev'], res['status'] 1471 if retall: 1472 retlist += (res['allvecs'], ) 1473 return retlist 1474 else: 1475 if retall: 1476 return res['x'], res['allvecs'] 1477 else: 1478 return res['x'] 1479 1480 1481def _minimize_cg(fun, x0, args=(), jac=None, callback=None, 1482 gtol=1e-5, norm=Inf, eps=_epsilon, maxiter=None, 1483 disp=False, return_all=False, finite_diff_rel_step=None, 1484 **unknown_options): 1485 """ 1486 Minimization of scalar function of one or more variables using the 1487 conjugate gradient algorithm. 1488 1489 Options 1490 ------- 1491 disp : bool 1492 Set to True to print convergence messages. 1493 maxiter : int 1494 Maximum number of iterations to perform. 1495 gtol : float 1496 Gradient norm must be less than `gtol` before successful 1497 termination. 1498 norm : float 1499 Order of norm (Inf is max, -Inf is min). 1500 eps : float or ndarray 1501 If `jac is None` the absolute step size used for numerical 1502 approximation of the jacobian via forward differences. 1503 return_all : bool, optional 1504 Set to True to return a list of the best solution at each of the 1505 iterations. 1506 finite_diff_rel_step : None or array_like, optional 1507 If `jac in ['2-point', '3-point', 'cs']` the relative step size to 1508 use for numerical approximation of the jacobian. The absolute step 1509 size is computed as ``h = rel_step * sign(x0) * max(1, abs(x0))``, 1510 possibly adjusted to fit into the bounds. For ``method='3-point'`` 1511 the sign of `h` is ignored. If None (default) then step is selected 1512 automatically. 1513 """ 1514 _check_unknown_options(unknown_options) 1515 1516 retall = return_all 1517 1518 x0 = asarray(x0).flatten() 1519 if maxiter is None: 1520 maxiter = len(x0) * 200 1521 1522 sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps, 1523 finite_diff_rel_step=finite_diff_rel_step) 1524 1525 f = sf.fun 1526 myfprime = sf.grad 1527 1528 old_fval = f(x0) 1529 gfk = myfprime(x0) 1530 1531 if not np.isscalar(old_fval): 1532 try: 1533 old_fval = old_fval.item() 1534 except (ValueError, AttributeError) as e: 1535 raise ValueError("The user-provided " 1536 "objective function must " 1537 "return a scalar value.") from e 1538 1539 k = 0 1540 xk = x0 1541 # Sets the initial step guess to dx ~ 1 1542 old_old_fval = old_fval + np.linalg.norm(gfk) / 2 1543 1544 if retall: 1545 allvecs = [xk] 1546 warnflag = 0 1547 pk = -gfk 1548 gnorm = vecnorm(gfk, ord=norm) 1549 1550 sigma_3 = 0.01 1551 1552 while (gnorm > gtol) and (k < maxiter): 1553 deltak = np.dot(gfk, gfk) 1554 1555 cached_step = [None] 1556 1557 def polak_ribiere_powell_step(alpha, gfkp1=None): 1558 xkp1 = xk + alpha * pk 1559 if gfkp1 is None: 1560 gfkp1 = myfprime(xkp1) 1561 yk = gfkp1 - gfk 1562 beta_k = max(0, np.dot(yk, gfkp1) / deltak) 1563 pkp1 = -gfkp1 + beta_k * pk 1564 gnorm = vecnorm(gfkp1, ord=norm) 1565 return (alpha, xkp1, pkp1, gfkp1, gnorm) 1566 1567 def descent_condition(alpha, xkp1, fp1, gfkp1): 1568 # Polak-Ribiere+ needs an explicit check of a sufficient 1569 # descent condition, which is not guaranteed by strong Wolfe. 1570 # 1571 # See Gilbert & Nocedal, "Global convergence properties of 1572 # conjugate gradient methods for optimization", 1573 # SIAM J. Optimization 2, 21 (1992). 1574 cached_step[:] = polak_ribiere_powell_step(alpha, gfkp1) 1575 alpha, xk, pk, gfk, gnorm = cached_step 1576 1577 # Accept step if it leads to convergence. 1578 if gnorm <= gtol: 1579 return True 1580 1581 # Accept step if sufficient descent condition applies. 1582 return np.dot(pk, gfk) <= -sigma_3 * np.dot(gfk, gfk) 1583 1584 try: 1585 alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \ 1586 _line_search_wolfe12(f, myfprime, xk, pk, gfk, old_fval, 1587 old_old_fval, c2=0.4, amin=1e-100, amax=1e100, 1588 extra_condition=descent_condition) 1589 except _LineSearchError: 1590 # Line search failed to find a better solution. 1591 warnflag = 2 1592 break 1593 1594 # Reuse already computed results if possible 1595 if alpha_k == cached_step[0]: 1596 alpha_k, xk, pk, gfk, gnorm = cached_step 1597 else: 1598 alpha_k, xk, pk, gfk, gnorm = polak_ribiere_powell_step(alpha_k, gfkp1) 1599 1600 if retall: 1601 allvecs.append(xk) 1602 if callback is not None: 1603 callback(xk) 1604 k += 1 1605 1606 fval = old_fval 1607 if warnflag == 2: 1608 msg = _status_message['pr_loss'] 1609 elif k >= maxiter: 1610 warnflag = 1 1611 msg = _status_message['maxiter'] 1612 elif np.isnan(gnorm) or np.isnan(fval) or np.isnan(xk).any(): 1613 warnflag = 3 1614 msg = _status_message['nan'] 1615 else: 1616 msg = _status_message['success'] 1617 1618 if disp: 1619 print("%s%s" % ("Warning: " if warnflag != 0 else "", msg)) 1620 print(" Current function value: %f" % fval) 1621 print(" Iterations: %d" % k) 1622 print(" Function evaluations: %d" % sf.nfev) 1623 print(" Gradient evaluations: %d" % sf.ngev) 1624 1625 result = OptimizeResult(fun=fval, jac=gfk, nfev=sf.nfev, 1626 njev=sf.ngev, status=warnflag, 1627 success=(warnflag == 0), message=msg, x=xk, 1628 nit=k) 1629 if retall: 1630 result['allvecs'] = allvecs 1631 return result 1632 1633 1634def fmin_ncg(f, x0, fprime, fhess_p=None, fhess=None, args=(), avextol=1e-5, 1635 epsilon=_epsilon, maxiter=None, full_output=0, disp=1, retall=0, 1636 callback=None): 1637 """ 1638 Unconstrained minimization of a function using the Newton-CG method. 1639 1640 Parameters 1641 ---------- 1642 f : callable ``f(x, *args)`` 1643 Objective function to be minimized. 1644 x0 : ndarray 1645 Initial guess. 1646 fprime : callable ``f'(x, *args)`` 1647 Gradient of f. 1648 fhess_p : callable ``fhess_p(x, p, *args)``, optional 1649 Function which computes the Hessian of f times an 1650 arbitrary vector, p. 1651 fhess : callable ``fhess(x, *args)``, optional 1652 Function to compute the Hessian matrix of f. 1653 args : tuple, optional 1654 Extra arguments passed to f, fprime, fhess_p, and fhess 1655 (the same set of extra arguments is supplied to all of 1656 these functions). 1657 epsilon : float or ndarray, optional 1658 If fhess is approximated, use this value for the step size. 1659 callback : callable, optional 1660 An optional user-supplied function which is called after 1661 each iteration. Called as callback(xk), where xk is the 1662 current parameter vector. 1663 avextol : float, optional 1664 Convergence is assumed when the average relative error in 1665 the minimizer falls below this amount. 1666 maxiter : int, optional 1667 Maximum number of iterations to perform. 1668 full_output : bool, optional 1669 If True, return the optional outputs. 1670 disp : bool, optional 1671 If True, print convergence message. 1672 retall : bool, optional 1673 If True, return a list of results at each iteration. 1674 1675 Returns 1676 ------- 1677 xopt : ndarray 1678 Parameters which minimize f, i.e., ``f(xopt) == fopt``. 1679 fopt : float 1680 Value of the function at xopt, i.e., ``fopt = f(xopt)``. 1681 fcalls : int 1682 Number of function calls made. 1683 gcalls : int 1684 Number of gradient calls made. 1685 hcalls : int 1686 Number of Hessian calls made. 1687 warnflag : int 1688 Warnings generated by the algorithm. 1689 1 : Maximum number of iterations exceeded. 1690 2 : Line search failure (precision loss). 1691 3 : NaN result encountered. 1692 allvecs : list 1693 The result at each iteration, if retall is True (see below). 1694 1695 See also 1696 -------- 1697 minimize: Interface to minimization algorithms for multivariate 1698 functions. See the 'Newton-CG' `method` in particular. 1699 1700 Notes 1701 ----- 1702 Only one of `fhess_p` or `fhess` need to be given. If `fhess` 1703 is provided, then `fhess_p` will be ignored. If neither `fhess` 1704 nor `fhess_p` is provided, then the hessian product will be 1705 approximated using finite differences on `fprime`. `fhess_p` 1706 must compute the hessian times an arbitrary vector. If it is not 1707 given, finite-differences on `fprime` are used to compute 1708 it. 1709 1710 Newton-CG methods are also called truncated Newton methods. This 1711 function differs from scipy.optimize.fmin_tnc because 1712 1713 1. scipy.optimize.fmin_ncg is written purely in Python using NumPy 1714 and scipy while scipy.optimize.fmin_tnc calls a C function. 1715 2. scipy.optimize.fmin_ncg is only for unconstrained minimization 1716 while scipy.optimize.fmin_tnc is for unconstrained minimization 1717 or box constrained minimization. (Box constraints give 1718 lower and upper bounds for each variable separately.) 1719 1720 References 1721 ---------- 1722 Wright & Nocedal, 'Numerical Optimization', 1999, p. 140. 1723 1724 """ 1725 opts = {'xtol': avextol, 1726 'eps': epsilon, 1727 'maxiter': maxiter, 1728 'disp': disp, 1729 'return_all': retall} 1730 1731 res = _minimize_newtoncg(f, x0, args, fprime, fhess, fhess_p, 1732 callback=callback, **opts) 1733 1734 if full_output: 1735 retlist = (res['x'], res['fun'], res['nfev'], res['njev'], 1736 res['nhev'], res['status']) 1737 if retall: 1738 retlist += (res['allvecs'], ) 1739 return retlist 1740 else: 1741 if retall: 1742 return res['x'], res['allvecs'] 1743 else: 1744 return res['x'] 1745 1746 1747def _minimize_newtoncg(fun, x0, args=(), jac=None, hess=None, hessp=None, 1748 callback=None, xtol=1e-5, eps=_epsilon, maxiter=None, 1749 disp=False, return_all=False, 1750 **unknown_options): 1751 """ 1752 Minimization of scalar function of one or more variables using the 1753 Newton-CG algorithm. 1754 1755 Note that the `jac` parameter (Jacobian) is required. 1756 1757 Options 1758 ------- 1759 disp : bool 1760 Set to True to print convergence messages. 1761 xtol : float 1762 Average relative error in solution `xopt` acceptable for 1763 convergence. 1764 maxiter : int 1765 Maximum number of iterations to perform. 1766 eps : float or ndarray 1767 If `hessp` is approximated, use this value for the step size. 1768 return_all : bool, optional 1769 Set to True to return a list of the best solution at each of the 1770 iterations. 1771 """ 1772 _check_unknown_options(unknown_options) 1773 if jac is None: 1774 raise ValueError('Jacobian is required for Newton-CG method') 1775 fhess_p = hessp 1776 fhess = hess 1777 avextol = xtol 1778 epsilon = eps 1779 retall = return_all 1780 1781 x0 = asarray(x0).flatten() 1782 # TODO: allow hess to be approximated by FD? 1783 # TODO: add hessp (callable or FD) to ScalarFunction? 1784 sf = _prepare_scalar_function(fun, x0, jac, args=args, epsilon=eps, hess=fhess) 1785 f = sf.fun 1786 fprime = sf.grad 1787 1788 def terminate(warnflag, msg): 1789 if disp: 1790 print(msg) 1791 print(" Current function value: %f" % old_fval) 1792 print(" Iterations: %d" % k) 1793 print(" Function evaluations: %d" % sf.nfev) 1794 print(" Gradient evaluations: %d" % sf.ngev) 1795 print(" Hessian evaluations: %d" % hcalls) 1796 fval = old_fval 1797 result = OptimizeResult(fun=fval, jac=gfk, nfev=sf.nfev, 1798 njev=sf.ngev, nhev=hcalls, status=warnflag, 1799 success=(warnflag == 0), message=msg, x=xk, 1800 nit=k) 1801 if retall: 1802 result['allvecs'] = allvecs 1803 return result 1804 1805 hcalls = 0 1806 if maxiter is None: 1807 maxiter = len(x0)*200 1808 cg_maxiter = 20*len(x0) 1809 1810 xtol = len(x0) * avextol 1811 update = [2 * xtol] 1812 xk = x0 1813 if retall: 1814 allvecs = [xk] 1815 k = 0 1816 gfk = None 1817 old_fval = f(x0) 1818 old_old_fval = None 1819 float64eps = np.finfo(np.float64).eps 1820 while np.add.reduce(np.abs(update)) > xtol: 1821 if k >= maxiter: 1822 msg = "Warning: " + _status_message['maxiter'] 1823 return terminate(1, msg) 1824 # Compute a search direction pk by applying the CG method to 1825 # del2 f(xk) p = - grad f(xk) starting from 0. 1826 b = -fprime(xk) 1827 maggrad = np.add.reduce(np.abs(b)) 1828 eta = np.min([0.5, np.sqrt(maggrad)]) 1829 termcond = eta * maggrad 1830 xsupi = zeros(len(x0), dtype=x0.dtype) 1831 ri = -b 1832 psupi = -ri 1833 i = 0 1834 dri0 = np.dot(ri, ri) 1835 1836 if fhess is not None: # you want to compute hessian once. 1837 A = sf.hess(xk) 1838 hcalls = hcalls + 1 1839 1840 for k2 in range(cg_maxiter): 1841 if np.add.reduce(np.abs(ri)) <= termcond: 1842 break 1843 if fhess is None: 1844 if fhess_p is None: 1845 Ap = approx_fhess_p(xk, psupi, fprime, epsilon) 1846 else: 1847 Ap = fhess_p(xk, psupi, *args) 1848 hcalls = hcalls + 1 1849 else: 1850 Ap = np.dot(A, psupi) 1851 # check curvature 1852 Ap = asarray(Ap).squeeze() # get rid of matrices... 1853 curv = np.dot(psupi, Ap) 1854 if 0 <= curv <= 3 * float64eps: 1855 break 1856 elif curv < 0: 1857 if (i > 0): 1858 break 1859 else: 1860 # fall back to steepest descent direction 1861 xsupi = dri0 / (-curv) * b 1862 break 1863 alphai = dri0 / curv 1864 xsupi = xsupi + alphai * psupi 1865 ri = ri + alphai * Ap 1866 dri1 = np.dot(ri, ri) 1867 betai = dri1 / dri0 1868 psupi = -ri + betai * psupi 1869 i = i + 1 1870 dri0 = dri1 # update np.dot(ri,ri) for next time. 1871 else: 1872 # curvature keeps increasing, bail out 1873 msg = ("Warning: CG iterations didn't converge. The Hessian is not " 1874 "positive definite.") 1875 return terminate(3, msg) 1876 1877 pk = xsupi # search direction is solution to system. 1878 gfk = -b # gradient at xk 1879 1880 try: 1881 alphak, fc, gc, old_fval, old_old_fval, gfkp1 = \ 1882 _line_search_wolfe12(f, fprime, xk, pk, gfk, 1883 old_fval, old_old_fval) 1884 except _LineSearchError: 1885 # Line search failed to find a better solution. 1886 msg = "Warning: " + _status_message['pr_loss'] 1887 return terminate(2, msg) 1888 1889 update = alphak * pk 1890 xk = xk + update # upcast if necessary 1891 if callback is not None: 1892 callback(xk) 1893 if retall: 1894 allvecs.append(xk) 1895 k += 1 1896 else: 1897 if np.isnan(old_fval) or np.isnan(update).any(): 1898 return terminate(3, _status_message['nan']) 1899 1900 msg = _status_message['success'] 1901 return terminate(0, msg) 1902 1903 1904def fminbound(func, x1, x2, args=(), xtol=1e-5, maxfun=500, 1905 full_output=0, disp=1): 1906 """Bounded minimization for scalar functions. 1907 1908 Parameters 1909 ---------- 1910 func : callable f(x,*args) 1911 Objective function to be minimized (must accept and return scalars). 1912 x1, x2 : float or array scalar 1913 The optimization bounds. 1914 args : tuple, optional 1915 Extra arguments passed to function. 1916 xtol : float, optional 1917 The convergence tolerance. 1918 maxfun : int, optional 1919 Maximum number of function evaluations allowed. 1920 full_output : bool, optional 1921 If True, return optional outputs. 1922 disp : int, optional 1923 If non-zero, print messages. 1924 0 : no message printing. 1925 1 : non-convergence notification messages only. 1926 2 : print a message on convergence too. 1927 3 : print iteration results. 1928 1929 1930 Returns 1931 ------- 1932 xopt : ndarray 1933 Parameters (over given interval) which minimize the 1934 objective function. 1935 fval : number 1936 The function value at the minimum point. 1937 ierr : int 1938 An error flag (0 if converged, 1 if maximum number of 1939 function calls reached). 1940 numfunc : int 1941 The number of function calls made. 1942 1943 See also 1944 -------- 1945 minimize_scalar: Interface to minimization algorithms for scalar 1946 univariate functions. See the 'Bounded' `method` in particular. 1947 1948 Notes 1949 ----- 1950 Finds a local minimizer of the scalar function `func` in the 1951 interval x1 < xopt < x2 using Brent's method. (See `brent` 1952 for auto-bracketing.) 1953 1954 Examples 1955 -------- 1956 `fminbound` finds the minimum of the function in the given range. 1957 The following examples illustrate the same 1958 1959 >>> def f(x): 1960 ... return x**2 1961 1962 >>> from scipy import optimize 1963 1964 >>> minimum = optimize.fminbound(f, -1, 2) 1965 >>> minimum 1966 0.0 1967 >>> minimum = optimize.fminbound(f, 1, 2) 1968 >>> minimum 1969 1.0000059608609866 1970 """ 1971 options = {'xatol': xtol, 1972 'maxiter': maxfun, 1973 'disp': disp} 1974 1975 res = _minimize_scalar_bounded(func, (x1, x2), args, **options) 1976 if full_output: 1977 return res['x'], res['fun'], res['status'], res['nfev'] 1978 else: 1979 return res['x'] 1980 1981 1982def _minimize_scalar_bounded(func, bounds, args=(), 1983 xatol=1e-5, maxiter=500, disp=0, 1984 **unknown_options): 1985 """ 1986 Options 1987 ------- 1988 maxiter : int 1989 Maximum number of iterations to perform. 1990 disp: int, optional 1991 If non-zero, print messages. 1992 0 : no message printing. 1993 1 : non-convergence notification messages only. 1994 2 : print a message on convergence too. 1995 3 : print iteration results. 1996 xatol : float 1997 Absolute error in solution `xopt` acceptable for convergence. 1998 1999 """ 2000 _check_unknown_options(unknown_options) 2001 maxfun = maxiter 2002 # Test bounds are of correct form 2003 if len(bounds) != 2: 2004 raise ValueError('bounds must have two elements.') 2005 x1, x2 = bounds 2006 2007 if not (is_array_scalar(x1) and is_array_scalar(x2)): 2008 raise ValueError("Optimization bounds must be scalars" 2009 " or array scalars.") 2010 if x1 > x2: 2011 raise ValueError("The lower bound exceeds the upper bound.") 2012 2013 flag = 0 2014 header = ' Func-count x f(x) Procedure' 2015 step = ' initial' 2016 2017 sqrt_eps = sqrt(2.2e-16) 2018 golden_mean = 0.5 * (3.0 - sqrt(5.0)) 2019 a, b = x1, x2 2020 fulc = a + golden_mean * (b - a) 2021 nfc, xf = fulc, fulc 2022 rat = e = 0.0 2023 x = xf 2024 fx = func(x, *args) 2025 num = 1 2026 fmin_data = (1, xf, fx) 2027 fu = np.inf 2028 2029 ffulc = fnfc = fx 2030 xm = 0.5 * (a + b) 2031 tol1 = sqrt_eps * np.abs(xf) + xatol / 3.0 2032 tol2 = 2.0 * tol1 2033 2034 if disp > 2: 2035 print(" ") 2036 print(header) 2037 print("%5.0f %12.6g %12.6g %s" % (fmin_data + (step,))) 2038 2039 while (np.abs(xf - xm) > (tol2 - 0.5 * (b - a))): 2040 golden = 1 2041 # Check for parabolic fit 2042 if np.abs(e) > tol1: 2043 golden = 0 2044 r = (xf - nfc) * (fx - ffulc) 2045 q = (xf - fulc) * (fx - fnfc) 2046 p = (xf - fulc) * q - (xf - nfc) * r 2047 q = 2.0 * (q - r) 2048 if q > 0.0: 2049 p = -p 2050 q = np.abs(q) 2051 r = e 2052 e = rat 2053 2054 # Check for acceptability of parabola 2055 if ((np.abs(p) < np.abs(0.5*q*r)) and (p > q*(a - xf)) and 2056 (p < q * (b - xf))): 2057 rat = (p + 0.0) / q 2058 x = xf + rat 2059 step = ' parabolic' 2060 2061 if ((x - a) < tol2) or ((b - x) < tol2): 2062 si = np.sign(xm - xf) + ((xm - xf) == 0) 2063 rat = tol1 * si 2064 else: # do a golden-section step 2065 golden = 1 2066 2067 if golden: # do a golden-section step 2068 if xf >= xm: 2069 e = a - xf 2070 else: 2071 e = b - xf 2072 rat = golden_mean*e 2073 step = ' golden' 2074 2075 si = np.sign(rat) + (rat == 0) 2076 x = xf + si * np.maximum(np.abs(rat), tol1) 2077 fu = func(x, *args) 2078 num += 1 2079 fmin_data = (num, x, fu) 2080 if disp > 2: 2081 print("%5.0f %12.6g %12.6g %s" % (fmin_data + (step,))) 2082 2083 if fu <= fx: 2084 if x >= xf: 2085 a = xf 2086 else: 2087 b = xf 2088 fulc, ffulc = nfc, fnfc 2089 nfc, fnfc = xf, fx 2090 xf, fx = x, fu 2091 else: 2092 if x < xf: 2093 a = x 2094 else: 2095 b = x 2096 if (fu <= fnfc) or (nfc == xf): 2097 fulc, ffulc = nfc, fnfc 2098 nfc, fnfc = x, fu 2099 elif (fu <= ffulc) or (fulc == xf) or (fulc == nfc): 2100 fulc, ffulc = x, fu 2101 2102 xm = 0.5 * (a + b) 2103 tol1 = sqrt_eps * np.abs(xf) + xatol / 3.0 2104 tol2 = 2.0 * tol1 2105 2106 if num >= maxfun: 2107 flag = 1 2108 break 2109 2110 if np.isnan(xf) or np.isnan(fx) or np.isnan(fu): 2111 flag = 2 2112 2113 fval = fx 2114 if disp > 0: 2115 _endprint(x, flag, fval, maxfun, xatol, disp) 2116 2117 result = OptimizeResult(fun=fval, status=flag, success=(flag == 0), 2118 message={0: 'Solution found.', 2119 1: 'Maximum number of function calls ' 2120 'reached.', 2121 2: _status_message['nan']}.get(flag, ''), 2122 x=xf, nfev=num) 2123 2124 return result 2125 2126 2127class Brent: 2128 #need to rethink design of __init__ 2129 def __init__(self, func, args=(), tol=1.48e-8, maxiter=500, 2130 full_output=0): 2131 self.func = func 2132 self.args = args 2133 self.tol = tol 2134 self.maxiter = maxiter 2135 self._mintol = 1.0e-11 2136 self._cg = 0.3819660 2137 self.xmin = None 2138 self.fval = None 2139 self.iter = 0 2140 self.funcalls = 0 2141 2142 # need to rethink design of set_bracket (new options, etc.) 2143 def set_bracket(self, brack=None): 2144 self.brack = brack 2145 2146 def get_bracket_info(self): 2147 #set up 2148 func = self.func 2149 args = self.args 2150 brack = self.brack 2151 ### BEGIN core bracket_info code ### 2152 ### carefully DOCUMENT any CHANGES in core ## 2153 if brack is None: 2154 xa, xb, xc, fa, fb, fc, funcalls = bracket(func, args=args) 2155 elif len(brack) == 2: 2156 xa, xb, xc, fa, fb, fc, funcalls = bracket(func, xa=brack[0], 2157 xb=brack[1], args=args) 2158 elif len(brack) == 3: 2159 xa, xb, xc = brack 2160 if (xa > xc): # swap so xa < xc can be assumed 2161 xc, xa = xa, xc 2162 if not ((xa < xb) and (xb < xc)): 2163 raise ValueError("Not a bracketing interval.") 2164 fa = func(*((xa,) + args)) 2165 fb = func(*((xb,) + args)) 2166 fc = func(*((xc,) + args)) 2167 if not ((fb < fa) and (fb < fc)): 2168 raise ValueError("Not a bracketing interval.") 2169 funcalls = 3 2170 else: 2171 raise ValueError("Bracketing interval must be " 2172 "length 2 or 3 sequence.") 2173 ### END core bracket_info code ### 2174 2175 return xa, xb, xc, fa, fb, fc, funcalls 2176 2177 def optimize(self): 2178 # set up for optimization 2179 func = self.func 2180 xa, xb, xc, fa, fb, fc, funcalls = self.get_bracket_info() 2181 _mintol = self._mintol 2182 _cg = self._cg 2183 ################################# 2184 #BEGIN CORE ALGORITHM 2185 ################################# 2186 x = w = v = xb 2187 fw = fv = fx = func(*((x,) + self.args)) 2188 if (xa < xc): 2189 a = xa 2190 b = xc 2191 else: 2192 a = xc 2193 b = xa 2194 deltax = 0.0 2195 funcalls += 1 2196 iter = 0 2197 while (iter < self.maxiter): 2198 tol1 = self.tol * np.abs(x) + _mintol 2199 tol2 = 2.0 * tol1 2200 xmid = 0.5 * (a + b) 2201 # check for convergence 2202 if np.abs(x - xmid) < (tol2 - 0.5 * (b - a)): 2203 break 2204 # XXX In the first iteration, rat is only bound in the true case 2205 # of this conditional. This used to cause an UnboundLocalError 2206 # (gh-4140). It should be set before the if (but to what?). 2207 if (np.abs(deltax) <= tol1): 2208 if (x >= xmid): 2209 deltax = a - x # do a golden section step 2210 else: 2211 deltax = b - x 2212 rat = _cg * deltax 2213 else: # do a parabolic step 2214 tmp1 = (x - w) * (fx - fv) 2215 tmp2 = (x - v) * (fx - fw) 2216 p = (x - v) * tmp2 - (x - w) * tmp1 2217 tmp2 = 2.0 * (tmp2 - tmp1) 2218 if (tmp2 > 0.0): 2219 p = -p 2220 tmp2 = np.abs(tmp2) 2221 dx_temp = deltax 2222 deltax = rat 2223 # check parabolic fit 2224 if ((p > tmp2 * (a - x)) and (p < tmp2 * (b - x)) and 2225 (np.abs(p) < np.abs(0.5 * tmp2 * dx_temp))): 2226 rat = p * 1.0 / tmp2 # if parabolic step is useful. 2227 u = x + rat 2228 if ((u - a) < tol2 or (b - u) < tol2): 2229 if xmid - x >= 0: 2230 rat = tol1 2231 else: 2232 rat = -tol1 2233 else: 2234 if (x >= xmid): 2235 deltax = a - x # if it's not do a golden section step 2236 else: 2237 deltax = b - x 2238 rat = _cg * deltax 2239 2240 if (np.abs(rat) < tol1): # update by at least tol1 2241 if rat >= 0: 2242 u = x + tol1 2243 else: 2244 u = x - tol1 2245 else: 2246 u = x + rat 2247 fu = func(*((u,) + self.args)) # calculate new output value 2248 funcalls += 1 2249 2250 if (fu > fx): # if it's bigger than current 2251 if (u < x): 2252 a = u 2253 else: 2254 b = u 2255 if (fu <= fw) or (w == x): 2256 v = w 2257 w = u 2258 fv = fw 2259 fw = fu 2260 elif (fu <= fv) or (v == x) or (v == w): 2261 v = u 2262 fv = fu 2263 else: 2264 if (u >= x): 2265 a = x 2266 else: 2267 b = x 2268 v = w 2269 w = x 2270 x = u 2271 fv = fw 2272 fw = fx 2273 fx = fu 2274 2275 iter += 1 2276 ################################# 2277 #END CORE ALGORITHM 2278 ################################# 2279 2280 self.xmin = x 2281 self.fval = fx 2282 self.iter = iter 2283 self.funcalls = funcalls 2284 2285 def get_result(self, full_output=False): 2286 if full_output: 2287 return self.xmin, self.fval, self.iter, self.funcalls 2288 else: 2289 return self.xmin 2290 2291 2292def brent(func, args=(), brack=None, tol=1.48e-8, full_output=0, maxiter=500): 2293 """ 2294 Given a function of one variable and a possible bracket, return 2295 the local minimum of the function isolated to a fractional precision 2296 of tol. 2297 2298 Parameters 2299 ---------- 2300 func : callable f(x,*args) 2301 Objective function. 2302 args : tuple, optional 2303 Additional arguments (if present). 2304 brack : tuple, optional 2305 Either a triple (xa,xb,xc) where xa<xb<xc and func(xb) < 2306 func(xa), func(xc) or a pair (xa,xb) which are used as a 2307 starting interval for a downhill bracket search (see 2308 `bracket`). Providing the pair (xa,xb) does not always mean 2309 the obtained solution will satisfy xa<=x<=xb. 2310 tol : float, optional 2311 Stop if between iteration change is less than `tol`. 2312 full_output : bool, optional 2313 If True, return all output args (xmin, fval, iter, 2314 funcalls). 2315 maxiter : int, optional 2316 Maximum number of iterations in solution. 2317 2318 Returns 2319 ------- 2320 xmin : ndarray 2321 Optimum point. 2322 fval : float 2323 Optimum value. 2324 iter : int 2325 Number of iterations. 2326 funcalls : int 2327 Number of objective function evaluations made. 2328 2329 See also 2330 -------- 2331 minimize_scalar: Interface to minimization algorithms for scalar 2332 univariate functions. See the 'Brent' `method` in particular. 2333 2334 Notes 2335 ----- 2336 Uses inverse parabolic interpolation when possible to speed up 2337 convergence of golden section method. 2338 2339 Does not ensure that the minimum lies in the range specified by 2340 `brack`. See `fminbound`. 2341 2342 Examples 2343 -------- 2344 We illustrate the behaviour of the function when `brack` is of 2345 size 2 and 3 respectively. In the case where `brack` is of the 2346 form (xa,xb), we can see for the given values, the output need 2347 not necessarily lie in the range (xa,xb). 2348 2349 >>> def f(x): 2350 ... return x**2 2351 2352 >>> from scipy import optimize 2353 2354 >>> minimum = optimize.brent(f,brack=(1,2)) 2355 >>> minimum 2356 0.0 2357 >>> minimum = optimize.brent(f,brack=(-1,0.5,2)) 2358 >>> minimum 2359 -2.7755575615628914e-17 2360 2361 """ 2362 options = {'xtol': tol, 2363 'maxiter': maxiter} 2364 res = _minimize_scalar_brent(func, brack, args, **options) 2365 if full_output: 2366 return res['x'], res['fun'], res['nit'], res['nfev'] 2367 else: 2368 return res['x'] 2369 2370 2371def _minimize_scalar_brent(func, brack=None, args=(), 2372 xtol=1.48e-8, maxiter=500, 2373 **unknown_options): 2374 """ 2375 Options 2376 ------- 2377 maxiter : int 2378 Maximum number of iterations to perform. 2379 xtol : float 2380 Relative error in solution `xopt` acceptable for convergence. 2381 2382 Notes 2383 ----- 2384 Uses inverse parabolic interpolation when possible to speed up 2385 convergence of golden section method. 2386 2387 """ 2388 _check_unknown_options(unknown_options) 2389 tol = xtol 2390 if tol < 0: 2391 raise ValueError('tolerance should be >= 0, got %r' % tol) 2392 2393 brent = Brent(func=func, args=args, tol=tol, 2394 full_output=True, maxiter=maxiter) 2395 brent.set_bracket(brack) 2396 brent.optimize() 2397 x, fval, nit, nfev = brent.get_result(full_output=True) 2398 2399 success = nit < maxiter and not (np.isnan(x) or np.isnan(fval)) 2400 2401 return OptimizeResult(fun=fval, x=x, nit=nit, nfev=nfev, 2402 success=success) 2403 2404 2405def golden(func, args=(), brack=None, tol=_epsilon, 2406 full_output=0, maxiter=5000): 2407 """ 2408 Return the minimum of a function of one variable using golden section 2409 method. 2410 2411 Given a function of one variable and a possible bracketing interval, 2412 return the minimum of the function isolated to a fractional precision of 2413 tol. 2414 2415 Parameters 2416 ---------- 2417 func : callable func(x,*args) 2418 Objective function to minimize. 2419 args : tuple, optional 2420 Additional arguments (if present), passed to func. 2421 brack : tuple, optional 2422 Triple (a,b,c), where (a<b<c) and func(b) < 2423 func(a),func(c). If bracket consists of two numbers (a, 2424 c), then they are assumed to be a starting interval for a 2425 downhill bracket search (see `bracket`); it doesn't always 2426 mean that obtained solution will satisfy a<=x<=c. 2427 tol : float, optional 2428 x tolerance stop criterion 2429 full_output : bool, optional 2430 If True, return optional outputs. 2431 maxiter : int 2432 Maximum number of iterations to perform. 2433 2434 See also 2435 -------- 2436 minimize_scalar: Interface to minimization algorithms for scalar 2437 univariate functions. See the 'Golden' `method` in particular. 2438 2439 Notes 2440 ----- 2441 Uses analog of bisection method to decrease the bracketed 2442 interval. 2443 2444 Examples 2445 -------- 2446 We illustrate the behaviour of the function when `brack` is of 2447 size 2 and 3, respectively. In the case where `brack` is of the 2448 form (xa,xb), we can see for the given values, the output need 2449 not necessarily lie in the range ``(xa, xb)``. 2450 2451 >>> def f(x): 2452 ... return x**2 2453 2454 >>> from scipy import optimize 2455 2456 >>> minimum = optimize.golden(f, brack=(1, 2)) 2457 >>> minimum 2458 1.5717277788484873e-162 2459 >>> minimum = optimize.golden(f, brack=(-1, 0.5, 2)) 2460 >>> minimum 2461 -1.5717277788484873e-162 2462 2463 """ 2464 options = {'xtol': tol, 'maxiter': maxiter} 2465 res = _minimize_scalar_golden(func, brack, args, **options) 2466 if full_output: 2467 return res['x'], res['fun'], res['nfev'] 2468 else: 2469 return res['x'] 2470 2471 2472def _minimize_scalar_golden(func, brack=None, args=(), 2473 xtol=_epsilon, maxiter=5000, **unknown_options): 2474 """ 2475 Options 2476 ------- 2477 maxiter : int 2478 Maximum number of iterations to perform. 2479 xtol : float 2480 Relative error in solution `xopt` acceptable for convergence. 2481 2482 """ 2483 _check_unknown_options(unknown_options) 2484 tol = xtol 2485 if brack is None: 2486 xa, xb, xc, fa, fb, fc, funcalls = bracket(func, args=args) 2487 elif len(brack) == 2: 2488 xa, xb, xc, fa, fb, fc, funcalls = bracket(func, xa=brack[0], 2489 xb=brack[1], args=args) 2490 elif len(brack) == 3: 2491 xa, xb, xc = brack 2492 if (xa > xc): # swap so xa < xc can be assumed 2493 xc, xa = xa, xc 2494 if not ((xa < xb) and (xb < xc)): 2495 raise ValueError("Not a bracketing interval.") 2496 fa = func(*((xa,) + args)) 2497 fb = func(*((xb,) + args)) 2498 fc = func(*((xc,) + args)) 2499 if not ((fb < fa) and (fb < fc)): 2500 raise ValueError("Not a bracketing interval.") 2501 funcalls = 3 2502 else: 2503 raise ValueError("Bracketing interval must be length 2 or 3 sequence.") 2504 2505 _gR = 0.61803399 # golden ratio conjugate: 2.0/(1.0+sqrt(5.0)) 2506 _gC = 1.0 - _gR 2507 x3 = xc 2508 x0 = xa 2509 if (np.abs(xc - xb) > np.abs(xb - xa)): 2510 x1 = xb 2511 x2 = xb + _gC * (xc - xb) 2512 else: 2513 x2 = xb 2514 x1 = xb - _gC * (xb - xa) 2515 f1 = func(*((x1,) + args)) 2516 f2 = func(*((x2,) + args)) 2517 funcalls += 2 2518 nit = 0 2519 for i in range(maxiter): 2520 if np.abs(x3 - x0) <= tol * (np.abs(x1) + np.abs(x2)): 2521 break 2522 if (f2 < f1): 2523 x0 = x1 2524 x1 = x2 2525 x2 = _gR * x1 + _gC * x3 2526 f1 = f2 2527 f2 = func(*((x2,) + args)) 2528 else: 2529 x3 = x2 2530 x2 = x1 2531 x1 = _gR * x2 + _gC * x0 2532 f2 = f1 2533 f1 = func(*((x1,) + args)) 2534 funcalls += 1 2535 nit += 1 2536 if (f1 < f2): 2537 xmin = x1 2538 fval = f1 2539 else: 2540 xmin = x2 2541 fval = f2 2542 2543 success = nit < maxiter and not (np.isnan(fval) or np.isnan(xmin)) 2544 2545 return OptimizeResult(fun=fval, nfev=funcalls, x=xmin, nit=nit, 2546 success=success) 2547 2548 2549def bracket(func, xa=0.0, xb=1.0, args=(), grow_limit=110.0, maxiter=1000): 2550 """ 2551 Bracket the minimum of the function. 2552 2553 Given a function and distinct initial points, search in the 2554 downhill direction (as defined by the initial points) and return 2555 new points xa, xb, xc that bracket the minimum of the function 2556 f(xa) > f(xb) < f(xc). It doesn't always mean that obtained 2557 solution will satisfy xa<=x<=xb. 2558 2559 Parameters 2560 ---------- 2561 func : callable f(x,*args) 2562 Objective function to minimize. 2563 xa, xb : float, optional 2564 Bracketing interval. Defaults `xa` to 0.0, and `xb` to 1.0. 2565 args : tuple, optional 2566 Additional arguments (if present), passed to `func`. 2567 grow_limit : float, optional 2568 Maximum grow limit. Defaults to 110.0 2569 maxiter : int, optional 2570 Maximum number of iterations to perform. Defaults to 1000. 2571 2572 Returns 2573 ------- 2574 xa, xb, xc : float 2575 Bracket. 2576 fa, fb, fc : float 2577 Objective function values in bracket. 2578 funcalls : int 2579 Number of function evaluations made. 2580 2581 Examples 2582 -------- 2583 This function can find a downward convex region of a function: 2584 2585 >>> import matplotlib.pyplot as plt 2586 >>> from scipy.optimize import bracket 2587 >>> def f(x): 2588 ... return 10*x**2 + 3*x + 5 2589 >>> x = np.linspace(-2, 2) 2590 >>> y = f(x) 2591 >>> init_xa, init_xb = 0, 1 2592 >>> xa, xb, xc, fa, fb, fc, funcalls = bracket(f, xa=init_xa, xb=init_xb) 2593 >>> plt.axvline(x=init_xa, color="k", linestyle="--") 2594 >>> plt.axvline(x=init_xb, color="k", linestyle="--") 2595 >>> plt.plot(x, y, "-k") 2596 >>> plt.plot(xa, fa, "bx") 2597 >>> plt.plot(xb, fb, "rx") 2598 >>> plt.plot(xc, fc, "bx") 2599 >>> plt.show() 2600 2601 """ 2602 _gold = 1.618034 # golden ratio: (1.0+sqrt(5.0))/2.0 2603 _verysmall_num = 1e-21 2604 fa = func(*(xa,) + args) 2605 fb = func(*(xb,) + args) 2606 if (fa < fb): # Switch so fa > fb 2607 xa, xb = xb, xa 2608 fa, fb = fb, fa 2609 xc = xb + _gold * (xb - xa) 2610 fc = func(*((xc,) + args)) 2611 funcalls = 3 2612 iter = 0 2613 while (fc < fb): 2614 tmp1 = (xb - xa) * (fb - fc) 2615 tmp2 = (xb - xc) * (fb - fa) 2616 val = tmp2 - tmp1 2617 if np.abs(val) < _verysmall_num: 2618 denom = 2.0 * _verysmall_num 2619 else: 2620 denom = 2.0 * val 2621 w = xb - ((xb - xc) * tmp2 - (xb - xa) * tmp1) / denom 2622 wlim = xb + grow_limit * (xc - xb) 2623 if iter > maxiter: 2624 raise RuntimeError("Too many iterations.") 2625 iter += 1 2626 if (w - xc) * (xb - w) > 0.0: 2627 fw = func(*((w,) + args)) 2628 funcalls += 1 2629 if (fw < fc): 2630 xa = xb 2631 xb = w 2632 fa = fb 2633 fb = fw 2634 return xa, xb, xc, fa, fb, fc, funcalls 2635 elif (fw > fb): 2636 xc = w 2637 fc = fw 2638 return xa, xb, xc, fa, fb, fc, funcalls 2639 w = xc + _gold * (xc - xb) 2640 fw = func(*((w,) + args)) 2641 funcalls += 1 2642 elif (w - wlim)*(wlim - xc) >= 0.0: 2643 w = wlim 2644 fw = func(*((w,) + args)) 2645 funcalls += 1 2646 elif (w - wlim)*(xc - w) > 0.0: 2647 fw = func(*((w,) + args)) 2648 funcalls += 1 2649 if (fw < fc): 2650 xb = xc 2651 xc = w 2652 w = xc + _gold * (xc - xb) 2653 fb = fc 2654 fc = fw 2655 fw = func(*((w,) + args)) 2656 funcalls += 1 2657 else: 2658 w = xc + _gold * (xc - xb) 2659 fw = func(*((w,) + args)) 2660 funcalls += 1 2661 xa = xb 2662 xb = xc 2663 xc = w 2664 fa = fb 2665 fb = fc 2666 fc = fw 2667 return xa, xb, xc, fa, fb, fc, funcalls 2668 2669 2670def _line_for_search(x0, alpha, lower_bound, upper_bound): 2671 """ 2672 Given a parameter vector ``x0`` with length ``n`` and a direction 2673 vector ``alpha`` with length ``n``, and lower and upper bounds on 2674 each of the ``n`` parameters, what are the bounds on a scalar 2675 ``l`` such that ``lower_bound <= x0 + alpha * l <= upper_bound``. 2676 2677 2678 Parameters 2679 ---------- 2680 x0 : np.array. 2681 The vector representing the current location. 2682 Note ``np.shape(x0) == (n,)``. 2683 alpha : np.array. 2684 The vector representing the direction. 2685 Note ``np.shape(alpha) == (n,)``. 2686 lower_bound : np.array. 2687 The lower bounds for each parameter in ``x0``. If the ``i``th 2688 parameter in ``x0`` is unbounded below, then ``lower_bound[i]`` 2689 should be ``-np.inf``. 2690 Note ``np.shape(lower_bound) == (n,)``. 2691 upper_bound : np.array. 2692 The upper bounds for each parameter in ``x0``. If the ``i``th 2693 parameter in ``x0`` is unbounded above, then ``upper_bound[i]`` 2694 should be ``np.inf``. 2695 Note ``np.shape(upper_bound) == (n,)``. 2696 2697 Returns 2698 ------- 2699 res : tuple ``(lmin, lmax)`` 2700 The bounds for ``l`` such that 2701 ``lower_bound[i] <= x0[i] + alpha[i] * l <= upper_bound[i]`` 2702 for all ``i``. 2703 2704 """ 2705 # get nonzero indices of alpha so we don't get any zero division errors. 2706 # alpha will not be all zero, since it is called from _linesearch_powell 2707 # where we have a check for this. 2708 nonzero, = alpha.nonzero() 2709 lower_bound, upper_bound = lower_bound[nonzero], upper_bound[nonzero] 2710 x0, alpha = x0[nonzero], alpha[nonzero] 2711 low = (lower_bound - x0) / alpha 2712 high = (upper_bound - x0) / alpha 2713 2714 # positive and negative indices 2715 pos = alpha > 0 2716 2717 lmin_pos = np.where(pos, low, 0) 2718 lmin_neg = np.where(pos, 0, high) 2719 lmax_pos = np.where(pos, high, 0) 2720 lmax_neg = np.where(pos, 0, low) 2721 2722 lmin = np.max(lmin_pos + lmin_neg) 2723 lmax = np.min(lmax_pos + lmax_neg) 2724 2725 # if x0 is outside the bounds, then it is possible that there is 2726 # no way to get back in the bounds for the parameters being updated 2727 # with the current direction alpha. 2728 # when this happens, lmax < lmin. 2729 # If this is the case, then we can just return (0, 0) 2730 return (lmin, lmax) if lmax >= lmin else (0, 0) 2731 2732 2733def _linesearch_powell(func, p, xi, tol=1e-3, 2734 lower_bound=None, upper_bound=None, fval=None): 2735 """Line-search algorithm using fminbound. 2736 2737 Find the minimium of the function ``func(x0 + alpha*direc)``. 2738 2739 lower_bound : np.array. 2740 The lower bounds for each parameter in ``x0``. If the ``i``th 2741 parameter in ``x0`` is unbounded below, then ``lower_bound[i]`` 2742 should be ``-np.inf``. 2743 Note ``np.shape(lower_bound) == (n,)``. 2744 upper_bound : np.array. 2745 The upper bounds for each parameter in ``x0``. If the ``i``th 2746 parameter in ``x0`` is unbounded above, then ``upper_bound[i]`` 2747 should be ``np.inf``. 2748 Note ``np.shape(upper_bound) == (n,)``. 2749 fval : number. 2750 ``fval`` is equal to ``func(p)``, the idea is just to avoid 2751 recomputing it so we can limit the ``fevals``. 2752 2753 """ 2754 def myfunc(alpha): 2755 return func(p + alpha*xi) 2756 2757 # if xi is zero, then don't optimize 2758 if not np.any(xi): 2759 return ((fval, p, xi) if fval is not None else (func(p), p, xi)) 2760 elif lower_bound is None and upper_bound is None: 2761 # non-bounded minimization 2762 alpha_min, fret, _, _ = brent(myfunc, full_output=1, tol=tol) 2763 xi = alpha_min * xi 2764 return squeeze(fret), p + xi, xi 2765 else: 2766 bound = _line_for_search(p, xi, lower_bound, upper_bound) 2767 if np.isneginf(bound[0]) and np.isposinf(bound[1]): 2768 # equivalent to unbounded 2769 return _linesearch_powell(func, p, xi, fval=fval, tol=tol) 2770 elif not np.isneginf(bound[0]) and not np.isposinf(bound[1]): 2771 # we can use a bounded scalar minimization 2772 res = _minimize_scalar_bounded(myfunc, bound, xatol=tol / 100) 2773 xi = res.x * xi 2774 return squeeze(res.fun), p + xi, xi 2775 else: 2776 # only bounded on one side. use the tangent function to convert 2777 # the infinity bound to a finite bound. The new bounded region 2778 # is a subregion of the region bounded by -np.pi/2 and np.pi/2. 2779 bound = np.arctan(bound[0]), np.arctan(bound[1]) 2780 res = _minimize_scalar_bounded( 2781 lambda x: myfunc(np.tan(x)), 2782 bound, 2783 xatol=tol / 100) 2784 xi = np.tan(res.x) * xi 2785 return squeeze(res.fun), p + xi, xi 2786 2787 2788def fmin_powell(func, x0, args=(), xtol=1e-4, ftol=1e-4, maxiter=None, 2789 maxfun=None, full_output=0, disp=1, retall=0, callback=None, 2790 direc=None): 2791 """ 2792 Minimize a function using modified Powell's method. 2793 2794 This method only uses function values, not derivatives. 2795 2796 Parameters 2797 ---------- 2798 func : callable f(x,*args) 2799 Objective function to be minimized. 2800 x0 : ndarray 2801 Initial guess. 2802 args : tuple, optional 2803 Extra arguments passed to func. 2804 xtol : float, optional 2805 Line-search error tolerance. 2806 ftol : float, optional 2807 Relative error in ``func(xopt)`` acceptable for convergence. 2808 maxiter : int, optional 2809 Maximum number of iterations to perform. 2810 maxfun : int, optional 2811 Maximum number of function evaluations to make. 2812 full_output : bool, optional 2813 If True, ``fopt``, ``xi``, ``direc``, ``iter``, ``funcalls``, and 2814 ``warnflag`` are returned. 2815 disp : bool, optional 2816 If True, print convergence messages. 2817 retall : bool, optional 2818 If True, return a list of the solution at each iteration. 2819 callback : callable, optional 2820 An optional user-supplied function, called after each 2821 iteration. Called as ``callback(xk)``, where ``xk`` is the 2822 current parameter vector. 2823 direc : ndarray, optional 2824 Initial fitting step and parameter order set as an (N, N) array, where N 2825 is the number of fitting parameters in `x0`. Defaults to step size 1.0 2826 fitting all parameters simultaneously (``np.eye((N, N))``). To 2827 prevent initial consideration of values in a step or to change initial 2828 step size, set to 0 or desired step size in the Jth position in the Mth 2829 block, where J is the position in `x0` and M is the desired evaluation 2830 step, with steps being evaluated in index order. Step size and ordering 2831 will change freely as minimization proceeds. 2832 2833 Returns 2834 ------- 2835 xopt : ndarray 2836 Parameter which minimizes `func`. 2837 fopt : number 2838 Value of function at minimum: ``fopt = func(xopt)``. 2839 direc : ndarray 2840 Current direction set. 2841 iter : int 2842 Number of iterations. 2843 funcalls : int 2844 Number of function calls made. 2845 warnflag : int 2846 Integer warning flag: 2847 1 : Maximum number of function evaluations. 2848 2 : Maximum number of iterations. 2849 3 : NaN result encountered. 2850 4 : The result is out of the provided bounds. 2851 allvecs : list 2852 List of solutions at each iteration. 2853 2854 See also 2855 -------- 2856 minimize: Interface to unconstrained minimization algorithms for 2857 multivariate functions. See the 'Powell' method in particular. 2858 2859 Notes 2860 ----- 2861 Uses a modification of Powell's method to find the minimum of 2862 a function of N variables. Powell's method is a conjugate 2863 direction method. 2864 2865 The algorithm has two loops. The outer loop merely iterates over the inner 2866 loop. The inner loop minimizes over each current direction in the direction 2867 set. At the end of the inner loop, if certain conditions are met, the 2868 direction that gave the largest decrease is dropped and replaced with the 2869 difference between the current estimated x and the estimated x from the 2870 beginning of the inner-loop. 2871 2872 The technical conditions for replacing the direction of greatest 2873 increase amount to checking that 2874 2875 1. No further gain can be made along the direction of greatest increase 2876 from that iteration. 2877 2. The direction of greatest increase accounted for a large sufficient 2878 fraction of the decrease in the function value from that iteration of 2879 the inner loop. 2880 2881 References 2882 ---------- 2883 Powell M.J.D. (1964) An efficient method for finding the minimum of a 2884 function of several variables without calculating derivatives, 2885 Computer Journal, 7 (2):155-162. 2886 2887 Press W., Teukolsky S.A., Vetterling W.T., and Flannery B.P.: 2888 Numerical Recipes (any edition), Cambridge University Press 2889 2890 Examples 2891 -------- 2892 >>> def f(x): 2893 ... return x**2 2894 2895 >>> from scipy import optimize 2896 2897 >>> minimum = optimize.fmin_powell(f, -1) 2898 Optimization terminated successfully. 2899 Current function value: 0.000000 2900 Iterations: 2 2901 Function evaluations: 18 2902 >>> minimum 2903 array(0.0) 2904 2905 """ 2906 opts = {'xtol': xtol, 2907 'ftol': ftol, 2908 'maxiter': maxiter, 2909 'maxfev': maxfun, 2910 'disp': disp, 2911 'direc': direc, 2912 'return_all': retall} 2913 2914 res = _minimize_powell(func, x0, args, callback=callback, **opts) 2915 2916 if full_output: 2917 retlist = (res['x'], res['fun'], res['direc'], res['nit'], 2918 res['nfev'], res['status']) 2919 if retall: 2920 retlist += (res['allvecs'], ) 2921 return retlist 2922 else: 2923 if retall: 2924 return res['x'], res['allvecs'] 2925 else: 2926 return res['x'] 2927 2928 2929def _minimize_powell(func, x0, args=(), callback=None, bounds=None, 2930 xtol=1e-4, ftol=1e-4, maxiter=None, maxfev=None, 2931 disp=False, direc=None, return_all=False, 2932 **unknown_options): 2933 """ 2934 Minimization of scalar function of one or more variables using the 2935 modified Powell algorithm. 2936 2937 Options 2938 ------- 2939 disp : bool 2940 Set to True to print convergence messages. 2941 xtol : float 2942 Relative error in solution `xopt` acceptable for convergence. 2943 ftol : float 2944 Relative error in ``fun(xopt)`` acceptable for convergence. 2945 maxiter, maxfev : int 2946 Maximum allowed number of iterations and function evaluations. 2947 Will default to ``N*1000``, where ``N`` is the number of 2948 variables, if neither `maxiter` or `maxfev` is set. If both 2949 `maxiter` and `maxfev` are set, minimization will stop at the 2950 first reached. 2951 direc : ndarray 2952 Initial set of direction vectors for the Powell method. 2953 return_all : bool, optional 2954 Set to True to return a list of the best solution at each of the 2955 iterations. 2956 bounds : `Bounds` 2957 If bounds are not provided, then an unbounded line search will be used. 2958 If bounds are provided and the initial guess is within the bounds, then 2959 every function evaluation throughout the minimization procedure will be 2960 within the bounds. If bounds are provided, the initial guess is outside 2961 the bounds, and `direc` is full rank (or left to default), then some 2962 function evaluations during the first iteration may be outside the 2963 bounds, but every function evaluation after the first iteration will be 2964 within the bounds. If `direc` is not full rank, then some parameters may 2965 not be optimized and the solution is not guaranteed to be within the 2966 bounds. 2967 return_all : bool, optional 2968 Set to True to return a list of the best solution at each of the 2969 iterations. 2970 """ 2971 _check_unknown_options(unknown_options) 2972 maxfun = maxfev 2973 retall = return_all 2974 # we need to use a mutable object here that we can update in the 2975 # wrapper function 2976 fcalls, func = _wrap_function(func, args) 2977 x = asarray(x0).flatten() 2978 if retall: 2979 allvecs = [x] 2980 N = len(x) 2981 # If neither are set, then set both to default 2982 if maxiter is None and maxfun is None: 2983 maxiter = N * 1000 2984 maxfun = N * 1000 2985 elif maxiter is None: 2986 # Convert remaining Nones, to np.inf, unless the other is np.inf, in 2987 # which case use the default to avoid unbounded iteration 2988 if maxfun == np.inf: 2989 maxiter = N * 1000 2990 else: 2991 maxiter = np.inf 2992 elif maxfun is None: 2993 if maxiter == np.inf: 2994 maxfun = N * 1000 2995 else: 2996 maxfun = np.inf 2997 2998 if direc is None: 2999 direc = eye(N, dtype=float) 3000 else: 3001 direc = asarray(direc, dtype=float) 3002 if np.linalg.matrix_rank(direc) != direc.shape[0]: 3003 warnings.warn("direc input is not full rank, some parameters may " 3004 "not be optimized", 3005 OptimizeWarning, 3) 3006 3007 if bounds is None: 3008 # don't make these arrays of all +/- inf. because 3009 # _linesearch_powell will do an unnecessary check of all the elements. 3010 # just keep them None, _linesearch_powell will not have to check 3011 # all the elements. 3012 lower_bound, upper_bound = None, None 3013 else: 3014 # bounds is standardized in _minimize.py. 3015 lower_bound, upper_bound = bounds.lb, bounds.ub 3016 if np.any(lower_bound > x0) or np.any(x0 > upper_bound): 3017 warnings.warn("Initial guess is not within the specified bounds", 3018 OptimizeWarning, 3) 3019 3020 fval = squeeze(func(x)) 3021 x1 = x.copy() 3022 iter = 0 3023 ilist = list(range(N)) 3024 while True: 3025 fx = fval 3026 bigind = 0 3027 delta = 0.0 3028 for i in ilist: 3029 direc1 = direc[i] 3030 fx2 = fval 3031 fval, x, direc1 = _linesearch_powell(func, x, direc1, 3032 tol=xtol * 100, 3033 lower_bound=lower_bound, 3034 upper_bound=upper_bound, 3035 fval=fval) 3036 if (fx2 - fval) > delta: 3037 delta = fx2 - fval 3038 bigind = i 3039 iter += 1 3040 if callback is not None: 3041 callback(x) 3042 if retall: 3043 allvecs.append(x) 3044 bnd = ftol * (np.abs(fx) + np.abs(fval)) + 1e-20 3045 if 2.0 * (fx - fval) <= bnd: 3046 break 3047 if fcalls[0] >= maxfun: 3048 break 3049 if iter >= maxiter: 3050 break 3051 if np.isnan(fx) and np.isnan(fval): 3052 # Ended up in a nan-region: bail out 3053 break 3054 3055 # Construct the extrapolated point 3056 direc1 = x - x1 3057 x2 = 2*x - x1 3058 x1 = x.copy() 3059 fx2 = squeeze(func(x2)) 3060 3061 if (fx > fx2): 3062 t = 2.0*(fx + fx2 - 2.0*fval) 3063 temp = (fx - fval - delta) 3064 t *= temp*temp 3065 temp = fx - fx2 3066 t -= delta*temp*temp 3067 if t < 0.0: 3068 fval, x, direc1 = _linesearch_powell(func, x, direc1, 3069 tol=xtol * 100, 3070 lower_bound=lower_bound, 3071 upper_bound=upper_bound, 3072 fval=fval) 3073 if np.any(direc1): 3074 direc[bigind] = direc[-1] 3075 direc[-1] = direc1 3076 3077 warnflag = 0 3078 # out of bounds is more urgent than exceeding function evals or iters, 3079 # but I don't want to cause inconsistencies by changing the 3080 # established warning flags for maxfev and maxiter, so the out of bounds 3081 # warning flag becomes 3, but is checked for first. 3082 if bounds and (np.any(lower_bound > x) or np.any(x > upper_bound)): 3083 warnflag = 4 3084 msg = _status_message['out_of_bounds'] 3085 elif fcalls[0] >= maxfun: 3086 warnflag = 1 3087 msg = _status_message['maxfev'] 3088 if disp: 3089 print("Warning: " + msg) 3090 elif iter >= maxiter: 3091 warnflag = 2 3092 msg = _status_message['maxiter'] 3093 if disp: 3094 print("Warning: " + msg) 3095 elif np.isnan(fval) or np.isnan(x).any(): 3096 warnflag = 3 3097 msg = _status_message['nan'] 3098 if disp: 3099 print("Warning: " + msg) 3100 else: 3101 msg = _status_message['success'] 3102 if disp: 3103 print(msg) 3104 print(" Current function value: %f" % fval) 3105 print(" Iterations: %d" % iter) 3106 print(" Function evaluations: %d" % fcalls[0]) 3107 3108 result = OptimizeResult(fun=fval, direc=direc, nit=iter, nfev=fcalls[0], 3109 status=warnflag, success=(warnflag == 0), 3110 message=msg, x=x) 3111 if retall: 3112 result['allvecs'] = allvecs 3113 return result 3114 3115 3116def _endprint(x, flag, fval, maxfun, xtol, disp): 3117 if flag == 0: 3118 if disp > 1: 3119 print("\nOptimization terminated successfully;\n" 3120 "The returned value satisfies the termination criteria\n" 3121 "(using xtol = ", xtol, ")") 3122 if flag == 1: 3123 if disp: 3124 print("\nMaximum number of function evaluations exceeded --- " 3125 "increase maxfun argument.\n") 3126 if flag == 2: 3127 if disp: 3128 print("\n{}".format(_status_message['nan'])) 3129 return 3130 3131 3132def brute(func, ranges, args=(), Ns=20, full_output=0, finish=fmin, 3133 disp=False, workers=1): 3134 """Minimize a function over a given range by brute force. 3135 3136 Uses the "brute force" method, i.e., computes the function's value 3137 at each point of a multidimensional grid of points, to find the global 3138 minimum of the function. 3139 3140 The function is evaluated everywhere in the range with the datatype of the 3141 first call to the function, as enforced by the ``vectorize`` NumPy 3142 function. The value and type of the function evaluation returned when 3143 ``full_output=True`` are affected in addition by the ``finish`` argument 3144 (see Notes). 3145 3146 The brute force approach is inefficient because the number of grid points 3147 increases exponentially - the number of grid points to evaluate is 3148 ``Ns ** len(x)``. Consequently, even with coarse grid spacing, even 3149 moderately sized problems can take a long time to run, and/or run into 3150 memory limitations. 3151 3152 Parameters 3153 ---------- 3154 func : callable 3155 The objective function to be minimized. Must be in the 3156 form ``f(x, *args)``, where ``x`` is the argument in 3157 the form of a 1-D array and ``args`` is a tuple of any 3158 additional fixed parameters needed to completely specify 3159 the function. 3160 ranges : tuple 3161 Each component of the `ranges` tuple must be either a 3162 "slice object" or a range tuple of the form ``(low, high)``. 3163 The program uses these to create the grid of points on which 3164 the objective function will be computed. See `Note 2` for 3165 more detail. 3166 args : tuple, optional 3167 Any additional fixed parameters needed to completely specify 3168 the function. 3169 Ns : int, optional 3170 Number of grid points along the axes, if not otherwise 3171 specified. See `Note2`. 3172 full_output : bool, optional 3173 If True, return the evaluation grid and the objective function's 3174 values on it. 3175 finish : callable, optional 3176 An optimization function that is called with the result of brute force 3177 minimization as initial guess. `finish` should take `func` and 3178 the initial guess as positional arguments, and take `args` as 3179 keyword arguments. It may additionally take `full_output` 3180 and/or `disp` as keyword arguments. Use None if no "polishing" 3181 function is to be used. See Notes for more details. 3182 disp : bool, optional 3183 Set to True to print convergence messages from the `finish` callable. 3184 workers : int or map-like callable, optional 3185 If `workers` is an int the grid is subdivided into `workers` 3186 sections and evaluated in parallel (uses 3187 `multiprocessing.Pool <multiprocessing>`). 3188 Supply `-1` to use all cores available to the Process. 3189 Alternatively supply a map-like callable, such as 3190 `multiprocessing.Pool.map` for evaluating the grid in parallel. 3191 This evaluation is carried out as ``workers(func, iterable)``. 3192 Requires that `func` be pickleable. 3193 3194 .. versionadded:: 1.3.0 3195 3196 Returns 3197 ------- 3198 x0 : ndarray 3199 A 1-D array containing the coordinates of a point at which the 3200 objective function had its minimum value. (See `Note 1` for 3201 which point is returned.) 3202 fval : float 3203 Function value at the point `x0`. (Returned when `full_output` is 3204 True.) 3205 grid : tuple 3206 Representation of the evaluation grid. It has the same 3207 length as `x0`. (Returned when `full_output` is True.) 3208 Jout : ndarray 3209 Function values at each point of the evaluation 3210 grid, i.e., ``Jout = func(*grid)``. (Returned 3211 when `full_output` is True.) 3212 3213 See Also 3214 -------- 3215 basinhopping, differential_evolution 3216 3217 Notes 3218 ----- 3219 *Note 1*: The program finds the gridpoint at which the lowest value 3220 of the objective function occurs. If `finish` is None, that is the 3221 point returned. When the global minimum occurs within (or not very far 3222 outside) the grid's boundaries, and the grid is fine enough, that 3223 point will be in the neighborhood of the global minimum. 3224 3225 However, users often employ some other optimization program to 3226 "polish" the gridpoint values, i.e., to seek a more precise 3227 (local) minimum near `brute's` best gridpoint. 3228 The `brute` function's `finish` option provides a convenient way to do 3229 that. Any polishing program used must take `brute's` output as its 3230 initial guess as a positional argument, and take `brute's` input values 3231 for `args` as keyword arguments, otherwise an error will be raised. 3232 It may additionally take `full_output` and/or `disp` as keyword arguments. 3233 3234 `brute` assumes that the `finish` function returns either an 3235 `OptimizeResult` object or a tuple in the form: 3236 ``(xmin, Jmin, ... , statuscode)``, where ``xmin`` is the minimizing 3237 value of the argument, ``Jmin`` is the minimum value of the objective 3238 function, "..." may be some other returned values (which are not used 3239 by `brute`), and ``statuscode`` is the status code of the `finish` program. 3240 3241 Note that when `finish` is not None, the values returned are those 3242 of the `finish` program, *not* the gridpoint ones. Consequently, 3243 while `brute` confines its search to the input grid points, 3244 the `finish` program's results usually will not coincide with any 3245 gridpoint, and may fall outside the grid's boundary. Thus, if a 3246 minimum only needs to be found over the provided grid points, make 3247 sure to pass in `finish=None`. 3248 3249 *Note 2*: The grid of points is a `numpy.mgrid` object. 3250 For `brute` the `ranges` and `Ns` inputs have the following effect. 3251 Each component of the `ranges` tuple can be either a slice object or a 3252 two-tuple giving a range of values, such as (0, 5). If the component is a 3253 slice object, `brute` uses it directly. If the component is a two-tuple 3254 range, `brute` internally converts it to a slice object that interpolates 3255 `Ns` points from its low-value to its high-value, inclusive. 3256 3257 Examples 3258 -------- 3259 We illustrate the use of `brute` to seek the global minimum of a function 3260 of two variables that is given as the sum of a positive-definite 3261 quadratic and two deep "Gaussian-shaped" craters. Specifically, define 3262 the objective function `f` as the sum of three other functions, 3263 ``f = f1 + f2 + f3``. We suppose each of these has a signature 3264 ``(z, *params)``, where ``z = (x, y)``, and ``params`` and the functions 3265 are as defined below. 3266 3267 >>> params = (2, 3, 7, 8, 9, 10, 44, -1, 2, 26, 1, -2, 0.5) 3268 >>> def f1(z, *params): 3269 ... x, y = z 3270 ... a, b, c, d, e, f, g, h, i, j, k, l, scale = params 3271 ... return (a * x**2 + b * x * y + c * y**2 + d*x + e*y + f) 3272 3273 >>> def f2(z, *params): 3274 ... x, y = z 3275 ... a, b, c, d, e, f, g, h, i, j, k, l, scale = params 3276 ... return (-g*np.exp(-((x-h)**2 + (y-i)**2) / scale)) 3277 3278 >>> def f3(z, *params): 3279 ... x, y = z 3280 ... a, b, c, d, e, f, g, h, i, j, k, l, scale = params 3281 ... return (-j*np.exp(-((x-k)**2 + (y-l)**2) / scale)) 3282 3283 >>> def f(z, *params): 3284 ... return f1(z, *params) + f2(z, *params) + f3(z, *params) 3285 3286 Thus, the objective function may have local minima near the minimum 3287 of each of the three functions of which it is composed. To 3288 use `fmin` to polish its gridpoint result, we may then continue as 3289 follows: 3290 3291 >>> rranges = (slice(-4, 4, 0.25), slice(-4, 4, 0.25)) 3292 >>> from scipy import optimize 3293 >>> resbrute = optimize.brute(f, rranges, args=params, full_output=True, 3294 ... finish=optimize.fmin) 3295 >>> resbrute[0] # global minimum 3296 array([-1.05665192, 1.80834843]) 3297 >>> resbrute[1] # function value at global minimum 3298 -3.4085818767 3299 3300 Note that if `finish` had been set to None, we would have gotten the 3301 gridpoint [-1.0 1.75] where the rounded function value is -2.892. 3302 3303 """ 3304 N = len(ranges) 3305 if N > 40: 3306 raise ValueError("Brute Force not possible with more " 3307 "than 40 variables.") 3308 lrange = list(ranges) 3309 for k in range(N): 3310 if type(lrange[k]) is not type(slice(None)): 3311 if len(lrange[k]) < 3: 3312 lrange[k] = tuple(lrange[k]) + (complex(Ns),) 3313 lrange[k] = slice(*lrange[k]) 3314 if (N == 1): 3315 lrange = lrange[0] 3316 3317 grid = np.mgrid[lrange] 3318 3319 # obtain an array of parameters that is iterable by a map-like callable 3320 inpt_shape = grid.shape 3321 if (N > 1): 3322 grid = np.reshape(grid, (inpt_shape[0], np.prod(inpt_shape[1:]))).T 3323 3324 wrapped_func = _Brute_Wrapper(func, args) 3325 3326 # iterate over input arrays, possibly in parallel 3327 with MapWrapper(pool=workers) as mapper: 3328 Jout = np.array(list(mapper(wrapped_func, grid))) 3329 if (N == 1): 3330 grid = (grid,) 3331 Jout = np.squeeze(Jout) 3332 elif (N > 1): 3333 Jout = np.reshape(Jout, inpt_shape[1:]) 3334 grid = np.reshape(grid.T, inpt_shape) 3335 3336 Nshape = shape(Jout) 3337 3338 indx = argmin(Jout.ravel(), axis=-1) 3339 Nindx = np.empty(N, int) 3340 xmin = np.empty(N, float) 3341 for k in range(N - 1, -1, -1): 3342 thisN = Nshape[k] 3343 Nindx[k] = indx % Nshape[k] 3344 indx = indx // thisN 3345 for k in range(N): 3346 xmin[k] = grid[k][tuple(Nindx)] 3347 3348 Jmin = Jout[tuple(Nindx)] 3349 if (N == 1): 3350 grid = grid[0] 3351 xmin = xmin[0] 3352 3353 if callable(finish): 3354 # set up kwargs for `finish` function 3355 finish_args = _getfullargspec(finish).args 3356 finish_kwargs = dict() 3357 if 'full_output' in finish_args: 3358 finish_kwargs['full_output'] = 1 3359 if 'disp' in finish_args: 3360 finish_kwargs['disp'] = disp 3361 elif 'options' in finish_args: 3362 # pass 'disp' as `options` 3363 # (e.g., if `finish` is `minimize`) 3364 finish_kwargs['options'] = {'disp': disp} 3365 3366 # run minimizer 3367 res = finish(func, xmin, args=args, **finish_kwargs) 3368 3369 if isinstance(res, OptimizeResult): 3370 xmin = res.x 3371 Jmin = res.fun 3372 success = res.success 3373 else: 3374 xmin = res[0] 3375 Jmin = res[1] 3376 success = res[-1] == 0 3377 if not success: 3378 if disp: 3379 print("Warning: Either final optimization did not succeed " 3380 "or `finish` does not return `statuscode` as its last " 3381 "argument.") 3382 3383 if full_output: 3384 return xmin, Jmin, grid, Jout 3385 else: 3386 return xmin 3387 3388 3389class _Brute_Wrapper: 3390 """ 3391 Object to wrap user cost function for optimize.brute, allowing picklability 3392 """ 3393 3394 def __init__(self, f, args): 3395 self.f = f 3396 self.args = [] if args is None else args 3397 3398 def __call__(self, x): 3399 # flatten needed for one dimensional case. 3400 return self.f(np.asarray(x).flatten(), *self.args) 3401 3402 3403def show_options(solver=None, method=None, disp=True): 3404 """ 3405 Show documentation for additional options of optimization solvers. 3406 3407 These are method-specific options that can be supplied through the 3408 ``options`` dict. 3409 3410 Parameters 3411 ---------- 3412 solver : str 3413 Type of optimization solver. One of 'minimize', 'minimize_scalar', 3414 'root', 'root_scalar', 'linprog', or 'quadratic_assignment'. 3415 method : str, optional 3416 If not given, shows all methods of the specified solver. Otherwise, 3417 show only the options for the specified method. Valid values 3418 corresponds to methods' names of respective solver (e.g., 'BFGS' for 3419 'minimize'). 3420 disp : bool, optional 3421 Whether to print the result rather than returning it. 3422 3423 Returns 3424 ------- 3425 text 3426 Either None (for disp=True) or the text string (disp=False) 3427 3428 Notes 3429 ----- 3430 The solver-specific methods are: 3431 3432 `scipy.optimize.minimize` 3433 3434 - :ref:`Nelder-Mead <optimize.minimize-neldermead>` 3435 - :ref:`Powell <optimize.minimize-powell>` 3436 - :ref:`CG <optimize.minimize-cg>` 3437 - :ref:`BFGS <optimize.minimize-bfgs>` 3438 - :ref:`Newton-CG <optimize.minimize-newtoncg>` 3439 - :ref:`L-BFGS-B <optimize.minimize-lbfgsb>` 3440 - :ref:`TNC <optimize.minimize-tnc>` 3441 - :ref:`COBYLA <optimize.minimize-cobyla>` 3442 - :ref:`SLSQP <optimize.minimize-slsqp>` 3443 - :ref:`dogleg <optimize.minimize-dogleg>` 3444 - :ref:`trust-ncg <optimize.minimize-trustncg>` 3445 3446 `scipy.optimize.root` 3447 3448 - :ref:`hybr <optimize.root-hybr>` 3449 - :ref:`lm <optimize.root-lm>` 3450 - :ref:`broyden1 <optimize.root-broyden1>` 3451 - :ref:`broyden2 <optimize.root-broyden2>` 3452 - :ref:`anderson <optimize.root-anderson>` 3453 - :ref:`linearmixing <optimize.root-linearmixing>` 3454 - :ref:`diagbroyden <optimize.root-diagbroyden>` 3455 - :ref:`excitingmixing <optimize.root-excitingmixing>` 3456 - :ref:`krylov <optimize.root-krylov>` 3457 - :ref:`df-sane <optimize.root-dfsane>` 3458 3459 `scipy.optimize.minimize_scalar` 3460 3461 - :ref:`brent <optimize.minimize_scalar-brent>` 3462 - :ref:`golden <optimize.minimize_scalar-golden>` 3463 - :ref:`bounded <optimize.minimize_scalar-bounded>` 3464 3465 `scipy.optimize.root_scalar` 3466 3467 - :ref:`bisect <optimize.root_scalar-bisect>` 3468 - :ref:`brentq <optimize.root_scalar-brentq>` 3469 - :ref:`brenth <optimize.root_scalar-brenth>` 3470 - :ref:`ridder <optimize.root_scalar-ridder>` 3471 - :ref:`toms748 <optimize.root_scalar-toms748>` 3472 - :ref:`newton <optimize.root_scalar-newton>` 3473 - :ref:`secant <optimize.root_scalar-secant>` 3474 - :ref:`halley <optimize.root_scalar-halley>` 3475 3476 `scipy.optimize.linprog` 3477 3478 - :ref:`simplex <optimize.linprog-simplex>` 3479 - :ref:`interior-point <optimize.linprog-interior-point>` 3480 - :ref:`revised simplex <optimize.linprog-revised_simplex>` 3481 - :ref:`highs <optimize.linprog-highs>` 3482 - :ref:`highs-ds <optimize.linprog-highs-ds>` 3483 - :ref:`highs-ipm <optimize.linprog-highs-ipm>` 3484 3485 `scipy.optimize.quadratic_assignment` 3486 3487 - :ref:`faq <optimize.qap-faq>` 3488 - :ref:`2opt <optimize.qap-2opt>` 3489 3490 Examples 3491 -------- 3492 We can print documentations of a solver in stdout: 3493 3494 >>> from scipy.optimize import show_options 3495 >>> show_options(solver="minimize") 3496 ... 3497 3498 Specifying a method is possible: 3499 3500 >>> show_options(solver="minimize", method="Nelder-Mead") 3501 ... 3502 3503 We can also get the documentations as a string: 3504 3505 >>> show_options(solver="minimize", method="Nelder-Mead", disp=False) 3506 Minimization of scalar function of one or more variables using the ... 3507 3508 """ 3509 import textwrap 3510 3511 doc_routines = { 3512 'minimize': ( 3513 ('bfgs', 'scipy.optimize.optimize._minimize_bfgs'), 3514 ('cg', 'scipy.optimize.optimize._minimize_cg'), 3515 ('cobyla', 'scipy.optimize.cobyla._minimize_cobyla'), 3516 ('dogleg', 'scipy.optimize._trustregion_dogleg._minimize_dogleg'), 3517 ('l-bfgs-b', 'scipy.optimize.lbfgsb._minimize_lbfgsb'), 3518 ('nelder-mead', 'scipy.optimize.optimize._minimize_neldermead'), 3519 ('newton-cg', 'scipy.optimize.optimize._minimize_newtoncg'), 3520 ('powell', 'scipy.optimize.optimize._minimize_powell'), 3521 ('slsqp', 'scipy.optimize.slsqp._minimize_slsqp'), 3522 ('tnc', 'scipy.optimize.tnc._minimize_tnc'), 3523 ('trust-ncg', 3524 'scipy.optimize._trustregion_ncg._minimize_trust_ncg'), 3525 ('trust-constr', 3526 'scipy.optimize._trustregion_constr.' 3527 '_minimize_trustregion_constr'), 3528 ('trust-exact', 3529 'scipy.optimize._trustregion_exact._minimize_trustregion_exact'), 3530 ('trust-krylov', 3531 'scipy.optimize._trustregion_krylov._minimize_trust_krylov'), 3532 ), 3533 'root': ( 3534 ('hybr', 'scipy.optimize.minpack._root_hybr'), 3535 ('lm', 'scipy.optimize._root._root_leastsq'), 3536 ('broyden1', 'scipy.optimize._root._root_broyden1_doc'), 3537 ('broyden2', 'scipy.optimize._root._root_broyden2_doc'), 3538 ('anderson', 'scipy.optimize._root._root_anderson_doc'), 3539 ('diagbroyden', 'scipy.optimize._root._root_diagbroyden_doc'), 3540 ('excitingmixing', 'scipy.optimize._root._root_excitingmixing_doc'), 3541 ('linearmixing', 'scipy.optimize._root._root_linearmixing_doc'), 3542 ('krylov', 'scipy.optimize._root._root_krylov_doc'), 3543 ('df-sane', 'scipy.optimize._spectral._root_df_sane'), 3544 ), 3545 'root_scalar': ( 3546 ('bisect', 'scipy.optimize._root_scalar._root_scalar_bisect_doc'), 3547 ('brentq', 'scipy.optimize._root_scalar._root_scalar_brentq_doc'), 3548 ('brenth', 'scipy.optimize._root_scalar._root_scalar_brenth_doc'), 3549 ('ridder', 'scipy.optimize._root_scalar._root_scalar_ridder_doc'), 3550 ('toms748', 'scipy.optimize._root_scalar._root_scalar_toms748_doc'), 3551 ('secant', 'scipy.optimize._root_scalar._root_scalar_secant_doc'), 3552 ('newton', 'scipy.optimize._root_scalar._root_scalar_newton_doc'), 3553 ('halley', 'scipy.optimize._root_scalar._root_scalar_halley_doc'), 3554 ), 3555 'linprog': ( 3556 ('simplex', 'scipy.optimize._linprog._linprog_simplex_doc'), 3557 ('interior-point', 'scipy.optimize._linprog._linprog_ip_doc'), 3558 ('revised simplex', 'scipy.optimize._linprog._linprog_rs_doc'), 3559 ('highs-ipm', 'scipy.optimize._linprog._linprog_highs_ipm_doc'), 3560 ('highs-ds', 'scipy.optimize._linprog._linprog_highs_ds_doc'), 3561 ('highs', 'scipy.optimize._linprog._linprog_highs_doc'), 3562 ), 3563 'quadratic_assignment': ( 3564 ('faq', 'scipy.optimize._qap._quadratic_assignment_faq'), 3565 ('2opt', 'scipy.optimize._qap._quadratic_assignment_2opt'), 3566 ), 3567 'minimize_scalar': ( 3568 ('brent', 'scipy.optimize.optimize._minimize_scalar_brent'), 3569 ('bounded', 'scipy.optimize.optimize._minimize_scalar_bounded'), 3570 ('golden', 'scipy.optimize.optimize._minimize_scalar_golden'), 3571 ), 3572 } 3573 3574 if solver is None: 3575 text = ["\n\n\n========\n", "minimize\n", "========\n"] 3576 text.append(show_options('minimize', disp=False)) 3577 text.extend(["\n\n===============\n", "minimize_scalar\n", 3578 "===============\n"]) 3579 text.append(show_options('minimize_scalar', disp=False)) 3580 text.extend(["\n\n\n====\n", "root\n", 3581 "====\n"]) 3582 text.append(show_options('root', disp=False)) 3583 text.extend(['\n\n\n=======\n', 'linprog\n', 3584 '=======\n']) 3585 text.append(show_options('linprog', disp=False)) 3586 text = "".join(text) 3587 else: 3588 solver = solver.lower() 3589 if solver not in doc_routines: 3590 raise ValueError('Unknown solver %r' % (solver,)) 3591 3592 if method is None: 3593 text = [] 3594 for name, _ in doc_routines[solver]: 3595 text.extend(["\n\n" + name, "\n" + "="*len(name) + "\n\n"]) 3596 text.append(show_options(solver, name, disp=False)) 3597 text = "".join(text) 3598 else: 3599 method = method.lower() 3600 methods = dict(doc_routines[solver]) 3601 if method not in methods: 3602 raise ValueError("Unknown method %r" % (method,)) 3603 name = methods[method] 3604 3605 # Import function object 3606 parts = name.split('.') 3607 mod_name = ".".join(parts[:-1]) 3608 __import__(mod_name) 3609 obj = getattr(sys.modules[mod_name], parts[-1]) 3610 3611 # Get doc 3612 doc = obj.__doc__ 3613 if doc is not None: 3614 text = textwrap.dedent(doc).strip() 3615 else: 3616 text = "" 3617 3618 if disp: 3619 print(text) 3620 return 3621 else: 3622 return text 3623 3624 3625def main(): 3626 import time 3627 3628 times = [] 3629 algor = [] 3630 x0 = [0.8, 1.2, 0.7] 3631 print("Nelder-Mead Simplex") 3632 print("===================") 3633 start = time.time() 3634 x = fmin(rosen, x0) 3635 print(x) 3636 times.append(time.time() - start) 3637 algor.append('Nelder-Mead Simplex\t') 3638 3639 print() 3640 print("Powell Direction Set Method") 3641 print("===========================") 3642 start = time.time() 3643 x = fmin_powell(rosen, x0) 3644 print(x) 3645 times.append(time.time() - start) 3646 algor.append('Powell Direction Set Method.') 3647 3648 print() 3649 print("Nonlinear CG") 3650 print("============") 3651 start = time.time() 3652 x = fmin_cg(rosen, x0, fprime=rosen_der, maxiter=200) 3653 print(x) 3654 times.append(time.time() - start) 3655 algor.append('Nonlinear CG \t') 3656 3657 print() 3658 print("BFGS Quasi-Newton") 3659 print("=================") 3660 start = time.time() 3661 x = fmin_bfgs(rosen, x0, fprime=rosen_der, maxiter=80) 3662 print(x) 3663 times.append(time.time() - start) 3664 algor.append('BFGS Quasi-Newton\t') 3665 3666 print() 3667 print("BFGS approximate gradient") 3668 print("=========================") 3669 start = time.time() 3670 x = fmin_bfgs(rosen, x0, gtol=1e-4, maxiter=100) 3671 print(x) 3672 times.append(time.time() - start) 3673 algor.append('BFGS without gradient\t') 3674 3675 print() 3676 print("Newton-CG with Hessian product") 3677 print("==============================") 3678 start = time.time() 3679 x = fmin_ncg(rosen, x0, rosen_der, fhess_p=rosen_hess_prod, maxiter=80) 3680 print(x) 3681 times.append(time.time() - start) 3682 algor.append('Newton-CG with hessian product') 3683 3684 print() 3685 print("Newton-CG with full Hessian") 3686 print("===========================") 3687 start = time.time() 3688 x = fmin_ncg(rosen, x0, rosen_der, fhess=rosen_hess, maxiter=80) 3689 print(x) 3690 times.append(time.time() - start) 3691 algor.append('Newton-CG with full Hessian') 3692 3693 print() 3694 print("\nMinimizing the Rosenbrock function of order 3\n") 3695 print(" Algorithm \t\t\t Seconds") 3696 print("===========\t\t\t =========") 3697 for k in range(len(algor)): 3698 print(algor[k], "\t -- ", times[k]) 3699 3700 3701if __name__ == "__main__": 3702 main() 3703