1"""Generic interface for least-squares minimization.""" 2from warnings import warn 3 4import numpy as np 5from numpy.linalg import norm 6 7from scipy.sparse import issparse, csr_matrix 8from scipy.sparse.linalg import LinearOperator 9from scipy.optimize import _minpack, OptimizeResult 10from scipy.optimize._numdiff import approx_derivative, group_columns 11 12from .trf import trf 13from .dogbox import dogbox 14from .common import EPS, in_bounds, make_strictly_feasible 15 16 17TERMINATION_MESSAGES = { 18 -1: "Improper input parameters status returned from `leastsq`", 19 0: "The maximum number of function evaluations is exceeded.", 20 1: "`gtol` termination condition is satisfied.", 21 2: "`ftol` termination condition is satisfied.", 22 3: "`xtol` termination condition is satisfied.", 23 4: "Both `ftol` and `xtol` termination conditions are satisfied." 24} 25 26 27FROM_MINPACK_TO_COMMON = { 28 0: -1, # Improper input parameters from MINPACK. 29 1: 2, 30 2: 3, 31 3: 4, 32 4: 1, 33 5: 0 34 # There are 6, 7, 8 for too small tolerance parameters, 35 # but we guard against it by checking ftol, xtol, gtol beforehand. 36} 37 38 39def call_minpack(fun, x0, jac, ftol, xtol, gtol, max_nfev, x_scale, diff_step): 40 n = x0.size 41 42 if diff_step is None: 43 epsfcn = EPS 44 else: 45 epsfcn = diff_step**2 46 47 # Compute MINPACK's `diag`, which is inverse of our `x_scale` and 48 # ``x_scale='jac'`` corresponds to ``diag=None``. 49 if isinstance(x_scale, str) and x_scale == 'jac': 50 diag = None 51 else: 52 diag = 1 / x_scale 53 54 full_output = True 55 col_deriv = False 56 factor = 100.0 57 58 if jac is None: 59 if max_nfev is None: 60 # n squared to account for Jacobian evaluations. 61 max_nfev = 100 * n * (n + 1) 62 x, info, status = _minpack._lmdif( 63 fun, x0, (), full_output, ftol, xtol, gtol, 64 max_nfev, epsfcn, factor, diag) 65 else: 66 if max_nfev is None: 67 max_nfev = 100 * n 68 x, info, status = _minpack._lmder( 69 fun, jac, x0, (), full_output, col_deriv, 70 ftol, xtol, gtol, max_nfev, factor, diag) 71 72 f = info['fvec'] 73 74 if callable(jac): 75 J = jac(x) 76 else: 77 J = np.atleast_2d(approx_derivative(fun, x)) 78 79 cost = 0.5 * np.dot(f, f) 80 g = J.T.dot(f) 81 g_norm = norm(g, ord=np.inf) 82 83 nfev = info['nfev'] 84 njev = info.get('njev', None) 85 86 status = FROM_MINPACK_TO_COMMON[status] 87 active_mask = np.zeros_like(x0, dtype=int) 88 89 return OptimizeResult( 90 x=x, cost=cost, fun=f, jac=J, grad=g, optimality=g_norm, 91 active_mask=active_mask, nfev=nfev, njev=njev, status=status) 92 93 94def prepare_bounds(bounds, n): 95 lb, ub = [np.asarray(b, dtype=float) for b in bounds] 96 if lb.ndim == 0: 97 lb = np.resize(lb, n) 98 99 if ub.ndim == 0: 100 ub = np.resize(ub, n) 101 102 return lb, ub 103 104 105def check_tolerance(ftol, xtol, gtol, method): 106 def check(tol, name): 107 if tol is None: 108 tol = 0 109 elif tol < EPS: 110 warn("Setting `{}` below the machine epsilon ({:.2e}) effectively " 111 "disables the corresponding termination condition." 
                 .format(name, EPS))
        return tol

    ftol = check(ftol, "ftol")
    xtol = check(xtol, "xtol")
    gtol = check(gtol, "gtol")

    if method == "lm" and (ftol < EPS or xtol < EPS or gtol < EPS):
        raise ValueError("All tolerances must be higher than machine epsilon "
                         "({:.2e}) for method 'lm'.".format(EPS))
    elif ftol < EPS and xtol < EPS and gtol < EPS:
        raise ValueError("At least one of the tolerances must be higher than "
                         "machine epsilon ({:.2e}).".format(EPS))

    return ftol, xtol, gtol


def check_x_scale(x_scale, x0):
    if isinstance(x_scale, str) and x_scale == 'jac':
        return x_scale

    try:
        x_scale = np.asarray(x_scale, dtype=float)
        valid = np.all(np.isfinite(x_scale)) and np.all(x_scale > 0)
    except (ValueError, TypeError):
        valid = False

    if not valid:
        raise ValueError("`x_scale` must be 'jac' or array_like with "
                         "positive numbers.")

    if x_scale.ndim == 0:
        x_scale = np.resize(x_scale, x0.shape)

    if x_scale.shape != x0.shape:
        raise ValueError("Inconsistent shapes between `x_scale` and `x0`.")

    return x_scale


def check_jac_sparsity(jac_sparsity, m, n):
    if jac_sparsity is None:
        return None

    if not issparse(jac_sparsity):
        jac_sparsity = np.atleast_2d(jac_sparsity)

    if jac_sparsity.shape != (m, n):
        raise ValueError("`jac_sparsity` has wrong shape.")

    return jac_sparsity, group_columns(jac_sparsity)


# Loss functions.


def huber(z, rho, cost_only):
    mask = z <= 1
    rho[0, mask] = z[mask]
    rho[0, ~mask] = 2 * z[~mask]**0.5 - 1
    if cost_only:
        return
    rho[1, mask] = 1
    rho[1, ~mask] = z[~mask]**-0.5
    rho[2, mask] = 0
    rho[2, ~mask] = -0.5 * z[~mask]**-1.5


def soft_l1(z, rho, cost_only):
    t = 1 + z
    rho[0] = 2 * (t**0.5 - 1)
    if cost_only:
        return
    rho[1] = t**-0.5
    rho[2] = -0.5 * t**-1.5


def cauchy(z, rho, cost_only):
    rho[0] = np.log1p(z)
    if cost_only:
        return
    t = 1 + z
    rho[1] = 1 / t
    rho[2] = -1 / t**2


def arctan(z, rho, cost_only):
    rho[0] = np.arctan(z)
    if cost_only:
        return
    t = 1 + z**2
    rho[1] = 1 / t
    rho[2] = -2 * z / t**2


IMPLEMENTED_LOSSES = dict(linear=None, huber=huber, soft_l1=soft_l1,
                          cauchy=cauchy, arctan=arctan)


def construct_loss_function(m, loss, f_scale):
    if loss == 'linear':
        return None

    if not callable(loss):
        loss = IMPLEMENTED_LOSSES[loss]
        rho = np.empty((3, m))

        def loss_function(f, cost_only=False):
            z = (f / f_scale) ** 2
            loss(z, rho, cost_only=cost_only)
            if cost_only:
                return 0.5 * f_scale ** 2 * np.sum(rho[0])
            rho[0] *= f_scale ** 2
            rho[2] /= f_scale ** 2
            return rho
    else:
        def loss_function(f, cost_only=False):
            z = (f / f_scale) ** 2
            rho = loss(z)
            if cost_only:
                return 0.5 * f_scale ** 2 * np.sum(rho[0])
            rho[0] *= f_scale ** 2
            rho[2] /= f_scale ** 2
            return rho

    return loss_function


def least_squares(
        fun, x0, jac='2-point', bounds=(-np.inf, np.inf), method='trf',
        ftol=1e-8, xtol=1e-8, gtol=1e-8, x_scale=1.0, loss='linear',
        f_scale=1.0, diff_step=None, tr_solver=None, tr_options={},
        jac_sparsity=None, max_nfev=None, verbose=0, args=(), kwargs={}):
    """Solve a nonlinear least-squares problem with bounds on the variables.

    Given the residuals f(x) (an m-D real function of n real
    variables) and the loss function rho(s) (a scalar function), `least_squares`
    finds a local minimum of the cost function F(x)::

        minimize F(x) = 0.5 * sum(rho(f_i(x)**2), i = 0, ..., m - 1)
        subject to lb <= x <= ub

    The purpose of the loss function rho(s) is to reduce the influence of
    outliers on the solution.

    Parameters
    ----------
    fun : callable
        Function which computes the vector of residuals, with the signature
        ``fun(x, *args, **kwargs)``, i.e., the minimization proceeds with
        respect to its first argument. The argument ``x`` passed to this
        function is an ndarray of shape (n,) (never a scalar, even for n=1).
        It must allocate and return a 1-D array_like of shape (m,) or a scalar.
        If the argument ``x`` is complex or the function ``fun`` returns
        complex residuals, it must be wrapped in a real function of real
        arguments, as shown at the end of the Examples section.
    x0 : array_like with shape (n,) or float
        Initial guess on independent variables. If float, it will be treated
        as a 1-D array with one element.
    jac : {'2-point', '3-point', 'cs', callable}, optional
        Method of computing the Jacobian matrix (an m-by-n matrix, where
        element (i, j) is the partial derivative of f[i] with respect to
        x[j]). The keywords select a finite difference scheme for numerical
        estimation. The scheme '3-point' is more accurate, but requires
        twice as many operations as '2-point' (default). The scheme 'cs'
        uses complex steps, and while potentially the most accurate, it is
        applicable only when `fun` correctly handles complex inputs and
        can be analytically continued to the complex plane. Method 'lm'
        always uses the '2-point' scheme. If callable, it is used as
        ``jac(x, *args, **kwargs)`` and should return a good approximation
        (or the exact value) for the Jacobian as an array_like (np.atleast_2d
        is applied), a sparse matrix (csr_matrix preferred for performance) or
        a `scipy.sparse.linalg.LinearOperator`.
    bounds : 2-tuple of array_like, optional
        Lower and upper bounds on independent variables. Defaults to no bounds.
        Each array must match the size of `x0` or be a scalar, in the latter
        case a bound will be the same for all variables. Use ``np.inf`` with
        an appropriate sign to disable bounds on all or some variables.
    method : {'trf', 'dogbox', 'lm'}, optional
        Algorithm to perform minimization.

        * 'trf' : Trust Region Reflective algorithm, particularly suitable
          for large sparse problems with bounds. Generally robust method.
        * 'dogbox' : dogleg algorithm with rectangular trust regions,
          typical use case is small problems with bounds. Not recommended
          for problems with rank-deficient Jacobian.
        * 'lm' : Levenberg-Marquardt algorithm as implemented in MINPACK.
          Doesn't handle bounds and sparse Jacobians. Usually the most
          efficient method for small unconstrained problems.

        Default is 'trf'. See Notes for more information.
    ftol : float or None, optional
        Tolerance for termination by the change of the cost function. Default
        is 1e-8. The optimization process is stopped when ``dF < ftol * F``,
        and there was an adequate agreement between a local quadratic model and
        the true model in the last step.

        If None and 'method' is not 'lm', the termination by this condition is
        disabled.
        If 'method' is 'lm', this tolerance must be higher than
        machine epsilon.
    xtol : float or None, optional
        Tolerance for termination by the change of the independent variables.
        Default is 1e-8. The exact condition depends on the `method` used:

        * For 'trf' and 'dogbox' : ``norm(dx) < xtol * (xtol + norm(x))``.
        * For 'lm' : ``Delta < xtol * norm(xs)``, where ``Delta`` is
          a trust-region radius and ``xs`` is the value of ``x``
          scaled according to the `x_scale` parameter (see below).

        If None and 'method' is not 'lm', the termination by this condition is
        disabled. If 'method' is 'lm', this tolerance must be higher than
        machine epsilon.
    gtol : float or None, optional
        Tolerance for termination by the norm of the gradient. Default is 1e-8.
        The exact condition depends on the `method` used:

        * For 'trf' : ``norm(g_scaled, ord=np.inf) < gtol``, where
          ``g_scaled`` is the value of the gradient scaled to account for
          the presence of the bounds [STIR]_.
        * For 'dogbox' : ``norm(g_free, ord=np.inf) < gtol``, where
          ``g_free`` is the gradient with respect to the variables which
          are not in the optimal state on the boundary.
        * For 'lm' : the maximum absolute value of the cosine of angles
          between columns of the Jacobian and the residual vector is less
          than `gtol`, or the residual vector is zero.

        If None and 'method' is not 'lm', the termination by this condition is
        disabled. If 'method' is 'lm', this tolerance must be higher than
        machine epsilon.
    x_scale : array_like or 'jac', optional
        Characteristic scale of each variable. Setting `x_scale` is equivalent
        to reformulating the problem in scaled variables ``xs = x / x_scale``.
        An alternative view is that the size of a trust region along the jth
        dimension is proportional to ``x_scale[j]``. Improved convergence may
        be achieved by setting `x_scale` such that a step of a given size
        along any of the scaled variables has a similar effect on the cost
        function. If set to 'jac', the scale is iteratively updated using the
        inverse norms of the columns of the Jacobian matrix (as described in
        [JJMore]_).
    loss : str or callable, optional
        Determines the loss function. The following keyword values are allowed:

        * 'linear' (default) : ``rho(z) = z``. Gives a standard
          least-squares problem.
        * 'soft_l1' : ``rho(z) = 2 * ((1 + z)**0.5 - 1)``. The smooth
          approximation of l1 (absolute value) loss. Usually a good
          choice for robust least squares.
        * 'huber' : ``rho(z) = z if z <= 1 else 2*z**0.5 - 1``. Works
          similarly to 'soft_l1'.
        * 'cauchy' : ``rho(z) = ln(1 + z)``. Severely weakens outliers'
          influence, but may cause difficulties in the optimization process.
        * 'arctan' : ``rho(z) = arctan(z)``. Limits a maximum loss on
          a single residual, has properties similar to 'cauchy'.

        If callable, it must take a 1-D ndarray ``z=f**2`` and return an
        array_like with shape (3, m) where row 0 contains function values,
        row 1 contains first derivatives and row 2 contains second
        derivatives. Method 'lm' supports only 'linear' loss.
    f_scale : float, optional
        Value of soft margin between inlier and outlier residuals, default
        is 1.0. The loss function is evaluated as follows
        ``rho_(f**2) = C**2 * rho(f**2 / C**2)``, where ``C`` is `f_scale`,
        and ``rho`` is determined by the `loss` parameter.
        This parameter has no effect with ``loss='linear'``, but for other
        `loss` values it is of crucial importance.
    max_nfev : None or int, optional
        Maximum number of function evaluations before the termination.
        If None (default), the value is chosen automatically:

        * For 'trf' and 'dogbox' : 100 * n.
        * For 'lm' : 100 * n if `jac` is callable and 100 * n * (n + 1)
          otherwise (because 'lm' counts function calls in Jacobian
          estimation).

    diff_step : None or array_like, optional
        Determines the relative step size for the finite difference
        approximation of the Jacobian. The actual step is computed as
        ``x * diff_step``. If None (default), then `diff_step` is taken to be
        a conventional "optimal" power of machine epsilon for the finite
        difference scheme used [NR]_.
    tr_solver : {None, 'exact', 'lsmr'}, optional
        Method for solving trust-region subproblems, relevant only for 'trf'
        and 'dogbox' methods.

        * 'exact' is suitable for not very large problems with dense
          Jacobian matrices. The computational complexity per iteration is
          comparable to a singular value decomposition of the Jacobian
          matrix.
        * 'lsmr' is suitable for problems with sparse and large Jacobian
          matrices. It uses the iterative procedure
          `scipy.sparse.linalg.lsmr` for finding a solution of a linear
          least-squares problem and only requires matrix-vector product
          evaluations.

        If None (default), the solver is chosen based on the type of Jacobian
        returned on the first iteration.
    tr_options : dict, optional
        Keyword options passed to the trust-region solver.

        * ``tr_solver='exact'``: `tr_options` are ignored.
        * ``tr_solver='lsmr'``: options for `scipy.sparse.linalg.lsmr`.
          Additionally, ``method='trf'`` supports the 'regularize' option
          (bool, default is True), which adds a regularization term to the
          normal equation; this improves convergence if the Jacobian is
          rank-deficient [Byrd]_ (eq. 3.4).

    jac_sparsity : {None, array_like, sparse matrix}, optional
        Defines the sparsity structure of the Jacobian matrix for finite
        difference estimation, its shape must be (m, n). If the Jacobian has
        only a few non-zero elements in *each* row, providing the sparsity
        structure will greatly speed up the computations [Curtis]_. A zero
        entry means that a corresponding element in the Jacobian is identically
        zero. If provided, forces the use of the 'lsmr' trust-region solver.
        If None (default), then dense differencing will be used. Has no effect
        for the 'lm' method.
    verbose : {0, 1, 2}, optional
        Level of algorithm's verbosity:

        * 0 (default) : work silently.
        * 1 : display a termination report.
        * 2 : display progress during iterations (not supported by the 'lm'
          method).

    args, kwargs : tuple and dict, optional
        Additional arguments passed to `fun` and `jac`. Both are empty by
        default. The calling signature is ``fun(x, *args, **kwargs)`` and the
        same for `jac`.

    Returns
    -------
    result : OptimizeResult
        `OptimizeResult` with the following fields defined:

            x : ndarray, shape (n,)
                Solution found.
            cost : float
                Value of the cost function at the solution.
            fun : ndarray, shape (m,)
                Vector of residuals at the solution.
            jac : ndarray, sparse matrix or LinearOperator, shape (m, n)
                Modified Jacobian matrix at the solution, in the sense that J^T J
                is a Gauss-Newton approximation of the Hessian of the cost function.
                The type is the same as the one used by the algorithm.
            grad : ndarray, shape (n,)
                Gradient of the cost function at the solution.
            optimality : float
                First-order optimality measure. In unconstrained problems, it is
                always the uniform norm of the gradient. In constrained problems,
                it is the quantity which was compared with `gtol` during iterations.
            active_mask : ndarray of int, shape (n,)
                Each component shows whether a corresponding constraint is active
                (that is, whether a variable is at the bound):

                * 0 : a constraint is not active.
                * -1 : a lower bound is active.
                * 1 : an upper bound is active.

                Might be somewhat arbitrary for the 'trf' method as it generates a
                sequence of strictly feasible iterates and `active_mask` is
                determined within a tolerance threshold.
            nfev : int
                Number of function evaluations done. Methods 'trf' and 'dogbox' do
                not count function calls for numerical Jacobian approximation, as
                opposed to the 'lm' method.
            njev : int or None
                Number of Jacobian evaluations done. If numerical Jacobian
                approximation is used in the 'lm' method, it is set to None.
            status : int
                The reason for algorithm termination:

                * -1 : improper input parameters status returned from MINPACK.
                * 0 : the maximum number of function evaluations is exceeded.
                * 1 : `gtol` termination condition is satisfied.
                * 2 : `ftol` termination condition is satisfied.
                * 3 : `xtol` termination condition is satisfied.
                * 4 : both `ftol` and `xtol` termination conditions are satisfied.

            message : str
                Verbal description of the termination reason.
            success : bool
                True if one of the convergence criteria is satisfied (`status` > 0).

    See Also
    --------
    leastsq : A legacy wrapper for the MINPACK implementation of the
        Levenberg-Marquardt algorithm.
    curve_fit : Least-squares minimization applied to a curve-fitting problem.

    Notes
    -----
    Method 'lm' (Levenberg-Marquardt) calls a wrapper over least-squares
    algorithms implemented in MINPACK (lmder, lmdif). It runs the
    Levenberg-Marquardt algorithm formulated as a trust-region type algorithm.
    The implementation is based on paper [JJMore]_, and it is very robust and
    efficient with a lot of smart tricks. It should be your first choice
    for unconstrained problems. Note that it doesn't support bounds. Also,
    it doesn't work when m < n.

    Method 'trf' (Trust Region Reflective) is motivated by the process of
    solving a system of equations, which constitute the first-order optimality
    condition for a bound-constrained minimization problem as formulated in
    [STIR]_. The algorithm iteratively solves trust-region subproblems
    augmented by a special diagonal quadratic term and with trust-region shape
    determined by the distance from the bounds and the direction of the
    gradient. These enhancements help to avoid making steps directly into
    bounds and efficiently explore the whole space of variables. To further
    improve convergence, the algorithm considers search directions reflected
    from the bounds. To obey theoretical requirements, the algorithm keeps
    iterates strictly feasible. With dense Jacobians trust-region subproblems
    are solved by an exact method very similar to the one described in
    [JJMore]_ (and implemented in MINPACK).
    The difference from the MINPACK
    implementation is that a singular value decomposition of a Jacobian
    matrix is done once per iteration, instead of a QR decomposition and series
    of Givens rotation eliminations. For large sparse Jacobians a 2-D subspace
    approach to solving trust-region subproblems is used [STIR]_, [Byrd]_.
    The subspace is spanned by a scaled gradient and an approximate
    Gauss-Newton solution delivered by `scipy.sparse.linalg.lsmr`. When no
    constraints are imposed, the algorithm is very similar to MINPACK and has
    generally comparable performance. The algorithm works quite robustly in
    unbounded and bounded problems, thus it is chosen as the default algorithm.

    Method 'dogbox' operates in a trust-region framework, but considers
    rectangular trust regions as opposed to conventional ellipsoids [Voglis]_.
    The intersection of a current trust region and initial bounds is again
    rectangular, so on each iteration a quadratic minimization problem subject
    to bound constraints is solved approximately by Powell's dogleg method
    [NumOpt]_. The required Gauss-Newton step can be computed exactly for
    dense Jacobians or approximately by `scipy.sparse.linalg.lsmr` for large
    sparse Jacobians. The algorithm is likely to exhibit slow convergence when
    the rank of the Jacobian is less than the number of variables. The
    algorithm often outperforms 'trf' in bounded problems with a small number
    of variables.

    Robust loss functions are implemented as described in [BA]_. The idea
    is to modify a residual vector and a Jacobian matrix on each iteration
    such that computed gradient and Gauss-Newton Hessian approximation match
    the true gradient and Hessian approximation of the cost function. Then
    the algorithm proceeds in a normal way, i.e., robust loss functions are
    implemented as a simple wrapper over standard least-squares algorithms.

    .. versionadded:: 0.17.0

    References
    ----------
    .. [STIR] M. A. Branch, T. F. Coleman, and Y. Li, "A Subspace, Interior,
              and Conjugate Gradient Method for Large-Scale Bound-Constrained
              Minimization Problems," SIAM Journal on Scientific Computing,
              Vol. 21, Number 1, pp 1-23, 1999.
    .. [NR] William H. Press et al., "Numerical Recipes. The Art of Scientific
            Computing. 3rd edition", Sec. 5.7.
    .. [Byrd] R. H. Byrd, R. B. Schnabel and G. A. Shultz, "Approximate
              solution of the trust region problem by minimization over
              two-dimensional subspaces", Math. Programming, 40, pp. 247-263,
              1988.
    .. [Curtis] A. Curtis, M. J. D. Powell, and J. Reid, "On the estimation of
                sparse Jacobian matrices", Journal of the Institute of
                Mathematics and its Applications, 13, pp. 117-120, 1974.
    .. [JJMore] J. J. More, "The Levenberg-Marquardt Algorithm: Implementation
                and Theory," Numerical Analysis, ed. G. A. Watson, Lecture
                Notes in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
    .. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region
                Dogleg Approach for Unconstrained and Bound Constrained
                Nonlinear Optimization", WSEAS International Conference on
                Applied Mathematics, Corfu, Greece, 2004.
    .. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization,
                2nd edition", Chapter 4.
    .. [BA] B. Triggs et al., "Bundle Adjustment - A Modern Synthesis",
            Proceedings of the International Workshop on Vision Algorithms:
            Theory and Practice, pp. 298-372, 1999.

    Examples
    --------
    In this example we find a minimum of the Rosenbrock function without bounds
    on independent variables.

    >>> def fun_rosenbrock(x):
    ...     return np.array([10 * (x[1] - x[0]**2), (1 - x[0])])

    Notice that we only provide the vector of the residuals. The algorithm
    constructs the cost function as a sum of squares of the residuals, which
    gives the Rosenbrock function. The exact minimum is at ``x = [1.0, 1.0]``.

    >>> from scipy.optimize import least_squares
    >>> x0_rosenbrock = np.array([2, 2])
    >>> res_1 = least_squares(fun_rosenbrock, x0_rosenbrock)
    >>> res_1.x
    array([ 1.,  1.])
    >>> res_1.cost
    9.8669242910846867e-30
    >>> res_1.optimality
    8.8928864934219529e-14

    We now constrain the variables, in such a way that the previous solution
    becomes infeasible. Specifically, we require that ``x[1] >= 1.5``, and
    ``x[0]`` left unconstrained. To this end, we specify the `bounds` parameter
    to `least_squares` in the form ``bounds=([-np.inf, 1.5], np.inf)``.

    We also provide the analytic Jacobian:

    >>> def jac_rosenbrock(x):
    ...     return np.array([
    ...         [-20 * x[0], 10],
    ...         [-1, 0]])

    Putting this all together, we see that the new solution lies on the bound:

    >>> res_2 = least_squares(fun_rosenbrock, x0_rosenbrock, jac_rosenbrock,
    ...                       bounds=([-np.inf, 1.5], np.inf))
    >>> res_2.x
    array([ 1.22437075,  1.5       ])
    >>> res_2.cost
    0.025213093946805685
    >>> res_2.optimality
    1.5885401433157753e-07

    Now we solve a system of equations (i.e., the cost function should be zero
    at a minimum) for a Broyden tridiagonal vector-valued function of 100000
    variables:

    >>> def fun_broyden(x):
    ...     f = (3 - x) * x + 1
    ...     f[1:] -= x[:-1]
    ...     f[:-1] -= 2 * x[1:]
    ...     return f

    The corresponding Jacobian matrix is sparse. We tell the algorithm to
    estimate it by finite differences and provide the sparsity structure of
    the Jacobian to significantly speed up this process.

    >>> from scipy.sparse import lil_matrix
    >>> def sparsity_broyden(n):
    ...     sparsity = lil_matrix((n, n), dtype=int)
    ...     i = np.arange(n)
    ...     sparsity[i, i] = 1
    ...     i = np.arange(1, n)
    ...     sparsity[i, i - 1] = 1
    ...     i = np.arange(n - 1)
    ...     sparsity[i, i + 1] = 1
    ...     return sparsity
    ...
    >>> n = 100000
    >>> x0_broyden = -np.ones(n)
    ...
    >>> res_3 = least_squares(fun_broyden, x0_broyden,
    ...                       jac_sparsity=sparsity_broyden(n))
    >>> res_3.cost
    4.5687069299604613e-23
    >>> res_3.optimality
    1.1650454296851518e-11

    Let's also solve a curve fitting problem using a robust loss function to
    take care of outliers in the data. Define the model function as
    ``y = a + b * exp(c * t)``, where t is a predictor variable, y is an
    observation and a, b, c are parameters to estimate.

    First, define the function which generates the data with noise and
    outliers, define the model parameters, and generate data:

    >>> from numpy.random import default_rng
    >>> rng = default_rng()
    >>> def gen_data(t, a, b, c, noise=0., n_outliers=0, seed=None):
    ...     rng = default_rng(seed)
    ...
    ...     y = a + b * np.exp(t * c)
    ...
    ...     error = noise * rng.standard_normal(t.size)
    ...     outliers = rng.integers(0, t.size, n_outliers)
    ...     error[outliers] *= 10
    ...
    ...     return y + error
    ...
    >>> a = 0.5
    >>> b = 2.0
    >>> c = -1
    >>> t_min = 0
    >>> t_max = 10
    >>> n_points = 15
    ...
    >>> t_train = np.linspace(t_min, t_max, n_points)
    >>> y_train = gen_data(t_train, a, b, c, noise=0.1, n_outliers=3)

    Define the function for computing residuals and the initial estimate of
    the parameters.

    >>> def fun(x, t, y):
    ...     return x[0] + x[1] * np.exp(x[2] * t) - y
    ...
    >>> x0 = np.array([1.0, 1.0, 0.0])

    Compute a standard least-squares solution:

    >>> res_lsq = least_squares(fun, x0, args=(t_train, y_train))

    Now compute two solutions with two different robust loss functions. The
    parameter `f_scale` is set to 0.1, meaning that inlier residuals should
    not significantly exceed 0.1 (the noise level used).

    >>> res_soft_l1 = least_squares(fun, x0, loss='soft_l1', f_scale=0.1,
    ...                             args=(t_train, y_train))
    >>> res_log = least_squares(fun, x0, loss='cauchy', f_scale=0.1,
    ...                         args=(t_train, y_train))

    And, finally, plot all the curves. We see that by selecting an appropriate
    `loss` we can get estimates close to optimal even in the presence of
    strong outliers. But keep in mind that generally it is recommended to try
    'soft_l1' or 'huber' losses first (if at all necessary) as the other two
    options may cause difficulties in the optimization process.

    >>> t_test = np.linspace(t_min, t_max, n_points * 10)
    >>> y_true = gen_data(t_test, a, b, c)
    >>> y_lsq = gen_data(t_test, *res_lsq.x)
    >>> y_soft_l1 = gen_data(t_test, *res_soft_l1.x)
    >>> y_log = gen_data(t_test, *res_log.x)
    ...
    >>> import matplotlib.pyplot as plt
    >>> plt.plot(t_train, y_train, 'o')
    >>> plt.plot(t_test, y_true, 'k', linewidth=2, label='true')
    >>> plt.plot(t_test, y_lsq, label='linear loss')
    >>> plt.plot(t_test, y_soft_l1, label='soft_l1 loss')
    >>> plt.plot(t_test, y_log, label='cauchy loss')
    >>> plt.xlabel("t")
    >>> plt.ylabel("y")
    >>> plt.legend()
    >>> plt.show()

    In the next example, we show how complex-valued residual functions of
    complex variables can be optimized with ``least_squares()``. Consider the
    following function:

    >>> def f(z):
    ...     return z - (0.5 + 0.5j)

    We wrap it into a function of real variables that returns real residuals
    by simply handling the real and imaginary parts as independent variables:

    >>> def f_wrap(x):
    ...     fx = f(x[0] + 1j*x[1])
    ...     return np.array([fx.real, fx.imag])

    Thus, instead of the original m-D complex function of n complex
    variables, we optimize a 2m-D real function of 2n real variables:

    >>> from scipy.optimize import least_squares
    >>> res_wrapped = least_squares(f_wrap, (0.1, 0.1), bounds=([0, 0], [1, 1]))
    >>> z = res_wrapped.x[0] + res_wrapped.x[1]*1j
    >>> z
    (0.49999999999925893+0.49999999999925893j)

    """
    if method not in ['trf', 'dogbox', 'lm']:
        raise ValueError("`method` must be 'trf', 'dogbox' or 'lm'.")

    if jac not in ['2-point', '3-point', 'cs'] and not callable(jac):
        raise ValueError("`jac` must be '2-point', '3-point', 'cs' or "
                         "callable.")

    if tr_solver not in [None, 'exact', 'lsmr']:
        raise ValueError("`tr_solver` must be None, 'exact' or 'lsmr'.")

    if loss not in IMPLEMENTED_LOSSES and not callable(loss):
        raise ValueError("`loss` must be one of {0} or a callable."
                         .format(IMPLEMENTED_LOSSES.keys()))

    if method == 'lm' and loss != 'linear':
        raise ValueError("method='lm' supports only 'linear' loss function.")

    if verbose not in [0, 1, 2]:
        raise ValueError("`verbose` must be in [0, 1, 2].")

    if len(bounds) != 2:
        raise ValueError("`bounds` must contain 2 elements.")

    if max_nfev is not None and max_nfev <= 0:
        raise ValueError("`max_nfev` must be None or positive integer.")

    if np.iscomplexobj(x0):
        raise ValueError("`x0` must be real.")

    x0 = np.atleast_1d(x0).astype(float)

    if x0.ndim > 1:
        raise ValueError("`x0` must have at most 1 dimension.")

    lb, ub = prepare_bounds(bounds, x0.shape[0])

    if method == 'lm' and not np.all((lb == -np.inf) & (ub == np.inf)):
        raise ValueError("Method 'lm' doesn't support bounds.")

    if lb.shape != x0.shape or ub.shape != x0.shape:
        raise ValueError("Inconsistent shapes between bounds and `x0`.")

    if np.any(lb >= ub):
        raise ValueError("Each lower bound must be strictly less than each "
                         "upper bound.")

    if not in_bounds(x0, lb, ub):
        raise ValueError("`x0` is infeasible.")

    x_scale = check_x_scale(x_scale, x0)

    ftol, xtol, gtol = check_tolerance(ftol, xtol, gtol, method)

    def fun_wrapped(x):
        return np.atleast_1d(fun(x, *args, **kwargs))

    if method == 'trf':
        x0 = make_strictly_feasible(x0, lb, ub)

    f0 = fun_wrapped(x0)

    if f0.ndim != 1:
        raise ValueError("`fun` must return at most 1-d array_like. "
                         "f0.shape: {0}".format(f0.shape))

    if not np.all(np.isfinite(f0)):
        raise ValueError("Residuals are not finite in the initial point.")

    n = x0.size
    m = f0.size

    if method == 'lm' and m < n:
        raise ValueError("Method 'lm' doesn't work when the number of "
                         "residuals is less than the number of variables.")

    loss_function = construct_loss_function(m, loss, f_scale)
    if callable(loss):
        rho = loss_function(f0)
        if rho.shape != (3, m):
            raise ValueError("The return value of `loss` callable has wrong "
                             "shape.")
        initial_cost = 0.5 * np.sum(rho[0])
    elif loss_function is not None:
        initial_cost = loss_function(f0, cost_only=True)
    else:
        initial_cost = 0.5 * np.dot(f0, f0)

    if callable(jac):
        J0 = jac(x0, *args, **kwargs)

        if issparse(J0):
            J0 = J0.tocsr()

            def jac_wrapped(x, _=None):
                return jac(x, *args, **kwargs).tocsr()

        elif isinstance(J0, LinearOperator):
            def jac_wrapped(x, _=None):
                return jac(x, *args, **kwargs)

        else:
            J0 = np.atleast_2d(J0)

            def jac_wrapped(x, _=None):
                return np.atleast_2d(jac(x, *args, **kwargs))

    else:  # Estimate Jacobian by finite differences.
        if method == 'lm':
            if jac_sparsity is not None:
                raise ValueError("method='lm' does not support "
                                 "`jac_sparsity`.")

            if jac != '2-point':
                warn("jac='{0}' works equivalently to '2-point' "
                     "for method='lm'.".format(jac))

            J0 = jac_wrapped = None
        else:
            if jac_sparsity is not None and tr_solver == 'exact':
                raise ValueError("tr_solver='exact' is incompatible "
                                 "with `jac_sparsity`.")

            jac_sparsity = check_jac_sparsity(jac_sparsity, m, n)

            def jac_wrapped(x, f):
                J = approx_derivative(fun, x, rel_step=diff_step, method=jac,
                                      f0=f, bounds=bounds, args=args,
                                      kwargs=kwargs, sparsity=jac_sparsity)
                if J.ndim != 2:  # J is guaranteed not sparse.
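                    # approx_derivative returns a 1-D gradient when the
                    # residual vector has a single component (m == 1), so
                    # promote it to the 2-D (m, n) shape expected by the
                    # solvers.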
                    J = np.atleast_2d(J)

                return J

            J0 = jac_wrapped(x0, f0)

    if J0 is not None:
        if J0.shape != (m, n):
            raise ValueError(
                "The return value of `jac` has wrong shape: expected {0}, "
                "actual {1}.".format((m, n), J0.shape))

        if not isinstance(J0, np.ndarray):
            if method == 'lm':
                raise ValueError("method='lm' works only with dense "
                                 "Jacobian matrices.")

            if tr_solver == 'exact':
                raise ValueError(
                    "tr_solver='exact' works only with dense "
                    "Jacobian matrices.")

        jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
        if isinstance(J0, LinearOperator) and jac_scale:
            raise ValueError("x_scale='jac' can't be used when `jac` "
                             "returns LinearOperator.")

        if tr_solver is None:
            if isinstance(J0, np.ndarray):
                tr_solver = 'exact'
            else:
                tr_solver = 'lsmr'

    if method == 'lm':
        result = call_minpack(fun_wrapped, x0, jac_wrapped, ftol, xtol, gtol,
                              max_nfev, x_scale, diff_step)

    elif method == 'trf':
        result = trf(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol, xtol,
                     gtol, max_nfev, x_scale, loss_function, tr_solver,
                     tr_options.copy(), verbose)

    elif method == 'dogbox':
        if tr_solver == 'lsmr' and 'regularize' in tr_options:
            warn("The keyword 'regularize' in `tr_options` is not relevant "
                 "for 'dogbox' method.")
            tr_options = tr_options.copy()
            del tr_options['regularize']

        result = dogbox(fun_wrapped, jac_wrapped, x0, f0, J0, lb, ub, ftol,
                        xtol, gtol, max_nfev, x_scale, loss_function,
                        tr_solver, tr_options, verbose)

    result.message = TERMINATION_MESSAGES[result.status]
    result.success = result.status > 0

    if verbose >= 1:
        print(result.message)
        print("Function evaluations {0}, initial cost {1:.4e}, final cost "
              "{2:.4e}, first-order optimality {3:.2e}."
              .format(result.nfev, initial_cost, result.cost,
                      result.optimality))

    return result
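
# Illustrative sketch only (not used by `least_squares` itself): a callable
# passed as `loss` must accept ``z = f**2`` and return an array of shape
# (3, m) holding rho(z), rho'(z) and rho''(z), mirroring `soft_l1` above.
# The name below is hypothetical and exists purely to demonstrate the
# expected contract, e.g. ``least_squares(fun, x0, loss=_example_smooth_l1)``.
def _example_smooth_l1(z):
    t = 1 + z
    rho = np.empty((3, z.size))
    rho[0] = 2 * (t**0.5 - 1)   # rho(z)
    rho[1] = t**-0.5            # rho'(z)
    rho[2] = -0.5 * t**-1.5     # rho''(z)
    return rho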