import numpy as np
import scipy.sparse as sps
from ._numdiff import approx_derivative, group_columns
from ._hessian_update_strategy import HessianUpdateStrategy
from scipy.sparse.linalg import LinearOperator


FD_METHODS = ('2-point', '3-point', 'cs')


class ScalarFunction:
    """Scalar function and its derivatives.

    This class defines a scalar function F: R^n->R and methods for
    computing or approximating its first and second derivatives.

    Parameters
    ----------
    fun : callable
        evaluates the scalar function. Must be of the form ``fun(x, *args)``,
        where ``x`` is the argument in the form of a 1-D array and ``args`` is
        a tuple of any additional fixed parameters needed to completely specify
        the function. Should return a scalar.
    x0 : array-like
        Provides an initial set of variables for evaluating fun. Array of real
        elements of size (n,), where 'n' is the number of independent
        variables.
    args : tuple, optional
        Any additional fixed parameters needed to completely specify the scalar
        function.
    grad : {callable, '2-point', '3-point', 'cs'}
        Method for computing the gradient vector.
        If it is a callable, it should be a function that returns the gradient
        vector:

            ``grad(x, *args) -> array_like, shape (n,)``

        where ``x`` is an array with shape (n,) and ``args`` is a tuple with
        the fixed parameters.
        Alternatively, the keywords {'2-point', '3-point', 'cs'} can be used
        to select a finite difference scheme for numerical estimation of the
        gradient with a relative step size. These finite difference schemes
        obey any specified `bounds`.
    hess : {callable, '2-point', '3-point', 'cs', HessianUpdateStrategy}
        Method for computing the Hessian matrix. If it is callable, it should
        return the Hessian matrix:

            ``hess(x, *args) -> {LinearOperator, spmatrix, array}, (n, n)``

        where ``x`` is a (n,) ndarray and ``args`` is a tuple with the fixed
        parameters. Alternatively, the keywords {'2-point', '3-point', 'cs'}
        select a finite difference scheme for numerical estimation. Or, objects
        implementing the `HessianUpdateStrategy` interface can be used to
        approximate the Hessian.
        Whenever the gradient is estimated via finite-differences, the Hessian
        cannot be estimated with options {'2-point', '3-point', 'cs'} and needs
        to be estimated using one of the quasi-Newton strategies.
    finite_diff_rel_step : None or array_like
        Relative step size to use. The absolute step size is computed as
        ``h = finite_diff_rel_step * sign(x0) * max(1, abs(x0))``, possibly
        adjusted to fit into the bounds. For ``method='3-point'`` the sign
        of `h` is ignored. If None then finite_diff_rel_step is selected
        automatically.
    finite_diff_bounds : tuple of array_like
        Lower and upper bounds on independent variables. Defaults to no bounds,
        (-np.inf, np.inf). Each bound must match the size of `x0` or be a
        scalar, in the latter case the bound will be the same for all
        variables. Use it to limit the range of function evaluation.
    epsilon : None or array_like, optional
        Absolute step size to use, possibly adjusted to fit into the bounds.
        For ``method='3-point'`` the sign of `epsilon` is ignored. By default
        relative steps are used; absolute steps are used only if
        ``epsilon is not None``.

    Notes
    -----
    This class implements a memoization logic. There are methods `fun`,
    `grad`, `hess` and corresponding attributes `f`, `g` and `H`. The
    following things should be considered:

    1. Use only public methods `fun`, `grad` and `hess`.
    2. After one of the methods is called, the corresponding attribute
       will be set. However, a subsequent call with a different argument
       of *any* of the methods may overwrite the attribute.
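
    Examples
    --------
    A minimal usage sketch; the quadratic ``f`` below is illustrative only,
    and the import assumes this module lives at
    ``scipy.optimize._differentiable_functions``:

    >>> import numpy as np
    >>> from scipy.optimize import BFGS
    >>> from scipy.optimize._differentiable_functions import ScalarFunction
    >>> def f(x):
    ...     return np.sum(x**2)
    >>> sf = ScalarFunction(f, [1.0, 2.0], (), '2-point', BFGS(),
    ...                     None, (-np.inf, np.inf))
    >>> float(sf.fun(np.array([1.0, 2.0])))
    5.0
    >>> bool(np.allclose(sf.grad(np.array([1.0, 2.0])), [2.0, 4.0]))
    True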
    """
    def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step,
                 finite_diff_bounds, epsilon=None):
        if not callable(grad) and grad not in FD_METHODS:
            raise ValueError(
                f"`grad` must be either callable or one of {FD_METHODS}."
            )

        if not (callable(hess) or hess in FD_METHODS
                or isinstance(hess, HessianUpdateStrategy)):
            raise ValueError(
                "`hess` must be either callable, HessianUpdateStrategy"
                f" or one of {FD_METHODS}."
            )

        if grad in FD_METHODS and hess in FD_METHODS:
            raise ValueError("Whenever the gradient is estimated via "
                             "finite-differences, we require the Hessian "
                             "to be estimated using one of the "
                             "quasi-Newton strategies.")

        # the astype call ensures that self.x is a copy of x0
        self.x = np.atleast_1d(x0).astype(float)
        self.n = self.x.size
        self.nfev = 0
        self.ngev = 0
        self.nhev = 0
        self.f_updated = False
        self.g_updated = False
        self.H_updated = False

        finite_diff_options = {}
        if grad in FD_METHODS:
            finite_diff_options["method"] = grad
            finite_diff_options["rel_step"] = finite_diff_rel_step
            finite_diff_options["abs_step"] = epsilon
            finite_diff_options["bounds"] = finite_diff_bounds
        if hess in FD_METHODS:
            finite_diff_options["method"] = hess
            finite_diff_options["rel_step"] = finite_diff_rel_step
            finite_diff_options["abs_step"] = epsilon
            finite_diff_options["as_linear_operator"] = True
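        # Note: ``as_linear_operator=True`` makes `approx_derivative` return
        # the finite-difference Hessian as a
        # `scipy.sparse.linalg.LinearOperator` instead of a dense array.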

        # Function evaluation
        def fun_wrapped(x):
            self.nfev += 1
            # Send a copy because the user may overwrite it.
            # Overwriting results in undefined behaviour because
            # fun(self.x) will change self.x, with the two no longer linked.
            return fun(np.copy(x), *args)

        def update_fun():
            self.f = fun_wrapped(self.x)

        self._update_fun_impl = update_fun
        self._update_fun()

        # Gradient evaluation
        if callable(grad):
            def grad_wrapped(x):
                self.ngev += 1
                return np.atleast_1d(grad(np.copy(x), *args))

            def update_grad():
                self.g = grad_wrapped(self.x)

        elif grad in FD_METHODS:
            def update_grad():
                self._update_fun()
                self.ngev += 1
                self.g = approx_derivative(fun_wrapped, self.x, f0=self.f,
                                           **finite_diff_options)

        self._update_grad_impl = update_grad
        self._update_grad()

        # Hessian Evaluation
        if callable(hess):
            self.H = hess(np.copy(x0), *args)
            self.H_updated = True
            self.nhev += 1

            if sps.issparse(self.H):
                def hess_wrapped(x):
                    self.nhev += 1
                    return sps.csr_matrix(hess(np.copy(x), *args))
                self.H = sps.csr_matrix(self.H)

            elif isinstance(self.H, LinearOperator):
                def hess_wrapped(x):
                    self.nhev += 1
                    return hess(np.copy(x), *args)

            else:
                def hess_wrapped(x):
                    self.nhev += 1
                    return np.atleast_2d(np.asarray(hess(np.copy(x), *args)))
                self.H = np.atleast_2d(np.asarray(self.H))

            def update_hess():
                self.H = hess_wrapped(self.x)

        elif hess in FD_METHODS:
            def update_hess():
                self._update_grad()
                self.H = approx_derivative(grad_wrapped, self.x, f0=self.g,
                                           **finite_diff_options)
                return self.H

            update_hess()
            self.H_updated = True
        elif isinstance(hess, HessianUpdateStrategy):
            self.H = hess
            self.H.initialize(self.n, 'hess')
            self.H_updated = True
            self.x_prev = None
            self.g_prev = None

            def update_hess():
                self._update_grad()
                self.H.update(self.x - self.x_prev, self.g - self.g_prev)

        self._update_hess_impl = update_hess

        if isinstance(hess, HessianUpdateStrategy):
            def update_x(x):
                self._update_grad()
                self.x_prev = self.x
                self.g_prev = self.g
                # ensure that self.x is a copy of x. Don't store a reference
                # otherwise the memoization doesn't work properly.
                self.x = np.atleast_1d(x).astype(float)
                self.f_updated = False
                self.g_updated = False
                self.H_updated = False
                self._update_hess()
        else:
            def update_x(x):
                # ensure that self.x is a copy of x. Don't store a reference
                # otherwise the memoization doesn't work properly.
                self.x = np.atleast_1d(x).astype(float)
                self.f_updated = False
                self.g_updated = False
                self.H_updated = False
        self._update_x_impl = update_x
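
    # The _update_* helpers below implement the memoization: each quantity
    # is recomputed only if the corresponding *_updated flag was invalidated
    # by a move to a new point x.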
    def _update_fun(self):
        if not self.f_updated:
            self._update_fun_impl()
            self.f_updated = True

    def _update_grad(self):
        if not self.g_updated:
            self._update_grad_impl()
            self.g_updated = True

    def _update_hess(self):
        if not self.H_updated:
            self._update_hess_impl()
            self.H_updated = True

    def fun(self, x):
        if not np.array_equal(x, self.x):
            self._update_x_impl(x)
        self._update_fun()
        return self.f

    def grad(self, x):
        if not np.array_equal(x, self.x):
            self._update_x_impl(x)
        self._update_grad()
        return self.g

    def hess(self, x):
        if not np.array_equal(x, self.x):
            self._update_x_impl(x)
        self._update_hess()
        return self.H

    def fun_and_grad(self, x):
        if not np.array_equal(x, self.x):
            self._update_x_impl(x)
        self._update_fun()
        self._update_grad()
        return self.f, self.g


class VectorFunction:
    """Vector function and its derivatives.

    This class defines a vector function F: R^n->R^m and methods for
    computing or approximating its first and second derivatives.

    Notes
    -----
    This class implements a memoization logic. There are methods `fun`,
    `jac`, `hess` and corresponding attributes `f`, `J` and `H`. The
    following things should be considered:

    1. Use only public methods `fun`, `jac` and `hess`.
    2. After one of the methods is called, the corresponding attribute
       will be set. However, a subsequent call with a different argument
       of *any* of the methods may overwrite the attribute.
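
    Examples
    --------
    A minimal usage sketch; the vector function ``f`` below is illustrative
    only, and the import assumes this module lives at
    ``scipy.optimize._differentiable_functions``:

    >>> import numpy as np
    >>> from scipy.optimize import BFGS
    >>> from scipy.optimize._differentiable_functions import VectorFunction
    >>> def f(x):
    ...     return np.array([x[0]**2, x[0] + x[1]])
    >>> vf = VectorFunction(f, [1.0, 2.0], '2-point', BFGS(),
    ...                     None, None, (-np.inf, np.inf), None)
    >>> bool(np.allclose(vf.jac([1.0, 2.0]), [[2.0, 0.0], [1.0, 1.0]]))
    True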
    """
    def __init__(self, fun, x0, jac, hess,
                 finite_diff_rel_step, finite_diff_jac_sparsity,
                 finite_diff_bounds, sparse_jacobian):
        if not callable(jac) and jac not in FD_METHODS:
            raise ValueError("`jac` must be either callable or one of {}."
                             .format(FD_METHODS))

        if not (callable(hess) or hess in FD_METHODS
                or isinstance(hess, HessianUpdateStrategy)):
            raise ValueError("`hess` must be either callable, "
                             "HessianUpdateStrategy or one of {}."
                             .format(FD_METHODS))

        if jac in FD_METHODS and hess in FD_METHODS:
            raise ValueError("Whenever the Jacobian is estimated via "
                             "finite-differences, we require the Hessian to "
                             "be estimated using one of the quasi-Newton "
                             "strategies.")

        self.x = np.atleast_1d(x0).astype(float)
        self.n = self.x.size
        self.nfev = 0
        self.njev = 0
        self.nhev = 0
        self.f_updated = False
        self.J_updated = False
        self.H_updated = False

        finite_diff_options = {}
        if jac in FD_METHODS:
            finite_diff_options["method"] = jac
            finite_diff_options["rel_step"] = finite_diff_rel_step
            if finite_diff_jac_sparsity is not None:
                sparsity_groups = group_columns(finite_diff_jac_sparsity)
                finite_diff_options["sparsity"] = (finite_diff_jac_sparsity,
                                                   sparsity_groups)
            finite_diff_options["bounds"] = finite_diff_bounds
            self.x_diff = np.copy(self.x)
        if hess in FD_METHODS:
            finite_diff_options["method"] = hess
            finite_diff_options["rel_step"] = finite_diff_rel_step
            finite_diff_options["as_linear_operator"] = True
            self.x_diff = np.copy(self.x)

        # Function evaluation
        def fun_wrapped(x):
            self.nfev += 1
            return np.atleast_1d(fun(x))

        def update_fun():
            self.f = fun_wrapped(self.x)

        self._update_fun_impl = update_fun
        update_fun()

        self.v = np.zeros_like(self.f)
        self.m = self.v.size

        # Jacobian Evaluation
        if callable(jac):
            self.J = jac(self.x)
            self.J_updated = True
            self.njev += 1

            if (sparse_jacobian or
                    sparse_jacobian is None and sps.issparse(self.J)):
                def jac_wrapped(x):
                    self.njev += 1
                    return sps.csr_matrix(jac(x))
                self.J = sps.csr_matrix(self.J)
                self.sparse_jacobian = True

            elif sps.issparse(self.J):
                def jac_wrapped(x):
                    self.njev += 1
                    return jac(x).toarray()
                self.J = self.J.toarray()
                self.sparse_jacobian = False

            else:
                def jac_wrapped(x):
                    self.njev += 1
                    return np.atleast_2d(jac(x))
                self.J = np.atleast_2d(self.J)
                self.sparse_jacobian = False

            def update_jac():
                self.J = jac_wrapped(self.x)

        elif jac in FD_METHODS:
            self.J = approx_derivative(fun_wrapped, self.x, f0=self.f,
                                       **finite_diff_options)
            self.J_updated = True

            if (sparse_jacobian or
                    sparse_jacobian is None and sps.issparse(self.J)):
                def update_jac():
                    self._update_fun()
                    self.J = sps.csr_matrix(
                        approx_derivative(fun_wrapped, self.x, f0=self.f,
                                          **finite_diff_options))
                self.J = sps.csr_matrix(self.J)
                self.sparse_jacobian = True

            elif sps.issparse(self.J):
                def update_jac():
                    self._update_fun()
                    self.J = approx_derivative(fun_wrapped, self.x, f0=self.f,
                                               **finite_diff_options).toarray()
                self.J = self.J.toarray()
                self.sparse_jacobian = False

            else:
                def update_jac():
                    self._update_fun()
                    self.J = np.atleast_2d(
                        approx_derivative(fun_wrapped, self.x, f0=self.f,
                                          **finite_diff_options))
                self.J = np.atleast_2d(self.J)
                self.sparse_jacobian = False

        self._update_jac_impl = update_jac

        # Define Hessian
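        # The "Hessian" of the vector function is understood as the Hessian
        # of the scalar product v.dot(F(x)): H(x, v) = sum_i v_i * H_i(x),
        # where H_i is the Hessian of the i-th component of F and v
        # typically holds Lagrange multipliers.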
        if callable(hess):
            self.H = hess(self.x, self.v)
            self.H_updated = True
            self.nhev += 1

            if sps.issparse(self.H):
                def hess_wrapped(x, v):
                    self.nhev += 1
                    return sps.csr_matrix(hess(x, v))
                self.H = sps.csr_matrix(self.H)

            elif isinstance(self.H, LinearOperator):
                def hess_wrapped(x, v):
                    self.nhev += 1
                    return hess(x, v)

            else:
                def hess_wrapped(x, v):
                    self.nhev += 1
                    return np.atleast_2d(np.asarray(hess(x, v)))
                self.H = np.atleast_2d(np.asarray(self.H))

            def update_hess():
                self.H = hess_wrapped(self.x, self.v)
        elif hess in FD_METHODS:
            def jac_dot_v(x, v):
                return jac_wrapped(x).T.dot(v)

            def update_hess():
                self._update_jac()
                self.H = approx_derivative(jac_dot_v, self.x,
                                           f0=self.J.T.dot(self.v),
                                           args=(self.v,),
                                           **finite_diff_options)

            update_hess()
            self.H_updated = True
        elif isinstance(hess, HessianUpdateStrategy):
            self.H = hess
            self.H.initialize(self.n, 'hess')
            self.H_updated = True
            self.x_prev = None
            self.J_prev = None

            def update_hess():
                self._update_jac()
                # When v is updated before x was updated, then x_prev and
                # J_prev are None and we need this check.
                if self.x_prev is not None and self.J_prev is not None:
                    delta_x = self.x - self.x_prev
                    delta_g = self.J.T.dot(self.v) - self.J_prev.T.dot(self.v)
                    self.H.update(delta_x, delta_g)

        self._update_hess_impl = update_hess

        if isinstance(hess, HessianUpdateStrategy):
            def update_x(x):
                self._update_jac()
                self.x_prev = self.x
                self.J_prev = self.J
                self.x = np.atleast_1d(x).astype(float)
                self.f_updated = False
                self.J_updated = False
                self.H_updated = False
                self._update_hess()
        else:
            def update_x(x):
                self.x = np.atleast_1d(x).astype(float)
                self.f_updated = False
                self.J_updated = False
                self.H_updated = False

        self._update_x_impl = update_x

    def _update_v(self, v):
        if not np.array_equal(v, self.v):
            self.v = v
            self.H_updated = False

    def _update_x(self, x):
        if not np.array_equal(x, self.x):
            self._update_x_impl(x)

    def _update_fun(self):
        if not self.f_updated:
            self._update_fun_impl()
            self.f_updated = True

    def _update_jac(self):
        if not self.J_updated:
            self._update_jac_impl()
            self.J_updated = True

    def _update_hess(self):
        if not self.H_updated:
            self._update_hess_impl()
            self.H_updated = True

    def fun(self, x):
        self._update_x(x)
        self._update_fun()
        return self.f

    def jac(self, x):
        self._update_x(x)
        self._update_jac()
        return self.J

    def hess(self, x, v):
        # v should be updated before x.
        self._update_v(v)
        self._update_x(x)
        self._update_hess()
        return self.H


class LinearVectorFunction:
    """Linear vector function and its derivatives.

    Defines a linear function F = A x, where x is an n-dimensional vector and
    A is an m-by-n matrix. The Jacobian is constant and equals A. The Hessian
    is identically zero and it is returned as a csr matrix.
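
    Examples
    --------
    A small illustration with an arbitrary matrix, assuming this module is
    importable as ``scipy.optimize._differentiable_functions``:

    >>> import numpy as np
    >>> from scipy.optimize._differentiable_functions import (
    ...     LinearVectorFunction)
    >>> A = np.array([[1.0, 2.0], [3.0, 4.0]])
    >>> lvf = LinearVectorFunction(A, [1.0, 1.0], None)
    >>> lvf.fun([1.0, 1.0])
    array([3., 7.])
    >>> lvf.jac([1.0, 1.0])
    array([[1., 2.],
           [3., 4.]])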
539 """ 540 def __init__(self, A, x0, sparse_jacobian): 541 if sparse_jacobian or sparse_jacobian is None and sps.issparse(A): 542 self.J = sps.csr_matrix(A) 543 self.sparse_jacobian = True 544 elif sps.issparse(A): 545 self.J = A.toarray() 546 self.sparse_jacobian = False 547 else: 548 # np.asarray makes sure A is ndarray and not matrix 549 self.J = np.atleast_2d(np.asarray(A)) 550 self.sparse_jacobian = False 551 552 self.m, self.n = self.J.shape 553 554 self.x = np.atleast_1d(x0).astype(float) 555 self.f = self.J.dot(self.x) 556 self.f_updated = True 557 558 self.v = np.zeros(self.m, dtype=float) 559 self.H = sps.csr_matrix((self.n, self.n)) 560 561 def _update_x(self, x): 562 if not np.array_equal(x, self.x): 563 self.x = np.atleast_1d(x).astype(float) 564 self.f_updated = False 565 566 def fun(self, x): 567 self._update_x(x) 568 if not self.f_updated: 569 self.f = self.J.dot(x) 570 self.f_updated = True 571 return self.f 572 573 def jac(self, x): 574 self._update_x(x) 575 return self.J 576 577 def hess(self, x, v): 578 self._update_x(x) 579 self.v = v 580 return self.H 581 582 583class IdentityVectorFunction(LinearVectorFunction): 584 """Identity vector function and its derivatives. 585 586 The Jacobian is the identity matrix, returned as a dense array when 587 `sparse_jacobian=False` and as a csr matrix otherwise. The Hessian is 588 identically zero and it is returned as a csr matrix. 589 """ 590 def __init__(self, x0, sparse_jacobian): 591 n = len(x0) 592 if sparse_jacobian or sparse_jacobian is None: 593 A = sps.eye(n, format='csr') 594 sparse_jacobian = True 595 else: 596 A = np.eye(n) 597 sparse_jacobian = False 598 super().__init__(A, x0, sparse_jacobian) 599