1import numpy as np
2import scipy.sparse as sps
3from ._numdiff import approx_derivative, group_columns
4from ._hessian_update_strategy import HessianUpdateStrategy
5from scipy.sparse.linalg import LinearOperator
6
7
8FD_METHODS = ('2-point', '3-point', 'cs')
9
10
11class ScalarFunction:
12    """Scalar function and its derivatives.
13
14    This class defines a scalar function F: R^n->R and methods for
15    computing or approximating its first and second derivatives.
16
17    Parameters
18    ----------
19    fun : callable
20        evaluates the scalar function. Must be of the form ``fun(x, *args)``,
21        where ``x`` is the argument in the form of a 1-D array and ``args`` is
22        a tuple of any additional fixed parameters needed to completely specify
23        the function. Should return a scalar.
24    x0 : array-like
25        Provides an initial set of variables for evaluating fun. Array of real
26        elements of size (n,), where 'n' is the number of independent
27        variables.
28    args : tuple, optional
29        Any additional fixed parameters needed to completely specify the scalar
30        function.
31    grad : {callable, '2-point', '3-point', 'cs'}
32        Method for computing the gradient vector.
33        If it is a callable, it should be a function that returns the gradient
34        vector:
35
36            ``grad(x, *args) -> array_like, shape (n,)``
37
38        where ``x`` is an array with shape (n,) and ``args`` is a tuple with
39        the fixed parameters.
40        Alternatively, the keywords  {'2-point', '3-point', 'cs'} can be used
41        to select a finite difference scheme for numerical estimation of the
42        gradient with a relative step size. These finite difference schemes
43        obey any specified `bounds`.
44    hess : {callable, '2-point', '3-point', 'cs', HessianUpdateStrategy}
45        Method for computing the Hessian matrix. If it is callable, it should
46        return the  Hessian matrix:
47
48            ``hess(x, *args) -> {LinearOperator, spmatrix, array}, (n, n)``
49
50        where x is a (n,) ndarray and `args` is a tuple with the fixed
51        parameters. Alternatively, the keywords {'2-point', '3-point', 'cs'}
52        select a finite difference scheme for numerical estimation. Or, objects
53        implementing `HessianUpdateStrategy` interface can be used to
54        approximate the Hessian.
55        Whenever the gradient is estimated via finite-differences, the Hessian
56        cannot be estimated with options {'2-point', '3-point', 'cs'} and needs
57        to be estimated using one of the quasi-Newton strategies.
58    finite_diff_rel_step : None or array_like
59        Relative step size to use. The absolute step size is computed as
60        ``h = finite_diff_rel_step * sign(x0) * max(1, abs(x0))``, possibly
61        adjusted to fit into the bounds. For ``method='3-point'`` the sign
62        of `h` is ignored. If None then finite_diff_rel_step is selected
63        automatically,
64    finite_diff_bounds : tuple of array_like
65        Lower and upper bounds on independent variables. Defaults to no bounds,
66        (-np.inf, np.inf). Each bound must match the size of `x0` or be a
67        scalar, in the latter case the bound will be the same for all
68        variables. Use it to limit the range of function evaluation.
69    epsilon : None or array_like, optional
70        Absolute step size to use, possibly adjusted to fit into the bounds.
71        For ``method='3-point'`` the sign of `epsilon` is ignored. By default
72        relative steps are used, only if ``epsilon is not None`` are absolute
73        steps used.
74
75    Notes
76    -----
77    This class implements a memoization logic. There are methods `fun`,
78    `grad`, hess` and corresponding attributes `f`, `g` and `H`. The following
79    things should be considered:
80
81        1. Use only public methods `fun`, `grad` and `hess`.
82        2. After one of the methods is called, the corresponding attribute
83           will be set. However, a subsequent call with a different argument
84           of *any* of the methods may overwrite the attribute.
85    """
86    def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step,
87                 finite_diff_bounds, epsilon=None):
88        if not callable(grad) and grad not in FD_METHODS:
89            raise ValueError(
90                f"`grad` must be either callable or one of {FD_METHODS}."
91            )
92
93        if not (callable(hess) or hess in FD_METHODS
94                or isinstance(hess, HessianUpdateStrategy)):
95            raise ValueError(
96                f"`hess` must be either callable, HessianUpdateStrategy"
97                f" or one of {FD_METHODS}."
98            )
99
100        if grad in FD_METHODS and hess in FD_METHODS:
101            raise ValueError("Whenever the gradient is estimated via "
102                             "finite-differences, we require the Hessian "
103                             "to be estimated using one of the "
104                             "quasi-Newton strategies.")
105
106        # the astype call ensures that self.x is a copy of x0
107        self.x = np.atleast_1d(x0).astype(float)
108        self.n = self.x.size
109        self.nfev = 0
110        self.ngev = 0
111        self.nhev = 0
112        self.f_updated = False
113        self.g_updated = False
114        self.H_updated = False
115
116        finite_diff_options = {}
117        if grad in FD_METHODS:
118            finite_diff_options["method"] = grad
119            finite_diff_options["rel_step"] = finite_diff_rel_step
120            finite_diff_options["abs_step"] = epsilon
121            finite_diff_options["bounds"] = finite_diff_bounds
122        if hess in FD_METHODS:
123            finite_diff_options["method"] = hess
124            finite_diff_options["rel_step"] = finite_diff_rel_step
125            finite_diff_options["abs_step"] = epsilon
126            finite_diff_options["as_linear_operator"] = True
127
128        # Function evaluation
129        def fun_wrapped(x):
130            self.nfev += 1
131            # Send a copy because the user may overwrite it.
132            # Overwriting results in undefined behaviour because
133            # fun(self.x) will change self.x, with the two no longer linked.
134            return fun(np.copy(x), *args)
135
136        def update_fun():
137            self.f = fun_wrapped(self.x)
138
139        self._update_fun_impl = update_fun
140        self._update_fun()
141
142        # Gradient evaluation
143        if callable(grad):
144            def grad_wrapped(x):
145                self.ngev += 1
146                return np.atleast_1d(grad(np.copy(x), *args))
147
148            def update_grad():
149                self.g = grad_wrapped(self.x)
150
151        elif grad in FD_METHODS:
152            def update_grad():
153                self._update_fun()
154                self.ngev += 1
155                self.g = approx_derivative(fun_wrapped, self.x, f0=self.f,
156                                           **finite_diff_options)
157
158        self._update_grad_impl = update_grad
159        self._update_grad()
160
161        # Hessian Evaluation
162        if callable(hess):
163            self.H = hess(np.copy(x0), *args)
164            self.H_updated = True
165            self.nhev += 1
166
167            if sps.issparse(self.H):
168                def hess_wrapped(x):
169                    self.nhev += 1
170                    return sps.csr_matrix(hess(np.copy(x), *args))
171                self.H = sps.csr_matrix(self.H)
172
173            elif isinstance(self.H, LinearOperator):
174                def hess_wrapped(x):
175                    self.nhev += 1
176                    return hess(np.copy(x), *args)
177
178            else:
179                def hess_wrapped(x):
180                    self.nhev += 1
181                    return np.atleast_2d(np.asarray(hess(np.copy(x), *args)))
182                self.H = np.atleast_2d(np.asarray(self.H))
183
184            def update_hess():
185                self.H = hess_wrapped(self.x)
186
187        elif hess in FD_METHODS:
188            def update_hess():
189                self._update_grad()
190                self.H = approx_derivative(grad_wrapped, self.x, f0=self.g,
191                                           **finite_diff_options)
192                return self.H
193
194            update_hess()
195            self.H_updated = True
196        elif isinstance(hess, HessianUpdateStrategy):
197            self.H = hess
198            self.H.initialize(self.n, 'hess')
199            self.H_updated = True
200            self.x_prev = None
201            self.g_prev = None
202
203            def update_hess():
204                self._update_grad()
205                self.H.update(self.x - self.x_prev, self.g - self.g_prev)
206
207        self._update_hess_impl = update_hess
208
209        if isinstance(hess, HessianUpdateStrategy):
210            def update_x(x):
211                self._update_grad()
212                self.x_prev = self.x
213                self.g_prev = self.g
214                # ensure that self.x is a copy of x. Don't store a reference
215                # otherwise the memoization doesn't work properly.
216                self.x = np.atleast_1d(x).astype(float)
217                self.f_updated = False
218                self.g_updated = False
219                self.H_updated = False
220                self._update_hess()
221        else:
222            def update_x(x):
223                # ensure that self.x is a copy of x. Don't store a reference
224                # otherwise the memoization doesn't work properly.
225                self.x = np.atleast_1d(x).astype(float)
226                self.f_updated = False
227                self.g_updated = False
228                self.H_updated = False
229        self._update_x_impl = update_x
230
231    def _update_fun(self):
232        if not self.f_updated:
233            self._update_fun_impl()
234            self.f_updated = True
235
236    def _update_grad(self):
237        if not self.g_updated:
238            self._update_grad_impl()
239            self.g_updated = True
240
241    def _update_hess(self):
242        if not self.H_updated:
243            self._update_hess_impl()
244            self.H_updated = True
245
246    def fun(self, x):
247        if not np.array_equal(x, self.x):
248            self._update_x_impl(x)
249        self._update_fun()
250        return self.f
251
252    def grad(self, x):
253        if not np.array_equal(x, self.x):
254            self._update_x_impl(x)
255        self._update_grad()
256        return self.g
257
258    def hess(self, x):
259        if not np.array_equal(x, self.x):
260            self._update_x_impl(x)
261        self._update_hess()
262        return self.H
263
264    def fun_and_grad(self, x):
265        if not np.array_equal(x, self.x):
266            self._update_x_impl(x)
267        self._update_fun()
268        self._update_grad()
269        return self.f, self.g
270
271
272class VectorFunction:
273    """Vector function and its derivatives.
274
275    This class defines a vector function F: R^n->R^m and methods for
276    computing or approximating its first and second derivatives.
277
278    Notes
279    -----
280    This class implements a memoization logic. There are methods `fun`,
281    `jac`, hess` and corresponding attributes `f`, `J` and `H`. The following
282    things should be considered:
283
284        1. Use only public methods `fun`, `jac` and `hess`.
285        2. After one of the methods is called, the corresponding attribute
286           will be set. However, a subsequent call with a different argument
287           of *any* of the methods may overwrite the attribute.
288    """
289    def __init__(self, fun, x0, jac, hess,
290                 finite_diff_rel_step, finite_diff_jac_sparsity,
291                 finite_diff_bounds, sparse_jacobian):
292        if not callable(jac) and jac not in FD_METHODS:
293            raise ValueError("`jac` must be either callable or one of {}."
294                             .format(FD_METHODS))
295
296        if not (callable(hess) or hess in FD_METHODS
297                or isinstance(hess, HessianUpdateStrategy)):
298            raise ValueError("`hess` must be either callable,"
299                             "HessianUpdateStrategy or one of {}."
300                             .format(FD_METHODS))
301
302        if jac in FD_METHODS and hess in FD_METHODS:
303            raise ValueError("Whenever the Jacobian is estimated via "
304                             "finite-differences, we require the Hessian to "
305                             "be estimated using one of the quasi-Newton "
306                             "strategies.")
307
308        self.x = np.atleast_1d(x0).astype(float)
309        self.n = self.x.size
310        self.nfev = 0
311        self.njev = 0
312        self.nhev = 0
313        self.f_updated = False
314        self.J_updated = False
315        self.H_updated = False
316
317        finite_diff_options = {}
318        if jac in FD_METHODS:
319            finite_diff_options["method"] = jac
320            finite_diff_options["rel_step"] = finite_diff_rel_step
321            if finite_diff_jac_sparsity is not None:
322                sparsity_groups = group_columns(finite_diff_jac_sparsity)
323                finite_diff_options["sparsity"] = (finite_diff_jac_sparsity,
324                                                   sparsity_groups)
325            finite_diff_options["bounds"] = finite_diff_bounds
326            self.x_diff = np.copy(self.x)
327        if hess in FD_METHODS:
328            finite_diff_options["method"] = hess
329            finite_diff_options["rel_step"] = finite_diff_rel_step
330            finite_diff_options["as_linear_operator"] = True
331            self.x_diff = np.copy(self.x)
332        if jac in FD_METHODS and hess in FD_METHODS:
333            raise ValueError("Whenever the Jacobian is estimated via "
334                             "finite-differences, we require the Hessian to "
335                             "be estimated using one of the quasi-Newton "
336                             "strategies.")
337
338        # Function evaluation
339        def fun_wrapped(x):
340            self.nfev += 1
341            return np.atleast_1d(fun(x))
342
343        def update_fun():
344            self.f = fun_wrapped(self.x)
345
346        self._update_fun_impl = update_fun
347        update_fun()
348
349        self.v = np.zeros_like(self.f)
350        self.m = self.v.size
351
352        # Jacobian Evaluation
353        if callable(jac):
354            self.J = jac(self.x)
355            self.J_updated = True
356            self.njev += 1
357
358            if (sparse_jacobian or
359                    sparse_jacobian is None and sps.issparse(self.J)):
360                def jac_wrapped(x):
361                    self.njev += 1
362                    return sps.csr_matrix(jac(x))
363                self.J = sps.csr_matrix(self.J)
364                self.sparse_jacobian = True
365
366            elif sps.issparse(self.J):
367                def jac_wrapped(x):
368                    self.njev += 1
369                    return jac(x).toarray()
370                self.J = self.J.toarray()
371                self.sparse_jacobian = False
372
373            else:
374                def jac_wrapped(x):
375                    self.njev += 1
376                    return np.atleast_2d(jac(x))
377                self.J = np.atleast_2d(self.J)
378                self.sparse_jacobian = False
379
380            def update_jac():
381                self.J = jac_wrapped(self.x)
382
383        elif jac in FD_METHODS:
384            self.J = approx_derivative(fun_wrapped, self.x, f0=self.f,
385                                       **finite_diff_options)
386            self.J_updated = True
387
388            if (sparse_jacobian or
389                    sparse_jacobian is None and sps.issparse(self.J)):
390                def update_jac():
391                    self._update_fun()
392                    self.J = sps.csr_matrix(
393                        approx_derivative(fun_wrapped, self.x, f0=self.f,
394                                          **finite_diff_options))
395                self.J = sps.csr_matrix(self.J)
396                self.sparse_jacobian = True
397
398            elif sps.issparse(self.J):
399                def update_jac():
400                    self._update_fun()
401                    self.J = approx_derivative(fun_wrapped, self.x, f0=self.f,
402                                               **finite_diff_options).toarray()
403                self.J = self.J.toarray()
404                self.sparse_jacobian = False
405
406            else:
407                def update_jac():
408                    self._update_fun()
409                    self.J = np.atleast_2d(
410                        approx_derivative(fun_wrapped, self.x, f0=self.f,
411                                          **finite_diff_options))
412                self.J = np.atleast_2d(self.J)
413                self.sparse_jacobian = False
414
415        self._update_jac_impl = update_jac
416
417        # Define Hessian
418        if callable(hess):
419            self.H = hess(self.x, self.v)
420            self.H_updated = True
421            self.nhev += 1
422
423            if sps.issparse(self.H):
424                def hess_wrapped(x, v):
425                    self.nhev += 1
426                    return sps.csr_matrix(hess(x, v))
427                self.H = sps.csr_matrix(self.H)
428
429            elif isinstance(self.H, LinearOperator):
430                def hess_wrapped(x, v):
431                    self.nhev += 1
432                    return hess(x, v)
433
434            else:
435                def hess_wrapped(x, v):
436                    self.nhev += 1
437                    return np.atleast_2d(np.asarray(hess(x, v)))
438                self.H = np.atleast_2d(np.asarray(self.H))
439
440            def update_hess():
441                self.H = hess_wrapped(self.x, self.v)
442        elif hess in FD_METHODS:
443            def jac_dot_v(x, v):
444                return jac_wrapped(x).T.dot(v)
445
446            def update_hess():
447                self._update_jac()
448                self.H = approx_derivative(jac_dot_v, self.x,
449                                           f0=self.J.T.dot(self.v),
450                                           args=(self.v,),
451                                           **finite_diff_options)
452            update_hess()
453            self.H_updated = True
454        elif isinstance(hess, HessianUpdateStrategy):
455            self.H = hess
456            self.H.initialize(self.n, 'hess')
457            self.H_updated = True
458            self.x_prev = None
459            self.J_prev = None
460
461            def update_hess():
462                self._update_jac()
463                # When v is updated before x was updated, then x_prev and
464                # J_prev are None and we need this check.
465                if self.x_prev is not None and self.J_prev is not None:
466                    delta_x = self.x - self.x_prev
467                    delta_g = self.J.T.dot(self.v) - self.J_prev.T.dot(self.v)
468                    self.H.update(delta_x, delta_g)
469
470        self._update_hess_impl = update_hess
471
472        if isinstance(hess, HessianUpdateStrategy):
473            def update_x(x):
474                self._update_jac()
475                self.x_prev = self.x
476                self.J_prev = self.J
477                self.x = np.atleast_1d(x).astype(float)
478                self.f_updated = False
479                self.J_updated = False
480                self.H_updated = False
481                self._update_hess()
482        else:
483            def update_x(x):
484                self.x = np.atleast_1d(x).astype(float)
485                self.f_updated = False
486                self.J_updated = False
487                self.H_updated = False
488
489        self._update_x_impl = update_x
490
491    def _update_v(self, v):
492        if not np.array_equal(v, self.v):
493            self.v = v
494            self.H_updated = False
495
496    def _update_x(self, x):
497        if not np.array_equal(x, self.x):
498            self._update_x_impl(x)
499
500    def _update_fun(self):
501        if not self.f_updated:
502            self._update_fun_impl()
503            self.f_updated = True
504
505    def _update_jac(self):
506        if not self.J_updated:
507            self._update_jac_impl()
508            self.J_updated = True
509
510    def _update_hess(self):
511        if not self.H_updated:
512            self._update_hess_impl()
513            self.H_updated = True
514
515    def fun(self, x):
516        self._update_x(x)
517        self._update_fun()
518        return self.f
519
520    def jac(self, x):
521        self._update_x(x)
522        self._update_jac()
523        return self.J
524
525    def hess(self, x, v):
526        # v should be updated before x.
527        self._update_v(v)
528        self._update_x(x)
529        self._update_hess()
530        return self.H
531
532
533class LinearVectorFunction:
534    """Linear vector function and its derivatives.
535
536    Defines a linear function F = A x, where x is N-D vector and
537    A is m-by-n matrix. The Jacobian is constant and equals to A. The Hessian
538    is identically zero and it is returned as a csr matrix.
539    """
540    def __init__(self, A, x0, sparse_jacobian):
541        if sparse_jacobian or sparse_jacobian is None and sps.issparse(A):
542            self.J = sps.csr_matrix(A)
543            self.sparse_jacobian = True
544        elif sps.issparse(A):
545            self.J = A.toarray()
546            self.sparse_jacobian = False
547        else:
548            # np.asarray makes sure A is ndarray and not matrix
549            self.J = np.atleast_2d(np.asarray(A))
550            self.sparse_jacobian = False
551
552        self.m, self.n = self.J.shape
553
554        self.x = np.atleast_1d(x0).astype(float)
555        self.f = self.J.dot(self.x)
556        self.f_updated = True
557
558        self.v = np.zeros(self.m, dtype=float)
559        self.H = sps.csr_matrix((self.n, self.n))
560
561    def _update_x(self, x):
562        if not np.array_equal(x, self.x):
563            self.x = np.atleast_1d(x).astype(float)
564            self.f_updated = False
565
566    def fun(self, x):
567        self._update_x(x)
568        if not self.f_updated:
569            self.f = self.J.dot(x)
570            self.f_updated = True
571        return self.f
572
573    def jac(self, x):
574        self._update_x(x)
575        return self.J
576
577    def hess(self, x, v):
578        self._update_x(x)
579        self.v = v
580        return self.H
581
582
583class IdentityVectorFunction(LinearVectorFunction):
584    """Identity vector function and its derivatives.
585
586    The Jacobian is the identity matrix, returned as a dense array when
587    `sparse_jacobian=False` and as a csr matrix otherwise. The Hessian is
588    identically zero and it is returned as a csr matrix.
589    """
590    def __init__(self, x0, sparse_jacobian):
591        n = len(x0)
592        if sparse_jacobian or sparse_jacobian is None:
593            A = sps.eye(n, format='csr')
594            sparse_jacobian = True
595        else:
596            A = np.eye(n)
597            sparse_jacobian = False
598        super().__init__(A, x0, sparse_jacobian)
599