1"""
2Dogleg algorithm with rectangular trust regions for least-squares minimization.
3
4The description of the algorithm can be found in [Voglis]_. The algorithm does
5trust-region iterations, but the shape of trust regions is rectangular as
6opposed to conventional elliptical. The intersection of a trust region and
7an initial feasible region is again some rectangle. Thus, on each iteration a
8bound-constrained quadratic optimization problem is solved.
9
10A quadratic problem is solved by well-known dogleg approach, where the
11function is minimized along piecewise-linear "dogleg" path [NumOpt]_,
12Chapter 4. If Jacobian is not rank-deficient then the function is decreasing
13along this path, and optimization amounts to simply following along this
14path as long as a point stays within the bounds. A constrained Cauchy step
15(along the anti-gradient) is considered for safety in rank deficient cases,
16in this situations the convergence might be slow.
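
The path itself is the standard one from [NumOpt]_, Chapter 4 (here with the
Cauchy point constrained to the rectangle): with ``p_U`` the Cauchy point and
``p_B`` the Gauss-Newton point, the step is sought along::

    p(t) = t * p_U,                       0 <= t <= 1
    p(t) = p_U + (t - 1) * (p_B - p_U),   1 <= t <= 2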

If during the iterations some variable hits the initial bound and the
component of the anti-gradient points outside the feasible region, then the
next dogleg step won't make any progress. At this state such variables
satisfy first-order optimality conditions and are excluded before computing
the next dogleg step.
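
In the implementation below this exclusion test is a single vectorized
comparison (``on_bound`` holds -1, 0 or 1 per variable, ``g = J.T f`` is the
gradient)::

    active_set = on_bound * g < 0
    free_set = ~active_set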

A Gauss-Newton step can be computed exactly by `numpy.linalg.lstsq` (for dense
Jacobian matrices) or by the iterative procedure `scipy.sparse.linalg.lsmr`
(for dense and sparse matrices, or for the Jacobian given as a
LinearOperator). The second option allows solving very large problems (up to
a couple of million residuals on a regular PC), provided the Jacobian matrix
is sufficiently sparse. But note that dogbox is not very good for solving
problems with a large number of constraints, because of the exclusion and
inclusion of variables on each iteration (the required number of function
evaluations might be high, or the accuracy of a solution will be poor); thus
its large-scale usage is probably limited to unconstrained problems.
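
The solver is normally reached through `scipy.optimize.least_squares` with
``method='dogbox'``. A minimal sketch, with an arbitrary residual function
chosen purely for illustration::

    import numpy as np
    from scipy.optimize import least_squares

    def residuals(x):
        return np.array([x[0] - 1.0, 10 * (x[1] - x[0] ** 2)])

    res = least_squares(residuals, x0=[2.0, 2.0], bounds=([0, 0], [3, 3]),
                        method='dogbox', tr_solver='lsmr')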

References
----------
.. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region Dogleg
            Approach for Unconstrained and Bound Constrained Nonlinear
            Optimization", WSEAS International Conference on Applied
            Mathematics, Corfu, Greece, 2004.
.. [NumOpt] J. Nocedal and S. J. Wright, "Numerical Optimization", 2nd ed.,
            Springer, 2006.
"""
import numpy as np
from numpy.linalg import lstsq, norm

from scipy.sparse.linalg import LinearOperator, aslinearoperator, lsmr
from scipy.optimize import OptimizeResult

from .common import (
    step_size_to_bound, in_bounds, update_tr_radius, evaluate_quadratic,
    build_quadratic_1d, minimize_quadratic_1d, compute_grad,
    compute_jac_scale, check_termination, scale_for_robust_loss_function,
    print_header_nonlinear, print_iteration_nonlinear)


def lsmr_operator(Jop, d, active_set):
    """Compute LinearOperator to use in LSMR by dogbox algorithm.

    `active_set` mask is used to exclude active variables from computations
    of matrix-vector products.
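
    Examples
    --------
    A small sketch with hypothetical inputs (this is a private helper, not a
    public API); the active component is masked out of the products::

        import numpy as np
        from scipy.sparse.linalg import aslinearoperator

        Jop = aslinearoperator(np.eye(3))
        op = lsmr_operator(Jop, d=np.ones(3),
                           active_set=np.array([True, False, False]))
        op.rmatvec(np.ones(3))  # array([0., 1., 1.]) -- active entry zeroed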
61    """
    m, n = Jop.shape

    def matvec(x):
        # Zero out the active components before applying J, so the product
        # behaves as if the active columns of J were removed.
        x_free = x.ravel().copy()
        x_free[active_set] = 0
        return Jop.matvec(x_free * d)

    def rmatvec(x):
        r = d * Jop.rmatvec(x)
        r[active_set] = 0
        return r

    return LinearOperator((m, n), matvec=matvec, rmatvec=rmatvec, dtype=float)


def find_intersection(x, tr_bounds, lb, ub):
    """Find intersection of trust-region bounds and initial bounds.

    Returns
    -------
    lb_total, ub_total : ndarray with shape of x
        Lower and upper bounds of the intersection region.
    orig_l, orig_u : ndarray of bool with shape of x
        True means that an original bound is taken as a corresponding bound
        in the intersection region.
    tr_l, tr_u : ndarray of bool with shape of x
        True means that a trust-region bound is taken as a corresponding bound
        in the intersection region.
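
    Examples
    --------
    A small sketch with hypothetical numbers, centered at ``x = 0``::

        import numpy as np

        lb_total, ub_total, orig_l, orig_u, tr_l, tr_u = find_intersection(
            np.zeros(2), tr_bounds=np.array([1.0, 1.0]),
            lb=np.array([-0.5, -2.0]), ub=np.array([2.0, 0.5]))
        # lb_total = [-0.5, -1.0]: the first lower bound comes from lb
        # (orig_l = [True, False]), the second from the trust region
        # (tr_l = [False, True]); symmetrically for the upper bounds.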
90    """
    lb_centered = lb - x
    ub_centered = ub - x

    lb_total = np.maximum(lb_centered, -tr_bounds)
    ub_total = np.minimum(ub_centered, tr_bounds)

    orig_l = np.equal(lb_total, lb_centered)
    orig_u = np.equal(ub_total, ub_centered)

    tr_l = np.equal(lb_total, -tr_bounds)
    tr_u = np.equal(ub_total, tr_bounds)

    return lb_total, ub_total, orig_l, orig_u, tr_l, tr_u


def dogleg_step(x, newton_step, g, a, b, tr_bounds, lb, ub):
    """Find dogleg step in a rectangular region.

    Returns
    -------
    step : ndarray, shape (n,)
        Computed dogleg step.
    bound_hits : ndarray of int, shape (n,)
        Each component shows whether a corresponding variable hits the
        initial bound after the step is taken:
            *  0 - a variable doesn't hit the bound.
            * -1 - lower bound is hit.
            *  1 - upper bound is hit.
    tr_hit : bool
        Whether the step hit the boundary of the trust region.
    """
    lb_total, ub_total, orig_l, orig_u, tr_l, tr_u = find_intersection(
        x, tr_bounds, lb, ub
    )
    bound_hits = np.zeros_like(x, dtype=int)

    if in_bounds(newton_step, lb_total, ub_total):
        return newton_step, bound_hits, False

    to_bounds, _ = step_size_to_bound(np.zeros_like(x), -g, lb_total, ub_total)

    # The classical dogleg algorithm would check whether the Cauchy step fits
    # into the bounds, and just return its constrained version if not. But in
    # a rectangular trust region it makes sense to try to improve the
    # constrained Cauchy step too. Thus, we don't distinguish these two cases.

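    # With s = -g, a*t**2 + b*t models the change of the cost along the
    # anti-gradient; its minimizer over [0, to_bounds] gives the Cauchy step.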
    cauchy_step = -minimize_quadratic_1d(a, b, 0, to_bounds)[0] * g

    step_diff = newton_step - cauchy_step
    step_size, hits = step_size_to_bound(cauchy_step, step_diff,
                                         lb_total, ub_total)
    bound_hits[(hits < 0) & orig_l] = -1
    bound_hits[(hits > 0) & orig_u] = 1
    tr_hit = np.any((hits < 0) & tr_l | (hits > 0) & tr_u)

    return cauchy_step + step_size * step_diff, bound_hits, tr_hit


def dogbox(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
           loss_function, tr_solver, tr_options, verbose):
    f = f0
    f_true = f.copy()
    nfev = 1

    J = J0
    njev = 1

    if loss_function is not None:
        rho = loss_function(f)
        cost = 0.5 * np.sum(rho[0])
        J, f = scale_for_robust_loss_function(J, f, rho)
    else:
        cost = 0.5 * np.dot(f, f)

    g = compute_grad(J, f)

    jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
    if jac_scale:
        scale, scale_inv = compute_jac_scale(J)
    else:
        scale, scale_inv = x_scale, 1 / x_scale

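    # Initial trust-region radius: the inf-norm of x0 in scaled variables.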
    Delta = norm(x0 * scale_inv, ord=np.inf)
    if Delta == 0:
        Delta = 1.0

    on_bound = np.zeros_like(x0, dtype=int)
    on_bound[np.equal(x0, lb)] = -1
    on_bound[np.equal(x0, ub)] = 1

    x = x0
    step = np.empty_like(x0)

    if max_nfev is None:
        max_nfev = x0.size * 100

    termination_status = None
    iteration = 0
    step_norm = None
    actual_reduction = None

    if verbose == 2:
        print_header_nonlinear()

    while True:
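        # Variables sitting on a bound with the anti-gradient pointing
        # outside the feasible region satisfy first-order optimality and
        # are excluded from the dogleg step (see the module docstring).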
        active_set = on_bound * g < 0
        free_set = ~active_set

        g_free = g[free_set]
        g_full = g.copy()
        g[active_set] = 0

        g_norm = norm(g, ord=np.inf)
        if g_norm < gtol:
            termination_status = 1

        if verbose == 2:
            print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
                                      step_norm, g_norm)

        if termination_status is not None or nfev == max_nfev:
            break

        x_free = x[free_set]
        lb_free = lb[free_set]
        ub_free = ub[free_set]
        scale_free = scale[free_set]

        # Compute the (Gauss-)Newton step and build the quadratic model for
        # the Cauchy step.
        if tr_solver == 'exact':
            J_free = J[:, free_set]
            newton_step = lstsq(J_free, -f, rcond=-1)[0]

            # Coefficients for the quadratic model along the anti-gradient.
            a, b = build_quadratic_1d(J_free, g_free, -g_free)
        elif tr_solver == 'lsmr':
            Jop = aslinearoperator(J)

            # We compute the lsmr step in scaled variables and then transform
            # it back to normal variables. If lsmr gave the exact
            # least-squares solution, this would be equivalent to not doing
            # any transformations, but from experience it's better this way.

            # We pass active_set to make computations as if we selected
            # the free subset of J columns, but without actually doing any
            # slicing, which is expensive for sparse matrices and impossible
            # for a LinearOperator.

            lsmr_op = lsmr_operator(Jop, scale, active_set)
            newton_step = -lsmr(lsmr_op, f, **tr_options)[0][free_set]
            newton_step *= scale_free

            # Components of g for active variables were zeroed, so this call
            # is correct and equivalent to using J_free and g_free.
            a, b = build_quadratic_1d(Jop, g, -g)

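        # Inner loop: keep shrinking the trust region until the step gives
        # an actual reduction of the cost (or the evaluation budget runs out).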
        actual_reduction = -1.0
        while actual_reduction <= 0 and nfev < max_nfev:
            tr_bounds = Delta * scale_free

            step_free, on_bound_free, tr_hit = dogleg_step(
                x_free, newton_step, g_free, a, b, tr_bounds, lb_free, ub_free)

            step.fill(0.0)
            step[free_set] = step_free

            if tr_solver == 'exact':
                predicted_reduction = -evaluate_quadratic(J_free, g_free,
                                                          step_free)
            elif tr_solver == 'lsmr':
                predicted_reduction = -evaluate_quadratic(Jop, g, step)

            # gh-11403: ensure the solution is fully within the bounds.
            x_new = np.clip(x + step, lb, ub)

            f_new = fun(x_new)
            nfev += 1

            step_h_norm = norm(step * scale_inv, ord=np.inf)

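            # Non-finite residuals mean the step left the region where the
            # model is meaningful; shrink the trust region and try again.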
            if not np.all(np.isfinite(f_new)):
                Delta = 0.25 * step_h_norm
                continue

            # Usual trust-region step quality estimation.
            if loss_function is not None:
                cost_new = loss_function(f_new, cost_only=True)
            else:
                cost_new = 0.5 * np.dot(f_new, f_new)
            actual_reduction = cost - cost_new

            Delta, ratio = update_tr_radius(
                Delta, actual_reduction, predicted_reduction,
                step_h_norm, tr_hit
            )

            step_norm = norm(step)
            termination_status = check_termination(
                actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)

            if termination_status is not None:
                break

        if actual_reduction > 0:
            on_bound[free_set] = on_bound_free

            x = x_new
            # Set variables exactly at the boundary.
            mask = on_bound == -1
            x[mask] = lb[mask]
            mask = on_bound == 1
            x[mask] = ub[mask]

            f = f_new
            f_true = f.copy()

            cost = cost_new

            J = jac(x, f)
            njev += 1

            if loss_function is not None:
                rho = loss_function(f)
                J, f = scale_for_robust_loss_function(J, f, rho)

            g = compute_grad(J, f)

            if jac_scale:
                scale, scale_inv = compute_jac_scale(J, scale_inv)
        else:
            step_norm = 0
            actual_reduction = 0

        iteration += 1

    if termination_status is None:
        termination_status = 0

    return OptimizeResult(
        x=x, cost=cost, fun=f_true, jac=J, grad=g_full, optimality=g_norm,
        active_mask=on_bound, nfev=nfev, njev=njev, status=termination_status)