1"""
2Solves robust LQ control problems.
3
4"""
5from textwrap import dedent
6import numpy as np
7from .lqcontrol import LQ
8from .quadsums import var_quadratic_sum
9from scipy.linalg import solve, inv, det
10from .matrix_eqn import solve_discrete_lyapunov
11
12
13class RBLQ:
14    r"""
15    Provides methods for analysing infinite horizon robust LQ control
16    problems of the form
17
18    .. math::
19
20        \min_{u_t}  \sum_t \beta^t {x_t' R x_t + u_t' Q u_t }
21
22    subject to
23
24    .. math::
25
26        x_{t+1} = A x_t + B u_t + C w_{t+1}
27
28    and with model misspecification parameter theta.
29
30    Parameters
31    ----------
32    Q : array_like(float, ndim=2)
33        The cost(payoff) matrix for the controls.  See above for more.
34        Q should be k x k and symmetric and positive definite
35    R : array_like(float, ndim=2)
36        The cost(payoff) matrix for the state.  See above for more. R
37        should be n x n and symmetric and non-negative definite
38    A : array_like(float, ndim=2)
39        The matrix that corresponds with the state in the state space
40        system.  A should be n x n
41    B : array_like(float, ndim=2)
42        The matrix that corresponds with the control in the state space
43        system.  B should be n x k
44    C : array_like(float, ndim=2)
45        The matrix that corresponds with the random process in the
46        state space system.  C should be n x j
47    beta : scalar(float)
48        The discount factor in the robust control problem
49    theta : scalar(float)
50        The robustness factor in the robust control problem
51
52    Attributes
53    ----------
54    Q, R, A, B, C, beta, theta : see Parameters
55    k, n, j : scalar(int)
56        The dimensions of the matrices
57
58    """
59
60    def __init__(self, Q, R, A, B, C, beta, theta):
61
62        # == Make sure all matrices can be treated as 2D arrays == #
63        A, B, C, Q, R = list(map(np.atleast_2d, (A, B, C, Q, R)))
64        self.A, self.B, self.C, self.Q, self.R = A, B, C, Q, R
65        # == Record dimensions == #
66        self.k = self.Q.shape[0]
67        self.n = self.R.shape[0]
68        self.j = self.C.shape[1]
69        # == Remaining parameters == #
70        self.beta, self.theta = beta, theta
71        # == Check for case of no control (pure forecasting problem) == #
72        self.pure_forecasting = True if not Q.any() and not B.any() else False
73
74    def __repr__(self):
75        return self.__str__()
76
77    def __str__(self):
78        m = """\
79        Robust linear quadratic control system
80          - beta (discount parameter)   : {b}
81          - theta (robustness factor)   : {th}
82          - n (number of state variables)   : {n}
83          - k (number of control variables) : {k}
84          - j (number of shocks)            : {j}
85        """
86        return dedent(m.format(b=self.beta, n=self.n, k=self.k, j=self.j,
87                               th=self.theta))
88
89    def d_operator(self, P):
90        r"""
91        The D operator, mapping P into
92
93        .. math::
94
95            D(P) := P + PC(\theta I - C'PC)^{-1} C'P.
96
97        Parameters
98        ----------
99        P : array_like(float, ndim=2)
100            A matrix that should be n x n
101
102        Returns
103        -------
104        dP : array_like(float, ndim=2)
105            The matrix P after applying the D operator
106
107        """
108        C, theta = self.C, self.theta
109        I = np.identity(self.j)
110        S1 = np.dot(P, C)
111        S2 = np.dot(C.T, S1)
112
113        dP = P + np.dot(S1, solve(theta * I - S2, S1.T))
114
115        return dP
116
117    def b_operator(self, P):
118        r"""
119        The B operator, mapping P into
120
121        .. math::
122
123            B(P) := R - \beta^2 A'PB(Q + \beta B'PB)^{-1}B'PA + \beta A'PA
124
125        and also returning
126
127        .. math::
128
129            F := (Q + \beta B'PB)^{-1} \beta B'PA
130
131        Parameters
132        ----------
133        P : array_like(float, ndim=2)
134            A matrix that should be n x n
135
136        Returns
137        -------
138        F : array_like(float, ndim=2)
139            The F matrix as defined above
140        new_p : array_like(float, ndim=2)
141            The matrix P after applying the B operator
142
143        """
144        A, B, Q, R, beta = self.A, self.B, self.Q, self.R, self.beta
145        S1 = Q + beta * np.dot(B.T, np.dot(P, B))
146        S2 = beta * np.dot(B.T, np.dot(P, A))
147        S3 = beta * np.dot(A.T, np.dot(P, A))
148        F = solve(S1, S2) if not self.pure_forecasting else np.zeros(
149            (self.k, self.n))
150        new_P = R - np.dot(S2.T, F) + S3
151
152        return F, new_P
153
154    def robust_rule(self, method='doubling'):
155        """
156        This method solves the robust control problem by tricking it
157        into a stacked LQ problem, as described in chapter 2 of Hansen-
158        Sargent's text "Robustness."  The optimal control with observed
159        state is
160
161        .. math::
162
163            u_t = - F x_t
164
165        And the value function is :math:`-x'Px`
166
167        Parameters
168        ----------
169        method : str, optional(default='doubling')
170            Solution method used in solving the associated Riccati
171            equation, str in {'doubling', 'qz'}.
172
173        Returns
174        -------
175        F : array_like(float, ndim=2)
176            The optimal control matrix from above
177        P : array_like(float, ndim=2)
178            The positive semi-definite matrix defining the value
179            function
180        K : array_like(float, ndim=2)
181            the worst-case shock matrix K, where
182            :math:`w_{t+1} = K x_t` is the worst case shock
183
184        """
185        # == Simplify names == #
186        A, B, C, Q, R = self.A, self.B, self.C, self.Q, self.R
187        beta, theta = self.beta, self.theta
188        k, j = self.k, self.j
189        # == Set up LQ version == #
190        I = np.identity(j)
191        Z = np.zeros((k, j))
192
193        if self.pure_forecasting:
194            lq = LQ(-beta*I*theta, R, A, C, beta=beta)
195
196            # == Solve and convert back to robust problem == #
197            P, f, d = lq.stationary_values(method=method)
198            F = np.zeros((self.k, self.n))
199            K = -f[:k, :]
200
201        else:
202            Ba = np.hstack([B, C])
203            Qa = np.vstack([np.hstack([Q, Z]), np.hstack([Z.T, -beta*I*theta])])
204            lq = LQ(Qa, R, A, Ba, beta=beta)
205
206            # == Solve and convert back to robust problem == #
207            P, f, d = lq.stationary_values(method=method)
208            F = f[:k, :]
209            K = -f[k:f.shape[0], :]
210
211        return F, K, P
212
213    def robust_rule_simple(self, P_init=None, max_iter=80, tol=1e-8):
214        """
215        A simple algorithm for computing the robust policy F and the
216        corresponding value function P, based around straightforward
217        iteration with the robust Bellman operator.  This function is
218        easier to understand but one or two orders of magnitude slower
219        than self.robust_rule().  For more information see the docstring
220        of that method.
221
222        Parameters
223        ----------
224        P_init : array_like(float, ndim=2), optional(default=None)
225            The initial guess for the value function matrix.  It will
226            be a matrix of zeros if no guess is given
227        max_iter : scalar(int), optional(default=80)
228            The maximum number of iterations that are allowed
229        tol : scalar(float), optional(default=1e-8)
230            The tolerance for convergence
231
232        Returns
233        -------
234        F : array_like(float, ndim=2)
235            The optimal control matrix from above
236        P : array_like(float, ndim=2)
237            The positive semi-definite matrix defining the value
238            function
239        K : array_like(float, ndim=2)
240            the worst-case shock matrix K, where
241            :math:`w_{t+1} = K x_t` is the worst case shock
242
243        """
244        # == Simplify names == #
245        A, B, C, Q, R = self.A, self.B, self.C, self.Q, self.R
246        beta, theta = self.beta, self.theta
247        # == Set up loop == #
248        P = np.zeros((self.n, self.n)) if P_init is None else P_init
249        iterate, e = 0, tol + 1
250        while iterate < max_iter and e > tol:
251            F, new_P = self.b_operator(self.d_operator(P))
252            e = np.sqrt(np.sum((new_P - P)**2))
253            iterate += 1
254            P = new_P
255        I = np.identity(self.j)
256        S1 = P.dot(C)
257        S2 = C.T.dot(S1)
258        K = inv(theta * I - S2).dot(S1.T).dot(A - B.dot(F))
259
260        return F, K, P
261
262    def F_to_K(self, F, method='doubling'):
263        """
264        Compute agent 2's best cost-minimizing response K, given F.
265
266        Parameters
267        ----------
268        F : array_like(float, ndim=2)
269            A k x n array
270        method : str, optional(default='doubling')
271            Solution method used in solving the associated Riccati
272            equation, str in {'doubling', 'qz'}.
273
274        Returns
275        -------
276        K : array_like(float, ndim=2)
277            Agent's best cost minimizing response for a given F
278        P : array_like(float, ndim=2)
279            The value function for a given F
280
281        """
282        Q2 = self.beta * self.theta
283        R2 = - self.R - np.dot(F.T, np.dot(self.Q, F))
284        A2 = self.A - np.dot(self.B, F)
285        B2 = self.C
286        lq = LQ(Q2, R2, A2, B2, beta=self.beta)
287        neg_P, neg_K, d = lq.stationary_values(method=method)
288
289        return -neg_K, -neg_P
290
291    def K_to_F(self, K, method='doubling'):
292        """
293        Compute agent 1's best value-maximizing response F, given K.
294
295        Parameters
296        ----------
297        K : array_like(float, ndim=2)
298            A j x n array
299        method : str, optional(default='doubling')
300            Solution method used in solving the associated Riccati
301            equation, str in {'doubling', 'qz'}.
302
303        Returns
304        -------
305        F : array_like(float, ndim=2)
306            The policy function for a given K
307        P : array_like(float, ndim=2)
308            The value function for a given K
309
310        """
311        A1 = self.A + np.dot(self.C, K)
312        B1 = self.B
313        Q1 = self.Q
314        R1 = self.R - self.beta * self.theta * np.dot(K.T, K)
315        lq = LQ(Q1, R1, A1, B1, beta=self.beta)
316        P, F, d = lq.stationary_values(method=method)
317
318        return F, P
319
320    def compute_deterministic_entropy(self, F, K, x0):
321        r"""
322
323        Given K and F, compute the value of deterministic entropy, which
324        is
325
326        .. math::
327
328            \sum_t \beta^t x_t' K'K x_t`
329
330        with
331
332        .. math::
333
334            x_{t+1} = (A - BF + CK) x_t
335
336        Parameters
337        ----------
338        F : array_like(float, ndim=2)
339            The policy function, a k x n array
340        K : array_like(float, ndim=2)
341            The worst case matrix, a j x n array
342        x0 : array_like(float, ndim=1)
343            The initial condition for state
344
345        Returns
346        -------
347        e : scalar(int)
348            The deterministic entropy
349
350        """
351        H0 = np.dot(K.T, K)
352        C0 = np.zeros((self.n, 1))
353        A0 = self.A - np.dot(self.B, F) + np.dot(self.C, K)
354        e = var_quadratic_sum(A0, C0, H0, self.beta, x0)
355
356        return e
357
358    def evaluate_F(self, F):
359        """
360        Given a fixed policy F, with the interpretation :math:`u = -F x`, this
361        function computes the matrix :math:`P_F` and constant :math:`d_F`
362        associated with discounted cost :math:`J_F(x) = x' P_F x + d_F`
363
364        Parameters
365        ----------
366        F : array_like(float, ndim=2)
367            The policy function, a k x n array
368
369        Returns
370        -------
371        P_F : array_like(float, ndim=2)
372            Matrix for discounted cost
373        d_F : scalar(float)
374            Constant for discounted cost
375        K_F : array_like(float, ndim=2)
376            Worst case policy
377        O_F : array_like(float, ndim=2)
378            Matrix for discounted entropy
379        o_F : scalar(float)
380            Constant for discounted entropy
381
382        """
383        # == Simplify names == #
384        Q, R, A, B, C = self.Q, self.R, self.A, self.B, self.C
385        beta, theta = self.beta, self.theta
386
387        # == Solve for policies and costs using agent 2's problem == #
388        K_F, P_F = self.F_to_K(F)
389        I = np.identity(self.j)
390        H = inv(I - C.T.dot(P_F.dot(C)) / theta)
391        d_F = np.log(det(H))
392
393        # == Compute O_F and o_F == #
394        AO = np.sqrt(beta) * (A - np.dot(B, F) + np.dot(C, K_F))
395        O_F = solve_discrete_lyapunov(AO.T, beta * np.dot(K_F.T, K_F))
396        ho = (np.trace(H - 1) - d_F) / 2.0
397        tr = np.trace(np.dot(O_F, C.dot(H.dot(C.T))))
398        o_F = (ho + beta * tr) / (1 - beta)
399
400        return K_F, P_F, d_F, O_F, o_F
401