1""" 2Solves robust LQ control problems. 3 4""" 5from textwrap import dedent 6import numpy as np 7from .lqcontrol import LQ 8from .quadsums import var_quadratic_sum 9from scipy.linalg import solve, inv, det 10from .matrix_eqn import solve_discrete_lyapunov 11 12 13class RBLQ: 14 r""" 15 Provides methods for analysing infinite horizon robust LQ control 16 problems of the form 17 18 .. math:: 19 20 \min_{u_t} \sum_t \beta^t {x_t' R x_t + u_t' Q u_t } 21 22 subject to 23 24 .. math:: 25 26 x_{t+1} = A x_t + B u_t + C w_{t+1} 27 28 and with model misspecification parameter theta. 29 30 Parameters 31 ---------- 32 Q : array_like(float, ndim=2) 33 The cost(payoff) matrix for the controls. See above for more. 34 Q should be k x k and symmetric and positive definite 35 R : array_like(float, ndim=2) 36 The cost(payoff) matrix for the state. See above for more. R 37 should be n x n and symmetric and non-negative definite 38 A : array_like(float, ndim=2) 39 The matrix that corresponds with the state in the state space 40 system. A should be n x n 41 B : array_like(float, ndim=2) 42 The matrix that corresponds with the control in the state space 43 system. B should be n x k 44 C : array_like(float, ndim=2) 45 The matrix that corresponds with the random process in the 46 state space system. C should be n x j 47 beta : scalar(float) 48 The discount factor in the robust control problem 49 theta : scalar(float) 50 The robustness factor in the robust control problem 51 52 Attributes 53 ---------- 54 Q, R, A, B, C, beta, theta : see Parameters 55 k, n, j : scalar(int) 56 The dimensions of the matrices 57 58 """ 59 60 def __init__(self, Q, R, A, B, C, beta, theta): 61 62 # == Make sure all matrices can be treated as 2D arrays == # 63 A, B, C, Q, R = list(map(np.atleast_2d, (A, B, C, Q, R))) 64 self.A, self.B, self.C, self.Q, self.R = A, B, C, Q, R 65 # == Record dimensions == # 66 self.k = self.Q.shape[0] 67 self.n = self.R.shape[0] 68 self.j = self.C.shape[1] 69 # == Remaining parameters == # 70 self.beta, self.theta = beta, theta 71 # == Check for case of no control (pure forecasting problem) == # 72 self.pure_forecasting = True if not Q.any() and not B.any() else False 73 74 def __repr__(self): 75 return self.__str__() 76 77 def __str__(self): 78 m = """\ 79 Robust linear quadratic control system 80 - beta (discount parameter) : {b} 81 - theta (robustness factor) : {th} 82 - n (number of state variables) : {n} 83 - k (number of control variables) : {k} 84 - j (number of shocks) : {j} 85 """ 86 return dedent(m.format(b=self.beta, n=self.n, k=self.k, j=self.j, 87 th=self.theta)) 88 89 def d_operator(self, P): 90 r""" 91 The D operator, mapping P into 92 93 .. math:: 94 95 D(P) := P + PC(\theta I - C'PC)^{-1} C'P. 96 97 Parameters 98 ---------- 99 P : array_like(float, ndim=2) 100 A matrix that should be n x n 101 102 Returns 103 ------- 104 dP : array_like(float, ndim=2) 105 The matrix P after applying the D operator 106 107 """ 108 C, theta = self.C, self.theta 109 I = np.identity(self.j) 110 S1 = np.dot(P, C) 111 S2 = np.dot(C.T, S1) 112 113 dP = P + np.dot(S1, solve(theta * I - S2, S1.T)) 114 115 return dP 116 117 def b_operator(self, P): 118 r""" 119 The B operator, mapping P into 120 121 .. math:: 122 123 B(P) := R - \beta^2 A'PB(Q + \beta B'PB)^{-1}B'PA + \beta A'PA 124 125 and also returning 126 127 .. math:: 128 129 F := (Q + \beta B'PB)^{-1} \beta B'PA 130 131 Parameters 132 ---------- 133 P : array_like(float, ndim=2) 134 A matrix that should be n x n 135 136 Returns 137 ------- 138 F : array_like(float, ndim=2) 139 The F matrix as defined above 140 new_p : array_like(float, ndim=2) 141 The matrix P after applying the B operator 142 143 """ 144 A, B, Q, R, beta = self.A, self.B, self.Q, self.R, self.beta 145 S1 = Q + beta * np.dot(B.T, np.dot(P, B)) 146 S2 = beta * np.dot(B.T, np.dot(P, A)) 147 S3 = beta * np.dot(A.T, np.dot(P, A)) 148 F = solve(S1, S2) if not self.pure_forecasting else np.zeros( 149 (self.k, self.n)) 150 new_P = R - np.dot(S2.T, F) + S3 151 152 return F, new_P 153 154 def robust_rule(self, method='doubling'): 155 """ 156 This method solves the robust control problem by tricking it 157 into a stacked LQ problem, as described in chapter 2 of Hansen- 158 Sargent's text "Robustness." The optimal control with observed 159 state is 160 161 .. math:: 162 163 u_t = - F x_t 164 165 And the value function is :math:`-x'Px` 166 167 Parameters 168 ---------- 169 method : str, optional(default='doubling') 170 Solution method used in solving the associated Riccati 171 equation, str in {'doubling', 'qz'}. 172 173 Returns 174 ------- 175 F : array_like(float, ndim=2) 176 The optimal control matrix from above 177 P : array_like(float, ndim=2) 178 The positive semi-definite matrix defining the value 179 function 180 K : array_like(float, ndim=2) 181 the worst-case shock matrix K, where 182 :math:`w_{t+1} = K x_t` is the worst case shock 183 184 """ 185 # == Simplify names == # 186 A, B, C, Q, R = self.A, self.B, self.C, self.Q, self.R 187 beta, theta = self.beta, self.theta 188 k, j = self.k, self.j 189 # == Set up LQ version == # 190 I = np.identity(j) 191 Z = np.zeros((k, j)) 192 193 if self.pure_forecasting: 194 lq = LQ(-beta*I*theta, R, A, C, beta=beta) 195 196 # == Solve and convert back to robust problem == # 197 P, f, d = lq.stationary_values(method=method) 198 F = np.zeros((self.k, self.n)) 199 K = -f[:k, :] 200 201 else: 202 Ba = np.hstack([B, C]) 203 Qa = np.vstack([np.hstack([Q, Z]), np.hstack([Z.T, -beta*I*theta])]) 204 lq = LQ(Qa, R, A, Ba, beta=beta) 205 206 # == Solve and convert back to robust problem == # 207 P, f, d = lq.stationary_values(method=method) 208 F = f[:k, :] 209 K = -f[k:f.shape[0], :] 210 211 return F, K, P 212 213 def robust_rule_simple(self, P_init=None, max_iter=80, tol=1e-8): 214 """ 215 A simple algorithm for computing the robust policy F and the 216 corresponding value function P, based around straightforward 217 iteration with the robust Bellman operator. This function is 218 easier to understand but one or two orders of magnitude slower 219 than self.robust_rule(). For more information see the docstring 220 of that method. 221 222 Parameters 223 ---------- 224 P_init : array_like(float, ndim=2), optional(default=None) 225 The initial guess for the value function matrix. It will 226 be a matrix of zeros if no guess is given 227 max_iter : scalar(int), optional(default=80) 228 The maximum number of iterations that are allowed 229 tol : scalar(float), optional(default=1e-8) 230 The tolerance for convergence 231 232 Returns 233 ------- 234 F : array_like(float, ndim=2) 235 The optimal control matrix from above 236 P : array_like(float, ndim=2) 237 The positive semi-definite matrix defining the value 238 function 239 K : array_like(float, ndim=2) 240 the worst-case shock matrix K, where 241 :math:`w_{t+1} = K x_t` is the worst case shock 242 243 """ 244 # == Simplify names == # 245 A, B, C, Q, R = self.A, self.B, self.C, self.Q, self.R 246 beta, theta = self.beta, self.theta 247 # == Set up loop == # 248 P = np.zeros((self.n, self.n)) if P_init is None else P_init 249 iterate, e = 0, tol + 1 250 while iterate < max_iter and e > tol: 251 F, new_P = self.b_operator(self.d_operator(P)) 252 e = np.sqrt(np.sum((new_P - P)**2)) 253 iterate += 1 254 P = new_P 255 I = np.identity(self.j) 256 S1 = P.dot(C) 257 S2 = C.T.dot(S1) 258 K = inv(theta * I - S2).dot(S1.T).dot(A - B.dot(F)) 259 260 return F, K, P 261 262 def F_to_K(self, F, method='doubling'): 263 """ 264 Compute agent 2's best cost-minimizing response K, given F. 265 266 Parameters 267 ---------- 268 F : array_like(float, ndim=2) 269 A k x n array 270 method : str, optional(default='doubling') 271 Solution method used in solving the associated Riccati 272 equation, str in {'doubling', 'qz'}. 273 274 Returns 275 ------- 276 K : array_like(float, ndim=2) 277 Agent's best cost minimizing response for a given F 278 P : array_like(float, ndim=2) 279 The value function for a given F 280 281 """ 282 Q2 = self.beta * self.theta 283 R2 = - self.R - np.dot(F.T, np.dot(self.Q, F)) 284 A2 = self.A - np.dot(self.B, F) 285 B2 = self.C 286 lq = LQ(Q2, R2, A2, B2, beta=self.beta) 287 neg_P, neg_K, d = lq.stationary_values(method=method) 288 289 return -neg_K, -neg_P 290 291 def K_to_F(self, K, method='doubling'): 292 """ 293 Compute agent 1's best value-maximizing response F, given K. 294 295 Parameters 296 ---------- 297 K : array_like(float, ndim=2) 298 A j x n array 299 method : str, optional(default='doubling') 300 Solution method used in solving the associated Riccati 301 equation, str in {'doubling', 'qz'}. 302 303 Returns 304 ------- 305 F : array_like(float, ndim=2) 306 The policy function for a given K 307 P : array_like(float, ndim=2) 308 The value function for a given K 309 310 """ 311 A1 = self.A + np.dot(self.C, K) 312 B1 = self.B 313 Q1 = self.Q 314 R1 = self.R - self.beta * self.theta * np.dot(K.T, K) 315 lq = LQ(Q1, R1, A1, B1, beta=self.beta) 316 P, F, d = lq.stationary_values(method=method) 317 318 return F, P 319 320 def compute_deterministic_entropy(self, F, K, x0): 321 r""" 322 323 Given K and F, compute the value of deterministic entropy, which 324 is 325 326 .. math:: 327 328 \sum_t \beta^t x_t' K'K x_t` 329 330 with 331 332 .. math:: 333 334 x_{t+1} = (A - BF + CK) x_t 335 336 Parameters 337 ---------- 338 F : array_like(float, ndim=2) 339 The policy function, a k x n array 340 K : array_like(float, ndim=2) 341 The worst case matrix, a j x n array 342 x0 : array_like(float, ndim=1) 343 The initial condition for state 344 345 Returns 346 ------- 347 e : scalar(int) 348 The deterministic entropy 349 350 """ 351 H0 = np.dot(K.T, K) 352 C0 = np.zeros((self.n, 1)) 353 A0 = self.A - np.dot(self.B, F) + np.dot(self.C, K) 354 e = var_quadratic_sum(A0, C0, H0, self.beta, x0) 355 356 return e 357 358 def evaluate_F(self, F): 359 """ 360 Given a fixed policy F, with the interpretation :math:`u = -F x`, this 361 function computes the matrix :math:`P_F` and constant :math:`d_F` 362 associated with discounted cost :math:`J_F(x) = x' P_F x + d_F` 363 364 Parameters 365 ---------- 366 F : array_like(float, ndim=2) 367 The policy function, a k x n array 368 369 Returns 370 ------- 371 P_F : array_like(float, ndim=2) 372 Matrix for discounted cost 373 d_F : scalar(float) 374 Constant for discounted cost 375 K_F : array_like(float, ndim=2) 376 Worst case policy 377 O_F : array_like(float, ndim=2) 378 Matrix for discounted entropy 379 o_F : scalar(float) 380 Constant for discounted entropy 381 382 """ 383 # == Simplify names == # 384 Q, R, A, B, C = self.Q, self.R, self.A, self.B, self.C 385 beta, theta = self.beta, self.theta 386 387 # == Solve for policies and costs using agent 2's problem == # 388 K_F, P_F = self.F_to_K(F) 389 I = np.identity(self.j) 390 H = inv(I - C.T.dot(P_F.dot(C)) / theta) 391 d_F = np.log(det(H)) 392 393 # == Compute O_F and o_F == # 394 AO = np.sqrt(beta) * (A - np.dot(B, F) + np.dot(C, K_F)) 395 O_F = solve_discrete_lyapunov(AO.T, beta * np.dot(K_F.T, K_F)) 396 ho = (np.trace(H - 1) - d_F) / 2.0 397 tr = np.trace(np.dot(O_F, C.dot(H.dot(C.T)))) 398 o_F = (ho + beta * tr) / (1 - beta) 399 400 return K_F, P_F, d_F, O_F, o_F 401