1# -*- coding: utf-8 -*- 2# Copyright (c) 2015, Alex Grigorevskiy 3# Licensed under the BSD 3-clause license (see LICENSE.txt) 4""" 5Main functionality for state-space inference. 6""" 7 8import collections # for cheking whether a variable is iterable 9import types # for cheking whether a variable is a function 10 11import numpy as np 12import scipy as sp 13import scipy.linalg as linalg 14 15import warnings 16 17try: 18 from . import state_space_setup 19 setup_available = True 20except ImportError as e: 21 setup_available = False 22 23 24print_verbose = False 25 26try: 27 import state_space_cython 28 cython_code_available = True 29 if print_verbose: 30 print("state_space: cython is available") 31except ImportError as e: 32 cython_code_available = False 33 34#cython_code_available = False 35# Use cython by default 36use_cython = False 37if setup_available: 38 use_cython = state_space_setup.use_cython 39 40if print_verbose: 41 if use_cython: 42 print("state_space: cython is used") 43 else: 44 print("state_space: cython is NOT used") 45 46# When debugging external module can set some value to this variable (e.g.) 47# 'model' and in this module this variable can be seen.s 48tmp_buffer = None 49 50 51class Dynamic_Callables_Python(object): 52 53 def f_a(self, k, m, A): 54 """ 55 p_a: function (k, x_{k-1}, A_{k}). Dynamic function. 56 k (iteration number), starts at 0 57 x_{k-1} State from the previous step 58 A_{k} Jacobian matrices of f_a. In the linear case it is exactly 59 A_{k}. 60 """ 61 62 raise NotImplemented("f_a is not implemented!") 63 64 def Ak(self, k, m, P): # returns state iteration matrix 65 """ 66 function (k, m, P) return Jacobian of dynamic function, it is passed 67 into p_a. 68 k (iteration number), starts at 0 69 m: point where Jacobian is evaluated 70 P: parameter for Jacobian, usually covariance matrix. 71 """ 72 73 raise NotImplemented("Ak is not implemented!") 74 75 def Qk(self, k): 76 """ 77 function (k). Returns noise matrix of dynamic model on iteration k. 78 k (iteration number). starts at 0 79 """ 80 raise NotImplemented("Qk is not implemented!") 81 82 def Q_srk(self, k): 83 """ 84 function (k). Returns the square root of noise matrix of dynamic model 85 on iteration k. 86 87 k (iteration number). starts at 0 88 89 This function is implemented to use SVD prediction step. 90 """ 91 92 raise NotImplemented("Q_srk is not implemented!") 93 94 def dAk(self, k): 95 """ 96 function (k). Returns the derivative of A on iteration k. 97 k (iteration number). starts at 0 98 """ 99 raise NotImplemented("dAk is not implemented!") 100 101 def dQk(self, k): 102 """ 103 function (k). Returns the derivative of Q on iteration k. 104 k (iteration number). starts at 0 105 """ 106 raise NotImplemented("dQk is not implemented!") 107 108 def reset(self, compute_derivatives=False): 109 """ 110 Return the state of this object to the beginning of iteration 111 (to k eq. 0). 112 """ 113 114 raise NotImplemented("reset is not implemented!") 115 116if use_cython: 117 Dynamic_Callables_Class = state_space_cython.Dynamic_Callables_Cython 118else: 119 Dynamic_Callables_Class = Dynamic_Callables_Python 120 121 122class Measurement_Callables_Python(object): 123 def f_h(self, k, m_pred, Hk): 124 """ 125 function (k, x_{k}, H_{k}). Measurement function. 126 k (iteration number), starts at 0 127 x_{k} state 128 H_{k} Jacobian matrices of f_h. In the linear case it is exactly 129 H_{k}. 130 """ 131 132 raise NotImplemented("f_a is not implemented!") 133 134 def Hk(self, k, m_pred, P_pred): # returns state iteration matrix 135 """ 136 function (k, m, P) return Jacobian of measurement function, it is 137 passed into p_h. 138 k (iteration number), starts at 0 139 m: point where Jacobian is evaluated 140 P: parameter for Jacobian, usually covariance matrix. 141 """ 142 143 raise NotImplemented("Hk is not implemented!") 144 145 def Rk(self, k): 146 """ 147 function (k). Returns noise matrix of measurement equation 148 on iteration k. 149 k (iteration number). starts at 0 150 """ 151 raise NotImplemented("Rk is not implemented!") 152 153 def R_isrk(self, k): 154 """ 155 function (k). Returns the square root of the noise matrix of 156 measurement equation on iteration k. 157 k (iteration number). starts at 0 158 159 This function is implemented to use SVD prediction step. 160 """ 161 162 raise NotImplemented("Q_srk is not implemented!") 163 164 def dHk(self, k): 165 """ 166 function (k). Returns the derivative of H on iteration k. 167 k (iteration number). starts at 0 168 """ 169 raise NotImplemented("dAk is not implemented!") 170 171 def dRk(self, k): 172 """ 173 function (k). Returns the derivative of R on iteration k. 174 k (iteration number). starts at 0 175 """ 176 raise NotImplemented("dQk is not implemented!") 177 178 def reset(self, compute_derivatives=False): 179 """ 180 Return the state of this object to the beginning of iteration 181 (to k eq. 0) 182 """ 183 184 raise NotImplemented("reset is not implemented!") 185 186if use_cython: 187 Measurement_Callables_Class = state_space_cython.\ 188 Measurement_Callables_Cython 189else: 190 Measurement_Callables_Class = Measurement_Callables_Python 191 192 193class R_handling_Python(Measurement_Callables_Class): 194 """ 195 The calss handles noise matrix R. 196 """ 197 def __init__(self, R, index, R_time_var_index, unique_R_number, dR=None): 198 """ 199 Input: 200 --------------- 201 R - array with noise on various steps. The result of preprocessing 202 the noise input. 203 204 index - for each step of Kalman filter contains the corresponding index 205 in the array. 206 207 R_time_var_index - another index in the array R. Computed earlier and 208 is passed here. 209 210 unique_R_number - number of unique noise matrices below which square 211 roots are cached and above which they are computed each time. 212 213 dR: 3D array[:, :, param_num] 214 derivative of R. Derivative is supported only when R do not change 215 over time 216 217 Output: 218 -------------- 219 Object which has two necessary functions: 220 f_R(k) 221 inv_R_square_root(k) 222 """ 223 self.R = R 224 self.index = np.asarray(index, np.int_) 225 self.R_time_var_index = int(R_time_var_index) 226 self.dR = dR 227 228 if (len(np.unique(index)) > unique_R_number): 229 self.svd_each_time = True 230 else: 231 self.svd_each_time = False 232 233 self.R_square_root = {} 234 235 def Rk(self, k): 236 return self.R[:, :, int(self.index[self.R_time_var_index, k])] 237 238 def dRk(self, k): 239 if self.dR is None: 240 raise ValueError("dR derivative is None") 241 242 return self.dR # the same dirivative on each iteration 243 244 def R_isrk(self, k): 245 """ 246 Function returns the inverse square root of R matrix on step k. 247 """ 248 ind = int(self.index[self.R_time_var_index, k]) 249 R = self.R[:, :, ind] 250 251 if (R.shape[0] == 1): # 1-D case handle simplier. No storage 252 # of the result, just compute it each time. 253 inv_square_root = np.sqrt(1.0/R) 254 else: 255 if self.svd_each_time: 256 257 (U, S, Vh) = sp.linalg.svd(R, full_matrices=False, 258 compute_uv=True, overwrite_a=False, 259 check_finite=True) 260 261 inv_square_root = U * 1.0/np.sqrt(S) 262 else: 263 if ind in self.R_square_root: 264 inv_square_root = self.R_square_root[ind] 265 else: 266 (U, S, Vh) = sp.linalg.svd(R, full_matrices=False, 267 compute_uv=True, 268 overwrite_a=False, 269 check_finite=True) 270 271 inv_square_root = U * 1.0/np.sqrt(S) 272 273 self.R_square_root[ind] = inv_square_root 274 275 return inv_square_root 276 277if use_cython: 278 R_handling_Class = state_space_cython.R_handling_Cython 279else: 280 R_handling_Class = R_handling_Python 281 282 283class Std_Measurement_Callables_Python(R_handling_Class): 284 285 def __init__(self, H, H_time_var_index, R, index, R_time_var_index, 286 unique_R_number, dH=None, dR=None): 287 super(Std_Measurement_Callables_Python, 288 self).__init__(R, index, R_time_var_index, unique_R_number, dR) 289 290 self.H = H 291 self.H_time_var_index = int(H_time_var_index) 292 self.dH = dH 293 294 def f_h(self, k, m, H): 295 """ 296 function (k, x_{k}, H_{k}). Measurement function. 297 k (iteration number), starts at 0 298 x_{k} state 299 H_{k} Jacobian matrices of f_h. In the linear case it is exactly 300 H_{k}. 301 """ 302 303 return np.dot(H, m) 304 305 def Hk(self, k, m_pred, P_pred): # returns state iteration matrix 306 """ 307 function (k, m, P) return Jacobian of measurement function, it is 308 passed into p_h. 309 k (iteration number), starts at 0 310 m: point where Jacobian is evaluated 311 P: parameter for Jacobian, usually covariance matrix. 312 """ 313 314 return self.H[:, :, int(self.index[self.H_time_var_index, k])] 315 316 def dHk(self, k): 317 if self.dH is None: 318 raise ValueError("dH derivative is None") 319 320 return self.dH # the same dirivative on each iteration 321 322if use_cython: 323 Std_Measurement_Callables_Class = state_space_cython.\ 324 Std_Measurement_Callables_Cython 325else: 326 Std_Measurement_Callables_Class = Std_Measurement_Callables_Python 327 328 329class Q_handling_Python(Dynamic_Callables_Class): 330 331 def __init__(self, Q, index, Q_time_var_index, unique_Q_number, dQ=None): 332 """ 333 Input: 334 --------------- 335 R - array with noise on various steps. The result of preprocessing 336 the noise input. 337 338 index - for each step of Kalman filter contains the corresponding index 339 in the array. 340 341 R_time_var_index - another index in the array R. Computed earlier and 342 passed here. 343 344 unique_R_number - number of unique noise matrices below which square 345 roots are cached and above which they are computed each time. 346 347 dQ: 3D array[:, :, param_num] 348 derivative of Q. Derivative is supported only when Q do not 349 change over time 350 351 Output: 352 -------------- 353 Object which has two necessary functions: 354 f_R(k) 355 inv_R_square_root(k) 356 """ 357 358 self.Q = Q 359 self.index = np.asarray(index, np.int_) 360 self.Q_time_var_index = Q_time_var_index 361 self.dQ = dQ 362 363 if (len(np.unique(index)) > unique_Q_number): 364 self.svd_each_time = True 365 else: 366 self.svd_each_time = False 367 368 self.Q_square_root = {} 369 370 def Qk(self, k): 371 """ 372 function (k). Returns noise matrix of dynamic model on iteration k. 373 k (iteration number). starts at 0 374 """ 375 return self.Q[:, :, self.index[self.Q_time_var_index, k]] 376 377 def dQk(self, k): 378 if self.dQ is None: 379 raise ValueError("dQ derivative is None") 380 381 return self.dQ # the same dirivative on each iteration 382 383 def Q_srk(self, k): 384 """ 385 function (k). Returns the square root of noise matrix of dynamic model 386 on iteration k. 387 k (iteration number). starts at 0 388 389 This function is implemented to use SVD prediction step. 390 """ 391 ind = self.index[self.Q_time_var_index, k] 392 Q = self.Q[:, :, ind] 393 394 if (Q.shape[0] == 1): # 1-D case handle simplier. No storage 395 # of the result, just compute it each time. 396 square_root = np.sqrt(Q) 397 else: 398 if self.svd_each_time: 399 400 (U, S, Vh) = sp.linalg.svd(Q, full_matrices=False, 401 compute_uv=True, 402 overwrite_a=False, 403 check_finite=True) 404 405 square_root = U * np.sqrt(S) 406 else: 407 408 if ind in self.Q_square_root: 409 square_root = self.Q_square_root[ind] 410 else: 411 (U, S, Vh) = sp.linalg.svd(Q, full_matrices=False, 412 compute_uv=True, 413 overwrite_a=False, 414 check_finite=True) 415 416 square_root = U * np.sqrt(S) 417 418 self.Q_square_root[ind] = square_root 419 420 return square_root 421 422if use_cython: 423 Q_handling_Class = state_space_cython.Q_handling_Cython 424else: 425 Q_handling_Class = Q_handling_Python 426 427 428class Std_Dynamic_Callables_Python(Q_handling_Class): 429 430 def __init__(self, A, A_time_var_index, Q, index, Q_time_var_index, 431 unique_Q_number, dA=None, dQ=None): 432 super(Std_Dynamic_Callables_Python, 433 self).__init__(Q, index, Q_time_var_index, unique_Q_number, dQ) 434 435 self.A = A 436 self.A_time_var_index = np.asarray(A_time_var_index, np.int_) 437 self.dA = dA 438 439 def f_a(self, k, m, A): 440 """ 441 f_a: function (k, x_{k-1}, A_{k}). Dynamic function. 442 k (iteration number), starts at 0 443 x_{k-1} State from the previous step 444 A_{k} Jacobian matrices of f_a. In the linear case it is exactly 445 A_{k}. 446 """ 447 return np.dot(A, m) 448 449 def Ak(self, k, m_pred, P_pred): # returns state iteration matrix 450 """ 451 function (k, m, P) return Jacobian of measurement function, it is 452 passed into p_h. 453 k (iteration number), starts at 0 454 m: point where Jacobian is evaluated 455 P: parameter for Jacobian, usually covariance matrix. 456 """ 457 458 return self.A[:, :, self.index[self.A_time_var_index, k]] 459 460 def dAk(self, k): 461 if self.dA is None: 462 raise ValueError("dA derivative is None") 463 464 return self.dA # the same dirivative on each iteration 465 466 def reset(self, compute_derivatives=False): 467 """ 468 Return the state of this object to the beginning of iteration 469 (to k eq. 0) 470 """ 471 472 return self 473 474if use_cython: 475 Std_Dynamic_Callables_Class = state_space_cython.\ 476 Std_Dynamic_Callables_Cython 477else: 478 Std_Dynamic_Callables_Class = Std_Dynamic_Callables_Python 479 480 481class AddMethodToClass(object): 482 483 def __init__(self, func=None, tp='staticmethod'): 484 """ 485 Input: 486 -------------- 487 func: function to add 488 tp: string 489 Type of the method: normal, staticmethod, classmethod 490 """ 491 if func is None: 492 raise ValueError("Function can not be None") 493 494 self.func = func 495 self.tp = tp 496 497 def __get__(self, obj, klass=None, *args, **kwargs): 498 499 if self.tp == 'staticmethod': 500 return self.func 501 elif self.tp == 'normal': 502 def newfunc(obj, *args, **kwargs): 503 return self.func 504 505 elif self.tp == 'classmethod': 506 def newfunc(klass, *args, **kwargs): 507 return self.func 508 return newfunc 509 510 511class DescreteStateSpaceMeta(type): 512 """ 513 Substitute necessary methods from cython. 514 """ 515 516 def __new__(typeclass, name, bases, attributes): 517 """ 518 After thos method the class object is created 519 """ 520 521 if use_cython: 522 if '_kalman_prediction_step_SVD' in attributes: 523 attributes['_kalman_prediction_step_SVD'] =\ 524 AddMethodToClass(state_space_cython. 525 _kalman_prediction_step_SVD_Cython) 526 527 if '_kalman_update_step_SVD' in attributes: 528 attributes['_kalman_update_step_SVD'] =\ 529 AddMethodToClass(state_space_cython. 530 _kalman_update_step_SVD_Cython) 531 532 if '_cont_discr_kalman_filter_raw' in attributes: 533 attributes['_cont_discr_kalman_filter_raw'] =\ 534 AddMethodToClass(state_space_cython. 535 _cont_discr_kalman_filter_raw_Cython) 536 537 return super(DescreteStateSpaceMeta, 538 typeclass).__new__(typeclass, name, bases, attributes) 539 540 541class DescreteStateSpace(object): 542 """ 543 This class implents state-space inference for linear and non-linear 544 state-space models. 545 Linear models are: 546 x_{k} = A_{k} * x_{k-1} + q_{k-1}; q_{k-1} ~ N(0, Q_{k-1}) 547 y_{k} = H_{k} * x_{k} + r_{k}; r_{k-1} ~ N(0, R_{k}) 548 549 Nonlinear: 550 x_{k} = f_a(k, x_{k-1}, A_{k}) + q_{k-1}; q_{k-1} ~ N(0, Q_{k-1}) 551 y_{k} = f_h(k, x_{k}, H_{k}) + r_{k}; r_{k-1} ~ N(0, R_{k}) 552 Here f_a and f_h are some functions of k (iteration number), x_{k-1} or 553 x_{k} (state value on certain iteration), A_{k} and H_{k} - Jacobian 554 matrices of f_a and f_h respectively. In the linear case they are exactly 555 A_{k} and H_{k}. 556 557 558 Currently two nonlinear Gaussian filter algorithms are implemented: 559 Extended Kalman Filter (EKF), Statistically linearized Filter (SLF), which 560 implementations are very similar. 561 562 """ 563 __metaclass__ = DescreteStateSpaceMeta 564 565 @staticmethod 566 def _reshape_input_data(shape, desired_dim=3): 567 """ 568 Static function returns the column-wise shape for for an input shape. 569 570 Input: 571 -------------- 572 shape: tuple 573 Shape of an input array, so that it is always a column. 574 575 desired_dim: int 576 desired shape of output. For Y data it should be 3 577 (sample_no, dimension, ts_no). For X data - 2 (sample_no, 1) 578 Output: 579 -------------- 580 new_shape: tuple 581 New shape of the measurements array. Idea is that samples are 582 along dimension 0, sample dimension - dimension 1, different 583 time series - dimension 2. 584 old_shape: tuple or None 585 If the shape has been modified, return old shape, otherwise 586 None. 587 """ 588 589 if (len(shape) > 3): 590 raise ValueError("""Input array is not supposed to be more 591 than 3 dimensional.""") 592 593 if (len(shape) > desired_dim): 594 raise ValueError("Input array shape is more than desired shape.") 595 elif len(shape) == 1: 596 if (desired_dim == 3): 597 return ((shape[0], 1, 1), shape) # last dimension is the 598 # time serime_series_no 599 elif (desired_dim == 2): 600 return ((shape[0], 1), shape) 601 602 elif len(shape) == 2: 603 if (desired_dim == 3): 604 return ((shape[1], 1, 1), shape) if (shape[0] == 1) else\ 605 ((shape[0], shape[1], 1), shape) # convert to column 606 # vector 607 elif (desired_dim == 2): 608 return ((shape[1], 1), shape) if (shape[0] == 1) else\ 609 ((shape[0], shape[1]), None) # convert to column vector 610 611 else: # len(shape) == 3 612 return (shape, None) # do nothing 613 614 @classmethod 615 def kalman_filter(cls, p_A, p_Q, p_H, p_R, Y, index=None, m_init=None, 616 P_init=None, p_kalman_filter_type='regular', 617 calc_log_likelihood=False, 618 calc_grad_log_likelihood=False, grad_params_no=None, 619 grad_calc_params=None): 620 """ 621 This function implements the basic Kalman Filter algorithm 622 These notations for the State-Space model are assumed: 623 x_{k} = A_{k} * x_{k-1} + q_{k-1}; q_{k-1} ~ N(0, Q_{k-1}) 624 y_{k} = H_{k} * x_{k} + r_{k}; r_{k-1} ~ N(0, R_{k}) 625 626 Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k)) 627 628 Current Features: 629 ---------------------------------------- 630 1) The function generaly do not modify the passed parameters. If 631 it happens then it is an error. There are several exeprions: scalars 632 can be modified into a matrix, in some rare cases shapes of 633 the derivatives matrices may be changed, it is ignored for now. 634 635 2) Copies of p_A, p_Q, index are created in memory to be used later 636 in smoother. References to copies are kept in "matrs_for_smoother" 637 return parameter. 638 639 3) Function support "multiple time series mode" which means that exactly 640 the same State-Space model is used to filter several sets of measurements. 641 In this case third dimension of Y should include these state-space measurements 642 Log_likelihood and Grad_log_likelihood have the corresponding dimensions then. 643 644 4) Calculation of Grad_log_likelihood is not supported if matrices A,Q, 645 H, or R changes over time. (later may be changed) 646 647 5) Measurement may include missing values. In this case update step is 648 not done for this measurement. (later may be changed) 649 650 Input: 651 ----------------- 652 653 p_A: scalar, square matrix, 3D array 654 A_{k} in the model. If matrix then A_{k} = A - constant. 655 If it is 3D array then A_{k} = p_A[:,:, index[0,k]] 656 657 p_Q: scalar, square symmetric matrix, 3D array 658 Q_{k-1} in the model. If matrix then Q_{k-1} = Q - constant. 659 If it is 3D array then Q_{k-1} = p_Q[:,:, index[1,k]] 660 661 p_H: scalar, matrix (measurement_dim, state_dim) , 3D array 662 H_{k} in the model. If matrix then H_{k} = H - constant. 663 If it is 3D array then H_{k} = p_Q[:,:, index[2,k]] 664 665 p_R: scalar, square symmetric matrix, 3D array 666 R_{k} in the model. If matrix then R_{k} = R - constant. 667 If it is 3D array then R_{k} = p_R[:,:, index[3,k]] 668 669 Y: matrix or vector or 3D array 670 Data. If Y is matrix then samples are along 0-th dimension and 671 features along the 1-st. If 3D array then third dimension 672 correspond to "multiple time series mode". 673 674 index: vector 675 Which indices (on 3-rd dimension) from arrays p_A, p_Q,p_H, p_R to use 676 on every time step. If this parameter is None then it is assumed 677 that p_A, p_Q, p_H, p_R do not change over time and indices are not needed. 678 index[0,:] - correspond to A, index[1,:] - correspond to Q 679 index[2,:] - correspond to H, index[3,:] - correspond to R. 680 If index.shape[0] == 1, it is assumed that indides for all matrices 681 are the same. 682 683 m_init: vector or matrix 684 Initial distribution mean. If None it is assumed to be zero. 685 For "multiple time series mode" it is matrix, second dimension of 686 which correspond to different time series. In regular case ("one 687 time series mode") it is a vector. 688 689 P_init: square symmetric matrix or scalar 690 Initial covariance of the states. If the parameter is scalar 691 then it is assumed that initial covariance matrix is unit matrix 692 multiplied by this scalar. If None the unit matrix is used instead. 693 "multiple time series mode" does not affect it, since it does not 694 affect anything related to state variaces. 695 696 calc_log_likelihood: boolean 697 Whether to calculate marginal likelihood of the state-space model. 698 699 calc_grad_log_likelihood: boolean 700 Whether to calculate gradient of the marginal likelihood 701 of the state-space model. If true then "grad_calc_params" parameter must 702 provide the extra parameters for gradient calculation. 703 704 grad_params_no: int 705 If previous parameter is true, then this parameters gives the 706 total number of parameters in the gradient. 707 708 grad_calc_params: dictionary 709 Dictionary with derivatives of model matrices with respect 710 to parameters "dA", "dQ", "dH", "dR", "dm_init", "dP_init". 711 They can be None, in this case zero matrices (no dependence on parameters) 712 is assumed. If there is only one parameter then third dimension is 713 automatically added. 714 715 Output: 716 -------------- 717 718 M: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array 719 Filter estimates of the state means. In the extra step the initial 720 value is included. In the "multiple time series mode" third dimension 721 correspond to different timeseries. 722 723 P: (no_steps+1, state_dim, state_dim) 3D array 724 Filter estimates of the state covariances. In the extra step the initial 725 value is included. 726 727 log_likelihood: double or (1, time_series_no) 3D array. 728 If the parameter calc_log_likelihood was set to true, return 729 logarithm of marginal likelihood of the state-space model. If 730 the parameter was false, return None. In the "multiple time series mode" it is a vector 731 providing log_likelihood for each time series. 732 733 grad_log_likelihood: column vector or (grad_params_no, time_series_no) matrix 734 If calc_grad_log_likelihood is true, return gradient of log likelihood 735 with respect to parameters. It returns it column wise, so in 736 "multiple time series mode" gradients for each time series is in the 737 corresponding column. 738 739 matrs_for_smoother: dict 740 Dictionary with model functions for smoother. The intrinsic model 741 functions are computed in this functions and they are returned to 742 use in smoother for convenience. They are: 'p_a', 'p_f_A', 'p_f_Q' 743 The dictionary contains the same fields. 744 """ 745 746 #import pdb; pdb.set_trace() 747 748 # Parameters checking -> 749 # index 750 p_A = np.atleast_1d(p_A) 751 p_Q = np.atleast_1d(p_Q) 752 p_H = np.atleast_1d(p_H) 753 p_R = np.atleast_1d(p_R) 754 755 # Reshape and check measurements: 756 Y.shape, old_Y_shape = cls._reshape_input_data(Y.shape) 757 measurement_dim = Y.shape[1] 758 time_series_no = Y.shape[2] # multiple time series mode 759 760 if ((len(p_A.shape) == 3) and (len(p_A.shape[2]) != 1)) or\ 761 ((len(p_Q.shape) == 3) and (len(p_Q.shape[2]) != 1)) or\ 762 ((len(p_H.shape) == 3) and (len(p_H.shape[2]) != 1)) or\ 763 ((len(p_R.shape) == 3) and (len(p_R.shape[2]) != 1)): 764 model_matrices_chage_with_time = True 765 else: 766 model_matrices_chage_with_time = False 767 768 # Check index 769 old_index_shape = None 770 if index is None: 771 if (len(p_A.shape) == 3) or (len(p_Q.shape) == 3) or\ 772 (len(p_H.shape) == 3) or (len(p_R.shape) == 3): 773 raise ValueError("Parameter index can not be None for time varying matrices (third dimension is present)") 774 else: # matrices do not change in time, so form dummy zero indices. 775 index = np.zeros((1,Y.shape[0])) 776 else: 777 if len(index.shape) == 1: 778 index.shape = (1,index.shape[0]) 779 old_index_shape = (index.shape[0],) 780 781 if (index.shape[1] != Y.shape[0]): 782 raise ValueError("Number of measurements must be equal the number of A_{k}, Q_{k}, H_{k}, R_{k}") 783 784 if (index.shape[0] == 1): 785 A_time_var_index = 0; Q_time_var_index = 0 786 H_time_var_index = 0; R_time_var_index = 0 787 elif (index.shape[0] == 4): 788 A_time_var_index = 0; Q_time_var_index = 1 789 H_time_var_index = 2; R_time_var_index = 3 790 else: 791 raise ValueError("First Dimension of index must be either 1 or 4.") 792 793 state_dim = p_A.shape[0] 794 # Check and make right shape for model matrices. On exit they all are 3 dimensional. Last dimension 795 # correspond to change in time. 796 (p_A, old_A_shape) = cls._check_SS_matrix(p_A, state_dim, measurement_dim, which='A') 797 (p_Q, old_Q_shape) = cls._check_SS_matrix(p_Q, state_dim, measurement_dim, which='Q') 798 (p_H, old_H_shape) = cls._check_SS_matrix(p_H, state_dim, measurement_dim, which='H') 799 (p_R, old_R_shape) = cls._check_SS_matrix(p_R, state_dim, measurement_dim, which='R') 800 801 # m_init 802 if m_init is None: 803 m_init = np.zeros((state_dim, time_series_no)) 804 else: 805 m_init = np.atleast_2d(m_init).T 806 807 # P_init 808 if P_init is None: 809 P_init = np.eye(state_dim) 810 elif not isinstance(P_init, collections.Iterable): #scalar 811 P_init = P_init*np.eye(state_dim) 812 813 if p_kalman_filter_type not in ('regular', 'svd'): 814 raise ValueError("Kalman filer type neither 'regular nor 'svd'.") 815 816 # Functions to pass to the kalman_filter algorithm: 817 # Parameters: 818 # k - number of Kalman filter iteration 819 # m - vector for calculating matrices. Required for EKF. Not used here. 820 821 c_p_A = p_A.copy() # create a copy because this object is passed to the smoother 822 c_p_Q = p_Q.copy() # create a copy because this object is passed to the smoother 823 c_index = index.copy() # create a copy because this object is passed to the smoother 824 825 if calc_grad_log_likelihood: 826 if model_matrices_chage_with_time: 827 raise ValueError("When computing likelihood gradient A and Q can not change over time.") 828 829 dA = cls._check_grad_state_matrices(grad_calc_params.get('dA'), state_dim, grad_params_no, which = 'dA') 830 dQ = cls._check_grad_state_matrices(grad_calc_params.get('dQ'), state_dim, grad_params_no, which = 'dQ') 831 dH = cls._check_grad_measurement_matrices(grad_calc_params.get('dH'), state_dim, grad_params_no, measurement_dim, which = 'dH') 832 dR = cls._check_grad_measurement_matrices(grad_calc_params.get('dR'), state_dim, grad_params_no, measurement_dim, which = 'dR') 833 834 dm_init = grad_calc_params.get('dm_init') 835 if dm_init is None: 836 # multiple time series mode. Keep grad_params always as a last dimension 837 dm_init = np.zeros((state_dim, time_series_no, grad_params_no)) 838 839 dP_init = grad_calc_params.get('dP_init') 840 if dP_init is None: 841 dP_init = np.zeros((state_dim,state_dim,grad_params_no)) 842 else: 843 dA = None 844 dQ = None 845 dH = None 846 dR = None 847 dm_init = None 848 dP_init = None 849 850 dynamic_callables = Std_Dynamic_Callables_Class(c_p_A, A_time_var_index, c_p_Q, c_index, Q_time_var_index, 20, dA, dQ) 851 measurement_callables = Std_Measurement_Callables_Class(p_H, H_time_var_index, p_R, index, R_time_var_index, 20, dH, dR) 852 853 (M, P,log_likelihood, grad_log_likelihood, dynamic_callables) = \ 854 cls._kalman_algorithm_raw(state_dim, dynamic_callables, 855 measurement_callables, Y, m_init, 856 P_init, p_kalman_filter_type = p_kalman_filter_type, 857 calc_log_likelihood=calc_log_likelihood, 858 calc_grad_log_likelihood=calc_grad_log_likelihood, 859 grad_params_no=grad_params_no, 860 dm_init=dm_init, dP_init=dP_init) 861 862 # restore shapes so that input parameters are unchenged 863 if old_index_shape is not None: 864 index.shape = old_index_shape 865 866 if old_Y_shape is not None: 867 Y.shape = old_Y_shape 868 869 if old_A_shape is not None: 870 p_A.shape = old_A_shape 871 872 if old_Q_shape is not None: 873 p_Q.shape = old_Q_shape 874 875 if old_H_shape is not None: 876 p_H.shape = old_H_shape 877 878 if old_R_shape is not None: 879 p_R.shape = old_R_shape 880 # Return values 881 882 return (M, P,log_likelihood, grad_log_likelihood, dynamic_callables) 883 884 @classmethod 885 def extended_kalman_filter(cls,p_state_dim, p_a, p_f_A, p_f_Q, p_h, p_f_H, p_f_R, Y, m_init=None, 886 P_init=None,calc_log_likelihood=False): 887 888 """ 889 Extended Kalman Filter 890 891 Input: 892 ----------------- 893 894 p_state_dim: integer 895 896 p_a: if None - the function from the linear model is assumed. No non- 897 linearity in the dynamic is assumed. 898 899 function (k, x_{k-1}, A_{k}). Dynamic function. 900 k: (iteration number), 901 x_{k-1}: (previous state) 902 x_{k}: Jacobian matrices of f_a. In the linear case it is exactly A_{k}. 903 904 p_f_A: matrix - in this case function which returns this matrix is assumed. 905 Look at this parameter description in kalman_filter function. 906 907 function (k, m, P) return Jacobian of dynamic function, it is 908 passed into p_a. 909 910 k: (iteration number), 911 m: point where Jacobian is evaluated 912 P: parameter for Jacobian, usually covariance matrix. 913 914 p_f_Q: matrix. In this case function which returns this matrix is asumed. 915 Look at this parameter description in kalman_filter function. 916 917 function (k). Returns noise matrix of dynamic model on iteration k. 918 k: (iteration number). 919 920 p_h: if None - the function from the linear measurement model is assumed. 921 No nonlinearity in the measurement is assumed. 922 923 function (k, x_{k}, H_{k}). Measurement function. 924 k: (iteration number), 925 x_{k}: (current state) 926 H_{k}: Jacobian matrices of f_h. In the linear case it is exactly H_{k}. 927 928 p_f_H: matrix - in this case function which returns this matrix is assumed. 929 function (k, m, P) return Jacobian of dynamic function, it is 930 passed into p_h. 931 k: (iteration number), 932 m: point where Jacobian is evaluated 933 P: parameter for Jacobian, usually covariance matrix. 934 935 p_f_R: matrix. In this case function which returns this matrix is asumed. 936 function (k). Returns noise matrix of measurement equation 937 on iteration k. 938 k: (iteration number). 939 940 Y: matrix or vector 941 Data. If Y is matrix then samples are along 0-th dimension and 942 features along the 1-st. May have missing values. 943 944 p_mean: vector 945 Initial distribution mean. If None it is assumed to be zero 946 947 P_init: square symmetric matrix or scalar 948 Initial covariance of the states. If the parameter is scalar 949 then it is assumed that initial covariance matrix is unit matrix 950 multiplied by this scalar. If None the unit matrix is used instead. 951 952 calc_log_likelihood: boolean 953 Whether to calculate marginal likelihood of the state-space model. 954 """ 955 956 # Y 957 Y.shape, old_Y_shape = cls._reshape_input_data(Y.shape) 958 959 # m_init 960 if m_init is None: 961 m_init = np.zeros((p_state_dim,1)) 962 else: 963 m_init = np.atleast_2d(m_init).T 964 965 # P_init 966 if P_init is None: 967 P_init = np.eye(p_state_dim) 968 elif not isinstance(P_init, collections.Iterable): #scalar 969 P_init = P_init*np.eye(p_state_dim) 970 971 if p_a is None: 972 p_a = lambda k,m,A: np.dot(A, m) 973 974 old_A_shape = None 975 if not isinstance(p_f_A, types.FunctionType): # not a function but array 976 p_f_A = np.atleast_1d(p_f_A) 977 (p_A, old_A_shape) = cls._check_A_matrix(p_f_A) 978 979 p_f_A = lambda k, m, P: p_A[:,:, 0] # make function 980 else: 981 if p_f_A(1, m_init, P_init).shape[0] != m_init.shape[0]: 982 raise ValueError("p_f_A function returns matrix of wrong size") 983 984 old_Q_shape = None 985 if not isinstance(p_f_Q, types.FunctionType): # not a function but array 986 p_f_Q = np.atleast_1d(p_f_Q) 987 (p_Q, old_Q_shape) = cls._check_Q_matrix(p_f_Q) 988 989 p_f_Q = lambda k: p_Q[:,:, 0] # make function 990 else: 991 if p_f_Q(1).shape[0] != m_init.shape[0]: 992 raise ValueError("p_f_Q function returns matrix of wrong size") 993 994 if p_h is None: 995 lambda k,m,H: np.dot(H, m) 996 997 old_H_shape = None 998 if not isinstance(p_f_H, types.FunctionType): # not a function but array 999 p_f_H = np.atleast_1d(p_f_H) 1000 (p_H, old_H_shape) = cls._check_H_matrix(p_f_H) 1001 1002 p_f_H = lambda k, m, P: p_H # make function 1003 else: 1004 if p_f_H(1, m_init, P_init).shape[0] != Y.shape[1]: 1005 raise ValueError("p_f_H function returns matrix of wrong size") 1006 1007 old_R_shape = None 1008 if not isinstance(p_f_R, types.FunctionType): # not a function but array 1009 p_f_R = np.atleast_1d(p_f_R) 1010 (p_R, old_R_shape) = cls._check_H_matrix(p_f_R) 1011 1012 p_f_R = lambda k: p_R # make function 1013 else: 1014 if p_f_R(1).shape[0] != m_init.shape[0]: 1015 raise ValueError("p_f_R function returns matrix of wrong size") 1016 1017# class dynamic_callables_class(Dynamic_Model_Callables): 1018# 1019# Ak = 1020# Qk = 1021 1022 1023 class measurement_callables_class(R_handling_Class): 1024 def __init__(self,R, index, R_time_var_index, unique_R_number): 1025 super(measurement_callables_class,self).__init__(R, index, R_time_var_index, unique_R_number) 1026 1027 Hk = AddMethodToClass(f_H) 1028 f_h = AddMethodToClass(f_hl) 1029 1030 1031 (M, P,log_likelihood, grad_log_likelihood) = cls._kalman_algorithm_raw(p_state_dim, p_a, p_f_A, p_f_Q, p_h, p_f_H, p_f_R, Y, m_init, 1032 P_init, calc_log_likelihood, 1033 calc_grad_log_likelihood=False, grad_calc_params=None) 1034 1035 if old_Y_shape is not None: 1036 Y.shape = old_Y_shape 1037 1038 if old_A_shape is not None: 1039 p_A.shape = old_A_shape 1040 1041 if old_Q_shape is not None: 1042 p_Q.shape = old_Q_shape 1043 1044 if old_H_shape is not None: 1045 p_H.shape = old_H_shape 1046 1047 if old_R_shape is not None: 1048 p_R.shape = old_R_shape 1049 1050 return (M, P) 1051 1052 @classmethod 1053 def _kalman_algorithm_raw(cls,state_dim, p_dynamic_callables, p_measurement_callables, Y, m_init, 1054 P_init, p_kalman_filter_type='regular', 1055 calc_log_likelihood=False, 1056 calc_grad_log_likelihood=False, grad_params_no=None, 1057 dm_init=None, dP_init=None): 1058 """ 1059 General nonlinear filtering algorithm for inference in the state-space 1060 model: 1061 1062 x_{k} = f_a(k, x_{k-1}, A_{k}) + q_{k-1}; q_{k-1} ~ N(0, Q_{k-1}) 1063 y_{k} = f_h(k, x_{k}, H_{k}) + r_{k}; r_{k-1} ~ N(0, R_{k}) 1064 1065 Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k)) 1066 1067 Current Features: 1068 ---------------------------------------- 1069 1070 1) Function support "multiple time series mode" which means that exactly 1071 the same State-Space model is used to filter several sets of measurements. 1072 In this case third dimension of Y should include these state-space measurements 1073 Log_likelihood and Grad_log_likelihood have the corresponding dimensions then. 1074 1075 2) Measurement may include missing values. In this case update step is 1076 not done for this measurement. (later may be changed) 1077 1078 Input: 1079 ----------------- 1080 state_dim: int 1081 Demensionality of the states 1082 1083 p_a: function (k, x_{k-1}, A_{k}). Dynamic function. 1084 k (iteration number), 1085 x_{k-1} 1086 A_{k} Jacobian matrices of f_a. In the linear case it is exactly A_{k}. 1087 1088 p_f_A: function (k, m, P) return Jacobian of dynamic function, it is 1089 passed into p_a. 1090 k (iteration number), 1091 m: point where Jacobian is evaluated 1092 P: parameter for Jacobian, usually covariance matrix. 1093 1094 p_f_Q: function (k). Returns noise matrix of dynamic model on iteration k. 1095 k (iteration number). 1096 1097 p_h: function (k, x_{k}, H_{k}). Measurement function. 1098 k (iteration number), 1099 x_{k} 1100 H_{k} Jacobian matrices of f_h. In the linear case it is exactly H_{k}. 1101 1102 p_f_H: function (k, m, P) return Jacobian of dynamic function, it is 1103 passed into p_h. 1104 k (iteration number), 1105 m: point where Jacobian is evaluated 1106 P: parameter for Jacobian, usually covariance matrix. 1107 1108 p_f_R: function (k). Returns noise matrix of measurement equation 1109 on iteration k. 1110 k (iteration number). 1111 1112 Y: matrix or vector or 3D array 1113 Data. If Y is matrix then samples are along 0-th dimension and 1114 features along the 1-st. If 3D array then third dimension 1115 correspond to "multiple time series mode". 1116 1117 m_init: vector or matrix 1118 Initial distribution mean. For "multiple time series mode" 1119 it is matrix, second dimension of which correspond to different 1120 time series. In regular case ("one time series mode") it is a 1121 vector. 1122 1123 P_init: matrix or scalar 1124 Initial covariance of the states. Must be not None 1125 "multiple time series mode" does not affect it, since it does not 1126 affect anything related to state variaces. 1127 1128 p_kalman_filter_type: string 1129 1130 1131 calc_log_likelihood: boolean 1132 Whether to calculate marginal likelihood of the state-space model. 1133 1134 calc_grad_log_likelihood: boolean 1135 Whether to calculate gradient of the marginal likelihood 1136 of the state-space model. If true then the next parameter must 1137 provide the extra parameters for gradient calculation. 1138 1139 grad_calc_params: dictionary 1140 Dictionary with derivatives of model matrices with respect 1141 to parameters "dA", "dQ", "dH", "dR", "dm_init", "dP_init". 1142 1143 Output: 1144 -------------- 1145 1146 M: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array 1147 Filter estimates of the state means. In the extra step the initial 1148 value is included. In the "multiple time series mode" third dimension 1149 correspond to different timeseries. 1150 1151 P: (no_steps+1, state_dim, state_dim) 3D array 1152 Filter estimates of the state covariances. In the extra step the initial 1153 value is included. 1154 1155 log_likelihood: double or (1, time_series_no) 3D array. 1156 If the parameter calc_log_likelihood was set to true, return 1157 logarithm of marginal likelihood of the state-space model. If 1158 the parameter was false, return None. In the "multiple time series mode" it is a vector 1159 providing log_likelihood for each time series. 1160 1161 grad_log_likelihood: column vector or (grad_params_no, time_series_no) matrix 1162 If calc_grad_log_likelihood is true, return gradient of log likelihood 1163 with respect to parameters. It returns it column wise, so in 1164 "multiple time series mode" gradients for each time series is in the 1165 corresponding column. 1166 1167 """ 1168 1169 steps_no = Y.shape[0] # number of steps in the Kalman Filter 1170 time_series_no = Y.shape[2] # multiple time series mode 1171 1172 # Allocate space for results 1173 # Mean estimations. Initial values will be included 1174 M = np.empty(((steps_no+1),state_dim,time_series_no)) 1175 M[0,:,:] = m_init # Initialize mean values 1176 # Variance estimations. Initial values will be included 1177 P = np.empty(((steps_no+1),state_dim,state_dim)) 1178 P_init = 0.5*( P_init + P_init.T) # symmetrize initial covariance. In some ustable cases this is uiseful 1179 P[0,:,:] = P_init # Initialize initial covariance matrix 1180 1181 if p_kalman_filter_type == 'svd': 1182 (U,S,Vh) = sp.linalg.svd( P_init,full_matrices=False, compute_uv=True, 1183 overwrite_a=False,check_finite=True) 1184 S[ (S==0) ] = 1e-17 # allows to run algorithm for singular initial variance 1185 P_upd = (P_init, S,U) 1186 1187 log_likelihood = 0 if calc_log_likelihood else None 1188 grad_log_likelihood = 0 if calc_grad_log_likelihood else None 1189 1190 #setting initial values for derivatives update 1191 dm_upd = dm_init 1192 dP_upd = dP_init 1193 # Main loop of the Kalman filter 1194 for k in range(0,steps_no): 1195 # In this loop index for new estimations is (k+1), old - (k) 1196 # This happened because initial values are stored at 0-th index. 1197 1198 prev_mean = M[k,:,:] # mean from the previous step 1199 1200 if p_kalman_filter_type == 'svd': 1201 m_pred, P_pred, dm_pred, dP_pred = \ 1202 cls._kalman_prediction_step_SVD(k, prev_mean ,P_upd, p_dynamic_callables, 1203 calc_grad_log_likelihood=calc_grad_log_likelihood, 1204 p_dm = dm_upd, p_dP = dP_upd) 1205 else: 1206 m_pred, P_pred, dm_pred, dP_pred = \ 1207 cls._kalman_prediction_step(k, prev_mean ,P[k,:,:], p_dynamic_callables, 1208 calc_grad_log_likelihood=calc_grad_log_likelihood, 1209 p_dm = dm_upd, p_dP = dP_upd ) 1210 1211 k_measurment = Y[k,:,:] 1212 1213 if (np.any(np.isnan(k_measurment)) == False): 1214 if p_kalman_filter_type == 'svd': 1215 m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ 1216 cls._kalman_update_step_SVD(k, m_pred , P_pred, p_measurement_callables, 1217 k_measurment, calc_log_likelihood=calc_log_likelihood, 1218 calc_grad_log_likelihood=calc_grad_log_likelihood, 1219 p_dm = dm_pred, p_dP = dP_pred ) 1220 1221 1222 # m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ 1223 # cls._kalman_update_step(k, m_pred , P_pred[0], f_h, f_H, p_R.f_R, k_measurment, 1224 # calc_log_likelihood=calc_log_likelihood, 1225 # calc_grad_log_likelihood=calc_grad_log_likelihood, 1226 # p_dm = dm_pred, p_dP = dP_pred, grad_calc_params_2 = (dH, dR)) 1227 # 1228 # (U,S,Vh) = sp.linalg.svd( P_upd,full_matrices=False, compute_uv=True, 1229 # overwrite_a=False,check_finite=True) 1230 # P_upd = (P_upd, S,U) 1231 else: 1232 m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ 1233 cls._kalman_update_step(k, m_pred , P_pred, p_measurement_callables, k_measurment, 1234 calc_log_likelihood=calc_log_likelihood, 1235 calc_grad_log_likelihood=calc_grad_log_likelihood, 1236 p_dm = dm_pred, p_dP = dP_pred ) 1237 1238 else: 1239# if k_measurment.shape != (1,1): 1240# raise ValueError("Nan measurements are currently not supported for \ 1241# multidimensional output and multiple time series.") 1242# else: 1243# m_upd = m_pred; P_upd = P_pred; dm_upd = dm_pred; dP_upd = dP_pred 1244# log_likelihood_update = 0.0; 1245# d_log_likelihood_update = 0.0; 1246 1247 if not np.all(np.isnan(k_measurment)): 1248 raise ValueError("""Nan measurements are currently not supported if 1249 they are intermixed with not NaN measurements""") 1250 else: 1251 m_upd = m_pred; P_upd = P_pred; dm_upd = dm_pred; dP_upd = dP_pred 1252 if calc_log_likelihood: 1253 log_likelihood_update = np.zeros((time_series_no,)) 1254 if calc_grad_log_likelihood: 1255 d_log_likelihood_update = np.zeros((grad_params_no,time_series_no)) 1256 1257 1258 if calc_log_likelihood: 1259 log_likelihood += log_likelihood_update 1260 1261 if calc_grad_log_likelihood: 1262 grad_log_likelihood += d_log_likelihood_update 1263 1264 M[k+1,:,:] = m_upd # separate mean value for each time series 1265 1266 if p_kalman_filter_type == 'svd': 1267 P[k+1,:,:] = P_upd[0] 1268 else: 1269 P[k+1,:,:] = P_upd 1270 1271 # !!!Print statistics! Print sizes of matrices 1272 # !!!Print statistics! Print iteration time base on another boolean variable 1273 return (M, P, log_likelihood, grad_log_likelihood, p_dynamic_callables.reset(False)) 1274 1275 @staticmethod 1276 def _kalman_prediction_step(k, p_m , p_P, p_dyn_model_callable, calc_grad_log_likelihood=False, 1277 p_dm = None, p_dP = None): 1278 """ 1279 Desctrete prediction function 1280 1281 Input: 1282 k:int 1283 Iteration No. Starts at 0. Total number of iterations equal to the 1284 number of measurements. 1285 1286 p_m: matrix of size (state_dim, time_series_no) 1287 Mean value from the previous step. For "multiple time series mode" 1288 it is matrix, second dimension of which correspond to different 1289 time series. 1290 1291 p_P: 1292 Covariance matrix from the previous step. 1293 1294 p_dyn_model_callable: class 1295 1296 1297 calc_grad_log_likelihood: boolean 1298 Whether to calculate gradient of the marginal likelihood 1299 of the state-space model. If true then the next parameter must 1300 provide the extra parameters for gradient calculation. 1301 1302 p_dm: 3D array (state_dim, time_series_no, parameters_no) 1303 Mean derivatives from the previous step. For "multiple time series mode" 1304 it is 3D array, second dimension of which correspond to different 1305 time series. 1306 1307 p_dP: 3D array (state_dim, state_dim, parameters_no) 1308 Mean derivatives from the previous step 1309 1310 Output: 1311 ---------------------------- 1312 m_pred, P_pred, dm_pred, dP_pred: metrices, 3D objects 1313 Results of the prediction steps. 1314 1315 """ 1316 1317 # index correspond to values from previous iteration. 1318 A = p_dyn_model_callable.Ak(k,p_m,p_P) # state transition matrix (or Jacobian) 1319 Q = p_dyn_model_callable.Qk(k) # state noise matrix 1320 1321 # Prediction step -> 1322 m_pred = p_dyn_model_callable.f_a(k, p_m, A) # predicted mean 1323 P_pred = A.dot(p_P).dot(A.T) + Q # predicted variance 1324 # Prediction step <- 1325 1326 if calc_grad_log_likelihood: 1327 dA_all_params = p_dyn_model_callable.dAk(k) # derivatives of A wrt parameters 1328 dQ_all_params = p_dyn_model_callable.dQk(k) # derivatives of Q wrt parameters 1329 1330 param_number = p_dP.shape[2] 1331 1332 # p_dm, p_dP - derivatives form the previoius step 1333 dm_pred = np.empty(p_dm.shape) 1334 dP_pred = np.empty(p_dP.shape) 1335 1336 for j in range(param_number): 1337 dA = dA_all_params[:,:,j] 1338 dQ = dQ_all_params[:,:,j] 1339 1340 dP = p_dP[:,:,j] 1341 dm = p_dm[:,:,j] 1342 dm_pred[:,:,j] = np.dot(dA, p_m) + np.dot(A, dm) 1343 # prediction step derivatives for current parameter: 1344 1345 dP_pred[:,:,j] = np.dot( dA ,np.dot(p_P, A.T)) 1346 dP_pred[:,:,j] += dP_pred[:,:,j].T 1347 dP_pred[:,:,j] += np.dot( A ,np.dot(dP, A.T)) + dQ 1348 1349 dP_pred[:,:,j] = 0.5*(dP_pred[:,:,j] + dP_pred[:,:,j].T) #symmetrize 1350 else: 1351 dm_pred = None 1352 dP_pred = None 1353 1354 return m_pred, P_pred, dm_pred, dP_pred 1355 1356 @staticmethod 1357 def _kalman_prediction_step_SVD(k, p_m , p_P, p_dyn_model_callable, calc_grad_log_likelihood=False, 1358 p_dm = None, p_dP = None): 1359 """ 1360 Desctrete prediction function 1361 1362 Input: 1363 k:int 1364 Iteration No. Starts at 0. Total number of iterations equal to the 1365 number of measurements. 1366 1367 p_m: matrix of size (state_dim, time_series_no) 1368 Mean value from the previous step. For "multiple time series mode" 1369 it is matrix, second dimension of which correspond to different 1370 time series. 1371 1372 p_P: tuple (Prev_cov, S, V) 1373 Covariance matrix from the previous step and its SVD decomposition. 1374 Prev_cov = V * S * V.T The tuple is (Prev_cov, S, V) 1375 1376 p_dyn_model_callable: object 1377 1378 calc_grad_log_likelihood: boolean 1379 Whether to calculate gradient of the marginal likelihood 1380 of the state-space model. If true then the next parameter must 1381 provide the extra parameters for gradient calculation. 1382 1383 p_dm: 3D array (state_dim, time_series_no, parameters_no) 1384 Mean derivatives from the previous step. For "multiple time series mode" 1385 it is 3D array, second dimension of which correspond to different 1386 time series. 1387 1388 p_dP: 3D array (state_dim, state_dim, parameters_no) 1389 Mean derivatives from the previous step 1390 1391 Output: 1392 ---------------------------- 1393 m_pred, P_pred, dm_pred, dP_pred: metrices, 3D objects 1394 Results of the prediction steps. 1395 1396 """ 1397 1398 # covariance from the previous step and its SVD decomposition 1399 # p_prev_cov = v * S * V.T 1400 Prev_cov, S_old, V_old = p_P 1401 #p_prev_cov_tst = np.dot(p_V, (p_S * p_V).T) # reconstructed covariance from the previous step 1402 1403 # index correspond to values from previous iteration. 1404 A = p_dyn_model_callable.Ak(k,p_m,Prev_cov) # state transition matrix (or Jacobian) 1405 Q = p_dyn_model_callable.Qk(k) # state noise matrx. This is necessary for the square root calculation (next step) 1406 Q_sr = p_dyn_model_callable.Q_srk(k) 1407 # Prediction step -> 1408 m_pred = p_dyn_model_callable.f_a(k, p_m, A) # predicted mean 1409 1410 # coavariance prediction have changed: 1411 svd_1_matr = np.vstack( ( (np.sqrt(S_old)* np.dot(A,V_old)).T , Q_sr.T) ) 1412 (U,S,Vh) = sp.linalg.svd( svd_1_matr,full_matrices=False, compute_uv=True, 1413 overwrite_a=False,check_finite=True) 1414 1415 # predicted variance computed by the regular method. For testing 1416 #P_pred_tst = A.dot(Prev_cov).dot(A.T) + Q 1417 V_new = Vh.T 1418 S_new = S**2 1419 1420 P_pred = np.dot(V_new * S_new, V_new.T) # prediction covariance 1421 P_pred = (P_pred, S_new, Vh.T) 1422 # Prediction step <- 1423 1424 # derivatives 1425 if calc_grad_log_likelihood: 1426 dA_all_params = p_dyn_model_callable.dAk(k) # derivatives of A wrt parameters 1427 dQ_all_params = p_dyn_model_callable.dQk(k) # derivatives of Q wrt parameters 1428 1429 param_number = p_dP.shape[2] 1430 1431 # p_dm, p_dP - derivatives form the previoius step 1432 dm_pred = np.empty(p_dm.shape) 1433 dP_pred = np.empty(p_dP.shape) 1434 1435 for j in range(param_number): 1436 dA = dA_all_params[:,:,j] 1437 dQ = dQ_all_params[:,:,j] 1438 1439 #dP = p_dP[:,:,j] 1440 #dm = p_dm[:,:,j] 1441 dm_pred[:,:,j] = np.dot(dA, p_m) + np.dot(A, p_dm[:,:,j]) 1442 # prediction step derivatives for current parameter: 1443 1444 1445 dP_pred[:,:,j] = np.dot( dA ,np.dot(Prev_cov, A.T)) 1446 dP_pred[:,:,j] += dP_pred[:,:,j].T 1447 dP_pred[:,:,j] += np.dot( A ,np.dot(p_dP[:,:,j], A.T)) + dQ 1448 1449 dP_pred[:,:,j] = 0.5*(dP_pred[:,:,j] + dP_pred[:,:,j].T) #symmetrize 1450 else: 1451 dm_pred = None 1452 dP_pred = None 1453 1454 return m_pred, P_pred, dm_pred, dP_pred 1455 1456 @staticmethod 1457 def _kalman_update_step(k, p_m , p_P, p_meas_model_callable, measurement, calc_log_likelihood= False, 1458 calc_grad_log_likelihood=False, p_dm = None, p_dP = None): 1459 """ 1460 Input: 1461 1462 k: int 1463 Iteration No. Starts at 0. Total number of iterations equal to the 1464 number of measurements. 1465 1466 m_P: matrix of size (state_dim, time_series_no) 1467 Mean value from the previous step. For "multiple time series mode" 1468 it is matrix, second dimension of which correspond to different 1469 time series. 1470 1471 p_P: 1472 Covariance matrix from the prediction step. 1473 1474 p_meas_model_callable: object 1475 1476 measurement: (measurement_dim, time_series_no) matrix 1477 One measurement used on the current update step. For 1478 "multiple time series mode" it is matrix, second dimension of 1479 which correspond to different time series. 1480 1481 calc_log_likelihood: boolean 1482 Whether to calculate marginal likelihood of the state-space model. 1483 1484 calc_grad_log_likelihood: boolean 1485 Whether to calculate gradient of the marginal likelihood 1486 of the state-space model. If true then the next parameter must 1487 provide the extra parameters for gradient calculation. 1488 1489 p_dm: 3D array (state_dim, time_series_no, parameters_no) 1490 Mean derivatives from the prediction step. For "multiple time series mode" 1491 it is 3D array, second dimension of which correspond to different 1492 time series. 1493 1494 p_dP: array 1495 Covariance derivatives from the prediction step. 1496 1497 Output: 1498 ---------------------------- 1499 m_upd, P_upd, dm_upd, dP_upd: metrices, 3D objects 1500 Results of the prediction steps. 1501 1502 log_likelihood_update: double or 1D array 1503 Update to the log_likelihood from this step 1504 1505 d_log_likelihood_update: (grad_params_no, time_series_no) matrix 1506 Update to the gradient of log_likelihood, "multiple time series mode" 1507 adds extra columns to the gradient. 1508 1509 """ 1510 #import pdb; pdb.set_trace() 1511 1512 m_pred = p_m # from prediction step 1513 P_pred = p_P # from prediction step 1514 1515 H = p_meas_model_callable.Hk(k, m_pred, P_pred) 1516 R = p_meas_model_callable.Rk(k) 1517 1518 time_series_no = p_m.shape[1] # number of time serieses 1519 1520 log_likelihood_update=None; dm_upd=None; dP_upd=None; d_log_likelihood_update=None 1521 # Update step (only if there is data) 1522 #if not np.any(np.isnan(measurement)): # TODO: if some dimensions are missing, do properly computations for other. 1523 v = measurement-p_meas_model_callable.f_h(k, m_pred, H) 1524 S = H.dot(P_pred).dot(H.T) + R 1525 if measurement.shape[0]==1: # measurements are one dimensional 1526 if (S < 0): 1527 raise ValueError("Kalman Filter Update: S is negative step %i" % k ) 1528 #import pdb; pdb.set_trace() 1529 1530 K = P_pred.dot(H.T) / S 1531 if calc_log_likelihood: 1532 log_likelihood_update = -0.5 * ( np.log(2*np.pi) + np.log(S) + 1533 v*v / S) 1534 #log_likelihood_update = log_likelihood_update[0,0] # to make int 1535 if np.any(np.isnan(log_likelihood_update)): # some member in P_pred is None. 1536 raise ValueError("Nan values in likelihood update!") 1537 LL = None; islower = None 1538 else: 1539 LL,islower = linalg.cho_factor(S) 1540 K = linalg.cho_solve((LL,islower), H.dot(P_pred.T)).T 1541 1542 if calc_log_likelihood: 1543 log_likelihood_update = -0.5 * ( v.shape[0]*np.log(2*np.pi) + 1544 2*np.sum( np.log(np.diag(LL)) ) +\ 1545 np.sum((linalg.cho_solve((LL,islower),v)) * v, axis = 0) ) # diagonal of v.T*S^{-1}*v 1546 1547 if calc_grad_log_likelihood: 1548 dm_pred_all_params = p_dm # derivativas of the prediction phase 1549 dP_pred_all_params = p_dP 1550 1551 param_number = p_dP.shape[2] 1552 1553 dH_all_params = p_meas_model_callable.dHk(k) 1554 dR_all_params = p_meas_model_callable.dRk(k) 1555 1556 dm_upd = np.empty(dm_pred_all_params.shape) 1557 dP_upd = np.empty(dP_pred_all_params.shape) 1558 1559 # firts dimension parameter_no, second - time series number 1560 d_log_likelihood_update = np.empty((param_number,time_series_no)) 1561 for param in range(param_number): 1562 1563 dH = dH_all_params[:,:,param] 1564 dR = dR_all_params[:,:,param] 1565 1566 dm_pred = dm_pred_all_params[:,:,param] 1567 dP_pred = dP_pred_all_params[:,:,param] 1568 1569 # Terms in the likelihood derivatives 1570 dv = - np.dot( dH, m_pred) - np.dot( H, dm_pred) 1571 dS = np.dot(dH, np.dot( P_pred, H.T)) 1572 dS += dS.T 1573 dS += np.dot(H, np.dot( dP_pred, H.T)) + dR 1574 1575 # TODO: maybe symmetrize dS 1576 1577 #dm and dP for the next stem 1578 if LL is not None: # the state vector is not a scalar 1579 tmp1 = linalg.cho_solve((LL,islower), H).T 1580 tmp2 = linalg.cho_solve((LL,islower), dH).T 1581 tmp3 = linalg.cho_solve((LL,islower), dS).T 1582 else: # the state vector is a scalar 1583 tmp1 = H.T / S 1584 tmp2 = dH.T / S 1585 tmp3 = dS.T / S 1586 1587 dK = np.dot( dP_pred, tmp1) + np.dot( P_pred, tmp2) - \ 1588 np.dot( P_pred, np.dot( tmp1, tmp3 ) ) 1589 1590 # terms required for the next step, save this for each parameter 1591 dm_upd[:,:,param] = dm_pred + np.dot(dK, v) + np.dot(K, dv) 1592 1593 dP_upd[:,:,param] = -np.dot(dK, np.dot(S, K.T)) 1594 dP_upd[:,:,param] += dP_upd[:,:,param].T 1595 dP_upd[:,:,param] += dP_pred - np.dot(K , np.dot( dS, K.T)) 1596 1597 dP_upd[:,:,param] = 0.5*(dP_upd[:,:,param] + dP_upd[:,:,param].T) #symmetrize 1598 # computing the likelihood change for each parameter: 1599 if LL is not None: # the state vector is not 1D 1600 #tmp4 = linalg.cho_solve((LL,islower), dv) 1601 tmp5 = linalg.cho_solve((LL,islower), v) 1602 else: # the state vector is a scalar 1603 #tmp4 = dv / S 1604 tmp5 = v / S 1605 1606 1607 d_log_likelihood_update[param,:] = -(0.5*np.sum(np.diag(tmp3)) + \ 1608 np.sum(tmp5*dv, axis=0) - 0.5 * np.sum(tmp5 * np.dot(dS, tmp5), axis=0) ) 1609 # Before 1610 #d_log_likelihood_update[param,0] = -(0.5*np.sum(np.diag(tmp3)) + \ 1611 #np.dot(tmp5.T, dv) - 0.5 * np.dot(tmp5.T ,np.dot(dS, tmp5)) ) 1612 1613 1614 1615 # Compute the actual updates for mean and variance of the states. 1616 m_upd = m_pred + K.dot( v ) 1617 1618 # Covariance update and ensure it is symmetric 1619 P_upd = K.dot(S).dot(K.T) 1620 P_upd = 0.5*(P_upd + P_upd.T) 1621 P_upd = P_pred - P_upd# this update matrix is symmetric 1622 1623 return m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update 1624 1625 @staticmethod 1626 def _kalman_update_step_SVD(k, p_m , p_P, p_meas_model_callable, measurement, calc_log_likelihood= False, 1627 calc_grad_log_likelihood=False, p_dm = None, p_dP = None): 1628 """ 1629 Input: 1630 1631 k: int 1632 Iteration No. Starts at 0. Total number of iterations equal to the 1633 number of measurements. 1634 1635 m_P: matrix of size (state_dim, time_series_no) 1636 Mean value from the previous step. For "multiple time series mode" 1637 it is matrix, second dimension of which correspond to different 1638 time series. 1639 1640 p_P: tuple (P_pred, S, V) 1641 Covariance matrix from the prediction step and its SVD decomposition. 1642 P_pred = V * S * V.T The tuple is (P_pred, S, V) 1643 1644 p_h: function (k, x_{k}, H_{k}). Measurement function. 1645 k (iteration number), starts at 0 1646 x_{k} state 1647 H_{k} Jacobian matrices of f_h. In the linear case it is exactly H_{k}. 1648 1649 p_f_H: function (k, m, P) return Jacobian of measurement function, it is 1650 passed into p_h. 1651 k (iteration number), starts at 0 1652 m: point where Jacobian is evaluated 1653 P: parameter for Jacobian, usually covariance matrix. 1654 1655 p_f_R: function (k). Returns noise matrix of measurement equation 1656 on iteration k. 1657 k (iteration number). starts at 0 1658 1659 p_f_iRsr: function (k). Returns the square root of the noise matrix of 1660 measurement equation on iteration k. 1661 k (iteration number). starts at 0 1662 1663 measurement: (measurement_dim, time_series_no) matrix 1664 One measurement used on the current update step. For 1665 "multiple time series mode" it is matrix, second dimension of 1666 which correspond to different time series. 1667 1668 calc_log_likelihood: boolean 1669 Whether to calculate marginal likelihood of the state-space model. 1670 1671 calc_grad_log_likelihood: boolean 1672 Whether to calculate gradient of the marginal likelihood 1673 of the state-space model. If true then the next parameter must 1674 provide the extra parameters for gradient calculation. 1675 1676 p_dm: 3D array (state_dim, time_series_no, parameters_no) 1677 Mean derivatives from the prediction step. For "multiple time series mode" 1678 it is 3D array, second dimension of which correspond to different 1679 time series. 1680 1681 p_dP: array 1682 Covariance derivatives from the prediction step. 1683 1684 grad_calc_params_2: List or None 1685 List with derivatives. The first component is 'f_dH' - function(k) 1686 which returns the derivative of H. The second element is 'f_dR' 1687 - function(k). Function which returns the derivative of R. 1688 1689 Output: 1690 ---------------------------- 1691 m_upd, P_upd, dm_upd, dP_upd: metrices, 3D objects 1692 Results of the prediction steps. 1693 1694 log_likelihood_update: double or 1D array 1695 Update to the log_likelihood from this step 1696 1697 d_log_likelihood_update: (grad_params_no, time_series_no) matrix 1698 Update to the gradient of log_likelihood, "multiple time series mode" 1699 adds extra columns to the gradient. 1700 1701 """ 1702 1703 #import pdb; pdb.set_trace() 1704 1705 m_pred = p_m # from prediction step 1706 P_pred,S_pred,V_pred = p_P # from prediction step 1707 1708 H = p_meas_model_callable.Hk(k, m_pred, P_pred) 1709 R = p_meas_model_callable.Rk(k) 1710 R_isr = p_meas_model_callable.R_isrk(k) # square root of the inverse of R matrix 1711 1712 time_series_no = p_m.shape[1] # number of time serieses 1713 1714 log_likelihood_update=None; dm_upd=None; dP_upd=None; d_log_likelihood_update=None 1715 # Update step (only if there is data) 1716 #if not np.any(np.isnan(measurement)): # TODO: if some dimensions are missing, do properly computations for other. 1717 v = measurement-p_meas_model_callable.f_h(k, m_pred, H) 1718 1719 svd_2_matr = np.vstack( ( np.dot( R_isr.T, np.dot(H, V_pred)) , np.diag( 1.0/np.sqrt(S_pred) ) ) ) 1720 1721 (U,S,Vh) = sp.linalg.svd( svd_2_matr,full_matrices=False, compute_uv=True, 1722 overwrite_a=False,check_finite=True) 1723 1724 # P_upd = U_upd S_upd**2 U_upd.T 1725 U_upd = np.dot(V_pred, Vh.T) 1726 S_upd = (1.0/S)**2 1727 1728 P_upd = np.dot(U_upd * S_upd, U_upd.T) # update covariance 1729 P_upd = (P_upd,S_upd,U_upd) # tuple to pass to the next step 1730 1731 # stil need to compute S and K for derivative computation 1732 S = H.dot(P_pred).dot(H.T) + R 1733 if measurement.shape[0]==1: # measurements are one dimensional 1734 if (S < 0): 1735 raise ValueError("Kalman Filter Update: S is negative step %i" % k ) 1736 #import pdb; pdb.set_trace() 1737 1738 K = P_pred.dot(H.T) / S 1739 if calc_log_likelihood: 1740 log_likelihood_update = -0.5 * ( np.log(2*np.pi) + np.log(S) + 1741 v*v / S) 1742 #log_likelihood_update = log_likelihood_update[0,0] # to make int 1743 if np.any(np.isnan(log_likelihood_update)): # some member in P_pred is None. 1744 raise ValueError("Nan values in likelihood update!") 1745 LL = None; islower = None 1746 else: 1747 LL,islower = linalg.cho_factor(S) 1748 K = linalg.cho_solve((LL,islower), H.dot(P_pred.T)).T 1749 1750 if calc_log_likelihood: 1751 log_likelihood_update = -0.5 * ( v.shape[0]*np.log(2*np.pi) + 1752 2*np.sum( np.log(np.diag(LL)) ) +\ 1753 np.sum((linalg.cho_solve((LL,islower),v)) * v, axis = 0) ) # diagonal of v.T*S^{-1}*v 1754 1755 1756 # Old method of computing updated covariance (for testing) -> 1757 #P_upd_tst = K.dot(S).dot(K.T) 1758 #P_upd_tst = 0.5*(P_upd_tst + P_upd_tst.T) 1759 #P_upd_tst = P_pred - P_upd_tst# this update matrix is symmetric 1760 # Old method of computing updated covariance (for testing) <- 1761 1762 if calc_grad_log_likelihood: 1763 dm_pred_all_params = p_dm # derivativas of the prediction phase 1764 dP_pred_all_params = p_dP 1765 1766 param_number = p_dP.shape[2] 1767 1768 dH_all_params = p_meas_model_callable.dHk(k) 1769 dR_all_params = p_meas_model_callable.dRk(k) 1770 1771 dm_upd = np.empty(dm_pred_all_params.shape) 1772 dP_upd = np.empty(dP_pred_all_params.shape) 1773 1774 # firts dimension parameter_no, second - time series number 1775 d_log_likelihood_update = np.empty((param_number,time_series_no)) 1776 for param in range(param_number): 1777 1778 dH = dH_all_params[:,:,param] 1779 dR = dR_all_params[:,:,param] 1780 1781 dm_pred = dm_pred_all_params[:,:,param] 1782 dP_pred = dP_pred_all_params[:,:,param] 1783 1784 # Terms in the likelihood derivatives 1785 dv = - np.dot( dH, m_pred) - np.dot( H, dm_pred) 1786 dS = np.dot(dH, np.dot( P_pred, H.T)) 1787 dS += dS.T 1788 dS += np.dot(H, np.dot( dP_pred, H.T)) + dR 1789 1790 # TODO: maybe symmetrize dS 1791 1792 #dm and dP for the next stem 1793 if LL is not None: # the state vector is not a scalar 1794 tmp1 = linalg.cho_solve((LL,islower), H).T 1795 tmp2 = linalg.cho_solve((LL,islower), dH).T 1796 tmp3 = linalg.cho_solve((LL,islower), dS).T 1797 else: # the state vector is a scalar 1798 tmp1 = H.T / S 1799 tmp2 = dH.T / S 1800 tmp3 = dS.T / S 1801 1802 dK = np.dot( dP_pred, tmp1) + np.dot( P_pred, tmp2) - \ 1803 np.dot( P_pred, np.dot( tmp1, tmp3 ) ) 1804 1805 # terms required for the next step, save this for each parameter 1806 dm_upd[:,:,param] = dm_pred + np.dot(dK, v) + np.dot(K, dv) 1807 1808 dP_upd[:,:,param] = -np.dot(dK, np.dot(S, K.T)) 1809 dP_upd[:,:,param] += dP_upd[:,:,param].T 1810 dP_upd[:,:,param] += dP_pred - np.dot(K , np.dot( dS, K.T)) 1811 1812 dP_upd[:,:,param] = 0.5*(dP_upd[:,:,param] + dP_upd[:,:,param].T) #symmetrize 1813 # computing the likelihood change for each parameter: 1814 if LL is not None: # the state vector is not 1D 1815 tmp5 = linalg.cho_solve((LL,islower), v) 1816 else: # the state vector is a scalar 1817 tmp5 = v / S 1818 1819 1820 d_log_likelihood_update[param,:] = -(0.5*np.sum(np.diag(tmp3)) + \ 1821 np.sum(tmp5*dv, axis=0) - 0.5 * np.sum(tmp5 * np.dot(dS, tmp5), axis=0) ) 1822 # Before 1823 #d_log_likelihood_update[param,0] = -(0.5*np.sum(np.diag(tmp3)) + \ 1824 #np.dot(tmp5.T, dv) - 0.5 * np.dot(tmp5.T ,np.dot(dS, tmp5)) ) 1825 1826 # Compute the actual updates for mean of the states. Variance update 1827 # is computed earlier. 1828 m_upd = m_pred + K.dot( v ) 1829 1830 return m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update 1831 1832 @staticmethod 1833 def _rts_smoother_update_step(k, p_m , p_P, p_m_pred, p_P_pred, p_m_prev_step, 1834 p_P_prev_step, p_dynamic_callables): 1835 """ 1836 Rauch–Tung–Striebel(RTS) update step 1837 1838 Input: 1839 ----------------------------- 1840 k: int 1841 Iteration No. Starts at 0. Total number of iterations equal to the 1842 number of measurements. 1843 1844 p_m: matrix of size (state_dim, time_series_no) 1845 Filter mean on step k 1846 1847 p_P: matrix of size (state_dim,state_dim) 1848 Filter Covariance on step k 1849 1850 p_m_pred: matrix of size (state_dim, time_series_no) 1851 Means from the smoother prediction step. 1852 1853 p_P_pred: 1854 Covariance from the smoother prediction step. 1855 1856 p_m_prev_step 1857 Smoother mean from the previous step. 1858 1859 p_P_prev_step: 1860 Smoother covariance from the previous step. 1861 1862 p_f_A: function (k, m, P) return Jacobian of dynamic function, it is 1863 passed into p_a. 1864 k (iteration number), starts at 0 1865 m: point where Jacobian is evaluated 1866 P: parameter for Jacobian, usually covariance matrix. 1867 1868 """ 1869 1870 A = p_dynamic_callables.Ak(k,p_m,p_P) # state transition matrix (or Jacobian) 1871 1872 tmp = np.dot( A, p_P.T) 1873 if A.shape[0] == 1: # 1D states 1874 G = tmp.T / p_P_pred # P[:,:,k] is symmetric 1875 else: 1876 try: 1877 LL,islower = linalg.cho_factor(p_P_pred) 1878 G = linalg.cho_solve((LL,islower),tmp).T 1879 except: 1880 # It happende that p_P_pred has several near zero eigenvalues 1881 # hence the Cholesky method does not work. 1882 res = sp.linalg.lstsq(p_P_pred, tmp) 1883 G = res[0].T 1884 1885 m_upd = p_m + G.dot( p_m_prev_step-p_m_pred ) 1886 P_upd = p_P + G.dot( p_P_prev_step-p_P_pred).dot(G.T) 1887 1888 P_upd = 0.5*(P_upd + P_upd.T) 1889 1890 return m_upd, P_upd, G 1891 1892 @classmethod 1893 def rts_smoother(cls,state_dim, p_dynamic_callables, filter_means, 1894 filter_covars): 1895 """ 1896 This function implements Rauch–Tung–Striebel(RTS) smoother algorithm 1897 based on the results of kalman_filter_raw. 1898 These notations are the same: 1899 x_{k} = A_{k} * x_{k-1} + q_{k-1}; q_{k-1} ~ N(0, Q_{k-1}) 1900 y_{k} = H_{k} * x_{k} + r_{k}; r_{k-1} ~ N(0, R_{k}) 1901 1902 Returns estimated smoother distributions x_{k} ~ N(m_{k}, P(k)) 1903 1904 Input: 1905 -------------- 1906 1907 p_a: function (k, x_{k-1}, A_{k}). Dynamic function. 1908 k (iteration number), starts at 0 1909 x_{k-1} State from the previous step 1910 A_{k} Jacobian matrices of f_a. In the linear case it is exactly A_{k}. 1911 1912 p_f_A: function (k, m, P) return Jacobian of dynamic function, it is 1913 passed into p_a. 1914 k (iteration number), starts at 0 1915 m: point where Jacobian is evaluated 1916 P: parameter for Jacobian, usually covariance matrix. 1917 1918 p_f_Q: function (k). Returns noise matrix of dynamic model on iteration k. 1919 k (iteration number). starts at 0 1920 1921 filter_means: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array 1922 Results of the Kalman Filter means estimation. 1923 1924 filter_covars: (no_steps+1, state_dim, state_dim) 3D array 1925 Results of the Kalman Filter covariance estimation. 1926 1927 Output: 1928 ------------- 1929 1930 M: (no_steps+1, state_dim) matrix 1931 Smoothed estimates of the state means 1932 1933 P: (no_steps+1, state_dim, state_dim) 3D array 1934 Smoothed estimates of the state covariances 1935 """ 1936 1937 no_steps = filter_covars.shape[0]-1# number of steps (minus initial covariance) 1938 1939 M = np.empty(filter_means.shape) # smoothed means 1940 P = np.empty(filter_covars.shape) # smoothed covars 1941 #G = np.empty( (no_steps,state_dim,state_dim) ) # G from the update step of the smoother 1942 1943 M[-1,:] = filter_means[-1,:] 1944 P[-1,:,:] = filter_covars[-1,:,:] 1945 for k in range(no_steps-1,-1,-1): 1946 1947 m_pred, P_pred, tmp1, tmp2 = \ 1948 cls._kalman_prediction_step(k, filter_means[k,:], 1949 filter_covars[k,:,:], p_dynamic_callables, 1950 calc_grad_log_likelihood=False) 1951 p_m = filter_means[k,:] 1952 if len(p_m.shape)<2: 1953 p_m.shape = (p_m.shape[0],1) 1954 1955 p_m_prev_step = M[k+1,:] 1956 if len(p_m_prev_step.shape)<2: 1957 p_m_prev_step.shape = (p_m_prev_step.shape[0],1) 1958 1959 m_upd, P_upd, G_tmp = cls._rts_smoother_update_step(k, 1960 p_m ,filter_covars[k,:,:], 1961 m_pred, P_pred, p_m_prev_step ,P[k+1,:,:], p_dynamic_callables) 1962 1963 M[k,:] = m_upd#np.squeeze(m_upd) 1964 P[k,:,:] = P_upd 1965 #G[k,:,:] = G_upd.T # store transposed G. 1966 # Return values 1967 1968 return (M, P) #, G) 1969 1970 @staticmethod 1971 def _EM_gradient(A,Q,H,R,m_init,P_init,measurements, M, P, G, dA, dQ, dH, dR, dm_init, dP_init): 1972 """ 1973 Gradient computation with the EM algorithm. 1974 1975 Input: 1976 ----------------- 1977 1978 M: Means from the smoother 1979 P: Variances from the smoother 1980 G: Gains? from the smoother 1981 """ 1982 import pdb; pdb.set_trace(); 1983 1984 param_number = dA.shape[-1] 1985 d_log_likelihood_update = np.empty((param_number,1)) 1986 1987 sample_no = measurements.shape[0] 1988 P_1 = P[1:,:,:] # remove 0-th step 1989 P_2 = P[0:-1,:,:] # remove 0-th step 1990 1991 M_1 = M[1:,:] # remove 0-th step 1992 M_2 = M[0:-1,:] # remove the last step 1993 1994 Sigma = np.mean(P_1,axis=0) + np.dot(M_1.T, M_1) / sample_no # 1995 Phi = np.mean(P_2,axis=0) + np.dot(M_2.T, M_2) / sample_no # 1996 1997 B = np.dot( measurements.T, M_1 )/ sample_no 1998 C = (sp.einsum( 'ijk,ikl', P_1, G) + np.dot(M_1.T, M_2)) / sample_no # 1999 2000# C1 = np.zeros( (P_1.shape[1],P_1.shape[1]) ) 2001# for k in range(P_1.shape[0]): 2002# C1 += np.dot(P_1[k,:,:],G[k,:,:]) + sp.outer( M_1[k,:], M_2[k,:] ) 2003# C1 = C1 / sample_no 2004 2005 D = np.dot( measurements.T, measurements ) / sample_no 2006 2007 try: 2008 P_init_inv = sp.linalg.inv(P_init) 2009 2010 if np.max( np.abs(P_init_inv)) > 10e13: 2011 compute_P_init_terms = False 2012 else: 2013 compute_P_init_terms = True 2014 except np.linalg.LinAlgError: 2015 compute_P_init_terms = False 2016 2017 try: 2018 Q_inv = sp.linalg.inv(Q) 2019 2020 if np.max( np.abs(Q_inv)) > 10e13: 2021 compute_Q_terms = False 2022 else: 2023 compute_Q_terms = True 2024 except np.linalg.LinAlgError: 2025 compute_Q_terms = False 2026 2027 try: 2028 R_inv = sp.linalg.inv(R) 2029 2030 if np.max( np.abs(R_inv)) > 10e13: 2031 compute_R_terms = False 2032 else: 2033 compute_R_terms = True 2034 except np.linalg.LinAlgError: 2035 compute_R_terms = False 2036 2037 2038 d_log_likelihood_update = np.zeros((param_number,1)) 2039 for j in range(param_number): 2040 if compute_P_init_terms: 2041 d_log_likelihood_update[j,:] -= 0.5 * np.sum(P_init_inv* dP_init[:,:,j].T ) #p #m 2042 2043 M0_smoothed = M[0]; M0_smoothed.shape = (M0_smoothed.shape[0],1) 2044 tmp1 = np.dot( dP_init[:,:,j], np.dot( P_init_inv, (P[0,:,:] + sp.outer( (M0_smoothed - m_init), (M0_smoothed - m_init) )) ) ) #p #m 2045 d_log_likelihood_update[j,:] += 0.5 * np.sum(P_init_inv* tmp1.T ) 2046 2047 tmp2 = sp.outer( dm_init[:,j], M0_smoothed ) 2048 tmp2 += tmp2.T 2049 d_log_likelihood_update[j,:] += 0.5 * np.sum(P_init_inv* tmp2.T ) 2050 2051 if compute_Q_terms: 2052 2053 d_log_likelihood_update[j,:] -= sample_no/2.0 * np.sum(Q_inv* dQ[:,:,j].T ) #m 2054 2055 tmp1 = np.dot(C,A.T); tmp1 += tmp1.T; tmp1 = Sigma - tmp1 + np.dot(A, np.dot(Phi,A.T)) #m 2056 tmp1 = np.dot( dQ[:,:,j], np.dot( Q_inv, tmp1) ) 2057 d_log_likelihood_update[j,:] += sample_no/2.0 * np.sum(Q_inv * tmp1.T) 2058 2059 tmp2 = np.dot( dA[:,:,j], C.T); tmp2 += tmp2.T; 2060 tmp3 = np.dot(dA[:,:,j], np.dot(Phi,A.T)); tmp3 += tmp3.T 2061 d_log_likelihood_update[j,:] -= sample_no/2.0 * np.sum(Q_inv.T * (tmp3 - tmp2) ) 2062 2063 if compute_R_terms: 2064 d_log_likelihood_update[j,:] -= sample_no/2.0 * np.sum(R_inv* dR[:,:,j].T ) 2065 2066 tmp1 = np.dot(B,H.T); tmp1 += tmp1.T; tmp1 = D - tmp1 + np.dot(H, np.dot(Sigma,H.T)) 2067 tmp1 = np.dot( dR[:,:,j], np.dot( R_inv, tmp1) ) 2068 d_log_likelihood_update[j,:] += sample_no/2.0 * np.sum(R_inv * tmp1.T) 2069 2070 tmp2 = np.dot( dH[:,:,j], B.T); tmp2 += tmp2.T; 2071 tmp3 = np.dot(dH[:,:,j], np.dot(Sigma,H.T)); tmp3 += tmp3.T 2072 d_log_likelihood_update[j,:] -= sample_no/2.0 * np.sum(R_inv.T * (tmp3 - tmp2) ) 2073 2074 return d_log_likelihood_update 2075 2076 @staticmethod 2077 def _check_SS_matrix(p_M, state_dim, measurement_dim, which='A'): 2078 """ 2079 Veryfy that on exit the matrix has appropriate shape for the KF algorithm. 2080 2081 Input: 2082 p_M: matrix 2083 As it is given for the user 2084 state_dim: int 2085 State dimensioanlity 2086 measurement_dim: int 2087 Measurement dimensionality 2088 which: string 2089 One of: 'A', 'Q', 'H', 'R' 2090 Output: 2091 --------------- 2092 p_M: matrix of the right shape 2093 2094 old_M_shape: tuple 2095 Old Shape 2096 """ 2097 2098 old_M_shape = None 2099 if len(p_M.shape) < 3: # new shape is 3 dimensional 2100 old_M_shape = p_M.shape # save shape to restore it on exit 2101 if len(p_M.shape) == 2: # matrix 2102 p_M.shape = (p_M.shape[0],p_M.shape[1],1) 2103 elif len(p_M.shape) == 1: # scalar but in array already 2104 if (p_M.shape[0] != 1): 2105 raise ValueError("Matrix %s is an 1D array, while it must be a matrix or scalar", which) 2106 else: 2107 p_M.shape = (1,1,1) 2108 2109 if (which == 'A') or (which == 'Q'): 2110 if (p_M.shape[0] != state_dim) or (p_M.shape[1] != state_dim): 2111 raise ValueError("%s must be a square matrix of size (%i,%i)" % (which, state_dim, state_dim)) 2112 if (which == 'H'): 2113 if (p_M.shape[0] != measurement_dim) or (p_M.shape[1] != state_dim): 2114 raise ValueError("H must be of shape (measurement_dim, state_dim) (%i,%i)" % (measurement_dim, state_dim)) 2115 if (which == 'R'): 2116 if (p_M.shape[0] != measurement_dim) or (p_M.shape[1] != measurement_dim): 2117 raise ValueError("R must be of shape (measurement_dim, measurement_dim) (%i,%i)" % (measurement_dim, measurement_dim)) 2118 2119 return (p_M,old_M_shape) 2120 2121 @staticmethod 2122 def _check_grad_state_matrices(dM, state_dim, grad_params_no, which = 'dA'): 2123 """ 2124 Function checks (mostly check dimensions) matrices for marginal likelihood 2125 gradient parameters calculation. It check dA, dQ matrices. 2126 2127 Input: 2128 ------------- 2129 dM: None, scaler or 3D matrix 2130 It is supposed to be (state_dim,state_dim,grad_params_no) matrix. 2131 If None then zero matrix is assumed. If scalar then the function 2132 checks consistency with "state_dim" and "grad_params_no". 2133 2134 state_dim: int 2135 State dimensionality 2136 2137 grad_params_no: int 2138 How many parrameters of likelihood gradient in total. 2139 2140 which: string 2141 'dA' or 'dQ' 2142 2143 2144 Output: 2145 -------------- 2146 function of (k) which returns the parameters matrix. 2147 2148 """ 2149 2150 2151 if dM is None: 2152 dM=np.zeros((state_dim,state_dim,grad_params_no)) 2153 elif isinstance(dM, np.ndarray): 2154 if state_dim == 1: 2155 if len(dM.shape) < 3: 2156 dM.shape = (1,1,1) 2157 else: 2158 if len(dM.shape) < 3: 2159 dM.shape = (state_dim,state_dim,1) 2160 elif isinstance(dM, np.int): 2161 if state_dim > 1: 2162 raise ValueError("When computing likelihood gradient wrong %s dimension." % which) 2163 else: 2164 dM = np.ones((1,1,1)) * dM 2165 2166# if not isinstance(dM, types.FunctionType): 2167# f_dM = lambda k: dM 2168# else: 2169# f_dM = dM 2170 2171 return dM 2172 2173 2174 @staticmethod 2175 def _check_grad_measurement_matrices(dM, state_dim, grad_params_no, measurement_dim, which = 'dH'): 2176 """ 2177 Function checks (mostly check dimensions) matrices for marginal likelihood 2178 gradient parameters calculation. It check dH, dR matrices. 2179 2180 Input: 2181 ------------- 2182 dM: None, scaler or 3D matrix 2183 It is supposed to be 2184 (measurement_dim ,state_dim,grad_params_no) for "dH" matrix. 2185 (measurement_dim,measurement_dim,grad_params_no) for "dR" 2186 2187 If None then zero matrix is assumed. If scalar then the function 2188 checks consistency with "state_dim" and "grad_params_no". 2189 2190 state_dim: int 2191 State dimensionality 2192 2193 grad_params_no: int 2194 How many parrameters of likelihood gradient in total. 2195 2196 measurement_dim: int 2197 Dimensionality of measurements. 2198 2199 which: string 2200 'dH' or 'dR' 2201 2202 2203 Output: 2204 -------------- 2205 function of (k) which returns the parameters matrix. 2206 """ 2207 2208 if dM is None: 2209 if which == 'dH': 2210 dM=np.zeros((measurement_dim ,state_dim,grad_params_no)) 2211 elif which == 'dR': 2212 dM=np.zeros((measurement_dim,measurement_dim,grad_params_no)) 2213 elif isinstance(dM, np.ndarray): 2214 if state_dim == 1: 2215 if len(dM.shape) < 3: 2216 dM.shape = (1,1,1) 2217 else: 2218 if len(dM.shape) < 3: 2219 if which == 'dH': 2220 dM.shape = (measurement_dim,state_dim,1) 2221 elif which == 'dR': 2222 dM.shape = (measurement_dim,measurement_dim,1) 2223 elif isinstance(dM, np.int): 2224 if state_dim > 1: 2225 raise ValueError("When computing likelihood gradient wrong dH dimension.") 2226 else: 2227 dM = np.ones((1,1,1)) * dM 2228 2229# if not isinstance(dM, types.FunctionType): 2230# f_dM = lambda k: dM 2231# else: 2232# f_dM = dM 2233 2234 return dM 2235 2236 2237 2238class Struct(object): 2239 pass 2240 2241class ContDescrStateSpace(DescreteStateSpace): 2242 """ 2243 Class for continuous-discrete Kalman filter. State equation is 2244 continuous while measurement equation is discrete. 2245 2246 d x(t)/ dt = F x(t) + L q; where q~ N(0, Qc) 2247 y_{t_k} = H_{k} x_{t_k} + r_{k}; r_{k-1} ~ N(0, R_{k}) 2248 2249 """ 2250 2251 class AQcompute_once(Q_handling_Class): 2252 """ 2253 Class for calculating matrices A, Q, dA, dQ of the discrete Kalman Filter 2254 from the matrices F, L, Qc, P_ing, dF, dQc, dP_inf of the continuos state 2255 equation. dt - time steps. 2256 2257 It has the same interface as AQcompute_batch. 2258 2259 It computes matrices for only one time step. This object is used when 2260 there are many different time steps and storing matrices for each of them 2261 would take too much memory. 2262 """ 2263 2264 def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None): 2265 """ 2266 Constructor. All necessary parameters are passed here and stored 2267 in the opject. 2268 2269 Input: 2270 ------------------- 2271 F, L, Qc, P_inf : matrices 2272 Parameters of corresponding continuous state model 2273 dt: array 2274 All time steps 2275 compute_derivatives: bool 2276 Whether to calculate derivatives 2277 2278 dP_inf, dF, dQc: 3D array 2279 Derivatives if they are required 2280 2281 Output: 2282 ------------------- 2283 Nothing 2284 """ 2285 # Copies are done because this object is used later in smoother 2286 # and these parameters must not change. 2287 self.F = F.copy() 2288 self.L = L.copy() 2289 self.Qc = Qc.copy() 2290 2291 self.dt = dt # copy is not taken because dt is internal parameter 2292 2293 # Parameters are used to calculate derivatives but derivatives 2294 # are not used in the smoother. Therefore copies are not taken. 2295 self.P_inf = P_inf 2296 self.dP_inf = dP_inf 2297 self.dF = dF 2298 self.dQc = dQc 2299 2300 self.compute_derivatives = compute_derivatives 2301 self.grad_params_no = grad_params_no 2302 2303 2304 self.last_k = 0 2305 self.last_k_computed = False 2306 self.v_Ak = None 2307 self.v_Qk = None 2308 self.v_dAk = None 2309 self.v_dQk = None 2310 2311 self.square_root_computed = False 2312 self.Q_inverse_computed = False 2313 self.Q_svd_computed = False 2314 # !!!Print statistics! Which object is created 2315 2316 def f_a(self, k,m,A): 2317 """ 2318 Dynamic model 2319 """ 2320 2321 return np.dot(A, m) # default dynamic model 2322 2323 def _recompute_for_new_k(self,k): 2324 """ 2325 Computes the necessary matrices for an index k and store the results. 2326 2327 Input: 2328 ---------------------- 2329 k: int 2330 Index in the time differences array dt where to compute matrices 2331 2332 Output: 2333 ---------------------- 2334 Ak,Qk, dAk, dQk: matrices and/or 3D arrays 2335 A, Q, dA dQ on step k 2336 """ 2337 if (self.last_k != k) or (self.last_k_computed == False): 2338 v_Ak,v_Qk, tmp, v_dAk, v_dQk = ContDescrStateSpace.lti_sde_to_descrete(self.F, 2339 self.L,self.Qc,self.dt[k],self.compute_derivatives, 2340 grad_params_no=self.grad_params_no, P_inf=self.P_inf, dP_inf=self.dP_inf, dF=self.dF, dQc=self.dQc) 2341 2342 self.last_k = k 2343 self.last_k_computed = True 2344 self.v_Ak = v_Ak 2345 self.v_Qk = v_Qk 2346 self.v_dAk = v_dAk 2347 self.v_dQk = v_dQk 2348 2349 self.Q_square_root_computed = False 2350 self.Q_inverse_computed = False 2351 self.Q_svd_computed = False 2352 else: 2353 v_Ak = self.v_Ak 2354 v_Qk = self.v_Qk 2355 v_dAk = self.v_dAk 2356 v_dQk = self.v_dQk 2357 2358 # !!!Print statistics! Print sizes of matrices 2359 2360 return v_Ak,v_Qk, v_dAk, v_dQk 2361 2362 def reset(self, compute_derivatives): 2363 """ 2364 For reusing this object e.g. in smoother computation. Actually, 2365 this object can not be reused because it computes the matrices on 2366 every iteration. But this method is written for keeping the same 2367 interface with the class AQcompute_batch. 2368 """ 2369 2370 self.last_k = 0 2371 self.last_k_computed = False 2372 self.compute_derivatives = compute_derivatives 2373 2374 self.Q_square_root_computed = False 2375 self.Q_inverse_computed = False 2376 self.Q_svd_computed = False 2377 self.Q_eigen_computed = False 2378 return self 2379 2380 def Ak(self,k,m,P): 2381 v_Ak,v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k) 2382 return v_Ak 2383 2384 def Qk(self,k): 2385 v_Ak,v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k) 2386 return v_Qk 2387 2388 def dAk(self, k): 2389 v_Ak,v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k) 2390 return v_dAk 2391 2392 def dQk(self, k): 2393 v_Ak,v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k) 2394 return v_dQk 2395 2396 def Q_srk(self,k): 2397 """ 2398 Check square root, maybe rewriting for Spectral decomposition is needed. 2399 Square root of the noise matrix Q 2400 """ 2401 2402 if ((self.last_k == k) and (self.last_k_computed == True)): 2403 if not self.Q_square_root_computed: 2404 if not self.Q_svd_computed: 2405 (U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False) 2406 self.Q_svd = (U, S, Vh) 2407 self.Q_svd_computed = True 2408 else: 2409 (U, S, Vh) = self.Q_svd 2410 2411 square_root = U * np.sqrt(S) 2412 self.square_root_computed = True 2413 self.Q_square_root = square_root 2414 else: 2415 square_root = self.Q_square_root 2416 else: 2417 raise ValueError("Square root of Q can not be computed") 2418 2419 return square_root 2420 2421 def Q_inverse(self, k, p_largest_cond_num, p_regularization_type): 2422 """ 2423 Function inverts Q matrix and regularizes the inverse. 2424 Regularization is useful when original matrix is badly conditioned. 2425 Function is currently used only in SparseGP code. 2426 2427 Inputs: 2428 ------------------------------ 2429 k: int 2430 Iteration number. 2431 2432 p_largest_cond_num: float 2433 Largest condition value for the inverted matrix. If cond. number is smaller than that 2434 no regularization happen. 2435 2436 regularization_type: 1 or 2 2437 Regularization type. 2438 2439 regularization_type: int (1 or 2) 2440 2441 type 1: 1/(S[k] + regularizer) regularizer is computed 2442 type 2: S[k]/(S^2[k] + regularizer) regularizer is computed 2443 """ 2444 2445 #import pdb; pdb.set_trace() 2446 2447 if ((self.last_k == k) and (self.last_k_computed == True)): 2448 if not self.Q_inverse_computed: 2449 if not self.Q_svd_computed: 2450 (U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False) 2451 self.Q_svd = (U, S, Vh) 2452 self.Q_svd_computed = True 2453 else: 2454 (U, S, Vh) = self.Q_svd 2455 2456 Q_inverse_r = psd_matrix_inverse(k, 0.5*(self.v_Qk + self.v_Qk.T), U,S, p_largest_cond_num, p_regularization_type) 2457 2458 self.Q_inverse_computed = True 2459 self.Q_inverse_r = Q_inverse_r 2460 2461 else: 2462 Q_inverse_r = self.Q_inverse_r 2463 else: 2464 raise ValueError("""Inverse of Q can not be computed, because Q has not been computed. 2465 This requires some programming""") 2466 2467 return Q_inverse_r 2468 2469 2470 def return_last(self): 2471 """ 2472 Function returns last computed matrices. 2473 """ 2474 if not self.last_k_computed: 2475 raise ValueError("Matrices are not computed.") 2476 else: 2477 k = self.last_k 2478 A = self.v_Ak 2479 Q = self.v_Qk 2480 dA = self.v_dAk 2481 dQ = self.v_dQk 2482 2483 return k, A, Q, dA, dQ 2484 2485 class AQcompute_batch_Python(Q_handling_Class): 2486 """ 2487 Class for calculating matrices A, Q, dA, dQ of the discrete Kalman Filter 2488 from the matrices F, L, Qc, P_ing, dF, dQc, dP_inf of the continuos state 2489 equation. dt - time steps. 2490 2491 It has the same interface as AQcompute_once. 2492 2493 It computes matrices for all time steps. This object is used when 2494 there are not so many (controlled by internal variable) 2495 different time steps and storing all the matrices do not take too much memory. 2496 2497 Since all the matrices are computed all together, this object can be used 2498 in smoother without repeating the computations. 2499 """ 2500 def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None): 2501 """ 2502 Constructor. All necessary parameters are passed here and stored 2503 in the opject. 2504 2505 Input: 2506 ------------------- 2507 F, L, Qc, P_inf : matrices 2508 Parameters of corresponding continuous state model 2509 dt: array 2510 All time steps 2511 compute_derivatives: bool 2512 Whether to calculate derivatives 2513 2514 dP_inf, dF, dQc: 3D array 2515 Derivatives if they are required 2516 2517 Output: 2518 ------------------- 2519 Nothing 2520 """ 2521 As, Qs, reconstruct_indices, dAs, dQs = ContDescrStateSpace.lti_sde_to_descrete(F, 2522 L,Qc,dt,compute_derivatives, 2523 grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc) 2524 2525 self.As = As 2526 self.Qs = Qs 2527 self.dAs = dAs 2528 self.dQs = dQs 2529 self.reconstruct_indices = reconstruct_indices 2530 self.total_size_of_data = self.As.nbytes + self.Qs.nbytes +\ 2531 (self.dAs.nbytes if (self.dAs is not None) else 0) +\ 2532 (self.dQs.nbytes if (self.dQs is not None) else 0) +\ 2533 (self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0) 2534 2535 self.Q_svd_dict = {} 2536 self.Q_square_root_dict = {} 2537 self.Q_inverse_dict = {} 2538 2539 self.last_k = None 2540 # !!!Print statistics! Which object is created 2541 # !!!Print statistics! Print sizes of matrices 2542 2543 def f_a(self, k,m,A): 2544 """ 2545 Dynamic model 2546 """ 2547 return np.dot(A, m) # default dynamic model 2548 2549 def reset(self, compute_derivatives=False): 2550 """ 2551 For reusing this object e.g. in smoother computation. It makes sence 2552 because necessary matrices have been already computed for all 2553 time steps. 2554 """ 2555 return self 2556 2557 def Ak(self,k,m,P): 2558 self.last_k = k 2559 return self.As[:,:, self.reconstruct_indices[k]] 2560 2561 def Qk(self,k): 2562 self.last_k = k 2563 return self.Qs[:,:, self.reconstruct_indices[k]] 2564 2565 def dAk(self,k): 2566 self.last_k = k 2567 return self.dAs[:,:, :, self.reconstruct_indices[k]] 2568 2569 def dQk(self,k): 2570 self.last_k = k 2571 return self.dQs[:,:, :, self.reconstruct_indices[k]] 2572 2573 2574 def Q_srk(self,k): 2575 """ 2576 Square root of the noise matrix Q 2577 """ 2578 matrix_index = self.reconstruct_indices[k] 2579 if matrix_index in self.Q_square_root_dict: 2580 square_root = self.Q_square_root_dict[matrix_index] 2581 else: 2582 if matrix_index in self.Q_svd_dict: 2583 (U, S, Vh) = self.Q_svd_dict[matrix_index] 2584 else: 2585 (U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index], 2586 full_matrices=False, compute_uv=True, 2587 overwrite_a=False, check_finite=False) 2588 self.Q_svd_dict[matrix_index] = (U,S,Vh) 2589 2590 square_root = U * np.sqrt(S) 2591 self.Q_square_root_dict[matrix_index] = square_root 2592 2593 return square_root 2594 2595 def Q_inverse(self, k, p_largest_cond_num, p_regularization_type): 2596 """ 2597 Function inverts Q matrix and regularizes the inverse. 2598 Regularization is useful when original matrix is badly conditioned. 2599 Function is currently used only in SparseGP code. 2600 2601 Inputs: 2602 ------------------------------ 2603 k: int 2604 Iteration number. 2605 2606 p_largest_cond_num: float 2607 Largest condition value for the inverted matrix. If cond. number is smaller than that 2608 no regularization happen. 2609 2610 regularization_type: 1 or 2 2611 Regularization type. 2612 2613 regularization_type: int (1 or 2) 2614 2615 type 1: 1/(S[k] + regularizer) regularizer is computed 2616 type 2: S[k]/(S^2[k] + regularizer) regularizer is computed 2617 """ 2618 #import pdb; pdb.set_trace() 2619 2620 matrix_index = self.reconstruct_indices[k] 2621 if matrix_index in self.Q_inverse_dict: 2622 Q_inverse_r = self.Q_inverse_dict[matrix_index] 2623 else: 2624 2625 if matrix_index in self.Q_svd_dict: 2626 (U, S, Vh) = self.Q_svd_dict[matrix_index] 2627 else: 2628 (U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index], 2629 full_matrices=False, compute_uv=True, 2630 overwrite_a=False, check_finite=False) 2631 self.Q_svd_dict[matrix_index] = (U,S,Vh) 2632 2633 Q_inverse_r = psd_matrix_inverse(k, 0.5*(self.Qs[:,:, matrix_index] + self.Qs[:,:, matrix_index].T), U,S, p_largest_cond_num, p_regularization_type) 2634 self.Q_inverse_dict[matrix_index] = Q_inverse_r 2635 2636 return Q_inverse_r 2637 2638 2639 def return_last(self): 2640 """ 2641 Function returns last available matrices. 2642 """ 2643 2644 if (self.last_k is None): 2645 raise ValueError("Matrices are not computed.") 2646 else: 2647 ind = self.reconstruct_indices[self.last_k] 2648 A = self.As[:,:, ind] 2649 Q = self.Qs[:,:, ind] 2650 dA = self.dAs[:,:, :, ind] 2651 dQ = self.dQs[:,:, :, ind] 2652 2653 return self.last_k, A, Q, dA, dQ 2654 2655 @classmethod 2656 def cont_discr_kalman_filter(cls, F, L, Qc, p_H, p_R, P_inf, X, Y, index = None, 2657 m_init=None, P_init=None, 2658 p_kalman_filter_type='regular', 2659 calc_log_likelihood=False, 2660 calc_grad_log_likelihood=False, 2661 grad_params_no=0, grad_calc_params=None): 2662 """ 2663 This function implements the continuous-discrete Kalman Filter algorithm 2664 These notations for the State-Space model are assumed: 2665 d/dt x(t) = F * x(t) + L * w(t); w(t) ~ N(0, Qc) 2666 y_{k} = H_{k} * x_{k} + r_{k}; r_{k-1} ~ N(0, R_{k}) 2667 2668 Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k)) 2669 2670 Current Features: 2671 ---------------------------------------- 2672 1) The function generaly do not modify the passed parameters. If 2673 it happens then it is an error. There are several exeprions: scalars 2674 can be modified into a matrix, in some rare cases shapes of 2675 the derivatives matrices may be changed, it is ignored for now. 2676 2677 2) Copies of F,L,Qc are created in memory because they may be used later 2678 in smoother. References to copies are kept in "AQcomp" object 2679 return parameter. 2680 2681 3) Function support "multiple time series mode" which means that exactly 2682 the same State-Space model is used to filter several sets of measurements. 2683 In this case third dimension of Y should include these state-space measurements 2684 Log_likelihood and Grad_log_likelihood have the corresponding dimensions then. 2685 2686 4) Calculation of Grad_log_likelihood is not supported if matrices 2687 H, or R changes overf time (with index k). (later may be changed) 2688 2689 5) Measurement may include missing values. In this case update step is 2690 not done for this measurement. (later may be changed) 2691 2692 Input: 2693 ----------------- 2694 2695 F: (state_dim, state_dim) matrix 2696 F in the model. 2697 2698 L: (state_dim, noise_dim) matrix 2699 L in the model. 2700 2701 Qc: (noise_dim, noise_dim) matrix 2702 Q_c in the model. 2703 2704 p_H: scalar, matrix (measurement_dim, state_dim) , 3D array 2705 H_{k} in the model. If matrix then H_{k} = H - constant. 2706 If it is 3D array then H_{k} = p_Q[:,:, index[2,k]] 2707 2708 p_R: scalar, square symmetric matrix, 3D array 2709 R_{k} in the model. If matrix then R_{k} = R - constant. 2710 If it is 3D array then R_{k} = p_R[:,:, index[3,k]] 2711 2712 P_inf: (state_dim, state_dim) matrix 2713 State varince matrix on infinity. 2714 2715 X: 1D array 2716 Time points of measurements. Needed for converting continuos 2717 problem to the discrete one. 2718 2719 Y: matrix or vector or 3D array 2720 Data. If Y is matrix then samples are along 0-th dimension and 2721 features along the 1-st. If 3D array then third dimension 2722 correspond to "multiple time series mode". 2723 2724 index: vector 2725 Which indices (on 3-rd dimension) from arrays p_H, p_R to use 2726 on every time step. If this parameter is None then it is assumed 2727 that p_H, p_R do not change over time and indices are not needed. 2728 index[0,:] - correspond to H, index[1,:] - correspond to R 2729 If index.shape[0] == 1, it is assumed that indides for all matrices 2730 are the same. 2731 2732 m_init: vector or matrix 2733 Initial distribution mean. If None it is assumed to be zero. 2734 For "multiple time series mode" it is matrix, second dimension of 2735 which correspond to different time series. In regular case ("one 2736 time series mode") it is a vector. 2737 2738 P_init: square symmetric matrix or scalar 2739 Initial covariance of the states. If the parameter is scalar 2740 then it is assumed that initial covariance matrix is unit matrix 2741 multiplied by this scalar. If None the unit matrix is used instead. 2742 "multiple time series mode" does not affect it, since it does not 2743 affect anything related to state variaces. 2744 2745 p_kalman_filter_type: string, one of ('regular', 'svd') 2746 Which Kalman Filter is used. Regular or SVD. SVD is more numerically 2747 stable, in particular, Covariace matrices are guarantied to be 2748 positive semi-definite. However, 'svd' works slower, especially for 2749 small data due to SVD call overhead. 2750 2751 calc_log_likelihood: boolean 2752 Whether to calculate marginal likelihood of the state-space model. 2753 2754 calc_grad_log_likelihood: boolean 2755 Whether to calculate gradient of the marginal likelihood 2756 of the state-space model. If true then "grad_calc_params" parameter must 2757 provide the extra parameters for gradient calculation. 2758 2759 grad_params_no: int 2760 If previous parameter is true, then this parameters gives the 2761 total number of parameters in the gradient. 2762 2763 grad_calc_params: dictionary 2764 Dictionary with derivatives of model matrices with respect 2765 to parameters "dF", "dL", "dQc", "dH", "dR", "dm_init", "dP_init". 2766 They can be None, in this case zero matrices (no dependence on parameters) 2767 is assumed. If there is only one parameter then third dimension is 2768 automatically added. 2769 2770 Output: 2771 -------------- 2772 2773 M: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array 2774 Filter estimates of the state means. In the extra step the initial 2775 value is included. In the "multiple time series mode" third dimension 2776 correspond to different timeseries. 2777 2778 P: (no_steps+1, state_dim, state_dim) 3D array 2779 Filter estimates of the state covariances. In the extra step the initial 2780 value is included. 2781 2782 log_likelihood: double or (1, time_series_no) 3D array. 2783 2784 If the parameter calc_log_likelihood was set to true, return 2785 logarithm of marginal likelihood of the state-space model. If 2786 the parameter was false, return None. In the "multiple time series mode" it is a vector 2787 providing log_likelihood for each time series. 2788 2789 grad_log_likelihood: column vector or (grad_params_no, time_series_no) matrix 2790 If calc_grad_log_likelihood is true, return gradient of log likelihood 2791 with respect to parameters. It returns it column wise, so in 2792 "multiple time series mode" gradients for each time series is in the 2793 corresponding column. 2794 2795 AQcomp: object 2796 Contains some pre-computed values for converting continuos model into 2797 discrete one. It can be used later in the smoothing pahse. 2798 """ 2799 2800 p_H = np.atleast_1d(p_H) 2801 p_R = np.atleast_1d(p_R) 2802 2803 X.shape, old_X_shape = cls._reshape_input_data(X.shape, 2) # represent as column 2804 if (X.shape[1] != 1): 2805 raise ValueError("Only one dimensional X data is supported.") 2806 2807 Y.shape, old_Y_shape = cls._reshape_input_data(Y.shape) # represent as column 2808 2809 state_dim = F.shape[0] 2810 measurement_dim = Y.shape[1] 2811 time_series_no = Y.shape[2] # multiple time series mode 2812 2813 if ((len(p_H.shape) == 3) and (len(p_H.shape[2]) != 1)) or\ 2814 ((len(p_R.shape) == 3) and (len(p_R.shape[2]) != 1)): 2815 model_matrices_chage_with_time = True 2816 else: 2817 model_matrices_chage_with_time = False 2818 2819 # Check index 2820 old_index_shape = None 2821 if index is None: 2822 if (len(p_H.shape) == 3) or (len(p_R.shape) == 3): 2823 raise ValueError("Parameter index can not be None for time varying matrices (third dimension is present)") 2824 else: # matrices do not change in time, so form dummy zero indices. 2825 index = np.zeros((1,Y.shape[0])) 2826 else: 2827 if len(index.shape) == 1: 2828 index.shape = (1,index.shape[0]) 2829 old_index_shape = (index.shape[0],) 2830 2831 if (index.shape[1] != Y.shape[0]): 2832 raise ValueError("Number of measurements must be equal the number of H_{k}, R_{k}") 2833 2834 if (index.shape[0] == 1): 2835 H_time_var_index = 0; R_time_var_index = 0 2836 elif (index.shape[0] == 4): 2837 H_time_var_index = 0; R_time_var_index = 1 2838 else: 2839 raise ValueError("First Dimension of index must be either 1 or 2.") 2840 2841 (p_H, old_H_shape) = cls._check_SS_matrix(p_H, state_dim, measurement_dim, which='H') 2842 (p_R, old_R_shape) = cls._check_SS_matrix(p_R, state_dim, measurement_dim, which='R') 2843 2844 if m_init is None: 2845 m_init = np.zeros((state_dim, time_series_no)) 2846 else: 2847 m_init = np.atleast_2d(m_init).T 2848 2849 if P_init is None: 2850 P_init = P_inf.copy() 2851 2852 if p_kalman_filter_type not in ('regular', 'svd'): 2853 raise ValueError("Kalman filer type neither 'regular nor 'svd'.") 2854 2855 # Functions to pass to the kalman_filter algorithm: 2856 # Parameters: 2857 # k - number of Kalman filter iteration 2858 # m - vector for calculating matrices. Required for EKF. Not used here. 2859 # f_hl = lambda k,m,H: np.dot(H, m) 2860 # f_H = lambda k,m,P: p_H[:,:, index[H_time_var_index, k]] 2861 #f_R = lambda k: p_R[:,:, index[R_time_var_index, k]] 2862 #o_R = R_handling( p_R, index, R_time_var_index, 20) 2863 2864 if calc_grad_log_likelihood: 2865 2866 dF = cls._check_grad_state_matrices(grad_calc_params.get('dF'), state_dim, grad_params_no, which = 'dA') 2867 dQc = cls._check_grad_state_matrices(grad_calc_params.get('dQc'), state_dim, grad_params_no, which = 'dQ') 2868 dP_inf = cls._check_grad_state_matrices(grad_calc_params.get('dP_inf'), state_dim, grad_params_no, which = 'dA') 2869 2870 dH = cls._check_grad_measurement_matrices(grad_calc_params.get('dH'), state_dim, grad_params_no, measurement_dim, which = 'dH') 2871 dR = cls._check_grad_measurement_matrices(grad_calc_params.get('dR'), state_dim, grad_params_no, measurement_dim, which = 'dR') 2872 2873 dm_init = grad_calc_params.get('dm_init') # Initial values for the Kalman Filter 2874 if dm_init is None: 2875 # multiple time series mode. Keep grad_params always as a last dimension 2876 dm_init = np.zeros( (state_dim, time_series_no, grad_params_no) ) 2877 2878 dP_init = grad_calc_params.get('dP_init') # Initial values for the Kalman Filter 2879 if dP_init is None: 2880 dP_init = dP_inf(0).copy() # get the dP_init matrix, because now it is a function 2881 2882 else: 2883 dP_inf = None 2884 dF = None 2885 dQc = None 2886 dH = None 2887 dR = None 2888 dm_init = None 2889 dP_init = None 2890 2891 measurement_callables = Std_Measurement_Callables_Class(p_H, H_time_var_index, p_R, index, R_time_var_index, 20, dH, dR) 2892 #import pdb; pdb.set_trace() 2893 2894 dynamic_callables = cls._cont_to_discrete_object(X, F, L, Qc, compute_derivatives=calc_grad_log_likelihood, 2895 grad_params_no=grad_params_no, 2896 P_inf=P_inf, dP_inf=dP_inf, dF = dF, dQc=dQc) 2897 2898 if print_verbose: 2899 print("General: run Continuos-Discrete Kalman Filter") 2900 # Also for dH, dR and probably for all derivatives 2901 (M, P, log_likelihood, grad_log_likelihood, AQcomp) = cls._cont_discr_kalman_filter_raw(state_dim, 2902 dynamic_callables, measurement_callables, 2903 X, Y, m_init=m_init, P_init=P_init, 2904 p_kalman_filter_type=p_kalman_filter_type, 2905 calc_log_likelihood=calc_log_likelihood, 2906 calc_grad_log_likelihood=calc_grad_log_likelihood, grad_params_no=grad_params_no, 2907 dm_init=dm_init, dP_init=dP_init) 2908 2909 if old_index_shape is not None: 2910 index.shape = old_index_shape 2911 2912 if old_X_shape is not None: 2913 X.shape = old_X_shape 2914 2915 if old_Y_shape is not None: 2916 Y.shape = old_Y_shape 2917 2918 if old_H_shape is not None: 2919 p_H.shape = old_H_shape 2920 2921 if old_R_shape is not None: 2922 p_R.shape = old_R_shape 2923 2924 return (M, P, log_likelihood, grad_log_likelihood, AQcomp) 2925 2926 @classmethod 2927 def _cont_discr_kalman_filter_raw(cls,state_dim, p_dynamic_callables, p_measurement_callables, X, Y, 2928 m_init, P_init, 2929 p_kalman_filter_type='regular', 2930 calc_log_likelihood=False, 2931 calc_grad_log_likelihood=False, grad_params_no=None, 2932 dm_init=None, dP_init=None): 2933 """ 2934 General filtering algorithm for inference in the continuos-discrete 2935 state-space model: 2936 2937 d/dt x(t) = F * x(t) + L * w(t); w(t) ~ N(0, Qc) 2938 y_{k} = H_{k} * x_{k} + r_{k}; r_{k-1} ~ N(0, R_{k}) 2939 2940 Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k)) 2941 2942 Current Features: 2943 ---------------------------------------- 2944 2945 1) Function support "multiple time series mode" which means that exactly 2946 the same State-Space model is used to filter several sets of measurements. 2947 In this case third dimension of Y should include these state-space measurements 2948 Log_likelihood and Grad_log_likelihood have the corresponding dimensions then. 2949 2950 2) Measurement may include missing values. In this case update step is 2951 not done for this measurement. (later may be changed) 2952 2953 Input: 2954 ----------------- 2955 state_dim: int 2956 Demensionality of the states 2957 2958 F: (state_dim, state_dim) matrix 2959 F in the model. 2960 2961 L: (state_dim, noise_dim) matrix 2962 L in the model. 2963 2964 Qc: (noise_dim, noise_dim) matrix 2965 Q_c in the model. 2966 2967 P_inf: (state_dim, state_dim) matrix 2968 State varince matrix on infinity. 2969 2970 p_h: function (k, x_{k}, H_{k}). Measurement function. 2971 k (iteration number), 2972 x_{k} 2973 H_{k} Jacobian matrices of f_h. In the linear case it is exactly H_{k}. 2974 2975 f_H: function (k, m, P) return Jacobian of dynamic function, it is 2976 passed into p_h. 2977 k (iteration number), 2978 m: point where Jacobian is evaluated, 2979 P: parameter for Jacobian, usually covariance matrix. 2980 2981 p_f_R: function (k). Returns noise matrix of measurement equation 2982 on iteration k. 2983 k (iteration number). 2984 2985 m_init: vector or matrix 2986 Initial distribution mean. For "multiple time series mode" 2987 it is matrix, second dimension of which correspond to different 2988 time series. In regular case ("one time series mode") it is a 2989 vector. 2990 2991 P_init: matrix or scalar 2992 Initial covariance of the states. Must be not None 2993 "multiple time series mode" does not affect it, since it does not 2994 affect anything related to state variaces. 2995 2996 p_kalman_filter_type: string, one of ('regular', 'svd') 2997 Which Kalman Filter is used. Regular or SVD. SVD is more numerically 2998 stable, in particular, Covariace matrices are guarantied to be 2999 positive semi-definite. However, 'svd' works slower, especially for 3000 small data due to SVD call overhead. 3001 3002 calc_log_likelihood: boolean 3003 Whether to calculate marginal likelihood of the state-space model. 3004 3005 calc_grad_log_likelihood: boolean 3006 Whether to calculate gradient of the marginal likelihood 3007 of the state-space model. If true then the next parameter must 3008 provide the extra parameters for gradient calculation. 3009 3010 grad_params_no: int 3011 Number of gradient parameters 3012 3013 dP_inf, dF, dQc, dH, dR, dm_init, dP_init: matrices or 3D arrays. 3014 Necessary parameters for derivatives calculation. 3015 3016 """ 3017 3018 #import pdb; pdb.set_trace() 3019 steps_no = Y.shape[0] # number of steps in the Kalman Filter 3020 time_series_no = Y.shape[2] # multiple time series mode 3021 3022 # Allocate space for results 3023 # Mean estimations. Initial values will be included 3024 M = np.empty(((steps_no+1),state_dim,time_series_no)) 3025 M[0,:,:] = m_init # Initialize mean values 3026 # Variance estimations. Initial values will be included 3027 P = np.empty(((steps_no+1),state_dim,state_dim)) 3028 P_init = 0.5*( P_init + P_init.T) # symmetrize initial covariance. In some ustable cases this is uiseful 3029 P[0,:,:] = P_init # Initialize initial covariance matrix 3030 3031 #import pdb;pdb.set_trace() 3032 if p_kalman_filter_type == 'svd': 3033 (U,S,Vh) = sp.linalg.svd( P_init,full_matrices=False, compute_uv=True, 3034 overwrite_a=False,check_finite=True) 3035 S[ (S==0) ] = 1e-17 # allows to run algorithm for singular initial variance 3036 P_upd = (P_init, S,U) 3037 #log_likelihood = 0 3038 #grad_log_likelihood = np.zeros((grad_params_no,1)) 3039 log_likelihood = 0 if calc_log_likelihood else None 3040 grad_log_likelihood = 0 if calc_grad_log_likelihood else None 3041 3042 #setting initial values for derivatives update 3043 dm_upd = dm_init 3044 dP_upd = dP_init 3045 # Main loop of the Kalman filter 3046 for k in range(0,steps_no): 3047 # In this loop index for new estimations is (k+1), old - (k) 3048 # This happened because initial values are stored at 0-th index. 3049 #import pdb; pdb.set_trace() 3050 3051 prev_mean = M[k,:,:] # mean from the previous step 3052 3053 if p_kalman_filter_type == 'svd': 3054 m_pred, P_pred, dm_pred, dP_pred = \ 3055 cls._kalman_prediction_step_SVD(k, prev_mean ,P_upd, p_dynamic_callables, 3056 calc_grad_log_likelihood=calc_grad_log_likelihood, 3057 p_dm = dm_upd, p_dP = dP_upd) 3058 else: 3059 m_pred, P_pred, dm_pred, dP_pred = \ 3060 cls._kalman_prediction_step(k, prev_mean ,P[k,:,:], p_dynamic_callables, 3061 calc_grad_log_likelihood=calc_grad_log_likelihood, 3062 p_dm = dm_upd, p_dP = dP_upd ) 3063 3064 #import pdb; pdb.set_trace() 3065 k_measurment = Y[k,:,:] 3066 3067 if (np.any(np.isnan(k_measurment)) == False): 3068 3069 if p_kalman_filter_type == 'svd': 3070 m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ 3071 cls._kalman_update_step_SVD(k, m_pred , P_pred, p_measurement_callables, 3072 k_measurment, calc_log_likelihood=calc_log_likelihood, 3073 calc_grad_log_likelihood=calc_grad_log_likelihood, 3074 p_dm = dm_pred, p_dP = dP_pred ) 3075 3076 3077 # m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ 3078 # cls._kalman_update_step(k, m_pred , P_pred[0], f_h, f_H, p_R.f_R, k_measurment, 3079 # calc_log_likelihood=calc_log_likelihood, 3080 # calc_grad_log_likelihood=calc_grad_log_likelihood, 3081 # p_dm = dm_pred, p_dP = dP_pred, grad_calc_params_2 = (dH, dR)) 3082 # 3083 # (U,S,Vh) = sp.linalg.svd( P_upd,full_matrices=False, compute_uv=True, 3084 # overwrite_a=False,check_finite=True) 3085 # P_upd = (P_upd, S,U) 3086 else: 3087 m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \ 3088 cls._kalman_update_step(k, m_pred , P_pred, p_measurement_callables, k_measurment, 3089 calc_log_likelihood=calc_log_likelihood, 3090 calc_grad_log_likelihood=calc_grad_log_likelihood, 3091 p_dm = dm_pred, p_dP = dP_pred ) 3092 else: 3093 if k_measurment.shape != (1,1): 3094 raise ValueError("Nan measurements are currently not supported for \ 3095 multidimensional output and multiple tiem series.") 3096 else: 3097 m_upd = m_pred; P_upd = P_pred; dm_upd = dm_pred; dP_upd = dP_pred 3098 log_likelihood_update = 0.0; 3099 d_log_likelihood_update = 0.0; 3100 3101 3102 if calc_log_likelihood: 3103 log_likelihood += log_likelihood_update 3104 3105 if calc_grad_log_likelihood: 3106 grad_log_likelihood += d_log_likelihood_update 3107 3108 M[k+1,:,:] = m_upd # separate mean value for each time series 3109 3110 if p_kalman_filter_type == 'svd': 3111 P[k+1,:,:] = P_upd[0] 3112 else: 3113 P[k+1,:,:] = P_upd 3114 #print("kf it: %i" % k) 3115 # !!!Print statistics! Print sizes of matrices 3116 # !!!Print statistics! Print iteration time base on another boolean variable 3117 return (M, P, log_likelihood, grad_log_likelihood, p_dynamic_callables.reset(False)) 3118 3119 @classmethod 3120 def cont_discr_rts_smoother(cls,state_dim, filter_means, filter_covars, 3121 p_dynamic_callables=None, X=None, F=None,L=None,Qc=None): 3122 """ 3123 3124 Continuos-discrete Rauch–Tung–Striebel(RTS) smoother. 3125 3126 This function implements Rauch–Tung–Striebel(RTS) smoother algorithm 3127 based on the results of _cont_discr_kalman_filter_raw. 3128 3129 Model: 3130 d/dt x(t) = F * x(t) + L * w(t); w(t) ~ N(0, Qc) 3131 y_{k} = H_{k} * x_{k} + r_{k}; r_{k-1} ~ N(0, R_{k}) 3132 3133 Returns estimated smoother distributions x_{k} ~ N(m_{k}, P(k)) 3134 3135 Input: 3136 -------------- 3137 3138 filter_means: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array 3139 Results of the Kalman Filter means estimation. 3140 3141 filter_covars: (no_steps+1, state_dim, state_dim) 3D array 3142 Results of the Kalman Filter covariance estimation. 3143 3144 Dynamic_callables: object or None 3145 Object form the filter phase which provides functions for computing 3146 A, Q, dA, dQ fro discrete model from the continuos model. 3147 3148 X, F, L, Qc: matrices 3149 If AQcomp is None, these matrices are used to create this object from scratch. 3150 3151 Output: 3152 ------------- 3153 3154 M: (no_steps+1,state_dim) matrix 3155 Smoothed estimates of the state means 3156 3157 P: (no_steps+1,state_dim, state_dim) 3D array 3158 Smoothed estimates of the state covariances 3159 """ 3160 3161 f_a = lambda k,m,A: np.dot(A, m) # state dynamic model 3162 if p_dynamic_callables is None: # make this object from scratch 3163 p_dynamic_callables = cls._cont_to_discrete_object(cls, X, F,L,Qc,f_a,compute_derivatives=False, 3164 grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None) 3165 3166 no_steps = filter_covars.shape[0]-1# number of steps (minus initial covariance) 3167 3168 M = np.empty(filter_means.shape) # smoothed means 3169 P = np.empty(filter_covars.shape) # smoothed covars 3170 3171 if print_verbose: 3172 print("General: run Continuos-Discrete Kalman Smoother") 3173 3174 M[-1,:,:] = filter_means[-1,:,:] 3175 P[-1,:,:] = filter_covars[-1,:,:] 3176 for k in range(no_steps-1,-1,-1): 3177 3178 prev_mean = filter_means[k,:] # mean from the previous step 3179 m_pred, P_pred, tmp1, tmp2 = \ 3180 cls._kalman_prediction_step(k, prev_mean, 3181 filter_covars[k,:,:], p_dynamic_callables, 3182 calc_grad_log_likelihood=False) 3183 p_m = filter_means[k,:] 3184 p_m_prev_step = M[(k+1),:] 3185 3186 m_upd, P_upd, tmp_G = cls._rts_smoother_update_step(k, 3187 p_m ,filter_covars[k,:,:], 3188 m_pred, P_pred, p_m_prev_step ,P[(k+1),:,:], p_dynamic_callables) 3189 3190 M[k,:,:] = m_upd 3191 P[k,:,:] = P_upd 3192 # Return values 3193 return (M, P) 3194 3195 @classmethod 3196 def _cont_to_discrete_object(cls, X, F, L, Qc, compute_derivatives=False, 3197 grad_params_no=None, 3198 P_inf=None, dP_inf=None, dF = None, dQc=None, 3199 dt0=None): 3200 """ 3201 Function return the object which is used in Kalman filter and/or 3202 smoother to obtain matrices A, Q and their derivatives for discrete model 3203 from the continuous model. 3204 3205 There are 2 objects AQcompute_once and AQcompute_batch and the function 3206 returs the appropriate one based on the number of different time steps. 3207 3208 Input: 3209 ---------------------- 3210 X, F, L, Qc: matrices 3211 Continuous model matrices 3212 3213 f_a: function 3214 Dynamic Function is attached to the Dynamic_Model_Callables class 3215 compute_derivatives: boolean 3216 Whether to compute derivatives 3217 3218 grad_params_no: int 3219 Number of parameters in the gradient 3220 3221 P_inf, dP_inf, dF, dQ: matrices and 3D objects 3222 Data necessary to compute derivatives. 3223 3224 Output: 3225 -------------------------- 3226 AQcomp: object 3227 Its methods return matrices (and optionally derivatives) for the 3228 discrete state-space model. 3229 3230 """ 3231 3232 unique_round_decimals = 10 3233 threshold_number_of_unique_time_steps = 20 # above which matrices are separately each time 3234 dt = np.empty((X.shape[0],)) 3235 dt[1:] = np.diff(X[:,0],axis=0) 3236 if dt0 is None: 3237 dt[0] = 0#dt[1] 3238 else: 3239 if isinstance(dt0,str): 3240 dt = dt[1:] 3241 else: 3242 dt[0] = dt0 3243 3244 unique_indices = np.unique(np.round(dt, decimals=unique_round_decimals)) 3245 number_unique_indices = len(unique_indices) 3246 3247 #import pdb; pdb.set_trace() 3248 if use_cython: 3249 class AQcompute_batch(state_space_cython.AQcompute_batch_Cython): 3250 def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None): 3251 As, Qs, reconstruct_indices, dAs, dQs = ContDescrStateSpace.lti_sde_to_descrete(F, 3252 L,Qc,dt,compute_derivatives, 3253 grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc) 3254 3255 super(AQcompute_batch,self).__init__(As, Qs, reconstruct_indices, dAs,dQs) 3256 else: 3257 AQcompute_batch = cls.AQcompute_batch_Python 3258 3259 if number_unique_indices > threshold_number_of_unique_time_steps: 3260 AQcomp = cls.AQcompute_once(F,L,Qc, dt,compute_derivatives=compute_derivatives, 3261 grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc) 3262 if print_verbose: 3263 print("CDO: Continue-to-discrete INSTANTANEOUS object is created.") 3264 print("CDO: Number of different time steps: %i" % (number_unique_indices,) ) 3265 3266 else: 3267 AQcomp = AQcompute_batch(F,L,Qc,dt,compute_derivatives=compute_derivatives, 3268 grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc) 3269 if print_verbose: 3270 print("CDO: Continue-to-discrete BATCH object is created.") 3271 print("CDO: Number of different time steps: %i" % (number_unique_indices,) ) 3272 print("CDO: Total size if its data: %i" % (AQcomp.total_size_of_data,) ) 3273 3274 return AQcomp 3275 3276 @staticmethod 3277 def lti_sde_to_descrete(F,L,Qc,dt,compute_derivatives=False, 3278 grad_params_no=None, P_inf=None, 3279 dP_inf=None, dF = None, dQc=None): 3280 """ 3281 Linear Time-Invariant Stochastic Differential Equation (LTI SDE): 3282 3283 dx(t) = F x(t) dt + L d \beta ,where 3284 3285 x(t): (vector) stochastic process 3286 \beta: (vector) Brownian motion process 3287 F, L: (time invariant) matrices of corresponding dimensions 3288 Qc: covariance of noise. 3289 3290 This function rewrites it into the corresponding state-space form: 3291 3292 x_{k} = A_{k} * x_{k-1} + q_{k-1}; q_{k-1} ~ N(0, Q_{k-1}) 3293 3294 TODO: this function can be redone to "preprocess dataset", when 3295 close time points are handeled properly (with rounding parameter) and 3296 values are averaged accordingly. 3297 3298 Input: 3299 -------------- 3300 F,L: LTI SDE matrices of corresponding dimensions 3301 3302 Qc: matrix (n,n) 3303 Covarince between different dimensions of noise \beta. 3304 n is the dimensionality of the noise. 3305 3306 dt: double or iterable 3307 Time difference used on this iteration. 3308 If dt is iterable, then A and Q_noise are computed for every 3309 unique dt 3310 3311 compute_derivatives: boolean 3312 Whether derivatives of A and Q are required. 3313 3314 grad_params_no: int 3315 Number of gradient parameters 3316 3317 P_inf: (state_dim. state_dim) matrix 3318 3319 dP_inf 3320 3321 dF: 3D array 3322 Derivatives of F 3323 3324 dQc: 3D array 3325 Derivatives of Qc 3326 3327 dR: 3D array 3328 Derivatives of R 3329 3330 Output: 3331 -------------- 3332 3333 A: matrix 3334 A_{k}. Because we have LTI SDE only dt can affect on matrix 3335 difference for different k. 3336 3337 Q_noise: matrix 3338 Covariance matrix of (vector) q_{k-1}. Only dt can affect the 3339 matrix difference for different k. 3340 3341 reconstruct_index: array 3342 If dt was iterable return three dimensinal arrays A and Q_noise. 3343 Third dimension of these arrays correspond to unique dt's. 3344 This reconstruct_index contain indices of the original dt's 3345 in the uninue dt sequence. A[:,:, reconstruct_index[5]] 3346 is matrix A of 6-th(indices start from zero) dt in the original 3347 sequence. 3348 dA: 3D array 3349 Derivatives of A 3350 3351 dQ: 3D array 3352 Derivatives of Q 3353 """ 3354 # Dimensionality 3355 n = F.shape[0] 3356 3357 if not isinstance(dt, collections.Iterable): # not iterable, scalar 3358 #import pdb; pdb.set_trace() 3359 # The dynamical model 3360 A = matrix_exponent(F*dt) 3361 3362 # The covariance matrix Q by matrix fraction decomposition -> 3363 Phi = np.zeros((2*n,2*n)) 3364 Phi[:n,:n] = F 3365 Phi[:n,n:] = L.dot(Qc).dot(L.T) 3366 Phi[n:,n:] = -F.T 3367 AB = matrix_exponent(Phi*dt) 3368 AB = np.dot(AB, np.vstack((np.zeros((n,n)),np.eye(n)))) 3369 3370 Q_noise_1 = linalg.solve(AB[n:,:].T,AB[:n,:].T) 3371 Q_noise_2 = P_inf - A.dot(P_inf).dot(A.T) 3372 # The covariance matrix Q by matrix fraction decomposition <- 3373 3374 if compute_derivatives: 3375 dA = np.zeros([n, n, grad_params_no]) 3376 dQ = np.zeros([n, n, grad_params_no]) 3377 3378 #AA = np.zeros([2*n, 2*n, nparam]) 3379 FF = np.zeros([2*n, 2*n]) 3380 AA = np.zeros([2*n, 2*n, grad_params_no]) 3381 3382 for p in range(0, grad_params_no): 3383 3384 FF[:n,:n] = F 3385 FF[n:,:n] = dF[:,:,p] 3386 FF[n:,n:] = F 3387 3388 # Solve the matrix exponential 3389 AA[:,:,p] = matrix_exponent(FF*dt) 3390 3391 # Solve the differential equation 3392 #foo = AA[:,:,p].dot(np.vstack([m, dm[:,p]])) 3393 #mm = foo[:n,:] 3394 #dm[:,p] = foo[n:,:] 3395 3396 # The discrete-time dynamical model* 3397 if p==0: 3398 A = AA[:n,:n,p] 3399 Q_noise_3 = P_inf - A.dot(P_inf).dot(A.T) 3400 Q_noise = Q_noise_3 3401 #PP = A.dot(P).dot(A.T) + Q_noise_2 3402 3403 # The derivatives of A and Q 3404 dA[:,:,p] = AA[n:,:n,p] 3405 tmp = dA[:,:,p].dot(P_inf).dot(A.T) 3406 dQ[:,:,p] = dP_inf[:,:,p] - tmp \ 3407 - A.dot(dP_inf[:,:,p]).dot(A.T) - tmp.T 3408 3409 dQ[:,:,p] = 0.5*(dQ[:,:,p] + dQ[:,:,p].T) # Symmetrize 3410 else: 3411 dA = None 3412 dQ = None 3413 Q_noise = Q_noise_2 3414 # Innacuracies have been observed when Q_noise_1 was used. 3415 3416 #Q_noise = Q_noise_1 3417 3418 Q_noise = 0.5*(Q_noise + Q_noise.T) # Symmetrize 3419 return A, Q_noise,None, dA, dQ 3420 3421 else: # iterable, array 3422 3423 # Time discretizations (round to 14 decimals to avoid problems) 3424 dt_unique, tmp, reconstruct_index = np.unique(np.round(dt,8), 3425 return_index=True,return_inverse=True) 3426 del tmp 3427 # Allocate space for A and Q 3428 A = np.empty((n,n,dt_unique.shape[0])) 3429 Q_noise = np.empty((n,n,dt_unique.shape[0])) 3430 3431 if compute_derivatives: 3432 dA = np.empty((n,n,grad_params_no,dt_unique.shape[0])) 3433 dQ = np.empty((n,n,grad_params_no,dt_unique.shape[0])) 3434 else: 3435 dA = None 3436 dQ = None 3437 # Call this function for each unique dt 3438 for j in range(0,dt_unique.shape[0]): 3439 A[:,:,j], Q_noise[:,:,j], tmp1, dA_t, dQ_t = ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt_unique[j], 3440 compute_derivatives=compute_derivatives, grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF = dF, dQc=dQc) 3441 if compute_derivatives: 3442 dA[:,:,:,j] = dA_t 3443 dQ[:,:,:,j] = dQ_t 3444 3445 # Return 3446 return A, Q_noise, reconstruct_index, dA, dQ 3447 3448def matrix_exponent(M): 3449 """ 3450 The function computes matrix exponent and handles some special cases 3451 """ 3452 3453 if (M.shape[0] == 1): # 1*1 matrix 3454 Mexp = np.array( ((np.exp(M[0,0]) ,),) ) 3455 3456 else: # matrix is larger 3457 method = None 3458 try: 3459 Mexp = linalg.expm(M) 3460 method = 1 3461 except (Exception,) as e: 3462 Mexp = linalg.expm3(M) 3463 method = 2 3464 finally: 3465 if np.any(np.isnan(Mexp)): 3466 if method == 2: 3467 raise ValueError("Matrix Exponent is not computed 1") 3468 else: 3469 Mexp = linalg.expm3(M) 3470 method = 2 3471 if np.any(np.isnan(Mexp)): 3472 raise ValueError("Matrix Exponent is not computed 2") 3473 3474 return Mexp 3475 3476def balance_matrix(A): 3477 """ 3478 Balance matrix, i.e. finds such similarity transformation of the original 3479 matrix A: A = T * bA * T^{-1}, where norms of columns of bA and of rows of bA 3480 are as close as possible. It is usually used as a preprocessing step in 3481 eigenvalue calculation routine. It is useful also for State-Space models. 3482 3483 See also: 3484 [1] Beresford N. Parlett and Christian Reinsch (1969). Balancing 3485 a matrix for calculation of eigenvalues and eigenvectors. 3486 Numerische Mathematik, 13(4): 293-304. 3487 3488 Input: 3489 ---------------------- 3490 A: square matrix 3491 Matrix to be balanced 3492 3493 Output: 3494 ---------------- 3495 bA: matrix 3496 Balanced matrix 3497 3498 T: matrix 3499 Left part of the similarity transformation 3500 3501 T_inv: matrix 3502 Right part of the similarity transformation. 3503 """ 3504 3505 if len(A.shape) != 2 or (A.shape[0] != A.shape[1]): 3506 raise ValueError('balance_matrix: Expecting square matrix') 3507 3508 N = A.shape[0] # matrix size 3509 3510 gebal = sp.linalg.lapack.get_lapack_funcs('gebal',(A,)) 3511 bA, lo, hi, pivscale, info = gebal(A, permute=True, scale=True,overwrite_a=False) 3512 if info < 0: 3513 raise ValueError('balance_matrix: Illegal value in %d-th argument of internal gebal ' % -info) 3514 # calculating the similarity transforamtion: 3515 def perm_matr(D, c1,c2): 3516 """ 3517 Function creates the permutation matrix which swaps columns c1 and c2. 3518 3519 Input: 3520 -------------- 3521 D: int 3522 Size of the permutation matrix 3523 c1: int 3524 Column 1. Numeration starts from 1...D 3525 c2: int 3526 Column 2. Numeration starts from 1...D 3527 """ 3528 i1 = c1-1; i2 = c2-1 # indices 3529 P = np.eye(D); 3530 P[i1,i1] = 0.0; P[i2,i2] = 0.0; # nullify diagonal elements 3531 P[i1,i2] = 1.0; P[i2,i1] = 1.0 3532 3533 return P 3534 3535 P = np.eye(N) # permutation matrix 3536 if (hi != N-1): # there are row permutations 3537 for k in range(N-1,hi,-1): 3538 new_perm = perm_matr(N, k+1, pivscale[k]) 3539 P = np.dot(P,new_perm) 3540 if (lo != 0): 3541 for k in range(0,lo,1): 3542 new_perm = perm_matr(N, k+1, pivscale[k]) 3543 P = np.dot(P,new_perm) 3544 D = pivscale.copy() 3545 D[0:lo] = 1.0; D[hi+1:N] = 1.0 # thesee scaling factors must be set to one. 3546 #D = np.diag(D) # make a diagonal matrix 3547 3548 T = np.dot(P,np.diag(D)) # similarity transformation in question 3549 T_inv = np.dot(np.diag(D**(-1)),P.T) 3550 3551 #print( np.max(A - np.dot(T, np.dot(bA, T_inv) )) ) 3552 return bA.copy(), T, T_inv 3553 3554def balance_ss_model(F,L,Qc,H,Pinf,P0,dF=None,dQc=None,dPinf=None,dP0=None): 3555 """ 3556 Balances State-Space model for more numerical stability 3557 3558 This is based on the following: 3559 3560 dx/dt = F x + L w 3561 y = H x 3562 3563 Let T z = x, which gives 3564 3565 dz/dt = inv(T) F T z + inv(T) L w 3566 y = H T z 3567 """ 3568 3569 bF,T,T_inv = balance_matrix(F) 3570 3571 bL = np.dot( T_inv, L) 3572 bQc = Qc # not affected 3573 3574 bH = np.dot(H, T) 3575 3576 bPinf = np.dot(T_inv, np.dot(Pinf, T_inv.T)) 3577 3578 #import pdb; pdb.set_trace() 3579# LL,islower = linalg.cho_factor(Pinf) 3580# inds = np.triu_indices(Pinf.shape[0],k=1) 3581# LL[inds] = 0.0 3582# bLL = np.dot(T_inv, LL) 3583# bPinf = np.dot( bLL, bLL.T) 3584 3585 bP0 = np.dot(T_inv, np.dot(P0, T_inv.T)) 3586 3587 if dF is not None: 3588 bdF = np.zeros(dF.shape) 3589 for i in range(dF.shape[2]): 3590 bdF[:,:,i] = np.dot( T_inv, np.dot( dF[:,:,i], T)) 3591 3592 else: 3593 bdF = None 3594 3595 if dPinf is not None: 3596 bdPinf = np.zeros(dPinf.shape) 3597 for i in range(dPinf.shape[2]): 3598 bdPinf[:,:,i] = np.dot( T_inv, np.dot( dPinf[:,:,i], T_inv.T)) 3599 3600# LL,islower = linalg.cho_factor(dPinf[:,:,i]) 3601# inds = np.triu_indices(dPinf[:,:,i].shape[0],k=1) 3602# LL[inds] = 0.0 3603# bLL = np.dot(T_inv, LL) 3604# bdPinf[:,:,i] = np.dot( bLL, bLL.T) 3605 3606 3607 else: 3608 bdPinf = None 3609 3610 if dP0 is not None: 3611 bdP0 = np.zeros(dP0.shape) 3612 for i in range(dP0.shape[2]): 3613 bdP0[:,:,i] = np.dot( T_inv, np.dot( dP0[:,:,i], T_inv.T)) 3614 else: 3615 bdP0 = None 3616 3617 3618 bdQc = dQc # not affected 3619 3620 # (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0) 3621 3622 return bF, bL, bQc, bH, bPinf, bP0, bdF, bdQc, bdPinf, bdP0 3623