1# -*- coding: utf-8 -*-
2# Copyright (c) 2015, Alex Grigorevskiy
3# Licensed under the BSD 3-clause license (see LICENSE.txt)
4"""
5Main functionality for state-space inference.
6"""
7
8import collections  # for cheking whether a variable is iterable
9import types  # for cheking whether a variable is a function
10
11import numpy as np
12import scipy as sp
13import scipy.linalg as linalg
14
15import warnings
16
17try:
18    from . import state_space_setup
19    setup_available = True
20except ImportError as e:
21    setup_available = False
22
23
24print_verbose = False
25
26try:
27    import state_space_cython
28    cython_code_available = True
29    if print_verbose:
30        print("state_space: cython is available")
31except ImportError as e:
32    cython_code_available = False
33
34#cython_code_available = False
35# Use cython by default
36use_cython = False
37if setup_available:
38    use_cython = state_space_setup.use_cython
39
40if print_verbose:
41    if use_cython:
42        print("state_space: cython is used")
43    else:
44        print("state_space: cython is NOT used")
45
46# When debugging external module can set some value to this variable (e.g.)
47# 'model' and in this module this variable can be seen.s
48tmp_buffer = None
49
50
51class Dynamic_Callables_Python(object):
52
53    def f_a(self, k, m, A):
54        """
55        p_a: function (k, x_{k-1}, A_{k}). Dynamic function.
56            k (iteration number), starts at 0
57            x_{k-1} State from the previous step
58            A_{k} Jacobian matrices of f_a. In the linear case it is exactly
59            A_{k}.
60        """
61
62        raise NotImplemented("f_a is not implemented!")
63
64    def Ak(self, k, m, P):  # returns state iteration matrix
65        """
66        function (k, m, P) return Jacobian of dynamic function, it is passed
67        into p_a.
68            k (iteration number), starts at 0
69            m: point where Jacobian is evaluated
70            P: parameter for Jacobian, usually covariance matrix.
71        """
72
73        raise NotImplemented("Ak is not implemented!")
74
75    def Qk(self, k):
76        """
77        function (k). Returns noise matrix of dynamic model on iteration k.
78                k (iteration number). starts at 0
79        """
80        raise NotImplemented("Qk is not implemented!")
81
82    def Q_srk(self, k):
83        """
84        function (k). Returns the square root of noise matrix of dynamic model
85        on iteration k.
86
87        k (iteration number). starts at 0
88
89        This function is implemented to use SVD prediction step.
90        """
91
92        raise NotImplemented("Q_srk is not implemented!")
93
94    def dAk(self, k):
95        """
96        function (k). Returns the derivative of A on iteration k.
97                k (iteration number). starts at 0
98        """
99        raise NotImplemented("dAk is not implemented!")
100
101    def dQk(self, k):
102        """
103        function (k). Returns the derivative of Q on iteration k.
104                k (iteration number). starts at 0
105        """
106        raise NotImplemented("dQk is not implemented!")
107
108    def reset(self, compute_derivatives=False):
109        """
110        Return the state of this object to the beginning of iteration
111        (to k eq. 0).
112        """
113
114        raise NotImplemented("reset is not implemented!")
115
116if use_cython:
117    Dynamic_Callables_Class = state_space_cython.Dynamic_Callables_Cython
118else:
119    Dynamic_Callables_Class = Dynamic_Callables_Python
120
121
122class Measurement_Callables_Python(object):
123    def f_h(self, k, m_pred, Hk):
124        """
125        function (k, x_{k}, H_{k}). Measurement function.
126            k (iteration number), starts at 0
127            x_{k} state
128            H_{k} Jacobian matrices of f_h. In the linear case it is exactly
129            H_{k}.
130        """
131
132        raise NotImplemented("f_a is not implemented!")
133
134    def Hk(self, k, m_pred, P_pred):  # returns state iteration matrix
135        """
136        function (k, m, P) return Jacobian of measurement function, it is
137            passed into p_h.
138            k (iteration number), starts at 0
139            m: point where Jacobian is evaluated
140            P: parameter for Jacobian, usually covariance matrix.
141        """
142
143        raise NotImplemented("Hk is not implemented!")
144
145    def Rk(self, k):
146        """
147        function (k). Returns noise matrix of measurement equation
148            on iteration k.
149            k (iteration number). starts at 0
150        """
151        raise NotImplemented("Rk is not implemented!")
152
153    def R_isrk(self, k):
154        """
155        function (k). Returns the square root of the noise matrix of
156            measurement equation on iteration k.
157            k (iteration number). starts at 0
158
159        This function is implemented to use SVD prediction step.
160        """
161
162        raise NotImplemented("Q_srk is not implemented!")
163
164    def dHk(self, k):
165        """
166        function (k). Returns the derivative of H on iteration k.
167                k (iteration number). starts at 0
168        """
169        raise NotImplemented("dAk is not implemented!")
170
171    def dRk(self, k):
172        """
173        function (k). Returns the derivative of R on iteration k.
174                k (iteration number). starts at 0
175        """
176        raise NotImplemented("dQk is not implemented!")
177
178    def reset(self, compute_derivatives=False):
179        """
180        Return the state of this object to the beginning of iteration
181        (to k eq. 0)
182        """
183
184        raise NotImplemented("reset is not implemented!")
185
186if use_cython:
187    Measurement_Callables_Class = state_space_cython.\
188        Measurement_Callables_Cython
189else:
190    Measurement_Callables_Class = Measurement_Callables_Python
191
192
193class R_handling_Python(Measurement_Callables_Class):
194    """
195    The calss handles noise matrix R.
196    """
197    def __init__(self, R, index, R_time_var_index, unique_R_number, dR=None):
198        """
199        Input:
200        ---------------
201        R - array with noise on various steps. The result of preprocessing
202            the noise input.
203
204        index - for each step of Kalman filter contains the corresponding index
205                in the array.
206
207        R_time_var_index - another index in the array R. Computed earlier and
208                            is passed here.
209
210        unique_R_number - number of unique noise matrices below which square
211                roots are cached and above which they are computed each time.
212
213        dR: 3D array[:, :, param_num]
214            derivative of R. Derivative is supported only when R do not change
215            over time
216
217        Output:
218        --------------
219        Object which has two necessary functions:
220            f_R(k)
221            inv_R_square_root(k)
222        """
223        self.R = R
224        self.index = np.asarray(index, np.int_)
225        self.R_time_var_index = int(R_time_var_index)
226        self.dR = dR
227
228        if (len(np.unique(index)) > unique_R_number):
229            self.svd_each_time = True
230        else:
231            self.svd_each_time = False
232
233        self.R_square_root = {}
234
235    def Rk(self, k):
236        return self.R[:, :, int(self.index[self.R_time_var_index, k])]
237
238    def dRk(self, k):
239        if self.dR is None:
240            raise ValueError("dR derivative is None")
241
242        return self.dR  # the same dirivative on each iteration
243
244    def R_isrk(self, k):
245        """
246        Function returns the inverse square root of R matrix on step k.
247        """
248        ind = int(self.index[self.R_time_var_index, k])
249        R = self.R[:, :, ind]
250
251        if (R.shape[0] == 1):  # 1-D case handle simplier. No storage
252            # of the result, just compute it each time.
253            inv_square_root = np.sqrt(1.0/R)
254        else:
255            if self.svd_each_time:
256
257                (U, S, Vh) = sp.linalg.svd(R, full_matrices=False,
258                                           compute_uv=True, overwrite_a=False,
259                                           check_finite=True)
260
261                inv_square_root = U * 1.0/np.sqrt(S)
262            else:
263                if ind in self.R_square_root:
264                    inv_square_root = self.R_square_root[ind]
265                else:
266                    (U, S, Vh) = sp.linalg.svd(R, full_matrices=False,
267                                               compute_uv=True,
268                                               overwrite_a=False,
269                                               check_finite=True)
270
271                    inv_square_root = U * 1.0/np.sqrt(S)
272
273                    self.R_square_root[ind] = inv_square_root
274
275        return inv_square_root
276
277if use_cython:
278    R_handling_Class = state_space_cython.R_handling_Cython
279else:
280    R_handling_Class = R_handling_Python
281
282
283class Std_Measurement_Callables_Python(R_handling_Class):
284
285    def __init__(self, H, H_time_var_index, R, index, R_time_var_index,
286                 unique_R_number, dH=None, dR=None):
287        super(Std_Measurement_Callables_Python,
288              self).__init__(R, index, R_time_var_index, unique_R_number, dR)
289
290        self.H = H
291        self.H_time_var_index = int(H_time_var_index)
292        self.dH = dH
293
294    def f_h(self, k, m, H):
295        """
296        function (k, x_{k}, H_{k}). Measurement function.
297            k (iteration number), starts at 0
298            x_{k} state
299            H_{k} Jacobian matrices of f_h. In the linear case it is exactly
300            H_{k}.
301        """
302
303        return np.dot(H, m)
304
305    def Hk(self, k, m_pred, P_pred):  # returns state iteration matrix
306        """
307        function (k, m, P) return Jacobian of measurement function, it is
308            passed into p_h.
309            k (iteration number), starts at 0
310            m: point where Jacobian is evaluated
311            P: parameter for Jacobian, usually covariance matrix.
312        """
313
314        return self.H[:, :, int(self.index[self.H_time_var_index, k])]
315
316    def dHk(self, k):
317        if self.dH is None:
318            raise ValueError("dH derivative is None")
319
320        return self.dH  # the same dirivative on each iteration
321
322if use_cython:
323    Std_Measurement_Callables_Class = state_space_cython.\
324                                        Std_Measurement_Callables_Cython
325else:
326    Std_Measurement_Callables_Class = Std_Measurement_Callables_Python
327
328
329class Q_handling_Python(Dynamic_Callables_Class):
330
331    def __init__(self, Q, index, Q_time_var_index, unique_Q_number, dQ=None):
332        """
333        Input:
334        ---------------
335        R - array with noise on various steps. The result of preprocessing
336            the noise input.
337
338        index - for each step of Kalman filter contains the corresponding index
339                in the array.
340
341        R_time_var_index - another index in the array R. Computed earlier and
342                            passed here.
343
344        unique_R_number - number of unique noise matrices below which square
345            roots are cached and above which they are computed each time.
346
347        dQ: 3D array[:, :, param_num]
348            derivative of Q. Derivative is supported only when Q do not
349            change over time
350
351        Output:
352        --------------
353        Object which has two necessary functions:
354            f_R(k)
355            inv_R_square_root(k)
356        """
357
358        self.Q = Q
359        self.index = np.asarray(index, np.int_)
360        self.Q_time_var_index = Q_time_var_index
361        self.dQ = dQ
362
363        if (len(np.unique(index)) > unique_Q_number):
364            self.svd_each_time = True
365        else:
366            self.svd_each_time = False
367
368        self.Q_square_root = {}
369
370    def Qk(self, k):
371        """
372        function (k). Returns noise matrix of dynamic model on iteration k.
373                k (iteration number). starts at 0
374        """
375        return self.Q[:, :, self.index[self.Q_time_var_index, k]]
376
377    def dQk(self, k):
378        if self.dQ is None:
379            raise ValueError("dQ derivative is None")
380
381        return self.dQ  # the same dirivative on each iteration
382
383    def Q_srk(self, k):
384        """
385        function (k). Returns the square root of noise matrix of dynamic model
386                      on iteration k.
387            k (iteration number). starts at 0
388
389        This function is implemented to use SVD prediction step.
390        """
391        ind = self.index[self.Q_time_var_index, k]
392        Q = self.Q[:, :, ind]
393
394        if (Q.shape[0] == 1):  # 1-D case handle simplier. No storage
395            # of the result, just compute it each time.
396            square_root = np.sqrt(Q)
397        else:
398            if self.svd_each_time:
399
400                (U, S, Vh) = sp.linalg.svd(Q, full_matrices=False,
401                                           compute_uv=True,
402                                           overwrite_a=False,
403                                           check_finite=True)
404
405                square_root = U * np.sqrt(S)
406            else:
407
408                if ind in self.Q_square_root:
409                    square_root = self.Q_square_root[ind]
410                else:
411                    (U, S, Vh) = sp.linalg.svd(Q, full_matrices=False,
412                                               compute_uv=True,
413                                               overwrite_a=False,
414                                               check_finite=True)
415
416                    square_root = U * np.sqrt(S)
417
418                    self.Q_square_root[ind] = square_root
419
420        return square_root
421
422if use_cython:
423    Q_handling_Class = state_space_cython.Q_handling_Cython
424else:
425    Q_handling_Class = Q_handling_Python
426
427
428class Std_Dynamic_Callables_Python(Q_handling_Class):
429
430    def __init__(self, A, A_time_var_index, Q, index, Q_time_var_index,
431                 unique_Q_number, dA=None, dQ=None):
432        super(Std_Dynamic_Callables_Python,
433              self).__init__(Q, index, Q_time_var_index, unique_Q_number, dQ)
434
435        self.A = A
436        self.A_time_var_index = np.asarray(A_time_var_index, np.int_)
437        self.dA = dA
438
439    def f_a(self, k, m, A):
440        """
441            f_a: function (k, x_{k-1}, A_{k}). Dynamic function.
442            k (iteration number), starts at 0
443            x_{k-1} State from the previous step
444            A_{k} Jacobian matrices of f_a. In the linear case it is exactly
445            A_{k}.
446        """
447        return np.dot(A, m)
448
449    def Ak(self, k, m_pred, P_pred):  # returns state iteration matrix
450        """
451        function (k, m, P) return Jacobian of measurement function, it is
452            passed into p_h.
453            k (iteration number), starts at 0
454            m: point where Jacobian is evaluated
455            P: parameter for Jacobian, usually covariance matrix.
456        """
457
458        return self.A[:, :, self.index[self.A_time_var_index, k]]
459
460    def dAk(self, k):
461        if self.dA is None:
462            raise ValueError("dA derivative is None")
463
464        return self.dA  # the same dirivative on each iteration
465
466    def reset(self, compute_derivatives=False):
467        """
468        Return the state of this object to the beginning of iteration
469        (to k eq. 0)
470        """
471
472        return self
473
474if use_cython:
475    Std_Dynamic_Callables_Class = state_space_cython.\
476                                  Std_Dynamic_Callables_Cython
477else:
478    Std_Dynamic_Callables_Class = Std_Dynamic_Callables_Python
479
480
481class AddMethodToClass(object):
482
483    def __init__(self, func=None, tp='staticmethod'):
484        """
485        Input:
486        --------------
487        func: function to add
488        tp: string
489        Type of the method: normal, staticmethod, classmethod
490        """
491        if func is None:
492            raise ValueError("Function can not be None")
493
494        self.func = func
495        self.tp = tp
496
497    def __get__(self, obj, klass=None, *args, **kwargs):
498
499        if self.tp == 'staticmethod':
500            return self.func
501        elif self.tp == 'normal':
502            def newfunc(obj, *args, **kwargs):
503                return self.func
504
505        elif self.tp == 'classmethod':
506            def newfunc(klass, *args, **kwargs):
507                return self.func
508        return newfunc
509
510
511class DescreteStateSpaceMeta(type):
512    """
513    Substitute necessary methods from cython.
514    """
515
516    def __new__(typeclass, name, bases, attributes):
517        """
518        After thos method the class object is created
519        """
520
521        if use_cython:
522            if '_kalman_prediction_step_SVD' in attributes:
523                attributes['_kalman_prediction_step_SVD'] =\
524                                    AddMethodToClass(state_space_cython.
525                                        _kalman_prediction_step_SVD_Cython)
526
527            if '_kalman_update_step_SVD' in attributes:
528                attributes['_kalman_update_step_SVD'] =\
529                                    AddMethodToClass(state_space_cython.
530                                        _kalman_update_step_SVD_Cython)
531
532            if '_cont_discr_kalman_filter_raw' in attributes:
533                attributes['_cont_discr_kalman_filter_raw'] =\
534                                    AddMethodToClass(state_space_cython.
535                                        _cont_discr_kalman_filter_raw_Cython)
536
537        return super(DescreteStateSpaceMeta,
538                     typeclass).__new__(typeclass, name, bases, attributes)
539
540
541class DescreteStateSpace(object):
542    """
543    This class implents state-space inference for linear and non-linear
544    state-space models.
545    Linear models are:
546    x_{k} =  A_{k} * x_{k-1} + q_{k-1};       q_{k-1} ~ N(0, Q_{k-1})
547    y_{k} = H_{k} * x_{k} + r_{k};            r_{k-1} ~ N(0, R_{k})
548
549    Nonlinear:
550    x_{k} =  f_a(k, x_{k-1}, A_{k}) + q_{k-1};       q_{k-1} ~ N(0, Q_{k-1})
551    y_{k} =  f_h(k, x_{k}, H_{k}) + r_{k};           r_{k-1} ~ N(0, R_{k})
552    Here f_a and f_h are some functions of k (iteration number), x_{k-1} or
553    x_{k} (state value on certain iteration), A_{k} and  H_{k} - Jacobian
554    matrices of f_a and f_h respectively. In the linear case they are exactly
555    A_{k} and  H_{k}.
556
557
558    Currently two nonlinear Gaussian filter algorithms are implemented:
559    Extended Kalman Filter (EKF), Statistically linearized Filter (SLF), which
560    implementations are very similar.
561
562    """
563    __metaclass__ = DescreteStateSpaceMeta
564
565    @staticmethod
566    def _reshape_input_data(shape, desired_dim=3):
567        """
568        Static function returns the column-wise shape for for an input shape.
569
570        Input:
571        --------------
572            shape: tuple
573                Shape of an input array, so that it is always a column.
574
575            desired_dim: int
576                desired shape of output. For Y data it should be 3
577                (sample_no, dimension, ts_no). For X data - 2 (sample_no, 1)
578        Output:
579        --------------
580            new_shape: tuple
581                New shape of the measurements array. Idea is that samples are
582                along dimension 0, sample dimension - dimension 1, different
583                time series - dimension 2.
584            old_shape: tuple or None
585                If the shape has been modified, return old shape, otherwise
586                None.
587        """
588
589        if (len(shape) > 3):
590            raise ValueError("""Input array is not supposed to be more
591                                than 3 dimensional.""")
592
593        if (len(shape) > desired_dim):
594            raise ValueError("Input array shape is more than desired shape.")
595        elif len(shape) == 1:
596            if (desired_dim == 3):
597                return ((shape[0], 1, 1), shape)  # last dimension is the
598                # time serime_series_no
599            elif (desired_dim == 2):
600                return ((shape[0], 1), shape)
601
602        elif len(shape) == 2:
603            if (desired_dim == 3):
604                return ((shape[1], 1, 1), shape) if (shape[0] == 1) else\
605                    ((shape[0], shape[1], 1), shape)  # convert to column
606                                                      # vector
607            elif (desired_dim == 2):
608                return ((shape[1], 1), shape) if (shape[0] == 1) else\
609                    ((shape[0], shape[1]), None)  # convert to column vector
610
611        else:  # len(shape) == 3
612            return (shape, None)  # do nothing
613
614    @classmethod
615    def kalman_filter(cls, p_A, p_Q, p_H, p_R, Y, index=None, m_init=None,
616                      P_init=None, p_kalman_filter_type='regular',
617                      calc_log_likelihood=False,
618                      calc_grad_log_likelihood=False, grad_params_no=None,
619                      grad_calc_params=None):
620        """
621        This function implements the basic Kalman Filter algorithm
622        These notations for the State-Space model are assumed:
623            x_{k} =  A_{k} * x_{k-1} + q_{k-1};       q_{k-1} ~ N(0, Q_{k-1})
624            y_{k} = H_{k} * x_{k} + r_{k};            r_{k-1} ~ N(0, R_{k})
625
626        Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k))
627
628        Current Features:
629        ----------------------------------------
630        1) The function generaly do not modify the passed parameters. If
631        it happens then it is an error. There are several exeprions: scalars
632        can be modified into a matrix, in some rare cases shapes of
633        the derivatives matrices may be changed, it is ignored for now.
634
635        2) Copies of p_A, p_Q, index are created in memory to be used later
636        in smoother. References to copies are kept in "matrs_for_smoother"
637        return parameter.
638
639        3) Function support "multiple time series mode" which means that exactly
640        the same State-Space model is used to filter several sets of measurements.
641        In this case third dimension of Y should include these state-space measurements
642        Log_likelihood and Grad_log_likelihood have the corresponding dimensions then.
643
644        4) Calculation of Grad_log_likelihood is not supported if matrices A,Q,
645        H, or R changes over time. (later may be changed)
646
647        5) Measurement may include missing values. In this case update step is
648        not done for this measurement. (later may be changed)
649
650        Input:
651        -----------------
652
653        p_A: scalar, square matrix, 3D array
654            A_{k} in the model. If matrix then A_{k} = A - constant.
655            If it is 3D array then A_{k} = p_A[:,:, index[0,k]]
656
657        p_Q: scalar, square symmetric matrix, 3D array
658            Q_{k-1} in the model. If matrix then Q_{k-1} = Q - constant.
659            If it is 3D array then Q_{k-1} = p_Q[:,:, index[1,k]]
660
661        p_H: scalar, matrix (measurement_dim, state_dim) , 3D array
662            H_{k} in the model. If matrix then H_{k} = H - constant.
663            If it is 3D array then H_{k} = p_Q[:,:, index[2,k]]
664
665        p_R: scalar, square symmetric matrix, 3D array
666            R_{k} in the model. If matrix then R_{k} = R - constant.
667            If it is 3D array then R_{k} = p_R[:,:, index[3,k]]
668
669        Y: matrix or vector or 3D array
670            Data. If Y is matrix then samples are along 0-th dimension and
671            features along the 1-st. If 3D array then third dimension
672            correspond to "multiple time series mode".
673
674        index: vector
675            Which indices (on 3-rd dimension) from arrays p_A, p_Q,p_H, p_R to use
676            on every time step. If this parameter is None then it is assumed
677            that p_A, p_Q, p_H, p_R do not change over time and indices are not needed.
678            index[0,:] - correspond to A, index[1,:] - correspond to Q
679            index[2,:] - correspond to H, index[3,:] - correspond to R.
680            If index.shape[0] == 1, it is assumed that indides for all matrices
681            are the same.
682
683        m_init: vector or matrix
684            Initial distribution mean. If None it is assumed to be zero.
685            For "multiple time series mode" it is matrix, second dimension of
686            which correspond to different time series. In regular case ("one
687            time series mode") it is a vector.
688
689        P_init: square symmetric matrix or scalar
690            Initial covariance of the states. If the parameter is scalar
691            then it is assumed that initial covariance matrix is unit matrix
692            multiplied by this scalar. If None the unit matrix is used instead.
693            "multiple time series mode" does not affect it, since it does not
694            affect anything related to state variaces.
695
696        calc_log_likelihood: boolean
697            Whether to calculate marginal likelihood of the state-space model.
698
699        calc_grad_log_likelihood: boolean
700            Whether to calculate gradient of the marginal likelihood
701            of the state-space model. If true then "grad_calc_params" parameter must
702            provide the extra parameters for gradient calculation.
703
704        grad_params_no: int
705            If previous parameter is true, then this parameters gives the
706            total number of parameters in the gradient.
707
708        grad_calc_params: dictionary
709            Dictionary with derivatives of model matrices with respect
710            to parameters "dA", "dQ", "dH", "dR", "dm_init", "dP_init".
711            They can be None, in this case zero matrices (no dependence on parameters)
712            is assumed. If there is only one parameter then third dimension is
713            automatically added.
714
715        Output:
716        --------------
717
718        M: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array
719            Filter estimates of the state means. In the extra step the initial
720            value is included. In the "multiple time series mode" third dimension
721            correspond to different timeseries.
722
723        P: (no_steps+1, state_dim, state_dim) 3D array
724            Filter estimates of the state covariances. In the extra step the initial
725            value is included.
726
727        log_likelihood: double or (1, time_series_no) 3D array.
728            If the parameter calc_log_likelihood was set to true, return
729            logarithm of marginal likelihood of the state-space model. If
730            the parameter was false, return None. In the "multiple time series mode" it is a vector
731            providing log_likelihood for each time series.
732
733        grad_log_likelihood: column vector or (grad_params_no, time_series_no) matrix
734            If calc_grad_log_likelihood is true, return gradient of log likelihood
735            with respect to parameters. It returns it column wise, so in
736            "multiple time series mode" gradients for each time series is in the
737            corresponding column.
738
739        matrs_for_smoother: dict
740            Dictionary with model functions for smoother. The intrinsic model
741            functions are computed in this functions and they are returned to
742            use in smoother for convenience. They are: 'p_a', 'p_f_A', 'p_f_Q'
743            The dictionary contains the same fields.
744        """
745
746        #import pdb; pdb.set_trace()
747
748        # Parameters checking ->
749        # index
750        p_A = np.atleast_1d(p_A)
751        p_Q = np.atleast_1d(p_Q)
752        p_H = np.atleast_1d(p_H)
753        p_R = np.atleast_1d(p_R)
754
755        # Reshape and check measurements:
756        Y.shape, old_Y_shape  = cls._reshape_input_data(Y.shape)
757        measurement_dim = Y.shape[1]
758        time_series_no = Y.shape[2] # multiple time series mode
759
760        if ((len(p_A.shape) == 3) and (len(p_A.shape[2]) != 1)) or\
761            ((len(p_Q.shape) == 3) and (len(p_Q.shape[2]) != 1)) or\
762            ((len(p_H.shape) == 3) and (len(p_H.shape[2]) != 1)) or\
763            ((len(p_R.shape) == 3) and (len(p_R.shape[2]) != 1)):
764            model_matrices_chage_with_time = True
765        else:
766            model_matrices_chage_with_time = False
767
768        # Check index
769        old_index_shape = None
770        if index is None:
771            if (len(p_A.shape) == 3) or (len(p_Q.shape) == 3) or\
772                (len(p_H.shape) == 3) or (len(p_R.shape) == 3):
773                raise ValueError("Parameter index can not be None for time varying matrices (third dimension is present)")
774            else: # matrices do not change in time, so form dummy zero indices.
775                index = np.zeros((1,Y.shape[0]))
776        else:
777            if len(index.shape) == 1:
778                index.shape = (1,index.shape[0])
779                old_index_shape = (index.shape[0],)
780
781            if (index.shape[1] != Y.shape[0]):
782                raise ValueError("Number of measurements must be equal the number of A_{k}, Q_{k}, H_{k}, R_{k}")
783
784        if (index.shape[0] == 1):
785            A_time_var_index = 0; Q_time_var_index = 0
786            H_time_var_index = 0; R_time_var_index = 0
787        elif (index.shape[0] == 4):
788            A_time_var_index = 0; Q_time_var_index = 1
789            H_time_var_index = 2; R_time_var_index = 3
790        else:
791            raise ValueError("First Dimension of index must be either 1 or 4.")
792
793        state_dim = p_A.shape[0]
794        # Check and make right shape for model matrices. On exit they all are 3 dimensional. Last dimension
795        # correspond to change in time.
796        (p_A, old_A_shape) = cls._check_SS_matrix(p_A, state_dim, measurement_dim, which='A')
797        (p_Q, old_Q_shape) = cls._check_SS_matrix(p_Q, state_dim, measurement_dim, which='Q')
798        (p_H, old_H_shape) = cls._check_SS_matrix(p_H, state_dim, measurement_dim, which='H')
799        (p_R, old_R_shape) = cls._check_SS_matrix(p_R, state_dim, measurement_dim, which='R')
800
801        # m_init
802        if m_init is None:
803            m_init = np.zeros((state_dim, time_series_no))
804        else:
805            m_init = np.atleast_2d(m_init).T
806
807        # P_init
808        if P_init is None:
809            P_init = np.eye(state_dim)
810        elif not isinstance(P_init, collections.Iterable): #scalar
811            P_init = P_init*np.eye(state_dim)
812
813        if p_kalman_filter_type not in ('regular', 'svd'):
814            raise ValueError("Kalman filer type neither 'regular nor 'svd'.")
815
816        # Functions to pass to the kalman_filter algorithm:
817        # Parameters:
818        # k - number of Kalman filter iteration
819        # m - vector for calculating matrices. Required for EKF. Not used here.
820
821        c_p_A = p_A.copy() # create a copy because this object is passed to the smoother
822        c_p_Q = p_Q.copy() # create a copy because this object is passed to the smoother
823        c_index = index.copy() # create a copy because this object is passed to the smoother
824
825        if calc_grad_log_likelihood:
826            if model_matrices_chage_with_time:
827                raise ValueError("When computing likelihood gradient A and Q can not change over time.")
828
829            dA = cls._check_grad_state_matrices(grad_calc_params.get('dA'), state_dim, grad_params_no, which = 'dA')
830            dQ = cls._check_grad_state_matrices(grad_calc_params.get('dQ'), state_dim, grad_params_no, which = 'dQ')
831            dH = cls._check_grad_measurement_matrices(grad_calc_params.get('dH'), state_dim, grad_params_no, measurement_dim, which = 'dH')
832            dR = cls._check_grad_measurement_matrices(grad_calc_params.get('dR'), state_dim, grad_params_no, measurement_dim, which = 'dR')
833
834            dm_init = grad_calc_params.get('dm_init')
835            if dm_init is None:
836                 # multiple time series mode. Keep grad_params always as a last dimension
837                dm_init = np.zeros((state_dim, time_series_no, grad_params_no))
838
839            dP_init = grad_calc_params.get('dP_init')
840            if dP_init is None:
841                dP_init = np.zeros((state_dim,state_dim,grad_params_no))
842        else:
843            dA = None
844            dQ = None
845            dH = None
846            dR = None
847            dm_init = None
848            dP_init = None
849
850        dynamic_callables = Std_Dynamic_Callables_Class(c_p_A, A_time_var_index, c_p_Q, c_index, Q_time_var_index, 20, dA, dQ)
851        measurement_callables = Std_Measurement_Callables_Class(p_H, H_time_var_index, p_R, index, R_time_var_index, 20, dH, dR)
852
853        (M, P,log_likelihood, grad_log_likelihood, dynamic_callables) = \
854            cls._kalman_algorithm_raw(state_dim, dynamic_callables,
855                                    measurement_callables, Y, m_init,
856                                    P_init, p_kalman_filter_type = p_kalman_filter_type,
857                                    calc_log_likelihood=calc_log_likelihood,
858                                    calc_grad_log_likelihood=calc_grad_log_likelihood,
859                                    grad_params_no=grad_params_no,
860                                    dm_init=dm_init, dP_init=dP_init)
861
862        # restore shapes so that input parameters are unchenged
863        if old_index_shape is not None:
864            index.shape = old_index_shape
865
866        if old_Y_shape is not None:
867            Y.shape = old_Y_shape
868
869        if old_A_shape is not None:
870            p_A.shape = old_A_shape
871
872        if old_Q_shape is not None:
873            p_Q.shape = old_Q_shape
874
875        if old_H_shape is not None:
876            p_H.shape = old_H_shape
877
878        if old_R_shape is not None:
879            p_R.shape = old_R_shape
880        # Return values
881
882        return (M, P,log_likelihood, grad_log_likelihood, dynamic_callables)
883
884    @classmethod
885    def extended_kalman_filter(cls,p_state_dim, p_a, p_f_A, p_f_Q, p_h, p_f_H, p_f_R, Y, m_init=None,
886                          P_init=None,calc_log_likelihood=False):
887
888        """
889        Extended Kalman Filter
890
891        Input:
892        -----------------
893
894        p_state_dim: integer
895
896        p_a: if None - the function from the linear model is assumed. No non-
897             linearity in the dynamic is assumed.
898
899             function (k, x_{k-1}, A_{k}). Dynamic function.
900             k: (iteration number),
901             x_{k-1}: (previous state)
902             x_{k}: Jacobian matrices of f_a. In the linear case it is exactly A_{k}.
903
904        p_f_A: matrix - in this case function which returns this matrix is assumed.
905               Look at this parameter description in kalman_filter function.
906
907               function (k, m, P) return Jacobian of dynamic function, it is
908               passed into p_a.
909
910               k: (iteration number),
911               m: point where Jacobian is evaluated
912               P: parameter for Jacobian, usually covariance matrix.
913
914        p_f_Q: matrix. In this case function which returns this matrix is asumed.
915               Look at this parameter description in kalman_filter function.
916
917               function (k). Returns noise matrix of dynamic model on iteration k.
918               k: (iteration number).
919
920        p_h: if None - the function from the linear measurement model is assumed.
921             No nonlinearity in the measurement is assumed.
922
923             function (k, x_{k}, H_{k}). Measurement function.
924             k: (iteration number),
925             x_{k}: (current state)
926             H_{k}: Jacobian matrices of f_h. In the linear case it is exactly H_{k}.
927
928        p_f_H: matrix - in this case function which returns this matrix is assumed.
929               function (k, m, P) return Jacobian of dynamic function, it is
930               passed into p_h.
931               k: (iteration number),
932               m: point where Jacobian is evaluated
933               P: parameter for Jacobian, usually covariance matrix.
934
935        p_f_R: matrix. In this case function which returns this matrix is asumed.
936               function (k). Returns noise matrix of measurement equation
937               on iteration k.
938               k: (iteration number).
939
940        Y: matrix or vector
941            Data. If Y is matrix then samples are along 0-th dimension and
942            features along the 1-st. May have missing values.
943
944        p_mean: vector
945            Initial distribution mean. If None it is assumed to be zero
946
947        P_init: square symmetric matrix or scalar
948            Initial covariance of the states. If the parameter is scalar
949            then it is assumed that initial covariance matrix is unit matrix
950            multiplied by this scalar. If None the unit matrix is used instead.
951
952        calc_log_likelihood: boolean
953            Whether to calculate marginal likelihood of the state-space model.
954        """
955
956        # Y
957        Y.shape, old_Y_shape  =  cls._reshape_input_data(Y.shape)
958
959         # m_init
960        if m_init is None:
961            m_init = np.zeros((p_state_dim,1))
962        else:
963            m_init = np.atleast_2d(m_init).T
964
965        # P_init
966        if P_init is None:
967            P_init = np.eye(p_state_dim)
968        elif not isinstance(P_init, collections.Iterable): #scalar
969            P_init = P_init*np.eye(p_state_dim)
970
971        if p_a is None:
972            p_a = lambda k,m,A: np.dot(A, m)
973
974        old_A_shape = None
975        if not isinstance(p_f_A, types.FunctionType): # not a function but array
976            p_f_A = np.atleast_1d(p_f_A)
977            (p_A, old_A_shape) = cls._check_A_matrix(p_f_A)
978
979            p_f_A = lambda k, m, P: p_A[:,:, 0] # make function
980        else:
981            if p_f_A(1, m_init, P_init).shape[0] != m_init.shape[0]:
982                raise ValueError("p_f_A function returns matrix of wrong size")
983
984        old_Q_shape = None
985        if not isinstance(p_f_Q, types.FunctionType): # not a function but array
986            p_f_Q = np.atleast_1d(p_f_Q)
987            (p_Q, old_Q_shape) = cls._check_Q_matrix(p_f_Q)
988
989            p_f_Q = lambda k: p_Q[:,:, 0] # make function
990        else:
991            if p_f_Q(1).shape[0] != m_init.shape[0]:
992                raise ValueError("p_f_Q function returns matrix of wrong size")
993
994        if p_h is None:
995            lambda k,m,H: np.dot(H, m)
996
997        old_H_shape = None
998        if not isinstance(p_f_H, types.FunctionType): # not a function but array
999            p_f_H = np.atleast_1d(p_f_H)
1000            (p_H, old_H_shape) = cls._check_H_matrix(p_f_H)
1001
1002            p_f_H = lambda k, m, P: p_H # make function
1003        else:
1004            if p_f_H(1, m_init, P_init).shape[0] != Y.shape[1]:
1005                raise ValueError("p_f_H function returns matrix of wrong size")
1006
1007        old_R_shape = None
1008        if not isinstance(p_f_R, types.FunctionType): # not a function but array
1009            p_f_R = np.atleast_1d(p_f_R)
1010            (p_R, old_R_shape) = cls._check_H_matrix(p_f_R)
1011
1012            p_f_R = lambda k: p_R # make function
1013        else:
1014            if p_f_R(1).shape[0] != m_init.shape[0]:
1015                raise ValueError("p_f_R function returns matrix of wrong size")
1016
1017#        class dynamic_callables_class(Dynamic_Model_Callables):
1018#
1019#            Ak =
1020#            Qk =
1021
1022
1023        class measurement_callables_class(R_handling_Class):
1024            def __init__(self,R, index, R_time_var_index, unique_R_number):
1025                super(measurement_callables_class,self).__init__(R, index, R_time_var_index, unique_R_number)
1026
1027            Hk = AddMethodToClass(f_H)
1028            f_h = AddMethodToClass(f_hl)
1029
1030
1031        (M, P,log_likelihood, grad_log_likelihood)  = cls._kalman_algorithm_raw(p_state_dim, p_a, p_f_A, p_f_Q, p_h, p_f_H, p_f_R, Y, m_init,
1032                          P_init, calc_log_likelihood,
1033                          calc_grad_log_likelihood=False, grad_calc_params=None)
1034
1035        if old_Y_shape is not None:
1036            Y.shape = old_Y_shape
1037
1038        if old_A_shape is not None:
1039            p_A.shape = old_A_shape
1040
1041        if old_Q_shape is not None:
1042            p_Q.shape = old_Q_shape
1043
1044        if old_H_shape is not None:
1045            p_H.shape = old_H_shape
1046
1047        if old_R_shape is not None:
1048            p_R.shape = old_R_shape
1049
1050        return (M, P)
1051
1052    @classmethod
1053    def _kalman_algorithm_raw(cls,state_dim, p_dynamic_callables, p_measurement_callables, Y, m_init,
1054                          P_init, p_kalman_filter_type='regular',
1055                          calc_log_likelihood=False,
1056                          calc_grad_log_likelihood=False, grad_params_no=None,
1057                          dm_init=None, dP_init=None):
1058        """
1059        General nonlinear filtering algorithm for inference in the state-space
1060        model:
1061
1062        x_{k} =  f_a(k, x_{k-1}, A_{k}) + q_{k-1};       q_{k-1} ~ N(0, Q_{k-1})
1063        y_{k} =  f_h(k, x_{k}, H_{k}) + r_{k};           r_{k-1} ~ N(0, R_{k})
1064
1065        Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k))
1066
1067        Current Features:
1068        ----------------------------------------
1069
1070        1) Function support "multiple time series mode" which means that exactly
1071        the same State-Space model is used to filter several sets of measurements.
1072        In this case third dimension of Y should include these state-space measurements
1073        Log_likelihood and Grad_log_likelihood have the corresponding dimensions then.
1074
1075        2) Measurement may include missing values. In this case update step is
1076        not done for this measurement. (later may be changed)
1077
1078        Input:
1079        -----------------
1080        state_dim: int
1081            Demensionality of the states
1082
1083        p_a: function (k, x_{k-1}, A_{k}). Dynamic function.
1084            k (iteration number),
1085            x_{k-1}
1086            A_{k} Jacobian matrices of f_a. In the linear case it is exactly A_{k}.
1087
1088        p_f_A: function (k, m, P) return Jacobian of dynamic function, it is
1089            passed into p_a.
1090            k (iteration number),
1091            m: point where Jacobian is evaluated
1092            P: parameter for Jacobian, usually covariance matrix.
1093
1094        p_f_Q: function (k). Returns noise matrix of dynamic model on iteration k.
1095            k (iteration number).
1096
1097        p_h: function (k, x_{k}, H_{k}). Measurement function.
1098            k (iteration number),
1099            x_{k}
1100            H_{k} Jacobian matrices of f_h. In the linear case it is exactly H_{k}.
1101
1102        p_f_H: function (k, m, P) return Jacobian of dynamic function, it is
1103            passed into p_h.
1104            k (iteration number),
1105            m: point where Jacobian is evaluated
1106            P: parameter for Jacobian, usually covariance matrix.
1107
1108        p_f_R: function (k). Returns noise matrix of measurement equation
1109            on iteration k.
1110            k (iteration number).
1111
1112        Y: matrix or vector or 3D array
1113            Data. If Y is matrix then samples are along 0-th dimension and
1114            features along the 1-st. If 3D array then third dimension
1115            correspond to "multiple time series mode".
1116
1117        m_init: vector or matrix
1118            Initial distribution mean. For "multiple time series mode"
1119            it is matrix, second dimension of which correspond to different
1120            time series. In regular case ("one time series mode") it is a
1121            vector.
1122
1123        P_init: matrix or scalar
1124            Initial covariance of the states. Must be not None
1125            "multiple time series mode" does not affect it, since it does not
1126            affect anything related to state variaces.
1127
1128        p_kalman_filter_type: string
1129
1130
1131        calc_log_likelihood: boolean
1132            Whether to calculate marginal likelihood of the state-space model.
1133
1134        calc_grad_log_likelihood: boolean
1135            Whether to calculate gradient of the marginal likelihood
1136            of the state-space model. If true then the next parameter must
1137            provide the extra parameters for gradient calculation.
1138
1139        grad_calc_params: dictionary
1140            Dictionary with derivatives of model matrices with respect
1141            to parameters "dA", "dQ", "dH", "dR", "dm_init", "dP_init".
1142
1143        Output:
1144        --------------
1145
1146        M: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array
1147            Filter estimates of the state means. In the extra step the initial
1148            value is included. In the "multiple time series mode" third dimension
1149            correspond to different timeseries.
1150
1151        P: (no_steps+1, state_dim, state_dim) 3D array
1152            Filter estimates of the state covariances. In the extra step the initial
1153            value is included.
1154
1155        log_likelihood: double or (1, time_series_no) 3D array.
1156            If the parameter calc_log_likelihood was set to true, return
1157            logarithm of marginal likelihood of the state-space model. If
1158            the parameter was false, return None. In the "multiple time series mode" it is a vector
1159            providing log_likelihood for each time series.
1160
1161        grad_log_likelihood: column vector or (grad_params_no, time_series_no) matrix
1162            If calc_grad_log_likelihood is true, return gradient of log likelihood
1163            with respect to parameters. It returns it column wise, so in
1164            "multiple time series mode" gradients for each time series is in the
1165            corresponding column.
1166
1167        """
1168
1169        steps_no = Y.shape[0] # number of steps in the Kalman Filter
1170        time_series_no = Y.shape[2] # multiple time series mode
1171
1172        # Allocate space for results
1173        # Mean estimations. Initial values will be included
1174        M = np.empty(((steps_no+1),state_dim,time_series_no))
1175        M[0,:,:] = m_init # Initialize mean values
1176        # Variance estimations. Initial values will be included
1177        P = np.empty(((steps_no+1),state_dim,state_dim))
1178        P_init = 0.5*( P_init + P_init.T) # symmetrize initial covariance. In some ustable cases this is uiseful
1179        P[0,:,:] = P_init # Initialize initial covariance matrix
1180
1181        if p_kalman_filter_type == 'svd':
1182            (U,S,Vh) = sp.linalg.svd( P_init,full_matrices=False, compute_uv=True,
1183                      overwrite_a=False,check_finite=True)
1184            S[ (S==0) ] = 1e-17 # allows to run algorithm for singular initial variance
1185            P_upd = (P_init, S,U)
1186
1187        log_likelihood = 0 if calc_log_likelihood else None
1188        grad_log_likelihood = 0 if calc_grad_log_likelihood else None
1189
1190        #setting initial values for derivatives update
1191        dm_upd = dm_init
1192        dP_upd = dP_init
1193        # Main loop of the Kalman filter
1194        for k in range(0,steps_no):
1195            # In this loop index for new estimations is (k+1), old - (k)
1196            # This happened because initial values are stored at 0-th index.
1197
1198            prev_mean = M[k,:,:] # mean from the previous step
1199
1200            if p_kalman_filter_type == 'svd':
1201                m_pred, P_pred, dm_pred, dP_pred = \
1202                cls._kalman_prediction_step_SVD(k, prev_mean ,P_upd, p_dynamic_callables,
1203                    calc_grad_log_likelihood=calc_grad_log_likelihood,
1204                    p_dm = dm_upd, p_dP = dP_upd)
1205            else:
1206                m_pred, P_pred, dm_pred, dP_pred = \
1207                cls._kalman_prediction_step(k, prev_mean ,P[k,:,:], p_dynamic_callables,
1208                    calc_grad_log_likelihood=calc_grad_log_likelihood,
1209                    p_dm = dm_upd, p_dP = dP_upd )
1210
1211            k_measurment = Y[k,:,:]
1212
1213            if (np.any(np.isnan(k_measurment)) == False):
1214                if p_kalman_filter_type == 'svd':
1215                    m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
1216                    cls._kalman_update_step_SVD(k,  m_pred , P_pred, p_measurement_callables,
1217                            k_measurment, calc_log_likelihood=calc_log_likelihood,
1218                            calc_grad_log_likelihood=calc_grad_log_likelihood,
1219                            p_dm = dm_pred, p_dP = dP_pred )
1220
1221
1222    #                m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
1223    #                cls._kalman_update_step(k,  m_pred , P_pred[0], f_h, f_H, p_R.f_R, k_measurment,
1224    #                        calc_log_likelihood=calc_log_likelihood,
1225    #                        calc_grad_log_likelihood=calc_grad_log_likelihood,
1226    #                        p_dm = dm_pred, p_dP = dP_pred, grad_calc_params_2 = (dH, dR))
1227    #
1228    #                (U,S,Vh) = sp.linalg.svd( P_upd,full_matrices=False, compute_uv=True,
1229    #                      overwrite_a=False,check_finite=True)
1230    #                P_upd = (P_upd, S,U)
1231                else:
1232                    m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
1233                    cls._kalman_update_step(k,  m_pred , P_pred, p_measurement_callables, k_measurment,
1234                            calc_log_likelihood=calc_log_likelihood,
1235                            calc_grad_log_likelihood=calc_grad_log_likelihood,
1236                            p_dm = dm_pred, p_dP = dP_pred )
1237
1238            else:
1239#                if k_measurment.shape != (1,1):
1240#                    raise ValueError("Nan measurements are currently not supported for \
1241#                                     multidimensional output and multiple time series.")
1242#                else:
1243#                    m_upd = m_pred; P_upd = P_pred; dm_upd = dm_pred; dP_upd = dP_pred
1244#                    log_likelihood_update = 0.0;
1245#                    d_log_likelihood_update = 0.0;
1246
1247                if not np.all(np.isnan(k_measurment)):
1248                    raise ValueError("""Nan measurements are currently not supported if
1249                                     they are intermixed with not NaN measurements""")
1250                else:
1251                    m_upd = m_pred; P_upd = P_pred; dm_upd = dm_pred; dP_upd = dP_pred
1252                    if calc_log_likelihood:
1253                        log_likelihood_update = np.zeros((time_series_no,))
1254                    if calc_grad_log_likelihood:
1255                        d_log_likelihood_update = np.zeros((grad_params_no,time_series_no))
1256
1257
1258            if calc_log_likelihood:
1259                log_likelihood += log_likelihood_update
1260
1261            if calc_grad_log_likelihood:
1262                grad_log_likelihood += d_log_likelihood_update
1263
1264            M[k+1,:,:] = m_upd # separate mean value for each time series
1265
1266            if p_kalman_filter_type == 'svd':
1267                P[k+1,:,:] = P_upd[0]
1268            else:
1269                P[k+1,:,:] = P_upd
1270
1271        # !!!Print statistics! Print sizes of matrices
1272        # !!!Print statistics! Print iteration time base on another boolean variable
1273        return (M, P, log_likelihood, grad_log_likelihood, p_dynamic_callables.reset(False))
1274
1275    @staticmethod
1276    def _kalman_prediction_step(k, p_m , p_P, p_dyn_model_callable, calc_grad_log_likelihood=False,
1277                                p_dm = None, p_dP = None):
1278        """
1279        Desctrete prediction function
1280
1281        Input:
1282            k:int
1283                Iteration No. Starts at 0. Total number of iterations equal to the
1284                number of measurements.
1285
1286            p_m: matrix of size (state_dim, time_series_no)
1287                Mean value from the previous step. For "multiple time series mode"
1288                it is matrix, second dimension of which correspond to different
1289                time series.
1290
1291            p_P:
1292                Covariance matrix from the previous step.
1293
1294            p_dyn_model_callable: class
1295
1296
1297            calc_grad_log_likelihood: boolean
1298                Whether to calculate gradient of the marginal likelihood
1299                of the state-space model. If true then the next parameter must
1300                provide the extra parameters for gradient calculation.
1301
1302            p_dm: 3D array (state_dim, time_series_no, parameters_no)
1303                Mean derivatives from the previous step. For "multiple time series mode"
1304                it is 3D array, second dimension of which correspond to different
1305                time series.
1306
1307            p_dP: 3D array (state_dim, state_dim, parameters_no)
1308                Mean derivatives from the previous step
1309
1310        Output:
1311        ----------------------------
1312        m_pred, P_pred, dm_pred, dP_pred: metrices, 3D objects
1313            Results of the prediction steps.
1314
1315        """
1316
1317        # index correspond to values from previous iteration.
1318        A = p_dyn_model_callable.Ak(k,p_m,p_P) # state transition matrix (or Jacobian)
1319        Q = p_dyn_model_callable.Qk(k) # state noise matrix
1320
1321        # Prediction step ->
1322        m_pred = p_dyn_model_callable.f_a(k, p_m, A) # predicted mean
1323        P_pred = A.dot(p_P).dot(A.T) + Q # predicted variance
1324        # Prediction step <-
1325
1326        if calc_grad_log_likelihood:
1327            dA_all_params = p_dyn_model_callable.dAk(k) # derivatives of A wrt parameters
1328            dQ_all_params = p_dyn_model_callable.dQk(k) # derivatives of Q wrt parameters
1329
1330            param_number = p_dP.shape[2]
1331
1332            # p_dm, p_dP - derivatives form the previoius step
1333            dm_pred = np.empty(p_dm.shape)
1334            dP_pred = np.empty(p_dP.shape)
1335
1336            for j in range(param_number):
1337                dA = dA_all_params[:,:,j]
1338                dQ = dQ_all_params[:,:,j]
1339
1340                dP = p_dP[:,:,j]
1341                dm = p_dm[:,:,j]
1342                dm_pred[:,:,j] = np.dot(dA, p_m) + np.dot(A, dm)
1343                # prediction step derivatives for current parameter:
1344
1345                dP_pred[:,:,j] = np.dot( dA ,np.dot(p_P, A.T))
1346                dP_pred[:,:,j] += dP_pred[:,:,j].T
1347                dP_pred[:,:,j] += np.dot( A ,np.dot(dP, A.T)) + dQ
1348
1349                dP_pred[:,:,j] = 0.5*(dP_pred[:,:,j] + dP_pred[:,:,j].T) #symmetrize
1350        else:
1351            dm_pred = None
1352            dP_pred = None
1353
1354        return m_pred, P_pred, dm_pred, dP_pred
1355
1356    @staticmethod
1357    def _kalman_prediction_step_SVD(k, p_m , p_P, p_dyn_model_callable, calc_grad_log_likelihood=False,
1358                                p_dm = None, p_dP = None):
1359        """
1360        Desctrete prediction function
1361
1362        Input:
1363            k:int
1364                Iteration No. Starts at 0. Total number of iterations equal to the
1365                number of measurements.
1366
1367            p_m: matrix of size (state_dim, time_series_no)
1368                Mean value from the previous step. For "multiple time series mode"
1369                it is matrix, second dimension of which correspond to different
1370                time series.
1371
1372            p_P: tuple (Prev_cov, S, V)
1373                Covariance matrix from the previous step and its SVD decomposition.
1374                Prev_cov = V * S * V.T The tuple is (Prev_cov, S, V)
1375
1376            p_dyn_model_callable: object
1377
1378            calc_grad_log_likelihood: boolean
1379                Whether to calculate gradient of the marginal likelihood
1380                of the state-space model. If true then the next parameter must
1381                provide the extra parameters for gradient calculation.
1382
1383            p_dm: 3D array (state_dim, time_series_no, parameters_no)
1384                Mean derivatives from the previous step. For "multiple time series mode"
1385                it is 3D array, second dimension of which correspond to different
1386                time series.
1387
1388            p_dP: 3D array (state_dim, state_dim, parameters_no)
1389                Mean derivatives from the previous step
1390
1391        Output:
1392        ----------------------------
1393        m_pred, P_pred, dm_pred, dP_pred: metrices, 3D objects
1394            Results of the prediction steps.
1395
1396        """
1397
1398        # covariance from the previous step and its SVD decomposition
1399        # p_prev_cov = v * S * V.T
1400        Prev_cov, S_old, V_old = p_P
1401        #p_prev_cov_tst = np.dot(p_V, (p_S * p_V).T) # reconstructed covariance from the previous step
1402
1403        # index correspond to values from previous iteration.
1404        A = p_dyn_model_callable.Ak(k,p_m,Prev_cov) # state transition matrix (or Jacobian)
1405        Q = p_dyn_model_callable.Qk(k) # state noise matrx. This is necessary for the square root calculation (next step)
1406        Q_sr = p_dyn_model_callable.Q_srk(k)
1407        # Prediction step ->
1408        m_pred = p_dyn_model_callable.f_a(k, p_m, A) # predicted mean
1409
1410        # coavariance prediction have changed:
1411        svd_1_matr = np.vstack( ( (np.sqrt(S_old)* np.dot(A,V_old)).T , Q_sr.T) )
1412        (U,S,Vh) = sp.linalg.svd( svd_1_matr,full_matrices=False, compute_uv=True,
1413                      overwrite_a=False,check_finite=True)
1414
1415        # predicted variance computed by the regular method. For testing
1416        #P_pred_tst = A.dot(Prev_cov).dot(A.T) + Q
1417        V_new = Vh.T
1418        S_new = S**2
1419
1420        P_pred = np.dot(V_new * S_new, V_new.T) # prediction covariance
1421        P_pred = (P_pred, S_new, Vh.T)
1422        # Prediction step <-
1423
1424        # derivatives
1425        if calc_grad_log_likelihood:
1426            dA_all_params = p_dyn_model_callable.dAk(k) # derivatives of A wrt parameters
1427            dQ_all_params = p_dyn_model_callable.dQk(k) # derivatives of Q wrt parameters
1428
1429            param_number = p_dP.shape[2]
1430
1431            # p_dm, p_dP - derivatives form the previoius step
1432            dm_pred = np.empty(p_dm.shape)
1433            dP_pred = np.empty(p_dP.shape)
1434
1435            for j in range(param_number):
1436                dA = dA_all_params[:,:,j]
1437                dQ = dQ_all_params[:,:,j]
1438
1439                #dP = p_dP[:,:,j]
1440                #dm = p_dm[:,:,j]
1441                dm_pred[:,:,j] = np.dot(dA, p_m) + np.dot(A, p_dm[:,:,j])
1442                # prediction step derivatives for current parameter:
1443
1444
1445                dP_pred[:,:,j] = np.dot( dA ,np.dot(Prev_cov, A.T))
1446                dP_pred[:,:,j] += dP_pred[:,:,j].T
1447                dP_pred[:,:,j] += np.dot( A ,np.dot(p_dP[:,:,j], A.T)) + dQ
1448
1449                dP_pred[:,:,j] = 0.5*(dP_pred[:,:,j] + dP_pred[:,:,j].T) #symmetrize
1450        else:
1451            dm_pred = None
1452            dP_pred = None
1453
1454        return m_pred, P_pred, dm_pred, dP_pred
1455
1456    @staticmethod
1457    def _kalman_update_step(k,   p_m , p_P, p_meas_model_callable, measurement, calc_log_likelihood= False,
1458                            calc_grad_log_likelihood=False, p_dm = None, p_dP = None):
1459        """
1460        Input:
1461
1462        k: int
1463              Iteration No. Starts at 0. Total number of iterations equal to the
1464              number of measurements.
1465
1466        m_P: matrix of size (state_dim, time_series_no)
1467             Mean value from the previous step. For "multiple time series mode"
1468                it is matrix, second dimension of which correspond to different
1469                time series.
1470
1471        p_P:
1472             Covariance matrix from the prediction step.
1473
1474        p_meas_model_callable: object
1475
1476        measurement: (measurement_dim, time_series_no) matrix
1477            One measurement used on the current update step. For
1478            "multiple time series mode" it is matrix, second dimension of
1479            which correspond to different time series.
1480
1481        calc_log_likelihood: boolean
1482            Whether to calculate marginal likelihood of the state-space model.
1483
1484        calc_grad_log_likelihood: boolean
1485                Whether to calculate gradient of the marginal likelihood
1486                of the state-space model. If true then the next parameter must
1487                provide the extra parameters for gradient calculation.
1488
1489        p_dm: 3D array (state_dim, time_series_no, parameters_no)
1490                Mean derivatives from the prediction step. For "multiple time series mode"
1491                it is 3D array, second dimension of which correspond to different
1492                time series.
1493
1494        p_dP: array
1495            Covariance derivatives from the prediction step.
1496
1497        Output:
1498        ----------------------------
1499        m_upd, P_upd, dm_upd, dP_upd: metrices, 3D objects
1500            Results of the prediction steps.
1501
1502        log_likelihood_update: double or 1D array
1503            Update to the log_likelihood from this step
1504
1505        d_log_likelihood_update: (grad_params_no, time_series_no) matrix
1506            Update to the gradient of log_likelihood, "multiple time series mode"
1507            adds extra columns to the gradient.
1508
1509        """
1510        #import pdb; pdb.set_trace()
1511
1512        m_pred = p_m # from prediction step
1513        P_pred = p_P # from prediction step
1514
1515        H = p_meas_model_callable.Hk(k, m_pred, P_pred)
1516        R = p_meas_model_callable.Rk(k)
1517
1518        time_series_no = p_m.shape[1] # number of time serieses
1519
1520        log_likelihood_update=None; dm_upd=None; dP_upd=None; d_log_likelihood_update=None
1521        # Update step (only if there is data)
1522        #if not np.any(np.isnan(measurement)): # TODO: if some dimensions are missing, do properly computations for other.
1523        v = measurement-p_meas_model_callable.f_h(k, m_pred, H)
1524        S = H.dot(P_pred).dot(H.T) + R
1525        if measurement.shape[0]==1: # measurements are one dimensional
1526            if (S < 0):
1527                raise ValueError("Kalman Filter Update: S is negative step %i" % k )
1528                 #import pdb; pdb.set_trace()
1529
1530            K = P_pred.dot(H.T) / S
1531            if calc_log_likelihood:
1532                log_likelihood_update = -0.5 * ( np.log(2*np.pi) + np.log(S) +
1533                                    v*v / S)
1534                #log_likelihood_update = log_likelihood_update[0,0] # to make int
1535                if np.any(np.isnan(log_likelihood_update)): # some member in P_pred is None.
1536                    raise ValueError("Nan values in likelihood update!")
1537            LL = None; islower = None
1538        else:
1539            LL,islower = linalg.cho_factor(S)
1540            K = linalg.cho_solve((LL,islower), H.dot(P_pred.T)).T
1541
1542            if calc_log_likelihood:
1543                log_likelihood_update = -0.5 * ( v.shape[0]*np.log(2*np.pi) +
1544                    2*np.sum( np.log(np.diag(LL)) ) +\
1545                        np.sum((linalg.cho_solve((LL,islower),v)) * v, axis = 0) ) # diagonal of v.T*S^{-1}*v
1546
1547        if calc_grad_log_likelihood:
1548            dm_pred_all_params = p_dm # derivativas of the prediction phase
1549            dP_pred_all_params = p_dP
1550
1551            param_number = p_dP.shape[2]
1552
1553            dH_all_params = p_meas_model_callable.dHk(k)
1554            dR_all_params = p_meas_model_callable.dRk(k)
1555
1556            dm_upd = np.empty(dm_pred_all_params.shape)
1557            dP_upd = np.empty(dP_pred_all_params.shape)
1558
1559             # firts dimension parameter_no, second - time series number
1560            d_log_likelihood_update = np.empty((param_number,time_series_no))
1561            for param in range(param_number):
1562
1563               dH = dH_all_params[:,:,param]
1564               dR = dR_all_params[:,:,param]
1565
1566               dm_pred = dm_pred_all_params[:,:,param]
1567               dP_pred = dP_pred_all_params[:,:,param]
1568
1569                # Terms in the likelihood derivatives
1570               dv = - np.dot( dH, m_pred) -  np.dot( H, dm_pred)
1571               dS = np.dot(dH, np.dot( P_pred, H.T))
1572               dS += dS.T
1573               dS += np.dot(H, np.dot( dP_pred, H.T)) + dR
1574
1575               # TODO: maybe symmetrize dS
1576
1577               #dm and dP for the next stem
1578               if LL is not None: # the state vector is not a scalar
1579                   tmp1 = linalg.cho_solve((LL,islower), H).T
1580                   tmp2 = linalg.cho_solve((LL,islower), dH).T
1581                   tmp3 = linalg.cho_solve((LL,islower), dS).T
1582               else: # the state vector is a scalar
1583                   tmp1 = H.T / S
1584                   tmp2 = dH.T / S
1585                   tmp3 = dS.T / S
1586
1587               dK = np.dot( dP_pred, tmp1) + np.dot( P_pred, tmp2) - \
1588                    np.dot( P_pred, np.dot( tmp1, tmp3 ) )
1589
1590                # terms required for the next step, save this for each parameter
1591               dm_upd[:,:,param] = dm_pred + np.dot(dK, v) + np.dot(K, dv)
1592
1593               dP_upd[:,:,param] = -np.dot(dK, np.dot(S, K.T))
1594               dP_upd[:,:,param] += dP_upd[:,:,param].T
1595               dP_upd[:,:,param] += dP_pred - np.dot(K , np.dot( dS, K.T))
1596
1597               dP_upd[:,:,param] = 0.5*(dP_upd[:,:,param] + dP_upd[:,:,param].T) #symmetrize
1598                # computing the likelihood change for each parameter:
1599               if LL is not None: # the state vector is not 1D
1600                    #tmp4 = linalg.cho_solve((LL,islower), dv)
1601                   tmp5 = linalg.cho_solve((LL,islower), v)
1602               else: # the state vector is a scalar
1603                   #tmp4 = dv / S
1604                   tmp5 = v / S
1605
1606
1607               d_log_likelihood_update[param,:] = -(0.5*np.sum(np.diag(tmp3)) + \
1608                    np.sum(tmp5*dv, axis=0) - 0.5 * np.sum(tmp5 * np.dot(dS, tmp5), axis=0) )
1609                # Before
1610                #d_log_likelihood_update[param,0] = -(0.5*np.sum(np.diag(tmp3)) + \
1611                #np.dot(tmp5.T, dv) - 0.5 * np.dot(tmp5.T ,np.dot(dS, tmp5)) )
1612
1613
1614
1615        # Compute the actual updates for mean and variance of the states.
1616        m_upd = m_pred + K.dot( v )
1617
1618        # Covariance update and ensure it is symmetric
1619        P_upd = K.dot(S).dot(K.T)
1620        P_upd = 0.5*(P_upd + P_upd.T)
1621        P_upd =  P_pred - P_upd# this update matrix is symmetric
1622
1623        return m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update
1624
1625    @staticmethod
1626    def _kalman_update_step_SVD(k, p_m , p_P, p_meas_model_callable, measurement, calc_log_likelihood= False,
1627                            calc_grad_log_likelihood=False, p_dm = None, p_dP = None):
1628        """
1629        Input:
1630
1631        k: int
1632              Iteration No. Starts at 0. Total number of iterations equal to the
1633              number of measurements.
1634
1635        m_P: matrix of size (state_dim, time_series_no)
1636             Mean value from the previous step. For "multiple time series mode"
1637                it is matrix, second dimension of which correspond to different
1638                time series.
1639
1640        p_P: tuple (P_pred, S, V)
1641             Covariance matrix from the prediction step and its SVD decomposition.
1642             P_pred = V * S * V.T The tuple is (P_pred, S, V)
1643
1644        p_h: function (k, x_{k}, H_{k}). Measurement function.
1645            k (iteration number), starts at 0
1646            x_{k} state
1647            H_{k} Jacobian matrices of f_h. In the linear case it is exactly H_{k}.
1648
1649        p_f_H: function (k, m, P) return Jacobian of measurement function, it is
1650            passed into p_h.
1651            k (iteration number), starts at 0
1652            m: point where Jacobian is evaluated
1653            P: parameter for Jacobian, usually covariance matrix.
1654
1655        p_f_R: function (k). Returns noise matrix of measurement equation
1656            on iteration k.
1657            k (iteration number). starts at 0
1658
1659        p_f_iRsr: function (k). Returns the square root of the noise matrix of
1660            measurement equation on iteration k.
1661            k (iteration number). starts at 0
1662
1663        measurement: (measurement_dim, time_series_no) matrix
1664            One measurement used on the current update step. For
1665            "multiple time series mode" it is matrix, second dimension of
1666            which correspond to different time series.
1667
1668        calc_log_likelihood: boolean
1669            Whether to calculate marginal likelihood of the state-space model.
1670
1671        calc_grad_log_likelihood: boolean
1672                Whether to calculate gradient of the marginal likelihood
1673                of the state-space model. If true then the next parameter must
1674                provide the extra parameters for gradient calculation.
1675
1676        p_dm: 3D array (state_dim, time_series_no, parameters_no)
1677                Mean derivatives from the prediction step. For "multiple time series mode"
1678                it is 3D array, second dimension of which correspond to different
1679                time series.
1680
1681        p_dP: array
1682            Covariance derivatives from the prediction step.
1683
1684        grad_calc_params_2: List or None
1685            List with derivatives. The first component is 'f_dH' - function(k)
1686            which returns the derivative of H. The second element is 'f_dR'
1687             - function(k). Function which returns the derivative of R.
1688
1689        Output:
1690        ----------------------------
1691        m_upd, P_upd, dm_upd, dP_upd: metrices, 3D objects
1692            Results of the prediction steps.
1693
1694        log_likelihood_update: double or 1D array
1695            Update to the log_likelihood from this step
1696
1697        d_log_likelihood_update: (grad_params_no, time_series_no) matrix
1698            Update to the gradient of log_likelihood, "multiple time series mode"
1699            adds extra columns to the gradient.
1700
1701        """
1702
1703        #import pdb; pdb.set_trace()
1704
1705        m_pred = p_m # from prediction step
1706        P_pred,S_pred,V_pred = p_P # from prediction step
1707
1708        H = p_meas_model_callable.Hk(k, m_pred, P_pred)
1709        R = p_meas_model_callable.Rk(k)
1710        R_isr = p_meas_model_callable.R_isrk(k) # square root of the inverse of R matrix
1711
1712        time_series_no = p_m.shape[1] # number of time serieses
1713
1714        log_likelihood_update=None; dm_upd=None; dP_upd=None; d_log_likelihood_update=None
1715        # Update step (only if there is data)
1716        #if not np.any(np.isnan(measurement)): # TODO: if some dimensions are missing, do properly computations for other.
1717        v = measurement-p_meas_model_callable.f_h(k, m_pred, H)
1718
1719        svd_2_matr = np.vstack( ( np.dot( R_isr.T, np.dot(H, V_pred)) , np.diag( 1.0/np.sqrt(S_pred) ) ) )
1720
1721        (U,S,Vh) = sp.linalg.svd( svd_2_matr,full_matrices=False, compute_uv=True,
1722                     overwrite_a=False,check_finite=True)
1723
1724         # P_upd = U_upd S_upd**2 U_upd.T
1725        U_upd = np.dot(V_pred, Vh.T)
1726        S_upd = (1.0/S)**2
1727
1728        P_upd = np.dot(U_upd * S_upd, U_upd.T) # update covariance
1729        P_upd = (P_upd,S_upd,U_upd) # tuple to pass to the next step
1730
1731         # stil need to compute S and K for derivative computation
1732        S = H.dot(P_pred).dot(H.T) + R
1733        if measurement.shape[0]==1: # measurements are one dimensional
1734            if (S < 0):
1735                raise ValueError("Kalman Filter Update: S is negative step %i" % k )
1736                 #import pdb; pdb.set_trace()
1737
1738            K = P_pred.dot(H.T) / S
1739            if calc_log_likelihood:
1740                log_likelihood_update = -0.5 * ( np.log(2*np.pi) + np.log(S) +
1741                                    v*v / S)
1742                #log_likelihood_update = log_likelihood_update[0,0] # to make int
1743                if np.any(np.isnan(log_likelihood_update)): # some member in P_pred is None.
1744                    raise ValueError("Nan values in likelihood update!")
1745            LL = None; islower = None
1746        else:
1747            LL,islower = linalg.cho_factor(S)
1748            K = linalg.cho_solve((LL,islower), H.dot(P_pred.T)).T
1749
1750            if calc_log_likelihood:
1751                log_likelihood_update = -0.5 * ( v.shape[0]*np.log(2*np.pi) +
1752                    2*np.sum( np.log(np.diag(LL)) ) +\
1753                        np.sum((linalg.cho_solve((LL,islower),v)) * v, axis = 0) ) # diagonal of v.T*S^{-1}*v
1754
1755
1756        # Old  method of computing updated covariance (for testing) ->
1757        #P_upd_tst = K.dot(S).dot(K.T)
1758        #P_upd_tst = 0.5*(P_upd_tst + P_upd_tst.T)
1759        #P_upd_tst =  P_pred - P_upd_tst# this update matrix is symmetric
1760        # Old  method of computing updated covariance (for testing) <-
1761
1762        if calc_grad_log_likelihood:
1763            dm_pred_all_params = p_dm # derivativas of the prediction phase
1764            dP_pred_all_params = p_dP
1765
1766            param_number = p_dP.shape[2]
1767
1768            dH_all_params = p_meas_model_callable.dHk(k)
1769            dR_all_params = p_meas_model_callable.dRk(k)
1770
1771            dm_upd = np.empty(dm_pred_all_params.shape)
1772            dP_upd = np.empty(dP_pred_all_params.shape)
1773
1774             # firts dimension parameter_no, second - time series number
1775            d_log_likelihood_update = np.empty((param_number,time_series_no))
1776            for param in range(param_number):
1777
1778               dH = dH_all_params[:,:,param]
1779               dR = dR_all_params[:,:,param]
1780
1781               dm_pred = dm_pred_all_params[:,:,param]
1782               dP_pred = dP_pred_all_params[:,:,param]
1783
1784                # Terms in the likelihood derivatives
1785               dv = - np.dot( dH, m_pred) -  np.dot( H, dm_pred)
1786               dS = np.dot(dH, np.dot( P_pred, H.T))
1787               dS += dS.T
1788               dS += np.dot(H, np.dot( dP_pred, H.T)) + dR
1789
1790                # TODO: maybe symmetrize dS
1791
1792                #dm and dP for the next stem
1793               if LL is not None: # the state vector is not a scalar
1794                   tmp1 = linalg.cho_solve((LL,islower), H).T
1795                   tmp2 = linalg.cho_solve((LL,islower), dH).T
1796                   tmp3 = linalg.cho_solve((LL,islower), dS).T
1797               else: # the state vector is a scalar
1798                   tmp1 = H.T / S
1799                   tmp2 = dH.T / S
1800                   tmp3 = dS.T / S
1801
1802               dK = np.dot( dP_pred, tmp1) + np.dot( P_pred, tmp2) - \
1803                    np.dot( P_pred, np.dot( tmp1, tmp3 ) )
1804
1805               # terms required for the next step, save this for each parameter
1806               dm_upd[:,:,param] = dm_pred + np.dot(dK, v) + np.dot(K, dv)
1807
1808               dP_upd[:,:,param] = -np.dot(dK, np.dot(S, K.T))
1809               dP_upd[:,:,param] += dP_upd[:,:,param].T
1810               dP_upd[:,:,param] += dP_pred - np.dot(K , np.dot( dS, K.T))
1811
1812               dP_upd[:,:,param] = 0.5*(dP_upd[:,:,param] + dP_upd[:,:,param].T) #symmetrize
1813               # computing the likelihood change for each parameter:
1814               if LL is not None: # the state vector is not 1D
1815                   tmp5 = linalg.cho_solve((LL,islower), v)
1816               else: # the state vector is a scalar
1817                   tmp5 = v / S
1818
1819
1820               d_log_likelihood_update[param,:] = -(0.5*np.sum(np.diag(tmp3)) + \
1821                   np.sum(tmp5*dv, axis=0) - 0.5 * np.sum(tmp5 * np.dot(dS, tmp5), axis=0) )
1822                # Before
1823                #d_log_likelihood_update[param,0] = -(0.5*np.sum(np.diag(tmp3)) + \
1824                #np.dot(tmp5.T, dv) - 0.5 * np.dot(tmp5.T ,np.dot(dS, tmp5)) )
1825
1826        # Compute the actual updates for mean of the states. Variance update
1827        # is computed earlier.
1828        m_upd = m_pred + K.dot( v )
1829
1830        return m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update
1831
1832    @staticmethod
1833    def _rts_smoother_update_step(k, p_m , p_P, p_m_pred, p_P_pred, p_m_prev_step,
1834                                  p_P_prev_step, p_dynamic_callables):
1835        """
1836        Rauch–Tung–Striebel(RTS) update step
1837
1838        Input:
1839        -----------------------------
1840        k: int
1841              Iteration No. Starts at 0. Total number of iterations equal to the
1842              number of measurements.
1843
1844        p_m: matrix of size (state_dim, time_series_no)
1845             Filter mean on step k
1846
1847        p_P:  matrix of size (state_dim,state_dim)
1848             Filter Covariance on step k
1849
1850        p_m_pred: matrix of size (state_dim, time_series_no)
1851             Means from the smoother prediction step.
1852
1853        p_P_pred:
1854             Covariance from the smoother prediction step.
1855
1856        p_m_prev_step
1857            Smoother mean from the previous step.
1858
1859        p_P_prev_step:
1860            Smoother covariance from the previous step.
1861
1862        p_f_A: function (k, m, P) return Jacobian of dynamic function, it is
1863            passed into p_a.
1864            k (iteration number), starts at 0
1865            m: point where Jacobian is evaluated
1866            P: parameter for Jacobian, usually covariance matrix.
1867
1868        """
1869
1870        A = p_dynamic_callables.Ak(k,p_m,p_P) # state transition matrix (or Jacobian)
1871
1872        tmp = np.dot( A, p_P.T)
1873        if A.shape[0] == 1: # 1D states
1874            G = tmp.T / p_P_pred # P[:,:,k] is symmetric
1875        else:
1876            try:
1877                LL,islower = linalg.cho_factor(p_P_pred)
1878                G = linalg.cho_solve((LL,islower),tmp).T
1879            except:
1880                # It happende that p_P_pred has several near zero eigenvalues
1881                # hence the Cholesky method does not work.
1882                res = sp.linalg.lstsq(p_P_pred, tmp)
1883                G = res[0].T
1884
1885        m_upd = p_m + G.dot( p_m_prev_step-p_m_pred )
1886        P_upd = p_P + G.dot( p_P_prev_step-p_P_pred).dot(G.T)
1887
1888        P_upd = 0.5*(P_upd + P_upd.T)
1889
1890        return m_upd, P_upd, G
1891
1892    @classmethod
1893    def rts_smoother(cls,state_dim, p_dynamic_callables, filter_means,
1894                          filter_covars):
1895        """
1896        This function implements Rauch–Tung–Striebel(RTS) smoother algorithm
1897        based on the results of kalman_filter_raw.
1898        These notations are the same:
1899            x_{k} =  A_{k} * x_{k-1} + q_{k-1};       q_{k-1} ~ N(0, Q_{k-1})
1900            y_{k} = H_{k} * x_{k} + r_{k};            r_{k-1} ~ N(0, R_{k})
1901
1902        Returns estimated smoother distributions x_{k} ~ N(m_{k}, P(k))
1903
1904        Input:
1905        --------------
1906
1907        p_a: function (k, x_{k-1}, A_{k}). Dynamic function.
1908                k (iteration number), starts at 0
1909                x_{k-1} State from the previous step
1910                A_{k} Jacobian matrices of f_a. In the linear case it is exactly A_{k}.
1911
1912        p_f_A: function (k, m, P) return Jacobian of dynamic function, it is
1913            passed into p_a.
1914            k (iteration number), starts at 0
1915            m: point where Jacobian is evaluated
1916            P: parameter for Jacobian, usually covariance matrix.
1917
1918        p_f_Q: function (k). Returns noise matrix of dynamic model on iteration k.
1919            k (iteration number). starts at 0
1920
1921        filter_means: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array
1922            Results of the Kalman Filter means estimation.
1923
1924        filter_covars: (no_steps+1, state_dim, state_dim) 3D array
1925            Results of the Kalman Filter covariance estimation.
1926
1927        Output:
1928        -------------
1929
1930        M: (no_steps+1, state_dim) matrix
1931            Smoothed estimates of the state means
1932
1933        P: (no_steps+1, state_dim, state_dim) 3D array
1934            Smoothed estimates of the state covariances
1935        """
1936
1937        no_steps = filter_covars.shape[0]-1# number of steps (minus initial covariance)
1938
1939        M = np.empty(filter_means.shape) # smoothed means
1940        P = np.empty(filter_covars.shape) # smoothed covars
1941        #G = np.empty( (no_steps,state_dim,state_dim)  ) # G from the update step of the smoother
1942
1943        M[-1,:] = filter_means[-1,:]
1944        P[-1,:,:] = filter_covars[-1,:,:]
1945        for k in range(no_steps-1,-1,-1):
1946
1947            m_pred, P_pred, tmp1, tmp2 = \
1948                    cls._kalman_prediction_step(k, filter_means[k,:],
1949                                                filter_covars[k,:,:], p_dynamic_callables,
1950                                                calc_grad_log_likelihood=False)
1951            p_m = filter_means[k,:]
1952            if len(p_m.shape)<2:
1953                p_m.shape = (p_m.shape[0],1)
1954
1955            p_m_prev_step = M[k+1,:]
1956            if len(p_m_prev_step.shape)<2:
1957                p_m_prev_step.shape = (p_m_prev_step.shape[0],1)
1958
1959            m_upd, P_upd, G_tmp = cls._rts_smoother_update_step(k,
1960                            p_m ,filter_covars[k,:,:],
1961                            m_pred, P_pred, p_m_prev_step ,P[k+1,:,:], p_dynamic_callables)
1962
1963            M[k,:] = m_upd#np.squeeze(m_upd)
1964            P[k,:,:] = P_upd
1965            #G[k,:,:] = G_upd.T # store transposed G.
1966        # Return values
1967
1968        return (M, P) #, G)
1969
1970    @staticmethod
1971    def _EM_gradient(A,Q,H,R,m_init,P_init,measurements, M, P, G, dA, dQ, dH, dR, dm_init, dP_init):
1972        """
1973        Gradient computation with the EM algorithm.
1974
1975        Input:
1976        -----------------
1977
1978        M: Means from the smoother
1979        P: Variances from the smoother
1980        G: Gains? from the smoother
1981        """
1982        import pdb; pdb.set_trace();
1983
1984        param_number = dA.shape[-1]
1985        d_log_likelihood_update = np.empty((param_number,1))
1986
1987        sample_no = measurements.shape[0]
1988        P_1 = P[1:,:,:] # remove 0-th step
1989        P_2 = P[0:-1,:,:] # remove 0-th step
1990
1991        M_1 = M[1:,:] # remove 0-th step
1992        M_2 = M[0:-1,:] # remove the last step
1993
1994        Sigma = np.mean(P_1,axis=0) + np.dot(M_1.T, M_1) / sample_no #
1995        Phi =   np.mean(P_2,axis=0) + np.dot(M_2.T, M_2) / sample_no #
1996
1997        B = np.dot( measurements.T, M_1 )/ sample_no
1998        C =   (sp.einsum( 'ijk,ikl', P_1, G) + np.dot(M_1.T, M_2)) / sample_no #
1999
2000#        C1 = np.zeros( (P_1.shape[1],P_1.shape[1]) )
2001#        for k in range(P_1.shape[0]):
2002#            C1 += np.dot(P_1[k,:,:],G[k,:,:]) + sp.outer( M_1[k,:], M_2[k,:] )
2003#        C1 = C1 / sample_no
2004
2005        D = np.dot( measurements.T, measurements ) / sample_no
2006
2007        try:
2008            P_init_inv = sp.linalg.inv(P_init)
2009
2010            if np.max( np.abs(P_init_inv)) > 10e13:
2011                compute_P_init_terms = False
2012            else:
2013                compute_P_init_terms = True
2014        except np.linalg.LinAlgError:
2015            compute_P_init_terms = False
2016
2017        try:
2018            Q_inv = sp.linalg.inv(Q)
2019
2020            if np.max( np.abs(Q_inv)) > 10e13:
2021                compute_Q_terms = False
2022            else:
2023                compute_Q_terms = True
2024        except np.linalg.LinAlgError:
2025            compute_Q_terms = False
2026
2027        try:
2028            R_inv = sp.linalg.inv(R)
2029
2030            if np.max( np.abs(R_inv)) > 10e13:
2031                compute_R_terms = False
2032            else:
2033                compute_R_terms = True
2034        except np.linalg.LinAlgError:
2035            compute_R_terms = False
2036
2037
2038        d_log_likelihood_update = np.zeros((param_number,1))
2039        for j in range(param_number):
2040            if compute_P_init_terms:
2041                d_log_likelihood_update[j,:] -= 0.5 * np.sum(P_init_inv* dP_init[:,:,j].T ) #p #m
2042
2043                M0_smoothed = M[0]; M0_smoothed.shape = (M0_smoothed.shape[0],1)
2044                tmp1 = np.dot( dP_init[:,:,j], np.dot( P_init_inv, (P[0,:,:] + sp.outer( (M0_smoothed - m_init), (M0_smoothed - m_init) )) )  ) #p #m
2045                d_log_likelihood_update[j,:] += 0.5 * np.sum(P_init_inv* tmp1.T )
2046
2047                tmp2 = sp.outer( dm_init[:,j], M0_smoothed )
2048                tmp2 += tmp2.T
2049                d_log_likelihood_update[j,:] += 0.5 * np.sum(P_init_inv* tmp2.T )
2050
2051            if compute_Q_terms:
2052
2053                d_log_likelihood_update[j,:] -=  sample_no/2.0 * np.sum(Q_inv* dQ[:,:,j].T ) #m
2054
2055                tmp1 = np.dot(C,A.T); tmp1 += tmp1.T; tmp1 = Sigma - tmp1 + np.dot(A, np.dot(Phi,A.T)) #m
2056                tmp1 = np.dot( dQ[:,:,j], np.dot( Q_inv, tmp1) )
2057                d_log_likelihood_update[j,:] += sample_no/2.0 * np.sum(Q_inv * tmp1.T)
2058
2059                tmp2 = np.dot( dA[:,:,j], C.T); tmp2 += tmp2.T;
2060                tmp3 = np.dot(dA[:,:,j], np.dot(Phi,A.T)); tmp3 += tmp3.T
2061                d_log_likelihood_update[j,:] -= sample_no/2.0 * np.sum(Q_inv.T * (tmp3 - tmp2) )
2062
2063            if compute_R_terms:
2064                d_log_likelihood_update[j,:] -=  sample_no/2.0 * np.sum(R_inv* dR[:,:,j].T )
2065
2066                tmp1 = np.dot(B,H.T); tmp1 += tmp1.T; tmp1 = D - tmp1 + np.dot(H, np.dot(Sigma,H.T))
2067                tmp1 = np.dot( dR[:,:,j], np.dot( R_inv, tmp1) )
2068                d_log_likelihood_update[j,:] += sample_no/2.0 * np.sum(R_inv * tmp1.T)
2069
2070                tmp2 = np.dot( dH[:,:,j], B.T); tmp2 += tmp2.T;
2071                tmp3 = np.dot(dH[:,:,j], np.dot(Sigma,H.T)); tmp3 += tmp3.T
2072                d_log_likelihood_update[j,:] -= sample_no/2.0 * np.sum(R_inv.T * (tmp3 - tmp2) )
2073
2074        return d_log_likelihood_update
2075
2076    @staticmethod
2077    def _check_SS_matrix(p_M, state_dim, measurement_dim, which='A'):
2078        """
2079        Veryfy that on exit the matrix has appropriate shape for the KF algorithm.
2080
2081        Input:
2082            p_M: matrix
2083                As it is given for the user
2084            state_dim: int
2085                State dimensioanlity
2086            measurement_dim: int
2087                Measurement dimensionality
2088            which: string
2089                One of: 'A', 'Q', 'H', 'R'
2090        Output:
2091        ---------------
2092            p_M: matrix of the right shape
2093
2094            old_M_shape: tuple
2095                Old Shape
2096        """
2097
2098        old_M_shape = None
2099        if len(p_M.shape) < 3: # new shape is 3 dimensional
2100            old_M_shape = p_M.shape # save shape to restore it on exit
2101            if len(p_M.shape) == 2: # matrix
2102                p_M.shape = (p_M.shape[0],p_M.shape[1],1)
2103            elif len(p_M.shape) == 1: # scalar but in array already
2104                if (p_M.shape[0] != 1):
2105                    raise ValueError("Matrix %s is an 1D array, while it must be a matrix or scalar", which)
2106                else:
2107                    p_M.shape = (1,1,1)
2108
2109        if (which == 'A') or (which == 'Q'):
2110            if (p_M.shape[0] != state_dim) or (p_M.shape[1] != state_dim):
2111                raise ValueError("%s must be a square matrix of size (%i,%i)" % (which, state_dim, state_dim))
2112        if (which == 'H'):
2113            if (p_M.shape[0] != measurement_dim) or (p_M.shape[1] != state_dim):
2114                raise ValueError("H must be of shape (measurement_dim, state_dim) (%i,%i)" % (measurement_dim, state_dim))
2115        if (which == 'R'):
2116            if (p_M.shape[0] != measurement_dim) or (p_M.shape[1] != measurement_dim):
2117                raise ValueError("R must be of shape (measurement_dim, measurement_dim) (%i,%i)" % (measurement_dim, measurement_dim))
2118
2119        return (p_M,old_M_shape)
2120
2121    @staticmethod
2122    def _check_grad_state_matrices(dM, state_dim, grad_params_no, which = 'dA'):
2123        """
2124        Function checks (mostly check dimensions) matrices for marginal likelihood
2125        gradient parameters calculation. It check dA, dQ matrices.
2126
2127        Input:
2128        -------------
2129            dM: None, scaler or 3D matrix
2130                It is supposed to be (state_dim,state_dim,grad_params_no) matrix.
2131                If None then zero matrix is assumed. If scalar then the function
2132                checks consistency with "state_dim" and "grad_params_no".
2133
2134            state_dim: int
2135                State dimensionality
2136
2137            grad_params_no: int
2138                How many parrameters of likelihood gradient in total.
2139
2140            which: string
2141                'dA' or 'dQ'
2142
2143
2144        Output:
2145        --------------
2146            function of (k) which returns the parameters matrix.
2147
2148        """
2149
2150
2151        if dM is None:
2152            dM=np.zeros((state_dim,state_dim,grad_params_no))
2153        elif isinstance(dM, np.ndarray):
2154            if state_dim == 1:
2155                if len(dM.shape) < 3:
2156                    dM.shape = (1,1,1)
2157            else:
2158                if len(dM.shape) < 3:
2159                    dM.shape = (state_dim,state_dim,1)
2160        elif isinstance(dM, np.int):
2161            if state_dim > 1:
2162                raise ValueError("When computing likelihood gradient wrong %s dimension." % which)
2163            else:
2164                dM = np.ones((1,1,1)) * dM
2165
2166#        if not isinstance(dM, types.FunctionType):
2167#            f_dM = lambda k: dM
2168#        else:
2169#            f_dM = dM
2170
2171        return dM
2172
2173
2174    @staticmethod
2175    def _check_grad_measurement_matrices(dM, state_dim, grad_params_no, measurement_dim, which = 'dH'):
2176        """
2177        Function checks (mostly check dimensions) matrices for marginal likelihood
2178        gradient parameters calculation. It check dH, dR matrices.
2179
2180        Input:
2181        -------------
2182            dM: None, scaler or 3D matrix
2183                It is supposed to be
2184                (measurement_dim ,state_dim,grad_params_no) for "dH" matrix.
2185                (measurement_dim,measurement_dim,grad_params_no) for "dR"
2186
2187                If None then zero matrix is assumed. If scalar then the function
2188                checks consistency with "state_dim" and "grad_params_no".
2189
2190            state_dim: int
2191                State dimensionality
2192
2193            grad_params_no: int
2194                How many parrameters of likelihood gradient in total.
2195
2196            measurement_dim: int
2197                Dimensionality of measurements.
2198
2199            which: string
2200                'dH' or 'dR'
2201
2202
2203        Output:
2204        --------------
2205            function of (k) which returns the parameters matrix.
2206        """
2207
2208        if dM is None:
2209            if which == 'dH':
2210                dM=np.zeros((measurement_dim ,state_dim,grad_params_no))
2211            elif  which == 'dR':
2212                dM=np.zeros((measurement_dim,measurement_dim,grad_params_no))
2213        elif isinstance(dM, np.ndarray):
2214            if state_dim == 1:
2215                if len(dM.shape) < 3:
2216                    dM.shape = (1,1,1)
2217            else:
2218                if len(dM.shape) < 3:
2219                     if which == 'dH':
2220                        dM.shape = (measurement_dim,state_dim,1)
2221                     elif  which == 'dR':
2222                        dM.shape = (measurement_dim,measurement_dim,1)
2223        elif isinstance(dM, np.int):
2224            if state_dim > 1:
2225                raise ValueError("When computing likelihood gradient wrong dH dimension.")
2226            else:
2227                dM = np.ones((1,1,1)) * dM
2228
2229#        if not isinstance(dM, types.FunctionType):
2230#            f_dM = lambda k: dM
2231#        else:
2232#            f_dM = dM
2233
2234        return dM
2235
2236
2237
2238class Struct(object):
2239    pass
2240
2241class ContDescrStateSpace(DescreteStateSpace):
2242    """
2243    Class for continuous-discrete Kalman filter. State equation is
2244    continuous while measurement equation is discrete.
2245
2246        d x(t)/ dt = F x(t) + L q;        where q~ N(0, Qc)
2247        y_{t_k} = H_{k} x_{t_k} + r_{k};        r_{k-1} ~ N(0, R_{k})
2248
2249    """
2250
2251    class AQcompute_once(Q_handling_Class):
2252        """
2253        Class for calculating matrices A, Q, dA, dQ of the discrete Kalman Filter
2254        from the matrices F, L, Qc, P_ing, dF, dQc, dP_inf of the continuos state
2255        equation. dt - time steps.
2256
2257        It has the same interface as AQcompute_batch.
2258
2259        It computes matrices for only one time step. This object is used when
2260        there are many different time steps and storing matrices for each of them
2261        would take too much memory.
2262        """
2263
2264        def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None):
2265            """
2266            Constructor. All necessary parameters are passed here and stored
2267            in the opject.
2268
2269            Input:
2270            -------------------
2271                F, L, Qc, P_inf : matrices
2272                    Parameters of corresponding continuous state model
2273                dt: array
2274                    All time steps
2275                compute_derivatives: bool
2276                    Whether to calculate derivatives
2277
2278                dP_inf, dF, dQc: 3D array
2279                    Derivatives if they are required
2280
2281            Output:
2282            -------------------
2283            Nothing
2284            """
2285            # Copies are done because this object is used later in smoother
2286            # and these parameters must not change.
2287            self.F = F.copy()
2288            self.L = L.copy()
2289            self.Qc = Qc.copy()
2290
2291            self.dt = dt # copy is not taken because dt is internal parameter
2292
2293            # Parameters are used to calculate derivatives but derivatives
2294            # are not used in the smoother. Therefore copies are not taken.
2295            self.P_inf = P_inf
2296            self.dP_inf = dP_inf
2297            self.dF = dF
2298            self.dQc = dQc
2299
2300            self.compute_derivatives = compute_derivatives
2301            self.grad_params_no =  grad_params_no
2302
2303
2304            self.last_k = 0
2305            self.last_k_computed = False
2306            self.v_Ak = None
2307            self.v_Qk = None
2308            self.v_dAk = None
2309            self.v_dQk = None
2310
2311            self.square_root_computed = False
2312            self.Q_inverse_computed = False
2313            self.Q_svd_computed = False
2314            # !!!Print statistics! Which object is created
2315
2316        def f_a(self, k,m,A):
2317            """
2318            Dynamic model
2319            """
2320
2321            return np.dot(A, m) # default dynamic model
2322
2323        def _recompute_for_new_k(self,k):
2324            """
2325            Computes the necessary matrices for an index k and store the results.
2326
2327            Input:
2328            ----------------------
2329            k: int
2330                Index in the time differences array dt where to compute matrices
2331
2332            Output:
2333            ----------------------
2334                Ak,Qk, dAk, dQk: matrices and/or 3D arrays
2335                    A, Q, dA dQ on step k
2336            """
2337            if (self.last_k != k) or (self.last_k_computed == False):
2338                v_Ak,v_Qk, tmp, v_dAk, v_dQk = ContDescrStateSpace.lti_sde_to_descrete(self.F,
2339                        self.L,self.Qc,self.dt[k],self.compute_derivatives,
2340                        grad_params_no=self.grad_params_no, P_inf=self.P_inf, dP_inf=self.dP_inf, dF=self.dF, dQc=self.dQc)
2341
2342                self.last_k = k
2343                self.last_k_computed = True
2344                self.v_Ak = v_Ak
2345                self.v_Qk = v_Qk
2346                self.v_dAk = v_dAk
2347                self.v_dQk = v_dQk
2348
2349                self.Q_square_root_computed = False
2350                self.Q_inverse_computed = False
2351                self.Q_svd_computed = False
2352            else:
2353                v_Ak = self.v_Ak
2354                v_Qk = self.v_Qk
2355                v_dAk = self.v_dAk
2356                v_dQk = self.v_dQk
2357
2358            # !!!Print statistics! Print sizes of matrices
2359
2360            return v_Ak,v_Qk, v_dAk, v_dQk
2361
2362        def reset(self, compute_derivatives):
2363            """
2364            For reusing this object e.g. in smoother computation. Actually,
2365            this object can not be reused because it computes the matrices on
2366            every iteration. But this method is written for keeping the same
2367            interface with the class AQcompute_batch.
2368            """
2369
2370            self.last_k = 0
2371            self.last_k_computed = False
2372            self.compute_derivatives = compute_derivatives
2373
2374            self.Q_square_root_computed = False
2375            self.Q_inverse_computed = False
2376            self.Q_svd_computed = False
2377            self.Q_eigen_computed = False
2378            return self
2379
2380        def Ak(self,k,m,P):
2381            v_Ak,v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k)
2382            return v_Ak
2383
2384        def Qk(self,k):
2385            v_Ak,v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k)
2386            return v_Qk
2387
2388        def dAk(self, k):
2389            v_Ak,v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k)
2390            return v_dAk
2391
2392        def dQk(self, k):
2393            v_Ak,v_Qk, v_dAk, v_dQk = self._recompute_for_new_k(k)
2394            return v_dQk
2395
2396        def Q_srk(self,k):
2397            """
2398            Check square root, maybe rewriting for Spectral decomposition is needed.
2399            Square root of the noise matrix Q
2400            """
2401
2402            if ((self.last_k == k) and (self.last_k_computed == True)):
2403                if not self.Q_square_root_computed:
2404                    if not self.Q_svd_computed:
2405                        (U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
2406                        self.Q_svd = (U, S, Vh)
2407                        self.Q_svd_computed = True
2408                    else:
2409                        (U, S, Vh) = self.Q_svd
2410
2411                    square_root = U * np.sqrt(S)
2412                    self.square_root_computed = True
2413                    self.Q_square_root = square_root
2414                else:
2415                    square_root = self.Q_square_root
2416            else:
2417                raise ValueError("Square root of Q can not be computed")
2418
2419            return square_root
2420
2421        def Q_inverse(self, k, p_largest_cond_num, p_regularization_type):
2422            """
2423            Function inverts Q matrix and regularizes the inverse.
2424            Regularization is useful when original matrix is badly conditioned.
2425            Function is currently used only in SparseGP code.
2426
2427            Inputs:
2428            ------------------------------
2429            k: int
2430            Iteration number.
2431
2432            p_largest_cond_num: float
2433            Largest condition value for the inverted matrix. If cond. number is smaller than that
2434            no regularization happen.
2435
2436            regularization_type: 1 or 2
2437            Regularization type.
2438
2439            regularization_type: int (1 or 2)
2440
2441                type 1: 1/(S[k] + regularizer) regularizer is computed
2442                type 2: S[k]/(S^2[k] + regularizer) regularizer is computed
2443            """
2444
2445            #import pdb; pdb.set_trace()
2446
2447            if ((self.last_k == k) and (self.last_k_computed == True)):
2448                if not self.Q_inverse_computed:
2449                    if not self.Q_svd_computed:
2450                        (U, S, Vh) = sp.linalg.svd( self.v_Qk, full_matrices=False, compute_uv=True, overwrite_a=False, check_finite=False)
2451                        self.Q_svd = (U, S, Vh)
2452                        self.Q_svd_computed = True
2453                    else:
2454                        (U, S, Vh) = self.Q_svd
2455
2456                    Q_inverse_r = psd_matrix_inverse(k, 0.5*(self.v_Qk + self.v_Qk.T), U,S, p_largest_cond_num, p_regularization_type)
2457
2458                    self.Q_inverse_computed = True
2459                    self.Q_inverse_r = Q_inverse_r
2460
2461                else:
2462                    Q_inverse_r = self.Q_inverse_r
2463            else:
2464                raise ValueError("""Inverse of Q can not be computed, because Q has not been computed.
2465                                     This requires some programming""")
2466
2467            return Q_inverse_r
2468
2469
2470        def return_last(self):
2471            """
2472            Function returns last computed matrices.
2473            """
2474            if not self.last_k_computed:
2475                raise ValueError("Matrices are not computed.")
2476            else:
2477                k = self.last_k
2478                A = self.v_Ak
2479                Q = self.v_Qk
2480                dA = self.v_dAk
2481                dQ = self.v_dQk
2482
2483            return k, A, Q, dA, dQ
2484
2485    class AQcompute_batch_Python(Q_handling_Class):
2486        """
2487        Class for calculating matrices A, Q, dA, dQ of the discrete Kalman Filter
2488        from the matrices F, L, Qc, P_ing, dF, dQc, dP_inf of the continuos state
2489        equation. dt - time steps.
2490
2491        It has the same interface as AQcompute_once.
2492
2493        It computes matrices for all time steps. This object is used when
2494        there are not so many (controlled by internal variable)
2495        different time steps and storing all the matrices do not take too much memory.
2496
2497        Since all the matrices are computed all together, this object can be used
2498        in smoother without repeating the computations.
2499        """
2500        def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None):
2501            """
2502            Constructor. All necessary parameters are passed here and stored
2503            in the opject.
2504
2505            Input:
2506            -------------------
2507                F, L, Qc, P_inf : matrices
2508                    Parameters of corresponding continuous state model
2509                dt: array
2510                    All time steps
2511                compute_derivatives: bool
2512                    Whether to calculate derivatives
2513
2514                dP_inf, dF, dQc: 3D array
2515                    Derivatives if they are required
2516
2517            Output:
2518            -------------------
2519            Nothing
2520            """
2521            As, Qs, reconstruct_indices, dAs, dQs = ContDescrStateSpace.lti_sde_to_descrete(F,
2522                        L,Qc,dt,compute_derivatives,
2523                        grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc)
2524
2525            self.As = As
2526            self.Qs = Qs
2527            self.dAs = dAs
2528            self.dQs = dQs
2529            self.reconstruct_indices = reconstruct_indices
2530            self.total_size_of_data = self.As.nbytes + self.Qs.nbytes +\
2531                            (self.dAs.nbytes if (self.dAs is not None) else 0) +\
2532                            (self.dQs.nbytes if (self.dQs is not None) else 0) +\
2533                            (self.reconstruct_indices.nbytes if (self.reconstruct_indices is not None) else 0)
2534
2535            self.Q_svd_dict = {}
2536            self.Q_square_root_dict = {}
2537            self.Q_inverse_dict = {}
2538
2539            self.last_k = None
2540             # !!!Print statistics! Which object is created
2541            # !!!Print statistics! Print sizes of matrices
2542
2543        def f_a(self, k,m,A):
2544            """
2545            Dynamic model
2546            """
2547            return np.dot(A, m) # default dynamic model
2548
2549        def reset(self, compute_derivatives=False):
2550            """
2551            For reusing this object e.g. in smoother computation. It makes sence
2552            because necessary matrices have been already computed for all
2553            time steps.
2554            """
2555            return self
2556
2557        def Ak(self,k,m,P):
2558            self.last_k = k
2559            return self.As[:,:, self.reconstruct_indices[k]]
2560
2561        def Qk(self,k):
2562            self.last_k = k
2563            return self.Qs[:,:, self.reconstruct_indices[k]]
2564
2565        def dAk(self,k):
2566            self.last_k = k
2567            return self.dAs[:,:, :, self.reconstruct_indices[k]]
2568
2569        def dQk(self,k):
2570            self.last_k = k
2571            return self.dQs[:,:, :, self.reconstruct_indices[k]]
2572
2573
2574        def Q_srk(self,k):
2575            """
2576            Square root of the noise matrix Q
2577            """
2578            matrix_index = self.reconstruct_indices[k]
2579            if matrix_index in self.Q_square_root_dict:
2580                square_root = self.Q_square_root_dict[matrix_index]
2581            else:
2582                if matrix_index in self.Q_svd_dict:
2583                    (U, S, Vh) = self.Q_svd_dict[matrix_index]
2584                else:
2585                    (U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index],
2586                                        full_matrices=False, compute_uv=True,
2587                                        overwrite_a=False, check_finite=False)
2588                    self.Q_svd_dict[matrix_index] = (U,S,Vh)
2589
2590                square_root = U * np.sqrt(S)
2591                self.Q_square_root_dict[matrix_index] = square_root
2592
2593            return square_root
2594
2595        def Q_inverse(self, k, p_largest_cond_num, p_regularization_type):
2596            """
2597            Function inverts Q matrix and regularizes the inverse.
2598            Regularization is useful when original matrix is badly conditioned.
2599            Function is currently used only in SparseGP code.
2600
2601            Inputs:
2602            ------------------------------
2603            k: int
2604            Iteration number.
2605
2606            p_largest_cond_num: float
2607            Largest condition value for the inverted matrix. If cond. number is smaller than that
2608            no regularization happen.
2609
2610            regularization_type: 1 or 2
2611            Regularization type.
2612
2613            regularization_type: int (1 or 2)
2614
2615                type 1: 1/(S[k] + regularizer) regularizer is computed
2616                type 2: S[k]/(S^2[k] + regularizer) regularizer is computed
2617            """
2618            #import pdb; pdb.set_trace()
2619
2620            matrix_index = self.reconstruct_indices[k]
2621            if matrix_index in self.Q_inverse_dict:
2622                Q_inverse_r = self.Q_inverse_dict[matrix_index]
2623            else:
2624
2625                if matrix_index in self.Q_svd_dict:
2626                    (U, S, Vh) = self.Q_svd_dict[matrix_index]
2627                else:
2628                    (U, S, Vh) = sp.linalg.svd( self.Qs[:,:, matrix_index],
2629                                        full_matrices=False, compute_uv=True,
2630                                        overwrite_a=False, check_finite=False)
2631                    self.Q_svd_dict[matrix_index] = (U,S,Vh)
2632
2633                Q_inverse_r = psd_matrix_inverse(k, 0.5*(self.Qs[:,:, matrix_index] + self.Qs[:,:, matrix_index].T), U,S, p_largest_cond_num, p_regularization_type)
2634                self.Q_inverse_dict[matrix_index] = Q_inverse_r
2635
2636            return Q_inverse_r
2637
2638
2639        def return_last(self):
2640            """
2641            Function returns last available matrices.
2642            """
2643
2644            if (self.last_k is None):
2645                raise ValueError("Matrices are not computed.")
2646            else:
2647                ind = self.reconstruct_indices[self.last_k]
2648                A = self.As[:,:, ind]
2649                Q = self.Qs[:,:, ind]
2650                dA = self.dAs[:,:, :, ind]
2651                dQ = self.dQs[:,:, :, ind]
2652
2653            return self.last_k, A, Q, dA, dQ
2654
2655    @classmethod
2656    def cont_discr_kalman_filter(cls, F, L, Qc, p_H, p_R, P_inf, X, Y, index = None,
2657                                 m_init=None, P_init=None,
2658                                 p_kalman_filter_type='regular',
2659                                 calc_log_likelihood=False,
2660                                 calc_grad_log_likelihood=False,
2661                                 grad_params_no=0, grad_calc_params=None):
2662        """
2663        This function implements the continuous-discrete Kalman Filter algorithm
2664        These notations for the State-Space model are assumed:
2665            d/dt x(t) =  F * x(t) + L * w(t);         w(t) ~ N(0, Qc)
2666            y_{k} = H_{k} * x_{k} + r_{k};            r_{k-1} ~ N(0, R_{k})
2667
2668        Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k))
2669
2670        Current Features:
2671        ----------------------------------------
2672        1) The function generaly do not modify the passed parameters. If
2673        it happens then it is an error. There are several exeprions: scalars
2674        can be modified into a matrix, in some rare cases shapes of
2675        the derivatives matrices may be changed, it is ignored for now.
2676
2677        2) Copies of F,L,Qc are created in memory because they may be used later
2678        in smoother. References to copies are kept in "AQcomp" object
2679        return parameter.
2680
2681        3) Function support "multiple time series mode" which means that exactly
2682        the same State-Space model is used to filter several sets of measurements.
2683        In this case third dimension of Y should include these state-space measurements
2684        Log_likelihood and Grad_log_likelihood have the corresponding dimensions then.
2685
2686        4) Calculation of Grad_log_likelihood is not supported if matrices
2687        H, or R changes overf time (with index k). (later may be changed)
2688
2689        5) Measurement may include missing values. In this case update step is
2690        not done for this measurement. (later may be changed)
2691
2692        Input:
2693        -----------------
2694
2695        F: (state_dim, state_dim) matrix
2696            F in the model.
2697
2698        L: (state_dim, noise_dim) matrix
2699            L in the model.
2700
2701        Qc: (noise_dim, noise_dim) matrix
2702            Q_c in the model.
2703
2704        p_H: scalar, matrix (measurement_dim, state_dim) , 3D array
2705            H_{k} in the model. If matrix then H_{k} = H - constant.
2706            If it is 3D array then H_{k} = p_Q[:,:, index[2,k]]
2707
2708        p_R: scalar, square symmetric matrix, 3D array
2709            R_{k} in the model. If matrix then R_{k} = R - constant.
2710            If it is 3D array then R_{k} = p_R[:,:, index[3,k]]
2711
2712        P_inf: (state_dim, state_dim) matrix
2713            State varince matrix on infinity.
2714
2715        X: 1D array
2716            Time points of measurements. Needed for converting continuos
2717            problem to the discrete one.
2718
2719        Y: matrix or vector or 3D array
2720            Data. If Y is matrix then samples are along 0-th dimension and
2721            features along the 1-st. If 3D array then third dimension
2722            correspond to "multiple time series mode".
2723
2724        index: vector
2725            Which indices (on 3-rd dimension) from arrays p_H, p_R to use
2726            on every time step. If this parameter is None then it is assumed
2727            that p_H, p_R do not change over time and indices are not needed.
2728            index[0,:] - correspond to H, index[1,:] - correspond to R
2729            If index.shape[0] == 1, it is assumed that indides for all matrices
2730            are the same.
2731
2732        m_init: vector or matrix
2733            Initial distribution mean. If None it is assumed to be zero.
2734            For "multiple time series mode" it is matrix, second dimension of
2735            which correspond to different time series. In regular case ("one
2736            time series mode") it is a vector.
2737
2738        P_init: square symmetric matrix or scalar
2739            Initial covariance of the states. If the parameter is scalar
2740            then it is assumed that initial covariance matrix is unit matrix
2741            multiplied by this scalar. If None the unit matrix is used instead.
2742            "multiple time series mode" does not affect it, since it does not
2743            affect anything related to state variaces.
2744
2745        p_kalman_filter_type: string, one of ('regular', 'svd')
2746            Which Kalman Filter is used. Regular or SVD. SVD is more numerically
2747            stable, in particular, Covariace matrices are guarantied to be
2748            positive semi-definite. However, 'svd' works slower, especially for
2749            small data due to SVD call overhead.
2750
2751        calc_log_likelihood: boolean
2752            Whether to calculate marginal likelihood of the state-space model.
2753
2754        calc_grad_log_likelihood: boolean
2755            Whether to calculate gradient of the marginal likelihood
2756            of the state-space model. If true then "grad_calc_params" parameter must
2757            provide the extra parameters for gradient calculation.
2758
2759        grad_params_no: int
2760            If previous parameter is true, then this parameters gives the
2761            total number of parameters in the gradient.
2762
2763        grad_calc_params: dictionary
2764            Dictionary with derivatives of model matrices with respect
2765            to parameters "dF", "dL", "dQc", "dH", "dR", "dm_init", "dP_init".
2766            They can be None, in this case zero matrices (no dependence on parameters)
2767            is assumed. If there is only one parameter then third dimension is
2768            automatically added.
2769
2770        Output:
2771        --------------
2772
2773        M: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array
2774            Filter estimates of the state means. In the extra step the initial
2775            value is included. In the "multiple time series mode" third dimension
2776            correspond to different timeseries.
2777
2778        P: (no_steps+1, state_dim, state_dim) 3D array
2779            Filter estimates of the state covariances. In the extra step the initial
2780            value is included.
2781
2782        log_likelihood: double or (1, time_series_no) 3D array.
2783
2784            If the parameter calc_log_likelihood was set to true, return
2785            logarithm of marginal likelihood of the state-space model. If
2786            the parameter was false, return None. In the "multiple time series mode" it is a vector
2787            providing log_likelihood for each time series.
2788
2789        grad_log_likelihood: column vector or (grad_params_no, time_series_no) matrix
2790            If calc_grad_log_likelihood is true, return gradient of log likelihood
2791            with respect to parameters. It returns it column wise, so in
2792            "multiple time series mode" gradients for each time series is in the
2793            corresponding column.
2794
2795        AQcomp: object
2796            Contains some pre-computed values for converting continuos model into
2797            discrete one. It can be used later in the smoothing pahse.
2798        """
2799
2800        p_H = np.atleast_1d(p_H)
2801        p_R = np.atleast_1d(p_R)
2802
2803        X.shape, old_X_shape  = cls._reshape_input_data(X.shape, 2) # represent as column
2804        if (X.shape[1] != 1):
2805            raise ValueError("Only one dimensional X data is supported.")
2806
2807        Y.shape, old_Y_shape  = cls._reshape_input_data(Y.shape) # represent as column
2808
2809        state_dim = F.shape[0]
2810        measurement_dim = Y.shape[1]
2811        time_series_no = Y.shape[2] # multiple time series mode
2812
2813        if  ((len(p_H.shape) == 3) and (len(p_H.shape[2]) != 1)) or\
2814            ((len(p_R.shape) == 3) and (len(p_R.shape[2]) != 1)):
2815            model_matrices_chage_with_time = True
2816        else:
2817            model_matrices_chage_with_time = False
2818
2819        # Check index
2820        old_index_shape = None
2821        if index is None:
2822            if (len(p_H.shape) == 3) or (len(p_R.shape) == 3):
2823                raise ValueError("Parameter index can not be None for time varying matrices (third dimension is present)")
2824            else: # matrices do not change in time, so form dummy zero indices.
2825                index = np.zeros((1,Y.shape[0]))
2826        else:
2827            if len(index.shape) == 1:
2828                index.shape = (1,index.shape[0])
2829                old_index_shape = (index.shape[0],)
2830
2831            if (index.shape[1] != Y.shape[0]):
2832                raise ValueError("Number of measurements must be equal the number of H_{k}, R_{k}")
2833
2834        if (index.shape[0] == 1):
2835            H_time_var_index = 0; R_time_var_index = 0
2836        elif (index.shape[0] == 4):
2837            H_time_var_index = 0; R_time_var_index = 1
2838        else:
2839            raise ValueError("First Dimension of index must be either 1 or 2.")
2840
2841        (p_H, old_H_shape) = cls._check_SS_matrix(p_H, state_dim, measurement_dim, which='H')
2842        (p_R, old_R_shape) = cls._check_SS_matrix(p_R, state_dim, measurement_dim, which='R')
2843
2844        if m_init is None:
2845            m_init = np.zeros((state_dim, time_series_no))
2846        else:
2847            m_init = np.atleast_2d(m_init).T
2848
2849        if P_init is None:
2850            P_init = P_inf.copy()
2851
2852        if p_kalman_filter_type not in ('regular', 'svd'):
2853            raise ValueError("Kalman filer type neither 'regular nor 'svd'.")
2854
2855        # Functions to pass to the kalman_filter algorithm:
2856        # Parameters:
2857        # k - number of Kalman filter iteration
2858        # m - vector for calculating matrices. Required for EKF. Not used here.
2859        # f_hl = lambda k,m,H: np.dot(H, m)
2860        # f_H = lambda k,m,P: p_H[:,:, index[H_time_var_index, k]]
2861        #f_R = lambda k: p_R[:,:, index[R_time_var_index, k]]
2862        #o_R = R_handling( p_R, index, R_time_var_index, 20)
2863
2864        if calc_grad_log_likelihood:
2865
2866            dF = cls._check_grad_state_matrices(grad_calc_params.get('dF'), state_dim, grad_params_no, which = 'dA')
2867            dQc = cls._check_grad_state_matrices(grad_calc_params.get('dQc'), state_dim, grad_params_no, which = 'dQ')
2868            dP_inf = cls._check_grad_state_matrices(grad_calc_params.get('dP_inf'), state_dim, grad_params_no, which = 'dA')
2869
2870            dH = cls._check_grad_measurement_matrices(grad_calc_params.get('dH'), state_dim, grad_params_no, measurement_dim, which = 'dH')
2871            dR = cls._check_grad_measurement_matrices(grad_calc_params.get('dR'), state_dim, grad_params_no, measurement_dim, which = 'dR')
2872
2873            dm_init = grad_calc_params.get('dm_init') # Initial values for the Kalman Filter
2874            if dm_init is None:
2875                # multiple time series mode. Keep grad_params always as a last dimension
2876                dm_init = np.zeros( (state_dim, time_series_no, grad_params_no) )
2877
2878            dP_init = grad_calc_params.get('dP_init') # Initial values for the Kalman Filter
2879            if dP_init is None:
2880                dP_init = dP_inf(0).copy() # get the dP_init matrix, because now it is a function
2881
2882        else:
2883            dP_inf = None
2884            dF = None
2885            dQc = None
2886            dH = None
2887            dR = None
2888            dm_init = None
2889            dP_init = None
2890
2891        measurement_callables = Std_Measurement_Callables_Class(p_H, H_time_var_index, p_R, index, R_time_var_index, 20, dH, dR)
2892        #import pdb; pdb.set_trace()
2893
2894        dynamic_callables = cls._cont_to_discrete_object(X, F, L, Qc, compute_derivatives=calc_grad_log_likelihood,
2895                                              grad_params_no=grad_params_no,
2896                                              P_inf=P_inf, dP_inf=dP_inf, dF = dF, dQc=dQc)
2897
2898        if print_verbose:
2899            print("General: run Continuos-Discrete Kalman Filter")
2900        # Also for dH, dR and probably for all derivatives
2901        (M, P, log_likelihood, grad_log_likelihood, AQcomp) = cls._cont_discr_kalman_filter_raw(state_dim,
2902                        dynamic_callables, measurement_callables,
2903                        X, Y, m_init=m_init, P_init=P_init,
2904                        p_kalman_filter_type=p_kalman_filter_type,
2905                        calc_log_likelihood=calc_log_likelihood,
2906                        calc_grad_log_likelihood=calc_grad_log_likelihood, grad_params_no=grad_params_no,
2907                        dm_init=dm_init, dP_init=dP_init)
2908
2909        if old_index_shape is not None:
2910            index.shape = old_index_shape
2911
2912        if old_X_shape is not None:
2913            X.shape = old_X_shape
2914
2915        if old_Y_shape is not None:
2916            Y.shape = old_Y_shape
2917
2918        if old_H_shape is not None:
2919            p_H.shape = old_H_shape
2920
2921        if old_R_shape is not None:
2922            p_R.shape = old_R_shape
2923
2924        return (M, P, log_likelihood, grad_log_likelihood, AQcomp)
2925
2926    @classmethod
2927    def _cont_discr_kalman_filter_raw(cls,state_dim, p_dynamic_callables, p_measurement_callables, X, Y,
2928                                      m_init, P_init,
2929                                      p_kalman_filter_type='regular',
2930                                      calc_log_likelihood=False,
2931                      calc_grad_log_likelihood=False, grad_params_no=None,
2932                      dm_init=None, dP_init=None):
2933        """
2934        General filtering algorithm for inference in the continuos-discrete
2935        state-space model:
2936
2937            d/dt x(t) =  F * x(t) + L * w(t);         w(t) ~ N(0, Qc)
2938            y_{k} = H_{k} * x_{k} + r_{k};            r_{k-1} ~ N(0, R_{k})
2939
2940        Returns estimated filter distributions x_{k} ~ N(m_{k}, P(k))
2941
2942        Current Features:
2943        ----------------------------------------
2944
2945        1) Function support "multiple time series mode" which means that exactly
2946        the same State-Space model is used to filter several sets of measurements.
2947        In this case third dimension of Y should include these state-space measurements
2948        Log_likelihood and Grad_log_likelihood have the corresponding dimensions then.
2949
2950        2) Measurement may include missing values. In this case update step is
2951        not done for this measurement. (later may be changed)
2952
2953        Input:
2954        -----------------
2955        state_dim: int
2956            Demensionality of the states
2957
2958        F: (state_dim, state_dim) matrix
2959            F in the model.
2960
2961        L: (state_dim, noise_dim) matrix
2962            L in the model.
2963
2964        Qc: (noise_dim, noise_dim) matrix
2965            Q_c in the model.
2966
2967        P_inf: (state_dim, state_dim) matrix
2968            State varince matrix on infinity.
2969
2970        p_h: function (k, x_{k}, H_{k}). Measurement function.
2971            k (iteration number),
2972            x_{k}
2973            H_{k} Jacobian matrices of f_h. In the linear case it is exactly H_{k}.
2974
2975        f_H: function (k, m, P) return Jacobian of dynamic function, it is
2976            passed into p_h.
2977            k (iteration number),
2978            m: point where Jacobian is evaluated,
2979            P: parameter for Jacobian, usually covariance matrix.
2980
2981        p_f_R: function (k). Returns noise matrix of measurement equation
2982            on iteration k.
2983            k (iteration number).
2984
2985        m_init: vector or matrix
2986            Initial distribution mean. For "multiple time series mode"
2987            it is matrix, second dimension of which correspond to different
2988            time series. In regular case ("one time series mode") it is a
2989            vector.
2990
2991        P_init: matrix or scalar
2992            Initial covariance of the states. Must be not None
2993            "multiple time series mode" does not affect it, since it does not
2994            affect anything related to state variaces.
2995
2996        p_kalman_filter_type: string, one of ('regular', 'svd')
2997            Which Kalman Filter is used. Regular or SVD. SVD is more numerically
2998            stable, in particular, Covariace matrices are guarantied to be
2999            positive semi-definite. However, 'svd' works slower, especially for
3000            small data due to SVD call overhead.
3001
3002        calc_log_likelihood: boolean
3003            Whether to calculate marginal likelihood of the state-space model.
3004
3005        calc_grad_log_likelihood: boolean
3006            Whether to calculate gradient of the marginal likelihood
3007            of the state-space model. If true then the next parameter must
3008            provide the extra parameters for gradient calculation.
3009
3010        grad_params_no: int
3011            Number of gradient parameters
3012
3013        dP_inf, dF, dQc, dH, dR, dm_init, dP_init: matrices or 3D arrays.
3014            Necessary parameters for derivatives calculation.
3015
3016        """
3017
3018        #import pdb; pdb.set_trace()
3019        steps_no = Y.shape[0] # number of steps in the Kalman Filter
3020        time_series_no = Y.shape[2] # multiple time series mode
3021
3022        # Allocate space for results
3023        # Mean estimations. Initial values will be included
3024        M = np.empty(((steps_no+1),state_dim,time_series_no))
3025        M[0,:,:] = m_init # Initialize mean values
3026        # Variance estimations. Initial values will be included
3027        P = np.empty(((steps_no+1),state_dim,state_dim))
3028        P_init = 0.5*( P_init + P_init.T) # symmetrize initial covariance. In some ustable cases this is uiseful
3029        P[0,:,:] = P_init # Initialize initial covariance matrix
3030
3031        #import pdb;pdb.set_trace()
3032        if p_kalman_filter_type == 'svd':
3033            (U,S,Vh) = sp.linalg.svd( P_init,full_matrices=False, compute_uv=True,
3034                      overwrite_a=False,check_finite=True)
3035            S[ (S==0) ] = 1e-17 # allows to run algorithm for singular initial variance
3036            P_upd = (P_init, S,U)
3037        #log_likelihood = 0
3038        #grad_log_likelihood = np.zeros((grad_params_no,1))
3039        log_likelihood = 0 if calc_log_likelihood else None
3040        grad_log_likelihood = 0 if calc_grad_log_likelihood else None
3041
3042        #setting initial values for derivatives update
3043        dm_upd = dm_init
3044        dP_upd = dP_init
3045        # Main loop of the Kalman filter
3046        for k in range(0,steps_no):
3047            # In this loop index for new estimations is (k+1), old - (k)
3048            # This happened because initial values are stored at 0-th index.
3049            #import pdb; pdb.set_trace()
3050
3051            prev_mean = M[k,:,:] # mean from the previous step
3052
3053            if p_kalman_filter_type == 'svd':
3054                m_pred, P_pred, dm_pred, dP_pred = \
3055                cls._kalman_prediction_step_SVD(k, prev_mean ,P_upd, p_dynamic_callables,
3056                    calc_grad_log_likelihood=calc_grad_log_likelihood,
3057                    p_dm = dm_upd, p_dP = dP_upd)
3058            else:
3059                m_pred, P_pred, dm_pred, dP_pred = \
3060                cls._kalman_prediction_step(k, prev_mean ,P[k,:,:], p_dynamic_callables,
3061                    calc_grad_log_likelihood=calc_grad_log_likelihood,
3062                    p_dm = dm_upd, p_dP = dP_upd )
3063
3064            #import pdb; pdb.set_trace()
3065            k_measurment = Y[k,:,:]
3066
3067            if (np.any(np.isnan(k_measurment)) == False):
3068
3069                if p_kalman_filter_type == 'svd':
3070                    m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
3071                    cls._kalman_update_step_SVD(k,  m_pred , P_pred, p_measurement_callables,
3072                            k_measurment, calc_log_likelihood=calc_log_likelihood,
3073                            calc_grad_log_likelihood=calc_grad_log_likelihood,
3074                            p_dm = dm_pred, p_dP = dP_pred )
3075
3076
3077    #                m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
3078    #                cls._kalman_update_step(k,  m_pred , P_pred[0], f_h, f_H, p_R.f_R, k_measurment,
3079    #                        calc_log_likelihood=calc_log_likelihood,
3080    #                        calc_grad_log_likelihood=calc_grad_log_likelihood,
3081    #                        p_dm = dm_pred, p_dP = dP_pred, grad_calc_params_2 = (dH, dR))
3082    #
3083    #                (U,S,Vh) = sp.linalg.svd( P_upd,full_matrices=False, compute_uv=True,
3084    #                      overwrite_a=False,check_finite=True)
3085    #                P_upd = (P_upd, S,U)
3086                else:
3087                    m_upd, P_upd, log_likelihood_update, dm_upd, dP_upd, d_log_likelihood_update = \
3088                    cls._kalman_update_step(k,  m_pred , P_pred, p_measurement_callables, k_measurment,
3089                            calc_log_likelihood=calc_log_likelihood,
3090                            calc_grad_log_likelihood=calc_grad_log_likelihood,
3091                            p_dm = dm_pred, p_dP = dP_pred )
3092            else:
3093                if k_measurment.shape != (1,1):
3094                    raise ValueError("Nan measurements are currently not supported for \
3095                                     multidimensional output and multiple tiem series.")
3096                else:
3097                    m_upd = m_pred; P_upd = P_pred; dm_upd = dm_pred; dP_upd = dP_pred
3098                    log_likelihood_update = 0.0;
3099                    d_log_likelihood_update = 0.0;
3100
3101
3102            if calc_log_likelihood:
3103                log_likelihood += log_likelihood_update
3104
3105            if calc_grad_log_likelihood:
3106                grad_log_likelihood += d_log_likelihood_update
3107
3108            M[k+1,:,:] = m_upd # separate mean value for each time series
3109
3110            if p_kalman_filter_type == 'svd':
3111                P[k+1,:,:] = P_upd[0]
3112            else:
3113                P[k+1,:,:] = P_upd
3114            #print("kf it: %i" % k)
3115            # !!!Print statistics! Print sizes of matrices
3116            # !!!Print statistics! Print iteration time base on another boolean variable
3117        return (M, P, log_likelihood, grad_log_likelihood, p_dynamic_callables.reset(False))
3118
3119    @classmethod
3120    def cont_discr_rts_smoother(cls,state_dim, filter_means, filter_covars,
3121                                p_dynamic_callables=None, X=None, F=None,L=None,Qc=None):
3122        """
3123
3124        Continuos-discrete Rauch–Tung–Striebel(RTS) smoother.
3125
3126        This function implements Rauch–Tung–Striebel(RTS) smoother algorithm
3127        based on the results of _cont_discr_kalman_filter_raw.
3128
3129        Model:
3130            d/dt x(t) =  F * x(t) + L * w(t);         w(t) ~ N(0, Qc)
3131            y_{k} = H_{k} * x_{k} + r_{k};            r_{k-1} ~ N(0, R_{k})
3132
3133        Returns estimated smoother distributions x_{k} ~ N(m_{k}, P(k))
3134
3135        Input:
3136        --------------
3137
3138        filter_means: (no_steps+1,state_dim) matrix or (no_steps+1,state_dim, time_series_no) 3D array
3139            Results of the Kalman Filter means estimation.
3140
3141        filter_covars: (no_steps+1, state_dim, state_dim) 3D array
3142            Results of the Kalman Filter covariance estimation.
3143
3144        Dynamic_callables: object or None
3145            Object form the filter phase which provides functions for computing
3146            A, Q, dA, dQ fro  discrete model from the continuos model.
3147
3148         X, F, L, Qc: matrices
3149             If AQcomp is None, these matrices are used to create this object from scratch.
3150
3151        Output:
3152        -------------
3153
3154        M: (no_steps+1,state_dim) matrix
3155            Smoothed estimates of the state means
3156
3157        P: (no_steps+1,state_dim, state_dim) 3D array
3158            Smoothed estimates of the state covariances
3159        """
3160
3161        f_a = lambda k,m,A: np.dot(A, m) # state dynamic model
3162        if p_dynamic_callables is None: # make this object from scratch
3163            p_dynamic_callables = cls._cont_to_discrete_object(cls, X, F,L,Qc,f_a,compute_derivatives=False,
3164                                                  grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None)
3165
3166        no_steps = filter_covars.shape[0]-1# number of steps (minus initial covariance)
3167
3168        M = np.empty(filter_means.shape) # smoothed means
3169        P = np.empty(filter_covars.shape) # smoothed covars
3170
3171        if print_verbose:
3172            print("General: run Continuos-Discrete Kalman Smoother")
3173
3174        M[-1,:,:] = filter_means[-1,:,:]
3175        P[-1,:,:] = filter_covars[-1,:,:]
3176        for k in range(no_steps-1,-1,-1):
3177
3178            prev_mean = filter_means[k,:] # mean from the previous step
3179            m_pred, P_pred, tmp1, tmp2 = \
3180                    cls._kalman_prediction_step(k, prev_mean,
3181                                                filter_covars[k,:,:], p_dynamic_callables,
3182                                                calc_grad_log_likelihood=False)
3183            p_m = filter_means[k,:]
3184            p_m_prev_step = M[(k+1),:]
3185
3186            m_upd, P_upd, tmp_G = cls._rts_smoother_update_step(k,
3187                            p_m ,filter_covars[k,:,:],
3188                            m_pred, P_pred, p_m_prev_step ,P[(k+1),:,:], p_dynamic_callables)
3189
3190            M[k,:,:] = m_upd
3191            P[k,:,:] = P_upd
3192        # Return values
3193        return (M, P)
3194
3195    @classmethod
3196    def _cont_to_discrete_object(cls, X, F, L, Qc, compute_derivatives=False,
3197                                 grad_params_no=None,
3198                                 P_inf=None, dP_inf=None, dF = None, dQc=None,
3199                                 dt0=None):
3200        """
3201        Function return the object which is used in Kalman filter and/or
3202        smoother to obtain matrices A, Q and their derivatives for discrete model
3203        from the continuous model.
3204
3205        There are 2 objects AQcompute_once and AQcompute_batch and the function
3206        returs the appropriate one based on the number of different time steps.
3207
3208        Input:
3209        ----------------------
3210        X, F, L, Qc: matrices
3211            Continuous model matrices
3212
3213        f_a: function
3214            Dynamic Function is attached to the Dynamic_Model_Callables class
3215        compute_derivatives: boolean
3216            Whether to compute derivatives
3217
3218        grad_params_no: int
3219            Number of parameters in the gradient
3220
3221        P_inf, dP_inf, dF, dQ: matrices and 3D objects
3222            Data necessary to compute derivatives.
3223
3224        Output:
3225        --------------------------
3226        AQcomp: object
3227            Its methods return matrices (and optionally derivatives) for the
3228            discrete state-space model.
3229
3230        """
3231
3232        unique_round_decimals = 10
3233        threshold_number_of_unique_time_steps = 20 # above which matrices are separately each time
3234        dt = np.empty((X.shape[0],))
3235        dt[1:] = np.diff(X[:,0],axis=0)
3236        if dt0 is None:
3237            dt[0]  = 0#dt[1]
3238        else:
3239            if isinstance(dt0,str):
3240                dt = dt[1:]
3241            else:
3242                dt[0] = dt0
3243
3244        unique_indices = np.unique(np.round(dt, decimals=unique_round_decimals))
3245        number_unique_indices = len(unique_indices)
3246
3247        #import pdb; pdb.set_trace()
3248        if use_cython:
3249            class AQcompute_batch(state_space_cython.AQcompute_batch_Cython):
3250                def __init__(self, F,L,Qc,dt,compute_derivatives=False, grad_params_no=None, P_inf=None, dP_inf=None, dF = None, dQc=None):
3251                    As, Qs, reconstruct_indices, dAs, dQs = ContDescrStateSpace.lti_sde_to_descrete(F,
3252                                L,Qc,dt,compute_derivatives,
3253                                grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc)
3254
3255                    super(AQcompute_batch,self).__init__(As, Qs, reconstruct_indices, dAs,dQs)
3256        else:
3257            AQcompute_batch = cls.AQcompute_batch_Python
3258
3259        if number_unique_indices > threshold_number_of_unique_time_steps:
3260            AQcomp = cls.AQcompute_once(F,L,Qc, dt,compute_derivatives=compute_derivatives,
3261                                    grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc)
3262            if print_verbose:
3263                print("CDO:  Continue-to-discrete INSTANTANEOUS object is created.")
3264                print("CDO:  Number of different time steps: %i" % (number_unique_indices,) )
3265
3266        else:
3267            AQcomp = AQcompute_batch(F,L,Qc,dt,compute_derivatives=compute_derivatives,
3268                                    grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF=dF, dQc=dQc)
3269            if print_verbose:
3270                print("CDO:  Continue-to-discrete BATCH object is created.")
3271                print("CDO:  Number of different time steps: %i" % (number_unique_indices,) )
3272                print("CDO:  Total size if its data: %i" % (AQcomp.total_size_of_data,) )
3273
3274        return AQcomp
3275
3276    @staticmethod
3277    def lti_sde_to_descrete(F,L,Qc,dt,compute_derivatives=False,
3278                            grad_params_no=None, P_inf=None,
3279                            dP_inf=None, dF = None, dQc=None):
3280        """
3281        Linear Time-Invariant Stochastic Differential Equation (LTI SDE):
3282
3283            dx(t) = F x(t) dt + L d \beta  ,where
3284
3285                x(t): (vector) stochastic process
3286                \beta: (vector) Brownian motion process
3287                F, L: (time invariant) matrices of corresponding dimensions
3288                Qc: covariance of noise.
3289
3290        This function rewrites it into the corresponding state-space form:
3291
3292            x_{k} =  A_{k} * x_{k-1} + q_{k-1};       q_{k-1} ~ N(0, Q_{k-1})
3293
3294        TODO: this function can be redone to "preprocess dataset", when
3295        close time points are handeled properly (with rounding parameter) and
3296        values are averaged accordingly.
3297
3298        Input:
3299        --------------
3300        F,L: LTI SDE matrices of corresponding dimensions
3301
3302        Qc: matrix (n,n)
3303            Covarince between different dimensions of noise \beta.
3304            n is the dimensionality of the noise.
3305
3306        dt: double or iterable
3307            Time difference used on this iteration.
3308            If dt is iterable, then A and Q_noise are computed for every
3309            unique dt
3310
3311        compute_derivatives: boolean
3312            Whether derivatives of A and Q are required.
3313
3314        grad_params_no: int
3315            Number of gradient parameters
3316
3317        P_inf: (state_dim. state_dim) matrix
3318
3319        dP_inf
3320
3321        dF: 3D array
3322            Derivatives of F
3323
3324        dQc: 3D array
3325            Derivatives of Qc
3326
3327        dR: 3D array
3328            Derivatives of R
3329
3330        Output:
3331        --------------
3332
3333        A: matrix
3334            A_{k}. Because we have LTI SDE only dt can affect on matrix
3335            difference for different k.
3336
3337        Q_noise: matrix
3338            Covariance matrix of (vector) q_{k-1}. Only dt can affect the
3339            matrix difference for different k.
3340
3341        reconstruct_index: array
3342            If dt was iterable return three dimensinal arrays A and Q_noise.
3343            Third dimension of these arrays correspond to unique dt's.
3344            This reconstruct_index contain indices of the original dt's
3345            in the uninue dt sequence. A[:,:, reconstruct_index[5]]
3346            is matrix A of 6-th(indices start from zero) dt in the original
3347            sequence.
3348        dA: 3D array
3349            Derivatives of A
3350
3351        dQ: 3D array
3352            Derivatives of Q
3353        """
3354        # Dimensionality
3355        n = F.shape[0]
3356
3357        if not isinstance(dt, collections.Iterable): # not iterable, scalar
3358            #import pdb; pdb.set_trace()
3359            # The dynamical model
3360            A  = matrix_exponent(F*dt)
3361
3362            # The covariance matrix Q by matrix fraction decomposition ->
3363            Phi = np.zeros((2*n,2*n))
3364            Phi[:n,:n] = F
3365            Phi[:n,n:] = L.dot(Qc).dot(L.T)
3366            Phi[n:,n:] = -F.T
3367            AB = matrix_exponent(Phi*dt)
3368            AB = np.dot(AB, np.vstack((np.zeros((n,n)),np.eye(n))))
3369
3370            Q_noise_1 = linalg.solve(AB[n:,:].T,AB[:n,:].T)
3371            Q_noise_2  = P_inf - A.dot(P_inf).dot(A.T)
3372            # The covariance matrix Q by matrix fraction decomposition <-
3373
3374            if compute_derivatives:
3375                dA = np.zeros([n, n, grad_params_no])
3376                dQ = np.zeros([n, n, grad_params_no])
3377
3378                #AA  = np.zeros([2*n, 2*n, nparam])
3379                FF  = np.zeros([2*n, 2*n])
3380                AA = np.zeros([2*n, 2*n, grad_params_no])
3381
3382                for p in range(0, grad_params_no):
3383
3384                    FF[:n,:n] = F
3385                    FF[n:,:n] = dF[:,:,p]
3386                    FF[n:,n:] = F
3387
3388                    # Solve the matrix exponential
3389                    AA[:,:,p] = matrix_exponent(FF*dt)
3390
3391                    # Solve the differential equation
3392                    #foo         = AA[:,:,p].dot(np.vstack([m, dm[:,p]]))
3393                    #mm          = foo[:n,:]
3394                    #dm[:,p] = foo[n:,:]
3395
3396                    # The discrete-time dynamical model*
3397                    if p==0:
3398                        A  = AA[:n,:n,p]
3399                        Q_noise_3  = P_inf - A.dot(P_inf).dot(A.T)
3400                        Q_noise = Q_noise_3
3401                        #PP = A.dot(P).dot(A.T) + Q_noise_2
3402
3403                    # The derivatives of A and Q
3404                    dA[:,:,p] = AA[n:,:n,p]
3405                    tmp = dA[:,:,p].dot(P_inf).dot(A.T)
3406                    dQ[:,:,p] = dP_inf[:,:,p] - tmp \
3407                       - A.dot(dP_inf[:,:,p]).dot(A.T) - tmp.T
3408
3409                    dQ[:,:,p] = 0.5*(dQ[:,:,p] + dQ[:,:,p].T) # Symmetrize
3410            else:
3411              dA = None
3412              dQ = None
3413              Q_noise = Q_noise_2
3414	      # Innacuracies have been observed when Q_noise_1 was used.
3415
3416            #Q_noise = Q_noise_1
3417
3418            Q_noise = 0.5*(Q_noise + Q_noise.T) # Symmetrize
3419            return A, Q_noise,None, dA, dQ
3420
3421        else: # iterable, array
3422
3423            # Time discretizations (round to 14 decimals to avoid problems)
3424            dt_unique, tmp, reconstruct_index = np.unique(np.round(dt,8),
3425                                        return_index=True,return_inverse=True)
3426            del tmp
3427            # Allocate space for A and Q
3428            A = np.empty((n,n,dt_unique.shape[0]))
3429            Q_noise = np.empty((n,n,dt_unique.shape[0]))
3430
3431            if compute_derivatives:
3432                dA = np.empty((n,n,grad_params_no,dt_unique.shape[0]))
3433                dQ = np.empty((n,n,grad_params_no,dt_unique.shape[0]))
3434            else:
3435                dA = None
3436                dQ = None
3437            # Call this function for each unique dt
3438            for j in range(0,dt_unique.shape[0]):
3439                A[:,:,j], Q_noise[:,:,j], tmp1, dA_t, dQ_t = ContDescrStateSpace.lti_sde_to_descrete(F,L,Qc,dt_unique[j],
3440                    compute_derivatives=compute_derivatives, grad_params_no=grad_params_no, P_inf=P_inf, dP_inf=dP_inf, dF = dF, dQc=dQc)
3441                if compute_derivatives:
3442                    dA[:,:,:,j] = dA_t
3443                    dQ[:,:,:,j] = dQ_t
3444
3445            # Return
3446            return A, Q_noise, reconstruct_index, dA, dQ
3447
3448def matrix_exponent(M):
3449    """
3450    The function computes matrix exponent and handles some special cases
3451    """
3452
3453    if (M.shape[0] == 1): # 1*1 matrix
3454        Mexp = np.array( ((np.exp(M[0,0]) ,),) )
3455
3456    else: # matrix is larger
3457        method = None
3458        try:
3459            Mexp = linalg.expm(M)
3460            method = 1
3461        except (Exception,) as e:
3462            Mexp = linalg.expm3(M)
3463            method = 2
3464        finally:
3465            if np.any(np.isnan(Mexp)):
3466                if method == 2:
3467                    raise ValueError("Matrix Exponent is not computed 1")
3468                else:
3469                    Mexp = linalg.expm3(M)
3470                    method = 2
3471                    if np.any(np.isnan(Mexp)):
3472                        raise ValueError("Matrix Exponent is not computed 2")
3473
3474    return Mexp
3475
3476def balance_matrix(A):
3477    """
3478    Balance matrix, i.e. finds such similarity transformation of the original
3479    matrix A:  A = T * bA * T^{-1}, where norms of columns of bA and of rows of bA
3480    are as close as possible. It is usually used as a preprocessing step in
3481    eigenvalue calculation routine. It is useful also for State-Space models.
3482
3483    See also:
3484        [1] Beresford N. Parlett and Christian Reinsch (1969). Balancing
3485            a matrix for calculation of eigenvalues and eigenvectors.
3486            Numerische Mathematik, 13(4): 293-304.
3487
3488    Input:
3489    ----------------------
3490    A: square matrix
3491        Matrix to be balanced
3492
3493    Output:
3494    ----------------
3495        bA: matrix
3496            Balanced matrix
3497
3498        T: matrix
3499            Left part of the similarity transformation
3500
3501        T_inv: matrix
3502            Right part of the similarity transformation.
3503    """
3504
3505    if len(A.shape) != 2 or (A.shape[0] != A.shape[1]):
3506        raise ValueError('balance_matrix: Expecting square matrix')
3507
3508    N = A.shape[0] # matrix size
3509
3510    gebal = sp.linalg.lapack.get_lapack_funcs('gebal',(A,))
3511    bA, lo, hi, pivscale, info = gebal(A, permute=True, scale=True,overwrite_a=False)
3512    if info < 0:
3513        raise ValueError('balance_matrix: Illegal value in %d-th argument of internal gebal ' % -info)
3514    # calculating the similarity transforamtion:
3515    def perm_matr(D, c1,c2):
3516        """
3517        Function creates the permutation matrix which swaps columns c1 and c2.
3518
3519        Input:
3520        --------------
3521        D: int
3522            Size of the permutation matrix
3523        c1: int
3524            Column 1. Numeration starts from 1...D
3525        c2: int
3526            Column 2. Numeration starts from 1...D
3527        """
3528        i1 = c1-1; i2 = c2-1 # indices
3529        P = np.eye(D);
3530        P[i1,i1] = 0.0; P[i2,i2] = 0.0; # nullify diagonal elements
3531        P[i1,i2] = 1.0; P[i2,i1] = 1.0
3532
3533        return P
3534
3535    P = np.eye(N) # permutation matrix
3536    if (hi != N-1): # there are row permutations
3537        for k in range(N-1,hi,-1):
3538            new_perm = perm_matr(N, k+1, pivscale[k])
3539            P = np.dot(P,new_perm)
3540    if (lo != 0):
3541        for k in range(0,lo,1):
3542            new_perm = perm_matr(N, k+1, pivscale[k])
3543            P = np.dot(P,new_perm)
3544    D = pivscale.copy()
3545    D[0:lo] = 1.0; D[hi+1:N] = 1.0 # thesee scaling factors must be set to one.
3546    #D = np.diag(D) # make a diagonal matrix
3547
3548    T = np.dot(P,np.diag(D)) # similarity transformation in question
3549    T_inv = np.dot(np.diag(D**(-1)),P.T)
3550
3551    #print( np.max(A - np.dot(T, np.dot(bA, T_inv) )) )
3552    return bA.copy(), T, T_inv
3553
3554def balance_ss_model(F,L,Qc,H,Pinf,P0,dF=None,dQc=None,dPinf=None,dP0=None):
3555    """
3556    Balances State-Space model for more numerical stability
3557
3558    This is based on the following:
3559
3560     dx/dt = F x + L w
3561         y = H x
3562
3563     Let T z = x, which gives
3564
3565     dz/dt = inv(T) F T z + inv(T) L w
3566         y = H T z
3567    """
3568
3569    bF,T,T_inv = balance_matrix(F)
3570
3571    bL = np.dot( T_inv, L)
3572    bQc = Qc # not affected
3573
3574    bH = np.dot(H, T)
3575
3576    bPinf = np.dot(T_inv, np.dot(Pinf, T_inv.T))
3577
3578    #import pdb; pdb.set_trace()
3579#    LL,islower = linalg.cho_factor(Pinf)
3580#    inds = np.triu_indices(Pinf.shape[0],k=1)
3581#    LL[inds] = 0.0
3582#    bLL = np.dot(T_inv, LL)
3583#    bPinf = np.dot( bLL, bLL.T)
3584
3585    bP0 = np.dot(T_inv, np.dot(P0, T_inv.T))
3586
3587    if dF is not None:
3588        bdF = np.zeros(dF.shape)
3589        for i in range(dF.shape[2]):
3590            bdF[:,:,i] = np.dot( T_inv, np.dot( dF[:,:,i], T))
3591
3592    else:
3593        bdF = None
3594
3595    if dPinf is not None:
3596        bdPinf = np.zeros(dPinf.shape)
3597        for i in range(dPinf.shape[2]):
3598            bdPinf[:,:,i] = np.dot( T_inv, np.dot( dPinf[:,:,i], T_inv.T))
3599
3600#            LL,islower = linalg.cho_factor(dPinf[:,:,i])
3601#            inds = np.triu_indices(dPinf[:,:,i].shape[0],k=1)
3602#            LL[inds] = 0.0
3603#            bLL = np.dot(T_inv, LL)
3604#            bdPinf[:,:,i] = np.dot( bLL, bLL.T)
3605
3606
3607    else:
3608        bdPinf = None
3609
3610    if dP0 is not None:
3611        bdP0 = np.zeros(dP0.shape)
3612        for i in range(dP0.shape[2]):
3613            bdP0[:,:,i] = np.dot( T_inv, np.dot( dP0[:,:,i], T_inv.T))
3614    else:
3615        bdP0 = None
3616
3617
3618    bdQc = dQc # not affected
3619
3620    # (F,L,Qc,H,Pinf,P0,dF,dQc,dPinf,dP0)
3621
3622    return bF, bL, bQc, bH, bPinf, bP0, bdF, bdQc, bdPinf, bdP0
3623