1import math
2import numpy as np
3from scipy import linalg, stats, special
4
5from .linalg_decomp_1 import SvdArray
6
7
8#univariate standard normal distribution
9#following from scipy.stats.distributions with adjustments
10sqrt2pi = math.sqrt(2 * np.pi)
11logsqrt2pi = math.log(sqrt2pi)
12
13class StandardNormal(object):
14    '''Distribution of vector x, with independent distribution N(0,1)
15
16    this is the same as univariate normal for pdf and logpdf
17
18    other methods not checked/adjusted yet
19
20    '''
21    def rvs(self, size):
22        return np.random.standard_normal(size)
23    def pdf(self, x):
24        return np.exp(-x**2 * 0.5) / sqrt2pi
25    def logpdf(self, x):
26        return -x**2 * 0.5 - logsqrt2pi
27    def _cdf(self, x):
28        return special.ndtr(x)
29    def _logcdf(self, x):
30        return np.log(special.ndtr(x))
31    def _ppf(self, q):
32        return special.ndtri(q)
33
34
35class AffineTransform(object):
36    '''affine full rank transformation of a multivariate distribution
37
38    no dimension checking, assumes everything broadcasts correctly
39    first version without bound support
40
41    provides distribution of y given distribution of x
42    y = const + tmat * x
43
44    '''
45    def __init__(self, const, tmat, dist):
46        self.const = const
47        self.tmat = tmat
48        self.dist = dist
49        self.nrv = len(const)
50        if not np.equal(self.nrv, tmat.shape).all():
51            raise ValueError('dimension of const and tmat do not agree')
52
53        #replace the following with a linalgarray class
54        self.tmatinv = linalg.inv(tmat)
55        self.absdet = np.abs(np.linalg.det(self.tmat))
56        self.logabsdet = np.log(np.abs(np.linalg.det(self.tmat)))
57        self.dist
58
59    def rvs(self, size):
60        #size can only be integer not yet tuple
61        print((size,)+(self.nrv,))
62        return self.transform(self.dist.rvs(size=(size,)+(self.nrv,)))
63
64    def transform(self, x):
65        #return np.dot(self.tmat, x) + self.const
66        return np.dot(x, self.tmat) + self.const
67
68    def invtransform(self, y):
69        return np.dot(self.tmatinv, y - self.const)
70
71    def pdf(self, x):
72        return 1. / self.absdet * self.dist.pdf(self.invtransform(x))
73
74    def logpdf(self, x):
75        return - self.logabsdet + self.dist.logpdf(self.invtransform(x))
76
77
78
79
80class MultivariateNormalChol(object):
81    '''multivariate normal distribution with cholesky decomposition of sigma
82
83    ignoring mean at the beginning, maybe
84
85    needs testing for broadcasting to contemporaneously but not intertemporaly
86    correlated random variable, which axis?,
87    maybe swapaxis or rollaxis if x.ndim != mean.ndim == (sigma.ndim - 1)
88
89    initially 1d is ok, 2d should work with iid in axis 0 and mvn in axis 1
90
91    '''
92
93    def __init__(self, mean, sigma):
94        self.mean = mean
95        self.sigma = sigma
96        self.sigmainv = sigmainv
97        self.cholsigma = linalg.cholesky(sigma)
98        #the following makes it lower triangular with increasing time
99        self.cholsigmainv = linalg.cholesky(sigmainv)[::-1,::-1]
100        #todo: this might be a trick todo backward instead of forward filtering
101
102    def whiten(self, x):
103        return np.dot(cholsigmainv, x)
104
105    def logpdf_obs(self, x):
106        x = x - self.mean
107        x_whitened = self.whiten(x)
108
109        #sigmainv = linalg.cholesky(sigma)
110        logdetsigma = np.log(np.linalg.det(sigma))
111
112        sigma2 = 1. # error variance is included in sigma
113
114        llike  =  0.5 * (np.log(sigma2)
115                         - 2.* np.log(np.diagonal(self.cholsigmainv))
116                         + (x_whitened**2)/sigma2
117                         +  np.log(2*np.pi))
118
119        return llike
120
121    def logpdf(self, x):
122        return self.logpdf_obs(x).sum(-1)
123
124    def pdf(self, x):
125        return np.exp(self.logpdf(x))
126
127
128
129class MultivariateNormal(object):
130
131    def __init__(self, mean, sigma):
132        self.mean = mean
133        self.sigma = SvdArray(sigma)
134
135
136
137
138def loglike_ar1(x, rho):
139    '''loglikelihood of AR(1) process, as a test case
140
141    sigma_u partially hard coded
142
143    Greene chapter 12 eq. (12-31)
144    '''
145    x = np.asarray(x)
146    u = np.r_[x[0], x[1:] - rho * x[:-1]]
147    sigma_u2 = 2*(1-rho**2)
148    loglik = 0.5*(-(u**2).sum(0) / sigma_u2 + np.log(1-rho**2)
149                  - x.shape[0] * (np.log(2*np.pi) + np.log(sigma_u2)))
150    return loglik
151
152
153def ar2transform(x, arcoefs):
154    '''
155
156    (Greene eq 12-30)
157    '''
158    a1, a2 = arcoefs
159    y = np.zeros_like(x)
160    y[0] = np.sqrt((1+a2) * ((1-a2)**2 - a1**2) / (1-a2)) * x[0]
161    y[1] = np.sqrt(1-a2**2) * x[2] - a1 * np.sqrt(1-a1**2)/(1-a2) * x[1] #TODO:wrong index in x
162    y[2:] = x[2:] - a1 * x[1:-1] - a2 * x[:-2]
163    return y
164
165
166def mvn_loglike(x, sigma):
167    '''loglike multivariate normal
168
169    assumes x is 1d, (nobs,) and sigma is 2d (nobs, nobs)
170
171    brute force from formula
172    no checking of correct inputs
173    use of inv and log-det should be replace with something more efficient
174    '''
175    #see numpy thread
176    #Sturla: sqmahal = (cx*cho_solve(cho_factor(S),cx.T).T).sum(axis=1)
177    sigmainv = linalg.inv(sigma)
178    logdetsigma = np.log(np.linalg.det(sigma))
179    nobs = len(x)
180
181    llf = - np.dot(x, np.dot(sigmainv, x))
182    llf -= nobs * np.log(2 * np.pi)
183    llf -= logdetsigma
184    llf *= 0.5
185    return llf
186
187def mvn_nloglike_obs(x, sigma):
188    '''loglike multivariate normal
189
190    assumes x is 1d, (nobs,) and sigma is 2d (nobs, nobs)
191
192    brute force from formula
193    no checking of correct inputs
194    use of inv and log-det should be replace with something more efficient
195    '''
196    #see numpy thread
197    #Sturla: sqmahal = (cx*cho_solve(cho_factor(S),cx.T).T).sum(axis=1)
198
199    #Still wasteful to calculate pinv first
200    sigmainv = linalg.inv(sigma)
201    cholsigmainv = linalg.cholesky(sigmainv)
202    #2 * np.sum(np.log(np.diagonal(np.linalg.cholesky(A)))) #Dag mailinglist
203    # logdet not needed ???
204    #logdetsigma = 2 * np.sum(np.log(np.diagonal(cholsigmainv)))
205    x_whitened = np.dot(cholsigmainv, x)
206
207    #sigmainv = linalg.cholesky(sigma)
208    logdetsigma = np.log(np.linalg.det(sigma))
209
210    sigma2 = 1. # error variance is included in sigma
211
212    llike  =  0.5 * (np.log(sigma2) - 2.* np.log(np.diagonal(cholsigmainv))
213                          + (x_whitened**2)/sigma2
214                          +  np.log(2*np.pi))
215
216    return llike, (x_whitened**2)
217
218nobs = 10
219x = np.arange(nobs)
220autocov = 2*0.8**np.arange(nobs)# +0.01 * np.random.randn(nobs)
221sigma = linalg.toeplitz(autocov)
222#sigma = np.diag(1+np.random.randn(10)**2)
223
224cholsigma = linalg.cholesky(sigma).T#, lower=True)
225
226sigmainv = linalg.inv(sigma)
227cholsigmainv = linalg.cholesky(sigmainv)
228#2 * np.sum(np.log(np.diagonal(np.linalg.cholesky(A)))) #Dag mailinglist
229# logdet not needed ???
230#logdetsigma = 2 * np.sum(np.log(np.diagonal(cholsigmainv)))
231x_whitened = np.dot(cholsigmainv, x)
232
233#sigmainv = linalg.cholesky(sigma)
234logdetsigma = np.log(np.linalg.det(sigma))
235
236sigma2 = 1. # error variance is included in sigma
237
238llike  =  0.5 * (np.log(sigma2) - 2.* np.log(np.diagonal(cholsigmainv))
239                      + (x_whitened**2)/sigma2
240                      +  np.log(2*np.pi))
241
242ll, ls = mvn_nloglike_obs(x, sigma)
243#the following are all the same for diagonal sigma
244print(ll.sum(), 'll.sum()')
245print(llike.sum(), 'llike.sum()')
246print(np.log(stats.norm._pdf(x_whitened)).sum() - 0.5 * logdetsigma,)
247print('stats whitened')
248print(np.log(stats.norm.pdf(x,scale=np.sqrt(np.diag(sigma)))).sum(),)
249print('stats scaled')
250print(0.5*(np.dot(linalg.cho_solve((linalg.cho_factor(sigma, lower=False)[0].T,
251                                    False),x.T), x)
252           + nobs*np.log(2*np.pi)
253           - 2.* np.log(np.diagonal(cholsigmainv)).sum()))
254print(0.5*(np.dot(linalg.cho_solve((linalg.cho_factor(sigma)[0].T, False),x.T), x) + nobs*np.log(2*np.pi)- 2.* np.log(np.diagonal(cholsigmainv)).sum()))
255print(0.5*(np.dot(linalg.cho_solve(linalg.cho_factor(sigma),x.T), x) + nobs*np.log(2*np.pi)- 2.* np.log(np.diagonal(cholsigmainv)).sum()))
256print(mvn_loglike(x, sigma))
257
258
259normtransf = AffineTransform(np.zeros(nobs), cholsigma, StandardNormal())
260print(normtransf.logpdf(x_whitened).sum())
261#print(normtransf.rvs(5)
262print(loglike_ar1(x, 0.8))
263
264mch = MultivariateNormalChol(np.zeros(nobs), sigma)
265print(mch.logpdf(x))
266
267#from .linalg_decomp_1 import tiny2zero
268#print(tiny2zero(mch.cholsigmainv / mch.cholsigmainv[-1,-1])
269
270xw = mch.whiten(x)
271print('xSigmax', np.dot(xw,xw))
272print('xSigmax', np.dot(x,linalg.cho_solve(linalg.cho_factor(mch.sigma),x)))
273print('xSigmax', np.dot(x,linalg.cho_solve((mch.cholsigma, False),x)))
274