1import math 2import numpy as np 3from scipy import linalg, stats, special 4 5from .linalg_decomp_1 import SvdArray 6 7 8#univariate standard normal distribution 9#following from scipy.stats.distributions with adjustments 10sqrt2pi = math.sqrt(2 * np.pi) 11logsqrt2pi = math.log(sqrt2pi) 12 13class StandardNormal(object): 14 '''Distribution of vector x, with independent distribution N(0,1) 15 16 this is the same as univariate normal for pdf and logpdf 17 18 other methods not checked/adjusted yet 19 20 ''' 21 def rvs(self, size): 22 return np.random.standard_normal(size) 23 def pdf(self, x): 24 return np.exp(-x**2 * 0.5) / sqrt2pi 25 def logpdf(self, x): 26 return -x**2 * 0.5 - logsqrt2pi 27 def _cdf(self, x): 28 return special.ndtr(x) 29 def _logcdf(self, x): 30 return np.log(special.ndtr(x)) 31 def _ppf(self, q): 32 return special.ndtri(q) 33 34 35class AffineTransform(object): 36 '''affine full rank transformation of a multivariate distribution 37 38 no dimension checking, assumes everything broadcasts correctly 39 first version without bound support 40 41 provides distribution of y given distribution of x 42 y = const + tmat * x 43 44 ''' 45 def __init__(self, const, tmat, dist): 46 self.const = const 47 self.tmat = tmat 48 self.dist = dist 49 self.nrv = len(const) 50 if not np.equal(self.nrv, tmat.shape).all(): 51 raise ValueError('dimension of const and tmat do not agree') 52 53 #replace the following with a linalgarray class 54 self.tmatinv = linalg.inv(tmat) 55 self.absdet = np.abs(np.linalg.det(self.tmat)) 56 self.logabsdet = np.log(np.abs(np.linalg.det(self.tmat))) 57 self.dist 58 59 def rvs(self, size): 60 #size can only be integer not yet tuple 61 print((size,)+(self.nrv,)) 62 return self.transform(self.dist.rvs(size=(size,)+(self.nrv,))) 63 64 def transform(self, x): 65 #return np.dot(self.tmat, x) + self.const 66 return np.dot(x, self.tmat) + self.const 67 68 def invtransform(self, y): 69 return np.dot(self.tmatinv, y - self.const) 70 71 def pdf(self, x): 72 return 1. / self.absdet * self.dist.pdf(self.invtransform(x)) 73 74 def logpdf(self, x): 75 return - self.logabsdet + self.dist.logpdf(self.invtransform(x)) 76 77 78 79 80class MultivariateNormalChol(object): 81 '''multivariate normal distribution with cholesky decomposition of sigma 82 83 ignoring mean at the beginning, maybe 84 85 needs testing for broadcasting to contemporaneously but not intertemporaly 86 correlated random variable, which axis?, 87 maybe swapaxis or rollaxis if x.ndim != mean.ndim == (sigma.ndim - 1) 88 89 initially 1d is ok, 2d should work with iid in axis 0 and mvn in axis 1 90 91 ''' 92 93 def __init__(self, mean, sigma): 94 self.mean = mean 95 self.sigma = sigma 96 self.sigmainv = sigmainv 97 self.cholsigma = linalg.cholesky(sigma) 98 #the following makes it lower triangular with increasing time 99 self.cholsigmainv = linalg.cholesky(sigmainv)[::-1,::-1] 100 #todo: this might be a trick todo backward instead of forward filtering 101 102 def whiten(self, x): 103 return np.dot(cholsigmainv, x) 104 105 def logpdf_obs(self, x): 106 x = x - self.mean 107 x_whitened = self.whiten(x) 108 109 #sigmainv = linalg.cholesky(sigma) 110 logdetsigma = np.log(np.linalg.det(sigma)) 111 112 sigma2 = 1. # error variance is included in sigma 113 114 llike = 0.5 * (np.log(sigma2) 115 - 2.* np.log(np.diagonal(self.cholsigmainv)) 116 + (x_whitened**2)/sigma2 117 + np.log(2*np.pi)) 118 119 return llike 120 121 def logpdf(self, x): 122 return self.logpdf_obs(x).sum(-1) 123 124 def pdf(self, x): 125 return np.exp(self.logpdf(x)) 126 127 128 129class MultivariateNormal(object): 130 131 def __init__(self, mean, sigma): 132 self.mean = mean 133 self.sigma = SvdArray(sigma) 134 135 136 137 138def loglike_ar1(x, rho): 139 '''loglikelihood of AR(1) process, as a test case 140 141 sigma_u partially hard coded 142 143 Greene chapter 12 eq. (12-31) 144 ''' 145 x = np.asarray(x) 146 u = np.r_[x[0], x[1:] - rho * x[:-1]] 147 sigma_u2 = 2*(1-rho**2) 148 loglik = 0.5*(-(u**2).sum(0) / sigma_u2 + np.log(1-rho**2) 149 - x.shape[0] * (np.log(2*np.pi) + np.log(sigma_u2))) 150 return loglik 151 152 153def ar2transform(x, arcoefs): 154 ''' 155 156 (Greene eq 12-30) 157 ''' 158 a1, a2 = arcoefs 159 y = np.zeros_like(x) 160 y[0] = np.sqrt((1+a2) * ((1-a2)**2 - a1**2) / (1-a2)) * x[0] 161 y[1] = np.sqrt(1-a2**2) * x[2] - a1 * np.sqrt(1-a1**2)/(1-a2) * x[1] #TODO:wrong index in x 162 y[2:] = x[2:] - a1 * x[1:-1] - a2 * x[:-2] 163 return y 164 165 166def mvn_loglike(x, sigma): 167 '''loglike multivariate normal 168 169 assumes x is 1d, (nobs,) and sigma is 2d (nobs, nobs) 170 171 brute force from formula 172 no checking of correct inputs 173 use of inv and log-det should be replace with something more efficient 174 ''' 175 #see numpy thread 176 #Sturla: sqmahal = (cx*cho_solve(cho_factor(S),cx.T).T).sum(axis=1) 177 sigmainv = linalg.inv(sigma) 178 logdetsigma = np.log(np.linalg.det(sigma)) 179 nobs = len(x) 180 181 llf = - np.dot(x, np.dot(sigmainv, x)) 182 llf -= nobs * np.log(2 * np.pi) 183 llf -= logdetsigma 184 llf *= 0.5 185 return llf 186 187def mvn_nloglike_obs(x, sigma): 188 '''loglike multivariate normal 189 190 assumes x is 1d, (nobs,) and sigma is 2d (nobs, nobs) 191 192 brute force from formula 193 no checking of correct inputs 194 use of inv and log-det should be replace with something more efficient 195 ''' 196 #see numpy thread 197 #Sturla: sqmahal = (cx*cho_solve(cho_factor(S),cx.T).T).sum(axis=1) 198 199 #Still wasteful to calculate pinv first 200 sigmainv = linalg.inv(sigma) 201 cholsigmainv = linalg.cholesky(sigmainv) 202 #2 * np.sum(np.log(np.diagonal(np.linalg.cholesky(A)))) #Dag mailinglist 203 # logdet not needed ??? 204 #logdetsigma = 2 * np.sum(np.log(np.diagonal(cholsigmainv))) 205 x_whitened = np.dot(cholsigmainv, x) 206 207 #sigmainv = linalg.cholesky(sigma) 208 logdetsigma = np.log(np.linalg.det(sigma)) 209 210 sigma2 = 1. # error variance is included in sigma 211 212 llike = 0.5 * (np.log(sigma2) - 2.* np.log(np.diagonal(cholsigmainv)) 213 + (x_whitened**2)/sigma2 214 + np.log(2*np.pi)) 215 216 return llike, (x_whitened**2) 217 218nobs = 10 219x = np.arange(nobs) 220autocov = 2*0.8**np.arange(nobs)# +0.01 * np.random.randn(nobs) 221sigma = linalg.toeplitz(autocov) 222#sigma = np.diag(1+np.random.randn(10)**2) 223 224cholsigma = linalg.cholesky(sigma).T#, lower=True) 225 226sigmainv = linalg.inv(sigma) 227cholsigmainv = linalg.cholesky(sigmainv) 228#2 * np.sum(np.log(np.diagonal(np.linalg.cholesky(A)))) #Dag mailinglist 229# logdet not needed ??? 230#logdetsigma = 2 * np.sum(np.log(np.diagonal(cholsigmainv))) 231x_whitened = np.dot(cholsigmainv, x) 232 233#sigmainv = linalg.cholesky(sigma) 234logdetsigma = np.log(np.linalg.det(sigma)) 235 236sigma2 = 1. # error variance is included in sigma 237 238llike = 0.5 * (np.log(sigma2) - 2.* np.log(np.diagonal(cholsigmainv)) 239 + (x_whitened**2)/sigma2 240 + np.log(2*np.pi)) 241 242ll, ls = mvn_nloglike_obs(x, sigma) 243#the following are all the same for diagonal sigma 244print(ll.sum(), 'll.sum()') 245print(llike.sum(), 'llike.sum()') 246print(np.log(stats.norm._pdf(x_whitened)).sum() - 0.5 * logdetsigma,) 247print('stats whitened') 248print(np.log(stats.norm.pdf(x,scale=np.sqrt(np.diag(sigma)))).sum(),) 249print('stats scaled') 250print(0.5*(np.dot(linalg.cho_solve((linalg.cho_factor(sigma, lower=False)[0].T, 251 False),x.T), x) 252 + nobs*np.log(2*np.pi) 253 - 2.* np.log(np.diagonal(cholsigmainv)).sum())) 254print(0.5*(np.dot(linalg.cho_solve((linalg.cho_factor(sigma)[0].T, False),x.T), x) + nobs*np.log(2*np.pi)- 2.* np.log(np.diagonal(cholsigmainv)).sum())) 255print(0.5*(np.dot(linalg.cho_solve(linalg.cho_factor(sigma),x.T), x) + nobs*np.log(2*np.pi)- 2.* np.log(np.diagonal(cholsigmainv)).sum())) 256print(mvn_loglike(x, sigma)) 257 258 259normtransf = AffineTransform(np.zeros(nobs), cholsigma, StandardNormal()) 260print(normtransf.logpdf(x_whitened).sum()) 261#print(normtransf.rvs(5) 262print(loglike_ar1(x, 0.8)) 263 264mch = MultivariateNormalChol(np.zeros(nobs), sigma) 265print(mch.logpdf(x)) 266 267#from .linalg_decomp_1 import tiny2zero 268#print(tiny2zero(mch.cholsigmainv / mch.cholsigmainv[-1,-1]) 269 270xw = mch.whiten(x) 271print('xSigmax', np.dot(xw,xw)) 272print('xSigmax', np.dot(x,linalg.cho_solve(linalg.cho_factor(mch.sigma),x))) 273print('xSigmax', np.dot(x,linalg.cho_solve((mch.cholsigma, False),x))) 274