'''subclassing kde

Author: josef pktd
'''

import numpy as np
from numpy.testing import assert_almost_equal, assert_
import scipy
from scipy import stats
# pyplot is the supported interface; matplotlib.pylab is deprecated
import matplotlib.pyplot as plt


class gaussian_kde_set_covariance(stats.gaussian_kde):
    '''gaussian_kde with a fixed, user supplied covariance matrix

    from Anne Archibald in mailinglist:
    http://www.nabble.com/Width-of-the-gaussian-in-stats.kde.gaussian_kde---td19558924.html#a19558924
    '''

    def __init__(self, dataset, covariance):
        # covariance must be set *before* the base __init__, because the
        # base __init__ calls _compute_covariance, which reads it
        self.covariance = covariance
        scipy.stats.gaussian_kde.__init__(self, dataset)

    def _compute_covariance(self):
        # NOTE(review): writes scipy-internal attributes (inv_cov,
        # _norm_factor); newer scipy releases changed these internals --
        # confirm against the installed scipy version
        self.inv_cov = np.linalg.inv(self.covariance)
        self._norm_factor = np.sqrt(np.linalg.det(2 * np.pi * self.covariance)) * self.n


class gaussian_kde_covfact(stats.gaussian_kde):
    '''gaussian_kde with a selectable bandwidth (covariance) factor

    Parameters
    ----------
    dataset : array_like
        sample from which the density is estimated
    covfact : {'scotts', 'sc', 'silverman', 'si'} or number
        rule-of-thumb name or an explicit bandwidth multiplier
    '''

    def __init__(self, dataset, covfact='scotts'):
        self.covfact = covfact
        scipy.stats.gaussian_kde.__init__(self, dataset)

    def _compute_covariance_(self):
        '''not used'''
        self.inv_cov = np.linalg.inv(self.covariance)
        self._norm_factor = np.sqrt(np.linalg.det(2 * np.pi * self.covariance)) * self.n

    def covariance_factor(self):
        '''return the bandwidth multiplier selected by ``self.covfact``

        Raises
        ------
        ValueError
            if covfact is neither a recognized rule name nor truthy
            (convertible to float)
        '''
        # normalized to a single if/elif chain; the original mixed a bare
        # ``if`` into the chain, which only worked because of the returns
        if self.covfact in ('sc', 'scotts'):
            return self.scotts_factor()
        elif self.covfact in ('si', 'silverman'):
            return self.silverman_factor()
        elif self.covfact:
            # numeric (or numeric string) bandwidth factor
            return float(self.covfact)
        else:
            raise ValueError('covariance factor has to be scotts, silverman or a number')

    def reset_covfact(self, covfact):
        '''switch to a new bandwidth factor and recompute the covariance'''
        self.covfact = covfact
        self.covariance_factor()
        self._compute_covariance()


def plotkde(covfact):
    '''plot the kde estimate for a new bandwidth factor against the DGP

    Uses the module-level variables gkde, ind, xn, alpha, mlow, mhigh
    that are set up in the ``__main__`` section.
    '''
    gkde.reset_covfact(covfact)
    kdepdf = gkde.evaluate(ind)
    plt.figure()
    # plot histogram of sample; density=True replaces the removed
    # ``normed=1`` keyword (dropped in matplotlib 3.x)
    plt.hist(xn, bins=20, density=True)
    # plot estimated density
    plt.plot(ind, kdepdf, label='kde', color="g")
    # plot data generating density
    plt.plot(ind, alpha * stats.norm.pdf(ind, loc=mlow) +
                  (1 - alpha) * stats.norm.pdf(ind, loc=mhigh),
             color="r", label='DGP: normal mix')
    plt.title('Kernel Density Estimation - ' + str(gkde.covfact))
    plt.legend()


def test_kde_1d():
    '''sanity-check stats.gaussian_kde on a 1d normal sample

    Compares the kde pdf with the fitted normal pdf and checks the
    integrate_* methods against numerically integrated values.
    '''
    np.random.seed(8765678)
    n_basesample = 500
    xn = np.random.randn(n_basesample)
    xnmean = xn.mean()
    xnstd = xn.std(ddof=1)
    print(xnmean, xnstd)

    # get kde for original sample
    gkde = stats.gaussian_kde(xn)

    # evaluate the density function for the kde for some points
    xs = np.linspace(-7, 7, 501)
    kdepdf = gkde.evaluate(xs)
    normpdf = stats.norm.pdf(xs, loc=xnmean, scale=xnstd)
    print('MSE', np.sum((kdepdf - normpdf)**2))
    print('axabserror', np.max(np.abs(kdepdf - normpdf)))
    interval = xs[1] - xs[0]
    # integrated squared error of kde against the fitted normal
    assert_(np.sum((kdepdf - normpdf)**2) * interval < 0.01)
    #assert_array_almost_equal(kdepdf, normpdf, decimal=2)
    print(gkde.integrate_gaussian(0.0, 1.0))
    print(gkde.integrate_box_1d(-np.inf, 0.0))
    print(gkde.integrate_box_1d(0.0, np.inf))
    print(gkde.integrate_box_1d(-np.inf, xnmean))
    print(gkde.integrate_box_1d(xnmean, np.inf))

    # each half of the density around the sample mean is ~0.5
    assert_almost_equal(gkde.integrate_box_1d(xnmean, np.inf), 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_box_1d(-np.inf, xnmean), 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_box(xnmean, np.inf), 0.5, decimal=1)
    assert_almost_equal(gkde.integrate_box(-np.inf, xnmean), 0.5, decimal=1)

    # closed-form integrals vs. Riemann sums on the evaluation grid
    assert_almost_equal(gkde.integrate_kde(gkde),
                        (kdepdf**2).sum() * interval, decimal=2)
    assert_almost_equal(gkde.integrate_gaussian(xnmean, xnstd**2),
                        (kdepdf * normpdf).sum() * interval, decimal=2)
##    assert_almost_equal(gkde.integrate_gaussian(0.0, 1.0),
##                        (kdepdf*normpdf).sum()*intervall, decimal=2)


if __name__ == '__main__':
    # generate a sample from a two-component normal mixture
    n_basesample = 1000
    np.random.seed(8765678)
    alpha = 0.6  # weight for (prob of) lower distribution
    mlow, mhigh = (-3, 3)  # mean locations for gaussian mixture
    # randn requires integer sizes; alpha * n_basesample is a float,
    # which raises TypeError on current numpy
    xn = np.concatenate([mlow + np.random.randn(int(alpha * n_basesample)),
                         mhigh + np.random.randn(int((1 - alpha) * n_basesample))])

    # evaluate the density function for the kde for some points
    ind = np.linspace(-7, 7, 101)

    # compare a fixed small bandwidth with the scotts rule of thumb;
    # the two previously copy-pasted plotting sections are one loop now
    for covfact in [0.1, 'scotts']:
        # get kde for original sample
        gkde = gaussian_kde_covfact(xn, covfact)
        kdepdf = gkde.evaluate(ind)

        plt.figure()
        # plot histogram of sample (density=True replaces removed normed=1)
        plt.hist(xn, bins=20, density=True)
        # plot estimated density
        plt.plot(ind, kdepdf, label='kde', color="g")
        # plot data generating density
        plt.plot(ind, alpha * stats.norm.pdf(ind, loc=mlow) +
                      (1 - alpha) * stats.norm.pdf(ind, loc=mhigh),
                 color="r", label='DGP: normal mix')
        plt.title('Kernel Density Estimation')
        plt.legend()
    #plt.show()

    # replot with a range of bandwidth choices on the last gkde
    for cv in ['scotts', 'silverman', 0.05, 0.1, 0.5]:
        plotkde(cv)

    test_kde_1d()

    np.random.seed(8765678)
    n_basesample = 1000
    xn = np.random.randn(n_basesample)
    xnmean = xn.mean()
    xnstd = xn.std(ddof=1)

    # get kde for original sample
    gkde = stats.gaussian_kde(xn)