1# -*- coding: utf-8 -*- 2"""Kernel Regression and Significance Test 3 4Warning: SLOW, 11 minutes on my computer 5 6Created on Thu Jan 03 20:20:47 2013 7 8Author: Josef Perktold 9 10results - this version 11---------------------- 12 13>>> exec(open('ex_kernel_regression_censored1.py').read()) 14bw 15[ 0.3987821 0.50933458] 16[0.39878209999999997, 0.50933457999999998] 17 18sig_test - default 19Not Significant 20pvalue 210.11 22test statistic 0.000434305313291 23bootstrap critical values 24[ 0.00043875 0.00046808 0.0005064 0.00054151] 25 26sig_test - pivot=True, nboot=200, nested_res=50 27pvalue 280.01 29test statistic 6.17877171579 30bootstrap critical values 31[ 5.5658345 5.74761076 5.87386858 6.46012041] 32times: 8.34599995613 20.6909999847 666.373999834 33 34""" 35 36import time 37 38import numpy as np 39import statsmodels.nonparametric.api as nparam 40import statsmodels.nonparametric.kernel_regression as smkr 41 42if __name__ == '__main__': 43 t0 = time.time() 44 #example from test file 45 nobs = 200 46 np.random.seed(1234) 47 C1 = np.random.normal(size=(nobs, )) 48 C2 = np.random.normal(2, 1, size=(nobs, )) 49 noise = np.random.normal(size=(nobs, )) 50 Y = 0.3 +1.2 * C1 - 0.9 * C2 + noise 51 #self.write2file('RegData.csv', (Y, C1, C2)) 52 53 #CODE TO PRODUCE BANDWIDTH ESTIMATION IN R 54 #library(np) 55 #data <- read.csv('RegData.csv', header=FALSE) 56 #bw <- npregbw(formula=data$V1 ~ data$V2 + data$V3, 57 # bwmethod='cv.aic', regtype='lc') 58 model = nparam.KernelReg(endog=[Y], exog=[C1, C2], 59 reg_type='lc', var_type='cc', bw='aic') 60 mean, marg = model.fit() 61 #R_bw = [0.4017893, 0.4943397] # Bandwidth obtained in R 62 bw_expected = [0.3987821, 0.50933458] 63 #npt.assert_allclose(model.bw, bw_expected, rtol=1e-3) 64 print('bw') 65 print(model.bw) 66 print(bw_expected) 67 68 print('\nsig_test - default') 69 print(model.sig_test([1], nboot=100)) 70 t1 = time.time() 71 res0 = smkr.TestRegCoefC(model, [1]) 72 print('pvalue') 73 print((res0.t_dist >= res0.test_stat).mean()) 74 print('test statistic', res0.test_stat) 75 print('bootstrap critical values') 76 probs = np.array([0.9, 0.95, 0.975, 0.99]) 77 bsort0 = np.sort(res0.t_dist) 78 nrep0 = len(bsort0) 79 print(bsort0[(probs * nrep0).astype(int)]) 80 81 t2 = time.time() 82 print('\nsig_test - pivot=True, nboot=200, nested_res=50') 83 res1 = smkr.TestRegCoefC(model, [1], pivot=True, nboot=200, nested_res=50) 84 print('pvalue') 85 print((res1.t_dist >= res1.test_stat).mean()) 86 print('test statistic', res1.test_stat) 87 print('bootstrap critical values') 88 probs = np.array([0.9, 0.95, 0.975, 0.99]) 89 bsort1 = np.sort(res1.t_dist) 90 nrep1 = len(bsort1) 91 print(bsort1[(probs * nrep1).astype(int)]) 92 t3 = time.time() 93 94 print('times:', t1-t0, t2-t1, t3-t2) 95 96 97# import matplotlib.pyplot as plt 98# fig = plt.figure() 99# ax = fig.add_subplot(1,1,1) 100# ax.plot(x, y, 'o', alpha=0.5) 101# ax.plot(x, y_cens, 'o', alpha=0.5) 102# ax.plot(x, y_true, lw=2, label='DGP mean') 103# ax.plot(x, sm_mean, lw=2, label='model 0 mean') 104# ax.plot(x, mean2, lw=2, label='model 2 mean') 105# ax.legend() 106# 107# plt.show() 108