1# -*- coding: utf-8 -*-
2"""Kernel Regression and Significance Test
3
4Warning: SLOW, 11 minutes on my computer
5
6Created on Thu Jan 03 20:20:47 2013
7
8Author: Josef Perktold
9
10results - this version
11----------------------
12
13>>> exec(open('ex_kernel_regression_censored1.py').read())
14bw
15[ 0.3987821   0.50933458]
16[0.39878209999999997, 0.50933457999999998]
17
18sig_test - default
19Not Significant
20pvalue
210.11
22test statistic 0.000434305313291
23bootstrap critical values
24[ 0.00043875  0.00046808  0.0005064   0.00054151]
25
26sig_test - pivot=True, nboot=200, nested_res=50
27pvalue
280.01
29test statistic 6.17877171579
30bootstrap critical values
31[ 5.5658345   5.74761076  5.87386858  6.46012041]
32times: 8.34599995613 20.6909999847 666.373999834
33
34"""
35
36import time
37
38import numpy as np
39import statsmodels.nonparametric.api as nparam
40import statsmodels.nonparametric.kernel_regression as smkr
41
42if __name__ == '__main__':
43    t0 = time.time()
44    #example from test file
45    nobs = 200
46    np.random.seed(1234)
47    C1 = np.random.normal(size=(nobs, ))
48    C2 = np.random.normal(2, 1, size=(nobs, ))
49    noise = np.random.normal(size=(nobs, ))
50    Y = 0.3 +1.2 * C1 - 0.9 * C2 + noise
51    #self.write2file('RegData.csv', (Y, C1, C2))
52
53    #CODE TO PRODUCE BANDWIDTH ESTIMATION IN R
54    #library(np)
55    #data <- read.csv('RegData.csv', header=FALSE)
56    #bw <- npregbw(formula=data$V1 ~ data$V2 + data$V3,
57    #                bwmethod='cv.aic', regtype='lc')
58    model = nparam.KernelReg(endog=[Y], exog=[C1, C2],
59                             reg_type='lc', var_type='cc', bw='aic')
60    mean, marg = model.fit()
61    #R_bw = [0.4017893, 0.4943397]  # Bandwidth obtained in R
62    bw_expected = [0.3987821, 0.50933458]
63    #npt.assert_allclose(model.bw, bw_expected, rtol=1e-3)
64    print('bw')
65    print(model.bw)
66    print(bw_expected)
67
68    print('\nsig_test - default')
69    print(model.sig_test([1], nboot=100))
70    t1 = time.time()
71    res0 = smkr.TestRegCoefC(model, [1])
72    print('pvalue')
73    print((res0.t_dist >= res0.test_stat).mean())
74    print('test statistic', res0.test_stat)
75    print('bootstrap critical values')
76    probs = np.array([0.9, 0.95, 0.975, 0.99])
77    bsort0 = np.sort(res0.t_dist)
78    nrep0 = len(bsort0)
79    print(bsort0[(probs * nrep0).astype(int)])
80
81    t2 = time.time()
82    print('\nsig_test - pivot=True, nboot=200, nested_res=50')
83    res1 = smkr.TestRegCoefC(model, [1], pivot=True, nboot=200, nested_res=50)
84    print('pvalue')
85    print((res1.t_dist >= res1.test_stat).mean())
86    print('test statistic', res1.test_stat)
87    print('bootstrap critical values')
88    probs = np.array([0.9, 0.95, 0.975, 0.99])
89    bsort1 = np.sort(res1.t_dist)
90    nrep1 = len(bsort1)
91    print(bsort1[(probs * nrep1).astype(int)])
92    t3 = time.time()
93
94    print('times:', t1-t0, t2-t1, t3-t2)
95
96
97#    import matplotlib.pyplot as plt
98#    fig = plt.figure()
99#    ax = fig.add_subplot(1,1,1)
100#    ax.plot(x, y, 'o', alpha=0.5)
101#    ax.plot(x, y_cens, 'o', alpha=0.5)
102#    ax.plot(x, y_true, lw=2, label='DGP mean')
103#    ax.plot(x, sm_mean, lw=2, label='model 0 mean')
104#    ax.plot(x, mean2, lw=2, label='model 2 mean')
105#    ax.legend()
106#
107#    plt.show()
108