1import scipy.stats
2import numpy as np
3from numpy.testing import assert_allclose, assert_equal, assert_almost_equal
4from patsy import dmatrices  # pylint: disable=E0611
5import statsmodels.api as sm
6from statsmodels.regression.quantile_regression import QuantReg
7from .results.results_quantile_regression import (
8    biweight_chamberlain, biweight_hsheather, biweight_bofinger,
9    cosine_chamberlain, cosine_hsheather, cosine_bofinger,
10    gaussian_chamberlain, gaussian_hsheather, gaussian_bofinger,
11    epan2_chamberlain, epan2_hsheather, epan2_bofinger,
12    parzen_chamberlain, parzen_hsheather, parzen_bofinger,
13    # rectangle_chamberlain, rectangle_hsheather, rectangle_bofinger,
14    # triangle_chamberlain, triangle_hsheather, triangle_bofinger,
15    # epanechnikov_chamberlain, epanechnikov_hsheather, epanechnikov_bofinger,
16    epanechnikov_hsheather_q75, Rquantreg)
17
18idx = ['income', 'Intercept']
19
20
21class CheckModelResultsMixin(object):
22    def test_params(self):
23        assert_allclose(np.ravel(self.res1.params.loc[idx]),
24                        self.res2.table[:, 0], rtol=1e-3)
25
26    def test_bse(self):
27        assert_equal(self.res1.scale, 1)
28        assert_allclose(np.ravel(self.res1.bse.loc[idx]),
29                        self.res2.table[:, 1], rtol=1e-3)
30
31    def test_tvalues(self):
32        assert_allclose(np.ravel(self.res1.tvalues.loc[idx]),
33                        self.res2.table[:, 2], rtol=1e-2)
34
35    def test_pvalues(self):
36        pvals_stata = scipy.stats.t.sf(self.res2.table[:, 2], self.res2.df_r)
37        assert_allclose(np.ravel(self.res1.pvalues.loc[idx]),
38                        pvals_stata, rtol=1.1)
39
40        # test that we use the t distribution for the p-values
41        pvals_t = scipy.stats.t.sf(self.res1.tvalues, self.res2.df_r) * 2
42        assert_allclose(np.ravel(self.res1.pvalues),
43                        pvals_t, rtol=1e-9, atol=1e-10)
44
45    def test_conf_int(self):
46        assert_allclose(self.res1.conf_int().loc[idx],
47                        self.res2.table[:, -2:], rtol=1e-3)
48
49    def test_nobs(self):
50        assert_allclose(self.res1.nobs, self.res2.N, rtol=1e-3)
51
52    def test_df_model(self):
53        assert_allclose(self.res1.df_model, self.res2.df_m, rtol=1e-3)
54
55    def test_df_resid(self):
56        assert_allclose(self.res1.df_resid, self.res2.df_r, rtol=1e-3)
57
58    def test_prsquared(self):
59        assert_allclose(self.res1.prsquared, self.res2.psrsquared, rtol=1e-3)
60
61    def test_sparsity(self):
62        assert_allclose(np.array(self.res1.sparsity),
63                        self.res2.sparsity, rtol=1e-3)
64
65    def test_bandwidth(self):
66        assert_allclose(np.array(self.res1.bandwidth),
67                        self.res2.kbwidth, rtol=1e-3)
68
69
70d = {('biw', 'bofinger'): biweight_bofinger,
71     ('biw', 'chamberlain'): biweight_chamberlain,
72     ('biw', 'hsheather'): biweight_hsheather,
73     ('cos', 'bofinger'): cosine_bofinger,
74     ('cos', 'chamberlain'): cosine_chamberlain,
75     ('cos', 'hsheather'): cosine_hsheather,
76     ('gau', 'bofinger'): gaussian_bofinger,
77     ('gau', 'chamberlain'): gaussian_chamberlain,
78     ('gau', 'hsheather'): gaussian_hsheather,
79     ('par', 'bofinger'): parzen_bofinger,
80     ('par', 'chamberlain'): parzen_chamberlain,
81     ('par', 'hsheather'): parzen_hsheather,
82     # ('rec','bofinger'): rectangle_bofinger,
83     # ('rec','chamberlain'): rectangle_chamberlain,
84     # ('rec','hsheather'): rectangle_hsheather,
85     # ('tri','bofinger'): triangle_bofinger,
86     # ('tri','chamberlain'): triangle_chamberlain,
87     # ('tri','hsheather'): triangle_hsheather,
88     ('epa', 'bofinger'): epan2_bofinger,
89     ('epa', 'chamberlain'): epan2_chamberlain,
90     ('epa', 'hsheather'): epan2_hsheather
91     # ('epa2', 'bofinger'): epan2_bofinger,
92     # ('epa2', 'chamberlain'): epan2_chamberlain,
93     # ('epa2', 'hsheather'): epan2_hsheather
94     }
95
96
97def setup_fun(kernel='gau', bandwidth='bofinger'):
98    data = sm.datasets.engel.load_pandas().data
99    y, X = dmatrices('foodexp ~ income', data, return_type='dataframe')
100    statsm = QuantReg(y, X).fit(vcov='iid', kernel=kernel, bandwidth=bandwidth)
101    stata = d[(kernel, bandwidth)]
102    return statsm, stata
103
104
105def test_fitted_residuals():
106    data = sm.datasets.engel.load_pandas().data
107    y, X = dmatrices('foodexp ~ income', data, return_type='dataframe')
108    res = QuantReg(y, X).fit(q=.1)
109    # Note: maxabs relative error with fitted is 1.789e-09
110    assert_almost_equal(np.array(res.fittedvalues), Rquantreg.fittedvalues, 5)
111    assert_almost_equal(np.array(res.predict()), Rquantreg.fittedvalues, 5)
112    assert_almost_equal(np.array(res.resid), Rquantreg.residuals, 5)
113
114
115class TestEpanechnikovHsheatherQ75(CheckModelResultsMixin):
116    # Vincent Arel-Bundock also spot-checked q=.1
117    @classmethod
118    def setup_class(cls):
119        data = sm.datasets.engel.load_pandas().data
120        y, X = dmatrices('foodexp ~ income', data, return_type='dataframe')
121        cls.res1 = QuantReg(y, X).fit(q=.75, vcov='iid', kernel='epa',
122                                      bandwidth='hsheather')
123        cls.res2 = epanechnikov_hsheather_q75
124
125
126class TestEpanechnikovBofinger(CheckModelResultsMixin):
127    @classmethod
128    def setup_class(cls):
129        cls.res1, cls.res2 = setup_fun('epa', 'bofinger')
130
131
132class TestEpanechnikovChamberlain(CheckModelResultsMixin):
133    @classmethod
134    def setup_class(cls):
135        cls.res1, cls.res2 = setup_fun('epa', 'chamberlain')
136
137
138class TestEpanechnikovHsheather(CheckModelResultsMixin):
139    @classmethod
140    def setup_class(cls):
141        cls.res1, cls.res2 = setup_fun('epa', 'hsheather')
142
143
144class TestGaussianBofinger(CheckModelResultsMixin):
145    @classmethod
146    def setup_class(cls):
147        cls.res1, cls.res2 = setup_fun('gau', 'bofinger')
148
149
150class TestGaussianChamberlain(CheckModelResultsMixin):
151    @classmethod
152    def setup_class(cls):
153        cls.res1, cls.res2 = setup_fun('gau', 'chamberlain')
154
155
156class TestGaussianHsheather(CheckModelResultsMixin):
157    @classmethod
158    def setup_class(cls):
159        cls.res1, cls.res2 = setup_fun('gau', 'hsheather')
160
161
162class TestBiweightBofinger(CheckModelResultsMixin):
163    @classmethod
164    def setup_class(cls):
165        cls.res1, cls.res2 = setup_fun('biw', 'bofinger')
166
167
168class TestBiweightChamberlain(CheckModelResultsMixin):
169    @classmethod
170    def setup_class(cls):
171        cls.res1, cls.res2 = setup_fun('biw', 'chamberlain')
172
173
174class TestBiweightHsheather(CheckModelResultsMixin):
175    @classmethod
176    def setup_class(cls):
177        cls.res1, cls.res2 = setup_fun('biw', 'hsheather')
178
179
180class TestCosineBofinger(CheckModelResultsMixin):
181    @classmethod
182    def setup_class(cls):
183        cls.res1, cls.res2 = setup_fun('cos', 'bofinger')
184
185
186class TestCosineChamberlain(CheckModelResultsMixin):
187    @classmethod
188    def setup_class(cls):
189        cls.res1, cls.res2 = setup_fun('cos', 'chamberlain')
190
191
192class TestCosineHsheather(CheckModelResultsMixin):
193    @classmethod
194    def setup_class(cls):
195        cls.res1, cls.res2 = setup_fun('cos', 'hsheather')
196
197
198class TestParzeneBofinger(CheckModelResultsMixin):
199    @classmethod
200    def setup_class(cls):
201        cls.res1, cls.res2 = setup_fun('par', 'bofinger')
202
203
204class TestParzeneChamberlain(CheckModelResultsMixin):
205    @classmethod
206    def setup_class(cls):
207        cls.res1, cls.res2 = setup_fun('par', 'chamberlain')
208
209
210class TestParzeneHsheather(CheckModelResultsMixin):
211    @classmethod
212    def setup_class(cls):
213        cls.res1, cls.res2 = setup_fun('par', 'hsheather')
214
215# class TestTriangleBofinger(CheckModelResultsMixin):
216#    @classmethod
217#    def setup_class(cls):
218#        cls.res1, cls.res2 = setup_fun('tri', 'bofinger')
219
220# class TestTriangleChamberlain(CheckModelResultsMixin):
221#    @classmethod
222#    def setup_class(cls):
223#        cls.res1, cls.res2 = setup_fun('tri', 'chamberlain')
224
225# class TestTriangleHsheather(CheckModelResultsMixin):
226#    @classmethod
227#    def setup_class(cls):
228#        cls.res1, cls.res2 = setup_fun('tri', 'hsheather')
229
230
231def test_zero_resid():
232    # smoke and regression tests
233
234    X = np.array([[1, 0], [0, 1], [0, 2.1], [0, 3.1]], dtype=np.float64)
235    y = np.array([0, 1, 2, 3], dtype=np.float64)
236
237    res = QuantReg(y, X).fit(0.5, bandwidth='chamberlain')  # 'bofinger')
238    res.summary()
239
240    assert_allclose(res.params,
241                    np.array([0.0, 0.96774163]),
242                    rtol=1e-4, atol=1e-20)
243    assert_allclose(res.bse,
244                    np.array([0.0447576, 0.01154867]),
245                    rtol=1e-4, atol=1e-20)
246    assert_allclose(res.resid,
247                    np.array([0.0, 3.22583680e-02,
248                              -3.22574272e-02, 9.40732912e-07]),
249                    rtol=1e-4, atol=1e-20)
250
251    X = np.array([[1, 0], [0.1, 1], [0, 2.1], [0, 3.1]], dtype=np.float64)
252    y = np.array([0, 1, 2, 3], dtype=np.float64)
253
254    res = QuantReg(y, X).fit(0.5, bandwidth='chamberlain')
255    res.summary()
256
257    assert_allclose(res.params, np.array([9.99982796e-08, 9.67741630e-01]),
258                    rtol=1e-4, atol=1e-20)
259    assert_allclose(res.bse, np.array([0.04455029, 0.01155251]), rtol=1e-4,
260                    atol=1e-20)
261    assert_allclose(res.resid, np.array([-9.99982796e-08, 3.22583598e-02,
262                                         -3.22574234e-02, 9.46361860e-07]),
263                    rtol=1e-4, atol=1e-20)
264
265
266def test_use_t_summary():
267    X = np.array([[1, 0], [0, 1], [0, 2.1], [0, 3.1]], dtype=np.float64)
268    y = np.array([0, 1, 2, 3], dtype=np.float64)
269
270    res = QuantReg(y, X).fit(0.5, bandwidth='chamberlain', use_t=True)
271    summ = res.summary()
272    assert 'P>|t|' in str(summ)
273    assert 'P>|z|' not in str(summ)
274
275
276def test_alpha_summary():
277    X = np.array([[1, 0], [0, 1], [0, 2.1], [0, 3.1]], dtype=np.float64)
278    y = np.array([0, 1, 2, 3], dtype=np.float64)
279
280    res = QuantReg(y, X).fit(0.5, bandwidth='chamberlain', use_t=True)
281    summ_20 = res.summary(alpha=.2)
282    assert '[0.025      0.975]' not in str(summ_20)
283    assert '[0.1        0.9]' in str(summ_20)
284
285
286def test_remove_data():
287    X = np.array([[1, 0], [0, 1], [0, 2.1], [0, 3.1]], dtype=np.float64)
288    y = np.array([0, 1, 2, 3], dtype=np.float64)
289
290    res = QuantReg(y, X).fit(0.5)
291    res.remove_data()
292
293
294def test_collinear_matrix():
295    X = np.array([[1, 0, .5], [1, 0, .8],
296                  [1, 0, 1.5], [1, 0, .25]], dtype=np.float64)
297    y = np.array([0, 1, 2, 3], dtype=np.float64)
298
299    res_collinear = QuantReg(y, X).fit(0.5)
300    assert len(res_collinear.params) == X.shape[1]
301
302
303def test_nontrivial_singular_matrix():
304    x_one = np.random.random(1000)
305    x_two = np.random.random(1000)*10
306    x_three = np.random.random(1000)
307    intercept = np.ones(1000)
308
309    y = np.random.random(1000)*5
310    X = np.column_stack((intercept, x_one, x_two, x_three, x_one))
311
312    assert np.linalg.matrix_rank(X) < X.shape[1]
313    res_singular = QuantReg(y, X).fit(0.5)
314    assert len(res_singular.params) == X.shape[1]
315    assert np.linalg.matrix_rank(res_singular.cov_params()) == X.shape[1] - 1
316
317    # prediction is correct even with singular exog
318    res_ns = QuantReg(y, X[:, :-1]).fit(0.5)
319    assert_allclose(res_singular.fittedvalues, res_ns.fittedvalues, rtol=0.01)
320