1import scipy.stats 2import numpy as np 3from numpy.testing import assert_allclose, assert_equal, assert_almost_equal 4from patsy import dmatrices # pylint: disable=E0611 5import statsmodels.api as sm 6from statsmodels.regression.quantile_regression import QuantReg 7from .results.results_quantile_regression import ( 8 biweight_chamberlain, biweight_hsheather, biweight_bofinger, 9 cosine_chamberlain, cosine_hsheather, cosine_bofinger, 10 gaussian_chamberlain, gaussian_hsheather, gaussian_bofinger, 11 epan2_chamberlain, epan2_hsheather, epan2_bofinger, 12 parzen_chamberlain, parzen_hsheather, parzen_bofinger, 13 # rectangle_chamberlain, rectangle_hsheather, rectangle_bofinger, 14 # triangle_chamberlain, triangle_hsheather, triangle_bofinger, 15 # epanechnikov_chamberlain, epanechnikov_hsheather, epanechnikov_bofinger, 16 epanechnikov_hsheather_q75, Rquantreg) 17 18idx = ['income', 'Intercept'] 19 20 21class CheckModelResultsMixin(object): 22 def test_params(self): 23 assert_allclose(np.ravel(self.res1.params.loc[idx]), 24 self.res2.table[:, 0], rtol=1e-3) 25 26 def test_bse(self): 27 assert_equal(self.res1.scale, 1) 28 assert_allclose(np.ravel(self.res1.bse.loc[idx]), 29 self.res2.table[:, 1], rtol=1e-3) 30 31 def test_tvalues(self): 32 assert_allclose(np.ravel(self.res1.tvalues.loc[idx]), 33 self.res2.table[:, 2], rtol=1e-2) 34 35 def test_pvalues(self): 36 pvals_stata = scipy.stats.t.sf(self.res2.table[:, 2], self.res2.df_r) 37 assert_allclose(np.ravel(self.res1.pvalues.loc[idx]), 38 pvals_stata, rtol=1.1) 39 40 # test that we use the t distribution for the p-values 41 pvals_t = scipy.stats.t.sf(self.res1.tvalues, self.res2.df_r) * 2 42 assert_allclose(np.ravel(self.res1.pvalues), 43 pvals_t, rtol=1e-9, atol=1e-10) 44 45 def test_conf_int(self): 46 assert_allclose(self.res1.conf_int().loc[idx], 47 self.res2.table[:, -2:], rtol=1e-3) 48 49 def test_nobs(self): 50 assert_allclose(self.res1.nobs, self.res2.N, rtol=1e-3) 51 52 def test_df_model(self): 53 assert_allclose(self.res1.df_model, self.res2.df_m, rtol=1e-3) 54 55 def test_df_resid(self): 56 assert_allclose(self.res1.df_resid, self.res2.df_r, rtol=1e-3) 57 58 def test_prsquared(self): 59 assert_allclose(self.res1.prsquared, self.res2.psrsquared, rtol=1e-3) 60 61 def test_sparsity(self): 62 assert_allclose(np.array(self.res1.sparsity), 63 self.res2.sparsity, rtol=1e-3) 64 65 def test_bandwidth(self): 66 assert_allclose(np.array(self.res1.bandwidth), 67 self.res2.kbwidth, rtol=1e-3) 68 69 70d = {('biw', 'bofinger'): biweight_bofinger, 71 ('biw', 'chamberlain'): biweight_chamberlain, 72 ('biw', 'hsheather'): biweight_hsheather, 73 ('cos', 'bofinger'): cosine_bofinger, 74 ('cos', 'chamberlain'): cosine_chamberlain, 75 ('cos', 'hsheather'): cosine_hsheather, 76 ('gau', 'bofinger'): gaussian_bofinger, 77 ('gau', 'chamberlain'): gaussian_chamberlain, 78 ('gau', 'hsheather'): gaussian_hsheather, 79 ('par', 'bofinger'): parzen_bofinger, 80 ('par', 'chamberlain'): parzen_chamberlain, 81 ('par', 'hsheather'): parzen_hsheather, 82 # ('rec','bofinger'): rectangle_bofinger, 83 # ('rec','chamberlain'): rectangle_chamberlain, 84 # ('rec','hsheather'): rectangle_hsheather, 85 # ('tri','bofinger'): triangle_bofinger, 86 # ('tri','chamberlain'): triangle_chamberlain, 87 # ('tri','hsheather'): triangle_hsheather, 88 ('epa', 'bofinger'): epan2_bofinger, 89 ('epa', 'chamberlain'): epan2_chamberlain, 90 ('epa', 'hsheather'): epan2_hsheather 91 # ('epa2', 'bofinger'): epan2_bofinger, 92 # ('epa2', 'chamberlain'): epan2_chamberlain, 93 # ('epa2', 'hsheather'): epan2_hsheather 94 } 95 96 97def setup_fun(kernel='gau', bandwidth='bofinger'): 98 data = sm.datasets.engel.load_pandas().data 99 y, X = dmatrices('foodexp ~ income', data, return_type='dataframe') 100 statsm = QuantReg(y, X).fit(vcov='iid', kernel=kernel, bandwidth=bandwidth) 101 stata = d[(kernel, bandwidth)] 102 return statsm, stata 103 104 105def test_fitted_residuals(): 106 data = sm.datasets.engel.load_pandas().data 107 y, X = dmatrices('foodexp ~ income', data, return_type='dataframe') 108 res = QuantReg(y, X).fit(q=.1) 109 # Note: maxabs relative error with fitted is 1.789e-09 110 assert_almost_equal(np.array(res.fittedvalues), Rquantreg.fittedvalues, 5) 111 assert_almost_equal(np.array(res.predict()), Rquantreg.fittedvalues, 5) 112 assert_almost_equal(np.array(res.resid), Rquantreg.residuals, 5) 113 114 115class TestEpanechnikovHsheatherQ75(CheckModelResultsMixin): 116 # Vincent Arel-Bundock also spot-checked q=.1 117 @classmethod 118 def setup_class(cls): 119 data = sm.datasets.engel.load_pandas().data 120 y, X = dmatrices('foodexp ~ income', data, return_type='dataframe') 121 cls.res1 = QuantReg(y, X).fit(q=.75, vcov='iid', kernel='epa', 122 bandwidth='hsheather') 123 cls.res2 = epanechnikov_hsheather_q75 124 125 126class TestEpanechnikovBofinger(CheckModelResultsMixin): 127 @classmethod 128 def setup_class(cls): 129 cls.res1, cls.res2 = setup_fun('epa', 'bofinger') 130 131 132class TestEpanechnikovChamberlain(CheckModelResultsMixin): 133 @classmethod 134 def setup_class(cls): 135 cls.res1, cls.res2 = setup_fun('epa', 'chamberlain') 136 137 138class TestEpanechnikovHsheather(CheckModelResultsMixin): 139 @classmethod 140 def setup_class(cls): 141 cls.res1, cls.res2 = setup_fun('epa', 'hsheather') 142 143 144class TestGaussianBofinger(CheckModelResultsMixin): 145 @classmethod 146 def setup_class(cls): 147 cls.res1, cls.res2 = setup_fun('gau', 'bofinger') 148 149 150class TestGaussianChamberlain(CheckModelResultsMixin): 151 @classmethod 152 def setup_class(cls): 153 cls.res1, cls.res2 = setup_fun('gau', 'chamberlain') 154 155 156class TestGaussianHsheather(CheckModelResultsMixin): 157 @classmethod 158 def setup_class(cls): 159 cls.res1, cls.res2 = setup_fun('gau', 'hsheather') 160 161 162class TestBiweightBofinger(CheckModelResultsMixin): 163 @classmethod 164 def setup_class(cls): 165 cls.res1, cls.res2 = setup_fun('biw', 'bofinger') 166 167 168class TestBiweightChamberlain(CheckModelResultsMixin): 169 @classmethod 170 def setup_class(cls): 171 cls.res1, cls.res2 = setup_fun('biw', 'chamberlain') 172 173 174class TestBiweightHsheather(CheckModelResultsMixin): 175 @classmethod 176 def setup_class(cls): 177 cls.res1, cls.res2 = setup_fun('biw', 'hsheather') 178 179 180class TestCosineBofinger(CheckModelResultsMixin): 181 @classmethod 182 def setup_class(cls): 183 cls.res1, cls.res2 = setup_fun('cos', 'bofinger') 184 185 186class TestCosineChamberlain(CheckModelResultsMixin): 187 @classmethod 188 def setup_class(cls): 189 cls.res1, cls.res2 = setup_fun('cos', 'chamberlain') 190 191 192class TestCosineHsheather(CheckModelResultsMixin): 193 @classmethod 194 def setup_class(cls): 195 cls.res1, cls.res2 = setup_fun('cos', 'hsheather') 196 197 198class TestParzeneBofinger(CheckModelResultsMixin): 199 @classmethod 200 def setup_class(cls): 201 cls.res1, cls.res2 = setup_fun('par', 'bofinger') 202 203 204class TestParzeneChamberlain(CheckModelResultsMixin): 205 @classmethod 206 def setup_class(cls): 207 cls.res1, cls.res2 = setup_fun('par', 'chamberlain') 208 209 210class TestParzeneHsheather(CheckModelResultsMixin): 211 @classmethod 212 def setup_class(cls): 213 cls.res1, cls.res2 = setup_fun('par', 'hsheather') 214 215# class TestTriangleBofinger(CheckModelResultsMixin): 216# @classmethod 217# def setup_class(cls): 218# cls.res1, cls.res2 = setup_fun('tri', 'bofinger') 219 220# class TestTriangleChamberlain(CheckModelResultsMixin): 221# @classmethod 222# def setup_class(cls): 223# cls.res1, cls.res2 = setup_fun('tri', 'chamberlain') 224 225# class TestTriangleHsheather(CheckModelResultsMixin): 226# @classmethod 227# def setup_class(cls): 228# cls.res1, cls.res2 = setup_fun('tri', 'hsheather') 229 230 231def test_zero_resid(): 232 # smoke and regression tests 233 234 X = np.array([[1, 0], [0, 1], [0, 2.1], [0, 3.1]], dtype=np.float64) 235 y = np.array([0, 1, 2, 3], dtype=np.float64) 236 237 res = QuantReg(y, X).fit(0.5, bandwidth='chamberlain') # 'bofinger') 238 res.summary() 239 240 assert_allclose(res.params, 241 np.array([0.0, 0.96774163]), 242 rtol=1e-4, atol=1e-20) 243 assert_allclose(res.bse, 244 np.array([0.0447576, 0.01154867]), 245 rtol=1e-4, atol=1e-20) 246 assert_allclose(res.resid, 247 np.array([0.0, 3.22583680e-02, 248 -3.22574272e-02, 9.40732912e-07]), 249 rtol=1e-4, atol=1e-20) 250 251 X = np.array([[1, 0], [0.1, 1], [0, 2.1], [0, 3.1]], dtype=np.float64) 252 y = np.array([0, 1, 2, 3], dtype=np.float64) 253 254 res = QuantReg(y, X).fit(0.5, bandwidth='chamberlain') 255 res.summary() 256 257 assert_allclose(res.params, np.array([9.99982796e-08, 9.67741630e-01]), 258 rtol=1e-4, atol=1e-20) 259 assert_allclose(res.bse, np.array([0.04455029, 0.01155251]), rtol=1e-4, 260 atol=1e-20) 261 assert_allclose(res.resid, np.array([-9.99982796e-08, 3.22583598e-02, 262 -3.22574234e-02, 9.46361860e-07]), 263 rtol=1e-4, atol=1e-20) 264 265 266def test_use_t_summary(): 267 X = np.array([[1, 0], [0, 1], [0, 2.1], [0, 3.1]], dtype=np.float64) 268 y = np.array([0, 1, 2, 3], dtype=np.float64) 269 270 res = QuantReg(y, X).fit(0.5, bandwidth='chamberlain', use_t=True) 271 summ = res.summary() 272 assert 'P>|t|' in str(summ) 273 assert 'P>|z|' not in str(summ) 274 275 276def test_alpha_summary(): 277 X = np.array([[1, 0], [0, 1], [0, 2.1], [0, 3.1]], dtype=np.float64) 278 y = np.array([0, 1, 2, 3], dtype=np.float64) 279 280 res = QuantReg(y, X).fit(0.5, bandwidth='chamberlain', use_t=True) 281 summ_20 = res.summary(alpha=.2) 282 assert '[0.025 0.975]' not in str(summ_20) 283 assert '[0.1 0.9]' in str(summ_20) 284 285 286def test_remove_data(): 287 X = np.array([[1, 0], [0, 1], [0, 2.1], [0, 3.1]], dtype=np.float64) 288 y = np.array([0, 1, 2, 3], dtype=np.float64) 289 290 res = QuantReg(y, X).fit(0.5) 291 res.remove_data() 292 293 294def test_collinear_matrix(): 295 X = np.array([[1, 0, .5], [1, 0, .8], 296 [1, 0, 1.5], [1, 0, .25]], dtype=np.float64) 297 y = np.array([0, 1, 2, 3], dtype=np.float64) 298 299 res_collinear = QuantReg(y, X).fit(0.5) 300 assert len(res_collinear.params) == X.shape[1] 301 302 303def test_nontrivial_singular_matrix(): 304 x_one = np.random.random(1000) 305 x_two = np.random.random(1000)*10 306 x_three = np.random.random(1000) 307 intercept = np.ones(1000) 308 309 y = np.random.random(1000)*5 310 X = np.column_stack((intercept, x_one, x_two, x_three, x_one)) 311 312 assert np.linalg.matrix_rank(X) < X.shape[1] 313 res_singular = QuantReg(y, X).fit(0.5) 314 assert len(res_singular.params) == X.shape[1] 315 assert np.linalg.matrix_rank(res_singular.cov_params()) == X.shape[1] - 1 316 317 # prediction is correct even with singular exog 318 res_ns = QuantReg(y, X[:, :-1]).fit(0.5) 319 assert_allclose(res_singular.fittedvalues, res_ns.fittedvalues, rtol=0.01) 320