"""Tests for zero-inflated count models (ZIP, ZIGP, ZINBP).

Fitted results on the RandHIE dataset are compared against reference
results (``results_discrete.RandHIE``), and predictions are checked
against simulated data from the corresponding zero-inflated
distributions.
"""
from statsmodels.compat.platform import PLATFORM_LINUX32

import numpy as np
from numpy.testing import (
    assert_,
    assert_allclose,
    assert_array_equal,
    assert_equal,
)
import pandas as pd
import pytest

import statsmodels.api as sm

from .results.results_discrete import RandHIE
from .test_discrete import CheckModelMixin


class CheckGeneric(CheckModelMixin):
    """Shared checks comparing a fitted model (res1) to reference results (res2).

    Subclasses must set ``res1``, ``res2``, ``init_keys`` and ``init_kwds``
    in ``setup_class``.
    """

    def test_params(self):
        assert_allclose(self.res1.params, self.res2.params, atol=1e-5, rtol=1e-5)

    def test_llf(self):
        assert_allclose(self.res1.llf, self.res2.llf, atol=1e-5, rtol=1e-5)

    def test_conf_int(self):
        assert_allclose(self.res1.conf_int(), self.res2.conf_int, atol=1e-3, rtol=1e-5)

    def test_bse(self):
        assert_allclose(self.res1.bse, self.res2.bse, atol=1e-3, rtol=1e-3)

    def test_aic(self):
        assert_allclose(self.res1.aic, self.res2.aic, atol=1e-2, rtol=1e-2)

    def test_bic(self):
        # BUG FIX: this previously compared aic to aic, so BIC was never
        # actually tested; compare bic with the same loose tolerance.
        assert_allclose(self.res1.bic, self.res2.bic, atol=1e-1, rtol=1e-1)

    def test_t(self):
        # t_test with an identity contrast must reproduce the tvalues.
        unit_matrix = np.identity(self.res1.params.size)
        t_test = self.res1.t_test(unit_matrix)
        assert_allclose(self.res1.tvalues, t_test.tvalue)

    def test_fit_regularized(self):
        model = self.res1.model

        # penalize all but the last two parameters; those should then be
        # close to the unpenalized estimates
        alpha = np.ones(len(self.res1.params))
        alpha[-2:] = 0
        res_reg = model.fit_regularized(alpha=alpha*0.01, disp=False, maxiter=500)

        assert_allclose(res_reg.params[2:], self.res1.params[2:],
                        atol=5e-2, rtol=5e-2)

    def test_init_keys(self):
        init_kwds = self.res1.model._get_init_kwds()
        assert_equal(set(init_kwds.keys()), set(self.init_keys))
        for key, value in self.init_kwds.items():
            assert_equal(init_kwds[key], value)

    def test_null(self):
        # call llnull, so null model is attached, side effect of cached attribute
        self.res1.llnull
        # check model instead of value
        exog_null = self.res1.res_null.model.exog
        exog_infl_null = self.res1.res_null.model.exog_infl
        assert_array_equal(exog_infl_null.shape,
                           (len(self.res1.model.exog), 1))
        # null model exogs must be constant columns
        assert_equal(np.ptp(exog_null), 0)
        assert_equal(np.ptp(exog_infl_null), 0)

    @pytest.mark.smoke
    def test_summary(self):
        summ = self.res1.summary()
        # GH 4581
        assert 'Covariance Type:' in str(summ)


class TestZeroInflatedModel_logit(CheckGeneric):
    @classmethod
    def setup_class(cls):
        data = sm.datasets.randhie.load()
        cls.endog = np.asarray(data.endog)
        data.exog = np.asarray(data.exog)
        exog = sm.add_constant(data.exog[:, 1:4], prepend=False)
        exog_infl = sm.add_constant(data.exog[:, 0], prepend=False)
        cls.res1 = sm.ZeroInflatedPoisson(
            data.endog, exog, exog_infl=exog_infl,
            inflation='logit').fit(method='newton', maxiter=500, disp=False)
        # for llnull test
        cls.res1._results._attach_nullmodel = True
        cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset']
        cls.init_kwds = {'inflation': 'logit'}
        res2 = RandHIE.zero_inflated_poisson_logit
        cls.res2 = res2


class TestZeroInflatedModel_probit(CheckGeneric):
    @classmethod
    def setup_class(cls):
        data = sm.datasets.randhie.load()
        cls.endog = np.asarray(data.endog)
        data.exog = np.asarray(data.exog)
        exog = sm.add_constant(data.exog[:, 1:4], prepend=False)
        exog_infl = sm.add_constant(data.exog[:, 0], prepend=False)
        cls.res1 = sm.ZeroInflatedPoisson(
            data.endog, exog, exog_infl=exog_infl,
            inflation='probit').fit(method='newton', maxiter=500, disp=False)
        # for llnull test
        cls.res1._results._attach_nullmodel = True
        cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset']
        cls.init_kwds = {'inflation': 'probit'}
        res2 = RandHIE.zero_inflated_poisson_probit
        cls.res2 = res2

    @pytest.mark.skipif(PLATFORM_LINUX32, reason="Fails on 32-bit Linux")
    def test_fit_regularized(self):
        super().test_fit_regularized()


class TestZeroInflatedModel_offset(CheckGeneric):
    @classmethod
    def setup_class(cls):
        data = sm.datasets.randhie.load()
        cls.endog = np.asarray(data.endog)
        data.exog = np.asarray(data.exog)
        exog = sm.add_constant(data.exog[:, 1:4], prepend=False)
        exog_infl = sm.add_constant(data.exog[:, 0], prepend=False)
        cls.res1 = sm.ZeroInflatedPoisson(
            data.endog, exog, exog_infl=exog_infl,
            offset=data.exog[:, 7]).fit(method='newton', maxiter=500,
                                        disp=False)
        # for llnull test
        cls.res1._results._attach_nullmodel = True
        cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset']
        cls.init_kwds = {'inflation': 'logit'}
        res2 = RandHIE.zero_inflated_poisson_offset
        cls.res2 = res2

    def test_exposure(self):
        # This test mostly the equivalence of offset and exposure = exp(offset)
        # use data arrays from class model
        model1 = self.res1.model
        offset = model1.offset
        model3 = sm.ZeroInflatedPoisson(model1.endog, model1.exog,
                                        exog_infl=model1.exog_infl,
                                        exposure=np.exp(offset))
        res3 = model3.fit(start_params=self.res1.params,
                          method='newton', maxiter=500, disp=False)

        assert_allclose(res3.params, self.res1.params, atol=1e-6, rtol=1e-6)
        fitted1 = self.res1.predict()
        fitted3 = res3.predict()
        assert_allclose(fitted3, fitted1, atol=1e-6, rtol=1e-6)

        ex = model1.exog
        ex_infl = model1.exog_infl
        offset = model1.offset
        fitted1_0 = self.res1.predict(exog=ex, exog_infl=ex_infl,
                                      offset=offset.tolist())
        fitted3_0 = res3.predict(exog=ex, exog_infl=ex_infl,
                                 exposure=np.exp(offset))
        assert_allclose(fitted3_0, fitted1_0, atol=1e-6, rtol=1e-6)

        ex = model1.exog[:10:2]
        ex_infl = model1.exog_infl[:10:2]
        offset = offset[:10:2]
        # # TODO: this raises with shape mismatch,
        # # i.e. uses offset or exposure from model -> fix it or not?
        # GLM.predict to setting offset and exposure to zero
        # fitted1_1 = self.res1.predict(exog=ex, exog_infl=ex_infl)
        # fitted3_1 = res3.predict(exog=ex, exog_infl=ex_infl)
        # assert_allclose(fitted3_1, fitted1_1, atol=1e-6, rtol=1e-6)

        fitted1_2 = self.res1.predict(exog=ex, exog_infl=ex_infl,
                                      offset=offset)
        fitted3_2 = res3.predict(exog=ex, exog_infl=ex_infl,
                                 exposure=np.exp(offset))
        assert_allclose(fitted3_2, fitted1_2, atol=1e-6, rtol=1e-6)
        assert_allclose(fitted1_2, fitted1[:10:2], atol=1e-6, rtol=1e-6)
        assert_allclose(fitted3_2, fitted1[:10:2], atol=1e-6, rtol=1e-6)

        # without specifying offset and exposure
        fitted1_3 = self.res1.predict(exog=ex, exog_infl=ex_infl)
        fitted3_3 = res3.predict(exog=ex, exog_infl=ex_infl)
        assert_allclose(fitted3_3, fitted1_3, atol=1e-6, rtol=1e-6)


class TestZeroInflatedModelPandas(CheckGeneric):
    @classmethod
    def setup_class(cls):
        data = sm.datasets.randhie.load_pandas()
        cls.endog = data.endog
        cls.data = data
        exog = sm.add_constant(data.exog.iloc[:, 1:4], prepend=False)
        exog_infl = sm.add_constant(data.exog.iloc[:, 0], prepend=False)
        # we do not need to verify convergence here
        start_params = np.asarray([0.10337834587498942, -1.0459825102508549,
                                   -0.08219794475894268, 0.00856917434709146,
                                   -0.026795737379474334, 1.4823632430107334])
        model = sm.ZeroInflatedPoisson(data.endog, exog,
                                       exog_infl=exog_infl, inflation='logit')
        cls.res1 = model.fit(start_params=start_params, method='newton',
                             maxiter=500, disp=False)
        # for llnull test
        cls.res1._results._attach_nullmodel = True
        cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset']
        cls.init_kwds = {'inflation': 'logit'}
        res2 = RandHIE.zero_inflated_poisson_logit
        cls.res2 = res2

    def test_names(self):
        # inflation parameter names are prefixed with "inflate_"
        param_names = ['inflate_lncoins', 'inflate_const', 'idp', 'lpi',
                       'fmde', 'const']
        assert_array_equal(self.res1.model.exog_names, param_names)
        assert_array_equal(self.res1.params.index.tolist(), param_names)
        assert_array_equal(self.res1.bse.index.tolist(), param_names)

        # the order should be reflected if prepending the constant
        exog = sm.add_constant(self.data.exog.iloc[:, 1:4], prepend=True)
        exog_infl = sm.add_constant(self.data.exog.iloc[:, 0], prepend=True)
        param_names = ['inflate_const', 'inflate_lncoins', 'const', 'idp',
                       'lpi', 'fmde']
        model = sm.ZeroInflatedPoisson(self.data.endog, exog,
                                       exog_infl=exog_infl, inflation='logit')
        assert_array_equal(model.exog_names, param_names)


class TestZeroInflatedPoisson_predict(object):
    @classmethod
    def setup_class(cls):
        expected_params = [1, 0.5]
        np.random.seed(999)
        nobs = 2000
        exog = np.ones((nobs, 2))
        exog[:nobs//2, 1] = 2
        mu_true = exog.dot(expected_params)
        cls.endog = sm.distributions.zipoisson.rvs(mu_true, 0.05,
                                                   size=mu_true.shape)
        model = sm.ZeroInflatedPoisson(cls.endog, exog)
        cls.res = model.fit(method='bfgs', maxiter=5000, disp=False)

        # true DGP values used to build tolerances in the tests below
        cls.params_true = [mu_true, 0.05, nobs]

    def test_mean(self):
        def compute_conf_interval_95(mu, prob_infl, nobs):
            dispersion_factor = 1 + prob_infl * mu

            # scalar variance of the mixture of zip
            var = (dispersion_factor*(1-prob_infl)*mu).mean()
            var += (((1-prob_infl)*mu)**2).mean()
            var -= (((1-prob_infl)*mu).mean())**2
            std = np.sqrt(var)
            # Central limit theorem
            conf_intv_95 = 2 * std / np.sqrt(nobs)
            return conf_intv_95

        conf_interval_95 = compute_conf_interval_95(*self.params_true)
        assert_allclose(self.res.predict().mean(), self.endog.mean(),
                        atol=conf_interval_95, rtol=0)

    def test_var(self):
        def compute_mixture_var(dispersion_factor, prob_main, mu):
            # the variance of the mixture is the mixture of the variances plus
            # a non-negative term accounting for the (weighted)
            # dispersion of the means, see stats.stackexchange #16609 and
            # Casella & Berger's Statistical Inference (Example 10.2.1)
            prob_infl = 1-prob_main
            var = (dispersion_factor*(1-prob_infl)*mu).mean()
            var += (((1-prob_infl)*mu)**2).mean()
            var -= (((1-prob_infl)*mu).mean())**2
            return var

        res = self.res
        var_fitted = compute_mixture_var(res._dispersion_factor,
                                         res.predict(which='prob-main'),
                                         res.predict(which='mean-main'))

        assert_allclose(var_fitted.mean(),
                        self.endog.var(), atol=5e-2, rtol=5e-2)

    def test_predict_prob(self):
        res = self.res

        pr = res.predict(which='prob')
        pr2 = sm.distributions.zipoisson.pmf(np.arange(pr.shape[1])[:, None],
                                             res.predict(), 0.05).T
        assert_allclose(pr, pr2, rtol=0.05, atol=0.05)

    def test_predict_options(self):
        # check default exog_infl, see #4757
        res = self.res
        n = 5
        pr1 = res.predict(which='prob')
        pr0 = res.predict(exog=res.model.exog[:n], which='prob')
        assert_allclose(pr0, pr1[:n], rtol=1e-10)

        fitted1 = res.predict()
        fitted0 = res.predict(exog=res.model.exog[:n])
        assert_allclose(fitted0, fitted1[:n], rtol=1e-10)


@pytest.mark.slow
class TestZeroInflatedGeneralizedPoisson(CheckGeneric):
    @classmethod
    def setup_class(cls):
        data = sm.datasets.randhie.load()
        cls.endog = np.asarray(data.endog)
        data.exog = np.asarray(data.exog)
        exog = sm.add_constant(data.exog[:, 1:4], prepend=False)
        exog_infl = sm.add_constant(data.exog[:, 0], prepend=False)
        cls.res1 = sm.ZeroInflatedGeneralizedPoisson(
            data.endog, exog, exog_infl=exog_infl,
            p=1).fit(method='newton', maxiter=500, disp=False)
        # for llnull test
        cls.res1._results._attach_nullmodel = True
        cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset', 'p']
        cls.init_kwds = {'inflation': 'logit', 'p': 1}
        res2 = RandHIE.zero_inflated_generalized_poisson
        cls.res2 = res2

    # the following generic checks are skipped for this model
    def test_bse(self):
        pass

    def test_conf_int(self):
        pass

    def test_bic(self):
        pass

    def test_t(self):
        unit_matrix = np.identity(self.res1.params.size)
        t_test = self.res1.t_test(unit_matrix)
        assert_allclose(self.res1.tvalues, t_test.tvalue)

    def test_minimize(self, reset_randomstate):
        # check additional optimizers using the `minimize` option
        model = self.res1.model
        # use the same start_params, but avoid recomputing
        start_params = self.res1.mle_settings['start_params']

        res_ncg = model.fit(start_params=start_params,
                            method='minimize', min_method="trust-ncg",
                            maxiter=500, disp=False)

        assert_allclose(res_ncg.params, self.res2.params,
                        atol=1e-3, rtol=0.04)
        assert_allclose(res_ncg.bse, self.res2.bse,
                        atol=1e-3, rtol=0.6)
        assert_(res_ncg.mle_retvals['converged'] is True)

        res_dog = model.fit(start_params=start_params,
                            method='minimize', min_method="dogleg",
                            maxiter=500, disp=False)

        assert_allclose(res_dog.params, self.res2.params,
                        atol=1e-3, rtol=3e-3)
        assert_allclose(res_dog.bse, self.res2.bse,
                        atol=1e-3, rtol=0.6)
        assert_(res_dog.mle_retvals['converged'] is True)

        # Set random_state here to improve reproducibility
        random_state = np.random.RandomState(1)
        seed = {'seed': random_state}
        res_bh = model.fit(start_params=start_params,
                           method='basinhopping', niter=500, stepsize=0.1,
                           niter_success=None, disp=False, interval=1, **seed)

        assert_allclose(res_bh.params, self.res2.params,
                        atol=1e-4, rtol=1e-4)
        assert_allclose(res_bh.bse, self.res2.bse,
                        atol=1e-3, rtol=0.6)
        # skip, res_bh reports converged is false but params agree
        # assert_(res_bh.mle_retvals['converged'] is True)


class TestZeroInflatedGeneralizedPoisson_predict(object):
    @classmethod
    def setup_class(cls):
        expected_params = [1, 0.5, 0.5]
        np.random.seed(999)
        nobs = 2000
        exog = np.ones((nobs, 2))
        exog[:nobs//2, 1] = 2
        mu_true = exog.dot(expected_params[:-1])
        cls.endog = sm.distributions.zigenpoisson.rvs(
            mu_true, expected_params[-1], 2, 0.5, size=mu_true.shape)
        model = sm.ZeroInflatedGeneralizedPoisson(cls.endog, exog, p=2)
        cls.res = model.fit(method='bfgs', maxiter=5000, disp=False)

        cls.params_true = [mu_true, expected_params[-1], 2, 0.5, nobs]

    def test_mean(self):
        def compute_conf_interval_95(mu, alpha, p, prob_infl, nobs):
            p = p-1
            dispersion_factor = (1 + alpha * mu**p)**2 + prob_infl * mu

            # scalar variance of the mixture of zip
            var = (dispersion_factor*(1-prob_infl)*mu).mean()
            var += (((1-prob_infl)*mu)**2).mean()
            var -= (((1-prob_infl)*mu).mean())**2
            std = np.sqrt(var)
            # Central limit theorem
            conf_intv_95 = 2 * std / np.sqrt(nobs)
            return conf_intv_95

        conf_interval_95 = compute_conf_interval_95(*self.params_true)
        assert_allclose(self.res.predict().mean(), self.endog.mean(),
                        atol=conf_interval_95, rtol=0)

    def test_var(self):
        def compute_mixture_var(dispersion_factor, prob_main, mu):
            prob_infl = 1-prob_main
            var = (dispersion_factor*(1-prob_infl)*mu).mean()
            var += (((1-prob_infl)*mu)**2).mean()
            var -= (((1-prob_infl)*mu).mean())**2
            return var

        res = self.res
        var_fitted = compute_mixture_var(res._dispersion_factor,
                                         res.predict(which='prob-main'),
                                         res.predict(which='mean-main'))

        assert_allclose(var_fitted.mean(),
                        self.endog.var(), atol=0.05, rtol=0.1)

    def test_predict_prob(self):
        res = self.res

        pr = res.predict(which='prob')
        # NOTE(review): compares ZIGP probabilities against the zinegbin pmf
        # with a loose tolerance; zigenpoisson might be intended — confirm
        pr2 = sm.distributions.zinegbin.pmf(np.arange(pr.shape[1])[:, None],
                                            res.predict(), 0.5, 2, 0.5).T
        assert_allclose(pr, pr2, rtol=0.08, atol=0.05)


class TestZeroInflatedNegativeBinomialP(CheckGeneric):
    @classmethod
    def setup_class(cls):
        data = sm.datasets.randhie.load()
        cls.endog = np.asarray(data.endog)
        data.exog = np.asarray(data.exog)
        exog = sm.add_constant(data.exog[:, 1], prepend=False)
        exog_infl = sm.add_constant(data.exog[:, 0], prepend=False)
        # cheating for now, parameters are not well identified in this dataset
        # see https://github.com/statsmodels/statsmodels/pull/3928#issuecomment-331724022
        sp = np.array([1.88, -10.28, -0.20, 1.14, 1.34])
        cls.res1 = sm.ZeroInflatedNegativeBinomialP(
            data.endog, exog, exog_infl=exog_infl,
            p=2).fit(start_params=sp, method='nm',
                     xtol=1e-6, maxiter=5000, disp=False)
        # for llnull test
        cls.res1._results._attach_nullmodel = True
        cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset', 'p']
        cls.init_kwds = {'inflation': 'logit', 'p': 2}
        res2 = RandHIE.zero_inflated_negative_binomial
        cls.res2 = res2

    def test_params(self):
        # looser tolerance than the generic check
        assert_allclose(self.res1.params, self.res2.params,
                        atol=1e-3, rtol=1e-3)

    def test_conf_int(self):
        pass

    def test_bic(self):
        pass

    def test_fit_regularized(self):
        model = self.res1.model

        alpha = np.ones(len(self.res1.params))
        alpha[-2:] = 0
        res_reg = model.fit_regularized(alpha=alpha*0.01, disp=False, maxiter=500)

        assert_allclose(res_reg.params[2:], self.res1.params[2:],
                        atol=1e-1, rtol=1e-1)

    # possibly slow, adds 25 seconds
    def test_minimize(self, reset_randomstate):
        # check additional optimizers using the `minimize` option
        model = self.res1.model
        # use the same start_params, but avoid recomputing
        start_params = self.res1.mle_settings['start_params']

        res_ncg = model.fit(start_params=start_params,
                            method='minimize', min_method="trust-ncg",
                            maxiter=500, disp=False)

        assert_allclose(res_ncg.params, self.res2.params,
                        atol=1e-3, rtol=0.03)
        assert_allclose(res_ncg.bse, self.res2.bse,
                        atol=1e-3, rtol=0.06)
        assert_(res_ncg.mle_retvals['converged'] is True)

        res_dog = model.fit(start_params=start_params,
                            method='minimize', min_method="dogleg",
                            maxiter=500, disp=False)

        assert_allclose(res_dog.params, self.res2.params,
                        atol=1e-3, rtol=3e-3)
        assert_allclose(res_dog.bse, self.res2.bse,
                        atol=1e-3, rtol=7e-3)
        assert_(res_dog.mle_retvals['converged'] is True)

        res_bh = model.fit(start_params=start_params,
                           method='basinhopping', maxiter=500,
                           niter_success=3, disp=False)

        assert_allclose(res_bh.params, self.res2.params,
                        atol=1e-4, rtol=3e-4)
        assert_allclose(res_bh.bse, self.res2.bse,
                        atol=1e-3, rtol=1e-3)
        # skip, res_bh reports converged is false but params agree
        # assert_(res_bh.mle_retvals['converged'] is True)


class TestZeroInflatedNegativeBinomialP_predict(object):
    @classmethod
    def setup_class(cls):

        expected_params = [1, 1, 0.5]
        np.random.seed(999)
        nobs = 5000
        exog = np.ones((nobs, 2))
        exog[:nobs//2, 1] = 0

        prob_infl = 0.15
        mu_true = np.exp(exog.dot(expected_params[:-1]))
        cls.endog = sm.distributions.zinegbin.rvs(
            mu_true, expected_params[-1], 2, prob_infl, size=mu_true.shape)
        model = sm.ZeroInflatedNegativeBinomialP(cls.endog, exog, p=2)
        cls.res = model.fit(method='bfgs', maxiter=5000, disp=False)

        # attach others
        cls.prob_infl = prob_infl
        cls.params_true = [mu_true, expected_params[-1], 2, prob_infl, nobs]

    def test_mean(self):
        def compute_conf_interval_95(mu, alpha, p, prob_infl, nobs):
            dispersion_factor = 1 + alpha * mu**(p-1) + prob_infl * mu

            # scalar variance of the mixture of zip
            var = (dispersion_factor*(1-prob_infl)*mu).mean()
            var += (((1-prob_infl)*mu)**2).mean()
            var -= (((1-prob_infl)*mu).mean())**2
            std = np.sqrt(var)
            # Central limit theorem
            conf_intv_95 = 2 * std / np.sqrt(nobs)
            return conf_intv_95

        conf_interval_95 = compute_conf_interval_95(*self.params_true)
        mean_true = ((1-self.prob_infl)*self.params_true[0]).mean()
        assert_allclose(self.res.predict().mean(),
                        mean_true, atol=conf_interval_95, rtol=0)

    def test_var(self):
        # todo check precision
        def compute_mixture_var(dispersion_factor, prob_main, mu):
            prob_infl = 1 - prob_main
            var = (dispersion_factor * (1 - prob_infl) * mu).mean()
            var += (((1 - prob_infl) * mu) ** 2).mean()
            var -= (((1 - prob_infl) * mu).mean()) ** 2
            return var

        res = self.res
        var_fitted = compute_mixture_var(res._dispersion_factor,
                                         res.predict(which='prob-main'),
                                         res.predict(which='mean-main'))

        assert_allclose(var_fitted.mean(),
                        self.endog.var(), rtol=0.2)

    def test_predict_prob(self):
        res = self.res
        endog = res.model.endog

        pr = res.predict(which='prob')
        pr2 = sm.distributions.zinegbin.pmf(np.arange(pr.shape[1])[:, None],
                                            res.predict(), 0.5, 2,
                                            self.prob_infl).T
        assert_allclose(pr, pr2, rtol=0.1, atol=0.1)
        prm = pr.mean(0)
        pr2m = pr2.mean(0)
        freq = np.bincount(endog.astype(int)) / len(endog)
        assert_allclose(((pr2m - prm)**2).mean(), 0, rtol=1e-10, atol=5e-4)
        assert_allclose(((prm - freq)**2).mean(), 0, rtol=1e-10, atol=1e-4)

    def test_predict_generic_zi(self):
        # These tests do not use numbers from other packages.
        # Tests are on closeness of estimated to true/DGP values
        # and theoretical relationship between quantities
        res = self.res
        endog = self.endog
        exog = self.res.model.exog
        prob_infl = self.prob_infl
        nobs = len(endog)

        freq = np.bincount(endog.astype(int)) / len(endog)
        probs = res.predict(which='prob')
        probsm = probs.mean(0)
        assert_allclose(freq, probsm, atol=0.02)

        probs_unique = res.predict(exog=[[1, 0], [1, 1]],
                                   exog_infl=np.asarray([[1], [1]]),
                                   which='prob')
        # no default for exog_infl yet
        # probs_unique = res.predict(exog=[[1, 0], [1, 1]], which='prob')

        # rows 1 and nobs-1 have exog [1, 0] and [1, 1] respectively
        probs_unique2 = probs[[1, nobs-1]]

        assert_allclose(probs_unique, probs_unique2, atol=1e-10)

        probs0_unique = res.predict(exog=[[1, 0], [1, 1]],
                                    exog_infl=np.asarray([[1], [1]]),
                                    which='prob-zero')
        assert_allclose(probs0_unique, probs_unique2[:, 0], rtol=1e-10)

        probs_main_unique = res.predict(exog=[[1, 0], [1, 1]],
                                        exog_infl=np.asarray([[1], [1]]),
                                        which='prob-main')
        probs_main = res.predict(which='prob-main')
        assert_allclose(probs_main_unique, probs_main[[0, -1]], rtol=1e-10)
        assert_allclose(probs_main_unique, 1 - prob_infl, atol=0.01)

        pred = res.predict(exog=[[1, 0], [1, 1]],
                           exog_infl=np.asarray([[1], [1]]))
        pred1 = endog[exog[:, 1] == 0].mean(), endog[exog[:, 1] == 1].mean()
        assert_allclose(pred, pred1, rtol=0.05)

        pred_main_unique = res.predict(exog=[[1, 0], [1, 1]],
                                       exog_infl=np.asarray([[1], [1]]),
                                       which='mean-main')
        assert_allclose(pred_main_unique, np.exp(np.cumsum(res.params[1:3])),
                        rtol=1e-10)

        # TODO: why does the following fail, params are not close enough to DGP
        # but results are close statistics of simulated data
        # what is mu_true in DGP sm.distributions.zinegbin.rvs
        # assert_allclose(pred_main_unique, mu_true[[1, -1]] * (1 - prob_infl), rtol=0.01)

        # mean-nonzero
        mean_nz = (endog[(exog[:, 1] == 0) & (endog > 0)].mean(),
                   endog[(exog[:, 1] == 1) & (endog > 0)].mean())
        pred_nonzero_unique = res.predict(exog=[[1, 0], [1, 1]],
                                          exog_infl=np.asarray([[1], [1]]),
                                          which='mean-nonzero')
        assert_allclose(pred_nonzero_unique, mean_nz, rtol=0.05)

        pred_lin_unique = res.predict(exog=[[1, 0], [1, 1]],
                                      exog_infl=np.asarray([[1], [1]]),
                                      which='linear')
        assert_allclose(pred_lin_unique, np.cumsum(res.params[1:3]), rtol=1e-10)


class TestZeroInflatedNegativeBinomialP_predict2(object):
    @classmethod
    def setup_class(cls):
        data = sm.datasets.randhie.load()

        cls.endog = np.asarray(data.endog)
        data.exog = np.asarray(data.exog)
        exog = data.exog
        start_params = np.array([
            -2.83983767, -2.31595924, -3.9263248, -4.01816431, -5.52251843,
            -2.4351714, -4.61636366, -4.17959785, -0.12960256, -0.05653484,
            -0.21206673, 0.08782572, -0.02991995, 0.22901208, 0.0620983,
            0.06809681, 0.0841814, 0.185506, 1.36527888])
        mod = sm.ZeroInflatedNegativeBinomialP(
            cls.endog, exog, exog_infl=exog, p=2)
        res = mod.fit(start_params=start_params, method="bfgs",
                      maxiter=1000, disp=False)

        cls.res = res

    def test_mean(self):
        assert_allclose(self.res.predict().mean(), self.endog.mean(),
                        atol=0.02)

    def test_zero_nonzero_mean(self):
        # overall mean == P(nonzero) * E[y | y > 0], approximately
        mean1 = self.endog.mean()
        mean2 = ((1 - self.res.predict(which='prob-zero').mean()) *
                 self.res.predict(which='mean-nonzero').mean())
        assert_allclose(mean1, mean2, atol=0.2)


class TestPandasOffset:

    def test_pd_offset_exposure(self):
        # smoke test: pandas offset/exposure inputs must be accepted
        endog = pd.DataFrame({'F': [0.0, 0.0, 0.0, 0.0, 1.0]})
        exog = pd.DataFrame({'I': [1.0, 1.0, 1.0, 1.0, 1.0],
                             'C': [0.0, 1.0, 0.0, 1.0, 0.0]})
        exposure = pd.Series([1., 1, 1, 2, 1])
        offset = pd.Series([1, 1, 1, 2, 1])
        sm.Poisson(endog=endog, exog=exog, offset=offset).fit()
        inflations = ['logit', 'probit']
        for inflation in inflations:
            sm.ZeroInflatedPoisson(endog=endog, exog=exog["I"],
                                   exposure=exposure,
                                   inflation=inflation).fit()