from statsmodels.compat.platform import PLATFORM_LINUX32

import numpy as np
from numpy.testing import (
    assert_,
    assert_allclose,
    assert_array_equal,
    assert_equal,
)
import pandas as pd
import pytest

import statsmodels.api as sm

from .results.results_discrete import RandHIE
from .test_discrete import CheckModelMixin


class CheckGeneric(CheckModelMixin):
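    # A sketch of the contract subclasses are expected to fulfil in
    # setup_class (see e.g. TestZeroInflatedModel_logit below):
    #   cls.res1      fitted zero-inflated results instance
    #   cls.res2      reference results from results_discrete.RandHIE
    #   cls.init_keys expected keys of model._get_init_kwds()
    #   cls.init_kwds expected values for a subset of those keys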
    def test_params(self):
        assert_allclose(self.res1.params, self.res2.params,
                        atol=1e-5, rtol=1e-5)

    def test_llf(self):
        assert_allclose(self.res1.llf, self.res2.llf, atol=1e-5, rtol=1e-5)

    def test_conf_int(self):
        assert_allclose(self.res1.conf_int(), self.res2.conf_int,
                        atol=1e-3, rtol=1e-5)

    def test_bse(self):
        assert_allclose(self.res1.bse, self.res2.bse, atol=1e-3, rtol=1e-3)

    def test_aic(self):
        assert_allclose(self.res1.aic, self.res2.aic, atol=1e-2, rtol=1e-2)

    def test_bic(self):
        assert_allclose(self.res1.bic, self.res2.bic, atol=1e-1, rtol=1e-1)

    def test_t(self):
        unit_matrix = np.identity(self.res1.params.size)
        t_test = self.res1.t_test(unit_matrix)
        assert_allclose(self.res1.tvalues, t_test.tvalue)

    def test_fit_regularized(self):
        model = self.res1.model

        alpha = np.ones(len(self.res1.params))
        alpha[-2:] = 0
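        # a zero penalty weight leaves the last two parameters unpenalized;
        # only the remaining coefficients are shrunk towards zero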
        res_reg = model.fit_regularized(alpha=alpha*0.01, disp=False,
                                        maxiter=500)

        assert_allclose(res_reg.params[2:], self.res1.params[2:],
                        atol=5e-2, rtol=5e-2)

    def test_init_keys(self):
        init_kwds = self.res1.model._get_init_kwds()
        assert_equal(set(init_kwds.keys()), set(self.init_keys))
        for key, value in self.init_kwds.items():
            assert_equal(init_kwds[key], value)

    def test_null(self):
        # access llnull so that the null model is attached, a side effect
        # of the cached attribute
        self.res1.llnull
        # check the model instead of the value
        exog_null = self.res1.res_null.model.exog
        exog_infl_null = self.res1.res_null.model.exog_infl
        assert_array_equal(exog_infl_null.shape,
                           (len(self.res1.model.exog), 1))
        assert_equal(np.ptp(exog_null), 0)
        assert_equal(np.ptp(exog_infl_null), 0)

    @pytest.mark.smoke
    def test_summary(self):
        summ = self.res1.summary()
        # GH 4581
        assert 'Covariance Type:' in str(summ)


class TestZeroInflatedModel_logit(CheckGeneric):
    @classmethod
    def setup_class(cls):
        data = sm.datasets.randhie.load()
        cls.endog = np.asarray(data.endog)
        data.exog = np.asarray(data.exog)
        exog = sm.add_constant(data.exog[:, 1:4], prepend=False)
        exog_infl = sm.add_constant(data.exog[:, 0], prepend=False)
        cls.res1 = sm.ZeroInflatedPoisson(data.endog, exog,
            exog_infl=exog_infl, inflation='logit').fit(method='newton',
                                                        maxiter=500,
                                                        disp=False)
        # for llnull test
        cls.res1._results._attach_nullmodel = True
        cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset']
        cls.init_kwds = {'inflation': 'logit'}
        res2 = RandHIE.zero_inflated_poisson_logit
        cls.res2 = res2


class TestZeroInflatedModel_probit(CheckGeneric):
    @classmethod
    def setup_class(cls):
        data = sm.datasets.randhie.load()
        cls.endog = np.asarray(data.endog)
        data.exog = np.asarray(data.exog)
        exog = sm.add_constant(data.exog[:, 1:4], prepend=False)
        exog_infl = sm.add_constant(data.exog[:, 0], prepend=False)
        cls.res1 = sm.ZeroInflatedPoisson(data.endog, exog,
            exog_infl=exog_infl, inflation='probit').fit(method='newton',
                                                         maxiter=500,
                                                         disp=False)
        # for llnull test
        cls.res1._results._attach_nullmodel = True
        cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset']
        cls.init_kwds = {'inflation': 'probit'}
        res2 = RandHIE.zero_inflated_poisson_probit
        cls.res2 = res2

    @pytest.mark.skipif(PLATFORM_LINUX32, reason="Fails on 32-bit Linux")
    def test_fit_regularized(self):
        super().test_fit_regularized()


class TestZeroInflatedModel_offset(CheckGeneric):
    @classmethod
    def setup_class(cls):
        data = sm.datasets.randhie.load()
        cls.endog = np.asarray(data.endog)
        data.exog = np.asarray(data.exog)
        exog = sm.add_constant(data.exog[:, 1:4], prepend=False)
        exog_infl = sm.add_constant(data.exog[:, 0], prepend=False)
        cls.res1 = sm.ZeroInflatedPoisson(data.endog, exog,
            exog_infl=exog_infl, offset=data.exog[:, 7]).fit(method='newton',
                                                             maxiter=500,
                                                             disp=False)
        # for llnull test
        cls.res1._results._attach_nullmodel = True
        cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset']
        cls.init_kwds = {'inflation': 'logit'}
        res2 = RandHIE.zero_inflated_poisson_offset
        cls.res2 = res2

    def test_exposure(self):
        # This mostly tests the equivalence of offset and
        # exposure = exp(offset); use the data arrays from the class's model.
        model1 = self.res1.model
        offset = model1.offset
        model3 = sm.ZeroInflatedPoisson(model1.endog, model1.exog,
            exog_infl=model1.exog_infl, exposure=np.exp(offset))
        res3 = model3.fit(start_params=self.res1.params,
                          method='newton', maxiter=500, disp=False)

        assert_allclose(res3.params, self.res1.params, atol=1e-6, rtol=1e-6)
        fitted1 = self.res1.predict()
        fitted3 = res3.predict()
        assert_allclose(fitted3, fitted1, atol=1e-6, rtol=1e-6)

        ex = model1.exog
        ex_infl = model1.exog_infl
        offset = model1.offset
        fitted1_0 = self.res1.predict(exog=ex, exog_infl=ex_infl,
                                      offset=offset.tolist())
        fitted3_0 = res3.predict(exog=ex, exog_infl=ex_infl,
                                 exposure=np.exp(offset))
        assert_allclose(fitted3_0, fitted1_0, atol=1e-6, rtol=1e-6)

        ex = model1.exog[:10:2]
        ex_infl = model1.exog_infl[:10:2]
        offset = offset[:10:2]
        # # TODO: this raises with a shape mismatch,
        # # i.e. it uses offset or exposure from the model -> fix it or not?
        # # GLM.predict defaults to setting offset and exposure to zero.
        # fitted1_1 = self.res1.predict(exog=ex, exog_infl=ex_infl)
        # fitted3_1 = res3.predict(exog=ex, exog_infl=ex_infl)
        # assert_allclose(fitted3_1, fitted1_1, atol=1e-6, rtol=1e-6)

        fitted1_2 = self.res1.predict(exog=ex, exog_infl=ex_infl,
                                      offset=offset)
        fitted3_2 = res3.predict(exog=ex, exog_infl=ex_infl,
                                 exposure=np.exp(offset))
        assert_allclose(fitted3_2, fitted1_2, atol=1e-6, rtol=1e-6)
        assert_allclose(fitted1_2, fitted1[:10:2], atol=1e-6, rtol=1e-6)
        assert_allclose(fitted3_2, fitted1[:10:2], atol=1e-6, rtol=1e-6)

        # without specifying offset and exposure
        fitted1_3 = self.res1.predict(exog=ex, exog_infl=ex_infl)
        fitted3_3 = res3.predict(exog=ex, exog_infl=ex_infl)
        assert_allclose(fitted3_3, fitted1_3, atol=1e-6, rtol=1e-6)


class TestZeroInflatedModelPandas(CheckGeneric):
    @classmethod
    def setup_class(cls):
        data = sm.datasets.randhie.load_pandas()
        cls.endog = data.endog
        cls.data = data
        exog = sm.add_constant(data.exog.iloc[:, 1:4], prepend=False)
        exog_infl = sm.add_constant(data.exog.iloc[:, 0], prepend=False)
        # we do not need to verify convergence here
        start_params = np.asarray([0.10337834587498942, -1.0459825102508549,
                                   -0.08219794475894268, 0.00856917434709146,
                                   -0.026795737379474334, 1.4823632430107334])
        model = sm.ZeroInflatedPoisson(data.endog, exog,
            exog_infl=exog_infl, inflation='logit')
        cls.res1 = model.fit(start_params=start_params, method='newton',
                             maxiter=500, disp=False)
        # for llnull test
        cls.res1._results._attach_nullmodel = True
        cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset']
        cls.init_kwds = {'inflation': 'logit'}
        res2 = RandHIE.zero_inflated_poisson_logit
        cls.res2 = res2

    def test_names(self):
        param_names = ['inflate_lncoins', 'inflate_const', 'idp', 'lpi',
                       'fmde', 'const']
        assert_array_equal(self.res1.model.exog_names, param_names)
        assert_array_equal(self.res1.params.index.tolist(), param_names)
        assert_array_equal(self.res1.bse.index.tolist(), param_names)

        exog = sm.add_constant(self.data.exog.iloc[:, 1:4], prepend=True)
        exog_infl = sm.add_constant(self.data.exog.iloc[:, 0], prepend=True)
        param_names = ['inflate_const', 'inflate_lncoins', 'const', 'idp',
                       'lpi', 'fmde']
        model = sm.ZeroInflatedPoisson(self.data.endog, exog,
            exog_infl=exog_infl, inflation='logit')
        assert_array_equal(model.exog_names, param_names)


class TestZeroInflatedPoisson_predict:
    @classmethod
    def setup_class(cls):
        expected_params = [1, 0.5]
        np.random.seed(999)
        nobs = 2000
        exog = np.ones((nobs, 2))
        exog[:nobs//2, 1] = 2
        mu_true = exog.dot(expected_params)
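        # DGP: Poisson mean 2.0 in the first half of the sample and 1.5 in
        # the second half, with a 5% probability of a structural zero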
        cls.endog = sm.distributions.zipoisson.rvs(mu_true, 0.05,
                                                   size=mu_true.shape)
        model = sm.ZeroInflatedPoisson(cls.endog, exog)
        cls.res = model.fit(method='bfgs', maxiter=5000, disp=False)

        cls.params_true = [mu_true, 0.05, nobs]

    def test_mean(self):
        def compute_conf_interval_95(mu, prob_infl, nobs):
            dispersion_factor = 1 + prob_infl * mu

            # scalar variance of the ZIP mixture
            var = (dispersion_factor*(1-prob_infl)*mu).mean()
            var += (((1-prob_infl)*mu)**2).mean()
            var -= (((1-prob_infl)*mu).mean())**2
            std = np.sqrt(var)
            # Central limit theorem
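            # two standard errors of the sample mean give the approximate
            # 95% half-width (z = 1.96 rounded up to 2)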
            conf_intv_95 = 2 * std / np.sqrt(nobs)
            return conf_intv_95

        conf_interval_95 = compute_conf_interval_95(*self.params_true)
        assert_allclose(self.res.predict().mean(), self.endog.mean(),
                        atol=conf_interval_95, rtol=0)

    def test_var(self):
        def compute_mixture_var(dispersion_factor, prob_main, mu):
            # The variance of the mixture is the mixture of the variances
            # plus a non-negative term accounting for the (weighted)
            # dispersion of the means, see stats.stackexchange #16609 and
            # Casella & Berger's Statistical Inference (Example 10.2.1).
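            # law of total variance: Var(Y) = E[Var(Y|Z)] + Var(E[Y|Z]),
            # with Z the mixture component indicator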
            prob_infl = 1 - prob_main
            var = (dispersion_factor*(1-prob_infl)*mu).mean()
            var += (((1-prob_infl)*mu)**2).mean()
            var -= (((1-prob_infl)*mu).mean())**2
            return var

        res = self.res
        var_fitted = compute_mixture_var(res._dispersion_factor,
                                         res.predict(which='prob-main'),
                                         res.predict(which='mean-main'))

        assert_allclose(var_fitted.mean(),
                        self.endog.var(), atol=5e-2, rtol=5e-2)

    def test_predict_prob(self):
        res = self.res

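        # compare the fitted count probabilities with the ZIP pmf at the
        # fitted mean and the true inflation probability of 0.05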
        pr = res.predict(which='prob')
        pr2 = sm.distributions.zipoisson.pmf(np.arange(pr.shape[1])[:, None],
                                             res.predict(), 0.05).T
        assert_allclose(pr, pr2, rtol=0.05, atol=0.05)

    def test_predict_options(self):
        # check default exog_infl, see #4757
        res = self.res
        n = 5
        pr1 = res.predict(which='prob')
        pr0 = res.predict(exog=res.model.exog[:n], which='prob')
        assert_allclose(pr0, pr1[:n], rtol=1e-10)

        fitted1 = res.predict()
        fitted0 = res.predict(exog=res.model.exog[:n])
        assert_allclose(fitted0, fitted1[:n], rtol=1e-10)


@pytest.mark.slow
class TestZeroInflatedGeneralizedPoisson(CheckGeneric):
    @classmethod
    def setup_class(cls):
        data = sm.datasets.randhie.load()
        cls.endog = np.asarray(data.endog)
        data.exog = np.asarray(data.exog)
        exog = sm.add_constant(data.exog[:, 1:4], prepend=False)
        exog_infl = sm.add_constant(data.exog[:, 0], prepend=False)
        cls.res1 = sm.ZeroInflatedGeneralizedPoisson(data.endog, exog,
            exog_infl=exog_infl, p=1).fit(method='newton', maxiter=500,
                                          disp=False)
        # for llnull test
        cls.res1._results._attach_nullmodel = True
        cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset', 'p']
        cls.init_kwds = {'inflation': 'logit', 'p': 1}
        res2 = RandHIE.zero_inflated_generalized_poisson
        cls.res2 = res2

    def test_bse(self):
        pass

    def test_conf_int(self):
        pass

    def test_bic(self):
        pass

    def test_t(self):
        unit_matrix = np.identity(self.res1.params.size)
        t_test = self.res1.t_test(unit_matrix)
        assert_allclose(self.res1.tvalues, t_test.tvalue)

    def test_minimize(self, reset_randomstate):
        # check additional optimizers using the `minimize` option
        model = self.res1.model
        # use the same start_params, but avoid recomputing
        start_params = self.res1.mle_settings['start_params']

        res_ncg = model.fit(start_params=start_params,
                            method='minimize', min_method="trust-ncg",
                            maxiter=500, disp=False)

        assert_allclose(res_ncg.params, self.res2.params,
                        atol=1e-3, rtol=0.04)
        assert_allclose(res_ncg.bse, self.res2.bse,
                        atol=1e-3, rtol=0.6)
        assert_(res_ncg.mle_retvals['converged'] is True)

        res_dog = model.fit(start_params=start_params,
                            method='minimize', min_method="dogleg",
                            maxiter=500, disp=False)

        assert_allclose(res_dog.params, self.res2.params,
                        atol=1e-3, rtol=3e-3)
        assert_allclose(res_dog.bse, self.res2.bse,
                        atol=1e-3, rtol=0.6)
        assert_(res_dog.mle_retvals['converged'] is True)

        # Set random_state here to improve reproducibility
        random_state = np.random.RandomState(1)
        seed = {'seed': random_state}
        res_bh = model.fit(start_params=start_params,
                           method='basinhopping', niter=500, stepsize=0.1,
                           niter_success=None, disp=False, interval=1, **seed)

        assert_allclose(res_bh.params, self.res2.params,
                        atol=1e-4, rtol=1e-4)
        assert_allclose(res_bh.bse, self.res2.bse,
                        atol=1e-3, rtol=0.6)
        # skipped: res_bh reports converged=False, but the params agree
        # assert_(res_bh.mle_retvals['converged'] is True)


class TestZeroInflatedGeneralizedPoisson_predict:
    @classmethod
    def setup_class(cls):
        expected_params = [1, 0.5, 0.5]
        np.random.seed(999)
        nobs = 2000
        exog = np.ones((nobs, 2))
        exog[:nobs//2, 1] = 2
        mu_true = exog.dot(expected_params[:-1])
        cls.endog = sm.distributions.zigenpoisson.rvs(mu_true,
                                                      expected_params[-1],
                                                      2, 0.5,
                                                      size=mu_true.shape)
        model = sm.ZeroInflatedGeneralizedPoisson(cls.endog, exog, p=2)
        cls.res = model.fit(method='bfgs', maxiter=5000, disp=False)

        cls.params_true = [mu_true, expected_params[-1], 2, 0.5, nobs]

    def test_mean(self):
        def compute_conf_interval_95(mu, alpha, p, prob_infl, nobs):
            p = p - 1
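            # the generalized Poisson variance factor is
            # (1 + alpha * mu**p)**2 with the decremented p from above;
            # prob_infl * mu adds the contribution of the zero inflation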
            dispersion_factor = (1 + alpha * mu**p)**2 + prob_infl * mu

            # scalar variance of the zero-inflated mixture
            var = (dispersion_factor*(1-prob_infl)*mu).mean()
            var += (((1-prob_infl)*mu)**2).mean()
            var -= (((1-prob_infl)*mu).mean())**2
            std = np.sqrt(var)
            # Central limit theorem
            conf_intv_95 = 2 * std / np.sqrt(nobs)
            return conf_intv_95

        conf_interval_95 = compute_conf_interval_95(*self.params_true)
        assert_allclose(self.res.predict().mean(), self.endog.mean(),
                        atol=conf_interval_95, rtol=0)

    def test_var(self):
        def compute_mixture_var(dispersion_factor, prob_main, mu):
            prob_infl = 1 - prob_main
            var = (dispersion_factor*(1-prob_infl)*mu).mean()
            var += (((1-prob_infl)*mu)**2).mean()
            var -= (((1-prob_infl)*mu).mean())**2
            return var

        res = self.res
        var_fitted = compute_mixture_var(res._dispersion_factor,
                                         res.predict(which='prob-main'),
                                         res.predict(which='mean-main'))

        assert_allclose(var_fitted.mean(),
                        self.endog.var(), atol=0.05, rtol=0.1)

    def test_predict_prob(self):
        res = self.res

        pr = res.predict(which='prob')
        pr2 = sm.distributions.zinegbin.pmf(np.arange(pr.shape[1])[:, None],
                                            res.predict(), 0.5, 2, 0.5).T
        assert_allclose(pr, pr2, rtol=0.08, atol=0.05)


class TestZeroInflatedNegativeBinomialP(CheckGeneric):
    @classmethod
    def setup_class(cls):
        data = sm.datasets.randhie.load()
        cls.endog = np.asarray(data.endog)
        data.exog = np.asarray(data.exog)
        exog = sm.add_constant(data.exog[:, 1], prepend=False)
        exog_infl = sm.add_constant(data.exog[:, 0], prepend=False)
        # cheating for now, parameters are not well identified in this dataset
        # see https://github.com/statsmodels/statsmodels/pull/3928#issuecomment-331724022
        sp = np.array([1.88, -10.28, -0.20, 1.14, 1.34])
        cls.res1 = sm.ZeroInflatedNegativeBinomialP(data.endog, exog,
            exog_infl=exog_infl, p=2).fit(start_params=sp, method='nm',
                                          xtol=1e-6, maxiter=5000, disp=False)
        # for llnull test
        cls.res1._results._attach_nullmodel = True
        cls.init_keys = ['exog_infl', 'exposure', 'inflation', 'offset', 'p']
        cls.init_kwds = {'inflation': 'logit', 'p': 2}
        res2 = RandHIE.zero_inflated_negative_binomial
        cls.res2 = res2

    def test_params(self):
        assert_allclose(self.res1.params, self.res2.params,
                        atol=1e-3, rtol=1e-3)

    def test_conf_int(self):
        pass

    def test_bic(self):
        pass

    def test_fit_regularized(self):
        model = self.res1.model

        alpha = np.ones(len(self.res1.params))
        alpha[-2:] = 0
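        # as in CheckGeneric.test_fit_regularized, leave the last two
        # parameters unpenalized via a zero penalty weight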
        res_reg = model.fit_regularized(alpha=alpha*0.01, disp=False,
                                        maxiter=500)

        assert_allclose(res_reg.params[2:], self.res1.params[2:],
                        atol=1e-1, rtol=1e-1)

    # possibly slow, adds 25 seconds
    def test_minimize(self, reset_randomstate):
        # check additional optimizers using the `minimize` option
        model = self.res1.model
        # use the same start_params, but avoid recomputing
        start_params = self.res1.mle_settings['start_params']

        res_ncg = model.fit(start_params=start_params,
                            method='minimize', min_method="trust-ncg",
                            maxiter=500, disp=False)

        assert_allclose(res_ncg.params, self.res2.params,
                        atol=1e-3, rtol=0.03)
        assert_allclose(res_ncg.bse, self.res2.bse,
                        atol=1e-3, rtol=0.06)
        assert_(res_ncg.mle_retvals['converged'] is True)

        res_dog = model.fit(start_params=start_params,
                            method='minimize', min_method="dogleg",
                            maxiter=500, disp=False)

        assert_allclose(res_dog.params, self.res2.params,
                        atol=1e-3, rtol=3e-3)
        assert_allclose(res_dog.bse, self.res2.bse,
                        atol=1e-3, rtol=7e-3)
        assert_(res_dog.mle_retvals['converged'] is True)

        res_bh = model.fit(start_params=start_params,
                           method='basinhopping', maxiter=500,
                           niter_success=3, disp=False)

        assert_allclose(res_bh.params, self.res2.params,
                        atol=1e-4, rtol=3e-4)
        assert_allclose(res_bh.bse, self.res2.bse,
                        atol=1e-3, rtol=1e-3)
        # skipped: res_bh reports converged=False, but the params agree
        # assert_(res_bh.mle_retvals['converged'] is True)


class TestZeroInflatedNegativeBinomialP_predict:
    @classmethod
    def setup_class(cls):
        expected_params = [1, 1, 0.5]
        np.random.seed(999)
        nobs = 5000
        exog = np.ones((nobs, 2))
        exog[:nobs//2, 1] = 0

        prob_infl = 0.15
        mu_true = np.exp(exog.dot(expected_params[:-1]))
        cls.endog = sm.distributions.zinegbin.rvs(mu_true,
                                                  expected_params[-1], 2,
                                                  prob_infl,
                                                  size=mu_true.shape)
        model = sm.ZeroInflatedNegativeBinomialP(cls.endog, exog, p=2)
        cls.res = model.fit(method='bfgs', maxiter=5000, disp=False)

        # attach others
        cls.prob_infl = prob_infl
        cls.params_true = [mu_true, expected_params[-1], 2, prob_infl, nobs]

    def test_mean(self):
        def compute_conf_interval_95(mu, alpha, p, prob_infl, nobs):
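            # the NB-P variance is mu * (1 + alpha * mu**(p-1));
            # prob_infl * mu adds the contribution of the zero inflation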
            dispersion_factor = 1 + alpha * mu**(p-1) + prob_infl * mu

            # scalar variance of the zero-inflated mixture
            var = (dispersion_factor*(1-prob_infl)*mu).mean()
            var += (((1-prob_infl)*mu)**2).mean()
            var -= (((1-prob_infl)*mu).mean())**2
            std = np.sqrt(var)
            # Central limit theorem
            conf_intv_95 = 2 * std / np.sqrt(nobs)
            return conf_intv_95

        conf_interval_95 = compute_conf_interval_95(*self.params_true)
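        # mixture mean of the DGP: E[Y] = (1 - prob_infl) * mu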
        mean_true = ((1-self.prob_infl)*self.params_true[0]).mean()
        assert_allclose(self.res.predict().mean(),
                        mean_true, atol=conf_interval_95, rtol=0)

    def test_var(self):
        # TODO: check precision
        def compute_mixture_var(dispersion_factor, prob_main, mu):
            prob_infl = 1 - prob_main
            var = (dispersion_factor * (1 - prob_infl) * mu).mean()
            var += (((1 - prob_infl) * mu) ** 2).mean()
            var -= (((1 - prob_infl) * mu).mean()) ** 2
            return var

        res = self.res
        var_fitted = compute_mixture_var(res._dispersion_factor,
                                         res.predict(which='prob-main'),
                                         res.predict(which='mean-main'))

        assert_allclose(var_fitted.mean(),
                        self.endog.var(), rtol=0.2)

    def test_predict_prob(self):
        res = self.res
        endog = res.model.endog

        pr = res.predict(which='prob')
        pr2 = sm.distributions.zinegbin.pmf(np.arange(pr.shape[1])[:, None],
                                            res.predict(), 0.5, 2,
                                            self.prob_infl).T
        assert_allclose(pr, pr2, rtol=0.1, atol=0.1)
        prm = pr.mean(0)
        pr2m = pr2.mean(0)
        freq = np.bincount(endog.astype(int)) / len(endog)
        assert_allclose(((pr2m - prm)**2).mean(), 0, rtol=1e-10, atol=5e-4)
        assert_allclose(((prm - freq)**2).mean(), 0, rtol=1e-10, atol=1e-4)

    def test_predict_generic_zi(self):
        # These tests do not use numbers from other packages; they check
        # closeness of the estimates to the true/DGP values and theoretical
        # relationships between the predicted quantities.
        res = self.res
        endog = self.endog
        exog = self.res.model.exog
        prob_infl = self.prob_infl
        nobs = len(endog)

        freq = np.bincount(endog.astype(int)) / len(endog)
        probs = res.predict(which='prob')
        probsm = probs.mean(0)
        assert_allclose(freq, probsm, atol=0.02)

        probs_unique = res.predict(exog=[[1, 0], [1, 1]],
                                   exog_infl=np.asarray([[1], [1]]),
                                   which='prob')
        # no default for exog_infl yet
        # probs_unique = res.predict(exog=[[1, 0], [1, 1]], which='prob')

        probs_unique2 = probs[[1, nobs-1]]

        assert_allclose(probs_unique, probs_unique2, atol=1e-10)

        probs0_unique = res.predict(exog=[[1, 0], [1, 1]],
                                    exog_infl=np.asarray([[1], [1]]),
                                    which='prob-zero')
        assert_allclose(probs0_unique, probs_unique2[:, 0], rtol=1e-10)

        probs_main_unique = res.predict(exog=[[1, 0], [1, 1]],
                                        exog_infl=np.asarray([[1], [1]]),
                                        which='prob-main')
        probs_main = res.predict(which='prob-main')
        assert_allclose(probs_main_unique, probs_main[[0, -1]], rtol=1e-10)
        assert_allclose(probs_main_unique, 1 - prob_infl, atol=0.01)

        pred = res.predict(exog=[[1, 0], [1, 1]],
                           exog_infl=np.asarray([[1], [1]]))
        pred1 = endog[exog[:, 1] == 0].mean(), endog[exog[:, 1] == 1].mean()
        assert_allclose(pred, pred1, rtol=0.05)

        pred_main_unique = res.predict(exog=[[1, 0], [1, 1]],
                                       exog_infl=np.asarray([[1], [1]]),
                                       which='mean-main')
        assert_allclose(pred_main_unique, np.exp(np.cumsum(res.params[1:3])),
                        rtol=1e-10)

        # TODO: why does the following fail? The params are not close enough
        # to the DGP, but the results are close to statistics of the
        # simulated data. What is mu_true in the DGP
        # sm.distributions.zinegbin.rvs?
        # assert_allclose(pred_main_unique, mu_true[[1, -1]] * (1 - prob_infl), rtol=0.01)

        # mean-nonzero
        mean_nz = (endog[(exog[:, 1] == 0) & (endog > 0)].mean(),
                   endog[(exog[:, 1] == 1) & (endog > 0)].mean())
        pred_nonzero_unique = res.predict(exog=[[1, 0], [1, 1]],
                                          exog_infl=np.asarray([[1], [1]]),
                                          which='mean-nonzero')
        assert_allclose(pred_nonzero_unique, mean_nz, rtol=0.05)

        pred_lin_unique = res.predict(exog=[[1, 0], [1, 1]],
                                      exog_infl=np.asarray([[1], [1]]),
                                      which='linear')
        assert_allclose(pred_lin_unique, np.cumsum(res.params[1:3]),
                        rtol=1e-10)


class TestZeroInflatedNegativeBinomialP_predict2:
    @classmethod
    def setup_class(cls):
        data = sm.datasets.randhie.load()

        cls.endog = np.asarray(data.endog)
        data.exog = np.asarray(data.exog)
        exog = data.exog
        start_params = np.array([
            -2.83983767, -2.31595924, -3.9263248,  -4.01816431, -5.52251843,
            -2.4351714,  -4.61636366, -4.17959785, -0.12960256, -0.05653484,
            -0.21206673,  0.08782572, -0.02991995,  0.22901208,  0.0620983,
            0.06809681,  0.0841814,   0.185506,    1.36527888])
        mod = sm.ZeroInflatedNegativeBinomialP(
            cls.endog, exog, exog_infl=exog, p=2)
        res = mod.fit(start_params=start_params, method="bfgs",
                      maxiter=1000, disp=False)

        cls.res = res

    def test_mean(self):
        assert_allclose(self.res.predict().mean(), self.endog.mean(),
                        atol=0.02)

    def test_zero_nonzero_mean(self):
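        # check the decomposition E[Y] = P(Y > 0) * E[Y | Y > 0] on the
        # averaged predictions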
        mean1 = self.endog.mean()
        mean2 = ((1 - self.res.predict(which='prob-zero').mean()) *
                 self.res.predict(which='mean-nonzero').mean())
        assert_allclose(mean1, mean2, atol=0.2)


class TestPandasOffset:

    def test_pd_offset_exposure(self):
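        # smoke test: pandas Series offset and exposure should be accepted
        # without raising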
        endog = pd.DataFrame({'F': [0.0, 0.0, 0.0, 0.0, 1.0]})
        exog = pd.DataFrame({'I': [1.0, 1.0, 1.0, 1.0, 1.0],
                             'C': [0.0, 1.0, 0.0, 1.0, 0.0]})
        exposure = pd.Series([1., 1, 1, 2, 1])
        offset = pd.Series([1, 1, 1, 2, 1])
        sm.Poisson(endog=endog, exog=exog, offset=offset).fit()
        inflations = ['logit', 'probit']
        for inflation in inflations:
            sm.ZeroInflatedPoisson(endog=endog, exog=exog["I"],
                                   exposure=exposure,
                                   inflation=inflation).fit()