1__all__ = ["ZeroInflatedPoisson", "ZeroInflatedGeneralizedPoisson",
2           "ZeroInflatedNegativeBinomialP"]
3
4import warnings
5import numpy as np
6import statsmodels.base.model as base
7import statsmodels.base.wrapper as wrap
8import statsmodels.regression.linear_model as lm
9from statsmodels.discrete.discrete_model import (DiscreteModel, CountModel,
10                                                 Poisson, Logit, CountResults,
11                                                 L1CountResults, Probit,
12                                                 _discrete_results_docs,
13                                                 _validate_l1_method,
14                                                 GeneralizedPoisson,
15                                                 NegativeBinomialP)
16from statsmodels.distributions import zipoisson, zigenpoisson, zinegbin
17from statsmodels.tools.numdiff import approx_fprime, approx_hess
18from statsmodels.tools.decorators import cache_readonly
19from statsmodels.tools.sm_exceptions import ConvergenceWarning
20from statsmodels.compat.pandas import Appender
21
22
23_doc_zi_params = """
24    exog_infl : array_like or None
25        Explanatory variables for the binary inflation model, i.e. for
26        mixing probability model. If None, then a constant is used.
27    offset : array_like
28        Offset is added to the linear prediction with coefficient equal to 1.
29    exposure : array_like
30        Log(exposure) is added to the linear prediction with coefficient
31        equal to 1.
32    inflation : {'logit', 'probit'}
33        The model for the zero inflation, either Logit (default) or Probit
34    """
35
36
37class GenericZeroInflated(CountModel):
38    __doc__ = """
39    Generic Zero Inflated Model
40
41    %(params)s
42    %(extra_params)s
43
44    Attributes
45    ----------
46    endog : ndarray
47        A reference to the endogenous response variable
48    exog : ndarray
49        A reference to the exogenous design.
50    exog_infl : ndarray
51        A reference to the zero-inflated exogenous design.
52    """ % {'params' : base._model_params_doc,
53           'extra_params' : _doc_zi_params + base._missing_param_doc}
54
55    def __init__(self, endog, exog, exog_infl=None, offset=None,
56                 inflation='logit', exposure=None, missing='none', **kwargs):
57        super(GenericZeroInflated, self).__init__(endog, exog, offset=offset,
58                                                  exposure=exposure,
59                                                  missing=missing, **kwargs)
60
61        if exog_infl is None:
62            self.k_inflate = 1
63            self._no_exog_infl = True
64            self.exog_infl = np.ones((endog.size, self.k_inflate),
65                                     dtype=np.float64)
66        else:
67            self.exog_infl = exog_infl
68            self.k_inflate = exog_infl.shape[1]
69            self._no_exog_infl = False
70
71        if len(exog.shape) == 1:
72            self.k_exog = 1
73        else:
74            self.k_exog = exog.shape[1]
75
76        self.infl = inflation
77        if inflation == 'logit':
78            self.model_infl = Logit(np.zeros(self.exog_infl.shape[0]),
79                                    self.exog_infl)
80            self._hessian_inflate = self._hessian_logit
81        elif inflation == 'probit':
82            self.model_infl = Probit(np.zeros(self.exog_infl.shape[0]),
83                                    self.exog_infl)
84            self._hessian_inflate = self._hessian_probit
85
86        else:
87            raise ValueError("inflation == %s, which is not handled"
88                             % inflation)
89
90        self.inflation = inflation
91        self.k_extra = self.k_inflate
92
93        if len(self.exog) != len(self.exog_infl):
94            raise ValueError('exog and exog_infl have different number of'
95                             'observation. `missing` handling is not supported')
96
97        infl_names = ['inflate_%s' % i for i in self.model_infl.data.param_names]
98        self.exog_names[:] = infl_names + list(self.exog_names)
99        self.exog_infl = np.asarray(self.exog_infl, dtype=np.float64)
100
101        self._init_keys.extend(['exog_infl', 'inflation'])
102        self._null_drop_keys = ['exog_infl']
103
104    def loglike(self, params):
105        """
106        Loglikelihood of Generic Zero Inflated model.
107
108        Parameters
109        ----------
110        params : array_like
111            The parameters of the model.
112
113        Returns
114        -------
115        loglike : float
116            The log-likelihood function of the model evaluated at `params`.
117            See notes.
118
119        Notes
120        --------
121        .. math:: \\ln L=\\sum_{y_{i}=0}\\ln(w_{i}+(1-w_{i})*P_{main\\_model})+
122            \\sum_{y_{i}>0}(\\ln(1-w_{i})+L_{main\\_model})
123            where P - pdf of main model, L - loglike function of main model.
124        """
125        return np.sum(self.loglikeobs(params))
126
127    def loglikeobs(self, params):
128        """
129        Loglikelihood for observations of Generic Zero Inflated model.
130
131        Parameters
132        ----------
133        params : array_like
134            The parameters of the model.
135
136        Returns
137        -------
138        loglike : ndarray
139            The log likelihood for each observation of the model evaluated
140            at `params`. See Notes for definition.
141
142        Notes
143        --------
144        .. math:: \\ln L=\\ln(w_{i}+(1-w_{i})*P_{main\\_model})+
145            \\ln(1-w_{i})+L_{main\\_model}
146            where P - pdf of main model, L - loglike function of main model.
147
148        for observations :math:`i=1,...,n`
149        """
150        params_infl = params[:self.k_inflate]
151        params_main = params[self.k_inflate:]
152
153        y = self.endog
154        w = self.model_infl.predict(params_infl)
155
156        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
157        llf_main = self.model_main.loglikeobs(params_main)
158        zero_idx = np.nonzero(y == 0)[0]
159        nonzero_idx = np.nonzero(y)[0]
160
161        llf = np.zeros_like(y, dtype=np.float64)
162        llf[zero_idx] = (np.log(w[zero_idx] +
163            (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
164        llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
165
166        return llf
167
168    @Appender(DiscreteModel.fit.__doc__)
169    def fit(self, start_params=None, method='bfgs', maxiter=35,
170            full_output=1, disp=1, callback=None,
171            cov_type='nonrobust', cov_kwds=None, use_t=None, **kwargs):
172        if start_params is None:
173            offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
174            if np.size(offset) == 1 and offset == 0:
175                offset = None
176            start_params = self._get_start_params()
177
178        if callback is None:
179            # work around perfect separation callback #3895
180            callback = lambda *x: x
181
182        mlefit = super(GenericZeroInflated, self).fit(start_params=start_params,
183                       maxiter=maxiter, disp=disp, method=method,
184                       full_output=full_output, callback=callback,
185                       **kwargs)
186
187        zipfit = self.result_class(self, mlefit._results)
188        result = self.result_class_wrapper(zipfit)
189
190        if cov_kwds is None:
191            cov_kwds = {}
192
193        result._get_robustcov_results(cov_type=cov_type,
194                                      use_self=True, use_t=use_t, **cov_kwds)
195        return result
196
197    @Appender(DiscreteModel.fit_regularized.__doc__)
198    def fit_regularized(self, start_params=None, method='l1',
199            maxiter='defined_by_method', full_output=1, disp=1, callback=None,
200            alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4,
201            qc_tol=0.03, **kwargs):
202
203        _validate_l1_method(method)
204
205        if np.size(alpha) == 1 and alpha != 0:
206            k_params = self.k_exog + self.k_inflate
207            alpha = alpha * np.ones(k_params)
208
209        extra = self.k_extra - self.k_inflate
210        alpha_p = alpha[:-(self.k_extra - extra)] if (self.k_extra
211            and np.size(alpha) > 1) else alpha
212        if start_params is None:
213            offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0)
214            if np.size(offset) == 1 and offset == 0:
215                offset = None
216            start_params = self.model_main.fit_regularized(
217                start_params=start_params, method=method, maxiter=maxiter,
218                full_output=full_output, disp=0, callback=callback,
219                alpha=alpha_p, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
220                size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs).params
221            start_params = np.append(np.ones(self.k_inflate), start_params)
222        cntfit = super(CountModel, self).fit_regularized(
223                start_params=start_params, method=method, maxiter=maxiter,
224                full_output=full_output, disp=disp, callback=callback,
225                alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol,
226                size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs)
227
228        discretefit = self.result_class_reg(self, cntfit)
229        return self.result_class_reg_wrapper(discretefit)
230
231    def score_obs(self, params):
232        """
233        Generic Zero Inflated model score (gradient) vector of the log-likelihood
234
235        Parameters
236        ----------
237        params : array_like
238            The parameters of the model
239
240        Returns
241        -------
242        score : ndarray, 1-D
243            The score vector of the model, i.e. the first derivative of the
244            loglikelihood function, evaluated at `params`
245        """
246        params_infl = params[:self.k_inflate]
247        params_main = params[self.k_inflate:]
248
249        y = self.endog
250        w = self.model_infl.predict(params_infl)
251        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
252        score_main = self.model_main.score_obs(params_main)
253        llf_main = self.model_main.loglikeobs(params_main)
254        llf = self.loglikeobs(params)
255        zero_idx = np.nonzero(y == 0)[0]
256        nonzero_idx = np.nonzero(y)[0]
257
258        mu = self.model_main.predict(params_main)
259
260        dldp = np.zeros((self.exog.shape[0], self.k_exog), dtype=np.float64)
261        dldw = np.zeros_like(self.exog_infl, dtype=np.float64)
262
263        dldp[zero_idx,:] = (score_main[zero_idx].T *
264                     (1 - (w[zero_idx]) / np.exp(llf[zero_idx]))).T
265        dldp[nonzero_idx,:] = score_main[nonzero_idx]
266
267        if self.inflation == 'logit':
268            dldw[zero_idx,:] =  (self.exog_infl[zero_idx].T * w[zero_idx] *
269                                 (1 - w[zero_idx]) *
270                                 (1 - np.exp(llf_main[zero_idx])) /
271                                  np.exp(llf[zero_idx])).T
272            dldw[nonzero_idx,:] = -(self.exog_infl[nonzero_idx].T *
273                                    w[nonzero_idx]).T
274        elif self.inflation == 'probit':
275            return approx_fprime(params, self.loglikeobs)
276
277        return np.hstack((dldw, dldp))
278
279    def score(self, params):
280        return self.score_obs(params).sum(0)
281
282    def _hessian_main(self, params):
283        pass
284
285    def _hessian_logit(self, params):
286        params_infl = params[:self.k_inflate]
287        params_main = params[self.k_inflate:]
288
289        y = self.endog
290        w = self.model_infl.predict(params_infl)
291        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
292        score_main = self.model_main.score_obs(params_main)
293        llf_main = self.model_main.loglikeobs(params_main)
294        llf = self.loglikeobs(params)
295        zero_idx = np.nonzero(y == 0)[0]
296        nonzero_idx = np.nonzero(y)[0]
297
298        hess_arr = np.zeros((self.k_inflate, self.k_exog + self.k_inflate))
299
300        pmf = np.exp(llf)
301
302        #d2l/dw2
303        for i in range(self.k_inflate):
304            for j in range(i, -1, -1):
305                hess_arr[i, j] = ((
306                    self.exog_infl[zero_idx, i] * self.exog_infl[zero_idx, j] *
307                    (w[zero_idx] * (1 - w[zero_idx]) * ((1 -
308                    np.exp(llf_main[zero_idx])) * (1 - 2 * w[zero_idx]) *
309                    np.exp(llf[zero_idx]) - (w[zero_idx] - w[zero_idx]**2) *
310                    (1 - np.exp(llf_main[zero_idx]))**2) /
311                    pmf[zero_idx]**2)).sum() -
312                    (self.exog_infl[nonzero_idx, i] * self.exog_infl[nonzero_idx, j] *
313                    w[nonzero_idx] * (1 - w[nonzero_idx])).sum())
314
315        #d2l/dpdw
316        for i in range(self.k_inflate):
317            for j in range(self.k_exog):
318                hess_arr[i, j + self.k_inflate] = -(score_main[zero_idx, j] *
319                    w[zero_idx] * (1 - w[zero_idx]) *
320                    self.exog_infl[zero_idx, i] / pmf[zero_idx]).sum()
321
322        return hess_arr
323
324    def _hessian_probit(self, params):
325        pass
326
327    def hessian(self, params):
328        """
329        Generic Zero Inflated model Hessian matrix of the loglikelihood
330
331        Parameters
332        ----------
333        params : array_like
334            The parameters of the model
335
336        Returns
337        -------
338        hess : ndarray, (k_vars, k_vars)
339            The Hessian, second derivative of loglikelihood function,
340            evaluated at `params`
341
342        Notes
343        -----
344        """
345        hess_arr_main = self._hessian_main(params)
346        hess_arr_infl = self._hessian_inflate(params)
347
348        if hess_arr_main is None or hess_arr_infl is None:
349            return approx_hess(params, self.loglike)
350
351        dim = self.k_exog + self.k_inflate
352
353        hess_arr = np.zeros((dim, dim))
354
355        hess_arr[:self.k_inflate,:] = hess_arr_infl
356        hess_arr[self.k_inflate:,self.k_inflate:] = hess_arr_main
357
358        tri_idx = np.triu_indices(self.k_exog + self.k_inflate, k=1)
359        hess_arr[tri_idx] = hess_arr.T[tri_idx]
360
361        return hess_arr
362
363    def predict(self, params, exog=None, exog_infl=None, exposure=None,
364                offset=None, which='mean'):
365        """
366        Predict response variable of a count model given exogenous variables.
367
368        Parameters
369        ----------
370        params : array_like
371            The parameters of the model
372        exog : ndarray, optional
373            A reference to the exogenous design.
374            If not assigned, will be used exog from fitting.
375        exog_infl : ndarray, optional
376            A reference to the zero-inflated exogenous design.
377            If not assigned, will be used exog from fitting.
378        offset : ndarray, optional
379            Offset is added to the linear prediction with coefficient equal to 1.
380        exposure : ndarray, optional
381            Log(exposure) is added to the linear prediction with coefficient
382            equal to 1. If exposure is specified, then it will be logged by the method.
383            The user does not need to log it first.
384        which : str, optional
385            Define values that will be predicted.
386            'mean', 'mean-main', 'linear', 'mean-nonzero', 'prob-zero, 'prob', 'prob-main'
387            Default is 'mean'.
388
389        Notes
390        -----
391        """
392        no_exog = False
393        if exog is None:
394            no_exog = True
395            exog = self.exog
396
397        if exog_infl is None:
398            if no_exog:
399                exog_infl = self.exog_infl
400            else:
401                if self._no_exog_infl:
402                    exog_infl = np.ones((len(exog), 1))
403        else:
404            exog_infl = np.asarray(exog_infl)
405            if exog_infl.ndim == 1 and self.k_inflate == 1:
406                exog_infl = exog_infl[:, None]
407
408        if exposure is None:
409            if no_exog:
410                exposure = getattr(self, 'exposure', 0)
411            else:
412                exposure = 0
413        else:
414            exposure = np.log(exposure)
415
416        if offset is None:
417            if no_exog:
418                offset = getattr(self, 'offset', 0)
419            else:
420                offset = 0
421
422        params_infl = params[:self.k_inflate]
423        params_main = params[self.k_inflate:]
424
425        prob_main = 1 - self.model_infl.predict(params_infl, exog_infl)
426
427        lin_pred = np.dot(exog, params_main[:self.exog.shape[1]]) + exposure + offset
428
429        # Refactor: This is pretty hacky,
430        # there should be an appropriate predict method in model_main
431        # this is just prob(y=0 | model_main)
432        tmp_exog = self.model_main.exog
433        tmp_endog = self.model_main.endog
434        tmp_offset = getattr(self.model_main, 'offset', ['no'])
435        tmp_exposure = getattr(self.model_main, 'exposure', ['no'])
436        self.model_main.exog = exog
437        self.model_main.endog = np.zeros((exog.shape[0]))
438        self.model_main.offset = offset
439        self.model_main.exposure = exposure
440        llf = self.model_main.loglikeobs(params_main)
441        self.model_main.exog = tmp_exog
442        self.model_main.endog = tmp_endog
443        # tmp_offset might be an array with elementwise equality testing
444        if len(tmp_offset) == 1 and tmp_offset[0] == 'no':
445            del self.model_main.offset
446        else:
447            self.model_main.offset = tmp_offset
448        if len(tmp_exposure) == 1 and tmp_exposure[0] == 'no':
449            del self.model_main.exposure
450        else:
451            self.model_main.exposure = tmp_exposure
452        # end hack
453
454        prob_zero = (1 - prob_main) + prob_main * np.exp(llf)
455
456        if which == 'mean':
457            return prob_main * np.exp(lin_pred)
458        elif which == 'mean-main':
459            return np.exp(lin_pred)
460        elif which == 'linear':
461            return lin_pred
462        elif which == 'mean-nonzero':
463            return prob_main * np.exp(lin_pred) / (1 - prob_zero)
464        elif which == 'prob-zero':
465            return prob_zero
466        elif which == 'prob-main':
467            return prob_main
468        elif which == 'prob':
469            return self._predict_prob(params, exog, exog_infl, exposure, offset)
470        else:
471            raise ValueError('which = %s is not available' % which)
472
473
474class ZeroInflatedPoisson(GenericZeroInflated):
475    __doc__ = """
476    Poisson Zero Inflated Model
477
478    %(params)s
479    %(extra_params)s
480
481    Attributes
482    ----------
483    endog : ndarray
484        A reference to the endogenous response variable
485    exog : ndarray
486        A reference to the exogenous design.
487    exog_infl : ndarray
488        A reference to the zero-inflated exogenous design.
489    """ % {'params' : base._model_params_doc,
490           'extra_params' : _doc_zi_params + base._missing_param_doc}
491
492    def __init__(self, endog, exog, exog_infl=None, offset=None, exposure=None,
493                 inflation='logit', missing='none', **kwargs):
494        super(ZeroInflatedPoisson, self).__init__(endog, exog, offset=offset,
495                                                  inflation=inflation,
496                                                  exog_infl=exog_infl,
497                                                  exposure=exposure,
498                                                  missing=missing, **kwargs)
499        self.model_main = Poisson(self.endog, self.exog, offset=offset,
500                                  exposure=exposure)
501        self.distribution = zipoisson
502        self.result_class = ZeroInflatedPoissonResults
503        self.result_class_wrapper = ZeroInflatedPoissonResultsWrapper
504        self.result_class_reg = L1ZeroInflatedPoissonResults
505        self.result_class_reg_wrapper = L1ZeroInflatedPoissonResultsWrapper
506
507    def _hessian_main(self, params):
508        params_infl = params[:self.k_inflate]
509        params_main = params[self.k_inflate:]
510
511        y = self.endog
512        w = self.model_infl.predict(params_infl)
513        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
514        score = self.score(params)
515        zero_idx = np.nonzero(y == 0)[0]
516        nonzero_idx = np.nonzero(y)[0]
517
518        mu = self.model_main.predict(params_main)
519
520        hess_arr = np.zeros((self.k_exog, self.k_exog))
521
522        coeff = (1 + w[zero_idx] * (np.exp(mu[zero_idx]) - 1))
523
524        #d2l/dp2
525        for i in range(self.k_exog):
526            for j in range(i, -1, -1):
527                hess_arr[i, j] = ((
528                    self.exog[zero_idx, i] * self.exog[zero_idx, j] *
529                    mu[zero_idx] * (w[zero_idx] - 1) * (1 / coeff -
530                    w[zero_idx] * mu[zero_idx] * np.exp(mu[zero_idx]) /
531                    coeff**2)).sum() - (mu[nonzero_idx] * self.exog[nonzero_idx, i] *
532                    self.exog[nonzero_idx, j]).sum())
533
534        return hess_arr
535
536    def _predict_prob(self, params, exog, exog_infl, exposure, offset):
537        params_infl = params[:self.k_inflate]
538        params_main = params[self.k_inflate:]
539
540        counts = np.atleast_2d(np.arange(0, np.max(self.endog)+1))
541
542        if len(exog_infl.shape) < 2:
543            transform = True
544            w = np.atleast_2d(
545                self.model_infl.predict(params_infl, exog_infl))[:, None]
546        else:
547            transform = False
548            w = self.model_infl.predict(params_infl, exog_infl)[:, None]
549
550        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
551        mu = self.model_main.predict(params_main, exog,
552            offset=offset)[:, None]
553        result = self.distribution.pmf(counts, mu, w)
554        return result[0] if transform else result
555
556    def _get_start_params(self):
557        start_params = self.model_main.fit(disp=0, method="nm").params
558        start_params = np.append(np.ones(self.k_inflate) * 0.1, start_params)
559        return start_params
560
561
562class ZeroInflatedGeneralizedPoisson(GenericZeroInflated):
563    __doc__ = """
564    Zero Inflated Generalized Poisson Model
565
566    %(params)s
567    %(extra_params)s
568
569    Attributes
570    ----------
571    endog : ndarray
572        A reference to the endogenous response variable
573    exog : ndarray
574        A reference to the exogenous design.
575    exog_infl : ndarray
576        A reference to the zero-inflated exogenous design.
577    p : scalar
578        P denotes parametrizations for ZIGP regression.
579    """ % {'params' : base._model_params_doc,
580           'extra_params' : _doc_zi_params +
581           """p : float
582        dispersion power parameter for the GeneralizedPoisson model.  p=1 for
583        ZIGP-1 and p=2 for ZIGP-2. Default is p=2
584    """ + base._missing_param_doc}
585
586    def __init__(self, endog, exog, exog_infl=None, offset=None, exposure=None,
587                 inflation='logit', p=2, missing='none', **kwargs):
588        super(ZeroInflatedGeneralizedPoisson, self).__init__(endog, exog,
589                                                  offset=offset,
590                                                  inflation=inflation,
591                                                  exog_infl=exog_infl,
592                                                  exposure=exposure,
593                                                  missing=missing, **kwargs)
594        self.model_main = GeneralizedPoisson(self.endog, self.exog,
595            offset=offset, exposure=exposure, p=p)
596        self.distribution = zigenpoisson
597        self.k_exog += 1
598        self.k_extra += 1
599        self.exog_names.append("alpha")
600        self.result_class = ZeroInflatedGeneralizedPoissonResults
601        self.result_class_wrapper = ZeroInflatedGeneralizedPoissonResultsWrapper
602        self.result_class_reg = L1ZeroInflatedGeneralizedPoissonResults
603        self.result_class_reg_wrapper = L1ZeroInflatedGeneralizedPoissonResultsWrapper
604
605    def _get_init_kwds(self):
606        kwds = super(ZeroInflatedGeneralizedPoisson, self)._get_init_kwds()
607        kwds['p'] = self.model_main.parameterization + 1
608        return kwds
609
610    def _predict_prob(self, params, exog, exog_infl, exposure, offset):
611        params_infl = params[:self.k_inflate]
612        params_main = params[self.k_inflate:]
613
614        p = self.model_main.parameterization
615        counts = np.atleast_2d(np.arange(0, np.max(self.endog)+1))
616
617        if len(exog_infl.shape) < 2:
618            transform = True
619            w = np.atleast_2d(
620                self.model_infl.predict(params_infl, exog_infl))[:, None]
621        else:
622            transform = False
623            w = self.model_infl.predict(params_infl, exog_infl)[:, None]
624
625        w[w == 1.] = np.nextafter(1, 0)
626        mu = self.model_main.predict(params_main, exog,
627            exposure=exposure, offset=offset)[:, None]
628        result = self.distribution.pmf(counts, mu, params_main[-1], p, w)
629        return result[0] if transform else result
630
631    def _get_start_params(self):
632        with warnings.catch_warnings():
633            warnings.simplefilter("ignore", category=ConvergenceWarning)
634            start_params = ZeroInflatedPoisson(self.endog, self.exog,
635                exog_infl=self.exog_infl).fit(disp=0).params
636        start_params = np.append(start_params, 0.1)
637        return start_params
638
639
640class ZeroInflatedNegativeBinomialP(GenericZeroInflated):
641    __doc__ = """
642    Zero Inflated Generalized Negative Binomial Model
643
644    %(params)s
645    %(extra_params)s
646
647    Attributes
648    ----------
649    endog : ndarray
650        A reference to the endogenous response variable
651    exog : ndarray
652        A reference to the exogenous design.
653    exog_infl : ndarray
654        A reference to the zero-inflated exogenous design.
655    p : scalar
656        P denotes parametrizations for ZINB regression. p=1 for ZINB-1 and
657    p=2 for ZINB-2. Default is p=2
658    """ % {'params' : base._model_params_doc,
659           'extra_params' : _doc_zi_params +
660           """p : float
661        dispersion power parameter for the NegativeBinomialP model.  p=1 for
662        ZINB-1 and p=2 for ZINM-2. Default is p=2
663    """ + base._missing_param_doc}
664
665    def __init__(self, endog, exog, exog_infl=None, offset=None, exposure=None,
666                 inflation='logit', p=2, missing='none', **kwargs):
667        super(ZeroInflatedNegativeBinomialP, self).__init__(endog, exog,
668                                                  offset=offset,
669                                                  inflation=inflation,
670                                                  exog_infl=exog_infl,
671                                                  exposure=exposure,
672                                                  missing=missing, **kwargs)
673        self.model_main = NegativeBinomialP(self.endog, self.exog,
674            offset=offset, exposure=exposure, p=p)
675        self.distribution = zinegbin
676        self.k_exog += 1
677        self.k_extra += 1
678        self.exog_names.append("alpha")
679        self.result_class = ZeroInflatedNegativeBinomialResults
680        self.result_class_wrapper = ZeroInflatedNegativeBinomialResultsWrapper
681        self.result_class_reg = L1ZeroInflatedNegativeBinomialResults
682        self.result_class_reg_wrapper = L1ZeroInflatedNegativeBinomialResultsWrapper
683
684    def _get_init_kwds(self):
685        kwds = super(ZeroInflatedNegativeBinomialP, self)._get_init_kwds()
686        kwds['p'] = self.model_main.parameterization
687        return kwds
688
689    def _predict_prob(self, params, exog, exog_infl, exposure, offset):
690        params_infl = params[:self.k_inflate]
691        params_main = params[self.k_inflate:]
692
693        p = self.model_main.parameterization
694        counts = np.arange(0, np.max(self.endog)+1)
695
696        if len(exog_infl.shape) < 2:
697            transform = True
698            w = np.atleast_2d(
699                self.model_infl.predict(params_infl, exog_infl))[:, None]
700        else:
701            transform = False
702            w = self.model_infl.predict(params_infl, exog_infl)[:, None]
703
704        w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
705        mu = self.model_main.predict(params_main, exog,
706            exposure=exposure, offset=offset)[:, None]
707        result = self.distribution.pmf(counts, mu, params_main[-1], p, w)
708        return result[0] if transform else result
709
710    def _get_start_params(self):
711        with warnings.catch_warnings():
712            warnings.simplefilter("ignore", category=ConvergenceWarning)
713            start_params = self.model_main.fit(disp=0, method='nm').params
714        start_params = np.append(np.zeros(self.k_inflate), start_params)
715        return start_params
716
717
718class ZeroInflatedPoissonResults(CountResults):
719    __doc__ = _discrete_results_docs % {
720    "one_line_description": "A results class for Zero Inflated Poisson",
721    "extra_attr": ""}
722
723    @cache_readonly
724    def _dispersion_factor(self):
725        mu = self.predict(which='linear')
726        w = 1 - self.predict() / np.exp(self.predict(which='linear'))
727        return (1 + w * np.exp(mu))
728
729    def get_margeff(self, at='overall', method='dydx', atexog=None,
730                    dummy=False, count=False):
731        """Get marginal effects of the fitted model.
732
733        Not yet implemented for Zero Inflated Models
734        """
735        raise NotImplementedError("not yet implemented for zero inflation")
736
737
738class L1ZeroInflatedPoissonResults(L1CountResults, ZeroInflatedPoissonResults):
739    pass
740
741
742class ZeroInflatedPoissonResultsWrapper(lm.RegressionResultsWrapper):
743    pass
744wrap.populate_wrapper(ZeroInflatedPoissonResultsWrapper,
745                      ZeroInflatedPoissonResults)
746
747
748class L1ZeroInflatedPoissonResultsWrapper(lm.RegressionResultsWrapper):
749    pass
750wrap.populate_wrapper(L1ZeroInflatedPoissonResultsWrapper,
751                      L1ZeroInflatedPoissonResults)
752
753
754class ZeroInflatedGeneralizedPoissonResults(CountResults):
755    __doc__ = _discrete_results_docs % {
756        "one_line_description": "A results class for Zero Inflated Generalized Poisson",
757        "extra_attr": ""}
758
759    @cache_readonly
760    def _dispersion_factor(self):
761        p = self.model.model_main.parameterization
762        alpha = self.params[self.model.k_inflate:][-1]
763        mu = np.exp(self.predict(which='linear'))
764        w = 1 - self.predict() / mu
765        return ((1 + alpha * mu**p)**2 + w * mu)
766
767    def get_margeff(self, at='overall', method='dydx', atexog=None,
768                    dummy=False, count=False):
769        """Get marginal effects of the fitted model.
770
771        Not yet implemented for Zero Inflated Models
772        """
773        raise NotImplementedError("not yet implemented for zero inflation")
774
775
776class L1ZeroInflatedGeneralizedPoissonResults(L1CountResults,
777        ZeroInflatedGeneralizedPoissonResults):
778    pass
779
780
781class ZeroInflatedGeneralizedPoissonResultsWrapper(
782        lm.RegressionResultsWrapper):
783    pass
784wrap.populate_wrapper(ZeroInflatedGeneralizedPoissonResultsWrapper,
785                      ZeroInflatedGeneralizedPoissonResults)
786
787
788class L1ZeroInflatedGeneralizedPoissonResultsWrapper(
789        lm.RegressionResultsWrapper):
790    pass
791wrap.populate_wrapper(L1ZeroInflatedGeneralizedPoissonResultsWrapper,
792                      L1ZeroInflatedGeneralizedPoissonResults)
793
794
795class ZeroInflatedNegativeBinomialResults(CountResults):
796    __doc__ = _discrete_results_docs % {
797        "one_line_description": "A results class for Zero Inflated Generalized Negative Binomial",
798        "extra_attr": ""}
799
800    @cache_readonly
801    def _dispersion_factor(self):
802        p = self.model.model_main.parameterization
803        alpha = self.params[self.model.k_inflate:][-1]
804        mu = np.exp(self.predict(which='linear'))
805        w = 1 - self.predict() / mu
806        return (1 + alpha * mu**(p-1) + w * mu)
807
808    def get_margeff(self, at='overall', method='dydx', atexog=None,
809            dummy=False, count=False):
810        """Get marginal effects of the fitted model.
811
812        Not yet implemented for Zero Inflated Models
813        """
814        raise NotImplementedError("not yet implemented for zero inflation")
815
816
817class L1ZeroInflatedNegativeBinomialResults(L1CountResults,
818        ZeroInflatedNegativeBinomialResults):
819    pass
820
821
822class ZeroInflatedNegativeBinomialResultsWrapper(
823        lm.RegressionResultsWrapper):
824    pass
825wrap.populate_wrapper(ZeroInflatedNegativeBinomialResultsWrapper,
826                      ZeroInflatedNegativeBinomialResults)
827
828
829class L1ZeroInflatedNegativeBinomialResultsWrapper(
830        lm.RegressionResultsWrapper):
831    pass
832wrap.populate_wrapper(L1ZeroInflatedNegativeBinomialResultsWrapper,
833                      L1ZeroInflatedNegativeBinomialResults)
834