1__all__ = ["ZeroInflatedPoisson", "ZeroInflatedGeneralizedPoisson", 2 "ZeroInflatedNegativeBinomialP"] 3 4import warnings 5import numpy as np 6import statsmodels.base.model as base 7import statsmodels.base.wrapper as wrap 8import statsmodels.regression.linear_model as lm 9from statsmodels.discrete.discrete_model import (DiscreteModel, CountModel, 10 Poisson, Logit, CountResults, 11 L1CountResults, Probit, 12 _discrete_results_docs, 13 _validate_l1_method, 14 GeneralizedPoisson, 15 NegativeBinomialP) 16from statsmodels.distributions import zipoisson, zigenpoisson, zinegbin 17from statsmodels.tools.numdiff import approx_fprime, approx_hess 18from statsmodels.tools.decorators import cache_readonly 19from statsmodels.tools.sm_exceptions import ConvergenceWarning 20from statsmodels.compat.pandas import Appender 21 22 23_doc_zi_params = """ 24 exog_infl : array_like or None 25 Explanatory variables for the binary inflation model, i.e. for 26 mixing probability model. If None, then a constant is used. 27 offset : array_like 28 Offset is added to the linear prediction with coefficient equal to 1. 29 exposure : array_like 30 Log(exposure) is added to the linear prediction with coefficient 31 equal to 1. 32 inflation : {'logit', 'probit'} 33 The model for the zero inflation, either Logit (default) or Probit 34 """ 35 36 37class GenericZeroInflated(CountModel): 38 __doc__ = """ 39 Generic Zero Inflated Model 40 41 %(params)s 42 %(extra_params)s 43 44 Attributes 45 ---------- 46 endog : ndarray 47 A reference to the endogenous response variable 48 exog : ndarray 49 A reference to the exogenous design. 50 exog_infl : ndarray 51 A reference to the zero-inflated exogenous design. 52 """ % {'params' : base._model_params_doc, 53 'extra_params' : _doc_zi_params + base._missing_param_doc} 54 55 def __init__(self, endog, exog, exog_infl=None, offset=None, 56 inflation='logit', exposure=None, missing='none', **kwargs): 57 super(GenericZeroInflated, self).__init__(endog, exog, offset=offset, 58 exposure=exposure, 59 missing=missing, **kwargs) 60 61 if exog_infl is None: 62 self.k_inflate = 1 63 self._no_exog_infl = True 64 self.exog_infl = np.ones((endog.size, self.k_inflate), 65 dtype=np.float64) 66 else: 67 self.exog_infl = exog_infl 68 self.k_inflate = exog_infl.shape[1] 69 self._no_exog_infl = False 70 71 if len(exog.shape) == 1: 72 self.k_exog = 1 73 else: 74 self.k_exog = exog.shape[1] 75 76 self.infl = inflation 77 if inflation == 'logit': 78 self.model_infl = Logit(np.zeros(self.exog_infl.shape[0]), 79 self.exog_infl) 80 self._hessian_inflate = self._hessian_logit 81 elif inflation == 'probit': 82 self.model_infl = Probit(np.zeros(self.exog_infl.shape[0]), 83 self.exog_infl) 84 self._hessian_inflate = self._hessian_probit 85 86 else: 87 raise ValueError("inflation == %s, which is not handled" 88 % inflation) 89 90 self.inflation = inflation 91 self.k_extra = self.k_inflate 92 93 if len(self.exog) != len(self.exog_infl): 94 raise ValueError('exog and exog_infl have different number of' 95 'observation. `missing` handling is not supported') 96 97 infl_names = ['inflate_%s' % i for i in self.model_infl.data.param_names] 98 self.exog_names[:] = infl_names + list(self.exog_names) 99 self.exog_infl = np.asarray(self.exog_infl, dtype=np.float64) 100 101 self._init_keys.extend(['exog_infl', 'inflation']) 102 self._null_drop_keys = ['exog_infl'] 103 104 def loglike(self, params): 105 """ 106 Loglikelihood of Generic Zero Inflated model. 107 108 Parameters 109 ---------- 110 params : array_like 111 The parameters of the model. 112 113 Returns 114 ------- 115 loglike : float 116 The log-likelihood function of the model evaluated at `params`. 117 See notes. 118 119 Notes 120 -------- 121 .. math:: \\ln L=\\sum_{y_{i}=0}\\ln(w_{i}+(1-w_{i})*P_{main\\_model})+ 122 \\sum_{y_{i}>0}(\\ln(1-w_{i})+L_{main\\_model}) 123 where P - pdf of main model, L - loglike function of main model. 124 """ 125 return np.sum(self.loglikeobs(params)) 126 127 def loglikeobs(self, params): 128 """ 129 Loglikelihood for observations of Generic Zero Inflated model. 130 131 Parameters 132 ---------- 133 params : array_like 134 The parameters of the model. 135 136 Returns 137 ------- 138 loglike : ndarray 139 The log likelihood for each observation of the model evaluated 140 at `params`. See Notes for definition. 141 142 Notes 143 -------- 144 .. math:: \\ln L=\\ln(w_{i}+(1-w_{i})*P_{main\\_model})+ 145 \\ln(1-w_{i})+L_{main\\_model} 146 where P - pdf of main model, L - loglike function of main model. 147 148 for observations :math:`i=1,...,n` 149 """ 150 params_infl = params[:self.k_inflate] 151 params_main = params[self.k_inflate:] 152 153 y = self.endog 154 w = self.model_infl.predict(params_infl) 155 156 w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps) 157 llf_main = self.model_main.loglikeobs(params_main) 158 zero_idx = np.nonzero(y == 0)[0] 159 nonzero_idx = np.nonzero(y)[0] 160 161 llf = np.zeros_like(y, dtype=np.float64) 162 llf[zero_idx] = (np.log(w[zero_idx] + 163 (1 - w[zero_idx]) * np.exp(llf_main[zero_idx]))) 164 llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx] 165 166 return llf 167 168 @Appender(DiscreteModel.fit.__doc__) 169 def fit(self, start_params=None, method='bfgs', maxiter=35, 170 full_output=1, disp=1, callback=None, 171 cov_type='nonrobust', cov_kwds=None, use_t=None, **kwargs): 172 if start_params is None: 173 offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0) 174 if np.size(offset) == 1 and offset == 0: 175 offset = None 176 start_params = self._get_start_params() 177 178 if callback is None: 179 # work around perfect separation callback #3895 180 callback = lambda *x: x 181 182 mlefit = super(GenericZeroInflated, self).fit(start_params=start_params, 183 maxiter=maxiter, disp=disp, method=method, 184 full_output=full_output, callback=callback, 185 **kwargs) 186 187 zipfit = self.result_class(self, mlefit._results) 188 result = self.result_class_wrapper(zipfit) 189 190 if cov_kwds is None: 191 cov_kwds = {} 192 193 result._get_robustcov_results(cov_type=cov_type, 194 use_self=True, use_t=use_t, **cov_kwds) 195 return result 196 197 @Appender(DiscreteModel.fit_regularized.__doc__) 198 def fit_regularized(self, start_params=None, method='l1', 199 maxiter='defined_by_method', full_output=1, disp=1, callback=None, 200 alpha=0, trim_mode='auto', auto_trim_tol=0.01, size_trim_tol=1e-4, 201 qc_tol=0.03, **kwargs): 202 203 _validate_l1_method(method) 204 205 if np.size(alpha) == 1 and alpha != 0: 206 k_params = self.k_exog + self.k_inflate 207 alpha = alpha * np.ones(k_params) 208 209 extra = self.k_extra - self.k_inflate 210 alpha_p = alpha[:-(self.k_extra - extra)] if (self.k_extra 211 and np.size(alpha) > 1) else alpha 212 if start_params is None: 213 offset = getattr(self, "offset", 0) + getattr(self, "exposure", 0) 214 if np.size(offset) == 1 and offset == 0: 215 offset = None 216 start_params = self.model_main.fit_regularized( 217 start_params=start_params, method=method, maxiter=maxiter, 218 full_output=full_output, disp=0, callback=callback, 219 alpha=alpha_p, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol, 220 size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs).params 221 start_params = np.append(np.ones(self.k_inflate), start_params) 222 cntfit = super(CountModel, self).fit_regularized( 223 start_params=start_params, method=method, maxiter=maxiter, 224 full_output=full_output, disp=disp, callback=callback, 225 alpha=alpha, trim_mode=trim_mode, auto_trim_tol=auto_trim_tol, 226 size_trim_tol=size_trim_tol, qc_tol=qc_tol, **kwargs) 227 228 discretefit = self.result_class_reg(self, cntfit) 229 return self.result_class_reg_wrapper(discretefit) 230 231 def score_obs(self, params): 232 """ 233 Generic Zero Inflated model score (gradient) vector of the log-likelihood 234 235 Parameters 236 ---------- 237 params : array_like 238 The parameters of the model 239 240 Returns 241 ------- 242 score : ndarray, 1-D 243 The score vector of the model, i.e. the first derivative of the 244 loglikelihood function, evaluated at `params` 245 """ 246 params_infl = params[:self.k_inflate] 247 params_main = params[self.k_inflate:] 248 249 y = self.endog 250 w = self.model_infl.predict(params_infl) 251 w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps) 252 score_main = self.model_main.score_obs(params_main) 253 llf_main = self.model_main.loglikeobs(params_main) 254 llf = self.loglikeobs(params) 255 zero_idx = np.nonzero(y == 0)[0] 256 nonzero_idx = np.nonzero(y)[0] 257 258 mu = self.model_main.predict(params_main) 259 260 dldp = np.zeros((self.exog.shape[0], self.k_exog), dtype=np.float64) 261 dldw = np.zeros_like(self.exog_infl, dtype=np.float64) 262 263 dldp[zero_idx,:] = (score_main[zero_idx].T * 264 (1 - (w[zero_idx]) / np.exp(llf[zero_idx]))).T 265 dldp[nonzero_idx,:] = score_main[nonzero_idx] 266 267 if self.inflation == 'logit': 268 dldw[zero_idx,:] = (self.exog_infl[zero_idx].T * w[zero_idx] * 269 (1 - w[zero_idx]) * 270 (1 - np.exp(llf_main[zero_idx])) / 271 np.exp(llf[zero_idx])).T 272 dldw[nonzero_idx,:] = -(self.exog_infl[nonzero_idx].T * 273 w[nonzero_idx]).T 274 elif self.inflation == 'probit': 275 return approx_fprime(params, self.loglikeobs) 276 277 return np.hstack((dldw, dldp)) 278 279 def score(self, params): 280 return self.score_obs(params).sum(0) 281 282 def _hessian_main(self, params): 283 pass 284 285 def _hessian_logit(self, params): 286 params_infl = params[:self.k_inflate] 287 params_main = params[self.k_inflate:] 288 289 y = self.endog 290 w = self.model_infl.predict(params_infl) 291 w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps) 292 score_main = self.model_main.score_obs(params_main) 293 llf_main = self.model_main.loglikeobs(params_main) 294 llf = self.loglikeobs(params) 295 zero_idx = np.nonzero(y == 0)[0] 296 nonzero_idx = np.nonzero(y)[0] 297 298 hess_arr = np.zeros((self.k_inflate, self.k_exog + self.k_inflate)) 299 300 pmf = np.exp(llf) 301 302 #d2l/dw2 303 for i in range(self.k_inflate): 304 for j in range(i, -1, -1): 305 hess_arr[i, j] = (( 306 self.exog_infl[zero_idx, i] * self.exog_infl[zero_idx, j] * 307 (w[zero_idx] * (1 - w[zero_idx]) * ((1 - 308 np.exp(llf_main[zero_idx])) * (1 - 2 * w[zero_idx]) * 309 np.exp(llf[zero_idx]) - (w[zero_idx] - w[zero_idx]**2) * 310 (1 - np.exp(llf_main[zero_idx]))**2) / 311 pmf[zero_idx]**2)).sum() - 312 (self.exog_infl[nonzero_idx, i] * self.exog_infl[nonzero_idx, j] * 313 w[nonzero_idx] * (1 - w[nonzero_idx])).sum()) 314 315 #d2l/dpdw 316 for i in range(self.k_inflate): 317 for j in range(self.k_exog): 318 hess_arr[i, j + self.k_inflate] = -(score_main[zero_idx, j] * 319 w[zero_idx] * (1 - w[zero_idx]) * 320 self.exog_infl[zero_idx, i] / pmf[zero_idx]).sum() 321 322 return hess_arr 323 324 def _hessian_probit(self, params): 325 pass 326 327 def hessian(self, params): 328 """ 329 Generic Zero Inflated model Hessian matrix of the loglikelihood 330 331 Parameters 332 ---------- 333 params : array_like 334 The parameters of the model 335 336 Returns 337 ------- 338 hess : ndarray, (k_vars, k_vars) 339 The Hessian, second derivative of loglikelihood function, 340 evaluated at `params` 341 342 Notes 343 ----- 344 """ 345 hess_arr_main = self._hessian_main(params) 346 hess_arr_infl = self._hessian_inflate(params) 347 348 if hess_arr_main is None or hess_arr_infl is None: 349 return approx_hess(params, self.loglike) 350 351 dim = self.k_exog + self.k_inflate 352 353 hess_arr = np.zeros((dim, dim)) 354 355 hess_arr[:self.k_inflate,:] = hess_arr_infl 356 hess_arr[self.k_inflate:,self.k_inflate:] = hess_arr_main 357 358 tri_idx = np.triu_indices(self.k_exog + self.k_inflate, k=1) 359 hess_arr[tri_idx] = hess_arr.T[tri_idx] 360 361 return hess_arr 362 363 def predict(self, params, exog=None, exog_infl=None, exposure=None, 364 offset=None, which='mean'): 365 """ 366 Predict response variable of a count model given exogenous variables. 367 368 Parameters 369 ---------- 370 params : array_like 371 The parameters of the model 372 exog : ndarray, optional 373 A reference to the exogenous design. 374 If not assigned, will be used exog from fitting. 375 exog_infl : ndarray, optional 376 A reference to the zero-inflated exogenous design. 377 If not assigned, will be used exog from fitting. 378 offset : ndarray, optional 379 Offset is added to the linear prediction with coefficient equal to 1. 380 exposure : ndarray, optional 381 Log(exposure) is added to the linear prediction with coefficient 382 equal to 1. If exposure is specified, then it will be logged by the method. 383 The user does not need to log it first. 384 which : str, optional 385 Define values that will be predicted. 386 'mean', 'mean-main', 'linear', 'mean-nonzero', 'prob-zero, 'prob', 'prob-main' 387 Default is 'mean'. 388 389 Notes 390 ----- 391 """ 392 no_exog = False 393 if exog is None: 394 no_exog = True 395 exog = self.exog 396 397 if exog_infl is None: 398 if no_exog: 399 exog_infl = self.exog_infl 400 else: 401 if self._no_exog_infl: 402 exog_infl = np.ones((len(exog), 1)) 403 else: 404 exog_infl = np.asarray(exog_infl) 405 if exog_infl.ndim == 1 and self.k_inflate == 1: 406 exog_infl = exog_infl[:, None] 407 408 if exposure is None: 409 if no_exog: 410 exposure = getattr(self, 'exposure', 0) 411 else: 412 exposure = 0 413 else: 414 exposure = np.log(exposure) 415 416 if offset is None: 417 if no_exog: 418 offset = getattr(self, 'offset', 0) 419 else: 420 offset = 0 421 422 params_infl = params[:self.k_inflate] 423 params_main = params[self.k_inflate:] 424 425 prob_main = 1 - self.model_infl.predict(params_infl, exog_infl) 426 427 lin_pred = np.dot(exog, params_main[:self.exog.shape[1]]) + exposure + offset 428 429 # Refactor: This is pretty hacky, 430 # there should be an appropriate predict method in model_main 431 # this is just prob(y=0 | model_main) 432 tmp_exog = self.model_main.exog 433 tmp_endog = self.model_main.endog 434 tmp_offset = getattr(self.model_main, 'offset', ['no']) 435 tmp_exposure = getattr(self.model_main, 'exposure', ['no']) 436 self.model_main.exog = exog 437 self.model_main.endog = np.zeros((exog.shape[0])) 438 self.model_main.offset = offset 439 self.model_main.exposure = exposure 440 llf = self.model_main.loglikeobs(params_main) 441 self.model_main.exog = tmp_exog 442 self.model_main.endog = tmp_endog 443 # tmp_offset might be an array with elementwise equality testing 444 if len(tmp_offset) == 1 and tmp_offset[0] == 'no': 445 del self.model_main.offset 446 else: 447 self.model_main.offset = tmp_offset 448 if len(tmp_exposure) == 1 and tmp_exposure[0] == 'no': 449 del self.model_main.exposure 450 else: 451 self.model_main.exposure = tmp_exposure 452 # end hack 453 454 prob_zero = (1 - prob_main) + prob_main * np.exp(llf) 455 456 if which == 'mean': 457 return prob_main * np.exp(lin_pred) 458 elif which == 'mean-main': 459 return np.exp(lin_pred) 460 elif which == 'linear': 461 return lin_pred 462 elif which == 'mean-nonzero': 463 return prob_main * np.exp(lin_pred) / (1 - prob_zero) 464 elif which == 'prob-zero': 465 return prob_zero 466 elif which == 'prob-main': 467 return prob_main 468 elif which == 'prob': 469 return self._predict_prob(params, exog, exog_infl, exposure, offset) 470 else: 471 raise ValueError('which = %s is not available' % which) 472 473 474class ZeroInflatedPoisson(GenericZeroInflated): 475 __doc__ = """ 476 Poisson Zero Inflated Model 477 478 %(params)s 479 %(extra_params)s 480 481 Attributes 482 ---------- 483 endog : ndarray 484 A reference to the endogenous response variable 485 exog : ndarray 486 A reference to the exogenous design. 487 exog_infl : ndarray 488 A reference to the zero-inflated exogenous design. 489 """ % {'params' : base._model_params_doc, 490 'extra_params' : _doc_zi_params + base._missing_param_doc} 491 492 def __init__(self, endog, exog, exog_infl=None, offset=None, exposure=None, 493 inflation='logit', missing='none', **kwargs): 494 super(ZeroInflatedPoisson, self).__init__(endog, exog, offset=offset, 495 inflation=inflation, 496 exog_infl=exog_infl, 497 exposure=exposure, 498 missing=missing, **kwargs) 499 self.model_main = Poisson(self.endog, self.exog, offset=offset, 500 exposure=exposure) 501 self.distribution = zipoisson 502 self.result_class = ZeroInflatedPoissonResults 503 self.result_class_wrapper = ZeroInflatedPoissonResultsWrapper 504 self.result_class_reg = L1ZeroInflatedPoissonResults 505 self.result_class_reg_wrapper = L1ZeroInflatedPoissonResultsWrapper 506 507 def _hessian_main(self, params): 508 params_infl = params[:self.k_inflate] 509 params_main = params[self.k_inflate:] 510 511 y = self.endog 512 w = self.model_infl.predict(params_infl) 513 w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps) 514 score = self.score(params) 515 zero_idx = np.nonzero(y == 0)[0] 516 nonzero_idx = np.nonzero(y)[0] 517 518 mu = self.model_main.predict(params_main) 519 520 hess_arr = np.zeros((self.k_exog, self.k_exog)) 521 522 coeff = (1 + w[zero_idx] * (np.exp(mu[zero_idx]) - 1)) 523 524 #d2l/dp2 525 for i in range(self.k_exog): 526 for j in range(i, -1, -1): 527 hess_arr[i, j] = (( 528 self.exog[zero_idx, i] * self.exog[zero_idx, j] * 529 mu[zero_idx] * (w[zero_idx] - 1) * (1 / coeff - 530 w[zero_idx] * mu[zero_idx] * np.exp(mu[zero_idx]) / 531 coeff**2)).sum() - (mu[nonzero_idx] * self.exog[nonzero_idx, i] * 532 self.exog[nonzero_idx, j]).sum()) 533 534 return hess_arr 535 536 def _predict_prob(self, params, exog, exog_infl, exposure, offset): 537 params_infl = params[:self.k_inflate] 538 params_main = params[self.k_inflate:] 539 540 counts = np.atleast_2d(np.arange(0, np.max(self.endog)+1)) 541 542 if len(exog_infl.shape) < 2: 543 transform = True 544 w = np.atleast_2d( 545 self.model_infl.predict(params_infl, exog_infl))[:, None] 546 else: 547 transform = False 548 w = self.model_infl.predict(params_infl, exog_infl)[:, None] 549 550 w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps) 551 mu = self.model_main.predict(params_main, exog, 552 offset=offset)[:, None] 553 result = self.distribution.pmf(counts, mu, w) 554 return result[0] if transform else result 555 556 def _get_start_params(self): 557 start_params = self.model_main.fit(disp=0, method="nm").params 558 start_params = np.append(np.ones(self.k_inflate) * 0.1, start_params) 559 return start_params 560 561 562class ZeroInflatedGeneralizedPoisson(GenericZeroInflated): 563 __doc__ = """ 564 Zero Inflated Generalized Poisson Model 565 566 %(params)s 567 %(extra_params)s 568 569 Attributes 570 ---------- 571 endog : ndarray 572 A reference to the endogenous response variable 573 exog : ndarray 574 A reference to the exogenous design. 575 exog_infl : ndarray 576 A reference to the zero-inflated exogenous design. 577 p : scalar 578 P denotes parametrizations for ZIGP regression. 579 """ % {'params' : base._model_params_doc, 580 'extra_params' : _doc_zi_params + 581 """p : float 582 dispersion power parameter for the GeneralizedPoisson model. p=1 for 583 ZIGP-1 and p=2 for ZIGP-2. Default is p=2 584 """ + base._missing_param_doc} 585 586 def __init__(self, endog, exog, exog_infl=None, offset=None, exposure=None, 587 inflation='logit', p=2, missing='none', **kwargs): 588 super(ZeroInflatedGeneralizedPoisson, self).__init__(endog, exog, 589 offset=offset, 590 inflation=inflation, 591 exog_infl=exog_infl, 592 exposure=exposure, 593 missing=missing, **kwargs) 594 self.model_main = GeneralizedPoisson(self.endog, self.exog, 595 offset=offset, exposure=exposure, p=p) 596 self.distribution = zigenpoisson 597 self.k_exog += 1 598 self.k_extra += 1 599 self.exog_names.append("alpha") 600 self.result_class = ZeroInflatedGeneralizedPoissonResults 601 self.result_class_wrapper = ZeroInflatedGeneralizedPoissonResultsWrapper 602 self.result_class_reg = L1ZeroInflatedGeneralizedPoissonResults 603 self.result_class_reg_wrapper = L1ZeroInflatedGeneralizedPoissonResultsWrapper 604 605 def _get_init_kwds(self): 606 kwds = super(ZeroInflatedGeneralizedPoisson, self)._get_init_kwds() 607 kwds['p'] = self.model_main.parameterization + 1 608 return kwds 609 610 def _predict_prob(self, params, exog, exog_infl, exposure, offset): 611 params_infl = params[:self.k_inflate] 612 params_main = params[self.k_inflate:] 613 614 p = self.model_main.parameterization 615 counts = np.atleast_2d(np.arange(0, np.max(self.endog)+1)) 616 617 if len(exog_infl.shape) < 2: 618 transform = True 619 w = np.atleast_2d( 620 self.model_infl.predict(params_infl, exog_infl))[:, None] 621 else: 622 transform = False 623 w = self.model_infl.predict(params_infl, exog_infl)[:, None] 624 625 w[w == 1.] = np.nextafter(1, 0) 626 mu = self.model_main.predict(params_main, exog, 627 exposure=exposure, offset=offset)[:, None] 628 result = self.distribution.pmf(counts, mu, params_main[-1], p, w) 629 return result[0] if transform else result 630 631 def _get_start_params(self): 632 with warnings.catch_warnings(): 633 warnings.simplefilter("ignore", category=ConvergenceWarning) 634 start_params = ZeroInflatedPoisson(self.endog, self.exog, 635 exog_infl=self.exog_infl).fit(disp=0).params 636 start_params = np.append(start_params, 0.1) 637 return start_params 638 639 640class ZeroInflatedNegativeBinomialP(GenericZeroInflated): 641 __doc__ = """ 642 Zero Inflated Generalized Negative Binomial Model 643 644 %(params)s 645 %(extra_params)s 646 647 Attributes 648 ---------- 649 endog : ndarray 650 A reference to the endogenous response variable 651 exog : ndarray 652 A reference to the exogenous design. 653 exog_infl : ndarray 654 A reference to the zero-inflated exogenous design. 655 p : scalar 656 P denotes parametrizations for ZINB regression. p=1 for ZINB-1 and 657 p=2 for ZINB-2. Default is p=2 658 """ % {'params' : base._model_params_doc, 659 'extra_params' : _doc_zi_params + 660 """p : float 661 dispersion power parameter for the NegativeBinomialP model. p=1 for 662 ZINB-1 and p=2 for ZINM-2. Default is p=2 663 """ + base._missing_param_doc} 664 665 def __init__(self, endog, exog, exog_infl=None, offset=None, exposure=None, 666 inflation='logit', p=2, missing='none', **kwargs): 667 super(ZeroInflatedNegativeBinomialP, self).__init__(endog, exog, 668 offset=offset, 669 inflation=inflation, 670 exog_infl=exog_infl, 671 exposure=exposure, 672 missing=missing, **kwargs) 673 self.model_main = NegativeBinomialP(self.endog, self.exog, 674 offset=offset, exposure=exposure, p=p) 675 self.distribution = zinegbin 676 self.k_exog += 1 677 self.k_extra += 1 678 self.exog_names.append("alpha") 679 self.result_class = ZeroInflatedNegativeBinomialResults 680 self.result_class_wrapper = ZeroInflatedNegativeBinomialResultsWrapper 681 self.result_class_reg = L1ZeroInflatedNegativeBinomialResults 682 self.result_class_reg_wrapper = L1ZeroInflatedNegativeBinomialResultsWrapper 683 684 def _get_init_kwds(self): 685 kwds = super(ZeroInflatedNegativeBinomialP, self)._get_init_kwds() 686 kwds['p'] = self.model_main.parameterization 687 return kwds 688 689 def _predict_prob(self, params, exog, exog_infl, exposure, offset): 690 params_infl = params[:self.k_inflate] 691 params_main = params[self.k_inflate:] 692 693 p = self.model_main.parameterization 694 counts = np.arange(0, np.max(self.endog)+1) 695 696 if len(exog_infl.shape) < 2: 697 transform = True 698 w = np.atleast_2d( 699 self.model_infl.predict(params_infl, exog_infl))[:, None] 700 else: 701 transform = False 702 w = self.model_infl.predict(params_infl, exog_infl)[:, None] 703 704 w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps) 705 mu = self.model_main.predict(params_main, exog, 706 exposure=exposure, offset=offset)[:, None] 707 result = self.distribution.pmf(counts, mu, params_main[-1], p, w) 708 return result[0] if transform else result 709 710 def _get_start_params(self): 711 with warnings.catch_warnings(): 712 warnings.simplefilter("ignore", category=ConvergenceWarning) 713 start_params = self.model_main.fit(disp=0, method='nm').params 714 start_params = np.append(np.zeros(self.k_inflate), start_params) 715 return start_params 716 717 718class ZeroInflatedPoissonResults(CountResults): 719 __doc__ = _discrete_results_docs % { 720 "one_line_description": "A results class for Zero Inflated Poisson", 721 "extra_attr": ""} 722 723 @cache_readonly 724 def _dispersion_factor(self): 725 mu = self.predict(which='linear') 726 w = 1 - self.predict() / np.exp(self.predict(which='linear')) 727 return (1 + w * np.exp(mu)) 728 729 def get_margeff(self, at='overall', method='dydx', atexog=None, 730 dummy=False, count=False): 731 """Get marginal effects of the fitted model. 732 733 Not yet implemented for Zero Inflated Models 734 """ 735 raise NotImplementedError("not yet implemented for zero inflation") 736 737 738class L1ZeroInflatedPoissonResults(L1CountResults, ZeroInflatedPoissonResults): 739 pass 740 741 742class ZeroInflatedPoissonResultsWrapper(lm.RegressionResultsWrapper): 743 pass 744wrap.populate_wrapper(ZeroInflatedPoissonResultsWrapper, 745 ZeroInflatedPoissonResults) 746 747 748class L1ZeroInflatedPoissonResultsWrapper(lm.RegressionResultsWrapper): 749 pass 750wrap.populate_wrapper(L1ZeroInflatedPoissonResultsWrapper, 751 L1ZeroInflatedPoissonResults) 752 753 754class ZeroInflatedGeneralizedPoissonResults(CountResults): 755 __doc__ = _discrete_results_docs % { 756 "one_line_description": "A results class for Zero Inflated Generalized Poisson", 757 "extra_attr": ""} 758 759 @cache_readonly 760 def _dispersion_factor(self): 761 p = self.model.model_main.parameterization 762 alpha = self.params[self.model.k_inflate:][-1] 763 mu = np.exp(self.predict(which='linear')) 764 w = 1 - self.predict() / mu 765 return ((1 + alpha * mu**p)**2 + w * mu) 766 767 def get_margeff(self, at='overall', method='dydx', atexog=None, 768 dummy=False, count=False): 769 """Get marginal effects of the fitted model. 770 771 Not yet implemented for Zero Inflated Models 772 """ 773 raise NotImplementedError("not yet implemented for zero inflation") 774 775 776class L1ZeroInflatedGeneralizedPoissonResults(L1CountResults, 777 ZeroInflatedGeneralizedPoissonResults): 778 pass 779 780 781class ZeroInflatedGeneralizedPoissonResultsWrapper( 782 lm.RegressionResultsWrapper): 783 pass 784wrap.populate_wrapper(ZeroInflatedGeneralizedPoissonResultsWrapper, 785 ZeroInflatedGeneralizedPoissonResults) 786 787 788class L1ZeroInflatedGeneralizedPoissonResultsWrapper( 789 lm.RegressionResultsWrapper): 790 pass 791wrap.populate_wrapper(L1ZeroInflatedGeneralizedPoissonResultsWrapper, 792 L1ZeroInflatedGeneralizedPoissonResults) 793 794 795class ZeroInflatedNegativeBinomialResults(CountResults): 796 __doc__ = _discrete_results_docs % { 797 "one_line_description": "A results class for Zero Inflated Generalized Negative Binomial", 798 "extra_attr": ""} 799 800 @cache_readonly 801 def _dispersion_factor(self): 802 p = self.model.model_main.parameterization 803 alpha = self.params[self.model.k_inflate:][-1] 804 mu = np.exp(self.predict(which='linear')) 805 w = 1 - self.predict() / mu 806 return (1 + alpha * mu**(p-1) + w * mu) 807 808 def get_margeff(self, at='overall', method='dydx', atexog=None, 809 dummy=False, count=False): 810 """Get marginal effects of the fitted model. 811 812 Not yet implemented for Zero Inflated Models 813 """ 814 raise NotImplementedError("not yet implemented for zero inflation") 815 816 817class L1ZeroInflatedNegativeBinomialResults(L1CountResults, 818 ZeroInflatedNegativeBinomialResults): 819 pass 820 821 822class ZeroInflatedNegativeBinomialResultsWrapper( 823 lm.RegressionResultsWrapper): 824 pass 825wrap.populate_wrapper(ZeroInflatedNegativeBinomialResultsWrapper, 826 ZeroInflatedNegativeBinomialResults) 827 828 829class L1ZeroInflatedNegativeBinomialResultsWrapper( 830 lm.RegressionResultsWrapper): 831 pass 832wrap.populate_wrapper(L1ZeroInflatedNegativeBinomialResultsWrapper, 833 L1ZeroInflatedNegativeBinomialResults) 834