1from statsmodels.compat.python import lzip
2
3from functools import reduce
4import warnings
5
6import numpy as np
7from scipy import stats
8
9from statsmodels.base.data import handle_data
10from statsmodels.base.optimizer import Optimizer
11import statsmodels.base.wrapper as wrap
12from statsmodels.formula import handle_formula_data
13from statsmodels.stats.contrast import (
14    ContrastResults,
15    WaldTestResults,
16    t_test_pairwise,
17)
18from statsmodels.tools.data import _is_using_pandas
19from statsmodels.tools.decorators import (
20    cache_readonly,
21    cached_data,
22    cached_value,
23)
24from statsmodels.tools.numdiff import approx_fprime
25from statsmodels.tools.sm_exceptions import (
26    HessianInversionWarning,
27    ValueWarning,
28)
29from statsmodels.tools.tools import nan_dot, recipr
30from statsmodels.tools.validation import bool_like
31
32ERROR_INIT_KWARGS = False
33
34_model_params_doc = """Parameters
35    ----------
36    endog : array_like
37        A 1-d endogenous response variable. The dependent variable.
38    exog : array_like
39        A nobs x k array where `nobs` is the number of observations and `k`
40        is the number of regressors. An intercept is not included by default
41        and should be added by the user. See
42        :func:`statsmodels.tools.add_constant`."""
43
44_missing_param_doc = """\
45missing : str
46        Available options are 'none', 'drop', and 'raise'. If 'none', no nan
47        checking is done. If 'drop', any observations with nans are dropped.
48        If 'raise', an error is raised. Default is 'none'."""
49_extra_param_doc = """
50    hasconst : None or bool
        Indicates whether the RHS includes a user-supplied constant. If True,
        a constant is not checked for, k_constant is set to 1, and all
        result statistics are calculated as if a constant is present. If
        False, a constant is not checked for and k_constant is set to 0.
55    **kwargs
56        Extra arguments that are used to set model properties when using the
57        formula interface."""
58
59
60class Model(object):
61    __doc__ = """
    A (predictive) statistical model. Intended to be subclassed, not used
    directly.
63
64    %(params_doc)s
65    %(extra_params_doc)s
66
67    Attributes
68    ----------
69    exog_names
70    endog_names
71
72    Notes
73    -----
    `endog` and `exog` are references to the data provided, so if the data
    is stored in numpy arrays and is modified in place, then `endog` and
    `exog` will change as well.
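
    Examples
    --------
    ``Model`` itself is abstract; a concrete subclass such as ``OLS`` is what
    gets instantiated in practice. A minimal sketch with synthetic data:

    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> x = sm.add_constant(np.arange(10.0))
    >>> y = np.arange(10.0)
    >>> mod = sm.OLS(y, x)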
77    """ % {'params_doc': _model_params_doc,
78           'extra_params_doc': _missing_param_doc + _extra_param_doc}
79
80    # Maximum number of endogenous variables when using a formula
    # Default is 1, the most common case. Override in subclasses when needed
82    # Set to None to skip check
83    _formula_max_endog = 1
84    # kwargs that are generically allowed, maybe not supported in all models
85    _kwargs_allowed = [
86        "missing", 'missing_idx', 'formula', 'design_info', "hasconst",
87        ]
88
89    def __init__(self, endog, exog=None, **kwargs):
90        missing = kwargs.pop('missing', 'none')
91        hasconst = kwargs.pop('hasconst', None)
92        self.data = self._handle_data(endog, exog, missing, hasconst,
93                                      **kwargs)
94        self.k_constant = self.data.k_constant
95        self.exog = self.data.exog
96        self.endog = self.data.endog
97        self._data_attr = []
98        self._data_attr.extend(['exog', 'endog', 'data.exog', 'data.endog'])
99        if 'formula' not in kwargs:  # will not be able to unpickle without these
100            self._data_attr.extend(['data.orig_endog', 'data.orig_exog'])
101        # store keys for extras if we need to recreate model instance
102        # we do not need 'missing', maybe we need 'hasconst'
103        self._init_keys = list(kwargs.keys())
104        if hasconst is not None:
105            self._init_keys.append('hasconst')
106
107    def _get_init_kwds(self):
108        """return dictionary with extra keys used in model.__init__
109        """
110        kwds = dict(((key, getattr(self, key, None))
111                     for key in self._init_keys))
112
113        return kwds
114
115    def _check_kwargs(self, kwargs, keys_extra=None, error=ERROR_INIT_KWARGS):
116
117        kwargs_allowed = [
118            "missing", 'missing_idx', 'formula', 'design_info', "hasconst",
119            ]
120        if keys_extra:
121            kwargs_allowed.extend(keys_extra)
122
123        kwargs_invalid = [i for i in kwargs if i not in kwargs_allowed]
124        if kwargs_invalid:
125            msg = "unknown kwargs " + repr(kwargs_invalid)
126            if error is False:
127                warnings.warn(msg, ValueWarning)
128            else:
129                raise ValueError(msg)
130
131    def _handle_data(self, endog, exog, missing, hasconst, **kwargs):
132        data = handle_data(endog, exog, missing, hasconst, **kwargs)
133        # kwargs arrays could have changed, easier to just attach here
134        for key in kwargs:
135            if key in ['design_info', 'formula']:  # leave attached to data
136                continue
            # pop so we do not keep duplicates or hold extra references
138            try:
139                setattr(self, key, data.__dict__.pop(key))
140            except KeyError:  # panel already pops keys in data handling
141                pass
142        return data
143
144    @classmethod
145    def from_formula(cls, formula, data, subset=None, drop_cols=None,
146                     *args, **kwargs):
147        """
148        Create a Model from a formula and dataframe.
149
150        Parameters
151        ----------
152        formula : str or generic Formula object
153            The formula specifying the model.
154        data : array_like
155            The data for the model. See Notes.
156        subset : array_like
157            An array-like object of booleans, integers, or index values that
158            indicate the subset of df to use in the model. Assumes df is a
159            `pandas.DataFrame`.
160        drop_cols : array_like
161            Columns to drop from the design matrix.  Cannot be used to
162            drop terms involving categoricals.
163        *args
            Additional positional arguments that are passed to the model.
165        **kwargs
166            These are passed to the model with one exception. The
167            ``eval_env`` keyword is passed to patsy. It can be either a
168            :class:`patsy:patsy.EvalEnvironment` object or an integer
169            indicating the depth of the namespace to use. For example, the
170            default ``eval_env=0`` uses the calling namespace. If you wish
171            to use a "clean" environment set ``eval_env=-1``.
172
173        Returns
174        -------
175        model
176            The model instance.
177
178        Notes
179        -----
        data must define __getitem__ with the keys in the formula terms,
        e.g., it can be a numpy structured or rec array, a dictionary, or a
        pandas DataFrame. args and kwargs are passed on to the model
        instantiation.
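
        Examples
        --------
        A minimal sketch using OLS as a representative subclass; the
        DataFrame here is synthetic:

        >>> import numpy as np
        >>> import pandas as pd
        >>> import statsmodels.api as sm
        >>> df = pd.DataFrame({"y": np.arange(5.0), "x": np.arange(5.0)})
        >>> mod = sm.OLS.from_formula("y ~ x", data=df)
        >>> res = mod.fit()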
183        """
184        # TODO: provide a docs template for args/kwargs from child models
185        # TODO: subset could use syntax. issue #469.
186        if subset is not None:
187            data = data.loc[subset]
188        eval_env = kwargs.pop('eval_env', None)
189        if eval_env is None:
190            eval_env = 2
191        elif eval_env == -1:
192            from patsy import EvalEnvironment
193            eval_env = EvalEnvironment({})
194        elif isinstance(eval_env, int):
195            eval_env += 1  # we're going down the stack again
196        missing = kwargs.get('missing', 'drop')
197        if missing == 'none':  # with patsy it's drop or raise. let's raise.
198            missing = 'raise'
199
200        tmp = handle_formula_data(data, None, formula, depth=eval_env,
201                                  missing=missing)
202        ((endog, exog), missing_idx, design_info) = tmp
203        max_endog = cls._formula_max_endog
204        if (max_endog is not None and
205                endog.ndim > 1 and endog.shape[1] > max_endog):
206            raise ValueError('endog has evaluated to an array with multiple '
207                             'columns that has shape {0}. This occurs when '
208                             'the variable converted to endog is non-numeric'
209                             ' (e.g., bool or str).'.format(endog.shape))
210        if drop_cols is not None and len(drop_cols) > 0:
211            cols = [x for x in exog.columns if x not in drop_cols]
212            if len(cols) < len(exog.columns):
213                exog = exog[cols]
214                cols = list(design_info.term_names)
215                for col in drop_cols:
216                    try:
217                        cols.remove(col)
218                    except ValueError:
219                        pass  # OK if not present
220                design_info = design_info.subset(cols)
221
222        kwargs.update({'missing_idx': missing_idx,
223                       'missing': missing,
                       'formula': formula,  # attach formula for unpickling
225                       'design_info': design_info})
226        mod = cls(endog, exog, *args, **kwargs)
227        mod.formula = formula
228        # since we got a dataframe, attach the original
229        mod.data.frame = data
230        return mod
231
232    @property
233    def endog_names(self):
234        """
235        Names of endogenous variables.
236        """
237        return self.data.ynames
238
239    @property
240    def exog_names(self):
241        """
242        Names of exogenous variables.
243        """
244        return self.data.xnames
245
246    def fit(self):
247        """
248        Fit a model to data.
249        """
250        raise NotImplementedError
251
252    def predict(self, params, exog=None, *args, **kwargs):
253        """
        After a model has been fit, predict returns the fitted values.
255
256        This is a placeholder intended to be overwritten by individual models.
257        """
258        raise NotImplementedError
259
260
261class LikelihoodModel(Model):
262    """
263    Likelihood model is a subclass of Model.
264    """
265
266    def __init__(self, endog, exog=None, **kwargs):
267        super().__init__(endog, exog, **kwargs)
268        self.initialize()
269
270    def initialize(self):
271        """
272        Initialize (possibly re-initialize) a Model instance.
273
        For example, if the design matrix of a linear model changes, then
        initialize can be used to recompute values using the modified design
        matrix.
277        """
278        pass
279
280    # TODO: if the intent is to re-initialize the model with new data then this
281    # method needs to take inputs...
282
283    def loglike(self, params):
284        """
285        Log-likelihood of model.
286
287        Parameters
288        ----------
289        params : ndarray
290            The model parameters used to compute the log-likelihood.
291
292        Notes
293        -----
294        Must be overridden by subclasses.
295        """
296        raise NotImplementedError
297
298    def score(self, params):
299        """
300        Score vector of model.
301
302        The gradient of logL with respect to each parameter.
303
304        Parameters
305        ----------
306        params : ndarray
            The parameters at which to evaluate the score.
308
309        Returns
310        -------
311        ndarray
312            The score vector evaluated at the parameters.
313        """
314        raise NotImplementedError
315
316    def information(self, params):
317        """
318        Fisher information matrix of model.
319
320        Returns -1 * Hessian of the log-likelihood evaluated at params.
321
322        Parameters
323        ----------
324        params : ndarray
325            The model parameters.
326        """
327        raise NotImplementedError
328
329    def hessian(self, params):
330        """
331        The Hessian matrix of the model.
332
333        Parameters
334        ----------
335        params : ndarray
336            The parameters to use when evaluating the Hessian.
337
338        Returns
339        -------
340        ndarray
            The Hessian evaluated at the parameters.
342        """
343        raise NotImplementedError
344
345    def fit(self, start_params=None, method='newton', maxiter=100,
346            full_output=True, disp=True, fargs=(), callback=None, retall=False,
347            skip_hessian=False, **kwargs):
348        """
        Fit method for likelihood-based models.
350
351        Parameters
352        ----------
353        start_params : array_like, optional
354            Initial guess of the solution for the loglikelihood maximization.
355            The default is an array of zeros.
356        method : str, optional
357            The `method` determines which solver from `scipy.optimize`
358            is used, and it can be chosen from among the following strings:
359
            - 'newton' for Newton-Raphson
            - 'nm' for Nelder-Mead
361            - 'bfgs' for Broyden-Fletcher-Goldfarb-Shanno (BFGS)
362            - 'lbfgs' for limited-memory BFGS with optional box constraints
363            - 'powell' for modified Powell's method
364            - 'cg' for conjugate gradient
365            - 'ncg' for Newton-conjugate gradient
366            - 'basinhopping' for global basin-hopping solver
367            - 'minimize' for generic wrapper of scipy minimize (BFGS by default)
368
369            The explicit arguments in `fit` are passed to the solver,
370            with the exception of the basin-hopping solver. Each
371            solver has several optional arguments that are not the same across
372            solvers. See the notes section below (or scipy.optimize) for the
373            available arguments and for the list of explicit arguments that the
374            basin-hopping solver supports.
375        maxiter : int, optional
376            The maximum number of iterations to perform.
377        full_output : bool, optional
378            Set to True to have all available output in the Results object's
379            mle_retvals attribute. The output is dependent on the solver.
380            See LikelihoodModelResults notes section for more information.
381        disp : bool, optional
382            Set to True to print convergence messages.
383        fargs : tuple, optional
384            Extra arguments passed to the likelihood function, i.e.,
385            loglike(x,*args)
386        callback : callable callback(xk), optional
387            Called after each iteration, as callback(xk), where xk is the
388            current parameter vector.
389        retall : bool, optional
390            Set to True to return list of solutions at each iteration.
391            Available in Results object's mle_retvals attribute.
392        skip_hessian : bool, optional
393            If False (default), then the negative inverse hessian is calculated
394            after the optimization. If True, then the hessian will not be
395            calculated. However, it will be available in methods that use the
396            hessian in the optimization (currently only with `"newton"`).
397        kwargs : keywords
398            All kwargs are passed to the chosen solver with one exception. The
399            following keyword controls what happens after the fit::
400
401                warn_convergence : bool, optional
402                    If True, checks the model for the converged flag. If the
403                    converged flag is False, a ConvergenceWarning is issued.
404
405        Notes
406        -----
        The 'basinhopping' solver ignores the `maxiter`, `retall` and
        `full_output` explicit arguments.
409
410        Optional arguments for solvers (see returned Results.mle_settings)::
411
412            'newton'
413                tol : float
414                    Relative error in params acceptable for convergence.
            'nm' -- Nelder-Mead
416                xtol : float
417                    Relative error in params acceptable for convergence
418                ftol : float
419                    Relative error in loglike(params) acceptable for
420                    convergence
421                maxfun : int
422                    Maximum number of function evaluations to make.
423            'bfgs'
424                gtol : float
425                    Stop when norm of gradient is less than gtol.
426                norm : float
                    Order of norm (np.inf is max, -np.inf is min)
                epsilon : float
429                    If fprime is approximated, use this value for the step
430                    size. Only relevant if LikelihoodModel.score is None.
431            'lbfgs'
432                m : int
433                    This many terms are used for the Hessian approximation.
434                factr : float
435                    A stop condition that is a variant of relative error.
436                pgtol : float
437                    A stop condition that uses the projected gradient.
                epsilon : float
439                    If fprime is approximated, use this value for the step
440                    size. Only relevant if LikelihoodModel.score is None.
441                maxfun : int
442                    Maximum number of function evaluations to make.
443                bounds : sequence
444                    (min, max) pairs for each element in x,
445                    defining the bounds on that parameter.
446                    Use None for one of min or max when there is no bound
447                    in that direction.
448            'cg'
449                gtol : float
450                    Stop when norm of gradient is less than gtol.
451                norm : float
                    Order of norm (np.inf is max, -np.inf is min)
453                epsilon : float
454                    If fprime is approximated, use this value for the step
455                    size. Can be scalar or vector.  Only relevant if
                    LikelihoodModel.score is None.
457            'ncg'
458                fhess_p : callable f'(x,*args)
459                    Function which computes the Hessian of f times an arbitrary
460                    vector, p.  Should only be supplied if
461                    LikelihoodModel.hessian is None.
462                avextol : float
463                    Stop when the average relative error in the minimizer
464                    falls below this amount.
465                epsilon : float or ndarray
466                    If fhess is approximated, use this value for the step size.
                    Only relevant if LikelihoodModel.hessian is None.
468            'powell'
469                xtol : float
470                    Line-search error tolerance
471                ftol : float
                    Relative error in loglike(params) acceptable for
                    convergence.
474                maxfun : int
475                    Maximum number of function evaluations to make.
476                start_direc : ndarray
477                    Initial direction set.
478            'basinhopping'
479                niter : int
480                    The number of basin hopping iterations.
481                niter_success : int
482                    Stop the run if the global minimum candidate remains the
483                    same for this number of iterations.
484                T : float
485                    The "temperature" parameter for the accept or reject
486                    criterion. Higher "temperatures" mean that larger jumps
487                    in function value will be accepted. For best results
488                    `T` should be comparable to the separation (in function
489                    value) between local minima.
490                stepsize : float
491                    Initial step size for use in the random displacement.
492                interval : int
493                    The interval for how often to update the `stepsize`.
494                minimizer : dict
495                    Extra keyword arguments to be passed to the minimizer
496                    `scipy.optimize.minimize()`, for example 'method' - the
497                    minimization method (e.g. 'L-BFGS-B'), or 'tol' - the
498                    tolerance for termination. Other arguments are mapped from
499                    explicit argument of `fit`:
500                      - `args` <- `fargs`
501                      - `jac` <- `score`
502                      - `hess` <- `hess`
503            'minimize'
504                min_method : str, optional
505                    Name of minimization method to use.
506                    Any method specific arguments can be passed directly.
507                    For a list of methods and their arguments, see
508                    documentation of `scipy.optimize.minimize`.
509                    If no method is specified, then BFGS is used.
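
        Examples
        --------
        A minimal sketch using Logit, a LikelihoodModel subclass; the data
        here is synthetic:

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> rng = np.random.default_rng(0)
        >>> exog = sm.add_constant(rng.standard_normal(100))
        >>> endog = (rng.random(100) < 0.5).astype(float)
        >>> res = sm.Logit(endog, exog).fit(method="bfgs", disp=False)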
510        """
        Hinv = None  # ensure Hinv is defined even when full_output=0
512
513        if start_params is None:
514            if hasattr(self, 'start_params'):
515                start_params = self.start_params
516            elif self.exog is not None:
517                # fails for shape (K,)?
518                start_params = [0] * self.exog.shape[1]
519            else:
520                raise ValueError("If exog is None, then start_params should "
521                                 "be specified")
522
        # TODO: separate args from nonarg taking score and hessian, i.e.,
        # user-supplied and numerically evaluated; fprime does not take
        # args in most (any?) of the optimize functions
526
527        nobs = self.endog.shape[0]
528        # f = lambda params, *args: -self.loglike(params, *args) / nobs
529
530        def f(params, *args):
531            return -self.loglike(params, *args) / nobs
532
533        if method == 'newton':
534            # TODO: why are score and hess positive?
535            def score(params, *args):
536                return self.score(params, *args) / nobs
537
538            def hess(params, *args):
539                return self.hessian(params, *args) / nobs
540        else:
541            def score(params, *args):
542                return -self.score(params, *args) / nobs
543
544            def hess(params, *args):
545                return -self.hessian(params, *args) / nobs
546
547        warn_convergence = kwargs.pop('warn_convergence', True)
548
        # Remove covariance args before calling fit to allow strict checking
550        if 'cov_type' in kwargs:
551            cov_kwds = kwargs.get('cov_kwds', {})
552            kwds = {'cov_type': kwargs['cov_type'], 'cov_kwds': cov_kwds}
553            if cov_kwds:
554                del kwargs["cov_kwds"]
555            del kwargs["cov_type"]
556        else:
557            kwds = {}
558        if 'use_t' in kwargs:
559            kwds['use_t'] = kwargs['use_t']
560            del kwargs["use_t"]
561
562        optimizer = Optimizer()
563        xopt, retvals, optim_settings = optimizer._fit(f, score, start_params,
564                                                       fargs, kwargs,
565                                                       hessian=hess,
566                                                       method=method,
567                                                       disp=disp,
568                                                       maxiter=maxiter,
569                                                       callback=callback,
570                                                       retall=retall,
571                                                       full_output=full_output)
572        # Restore cov_type, cov_kwds and use_t
573        optim_settings.update(kwds)
574        # NOTE: this is for fit_regularized and should be generalized
575        cov_params_func = kwargs.setdefault('cov_params_func', None)
576        if cov_params_func:
577            Hinv = cov_params_func(self, xopt, retvals)
578        elif method == 'newton' and full_output:
579            Hinv = np.linalg.inv(-retvals['Hessian']) / nobs
580        elif not skip_hessian:
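            # Invert the Hessian via an eigendecomposition: the eigenvalues
            # both test positive definiteness and give a symmetric inverse.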
581            H = -1 * self.hessian(xopt)
582            invertible = False
583            if np.all(np.isfinite(H)):
584                eigvals, eigvecs = np.linalg.eigh(H)
585                if np.min(eigvals) > 0:
586                    invertible = True
587
588            if invertible:
589                Hinv = eigvecs.dot(np.diag(1.0 / eigvals)).dot(eigvecs.T)
590                Hinv = np.asfortranarray((Hinv + Hinv.T) / 2.0)
591            else:
592                warnings.warn('Inverting hessian failed, no bse or cov_params '
593                              'available', HessianInversionWarning)
594                Hinv = None
595
596        # TODO: add Hessian approximation and change the above if needed
597        mlefit = LikelihoodModelResults(self, xopt, Hinv, scale=1., **kwds)
598
599        # TODO: hardcode scale?
600        mlefit.mle_retvals = retvals
601        if isinstance(retvals, dict):
602            if warn_convergence and not retvals['converged']:
603                from statsmodels.tools.sm_exceptions import ConvergenceWarning
604                warnings.warn("Maximum Likelihood optimization failed to "
605                              "converge. Check mle_retvals",
606                              ConvergenceWarning)
607
608        mlefit.mle_settings = optim_settings
609        return mlefit
610
611    def _fit_zeros(self, keep_index=None, start_params=None,
612                   return_auxiliary=False, k_params=None, **fit_kwds):
613        """experimental, fit the model subject to zero constraints
614
615        Intended for internal use cases until we know what we need.
616        API will need to change to handle models with two exog.
617        This is not yet supported by all model subclasses.
618
619        This is essentially a simplified version of `fit_constrained`, and
620        does not need to use `offset`.
621
622        The estimation creates a new model with transformed design matrix,
623        exog, and converts the results back to the original parameterization.
624
625        Some subclasses could use a more efficient calculation than using a
626        new model.
627
628        Parameters
629        ----------
        keep_index : array_like (int or bool) or slice
            variables that should be kept in the model; the coefficients of
            all other variables are constrained to zero.
632        start_params : None or array_like
            starting values for the optimization. `start_params` needs to be
            given in the original parameter space and is internally
            transformed.
636        k_params : int or None
637            If None, then we try to infer from start_params or model.
638        **fit_kwds : keyword arguments
639            fit_kwds are used in the optimization of the transformed model.
640
641        Returns
642        -------
643        results : Results instance
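
        Examples
        --------
        A hypothetical sketch (the method is experimental and mainly for
        internal use): keep the first and second columns of a fitted model
        ``mod``, fixing all remaining coefficients at zero::

            res = mod._fit_zeros(keep_index=[0, 1])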
644        """
645        # we need to append index of extra params to keep_index as in
646        # NegativeBinomial
647        if hasattr(self, 'k_extra') and self.k_extra > 0:
648            # we cannot change the original, TODO: should we add keep_index_params?
649            keep_index = np.array(keep_index, copy=True)
650            k = self.exog.shape[1]
651            extra_index = np.arange(k, k + self.k_extra)
652            keep_index_p = np.concatenate((keep_index, extra_index))
653        else:
654            keep_index_p = keep_index
655
        # not all models support start_params; drop when None, pass via fit_kwds
657        if start_params is not None:
658            fit_kwds['start_params'] = start_params[keep_index_p]
659            k_params = len(start_params)
            # ignore k_params in this case, or verify consistency?
661
662        # build auxiliary model and fit
663        init_kwds = self._get_init_kwds()
664        mod_constr = self.__class__(self.endog, self.exog[:, keep_index],
665                                    **init_kwds)
666        res_constr = mod_constr.fit(**fit_kwds)
667        # switch name, only need keep_index for params below
668        keep_index = keep_index_p
669
670        if k_params is None:
671            k_params = self.exog.shape[1]
672            k_params += getattr(self, 'k_extra', 0)
673
674        params_full = np.zeros(k_params)
675        params_full[keep_index] = res_constr.params
676
        # create dummy results instance, TODO: wire up properly
        # TODO: this could be moved into separate private method if needed
        # discrete L1 fit_regularized doesn't re-estimate AFAICS
        # RLM does not have method, disp nor warn_convergence keywords
        # OLS, WLS swallows extra kwds with **kwargs, but does not have method='nm'
        try:
            # Note: adding full_output=False causes exceptions
684            res = self.fit(maxiter=0, disp=0, method='nm', skip_hessian=True,
685                           warn_convergence=False, start_params=params_full)
686            # we get a wrapper back
687        except (TypeError, ValueError):
688            res = self.fit()
689
690        # Warning: make sure we are not just changing the wrapper instead of
691        # results #2400
692        # TODO: do we need to change res._results.scale in some models?
693        if hasattr(res_constr.model, 'scale'):
694            # Note: res.model is self
695            # GLM problem, see #2399,
696            # TODO: remove from model if not needed anymore
697            res.model.scale = res._results.scale = res_constr.model.scale
698
699        if hasattr(res_constr, 'mle_retvals'):
700            res._results.mle_retvals = res_constr.mle_retvals
701            # not available for not scipy optimization, e.g. glm irls
702            # TODO: what retvals should be required?
703            # res.mle_retvals['fcall'] = res_constr.mle_retvals.get('fcall', np.nan)
704            # res.mle_retvals['iterations'] = res_constr.mle_retvals.get(
705            #                                                 'iterations', np.nan)
706            # res.mle_retvals['converged'] = res_constr.mle_retvals['converged']
707        # overwrite all mle_settings
708        if hasattr(res_constr, 'mle_settings'):
709            res._results.mle_settings = res_constr.mle_settings
710
711        res._results.params = params_full
712        if (not hasattr(res._results, 'normalized_cov_params') or
713                res._results.normalized_cov_params is None):
714            res._results.normalized_cov_params = np.zeros((k_params, k_params))
715        else:
716            res._results.normalized_cov_params[...] = 0
717
718        # fancy indexing requires integer array
719        keep_index = np.array(keep_index)
720        res._results.normalized_cov_params[keep_index[:, None], keep_index] = \
721            res_constr.normalized_cov_params
722        k_constr = res_constr.df_resid - res._results.df_resid
723        if hasattr(res_constr, 'cov_params_default'):
724            res._results.cov_params_default = np.zeros((k_params, k_params))
725            res._results.cov_params_default[keep_index[:, None], keep_index] = \
726                res_constr.cov_params_default
727        if hasattr(res_constr, 'cov_type'):
728            res._results.cov_type = res_constr.cov_type
729            res._results.cov_kwds = res_constr.cov_kwds
730
731        res._results.keep_index = keep_index
732        res._results.df_resid = res_constr.df_resid
733        res._results.df_model = res_constr.df_model
734
735        res._results.k_constr = k_constr
736        res._results.results_constrained = res_constr
737
738        # special temporary workaround for RLM
739        # need to be able to override robust covariances
740        if hasattr(res.model, 'M'):
741            del res._results._cache['resid']
742            del res._results._cache['fittedvalues']
743            del res._results._cache['sresid']
744            cov = res._results._cache['bcov_scaled']
745            # inplace adjustment
746            cov[...] = 0
747            cov[keep_index[:, None], keep_index] = res_constr.bcov_scaled
748            res._results.cov_params_default = cov
749
750        return res
751
752    def _fit_collinear(self, atol=1e-14, rtol=1e-13, **kwds):
753        """experimental, fit of the model without collinear variables
754
755        This currently uses QR to drop variables based on the given
756        sequence.
        Options will be added in the future, when supporting functions
        to identify collinear variables become available.
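
        A minimal sketch of the QR-based detection used below, with synthetic
        data where the third column duplicates the second:

        >>> import numpy as np
        >>> x = np.column_stack([np.ones(10), np.arange(10.), np.arange(10.)])
        >>> r = np.linalg.qr(x, mode='r')
        >>> np.abs(r.diagonal()) < 1e-8
        array([False, False,  True])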
759        """
760
        # ------ copied from PR #2380, remove when merged
762        x = self.exog
763        tol = atol + rtol * x.var(0)
764        r = np.linalg.qr(x, mode='r')
765        mask = np.abs(r.diagonal()) < np.sqrt(tol)
766        # TODO add to results instance
767        # idx_collinear = np.where(mask)[0]
768        idx_keep = np.where(~mask)[0]
769        return self._fit_zeros(keep_index=idx_keep, **kwds)
770
771
772# TODO: the below is unfinished
773class GenericLikelihoodModel(LikelihoodModel):
774    """
775    Allows the fitting of any likelihood function via maximum likelihood.
776
    A subclass needs to specify at least the log-likelihood.
778    If the log-likelihood is specified for each observation, then results that
779    require the Jacobian will be available. (The other case is not tested yet.)
780
781    Notes
782    -----
783    Optimization methods that require only a likelihood function are 'nm' and
    'powell'.
785
786    Optimization methods that require a likelihood function and a
787    score/gradient are 'bfgs', 'cg', and 'ncg'. A function to compute the
788    Hessian is optional for 'ncg'.
789
    The optimization method that requires a likelihood function, a
    score/gradient, and a Hessian is 'newton'.
792
    If they are not overwritten by a subclass, then the numerical gradient,
    Jacobian and Hessian of the log-likelihood are calculated by numerical
    forward differentiation. In some cases this might result in precision
    problems, and the Hessian might not be positive definite. Even if the
    Hessian is not positive definite, the covariance matrix of the parameter
    estimates based on the outer product of the Jacobian might still be valid.
799
800
801    Examples
802    --------
    See also the subclasses in the miscmodels directory.

    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> from statsmodels.base.model import GenericLikelihoodModel
    >>> data = sm.datasets.spector.load()
    >>> data.exog = sm.add_constant(data.exog)
    >>> probit_mod = sm.Probit(data.endog, data.exog)
    >>> probit_res = probit_mod.fit()
    >>> loglike = probit_mod.loglike
    >>> score = probit_mod.score
    >>> mod = GenericLikelihoodModel(data.endog, data.exog, loglike, score)
    >>> res = mod.fit(method="nm", maxiter=500)
    >>> np.allclose(res.params, probit_res.params)
818    """
819    def __init__(self, endog, exog=None, loglike=None, score=None,
820                 hessian=None, missing='none', extra_params_names=None,
821                 **kwds):
822        # let them be none in case user wants to use inheritance
823        if loglike is not None:
824            self.loglike = loglike
825        if score is not None:
826            self.score = score
827        if hessian is not None:
828            self.hessian = hessian
829
830        hasconst = kwds.pop("hasconst", None)
831        self.__dict__.update(kwds)
832
833        # TODO: data structures?
834
835        # TODO temporary solution, force approx normal
836        # self.df_model = 9999
837        # somewhere: CacheWriteWarning: 'df_model' cannot be overwritten
838        super(GenericLikelihoodModel, self).__init__(endog, exog,
839                                                     missing=missing,
840                                                     hasconst=hasconst,
841                                                     **kwds
842                                                     )
843
844        # this will not work for ru2nmnl, maybe np.ndim of a dict?
845        if exog is not None:
846            self.nparams = (exog.shape[1] if np.ndim(exog) == 2 else 1)
847
848        if extra_params_names is not None:
849            self._set_extra_params_names(extra_params_names)
850
851    def _set_extra_params_names(self, extra_params_names):
852        # check param_names
853        if extra_params_names is not None:
854            if self.exog is not None:
855                self.exog_names.extend(extra_params_names)
856            else:
857                self.data.xnames = extra_params_names
858
859        self.nparams = len(self.exog_names)
860
861    # this is redundant and not used when subclassing
862    def initialize(self):
863        """
864        Initialize (possibly re-initialize) a Model instance. For
865        instance, the design matrix of a linear model may change
866        and some things must be recomputed.
867        """
        if not self.score:  # right now score is not optional
            self.score = lambda x: approx_fprime(x, self.loglike)
            if not self.hessian:
                pass  # no-op: the `hessian` method provides a numerical default
        else:   # can use approx_hess_p if we have a gradient
            if not self.hessian:
                pass  # no-op: see above
875        # Initialize is called by
876        # statsmodels.model.LikelihoodModel.__init__
877        # and should contain any preprocessing that needs to be done for a model
878        if self.exog is not None:
879            # assume constant
880            er = np.linalg.matrix_rank(self.exog)
881            self.df_model = float(er - 1)
882            self.df_resid = float(self.exog.shape[0] - er)
883        else:
884            self.df_model = np.nan
885            self.df_resid = np.nan
886        super(GenericLikelihoodModel, self).initialize()
887
888    def expandparams(self, params):
889        """
890        expand to full parameter array when some parameters are fixed
891
892        Parameters
893        ----------
894        params : ndarray
895            reduced parameter array
896
897        Returns
898        -------
899        paramsfull : ndarray
900            expanded parameter array where fixed parameters are included
901
902        Notes
903        -----
904        Calling this requires that self.fixed_params and self.fixed_paramsmask
905        are defined.
906
907        *developer notes:*
908
909        This can be used in the log-likelihood to ...
910
911        this could also be replaced by a more general parameter
912        transformation.
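
        For example, with ``fixed_params = np.array([0., 1., 0.])`` and
        ``fixed_paramsmask = np.array([True, False, True])``, calling
        ``expandparams([a, c])`` returns ``[a, 1., c]``.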
913        """
914        paramsfull = self.fixed_params.copy()
915        paramsfull[self.fixed_paramsmask] = params
916        return paramsfull
917
918    def reduceparams(self, params):
919        """Reduce parameters"""
920        return params[self.fixed_paramsmask]
921
922    def loglike(self, params):
923        """Log-likelihood of model at params"""
924        return self.loglikeobs(params).sum(0)
925
926    def nloglike(self, params):
927        """Negative log-likelihood of model at params"""
928        return -self.loglikeobs(params).sum(0)
929
930    def loglikeobs(self, params):
931        """
932        Log-likelihood of the model for all observations at params.
933
934        Parameters
935        ----------
936        params : array_like
937            The parameters of the model.
938
939        Returns
940        -------
941        loglike : array_like
942            The log likelihood of the model evaluated at `params`.
943        """
944        return -self.nloglikeobs(params)
945
946    def score(self, params):
947        """
948        Gradient of log-likelihood evaluated at params
949        """
950        kwds = {}
951        kwds.setdefault('centered', True)
952        return approx_fprime(params, self.loglike, **kwds).ravel()
953
954    def score_obs(self, params, **kwds):
955        """
956        Jacobian/Gradient of log-likelihood evaluated at params for each
957        observation.
958        """
959        # kwds.setdefault('epsilon', 1e-4)
960        kwds.setdefault('centered', True)
961        return approx_fprime(params, self.loglikeobs, **kwds)
962
963    def hessian(self, params):
964        """
965        Hessian of log-likelihood evaluated at params
966        """
967        from statsmodels.tools.numdiff import approx_hess
968
969        # need options for hess (epsilon)
970        return approx_hess(params, self.loglike)
971
972    def hessian_factor(self, params, scale=None, observed=True):
973        """Weights for calculating Hessian
974
975        Parameters
976        ----------
977        params : ndarray
978            parameter at which Hessian is evaluated
979        scale : None or float
980            If scale is None, then the default scale will be calculated.
981            Default scale is defined by `self.scaletype` and set in fit.
982            If scale is not None, then it is used as a fixed scale.
983        observed : bool
            If True, then the observed Hessian is returned. If False, then
            the expected information matrix is returned.
986
987        Returns
988        -------
989        hessian_factor : ndarray, 1d
990            A 1d weight vector used in the calculation of the Hessian.
991            The hessian is obtained by `(exog.T * hessian_factor).dot(exog)`
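
        Notes
        -----
        A sketch of how the factor would be used by a subclass implementation
        (``hf`` is the returned weight vector)::

            hf = self.hessian_factor(params)
            hess = (self.exog.T * hf).dot(self.exog)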
992        """
993
994        raise NotImplementedError
995
996    def fit(self, start_params=None, method='nm', maxiter=500, full_output=1,
997            disp=1, callback=None, retall=0, **kwargs):
998
999        if start_params is None:
1000            if hasattr(self, 'start_params'):
1001                start_params = self.start_params
1002            else:
1003                start_params = 0.1 * np.ones(self.nparams)
1004
1005        fit_method = super(GenericLikelihoodModel, self).fit
1006        mlefit = fit_method(start_params=start_params,
1007                            method=method, maxiter=maxiter,
1008                            full_output=full_output,
1009                            disp=disp, callback=callback, **kwargs)
1010
1011        results_class = getattr(self, 'results_class',
1012                                GenericLikelihoodModelResults)
1013        genericmlefit = results_class(self, mlefit)
1014
1015        # amend param names
1016        exog_names = [] if (self.exog_names is None) else self.exog_names
1017        k_miss = len(exog_names) - len(mlefit.params)
        if k_miss != 0:
1019            if k_miss < 0:
1020                self._set_extra_params_names(['par%d' % i
1021                                              for i in range(-k_miss)])
1022            else:
1023                # I do not want to raise after we have already fit()
1024                warnings.warn('more exog_names than parameters', ValueWarning)
1025
1026        return genericmlefit
1027
1028
1029class Results(object):
1030    """
1031    Class to contain model results
1032
1033    Parameters
1034    ----------
1035    model : class instance
1036        the previously specified model instance
1037    params : ndarray
1038        parameter estimates from the fit model
1039    """
1040    def __init__(self, model, params, **kwd):
1041        self.__dict__.update(kwd)
1042        self.initialize(model, params, **kwd)
1043        self._data_attr = []
1044        # Variables to clear from cache
1045        self._data_in_cache = ['fittedvalues', 'resid', 'wresid']
1046
1047    def initialize(self, model, params, **kwargs):
1048        """
1049        Initialize (possibly re-initialize) a Results instance.
1050
1051        Parameters
1052        ----------
1053        model : Model
1054            The model instance.
1055        params : ndarray
1056            The model parameters.
1057        **kwargs
1058            Any additional keyword arguments required to initialize the model.
1059        """
1060        self.params = params
1061        self.model = model
1062        if hasattr(model, 'k_constant'):
1063            self.k_constant = model.k_constant
1064
1065    def predict(self, exog=None, transform=True, *args, **kwargs):
1066        """
1067        Call self.model.predict with self.params as the first argument.
1068
1069        Parameters
1070        ----------
1071        exog : array_like, optional
            The values for which you want to predict. See Notes below.
1073        transform : bool, optional
            If the model was fit via a formula, whether to pass exog through
            the formula's transformations. Default is True. E.g., if you fit
1076            a model y ~ log(x1) + log(x2), and transform is True, then
1077            you can pass a data structure that contains x1 and x2 in
1078            their original form. Otherwise, you'd need to log the data
1079            first.
1080        *args
1081            Additional arguments to pass to the model, see the
1082            predict method of the model for the details.
1083        **kwargs
1084            Additional keywords arguments to pass to the model, see the
1085            predict method of the model for the details.
1086
1087        Returns
1088        -------
1089        array_like
1090            See self.model.predict.
1091
1092        Notes
1093        -----
        The types of exog that are supported depend on whether a formula
        was used in the specification of the model.
1096
        If a formula was used, then exog is processed in the same way as
        the original data. The new data therefore needs to provide key access
        to the same variable names, and can be a pandas DataFrame or a dict
        like object that contains numpy arrays.
1101
1102        If no formula was used, then the provided exog needs to have the
1103        same number of columns as the original exog in the model. No
1104        transformation of the data is performed except converting it to
1105        a numpy array.
1106
1107        Row indices as in pandas data frames are supported, and added to the
1108        returned prediction.
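
        Examples
        --------
        A minimal formula-based sketch with synthetic data:

        >>> import numpy as np
        >>> import pandas as pd
        >>> import statsmodels.formula.api as smf
        >>> df = pd.DataFrame({"y": np.arange(5.0), "x": np.arange(5.0)})
        >>> res = smf.ols("y ~ x", data=df).fit()
        >>> res.predict(pd.DataFrame({"x": [10.0]}))
        0    10.0
        dtype: float64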
1109        """
1110        import pandas as pd
1111
1112        is_pandas = _is_using_pandas(exog, None)
1113        exog_index = None
1114        if is_pandas:
1115            if exog.ndim == 2 or self.params.size == 1:
1116                exog_index = exog.index
1117            else:
1118                exog_index = [exog.index.name]
1119
1120        if transform and hasattr(self.model, 'formula') and (exog is not None):
1121            # allow both location of design_info, see #7043
1122            design_info = (getattr(self.model, "design_info", None) or
1123                           self.model.data.design_info)
1124            from patsy import dmatrix
1125            if isinstance(exog, pd.Series):
1126                # we are guessing whether it should be column or row
1127                if (hasattr(exog, 'name') and isinstance(exog.name, str) and
1128                        exog.name in design_info.describe()):
1129                    # assume we need one column
1130                    exog = pd.DataFrame(exog)
1131                else:
1132                    # assume we need a row
1133                    exog = pd.DataFrame(exog).T
1134            orig_exog_len = len(exog)
1135            is_dict = isinstance(exog, dict)
1136            try:
1137                exog = dmatrix(design_info, exog, return_type="dataframe")
1138            except Exception as exc:
1139                msg = ('predict requires that you use a DataFrame when '
1140                       'predicting from a model\nthat was created using the '
1141                       'formula api.'
1142                       '\n\nThe original error message returned by patsy is:\n'
                       '{0}'.format(str(exc)))
1144                raise exc.__class__(msg)
1145            if orig_exog_len > len(exog) and not is_dict:
1146                if exog_index is None:
1147                    warnings.warn('nan values have been dropped', ValueWarning)
1148                else:
1149                    exog = exog.reindex(exog_index)
1150            exog_index = exog.index
1151
1152        if exog is not None:
1153            exog = np.asarray(exog)
1154            if exog.ndim == 1 and (self.model.exog.ndim == 1 or
1155                                   self.model.exog.shape[1] == 1):
1156                exog = exog[:, None]
1157            exog = np.atleast_2d(exog)  # needed in count model shape[1]
1158
1159        predict_results = self.model.predict(self.params, exog, *args,
1160                                             **kwargs)
1161
1162        if exog_index is not None and not hasattr(predict_results,
1163                                                  'predicted_values'):
1164            if predict_results.ndim == 1:
1165                return pd.Series(predict_results, index=exog_index)
1166            else:
1167                return pd.DataFrame(predict_results, index=exog_index)
1168        else:
1169            return predict_results
1170
1171    def summary(self):
1172        """
1173        Summary
1174
1175        Not implemented
1176        """
1177        raise NotImplementedError
1178
1179
1180# TODO: public method?
1181class LikelihoodModelResults(Results):
1182    """
1183    Class to contain results from likelihood models
1184
1185    Parameters
1186    ----------
1187    model : LikelihoodModel instance or subclass instance
1188        LikelihoodModelResults holds a reference to the model that is fit.
1189    params : 1d array_like
1190        parameter estimates from estimated model
1191    normalized_cov_params : 2d array
1192       Normalized (before scaling) covariance of params. (dot(X.T,X))**-1
1193    scale : float
1194        For (some subset of models) scale will typically be the
1195        mean square error from the estimated model (sigma^2)
1196
1197    Attributes
1198    ----------
1199    mle_retvals : dict
1200        Contains the values returned from the chosen optimization method if
1201        full_output is True during the fit.  Available only if the model
1202        is fit by maximum likelihood.  See notes below for the output from
1203        the different methods.
1204    mle_settings : dict
1205        Contains the arguments passed to the chosen optimization method.
1206        Available if the model is fit by maximum likelihood.  See
1207        LikelihoodModel.fit for more information.
1208    model : model instance
        LikelihoodModelResults contains a reference to the model that is fit.
1210    params : ndarray
1211        The parameters estimated for the model.
1212    scale : float
1213        The scaling factor of the model given during instantiation.
1214    tvalues : ndarray
1215        The t-values of the standard errors.
1216
1217
1218    Notes
1219    -----
1220    The covariance of params is given by scale times normalized_cov_params.
1221
1222    Return values by solver if full_output is True during fit:
1223
1224        'newton'
1225            fopt : float
1226                The value of the (negative) loglikelihood at its
1227                minimum.
1228            iterations : int
1229                Number of iterations performed.
1230            score : ndarray
1231                The score vector at the optimum.
1232            Hessian : ndarray
1233                The Hessian at the optimum.
1234            warnflag : int
1235                1 if maxiter is exceeded. 0 if successful convergence.
1236            converged : bool
1237                True: converged. False: did not converge.
1238            allvecs : list
1239                List of solutions at each iteration.
1240        'nm'
1241            fopt : float
1242                The value of the (negative) loglikelihood at its
1243                minimum.
1244            iterations : int
1245                Number of iterations performed.
1246            warnflag : int
1247                1: Maximum number of function evaluations made.
1248                2: Maximum number of iterations reached.
1249            converged : bool
1250                True: converged. False: did not converge.
1251            allvecs : list
1252                List of solutions at each iteration.
1253        'bfgs'
1254            fopt : float
1255                Value of the (negative) loglikelihood at its minimum.
1256            gopt : float
1257                Value of gradient at minimum, which should be near 0.
1258            Hinv : ndarray
1259                value of the inverse Hessian matrix at minimum.  Note
1260                that this is just an approximation and will often be
1261                different from the value of the analytic Hessian.
1262            fcalls : int
1263                Number of calls to loglike.
1264            gcalls : int
1265                Number of calls to gradient/score.
1266            warnflag : int
1267                1: Maximum number of iterations exceeded. 2: Gradient
1268                and/or function calls are not changing.
1269            converged : bool
1270                True: converged.  False: did not converge.
1271            allvecs : list
1272                Results at each iteration.
1273        'lbfgs'
1274            fopt : float
1275                Value of the (negative) loglikelihood at its minimum.
1276            gopt : float
1277                Value of gradient at minimum, which should be near 0.
1278            fcalls : int
1279                Number of calls to loglike.
1280            warnflag : int
1281                Warning flag:
1282
1283                - 0 if converged
1284                - 1 if too many function evaluations or too many iterations
1285                - 2 if stopped for another reason
1286
1287            converged : bool
1288                True: converged.  False: did not converge.
1289        'powell'
1290            fopt : float
1291                Value of the (negative) loglikelihood at its minimum.
1292            direc : ndarray
1293                Current direction set.
1294            iterations : int
1295                Number of iterations performed.
1296            fcalls : int
1297                Number of calls to loglike.
1298            warnflag : int
1299                1: Maximum number of function evaluations. 2: Maximum number
1300                of iterations.
1301            converged : bool
1302                True : converged. False: did not converge.
1303            allvecs : list
1304                Results at each iteration.
1305        'cg'
1306            fopt : float
1307                Value of the (negative) loglikelihood at its minimum.
1308            fcalls : int
1309                Number of calls to loglike.
1310            gcalls : int
1311                Number of calls to gradient/score.
1312            warnflag : int
1313                1: Maximum number of iterations exceeded. 2: Gradient and/
1314                or function calls not changing.
1315            converged : bool
1316                True: converged. False: did not converge.
1317            allvecs : list
1318                Results at each iteration.
1319        'ncg'
1320            fopt : float
1321                Value of the (negative) loglikelihood at its minimum.
1322            fcalls : int
1323                Number of calls to loglike.
1324            gcalls : int
1325                Number of calls to gradient/score.
1326            hcalls : int
1327                Number of calls to hessian.
1328            warnflag : int
1329                1: Maximum number of iterations exceeded.
1330            converged : bool
1331                True: converged. False: did not converge.
1332            allvecs : list
1333                Results at each iteration.
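
    For example, after a fit by maximum likelihood, the convergence flag can
    be inspected with ``res.mle_retvals['converged']``.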
1334        """

    # by default we use normal distribution
    # can be overwritten by instances or subclasses

    def __init__(self, model, params, normalized_cov_params=None, scale=1.,
                 **kwargs):
        super(LikelihoodModelResults, self).__init__(model, params)
        self.normalized_cov_params = normalized_cov_params
        self.scale = scale
        self._use_t = False
        # robust covariance
        # We put cov_type in kwargs so subclasses can decide in fit whether to
        # use this generic implementation
        if 'use_t' in kwargs:
            use_t = kwargs['use_t']
            self.use_t = use_t if use_t is not None else False
        if 'cov_type' in kwargs:
            cov_type = kwargs.get('cov_type', 'nonrobust')
            cov_kwds = kwargs.get('cov_kwds', {})

            if cov_type == 'nonrobust':
                self.cov_type = 'nonrobust'
                self.cov_kwds = {'description': 'Standard Errors assume that the ' +
                                 'covariance matrix of the errors is correctly ' +
                                 'specified.'}
            else:
                from statsmodels.base.covtype import get_robustcov_results
                if cov_kwds is None:
                    cov_kwds = {}
                use_t = self.use_t
                # TODO: we should not need use_t in get_robustcov_results
                get_robustcov_results(self, cov_type=cov_type, use_self=True,
                                      use_t=use_t, **cov_kwds)

    def normalized_cov_params(self):
        """See specific model class docstring"""
        raise NotImplementedError

    def _get_robustcov_results(self, cov_type='nonrobust', use_self=True,
                               use_t=None, **cov_kwds):
        if use_self is False:
            raise ValueError("use_self should have been removed long ago.  "
                             "See GH#4401")
        from statsmodels.base.covtype import get_robustcov_results
        if cov_kwds is None:
            cov_kwds = {}

        if cov_type == 'nonrobust':
            self.cov_type = 'nonrobust'
            self.cov_kwds = {'description': 'Standard Errors assume that the ' +
                             'covariance matrix of the errors is correctly ' +
                             'specified.'}
        else:
            # TODO: we should not need use_t in get_robustcov_results
            get_robustcov_results(self, cov_type=cov_type, use_self=True,
                                  use_t=use_t, **cov_kwds)

    @property
    def use_t(self):
        """Flag indicating to use the Student's distribution in inference."""
        return self._use_t

    @use_t.setter
    def use_t(self, value):
        self._use_t = bool(value)

    @cached_value
    def llf(self):
        """Log-likelihood of model"""
        return self.model.loglike(self.params)

    @cached_value
    def bse(self):
        """The standard errors of the parameter estimates."""
        # Issue 3299
        if ((not hasattr(self, 'cov_params_default')) and
                (self.normalized_cov_params is None)):
            bse_ = np.empty(len(self.params))
            bse_[:] = np.nan
        else:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", RuntimeWarning)
                bse_ = np.sqrt(np.diag(self.cov_params()))
        return bse_

    @cached_value
    def tvalues(self):
        """
        Return the t-statistic for a given parameter estimate.
        """
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", RuntimeWarning)
            return self.params / self.bse

    @cached_value
    def pvalues(self):
        """The two-tailed p values for the t-stats of the params."""
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", RuntimeWarning)
            if self.use_t:
                df_resid = getattr(self, 'df_resid_inference', self.df_resid)
                return stats.t.sf(np.abs(self.tvalues), df_resid) * 2
            else:
                return stats.norm.sf(np.abs(self.tvalues)) * 2

    def cov_params(self, r_matrix=None, column=None, scale=None, cov_p=None,
                   other=None):
        """
        Compute the variance/covariance matrix.

        The variance/covariance matrix can be of a linear contrast of the
        estimated parameters or all params multiplied by scale, which will
        usually be an estimate of sigma^2.  Scale is assumed to be a scalar.

        Parameters
        ----------
        r_matrix : array_like
            Can be 1d, or 2d.  Can be used alone or with other.
        column : array_like, optional
            Must be used on its own.  Can be 0d or 1d; see below.
        scale : float, optional
            Can be specified or not.  Default is None, which means that
            the scale argument is taken from the model.
        cov_p : ndarray, optional
            The covariance of the parameters. If not provided, this value is
            read from `self.normalized_cov_params` or
            `self.cov_params_default`.
        other : array_like, optional
            Can be used when r_matrix is specified.

        Returns
        -------
        ndarray
            The covariance matrix of the parameter estimates or of a linear
            combination of parameter estimates. See Notes.

        Notes
        -----
        (The below are assumed to be in matrix notation.)

        If no argument is specified, returns the covariance matrix of the
        model ``(scale)*(X.T X)^(-1)``.

        If contrast is specified, it pre- and post-multiplies as follows
        ``(scale) * r_matrix (X.T X)^(-1) r_matrix.T``

        If contrast and other are specified, returns
        ``(scale) * r_matrix (X.T X)^(-1) other.T``

        If column is specified, returns
        ``(scale) * (X.T X)^(-1)[column,column]`` if column is 0d

        OR

        ``(scale) * (X.T X)^(-1)[column][:,column]`` if column is 1d
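
        Examples
        --------
        A brief sketch, reusing the longley OLS fit from the ``t_test``
        example below (values are omitted here since they depend on the
        data):

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> data = sm.datasets.longley.load()
        >>> data.exog = sm.add_constant(data.exog)
        >>> results = sm.OLS(data.endog, data.exog).fit()
        >>> cov_all = results.cov_params()        # full (k x k) matrix
        >>> var_2 = results.cov_params(column=2)  # variance of params[2]
        >>> r = np.zeros(len(results.params))
        >>> r[5:] = [1, -1]
        >>> var_diff = results.cov_params(r_matrix=r)  # variance of b5 - b6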
        """
        if (hasattr(self, 'mle_settings') and
                self.mle_settings['optimizer'] in ['l1', 'l1_cvxopt_cp']):
            dot_fun = nan_dot
        else:
            dot_fun = np.dot

        if (cov_p is None and self.normalized_cov_params is None and
                not hasattr(self, 'cov_params_default')):
            raise ValueError('need covariance of parameters for computing '
                             '(unnormalized) covariances')
        if column is not None and (r_matrix is not None or other is not None):
            raise ValueError('Column should be specified without other '
                             'arguments.')
        if other is not None and r_matrix is None:
            raise ValueError('other can only be specified with r_matrix')

        if cov_p is None:
            if hasattr(self, 'cov_params_default'):
                cov_p = self.cov_params_default
            else:
                if scale is None:
                    scale = self.scale
                cov_p = self.normalized_cov_params * scale

        if column is not None:
            column = np.asarray(column)
            if column.shape == ():
                return cov_p[column, column]
            else:
                return cov_p[column[:, None], column]
        elif r_matrix is not None:
            r_matrix = np.asarray(r_matrix)
            if r_matrix.shape == ():
                raise ValueError("r_matrix should be 1d or 2d")
            if other is None:
                other = r_matrix
            else:
                other = np.asarray(other)
            tmp = dot_fun(r_matrix, dot_fun(cov_p, np.transpose(other)))
            return tmp
        else:  # if r_matrix is None and column is None:
            return cov_p

    # TODO: make sure this works as needed for GLMs
    def t_test(self, r_matrix, cov_p=None, use_t=None):
        """
        Compute a t-test for each linear hypothesis of the form Rb = q.

        Parameters
        ----------
        r_matrix : {array_like, str, tuple}
            One of:

            - array : If an array is given, a p x k 2d array or length k 1d
              array specifying the linear restrictions. It is assumed
              that the linear combination is equal to zero.
            - str : The full hypotheses to test can be given as a string.
              See the examples.
            - tuple : A tuple of arrays in the form (R, q). If q is given,
              can be either a scalar or a length p row vector.

        cov_p : array_like, optional
            An alternative estimate for the parameter covariance matrix.
            If None is given, self.normalized_cov_params is used.
        use_t : bool, optional
            If use_t is None, then the default of the model is used. If use_t
            is True, then the p-values are based on the t distribution. If
            use_t is False, then the p-values are based on the normal
            distribution.

        Returns
        -------
        ContrastResults
            The results for the test are attributes of this results instance.
            The available results have the same elements as the parameter table
            in `summary()`.

        See Also
        --------
        tvalues : Individual t statistics for the estimated parameters.
        f_test : Perform an F test on model parameters.
        patsy.DesignInfo.linear_constraint : Specify a linear constraint.

        Examples
        --------
        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> data = sm.datasets.longley.load()
        >>> data.exog = sm.add_constant(data.exog)
        >>> results = sm.OLS(data.endog, data.exog).fit()
        >>> r = np.zeros_like(results.params)
        >>> r[5:] = [1,-1]
        >>> print(r)
        [ 0.  0.  0.  0.  0.  1. -1.]

        r tests that the coefficients on the 5th and 6th independent
        variables are the same.

        >>> T_test = results.t_test(r)
        >>> print(T_test)
                                     Test for Constraints
        ==============================================================================
                         coef    std err          t      P>|t|      [0.025      0.975]
        ------------------------------------------------------------------------------
        c0         -1829.2026    455.391     -4.017      0.003   -2859.368    -799.037
        ==============================================================================
        >>> T_test.effect
        -1829.2025687192481
        >>> T_test.sd
        455.39079425193762
        >>> T_test.tvalue
        -4.0167754636411717
        >>> T_test.pvalue
        0.0015163772380899498

        Alternatively, you can specify the hypothesis tests using a string

        >>> from statsmodels.formula.api import ols
        >>> dta = sm.datasets.longley.load_pandas().data
        >>> formula = 'TOTEMP ~ GNPDEFL + GNP + UNEMP + ARMED + POP + YEAR'
        >>> results = ols(formula, dta).fit()
        >>> hypotheses = 'GNPDEFL = GNP, UNEMP = 2, YEAR/1829 = 1'
        >>> t_test = results.t_test(hypotheses)
        >>> print(t_test)
                                     Test for Constraints
        ==============================================================================
                         coef    std err          t      P>|t|      [0.025      0.975]
        ------------------------------------------------------------------------------
        c0            15.0977     84.937      0.178      0.863    -177.042     207.238
        c1            -2.0202      0.488     -8.231      0.000      -3.125      -0.915
        c2             1.0001      0.249      0.000      1.000       0.437       1.563
        ==============================================================================
        """
        from patsy import DesignInfo
        use_t = bool_like(use_t, "use_t", strict=True, optional=True)
        names = self.model.data.cov_names
        LC = DesignInfo(names).linear_constraint(r_matrix)
        r_matrix, q_matrix = LC.coefs, LC.constants
        num_ttests = r_matrix.shape[0]
        num_params = r_matrix.shape[1]

        if (cov_p is None and self.normalized_cov_params is None and
                not hasattr(self, 'cov_params_default')):
            raise ValueError('Need covariance of parameters for computing '
                             'T statistics')
        params = self.params.ravel()
        if num_params != params.shape[0]:
            raise ValueError('r_matrix and params are not aligned')
        if q_matrix is None:
            q_matrix = np.zeros(num_ttests)
        else:
            q_matrix = np.asarray(q_matrix)
            q_matrix = q_matrix.squeeze()
        if q_matrix.size > 1:
            if q_matrix.shape[0] != num_ttests:
                raise ValueError("r_matrix and q_matrix must have the same "
                                 "number of rows")

        if use_t is None:
            # switch to use_t false if undefined
            use_t = (hasattr(self, 'use_t') and self.use_t)

        _effect = np.dot(r_matrix, params)

        # Perform the test
        if num_ttests > 1:
            _sd = np.sqrt(np.diag(self.cov_params(
                r_matrix=r_matrix, cov_p=cov_p)))
        else:
            _sd = np.sqrt(self.cov_params(r_matrix=r_matrix, cov_p=cov_p))
        _t = (_effect - q_matrix) * recipr(_sd)

        df_resid = getattr(self, 'df_resid_inference', self.df_resid)

        if use_t:
            return ContrastResults(effect=_effect, t=_t, sd=_sd,
                                   df_denom=df_resid)
        else:
            return ContrastResults(effect=_effect, statistic=_t, sd=_sd,
                                   df_denom=df_resid,
                                   distribution='norm')

    def f_test(self, r_matrix, cov_p=None, invcov=None):
        """
        Compute the F-test for a joint linear hypothesis.

        This is a special case of `wald_test` that always uses the F
        distribution.

        Parameters
        ----------
        r_matrix : {array_like, str, tuple}
            One of:

            - array : An r x k array where r is the number of restrictions to
              test and k is the number of regressors. It is assumed
              that the linear combination is equal to zero.
            - str : The full hypotheses to test can be given as a string.
              See the examples.
            - tuple : A tuple of arrays in the form (R, q), ``q`` can be
              either a scalar or a length k row vector.

        cov_p : array_like, optional
            An alternative estimate for the parameter covariance matrix.
            If None is given, self.normalized_cov_params is used.
        invcov : array_like, optional
            A q x q array to specify an inverse covariance matrix based on a
            restrictions matrix.

        Returns
        -------
        ContrastResults
            The results for the test are attributes of this results instance.

        See Also
        --------
        t_test : Perform a single hypothesis test.
        wald_test : Perform a Wald-test using a quadratic form.
        statsmodels.stats.contrast.ContrastResults : Test results.
        patsy.DesignInfo.linear_constraint : Specify a linear constraint.

        Notes
        -----
        The matrix `r_matrix` is assumed to be non-singular. More precisely,

        r_matrix (pX pX.T) r_matrix.T

        is assumed invertible. Here, pX is the generalized inverse of the
        design matrix of the model. There can be problems in non-OLS models
        where the rank of the covariance of the noise is not full.

        Examples
        --------
        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> data = sm.datasets.longley.load()
        >>> data.exog = sm.add_constant(data.exog)
        >>> results = sm.OLS(data.endog, data.exog).fit()
        >>> A = np.identity(len(results.params))
        >>> A = A[1:,:]

        This tests that all coefficients except the constant are jointly
        statistically significantly different from zero.

        >>> print(results.f_test(A))
        <F test: F=array([[ 330.28533923]]), p=4.984030528700946e-10, df_denom=9, df_num=6>

        Compare this to

        >>> results.fvalue
        330.2853392346658
        >>> results.f_pvalue
        4.98403096572e-10

        >>> B = np.array(([0,0,1,-1,0,0,0],[0,0,0,0,0,1,-1]))

        This tests that the coefficients on the 2nd and 3rd regressors are
        equal and jointly that the coefficients on the 5th and 6th regressors
        are equal.

        >>> print(results.f_test(B))
        <F test: F=array([[ 9.74046187]]), p=0.005605288531708235, df_denom=9, df_num=2>

        Alternatively, you can specify the hypothesis tests using a string

        >>> from statsmodels.datasets import longley
        >>> from statsmodels.formula.api import ols
        >>> dta = longley.load_pandas().data
        >>> formula = 'TOTEMP ~ GNPDEFL + GNP + UNEMP + ARMED + POP + YEAR'
        >>> results = ols(formula, dta).fit()
        >>> hypotheses = '(GNPDEFL = GNP), (UNEMP = 2), (YEAR/1829 = 1)'
        >>> f_test = results.f_test(hypotheses)
        >>> print(f_test)
        <F test: F=array([[ 144.17976065]]), p=6.322026217355609e-08, df_denom=9, df_num=3>
        """
        res = self.wald_test(r_matrix, cov_p=cov_p, invcov=invcov, use_f=True,
                             scalar=True)
        return res

    # TODO: untested for GLMs?
    def wald_test(self, r_matrix, cov_p=None, invcov=None,
                  use_f=None, df_constraints=None, scalar=None):
        """
        Compute a Wald-test for a joint linear hypothesis.

        Parameters
        ----------
        r_matrix : {array_like, str, tuple}
            One of:

            - array : An r x k array where r is the number of restrictions to
              test and k is the number of regressors. It is assumed that the
              linear combination is equal to zero.
            - str : The full hypotheses to test can be given as a string.
              See the examples.
            - tuple : A tuple of arrays in the form (R, q), ``q`` can be
              either a scalar or a length p row vector.

        cov_p : array_like, optional
            An alternative estimate for the parameter covariance matrix.
            If None is given, self.normalized_cov_params is used.
        invcov : array_like, optional
            A q x q array to specify an inverse covariance matrix based on a
            restrictions matrix.
        use_f : bool
            If True, then the F-distribution is used. If False, then the
            asymptotic distribution, chisquare, is used. If use_f is None, then
            the F distribution is used if the model specifies that use_t is
            True. The test statistic is proportionally adjusted for the
            distribution by the number of constraints in the hypothesis.
        df_constraints : int, optional
            The number of constraints. If not provided, the number of
            constraints is determined from r_matrix.
        scalar : bool, optional
            Flag indicating whether the Wald test statistic should be returned
            as a scalar float. The current behavior is to return an array.
            This will switch to a scalar float after 0.14 is released. To
            get the future behavior now, set scalar to True. To silence
            the warning and retain the legacy behavior, set scalar to
            False.

        Returns
        -------
        ContrastResults
            The results for the test are attributes of this results instance.

        See Also
        --------
        f_test : Perform an F test on model parameters.
        t_test : Perform a single hypothesis test.
        statsmodels.stats.contrast.ContrastResults : Test results.
        patsy.DesignInfo.linear_constraint : Specify a linear constraint.

        Notes
        -----
        The matrix `r_matrix` is assumed to be non-singular. More precisely,

        r_matrix (pX pX.T) r_matrix.T

        is assumed invertible. Here, pX is the generalized inverse of the
        design matrix of the model. There can be problems in non-OLS models
        where the rank of the covariance of the noise is not full.
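
        Examples
        --------
        A brief sketch, reusing the longley OLS fit from the ``f_test``
        example above (exact values are omitted since they depend on the
        data):

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> data = sm.datasets.longley.load()
        >>> data.exog = sm.add_constant(data.exog)
        >>> results = sm.OLS(data.endog, data.exog).fit()
        >>> B = np.array([[0, 0, 1, -1, 0, 0, 0], [0, 0, 0, 0, 0, 1, -1]])
        >>> wt = results.wald_test(B, scalar=True)  # F test since use_t=True
        >>> wt_chi2 = results.wald_test(B, use_f=False, scalar=True)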
        """
        use_f = bool_like(use_f, "use_f", strict=True, optional=True)
        scalar = bool_like(scalar, "scalar", strict=True, optional=True)
        if use_f is None:
            # switch to use_t false if undefined
            use_f = (hasattr(self, 'use_t') and self.use_t)

        from patsy import DesignInfo
        names = self.model.data.cov_names
        params = self.params.ravel()
        LC = DesignInfo(names).linear_constraint(r_matrix)
        r_matrix, q_matrix = LC.coefs, LC.constants

        if (self.normalized_cov_params is None and cov_p is None and
                invcov is None and not hasattr(self, 'cov_params_default')):
            raise ValueError('need covariance of parameters for computing '
                             'F statistics')

        cparams = np.dot(r_matrix, params[:, None])
        J = float(r_matrix.shape[0])  # number of restrictions

        if q_matrix is None:
            q_matrix = np.zeros(J)
        else:
            q_matrix = np.asarray(q_matrix)
        if q_matrix.ndim == 1:
            q_matrix = q_matrix[:, None]
            if q_matrix.shape[0] != J:
                raise ValueError("r_matrix and q_matrix must have the same "
                                 "number of rows")
        Rbq = cparams - q_matrix
        if invcov is None:
            cov_p = self.cov_params(r_matrix=r_matrix, cov_p=cov_p)
            if np.isnan(cov_p).max():
                raise ValueError("r_matrix performs f_test for using "
                                 "dimensions that are asymptotically "
                                 "non-normal")
            invcov = np.linalg.pinv(cov_p)
            J_ = np.linalg.matrix_rank(cov_p)
            if J_ < J:
                warnings.warn('covariance of constraints does not have full '
                              'rank. The number of constraints is %d, but '
                              'rank is %d' % (J, J_), ValueWarning)
                J = J_

        # TODO streamline computation, we do not need to compute J if given
        if df_constraints is not None:
            # let caller override J by df_constraint
            J = df_constraints

        if (hasattr(self, 'mle_settings') and
                self.mle_settings['optimizer'] in ['l1', 'l1_cvxopt_cp']):
            F = nan_dot(nan_dot(Rbq.T, invcov), Rbq)
        else:
            F = np.dot(np.dot(Rbq.T, invcov), Rbq)

        df_resid = getattr(self, 'df_resid_inference', self.df_resid)
        if scalar is None:
            warnings.warn(
                "The behavior of wald_test will change after 0.14 to returning "
                "scalar test statistic values. To get the future behavior now, "
                "set scalar to True. To silence this message while retaining "
                "the legacy behavior, set scalar to False.",
                FutureWarning
            )
            scalar = False
        if scalar and F.size == 1:
            F = float(F)
        if use_f:
            F /= J
            return ContrastResults(F=F, df_denom=df_resid,
                                   df_num=J)  # invcov.shape[0])
        else:
            return ContrastResults(chi2=F, df_denom=J, statistic=F,
                                   distribution='chi2', distargs=(J,))

    def wald_test_terms(self, skip_single=False, extra_constraints=None,
                        combine_terms=None, scalar=None):
        """
        Compute a sequence of Wald tests for terms over multiple columns.

        This computes joint Wald tests for the hypothesis that all
        coefficients corresponding to a `term` are zero.
        `Terms` are defined by the underlying formula or by string matching.

        Parameters
        ----------
        skip_single : bool
            If true, then terms that consist only of a single column and,
            therefore, refer only to a single parameter are skipped.
            If false, then all terms are included.
        extra_constraints : ndarray
            Additional constraints to test. Note that this input has not been
            tested.
        combine_terms : {list[str], None}
            Each string in this list is matched to the name of the terms or
            the name of the exogenous variables. All columns whose name
            includes that string are combined in one joint test.
        scalar : bool, optional
            Flag indicating whether the Wald test statistic should be returned
            as a scalar float. The current behavior is to return an array.
            This will switch to a scalar float after 0.14 is released. To
            get the future behavior now, set scalar to True. To silence
            the warning and retain the legacy behavior, set scalar to
            False.

        Returns
        -------
        WaldTestResults
            The result instance contains `table`, which is a pandas DataFrame
            with the test results: test statistic, degrees of freedom and
            pvalues.

        Examples
        --------
        >>> res_ols = ols("np.log(Days+1) ~ C(Duration, Sum)*C(Weight, Sum)", data).fit()
        >>> res_ols.wald_test_terms()
        <class 'statsmodels.stats.contrast.WaldTestResults'>
                                                  F                P>F  df constraint  df denom
        Intercept                        279.754525  2.37985521351e-22              1        51
        C(Duration, Sum)                   5.367071    0.0245738436636              1        51
        C(Weight, Sum)                    12.432445  3.99943118767e-05              2        51
        C(Duration, Sum):C(Weight, Sum)    0.176002      0.83912310946              2        51

        >>> res_poi = Poisson.from_formula("Days ~ C(Weight) * C(Duration)", \
                                           data).fit(cov_type='HC0')
        >>> wt = res_poi.wald_test_terms(skip_single=False, \
                                         combine_terms=['Duration', 'Weight'])
        >>> print(wt)
                                    chi2             P>chi2  df constraint
        Intercept              15.695625  7.43960374424e-05              1
        C(Weight)              16.132616  0.000313940174705              2
        C(Duration)             1.009147     0.315107378931              1
        C(Weight):C(Duration)   0.216694     0.897315972824              2
        Duration               11.187849     0.010752286833              3
        Weight                 30.263368  4.32586407145e-06              4
        """
        # lazy import
        from collections import defaultdict

        result = self
        if extra_constraints is None:
            extra_constraints = []
        if combine_terms is None:
            combine_terms = []
        design_info = getattr(result.model.data, 'design_info', None)

        if design_info is None and extra_constraints is None:
            raise ValueError('no constraints, nothing to do')

        identity = np.eye(len(result.params))
        constraints = []
        combined = defaultdict(list)
        if design_info is not None:
            for term in design_info.terms:
                cols = design_info.slice(term)
                name = term.name()
                constraint_matrix = identity[cols]

                # check if in combined
                for cname in combine_terms:
                    if cname in name:
                        combined[cname].append(constraint_matrix)

                k_constraint = constraint_matrix.shape[0]
                if skip_single:
                    if k_constraint == 1:
                        continue

                constraints.append((name, constraint_matrix))

            combined_constraints = []
            for cname in combine_terms:
                combined_constraints.append((cname, np.vstack(combined[cname])))
        else:
            # check by exog/params names if there is no formula info
            for col, name in enumerate(result.model.exog_names):
                constraint_matrix = np.atleast_2d(identity[col])

                # check if in combined
                for cname in combine_terms:
                    if cname in name:
                        combined[cname].append(constraint_matrix)

                if skip_single:
                    continue

                constraints.append((name, constraint_matrix))

            combined_constraints = []
            for cname in combine_terms:
                combined_constraints.append((cname, np.vstack(combined[cname])))

        use_t = result.use_t
        distribution = ['chi2', 'F'][use_t]

        res_wald = []
        index = []
        for name, constraint in constraints + combined_constraints + extra_constraints:
            wt = result.wald_test(constraint, scalar=scalar)
            row = [wt.statistic, wt.pvalue, constraint.shape[0]]
            if use_t:
                row.append(wt.df_denom)
            res_wald.append(row)
            index.append(name)

        # distribution neutral names
        col_names = ['statistic', 'pvalue', 'df_constraint']
        if use_t:
            col_names.append('df_denom')
        # TODO: maybe move DataFrame creation to results class
        from pandas import DataFrame
        table = DataFrame(res_wald, index=index, columns=col_names)
        res = WaldTestResults(None, distribution, None, table=table)
        # TODO: remove temp again, added for testing
        res.temp = constraints + combined_constraints + extra_constraints
        return res

    def t_test_pairwise(self, term_name, method='hs', alpha=0.05,
                        factor_labels=None):
        """
        Perform pairwise t_test with multiple testing corrected p-values.

        This uses the contrast matrix from the formula's design_info encoding
        and should work for all encodings of a main effect.

        Parameters
        ----------
        term_name : str
            The name of the term for which pairwise comparisons are computed.
            Term names for categorical effects are created by patsy and
            correspond to the main part of the exog names.
        method : {str, list[str]}
            The multiple testing p-value correction to apply. The default is
            'hs'. See `statsmodels.stats.multitest.multipletests`.
        alpha : float
            The significance level for the multiple testing reject decision.
        factor_labels : {list[str], None}
            Labels for the factor levels used for pairwise labels. If not
            provided, then the labels from the formula design_info are used.

        Returns
        -------
        MultiCompResult
            The results are stored as attributes, the main attributes are the
            following two. Other attributes are added for debugging purposes
            or as background information.

            - result_frame : pandas DataFrame with t_test results and multiple
              testing corrected p-values.
            - contrasts : matrix of constraints of the null hypothesis in the
              t_test.

        Notes
        -----
        Status: experimental. Currently only checked for treatment coding with
        and without specified reference level.

        Currently there are no multiple testing corrected confidence intervals
        available.

        Examples
        --------
        >>> res = ols("np.log(Days+1) ~ C(Weight) + C(Duration)", data).fit()
        >>> pw = res.t_test_pairwise("C(Weight)")
        >>> pw.result_frame
                 coef   std err         t         P>|t|  Conf. Int. Low
        2-1  0.632315  0.230003  2.749157  8.028083e-03        0.171563
        3-1  1.302555  0.230003  5.663201  5.331513e-07        0.841803
        3-2  0.670240  0.230003  2.914044  5.119126e-03        0.209488
             Conf. Int. Upp.  pvalue-hs reject-hs
        2-1         1.093067   0.010212      True
        3-1         1.763307   0.000002      True
        3-2         1.130992   0.010212      True
        """
        res = t_test_pairwise(self, term_name, method=method, alpha=alpha,
                              factor_labels=factor_labels)
        return res

    def conf_int(self, alpha=.05, cols=None):
        """
        Construct confidence interval for the fitted parameters.

        Parameters
        ----------
        alpha : float, optional
            The significance level for the confidence interval. The default
            `alpha` = .05 returns a 95% confidence interval.
        cols : array_like, optional
            Specifies which confidence intervals to return.

            .. deprecated:: 0.13

               cols is deprecated and will be removed after 0.14 is released.
               cols only works when inputs are NumPy arrays and will fail
               when using pandas Series or DataFrames as input. You can
               subset the confidence intervals using slices.

        Returns
        -------
        array_like
            Each row contains [lower, upper] limits of the confidence interval
            for the corresponding parameter. The first column contains all
            lower, the second column contains all upper limits.

        Notes
        -----
        The confidence interval is based on the standard normal distribution
        if self.use_t is False. If self.use_t is True, then uses a Student's t
        with self.df_resid_inference (or self.df_resid if df_resid_inference is
        not defined) degrees of freedom.

        Examples
        --------
        >>> import statsmodels.api as sm
        >>> data = sm.datasets.longley.load()
        >>> data.exog = sm.add_constant(data.exog)
        >>> results = sm.OLS(data.endog, data.exog).fit()
        >>> results.conf_int()
        array([[-5496529.48322745, -1467987.78596704],
               [    -177.02903529,      207.15277984],
               [      -0.1115811 ,        0.03994274],
               [      -3.12506664,       -0.91539297],
               [      -1.5179487 ,       -0.54850503],
               [      -0.56251721,        0.460309  ],
               [     798.7875153 ,     2859.51541392]])

        >>> results.conf_int(cols=(2,3))
        array([[-0.1115811 ,  0.03994274],
               [-3.12506664, -0.91539297]])
        """
        bse = self.bse

        if self.use_t:
            dist = stats.t
            df_resid = getattr(self, 'df_resid_inference', self.df_resid)
            q = dist.ppf(1 - alpha / 2, df_resid)
        else:
            dist = stats.norm
            q = dist.ppf(1 - alpha / 2)

        params = self.params
        lower = params - q * bse
        upper = params + q * bse
        if cols is not None:
            warnings.warn(
                "cols is deprecated and will be removed after 0.14 is "
                "released. cols only works when inputs are NumPy arrays and "
                "will fail when using pandas Series or DataFrames as input. "
                "Subsets of confidence intervals can be selected using slices "
                "of the full confidence interval array.",
                FutureWarning
            )
            cols = np.asarray(cols)
            lower = lower[cols]
            upper = upper[cols]
        return np.asarray(lzip(lower, upper))

    def save(self, fname, remove_data=False):
        """
        Save a pickle of this instance.

        Parameters
        ----------
        fname : {str, handle}
            A string filename or a file handle.
        remove_data : bool
            If False (default), then the instance is pickled without changes.
            If True, then all arrays with length nobs are set to None before
            pickling. See the remove_data method.
            In some cases not all arrays will be set to None.

        Notes
        -----
        If remove_data is true and the model result does not implement a
        remove_data method then this will raise an exception.
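
        Examples
        --------
        A minimal sketch; the filename is hypothetical:

        >>> results.save("results.pickle", remove_data=True)  # doctest: +SKIP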
        """

        from statsmodels.iolib.smpickle import save_pickle

        if remove_data:
            self.remove_data()

        save_pickle(self, fname)

    @classmethod
    def load(cls, fname):
        """
        Load a pickled results instance.

        .. warning::

           Loading pickled models is not secure against erroneous or
           maliciously constructed data. Never unpickle data received from
           an untrusted or unauthenticated source.

        Parameters
        ----------
        fname : {str, handle, pathlib.Path}
            A string filename or a file handle.

        Returns
        -------
        Results
            The unpickled results instance.
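
        Examples
        --------
        A minimal sketch; the filename is hypothetical:

        >>> from statsmodels.base.model import LikelihoodModelResults
        >>> results = LikelihoodModelResults.load("results.pickle")  # doctest: +SKIP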
        """

        from statsmodels.iolib.smpickle import load_pickle
        return load_pickle(fname)

    def remove_data(self):
        """
        Remove data arrays, all nobs arrays, from result and model.

        This reduces the size of the instance, so it can be pickled with less
        memory. Currently tested for use with predict from an unpickled
        results and model instance.

        .. warning::

           Since data and some intermediate results have been removed,
           calculating new statistics that require them will raise exceptions.
           The exception will occur the first time an attribute is accessed
           that has been set to None.

        Not fully tested for time series models (tsa); it might delete too
        much for prediction, or not delete everything that could be removed.

        The lists of arrays to delete are maintained as attributes of
        the result and model instance, except for cached values. These
        lists could be changed before calling remove_data.

        The attributes to remove are named in:

        model._data_attr : arrays attached to both the model instance
            and the results instance with the same attribute name.

        result._data_in_cache : arrays that may exist as values in
            result._cache

        result._data_attr_model : arrays attached to the model
            instance but not to the results instance
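
        Examples
        --------
        A minimal sketch; ``results`` stands for any fitted results instance:

        >>> results.remove_data()  # doctest: +SKIP
        >>> results.save("results_small.pickle")  # doctest: +SKIP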
        """
        cls = self.__class__
        # Note: we cannot just use `getattr(cls, x)` or `getattr(self, x)`
        # because of redirection involved with property-like accessors
        cls_attrs = {}
        for name in dir(cls):
            try:
                attr = object.__getattribute__(cls, name)
            except AttributeError:
                pass
            else:
                cls_attrs[name] = attr
        data_attrs = [x for x in cls_attrs
                      if isinstance(cls_attrs[x], cached_data)]
        for name in data_attrs:
            self._cache[name] = None

        def wipe(obj, att):
            # get to last element in attribute path
            p = att.split('.')
            att_ = p.pop(-1)
            try:
                obj_ = reduce(getattr, [obj] + p)
                if hasattr(obj_, att_):
                    setattr(obj_, att_, None)
            except AttributeError:
                pass

        model_only = ['model.' + i
                      for i in getattr(self, "_data_attr_model", [])]
        model_attr = ['model.' + i for i in self.model._data_attr]
        for att in self._data_attr + model_attr + model_only:
            if att in data_attrs:
                # these have been handled above, and trying to call wipe
                # would raise an Exception anyway, so skip these
                continue
            wipe(self, att)

        for key in self._data_in_cache:
            try:
                self._cache[key] = None
            except (AttributeError, KeyError):
                pass


class LikelihoodResultsWrapper(wrap.ResultsWrapper):
    _attrs = {
        'params': 'columns',
        'bse': 'columns',
        'pvalues': 'columns',
        'tvalues': 'columns',
        'resid': 'rows',
        'fittedvalues': 'rows',
        'normalized_cov_params': 'cov',
    }

    _wrap_attrs = _attrs
    _wrap_methods = {
        'cov_params': 'cov',
        'conf_int': 'columns'
    }

wrap.populate_wrapper(LikelihoodResultsWrapper,  # noqa:E305
                      LikelihoodModelResults)


class ResultMixin(object):

    @cache_readonly
    def df_modelwc(self):
        """Number of model parameters including any constant."""
        # collect different ways of defining the number of parameters, used for
        # aic, bic
        if hasattr(self, 'df_model'):
            if hasattr(self, 'k_constant'):
                hasconst = self.k_constant
            elif hasattr(self, 'hasconst'):
                hasconst = self.hasconst
            else:
                # default assumption
                hasconst = 1
            return self.df_model + hasconst
        else:
            return self.params.size

    @cache_readonly
    def aic(self):
        """Akaike information criterion, ``-2 * llf + 2 * df_modelwc``."""
        return -2 * self.llf + 2 * (self.df_modelwc)

    @cache_readonly
    def bic(self):
        """Bayesian information criterion, ``-2 * llf + log(nobs) * df_modelwc``."""
        return -2 * self.llf + np.log(self.nobs) * (self.df_modelwc)

    @cache_readonly
    def score_obsv(self):
        """cached Jacobian of log-likelihood
        """
        return self.model.score_obs(self.params)

    @cache_readonly
    def hessv(self):
        """cached Hessian of log-likelihood
        """
        return self.model.hessian(self.params)

    @cache_readonly
    def covjac(self):
        """
        covariance of parameters based on the outer product of the Jacobian
        (score) of the log-likelihood, ``inv(score_obs.T @ score_obs)``
        """
        #  if not hasattr(self, '_results'):
        #      raise ValueError('need to call fit first')
        #      #self.fit()
        #  self.jacv = jacv = self.jac(self._results.params)
        jacv = self.score_obsv
        return np.linalg.inv(np.dot(jacv.T, jacv))

    @cache_readonly
    def covjhj(self):
        """covariance of parameters based on HJJH

        sandwich product of the inverse Hessian with the outer product of the
        Jacobian (score) of the log-likelihood

        name should be covhjh
        """
        jacv = self.score_obsv
        hessv = self.hessv
        hessinv = np.linalg.inv(hessv)
        #  self.hessinv = hessin = self.cov_params()
        return np.dot(hessinv, np.dot(np.dot(jacv.T, jacv), hessinv))

    @cache_readonly
    def bsejhj(self):
        """standard deviation of parameter estimates based on covjhj
        """
        return np.sqrt(np.diag(self.covjhj))

    @cache_readonly
    def bsejac(self):
        """standard deviation of parameter estimates based on covjac
        """
        return np.sqrt(np.diag(self.covjac))

    def bootstrap(self, nrep=100, method='nm', disp=0, store=1):
        """simple bootstrap to get mean and variance of estimator

        see notes

        Parameters
        ----------
        nrep : int
            number of bootstrap replications
        method : str
            optimization method to use
        disp : bool
            If true, then optimization prints results
        store : bool
            If true, then parameter estimates for all bootstrap iterations
            are attached in self.bootstrap_results

        Returns
        -------
        mean : ndarray
            mean of parameter estimates over bootstrap replications
        std : ndarray
            standard deviation of parameter estimates over bootstrap
            replications
        results : ndarray
            array of all parameter estimates, one row per bootstrap
            replication

        Notes
        -----
        This was mainly written to compare estimators of the standard errors of
        the parameter estimates.  It uses independent random sampling from the
        original endog and exog, and therefore is only correct if observations
        are independently distributed.

        This will be moved to apply only to models with independently
        distributed observations.
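
        Examples
        --------
        A minimal sketch, assuming ``res`` is a fitted
        ``GenericLikelihoodModel`` results instance; this refits the model
        ``nrep`` times, so it can be slow:

        >>> mean, std, all_params = res.bootstrap(nrep=200)  # doctest: +SKIP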
        """
        results = []
        hascloneattr = hasattr(self.model, 'cloneattr')
        for i in range(nrep):
            rvsind = np.random.randint(self.nobs, size=self.nobs)
            # this needs to set startparam and get other defining attributes
            # need a clone method on model
            if self.exog is not None:
                exog_resamp = self.exog[rvsind, :]
            else:
                exog_resamp = None
            # build auxiliary model and fit
            init_kwds = self.model._get_init_kwds()
            fitmod = self.model.__class__(self.endog[rvsind],
                                          exog=exog_resamp, **init_kwds)
            if hascloneattr:
                for attr in self.model.cloneattr:
                    setattr(fitmod, attr, getattr(self.model, attr))

            fitres = fitmod.fit(method=method, disp=disp)
            results.append(fitres.params)
        results = np.array(results)
        if store:
            self.bootstrap_results = results
        return results.mean(0), results.std(0), results

    def get_nlfun(self, fun):
        """
        get_nlfun

        This is not implemented.
        """
        # I think this is supposed to get the delta method that is currently
        # in miscmodels count (as part of Poisson example)
        raise NotImplementedError


class _LLRMixin():
    """Mixin class for Null model and likelihood ratio
    """
    # methods copied from DiscreteResults, adjusted pseudo R2

    def pseudo_rsquared(self, kind="mcf"):
        """
        Pseudo-R-squared of the model.

        ``kind='mcf'`` (default) returns McFadden's pseudo-R-squared,
        ``1 - llf / llnull``. ``kind`` in ``{'cox', 'cs', 'lr'}`` returns the
        Cox-Snell (likelihood-ratio) pseudo-R-squared,
        ``1 - exp(2 * (llnull - llf) / nobs)``.
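
        Examples
        --------
        A minimal sketch, assuming ``res`` is a fitted results instance that
        provides ``llnull``:

        >>> prsq_mcf = res.pseudo_rsquared()          # doctest: +SKIP
        >>> prsq_cs = res.pseudo_rsquared(kind="cs")  # doctest: +SKIP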
        """
        kind = kind.lower()
        if kind.startswith("mcf"):
            prsq = 1 - self.llf / self.llnull
        elif kind.startswith("cox") or kind in ["cs", "lr"]:
            prsq = 1 - np.exp((self.llnull - self.llf) * (2 / self.nobs))
        else:
            raise ValueError("only McFadden and Cox-Snell are available")
        return prsq

    @cache_readonly
    def llr(self):
        """
        Likelihood ratio chi-squared statistic; `-2*(llnull - llf)`
        """
        return -2*(self.llnull - self.llf)

    @cache_readonly
    def llr_pvalue(self):
        """
        The chi-squared probability of getting a log-likelihood ratio
        statistic greater than llr.  llr has a chi-squared distribution
        with degrees of freedom `df_model`.
        """
        # see also RegressionModel compare_lr_test
        llr = self.llr
        df_full = self.df_resid
        df_restr = self.df_resid_null
        lrdf = (df_restr - df_full)
        self.df_lr_null = lrdf
        return stats.distributions.chi2.sf(llr, lrdf)

    def set_null_options(self, llnull=None, attach_results=True, **kwargs):
        """
        Set the fit options for the Null (constant-only) model.

        This resets the cache for related attributes, which is potentially
        fragile. This only sets the option; the null model is estimated
        when llnull is accessed, if llnull is not yet in cache.

        Parameters
        ----------
        llnull : {None, float}
            If llnull is not None, then the value will be directly assigned to
            the cached attribute "llnull".
        attach_results : bool
            Sets an internal flag whether the results instance of the null
            model should be attached. By default, without calling this method,
            the null model results are not attached and only the loglikelihood
            value llnull is stored.
        **kwargs
            Additional keyword arguments used as fit keyword arguments for the
            null model. They override any model default values.

        Notes
        -----
        Modifies attributes of this instance, and so has no return.
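
        Examples
        --------
        A minimal sketch, assuming ``res`` is a fitted results instance; the
        llnull value shown is hypothetical:

        >>> res.set_null_options(llnull=-500.5)  # doctest: +SKIP
        >>> res.set_null_options(attach_results=True, method="bfgs")  # doctest: +SKIP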
        """
        # reset cache, note we need to add here anything that depends on
        # llnull or the null model. If something is missing, then the attribute
        # might be incorrect.
        self._cache.pop('llnull', None)
        self._cache.pop('llr', None)
        self._cache.pop('llr_pvalue', None)
        self._cache.pop('prsquared', None)
        if hasattr(self, 'res_null'):
            del self.res_null

        if llnull is not None:
            self._cache['llnull'] = llnull
        self._attach_nullmodel = attach_results
        self._optim_kwds_null = kwargs

    @cache_readonly
    def llnull(self):
        """
        Value of the constant-only loglikelihood
        """
        model = self.model
        kwds = model._get_init_kwds().copy()
        for key in getattr(model, '_null_drop_keys', []):
            del kwds[key]
        # TODO: what parameters to pass to fit?
        mod_null = model.__class__(model.endog, np.ones(self.nobs), **kwds)
        # TODO: consider catching and warning on convergence failure?
        # in the meantime, try hard to converge. see
        # TestPoissonConstrained1a.test_smoke

        optim_kwds = getattr(self, '_optim_kwds_null', {}).copy()

        if 'start_params' in optim_kwds:
            # user provided
            sp_null = optim_kwds.pop('start_params')
        elif hasattr(model, '_get_start_params_null'):
            # get moment estimates if available
            sp_null = model._get_start_params_null()
        else:
            sp_null = None

        opt_kwds = dict(method='bfgs', warn_convergence=False, maxiter=10000,
                        disp=0)
        opt_kwds.update(optim_kwds)

        if optim_kwds:
            res_null = mod_null.fit(start_params=sp_null, **opt_kwds)
        else:
            # this should be a reasonably robust default across versions
            res_null = mod_null.fit(start_params=sp_null, method='nm',
                                    warn_convergence=False,
                                    maxiter=10000, disp=0)
            res_null = mod_null.fit(start_params=res_null.params, method='bfgs',
                                    warn_convergence=False,
                                    maxiter=10000, disp=0)

        if getattr(self, '_attach_nullmodel', False) is not False:
            self.res_null = res_null

        self.k_null = len(res_null.params)
        self.df_resid_null = res_null.df_resid
        return res_null.llf
2617
2618
2619class GenericLikelihoodModelResults(LikelihoodModelResults, ResultMixin):
2620    """
2621    A results class for the discrete dependent variable models.
2622
2623    ..Warning :
2624
2625    The following description has not been updated to this version/class.
2626    Where are AIC, BIC, ....? docstring looks like copy from discretemod
2627
2628    Parameters
2629    ----------
2630    model : A DiscreteModel instance
2631    mlefit : instance of LikelihoodResults
2632        This contains the numerical optimization results as returned by
2633        LikelihoodModel.fit(), in a superclass of GnericLikelihoodModels
2634
2635
2636    Attributes
2637    ----------
2638    aic : float
2639        Akaike information criterion.  -2*(`llf` - p) where p is the number
2640        of regressors including the intercept.
2641    bic : float
2642        Bayesian information criterion. -2*`llf` + ln(`nobs`)*p where p is the
2643        number of regressors including the intercept.
2644    bse : ndarray
2645        The standard errors of the coefficients.
2646    df_resid : float
2647        See model definition.
2648    df_model : float
2649        See model definition.
2650    fitted_values : ndarray
2651        Linear predictor XB.
    llf : float
        Value of the log-likelihood.
    llnull : float
        Value of the constant-only log-likelihood.
    llr : float
        Likelihood ratio chi-squared statistic; -2*(`llnull` - `llf`).
    llr_pvalue : float
        The chi-squared probability of getting a log-likelihood ratio
        statistic greater than `llr`; `llr` has a chi-squared distribution
        with `df_model` degrees of freedom.
    prsquared : float
        McFadden's pseudo-R-squared: 1 - (`llf`/`llnull`).
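
    Examples
    --------
    A minimal sketch, assuming `GenericLikelihoodModel` is subclassed with a
    `loglike` method; the Poisson log-likelihood and all names below are
    illustrative only::

        import numpy as np
        from scipy import special

        from statsmodels.base.model import GenericLikelihoodModel

        class PoissonMLE(GenericLikelihoodModel):
            def loglike(self, params):
                # Poisson log-likelihood with a log link
                mu = np.exp(self.exog @ params)
                return np.sum(self.endog * np.log(mu) - mu
                              - special.gammaln(self.endog + 1))

        rng = np.random.default_rng(12345)
        exog = np.column_stack([np.ones(200), rng.normal(size=200)])
        endog = rng.poisson(np.exp(exog @ np.array([0.5, 0.2])))

        # fit returns an instance of this results class
        res = PoissonMLE(endog, exog).fit(disp=0)
        print(res.summary())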
    """

    def __init__(self, model, mlefit):
        self.model = model
        self.endog = model.endog
        self.exog = model.exog
        self.nobs = model.endog.shape[0]

        # TODO: possibly move to model.fit()
        #       and outsource together with patching names
        if hasattr(model, 'df_model'):
            self.df_model = model.df_model
        else:
            self.df_model = len(mlefit.params)
            # retrofitting the model, used in t_test TODO: check design
            self.model.df_model = self.df_model

        if hasattr(model, 'df_resid'):
            self.df_resid = model.df_resid
        else:
            self.df_resid = self.endog.shape[0] - self.df_model
            # retrofitting the model, used in t_test TODO: check design
            self.model.df_resid = self.df_resid

        self._cache = {}
        self.__dict__.update(mlefit.__dict__)

        k_params = len(mlefit.params)
        # checks mainly for adding new models or subclassing
        if self.df_model + self.model.k_constant != k_params:
            warnings.warn("df_model + k_constant differs from nparams")
        if self.df_resid != self.nobs - k_params:
            warnings.warn("df_resid differs from nobs - nparams")

    def summary(self, yname=None, xname=None, title=None, alpha=.05):
        """Summarize the Regression Results

        Parameters
        ----------
        yname : str, optional
            Name of the endogenous variable; default is `y`.
        xname : list[str], optional
            Names for the exogenous variables; default is "var_xx".
            Must match the number of parameters in the model.
        title : str, optional
            Title for the top table. If not None, then this replaces the
            default title.
        alpha : float
            Significance level for the confidence intervals.

        Returns
        -------
        smry : Summary instance
            This holds the summary tables and text, which can be printed or
            converted to various output formats.

        See Also
        --------
        statsmodels.iolib.summary.Summary : class to hold summary results
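
        Examples
        --------
        A hedged sketch; `res` stands for any fitted results instance of
        this class, and the names below are illustrative::

            smry = res.summary(yname='y', xname=['const', 'x1'], alpha=0.1)
            print(smry)      # plain-text tables, same as smry.as_text()
            smry.as_latex()  # LaTeX source for the tables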
        """

        top_left = [('Dep. Variable:', None),
                    ('Model:', None),
                    ('Method:', ['Maximum Likelihood']),
                    ('Date:', None),
                    ('Time:', None),
                    ('No. Observations:', None),
                    ('Df Residuals:', None),
                    ('Df Model:', None),
                    ]

        top_right = [('Log-Likelihood:', None),
                     ('AIC:', ["%#8.4g" % self.aic]),
                     ('BIC:', ["%#8.4g" % self.bic])
                     ]

        if title is None:
            title = self.model.__class__.__name__ + ' Results'

        # create summary table instance
        from statsmodels.iolib.summary import Summary
        smry = Summary()
        smry.add_table_2cols(self, gleft=top_left, gright=top_right,
                             yname=yname, xname=xname, title=title)
        smry.add_table_params(self, yname=yname, xname=xname, alpha=alpha,
                              use_t=self.use_t)

        return smry
