1# TODO: Determine which tests are valid for GLSAR, and under what conditions
2# TODO: Fix issue with constant and GLS
3# TODO: GLS: add options Iterative GLS, for iterative fgls if sigma is None
4# TODO: GLS: default if sigma is none should be two-step GLS
5# TODO: Check nesting when performing model based tests, lr, wald, lm
6"""
7This module implements standard regression models:
8
9Generalized Least Squares (GLS)
10Ordinary Least Squares (OLS)
11Weighted Least Squares (WLS)
12Generalized Least Squares with autoregressive error terms GLSAR(p)
13
14Models are specified with an endogenous response variable and an
15exogenous design matrix and are fit using their `fit` method.
16
17Subclasses that have more complicated covariance matrices
18should write over the 'whiten' method as the fit method
19prewhitens the response by calling 'whiten'.
20
21General reference for regression models:
22
23D. C. Montgomery and E.A. Peck. "Introduction to Linear Regression
24    Analysis." 2nd. Ed., Wiley, 1992.
25
26Econometrics references for regression models:
27
28R. Davidson and J.G. MacKinnon.  "Econometric Theory and Methods," Oxford,
29    2004.
30
31W. Green.  "Econometric Analysis," 5th ed., Pearson, 2003.
32"""
33
34
35from statsmodels.compat.pandas import Appender
36from statsmodels.compat.python import lrange, lzip
37
38import warnings
39
40import numpy as np
41from scipy import optimize, stats
42from scipy.linalg import toeplitz
43
44import statsmodels.base.model as base
45import statsmodels.base.wrapper as wrap
46from statsmodels.emplike.elregress import _ELRegOpts
47# need import in module instead of lazily to copy `__doc__`
48from statsmodels.regression._prediction import PredictionResults
49from statsmodels.tools.decorators import cache_readonly, cache_writable
50from statsmodels.tools.sm_exceptions import (
51    InvalidTestWarning,
52    ValueWarning,
53    )
54from statsmodels.tools.tools import pinv_extended
55from statsmodels.tools.validation import string_like
56
57from . import _prediction as pred
58
59__docformat__ = 'restructuredtext en'
60
61__all__ = ['GLS', 'WLS', 'OLS', 'GLSAR', 'PredictionResults',
62           'RegressionResultsWrapper']
63
64
65_fit_regularized_doc =\
66        r"""
67        Return a regularized fit to a linear regression model.
68
69        Parameters
70        ----------
71        method : str
72            Either 'elastic_net' or 'sqrt_lasso'.
73        alpha : scalar or array_like
74            The penalty weight.  If a scalar, the same penalty weight
75            applies to all variables in the model.  If a vector, it
76            must have the same length as `params`, and contains a
77            penalty weight for each coefficient.
78        L1_wt : scalar
79            The fraction of the penalty given to the L1 penalty term.
80            Must be between 0 and 1 (inclusive).  If 0, the fit is a
81            ridge fit, if 1 it is a lasso fit.
82        start_params : array_like
83            Starting values for ``params``.
84        profile_scale : bool
85            If True the penalized fit is computed using the profile
86            (concentrated) log-likelihood for the Gaussian model.
87            Otherwise the fit uses the residual sum of squares.
88        refit : bool
89            If True, the model is refit using only the variables that
90            have non-zero coefficients in the regularized fit.  The
91            refitted model is not regularized.
92        **kwargs
93            Additional keyword arguments that contain information used when
94            constructing a model using the formula interface.
95
96        Returns
97        -------
98        statsmodels.base.elastic_net.RegularizedResults
99            The regularized results.
100
101        Notes
102        -----
103        The elastic net uses a combination of L1 and L2 penalties.
104        The implementation closely follows the glmnet package in R.
105
106        The function that is minimized is:
107
108        .. math::
109
110            0.5*RSS/n + alpha*((1-L1\_wt)*|params|_2^2/2 + L1\_wt*|params|_1)
111
112        where RSS is the usual regression sum of squares, n is the
113        sample size, and :math:`|*|_1` and :math:`|*|_2` are the L1 and L2
114        norms.
115
116        For WLS and GLS, the RSS is calculated using the whitened endog and
117        exog data.
118
119        Post-estimation results are based on the same data used to
120        select variables, hence may be subject to overfitting biases.
121
122        The elastic_net method uses the following keyword arguments:
123
124        maxiter : int
125            Maximum number of iterations
126        cnvrg_tol : float
127            Convergence threshold for line searches
128        zero_tol : float
129            Coefficients below this threshold are treated as zero.
130
131        The square root lasso approach is a variation of the Lasso
132        that is largely self-tuning (the optimal tuning parameter
133        does not depend on the standard deviation of the regression
134        errors).  If the errors are Gaussian, the tuning parameter
135        can be taken to be
136
137        alpha = 1.1 * np.sqrt(n) * norm.ppf(1 - 0.05 / (2 * p))
138
139        where n is the sample size and p is the number of predictors.
140
141        The square root lasso uses the following keyword arguments:
142
143        zero_tol : float
144            Coefficients below this threshold are treated as zero.
145
146        The cvxopt module is required to estimate model using the square root
147        lasso.
148
149        References
150        ----------
151        .. [*] Friedman, Hastie, Tibshirani (2008).  Regularization paths for
152           generalized linear models via coordinate descent.  Journal of
153           Statistical Software 33(1), 1-22 Feb 2010.
154
155        .. [*] A Belloni, V Chernozhukov, L Wang (2011).  Square-root Lasso:
156           pivotal recovery of sparse signals via conic programming.
157           Biometrika 98(4), 791-806. https://arxiv.org/pdf/1009.5689.pdf
158        """
159
160
161def _get_sigma(sigma, nobs):
162    """
163    Returns sigma (matrix, nobs by nobs) for GLS and the inverse of its
164    Cholesky decomposition.  Handles dimensions and checks integrity.
165    If sigma is None, returns None, None. Otherwise returns sigma,
166    cholsigmainv.
167    """
168    if sigma is None:
169        return None, None
170    sigma = np.asarray(sigma).squeeze()
171    if sigma.ndim == 0:
172        sigma = np.repeat(sigma, nobs)
173    if sigma.ndim == 1:
174        if sigma.shape != (nobs,):
175            raise ValueError("Sigma must be a scalar, 1d of length %s or a 2d "
176                             "array of shape %s x %s" % (nobs, nobs, nobs))
177        cholsigmainv = 1/np.sqrt(sigma)
178    else:
179        if sigma.shape != (nobs, nobs):
180            raise ValueError("Sigma must be a scalar, 1d of length %s or a 2d "
181                             "array of shape %s x %s" % (nobs, nobs, nobs))
182        cholsigmainv = np.linalg.cholesky(np.linalg.inv(sigma)).T
183    return sigma, cholsigmainv
184
185
186class RegressionModel(base.LikelihoodModel):
187    """
188    Base class for linear regression models. Should not be directly called.
189
190    Intended for subclassing.
191    """
192    def __init__(self, endog, exog, **kwargs):
193        super(RegressionModel, self).__init__(endog, exog, **kwargs)
194        self._data_attr.extend(['pinv_wexog', 'wendog', 'wexog', 'weights'])
195
196    def initialize(self):
197        """Initialize model components."""
198        self.wexog = self.whiten(self.exog)
199        self.wendog = self.whiten(self.endog)
200        # overwrite nobs from class Model:
201        self.nobs = float(self.wexog.shape[0])
202
203        self._df_model = None
204        self._df_resid = None
205        self.rank = None
206
207    @property
208    def df_model(self):
209        """
210        The model degree of freedom.
211
212        The dof is defined as the rank of the regressor matrix minus 1 if a
213        constant is included.
214        """
215        if self._df_model is None:
216            if self.rank is None:
217                self.rank = np.linalg.matrix_rank(self.exog)
218            self._df_model = float(self.rank - self.k_constant)
219        return self._df_model
220
221    @df_model.setter
222    def df_model(self, value):
223        self._df_model = value
224
225    @property
226    def df_resid(self):
227        """
228        The residual degree of freedom.
229
230        The dof is defined as the number of observations minus the rank of
231        the regressor matrix.
232        """
233
234        if self._df_resid is None:
235            if self.rank is None:
236                self.rank = np.linalg.matrix_rank(self.exog)
237            self._df_resid = self.nobs - self.rank
238        return self._df_resid
239
240    @df_resid.setter
241    def df_resid(self, value):
242        self._df_resid = value
243
244    def whiten(self, x):
245        """
246        Whiten method that must be overwritten by individual models.
247
248        Parameters
249        ----------
250        x : array_like
251            Data to be whitened.
252        """
253        raise NotImplementedError("Subclasses must implement.")
254
255    def fit(self, method="pinv", cov_type='nonrobust', cov_kwds=None,
256            use_t=None, **kwargs):
257        """
258        Full fit of the model.
259
260        The results include an estimate of covariance matrix, (whitened)
261        residuals and an estimate of scale.
262
263        Parameters
264        ----------
265        method : str, optional
266            Can be "pinv", "qr".  "pinv" uses the Moore-Penrose pseudoinverse
267            to solve the least squares problem. "qr" uses the QR
268            factorization.
269        cov_type : str, optional
270            See `regression.linear_model.RegressionResults` for a description
271            of the available covariance estimators.
272        cov_kwds : list or None, optional
273            See `linear_model.RegressionResults.get_robustcov_results` for a
274            description required keywords for alternative covariance
275            estimators.
276        use_t : bool, optional
277            Flag indicating to use the Student's t distribution when computing
278            p-values.  Default behavior depends on cov_type. See
279            `linear_model.RegressionResults.get_robustcov_results` for
280            implementation details.
281        **kwargs
282            Additional keyword arguments that contain information used when
283            constructing a model using the formula interface.
284
285        Returns
286        -------
287        RegressionResults
288            The model estimation results.
289
290        See Also
291        --------
292        RegressionResults
293            The results container.
294        RegressionResults.get_robustcov_results
295            A method to change the covariance estimator used when fitting the
296            model.
297
298        Notes
299        -----
300        The fit method uses the pseudoinverse of the design/exogenous variables
301        to solve the least squares minimization.
302        """
303        if method == "pinv":
304            if not (hasattr(self, 'pinv_wexog') and
305                    hasattr(self, 'normalized_cov_params') and
306                    hasattr(self, 'rank')):
307
308                self.pinv_wexog, singular_values = pinv_extended(self.wexog)
309                self.normalized_cov_params = np.dot(
310                    self.pinv_wexog, np.transpose(self.pinv_wexog))
311
312                # Cache these singular values for use later.
313                self.wexog_singular_values = singular_values
314                self.rank = np.linalg.matrix_rank(np.diag(singular_values))
315
316            beta = np.dot(self.pinv_wexog, self.wendog)
317
318        elif method == "qr":
319            if not (hasattr(self, 'exog_Q') and
320                    hasattr(self, 'exog_R') and
321                    hasattr(self, 'normalized_cov_params') and
322                    hasattr(self, 'rank')):
323                Q, R = np.linalg.qr(self.wexog)
324                self.exog_Q, self.exog_R = Q, R
325                self.normalized_cov_params = np.linalg.inv(np.dot(R.T, R))
326
327                # Cache singular values from R.
328                self.wexog_singular_values = np.linalg.svd(R, 0, 0)
329                self.rank = np.linalg.matrix_rank(R)
330            else:
331                Q, R = self.exog_Q, self.exog_R
332
333            # used in ANOVA
334            self.effects = effects = np.dot(Q.T, self.wendog)
335            beta = np.linalg.solve(R, effects)
336        else:
337            raise ValueError('method has to be "pinv" or "qr"')
338
339        if self._df_model is None:
340            self._df_model = float(self.rank - self.k_constant)
341        if self._df_resid is None:
342            self.df_resid = self.nobs - self.rank
343
344        if isinstance(self, OLS):
345            lfit = OLSResults(
346                self, beta,
347                normalized_cov_params=self.normalized_cov_params,
348                cov_type=cov_type, cov_kwds=cov_kwds, use_t=use_t)
349        else:
350            lfit = RegressionResults(
351                self, beta,
352                normalized_cov_params=self.normalized_cov_params,
353                cov_type=cov_type, cov_kwds=cov_kwds, use_t=use_t,
354                **kwargs)
355        return RegressionResultsWrapper(lfit)
356
357    def predict(self, params, exog=None):
358        """
359        Return linear predicted values from a design matrix.
360
361        Parameters
362        ----------
363        params : array_like
364            Parameters of a linear model.
365        exog : array_like, optional
366            Design / exogenous data. Model exog is used if None.
367
368        Returns
369        -------
370        array_like
371            An array of fitted values.
372
373        Notes
374        -----
375        If the model has not yet been fit, params is not optional.
376        """
377        # JP: this does not look correct for GLMAR
378        # SS: it needs its own predict method
379
380        if exog is None:
381            exog = self.exog
382
383        return np.dot(exog, params)
384
385    def get_distribution(self, params, scale, exog=None, dist_class=None):
386        """
387        Construct a random number generator for the predictive distribution.
388
389        Parameters
390        ----------
391        params : array_like
392            The model parameters (regression coefficients).
393        scale : scalar
394            The variance parameter.
395        exog : array_like
396            The predictor variable matrix.
397        dist_class : class
398            A random number generator class.  Must take 'loc' and 'scale'
399            as arguments and return a random number generator implementing
400            an ``rvs`` method for simulating random values. Defaults to normal.
401
402        Returns
403        -------
404        gen
405            Frozen random number generator object with mean and variance
406            determined by the fitted linear model.  Use the ``rvs`` method
407            to generate random values.
408
409        Notes
410        -----
411        Due to the behavior of ``scipy.stats.distributions objects``,
412        the returned random number generator must be called with
413        ``gen.rvs(n)`` where ``n`` is the number of observations in
414        the data set used to fit the model.  If any other value is
415        used for ``n``, misleading results will be produced.
416        """
417        fit = self.predict(params, exog)
418        if dist_class is None:
419            from scipy.stats.distributions import norm
420            dist_class = norm
421        gen = dist_class(loc=fit, scale=np.sqrt(scale))
422        return gen
423
424
425class GLS(RegressionModel):
426    __doc__ = r"""
427    Generalized Least Squares
428
429    %(params)s
430    sigma : scalar or array
431        The array or scalar `sigma` is the weighting matrix of the covariance.
432        The default is None for no scaling.  If `sigma` is a scalar, it is
433        assumed that `sigma` is an n x n diagonal matrix with the given
434        scalar, `sigma` as the value of each diagonal element.  If `sigma`
435        is an n-length vector, then `sigma` is assumed to be a diagonal
436        matrix with the given `sigma` on the diagonal.  This should be the
437        same as WLS.
438    %(extra_params)s
439
440    Attributes
441    ----------
442    pinv_wexog : ndarray
443        `pinv_wexog` is the p x n Moore-Penrose pseudoinverse of `wexog`.
444    cholsimgainv : ndarray
445        The transpose of the Cholesky decomposition of the pseudoinverse.
446    df_model : float
447        p - 1, where p is the number of regressors including the intercept.
448        of freedom.
449    df_resid : float
450        Number of observations n less the number of parameters p.
451    llf : float
452        The value of the likelihood function of the fitted model.
453    nobs : float
454        The number of observations n.
455    normalized_cov_params : ndarray
456        p x p array :math:`(X^{T}\Sigma^{-1}X)^{-1}`
457    results : RegressionResults instance
458        A property that returns the RegressionResults class if fit.
459    sigma : ndarray
460        `sigma` is the n x n covariance structure of the error terms.
461    wexog : ndarray
462        Design matrix whitened by `cholsigmainv`
463    wendog : ndarray
464        Response variable whitened by `cholsigmainv`
465
466    See Also
467    --------
468    WLS : Fit a linear model using Weighted Least Squares.
469    OLS : Fit a linear model using Ordinary Least Squares.
470
471    Notes
472    -----
473    If sigma is a function of the data making one of the regressors
474    a constant, then the current postestimation statistics will not be correct.
475
476    Examples
477    --------
478    >>> import statsmodels.api as sm
479    >>> data = sm.datasets.longley.load()
480    >>> data.exog = sm.add_constant(data.exog)
481    >>> ols_resid = sm.OLS(data.endog, data.exog).fit().resid
482    >>> res_fit = sm.OLS(ols_resid[1:], ols_resid[:-1]).fit()
483    >>> rho = res_fit.params
484
485    `rho` is a consistent estimator of the correlation of the residuals from
486    an OLS fit of the longley data.  It is assumed that this is the true rho
487    of the AR process data.
488
489    >>> from scipy.linalg import toeplitz
490    >>> order = toeplitz(np.arange(16))
491    >>> sigma = rho**order
492
493    `sigma` is an n x n matrix of the autocorrelation structure of the
494    data.
495
496    >>> gls_model = sm.GLS(data.endog, data.exog, sigma=sigma)
497    >>> gls_results = gls_model.fit()
498    >>> print(gls_results.summary())
499    """ % {'params': base._model_params_doc,
500           'extra_params': base._missing_param_doc + base._extra_param_doc}
501
502    def __init__(self, endog, exog, sigma=None, missing='none', hasconst=None,
503                 **kwargs):
504        if type(self) is GLS:
505            self._check_kwargs(kwargs)
506        # TODO: add options igls, for iterative fgls if sigma is None
507        # TODO: default if sigma is none should be two-step GLS
508        sigma, cholsigmainv = _get_sigma(sigma, len(endog))
509
510        super(GLS, self).__init__(endog, exog, missing=missing,
511                                  hasconst=hasconst, sigma=sigma,
512                                  cholsigmainv=cholsigmainv, **kwargs)
513
514        # store attribute names for data arrays
515        self._data_attr.extend(['sigma', 'cholsigmainv'])
516
517    def whiten(self, x):
518        """
519        GLS whiten method.
520
521        Parameters
522        ----------
523        x : array_like
524            Data to be whitened.
525
526        Returns
527        -------
528        ndarray
529            The value np.dot(cholsigmainv,X).
530
531        See Also
532        --------
533        GLS : Fit a linear model using Generalized Least Squares.
534        """
535        x = np.asarray(x)
536        if self.sigma is None or self.sigma.shape == ():
537            return x
538        elif self.sigma.ndim == 1:
539            if x.ndim == 1:
540                return x * self.cholsigmainv
541            else:
542                return x * self.cholsigmainv[:, None]
543        else:
544            return np.dot(self.cholsigmainv, x)
545
546    def loglike(self, params):
547        r"""
548        Compute the value of the Gaussian log-likelihood function at params.
549
550        Given the whitened design matrix, the log-likelihood is evaluated
551        at the parameter vector `params` for the dependent variable `endog`.
552
553        Parameters
554        ----------
555        params : array_like
556            The model parameters.
557
558        Returns
559        -------
560        float
561            The value of the log-likelihood function for a GLS Model.
562
563        Notes
564        -----
565        The log-likelihood function for the normal distribution is
566
567        .. math:: -\frac{n}{2}\log\left(\left(Y-\hat{Y}\right)^{\prime}
568                   \left(Y-\hat{Y}\right)\right)
569                  -\frac{n}{2}\left(1+\log\left(\frac{2\pi}{n}\right)\right)
570                  -\frac{1}{2}\log\left(\left|\Sigma\right|\right)
571
572        Y and Y-hat are whitened.
573        """
574        # TODO: combine this with OLS/WLS loglike and add _det_sigma argument
575        nobs2 = self.nobs / 2.0
576        SSR = np.sum((self.wendog - np.dot(self.wexog, params))**2, axis=0)
577        llf = -np.log(SSR) * nobs2      # concentrated likelihood
578        llf -= (1+np.log(np.pi/nobs2))*nobs2  # with likelihood constant
579        if np.any(self.sigma):
580            # FIXME: robust-enough check? unneeded if _det_sigma gets defined
581            if self.sigma.ndim == 2:
582                det = np.linalg.slogdet(self.sigma)
583                llf -= .5*det[1]
584            else:
585                llf -= 0.5*np.sum(np.log(self.sigma))
586            # with error covariance matrix
587        return llf
588
589    def hessian_factor(self, params, scale=None, observed=True):
590        """
591        Compute weights for calculating Hessian.
592
593        Parameters
594        ----------
595        params : ndarray
596            The parameter at which Hessian is evaluated.
597        scale : None or float
598            If scale is None, then the default scale will be calculated.
599            Default scale is defined by `self.scaletype` and set in fit.
600            If scale is not None, then it is used as a fixed scale.
601        observed : bool
602            If True, then the observed Hessian is returned. If false then the
603            expected information matrix is returned.
604
605        Returns
606        -------
607        ndarray
608            A 1d weight vector used in the calculation of the Hessian.
609            The hessian is obtained by `(exog.T * hessian_factor).dot(exog)`.
610        """
611
612        if self.sigma is None or self.sigma.shape == ():
613            return np.ones(self.exog.shape[0])
614        elif self.sigma.ndim == 1:
615            return self.cholsigmainv
616        else:
617            return np.diag(self.cholsigmainv)
618
619    @Appender(_fit_regularized_doc)
620    def fit_regularized(self, method="elastic_net", alpha=0.,
621                        L1_wt=1., start_params=None, profile_scale=False,
622                        refit=False, **kwargs):
623        if not np.isscalar(alpha):
624            alpha = np.asarray(alpha)
625        # Need to adjust since RSS/n term in elastic net uses nominal
626        # n in denominator
627        if self.sigma is not None:
628            if self.sigma.ndim == 2:
629                var_obs = np.diag(self.sigma)
630            elif self.sigma.ndim == 1:
631                var_obs = self.sigma
632            else:
633                raise ValueError("sigma should be 1-dim or 2-dim")
634
635            alpha = alpha * np.sum(1 / var_obs) / len(self.endog)
636
637        rslt = OLS(self.wendog, self.wexog).fit_regularized(
638            method=method, alpha=alpha,
639            L1_wt=L1_wt,
640            start_params=start_params,
641            profile_scale=profile_scale,
642            refit=refit, **kwargs)
643
644        from statsmodels.base.elastic_net import (
645            RegularizedResults,
646            RegularizedResultsWrapper,
647        )
648        rrslt = RegularizedResults(self, rslt.params)
649        return RegularizedResultsWrapper(rrslt)
650
651
652class WLS(RegressionModel):
653    __doc__ = """
654    Weighted Least Squares
655
656    The weights are presumed to be (proportional to) the inverse of
657    the variance of the observations.  That is, if the variables are
658    to be transformed by 1/sqrt(W) you must supply weights = 1/W.
659
660    %(params)s
661    weights : array_like, optional
662        A 1d array of weights.  If you supply 1/W then the variables are
663        pre- multiplied by 1/sqrt(W).  If no weights are supplied the
664        default value is 1 and WLS results are the same as OLS.
665    %(extra_params)s
666
667    Attributes
668    ----------
669    weights : ndarray
670        The stored weights supplied as an argument.
671
672    See Also
673    --------
674    GLS : Fit a linear model using Generalized Least Squares.
675    OLS : Fit a linear model using Ordinary Least Squares.
676
677    Notes
678    -----
679    If the weights are a function of the data, then the post estimation
680    statistics such as fvalue and mse_model might not be correct, as the
681    package does not yet support no-constant regression.
682
683    Examples
684    --------
685    >>> import statsmodels.api as sm
686    >>> Y = [1,3,4,5,2,3,4]
687    >>> X = range(1,8)
688    >>> X = sm.add_constant(X)
689    >>> wls_model = sm.WLS(Y,X, weights=list(range(1,8)))
690    >>> results = wls_model.fit()
691    >>> results.params
692    array([ 2.91666667,  0.0952381 ])
693    >>> results.tvalues
694    array([ 2.0652652 ,  0.35684428])
695    >>> print(results.t_test([1, 0]))
696    <T test: effect=array([ 2.91666667]), sd=array([[ 1.41224801]]), t=array([[ 2.0652652]]), p=array([[ 0.04690139]]), df_denom=5>
697    >>> print(results.f_test([0, 1]))
698    <F test: F=array([[ 0.12733784]]), p=[[ 0.73577409]], df_denom=5, df_num=1>
699    """ % {'params': base._model_params_doc,
700           'extra_params': base._missing_param_doc + base._extra_param_doc}
701
702    def __init__(self, endog, exog, weights=1., missing='none', hasconst=None,
703                 **kwargs):
704        if type(self) is WLS:
705            self._check_kwargs(kwargs)
706        weights = np.array(weights)
707        if weights.shape == ():
708            if (missing == 'drop' and 'missing_idx' in kwargs and
709                    kwargs['missing_idx'] is not None):
710                # patsy may have truncated endog
711                weights = np.repeat(weights, len(kwargs['missing_idx']))
712            else:
713                weights = np.repeat(weights, len(endog))
714        # handle case that endog might be of len == 1
715        if len(weights) == 1:
716            weights = np.array([weights.squeeze()])
717        else:
718            weights = weights.squeeze()
719        super(WLS, self).__init__(endog, exog, missing=missing,
720                                  weights=weights, hasconst=hasconst, **kwargs)
721        nobs = self.exog.shape[0]
722        weights = self.weights
723        if weights.size != nobs and weights.shape[0] != nobs:
724            raise ValueError('Weights must be scalar or same length as design')
725
726    def whiten(self, x):
727        """
728        Whitener for WLS model, multiplies each column by sqrt(self.weights).
729
730        Parameters
731        ----------
732        x : array_like
733            Data to be whitened.
734
735        Returns
736        -------
737        array_like
738            The whitened values sqrt(weights)*X.
739        """
740
741        x = np.asarray(x)
742        if x.ndim == 1:
743            return x * np.sqrt(self.weights)
744        elif x.ndim == 2:
745            return np.sqrt(self.weights)[:, None] * x
746
747    def loglike(self, params):
748        r"""
749        Compute the value of the gaussian log-likelihood function at params.
750
751        Given the whitened design matrix, the log-likelihood is evaluated
752        at the parameter vector `params` for the dependent variable `Y`.
753
754        Parameters
755        ----------
756        params : array_like
757            The parameter estimates.
758
759        Returns
760        -------
761        float
762            The value of the log-likelihood function for a WLS Model.
763
764        Notes
765        --------
766        .. math:: -\frac{n}{2}\log SSR
767                  -\frac{n}{2}\left(1+\log\left(\frac{2\pi}{n}\right)\right)
768                  -\frac{1}{2}\log\left(\left|W\right|\right)
769
770        where :math:`W` is a diagonal weight matrix matrix and
771        :math:`SSR=\left(Y-\hat{Y}\right)^\prime W \left(Y-\hat{Y}\right)` is
772        the sum of the squared weighted residuals.
773        """
774        nobs2 = self.nobs / 2.0
775        SSR = np.sum((self.wendog - np.dot(self.wexog, params))**2, axis=0)
776        llf = -np.log(SSR) * nobs2      # concentrated likelihood
777        llf -= (1+np.log(np.pi/nobs2))*nobs2  # with constant
778        llf += 0.5 * np.sum(np.log(self.weights))
779        return llf
780
781    def hessian_factor(self, params, scale=None, observed=True):
782        """
783        Compute the weights for calculating the Hessian.
784
785        Parameters
786        ----------
787        params : ndarray
788            The parameter at which Hessian is evaluated.
789        scale : None or float
790            If scale is None, then the default scale will be calculated.
791            Default scale is defined by `self.scaletype` and set in fit.
792            If scale is not None, then it is used as a fixed scale.
793        observed : bool
794            If True, then the observed Hessian is returned. If false then the
795            expected information matrix is returned.
796
797        Returns
798        -------
799        ndarray
800            A 1d weight vector used in the calculation of the Hessian.
801            The hessian is obtained by `(exog.T * hessian_factor).dot(exog)`.
802        """
803
804        return self.weights
805
806    @Appender(_fit_regularized_doc)
807    def fit_regularized(self, method="elastic_net", alpha=0.,
808                        L1_wt=1., start_params=None, profile_scale=False,
809                        refit=False, **kwargs):
810        # Docstring attached below
811        if not np.isscalar(alpha):
812            alpha = np.asarray(alpha)
813        # Need to adjust since RSS/n in elastic net uses nominal n in
814        # denominator
815        alpha = alpha * np.sum(self.weights) / len(self.weights)
816
817        rslt = OLS(self.wendog, self.wexog).fit_regularized(
818            method=method, alpha=alpha,
819            L1_wt=L1_wt,
820            start_params=start_params,
821            profile_scale=profile_scale,
822            refit=refit, **kwargs)
823
824        from statsmodels.base.elastic_net import (
825            RegularizedResults,
826            RegularizedResultsWrapper,
827        )
828        rrslt = RegularizedResults(self, rslt.params)
829        return RegularizedResultsWrapper(rrslt)
830
831
832class OLS(WLS):
833    __doc__ = """
834    Ordinary Least Squares
835
836    %(params)s
837    %(extra_params)s
838
839    Attributes
840    ----------
841    weights : scalar
842        Has an attribute weights = array(1.0) due to inheritance from WLS.
843
844    See Also
845    --------
846    WLS : Fit a linear model using Weighted Least Squares.
847    GLS : Fit a linear model using Generalized Least Squares.
848
849    Notes
850    -----
851    No constant is added by the model unless you are using formulas.
852
853    Examples
854    --------
855    >>> import statsmodels.api as sm
856    >>> import numpy as np
857    >>> duncan_prestige = sm.datasets.get_rdataset("Duncan", "carData")
858    >>> Y = duncan_prestige.data['income']
859    >>> X = duncan_prestige.data['education']
860    >>> X = sm.add_constant(X)
861    >>> model = sm.OLS(Y,X)
862    >>> results = model.fit()
863    >>> results.params
864    const        10.603498
865    education     0.594859
866    dtype: float64
867
868    >>> results.tvalues
869    const        2.039813
870    education    6.892802
871    dtype: float64
872
873    >>> print(results.t_test([1, 0]))
874                                 Test for Constraints
875    ==============================================================================
876                     coef    std err          t      P>|t|      [0.025      0.975]
877    ------------------------------------------------------------------------------
878    c0            10.6035      5.198      2.040      0.048       0.120      21.087
879    ==============================================================================
880
881    >>> print(results.f_test(np.identity(2)))
882    <F test: F=array([[159.63031026]]), p=1.2607168903696672e-20, df_denom=43, df_num=2>
883    """ % {'params': base._model_params_doc,
884           'extra_params': base._missing_param_doc + base._extra_param_doc}
885
886    def __init__(self, endog, exog=None, missing='none', hasconst=None,
887                 **kwargs):
888        if "weights" in kwargs:
889            msg = ("Weights are not supported in OLS and will be ignored"
890                   "An exception will be raised in the next version.")
891            warnings.warn(msg, ValueWarning)
892        super(OLS, self).__init__(endog, exog, missing=missing,
893                                  hasconst=hasconst, **kwargs)
894        if "weights" in self._init_keys:
895            self._init_keys.remove("weights")
896
897        if type(self) is OLS:
898            self._check_kwargs(kwargs, ["offset"])
899
900    def loglike(self, params, scale=None):
901        """
902        The likelihood function for the OLS model.
903
904        Parameters
905        ----------
906        params : array_like
907            The coefficients with which to estimate the log-likelihood.
908        scale : float or None
909            If None, return the profile (concentrated) log likelihood
910            (profiled over the scale parameter), else return the
911            log-likelihood using the given scale value.
912
913        Returns
914        -------
915        float
916            The likelihood function evaluated at params.
917        """
918        nobs2 = self.nobs / 2.0
919        nobs = float(self.nobs)
920        resid = self.endog - np.dot(self.exog, params)
921        if hasattr(self, 'offset'):
922            resid -= self.offset
923        ssr = np.sum(resid**2)
924        if scale is None:
925            # profile log likelihood
926            llf = -nobs2*np.log(2*np.pi) - nobs2*np.log(ssr / nobs) - nobs2
927        else:
928            # log-likelihood
929            llf = -nobs2 * np.log(2 * np.pi * scale) - ssr / (2*scale)
930        return llf
931
932    def whiten(self, x):
933        """
934        OLS model whitener does nothing.
935
936        Parameters
937        ----------
938        x : array_like
939            Data to be whitened.
940
941        Returns
942        -------
943        array_like
944            The input array unmodified.
945
946        See Also
947        --------
948        OLS : Fit a linear model using Ordinary Least Squares.
949        """
950        return x
951
952    def score(self, params, scale=None):
953        """
954        Evaluate the score function at a given point.
955
956        The score corresponds to the profile (concentrated)
957        log-likelihood in which the scale parameter has been profiled
958        out.
959
960        Parameters
961        ----------
962        params : array_like
963            The parameter vector at which the score function is
964            computed.
965        scale : float or None
966            If None, return the profile (concentrated) log likelihood
967            (profiled over the scale parameter), else return the
968            log-likelihood using the given scale value.
969
970        Returns
971        -------
972        ndarray
973            The score vector.
974        """
975
976        if not hasattr(self, "_wexog_xprod"):
977            self._setup_score_hess()
978
979        xtxb = np.dot(self._wexog_xprod, params)
980        sdr = -self._wexog_x_wendog + xtxb
981
982        if scale is None:
983            ssr = self._wendog_xprod - 2 * np.dot(self._wexog_x_wendog.T,
984                                                  params)
985            ssr += np.dot(params, xtxb)
986            return -self.nobs * sdr / ssr
987        else:
988            return -sdr / scale
989
990    def _setup_score_hess(self):
991        y = self.wendog
992        if hasattr(self, 'offset'):
993            y = y - self.offset
994        self._wendog_xprod = np.sum(y * y)
995        self._wexog_xprod = np.dot(self.wexog.T, self.wexog)
996        self._wexog_x_wendog = np.dot(self.wexog.T, y)
997
998    def hessian(self, params, scale=None):
999        """
1000        Evaluate the Hessian function at a given point.
1001
1002        Parameters
1003        ----------
1004        params : array_like
1005            The parameter vector at which the Hessian is computed.
1006        scale : float or None
1007            If None, return the profile (concentrated) log likelihood
1008            (profiled over the scale parameter), else return the
1009            log-likelihood using the given scale value.
1010
1011        Returns
1012        -------
1013        ndarray
1014            The Hessian matrix.
1015        """
1016
1017        if not hasattr(self, "_wexog_xprod"):
1018            self._setup_score_hess()
1019
1020        xtxb = np.dot(self._wexog_xprod, params)
1021
1022        if scale is None:
1023            ssr = self._wendog_xprod - 2 * np.dot(self._wexog_x_wendog.T,
1024                                                  params)
1025            ssr += np.dot(params, xtxb)
1026            ssrp = -2*self._wexog_x_wendog + 2*xtxb
1027            hm = self._wexog_xprod / ssr - np.outer(ssrp, ssrp) / ssr**2
1028            return -self.nobs * hm / 2
1029        else:
1030            return -self._wexog_xprod / scale
1031
1032    def hessian_factor(self, params, scale=None, observed=True):
1033        """
1034        Calculate the weights for the Hessian.
1035
1036        Parameters
1037        ----------
1038        params : ndarray
1039            The parameter at which Hessian is evaluated.
1040        scale : None or float
1041            If scale is None, then the default scale will be calculated.
1042            Default scale is defined by `self.scaletype` and set in fit.
1043            If scale is not None, then it is used as a fixed scale.
1044        observed : bool
1045            If True, then the observed Hessian is returned. If false then the
1046            expected information matrix is returned.
1047
1048        Returns
1049        -------
1050        ndarray
1051            A 1d weight vector used in the calculation of the Hessian.
1052            The hessian is obtained by `(exog.T * hessian_factor).dot(exog)`.
1053        """
1054
1055        return np.ones(self.exog.shape[0])
1056
1057    @Appender(_fit_regularized_doc)
1058    def fit_regularized(self, method="elastic_net", alpha=0.,
1059                        L1_wt=1., start_params=None, profile_scale=False,
1060                        refit=False, **kwargs):
1061
1062        # In the future we could add support for other penalties, e.g. SCAD.
1063        if method not in ("elastic_net", "sqrt_lasso"):
1064            msg = "Unknown method '%s' for fit_regularized" % method
1065            raise ValueError(msg)
1066
1067        # Set default parameters.
1068        defaults = {"maxiter":  50, "cnvrg_tol": 1e-10,
1069                    "zero_tol": 1e-8}
1070        defaults.update(kwargs)
1071
1072        if method == "sqrt_lasso":
1073            from statsmodels.base.elastic_net import (
1074                RegularizedResults,
1075                RegularizedResultsWrapper,
1076            )
1077            params = self._sqrt_lasso(alpha, refit, defaults["zero_tol"])
1078            results = RegularizedResults(self, params)
1079            return RegularizedResultsWrapper(results)
1080
1081        from statsmodels.base.elastic_net import fit_elasticnet
1082
1083        if L1_wt == 0:
1084            return self._fit_ridge(alpha)
1085
1086        # If a scale parameter is passed in, the non-profile
1087        # likelihood (residual sum of squares divided by -2) is used,
1088        # otherwise the profile likelihood is used.
1089        if profile_scale:
1090            loglike_kwds = {}
1091            score_kwds = {}
1092            hess_kwds = {}
1093        else:
1094            loglike_kwds = {"scale": 1}
1095            score_kwds = {"scale": 1}
1096            hess_kwds = {"scale": 1}
1097
1098        return fit_elasticnet(self, method=method,
1099                              alpha=alpha,
1100                              L1_wt=L1_wt,
1101                              start_params=start_params,
1102                              loglike_kwds=loglike_kwds,
1103                              score_kwds=score_kwds,
1104                              hess_kwds=hess_kwds,
1105                              refit=refit,
1106                              check_step=False,
1107                              **defaults)
1108
1109    def _sqrt_lasso(self, alpha, refit, zero_tol):
1110
1111        try:
1112            import cvxopt
1113        except ImportError:
1114            msg = 'sqrt_lasso fitting requires the cvxopt module'
1115            raise ValueError(msg)
1116
1117        n = len(self.endog)
1118        p = self.exog.shape[1]
1119
1120        h0 = cvxopt.matrix(0., (2*p+1, 1))
1121        h1 = cvxopt.matrix(0., (n+1, 1))
1122        h1[1:, 0] = cvxopt.matrix(self.endog, (n, 1))
1123
1124        G0 = cvxopt.spmatrix([], [], [], (2*p+1, 2*p+1))
1125        for i in range(1, 2*p+1):
1126            G0[i, i] = -1
1127        G1 = cvxopt.matrix(0., (n+1, 2*p+1))
1128        G1[0, 0] = -1
1129        G1[1:, 1:p+1] = self.exog
1130        G1[1:, p+1:] = -self.exog
1131
1132        c = cvxopt.matrix(alpha / n, (2*p + 1, 1))
1133        c[0] = 1 / np.sqrt(n)
1134
1135        from cvxopt import solvers
1136        solvers.options["show_progress"] = False
1137
1138        rslt = solvers.socp(c, Gl=G0, hl=h0, Gq=[G1], hq=[h1])
1139        x = np.asarray(rslt['x']).flat
1140        bp = x[1:p+1]
1141        bn = x[p+1:]
1142        params = bp - bn
1143
1144        if not refit:
1145            return params
1146
1147        ii = np.flatnonzero(np.abs(params) > zero_tol)
1148        rfr = OLS(self.endog, self.exog[:, ii]).fit()
1149        params *= 0
1150        params[ii] = rfr.params
1151
1152        return params
1153
1154    def _fit_ridge(self, alpha):
1155        """
1156        Fit a linear model using ridge regression.
1157
1158        Parameters
1159        ----------
1160        alpha : scalar or array_like
1161            The penalty weight.  If a scalar, the same penalty weight
1162            applies to all variables in the model.  If a vector, it
1163            must have the same length as `params`, and contains a
1164            penalty weight for each coefficient.
1165
1166        Notes
1167        -----
1168        Equivalent to fit_regularized with L1_wt = 0 (but implemented
1169        more efficiently).
1170        """
1171
1172        u, s, vt = np.linalg.svd(self.exog, 0)
1173        v = vt.T
1174        q = np.dot(u.T, self.endog) * s
1175        s2 = s * s
1176        if np.isscalar(alpha):
1177            sd = s2 + alpha * self.nobs
1178            params = q / sd
1179            params = np.dot(v, params)
1180        else:
1181            alpha = np.asarray(alpha)
1182            vtav = self.nobs * np.dot(vt, alpha[:, None] * v)
1183            d = np.diag(vtav) + s2
1184            np.fill_diagonal(vtav, d)
1185            r = np.linalg.solve(vtav, q)
1186            params = np.dot(v, r)
1187
1188        from statsmodels.base.elastic_net import RegularizedResults
1189        return RegularizedResults(self, params)
1190
1191
1192class GLSAR(GLS):
1193    __doc__ = """
1194    Generalized Least Squares with AR covariance structure
1195
1196    %(params)s
1197    rho : int
1198        The order of the autoregressive covariance.
1199    %(extra_params)s
1200
1201    Notes
1202    -----
1203    GLSAR is considered to be experimental.
1204    The linear autoregressive process of order p--AR(p)--is defined as:
1205    TODO
1206
1207    Examples
1208    --------
1209    >>> import statsmodels.api as sm
1210    >>> X = range(1,8)
1211    >>> X = sm.add_constant(X)
1212    >>> Y = [1,3,4,5,8,10,9]
1213    >>> model = sm.GLSAR(Y, X, rho=2)
1214    >>> for i in range(6):
1215    ...     results = model.fit()
1216    ...     print("AR coefficients: {0}".format(model.rho))
1217    ...     rho, sigma = sm.regression.yule_walker(results.resid,
1218    ...                                            order=model.order)
1219    ...     model = sm.GLSAR(Y, X, rho)
1220    ...
1221    AR coefficients: [ 0.  0.]
1222    AR coefficients: [-0.52571491 -0.84496178]
1223    AR coefficients: [-0.6104153  -0.86656458]
1224    AR coefficients: [-0.60439494 -0.857867  ]
1225    AR coefficients: [-0.6048218  -0.85846157]
1226    AR coefficients: [-0.60479146 -0.85841922]
1227    >>> results.params
1228    array([-0.66661205,  1.60850853])
1229    >>> results.tvalues
1230    array([ -2.10304127,  21.8047269 ])
1231    >>> print(results.t_test([1, 0]))
1232    <T test: effect=array([-0.66661205]), sd=array([[ 0.31697526]]), t=array([[-2.10304127]]), p=array([[ 0.06309969]]), df_denom=3>
1233    >>> print(results.f_test(np.identity(2)))
1234    <F test: F=array([[ 1815.23061844]]), p=[[ 0.00002372]], df_denom=3, df_num=2>
1235
1236    Or, equivalently
1237
1238    >>> model2 = sm.GLSAR(Y, X, rho=2)
1239    >>> res = model2.iterative_fit(maxiter=6)
1240    >>> model2.rho
1241    array([-0.60479146, -0.85841922])
1242    """ % {'params': base._model_params_doc,
1243           'extra_params': base._missing_param_doc + base._extra_param_doc}
1244    # TODO: Complete docstring
1245
1246    def __init__(self, endog, exog=None, rho=1, missing='none', hasconst=None,
1247                 **kwargs):
1248        # this looks strange, interpreting rho as order if it is int
1249        if isinstance(rho, (int, np.integer)):
1250            self.order = int(rho)
1251            self.rho = np.zeros(self.order, np.float64)
1252        else:
1253            self.rho = np.squeeze(np.asarray(rho))
1254            if len(self.rho.shape) not in [0, 1]:
1255                raise ValueError("AR parameters must be a scalar or a vector")
1256            if self.rho.shape == ():
1257                self.rho.shape = (1,)
1258            self.order = self.rho.shape[0]
1259        if exog is None:
1260            # JP this looks wrong, should be a regression on constant
1261            # results for rho estimate now identical to yule-walker on y
1262            # super(AR, self).__init__(endog, add_constant(endog))
1263            super(GLSAR, self).__init__(endog, np.ones((endog.shape[0], 1)),
1264                                        missing=missing, hasconst=None,
1265                                        **kwargs)
1266        else:
1267            super(GLSAR, self).__init__(endog, exog, missing=missing,
1268                                        **kwargs)
1269
1270    def iterative_fit(self, maxiter=3, rtol=1e-4, **kwargs):
1271        """
1272        Perform an iterative two-stage procedure to estimate a GLS model.
1273
1274        The model is assumed to have AR(p) errors, AR(p) parameters and
1275        regression coefficients are estimated iteratively.
1276
1277        Parameters
1278        ----------
1279        maxiter : int, optional
1280            The number of iterations.
1281        rtol : float, optional
1282            Relative tolerance between estimated coefficients to stop the
1283            estimation.  Stops if max(abs(last - current) / abs(last)) < rtol.
1284        **kwargs
1285            Additional keyword arguments passed to `fit`.
1286
1287        Returns
1288        -------
1289        RegressionResults
1290            The results computed using an iterative fit.
1291        """
1292        # TODO: update this after going through example.
1293        converged = False
1294        i = -1  # need to initialize for maxiter < 1 (skip loop)
1295        history = {'params': [], 'rho': [self.rho]}
1296        for i in range(maxiter - 1):
1297            if hasattr(self, 'pinv_wexog'):
1298                del self.pinv_wexog
1299            self.initialize()
1300            results = self.fit()
1301            history['params'].append(results.params)
1302            if i == 0:
1303                last = results.params
1304            else:
1305                diff = np.max(np.abs(last - results.params) / np.abs(last))
1306                if diff < rtol:
1307                    converged = True
1308                    break
1309                last = results.params
1310            self.rho, _ = yule_walker(results.resid,
1311                                      order=self.order, df=None)
1312            history['rho'].append(self.rho)
1313
1314        # why not another call to self.initialize
1315        # Use kwarg to insert history
1316        if not converged and maxiter > 0:
1317            # maxiter <= 0 just does OLS
1318            if hasattr(self, 'pinv_wexog'):
1319                del self.pinv_wexog
1320            self.initialize()
1321
1322        # if converged then this is a duplicate fit, because we did not
1323        # update rho
1324        results = self.fit(history=history, **kwargs)
1325        results.iter = i + 1
1326        # add last fit to history, not if duplicate fit
1327        if not converged:
1328            results.history['params'].append(results.params)
1329            results.iter += 1
1330
1331        results.converged = converged
1332
1333        return results
1334
1335    def whiten(self, x):
1336        """
1337        Whiten a series of columns according to an AR(p) covariance structure.
1338
1339        Whitening using this method drops the initial p observations.
1340
1341        Parameters
1342        ----------
1343        x : array_like
1344            The data to be whitened.
1345
1346        Returns
1347        -------
1348        ndarray
1349            The whitened data.
1350        """
1351        # TODO: notation for AR process
1352        x = np.asarray(x, np.float64)
1353        _x = x.copy()
1354
1355        # the following loops over the first axis,  works for 1d and nd
1356        for i in range(self.order):
1357            _x[(i + 1):] = _x[(i + 1):] - self.rho[i] * x[0:-(i + 1)]
1358        return _x[self.order:]
1359
1360
1361def yule_walker(x, order=1, method="adjusted", df=None, inv=False,
1362                demean=True):
1363    """
1364    Estimate AR(p) parameters from a sequence using the Yule-Walker equations.
1365
1366    Adjusted or maximum-likelihood estimator (mle)
1367
1368    Parameters
1369    ----------
1370    x : array_like
1371        A 1d array.
1372    order : int, optional
1373        The order of the autoregressive process.  Default is 1.
1374    method : str, optional
1375       Method can be 'adjusted' or 'mle' and this determines
1376       denominator in estimate of autocorrelation function (ACF) at
1377       lag k. If 'mle', the denominator is n=X.shape[0], if 'adjusted'
1378       the denominator is n-k.  The default is adjusted.
1379    df : int, optional
1380       Specifies the degrees of freedom. If `df` is supplied, then it
1381       is assumed the X has `df` degrees of freedom rather than `n`.
1382       Default is None.
1383    inv : bool
1384        If inv is True the inverse of R is also returned.  Default is
1385        False.
1386    demean : bool
1387        True, the mean is subtracted from `X` before estimation.
1388
1389    Returns
1390    -------
1391    rho : ndarray
1392        AR(p) coefficients computed using the Yule-Walker method.
1393    sigma : float
1394        The estimate of the residual standard deviation.
1395
1396    See Also
1397    --------
1398    burg : Burg's AR estimator.
1399
1400    Notes
1401    -----
1402    See https://en.wikipedia.org/wiki/Autoregressive_moving_average_model for
1403    further details.
1404
1405    Examples
1406    --------
1407    >>> import statsmodels.api as sm
1408    >>> from statsmodels.datasets.sunspots import load
1409    >>> data = load()
1410    >>> rho, sigma = sm.regression.yule_walker(data.endog, order=4,
1411    ...                                        method="mle")
1412
1413    >>> rho
1414    array([ 1.28310031, -0.45240924, -0.20770299,  0.04794365])
1415    >>> sigma
1416    16.808022730464351
1417    """
1418    # TODO: define R better, look back at notes and technical notes on YW.
1419    # First link here is useful
1420    # http://www-stat.wharton.upenn.edu/~steele/Courses/956/ResourceDetails/YuleWalkerAndMore.htm
1421
1422    method = string_like(
1423        method, "method", options=("adjusted", "unbiased", "mle")
1424    )
1425    if method == "unbiased":
1426        warnings.warn(
1427            "unbiased is deprecated in factor of adjusted to reflect that the "
1428            "term is adjusting the sample size used in the autocovariance "
1429            "calculation rather than estimating an unbiased autocovariance. "
1430            "After release 0.13, using 'unbiased' will raise.",
1431            FutureWarning,
1432        )
1433        method = "adjusted"
1434
1435    if method not in ("adjusted", "mle"):
1436        raise ValueError("ACF estimation method must be 'adjusted' or 'MLE'")
1437    x = np.array(x, dtype=np.float64)
1438    if demean:
1439        x -= x.mean()
1440    n = df or x.shape[0]
1441
1442    # this handles df_resid ie., n - p
1443    adj_needed = method == "adjusted"
1444
1445    if x.ndim > 1 and x.shape[1] != 1:
1446        raise ValueError("expecting a vector to estimate AR parameters")
1447    r = np.zeros(order+1, np.float64)
1448    r[0] = (x ** 2).sum() / n
1449    for k in range(1, order+1):
1450        r[k] = (x[0:-k] * x[k:]).sum() / (n - k * adj_needed)
1451    R = toeplitz(r[:-1])
1452
1453    rho = np.linalg.solve(R, r[1:])
1454    sigmasq = r[0] - (r[1:]*rho).sum()
1455    sigma = np.sqrt(sigmasq) if not np.isnan(sigmasq) and sigmasq > 0 else np.nan
1456    if inv:
1457        return rho, sigma, np.linalg.inv(R)
1458    else:
1459        return rho, sigma
1460
1461
1462def burg(endog, order=1, demean=True):
1463    """
1464    Compute Burg's AP(p) parameter estimator.
1465
1466    Parameters
1467    ----------
1468    endog : array_like
1469        The endogenous variable.
1470    order : int, optional
1471        Order of the AR.  Default is 1.
1472    demean : bool, optional
1473        Flag indicating to subtract the mean from endog before estimation.
1474
1475    Returns
1476    -------
1477    rho : ndarray
1478        The AR(p) coefficients computed using Burg's algorithm.
1479    sigma2 : float
1480        The estimate of the residual variance.
1481
1482    See Also
1483    --------
1484    yule_walker : Estimate AR parameters using the Yule-Walker method.
1485
1486    Notes
1487    -----
1488    AR model estimated includes a constant that is estimated using the sample
1489    mean (see [1]_). This value is not reported.
1490
1491    References
1492    ----------
1493    .. [1] Brockwell, P.J. and Davis, R.A., 2016. Introduction to time series
1494        and forecasting. Springer.
1495
1496    Examples
1497    --------
1498    >>> import statsmodels.api as sm
1499    >>> from statsmodels.datasets.sunspots import load
1500    >>> data = load()
1501    >>> rho, sigma2 = sm.regression.linear_model.burg(data.endog, order=4)
1502
1503    >>> rho
1504    array([ 1.30934186, -0.48086633, -0.20185982,  0.05501941])
1505    >>> sigma2
1506    271.2467306963966
1507    """
1508    # Avoid circular imports
1509    from statsmodels.tsa.stattools import levinson_durbin_pacf, pacf_burg
1510
1511    endog = np.squeeze(np.asarray(endog))
1512    if endog.ndim != 1:
1513        raise ValueError('endog must be 1-d or squeezable to 1-d.')
1514    order = int(order)
1515    if order < 1:
1516        raise ValueError('order must be an integer larger than 1')
1517    if demean:
1518        endog = endog - endog.mean()
1519    pacf, sigma = pacf_burg(endog, order, demean=demean)
1520    ar, _ = levinson_durbin_pacf(pacf)
1521    return ar, sigma[-1]
1522
1523
1524class RegressionResults(base.LikelihoodModelResults):
1525    r"""
1526    This class summarizes the fit of a linear regression model.
1527
1528    It handles the output of contrasts, estimates of covariance, etc.
1529
1530    Parameters
1531    ----------
1532    model : RegressionModel
1533        The regression model instance.
1534    params : ndarray
1535        The estimated parameters.
1536    normalized_cov_params : ndarray
1537        The normalized covariance parameters.
1538    scale : float
1539        The estimated scale of the residuals.
1540    cov_type : str
1541        The covariance estimator used in the results.
1542    cov_kwds : dict
1543        Additional keywords used in the covariance specification.
1544    use_t : bool
1545        Flag indicating to use the Student's t in inference.
1546    **kwargs
1547        Additional keyword arguments used to initialize the results.
1548
1549    Attributes
1550    ----------
1551    pinv_wexog
1552        See model class docstring for implementation details.
1553    cov_type
1554        Parameter covariance estimator used for standard errors and t-stats.
1555    df_model
1556        Model degrees of freedom. The number of regressors `p`. Does not
1557        include the constant if one is present.
1558    df_resid
1559        Residual degrees of freedom. `n - p - 1`, if a constant is present.
1560        `n - p` if a constant is not included.
1561    het_scale
1562        adjusted squared residuals for heteroscedasticity robust standard
1563        errors. Is only available after `HC#_se` or `cov_HC#` is called.
1564        See HC#_se for more information.
1565    history
1566        Estimation history for iterative estimators.
1567    model
1568        A pointer to the model instance that called fit() or results.
1569    params
1570        The linear coefficients that minimize the least squares
1571        criterion.  This is usually called Beta for the classical
1572        linear model.
1573    """
1574
1575    _cache = {}  # needs to be a class attribute for scale setter?
1576
1577    def __init__(self, model, params, normalized_cov_params=None, scale=1.,
1578                 cov_type='nonrobust', cov_kwds=None, use_t=None, **kwargs):
1579        super(RegressionResults, self).__init__(
1580            model, params, normalized_cov_params, scale)
1581
1582        self._cache = {}
1583        if hasattr(model, 'wexog_singular_values'):
1584            self._wexog_singular_values = model.wexog_singular_values
1585        else:
1586            self._wexog_singular_values = None
1587
1588        self.df_model = model.df_model
1589        self.df_resid = model.df_resid
1590
1591        if cov_type == 'nonrobust':
1592            self.cov_type = 'nonrobust'
1593            self.cov_kwds = {
1594                'description': 'Standard Errors assume that the ' +
1595                'covariance matrix of the errors is correctly ' +
1596                'specified.'}
1597            if use_t is None:
1598                use_t = True  # TODO: class default
1599            self.use_t = use_t
1600        else:
1601            if cov_kwds is None:
1602                cov_kwds = {}
1603            if 'use_t' in cov_kwds:
1604                # TODO: we want to get rid of 'use_t' in cov_kwds
1605                use_t_2 = cov_kwds.pop('use_t')
1606                if use_t is None:
1607                    use_t = use_t_2
1608                # TODO: warn or not?
1609            self.get_robustcov_results(cov_type=cov_type, use_self=True,
1610                                       use_t=use_t, **cov_kwds)
1611        for key in kwargs:
1612            setattr(self, key, kwargs[key])
1613
1614    def conf_int(self, alpha=.05, cols=None):
1615        """
1616        Compute the confidence interval of the fitted parameters.
1617
1618        Parameters
1619        ----------
1620        alpha : float, optional
1621            The `alpha` level for the confidence interval. The default
1622            `alpha` = .05 returns a 95% confidence interval.
1623        cols : array_like, optional
1624            Columns to include in returned confidence intervals.
1625
1626        Returns
1627        -------
1628        array_like
1629            The confidence intervals.
1630
1631        Notes
1632        -----
1633        The confidence interval is based on Student's t-distribution.
1634        """
1635        # keep method for docstring for now
1636        ci = super(RegressionResults, self).conf_int(alpha=alpha, cols=cols)
1637        return ci
1638
1639    @cache_readonly
1640    def nobs(self):
1641        """Number of observations n."""
1642        return float(self.model.wexog.shape[0])
1643
1644    @cache_readonly
1645    def fittedvalues(self):
1646        """The predicted values for the original (unwhitened) design."""
1647        return self.model.predict(self.params, self.model.exog)
1648
1649    @cache_readonly
1650    def wresid(self):
1651        """
1652        The residuals of the transformed/whitened regressand and regressor(s).
1653        """
1654        return self.model.wendog - self.model.predict(
1655            self.params, self.model.wexog)
1656
1657    @cache_readonly
1658    def resid(self):
1659        """The residuals of the model."""
1660        return self.model.endog - self.model.predict(
1661            self.params, self.model.exog)
1662
1663    # TODO: fix writable example
1664    @cache_writable()
1665    def scale(self):
1666        """
1667        A scale factor for the covariance matrix.
1668
1669        The Default value is ssr/(n-p).  Note that the square root of `scale`
1670        is often called the standard error of the regression.
1671        """
1672        wresid = self.wresid
1673        return np.dot(wresid, wresid) / self.df_resid
1674
1675    @cache_readonly
1676    def ssr(self):
1677        """Sum of squared (whitened) residuals."""
1678        wresid = self.wresid
1679        return np.dot(wresid, wresid)
1680
1681    @cache_readonly
1682    def centered_tss(self):
1683        """The total (weighted) sum of squares centered about the mean."""
1684        model = self.model
1685        weights = getattr(model, 'weights', None)
1686        sigma = getattr(model, 'sigma', None)
1687        if weights is not None:
1688            mean = np.average(model.endog, weights=weights)
1689            return np.sum(weights * (model.endog - mean)**2)
1690        elif sigma is not None:
1691            # Exactly matches WLS when sigma is diagonal
1692            iota = np.ones_like(model.endog)
1693            iota = model.whiten(iota)
1694            mean = model.wendog.dot(iota) / iota.dot(iota)
1695            err = model.endog - mean
1696            err = model.whiten(err)
1697            return np.sum(err**2)
1698        else:
1699            centered_endog = model.wendog - model.wendog.mean()
1700            return np.dot(centered_endog, centered_endog)
1701
1702    @cache_readonly
1703    def uncentered_tss(self):
1704        """
1705        Uncentered sum of squares.
1706
1707        The sum of the squared values of the (whitened) endogenous response
1708        variable.
1709        """
1710        wendog = self.model.wendog
1711        return np.dot(wendog, wendog)
1712
1713    @cache_readonly
1714    def ess(self):
1715        """
1716        The explained sum of squares.
1717
1718        If a constant is present, the centered total sum of squares minus the
1719        sum of squared residuals. If there is no constant, the uncentered total
1720        sum of squares is used.
1721        """
1722
1723        if self.k_constant:
1724            return self.centered_tss - self.ssr
1725        else:
1726            return self.uncentered_tss - self.ssr
1727
1728    @cache_readonly
1729    def rsquared(self):
1730        """
1731        R-squared of the model.
1732
1733        This is defined here as 1 - `ssr`/`centered_tss` if the constant is
1734        included in the model and 1 - `ssr`/`uncentered_tss` if the constant is
1735        omitted.
1736        """
1737        if self.k_constant:
1738            return 1 - self.ssr/self.centered_tss
1739        else:
1740            return 1 - self.ssr/self.uncentered_tss
1741
1742    @cache_readonly
1743    def rsquared_adj(self):
1744        """
1745        Adjusted R-squared.
1746
1747        This is defined here as 1 - (`nobs`-1)/`df_resid` * (1-`rsquared`)
1748        if a constant is included and 1 - `nobs`/`df_resid` * (1-`rsquared`) if
1749        no constant is included.
1750        """
1751        return 1 - (np.divide(self.nobs - self.k_constant, self.df_resid)
1752                    * (1 - self.rsquared))
1753
1754    @cache_readonly
1755    def mse_model(self):
1756        """
1757        Mean squared error the model.
1758
1759        The explained sum of squares divided by the model degrees of freedom.
1760        """
1761        if np.all(self.df_model == 0.0):
1762            return np.full_like(self.ess, np.nan)
1763        return self.ess/self.df_model
1764
1765    @cache_readonly
1766    def mse_resid(self):
1767        """
1768        Mean squared error of the residuals.
1769
1770        The sum of squared residuals divided by the residual degrees of
1771        freedom.
1772        """
1773        if np.all(self.df_resid == 0.0):
1774            return np.full_like(self.ssr, np.nan)
1775        return self.ssr/self.df_resid
1776
1777    @cache_readonly
1778    def mse_total(self):
1779        """
1780        Total mean squared error.
1781
1782        The uncentered total sum of squares divided by the number of
1783        observations.
1784        """
1785        if np.all(self.df_resid + self.df_model == 0.0):
1786            return np.full_like(self.centered_tss, np.nan)
1787        if self.k_constant:
1788            return self.centered_tss / (self.df_resid + self.df_model)
1789        else:
1790            return self.uncentered_tss / (self.df_resid + self.df_model)
1791
1792    @cache_readonly
1793    def fvalue(self):
1794        """
1795        F-statistic of the fully specified model.
1796
1797        Calculated as the mean squared error of the model divided by the mean
1798        squared error of the residuals if the nonrobust covariance is used.
1799        Otherwise computed using a Wald-like quadratic form that tests whether
1800        all coefficients (excluding the constant) are zero.
1801        """
1802        if hasattr(self, 'cov_type') and self.cov_type != 'nonrobust':
1803            # with heteroscedasticity or correlation robustness
1804            k_params = self.normalized_cov_params.shape[0]
1805            mat = np.eye(k_params)
1806            const_idx = self.model.data.const_idx
1807            # TODO: What if model includes implicit constant, e.g. all
1808            #       dummies but no constant regressor?
1809            # TODO: Restats as LM test by projecting orthogonalizing
1810            #       to constant?
1811            if self.model.data.k_constant == 1:
1812                # if constant is implicit, return nan see #2444
1813                if const_idx is None:
1814                    return np.nan
1815
1816                idx = lrange(k_params)
1817                idx.pop(const_idx)
1818                mat = mat[idx]  # remove constant
1819                if mat.size == 0:  # see  #3642
1820                    return np.nan
1821            ft = self.f_test(mat)
1822            # using backdoor to set another attribute that we already have
1823            self._cache['f_pvalue'] = float(ft.pvalue)
1824            return float(ft.fvalue)
1825        else:
1826            # for standard homoscedastic case
1827            return self.mse_model/self.mse_resid
1828
1829    @cache_readonly
1830    def f_pvalue(self):
1831        """The p-value of the F-statistic."""
1832        # Special case for df_model 0
1833        if self.df_model == 0:
1834            return np.full_like(self.fvalue, np.nan)
1835        return stats.f.sf(self.fvalue, self.df_model, self.df_resid)
1836
1837    @cache_readonly
1838    def bse(self):
1839        """The standard errors of the parameter estimates."""
1840        return np.sqrt(np.diag(self.cov_params()))
1841
1842    @cache_readonly
1843    def aic(self):
1844        r"""
1845        Akaike's information criteria.
1846
1847        For a model with a constant :math:`-2llf + 2(df\_model + 1)`. For a
1848        model without a constant :math:`-2llf + 2(df\_model)`.
1849        """
1850        return self.info_criteria("aic")
1851
1852    @cache_readonly
1853    def bic(self):
1854        r"""
1855        Bayes' information criteria.
1856
1857        For a model with a constant :math:`-2llf + \log(n)(df\_model+1)`.
1858        For a model without a constant :math:`-2llf + \log(n)(df\_model)`.
1859        """
1860        return self.info_criteria("bic")
1861
1862    def info_criteria(self, crit, dk_params=0):
1863        """Return an information criterion for the model.
1864
1865        Parameters
1866        ----------
1867        crit : string
1868            One of 'aic', 'bic', 'aicc' or 'hqic'.
1869        dk_params : int or float
1870            Correction to the number of parameters used in the information
1871            criterion. By default, only mean parameters are included, the
1872            scale parameter is not included in the parameter count.
1873            Use ``dk_params=1`` to include scale in the parameter count.
1874
1875        Returns the given information criterion value.
1876
1877        References
1878        ----------
1879        Burnham KP, Anderson KR (2002). Model Selection and Multimodel
1880        Inference; Springer New York.
1881        """
1882        crit = crit.lower()
1883        k_params = self.df_model + self.k_constant + dk_params
1884
1885        if crit == "aic":
1886            return -2 * self.llf + 2 * k_params
1887        elif crit == "bic":
1888            bic = -2*self.llf + np.log(self.nobs) * k_params
1889            return bic
1890        elif crit == "aicc":
1891            from statsmodels.tools.eval_measures import aicc
1892            return aicc(self.llf, self.nobs, k_params)
1893        elif crit == "hqic":
1894            from statsmodels.tools.eval_measures import hqic
1895            return hqic(self.llf, self.nobs, k_params)
1896
1897    @cache_readonly
1898    def eigenvals(self):
1899        """
1900        Return eigenvalues sorted in decreasing order.
1901        """
1902        if self._wexog_singular_values is not None:
1903            eigvals = self._wexog_singular_values ** 2
1904        else:
1905            eigvals = np.linalg.linalg.eigvalsh(np.dot(self.model.wexog.T,
1906                                                       self.model.wexog))
1907        return np.sort(eigvals)[::-1]
1908
1909    @cache_readonly
1910    def condition_number(self):
1911        """
1912        Return condition number of exogenous matrix.
1913
1914        Calculated as ratio of largest to smallest eigenvalue.
1915        """
1916        eigvals = self.eigenvals
1917        return np.sqrt(eigvals[0]/eigvals[-1])
1918
1919    # TODO: make these properties reset bse
1920    def _HCCM(self, scale):
1921        H = np.dot(self.model.pinv_wexog,
1922                   scale[:, None] * self.model.pinv_wexog.T)
1923        return H
1924
1925    def _abat_diagonal(self, a, b):
1926        # equivalent to np.diag(a @ b @ a.T)
1927        return np.einsum('ij,ik,kj->i', a, a, b)
1928
1929    @cache_readonly
1930    def cov_HC0(self):
1931        """
1932        Heteroscedasticity robust covariance matrix. See HC0_se.
1933        """
1934        self.het_scale = self.wresid**2
1935        cov_HC0 = self._HCCM(self.het_scale)
1936        return cov_HC0
1937
1938    @cache_readonly
1939    def cov_HC1(self):
1940        """
1941        Heteroscedasticity robust covariance matrix. See HC1_se.
1942        """
1943        self.het_scale = self.nobs/(self.df_resid)*(self.wresid**2)
1944        cov_HC1 = self._HCCM(self.het_scale)
1945        return cov_HC1
1946
1947    @cache_readonly
1948    def cov_HC2(self):
1949        """
1950        Heteroscedasticity robust covariance matrix. See HC2_se.
1951        """
1952        wexog = self.model.wexog
1953        h = self._abat_diagonal(wexog, self.normalized_cov_params)
1954        self.het_scale = self.wresid**2/(1-h)
1955        cov_HC2 = self._HCCM(self.het_scale)
1956        return cov_HC2
1957
1958    @cache_readonly
1959    def cov_HC3(self):
1960        """
1961        Heteroscedasticity robust covariance matrix. See HC3_se.
1962        """
1963        wexog = self.model.wexog
1964        h = self._abat_diagonal(wexog, self.normalized_cov_params)
1965        self.het_scale = (self.wresid / (1 - h))**2
1966        cov_HC3 = self._HCCM(self.het_scale)
1967        return cov_HC3
1968
1969    @cache_readonly
1970    def HC0_se(self):
1971        """
1972        White's (1980) heteroskedasticity robust standard errors.
1973
1974        Notes
1975        -----
1976        Defined as sqrt(diag(X.T X)^(-1)X.T diag(e_i^(2)) X(X.T X)^(-1)
1977        where e_i = resid[i].
1978
1979        When HC0_se or cov_HC0 is called the RegressionResults instance will
1980        then have another attribute `het_scale`, which is in this case is just
1981        resid**2.
1982        """
1983        return np.sqrt(np.diag(self.cov_HC0))
1984
1985    @cache_readonly
1986    def HC1_se(self):
1987        """
1988        MacKinnon and White's (1985) heteroskedasticity robust standard errors.
1989
1990        Notes
1991        -----
1992        Defined as sqrt(diag(n/(n-p)*HC_0).
1993
1994        When HC1_se or cov_HC1 is called the RegressionResults instance will
1995        then have another attribute `het_scale`, which is in this case is
1996        n/(n-p)*resid**2.
1997        """
1998        return np.sqrt(np.diag(self.cov_HC1))
1999
2000    @cache_readonly
2001    def HC2_se(self):
2002        """
2003        MacKinnon and White's (1985) heteroskedasticity robust standard errors.
2004
2005        Notes
2006        -----
2007        Defined as (X.T X)^(-1)X.T diag(e_i^(2)/(1-h_ii)) X(X.T X)^(-1)
2008        where h_ii = x_i(X.T X)^(-1)x_i.T
2009
2010        When HC2_se or cov_HC2 is called the RegressionResults instance will
2011        then have another attribute `het_scale`, which is in this case is
2012        resid^(2)/(1-h_ii).
2013        """
2014        return np.sqrt(np.diag(self.cov_HC2))
2015
2016    @cache_readonly
2017    def HC3_se(self):
2018        """
2019        MacKinnon and White's (1985) heteroskedasticity robust standard errors.
2020
2021        Notes
2022        -----
2023        Defined as (X.T X)^(-1)X.T diag(e_i^(2)/(1-h_ii)^(2)) X(X.T X)^(-1)
2024        where h_ii = x_i(X.T X)^(-1)x_i.T.
2025
2026        When HC3_se or cov_HC3 is called the RegressionResults instance will
2027        then have another attribute `het_scale`, which is in this case is
2028        resid^(2)/(1-h_ii)^(2).
2029        """
2030        return np.sqrt(np.diag(self.cov_HC3))
2031
2032    @cache_readonly
2033    def resid_pearson(self):
2034        """
2035        Residuals, normalized to have unit variance.
2036
2037        Returns
2038        -------
2039        array_like
2040            The array `wresid` normalized by the sqrt of the scale to have
2041            unit variance.
2042        """
2043
2044        if not hasattr(self, 'resid'):
2045            raise ValueError('Method requires residuals.')
2046        eps = np.finfo(self.wresid.dtype).eps
2047        if np.sqrt(self.scale) < 10 * eps * self.model.endog.mean():
2048            # do not divide if scale is zero close to numerical precision
2049            warnings.warn(
2050                "All residuals are 0, cannot compute normed residuals.",
2051                RuntimeWarning
2052            )
2053            return self.wresid
2054        else:
2055            return self.wresid / np.sqrt(self.scale)
2056
2057    def _is_nested(self, restricted):
2058        """
2059        Parameters
2060        ----------
2061        restricted : Result instance
2062            The restricted model is assumed to be nested in the current
2063            model. The result instance of the restricted model is required to
2064            have two attributes, residual sum of squares, `ssr`, residual
2065            degrees of freedom, `df_resid`.
2066
2067        Returns
2068        -------
2069        nested : bool
2070            True if nested, otherwise false
2071
2072        Notes
2073        -----
2074        A most nests another model if the regressors in the smaller
2075        model are spanned by the regressors in the larger model and
2076        the regressand is identical.
2077        """
2078
2079        if self.model.nobs != restricted.model.nobs:
2080            return False
2081
2082        full_rank = self.model.rank
2083        restricted_rank = restricted.model.rank
2084        if full_rank <= restricted_rank:
2085            return False
2086
2087        restricted_exog = restricted.model.wexog
2088        full_wresid = self.wresid
2089
2090        scores = restricted_exog * full_wresid[:, None]
2091        score_l2 = np.sqrt(np.mean(scores.mean(0) ** 2))
2092        # TODO: Could be improved, and may fail depending on scale of
2093        # regressors
2094        return np.allclose(score_l2, 0)
2095
2096    def compare_lm_test(self, restricted, demean=True, use_lr=False):
2097        """
2098        Use Lagrange Multiplier test to test a set of linear restrictions.
2099
2100        Parameters
2101        ----------
2102        restricted : Result instance
2103            The restricted model is assumed to be nested in the
2104            current model. The result instance of the restricted model
2105            is required to have two attributes, residual sum of
2106            squares, `ssr`, residual degrees of freedom, `df_resid`.
2107        demean : bool
2108            Flag indicating whether the demean the scores based on the
2109            residuals from the restricted model.  If True, the covariance of
2110            the scores are used and the LM test is identical to the large
2111            sample version of the LR test.
2112        use_lr : bool
2113            A flag indicating whether to estimate the covariance of the model
2114            scores using the unrestricted model. Setting the to True improves
2115            the power of the test.
2116
2117        Returns
2118        -------
2119        lm_value : float
2120            The test statistic which has a chi2 distributed.
2121        p_value : float
2122            The p-value of the test statistic.
2123        df_diff : int
2124            The degrees of freedom of the restriction, i.e. difference in df
2125            between models.
2126
2127        Notes
2128        -----
2129        The LM test examines whether the scores from the restricted model are
2130        0. If the null is true, and the restrictions are valid, then the
2131        parameters of the restricted model should be close to the minimum of
2132        the sum of squared errors, and so the scores should be close to zero,
2133        on average.
2134        """
2135        from numpy.linalg import inv
2136
2137        import statsmodels.stats.sandwich_covariance as sw
2138
2139        if not self._is_nested(restricted):
2140            raise ValueError("Restricted model is not nested by full model.")
2141
2142        wresid = restricted.wresid
2143        wexog = self.model.wexog
2144        scores = wexog * wresid[:, None]
2145
2146        n = self.nobs
2147        df_full = self.df_resid
2148        df_restr = restricted.df_resid
2149        df_diff = (df_restr - df_full)
2150
2151        s = scores.mean(axis=0)
2152        if use_lr:
2153            scores = wexog * self.wresid[:, None]
2154            demean = False
2155
2156        if demean:
2157            scores = scores - scores.mean(0)[None, :]
2158        # Form matters here.  If homoskedastics can be sigma^2 (X'X)^-1
2159        # If Heteroskedastic then the form below is fine
2160        # If HAC then need to use HAC
2161        # If Cluster, should use cluster
2162
2163        cov_type = getattr(self, 'cov_type', 'nonrobust')
2164        if cov_type == 'nonrobust':
2165            sigma2 = np.mean(wresid**2)
2166            xpx = np.dot(wexog.T, wexog) / n
2167            s_inv = inv(sigma2 * xpx)
2168        elif cov_type in ('HC0', 'HC1', 'HC2', 'HC3'):
2169            s_inv = inv(np.dot(scores.T, scores) / n)
2170        elif cov_type == 'HAC':
2171            maxlags = self.cov_kwds['maxlags']
2172            s_inv = inv(sw.S_hac_simple(scores, maxlags) / n)
2173        elif cov_type == 'cluster':
2174            # cluster robust standard errors
2175            groups = self.cov_kwds['groups']
2176            # TODO: Might need demean option in S_crosssection by group?
2177            s_inv = inv(sw.S_crosssection(scores, groups))
2178        else:
2179            raise ValueError('Only nonrobust, HC, HAC and cluster are ' +
2180                             'currently connected')
2181
2182        lm_value = n * (s @ s_inv @ s.T)
2183        p_value = stats.chi2.sf(lm_value, df_diff)
2184        return lm_value, p_value, df_diff
2185
2186    def compare_f_test(self, restricted):
2187        """
2188        Use F test to test whether restricted model is correct.
2189
2190        Parameters
2191        ----------
2192        restricted : Result instance
2193            The restricted model is assumed to be nested in the
2194            current model. The result instance of the restricted model
2195            is required to have two attributes, residual sum of
2196            squares, `ssr`, residual degrees of freedom, `df_resid`.
2197
2198        Returns
2199        -------
2200        f_value : float
2201            The test statistic which has an F distribution.
2202        p_value : float
2203            The p-value of the test statistic.
2204        df_diff : int
2205            The degrees of freedom of the restriction, i.e. difference in
2206            df between models.
2207
2208        Notes
2209        -----
2210        See mailing list discussion October 17,
2211
2212        This test compares the residual sum of squares of the two
2213        models.  This is not a valid test, if there is unspecified
2214        heteroscedasticity or correlation. This method will issue a
2215        warning if this is detected but still return the results under
2216        the assumption of homoscedasticity and no autocorrelation
2217        (sphericity).
2218        """
2219
2220        has_robust1 = getattr(self, 'cov_type', 'nonrobust') != 'nonrobust'
2221        has_robust2 = (getattr(restricted, 'cov_type', 'nonrobust') !=
2222                       'nonrobust')
2223
2224        if has_robust1 or has_robust2:
2225            warnings.warn('F test for comparison is likely invalid with ' +
2226                          'robust covariance, proceeding anyway',
2227                          InvalidTestWarning)
2228
2229        ssr_full = self.ssr
2230        ssr_restr = restricted.ssr
2231        df_full = self.df_resid
2232        df_restr = restricted.df_resid
2233
2234        df_diff = (df_restr - df_full)
2235        f_value = (ssr_restr - ssr_full) / df_diff / ssr_full * df_full
2236        p_value = stats.f.sf(f_value, df_diff, df_full)
2237        return f_value, p_value, df_diff
2238
2239    def compare_lr_test(self, restricted, large_sample=False):
2240        """
2241        Likelihood ratio test to test whether restricted model is correct.
2242
2243        Parameters
2244        ----------
2245        restricted : Result instance
2246            The restricted model is assumed to be nested in the current model.
2247            The result instance of the restricted model is required to have two
2248            attributes, residual sum of squares, `ssr`, residual degrees of
2249            freedom, `df_resid`.
2250
2251        large_sample : bool
2252            Flag indicating whether to use a heteroskedasticity robust version
2253            of the LR test, which is a modified LM test.
2254
2255        Returns
2256        -------
2257        lr_stat : float
2258            The likelihood ratio which is chisquare distributed with df_diff
2259            degrees of freedom.
2260        p_value : float
2261            The p-value of the test statistic.
2262        df_diff : int
2263            The degrees of freedom of the restriction, i.e. difference in df
2264            between models.
2265
2266        Notes
2267        -----
2268        The exact likelihood ratio is valid for homoskedastic data,
2269        and is defined as
2270
2271        .. math:: D=-2\\log\\left(\\frac{\\mathcal{L}_{null}}
2272           {\\mathcal{L}_{alternative}}\\right)
2273
2274        where :math:`\\mathcal{L}` is the likelihood of the
2275        model. With :math:`D` distributed as chisquare with df equal
2276        to difference in number of parameters or equivalently
2277        difference in residual degrees of freedom.
2278
2279        The large sample version of the likelihood ratio is defined as
2280
2281        .. math:: D=n s^{\\prime}S^{-1}s
2282
2283        where :math:`s=n^{-1}\\sum_{i=1}^{n} s_{i}`
2284
2285        .. math:: s_{i} = x_{i,alternative} \\epsilon_{i,null}
2286
2287        is the average score of the model evaluated using the
2288        residuals from null model and the regressors from the
2289        alternative model and :math:`S` is the covariance of the
2290        scores, :math:`s_{i}`.  The covariance of the scores is
2291        estimated using the same estimator as in the alternative
2292        model.
2293
2294        This test compares the loglikelihood of the two models.  This
2295        may not be a valid test, if there is unspecified
2296        heteroscedasticity or correlation. This method will issue a
2297        warning if this is detected but still return the results
2298        without taking unspecified heteroscedasticity or correlation
2299        into account.
2300
2301        This test compares the loglikelihood of the two models.  This
2302        may not be a valid test, if there is unspecified
2303        heteroscedasticity or correlation. This method will issue a
2304        warning if this is detected but still return the results
2305        without taking unspecified heteroscedasticity or correlation
2306        into account.
2307
2308        is the average score of the model evaluated using the
2309        residuals from null model and the regressors from the
2310        alternative model and :math:`S` is the covariance of the
2311        scores, :math:`s_{i}`.  The covariance of the scores is
2312        estimated using the same estimator as in the alternative
2313        model.
2314        """
2315        # TODO: put into separate function, needs tests
2316
2317        # See mailing list discussion October 17,
2318
2319        if large_sample:
2320            return self.compare_lm_test(restricted, use_lr=True)
2321
2322        has_robust1 = (getattr(self, 'cov_type', 'nonrobust') != 'nonrobust')
2323        has_robust2 = (
2324            getattr(restricted, 'cov_type', 'nonrobust') != 'nonrobust')
2325
2326        if has_robust1 or has_robust2:
2327            warnings.warn('Likelihood Ratio test is likely invalid with ' +
2328                          'robust covariance, proceeding anyway',
2329                          InvalidTestWarning)
2330
2331        llf_full = self.llf
2332        llf_restr = restricted.llf
2333        df_full = self.df_resid
2334        df_restr = restricted.df_resid
2335
2336        lrdf = (df_restr - df_full)
2337        lrstat = -2*(llf_restr - llf_full)
2338        lr_pvalue = stats.chi2.sf(lrstat, lrdf)
2339
2340        return lrstat, lr_pvalue, lrdf
2341
2342    def get_robustcov_results(self, cov_type='HC1', use_t=None, **kwargs):
2343        """
2344        Create new results instance with robust covariance as default.
2345
2346        Parameters
2347        ----------
2348        cov_type : str
2349            The type of robust sandwich estimator to use. See Notes below.
2350        use_t : bool
2351            If true, then the t distribution is used for inference.
2352            If false, then the normal distribution is used.
2353            If `use_t` is None, then an appropriate default is used, which is
2354            `True` if the cov_type is nonrobust, and `False` in all other
2355            cases.
2356        **kwargs
2357            Required or optional arguments for robust covariance calculation.
2358            See Notes below.
2359
2360        Returns
2361        -------
2362        RegressionResults
2363            This method creates a new results instance with the
2364            requested robust covariance as the default covariance of
2365            the parameters.  Inferential statistics like p-values and
2366            hypothesis tests will be based on this covariance matrix.
2367
2368        Notes
2369        -----
2370        The following covariance types and required or optional arguments are
2371        currently available:
2372
2373        - 'fixed scale' uses a predefined scale
2374
2375          ``scale``: float, optional
2376            Argument to set the scale. Default is 1.
2377
2378        - 'HC0', 'HC1', 'HC2', 'HC3': heteroscedasticity robust covariance
2379
2380          - no keyword arguments
2381
2382        - 'HAC': heteroskedasticity-autocorrelation robust covariance
2383
2384          ``maxlag`` :  integer, required
2385            number of lags to use
2386
2387          ``kernel`` : {callable, str}, optional
2388            kernels currently available kernels are ['bartlett', 'uniform'],
2389            default is Bartlett
2390
2391          ``use_correction``: bool, optional
2392            If true, use small sample correction
2393
2394        - 'cluster': clustered covariance estimator
2395
2396          ``groups`` : array_like[int], required :
2397            Integer-valued index of clusters or groups.
2398
2399          ``use_correction``: bool, optional
2400            If True the sandwich covariance is calculated with a small
2401            sample correction.
2402            If False the sandwich covariance is calculated without
2403            small sample correction.
2404
2405          ``df_correction``: bool, optional
2406            If True (default), then the degrees of freedom for the
2407            inferential statistics and hypothesis tests, such as
2408            pvalues, f_pvalue, conf_int, and t_test and f_test, are
2409            based on the number of groups minus one instead of the
2410            total number of observations minus the number of explanatory
2411            variables. `df_resid` of the results instance is also
2412            adjusted. When `use_t` is also True, then pvalues are
2413            computed using the Student's t distribution using the
2414            corrected values. These may differ substantially from
2415            p-values based on the normal is the number of groups is
2416            small.
2417            If False, then `df_resid` of the results instance is not
2418            adjusted.
2419
2420        - 'hac-groupsum': Driscoll and Kraay, heteroscedasticity and
2421          autocorrelation robust covariance for panel data
2422          # TODO: more options needed here
2423
2424          ``time`` : array_like, required
2425            index of time periods
2426          ``maxlag`` : integer, required
2427            number of lags to use
2428          ``kernel`` : {callable, str}, optional
2429            The available kernels are ['bartlett', 'uniform']. The default is
2430            Bartlett.
2431          ``use_correction`` : {False, 'hac', 'cluster'}, optional
2432            If False the the sandwich covariance is calculated without small
2433            sample correction. If `use_correction = 'cluster'` (default),
2434            then the same small sample correction as in the case of
2435            `covtype='cluster'` is used.
2436          ``df_correction`` : bool, optional
2437            The adjustment to df_resid, see cov_type 'cluster' above
2438
2439        - 'hac-panel': heteroscedasticity and autocorrelation robust standard
2440          errors in panel data. The data needs to be sorted in this case, the
2441          time series for each panel unit or cluster need to be stacked. The
2442          membership to a time series of an individual or group can be either
2443          specified by group indicators or by increasing time periods. One of
2444          ``groups`` or ``time`` is required. # TODO: we need more options here
2445
2446          ``groups`` : array_like[int]
2447            indicator for groups
2448          ``time`` : array_like[int]
2449            index of time periods
2450          ``maxlag`` : int, required
2451            number of lags to use
2452          ``kernel`` : {callable, str}, optional
2453            Available kernels are ['bartlett', 'uniform'], default
2454            is Bartlett
2455          ``use_correction`` : {False, 'hac', 'cluster'}, optional
2456            If False the sandwich covariance is calculated without
2457            small sample correction.
2458          ``df_correction`` : bool, optional
2459            Adjustment to df_resid, see cov_type 'cluster' above
2460
2461        **Reminder**: ``use_correction`` in "hac-groupsum" and "hac-panel" is
2462        not bool, needs to be in {False, 'hac', 'cluster'}.
2463
2464        .. todo:: Currently there is no check for extra or misspelled keywords,
2465             except in the case of cov_type `HCx`
2466        """
2467        from statsmodels.base.covtype import descriptions, normalize_cov_type
2468        import statsmodels.stats.sandwich_covariance as sw
2469
2470        cov_type = normalize_cov_type(cov_type)
2471
2472        if 'kernel' in kwargs:
2473            kwargs['weights_func'] = kwargs.pop('kernel')
2474        if 'weights_func' in kwargs and not callable(kwargs['weights_func']):
2475            kwargs['weights_func'] = sw.kernel_dict[kwargs['weights_func']]
2476
2477        # TODO: make separate function that returns a robust cov plus info
2478        use_self = kwargs.pop('use_self', False)
2479        if use_self:
2480            res = self
2481        else:
2482            res = self.__class__(
2483                self.model, self.params,
2484                normalized_cov_params=self.normalized_cov_params,
2485                scale=self.scale)
2486
2487        res.cov_type = cov_type
2488        # use_t might already be defined by the class, and already set
2489        if use_t is None:
2490            use_t = self.use_t
2491        res.cov_kwds = {'use_t': use_t}  # store for information
2492        res.use_t = use_t
2493
2494        adjust_df = False
2495        if cov_type in ['cluster', 'hac-panel', 'hac-groupsum']:
2496            df_correction = kwargs.get('df_correction', None)
2497            # TODO: check also use_correction, do I need all combinations?
2498            if df_correction is not False:  # i.e. in [None, True]:
2499                # user did not explicitely set it to False
2500                adjust_df = True
2501
2502        res.cov_kwds['adjust_df'] = adjust_df
2503
2504        # verify and set kwargs, and calculate cov
2505        # TODO: this should be outsourced in a function so we can reuse it in
2506        #       other models
2507        # TODO: make it DRYer   repeated code for checking kwargs
2508        if cov_type in ['fixed scale', 'fixed_scale']:
2509            res.cov_kwds['description'] = descriptions['fixed_scale']
2510
2511            res.cov_kwds['scale'] = scale = kwargs.get('scale', 1.)
2512            res.cov_params_default = scale * res.normalized_cov_params
2513        elif cov_type.upper() in ('HC0', 'HC1', 'HC2', 'HC3'):
2514            if kwargs:
2515                raise ValueError('heteroscedasticity robust covariance '
2516                                 'does not use keywords')
2517            res.cov_kwds['description'] = descriptions[cov_type.upper()]
2518            res.cov_params_default = getattr(self, 'cov_' + cov_type.upper())
2519        elif cov_type.lower() == 'hac':
2520            # TODO: check if required, default in cov_hac_simple
2521            maxlags = kwargs['maxlags']
2522            res.cov_kwds['maxlags'] = maxlags
2523            weights_func = kwargs.get('weights_func', sw.weights_bartlett)
2524            res.cov_kwds['weights_func'] = weights_func
2525            use_correction = kwargs.get('use_correction', False)
2526            res.cov_kwds['use_correction'] = use_correction
2527            res.cov_kwds['description'] = descriptions['HAC'].format(
2528                maxlags=maxlags,
2529                correction=['without', 'with'][use_correction])
2530
2531            res.cov_params_default = sw.cov_hac_simple(
2532                self, nlags=maxlags, weights_func=weights_func,
2533                use_correction=use_correction)
2534        elif cov_type.lower() == 'cluster':
2535            # cluster robust standard errors, one- or two-way
2536            groups = kwargs['groups']
2537            if not hasattr(groups, 'shape'):
2538                groups = np.asarray(groups).T
2539
2540            if groups.ndim >= 2:
2541                groups = groups.squeeze()
2542
2543            res.cov_kwds['groups'] = groups
2544            use_correction = kwargs.get('use_correction', True)
2545            res.cov_kwds['use_correction'] = use_correction
2546            if groups.ndim == 1:
2547                if adjust_df:
2548                    # need to find number of groups
2549                    # duplicate work
2550                    self.n_groups = n_groups = len(np.unique(groups))
2551                res.cov_params_default = sw.cov_cluster(
2552                    self, groups, use_correction=use_correction)
2553
2554            elif groups.ndim == 2:
2555                if hasattr(groups, 'values'):
2556                    groups = groups.values
2557
2558                if adjust_df:
2559                    # need to find number of groups
2560                    # duplicate work
2561                    n_groups0 = len(np.unique(groups[:, 0]))
2562                    n_groups1 = len(np.unique(groups[:, 1]))
2563                    self.n_groups = (n_groups0, n_groups1)
2564                    n_groups = min(n_groups0, n_groups1)  # use for adjust_df
2565
2566                # Note: sw.cov_cluster_2groups has 3 returns
2567                res.cov_params_default = sw.cov_cluster_2groups(
2568                    self, groups, use_correction=use_correction)[0]
2569            else:
2570                raise ValueError('only two groups are supported')
2571            res.cov_kwds['description'] = descriptions['cluster']
2572
2573        elif cov_type.lower() == 'hac-panel':
2574            # cluster robust standard errors
2575            res.cov_kwds['time'] = time = kwargs.get('time', None)
2576            res.cov_kwds['groups'] = groups = kwargs.get('groups', None)
2577            # TODO: nlags is currently required
2578            # nlags = kwargs.get('nlags', True)
2579            # res.cov_kwds['nlags'] = nlags
2580            # TODO: `nlags` or `maxlags`
2581            res.cov_kwds['maxlags'] = maxlags = kwargs['maxlags']
2582            use_correction = kwargs.get('use_correction', 'hac')
2583            res.cov_kwds['use_correction'] = use_correction
2584            weights_func = kwargs.get('weights_func', sw.weights_bartlett)
2585            res.cov_kwds['weights_func'] = weights_func
2586            if groups is not None:
2587                groups = np.asarray(groups)
2588                tt = (np.nonzero(groups[:-1] != groups[1:])[0] + 1).tolist()
2589                nobs_ = len(groups)
2590            elif time is not None:
2591                time = np.asarray(time)
2592                # TODO: clumsy time index in cov_nw_panel
2593                tt = (np.nonzero(time[1:] < time[:-1])[0] + 1).tolist()
2594                nobs_ = len(time)
2595            else:
2596                raise ValueError('either time or groups needs to be given')
2597            groupidx = lzip([0] + tt, tt + [nobs_])
2598            self.n_groups = n_groups = len(groupidx)
2599            res.cov_params_default = sw.cov_nw_panel(self, maxlags, groupidx,
2600                                                     weights_func=weights_func,
2601                                                     use_correction=use_correction)
2602            res.cov_kwds['description'] = descriptions['HAC-Panel']
2603
2604        elif cov_type.lower() == 'hac-groupsum':
2605            # Driscoll-Kraay standard errors
2606            res.cov_kwds['time'] = time = kwargs['time']
2607            # TODO: nlags is currently required
2608            # nlags = kwargs.get('nlags', True)
2609            # res.cov_kwds['nlags'] = nlags
2610            # TODO: `nlags` or `maxlags`
2611            res.cov_kwds['maxlags'] = maxlags = kwargs['maxlags']
2612            use_correction = kwargs.get('use_correction', 'cluster')
2613            res.cov_kwds['use_correction'] = use_correction
2614            weights_func = kwargs.get('weights_func', sw.weights_bartlett)
2615            res.cov_kwds['weights_func'] = weights_func
2616            if adjust_df:
2617                # need to find number of groups
2618                tt = (np.nonzero(time[1:] < time[:-1])[0] + 1)
2619                self.n_groups = n_groups = len(tt) + 1
2620            res.cov_params_default = sw.cov_nw_groupsum(
2621                self, maxlags, time, weights_func=weights_func,
2622                use_correction=use_correction)
2623            res.cov_kwds['description'] = descriptions['HAC-Groupsum']
2624        else:
2625            raise ValueError('cov_type not recognized. See docstring for ' +
2626                             'available options and spelling')
2627
2628        if adjust_df:
2629            # Note: df_resid is used for scale and others, add new attribute
2630            res.df_resid_inference = n_groups - 1
2631
2632        return res
2633
2634    @Appender(pred.get_prediction.__doc__)
2635    def get_prediction(self, exog=None, transform=True, weights=None,
2636                       row_labels=None, **kwargs):
2637
2638        return pred.get_prediction(
2639            self, exog=exog, transform=transform, weights=weights,
2640            row_labels=row_labels, **kwargs)
2641
2642    def summary(self, yname=None, xname=None, title=None, alpha=.05, slim=False):
2643        """
2644        Summarize the Regression Results.
2645
2646        Parameters
2647        ----------
2648        yname : str, optional
2649            Name of endogenous (response) variable. The Default is `y`.
2650        xname : list[str], optional
2651            Names for the exogenous variables. Default is `var_##` for ## in
2652            the number of regressors. Must match the number of parameters
2653            in the model.
2654        title : str, optional
2655            Title for the top table. If not None, then this replaces the
2656            default title.
2657        alpha : float
2658            The significance level for the confidence intervals.
2659
2660        Returns
2661        -------
2662        Summary
2663            Instance holding the summary tables and text, which can be printed
2664            or converted to various output formats.
2665
2666        See Also
2667        --------
2668        statsmodels.iolib.summary.Summary : A class that holds summary results.
2669        """
2670        from statsmodels.stats.stattools import (
2671            durbin_watson,
2672            jarque_bera,
2673            omni_normtest,
2674            )
2675
2676        jb, jbpv, skew, kurtosis = jarque_bera(self.wresid)
2677        omni, omnipv = omni_normtest(self.wresid)
2678
2679        eigvals = self.eigenvals
2680        condno = self.condition_number
2681
2682        # TODO: Avoid adding attributes in non-__init__
2683        self.diagn = dict(jb=jb, jbpv=jbpv, skew=skew, kurtosis=kurtosis,
2684                          omni=omni, omnipv=omnipv, condno=condno,
2685                          mineigval=eigvals[-1])
2686
2687        # TODO not used yet
2688        # diagn_left_header = ['Models stats']
2689        # diagn_right_header = ['Residual stats']
2690
2691        # TODO: requiring list/iterable is a bit annoying
2692        #   need more control over formatting
2693        # TODO: default do not work if it's not identically spelled
2694
2695        top_left = [('Dep. Variable:', None),
2696                    ('Model:', None),
2697                    ('Method:', ['Least Squares']),
2698                    ('Date:', None),
2699                    ('Time:', None),
2700                    ('No. Observations:', None),
2701                    ('Df Residuals:', None),
2702                    ('Df Model:', None),
2703                    ]
2704
2705        if hasattr(self, 'cov_type'):
2706            top_left.append(('Covariance Type:', [self.cov_type]))
2707
2708        rsquared_type = '' if self.k_constant else ' (uncentered)'
2709        top_right = [('R-squared' + rsquared_type + ':',
2710                      ["%#8.3f" % self.rsquared]),
2711                     ('Adj. R-squared' + rsquared_type + ':',
2712                      ["%#8.3f" % self.rsquared_adj]),
2713                     ('F-statistic:', ["%#8.4g" % self.fvalue]),
2714                     ('Prob (F-statistic):', ["%#6.3g" % self.f_pvalue]),
2715                     ('Log-Likelihood:', None),
2716                     ('AIC:', ["%#8.4g" % self.aic]),
2717                     ('BIC:', ["%#8.4g" % self.bic])
2718                     ]
2719
2720        if slim:
2721            slimlist = ['Dep. Variable:', 'Model:', 'No. Observations:',
2722                        'Covariance Type:', 'R-squared:', 'Adj. R-squared:',
2723                        'F-statistic:', 'Prob (F-statistic):']
2724            diagn_left = []
2725            diagn_right = []
2726            top_left = [elem for elem in top_left if elem[0] in slimlist]
2727            top_right = [elem for elem in top_right if elem[0] in slimlist]
2728        else:
2729            diagn_left = [('Omnibus:', ["%#6.3f" % omni]),
2730                          ('Prob(Omnibus):', ["%#6.3f" % omnipv]),
2731                          ('Skew:', ["%#6.3f" % skew]),
2732                          ('Kurtosis:', ["%#6.3f" % kurtosis])
2733                          ]
2734
2735            diagn_right = [('Durbin-Watson:',
2736                            ["%#8.3f" % durbin_watson(self.wresid)]
2737                            ),
2738                           ('Jarque-Bera (JB):', ["%#8.3f" % jb]),
2739                           ('Prob(JB):', ["%#8.3g" % jbpv]),
2740                           ('Cond. No.', ["%#8.3g" % condno])
2741                           ]
2742
2743        if title is None:
2744            title = self.model.__class__.__name__ + ' ' + "Regression Results"
2745
2746        # create summary table instance
2747        from statsmodels.iolib.summary import Summary
2748        smry = Summary()
2749        smry.add_table_2cols(self, gleft=top_left, gright=top_right,
2750                             yname=yname, xname=xname, title=title)
2751        smry.add_table_params(self, yname=yname, xname=xname, alpha=alpha,
2752                              use_t=self.use_t)
2753        if not slim:
2754            smry.add_table_2cols(self, gleft=diagn_left, gright=diagn_right,
2755                                 yname=yname, xname=xname,
2756                                 title="")
2757
2758        # add warnings/notes, added to text format only
2759        etext = []
2760        if not self.k_constant:
2761            etext.append(
2762                "R² is computed without centering (uncentered) since the "
2763                "model does not contain a constant."
2764            )
2765        if hasattr(self, 'cov_type'):
2766            etext.append(self.cov_kwds['description'])
2767        if self.model.exog.shape[0] < self.model.exog.shape[1]:
2768            wstr = "The input rank is higher than the number of observations."
2769            etext.append(wstr)
2770        if eigvals[-1] < 1e-10:
2771            wstr = "The smallest eigenvalue is %6.3g. This might indicate "
2772            wstr += "that there are\n"
2773            wstr += "strong multicollinearity problems or that the design "
2774            wstr += "matrix is singular."
2775            wstr = wstr % eigvals[-1]
2776            etext.append(wstr)
2777        elif condno > 1000:  # TODO: what is recommended?
2778            wstr = "The condition number is large, %6.3g. This might "
2779            wstr += "indicate that there are\n"
2780            wstr += "strong multicollinearity or other numerical "
2781            wstr += "problems."
2782            wstr = wstr % condno
2783            etext.append(wstr)
2784
2785        if etext:
2786            etext = ["[{0}] {1}".format(i + 1, text)
2787                     for i, text in enumerate(etext)]
2788            etext.insert(0, "Notes:")
2789            smry.add_extra_txt(etext)
2790
2791        return smry
2792
2793    def summary2(self, yname=None, xname=None, title=None, alpha=.05,
2794                 float_format="%.4f"):
2795        """
2796        Experimental summary function to summarize the regression results.
2797
2798        Parameters
2799        ----------
2800        yname : str
2801            The name of the dependent variable (optional).
2802        xname : list[str], optional
2803            Names for the exogenous variables. Default is `var_##` for ## in
2804            the number of regressors. Must match the number of parameters
2805            in the model.
2806        title : str, optional
2807            Title for the top table. If not None, then this replaces the
2808            default title.
2809        alpha : float
2810            The significance level for the confidence intervals.
2811        float_format : str
2812            The format for floats in parameters summary.
2813
2814        Returns
2815        -------
2816        Summary
2817            Instance holding the summary tables and text, which can be printed
2818            or converted to various output formats.
2819
2820        See Also
2821        --------
2822        statsmodels.iolib.summary2.Summary
2823            A class that holds summary results.
2824        """
2825        # Diagnostics
2826        from statsmodels.stats.stattools import (
2827            durbin_watson,
2828            jarque_bera,
2829            omni_normtest,
2830        )
2831
2832        jb, jbpv, skew, kurtosis = jarque_bera(self.wresid)
2833        omni, omnipv = omni_normtest(self.wresid)
2834        dw = durbin_watson(self.wresid)
2835        eigvals = self.eigenvals
2836        condno = self.condition_number
2837        eigvals = np.sort(eigvals)  # in increasing order
2838        diagnostic = dict([
2839            ('Omnibus:',  "%.3f" % omni),
2840            ('Prob(Omnibus):', "%.3f" % omnipv),
2841            ('Skew:', "%.3f" % skew),
2842            ('Kurtosis:', "%.3f" % kurtosis),
2843            ('Durbin-Watson:', "%.3f" % dw),
2844            ('Jarque-Bera (JB):', "%.3f" % jb),
2845            ('Prob(JB):', "%.3f" % jbpv),
2846            ('Condition No.:', "%.0f" % condno)
2847            ])
2848
2849        # Summary
2850        from statsmodels.iolib import summary2
2851        smry = summary2.Summary()
2852        smry.add_base(results=self, alpha=alpha, float_format=float_format,
2853                      xname=xname, yname=yname, title=title)
2854        smry.add_dict(diagnostic)
2855
2856        # Warnings
2857        if eigvals[-1] < 1e-10:
2858            warn = "The smallest eigenvalue is %6.3g. This might indicate that\
2859            there are strong multicollinearity problems or that the design\
2860            matrix is singular." % eigvals[-1]
2861            smry.add_text(warn)
2862        if condno > 1000:
2863            warn = "* The condition number is large (%.g). This might indicate \
2864            strong multicollinearity or other numerical problems." % condno
2865            smry.add_text(warn)
2866
2867        return smry
2868
2869
2870class OLSResults(RegressionResults):
2871    """
2872    Results class for for an OLS model.
2873
2874    Parameters
2875    ----------
2876    model : RegressionModel
2877        The regression model instance.
2878    params : ndarray
2879        The estimated parameters.
2880    normalized_cov_params : ndarray
2881        The normalized covariance parameters.
2882    scale : float
2883        The estimated scale of the residuals.
2884    cov_type : str
2885        The covariance estimator used in the results.
2886    cov_kwds : dict
2887        Additional keywords used in the covariance specification.
2888    use_t : bool
2889        Flag indicating to use the Student's t in inference.
2890    **kwargs
2891        Additional keyword arguments used to initialize the results.
2892
2893    See Also
2894    --------
2895    RegressionResults
2896        Results store for WLS and GLW models.
2897
2898    Notes
2899    -----
2900    Most of the methods and attributes are inherited from RegressionResults.
2901    The special methods that are only available for OLS are:
2902
2903    - get_influence
2904    - outlier_test
2905    - el_test
2906    - conf_int_el
2907    """
2908
2909    def get_influence(self):
2910        """
2911        Calculate influence and outlier measures.
2912
2913        Returns
2914        -------
2915        OLSInfluence
2916            The instance containing methods to calculate the main influence and
2917            outlier measures for the OLS regression.
2918
2919        See Also
2920        --------
2921        statsmodels.stats.outliers_influence.OLSInfluence
2922            A class that exposes methods to examine observation influence.
2923        """
2924        from statsmodels.stats.outliers_influence import OLSInfluence
2925        return OLSInfluence(self)
2926
2927    def outlier_test(self, method='bonf', alpha=.05, labels=None,
2928                     order=False, cutoff=None):
2929        """
2930        Test observations for outliers according to method.
2931
2932        Parameters
2933        ----------
2934        method : str
2935            The method to use in the outlier test.  Must be one of:
2936
2937            - `bonferroni` : one-step correction
2938            - `sidak` : one-step correction
2939            - `holm-sidak` :
2940            - `holm` :
2941            - `simes-hochberg` :
2942            - `hommel` :
2943            - `fdr_bh` : Benjamini/Hochberg
2944            - `fdr_by` : Benjamini/Yekutieli
2945
2946            See `statsmodels.stats.multitest.multipletests` for details.
2947        alpha : float
2948            The familywise error rate (FWER).
2949        labels : None or array_like
2950            If `labels` is not None, then it will be used as index to the
2951            returned pandas DataFrame. See also Returns below.
2952        order : bool
2953            Whether or not to order the results by the absolute value of the
2954            studentized residuals. If labels are provided they will also be
2955            sorted.
2956        cutoff : None or float in [0, 1]
2957            If cutoff is not None, then the return only includes observations
2958            with multiple testing corrected p-values strictly below the cutoff.
2959            The returned array or dataframe can be empty if t.
2960
2961        Returns
2962        -------
2963        array_like
2964            Returns either an ndarray or a DataFrame if labels is not None.
2965            Will attempt to get labels from model_results if available. The
2966            columns are the Studentized residuals, the unadjusted p-value,
2967            and the corrected p-value according to method.
2968
2969        Notes
2970        -----
2971        The unadjusted p-value is stats.t.sf(abs(resid), df) where
2972        df = df_resid - 1.
2973        """
2974        from statsmodels.stats.outliers_influence import outlier_test
2975        return outlier_test(self, method, alpha, labels=labels,
2976                            order=order, cutoff=cutoff)
2977
2978    def el_test(self, b0_vals, param_nums, return_weights=0, ret_params=0,
2979                method='nm', stochastic_exog=1):
2980        """
2981        Test single or joint hypotheses using Empirical Likelihood.
2982
2983        Parameters
2984        ----------
2985        b0_vals : 1darray
2986            The hypothesized value of the parameter to be tested.
2987        param_nums : 1darray
2988            The parameter number to be tested.
2989        return_weights : bool
2990            If true, returns the weights that optimize the likelihood
2991            ratio at b0_vals. The default is False.
2992        ret_params : bool
2993            If true, returns the parameter vector that maximizes the likelihood
2994            ratio at b0_vals.  Also returns the weights.  The default is False.
2995        method : str
2996            Can either be 'nm' for Nelder-Mead or 'powell' for Powell.  The
2997            optimization method that optimizes over nuisance parameters.
2998            The default is 'nm'.
2999        stochastic_exog : bool
3000            When True, the exogenous variables are assumed to be stochastic.
3001            When the regressors are nonstochastic, moment conditions are
3002            placed on the exogenous variables.  Confidence intervals for
3003            stochastic regressors are at least as large as non-stochastic
3004            regressors. The default is True.
3005
3006        Returns
3007        -------
3008        tuple
3009            The p-value and -2 times the log-likelihood ratio for the
3010            hypothesized values.
3011
3012        Examples
3013        --------
3014        >>> import statsmodels.api as sm
3015        >>> data = sm.datasets.stackloss.load()
3016        >>> endog = data.endog
3017        >>> exog = sm.add_constant(data.exog)
3018        >>> model = sm.OLS(endog, exog)
3019        >>> fitted = model.fit()
3020        >>> fitted.params
3021        >>> array([-39.91967442,   0.7156402 ,   1.29528612,  -0.15212252])
3022        >>> fitted.rsquared
3023        >>> 0.91357690446068196
3024        >>> # Test that the slope on the first variable is 0
3025        >>> fitted.el_test([0], [1])
3026        >>> (27.248146353888796, 1.7894660442330235e-07)
3027        """
3028        params = np.copy(self.params)
3029        opt_fun_inst = _ELRegOpts()  # to store weights
3030        if len(param_nums) == len(params):
3031            llr = opt_fun_inst._opt_nuis_regress(
3032                [],
3033                param_nums=param_nums,
3034                endog=self.model.endog,
3035                exog=self.model.exog,
3036                nobs=self.model.nobs,
3037                nvar=self.model.exog.shape[1],
3038                params=params,
3039                b0_vals=b0_vals,
3040                stochastic_exog=stochastic_exog)
3041            pval = 1 - stats.chi2.cdf(llr, len(param_nums))
3042            if return_weights:
3043                return llr, pval, opt_fun_inst.new_weights
3044            else:
3045                return llr, pval
3046        x0 = np.delete(params, param_nums)
3047        args = (param_nums, self.model.endog, self.model.exog,
3048                self.model.nobs, self.model.exog.shape[1], params,
3049                b0_vals, stochastic_exog)
3050        if method == 'nm':
3051            llr = optimize.fmin(opt_fun_inst._opt_nuis_regress, x0,
3052                                maxfun=10000, maxiter=10000, full_output=1,
3053                                disp=0, args=args)[1]
3054        if method == 'powell':
3055            llr = optimize.fmin_powell(opt_fun_inst._opt_nuis_regress, x0,
3056                                       full_output=1, disp=0,
3057                                       args=args)[1]
3058
3059        pval = 1 - stats.chi2.cdf(llr, len(param_nums))
3060        if ret_params:
3061            return llr, pval, opt_fun_inst.new_weights, opt_fun_inst.new_params
3062        elif return_weights:
3063            return llr, pval, opt_fun_inst.new_weights
3064        else:
3065            return llr, pval
3066
3067    def conf_int_el(self, param_num, sig=.05, upper_bound=None,
3068                    lower_bound=None, method='nm', stochastic_exog=True):
3069        """
3070        Compute the confidence interval using Empirical Likelihood.
3071
3072        Parameters
3073        ----------
3074        param_num : float
3075            The parameter for which the confidence interval is desired.
3076        sig : float
3077            The significance level.  Default is 0.05.
3078        upper_bound : float
3079            The maximum value the upper limit can be.  Default is the
3080            99.9% confidence value under OLS assumptions.
3081        lower_bound : float
3082            The minimum value the lower limit can be.  Default is the 99.9%
3083            confidence value under OLS assumptions.
3084        method : str
3085            Can either be 'nm' for Nelder-Mead or 'powell' for Powell.  The
3086            optimization method that optimizes over nuisance parameters.
3087            The default is 'nm'.
3088        stochastic_exog : bool
3089            When True, the exogenous variables are assumed to be stochastic.
3090            When the regressors are nonstochastic, moment conditions are
3091            placed on the exogenous variables.  Confidence intervals for
3092            stochastic regressors are at least as large as non-stochastic
3093            regressors.  The default is True.
3094
3095        Returns
3096        -------
3097        lowerl : float
3098            The lower bound of the confidence interval.
3099        upperl : float
3100            The upper bound of the confidence interval.
3101
3102        See Also
3103        --------
3104        el_test : Test parameters using Empirical Likelihood.
3105
3106        Notes
3107        -----
3108        This function uses brentq to find the value of beta where
3109        test_beta([beta], param_num)[1] is equal to the critical value.
3110
3111        The function returns the results of each iteration of brentq at each
3112        value of beta.
3113
3114        The current function value of the last printed optimization should be
3115        the critical value at the desired significance level. For alpha=.05,
3116        the value is 3.841459.
3117
3118        To ensure optimization terminated successfully, it is suggested to do
3119        el_test([lower_limit], [param_num]).
3120
3121        If the optimization does not terminate successfully, consider switching
3122        optimization algorithms.
3123
3124        If optimization is still not successful, try changing the values of
3125        start_int_params.  If the current function value repeatedly jumps
3126        from a number between 0 and the critical value and a very large number
3127        (>50), the starting parameters of the interior minimization need
3128        to be changed.
3129        """
3130        r0 = stats.chi2.ppf(1 - sig, 1)
3131        if upper_bound is None:
3132            upper_bound = self.conf_int(.01)[param_num][1]
3133        if lower_bound is None:
3134            lower_bound = self.conf_int(.01)[param_num][0]
3135
3136        def f(b0):
3137            return self.el_test(np.array([b0]), np.array([param_num]),
3138                                method=method,
3139                                stochastic_exog=stochastic_exog)[0] - r0
3140
3141        lowerl = optimize.brenth(f, lower_bound,
3142                                 self.params[param_num])
3143        upperl = optimize.brenth(f, self.params[param_num],
3144                                 upper_bound)
3145        #  ^ Seems to be faster than brentq in most cases
3146        return (lowerl, upperl)
3147
3148
3149class RegressionResultsWrapper(wrap.ResultsWrapper):
3150
3151    _attrs = {
3152        'chisq': 'columns',
3153        'sresid': 'rows',
3154        'weights': 'rows',
3155        'wresid': 'rows',
3156        'bcov_unscaled': 'cov',
3157        'bcov_scaled': 'cov',
3158        'HC0_se': 'columns',
3159        'HC1_se': 'columns',
3160        'HC2_se': 'columns',
3161        'HC3_se': 'columns',
3162        'norm_resid': 'rows',
3163    }
3164
3165    _wrap_attrs = wrap.union_dicts(base.LikelihoodResultsWrapper._attrs,
3166                                   _attrs)
3167
3168    _methods = {}
3169
3170    _wrap_methods = wrap.union_dicts(
3171                        base.LikelihoodResultsWrapper._wrap_methods,
3172                        _methods)
3173
3174
3175wrap.populate_wrapper(RegressionResultsWrapper,
3176                      RegressionResults)
3177