1"""
2Test functions for models.regression
3"""
4# TODO: Test for LM
5from statsmodels.compat.python import lrange
6
7import warnings
8
9import numpy as np
10from numpy.testing import (
11    assert_,
12    assert_allclose,
13    assert_almost_equal,
14    assert_equal,
15    assert_raises,
16)
17import pandas as pd
18import pytest
19from scipy.linalg import toeplitz
20from scipy.stats import t as student_t
21
22from statsmodels.datasets import longley
23from statsmodels.regression.linear_model import (
24    GLS,
25    OLS,
26    WLS,
27    burg,
28    yule_walker,
29)
30from statsmodels.tools.tools import add_constant
31
32DECIMAL_4 = 4
33DECIMAL_3 = 3
34DECIMAL_2 = 2
35DECIMAL_1 = 1
36DECIMAL_7 = 7
37DECIMAL_0 = 0
38
39try:
40    import cvxopt  # noqa:F401
41
42    has_cvxopt = True
43except ImportError:
44    has_cvxopt = False
45
46
47class CheckRegressionResults(object):
48    """
49    res2 contains results from Rmodelwrap or were obtained from a statistical
50    packages such as R, Stata, or SAS and were written to model_results
51    """

    decimal_params = DECIMAL_4

    def test_params(self):
        assert_almost_equal(
            self.res1.params, self.res2.params, self.decimal_params
        )

    decimal_standarderrors = DECIMAL_4

    def test_standarderrors(self):
        assert_allclose(
            self.res1.bse,
            self.res2.bse,
            rtol=10 ** -self.decimal_standarderrors,
        )

    decimal_confidenceintervals = DECIMAL_4

    def test_confidenceintervals(self):
        # NOTE: Stata rounds the residuals (at least) to a fixed number of
        # significant digits, so compare approximately rather than exactly
        conf1 = self.res1.conf_int()
        conf2 = self.res2.conf_int()
        for i in range(len(conf1)):
            assert_allclose(
                conf1[i][0],
                conf2[i][0],
                rtol=10 ** -self.decimal_confidenceintervals,
            )
            assert_allclose(
                conf1[i][1],
                conf2[i][1],
                rtol=10 ** -self.decimal_confidenceintervals,
            )

    decimal_conf_int_subset = DECIMAL_4

    def test_conf_int_subset(self):
        if len(self.res1.params) > 1:
            ci1 = self.res1.conf_int(cols=(1, 2))
            ci2 = self.res1.conf_int()[1:3]
            assert_almost_equal(ci1, ci2, self.decimal_conf_int_subset)
        else:
            pass

    decimal_scale = DECIMAL_4

    def test_scale(self):
        assert_almost_equal(
            self.res1.scale, self.res2.scale, self.decimal_scale
        )

    decimal_rsquared = DECIMAL_4

    def test_rsquared(self):
        assert_almost_equal(
            self.res1.rsquared, self.res2.rsquared, self.decimal_rsquared
        )

    decimal_rsquared_adj = DECIMAL_4

    def test_rsquared_adj(self):
        assert_almost_equal(
            self.res1.rsquared_adj,
            self.res2.rsquared_adj,
            self.decimal_rsquared_adj,
        )

    def test_degrees(self):
        assert_equal(self.res1.model.df_model, self.res2.df_model)
        assert_equal(self.res1.model.df_resid, self.res2.df_resid)

    decimal_ess = DECIMAL_4

    def test_ess(self):
        # Explained Sum of Squares
        assert_almost_equal(self.res1.ess, self.res2.ess, self.decimal_ess)

    decimal_ssr = DECIMAL_4

    def test_sumof_squaredresids(self):
        assert_almost_equal(self.res1.ssr, self.res2.ssr, self.decimal_ssr)

    decimal_mse_resid = DECIMAL_4

    def test_mse_resid(self):
        # Mean squared error of the residuals
        assert_almost_equal(
            self.res1.mse_resid, self.res2.mse_resid, self.decimal_mse_resid
        )

    decimal_mse_model = DECIMAL_4

    def test_mse_model(self):
        # Mean squared error of the model
        assert_almost_equal(
            self.res1.mse_model, self.res2.mse_model, self.decimal_mse_model
        )

    decimal_mse_total = DECIMAL_4

    def test_mse_total(self):
        assert_almost_equal(
            self.res1.mse_total,
            self.res2.mse_total,
            self.decimal_mse_total,
            err_msg="Test class %s" % self,
        )

    decimal_fvalue = DECIMAL_4

    def test_fvalue(self):
        # NOTE: assert_almost_equal treats matching infinities as equal,
        # so no special-casing is needed when both fvalues are -inf
        assert_almost_equal(
            self.res1.fvalue, self.res2.fvalue, self.decimal_fvalue
        )

    decimal_loglike = DECIMAL_4

    def test_loglike(self):
        assert_almost_equal(self.res1.llf, self.res2.llf, self.decimal_loglike)

    decimal_aic = DECIMAL_4

    def test_aic(self):
        assert_almost_equal(self.res1.aic, self.res2.aic, self.decimal_aic)
        # the following just checks the definition
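        # Definitions checked below (k = df_model + k_constant, the number
        # of estimated mean parameters):
        #     aicc = aic + 2 * k * (k + 1) / (nobs - k - 1)
        #     hqic = aic - 2 * k + 2 * k * log(log(nobs))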
        aicc1 = self.res1.info_criteria("aicc")
        k = self.res1.df_model + self.res1.model.k_constant
        nobs = self.res1.model.nobs
        aicc2 = self.res1.aic + 2 * (k**2 + k) / (nobs - k - 1)
        assert_allclose(aicc1, aicc2, rtol=1e-10)
        hqic1 = self.res1.info_criteria("hqic")
        hqic2 = (self.res1.aic - 2 * k) + 2 * np.log(np.log(nobs)) * k
        assert_allclose(hqic1, hqic2, rtol=1e-10)

    decimal_bic = DECIMAL_4

    def test_bic(self):
        assert_almost_equal(self.res1.bic, self.res2.bic, self.decimal_bic)

    decimal_pvalues = DECIMAL_4

    def test_pvalues(self):
        assert_almost_equal(
            self.res1.pvalues, self.res2.pvalues, self.decimal_pvalues
        )

    decimal_wresid = DECIMAL_4

    def test_wresid(self):
        assert_almost_equal(
            self.res1.wresid, self.res2.wresid, self.decimal_wresid
        )

    decimal_resids = DECIMAL_4

    def test_resids(self):
        assert_almost_equal(
            self.res1.resid, self.res2.resid, self.decimal_resids
        )

    decimal_norm_resids = DECIMAL_4

    def test_norm_resids(self):
        assert_almost_equal(
            self.res1.resid_pearson,
            self.res2.resid_pearson,
            self.decimal_norm_resids,
        )


# TODO: test fittedvalues and what else?


class TestOLS(CheckRegressionResults):
    @classmethod
    def setup_class(cls):
        from .results.results_regression import Longley

        data = longley.load()
        endog = np.asarray(data.endog)
        exog = np.asarray(data.exog)
        exog = add_constant(exog, prepend=False)
        res1 = OLS(endog, exog).fit()
        res2 = Longley()
        res2.wresid = res1.wresid  # workaround hack
        cls.res1 = res1
        cls.res2 = res2

        res_qr = OLS(endog, exog).fit(method="qr")

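        # Pre-attach the QR factors so that fit(method="qr") can reuse
        # them instead of recomputing the decomposition; this exercises
        # the cached-decomposition code path.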
        model_qr = OLS(endog, exog)
        Q, R = np.linalg.qr(exog)
        model_qr.exog_Q, model_qr.exog_R = Q, R
        model_qr.normalized_cov_params = np.linalg.inv(np.dot(R.T, R))
        model_qr.rank = np.linalg.matrix_rank(R)
        res_qr2 = model_qr.fit(method="qr")

        cls.res_qr = res_qr
        cls.res_qr_manual = res_qr2

    def test_eigenvalues(self):
        eigenval_perc_diff = (
            self.res_qr.eigenvals - self.res_qr_manual.eigenvals
        )
        eigenval_perc_diff /= self.res_qr.eigenvals
        zeros = np.zeros_like(eigenval_perc_diff)
        assert_almost_equal(eigenval_perc_diff, zeros, DECIMAL_7)

    # Robust error tests.  Compare values computed with SAS
    def test_HC0_errors(self):
        # The last standard error is checked separately because the copied
        # results carry too few significant digits for a DECIMAL_4
        # comparison.
        assert_almost_equal(
            self.res1.HC0_se[:-1], self.res2.HC0_se[:-1], DECIMAL_4
        )
        assert_allclose(np.round(self.res1.HC0_se[-1]), self.res2.HC0_se[-1])

    def test_HC1_errors(self):
        assert_almost_equal(
            self.res1.HC1_se[:-1], self.res2.HC1_se[:-1], DECIMAL_4
        )
        # Note: tolerance is tight; rtol=3e-7 fails while 4e-7 passes
        assert_allclose(self.res1.HC1_se[-1], self.res2.HC1_se[-1], rtol=4e-7)

    def test_HC2_errors(self):
        assert_almost_equal(
            self.res1.HC2_se[:-1], self.res2.HC2_se[:-1], DECIMAL_4
        )
        # Note: tolerance is tight; rtol=4e-7 fails while 5e-7 passes
        assert_allclose(self.res1.HC2_se[-1], self.res2.HC2_se[-1], rtol=5e-7)

    def test_HC3_errors(self):
        assert_almost_equal(
            self.res1.HC3_se[:-1], self.res2.HC3_se[:-1], DECIMAL_4
        )
        # Note: tolerance is tight; rtol=1e-7 fails while 1.5e-7 passes
        assert_allclose(
            self.res1.HC3_se[-1], self.res2.HC3_se[-1], rtol=1.5e-7
        )

    def test_qr_params(self):
        assert_almost_equal(self.res1.params, self.res_qr.params, 6)

    def test_qr_normalized_cov_params(self):
        # TODO: use assert_allclose
        assert_almost_equal(
            np.ones_like(self.res1.normalized_cov_params),
            self.res1.normalized_cov_params
            / self.res_qr.normalized_cov_params,
            5,
        )

    def test_missing(self):
        data = longley.load()
        data.exog = add_constant(data.exog, prepend=False)
        data.endog[[3, 7, 14]] = np.nan
        mod = OLS(data.endog, data.exog, missing="drop")
        assert_equal(mod.endog.shape[0], 13)
        assert_equal(mod.exog.shape[0], 13)

    def test_rsquared_adj_overfit(self):
        # Test that if df_resid = 0, rsquared_adj is np.nan.
        # This is a regression test for user issue:
        # https://github.com/statsmodels/statsmodels/issues/868
        with warnings.catch_warnings(record=True):
            x = np.random.randn(5)
            y = np.random.randn(5, 6)
            results = OLS(x, y).fit()
            rsquared_adj = results.rsquared_adj
            assert_equal(rsquared_adj, np.nan)

    def test_qr_alternatives(self):
        assert_allclose(
            self.res_qr.params, self.res_qr_manual.params, rtol=5e-12
        )

    def test_norm_resid(self):
        resid = self.res1.wresid
        norm_resid = resid / np.sqrt(np.sum(resid ** 2.0) / self.res1.df_resid)
        model_norm_resid = self.res1.resid_pearson
        assert_almost_equal(model_norm_resid, norm_resid, DECIMAL_7)

    def test_summary_slim(self):
        # check that slim summary is smaller, does not verify content
        summ = self.res1.summary(slim=True)
        assert len(summ.tables) == 2
        assert len(str(summ)) < 6700

    def test_norm_resid_zero_variance(self):
        with warnings.catch_warnings(record=True):
            y = self.res1.model.endog
            res = OLS(y, y).fit()
            assert_allclose(res.scale, 0, atol=1e-20)
            assert_allclose(res.wresid, res.resid_pearson, atol=5e-11)


class TestRTO(CheckRegressionResults):
    @classmethod
    def setup_class(cls):
        from .results.results_regression import LongleyRTO

        data = longley.load()
        endog = np.asarray(data.endog)
        exog = np.asarray(data.exog)
        res1 = OLS(endog, exog).fit()
        res2 = LongleyRTO()
        res2.wresid = res1.wresid  # workaround hack
        cls.res1 = res1
        cls.res2 = res2

        res_qr = OLS(endog, exog).fit(method="qr")
        cls.res_qr = res_qr


class TestFtest(object):
    """
    Tests f_test vs. RegressionResults
    """

    @classmethod
    def setup_class(cls):
        data = longley.load()
        data.exog = add_constant(data.exog, prepend=False)
        cls.res1 = OLS(data.endog, data.exog).fit()
        R = np.identity(7)[:-1, :]
        cls.Ftest = cls.res1.f_test(R)

    def test_F(self):
        assert_almost_equal(self.Ftest.fvalue, self.res1.fvalue, DECIMAL_4)

    def test_p(self):
        assert_almost_equal(self.Ftest.pvalue, self.res1.f_pvalue, DECIMAL_4)

    def test_Df_denom(self):
        assert_equal(self.Ftest.df_denom, self.res1.model.df_resid)

    def test_Df_num(self):
        assert_equal(self.Ftest.df_num, 6)


class TestFTest2(object):
    """
    A joint test that the coefficient on
    GNP = the coefficient on UNEMP  and that the coefficient on
    POP = the coefficient on YEAR for the Longley dataset.

    Ftest1 is from statsmodels.  Results are from Rpy using R's car library.
    """

    @classmethod
    def setup_class(cls):
        data = longley.load()
        columns = [f"x{i}" for i in range(1, data.exog.shape[1] + 1)]
        data.exog.columns = columns
        data.exog = add_constant(data.exog, prepend=False)
        res1 = OLS(data.endog, data.exog).fit()
        R2 = [[0, 1, -1, 0, 0, 0, 0], [0, 0, 0, 0, 1, -1, 0]]
        cls.Ftest1 = res1.f_test(R2)
        hyp = "x2 = x3, x5 = x6"
        cls.NewFtest1 = res1.f_test(hyp)

    def test_new_ftest(self):
        assert_equal(self.NewFtest1.fvalue, self.Ftest1.fvalue)

    def test_fvalue(self):
        assert_almost_equal(self.Ftest1.fvalue, 9.7404618732968196, DECIMAL_4)

    def test_pvalue(self):
        assert_almost_equal(
            self.Ftest1.pvalue, 0.0056052885317493459, DECIMAL_4
        )

    def test_df_denom(self):
        assert_equal(self.Ftest1.df_denom, 9)

    def test_df_num(self):
        assert_equal(self.Ftest1.df_num, 2)


class TestFtestQ(object):
    """
    A joint hypothesis test that Rb = q.  Coefficient tests are essentially
    made up.  Test values taken from Stata.
    """

    @classmethod
    def setup_class(cls):
        data = longley.load()
        data.exog = add_constant(data.exog, prepend=False)
        res1 = OLS(data.endog, data.exog).fit()
        R = np.array(
            [
                [0, 1, 1, 0, 0, 0, 0],
                [0, 1, 0, 1, 0, 0, 0],
                [0, 1, 0, 0, 0, 0, 0],
                [0, 0, 0, 0, 1, 0, 0],
                [0, 0, 0, 0, 0, 1, 0],
            ]
        )
        q = np.array([0, 0, 0, 1, 0])
        cls.Ftest1 = res1.f_test((R, q))

    def test_fvalue(self):
        assert_almost_equal(self.Ftest1.fvalue, 70.115557, 5)

    def test_pvalue(self):
        assert_almost_equal(self.Ftest1.pvalue, 6.229e-07, 10)

    def test_df_denom(self):
        assert_equal(self.Ftest1.df_denom, 9)

    def test_df_num(self):
        assert_equal(self.Ftest1.df_num, 5)


class TestTtest(object):
468    """
469    Test individual t-tests.  Ie., are the coefficients significantly
470    different than zero.
471    """
472
473    @classmethod
474    def setup_class(cls):
475        data = longley.load()
476        columns = [f"x{i}" for i in range(1, data.exog.shape[1] + 1)]
477        data.exog.columns = columns
478        data.exog = add_constant(data.exog, prepend=False)
479        cls.res1 = OLS(data.endog, data.exog).fit()
480        R = np.identity(7)
481        cls.Ttest = cls.res1.t_test(R)
482        hyp = "x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0, x6 = 0, const = 0"
483        cls.NewTTest = cls.res1.t_test(hyp)
484
485    def test_new_tvalue(self):
486        assert_equal(self.NewTTest.tvalue, self.Ttest.tvalue)
487
488    def test_tvalue(self):
489        assert_almost_equal(self.Ttest.tvalue, self.res1.tvalues, DECIMAL_4)
490
491    def test_sd(self):
492        assert_almost_equal(self.Ttest.sd, self.res1.bse, DECIMAL_4)
493
494    def test_pvalue(self):
495        assert_almost_equal(
496            self.Ttest.pvalue,
497            student_t.sf(np.abs(self.res1.tvalues), self.res1.model.df_resid)
498            * 2,
499            DECIMAL_4,
500        )
501
502    def test_df_denom(self):
503        assert_equal(self.Ttest.df_denom, self.res1.model.df_resid)
504
505    def test_effect(self):
506        assert_almost_equal(self.Ttest.effect, self.res1.params)
507
508
509class TestTtest2(object):
510    """
511    Tests the hypothesis that the coefficients on POP and YEAR
512    are equal.
513
514    Results from RPy using 'car' package.
515    """
516
517    @classmethod
518    def setup_class(cls):
519        R = np.zeros(7)
520        R[4:6] = [1, -1]
521        data = longley.load()
522        data.exog = add_constant(data.exog, prepend=False)
523        res1 = OLS(data.endog, data.exog).fit()
524        cls.Ttest1 = res1.t_test(R)
525
526    def test_tvalue(self):
527        assert_almost_equal(self.Ttest1.tvalue, -4.0167754636397284, DECIMAL_4)
528
529    def test_sd(self):
530        assert_almost_equal(self.Ttest1.sd, 455.39079425195314, DECIMAL_4)
531
532    def test_pvalue(self):
533        assert_almost_equal(
534            self.Ttest1.pvalue, 2 * 0.0015163772380932246, DECIMAL_4
535        )
536
537    def test_df_denom(self):
538        assert_equal(self.Ttest1.df_denom, 9)
539
540    def test_effect(self):
541        assert_almost_equal(self.Ttest1.effect, -1829.2025687186533, DECIMAL_4)
542
543
544class TestGLS(object):
545    """
546    These test results were obtained by replication with R.
547    """
548
549    @classmethod
550    def setup_class(cls):
551        from .results.results_regression import LongleyGls
552
553        data = longley.load()
554        exog = add_constant(
555            np.column_stack((data.exog.iloc[:, 1], data.exog.iloc[:, 4])),
556            prepend=False,
557        )
558        tmp_results = OLS(data.endog, exog).fit()
559        rho = np.corrcoef(tmp_results.resid[1:], tmp_results.resid[:-1])[0][
560            1
561        ]  # by assumption
562        order = toeplitz(np.arange(16))
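        # AR(1) error covariance: sigma[i, j] = rho ** |i - j|, built from
        # the Toeplitz matrix of lag distances for the 16 observations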
        sigma = rho ** order
        GLS_results = GLS(data.endog, exog, sigma=sigma).fit()
        cls.res1 = GLS_results
        cls.res2 = LongleyGls()
        # attach for test_missing
        cls.sigma = sigma
        cls.exog = exog
        cls.endog = data.endog

    def test_aic(self):
        # Note: tolerance is tight; rtol=3e-3 fails while 4e-3 passes
        assert_allclose(self.res1.aic + 2, self.res2.aic, rtol=4e-3)

    def test_bic(self):
        # Note: tolerance is tight; rtol=1e-2 fails while 1.5e-2 passes
        assert_allclose(self.res1.bic, self.res2.bic, rtol=1.5e-2)

    def test_loglike(self):
        assert_almost_equal(self.res1.llf, self.res2.llf, DECIMAL_0)

    def test_params(self):
        assert_almost_equal(self.res1.params, self.res2.params, DECIMAL_1)

    def test_resid(self):
        assert_almost_equal(self.res1.resid, self.res2.resid, DECIMAL_4)

    def test_scale(self):
        assert_almost_equal(self.res1.scale, self.res2.scale, DECIMAL_4)

    def test_tvalues(self):
        assert_almost_equal(self.res1.tvalues, self.res2.tvalues, DECIMAL_4)

    def test_standarderrors(self):
        assert_almost_equal(self.res1.bse, self.res2.bse, DECIMAL_4)

    def test_fittedvalues(self):
        assert_almost_equal(
            self.res1.fittedvalues, self.res2.fittedvalues, DECIMAL_4
        )

    def test_pvalues(self):
        assert_almost_equal(self.res1.pvalues, self.res2.pvalues, DECIMAL_4)

    def test_missing(self):
        endog = self.endog.copy()  # copy; otherwise endog changes for other tests
        endog[[4, 7, 14]] = np.nan
        mod = GLS(endog, self.exog, sigma=self.sigma, missing="drop")
        assert_equal(mod.endog.shape[0], 13)
        assert_equal(mod.exog.shape[0], 13)
        assert_equal(mod.sigma.shape, (13, 13))


class TestGLS_alt_sigma(CheckRegressionResults):
    """
    Test that GLS with no argument is equivalent to OLS.
    """

    @classmethod
    def setup_class(cls):
        data = longley.load()
        endog = np.asarray(data.endog)
        exog = np.asarray(data.exog)
        exog = add_constant(exog, prepend=False)
        ols_res = OLS(endog, exog).fit()
        gls_res = GLS(endog, exog).fit()
        gls_res_scalar = GLS(endog, exog, sigma=1)
        cls.endog = endog
        cls.exog = exog
        cls.res1 = gls_res
        cls.res2 = ols_res
        cls.res3 = gls_res_scalar

    #        self.res2.conf_int = self.res2.conf_int()

    def test_wrong_size_sigma_1d(self):
        n = len(self.endog)
        assert_raises(
            ValueError, GLS, self.endog, self.exog, sigma=np.ones(n - 1)
        )

    def test_wrong_size_sigma_2d(self):
        n = len(self.endog)
        assert_raises(
            ValueError,
            GLS,
            self.endog,
            self.exog,
            sigma=np.ones((n - 1, n - 1)),
        )


# FIXME: do not leave commented-out, use or move/remove
#    def check_confidenceintervals(self, conf1, conf2):
#        assert_almost_equal(conf1, conf2, DECIMAL_4)


class TestLM(object):
    @classmethod
    def setup_class(cls):
        # TODO: Test HAC method
        rs = np.random.RandomState(1234)
        x = rs.randn(100, 3)
        b = np.ones((3, 1))
        e = rs.randn(100, 1)
        y = np.dot(x, b) + e
        # Cases?
        # Homoskedastic
        # HC0
        cls.res1_full = OLS(y, x).fit()
        cls.res1_restricted = OLS(y, x[:, 0]).fit()

        cls.res2_full = cls.res1_full.get_robustcov_results("HC0")
        cls.res2_restricted = cls.res1_restricted.get_robustcov_results("HC0")

        cls.x = x
        cls.Y = y

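    # Each test below recomputes the score (LM) statistic by hand as
    #     LM = n * s' S^{-1} s,
    # where s is the mean score of the restricted model (x_i * e_i averaged
    # over observations) and S is an estimate of the score covariance, and
    # compares it against compare_lm_test.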
    def test_LM_homoskedastic(self):
        resid = self.res1_restricted.wresid
        n = resid.shape[0]
        x = self.x
        S = np.dot(resid, resid) / n * np.dot(x.T, x) / n
        Sinv = np.linalg.inv(S)
        s = np.mean(x * resid[:, None], 0)
        LMstat = n * np.dot(np.dot(s, Sinv), s.T)
        LMstat_OLS = self.res1_full.compare_lm_test(self.res1_restricted)
        LMstat2 = LMstat_OLS[0]
        assert_almost_equal(LMstat, LMstat2, DECIMAL_7)

    def test_LM_heteroskedastic_nodemean(self):
        resid = self.res1_restricted.wresid
        n = resid.shape[0]
        x = self.x
        scores = x * resid[:, None]
        S = np.dot(scores.T, scores) / n
        Sinv = np.linalg.inv(S)
        s = np.mean(scores, 0)
        LMstat = n * np.dot(np.dot(s, Sinv), s.T)
        LMstat_OLS = self.res2_full.compare_lm_test(
            self.res2_restricted, demean=False
        )
        LMstat2 = LMstat_OLS[0]
        assert_almost_equal(LMstat, LMstat2, DECIMAL_7)

    def test_LM_heteroskedastic_demean(self):
        resid = self.res1_restricted.wresid
        n = resid.shape[0]
        x = self.x
        scores = x * resid[:, None]
        scores_demean = scores - scores.mean(0)
        S = np.dot(scores_demean.T, scores_demean) / n
        Sinv = np.linalg.inv(S)
        s = np.mean(scores, 0)
        LMstat = n * np.dot(np.dot(s, Sinv), s.T)
        LMstat_OLS = self.res2_full.compare_lm_test(self.res2_restricted)
        LMstat2 = LMstat_OLS[0]
        assert_almost_equal(LMstat, LMstat2, DECIMAL_7)

    def test_LM_heteroskedastic_LRversion(self):
        resid = self.res1_restricted.wresid
        resid_full = self.res1_full.wresid
        n = resid.shape[0]
        x = self.x
        scores = x * resid[:, None]
        s = np.mean(scores, 0)
        scores = x * resid_full[:, None]
        S = np.dot(scores.T, scores) / n
        Sinv = np.linalg.inv(S)
        LMstat = n * np.dot(np.dot(s, Sinv), s.T)
        LMstat_OLS = self.res2_full.compare_lm_test(
            self.res2_restricted, use_lr=True
        )
        LMstat2 = LMstat_OLS[0]
        assert_almost_equal(LMstat, LMstat2, DECIMAL_7)

    def test_LM_nonnested(self):
        assert_raises(
            ValueError, self.res2_restricted.compare_lm_test, self.res2_full
        )


class TestOLS_GLS_WLS_equivalence(object):
    @classmethod
    def setup_class(cls):
        data = longley.load()
        data.exog = add_constant(data.exog, prepend=False)
        y = data.endog
        x = data.exog
        n = y.shape[0]
        w = np.ones(n)
        cls.results = []
        cls.results.append(OLS(y, x).fit())
        cls.results.append(WLS(y, x, w).fit())
        # scaling weights does not change main results (except scale)
        cls.results.append(GLS(y, x, 100 * w).fit())
        cls.results.append(GLS(y, x, np.diag(0.1 * w)).fit())

    def test_ll(self):
        llf = np.array([r.llf for r in self.results])
        llf_1 = np.ones_like(llf) * self.results[0].llf
        assert_almost_equal(llf, llf_1, DECIMAL_7)

        ic = np.array([r.aic for r in self.results])
        ic_1 = np.ones_like(ic) * self.results[0].aic
        assert_almost_equal(ic, ic_1, DECIMAL_7)

        ic = np.array([r.bic for r in self.results])
        ic_1 = np.ones_like(ic) * self.results[0].bic
        assert_almost_equal(ic, ic_1, DECIMAL_7)

    def test_params(self):
        params = np.array([r.params for r in self.results])
        params_1 = np.array([self.results[0].params] * len(self.results))
        assert_allclose(params, params_1)

    def test_ss(self):
        bse = np.array([r.bse for r in self.results])
        bse_1 = np.array([self.results[0].bse] * len(self.results))
        assert_allclose(bse, bse_1)

    def test_rsquared(self):
        rsquared = np.array([r.rsquared for r in self.results])
        rsquared_1 = np.array([self.results[0].rsquared] * len(self.results))
        assert_almost_equal(rsquared, rsquared_1, DECIMAL_7)


class TestGLS_WLS_equivalence(TestOLS_GLS_WLS_equivalence):
    # reuse test methods

    @classmethod
    def setup_class(cls):
        data = longley.load()
        data.exog = add_constant(data.exog, prepend=False)
        y = data.endog
        x = data.exog
        n = y.shape[0]
        np.random.seed(5)
        w = np.random.uniform(0.5, 1, n)
        w_inv = 1.0 / w
        cls.results = []
        cls.results.append(WLS(y, x, w).fit())
        # scaling weights does not change main results (except scale)
        cls.results.append(WLS(y, x, 0.01 * w).fit())
        cls.results.append(GLS(y, x, 100 * w_inv).fit())
        cls.results.append(GLS(y, x, np.diag(0.1 * w_inv)).fit())


class TestNonFit(object):
    @classmethod
    def setup_class(cls):
        data = longley.load()
        data.exog = add_constant(data.exog, prepend=False)
        cls.endog = data.endog
        cls.exog = data.exog
        cls.ols_model = OLS(data.endog, data.exog)

    def test_df_resid(self):
        # longley: 16 observations less 7 regressors (including the constant)
        df_resid = self.endog.shape[0] - self.exog.shape[1]
        assert_equal(df_resid, 9)
        assert_equal(self.ols_model.df_resid, df_resid)


class TestWLS_CornerCases(object):
    @classmethod
    def setup_class(cls):
        cls.exog = np.ones((1,))
        cls.endog = np.ones((1,))
        weights = 1
        cls.wls_res = WLS(cls.endog, cls.exog, weights=weights).fit()

    def test_wrong_size_weights(self):
        weights = np.ones((10, 10))
        assert_raises(ValueError, WLS, self.endog, self.exog, weights=weights)


class TestWLSExogWeights(CheckRegressionResults):
    # Test WLS with Greene's credit card data
    # reg avgexp age income incomesq ownrent [aw=1/incomesq]
    @classmethod
    def setup_class(cls):
        from statsmodels.datasets.ccard import load

        from .results.results_regression import CCardWLS

        dta = load()
        endog = np.asarray(dta.endog)
        exog = np.asarray(dta.exog)
        exog = add_constant(exog, prepend=False)
        nobs = 72.0

        weights = 1 / exog[:, 2]
        # for comparison with stata analytic weights
        scaled_weights = (weights * nobs) / weights.sum()
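        # (Stata analytic weights are rescaled so that they sum to the
        # number of observations, hence the scaling above.)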

        cls.res1 = WLS(endog, exog, weights=scaled_weights).fit()
        cls.res2 = CCardWLS()
        cls.res2.wresid = scaled_weights ** 0.5 * cls.res2.resid

        # correction because we use a different definition of loglike/llf
        corr_ic = 2 * (cls.res1.llf - cls.res2.llf)
        cls.res2.aic -= corr_ic
        cls.res2.bic -= corr_ic
        cls.res2.llf += 0.5 * np.sum(np.log(cls.res1.model.weights))


def test_wls_example():
    # example from the docstring; there was a note about a bug that
    # should be fixed now
    Y = [1, 3, 4, 5, 2, 3, 4]
    x = lrange(1, 8)
    x = add_constant(x, prepend=False)
    wls_model = WLS(Y, x, weights=lrange(1, 8)).fit()
    # taken from R lm.summary
    assert_almost_equal(wls_model.fvalue, 0.127337843215, 6)
    assert_almost_equal(wls_model.scale, 2.44608530786 ** 2, 6)


def test_wls_tss():
    y = np.array([22, 22, 22, 23, 23, 23])
    x = [[1, 0], [1, 0], [1, 1], [0, 1], [0, 1], [0, 1]]

    ols_mod = OLS(y, add_constant(x, prepend=False)).fit()

    yw = np.array([22, 22, 23.0])
    Xw = [[1, 0], [1, 1], [0, 1]]
    w = np.array([2, 1, 3.0])
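    # The integer weights replicate rows: (yw, Xw) with w = (2, 1, 3) is
    # the grouped version of the six observations (y, x) above, so the
    # centered total sums of squares should agree.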

    wls_mod = WLS(yw, add_constant(Xw, prepend=False), weights=w).fit()
    assert_equal(ols_mod.centered_tss, wls_mod.centered_tss)


class TestWLSScalarVsArray(CheckRegressionResults):
    @classmethod
    def setup_class(cls):
        from statsmodels.datasets.longley import load

        dta = load()
        endog = np.asarray(dta.endog)
        exog = np.asarray(dta.exog)
        exog = add_constant(exog, prepend=True)
        wls_scalar = WLS(endog, exog, weights=1.0 / 3).fit()
        weights = [1 / 3.0] * len(endog)
        wls_array = WLS(endog, exog, weights=weights).fit()
        cls.res1 = wls_scalar
        cls.res2 = wls_array


class TestWLS_GLS(CheckRegressionResults):
    @classmethod
    def setup_class(cls):
        from statsmodels.datasets.ccard import load

        data = load()
        endog = np.asarray(data.endog)
        exog = np.asarray(data.exog)
        sigma = exog[:, 2]
        cls.res1 = WLS(endog, exog, weights=1 / sigma).fit()
        cls.res2 = GLS(endog, exog, sigma=sigma).fit()

    def check_confidenceintervals(self, conf1, conf2):  # FIXME: never called
        assert_almost_equal(conf1, conf2(), DECIMAL_4)


def test_wls_missing():
    from statsmodels.datasets.ccard import load

    data = load()
    endog = data.endog
    endog[[10, 25]] = np.nan
    mod = WLS(
        data.endog, data.exog, weights=1 / data.exog.iloc[:, 2], missing="drop"
    )
    assert_equal(mod.endog.shape[0], 70)
    assert_equal(mod.exog.shape[0], 70)
    assert_equal(mod.weights.shape[0], 70)


class TestWLS_OLS(CheckRegressionResults):
    @classmethod
    def setup_class(cls):
        data = longley.load()
        endog = np.asarray(data.endog)
        exog = np.asarray(data.exog)
        exog = add_constant(exog, prepend=False)
        cls.res1 = OLS(endog, exog).fit()
        cls.res2 = WLS(endog, exog).fit()

    def check_confidenceintervals(self, conf1, conf2):  # FIXME: never called
        assert_almost_equal(conf1, conf2(), DECIMAL_4)


class TestGLS_OLS(CheckRegressionResults):
    @classmethod
    def setup_class(cls):
        data = longley.load()
        endog = np.asarray(data.endog)
        exog = np.asarray(data.exog)
        exog = add_constant(exog, prepend=False)
        cls.res1 = GLS(endog, exog).fit()
        cls.res2 = OLS(endog, exog).fit()

    def check_confidenceintervals(self, conf1, conf2):  # FIXME: never called
        assert_almost_equal(conf1, conf2(), DECIMAL_4)


# FIXME: do not leave this commented-out sitting here
# TODO: test AR
# why the two-stage in AR?
# class TestAR(object):
#     from statsmodels.datasets.sunspots import load
#     data = load()
#     model = AR(data.endog, rho=4).fit()
#     R_res = RModel(data.endog, aic="FALSE", order_max=4)#

#     def test_params(self):
#         assert_almost_equal(self.model.rho,
#         pass

#     def test_order(self):
# In R this can be defined or chosen by minimizing the AIC if aic=True
#        pass


class TestYuleWalker(object):
    @classmethod
    def setup_class(cls):
        from statsmodels.datasets.sunspots import load

        data = load()
        cls.rho, cls.sigma = yule_walker(data.endog, order=4, method="mle")
        cls.R_params = [
            1.2831003105694765,
            -0.45240924374091945,
            -0.20770298557575195,
            0.047943648089542337,
        ]

    def test_params(self):
        assert_almost_equal(self.rho, self.R_params, DECIMAL_4)


class TestDataDimensions(CheckRegressionResults):
    @classmethod
    def setup_class(cls):
        np.random.seed(54321)
        cls.endog_n_ = np.random.uniform(0, 20, size=30)
        cls.endog_n_one = cls.endog_n_[:, None]
        cls.exog_n_ = np.random.uniform(0, 20, size=30)
        cls.exog_n_one = cls.exog_n_[:, None]
        cls.degen_exog = cls.exog_n_one[:-1]
        cls.mod1 = OLS(cls.endog_n_one, cls.exog_n_one)
        cls.mod1.df_model += 1
        cls.res1 = cls.mod1.fit()
        # Note that these are created for every subclass,
        # which adds a little extra overhead.
        cls.mod2 = OLS(cls.endog_n_one, cls.exog_n_one)
        cls.mod2.df_model += 1
        cls.res2 = cls.mod2.fit()

    def check_confidenceintervals(self, conf1, conf2):  # FIXME: never called
        assert_almost_equal(conf1, conf2(), DECIMAL_4)


class TestGLS_large_data(TestDataDimensions):
    @classmethod
    def setup_class(cls):
        super(TestGLS_large_data, cls).setup_class()
        nobs = 1000
        y = np.random.randn(nobs, 1)
        x = np.random.randn(nobs, 20)
        sigma = np.ones_like(y)
        cls.gls_res = GLS(y, x, sigma=sigma).fit()
        cls.gls_res_scalar = GLS(y, x, sigma=1).fit()
        cls.gls_res_none = GLS(y, x).fit()
        cls.ols_res = OLS(y, x).fit()

    def test_large_equal_params(self):
        assert_almost_equal(
            self.ols_res.params, self.gls_res.params, DECIMAL_7
        )

    def test_large_equal_loglike(self):
        assert_almost_equal(self.ols_res.llf, self.gls_res.llf, DECIMAL_7)

    def test_large_equal_params_none(self):
        assert_almost_equal(
            self.gls_res.params, self.gls_res_none.params, DECIMAL_7
        )


class TestNxNx(TestDataDimensions):
    @classmethod
    def setup_class(cls):
        super(TestNxNx, cls).setup_class()
        cls.mod2 = OLS(cls.endog_n_, cls.exog_n_)
        cls.mod2.df_model += 1
        cls.res2 = cls.mod2.fit()


class TestNxOneNx(TestDataDimensions):
    @classmethod
    def setup_class(cls):
        super(TestNxOneNx, cls).setup_class()
        cls.mod2 = OLS(cls.endog_n_one, cls.exog_n_)
        cls.mod2.df_model += 1
        cls.res2 = cls.mod2.fit()


class TestNxNxOne(TestDataDimensions):
    @classmethod
    def setup_class(cls):
        super(TestNxNxOne, cls).setup_class()
        cls.mod2 = OLS(cls.endog_n_, cls.exog_n_one)
        cls.mod2.df_model += 1
        cls.res2 = cls.mod2.fit()


def test_bad_size():
    np.random.seed(54321)
    data = np.random.uniform(0, 20, 31)
    assert_raises(ValueError, OLS, data, data[1:])


def test_const_indicator():
    rs = np.random.RandomState(12345)
    x = rs.randint(0, 3, size=30)
    x = pd.get_dummies(pd.Series(x, dtype="category"), drop_first=False)
    y = np.dot(x, [1.0, 2.0, 3.0]) + rs.normal(size=30)
    resc = OLS(y, add_constant(x.iloc[:, 1:], prepend=True)).fit()
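    # the three category dummies sum to one and hence span a constant;
    # hasconst=True declares that implicit constant to the model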
    res = OLS(y, x, hasconst=True).fit()
    assert_almost_equal(resc.rsquared, res.rsquared, 12)
    assert res.model.data.k_constant == 1
    assert resc.model.data.k_constant == 1


def test_fvalue_const_only():
    rs = np.random.RandomState(12345)
    x = rs.randint(0, 3, size=30)
    x = pd.get_dummies(pd.Series(x, dtype="category"), drop_first=False)
    x.iloc[:, 0] = 1
    y = np.dot(x, [1.0, 2.0, 3.0]) + rs.normal(size=30)
    res = OLS(y, x, hasconst=True).fit(cov_type="HC1")
    assert not np.isnan(res.fvalue)
    assert isinstance(res.fvalue, float)
    assert isinstance(res.f_pvalue, float)


def test_conf_int_single_regressor():
    # GH#706 single-regressor model (i.e. no intercept) with 1D exog
    # should get passed to DataFrame for conf_int
    y = pd.Series(np.random.randn(10))
    x = pd.Series(np.ones(10))
    res = OLS(y, x).fit()
    conf_int = res.conf_int()
    np.testing.assert_equal(conf_int.shape, (1, 2))
    np.testing.assert_(isinstance(conf_int, pd.DataFrame))


def test_summary_as_latex():
    # GH#734
    import re

    dta = longley.load_pandas()
    x = dta.exog
    x["constant"] = 1
    y = dta.endog
    res = OLS(y, x).fit()
    with pytest.warns(UserWarning):
        table = res.summary().as_latex()
    # replace the date and time
    table = re.sub(
        "(?<=\n\\\\textbf\\{Date:\\}             &).+?&",
        " Sun, 07 Apr 2013 &",
        table,
    )
    table = re.sub(
        "(?<=\n\\\\textbf\\{Time:\\}             &).+?&",
        "     13:46:07     &",
        table,
    )

    expected = """\\begin{center}
\\begin{tabular}{lclc}
\\toprule
\\textbf{Dep. Variable:}    &      TOTEMP      & \\textbf{  R-squared:         } &     0.995   \\\\
\\textbf{Model:}            &       OLS        & \\textbf{  Adj. R-squared:    } &     0.992   \\\\
\\textbf{Method:}           &  Least Squares   & \\textbf{  F-statistic:       } &     330.3   \\\\
\\textbf{Date:}             & Sun, 07 Apr 2013 & \\textbf{  Prob (F-statistic):} &  4.98e-10   \\\\
\\textbf{Time:}             &     13:46:07     & \\textbf{  Log-Likelihood:    } &   -109.62   \\\\
\\textbf{No. Observations:} &          16      & \\textbf{  AIC:               } &     233.2   \\\\
\\textbf{Df Residuals:}     &           9      & \\textbf{  BIC:               } &     238.6   \\\\
\\textbf{Df Model:}         &           6      & \\textbf{                     } &             \\\\
\\textbf{Covariance Type:}  &    nonrobust     & \\textbf{                     } &             \\\\
\\bottomrule
\\end{tabular}
\\begin{tabular}{lcccccc}
                  & \\textbf{coef} & \\textbf{std err} & \\textbf{t} & \\textbf{P$> |$t$|$} & \\textbf{[0.025} & \\textbf{0.975]}  \\\\
\\midrule
\\textbf{GNPDEFL}  &      15.0619  &       84.915     &     0.177  &         0.863        &     -177.029    &      207.153     \\\\
\\textbf{GNP}      &      -0.0358  &        0.033     &    -1.070  &         0.313        &       -0.112    &        0.040     \\\\
\\textbf{UNEMP}    &      -2.0202  &        0.488     &    -4.136  &         0.003        &       -3.125    &       -0.915     \\\\
\\textbf{ARMED}    &      -1.0332  &        0.214     &    -4.822  &         0.001        &       -1.518    &       -0.549     \\\\
\\textbf{POP}      &      -0.0511  &        0.226     &    -0.226  &         0.826        &       -0.563    &        0.460     \\\\
\\textbf{YEAR}     &    1829.1515  &      455.478     &     4.016  &         0.003        &      798.788    &     2859.515     \\\\
\\textbf{constant} &   -3.482e+06  &      8.9e+05     &    -3.911  &         0.004        &     -5.5e+06    &    -1.47e+06     \\\\
\\bottomrule
\\end{tabular}
\\begin{tabular}{lclc}
\\textbf{Omnibus:}       &  0.749 & \\textbf{  Durbin-Watson:     } &    2.559  \\\\
\\textbf{Prob(Omnibus):} &  0.688 & \\textbf{  Jarque-Bera (JB):  } &    0.684  \\\\
\\textbf{Skew:}          &  0.420 & \\textbf{  Prob(JB):          } &    0.710  \\\\
\\textbf{Kurtosis:}      &  2.434 & \\textbf{  Cond. No.          } & 4.86e+09  \\\\
\\bottomrule
\\end{tabular}
%\\caption{OLS Regression Results}
\\end{center}

Notes: \\newline
 [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. \\newline
 [2] The condition number is large, 4.86e+09. This might indicate that there are \\newline
 strong multicollinearity or other numerical problems."""
    assert_equal(table, expected)


class TestRegularizedFit(object):

    # Make sure there are no problems when no variables are selected.
    def test_empty_model(self):

        np.random.seed(742)
        n = 100
        endog = np.random.normal(size=n)
        exog = np.random.normal(size=(n, 3))

        for cls in OLS, WLS, GLS:
            model = cls(endog, exog)
            result = model.fit_regularized(alpha=1000)
            assert_equal(result.params, 0.0)

    def test_regularized(self):

        import os

        from .results import glmnet_r_results

        cur_dir = os.path.dirname(os.path.abspath(__file__))
        data = np.loadtxt(
            os.path.join(cur_dir, "results", "lasso_data.csv"), delimiter=","
        )

        tests = [x for x in dir(glmnet_r_results) if x.startswith("rslt_")]

        for test in tests:

            vec = getattr(glmnet_r_results, test)

            n = vec[0]
            p = vec[1]
            L1_wt = float(vec[2])
            lam = float(vec[3])
            params = vec[4:].astype(np.float64)

            endog = data[0 : int(n), 0]
            exog = data[0 : int(n), 1 : (int(p) + 1)]

            endog = endog - endog.mean()
            endog /= endog.std(ddof=1)
            exog = exog - exog.mean(0)
            exog /= exog.std(0, ddof=1)

            for cls in OLS, WLS, GLS:
                mod = cls(endog, exog)
                rslt = mod.fit_regularized(L1_wt=L1_wt, alpha=lam)
                assert_almost_equal(rslt.params, params, decimal=3)

                # Smoke test for profile likelihood
                mod.fit_regularized(L1_wt=L1_wt, alpha=lam, profile_scale=True)

    def test_regularized_weights(self):

        np.random.seed(1432)
        exog1 = np.random.normal(size=(100, 3))
        endog1 = exog1[:, 0] + exog1[:, 1] + np.random.normal(size=100)
        exog2 = np.random.normal(size=(100, 3))
        endog2 = exog2[:, 0] + exog2[:, 1] + np.random.normal(size=100)

        exog_a = np.vstack((exog1, exog1, exog2))
        endog_a = np.concatenate((endog1, endog1, endog2))

        # Should be equivalent to exog_a, endog_a.
        exog_b = np.vstack((exog1, exog2))
        endog_b = np.concatenate((endog1, endog2))
        wgts = np.ones(200)
        wgts[0:100] = 2
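        # A weight of 2 on the first block is equivalent to including the
        # block twice, as in (endog_a, exog_a) above.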
        sigma = np.diag(1 / wgts)

        for L1_wt in 0, 0.5, 1:
            for alpha in 0, 1:
                mod1 = OLS(endog_a, exog_a)
                rslt1 = mod1.fit_regularized(L1_wt=L1_wt, alpha=alpha)

                mod2 = WLS(endog_b, exog_b, weights=wgts)
                rslt2 = mod2.fit_regularized(L1_wt=L1_wt, alpha=alpha)

                mod3 = GLS(endog_b, exog_b, sigma=sigma)
                rslt3 = mod3.fit_regularized(L1_wt=L1_wt, alpha=alpha)

                assert_almost_equal(rslt1.params, rslt2.params, decimal=3)
                assert_almost_equal(rslt1.params, rslt3.params, decimal=3)

    def test_regularized_weights_list(self):

        np.random.seed(132)
        exog1 = np.random.normal(size=(100, 3))
        endog1 = exog1[:, 0] + exog1[:, 1] + np.random.normal(size=100)
        exog2 = np.random.normal(size=(100, 3))
        endog2 = exog2[:, 0] + exog2[:, 1] + np.random.normal(size=100)

        exog_a = np.vstack((exog1, exog1, exog2))
        endog_a = np.concatenate((endog1, endog1, endog2))

        # Should be equivalent to exog_a, endog_a.
        exog_b = np.vstack((exog1, exog2))
        endog_b = np.concatenate((endog1, endog2))
        wgts = np.ones(200)
        wgts[0:100] = 2
        sigma = np.diag(1 / wgts)

        for L1_wt in 0, 0.5, 1:
            for alpha_element in 0, 1:
                alpha = [
                    alpha_element,
                ] * 3

                mod1 = OLS(endog_a, exog_a)
                rslt1 = mod1.fit_regularized(L1_wt=L1_wt, alpha=alpha)

                mod2 = WLS(endog_b, exog_b, weights=wgts)
                rslt2 = mod2.fit_regularized(L1_wt=L1_wt, alpha=alpha)

                mod3 = GLS(endog_b, exog_b, sigma=sigma)
                rslt3 = mod3.fit_regularized(L1_wt=L1_wt, alpha=alpha)

                assert_almost_equal(rslt1.params, rslt2.params, decimal=3)
                assert_almost_equal(rslt1.params, rslt3.params, decimal=3)


def test_formula_missing_cat():
    # gh-805

    from patsy import PatsyError

    import statsmodels.api as sm
    from statsmodels.formula.api import ols

    dta = sm.datasets.grunfeld.load_pandas().data
    dta.loc[dta.index[0], "firm"] = np.nan

    mod = ols(
        formula="value ~ invest + capital + firm + year", data=dta.dropna()
    )
    res = mod.fit()

    mod2 = ols(formula="value ~ invest + capital + firm + year", data=dta)
    res2 = mod2.fit()

    assert_almost_equal(res.params.values, res2.params.values)

    assert_raises(
        PatsyError,
        ols,
        "value ~ invest + capital + firm + year",
        data=dta,
        missing="raise",
    )


def test_missing_formula_predict():
    # see 2171
    nsample = 30

    data = np.linspace(0, 10, nsample)
    null = np.array([np.nan])
    data = pd.DataFrame({"x": np.concatenate((data, null))})
    beta = np.array([1, 0.1])
    e = np.random.normal(size=nsample + 1)
    data["y"] = beta[0] + beta[1] * data["x"] + e
    model = OLS.from_formula("y ~ x", data=data)
    fit = model.fit()
    fit.predict(exog=data[:-1])


def test_fvalue_implicit_constant():
    # if the constant is implicit, fvalue should be nan; see #2444
    nobs = 100
    np.random.seed(2)
    x = np.random.randn(nobs, 1)
    x = ((x > 0) == [True, False]).astype(int)
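    # the two complementary indicator columns sum to one, so the model
    # contains an implicit constant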
    y = x.sum(1) + np.random.randn(nobs)

    from statsmodels.regression.linear_model import OLS, WLS

    res = OLS(y, x).fit(cov_type="HC1")
    assert_(np.isnan(res.fvalue))
    assert_(np.isnan(res.f_pvalue))
    res.summary()

    res = WLS(y, x).fit(cov_type="HC1")
    assert_(np.isnan(res.fvalue))
    assert_(np.isnan(res.f_pvalue))
    res.summary()


def test_fvalue_only_constant():
    # if there is only a constant in the model, fvalue should be nan; see #3642
    nobs = 20
    np.random.seed(2)
    x = np.ones(nobs)
    y = np.random.randn(nobs)

    from statsmodels.regression.linear_model import OLS, WLS

    res = OLS(y, x).fit(cov_type="hac", cov_kwds={"maxlags": 3})
    assert_(np.isnan(res.fvalue))
    assert_(np.isnan(res.f_pvalue))
    res.summary()

    res = WLS(y, x).fit(cov_type="HC1")
    assert_(np.isnan(res.fvalue))
    assert_(np.isnan(res.f_pvalue))
    res.summary()


def test_ridge():
    n = 100
    p = 5
    np.random.seed(3132)
    xmat = np.random.normal(size=(n, p))
    yvec = xmat.sum(1) + np.random.normal(size=n)

    v = np.ones(p)
    v[0] = 0

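    # With L1_wt=0 the elastic net penalty reduces to pure ridge, so
    # fit_regularized should agree with the internal _fit_ridge path
    # (and with a numerically negligible L1 weight).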
    for a in (0, 1, 10):
        for alpha in (a, a * np.ones(p), a * v):
            model1 = OLS(yvec, xmat)
            result1 = model1._fit_ridge(alpha=alpha)
            model2 = OLS(yvec, xmat)
            result2 = model2.fit_regularized(alpha=alpha, L1_wt=0)
            assert_allclose(result1.params, result2.params)
            model3 = OLS(yvec, xmat)
            result3 = model3.fit_regularized(alpha=alpha, L1_wt=1e-10)
            assert_allclose(result1.params, result3.params)

    fv1 = result1.fittedvalues
    fv2 = np.dot(xmat, result1.params)
    assert_allclose(fv1, fv2)


def test_regularized_refit():
    n = 100
    p = 5
    np.random.seed(3132)
    xmat = np.random.normal(size=(n, p))
    # covariates 0 and 2 matter
    yvec = xmat[:, 0] + xmat[:, 2] + np.random.normal(size=n)
    model1 = OLS(yvec, xmat)
    result1 = model1.fit_regularized(alpha=2.0, L1_wt=0.5, refit=True)
    model2 = OLS(yvec, xmat[:, [0, 2]])
    result2 = model2.fit()
    ii = [0, 2]
    assert_allclose(result1.params[ii], result2.params)
    assert_allclose(result1.bse[ii], result2.bse)


def test_regularized_predict():
    # this also compares WLS with GLS
    n = 100
    p = 5
    np.random.seed(3132)
    xmat = np.random.normal(size=(n, p))
    yvec = xmat.sum(1) + np.random.normal(size=n)
    wgt = np.random.uniform(1, 2, n)
    model_wls = WLS(yvec, xmat, weights=wgt)
    # TODO: params is not the same in GLS if sigma=1 / wgt, i.e 1-dim, #7755
    model_gls1 = GLS(yvec, xmat, sigma=np.diag(1 / wgt))
    model_gls2 = GLS(yvec, xmat, sigma=1 / wgt)
    res = []
    for model1 in [model_wls, model_gls1, model_gls2]:
        result1 = model1.fit_regularized(alpha=20.0, L1_wt=0.5, refit=True)
        res.append(result1)
        params = result1.params
        fittedvalues = np.dot(xmat, params)
        pr = model1.predict(result1.params)
        assert_allclose(fittedvalues, pr)
        assert_allclose(result1.fittedvalues, pr)

        pr = result1.predict()
        assert_allclose(fittedvalues, pr)

    assert_allclose(res[0].model.wendog, res[1].model.wendog, rtol=1e-10)
    assert_allclose(res[0].model.wexog, res[1].model.wexog, rtol=1e-10)
    assert_allclose(res[0].fittedvalues, res[1].fittedvalues, rtol=1e-10)
    assert_allclose(res[0].params, res[1].params, rtol=1e-10)

    assert_allclose(res[0].model.wendog, res[2].model.wendog, rtol=1e-10)
    assert_allclose(res[0].model.wexog, res[2].model.wexog, rtol=1e-10)
    assert_allclose(res[0].fittedvalues, res[2].fittedvalues, rtol=1e-10)
    assert_allclose(res[0].params, res[2].params, rtol=1e-10)


def test_regularized_options():
    n = 100
    p = 5
    np.random.seed(3132)
    xmat = np.random.normal(size=(n, p))
    yvec = xmat.sum(1) + np.random.normal(size=n)
    model1 = OLS(yvec - 1, xmat)
    result1 = model1.fit_regularized(alpha=1.0, L1_wt=0.5)
    model2 = OLS(yvec, xmat, offset=1)
    result2 = model2.fit_regularized(
        alpha=1.0, L1_wt=0.5, start_params=np.zeros(5)
    )
    assert_allclose(result1.params, result2.params)


def test_burg():
    rnd = np.random.RandomState(12345)
    e = rnd.randn(10001)
    y = e[1:] + 0.5 * e[:-1]
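    # y is an MA(1) process; burg fits AR approximations of increasing order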
    # R, ar.burg
    expected = [
        [0.3909931],
        [0.4602607, -0.1771582],
        [0.47473245, -0.21475602, 0.08168813],
        [0.4787017, -0.2251910, 0.1047554, -0.0485900],
        [0.47975462, -0.22746106, 0.10963527, -0.05896347, 0.02167001],
    ]

    for i in range(1, 6):
        ar, _ = burg(y, i)
        assert_allclose(ar, expected[i - 1], atol=1e-6)
        as_nodemean, _ = burg(1 + y, i, False)
        assert np.all(ar != as_nodemean)


def test_burg_errors():
    with pytest.raises(ValueError):
        burg(np.ones((100, 2)))
    with pytest.raises(ValueError):
        burg(np.random.randn(100), 0)
    with pytest.raises(ValueError):
        burg(np.random.randn(100), "apple")


@pytest.mark.skipif(not has_cvxopt, reason="sqrt_lasso requires cvxopt")
def test_sqrt_lasso():

    np.random.seed(234923)

    # Based on the example in the Belloni paper
    n = 100
    p = 500
    ii = np.arange(p)
    cx = 0.5 ** np.abs(np.subtract.outer(ii, ii))
    cxr = np.linalg.cholesky(cx)

    x = np.dot(np.random.normal(size=(n, p)), cxr.T)
    b = np.zeros(p)
    b[0:5] = [1, 1, 1, 1, 1]

    from scipy.stats.distributions import norm

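    # Penalty level suggested for the square-root lasso (Belloni et al.),
    #     alpha = 1.1 * sqrt(n) * Phi^{-1}(1 - 0.05 / (2 * p)),
    # which does not depend on the unknown noise standard deviation.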
    alpha = 1.1 * np.sqrt(n) * norm.ppf(1 - 0.05 / (2 * p))

    # Use very low noise level for a unit test
    y = np.dot(x, b) + 0.25 * np.random.normal(size=n)

    # At low noise levels, the sqrt lasso should be around a
    # factor of 3 from the oracle without refit, and should
    # almost equal the oracle with refit.
    expected_oracle = {False: 3, True: 1}

    # Used for regression testing
    expected_params = {
        False: np.r_[
            0.87397122, 0.96051874, 0.9905915, 0.93868953, 0.90771773
        ],
        True: np.r_[0.95114241, 1.0302987, 1.01723074, 0.97587343, 0.99846403],
    }

    for refit in False, True:

        rslt = OLS(y, x).fit_regularized(
            method="sqrt_lasso", alpha=alpha, refit=refit
        )
        err = rslt.params - b
        numer = np.sqrt(np.dot(err, np.dot(cx, err)))

        oracle = OLS(y, x[:, 0:5]).fit()
        oracle_err = np.zeros(p)
        oracle_err[0:5] = oracle.params - b[0:5]
        denom = np.sqrt(np.dot(oracle_err, np.dot(cx, oracle_err)))

        # Check performance relative to the oracle; the ratio should be
        # around expected_oracle[refit]
        assert_allclose(
            numer / denom, expected_oracle[refit], rtol=0.5, atol=0.1
        )

        # Regression test the parameters
        assert_allclose(
            rslt.params[0:5], expected_params[refit], rtol=1e-5, atol=1e-5
        )


def test_bool_regressor(reset_randomstate):
    exog = np.random.randint(0, 2, size=(100, 2)).astype(bool)
    endog = np.random.standard_normal(100)
    bool_res = OLS(endog, exog).fit()
    res = OLS(endog, exog.astype(np.double)).fit()
    assert_allclose(bool_res.params, res.params)


def test_ols_constant(reset_randomstate):
    y = np.random.standard_normal((200))
    x = np.ones((200, 1))
    res = OLS(y, x).fit()
    # accessing fvalue / f_pvalue on a constant-only model should emit no
    # warnings (catch_warnings replaces the pytest.warns(None) idiom
    # removed in pytest 7)
    with warnings.catch_warnings(record=True) as recording:
        warnings.simplefilter("always")
        assert np.isnan(res.fvalue)
        assert np.isnan(res.f_pvalue)
    assert len(recording) == 0


def test_summary_no_constant():
    rs = np.random.RandomState(0)
    x = rs.standard_normal((100, 2))
    y = rs.standard_normal(100)
    summary = OLS(y, x).fit().summary()
    assert "R² is computed " in summary.as_text()
