1import warnings
2
3import numpy as np
4import pandas as pd
5import pytest
6from numpy.testing import assert_equal
7
8from statsmodels.iolib.summary2 import summary_col
9from statsmodels.tools.tools import add_constant
10from statsmodels.regression.linear_model import OLS
11
12
class TestSummaryLatex(object):
    """Rendering tests for ``summary_col`` and the LaTeX form of OLS summaries.

    Every test fits OLS models on the same five-observation toy data set,
    which is stored on the class so that it is spelled out only once.
    """

    # Single regressor (before the constant is added) and two responses.
    _x = [1, 5, 7, 3, 5]
    _y1 = [6, 4, 2, 7, 4]
    _y2 = [8, 5, 0, 12, 4]
    # Extra regressor column used by the tests that need a third column.
    _extra = [[3], [9], [-1], [4], [0]]

    @classmethod
    def _exog(cls, with_extra=False):
        """Return a fresh design matrix built from the shared toy data.

        Parameters
        ----------
        with_extra : bool
            If True, append the ``_extra`` column after the columns
            produced by ``add_constant``.

        Returns
        -------
        ndarray
            ``add_constant(_x)``, optionally concatenated column-wise with
            ``_extra``.
        """
        exog = add_constant(cls._x)
        if with_extra:
            exog = np.concatenate([exog, np.array(cls._extra)], 1)
        return exog

    def test_summarycol(self):
        # Test for latex output of summary_col object
        desired = r'''
\begin{table}
\caption{}
\label{}
\begin{center}
\begin{tabular}{lll}
\hline
               & y I      & y II      \\
\hline
const          & 7.7500   & 12.4231   \\
               & (1.1058) & (3.1872)  \\
x1             & -0.7500  & -1.5769   \\
               & (0.2368) & (0.6826)  \\
R-squared      & 0.7697   & 0.6401    \\
R-squared Adj. & 0.6930   & 0.5202    \\
\hline
\end{tabular}
\end{center}
\end{table}
'''
        exog = self._exog()
        reg1 = OLS(self._y1, exog).fit()
        reg2 = OLS(self._y2, exog).fit()
        actual = summary_col([reg1, reg2]).as_latex()
        # The expected literal is wrapped in newlines for readability.
        actual = '\n%s\n' % actual
        assert_equal(desired, actual)

    def test_summarycol_float_format(self):
        # Test the plain-text output of summary_col with a custom
        # float_format, and that significance stars are attached.
        table_rows = [
            "==========================",
            "                y I   y II",
            "--------------------------",
            "const          7.7   12.4",
            "               (1.1) (3.2)",
            "x1             -0.7  -1.6",
            "               (0.2) (0.7)",
            "R-squared      0.8   0.6",
            "R-squared Adj. 0.7   0.5",
            "==========================",
        ]
        footer = ["Standard errors in", "parentheses."]
        # Table rows are expected to be space-padded to the width of the
        # rule lines.  Pad them here explicitly instead of keeping trailing
        # whitespace inside a string literal, where editors tend to strip
        # it and linters flag it (W291).
        width = len(table_rows[0])
        padded_rows = [row.ljust(width) for row in table_rows]
        # Leading '' -> text starts with a newline; trailing '' -> it ends
        # with one, matching the '%s\n' wrapping applied to the output.
        desired = "\n".join([""] + padded_rows + footer + [""])

        exog = self._exog()
        reg1 = OLS(self._y1, exog).fit()
        reg2 = OLS(self._y2, exog).fit()
        actual = summary_col([reg1, reg2], float_format='%0.1f').as_text()
        actual = '%s\n' % actual
        assert_equal(actual, desired)

        starred = str(summary_col([reg1, reg2], stars=True,
                                  float_format='%0.1f'))
        assert "7.7***" in starred
        assert "12.4**" in starred
        # The second model's constant must get exactly two stars.
        assert "12.4***" not in starred

    def test_summarycol_drop_omitted(self):
        # gh-3702
        reg1 = OLS(self._y1, self._exog()).fit()
        reg2 = OLS(self._y2, self._exog(with_extra=True)).fit()

        # Regressors missing from regressor_order are dropped on request ...
        dropped = summary_col([reg1, reg2], regressor_order=['const', 'x1'],
                              drop_omitted=True)
        assert 'x2' not in str(dropped)

        # ... and kept otherwise.
        kept = str(summary_col([reg1, reg2], regressor_order=['x1'],
                               drop_omitted=False))
        assert 'const' in kept
        assert 'x2' in kept

    def test_summary_col_ordering_preserved(self):
        # gh-3767
        exog = pd.DataFrame(self._exog(with_extra=True),
                            columns=['const', 'b', 'a'])
        reg1 = OLS(self._y1, exog).fit()
        reg2 = OLS(self._y2, exog).fit()

        # Format R2 from the float itself: wrapping it in int() would
        # truncate it to 0 and always render "0.000".
        info_dict = {'R2': lambda res: '{:.3f}'.format(res.rsquared),
                     'N': lambda res: '{0:d}'.format(int(res.nobs))}
        original = summary_col([reg1, reg2], float_format='%0.4f')
        actual = summary_col([reg1, reg2], regressor_order=['a', 'b'],
                             float_format='%0.4f',
                             info_dict=info_dict)

        # Every parameter row of the default table must appear unchanged in
        # the reordered table.
        reordered_text = str(actual)
        variables = ('const', 'b', 'a')
        for line in str(original).split('\n'):
            if line.startswith(variables):
                assert line in reordered_text

    def test_OLSsummary(self):
        # Test that latex output of regular OLS output still contains
        # multiple tables

        reg1 = OLS(self._y1, self._exog()).fit()
        # Silence any warnings raised while building the summary; they are
        # not what this test is about.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            actual = reg1.summary().as_latex()
        # One tabular environment ending directly before the next begins.
        string_to_find = r'''\end{tabular}
\begin{tabular}'''
        assert string_to_find in actual
133
134
def test_ols_summary_rsquared_label():
    """Check the R-squared label in both summary flavours of an OLS fit.

    A model fitted with a constant must show ``R-squared:``; a model
    fitted with ``hasconst=False`` must show ``R-squared (uncentered):``.
    Each summary call is also required to emit a ``UserWarning``.
    """
    exog = [1, 5, 7, 3, 5, 2, 5, 3]
    endog = [6, 4, 2, 7, 4, 9, 10, 2]

    def _check_label(fitted, label):
        # summary2() is checked first, then summary(); each call must warn.
        for render in (fitted.summary2, fitted.summary):
            with pytest.warns(UserWarning):
                assert label in str(render())

    centered_fit = OLS(endog, add_constant(exog)).fit()
    _check_label(centered_fit, 'R-squared:')

    uncentered_fit = OLS(endog, exog, hasconst=False).fit()
    _check_label(uncentered_fit, 'R-squared (uncentered):')
152
153
def test_summary_col_r2():
    """Both R-squared rows appear when a single result is passed (GH 6578)."""
    endog = [1, 1, 4, 2] * 4
    exog = add_constant([1, 2, 3, 4] * 4)
    fitted = OLS(endog=endog, exog=exog).fit()

    rendered = str(summary_col(results=fitted))
    # The trailing blanks in the first label distinguish the plain
    # "R-squared" row from the "R-squared Adj." row.
    for label in ("R-squared  ", "R-squared Adj."):
        assert label in rendered
162