1"""
2Lowess testing suite.
3
4Expected outcomes are generated by R's lowess function given the same
5arguments. The R script test_lowess_r_outputs.R can be used to
6generate the expected outcomes.
7
8The delta tests utilize Silverman's motorcycle collision data,
9available in R's MASS package.
10"""
11
12import os
13
14import numpy as np
15import pytest
16from numpy.testing import (
17    assert_almost_equal,
18    assert_,
19    assert_raises,
20    assert_equal,
21)
22
23from statsmodels.nonparametric.smoothers_lowess import lowess
24
25# Number of decimals to test equality with.
26# The default is 7.
27curdir = os.path.dirname(os.path.abspath(__file__))
28rpath = os.path.join(curdir, "results")
29
30
31class TestLowess(object):
32    def test_import(self):
33        # this does not work
34        # from statsmodels.api.nonparametric import lowess as lowess1
35        import statsmodels.api as sm
36
37        lowess1 = sm.nonparametric.lowess
38        assert_(lowess is lowess1)
39
40    def test_flat(self):
41        test_data = {
42            "x": np.arange(20),
43            "y": np.zeros(20),
44            "out": np.zeros(20),
45        }
46        expected_lowess = np.array([test_data["x"], test_data["out"]]).T
47        actual_lowess = lowess(test_data["y"], test_data["x"])
48        assert_almost_equal(expected_lowess, actual_lowess, 7)
49
50    def test_range(self):
51        test_data = {
52            "x": np.arange(20),
53            "y": np.arange(20),
54            "out": np.arange(20),
55        }
56        expected_lowess = np.array([test_data["x"], test_data["out"]]).T
57        actual_lowess = lowess(test_data["y"], test_data["x"])
58        assert_almost_equal(expected_lowess, actual_lowess, 7)
59
60    @staticmethod
61    def generate(name, fname, x="x", y="y", out="out", kwargs=None, decimal=7):
62        kwargs = {} if kwargs is None else kwargs
63        data = np.genfromtxt(
64            os.path.join(rpath, fname), delimiter=",", names=True
65        )
66        assert_almost_equal.description = name
67        if callable(kwargs):
68            kwargs = kwargs(data)
69        result = lowess(data[y], data[x], **kwargs)
70        expect = np.array([data[x], data[out]]).T
71        assert_almost_equal(result, expect, decimal)
72
73    # TODO: Refactor as parametrized test once nose is permanently dropped
74    def test_simple(self):
75        self.generate("test_simple", "test_lowess_simple.csv")
76
77    def test_iter_0(self):
78        self.generate(
79            "test_iter_0",
80            "test_lowess_iter.csv",
81            out="out_0",
82            kwargs={"it": 0},
83        )
84
85    def test_iter_0_3(self):
86        self.generate(
87            "test_iter_0",
88            "test_lowess_iter.csv",
89            out="out_3",
90            kwargs={"it": 3},
91        )
92
93    def test_frac_2_3(self):
94        self.generate(
95            "test_frac_2_3",
96            "test_lowess_frac.csv",
97            out="out_2_3",
98            kwargs={"frac": 2.0 / 3},
99        )
100
101    def test_frac_1_5(self):
102        self.generate(
103            "test_frac_1_5",
104            "test_lowess_frac.csv",
105            out="out_1_5",
106            kwargs={"frac": 1.0 / 5},
107        )
108
109    def test_delta_0(self):
110        self.generate(
111            "test_delta_0",
112            "test_lowess_delta.csv",
113            out="out_0",
114            kwargs={"frac": 0.1},
115        )
116
117    def test_delta_rdef(self):
118        self.generate(
119            "test_delta_Rdef",
120            "test_lowess_delta.csv",
121            out="out_Rdef",
122            kwargs=lambda data: {
123                "frac": 0.1,
124                "delta": 0.01 * np.ptp(data["x"]),
125            },
126        )
127
128    def test_delta_1(self):
129        self.generate(
130            "test_delta_1",
131            "test_lowess_delta.csv",
132            out="out_1",
133            kwargs={"frac": 0.1, "delta": 1 + 1e-10},
134            decimal=10,
135        )
136
137    def test_options(self):
138        rfile = os.path.join(rpath, "test_lowess_simple.csv")
139        test_data = np.genfromtxt(open(rfile, "rb"), delimiter=",", names=True)
140        y, x = test_data["y"], test_data["x"]
141        res1_fitted = test_data["out"]
142        expected_lowess = np.array([test_data["x"], test_data["out"]]).T
143
144        # check skip sorting
145        actual_lowess1 = lowess(y, x, is_sorted=True)
146        assert_almost_equal(actual_lowess1, expected_lowess, decimal=13)
147
148        # check skip missing
149        actual_lowess = lowess(y, x, is_sorted=True, missing="none")
150        assert_almost_equal(actual_lowess, actual_lowess1, decimal=13)
151
152        # check order/index, returns yfitted only
153        actual_lowess = lowess(y[::-1], x[::-1], return_sorted=False)
154        assert_almost_equal(actual_lowess, actual_lowess1[::-1, 1], decimal=13)
155
156        # check returns yfitted only
157        actual_lowess = lowess(
158            y, x, return_sorted=False, missing="none", is_sorted=True
159        )
160        assert_almost_equal(actual_lowess, actual_lowess1[:, 1], decimal=13)
161
162        # check integer input
163        actual_lowess = lowess(np.round(y).astype(int), x, is_sorted=True)
164        actual_lowess1 = lowess(np.round(y), x, is_sorted=True)
165        assert_almost_equal(actual_lowess, actual_lowess1, decimal=13)
166        assert_(actual_lowess.dtype is np.dtype(float))
167        # this will also have duplicate x
168        actual_lowess = lowess(y, np.round(x).astype(int), is_sorted=True)
169        actual_lowess1 = lowess(y, np.round(x), is_sorted=True)
170        assert_almost_equal(actual_lowess, actual_lowess1, decimal=13)
171        assert_(actual_lowess.dtype is np.dtype(float))
172
173        # Test specifying xvals explicitly
174        perm_idx = np.arange(len(x) // 2)
175        np.random.shuffle(perm_idx)
176        actual_lowess2 = lowess(y, x, xvals=x[perm_idx], return_sorted=False)
177        assert_almost_equal(
178            actual_lowess[perm_idx, 1], actual_lowess2, decimal=13
179        )
180
181        # check with nans,  this changes the arrays
182        y[[5, 6]] = np.nan
183        x[3] = np.nan
184        mask_valid = np.isfinite(x) & np.isfinite(y)
185        # actual_lowess1[[3, 5, 6], 1] = np.nan
186        actual_lowess = lowess(y, x, is_sorted=True)
187        actual_lowess1 = lowess(y[mask_valid], x[mask_valid], is_sorted=True)
188        assert_almost_equal(actual_lowess, actual_lowess1, decimal=13)
189        assert_raises(ValueError, lowess, y, x, missing="raise")
190
191        perm_idx = np.arange(len(x))
192        np.random.shuffle(perm_idx)
193        yperm = y[perm_idx]
194        xperm = x[perm_idx]
195        actual_lowess2 = lowess(yperm, xperm, is_sorted=False)
196        assert_almost_equal(actual_lowess, actual_lowess2, decimal=13)
197
198        actual_lowess3 = lowess(
199            yperm, xperm, is_sorted=False, return_sorted=False
200        )
201        mask_valid = np.isfinite(xperm) & np.isfinite(yperm)
202        assert_equal(np.isnan(actual_lowess3), ~mask_valid)
203        # get valid sorted smoothed y from actual_lowess3
204        sort_idx = np.argsort(xperm)
205        yhat = actual_lowess3[sort_idx]
206        yhat = yhat[np.isfinite(yhat)]
207        assert_almost_equal(yhat, actual_lowess2[:, 1], decimal=13)
208
209        # Test specifying xvals explicitly, now with nans
210        perm_idx = np.arange(actual_lowess.shape[0])
211        actual_lowess4 = lowess(
212            y, x, xvals=actual_lowess[perm_idx, 0], return_sorted=False
213        )
214        assert_almost_equal(
215            actual_lowess[perm_idx, 1], actual_lowess4, decimal=13
216        )
217
218    def test_duplicate_xs(self):
219        # see 2449
220        # Generate cases with many duplicate x values
221        x = [0] + [1] * 100 + [2] * 100 + [3]
222        y = x + np.random.normal(size=len(x)) * 1e-8
223        result = lowess(y, x, frac=50 / len(x), it=1)
224        # fit values should be approximately averages of values at
225        # a particular fit, which in this case are just equal to x
226        assert_almost_equal(result[1:-1, 1], x[1:-1], decimal=7)
227
228    def test_spike(self):
229        # see 7700
230        # Create a curve that is easy to fit at first but gets harder further along.
231        # This used to give an outlier bad fit at position 961
232        x = np.linspace(0, 10, 1001)
233        y = np.cos(x ** 2 / 5)
234        result = lowess(y, x, frac=11 / len(x), it=1)
235        assert_(np.all(result[:, 1] > np.min(y) - 0.1))
236        assert_(np.all(result[:, 1] < np.max(y) + 0.1))
237
238    def test_exog_predict(self):
239        rfile = os.path.join(rpath, "test_lowess_simple.csv")
240        test_data = np.genfromtxt(open(rfile, "rb"), delimiter=",", names=True)
241        y, x = test_data["y"], test_data["x"]
242        target = lowess(y, x, is_sorted=True)
243
244        # Test specifying exog_predict explicitly
245        perm_idx = np.arange(len(x) // 2)
246        np.random.shuffle(perm_idx)
247        actual_lowess = lowess(y, x, xvals=x[perm_idx], missing="none")
248        assert_almost_equal(target[perm_idx, 1], actual_lowess, decimal=13)
249
250        target_it0 = lowess(y, x, return_sorted=False, it=0)
251        actual_lowess2 = lowess(y, x, xvals=x[perm_idx], it=0)
252        assert_almost_equal(target_it0[perm_idx], actual_lowess2, decimal=13)
253
254        # Check nans in exog_predict
255        with pytest.raises(ValueError):
256            lowess(y, x, xvals=np.array([np.nan, 5, 3]), missing="raise")
257
258        # With is_sorted=True
259        actual_lowess3 = lowess(y, x, xvals=x, is_sorted=True)
260        assert_equal(actual_lowess3, target[:, 1])
261
262        # check with nans,  this changes the arrays
263        y[[5, 6]] = np.nan
264        x[3] = np.nan
265        target = lowess(y, x, is_sorted=True)
266
267        # Test specifying exog_predict explicitly, now with nans
268        perm_idx = np.arange(target.shape[0])
269        actual_lowess1 = lowess(y, x, xvals=target[perm_idx, 0])
270        assert_almost_equal(target[perm_idx, 1], actual_lowess1, decimal=13)
271
272        # nans and missing='drop'
273        actual_lowess2 = lowess(y, x, xvals=x, missing="drop")
274        all_finite = np.isfinite(x) & np.isfinite(y)
275        assert_equal(actual_lowess2[all_finite], target[:, 1])
276
277        # Dimensional check
278        with pytest.raises(ValueError):
279            lowess(y, x, xvals=np.array([[5], [10]]))
280
281
282def test_returns_inputs():
283    # see 1960
284    y = [0] * 10 + [1] * 10
285    x = np.arange(20)
286    result = lowess(y, x, frac=0.4)
287    assert_almost_equal(result, np.column_stack((x, y)))
288