1""" 2Lowess testing suite. 3 4Expected outcomes are generated by R's lowess function given the same 5arguments. The R script test_lowess_r_outputs.R can be used to 6generate the expected outcomes. 7 8The delta tests utilize Silverman's motorcycle collision data, 9available in R's MASS package. 10""" 11 12import os 13 14import numpy as np 15import pytest 16from numpy.testing import ( 17 assert_almost_equal, 18 assert_, 19 assert_raises, 20 assert_equal, 21) 22 23from statsmodels.nonparametric.smoothers_lowess import lowess 24 25# Number of decimals to test equality with. 26# The default is 7. 27curdir = os.path.dirname(os.path.abspath(__file__)) 28rpath = os.path.join(curdir, "results") 29 30 31class TestLowess(object): 32 def test_import(self): 33 # this does not work 34 # from statsmodels.api.nonparametric import lowess as lowess1 35 import statsmodels.api as sm 36 37 lowess1 = sm.nonparametric.lowess 38 assert_(lowess is lowess1) 39 40 def test_flat(self): 41 test_data = { 42 "x": np.arange(20), 43 "y": np.zeros(20), 44 "out": np.zeros(20), 45 } 46 expected_lowess = np.array([test_data["x"], test_data["out"]]).T 47 actual_lowess = lowess(test_data["y"], test_data["x"]) 48 assert_almost_equal(expected_lowess, actual_lowess, 7) 49 50 def test_range(self): 51 test_data = { 52 "x": np.arange(20), 53 "y": np.arange(20), 54 "out": np.arange(20), 55 } 56 expected_lowess = np.array([test_data["x"], test_data["out"]]).T 57 actual_lowess = lowess(test_data["y"], test_data["x"]) 58 assert_almost_equal(expected_lowess, actual_lowess, 7) 59 60 @staticmethod 61 def generate(name, fname, x="x", y="y", out="out", kwargs=None, decimal=7): 62 kwargs = {} if kwargs is None else kwargs 63 data = np.genfromtxt( 64 os.path.join(rpath, fname), delimiter=",", names=True 65 ) 66 assert_almost_equal.description = name 67 if callable(kwargs): 68 kwargs = kwargs(data) 69 result = lowess(data[y], data[x], **kwargs) 70 expect = np.array([data[x], data[out]]).T 71 assert_almost_equal(result, expect, decimal) 72 73 # TODO: Refactor as parametrized test once nose is permanently dropped 74 def test_simple(self): 75 self.generate("test_simple", "test_lowess_simple.csv") 76 77 def test_iter_0(self): 78 self.generate( 79 "test_iter_0", 80 "test_lowess_iter.csv", 81 out="out_0", 82 kwargs={"it": 0}, 83 ) 84 85 def test_iter_0_3(self): 86 self.generate( 87 "test_iter_0", 88 "test_lowess_iter.csv", 89 out="out_3", 90 kwargs={"it": 3}, 91 ) 92 93 def test_frac_2_3(self): 94 self.generate( 95 "test_frac_2_3", 96 "test_lowess_frac.csv", 97 out="out_2_3", 98 kwargs={"frac": 2.0 / 3}, 99 ) 100 101 def test_frac_1_5(self): 102 self.generate( 103 "test_frac_1_5", 104 "test_lowess_frac.csv", 105 out="out_1_5", 106 kwargs={"frac": 1.0 / 5}, 107 ) 108 109 def test_delta_0(self): 110 self.generate( 111 "test_delta_0", 112 "test_lowess_delta.csv", 113 out="out_0", 114 kwargs={"frac": 0.1}, 115 ) 116 117 def test_delta_rdef(self): 118 self.generate( 119 "test_delta_Rdef", 120 "test_lowess_delta.csv", 121 out="out_Rdef", 122 kwargs=lambda data: { 123 "frac": 0.1, 124 "delta": 0.01 * np.ptp(data["x"]), 125 }, 126 ) 127 128 def test_delta_1(self): 129 self.generate( 130 "test_delta_1", 131 "test_lowess_delta.csv", 132 out="out_1", 133 kwargs={"frac": 0.1, "delta": 1 + 1e-10}, 134 decimal=10, 135 ) 136 137 def test_options(self): 138 rfile = os.path.join(rpath, "test_lowess_simple.csv") 139 test_data = np.genfromtxt(open(rfile, "rb"), delimiter=",", names=True) 140 y, x = test_data["y"], test_data["x"] 141 res1_fitted = test_data["out"] 142 expected_lowess = np.array([test_data["x"], test_data["out"]]).T 143 144 # check skip sorting 145 actual_lowess1 = lowess(y, x, is_sorted=True) 146 assert_almost_equal(actual_lowess1, expected_lowess, decimal=13) 147 148 # check skip missing 149 actual_lowess = lowess(y, x, is_sorted=True, missing="none") 150 assert_almost_equal(actual_lowess, actual_lowess1, decimal=13) 151 152 # check order/index, returns yfitted only 153 actual_lowess = lowess(y[::-1], x[::-1], return_sorted=False) 154 assert_almost_equal(actual_lowess, actual_lowess1[::-1, 1], decimal=13) 155 156 # check returns yfitted only 157 actual_lowess = lowess( 158 y, x, return_sorted=False, missing="none", is_sorted=True 159 ) 160 assert_almost_equal(actual_lowess, actual_lowess1[:, 1], decimal=13) 161 162 # check integer input 163 actual_lowess = lowess(np.round(y).astype(int), x, is_sorted=True) 164 actual_lowess1 = lowess(np.round(y), x, is_sorted=True) 165 assert_almost_equal(actual_lowess, actual_lowess1, decimal=13) 166 assert_(actual_lowess.dtype is np.dtype(float)) 167 # this will also have duplicate x 168 actual_lowess = lowess(y, np.round(x).astype(int), is_sorted=True) 169 actual_lowess1 = lowess(y, np.round(x), is_sorted=True) 170 assert_almost_equal(actual_lowess, actual_lowess1, decimal=13) 171 assert_(actual_lowess.dtype is np.dtype(float)) 172 173 # Test specifying xvals explicitly 174 perm_idx = np.arange(len(x) // 2) 175 np.random.shuffle(perm_idx) 176 actual_lowess2 = lowess(y, x, xvals=x[perm_idx], return_sorted=False) 177 assert_almost_equal( 178 actual_lowess[perm_idx, 1], actual_lowess2, decimal=13 179 ) 180 181 # check with nans, this changes the arrays 182 y[[5, 6]] = np.nan 183 x[3] = np.nan 184 mask_valid = np.isfinite(x) & np.isfinite(y) 185 # actual_lowess1[[3, 5, 6], 1] = np.nan 186 actual_lowess = lowess(y, x, is_sorted=True) 187 actual_lowess1 = lowess(y[mask_valid], x[mask_valid], is_sorted=True) 188 assert_almost_equal(actual_lowess, actual_lowess1, decimal=13) 189 assert_raises(ValueError, lowess, y, x, missing="raise") 190 191 perm_idx = np.arange(len(x)) 192 np.random.shuffle(perm_idx) 193 yperm = y[perm_idx] 194 xperm = x[perm_idx] 195 actual_lowess2 = lowess(yperm, xperm, is_sorted=False) 196 assert_almost_equal(actual_lowess, actual_lowess2, decimal=13) 197 198 actual_lowess3 = lowess( 199 yperm, xperm, is_sorted=False, return_sorted=False 200 ) 201 mask_valid = np.isfinite(xperm) & np.isfinite(yperm) 202 assert_equal(np.isnan(actual_lowess3), ~mask_valid) 203 # get valid sorted smoothed y from actual_lowess3 204 sort_idx = np.argsort(xperm) 205 yhat = actual_lowess3[sort_idx] 206 yhat = yhat[np.isfinite(yhat)] 207 assert_almost_equal(yhat, actual_lowess2[:, 1], decimal=13) 208 209 # Test specifying xvals explicitly, now with nans 210 perm_idx = np.arange(actual_lowess.shape[0]) 211 actual_lowess4 = lowess( 212 y, x, xvals=actual_lowess[perm_idx, 0], return_sorted=False 213 ) 214 assert_almost_equal( 215 actual_lowess[perm_idx, 1], actual_lowess4, decimal=13 216 ) 217 218 def test_duplicate_xs(self): 219 # see 2449 220 # Generate cases with many duplicate x values 221 x = [0] + [1] * 100 + [2] * 100 + [3] 222 y = x + np.random.normal(size=len(x)) * 1e-8 223 result = lowess(y, x, frac=50 / len(x), it=1) 224 # fit values should be approximately averages of values at 225 # a particular fit, which in this case are just equal to x 226 assert_almost_equal(result[1:-1, 1], x[1:-1], decimal=7) 227 228 def test_spike(self): 229 # see 7700 230 # Create a curve that is easy to fit at first but gets harder further along. 231 # This used to give an outlier bad fit at position 961 232 x = np.linspace(0, 10, 1001) 233 y = np.cos(x ** 2 / 5) 234 result = lowess(y, x, frac=11 / len(x), it=1) 235 assert_(np.all(result[:, 1] > np.min(y) - 0.1)) 236 assert_(np.all(result[:, 1] < np.max(y) + 0.1)) 237 238 def test_exog_predict(self): 239 rfile = os.path.join(rpath, "test_lowess_simple.csv") 240 test_data = np.genfromtxt(open(rfile, "rb"), delimiter=",", names=True) 241 y, x = test_data["y"], test_data["x"] 242 target = lowess(y, x, is_sorted=True) 243 244 # Test specifying exog_predict explicitly 245 perm_idx = np.arange(len(x) // 2) 246 np.random.shuffle(perm_idx) 247 actual_lowess = lowess(y, x, xvals=x[perm_idx], missing="none") 248 assert_almost_equal(target[perm_idx, 1], actual_lowess, decimal=13) 249 250 target_it0 = lowess(y, x, return_sorted=False, it=0) 251 actual_lowess2 = lowess(y, x, xvals=x[perm_idx], it=0) 252 assert_almost_equal(target_it0[perm_idx], actual_lowess2, decimal=13) 253 254 # Check nans in exog_predict 255 with pytest.raises(ValueError): 256 lowess(y, x, xvals=np.array([np.nan, 5, 3]), missing="raise") 257 258 # With is_sorted=True 259 actual_lowess3 = lowess(y, x, xvals=x, is_sorted=True) 260 assert_equal(actual_lowess3, target[:, 1]) 261 262 # check with nans, this changes the arrays 263 y[[5, 6]] = np.nan 264 x[3] = np.nan 265 target = lowess(y, x, is_sorted=True) 266 267 # Test specifying exog_predict explicitly, now with nans 268 perm_idx = np.arange(target.shape[0]) 269 actual_lowess1 = lowess(y, x, xvals=target[perm_idx, 0]) 270 assert_almost_equal(target[perm_idx, 1], actual_lowess1, decimal=13) 271 272 # nans and missing='drop' 273 actual_lowess2 = lowess(y, x, xvals=x, missing="drop") 274 all_finite = np.isfinite(x) & np.isfinite(y) 275 assert_equal(actual_lowess2[all_finite], target[:, 1]) 276 277 # Dimensional check 278 with pytest.raises(ValueError): 279 lowess(y, x, xvals=np.array([[5], [10]])) 280 281 282def test_returns_inputs(): 283 # see 1960 284 y = [0] * 10 + [1] * 10 285 x = np.arange(20) 286 result = lowess(y, x, frac=0.4) 287 assert_almost_equal(result, np.column_stack((x, y))) 288