import numpy as np
from numpy.testing import assert_almost_equal, assert_equal, assert_raises
from statsmodels.base.transform import BoxCox
from statsmodels.datasets import macrodata


class TestTransform:
    """Tests for the ``BoxCox`` transformation helper.

    All tests share the ``realgdp`` series from the macrodata dataset
    (``cls.x``) and a single ``BoxCox`` instance (``cls.bc``).
    """

    @classmethod
    def setup_class(cls):
        """Load the real GDP series and create the shared BoxCox instance."""
        data = macrodata.load_pandas()
        cls.x = data.data['realgdp'].values
        cls.bc = BoxCox()

    def test_nonpositive(self):
        """``transform_boxcox`` must reject negative and zero observations."""
        # Testing negative values
        y = [1, -1, 1]
        assert_raises(ValueError, self.bc.transform_boxcox, y)

        # Testing zero
        y = [1, 0, 1]
        assert_raises(ValueError, self.bc.transform_boxcox, y)

    def test_invalid_bounds(self):
        """``_est_lambda`` must reject malformed ``bounds`` tuples."""
        # more than two bounds
        assert_raises(ValueError, self.bc._est_lambda, self.x, (-3, 2, 3))

        # upper bound <= lower bound
        assert_raises(ValueError, self.bc._est_lambda, self.x, (2, -1))

    def test_unclear_methods(self):
        """Unknown ``method`` names must raise ``ValueError``."""
        # Both _est_lambda and untransform have a method argument that should
        # be tested.
        assert_raises(ValueError, self.bc._est_lambda,
                      self.x, (-1, 2), 'test')
        assert_raises(ValueError, self.bc.untransform_boxcox,
                      self.x, 1, 'test')

    def test_unclear_scale_parameter(self):
        """Unknown ``scale`` names raise; 'mad'/'sd' work in either case."""
        # bc.guerrero allows for 'mad' and 'sd', for the MAD and Standard
        # Deviation, respectively
        assert_raises(ValueError, self.bc._est_lambda,
                      self.x, scale='test')

        # Next, check if mad/sd work (these calls only need to not raise):
        self.bc._est_lambda(self.x, scale='mad')
        self.bc._est_lambda(self.x, scale='MAD')

        self.bc._est_lambda(self.x, scale='sd')
        self.bc._est_lambda(self.x, scale='SD')

    def test_valid_guerrero(self):
        """Guerrero lambda estimates must match the R reference values."""
        # `l <- BoxCox.lambda(x, method="guerrero")` on a ts object
        # with frequency 4 (BoxCox.lambda defaults to 2, but we use
        # Guerrero and Perera (2004) as a guideline)
        lmbda = self.bc._est_lambda(self.x, method='guerrero',
                                    window_length=4)
        assert_almost_equal(lmbda, 0.507624, 4)

        # `l <- BoxCox.lambda(x, method="guerrero")` with the default grouping
        # parameter (namely, window_length=2).
        lmbda = self.bc._est_lambda(self.x, method='guerrero',
                                    window_length=2)
        assert_almost_equal(lmbda, 0.513893, 4)

    def test_guerrero_robust_scale(self):
        """Lambda estimated with the MAD scale must match the checked value."""
        # The lambda is derived from a manual check of the values for the MAD.
        # Compare also the result for the standard deviation on R=4: 0.5076,
        # i.e. almost the same value.
        lmbda = self.bc._est_lambda(self.x, scale='mad')
        assert_almost_equal(lmbda, 0.488621, 4)

    def test_loglik_lambda_estimation(self):
        """Log-likelihood lambda estimate must match R to one decimal."""
        # 0.2 is the value returned by `BoxCox.lambda(x, method="loglik")`
        lmbda = self.bc._est_lambda(self.x, method='loglik')
        assert_almost_equal(lmbda, 0.2, 1)

    def test_boxcox_transformation_methods(self):
        """Estimated-lambda and given-lambda transforms must agree."""
        # testing estimated lambda vs. provided. Should result in almost
        # the same transformed data. Value taken from R.
        y_transformed_no_lambda = self.bc.transform_boxcox(self.x)
        y_transformed_lambda = self.bc.transform_boxcox(self.x, 0.507624)

        # transform_boxcox returns a (transformed, lambda) pair; index 0 is
        # the transformed series.
        assert_almost_equal(y_transformed_no_lambda[0],
                            y_transformed_lambda[0], 3)

        # a perfectly increasing set has a constant variance over the entire
        # series, hence stabilising should result in the same scale: lmbda = 1.
        y, lmbda = self.bc.transform_boxcox(np.arange(1, 100))
        assert_almost_equal(lmbda, 1., 5)

    def test_zero_lambda(self):
        """A lambda of zero must reduce to the natural log transform."""
        # zero lambda should be a log transform.
        y_transform_zero_lambda, lmbda = self.bc.transform_boxcox(self.x, 0.)

        assert_equal(lmbda, 0.)
        assert_almost_equal(y_transform_zero_lambda, np.log(self.x), 5)

    def test_naive_back_transformation(self):
        """Naive untransform must invert the transform for lambda 0 and 0.5."""
        # test both transformations functions -> 0. and .5
        y_zero_lambda = self.bc.transform_boxcox(self.x, 0.)
        y_half_lambda = self.bc.transform_boxcox(self.x, .5)

        # Each result is a (transformed, lambda) pair, unpacked positionally
        # into untransform_boxcox.
        y_zero_lambda_un = self.bc.untransform_boxcox(*y_zero_lambda,
                                                      method='naive')
        y_half_lambda_un = self.bc.untransform_boxcox(*y_half_lambda,
                                                      method='naive')

        assert_almost_equal(self.x, y_zero_lambda_un, 5)
        assert_almost_equal(self.x, y_half_lambda_un, 5)