1""" 2Statistical tools for time series analysis 3""" 4from __future__ import annotations 5 6from statsmodels.compat.numpy import lstsq 7from statsmodels.compat.pandas import deprecate_kwarg 8from statsmodels.compat.python import lzip, Literal 9from statsmodels.compat.scipy import _next_regular 10 11from typing import Tuple 12import warnings 13 14import numpy as np 15from numpy.linalg import LinAlgError 16import pandas as pd 17from scipy import stats 18from scipy.interpolate import interp1d 19from scipy.signal import correlate 20 21from statsmodels.regression.linear_model import OLS, yule_walker 22from statsmodels.tools.sm_exceptions import ( 23 CollinearityWarning, 24 InfeasibleTestError, 25 InterpolationWarning, 26 MissingDataError, 27) 28from statsmodels.tools.tools import Bunch, add_constant 29from statsmodels.tools.validation import ( 30 array_like, 31 bool_like, 32 dict_like, 33 float_like, 34 int_like, 35 string_like, 36) 37from statsmodels.tsa._bds import bds 38from statsmodels.tsa._innovations import innovations_algo, innovations_filter 39from statsmodels.tsa.adfvalues import mackinnoncrit, mackinnonp 40from statsmodels.tsa.tsatools import add_trend, lagmat, lagmat2ds, rename_trend 41 42__all__ = [ 43 "acovf", 44 "acf", 45 "pacf", 46 "pacf_yw", 47 "pacf_ols", 48 "ccovf", 49 "ccf", 50 "q_stat", 51 "coint", 52 "arma_order_select_ic", 53 "adfuller", 54 "kpss", 55 "bds", 56 "pacf_burg", 57 "innovations_algo", 58 "innovations_filter", 59 "levinson_durbin_pacf", 60 "levinson_durbin", 61 "zivot_andrews", 62 "range_unit_root_test", 63] 64 65SQRTEPS = np.sqrt(np.finfo(np.double).eps) 66 67 68def _autolag( 69 mod, 70 endog, 71 exog, 72 startlag, 73 maxlag, 74 method, 75 modargs=(), 76 fitargs=(), 77 regresults=False, 78): 79 """ 80 Returns the results for the lag length that maximizes the info criterion. 81 82 Parameters 83 ---------- 84 mod : Model class 85 Model estimator class 86 endog : array_like 87 nobs array containing endogenous variable 88 exog : array_like 89 nobs by (startlag + maxlag) array containing lags and possibly other 90 variables 91 startlag : int 92 The first zero-indexed column to hold a lag. See Notes. 93 maxlag : int 94 The highest lag order for lag length selection. 95 method : {"aic", "bic", "t-stat"} 96 aic - Akaike Information Criterion 97 bic - Bayes Information Criterion 98 t-stat - Based on last lag 99 modargs : tuple, optional 100 args to pass to model. See notes. 101 fitargs : tuple, optional 102 args to pass to fit. See notes. 103 regresults : bool, optional 104 Flag indicating to return optional return results 105 106 Returns 107 ------- 108 icbest : float 109 Best information criteria. 110 bestlag : int 111 The lag length that maximizes the information criterion. 112 results : dict, optional 113 Dictionary containing all estimation results 114 115 Notes 116 ----- 117 Does estimation like mod(endog, exog[:,:i], *modargs).fit(*fitargs) 118 where i goes from lagstart to lagstart+maxlag+1. Therefore, lags are 119 assumed to be in contiguous columns from low to high lag length with 120 the highest lag in the last column. 121 """ 122 # TODO: can tcol be replaced by maxlag + 2? 123 # TODO: This could be changed to laggedRHS and exog keyword arguments if 124 # this will be more general. 
    """
    # TODO: can tcol be replaced by maxlag + 2?
    # TODO: This could be changed to laggedRHS and exog keyword arguments if
    #   this will be more general.

    results = {}
    method = method.lower()
    for lag in range(startlag, startlag + maxlag + 1):
        mod_instance = mod(endog, exog[:, :lag], *modargs)
        results[lag] = mod_instance.fit()

    if method == "aic":
        icbest, bestlag = min((v.aic, k) for k, v in results.items())
    elif method == "bic":
        icbest, bestlag = min((v.bic, k) for k, v in results.items())
    elif method == "t-stat":
        # stop = stats.norm.ppf(.95)
        stop = 1.6448536269514722
        # Default values to ensure these are always set
        bestlag = startlag + maxlag
        icbest = 0.0
        for lag in range(startlag + maxlag, startlag - 1, -1):
            icbest = np.abs(results[lag].tvalues[-1])
            bestlag = lag
            if np.abs(icbest) >= stop:
                # Break for the first lag with a significant t-stat
                break
    else:
        raise ValueError(f"Information Criterion {method} not understood.")

    if not regresults:
        return icbest, bestlag
    else:
        return icbest, bestlag, results


# this needs to be converted to a class like HetGoldfeldQuandt,
# 3 different returns are a mess
# See:
# Ng and Perron(2001), Lag length selection and the construction of unit root
# tests with good size and power, Econometrica, Vol 69 (6) pp 1519-1554
# TODO: include drift keyword, only valid with regression == "c"
# just changes the distribution of the test statistic to a t distribution
# TODO: autolag is untested
def adfuller(
    x,
    maxlag: int | None = None,
    regression="c",
    autolag="AIC",
    store=False,
    regresults=False,
):
    """
    Augmented Dickey-Fuller unit root test.

    The Augmented Dickey-Fuller test can be used to test for a unit root in a
    univariate process in the presence of serial correlation.

    Parameters
    ----------
    x : array_like, 1d
        The data series to test.
    maxlag : {None, int}
        Maximum lag which is included in the test; the default value of
        12*(nobs/100)^{1/4} is used when ``None``.
    regression : {"c", "ct", "ctt", "n"}
        Constant and trend order to include in regression.

        * "c" : constant only (default).
        * "ct" : constant and trend.
        * "ctt" : constant, and linear and quadratic trend.
        * "n" : no constant, no trend.

    autolag : {"AIC", "BIC", "t-stat", None}
        Method to use when automatically determining the lag length among the
        values 0, 1, ..., maxlag.

        * If "AIC" (default) or "BIC", then the number of lags is chosen
          to minimize the corresponding information criterion.
        * "t-stat" based choice of maxlag. Starts with maxlag and drops a
          lag until the t-statistic on the last lag length is significant
          using a 5%-sized test.
        * If None, then the number of included lags is set to maxlag.
    store : bool
        If True, then a result instance is returned additionally to
        the adf statistic. Default is False.
    regresults : bool, optional
        If True, the full regression results are returned. Default is False.

    Returns
    -------
    adf : float
        The test statistic.
    pvalue : float
        MacKinnon's approximate p-value based on MacKinnon (1994, 2010).
    usedlag : int
        The number of lags used.
    nobs : int
        The number of observations used for the ADF regression and
        calculation of the critical values.
    critical values : dict
        Critical values for the test statistic at the 1 %, 5 %, and 10 %
        levels. Based on MacKinnon (2010).
    icbest : float
        The maximized information criterion if autolag is not None.
    resstore : ResultsStore, optional
        A dummy class with results attached as attributes.

    Notes
    -----
    The null hypothesis of the Augmented Dickey-Fuller is that there is a
    unit root, with the alternative that there is no unit root. If the
    pvalue is above a critical size, then we cannot reject that there is
    a unit root.

    The p-values are obtained through regression surface approximation from
    MacKinnon 1994, but using the updated 2010 tables. If the p-value is
    close to significant, then the critical values should be used to judge
    whether to reject the null.

    The autolag option and maxlag for it are described in Greene.

    References
    ----------
    .. [1] W. Greene. "Econometric Analysis," 5th ed., Pearson, 2003.

    .. [2] Hamilton, J.D. "Time Series Analysis". Princeton, 1994.

    .. [3] MacKinnon, J.G. 1994. "Approximate asymptotic distribution
       functions for unit-root and cointegration tests." `Journal of
       Business and Economic Statistics` 12, 167-76.

    .. [4] MacKinnon, J.G. 2010. "Critical Values for Cointegration Tests."
       Queen's University, Dept of Economics, Working Papers. Available at
       http://ideas.repec.org/p/qed/wpaper/1227.html
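
    Examples
    --------
    A short sketch on a seeded simulation (exact values depend on the seed):
    a random walk has a unit root, so the p-value is typically large and the
    null is not rejected.

    >>> import numpy as np
    >>> from statsmodels.tsa.stattools import adfuller
    >>> rng = np.random.default_rng(12345)
    >>> x = rng.standard_normal(500).cumsum()  # random walk: unit root
    >>> adf, pvalue, usedlag, nobs, crit, icbest = adfuller(x)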
    """
    x = array_like(x, "x")
    maxlag = int_like(maxlag, "maxlag", optional=True)
    regression = rename_trend(regression)
    regression = string_like(
        regression, "regression", options=("c", "ct", "ctt", "n")
    )
    autolag = string_like(
        autolag, "autolag", optional=True, options=("aic", "bic", "t-stat")
    )
    store = bool_like(store, "store")
    regresults = bool_like(regresults, "regresults")

    if regresults:
        store = True

    trenddict = {None: "n", 0: "c", 1: "ct", 2: "ctt"}
    if regression is None or isinstance(regression, int):
        regression = trenddict[regression]
    regression = regression.lower()
    nobs = x.shape[0]

    ntrend = len(regression) if regression != "n" else 0
    if maxlag is None:
        # from Greene referencing Schwert 1989
        maxlag = int(np.ceil(12.0 * np.power(nobs / 100.0, 1 / 4.0)))
        # -1 for the diff
        maxlag = min(nobs // 2 - ntrend - 1, maxlag)
        if maxlag < 0:
            raise ValueError(
                "sample size is too short to use selected "
                "regression component"
            )
    elif maxlag > nobs // 2 - ntrend - 1:
        raise ValueError(
            "maxlag must be less than (nobs/2 - 1 - ntrend) "
            "where ntrend is the number of included "
            "deterministic regressors"
        )
    xdiff = np.diff(x)
    xdall = lagmat(xdiff[:, None], maxlag, trim="both", original="in")
    nobs = xdall.shape[0]

    xdall[:, 0] = x[-nobs - 1 : -1]  # replace 0 xdiff with level of x
    xdshort = xdiff[-nobs:]

    if store:
        from statsmodels.stats.diagnostic import ResultsStore

        resstore = ResultsStore()
    if autolag:
        if regression != "n":
            fullRHS = add_trend(xdall, regression, prepend=True)
        else:
            fullRHS = xdall
        startlag = fullRHS.shape[1] - xdall.shape[1] + 1
        # 1 for level
        # search for lag length with smallest information criteria
        # Note: use the same number of observations to have comparable IC
        # aic and bic: smaller is better

        if not regresults:
            icbest, bestlag = _autolag(
                OLS, xdshort, fullRHS, startlag, maxlag, autolag
            )
        else:
            icbest, bestlag, alres = _autolag(
                OLS,
                xdshort,
                fullRHS,
                startlag,
                maxlag,
                autolag,
                regresults=regresults,
            )
            resstore.autolag_results = alres

        bestlag -= startlag  # convert to lag not column index

        # rerun ols with best autolag
        xdall = lagmat(xdiff[:, None], bestlag, trim="both", original="in")
        nobs = xdall.shape[0]
        xdall[:, 0] = x[-nobs - 1 : -1]  # replace 0 xdiff with level of x
        xdshort = xdiff[-nobs:]
        usedlag = bestlag
    else:
        usedlag = maxlag
        icbest = None
    if regression != "n":
        resols = OLS(
            xdshort, add_trend(xdall[:, : usedlag + 1], regression)
        ).fit()
    else:
        resols = OLS(xdshort, xdall[:, : usedlag + 1]).fit()

    adfstat = resols.tvalues[0]
    # adfstat = (resols.params[0]-1.0)/resols.bse[0]
    # the "asymptotically correct" z statistic is obtained as
    # nobs/(1-np.sum(resols.params[1:-(trendorder+1)])) (resols.params[0] - 1)
    # I think this is the statistic that is used for series that are
    # integrated for orders higher than I(1), i.e., not ADF but cointegration
    # tests.

    # Get approx p-value and critical values
    pvalue = mackinnonp(adfstat, regression=regression, N=1)
    critvalues = mackinnoncrit(N=1, regression=regression, nobs=nobs)
    critvalues = {
        "1%": critvalues[0],
        "5%": critvalues[1],
        "10%": critvalues[2],
    }
    if store:
        resstore.resols = resols
        resstore.maxlag = maxlag
        resstore.usedlag = usedlag
        resstore.adfstat = adfstat
        resstore.critvalues = critvalues
        resstore.nobs = nobs
        resstore.H0 = (
            "The coefficient on the lagged level equals 1 - unit root"
        )
        resstore.HA = "The coefficient on the lagged level < 1 - stationary"
        resstore.icbest = icbest
        resstore._str = "Augmented Dickey-Fuller Test Results"
        return adfstat, pvalue, critvalues, resstore
    else:
        if not autolag:
            return adfstat, pvalue, usedlag, nobs, critvalues
        else:
            return adfstat, pvalue, usedlag, nobs, critvalues, icbest


@deprecate_kwarg("unbiased", "adjusted")
def acovf(x, adjusted=False, demean=True, fft=True, missing="none", nlag=None):
    """
    Estimate autocovariances.

    Parameters
    ----------
    x : array_like
        Time series data. Must be 1d.
    adjusted : bool, default False
        If True, then the denominators are n-k, otherwise n.
    demean : bool, default True
        If True, then subtract the mean x from each element of x.
    fft : bool, default True
        If True, use FFT convolution. This method should be preferred
        for long time series.
    missing : str, default "none"
        A string in ["none", "raise", "conservative", "drop"] specifying how
        the NaNs are to be treated. "none" performs no checks. "raise" raises
        an exception if NaN values are found. "drop" removes the missing
        observations and then estimates the autocovariances treating the
        non-missing as contiguous. "conservative" computes the autocovariance
        using nan-ops so that nans are removed when computing the mean
        and cross-products that are used to estimate the autocovariance.
        When using "conservative", n is set to the number of non-missing
        observations.
    nlag : {int, None}, default None
        Limit the number of autocovariances returned. Size of returned
        array is nlag + 1. Setting nlag when fft is False uses a simple,
        direct estimator of the autocovariances that only computes the first
        nlag + 1 values. This can be much faster when the time series is long
        and only a small number of autocovariances are needed.

    Returns
    -------
    ndarray
        The estimated autocovariances.

    References
    ----------
    .. [1] Parzen, E., 1963. On spectral analysis with missing observations
       and amplitude modulation. Sankhya: The Indian Journal of
       Statistics, Series A, pp.383-392.
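
    Examples
    --------
    A small sketch: with ``adjusted=False`` (the default) the lag-0
    autocovariance is the biased sample variance.

    >>> import numpy as np
    >>> from statsmodels.tsa.stattools import acovf
    >>> rng = np.random.default_rng(0)
    >>> x = rng.standard_normal(200)
    >>> acov = acovf(x, nlag=5)
    >>> acov.shape
    (6,)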
    """
    adjusted = bool_like(adjusted, "adjusted")
    demean = bool_like(demean, "demean")
    fft = bool_like(fft, "fft", optional=False)
    missing = string_like(
        missing, "missing", options=("none", "raise", "conservative", "drop")
    )
    nlag = int_like(nlag, "nlag", optional=True)

    x = array_like(x, "x", ndim=1)

    missing = missing.lower()
    if missing == "none":
        deal_with_masked = False
    else:
        deal_with_masked = has_missing(x)
    if deal_with_masked:
        if missing == "raise":
            raise MissingDataError("NaNs were encountered in the data")
        notmask_bool = ~np.isnan(x)  # bool
        if missing == "conservative":
            # Must copy for thread safety
            x = x.copy()
            x[~notmask_bool] = 0
        else:  # "drop"
            x = x[notmask_bool]  # copies non-missing
        notmask_int = notmask_bool.astype(int)  # int

    if demean and deal_with_masked:
        # whether "drop" or "conservative":
        xo = x - x.sum() / notmask_int.sum()
        if missing == "conservative":
            xo[~notmask_bool] = 0
    elif demean:
        xo = x - x.mean()
    else:
        xo = x

    n = len(x)
    lag_len = nlag
    if nlag is None:
        lag_len = n - 1
    elif nlag > n - 1:
        raise ValueError("nlag must be smaller than nobs - 1")

    if not fft and nlag is not None:
        acov = np.empty(lag_len + 1)
        acov[0] = xo.dot(xo)
        for i in range(lag_len):
            acov[i + 1] = xo[i + 1 :].dot(xo[: -(i + 1)])
        if not deal_with_masked or missing == "drop":
            if adjusted:
                acov /= n - np.arange(lag_len + 1)
            else:
                acov /= n
        else:
            if adjusted:
                divisor = np.empty(lag_len + 1, dtype=np.int64)
                divisor[0] = notmask_int.sum()
                for i in range(lag_len):
                    divisor[i + 1] = notmask_int[i + 1 :].dot(
                        notmask_int[: -(i + 1)]
                    )
                divisor[divisor == 0] = 1
                acov /= divisor
            else:  # biased, missing data but not "drop"
                acov /= notmask_int.sum()
        return acov

    if adjusted and deal_with_masked and missing == "conservative":
        d = np.correlate(notmask_int, notmask_int, "full")
        d[d == 0] = 1
    elif adjusted:
        xi = np.arange(1, n + 1)
        d = np.hstack((xi, xi[:-1][::-1]))
    elif deal_with_masked:
        # biased and NaNs given and ("drop" or "conservative")
        d = notmask_int.sum() * np.ones(2 * n - 1)
    else:  # biased and no NaNs or missing=="none"
        d = n * np.ones(2 * n - 1)

    if fft:
        nobs = len(xo)
        n = _next_regular(2 * nobs + 1)
        Frf = np.fft.fft(xo, n=n)
        acov = np.fft.ifft(Frf * np.conjugate(Frf))[:nobs] / d[nobs - 1 :]
        acov = acov.real
    else:
        acov = np.correlate(xo, xo, "full")[n - 1 :] / d[n - 1 :]

    if nlag is not None:
        # Copy to allow gc of full array rather than view
        return acov[: lag_len + 1].copy()
    return acov


def q_stat(x, nobs):
    """
    Compute Ljung-Box Q Statistic.

    Parameters
    ----------
    x : array_like
        Array of autocorrelation coefficients. Can be obtained from acf.
    nobs : int, optional
        Number of observations in the entire sample (i.e., not just the
        length of the autocorrelation function results).

    Returns
    -------
    q-stat : ndarray
        Ljung-Box Q-statistic for autocorrelation parameters.
    p-value : ndarray
        P-value of the Q statistic.

    Notes
    -----
    Designed to be used with acf.
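
    Examples
    --------
    A short sketch: compute the ACF first, dropping lag 0, then the
    Ljung-Box statistics (values depend on the simulated data).

    >>> import numpy as np
    >>> from statsmodels.tsa.stattools import acf, q_stat
    >>> rng = np.random.default_rng(0)
    >>> x = rng.standard_normal(300)
    >>> r = acf(x, nlags=10)
    >>> qstats, pvalues = q_stat(r[1:], nobs=len(x))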
    """
    x = array_like(x, "x")
    nobs = int_like(nobs, "nobs")

    ret = (
        nobs
        * (nobs + 2)
        * np.cumsum((1.0 / (nobs - np.arange(1, len(x) + 1))) * x ** 2)
    )
    chi2 = stats.chi2.sf(ret, np.arange(1, len(x) + 1))
    return ret, chi2


# NOTE: Changed unbiased to False
# see for example
# http://www.itl.nist.gov/div898/handbook/eda/section3/autocopl.htm
def acf(
    x,
    adjusted=False,
    nlags=None,
    qstat=False,
    fft=True,
    alpha=None,
    bartlett_confint=True,
    missing="none",
):
    """
    Calculate the autocorrelation function.

    Parameters
    ----------
    x : array_like
        The time series data.
    adjusted : bool, default False
        If True, then denominators for autocovariance are n-k, otherwise n.
    nlags : int, optional
        Number of lags to return autocorrelation for. If not provided,
        uses min(10 * np.log10(nobs), nobs - 1).
    qstat : bool, default False
        If True, returns the Ljung-Box q statistic for each autocorrelation
        coefficient. See q_stat for more information.
    fft : bool, default True
        If True, computes the ACF via FFT.
    alpha : scalar, default None
        If a number is given, the confidence intervals for the given level
        are returned. For instance if alpha=.05, 95 % confidence intervals
        are returned where the standard deviation is computed according to
        Bartlett's formula.
    bartlett_confint : bool, default True
        Confidence intervals for ACF values are generally placed at 2
        standard errors around r_k. The formula used for standard error
        depends upon the situation. If the autocorrelations are being used
        to test for randomness of residuals as part of the ARIMA routine,
        the standard errors are determined assuming the residuals are white
        noise. The approximate formula for any lag is that standard error
        of each r_k = 1/sqrt(N). See section 9.4 of [2] for more details on
        the 1/sqrt(N) result. For more elementary discussion, see section
        5.3.2 in [3].
        For the ACF of raw data, the standard error at a lag k is
        found as if the right model was an MA(k-1). This allows the possible
        interpretation that if all autocorrelations past a certain lag are
        within the limits, the model might be an MA of order defined by the
        last significant autocorrelation. In this case, a moving average
        model is assumed for the data and the standard errors for the
        confidence intervals should be generated using Bartlett's formula.
        For more details on the Bartlett formula result, see section 7.2
        in [2].
    missing : str, default "none"
        A string in ["none", "raise", "conservative", "drop"] specifying how
        the NaNs are to be treated. "none" performs no checks. "raise" raises
        an exception if NaN values are found. "drop" removes the missing
        observations and then estimates the autocovariances treating the
        non-missing as contiguous. "conservative" computes the autocovariance
        using nan-ops so that nans are removed when computing the mean
        and cross-products that are used to estimate the autocovariance.
        When using "conservative", n is set to the number of non-missing
        observations.

    Returns
    -------
    acf : ndarray
        The autocorrelation function.
    confint : ndarray, optional
        Confidence intervals for the ACF. Returned if alpha is not None.
    qstat : ndarray, optional
        The Ljung-Box Q-Statistic. Returned if qstat is True.
    pvalues : ndarray, optional
        The p-values associated with the Q-statistics. Returned if qstat is
        True.

    Notes
    -----
    The acf at lag 0 (i.e., 1) is returned.

    For very long time series it is recommended to use fft convolution
    instead. When fft is False, a simple, direct estimator of the
    autocovariances is used that only computes the first nlags + 1 values.
    This can be much faster when the time series is long and only a small
    number of autocovariances are needed.

    If adjusted is true, the denominator for the autocovariance is adjusted
    for the loss of data.

    References
    ----------
    .. [1] Parzen, E., 1963. On spectral analysis with missing observations
       and amplitude modulation. Sankhya: The Indian Journal of
       Statistics, Series A, pp.383-392.
    .. [2] Brockwell and Davis, 1987. Time Series Theory and Methods
    .. [3] Brockwell and Davis, 2010. Introduction to Time Series and
       Forecasting, 2nd edition.
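
    Examples
    --------
    A brief sketch showing the optional returns (values depend on the
    simulated data):

    >>> import numpy as np
    >>> from statsmodels.tsa.stattools import acf
    >>> rng = np.random.default_rng(0)
    >>> x = rng.standard_normal(200)
    >>> r = acf(x, nlags=10)
    >>> r, confint, qstats, pvalues = acf(x, nlags=10, alpha=0.05, qstat=True)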
551 """ 552 x = array_like(x, "x") 553 nobs = int_like(nobs, "nobs") 554 555 ret = ( 556 nobs 557 * (nobs + 2) 558 * np.cumsum((1.0 / (nobs - np.arange(1, len(x) + 1))) * x ** 2) 559 ) 560 chi2 = stats.chi2.sf(ret, np.arange(1, len(x) + 1)) 561 return ret, chi2 562 563 564# NOTE: Changed unbiased to False 565# see for example 566# http://www.itl.nist.gov/div898/handbook/eda/section3/autocopl.htm 567def acf( 568 x, 569 adjusted=False, 570 nlags=None, 571 qstat=False, 572 fft=True, 573 alpha=None, 574 bartlett_confint=True, 575 missing="none", 576): 577 """ 578 Calculate the autocorrelation function. 579 580 Parameters 581 ---------- 582 x : array_like 583 The time series data. 584 adjusted : bool, default False 585 If True, then denominators for autocovariance are n-k, otherwise n. 586 nlags : int, optional 587 Number of lags to return autocorrelation for. If not provided, 588 uses min(10 * np.log10(nobs), nobs - 1). 589 qstat : bool, default False 590 If True, returns the Ljung-Box q statistic for each autocorrelation 591 coefficient. See q_stat for more information. 592 fft : bool, default True 593 If True, computes the ACF via FFT. 594 alpha : scalar, default None 595 If a number is given, the confidence intervals for the given level are 596 returned. For instance if alpha=.05, 95 % confidence intervals are 597 returned where the standard deviation is computed according to 598 Bartlett"s formula. 599 bartlett_confint : bool, default True 600 Confidence intervals for ACF values are generally placed at 2 601 standard errors around r_k. The formula used for standard error 602 depends upon the situation. If the autocorrelations are being used 603 to test for randomness of residuals as part of the ARIMA routine, 604 the standard errors are determined assuming the residuals are white 605 noise. The approximate formula for any lag is that standard error 606 of each r_k = 1/sqrt(N). See section 9.4 of [2] for more details on 607 the 1/sqrt(N) result. For more elementary discussion, see section 5.3.2 608 in [3]. 609 For the ACF of raw data, the standard error at a lag k is 610 found as if the right model was an MA(k-1). This allows the possible 611 interpretation that if all autocorrelations past a certain lag are 612 within the limits, the model might be an MA of order defined by the 613 last significant autocorrelation. In this case, a moving average 614 model is assumed for the data and the standard errors for the 615 confidence intervals should be generated using Bartlett's formula. 616 For more details on Bartlett formula result, see section 7.2 in [2]. 617 missing : str, default "none" 618 A string in ["none", "raise", "conservative", "drop"] specifying how 619 the NaNs are to be treated. "none" performs no checks. "raise" raises 620 an exception if NaN values are found. "drop" removes the missing 621 observations and then estimates the autocovariances treating the 622 non-missing as contiguous. "conservative" computes the autocovariance 623 using nan-ops so that nans are removed when computing the mean 624 and cross-products that are used to estimate the autocovariance. 625 When using "conservative", n is set to the number of non-missing 626 observations. 627 628 Returns 629 ------- 630 acf : ndarray 631 The autocorrelation function. 632 confint : ndarray, optional 633 Confidence intervals for the ACF. Returned if alpha is not None. 634 qstat : ndarray, optional 635 The Ljung-Box Q-Statistic. Returned if q_stat is True. 
    """
    x = array_like(x, "x")
    nlags = int_like(nlags, "nlags", optional=True)
    nobs = x.shape[0]
    if nlags is None:
        nlags = min(int(10 * np.log10(nobs)), nobs - 1)

    method = string_like(method, "method", options=("adjusted", "mle"))
    pacf = [1.0]
    for k in range(1, nlags + 1):
        pacf.append(yule_walker(x, k, method=method)[0][-1])
    return np.array(pacf)


def pacf_burg(x, nlags=None, demean=True):
    """
    Calculate Burg's partial autocorrelation estimator.

    Parameters
    ----------
    x : array_like
        Observations of time series for which pacf is calculated.
    nlags : int, optional
        Number of lags to return autocorrelation for. If not provided,
        uses min(10 * np.log10(nobs), nobs - 1).
    demean : bool, optional
        Flag indicating to demean the data. Set to False if x has been
        previously demeaned.

    Returns
    -------
    pacf : ndarray
        Partial autocorrelations for lags 0, 1, ..., nlags.
    sigma2 : ndarray
        Residual variance estimates where the value in position m is the
        residual variance in an AR model that includes m lags.

    See Also
    --------
    statsmodels.tsa.stattools.pacf
        Partial autocorrelation estimation.
    statsmodels.tsa.stattools.pacf_yw
        Partial autocorrelation estimation using Yule-Walker.
    statsmodels.tsa.stattools.pacf_ols
        Partial autocorrelation estimation using OLS.

    References
    ----------
    .. [1] Brockwell, P.J. and Davis, R.A., 2016. Introduction to time series
       and forecasting. Springer.
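
    Examples
    --------
    A small sketch returning both outputs (values depend on the data):

    >>> import numpy as np
    >>> from statsmodels.tsa.stattools import pacf_burg
    >>> rng = np.random.default_rng(0)
    >>> x = rng.standard_normal(200)
    >>> pacf, sigma2 = pacf_burg(x, nlags=5)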
732 """ 733 x = array_like(x, "x") 734 nlags = int_like(nlags, "nlags", optional=True) 735 nobs = x.shape[0] 736 if nlags is None: 737 nlags = min(int(10 * np.log10(nobs)), nobs - 1) 738 739 method = string_like(method, "method", options=("adjusted", "mle")) 740 pacf = [1.0] 741 for k in range(1, nlags + 1): 742 pacf.append(yule_walker(x, k, method=method)[0][-1]) 743 return np.array(pacf) 744 745 746def pacf_burg(x, nlags=None, demean=True): 747 """ 748 Calculate Burg"s partial autocorrelation estimator. 749 750 Parameters 751 ---------- 752 x : array_like 753 Observations of time series for which pacf is calculated. 754 nlags : int, optional 755 Number of lags to return autocorrelation for. If not provided, 756 uses min(10 * np.log10(nobs), nobs - 1). 757 demean : bool, optional 758 Flag indicating to demean that data. Set to False if x has been 759 previously demeaned. 760 761 Returns 762 ------- 763 pacf : ndarray 764 Partial autocorrelations for lags 0, 1, ..., nlag. 765 sigma2 : ndarray 766 Residual variance estimates where the value in position m is the 767 residual variance in an AR model that includes m lags. 768 769 See Also 770 -------- 771 statsmodels.tsa.stattools.pacf 772 Partial autocorrelation estimation. 773 statsmodels.tsa.stattools.pacf_yw 774 Partial autocorrelation estimation using Yule-Walker. 775 statsmodels.tsa.stattools.pacf_ols 776 Partial autocorrelation estimation using OLS. 777 778 References 779 ---------- 780 .. [1] Brockwell, P.J. and Davis, R.A., 2016. Introduction to time series 781 and forecasting. Springer. 782 """ 783 x = array_like(x, "x") 784 if demean: 785 x = x - x.mean() 786 nobs = x.shape[0] 787 p = nlags if nlags is not None else min(int(10 * np.log10(nobs)), nobs - 1) 788 if p > nobs - 1: 789 raise ValueError("nlags must be smaller than nobs - 1") 790 d = np.zeros(p + 1) 791 d[0] = 2 * x.dot(x) 792 pacf = np.zeros(p + 1) 793 u = x[::-1].copy() 794 v = x[::-1].copy() 795 d[1] = u[:-1].dot(u[:-1]) + v[1:].dot(v[1:]) 796 pacf[1] = 2 / d[1] * v[1:].dot(u[:-1]) 797 last_u = np.empty_like(u) 798 last_v = np.empty_like(v) 799 for i in range(1, p): 800 last_u[:] = u 801 last_v[:] = v 802 u[1:] = last_u[:-1] - pacf[i] * last_v[1:] 803 v[1:] = last_v[1:] - pacf[i] * last_u[:-1] 804 d[i + 1] = (1 - pacf[i] ** 2) * d[i] - v[i] ** 2 - u[-1] ** 2 805 pacf[i + 1] = 2 / d[i + 1] * v[i + 1 :].dot(u[i:-1]) 806 sigma2 = (1 - pacf ** 2) * d / (2.0 * (nobs - np.arange(0, p + 1))) 807 pacf[0] = 1 # Insert the 0 lag partial autocorrel 808 809 return pacf, sigma2 810 811 812@deprecate_kwarg("unbiased", "adjusted") 813def pacf_ols(x, nlags=None, efficient=True, adjusted=False): 814 """ 815 Calculate partial autocorrelations via OLS. 816 817 Parameters 818 ---------- 819 x : array_like 820 Observations of time series for which pacf is calculated. 821 nlags : int, optional 822 Number of lags to return autocorrelation for. If not provided, 823 uses min(10 * np.log10(nobs), nobs - 1). 824 efficient : bool, optional 825 If true, uses the maximum number of available observations to compute 826 each partial autocorrelation. If not, uses the same number of 827 observations to compute all pacf values. 828 adjusted : bool, optional 829 Adjust each partial autocorrelation by n / (n - lag). 830 831 Returns 832 ------- 833 ndarray 834 The partial autocorrelations, (maxlag,) array corresponding to lags 835 0, 1, ..., maxlag. 836 837 See Also 838 -------- 839 statsmodels.tsa.stattools.pacf 840 Partial autocorrelation estimation. 
    """
    x = array_like(x, "x")
    nlags = int_like(nlags, "nlags", optional=True)
    efficient = bool_like(efficient, "efficient")
    adjusted = bool_like(adjusted, "adjusted")
    nobs = x.shape[0]
    if nlags is None:
        nlags = min(int(10 * np.log10(nobs)), nobs - 1)

    pacf = np.empty(nlags + 1)
    pacf[0] = 1.0
    if efficient:
        xlags, x0 = lagmat(x, nlags, original="sep")
        xlags = add_constant(xlags)
        for k in range(1, nlags + 1):
            params = lstsq(xlags[k:, : k + 1], x0[k:], rcond=None)[0]
            pacf[k] = params[-1]
    else:
        x = x - np.mean(x)
        # Create a single set of lags for multivariate OLS
        xlags, x0 = lagmat(x, nlags, original="sep", trim="both")
        for k in range(1, nlags + 1):
            params = lstsq(xlags[:, :k], x0, rcond=None)[0]
            # Last coefficient corresponds to PACF value (see [1])
            pacf[k] = params[-1]

    if adjusted:
        pacf *= nobs / (nobs - np.arange(nlags + 1))

    return pacf


def pacf(x, nlags=None, method="ywadjusted", alpha=None):
    """
    Partial autocorrelation estimate.

    Parameters
    ----------
    x : array_like
        Observations of time series for which pacf is calculated.
    nlags : int, optional
        Number of lags to return autocorrelation for. If not provided,
        uses min(10 * np.log10(nobs), nobs // 2 - 1).
    method : str, default "ywadjusted"
        Specifies which method for the calculations to use.

        - "yw" or "ywadjusted" : Yule-Walker with sample-size adjustment in
          denominator for acovf. Default.
        - "ywm" or "ywmle" : Yule-Walker without adjustment.
        - "ols" : regression of time series on lags of it and on constant.
        - "ols-inefficient" : regression of time series on lags using a
          single common sample to estimate all pacf coefficients.
        - "ols-adjusted" : regression of time series on lags with a bias
          adjustment.
        - "ld" or "ldadjusted" : Levinson-Durbin recursion with bias
          correction.
        - "ldb" or "ldbiased" : Levinson-Durbin recursion without bias
          correction.

    alpha : float, optional
        If a number is given, the confidence intervals for the given level
        are returned. For instance if alpha=.05, 95 % confidence intervals
        are returned where the standard deviation is computed according to
        1/sqrt(len(x)).

    Returns
    -------
    pacf : ndarray
        Partial autocorrelations, nlags elements, including lag zero.
    confint : ndarray, optional
        Confidence intervals for the PACF. Returned if alpha is not None.

    See Also
    --------
    statsmodels.tsa.stattools.acf
        Estimate the autocorrelation function.
    statsmodels.tsa.stattools.pacf_yw
        Partial autocorrelation estimation using Yule-Walker.
    statsmodels.tsa.stattools.pacf_ols
        Partial autocorrelation estimation using OLS.
    statsmodels.tsa.stattools.pacf_burg
        Partial autocorrelation estimation using Burg's method.

    Notes
    -----
    Based on simulation evidence across a range of low-order ARMA models,
    the best methods based on root MSE are Yule-Walker (MLE), Levinson-Durbin
    (MLE) and Burg, respectively. The estimators with the lowest bias
    included these three in addition to OLS and OLS-adjusted.

    Yule-Walker (adjusted) and Levinson-Durbin (adjusted) performed
    consistently worse than the other options.
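
    Examples
    --------
    A short sketch with confidence intervals (values depend on the data):

    >>> import numpy as np
    >>> from statsmodels.tsa.stattools import pacf
    >>> rng = np.random.default_rng(0)
    >>> x = rng.standard_normal(300)
    >>> p, confint = pacf(x, nlags=10, alpha=0.05)
    >>> p.shape, confint.shape
    ((11,), (11, 2))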
    """
    nlags = int_like(nlags, "nlags", optional=True)
    methods = (
        "ols",
        "ols-inefficient",
        "ols-adjusted",
        "yw",
        "ywa",
        "ld",
        "ywadjusted",
        "yw_adjusted",
        "ywm",
        "ywmle",
        "yw_mle",
        "lda",
        "ldadjusted",
        "ld_adjusted",
        "ldb",
        "ldbiased",
        "ld_biased",
    )
    x = array_like(x, "x", maxdim=2)
    method = string_like(method, "method", options=methods)
    alpha = float_like(alpha, "alpha", optional=True)

    nobs = x.shape[0]
    if nlags is None:
        nlags = min(int(10 * np.log10(nobs)), nobs // 2 - 1)
    if nlags >= x.shape[0] // 2:
        raise ValueError(
            "Can only compute partial correlations for lags up to 50% of the "
            f"sample size. The requested nlags {nlags} must be < "
            f"{x.shape[0] // 2}."
        )

    if method in ("ols", "ols-inefficient", "ols-adjusted"):
        efficient = "inefficient" not in method
        adjusted = "adjusted" in method
        ret = pacf_ols(x, nlags=nlags, efficient=efficient, adjusted=adjusted)
    elif method in ("yw", "ywa", "ywadjusted", "yw_adjusted"):
        ret = pacf_yw(x, nlags=nlags, method="adjusted")
    elif method in ("ywm", "ywmle", "yw_mle"):
        ret = pacf_yw(x, nlags=nlags, method="mle")
    elif method in ("ld", "lda", "ldadjusted", "ld_adjusted"):
        acv = acovf(x, adjusted=True, fft=False)
        ld_ = levinson_durbin(acv, nlags=nlags, isacov=True)
        ret = ld_[2]
    # inconsistent naming with ywmle
    else:  # method in ("ldb", "ldbiased", "ld_biased")
        acv = acovf(x, adjusted=False, fft=False)
        ld_ = levinson_durbin(acv, nlags=nlags, isacov=True)
        ret = ld_[2]

    if alpha is not None:
        varacf = 1.0 / len(x)  # for all lags >= 1
        interval = stats.norm.ppf(1.0 - alpha / 2.0) * np.sqrt(varacf)
        confint = np.array(lzip(ret - interval, ret + interval))
        confint[0] = ret[0]  # fix confidence interval for lag 0 to varpacf=0
        return ret, confint
    else:
        return ret


@deprecate_kwarg("unbiased", "adjusted")
def ccovf(x, y, adjusted=True, demean=True, fft=True):
    """
    Calculate the cross-covariance between two series.

    Parameters
    ----------
    x, y : array_like
        The time series data to use in the calculation.
    adjusted : bool, optional
        If True, then denominators for the cross-covariance are n-k,
        otherwise n.
    demean : bool, optional
        Flag indicating whether to demean x and y.
    fft : bool, default True
        If True, use FFT convolution. This method should be preferred
        for long time series.

    Returns
    -------
    ndarray
        The estimated cross-covariance function.
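
    Examples
    --------
    A small sketch on two independent series (values depend on the data):

    >>> import numpy as np
    >>> from statsmodels.tsa.stattools import ccovf
    >>> rng = np.random.default_rng(0)
    >>> x = rng.standard_normal(200)
    >>> y = rng.standard_normal(200)
    >>> cc = ccovf(x, y)
    >>> cc.shape
    (200,)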
    """
    x = array_like(x, "x")
    y = array_like(y, "y")
    adjusted = bool_like(adjusted, "adjusted")
    demean = bool_like(demean, "demean")
    fft = bool_like(fft, "fft", optional=False)

    n = len(x)
    if demean:
        xo = x - x.mean()
        yo = y - y.mean()
    else:
        xo = x
        yo = y
    if adjusted:
        d = np.arange(n, 0, -1)
    else:
        d = n

    method = "fft" if fft else "direct"
    return correlate(xo, yo, "full", method=method)[n - 1 :] / d


@deprecate_kwarg("unbiased", "adjusted")
def ccf(x, y, adjusted=True, fft=True):
    """
    The cross-correlation function.

    Parameters
    ----------
    x, y : array_like
        The time series data to use in the calculation.
    adjusted : bool
        If True, then denominators for the cross-correlation are n-k,
        otherwise n.
    fft : bool, default True
        If True, use FFT convolution. This method should be preferred
        for long time series.

    Returns
    -------
    ndarray
        The cross-correlation function of x and y.

    Notes
    -----
    If adjusted is True, the denominator for the cross-covariance is
    adjusted for the loss of data.
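
    Examples
    --------
    A short sketch: a delayed copy produces a visible peak at the delay
    (illustrative; exact values depend on the data).

    >>> import numpy as np
    >>> from statsmodels.tsa.stattools import ccf
    >>> rng = np.random.default_rng(0)
    >>> x = rng.standard_normal(300)
    >>> y = np.r_[np.zeros(2), x[:-2]]  # y is x delayed by two periods
    >>> r = ccf(y, x)  # large value near lag 2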
1141 """ 1142 s = array_like(s, "s") 1143 nlags = int_like(nlags, "nlags") 1144 isacov = bool_like(isacov, "isacov") 1145 1146 order = nlags 1147 1148 if isacov: 1149 sxx_m = s 1150 else: 1151 sxx_m = acovf(s, fft=False)[: order + 1] # not tested 1152 1153 phi = np.zeros((order + 1, order + 1), "d") 1154 sig = np.zeros(order + 1) 1155 # initial points for the recursion 1156 phi[1, 1] = sxx_m[1] / sxx_m[0] 1157 sig[1] = sxx_m[0] - phi[1, 1] * sxx_m[1] 1158 for k in range(2, order + 1): 1159 phi[k, k] = ( 1160 sxx_m[k] - np.dot(phi[1:k, k - 1], sxx_m[1:k][::-1]) 1161 ) / sig[k - 1] 1162 for j in range(1, k): 1163 phi[j, k] = phi[j, k - 1] - phi[k, k] * phi[k - j, k - 1] 1164 sig[k] = sig[k - 1] * (1 - phi[k, k] ** 2) 1165 1166 sigma_v = sig[-1] 1167 arcoefs = phi[1:, -1] 1168 pacf_ = np.diag(phi).copy() 1169 pacf_[0] = 1.0 1170 return sigma_v, arcoefs, pacf_, sig, phi # return everything 1171 1172 1173def levinson_durbin_pacf(pacf, nlags=None): 1174 """ 1175 Levinson-Durbin algorithm that returns the acf and ar coefficients. 1176 1177 Parameters 1178 ---------- 1179 pacf : array_like 1180 Partial autocorrelation array for lags 0, 1, ... p. 1181 nlags : int, optional 1182 Number of lags in the AR model. If omitted, returns coefficients from 1183 an AR(p) and the first p autocorrelations. 1184 1185 Returns 1186 ------- 1187 arcoefs : ndarray 1188 AR coefficients computed from the partial autocorrelations. 1189 acf : ndarray 1190 The acf computed from the partial autocorrelations. Array returned 1191 contains the autocorrelations corresponding to lags 0, 1, ..., p. 1192 1193 References 1194 ---------- 1195 .. [1] Brockwell, P.J. and Davis, R.A., 2016. Introduction to time series 1196 and forecasting. Springer. 1197 """ 1198 pacf = array_like(pacf, "pacf") 1199 nlags = int_like(nlags, "nlags", optional=True) 1200 pacf = np.squeeze(np.asarray(pacf)) 1201 1202 if pacf[0] != 1: 1203 raise ValueError( 1204 "The first entry of the pacf corresponds to lags 0 " 1205 "and so must be 1." 1206 ) 1207 pacf = pacf[1:] 1208 n = pacf.shape[0] 1209 if nlags is not None: 1210 if nlags > n: 1211 raise ValueError( 1212 "Must provide at least as many values from the " 1213 "pacf as the number of lags." 1214 ) 1215 pacf = pacf[:nlags] 1216 n = pacf.shape[0] 1217 1218 acf = np.zeros(n + 1) 1219 acf[1] = pacf[0] 1220 nu = np.cumprod(1 - pacf ** 2) 1221 arcoefs = pacf.copy() 1222 for i in range(1, n): 1223 prev = arcoefs[: -(n - i)].copy() 1224 arcoefs[: -(n - i)] = prev - arcoefs[i] * prev[::-1] 1225 acf[i + 1] = arcoefs[i] * nu[i - 1] + prev.dot(acf[1 : -(n - i)][::-1]) 1226 acf[0] = 1 1227 return arcoefs, acf 1228 1229 1230def breakvar_heteroskedasticity_test( 1231 resid, subset_length=1 / 3, alternative="two-sided", use_f=True 1232): 1233 r""" 1234 Test for heteroskedasticity of residuals 1235 1236 Tests whether the sum-of-squares in the first subset of the sample is 1237 significantly different than the sum-of-squares in the last subset 1238 of the sample. Analogous to a Goldfeld-Quandt test. The null hypothesis 1239 is of no heteroskedasticity. 1240 1241 Parameters 1242 ---------- 1243 resid : array_like 1244 Residuals of a time series model. 1245 The shape is 1d (nobs,) or 2d (nobs, nvars). 1246 subset_length : {int, float} 1247 Length of the subsets to test (h in Notes below). 1248 If a float in 0 < subset_length < 1, it is interpreted as fraction. 1249 Default is 1/3. 1250 alternative : str, 'increasing', 'decreasing' or 'two-sided' 1251 This specifies the alternative for the p-value calculation. 
    """
    s = array_like(s, "s")
    nlags = int_like(nlags, "nlags")
    isacov = bool_like(isacov, "isacov")

    order = nlags

    if isacov:
        sxx_m = s
    else:
        sxx_m = acovf(s, fft=False)[: order + 1]  # not tested

    phi = np.zeros((order + 1, order + 1), "d")
    sig = np.zeros(order + 1)
    # initial points for the recursion
    phi[1, 1] = sxx_m[1] / sxx_m[0]
    sig[1] = sxx_m[0] - phi[1, 1] * sxx_m[1]
    for k in range(2, order + 1):
        phi[k, k] = (
            sxx_m[k] - np.dot(phi[1:k, k - 1], sxx_m[1:k][::-1])
        ) / sig[k - 1]
        for j in range(1, k):
            phi[j, k] = phi[j, k - 1] - phi[k, k] * phi[k - j, k - 1]
        sig[k] = sig[k - 1] * (1 - phi[k, k] ** 2)

    sigma_v = sig[-1]
    arcoefs = phi[1:, -1]
    pacf_ = np.diag(phi).copy()
    pacf_[0] = 1.0
    return sigma_v, arcoefs, pacf_, sig, phi  # return everything


def levinson_durbin_pacf(pacf, nlags=None):
    """
    Levinson-Durbin algorithm that returns the acf and ar coefficients.

    Parameters
    ----------
    pacf : array_like
        Partial autocorrelation array for lags 0, 1, ... p.
    nlags : int, optional
        Number of lags in the AR model. If omitted, returns coefficients
        from an AR(p) and the first p autocorrelations.

    Returns
    -------
    arcoefs : ndarray
        AR coefficients computed from the partial autocorrelations.
    acf : ndarray
        The acf computed from the partial autocorrelations. Array returned
        contains the autocorrelations corresponding to lags 0, 1, ..., p.

    References
    ----------
    .. [1] Brockwell, P.J. and Davis, R.A., 2016. Introduction to time series
       and forecasting. Springer.
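
    Examples
    --------
    A round-trip sketch: the partial autocorrelations of an AR(1)-like
    process map to AR coefficients and autocorrelations (illustrative
    values).

    >>> import numpy as np
    >>> from statsmodels.tsa.stattools import levinson_durbin_pacf
    >>> pacf = np.array([1.0, 0.5, 0.0])  # lag 0 must equal 1
    >>> arcoefs, acf = levinson_durbin_pacf(pacf)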
    """
    pacf = array_like(pacf, "pacf")
    nlags = int_like(nlags, "nlags", optional=True)
    pacf = np.squeeze(np.asarray(pacf))

    if pacf[0] != 1:
        raise ValueError(
            "The first entry of the pacf corresponds to lag 0 "
            "and so must be 1."
        )
    pacf = pacf[1:]
    n = pacf.shape[0]
    if nlags is not None:
        if nlags > n:
            raise ValueError(
                "Must provide at least as many values from the "
                "pacf as the number of lags."
            )
        pacf = pacf[:nlags]
        n = pacf.shape[0]

    acf = np.zeros(n + 1)
    acf[1] = pacf[0]
    nu = np.cumprod(1 - pacf ** 2)
    arcoefs = pacf.copy()
    for i in range(1, n):
        prev = arcoefs[: -(n - i)].copy()
        arcoefs[: -(n - i)] = prev - arcoefs[i] * prev[::-1]
        acf[i + 1] = arcoefs[i] * nu[i - 1] + prev.dot(acf[1 : -(n - i)][::-1])
    acf[0] = 1
    return arcoefs, acf


def breakvar_heteroskedasticity_test(
    resid, subset_length=1 / 3, alternative="two-sided", use_f=True
):
    r"""
    Test for heteroskedasticity of residuals

    Tests whether the sum-of-squares in the first subset of the sample is
    significantly different than the sum-of-squares in the last subset
    of the sample. Analogous to a Goldfeld-Quandt test. The null hypothesis
    is of no heteroskedasticity.

    Parameters
    ----------
    resid : array_like
        Residuals of a time series model.
        The shape is 1d (nobs,) or 2d (nobs, nvars).
    subset_length : {int, float}
        Length of the subsets to test (h in Notes below).
        If a float in 0 < subset_length < 1, it is interpreted as a
        fraction. Default is 1/3.
    alternative : str, 'increasing', 'decreasing' or 'two-sided'
        This specifies the alternative for the p-value calculation. Default
        is two-sided.
    use_f : bool, optional
        If True (default), compare against the approximate small-sample
        F distribution; otherwise compare against the asymptotic
        chi-squared distribution.

    Returns
    -------
    test_statistic : {float, ndarray}
        Test statistic(s) H(h).
    p_value : {float, ndarray}
        p-value(s) of test statistic(s).

    Notes
    -----
    The null hypothesis is of no heteroskedasticity. That means different
    things depending on which alternative is selected:

    - Increasing: Null hypothesis is that the variance is not increasing
      throughout the sample; that the sum-of-squares in the later
      subsample is *not* greater than the sum-of-squares in the earlier
      subsample.
    - Decreasing: Null hypothesis is that the variance is not decreasing
      throughout the sample; that the sum-of-squares in the earlier
      subsample is *not* greater than the sum-of-squares in the later
      subsample.
    - Two-sided: Null hypothesis is that the variance is not changing
      throughout the sample. Both that the sum-of-squares in the earlier
      subsample is not greater than the sum-of-squares in the later
      subsample *and* that the sum-of-squares in the later subsample is
      not greater than the sum-of-squares in the earlier subsample.

    For :math:`h = [T/3]`, the test statistic is:

    .. math::

        H(h) = \sum_{t=T-h+1}^T \tilde v_t^2
        \Bigg / \sum_{t=1}^{h} \tilde v_t^2

    This statistic can be tested against an :math:`F(h,h)` distribution.
    Alternatively, :math:`h H(h)` is asymptotically distributed according
    to :math:`\chi_h^2`; this second test can be applied by passing
    `use_f=False` as an argument.

    See section 5.4 of [1]_ for the above formula and discussion, as well
    as additional details.

    References
    ----------
    .. [1] Harvey, Andrew C. 1990. *Forecasting, Structural Time Series*
       *Models and the Kalman Filter.* Cambridge University Press.
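
    Examples
    --------
    A small sketch with a variance break in the second half of the sample
    (exact values depend on the seed):

    >>> import numpy as np
    >>> from statsmodels.tsa.stattools import (
    ...     breakvar_heteroskedasticity_test,
    ... )
    >>> rng = np.random.default_rng(0)
    >>> resid = np.r_[rng.standard_normal(100), 3 * rng.standard_normal(100)]
    >>> stat, pvalue = breakvar_heteroskedasticity_test(resid)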
    """
    squared_resid = np.asarray(resid, dtype=float) ** 2
    if squared_resid.ndim == 1:
        squared_resid = squared_resid.reshape(-1, 1)
    nobs = len(resid)

    if 0 < subset_length < 1:
        h = int(np.round(nobs * subset_length))
    elif type(subset_length) is int and subset_length >= 1:
        h = subset_length
    else:
        raise ValueError(
            "subset_length must be an int >= 1 or a fraction in (0, 1)"
        )

    numer_resid = squared_resid[-h:]
    numer_dof = (~np.isnan(numer_resid)).sum(axis=0)
    numer_squared_sum = np.nansum(numer_resid, axis=0)
    for i, dof in enumerate(numer_dof):
        if dof < 2:
            warnings.warn(
                "Later subset of data for variable %d"
                " has too few non-missing observations to"
                " calculate test statistic." % i
            )
            numer_squared_sum[i] = np.nan

    denom_resid = squared_resid[:h]
    denom_dof = (~np.isnan(denom_resid)).sum(axis=0)
    denom_squared_sum = np.nansum(denom_resid, axis=0)
    for i, dof in enumerate(denom_dof):
        if dof < 2:
            warnings.warn(
                "Early subset of data for variable %d"
                " has too few non-missing observations to"
                " calculate test statistic." % i
            )
            denom_squared_sum[i] = np.nan

    test_statistic = numer_squared_sum / denom_squared_sum

    # Setup functions to calculate the p-values
    if use_f:
        from scipy.stats import f

        pval_lower = lambda test_statistics: f.cdf(  # noqa: E731
            test_statistics, numer_dof, denom_dof
        )
        pval_upper = lambda test_statistics: f.sf(  # noqa: E731
            test_statistics, numer_dof, denom_dof
        )
    else:
        from scipy.stats import chi2

        pval_lower = lambda test_statistics: chi2.cdf(  # noqa: E731
            numer_dof * test_statistics, denom_dof
        )
        pval_upper = lambda test_statistics: chi2.sf(  # noqa: E731
            numer_dof * test_statistics, denom_dof
        )

    # Calculate the one- or two-sided p-values
    alternative = alternative.lower()
    if alternative in ["i", "inc", "increasing"]:
        p_value = pval_upper(test_statistic)
    elif alternative in ["d", "dec", "decreasing"]:
        test_statistic = 1.0 / test_statistic
        p_value = pval_upper(test_statistic)
    elif alternative in ["2", "2-sided", "two-sided"]:
        p_value = 2 * np.minimum(
            pval_lower(test_statistic), pval_upper(test_statistic)
        )
    else:
        raise ValueError("Invalid alternative.")

    if len(test_statistic) == 1:
        return test_statistic[0], p_value[0]

    return test_statistic, p_value


def grangercausalitytests(x, maxlag, addconst=True, verbose=True):
    """
    Four tests for Granger non-causality of 2 time series.

    All four tests give similar results. `params_ftest` and `ssr_ftest` are
    equivalent based on the F test, which is identical to lmtest:grangertest
    in R.

    Parameters
    ----------
    x : array_like
        The data for testing whether the time series in the second column
        Granger causes the time series in the first column. Missing values
        are not supported.
    maxlag : {int, Iterable[int]}
        If an integer, computes the test for all lags up to maxlag. If an
        iterable, computes the tests only for the lags in maxlag.
    addconst : bool
        Include a constant in the model.
    verbose : bool
        Print results.

    Returns
    -------
    dict
        All test results, dictionary keys are the number of lags. For each
        lag the values are a tuple, with the first element a dictionary with
        test statistic, pvalues, degrees of freedom, the second element are
        the OLS estimation results for the restricted model, the unrestricted
        model and the restriction (contrast) matrix for the parameter f_test.

    Notes
    -----
    TODO: convert to class and attach results properly

    The null hypothesis for grangercausalitytests is that the time series in
    the second column, x2, does NOT Granger cause the time series in the
    first column, x1. Granger causality means that past values of x2 have a
    statistically significant effect on the current value of x1, taking past
    values of x1 into account as regressors. We reject the null hypothesis
    that x2 does not Granger cause x1 if the pvalues are below a desired
    size of the test.

    The null hypothesis for all four tests is that the coefficients
    corresponding to past values of the second time series are zero.

    `params_ftest`, `ssr_ftest` are based on the F distribution.

    `ssr_chi2test`, `lrtest` are based on the chi-square distribution.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Granger_causality

    .. [2] Greene: Econometric Analysis

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> from statsmodels.tsa.stattools import grangercausalitytests
    >>> import numpy as np
    >>> data = sm.datasets.macrodata.load_pandas()
    >>> data = data.data[["realgdp", "realcons"]].pct_change().dropna()

    All lags up to 4

    >>> gc_res = grangercausalitytests(data, 4)

    Only lag 4

    >>> gc_res = grangercausalitytests(data, [4])
    """
    x = array_like(x, "x", ndim=2)
    if not np.isfinite(x).all():
        raise ValueError("x contains NaN or inf values.")
    addconst = bool_like(addconst, "addconst")
    verbose = bool_like(verbose, "verbose")
    try:
        maxlag = int_like(maxlag, "maxlag")
        if maxlag <= 0:
            raise ValueError("maxlag must be a positive integer")
        lags = np.arange(1, maxlag + 1)
    except TypeError:
        lags = np.array([int(lag) for lag in maxlag])
        maxlag = lags.max()
        if lags.min() <= 0 or lags.size == 0:
            raise ValueError(
                "maxlag must be a non-empty list containing only "
                "positive integers"
            )

    if x.shape[0] <= 3 * maxlag + int(addconst):
        raise ValueError(
            "Insufficient observations. Maximum allowable "
            "lag is {0}".format(int((x.shape[0] - int(addconst)) / 3) - 1)
        )

    resli = {}

    for mlg in lags:
        result = {}
        if verbose:
            print("\nGranger Causality")
            print("number of lags (no zero)", mlg)
        mxlg = mlg

        # create lagmat of both time series
        dta = lagmat2ds(x, mxlg, trim="both", dropex=1)

        # add constant
        if addconst:
            dtaown = add_constant(dta[:, 1 : (mxlg + 1)], prepend=False)
            dtajoint = add_constant(dta[:, 1:], prepend=False)
            if (
                dtajoint.shape[1] == (dta.shape[1] - 1)
                or (dtajoint.max(0) == dtajoint.min(0)).sum() != 1
            ):
                raise InfeasibleTestError(
                    "The x values include a column with constant values and"
                    " so the test statistic cannot be computed."
                )
        else:
            raise NotImplementedError("Not Implemented")
            # dtaown = dta[:, 1:mxlg]
            # dtajoint = dta[:, 1:]

        # Run ols on both models without and with lags of second variable
        res2down = OLS(dta[:, 0], dtaown).fit()
        res2djoint = OLS(dta[:, 0], dtajoint).fit()

        # print results
        # for ssr based tests see:
        # http://support.sas.com/rnd/app/examples/ets/granger/index.htm
        # the other tests are made-up

        # Granger Causality test using ssr (F statistic)
        if res2djoint.model.k_constant:
            tss = res2djoint.centered_tss
        else:
            tss = res2djoint.uncentered_tss
        if (
            tss == 0
            or res2djoint.ssr == 0
            or np.isnan(res2djoint.rsquared)
            or (res2djoint.ssr / tss) < np.finfo(float).eps
            or res2djoint.params.shape[0] != dtajoint.shape[1]
        ):
            raise InfeasibleTestError(
                "The Granger causality test statistic cannot be computed "
                "because the VAR has a perfect fit of the data."
            )
        fgc1 = (
            (res2down.ssr - res2djoint.ssr)
            / res2djoint.ssr
            / mxlg
            * res2djoint.df_resid
        )
        if verbose:
            print(
                "ssr based F test: F=%-8.4f, p=%-8.4f, df_denom=%d,"
                " df_num=%d"
                % (
                    fgc1,
                    stats.f.sf(fgc1, mxlg, res2djoint.df_resid),
                    res2djoint.df_resid,
                    mxlg,
                )
            )
        result["ssr_ftest"] = (
            fgc1,
            stats.f.sf(fgc1, mxlg, res2djoint.df_resid),
            res2djoint.df_resid,
            mxlg,
        )

        # Granger Causality test using ssr (chi2 statistic)
        fgc2 = res2down.nobs * (res2down.ssr - res2djoint.ssr) / res2djoint.ssr
        if verbose:
            print(
                "ssr based chi2 test: chi2=%-8.4f, p=%-8.4f, "
                "df=%d" % (fgc2, stats.chi2.sf(fgc2, mxlg), mxlg)
            )
        result["ssr_chi2test"] = (fgc2, stats.chi2.sf(fgc2, mxlg), mxlg)

        # likelihood ratio test pvalue:
        lr = -2 * (res2down.llf - res2djoint.llf)
        if verbose:
            print(
                "likelihood ratio test: chi2=%-8.4f, p=%-8.4f, df=%d"
                % (lr, stats.chi2.sf(lr, mxlg), mxlg)
            )
        result["lrtest"] = (lr, stats.chi2.sf(lr, mxlg), mxlg)

        # F test that all lag coefficients of exog are zero
        rconstr = np.column_stack(
            (np.zeros((mxlg, mxlg)), np.eye(mxlg, mxlg), np.zeros((mxlg, 1)))
        )
        ftres = res2djoint.f_test(rconstr)
        if verbose:
            print(
                "parameter F test: F=%-8.4f, p=%-8.4f, df_denom=%d,"
                " df_num=%d"
                % (ftres.fvalue, ftres.pvalue, ftres.df_denom, ftres.df_num)
            )
        result["params_ftest"] = (
            np.squeeze(ftres.fvalue)[()],
            np.squeeze(ftres.pvalue)[()],
            ftres.df_denom,
            ftres.df_num,
        )

        resli[mxlg] = (result, [res2down, res2djoint, rconstr])

    return resli


def coint(
    y0,
    y1,
    trend="c",
    method="aeg",
    maxlag=None,
    autolag: str | None = "aic",
    return_results=None,
):
    """
    Test for no-cointegration of a univariate equation.

    The null hypothesis is no cointegration. Variables in y0 and y1 are
    assumed to be integrated of order 1, I(1).

    This uses the augmented Engle-Granger two-step cointegration test.
    Constant or trend is included in the 1st stage regression, i.e. in the
    cointegrating equation.

    **Warning:** The autolag default has changed compared to statsmodels 0.8.
    In 0.8 autolag was always None; now the keyword is used and defaults to
    "aic". Use `autolag=None` to avoid the lag search.

    Parameters
    ----------
    y0 : array_like
        The first element in the cointegrated system. Must be 1-d.
    y1 : array_like
        The remaining elements in the cointegrated system.
    trend : str {"c", "ct"}
        The trend term included in the regression for the cointegrating
        equation.

        * "c" : constant.
        * "ct" : constant and linear trend.
        * also available: quadratic trend "ctt", and no constant "n".

    method : {"aeg"}
        Only "aeg" (augmented Engle-Granger) is available.
    maxlag : None or int
        Argument for `adfuller`, largest or given number of lags.
    autolag : str
        Argument for `adfuller`, lag selection criterion.

        * If None, then maxlag lags are used without lag search.
        * If "AIC" (default) or "BIC", then the number of lags is chosen
          to minimize the corresponding information criterion.
        * "t-stat" based choice of maxlag. Starts with maxlag and drops a
          lag until the t-statistic on the last lag length is significant
          using a 5%-sized test.
    return_results : bool
        For future compatibility, currently only tuple available.
        If True, then a results instance is returned. Otherwise, a tuple
        with the test outcome is returned. Set `return_results=False` to
        avoid future changes in return.

    Returns
    -------
    coint_t : float
        The t-statistic of the unit-root test on the residuals.
    pvalue : float
        MacKinnon's approximate, asymptotic p-value based on MacKinnon
        (1994).
    crit_value : dict
        Critical values for the test statistic at the 1 %, 5 %, and 10 %
        levels based on the regression curve. This depends on the number of
        observations.

    Notes
    -----
    The null hypothesis is that there is no cointegration; the alternative
    hypothesis is that there is a cointegrating relationship. If the pvalue
    is small, below a critical size, then we can reject the hypothesis that
    there is no cointegrating relationship.

    P-values and critical values are obtained through regression surface
    approximation from MacKinnon 1994 and 2010.

    If the two series are almost perfectly collinear, then computing the
    test is numerically unstable. However, the two series will be
    cointegrated under the maintained assumption that they are integrated.
    In this case the t-statistic will be set to -inf and the pvalue to zero.

    TODO: We could handle gaps in data by dropping rows with nans in the
    auxiliary regressions. Not implemented yet, currently assumes no nans
    and no gaps in the time series.

    References
    ----------
    .. [1] MacKinnon, J.G. 1994 "Approximate Asymptotic Distribution
       Functions for Unit-Root and Cointegration Tests." Journal of
       Business & Economics Statistics, 12.2, 167-76.
    .. [2] MacKinnon, J.G. 2010. "Critical Values for Cointegration Tests."
       Queen's University, Dept of Economics Working Papers 1227.
       http://ideas.repec.org/p/qed/wpaper/1227.html
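
    Examples
    --------
    A brief sketch on a simulated cointegrated pair (values depend on the
    seed):

    >>> import numpy as np
    >>> from statsmodels.tsa.stattools import coint
    >>> rng = np.random.default_rng(0)
    >>> y1 = rng.standard_normal(500).cumsum()[:, None]  # common I(1) trend
    >>> y0 = 0.5 * y1[:, 0] + rng.standard_normal(500)   # stationary error
    >>> coint_t, pvalue, crit = coint(y0, y1)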
    """
    y0 = array_like(y0, "y0")
    y1 = array_like(y1, "y1", ndim=2)
    trend = rename_trend(trend)
    trend = string_like(trend, "trend", options=("c", "n", "ct", "ctt"))
    string_like(method, "method", options=("aeg",))
    maxlag = int_like(maxlag, "maxlag", optional=True)
    autolag = string_like(
        autolag, "autolag", optional=True, options=("aic", "bic", "t-stat")
    )
    return_results = bool_like(return_results, "return_results", optional=True)

    nobs, k_vars = y1.shape
    k_vars += 1  # add 1 for y0

    if trend == "n":
        xx = y1
    else:
        xx = add_trend(y1, trend=trend, prepend=False)

    res_co = OLS(y0, xx).fit()

    if res_co.rsquared < 1 - 100 * SQRTEPS:
        res_adf = adfuller(
            res_co.resid, maxlag=maxlag, autolag=autolag, regression="n"
        )
    else:
        warnings.warn(
            "y0 and y1 are (almost) perfectly colinear. "
            "Cointegration test is not reliable in this case.",
            CollinearityWarning,
        )
        # Edge case where series are too similar
        res_adf = (-np.inf,)

    # no constant or trend, see egranger in Stata and MacKinnon
    if trend == "n":
        crit = [np.nan] * 3  # 2010 critical values not available
    else:
        crit = mackinnoncrit(N=k_vars, regression=trend, nobs=nobs - 1)
        # nobs - 1, the -1 is to match egranger in Stata, I do not know why.
        # TODO: check nobs or df = nobs - k

    pval_asy = mackinnonp(res_adf[0], regression=trend, N=k_vars)
    return res_adf[0], pval_asy, crit


def _safe_arma_fit(y, order, model_kw, trend, fit_kw, start_params=None):
    from statsmodels.tsa.arima.model import ARIMA

    try:
        return ARIMA(y, order=order, **model_kw, trend=trend).fit(
            start_params=start_params, **fit_kw
        )
    except LinAlgError:
        # SVD convergence failure on badly misspecified models
        return

    except ValueError as error:
        if start_params is not None:  # do not recurse again
            # user supplied start_params only get one chance
            return
        # try a little harder, should be handled in fit really
        elif "initial" not in error.args[0] or "initial" in str(error):
            start_params = [0.1] * sum(order)
            if trend == "c":
                start_params = [0.1] + start_params
            return _safe_arma_fit(
                y, order, model_kw, trend, fit_kw, start_params
            )
        else:
            return
    except:  # no idea what happened
        return


def _safe_arma_fit(y, order, model_kw, trend, fit_kw, start_params=None):
    from statsmodels.tsa.arima.model import ARIMA

    try:
        return ARIMA(y, order=order, **model_kw, trend=trend).fit(
            start_params=start_params, **fit_kw
        )
    except LinAlgError:
        # SVD convergence failure on badly misspecified models
        return

    except ValueError as error:
        if start_params is not None:  # do not recurse again
            # user supplied start_params only get one chance
            return
        # try a little harder, should be handled in fit really
        elif "initial" in error.args[0] or "initial" in str(error):
            start_params = [0.1] * sum(order)
            if trend == "c":
                start_params = [0.1] + start_params
            return _safe_arma_fit(
                y, order, model_kw, trend, fit_kw, start_params
            )
        else:
            return
    except:  # no idea what happened
        return


def arma_order_select_ic(
    y, max_ar=4, max_ma=2, ic="bic", trend="c", model_kw=None, fit_kw=None
):
    """
    Compute information criteria for many ARMA models.

    Parameters
    ----------
    y : array_like
        Array of time-series data.
    max_ar : int
        Maximum number of AR lags to use. Default 4.
    max_ma : int
        Maximum number of MA lags to use. Default 2.
    ic : str, list
        Information criteria to report. Either a single string or a list
        of different criteria is possible.
    trend : str
        The trend to use when fitting the ARMA models.
    model_kw : dict
        Keyword arguments to be passed to the ``ARMA`` model.
    fit_kw : dict
        Keyword arguments to be passed to ``ARMA.fit``.

    Returns
    -------
    Bunch
        Dict-like object with attribute access. Each ic is an attribute with
        a DataFrame for the results. The AR order used is the row index. The
        MA order used is the column index. The minimum orders are available
        as ``ic_min_order``.

    Notes
    -----
    This method can be used to tentatively identify the order of an ARMA
    process, provided that the time series is stationary and invertible. This
    function computes the full exact MLE estimate of each model and can
    therefore be a little slow. An implementation using approximate estimates
    will be provided in the future. In the meantime, consider passing
    ``{"method": "css"}`` to ``fit_kw``.

    Examples
    --------
    >>> from statsmodels.tsa.arima_process import arma_generate_sample
    >>> import statsmodels.api as sm
    >>> import numpy as np

    >>> arparams = np.array([.75, -.25])
    >>> maparams = np.array([.65, .35])
    >>> arparams = np.r_[1, -arparams]
    >>> maparams = np.r_[1, maparams]
    >>> nobs = 250
    >>> np.random.seed(2014)
    >>> y = arma_generate_sample(arparams, maparams, nobs)
    >>> res = sm.tsa.arma_order_select_ic(y, ic=["aic", "bic"], trend="n")
    >>> res.aic_min_order
    >>> res.bic_min_order
    """
    max_ar = int_like(max_ar, "max_ar")
    max_ma = int_like(max_ma, "max_ma")
    trend = string_like(trend, "trend", options=("n", "c"))
    model_kw = dict_like(model_kw, "model_kw", optional=True)
    fit_kw = dict_like(fit_kw, "fit_kw", optional=True)

    ar_range = list(range(max_ar + 1))
    ma_range = list(range(max_ma + 1))
    if isinstance(ic, str):
        ic = [ic]
    elif not isinstance(ic, (list, tuple)):
        raise ValueError("Need a list or a tuple for ic if not a string.")

    results = np.zeros((len(ic), max_ar + 1, max_ma + 1))
    model_kw = {} if model_kw is None else model_kw
    fit_kw = {} if fit_kw is None else fit_kw
    y_arr = array_like(y, "y", contiguous=True)
    for ar in ar_range:
        for ma in ma_range:
            mod = _safe_arma_fit(y_arr, (ar, 0, ma), model_kw, trend, fit_kw)
            if mod is None:
                results[:, ar, ma] = np.nan
                continue

            for i, criteria in enumerate(ic):
                results[i, ar, ma] = getattr(mod, criteria)

    dfs = [
        pd.DataFrame(res, columns=ma_range, index=ar_range) for res in results
    ]

    res = dict(zip(ic, dfs))

    # add the minimums to the results dict
    min_res = {}
    for i, result in res.items():
        mins = np.where(result.min().min() == result)
        min_res.update({i + "_min_order": (mins[0][0], mins[1][0])})
    res.update(min_res)

    return Bunch(**res)


def has_missing(data):
    """
    Returns True if "data" contains missing entries, otherwise False.
    """
    return np.isnan(np.sum(data))
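
# Illustrative sketch: ``np.isnan(np.sum(...))`` detects any NaN in a single
# pass, because one NaN poisons the whole sum, e.g.
#
#     >>> import numpy as np
#     >>> from statsmodels.tsa.stattools import has_missing
#     >>> has_missing([1.0, 2.0, np.nan])
#     True
#     >>> has_missing([1.0, 2.0, 3.0])
#     False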


def kpss(
    x,
    regression: Literal["c", "ct"] = "c",
    nlags: Literal["auto", "legacy"] | int = "auto",
    store: bool = False,
) -> Tuple[float, float, int, dict[str, float]]:
    """
    Kwiatkowski-Phillips-Schmidt-Shin test for stationarity.

    Computes the Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test for the null
    hypothesis that x is level or trend stationary.

    Parameters
    ----------
    x : array_like, 1d
        The data series to test.
    regression : str{"c", "ct"}
        The null hypothesis for the KPSS test.

        * "c" : The data is stationary around a constant (default).
        * "ct" : The data is stationary around a trend.
    nlags : {str, int}, optional
        Indicates the number of lags to be used. If "auto" (default), lags
        is calculated using the data-dependent method of Hobijn et al. (1998).
        See also Andrews (1991), Newey & West (1994), and Schwert (1989). If
        set to "legacy", uses int(12 * (n / 100)**(1 / 4)), as outlined in
        Schwert (1989).
    store : bool
        If True, then a result instance is returned additionally to
        the KPSS statistic (default is False).

    Returns
    -------
    kpss_stat : float
        The KPSS test statistic.
    p_value : float
        The p-value of the test. The p-value is interpolated from
        Table 1 in Kwiatkowski et al. (1992), and a boundary point
        is returned if the test statistic is outside the table of
        critical values, that is, if the p-value is outside the
        interval (0.01, 0.1).
    lags : int
        The truncation lag parameter.
    crit : dict
        The critical values at 10%, 5%, 2.5% and 1%. Based on
        Kwiatkowski et al. (1992).
    resstore : (optional) instance of ResultStore
        An instance of a dummy class with results attached as attributes.

    Notes
    -----
    To estimate sigma^2 the Newey-West estimator is used. If lags is
    "legacy", the truncation lag parameter is set to
    int(12 * (n / 100) ** (1 / 4)), as outlined in Schwert (1989). The
    p-values are interpolated from Table 1 of Kwiatkowski et al. (1992).
    If the computed statistic is outside the table of critical values, then
    a warning message is generated.

    Missing values are not handled.

    References
    ----------
    .. [1] Andrews, D.W.K. (1991). Heteroskedasticity and autocorrelation
       consistent covariance matrix estimation. Econometrica, 59: 817-858.

    .. [2] Hobijn, B., Franses, P.H., & Ooms, M. (2004). Generalizations of
       the KPSS-test for stationarity. Statistica Neerlandica, 58: 483-502.

    .. [3] Kwiatkowski, D., Phillips, P.C.B., Schmidt, P., & Shin, Y. (1992).
       Testing the null hypothesis of stationarity against the alternative
       of a unit root. Journal of Econometrics, 54: 159-178.

    .. [4] Newey, W.K., & West, K.D. (1994). Automatic lag selection in
       covariance matrix estimation. Review of Economic Studies, 61: 631-653.

    .. [5] Schwert, G. W. (1989). Tests for unit roots: A Monte Carlo
       investigation. Journal of Business and Economic Statistics, 7 (2):
       147-159.
    """
    x = array_like(x, "x")
    regression = string_like(regression, "regression", options=("c", "ct"))
    store = bool_like(store, "store")

    nobs = x.shape[0]
    hypo = regression

    # if m is not one, n != m * n
    if nobs != x.size:
        raise ValueError(f"x of shape {x.shape} not understood")

    if hypo == "ct":
        # p. 162 Kwiatkowski et al. (1992): y_t = beta * t + r_t + e_t,
        # where beta is the trend, r_t a random walk and e_t a stationary
        # error term.
        resids = OLS(x, add_constant(np.arange(1, nobs + 1))).fit().resid
        crit = [0.119, 0.146, 0.176, 0.216]
    else:  # hypo == "c"
        # special case of the model above, where beta = 0 (so the null
        # hypothesis is that the data is stationary around r_0).
        resids = x - x.mean()
        crit = [0.347, 0.463, 0.574, 0.739]

    if nlags == "legacy":
        nlags = int(np.ceil(12.0 * np.power(nobs / 100.0, 1 / 4.0)))
        nlags = min(nlags, nobs - 1)
    elif nlags == "auto" or nlags is None:
        if nlags is None:
            # TODO: Remove before 0.14 is released
            warnings.warn(
                "None is not a valid value for nlags. It must be an integer, "
                "'auto' or 'legacy'. None will raise starting in 0.14",
                FutureWarning,
            )
        # autolag method of Hobijn et al. (1998)
        nlags = _kpss_autolag(resids, nobs)
        nlags = min(nlags, nobs - 1)
    elif isinstance(nlags, str):
        raise ValueError("nlags must be 'auto' or 'legacy' when not an int")
    else:
        nlags = int_like(nlags, "nlags", optional=False)

    if nlags >= nobs:
        raise ValueError(
            f"lags ({nlags}) must be < number of observations ({nobs})"
        )

    pvals = [0.10, 0.05, 0.025, 0.01]

    eta = np.sum(resids.cumsum() ** 2) / (nobs ** 2)  # eq. 11, p. 165
    s_hat = _sigma_est_kpss(resids, nobs, nlags)

    kpss_stat = eta / s_hat
    p_value = np.interp(kpss_stat, crit, pvals)

    warn_msg = """\
The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is {direction} than the p-value returned.
"""
    if p_value == pvals[-1]:
        warnings.warn(
            warn_msg.format(direction="smaller"), InterpolationWarning
        )
    elif p_value == pvals[0]:
        warnings.warn(
            warn_msg.format(direction="greater"), InterpolationWarning
        )

    crit_dict = {"10%": crit[0], "5%": crit[1], "2.5%": crit[2], "1%": crit[3]}

    if store:
        from statsmodels.stats.diagnostic import ResultsStore

        rstore = ResultsStore()
        rstore.lags = nlags
        rstore.nobs = nobs

        stationary_type = "level" if hypo == "c" else "trend"
        rstore.H0 = "The series is {0} stationary".format(stationary_type)
        rstore.HA = "The series is not {0} stationary".format(stationary_type)

        return kpss_stat, p_value, crit_dict, rstore
    else:
        return kpss_stat, p_value, nlags, crit_dict
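
# Illustrative sketch (hypothetical data, not part of the module): under the
# KPSS test the *null* is stationarity, so a small p-value argues against
# stationarity.  White noise and a random walk are the two canonical cases;
# note that boundary p-values may trigger an InterpolationWarning.
#
#     >>> import numpy as np
#     >>> from statsmodels.tsa.stattools import kpss
#     >>> rng = np.random.RandomState(0)
#     >>> noise = rng.standard_normal(500)             # level stationary
#     >>> stat, pval, nlags, crit = kpss(noise, regression="c")
#     >>> walk = noise.cumsum()                        # unit root
#     >>> stat_w, pval_w, nlags_w, crit_w = kpss(walk, regression="c")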


def _sigma_est_kpss(resids, nobs, lags):
    """
    Computes equation 10, p. 164 of Kwiatkowski et al. (1992). This is the
    consistent estimator for the long-run variance:

        s^2(l) = (1/T) * sum_t e_t^2
                 + (2/T) * sum_{s=1}^{l} w(s, l) * sum_{t=s+1}^{T} e_t e_{t-s}

    with Bartlett weights w(s, l) = 1 - s / (l + 1).
    """
    s_hat = np.sum(resids ** 2)
    for i in range(1, lags + 1):
        resids_prod = np.dot(resids[i:], resids[: nobs - i])
        s_hat += 2 * resids_prod * (1.0 - (i / (lags + 1.0)))
    return s_hat / nobs


def _kpss_autolag(resids, nobs):
    """
    Computes the number of lags for covariance matrix estimation in KPSS test
    using method of Hobijn et al (1998). See also Andrews (1991), Newey & West
    (1994), and Schwert (1989). Assumes Bartlett / Newey-West kernel.
    """
    covlags = int(np.power(nobs, 2.0 / 9.0))
    s0 = np.sum(resids ** 2) / nobs
    s1 = 0
    for i in range(1, covlags + 1):
        resids_prod = np.dot(resids[i:], resids[: nobs - i])
        resids_prod /= nobs / 2.0
        s0 += resids_prod
        s1 += i * resids_prod
    s_hat = s1 / s0
    pwr = 1.0 / 3.0
    # Bartlett kernel: bandwidth ~ 1.1447 * ((s1 / s0) ** 2) ** (1/3) * T ** (1/3)
    gamma_hat = 1.1447 * np.power(s_hat * s_hat, pwr)
    autolags = int(gamma_hat * np.power(nobs, pwr))
    return autolags


def range_unit_root_test(x, store=False):
    """
    Range unit-root test for stationarity.

    Computes the Range Unit-Root (RUR) test for the null
    hypothesis that x is stationary.

    Parameters
    ----------
    x : array_like, 1d
        The data series to test.
    store : bool
        If True, then a result instance is returned additionally to
        the RUR statistic (default is False).

    Returns
    -------
    rur_stat : float
        The RUR test statistic.
    p_value : float
        The p-value of the test. The p-value is interpolated from
        Table 1 in Aparicio et al. (2006), and a boundary point
        is returned if the test statistic is outside the table of
        critical values, that is, if the p-value is outside the
        interval (0.01, 0.1).
    crit : dict
        The critical values at 10%, 5%, 2.5% and 1%. Based on
        Aparicio et al. (2006).
    resstore : (optional) instance of ResultStore
        An instance of a dummy class with results attached as attributes.

    Notes
    -----
    The p-values are interpolated from Table 1 of Aparicio et al. (2006).
    If the computed statistic is outside the table of critical values, then
    a warning message is generated.

    Missing values are not handled.

    References
    ----------
    .. [1] Aparicio, F., Escribano, A., & Sipols, A.E. (2006). Range
       Unit-Root (RUR) tests: robust against nonlinearities, error
       distributions, structural breaks and outliers. Journal of Time Series
       Analysis, 27 (4): 545-576.
    """
    x = array_like(x, "x")
    store = bool_like(store, "store")

    nobs = x.shape[0]

    # if m is not one, n != m * n
    if nobs != x.size:
        raise ValueError(f"x of shape {x.shape} not understood")

    # Table from [1] has been replicated using 200,000 samples
    # Critical values for new n_obs values have been identified
    pvals = [0.01, 0.025, 0.05, 0.10, 0.90, 0.95]
    n = np.array(
        [25, 50, 100, 150, 200, 250, 500, 1000, 2000, 3000, 4000, 5000]
    )
    crit = np.array(
        [
            [0.6626, 0.8126, 0.9192, 1.0712, 2.4863, 2.7312],
            [0.7977, 0.9274, 1.0478, 1.1964, 2.6821, 2.9613],
            [0.9070, 1.0243, 1.1412, 1.2888, 2.8317, 3.1393],
            [0.9543, 1.0768, 1.1869, 1.3294, 2.8915, 3.2049],
            [0.9833, 1.0984, 1.2101, 1.3494, 2.9308, 3.2482],
            [0.9982, 1.1137, 1.2242, 1.3632, 2.9571, 3.2842],
            [1.0494, 1.1643, 1.2712, 1.4076, 3.0207, 3.3584],
            [1.0846, 1.1959, 1.2988, 1.4344, 3.0653, 3.4073],
            [1.1121, 1.2200, 1.3230, 1.4556, 3.0948, 3.4439],
            [1.1204, 1.2295, 1.3303, 1.4656, 3.1054, 3.4632],
            [1.1309, 1.2347, 1.3378, 1.4693, 3.1165, 3.4717],
            [1.1377, 1.2402, 1.3408, 1.4729, 3.1252, 3.4807],
        ]
    )

    # Interpolate the critical values for the observed sample size
    inter_crit = np.zeros((1, crit.shape[1]))
    for i in range(crit.shape[1]):
        f = interp1d(n, crit[:, i])
        inter_crit[0, i] = f(nobs)

    # Calculate RUR stat: count how often the expanding range grows
    xs = pd.Series(x)
    exp_max = xs.expanding(1).max().shift(1)
    exp_min = xs.expanding(1).min().shift(1)
    count = (xs > exp_max).sum() + (xs < exp_min).sum()

    rur_stat = count / np.sqrt(len(x))

    k = len(pvals) - 1
    for i in range(len(pvals) - 1, -1, -1):
        if rur_stat < inter_crit[0, i]:
            k = i
        else:
            break

    p_value = pvals[k]

    warn_msg = """\
The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is {direction} than the p-value returned.
"""
    direction = ""
    if p_value == pvals[-1]:
        direction = "smaller"
    elif p_value == pvals[0]:
        direction = "larger"

    if direction:
        warnings.warn(
            warn_msg.format(direction=direction), InterpolationWarning
        )

    crit_dict = {
        "10%": inter_crit[0, 3],
        "5%": inter_crit[0, 2],
        "2.5%": inter_crit[0, 1],
        "1%": inter_crit[0, 0],
    }

    if store:
        from statsmodels.stats.diagnostic import ResultsStore

        rstore = ResultsStore()
        rstore.nobs = nobs

        rstore.H0 = "The series is not stationary"
        rstore.HA = "The series is stationary"

        return rur_stat, p_value, crit_dict, rstore
    else:
        return rur_stat, p_value, crit_dict
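
# Illustrative sketch (hypothetical data, not part of the module): a random
# walk keeps setting new highs and lows, so its expanding range (and hence
# the record count behind the RUR statistic) grows much faster than for a
# stationary series.  The call returns the statistic, the interpolated
# p-value and the interpolated critical values.
#
#     >>> import numpy as np
#     >>> from statsmodels.tsa.stattools import range_unit_root_test
#     >>> rng = np.random.RandomState(0)
#     >>> x = rng.standard_normal(500)
#     >>> rur_stat, p_value, crit = range_unit_root_test(x)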


class ZivotAndrewsUnitRoot(object):
    """
    Class wrapper for the Zivot-Andrews structural-break unit-root test.
    """

    def __init__(self):
        """
        Critical values for the three different models specified for the
        Zivot-Andrews unit-root test.

        Notes
        -----
        The p-values are generated through Monte Carlo simulation using
        100,000 replications and 2000 data points.
        """
        self._za_critical_values = {}
        # constant-only model
        self._c = (
            (0.001, -6.78442),
            (0.100, -5.83192),
            (0.200, -5.68139),
            (0.300, -5.58461),
            (0.400, -5.51308),
            (0.500, -5.45043),
            (0.600, -5.39924),
            (0.700, -5.36023),
            (0.800, -5.33219),
            (0.900, -5.30294),
            (1.000, -5.27644),
            (2.500, -5.03340),
            (5.000, -4.81067),
            (7.500, -4.67636),
            (10.000, -4.56618),
            (12.500, -4.48130),
            (15.000, -4.40507),
            (17.500, -4.33947),
            (20.000, -4.28155),
            (22.500, -4.22683),
            (25.000, -4.17830),
            (27.500, -4.13101),
            (30.000, -4.08586),
            (32.500, -4.04455),
            (35.000, -4.00380),
            (37.500, -3.96144),
            (40.000, -3.92078),
            (42.500, -3.88178),
            (45.000, -3.84503),
            (47.500, -3.80549),
            (50.000, -3.77031),
            (52.500, -3.73209),
            (55.000, -3.69600),
            (57.500, -3.65985),
            (60.000, -3.62126),
            (65.000, -3.54580),
            (70.000, -3.46848),
            (75.000, -3.38533),
            (80.000, -3.29112),
            (85.000, -3.17832),
            (90.000, -3.04165),
            (92.500, -2.95146),
            (95.000, -2.83179),
            (96.000, -2.76465),
            (97.000, -2.68624),
            (98.000, -2.57884),
            (99.000, -2.40044),
            (99.900, -1.88932),
        )
        self._za_critical_values["c"] = np.asarray(self._c)
        # trend-only model
        self._t = (
            (0.001, -83.9094),
            (0.100, -13.8837),
            (0.200, -9.13205),
            (0.300, -6.32564),
            (0.400, -5.60803),
            (0.500, -5.38794),
            (0.600, -5.26585),
            (0.700, -5.18734),
            (0.800, -5.12756),
            (0.900, -5.07984),
            (1.000, -5.03421),
            (2.500, -4.65634),
            (5.000, -4.40580),
            (7.500, -4.25214),
            (10.000, -4.13678),
            (12.500, -4.03765),
            (15.000, -3.95185),
            (17.500, -3.87945),
            (20.000, -3.81295),
            (22.500, -3.75273),
            (25.000, -3.69836),
            (27.500, -3.64785),
            (30.000, -3.59819),
            (32.500, -3.55146),
            (35.000, -3.50522),
            (37.500, -3.45987),
            (40.000, -3.41672),
            (42.500, -3.37465),
            (45.000, -3.33394),
            (47.500, -3.29393),
            (50.000, -3.25316),
            (52.500, -3.21244),
            (55.000, -3.17124),
            (57.500, -3.13211),
            (60.000, -3.09204),
            (65.000, -3.01135),
            (70.000, -2.92897),
            (75.000, -2.83614),
            (80.000, -2.73893),
            (85.000, -2.62840),
            (90.000, -2.49611),
            (92.500, -2.41337),
            (95.000, -2.30820),
            (96.000, -2.25797),
            (97.000, -2.19648),
            (98.000, -2.11320),
            (99.000, -1.99138),
            (99.900, -1.67466),
        )
        self._za_critical_values["t"] = np.asarray(self._t)
        # constant + trend model
        self._ct = (
            (0.001, -38.17800),
            (0.100, -6.43107),
            (0.200, -6.07279),
            (0.300, -5.95496),
            (0.400, -5.86254),
            (0.500, -5.77081),
            (0.600, -5.72541),
            (0.700, -5.68406),
            (0.800, -5.65163),
            (0.900, -5.60419),
            (1.000, -5.57556),
            (2.500, -5.29704),
            (5.000, -5.07332),
            (7.500, -4.93003),
            (10.000, -4.82668),
            (12.500, -4.73711),
            (15.000, -4.66020),
            (17.500, -4.58970),
            (20.000, -4.52855),
            (22.500, -4.47100),
            (25.000, -4.42011),
            (27.500, -4.37387),
            (30.000, -4.32705),
            (32.500, -4.28126),
            (35.000, -4.23793),
            (37.500, -4.19822),
            (40.000, -4.15800),
            (42.500, -4.11946),
            (45.000, -4.08064),
            (47.500, -4.04286),
            (50.000, -4.00489),
            (52.500, -3.96837),
            (55.000, -3.93200),
            (57.500, -3.89496),
            (60.000, -3.85577),
            (65.000, -3.77795),
            (70.000, -3.69794),
            (75.000, -3.61852),
            (80.000, -3.52485),
            (85.000, -3.41665),
            (90.000, -3.28527),
            (92.500, -3.19724),
            (95.000, -3.08769),
            (96.000, -3.03088),
            (97.000, -2.96091),
            (98.000, -2.85581),
            (99.000, -2.71015),
            (99.900, -2.28767),
        )
        self._za_critical_values["ct"] = np.asarray(self._ct)

    def _za_crit(self, stat, model="c"):
        """
        Linear interpolation for Zivot-Andrews p-values and critical values.

        Parameters
        ----------
        stat : float
            The ZA test statistic
        model : {"c","t","ct"}
            The model used when computing the ZA statistic. "c" is default.

        Returns
        -------
        pvalue : float
            The interpolated p-value
        cvdict : dict
            Critical values for the test statistic at the 1%, 5%, and 10%
            levels

        Notes
        -----
        The p-values are linearly interpolated from the quantiles of the
        simulated ZA test statistic distribution
        """
        table = self._za_critical_values[model]
        pcnts = table[:, 0]
        stats = table[:, 1]
        # ZA cv table contains quantiles multiplied by 100
        pvalue = np.interp(stat, stats, pcnts) / 100.0
        cv = [1.0, 5.0, 10.0]
        crit_value = np.interp(cv, pcnts, stats)
        cvdict = {
            "1%": crit_value[0],
            "5%": crit_value[1],
            "10%": crit_value[2],
        }
        return pvalue, cvdict

    def _quick_ols(self, endog, exog):
        """
        Minimal implementation of an OLS estimator that returns only the
        parameter t-statistics, for internal use.
        """
        xpxi = np.linalg.inv(exog.T.dot(exog))
        xpy = exog.T.dot(endog)
        nobs, k_exog = exog.shape
        b = xpxi.dot(xpy)
        e = endog - exog.dot(b)
        sigma2 = e.T.dot(e) / (nobs - k_exog)
        return b / np.sqrt(np.diag(sigma2 * xpxi))

    def _format_regression_data(self, series, nobs, const, trend, cols, lags):
        """
        Create the endog/exog data for the auxiliary regressions
        from the original (standardized) series under test.
        """
        # first-diff y and standardize for numerical stability
        endog = np.diff(series, axis=0)
        endog /= np.sqrt(endog.T.dot(endog))
        series /= np.sqrt(series.T.dot(series))
        # reserve exog space
        exog = np.zeros((endog[lags:].shape[0], cols + lags))
        exog[:, 0] = const
        # lagged y and dy
        exog[:, cols - 1] = series[lags : (nobs - 1)]
        exog[:, cols:] = lagmat(endog, lags, trim="none")[
            lags : exog.shape[0] + lags
        ]
        return endog, exog

    def _update_regression_exog(
        self, exog, regression, period, nobs, const, trend, cols, lags
    ):
        """
        Update the exog array for the next break-period regression.
        """
        cutoff = period - (lags + 1)
        if regression != "t":
            exog[:cutoff, 1] = 0
            exog[cutoff:, 1] = const
            exog[:, 2] = trend[(lags + 2) : (nobs + 1)]
            if regression == "ct":
                exog[:cutoff, 3] = 0
                exog[cutoff:, 3] = trend[1 : (nobs - period + 1)]
        else:
            exog[:, 1] = trend[(lags + 2) : (nobs + 1)]
            exog[: (cutoff - 1), 2] = 0
            exog[(cutoff - 1) :, 2] = trend[0 : (nobs - period + 1)]
        return exog

    def run(self, x, trim=0.15, maxlag=None, regression="c", autolag="AIC"):
        """
        Zivot-Andrews structural-break unit-root test.

        The Zivot-Andrews test tests for a unit root in a univariate process
        in the presence of serial correlation and a single structural break.

        Parameters
        ----------
        x : array_like
            The data series to test.
        trim : float
            The percentage of series at begin/end to exclude from break-period
            calculation in range [0, 0.333] (default=0.15).
        maxlag : int
            The maximum lag which is included in test, default is
            12*(nobs/100)^{1/4} (Schwert, 1989).
        regression : {"c","t","ct"}
            Constant and trend order to include in regression.

            * "c" : constant only (default).
            * "t" : trend only.
            * "ct" : constant and trend.
        autolag : {"AIC", "BIC", "t-stat", None}
            The method to select the lag length when using automatic
            selection.

            * if None, then maxlag lags are used,
            * if "AIC" (default) or "BIC", then the number of lags is chosen
              to minimize the corresponding information criterion,
            * "t-stat" based choice of maxlag.  Starts with maxlag and drops
              a lag until the t-statistic on the last lag length is
              significant using a 5%-sized test.

        Returns
        -------
        zastat : float
            The test statistic.
        pvalue : float
            The pvalue based on MC-derived critical values.
        cvdict : dict
            The critical values for the test statistic at the 1%, 5%, and
            10% levels.
        baselag : int
            The number of lags used for period regressions.
        bpidx : int
            The index of x corresponding to endogenously calculated break
            period with values in the range [0..nobs-1].

        Notes
        -----
        H0 = unit root with a single structural break

        Algorithm follows Baum (2004/2015) approximation to original
        Zivot-Andrews method. Rather than performing an autolag regression at
        each candidate break period (as per the original paper), a single
        autolag regression is run up-front on the base model (constant +
        trend with no dummies) to determine the best lag length. This lag
        length is then used for all subsequent break-period regressions.
        This results in significant run-time reduction but also slightly
        more pessimistic test statistics than the original Zivot-Andrews
        method, although no attempt has been made to characterize the
        size/power trade-off.

        References
        ----------
        .. [1] Baum, C.F. (2004). ZANDREWS: Stata module to calculate
           Zivot-Andrews unit root test in presence of structural break.
           Statistical Software Components S437301, Boston College
           Department of Economics, revised 2015.

        .. [2] Schwert, G.W. (1989). Tests for unit roots: A Monte Carlo
           investigation. Journal of Business & Economic Statistics, 7:
           147-159.

        .. [3] Zivot, E., and Andrews, D.W.K. (1992). Further evidence on
           the great crash, the oil-price shock, and the unit-root
           hypothesis. Journal of Business & Economic Statistics, 10:
           251-270.
        """
        x = array_like(x, "x")
        trim = float_like(trim, "trim")
        maxlag = int_like(maxlag, "maxlag", optional=True)
        regression = string_like(
            regression, "regression", options=("c", "t", "ct")
        )
        autolag = string_like(
            autolag, "autolag", options=("aic", "bic", "t-stat"), optional=True
        )
        if trim < 0 or trim > (1.0 / 3.0):
            raise ValueError("trim value must be a float in range [0, 1/3]")
        nobs = x.shape[0]
        if autolag:
            adf_res = adfuller(
                x, maxlag=maxlag, regression="ct", autolag=autolag
            )
            baselags = adf_res[2]
        elif maxlag:
            baselags = maxlag
        else:
            baselags = int(12.0 * np.power(nobs / 100.0, 1 / 4.0))
        trimcnt = int(nobs * trim)
        start_period = trimcnt
        end_period = nobs - trimcnt
        if regression == "ct":
            basecols = 5
        else:
            basecols = 4
        # normalize constant and trend terms for stability
        c_const = 1 / np.sqrt(nobs)
        t_const = np.arange(1.0, nobs + 2)
        t_const *= np.sqrt(3) / nobs ** (3 / 2)
        # format the auxiliary regression data
        endog, exog = self._format_regression_data(
            x, nobs, c_const, t_const, basecols, baselags
        )
        # iterate through the time periods
        stats = np.full(end_period + 1, np.inf)
        for bp in range(start_period + 1, end_period + 1):
            # update intercept dummy / trend / trend dummy
            exog = self._update_regression_exog(
                exog,
                regression,
                bp,
                nobs,
                c_const,
                t_const,
                basecols,
                baselags,
            )
            # check exog rank on first iteration
            if bp == start_period + 1:
                o = OLS(endog[baselags:], exog, hasconst=1).fit()
                if o.df_model < exog.shape[1] - 1:
                    raise ValueError(
                        "ZA: auxiliary exog matrix is not full rank.\n"
                        "  cols (exc intercept) = {}  rank = {}".format(
                            exog.shape[1] - 1, o.df_model
                        )
                    )
                stats[bp] = o.tvalues[basecols - 1]
            else:
                stats[bp] = self._quick_ols(endog[baselags:], exog)[
                    basecols - 1
                ]
        # return best seen
        zastat = np.min(stats)
        # map the minimizing break period back to a 0-based index into x
        bpidx = np.argmin(stats) - 1
        crit = self._za_crit(zastat, regression)
        pval = crit[0]
        cvdict = crit[1]
        return zastat, pval, cvdict, baselags, bpidx

    def __call__(
        self, x, trim=0.15, maxlag=None, regression="c", autolag="AIC"
    ):
        return self.run(
            x, trim=trim, maxlag=maxlag, regression=regression, autolag=autolag
        )


zivot_andrews = ZivotAndrewsUnitRoot()
zivot_andrews.__doc__ = zivot_andrews.run.__doc__
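
# Illustrative sketch (hypothetical data, not part of the module): because
# ``zivot_andrews`` is a callable instance, it is used like a function.  A
# stationary series with a single level shift is the typical use case.
#
#     >>> import numpy as np
#     >>> from statsmodels.tsa.stattools import zivot_andrews
#     >>> rng = np.random.RandomState(0)
#     >>> x = rng.standard_normal(250)
#     >>> x[125:] += 10.0  # single structural break in the level
#     >>> zastat, pval, cvdict, baselag, bpidx = zivot_andrews(x, regression="c")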