1""" 2Run x12/x13-arima specs in a subprocess from Python and curry results back 3into python. 4 5Notes 6----- 7Many of the functions are called x12. However, they are also intended to work 8for x13. If this is not the case, it's a bug. 9""" 10from statsmodels.compat.pandas import deprecate_kwarg 11 12import os 13import subprocess 14import tempfile 15import re 16from warnings import warn 17 18import pandas as pd 19 20from statsmodels.tools.tools import Bunch 21from statsmodels.tools.sm_exceptions import (X13NotFoundError, 22 IOWarning, X13Error, 23 X13Warning) 24 25__all__ = ["x13_arima_select_order", "x13_arima_analysis"] 26 27_binary_names = ('x13as.exe', 'x13as', 'x12a.exe', 'x12a') 28 29 30class _freq_to_period: 31 def __getitem__(self, key): 32 if key.startswith('M'): 33 return 12 34 elif key.startswith('Q'): 35 return 4 36 elif key.startswith('W'): 37 return 52 38 39 40_freq_to_period = _freq_to_period() 41 42_period_to_freq = {12: 'M', 4: 'Q'} 43_log_to_x12 = {True: 'log', False: 'none', None: 'auto'} 44_bool_to_yes_no = lambda x: 'yes' if x else 'no' # noqa:E731 45 46 47def _find_x12(x12path=None, prefer_x13=True): 48 """ 49 If x12path is not given, then either x13as[.exe] or x12a[.exe] must 50 be found on the PATH. Otherwise, the environmental variable X12PATH or 51 X13PATH must be defined. If prefer_x13 is True, only X13PATH is searched 52 for. If it is false, only X12PATH is searched for. 53 """ 54 global _binary_names 55 if x12path is not None and x12path.endswith(_binary_names): 56 # remove binary from path if given 57 x12path = os.path.dirname(x12path) 58 59 if not prefer_x13: # search for x12 first 60 _binary_names = _binary_names[::-1] 61 if x12path is None: 62 x12path = os.getenv("X12PATH", "") 63 if not x12path: 64 x12path = os.getenv("X13PATH", "") 65 elif x12path is None: 66 x12path = os.getenv("X13PATH", "") 67 if not x12path: 68 x12path = os.getenv("X12PATH", "") 69 70 for binary in _binary_names: 71 x12 = os.path.join(x12path, binary) 72 try: 73 subprocess.check_call(x12, stdout=subprocess.PIPE, 74 stderr=subprocess.PIPE) 75 return x12 76 except OSError: 77 pass 78 79 else: 80 return False 81 82 83def _check_x12(x12path=None): 84 x12path = _find_x12(x12path) 85 if not x12path: 86 raise X13NotFoundError("x12a and x13as not found on path. Give the " 87 "path, put them on PATH, or set the " 88 "X12PATH or X13PATH environmental variable.") 89 return x12path 90 91 92def _clean_order(order): 93 """ 94 Takes something like (1 1 0)(0 1 1) and returns a arma order, sarma 95 order tuple. Also accepts (1 1 0) and return arma order and (0, 0, 0) 96 """ 97 order = re.findall(r"\([0-9 ]*?\)", order) 98 99 def clean(x): 100 return tuple(map(int, re.sub("[()]", "", x).split(" "))) 101 102 if len(order) > 1: 103 order, sorder = map(clean, order) 104 else: 105 order = clean(order[0]) 106 sorder = (0, 0, 0) 107 108 return order, sorder 109 110 111def run_spec(x12path, specpath, outname=None, meta=False, datameta=False): 112 113 if meta and datameta: 114 raise ValueError("Cannot specify both meta and datameta.") 115 if meta: 116 args = [x12path, "-m " + specpath] 117 elif datameta: 118 args = [x12path, "-d " + specpath] 119 else: 120 args = [x12path, specpath] 121 122 if outname: 123 args += [outname] 124 125 return subprocess.Popen(args, stdout=subprocess.PIPE, 126 stderr=subprocess.STDOUT) 127 128 129def _make_automdl_options(maxorder, maxdiff, diff): 130 options = "\n" 131 options += "maxorder = ({0} {1})\n".format(maxorder[0], maxorder[1]) 132 if maxdiff is not None: # maxdiff always takes precedence 133 options += "maxdiff = ({0} {1})\n".format(maxdiff[0], maxdiff[1]) 134 else: 135 options += "diff = ({0} {1})\n".format(diff[0], diff[1]) 136 return options 137 138 139def _make_var_names(exog): 140 if hasattr(exog, "name"): 141 var_names = exog.name 142 elif hasattr(exog, "columns"): 143 var_names = exog.columns 144 else: 145 raise ValueError("exog is not a Series or DataFrame or is unnamed.") 146 try: 147 var_names = " ".join(var_names) 148 except TypeError: # cannot have names that are numbers, pandas default 149 from statsmodels.base.data import _make_exog_names 150 if exog.ndim == 1: 151 var_names = "x1" 152 else: 153 var_names = " ".join(_make_exog_names(exog)) 154 return var_names 155 156 157def _make_regression_options(trading, exog): 158 if not trading and exog is None: # start regression spec 159 return "" 160 161 reg_spec = "regression{\n" 162 if trading: 163 reg_spec += " variables = (td)\n" 164 if exog is not None: 165 var_names = _make_var_names(exog) 166 reg_spec += " user = ({0})\n".format(var_names) 167 reg_spec += " data = ({0})\n".format("\n".join(map(str, 168 exog.values.ravel().tolist()))) 169 170 reg_spec += "}\n" # close out regression spec 171 return reg_spec 172 173 174def _make_forecast_options(forecast_periods): 175 if forecast_periods is None: 176 return "" 177 forecast_spec = "forecast{\n" 178 forecast_spec += "maxlead = ({0})\n}}\n".format(forecast_periods) 179 return forecast_spec 180 181 182def _check_errors(errors): 183 errors = errors[errors.find("spc:")+4:].strip() 184 if errors and 'ERROR' in errors: 185 raise X13Error(errors) 186 elif errors and 'WARNING' in errors: 187 warn(errors, X13Warning) 188 189 190def _convert_out_to_series(x, dates, name): 191 """ 192 Convert x to a DataFrame where x is a string in the format given by 193 x-13arima-seats output. 194 """ 195 from io import StringIO 196 from pandas import read_csv 197 out = read_csv(StringIO(x), skiprows=2, 198 header=None, sep='\t', engine='python') 199 return out.set_index(dates).rename(columns={1: name})[name] 200 201 202def _open_and_read(fname): 203 # opens a file, reads it, and make sure it's closed 204 with open(fname, 'r') as fin: 205 fout = fin.read() 206 return fout 207 208 209class Spec(object): 210 @property 211 def spec_name(self): 212 return self.__class__.__name__.replace("Spec", "") 213 214 def create_spec(self, **kwargs): 215 spec = """{name} {{ 216 {options} 217 }} 218 """ 219 return spec.format(name=self.spec_name, 220 options=self.options) 221 222 def set_options(self, **kwargs): 223 options = "" 224 for key, value in kwargs.items(): 225 options += "{0}={1}\n".format(key, value) 226 self.__dict__.update({key: value}) 227 self.options = options 228 229 230class SeriesSpec(Spec): 231 """ 232 Parameters 233 ---------- 234 data 235 appendbcst : bool 236 appendfcst : bool 237 comptype 238 compwt 239 decimals 240 modelspan 241 name 242 period 243 precision 244 to_print 245 to_save 246 span 247 start 248 title 249 type 250 251 Notes 252 ----- 253 Rarely used arguments 254 255 divpower 256 missingcode 257 missingval 258 saveprecision 259 trimzero 260 """ 261 def __init__(self, data, name='Unnamed Series', appendbcst=False, 262 appendfcst=False, 263 comptype=None, compwt=1, decimals=0, modelspan=(), 264 period=12, precision=0, to_print=[], to_save=[], span=(), 265 start=(1, 1), title='', series_type=None, divpower=None, 266 missingcode=-99999, missingval=1000000000): 267 268 appendbcst, appendfcst = map(_bool_to_yes_no, [appendbcst, 269 appendfcst, 270 ]) 271 272 series_name = "\"{0}\"".format(name[:64]) # trim to 64 characters 273 title = "\"{0}\"".format(title[:79]) # trim to 79 characters 274 self.set_options(data=data, appendbcst=appendbcst, 275 appendfcst=appendfcst, period=period, start=start, 276 title=title, name=series_name, 277 ) 278 279 280def pandas_to_series_spec(x): 281 # from statsmodels.tools.data import _check_period_index 282 # check_period_index(x) 283 if hasattr(x, 'columns'): # convert to series 284 if len(x.columns) > 1: 285 raise ValueError("Does not handle DataFrame with more than one " 286 "column") 287 x = x[x.columns[0]] 288 289 data = "({0})".format("\n".join(map(str, x.values.tolist()))) 290 291 # get periodicity 292 # get start / first data 293 # give it a title 294 try: 295 period = _freq_to_period[x.index.freqstr] 296 except (AttributeError, ValueError): 297 from pandas.tseries.api import infer_freq 298 period = _freq_to_period[infer_freq(x.index)] 299 start_date = x.index[0] 300 if period == 12: 301 year, stperiod = start_date.year, start_date.month 302 elif period == 4: 303 year, stperiod = start_date.year, start_date.quarter 304 else: # pragma: no cover 305 raise ValueError("Only monthly and quarterly periods are supported." 306 " Please report or send a pull request if you want " 307 "this extended.") 308 309 if hasattr(x, 'name'): 310 name = x.name or "Unnamed Series" 311 else: 312 name = 'Unnamed Series' 313 series_spec = SeriesSpec(data=data, name=name, period=period, 314 title=name, start="{0}.{1}".format(year, 315 stperiod)) 316 return series_spec 317 318 319@deprecate_kwarg('forecast_years', 'forecast_periods') 320def x13_arima_analysis(endog, maxorder=(2, 1), maxdiff=(2, 1), diff=None, 321 exog=None, log=None, outlier=True, trading=False, 322 forecast_periods=None, retspec=False, 323 speconly=False, start=None, freq=None, 324 print_stdout=False, x12path=None, prefer_x13=True): 325 """ 326 Perform x13-arima analysis for monthly or quarterly data. 327 328 Parameters 329 ---------- 330 endog : array_like, pandas.Series 331 The series to model. It is best to use a pandas object with a 332 DatetimeIndex or PeriodIndex. However, you can pass an array-like 333 object. If your object does not have a dates index then ``start`` and 334 ``freq`` are not optional. 335 maxorder : tuple 336 The maximum order of the regular and seasonal ARMA polynomials to 337 examine during the model identification. The order for the regular 338 polynomial must be greater than zero and no larger than 4. The 339 order for the seasonal polynomial may be 1 or 2. 340 maxdiff : tuple 341 The maximum orders for regular and seasonal differencing in the 342 automatic differencing procedure. Acceptable inputs for regular 343 differencing are 1 and 2. The maximum order for seasonal differencing 344 is 1. If ``diff`` is specified then ``maxdiff`` should be None. 345 Otherwise, ``diff`` will be ignored. See also ``diff``. 346 diff : tuple 347 Fixes the orders of differencing for the regular and seasonal 348 differencing. Regular differencing may be 0, 1, or 2. Seasonal 349 differencing may be 0 or 1. ``maxdiff`` must be None, otherwise 350 ``diff`` is ignored. 351 exog : array_like 352 Exogenous variables. 353 log : bool or None 354 If None, it is automatically determined whether to log the series or 355 not. If False, logs are not taken. If True, logs are taken. 356 outlier : bool 357 Whether or not outliers are tested for and corrected, if detected. 358 trading : bool 359 Whether or not trading day effects are tested for. 360 forecast_periods : int 361 Number of forecasts produced. The default is None. 362 retspec : bool 363 Whether to return the created specification file. Can be useful for 364 debugging. 365 speconly : bool 366 Whether to create the specification file and then return it without 367 performing the analysis. Can be useful for debugging. 368 start : str, datetime 369 Must be given if ``endog`` does not have date information in its index. 370 Anything accepted by pandas.DatetimeIndex for the start value. 371 freq : str 372 Must be givein if ``endog`` does not have date information in its 373 index. Anything accepted by pandas.DatetimeIndex for the freq value. 374 print_stdout : bool 375 The stdout from X12/X13 is suppressed. To print it out, set this 376 to True. Default is False. 377 x12path : str or None 378 The path to x12 or x13 binary. If None, the program will attempt 379 to find x13as or x12a on the PATH or by looking at X13PATH or 380 X12PATH depending on the value of prefer_x13. 381 prefer_x13 : bool 382 If True, will look for x13as first and will fallback to the X13PATH 383 environmental variable. If False, will look for x12a first and will 384 fallback to the X12PATH environmental variable. If x12path points 385 to the path for the X12/X13 binary, it does nothing. 386 387 Returns 388 ------- 389 Bunch 390 A bunch object containing the listed attributes. 391 392 - results : str 393 The full output from the X12/X13 run. 394 - seasadj : pandas.Series 395 The final seasonally adjusted ``endog``. 396 - trend : pandas.Series 397 The trend-cycle component of ``endog``. 398 - irregular : pandas.Series 399 The final irregular component of ``endog``. 400 - stdout : str 401 The captured stdout produced by x12/x13. 402 - spec : str, optional 403 Returned if ``retspec`` is True. The only thing returned if 404 ``speconly`` is True. 405 406 Notes 407 ----- 408 This works by creating a specification file, writing it to a temporary 409 directory, invoking X12/X13 in a subprocess, and reading the output 410 directory, invoking exog12/X13 in a subprocess, and reading the output 411 back in. 412 """ 413 x12path = _check_x12(x12path) 414 415 if not isinstance(endog, (pd.DataFrame, pd.Series)): 416 if start is None or freq is None: 417 raise ValueError("start and freq cannot be none if endog is not " 418 "a pandas object") 419 endog = pd.Series(endog, index=pd.DatetimeIndex(start=start, 420 periods=len(endog), 421 freq=freq)) 422 spec_obj = pandas_to_series_spec(endog) 423 spec = spec_obj.create_spec() 424 spec += "transform{{function={0}}}\n".format(_log_to_x12[log]) 425 if outlier: 426 spec += "outlier{}\n" 427 options = _make_automdl_options(maxorder, maxdiff, diff) 428 spec += "automdl{{{0}}}\n".format(options) 429 spec += _make_regression_options(trading, exog) 430 spec += _make_forecast_options(forecast_periods) 431 spec += "x11{ save=(d11 d12 d13) }" 432 if speconly: 433 return spec 434 # write it to a tempfile 435 # TODO: make this more robust - give the user some control? 436 ftempin = tempfile.NamedTemporaryFile(delete=False, suffix='.spc') 437 ftempout = tempfile.NamedTemporaryFile(delete=False) 438 try: 439 ftempin.write(spec.encode('utf8')) 440 ftempin.close() 441 ftempout.close() 442 # call x12 arima 443 p = run_spec(x12path, ftempin.name[:-4], ftempout.name) 444 p.wait() 445 stdout = p.stdout.read() 446 if print_stdout: 447 print(p.stdout.read()) 448 # check for errors 449 errors = _open_and_read(ftempout.name + '.err') 450 _check_errors(errors) 451 452 # read in results 453 results = _open_and_read(ftempout.name + '.out') 454 seasadj = _open_and_read(ftempout.name + '.d11') 455 trend = _open_and_read(ftempout.name + '.d12') 456 irregular = _open_and_read(ftempout.name + '.d13') 457 finally: 458 try: # sometimes this gives a permission denied error? 459 # not sure why. no process should have these open 460 os.remove(ftempin.name) 461 os.remove(ftempout.name) 462 except OSError: 463 if os.path.exists(ftempin.name): 464 warn("Failed to delete resource {0}".format(ftempin.name), 465 IOWarning) 466 if os.path.exists(ftempout.name): 467 warn("Failed to delete resource {0}".format(ftempout.name), 468 IOWarning) 469 470 seasadj = _convert_out_to_series(seasadj, endog.index, 'seasadj') 471 trend = _convert_out_to_series(trend, endog.index, 'trend') 472 irregular = _convert_out_to_series(irregular, endog.index, 'irregular') 473 474 # NOTE: there is not likely anything in stdout that's not in results 475 # so may be safe to just suppress and remove it 476 if not retspec: 477 res = X13ArimaAnalysisResult(observed=endog, results=results, 478 seasadj=seasadj, trend=trend, 479 irregular=irregular, stdout=stdout) 480 else: 481 res = X13ArimaAnalysisResult(observed=endog, results=results, 482 seasadj=seasadj, trend=trend, 483 irregular=irregular, stdout=stdout, 484 spec=spec) 485 return res 486 487 488@deprecate_kwarg('forecast_years', 'forecast_periods') 489def x13_arima_select_order(endog, maxorder=(2, 1), maxdiff=(2, 1), diff=None, 490 exog=None, log=None, outlier=True, trading=False, 491 forecast_periods=None, 492 start=None, freq=None, print_stdout=False, 493 x12path=None, prefer_x13=True): 494 """ 495 Perform automatic seasonal ARIMA order identification using x12/x13 ARIMA. 496 497 Parameters 498 ---------- 499 endog : array_like, pandas.Series 500 The series to model. It is best to use a pandas object with a 501 DatetimeIndex or PeriodIndex. However, you can pass an array-like 502 object. If your object does not have a dates index then ``start`` and 503 ``freq`` are not optional. 504 maxorder : tuple 505 The maximum order of the regular and seasonal ARMA polynomials to 506 examine during the model identification. The order for the regular 507 polynomial must be greater than zero and no larger than 4. The 508 order for the seasonal polynomial may be 1 or 2. 509 maxdiff : tuple 510 The maximum orders for regular and seasonal differencing in the 511 automatic differencing procedure. Acceptable inputs for regular 512 differencing are 1 and 2. The maximum order for seasonal differencing 513 is 1. If ``diff`` is specified then ``maxdiff`` should be None. 514 Otherwise, ``diff`` will be ignored. See also ``diff``. 515 diff : tuple 516 Fixes the orders of differencing for the regular and seasonal 517 differencing. Regular differencing may be 0, 1, or 2. Seasonal 518 differencing may be 0 or 1. ``maxdiff`` must be None, otherwise 519 ``diff`` is ignored. 520 exog : array_like 521 Exogenous variables. 522 log : bool or None 523 If None, it is automatically determined whether to log the series or 524 not. If False, logs are not taken. If True, logs are taken. 525 outlier : bool 526 Whether or not outliers are tested for and corrected, if detected. 527 trading : bool 528 Whether or not trading day effects are tested for. 529 forecast_periods : int 530 Number of forecasts produced. The default is None. 531 start : str, datetime 532 Must be given if ``endog`` does not have date information in its index. 533 Anything accepted by pandas.DatetimeIndex for the start value. 534 freq : str 535 Must be givein if ``endog`` does not have date information in its 536 index. Anything accepted by pandas.DatetimeIndex for the freq value. 537 print_stdout : bool 538 The stdout from X12/X13 is suppressed. To print it out, set this 539 to True. Default is False. 540 x12path : str or None 541 The path to x12 or x13 binary. If None, the program will attempt 542 to find x13as or x12a on the PATH or by looking at X13PATH or X12PATH 543 depending on the value of prefer_x13. 544 prefer_x13 : bool 545 If True, will look for x13as first and will fallback to the X13PATH 546 environmental variable. If False, will look for x12a first and will 547 fallback to the X12PATH environmental variable. If x12path points 548 to the path for the X12/X13 binary, it does nothing. 549 550 Returns 551 ------- 552 Bunch 553 A bunch object containing the listed attributes. 554 555 - order : tuple 556 The regular order. 557 - sorder : tuple 558 The seasonal order. 559 - include_mean : bool 560 Whether to include a mean or not. 561 - results : str 562 The full results from the X12/X13 analysis. 563 - stdout : str 564 The captured stdout from the X12/X13 analysis. 565 566 Notes 567 ----- 568 This works by creating a specification file, writing it to a temporary 569 directory, invoking X12/X13 in a subprocess, and reading the output back 570 in. 571 """ 572 results = x13_arima_analysis(endog, x12path=x12path, exog=exog, log=log, 573 outlier=outlier, trading=trading, 574 forecast_periods=forecast_periods, 575 maxorder=maxorder, maxdiff=maxdiff, diff=diff, 576 start=start, freq=freq, prefer_x13=prefer_x13) 577 model = re.search("(?<=Final automatic model choice : ).*", 578 results.results) 579 order = model.group() 580 if re.search("Mean is not significant", results.results): 581 include_mean = False 582 elif re.search("Constant", results.results): 583 include_mean = True 584 else: 585 include_mean = False 586 order, sorder = _clean_order(order) 587 res = Bunch(order=order, sorder=sorder, include_mean=include_mean, 588 results=results.results, stdout=results.stdout) 589 return res 590 591 592class X13ArimaAnalysisResult(object): 593 def __init__(self, **kwargs): 594 for key, value in kwargs.items(): 595 setattr(self, key, value) 596 597 def plot(self): 598 from statsmodels.graphics.utils import _import_mpl 599 plt = _import_mpl() 600 fig, axes = plt.subplots(4, 1, sharex=True) 601 self.observed.plot(ax=axes[0], legend=False) 602 axes[0].set_ylabel('Observed') 603 self.seasadj.plot(ax=axes[1], legend=False) 604 axes[1].set_ylabel('Seas. Adjusted') 605 self.trend.plot(ax=axes[2], legend=False) 606 axes[2].set_ylabel('Trend') 607 self.irregular.plot(ax=axes[3], legend=False) 608 axes[3].set_ylabel('Irregular') 609 610 fig.tight_layout() 611 return fig 612