1"""
2Run x12/x13-arima specs in a subprocess from Python and curry results back
3into python.
4
5Notes
6-----
7Many of the functions are called x12. However, they are also intended to work
8for x13. If this is not the case, it's a bug.
9"""
10from statsmodels.compat.pandas import deprecate_kwarg
11
12import os
13import subprocess
14import tempfile
15import re
16from warnings import warn
17
18import pandas as pd
19
20from statsmodels.tools.tools import Bunch
21from statsmodels.tools.sm_exceptions import (X13NotFoundError,
22                                             IOWarning, X13Error,
23                                             X13Warning)
24
25__all__ = ["x13_arima_select_order", "x13_arima_analysis"]
26
27_binary_names = ('x13as.exe', 'x13as', 'x12a.exe', 'x12a')
28
29
30class _freq_to_period:
31    def __getitem__(self, key):
32        if key.startswith('M'):
33            return 12
34        elif key.startswith('Q'):
35            return 4
36        elif key.startswith('W'):
37            return 52
38
39
40_freq_to_period = _freq_to_period()
41
42_period_to_freq = {12: 'M', 4: 'Q'}
43_log_to_x12 = {True: 'log', False: 'none', None: 'auto'}
44_bool_to_yes_no = lambda x: 'yes' if x else 'no'  # noqa:E731
45
46
47def _find_x12(x12path=None, prefer_x13=True):
48    """
49    If x12path is not given, then either x13as[.exe] or x12a[.exe] must
50    be found on the PATH. Otherwise, the environmental variable X12PATH or
51    X13PATH must be defined. If prefer_x13 is True, only X13PATH is searched
52    for. If it is false, only X12PATH is searched for.
53    """
54    global _binary_names
55    if x12path is not None and x12path.endswith(_binary_names):
56        # remove binary from path if given
57        x12path = os.path.dirname(x12path)
58
59    if not prefer_x13:  # search for x12 first
60        _binary_names = _binary_names[::-1]
61        if x12path is None:
62            x12path = os.getenv("X12PATH", "")
63        if not x12path:
64            x12path = os.getenv("X13PATH", "")
65    elif x12path is None:
66        x12path = os.getenv("X13PATH", "")
67        if not x12path:
68            x12path = os.getenv("X12PATH", "")
69
70    for binary in _binary_names:
71        x12 = os.path.join(x12path, binary)
72        try:
73            subprocess.check_call(x12, stdout=subprocess.PIPE,
74                                  stderr=subprocess.PIPE)
75            return x12
76        except OSError:
77            pass
78
79    else:
80        return False
81
82
83def _check_x12(x12path=None):
84    x12path = _find_x12(x12path)
85    if not x12path:
86        raise X13NotFoundError("x12a and x13as not found on path. Give the "
87                               "path, put them on PATH, or set the "
88                               "X12PATH or X13PATH environmental variable.")
89    return x12path
90
91
92def _clean_order(order):
93    """
94    Takes something like (1 1 0)(0 1 1) and returns a arma order, sarma
95    order tuple. Also accepts (1 1 0) and return arma order and (0, 0, 0)
96    """
97    order = re.findall(r"\([0-9 ]*?\)", order)
98
99    def clean(x):
100        return tuple(map(int, re.sub("[()]", "", x).split(" ")))
101
102    if len(order) > 1:
103        order, sorder = map(clean, order)
104    else:
105        order = clean(order[0])
106        sorder = (0, 0, 0)
107
108    return order, sorder
109
110
111def run_spec(x12path, specpath, outname=None, meta=False, datameta=False):
112
113    if meta and datameta:
114        raise ValueError("Cannot specify both meta and datameta.")
115    if meta:
116        args = [x12path, "-m " + specpath]
117    elif datameta:
118        args = [x12path, "-d " + specpath]
119    else:
120        args = [x12path, specpath]
121
122    if outname:
123        args += [outname]
124
125    return subprocess.Popen(args, stdout=subprocess.PIPE,
126                            stderr=subprocess.STDOUT)
127
128
129def _make_automdl_options(maxorder, maxdiff, diff):
130    options = "\n"
131    options += "maxorder = ({0} {1})\n".format(maxorder[0], maxorder[1])
132    if maxdiff is not None:  # maxdiff always takes precedence
133        options += "maxdiff = ({0} {1})\n".format(maxdiff[0], maxdiff[1])
134    else:
135        options += "diff = ({0} {1})\n".format(diff[0], diff[1])
136    return options
137
138
139def _make_var_names(exog):
140    if hasattr(exog, "name"):
141        var_names = exog.name
142    elif hasattr(exog, "columns"):
143        var_names = exog.columns
144    else:
145        raise ValueError("exog is not a Series or DataFrame or is unnamed.")
146    try:
147        var_names = " ".join(var_names)
148    except TypeError:  # cannot have names that are numbers, pandas default
149        from statsmodels.base.data import _make_exog_names
150        if exog.ndim == 1:
151            var_names = "x1"
152        else:
153            var_names = " ".join(_make_exog_names(exog))
154    return var_names
155
156
157def _make_regression_options(trading, exog):
158    if not trading and exog is None:  # start regression spec
159        return ""
160
161    reg_spec = "regression{\n"
162    if trading:
163        reg_spec += "    variables = (td)\n"
164    if exog is not None:
165        var_names = _make_var_names(exog)
166        reg_spec += "    user = ({0})\n".format(var_names)
167        reg_spec += "    data = ({0})\n".format("\n".join(map(str,
168                                                exog.values.ravel().tolist())))
169
170    reg_spec += "}\n"  # close out regression spec
171    return reg_spec
172
173
174def _make_forecast_options(forecast_periods):
175    if forecast_periods is None:
176        return ""
177    forecast_spec = "forecast{\n"
178    forecast_spec += "maxlead = ({0})\n}}\n".format(forecast_periods)
179    return forecast_spec
180
181
182def _check_errors(errors):
183    errors = errors[errors.find("spc:")+4:].strip()
184    if errors and 'ERROR' in errors:
185        raise X13Error(errors)
186    elif errors and 'WARNING' in errors:
187        warn(errors, X13Warning)
188
189
190def _convert_out_to_series(x, dates, name):
191    """
192    Convert x to a DataFrame where x is a string in the format given by
193    x-13arima-seats output.
194    """
195    from io import StringIO
196    from pandas import read_csv
197    out = read_csv(StringIO(x), skiprows=2,
198                   header=None, sep='\t', engine='python')
199    return out.set_index(dates).rename(columns={1: name})[name]
200
201
202def _open_and_read(fname):
203    # opens a file, reads it, and make sure it's closed
204    with open(fname, 'r') as fin:
205        fout = fin.read()
206    return fout
207
208
209class Spec(object):
210    @property
211    def spec_name(self):
212        return self.__class__.__name__.replace("Spec", "")
213
214    def create_spec(self, **kwargs):
215        spec = """{name} {{
216        {options}
217        }}
218        """
219        return spec.format(name=self.spec_name,
220                           options=self.options)
221
222    def set_options(self, **kwargs):
223        options = ""
224        for key, value in kwargs.items():
225            options += "{0}={1}\n".format(key, value)
226            self.__dict__.update({key: value})
227        self.options = options
228
229
230class SeriesSpec(Spec):
231    """
232    Parameters
233    ----------
234    data
235    appendbcst : bool
236    appendfcst : bool
237    comptype
238    compwt
239    decimals
240    modelspan
241    name
242    period
243    precision
244    to_print
245    to_save
246    span
247    start
248    title
249    type
250
251    Notes
252    -----
253    Rarely used arguments
254
255    divpower
256    missingcode
257    missingval
258    saveprecision
259    trimzero
260    """
261    def __init__(self, data, name='Unnamed Series', appendbcst=False,
262                 appendfcst=False,
263                 comptype=None, compwt=1, decimals=0, modelspan=(),
264                 period=12, precision=0, to_print=[], to_save=[], span=(),
265                 start=(1, 1), title='', series_type=None, divpower=None,
266                 missingcode=-99999, missingval=1000000000):
267
268        appendbcst, appendfcst = map(_bool_to_yes_no, [appendbcst,
269                                                       appendfcst,
270                                                       ])
271
272        series_name = "\"{0}\"".format(name[:64])  # trim to 64 characters
273        title = "\"{0}\"".format(title[:79])  # trim to 79 characters
274        self.set_options(data=data, appendbcst=appendbcst,
275                         appendfcst=appendfcst, period=period, start=start,
276                         title=title, name=series_name,
277                         )
278
279
280def pandas_to_series_spec(x):
281    # from statsmodels.tools.data import _check_period_index
282    # check_period_index(x)
283    if hasattr(x, 'columns'):  # convert to series
284        if len(x.columns) > 1:
285            raise ValueError("Does not handle DataFrame with more than one "
286                             "column")
287        x = x[x.columns[0]]
288
289    data = "({0})".format("\n".join(map(str, x.values.tolist())))
290
291    # get periodicity
292    # get start / first data
293    # give it a title
294    try:
295        period = _freq_to_period[x.index.freqstr]
296    except (AttributeError, ValueError):
297        from pandas.tseries.api import infer_freq
298        period = _freq_to_period[infer_freq(x.index)]
299    start_date = x.index[0]
300    if period == 12:
301        year, stperiod = start_date.year, start_date.month
302    elif period == 4:
303        year, stperiod = start_date.year, start_date.quarter
304    else:  # pragma: no cover
305        raise ValueError("Only monthly and quarterly periods are supported."
306                         " Please report or send a pull request if you want "
307                         "this extended.")
308
309    if hasattr(x, 'name'):
310        name = x.name or "Unnamed Series"
311    else:
312        name = 'Unnamed Series'
313    series_spec = SeriesSpec(data=data, name=name, period=period,
314                             title=name, start="{0}.{1}".format(year,
315                                                                stperiod))
316    return series_spec
317
318
319@deprecate_kwarg('forecast_years', 'forecast_periods')
320def x13_arima_analysis(endog, maxorder=(2, 1), maxdiff=(2, 1), diff=None,
321                       exog=None, log=None, outlier=True, trading=False,
322                       forecast_periods=None, retspec=False,
323                       speconly=False, start=None, freq=None,
324                       print_stdout=False, x12path=None, prefer_x13=True):
325    """
326    Perform x13-arima analysis for monthly or quarterly data.
327
328    Parameters
329    ----------
330    endog : array_like, pandas.Series
331        The series to model. It is best to use a pandas object with a
332        DatetimeIndex or PeriodIndex. However, you can pass an array-like
333        object. If your object does not have a dates index then ``start`` and
334        ``freq`` are not optional.
335    maxorder : tuple
336        The maximum order of the regular and seasonal ARMA polynomials to
337        examine during the model identification. The order for the regular
338        polynomial must be greater than zero and no larger than 4. The
339        order for the seasonal polynomial may be 1 or 2.
340    maxdiff : tuple
341        The maximum orders for regular and seasonal differencing in the
342        automatic differencing procedure. Acceptable inputs for regular
343        differencing are 1 and 2. The maximum order for seasonal differencing
344        is 1. If ``diff`` is specified then ``maxdiff`` should be None.
345        Otherwise, ``diff`` will be ignored. See also ``diff``.
346    diff : tuple
347        Fixes the orders of differencing for the regular and seasonal
348        differencing. Regular differencing may be 0, 1, or 2. Seasonal
349        differencing may be 0 or 1. ``maxdiff`` must be None, otherwise
350        ``diff`` is ignored.
351    exog : array_like
352        Exogenous variables.
353    log : bool or None
354        If None, it is automatically determined whether to log the series or
355        not. If False, logs are not taken. If True, logs are taken.
356    outlier : bool
357        Whether or not outliers are tested for and corrected, if detected.
358    trading : bool
359        Whether or not trading day effects are tested for.
360    forecast_periods : int
361        Number of forecasts produced. The default is None.
362    retspec : bool
363        Whether to return the created specification file. Can be useful for
364        debugging.
365    speconly : bool
366        Whether to create the specification file and then return it without
367        performing the analysis. Can be useful for debugging.
368    start : str, datetime
369        Must be given if ``endog`` does not have date information in its index.
370        Anything accepted by pandas.DatetimeIndex for the start value.
371    freq : str
372        Must be givein if ``endog`` does not have date information in its
373        index. Anything accepted by pandas.DatetimeIndex for the freq value.
374    print_stdout : bool
375        The stdout from X12/X13 is suppressed. To print it out, set this
376        to True. Default is False.
377    x12path : str or None
378        The path to x12 or x13 binary. If None, the program will attempt
379        to find x13as or x12a on the PATH or by looking at X13PATH or
380        X12PATH depending on the value of prefer_x13.
381    prefer_x13 : bool
382        If True, will look for x13as first and will fallback to the X13PATH
383        environmental variable. If False, will look for x12a first and will
384        fallback to the X12PATH environmental variable. If x12path points
385        to the path for the X12/X13 binary, it does nothing.
386
387    Returns
388    -------
389    Bunch
390        A bunch object containing the listed attributes.
391
392        - results : str
393          The full output from the X12/X13 run.
394        - seasadj : pandas.Series
395          The final seasonally adjusted ``endog``.
396        - trend : pandas.Series
397          The trend-cycle component of ``endog``.
398        - irregular : pandas.Series
399          The final irregular component of ``endog``.
400        - stdout : str
401          The captured stdout produced by x12/x13.
402        - spec : str, optional
403          Returned if ``retspec`` is True. The only thing returned if
404          ``speconly`` is True.
405
406    Notes
407    -----
408    This works by creating a specification file, writing it to a temporary
409    directory, invoking X12/X13 in a subprocess, and reading the output
410    directory, invoking exog12/X13 in a subprocess, and reading the output
411    back in.
412    """
413    x12path = _check_x12(x12path)
414
415    if not isinstance(endog, (pd.DataFrame, pd.Series)):
416        if start is None or freq is None:
417            raise ValueError("start and freq cannot be none if endog is not "
418                             "a pandas object")
419        endog = pd.Series(endog, index=pd.DatetimeIndex(start=start,
420                                                        periods=len(endog),
421                                                        freq=freq))
422    spec_obj = pandas_to_series_spec(endog)
423    spec = spec_obj.create_spec()
424    spec += "transform{{function={0}}}\n".format(_log_to_x12[log])
425    if outlier:
426        spec += "outlier{}\n"
427    options = _make_automdl_options(maxorder, maxdiff, diff)
428    spec += "automdl{{{0}}}\n".format(options)
429    spec += _make_regression_options(trading, exog)
430    spec += _make_forecast_options(forecast_periods)
431    spec += "x11{ save=(d11 d12 d13) }"
432    if speconly:
433        return spec
434    # write it to a tempfile
435    # TODO: make this more robust - give the user some control?
436    ftempin = tempfile.NamedTemporaryFile(delete=False, suffix='.spc')
437    ftempout = tempfile.NamedTemporaryFile(delete=False)
438    try:
439        ftempin.write(spec.encode('utf8'))
440        ftempin.close()
441        ftempout.close()
442        # call x12 arima
443        p = run_spec(x12path, ftempin.name[:-4], ftempout.name)
444        p.wait()
445        stdout = p.stdout.read()
446        if print_stdout:
447            print(p.stdout.read())
448        # check for errors
449        errors = _open_and_read(ftempout.name + '.err')
450        _check_errors(errors)
451
452        # read in results
453        results = _open_and_read(ftempout.name + '.out')
454        seasadj = _open_and_read(ftempout.name + '.d11')
455        trend = _open_and_read(ftempout.name + '.d12')
456        irregular = _open_and_read(ftempout.name + '.d13')
457    finally:
458        try:  # sometimes this gives a permission denied error?
459            #   not sure why. no process should have these open
460            os.remove(ftempin.name)
461            os.remove(ftempout.name)
462        except OSError:
463            if os.path.exists(ftempin.name):
464                warn("Failed to delete resource {0}".format(ftempin.name),
465                     IOWarning)
466            if os.path.exists(ftempout.name):
467                warn("Failed to delete resource {0}".format(ftempout.name),
468                     IOWarning)
469
470    seasadj = _convert_out_to_series(seasadj, endog.index, 'seasadj')
471    trend = _convert_out_to_series(trend, endog.index, 'trend')
472    irregular = _convert_out_to_series(irregular, endog.index, 'irregular')
473
474    # NOTE: there is not likely anything in stdout that's not in results
475    #       so may be safe to just suppress and remove it
476    if not retspec:
477        res = X13ArimaAnalysisResult(observed=endog, results=results,
478                                     seasadj=seasadj, trend=trend,
479                                     irregular=irregular, stdout=stdout)
480    else:
481        res = X13ArimaAnalysisResult(observed=endog, results=results,
482                                     seasadj=seasadj, trend=trend,
483                                     irregular=irregular, stdout=stdout,
484                                     spec=spec)
485    return res
486
487
488@deprecate_kwarg('forecast_years', 'forecast_periods')
489def x13_arima_select_order(endog, maxorder=(2, 1), maxdiff=(2, 1), diff=None,
490                           exog=None, log=None, outlier=True, trading=False,
491                           forecast_periods=None,
492                           start=None, freq=None, print_stdout=False,
493                           x12path=None, prefer_x13=True):
494    """
495    Perform automatic seasonal ARIMA order identification using x12/x13 ARIMA.
496
497    Parameters
498    ----------
499    endog : array_like, pandas.Series
500        The series to model. It is best to use a pandas object with a
501        DatetimeIndex or PeriodIndex. However, you can pass an array-like
502        object. If your object does not have a dates index then ``start`` and
503        ``freq`` are not optional.
504    maxorder : tuple
505        The maximum order of the regular and seasonal ARMA polynomials to
506        examine during the model identification. The order for the regular
507        polynomial must be greater than zero and no larger than 4. The
508        order for the seasonal polynomial may be 1 or 2.
509    maxdiff : tuple
510        The maximum orders for regular and seasonal differencing in the
511        automatic differencing procedure. Acceptable inputs for regular
512        differencing are 1 and 2. The maximum order for seasonal differencing
513        is 1. If ``diff`` is specified then ``maxdiff`` should be None.
514        Otherwise, ``diff`` will be ignored. See also ``diff``.
515    diff : tuple
516        Fixes the orders of differencing for the regular and seasonal
517        differencing. Regular differencing may be 0, 1, or 2. Seasonal
518        differencing may be 0 or 1. ``maxdiff`` must be None, otherwise
519        ``diff`` is ignored.
520    exog : array_like
521        Exogenous variables.
522    log : bool or None
523        If None, it is automatically determined whether to log the series or
524        not. If False, logs are not taken. If True, logs are taken.
525    outlier : bool
526        Whether or not outliers are tested for and corrected, if detected.
527    trading : bool
528        Whether or not trading day effects are tested for.
529    forecast_periods : int
530        Number of forecasts produced. The default is None.
531    start : str, datetime
532        Must be given if ``endog`` does not have date information in its index.
533        Anything accepted by pandas.DatetimeIndex for the start value.
534    freq : str
535        Must be givein if ``endog`` does not have date information in its
536        index. Anything accepted by pandas.DatetimeIndex for the freq value.
537    print_stdout : bool
538        The stdout from X12/X13 is suppressed. To print it out, set this
539        to True. Default is False.
540    x12path : str or None
541        The path to x12 or x13 binary. If None, the program will attempt
542        to find x13as or x12a on the PATH or by looking at X13PATH or X12PATH
543        depending on the value of prefer_x13.
544    prefer_x13 : bool
545        If True, will look for x13as first and will fallback to the X13PATH
546        environmental variable. If False, will look for x12a first and will
547        fallback to the X12PATH environmental variable. If x12path points
548        to the path for the X12/X13 binary, it does nothing.
549
550    Returns
551    -------
552    Bunch
553        A bunch object containing the listed attributes.
554
555        - order : tuple
556          The regular order.
557        - sorder : tuple
558          The seasonal order.
559        - include_mean : bool
560          Whether to include a mean or not.
561        - results : str
562          The full results from the X12/X13 analysis.
563        - stdout : str
564          The captured stdout from the X12/X13 analysis.
565
566    Notes
567    -----
568    This works by creating a specification file, writing it to a temporary
569    directory, invoking X12/X13 in a subprocess, and reading the output back
570    in.
571    """
572    results = x13_arima_analysis(endog, x12path=x12path, exog=exog, log=log,
573                                 outlier=outlier, trading=trading,
574                                 forecast_periods=forecast_periods,
575                                 maxorder=maxorder, maxdiff=maxdiff, diff=diff,
576                                 start=start, freq=freq, prefer_x13=prefer_x13)
577    model = re.search("(?<=Final automatic model choice : ).*",
578                      results.results)
579    order = model.group()
580    if re.search("Mean is not significant", results.results):
581        include_mean = False
582    elif re.search("Constant", results.results):
583        include_mean = True
584    else:
585        include_mean = False
586    order, sorder = _clean_order(order)
587    res = Bunch(order=order, sorder=sorder, include_mean=include_mean,
588                results=results.results, stdout=results.stdout)
589    return res
590
591
592class X13ArimaAnalysisResult(object):
593    def __init__(self, **kwargs):
594        for key, value in kwargs.items():
595            setattr(self, key, value)
596
597    def plot(self):
598        from statsmodels.graphics.utils import _import_mpl
599        plt = _import_mpl()
600        fig, axes = plt.subplots(4, 1, sharex=True)
601        self.observed.plot(ax=axes[0], legend=False)
602        axes[0].set_ylabel('Observed')
603        self.seasadj.plot(ax=axes[1], legend=False)
604        axes[1].set_ylabel('Seas. Adjusted')
605        self.trend.plot(ax=axes[2], legend=False)
606        axes[2].set_ylabel('Trend')
607        self.irregular.plot(ax=axes[3], legend=False)
608        axes[3].set_ylabel('Irregular')
609
610        fig.tight_layout()
611        return fig
612