1'''trying to verify theoretical acf of arma
2
3explicit functions for autocovariance functions of ARIMA(1,1), MA(1), MA(2)
4plus 3 functions from nitime.utils
5
6'''
7import numpy as np
8from numpy.testing import assert_array_almost_equal
9import matplotlib.pyplot as plt
10
11from statsmodels import regression
12from statsmodels.tsa.arima_process import arma_generate_sample, arma_impulse_response
13from statsmodels.tsa.arima_process import arma_acovf, arma_acf
14from statsmodels.tsa.arima.model import ARIMA
15from statsmodels.tsa.stattools import acf, acovf
16from statsmodels.graphics.tsaplots import plot_acf
17
18ar = [1., -0.6]
19#ar = [1., 0.]
20ma = [1., 0.4]
21#ma = [1., 0.4, 0.6]
22#ma = [1., 0.]
23mod = ''#'ma2'
24x = arma_generate_sample(ar, ma, 5000)
25x_acf = acf(x)[:10]
26x_ir = arma_impulse_response(ar, ma)
27
28#print x_acf[:10]
29#print x_ir[:10]
30#irc2 = np.correlate(x_ir,x_ir,'full')[len(x_ir)-1:]
31#print irc2[:10]
32#print irc2[:10]/irc2[0]
33#print irc2[:10-1] / irc2[1:10]
34#print x_acf[:10-1] / x_acf[1:10]
35
36# detrend helper from matplotlib.mlab
37def detrend(x, key=None):
38    if key is None or key=='constant':
39        return detrend_mean(x)
40    elif key=='linear':
41        return detrend_linear(x)
42
43def demean(x, axis=0):
44    "Return x minus its mean along the specified axis"
45    x = np.asarray(x)
46    if axis:
47        ind = [slice(None)] * axis
48        ind.append(np.newaxis)
49        return x - x.mean(axis)[ind]
50    return x - x.mean(axis)
51
52def detrend_mean(x):
53    "Return x minus the mean(x)"
54    return x - x.mean()
55
56def detrend_none(x):
57    "Return x: no detrending"
58    return x
59
60def detrend_linear(y):
61    "Return y minus best fit line; 'linear' detrending "
62    # This is faster than an algorithm based on linalg.lstsq.
63    x = np.arange(len(y), dtype=np.float_)
64    C = np.cov(x, y, bias=1)
65    b = C[0,1]/C[0,0]
66    a = y.mean() - b*x.mean()
67    return y - (b*x + a)
68
69def acovf_explicit(ar, ma, nobs):
70    '''add correlation of MA representation explicitely
71
72    '''
73    ir = arma_impulse_response(ar, ma)
74    acovfexpl = [np.dot(ir[:nobs-t], ir[t:nobs]) for t in range(10)]
75    return acovfexpl
76
77def acovf_arma11(ar, ma):
78    # ARMA(1,1)
79    # Florens et al page 278
80    # wrong result ?
81    # new calculation bigJudge p 311, now the same
82    a = -ar[1]
83    b = ma[1]
84    #rho = [1.]
85    #rho.append((1-a*b)*(a-b)/(1.+a**2-2*a*b))
86    rho = [(1.+b**2+2*a*b)/(1.-a**2)]
87    rho.append((1+a*b)*(a+b)/(1.-a**2))
88    for _ in range(8):
89        last = rho[-1]
90        rho.append(a*last)
91    return np.array(rho)
92
93#    print acf11[:10]
94#    print acf11[:10] /acf11[0]
95
96def acovf_ma2(ma):
97    # MA(2)
98    # from Greene p616 (with typo), Florens p280
99    b1 = -ma[1]
100    b2 = -ma[2]
101    rho = np.zeros(10)
102    rho[0] = (1 + b1**2 + b2**2)
103    rho[1] = (-b1 + b1*b2)
104    rho[2] = -b2
105    return rho
106
107#    rho2 = rho/rho[0]
108#    print rho2
109#    print irc2[:10]/irc2[0]
110
111def acovf_ma1(ma):
112    # MA(1)
113    # from Greene p616 (with typo), Florens p280
114    b = -ma[1]
115    rho = np.zeros(10)
116    rho[0] = (1 + b**2)
117    rho[1] = -b
118    return rho
119
120#    rho2 = rho/rho[0]
121#    print rho2
122#    print irc2[:10]/irc2[0]
123
124
125ar1 = [1., -0.8]
126ar0 = [1., 0.]
127ma1 = [1., 0.4]
128ma2 = [1., 0.4, 0.6]
129ma0 = [1., 0.]
130
131comparefn = dict(
132        [('ma1', acovf_ma1),
133        ('ma2', acovf_ma2),
134        ('arma11', acovf_arma11),
135        ('ar1', acovf_arma11)])
136
137cases = [('ma1', (ar0, ma1)),
138        ('ma2', (ar0, ma2)),
139        ('arma11', (ar1, ma1)),
140        ('ar1', (ar1, ma0))]
141
142for c, args in cases:
143
144    ar, ma = args
145    print('')
146    print(c, ar, ma)
147    myacovf = arma_acovf(ar, ma, nobs=10)
148    myacf = arma_acf(ar, ma, lags=10)
149    if c[:2]=='ma':
150        othacovf = comparefn[c](ma)
151    else:
152        othacovf = comparefn[c](ar, ma)
153    print(myacovf[:5])
154    print(othacovf[:5])
155    #something broke again,
156    #for high persistence case eg ar=0.99, nobs of IR has to be large
157    #made changes to arma_acovf
158    assert_array_almost_equal(myacovf, othacovf,10)
159    assert_array_almost_equal(myacf, othacovf/othacovf[0],10)
160
161
162#from nitime.utils
163def ar_generator(N=512, sigma=1.):
164    # this generates a signal u(n) = a1*u(n-1) + a2*u(n-2) + ... + v(n)
165    # where v(n) is a stationary stochastic process with zero mean
166    # and variance = sigma
167    # this sequence is shown to be estimated well by an order 8 AR system
168    taps = np.array([2.7607, -3.8106, 2.6535, -0.9238])
169    v = np.random.normal(size=N, scale=sigma**0.5)
170    u = np.zeros(N)
171    P = len(taps)
172    for l in range(P):
173        u[l] = v[l] + np.dot(u[:l][::-1], taps[:l])
174    for l in range(P,N):
175        u[l] = v[l] + np.dot(u[l-P:l][::-1], taps)
176    return u, v, taps
177
178#JP: small differences to using np.correlate, because assumes mean(s)=0
179#    denominator is N, not N-k, biased estimator
180#    misnomer: (biased) autocovariance not autocorrelation
181#from nitime.utils
182def autocorr(s, axis=-1):
183    """Returns the autocorrelation of signal s at all lags. Adheres to the
184definition r(k) = E{s(n)s*(n-k)} where E{} is the expectation operator.
185"""
186    N = s.shape[axis]
187    S = np.fft.fft(s, n=2*N-1, axis=axis)
188    sxx = np.fft.ifft(S*S.conjugate(), axis=axis).real[:N]
189    return sxx/N
190
191#JP: with valid this returns a single value, if x and y have same length
192#   e.g. norm_corr(x, x)
193#   using std subtracts mean, but correlate does not, requires means are exactly 0
194#   biased, no n-k correction for laglength
195#from nitime.utils
196def norm_corr(x,y,mode = 'valid'):
197    """Returns the correlation between two ndarrays, by calling np.correlate in
198'same' mode and normalizing the result by the std of the arrays and by
199their lengths. This results in a correlation = 1 for an auto-correlation"""
200
201    return ( np.correlate(x,y,mode) /
202             (np.std(x)*np.std(y)*(x.shape[-1])) )
203
204
205
206# from matplotlib axes.py
207# note: self is axis
208def pltacorr(self, x, **kwargs):
209    r"""
210    call signature::
211
212        acorr(x, normed=True, detrend=detrend_none, usevlines=True,
213              maxlags=10, **kwargs)
214
215    Plot the autocorrelation of *x*.  If *normed* = *True*,
216    normalize the data by the autocorrelation at 0-th lag.  *x* is
217    detrended by the *detrend* callable (default no normalization).
218
219    Data are plotted as ``plot(lags, c, **kwargs)``
220
221    Return value is a tuple (*lags*, *c*, *line*) where:
222
223      - *lags* are a length 2*maxlags+1 lag vector
224
225      - *c* is the 2*maxlags+1 auto correlation vector
226
227      - *line* is a :class:`~matplotlib.lines.Line2D` instance
228        returned by :meth:`plot`
229
230    The default *linestyle* is None and the default *marker* is
231    ``'o'``, though these can be overridden with keyword args.
232    The cross correlation is performed with
233    :func:`numpy.correlate` with *mode* = 2.
234
235    If *usevlines* is *True*, :meth:`~matplotlib.axes.Axes.vlines`
236    rather than :meth:`~matplotlib.axes.Axes.plot` is used to draw
237    vertical lines from the origin to the acorr.  Otherwise, the
238    plot style is determined by the kwargs, which are
239    :class:`~matplotlib.lines.Line2D` properties.
240
241    *maxlags* is a positive integer detailing the number of lags
242    to show.  The default value of *None* will return all
243    :math:`2 \mathrm{len}(x) - 1` lags.
244
245    The return value is a tuple (*lags*, *c*, *linecol*, *b*)
246    where
247
248    - *linecol* is the
249      :class:`~matplotlib.collections.LineCollection`
250
251    - *b* is the *x*-axis.
252
253    .. seealso::
254
255        :meth:`~matplotlib.axes.Axes.plot` or
256        :meth:`~matplotlib.axes.Axes.vlines`
257           For documentation on valid kwargs.
258
259    **Example:**
260
261    :func:`~matplotlib.pyplot.xcorr` above, and
262    :func:`~matplotlib.pyplot.acorr` below.
263
264    **Example:**
265
266    .. plot:: mpl_examples/pylab_examples/xcorr_demo.py
267    """
268    return self.xcorr(x, x, **kwargs)
269
270def pltxcorr(self, x, y, normed=True, detrend=detrend_none,
271          usevlines=True, maxlags=10, **kwargs):
272    """
273    call signature::
274
275        def xcorr(self, x, y, normed=True, detrend=detrend_none,
276          usevlines=True, maxlags=10, **kwargs):
277
278    Plot the cross correlation between *x* and *y*.  If *normed* =
279    *True*, normalize the data by the cross correlation at 0-th
280    lag.  *x* and y are detrended by the *detrend* callable
281    (default no normalization).  *x* and *y* must be equal length.
282
283    Data are plotted as ``plot(lags, c, **kwargs)``
284
285    Return value is a tuple (*lags*, *c*, *line*) where:
286
287      - *lags* are a length ``2*maxlags+1`` lag vector
288
289      - *c* is the ``2*maxlags+1`` auto correlation vector
290
291      - *line* is a :class:`~matplotlib.lines.Line2D` instance
292         returned by :func:`~matplotlib.pyplot.plot`.
293
294    The default *linestyle* is *None* and the default *marker* is
295    'o', though these can be overridden with keyword args.  The
296    cross correlation is performed with :func:`numpy.correlate`
297    with *mode* = 2.
298
299    If *usevlines* is *True*:
300
301       :func:`~matplotlib.pyplot.vlines`
302       rather than :func:`~matplotlib.pyplot.plot` is used to draw
303       vertical lines from the origin to the xcorr.  Otherwise the
304       plotstyle is determined by the kwargs, which are
305       :class:`~matplotlib.lines.Line2D` properties.
306
307       The return value is a tuple (*lags*, *c*, *linecol*, *b*)
308       where *linecol* is the
309       :class:`matplotlib.collections.LineCollection` instance and
310       *b* is the *x*-axis.
311
312    *maxlags* is a positive integer detailing the number of lags to show.
313    The default value of *None* will return all ``(2*len(x)-1)`` lags.
314
315    **Example:**
316
317    :func:`~matplotlib.pyplot.xcorr` above, and
318    :func:`~matplotlib.pyplot.acorr` below.
319
320    **Example:**
321
322    .. plot:: mpl_examples/pylab_examples/xcorr_demo.py
323    """
324
325
326    Nx = len(x)
327    if Nx!=len(y):
328        raise ValueError('x and y must be equal length')
329
330    x = detrend(np.asarray(x))
331    y = detrend(np.asarray(y))
332
333    c = np.correlate(x, y, mode=2)
334
335    if normed:
336        c /= np.sqrt(np.dot(x, x) * np.dot(y, y))
337
338    if maxlags is None:
339        maxlags = Nx - 1
340
341    if maxlags >= Nx or maxlags < 1:
342        raise ValueError('maxlags must be None or strictly '
343                         'positive < %d' % Nx)
344
345    lags = np.arange(-maxlags,maxlags+1)
346    c = c[Nx-1-maxlags:Nx+maxlags]
347
348
349    if usevlines:
350        a = self.vlines(lags, [0], c, **kwargs)
351        b = self.axhline(**kwargs)
352        kwargs.setdefault('marker', 'o')
353        kwargs.setdefault('linestyle', 'None')
354        d = self.plot(lags, c, **kwargs)
355    else:
356
357        kwargs.setdefault('marker', 'o')
358        kwargs.setdefault('linestyle', 'None')
359        a, = self.plot(lags, c, **kwargs)
360        b = None
361    return lags, c, a, b
362
363
364
365
366
367
368arrvs = ar_generator()
369##arma = ARIMA()
370##res = arma.fit(arrvs[0], 4, 0)
371arma = ARIMA(arrvs[0])
372res = arma.fit((4,0, 0))
373
374print(res[0])
375
376acf1 = acf(arrvs[0])
377acovf1b = acovf(arrvs[0], unbiased=False)
378acf2 = autocorr(arrvs[0])
379acf2m = autocorr(arrvs[0]-arrvs[0].mean())
380print(acf1[:10])
381print(acovf1b[:10])
382print(acf2[:10])
383print(acf2m[:10])
384
385
386x = arma_generate_sample([1.0, -0.8], [1.0], 500)
387print(acf(x)[:20])
388print(regression.yule_walker(x, 10))
389
390#ax = plt.axes()
391plt.plot(x)
392#plt.show()
393
394plt.figure()
395pltxcorr(plt,x,x)
396plt.figure()
397pltxcorr(plt,x,x, usevlines=False)
398plt.figure()
399#FIXME: plotacf was moved to graphics/tsaplots.py, and interface changed
400plot_acf(plt, acf1[:20], np.arange(len(acf1[:20])), usevlines=True)
401plt.figure()
402ax = plt.subplot(211)
403plot_acf(ax, acf1[:20], usevlines=True)
404ax = plt.subplot(212)
405plot_acf(ax, acf1[:20], np.arange(len(acf1[:20])), usevlines=False)
406
407#plt.show()
408