1'''getting started with diffusions, continuous time stochastic processes
2
3Author: josef-pktd
4License: BSD
5
6
7References
8----------
9
10An Algorithmic Introduction to Numerical Simulation of Stochastic Differential
11Equations
12Author(s): Desmond J. Higham
13Source: SIAM Review, Vol. 43, No. 3 (Sep., 2001), pp. 525-546
14Published by: Society for Industrial and Applied Mathematics
15Stable URL: http://www.jstor.org/stable/3649798
16
17http://www.sitmo.com/  especially the formula collection
18
19
20Notes
21-----
22
23OU process: use same trick for ARMA with constant (non-zero mean) and drift
24some of the processes have easy multivariate extensions
25
26*Open Issues*
27
28include xzero in returned sample or not? currently not
29
30*TODOS*
31
32* Milstein from Higham paper, for which processes does it apply
33* Maximum Likelihood estimation
34* more statistical properties (useful for tests)
35* helper functions for display and MonteCarlo summaries (also for testing/checking)
36* more processes for the menagerie (e.g. from empirical papers)
37* characteristic functions
38* transformations, non-linear e.g. log
39* special estimators, e.g. Ait Sahalia, empirical characteristic functions
40* fft examples
41* check naming of methods, "simulate", "sample", "simexact", ... ?
42
43
44
45stochastic volatility models: estimation unclear
46
47finance applications ? option pricing, interest rate models
48
49
50'''
51import numpy as np
52from scipy import stats, signal
53import matplotlib.pyplot as plt
54
55#np.random.seed(987656789)
56
57class Diffusion(object):
58    '''Wiener Process, Brownian Motion with mu=0 and sigma=1
59    '''
60    def __init__(self):
61        pass
62
63    def simulateW(self, nobs=100, T=1, dt=None, nrepl=1):
64        '''generate sample of Wiener Process
65        '''
66        dt = T*1.0/nobs
67        t = np.linspace(dt, 1, nobs)
68        dW = np.sqrt(dt)*np.random.normal(size=(nrepl, nobs))
69        W = np.cumsum(dW,1)
70        self.dW = dW
71        return W, t
72
73    def expectedsim(self, func, nobs=100, T=1, dt=None, nrepl=1):
74        '''get expectation of a function of a Wiener Process by simulation
75
76        initially test example from
77        '''
78        W, t = self.simulateW(nobs=nobs, T=T, dt=dt, nrepl=nrepl)
79        U = func(t, W)
80        Umean = U.mean(0)
81        return U, Umean, t
82
83class AffineDiffusion(Diffusion):
84    r'''
85
86    differential equation:
87
88    :math::
89    dx_t = f(t,x)dt + \sigma(t,x)dW_t
90
91    integral:
92
93    :math::
94    x_T = x_0 + \int_{0}^{T}f(t,S)dt + \int_0^T  \sigma(t,S)dW_t
95
96    TODO: check definition, affine, what about jump diffusion?
97
98    '''
99
100    def __init__(self):
101        pass
102
103    def sim(self, nobs=100, T=1, dt=None, nrepl=1):
104        # this does not look correct if drift or sig depend on x
105        # see arithmetic BM
106        W, t = self.simulateW(nobs=nobs, T=T, dt=dt, nrepl=nrepl)
107        dx =  self._drift() + self._sig() * W
108        x  = np.cumsum(dx,1)
109        xmean = x.mean(0)
110        return x, xmean, t
111
112    def simEM(self, xzero=None, nobs=100, T=1, dt=None, nrepl=1, Tratio=4):
113        '''
114
115        from Higham 2001
116
117        TODO: reverse parameterization to start with final nobs and DT
118        TODO: check if I can skip the loop using my way from exactprocess
119              problem might be Winc (reshape into 3d and sum)
120        TODO: (later) check memory efficiency for large simulations
121        '''
122        #TODO: reverse parameterization to start with final nobs and DT
123        nobs = nobs * Tratio  # simple way to change parameter
124        # maybe wrong parameterization,
125        # drift too large, variance too small ? which dt/Dt
126        # _drift, _sig independent of dt is wrong
127        if xzero is None:
128            xzero = self.xzero
129        if dt is None:
130            dt = T*1.0/nobs
131        W, t = self.simulateW(nobs=nobs, T=T, dt=dt, nrepl=nrepl)
132        dW = self.dW
133        t = np.linspace(dt, 1, nobs)
134        Dt = Tratio*dt
135        L = nobs/Tratio        # L EM steps of size Dt = R*dt
136        Xem = np.zeros((nrepl,L))    # preallocate for efficiency
137        Xtemp = xzero
138        Xem[:,0] = xzero
139        for j in np.arange(1,L):
140            #Winc = np.sum(dW[:,Tratio*(j-1)+1:Tratio*j],1)
141            Winc = np.sum(dW[:,np.arange(Tratio*(j-1)+1,Tratio*j)],1)
142            #Xtemp = Xtemp + Dt*lamda*Xtemp + mu*Xtemp*Winc;
143            Xtemp = Xtemp + self._drift(x=Xtemp) + self._sig(x=Xtemp) * Winc
144            #Dt*lamda*Xtemp + mu*Xtemp*Winc;
145            Xem[:,j] = Xtemp
146        return Xem
147
148'''
149    R = 4; Dt = R*dt; L = N/R;        % L EM steps of size Dt = R*dt
150    Xem = zeros(1,L);                 % preallocate for efficiency
151    Xtemp = Xzero;
152    for j = 1:L
153       Winc = sum(dW(R*(j-1)+1:R*j));
154       Xtemp = Xtemp + Dt*lambda*Xtemp + mu*Xtemp*Winc;
155       Xem(j) = Xtemp;
156    end
157'''
158
159class ExactDiffusion(AffineDiffusion):
160    '''Diffusion that has an exact integral representation
161
162    this is currently mainly for geometric, log processes
163
164    '''
165
166    def __init__(self):
167        pass
168
169    def exactprocess(self, xzero, nobs, ddt=1., nrepl=2):
170        '''ddt : discrete delta t
171
172
173
174        should be the same as an AR(1)
175        not tested yet
176        '''
177        t = np.linspace(ddt, nobs*ddt, nobs)
178        #expnt = np.exp(-self.lambd * t)
179        expddt = np.exp(-self.lambd * ddt)
180        normrvs = np.random.normal(size=(nrepl,nobs))
181        #do I need lfilter here AR(1) ? if mean reverting lag-coeff<1
182        #lfilter does not handle 2d arrays, it does?
183        inc = self._exactconst(expddt) + self._exactstd(expddt) * normrvs
184        return signal.lfilter([1.], [1.,-expddt], inc)
185
186    def exactdist(self, xzero, t):
187        expnt = np.exp(-self.lambd * t)
188        meant = xzero * expnt + self._exactconst(expnt)
189        stdt = self._exactstd(expnt)
190        return stats.norm(loc=meant, scale=stdt)
191
192class ArithmeticBrownian(AffineDiffusion):
193    '''
194    :math::
195    dx_t &= \\mu dt + \\sigma dW_t
196    '''
197
198    def __init__(self, xzero, mu, sigma):
199        self.xzero = xzero
200        self.mu = mu
201        self.sigma = sigma
202
203    def _drift(self, *args, **kwds):
204        return self.mu
205    def _sig(self, *args, **kwds):
206        return self.sigma
207    def exactprocess(self, nobs, xzero=None, ddt=1., nrepl=2):
208        '''ddt : discrete delta t
209
210        not tested yet
211        '''
212        if xzero is None:
213            xzero = self.xzero
214        t = np.linspace(ddt, nobs*ddt, nobs)
215        normrvs = np.random.normal(size=(nrepl,nobs))
216        inc = self._drift + self._sigma * np.sqrt(ddt) * normrvs
217        #return signal.lfilter([1.], [1.,-1], inc)
218        return xzero + np.cumsum(inc,1)
219
220    def exactdist(self, xzero, t):
221        expnt = np.exp(-self.lambd * t)
222        meant = self._drift * t
223        stdt = self._sigma * np.sqrt(t)
224        return stats.norm(loc=meant, scale=stdt)
225
226
227class GeometricBrownian(AffineDiffusion):
228    '''Geometric Brownian Motion
229
230    :math::
231    dx_t &= \\mu x_t dt + \\sigma x_t dW_t
232
233    $x_t $ stochastic process of Geometric Brownian motion,
234    $\\mu $ is the drift,
235    $\\sigma $ is the Volatility,
236    $W$ is the Wiener process (Brownian motion).
237
238    '''
239    def __init__(self, xzero, mu, sigma):
240        self.xzero = xzero
241        self.mu = mu
242        self.sigma = sigma
243
244    def _drift(self, *args, **kwds):
245        x = kwds['x']
246        return self.mu * x
247    def _sig(self, *args, **kwds):
248        x = kwds['x']
249        return self.sigma * x
250
251
252class OUprocess(AffineDiffusion):
253    '''Ornstein-Uhlenbeck
254
255    :math::
256      dx_t&=\\lambda(\\mu - x_t)dt+\\sigma dW_t
257
258    mean reverting process
259
260
261
262    TODO: move exact higher up in class hierarchy
263    '''
264    def __init__(self, xzero, mu, lambd, sigma):
265        self.xzero = xzero
266        self.lambd = lambd
267        self.mu = mu
268        self.sigma = sigma
269
270    def _drift(self, *args, **kwds):
271        x = kwds['x']
272        return self.lambd * (self.mu - x)
273    def _sig(self, *args, **kwds):
274        x = kwds['x']
275        return self.sigma * x
276    def exact(self, xzero, t, normrvs):
277        #TODO: aggregate over time for process with observations for all t
278        #      i.e. exact conditional distribution for discrete time increment
279        #      -> exactprocess
280        #TODO: for single t, return stats.norm -> exactdist
281        expnt = np.exp(-self.lambd * t)
282        return (xzero * expnt + self.mu * (1-expnt) +
283                self.sigma * np.sqrt((1-expnt*expnt)/2./self.lambd) * normrvs)
284
285    def exactprocess(self, xzero, nobs, ddt=1., nrepl=2):
286        '''ddt : discrete delta t
287
288        should be the same as an AR(1)
289        not tested yet
290        # after writing this I saw the same use of lfilter in sitmo
291        '''
292        t = np.linspace(ddt, nobs*ddt, nobs)
293        expnt = np.exp(-self.lambd * t)
294        expddt = np.exp(-self.lambd * ddt)
295        normrvs = np.random.normal(size=(nrepl,nobs))
296        #do I need lfilter here AR(1) ? lfilter does not handle 2d arrays, it does?
297        from scipy import signal
298        #xzero * expnt
299        inc = ( self.mu * (1-expddt) +
300                self.sigma * np.sqrt((1-expddt*expddt)/2./self.lambd) * normrvs )
301
302        return signal.lfilter([1.], [1.,-expddt], inc)
303
304
305    def exactdist(self, xzero, t):
306        #TODO: aggregate over time for process with observations for all t
307        #TODO: for single t, return stats.norm
308        expnt = np.exp(-self.lambd * t)
309        meant = xzero * expnt + self.mu * (1-expnt)
310        stdt = self.sigma * np.sqrt((1-expnt*expnt)/2./self.lambd)
311        from scipy import stats
312        return stats.norm(loc=meant, scale=stdt)
313
314    def fitls(self, data, dt):
315        '''assumes data is 1d, univariate time series
316        formula from sitmo
317        '''
318        # brute force, no parameter estimation errors
319        nobs = len(data)-1
320        exog = np.column_stack((np.ones(nobs), data[:-1]))
321        parest, res, rank, sing = np.linalg.lstsq(exog, data[1:], rcond=-1)
322        const, slope = parest
323        errvar = res/(nobs-2.)
324        lambd = -np.log(slope)/dt
325        sigma = np.sqrt(-errvar * 2.*np.log(slope)/ (1-slope**2)/dt)
326        mu = const / (1-slope)
327        return mu, lambd, sigma
328
329
330class SchwartzOne(ExactDiffusion):
331    '''the Schwartz type 1 stochastic process
332
333    :math::
334    dx_t = \\kappa (\\mu - \\ln x_t) x_t dt + \\sigma x_tdW \\
335
336    The Schwartz type 1 process is a log of the Ornstein-Uhlenbeck stochastic
337    process.
338
339    '''
340
341    def __init__(self, xzero, mu, kappa, sigma):
342        self.xzero = xzero
343        self.mu = mu
344        self.kappa = kappa
345        self.lambd = kappa #alias until I fix exact
346        self.sigma = sigma
347
348    def _exactconst(self, expnt):
349        return (1-expnt) * (self.mu - self.sigma**2 / 2. /self.kappa)
350
351    def _exactstd(self, expnt):
352        return self.sigma * np.sqrt((1-expnt*expnt)/2./self.kappa)
353
354    def exactprocess(self, xzero, nobs, ddt=1., nrepl=2):
355        '''uses exact solution for log of process
356        '''
357        lnxzero = np.log(xzero)
358        lnx = super(self.__class__, self).exactprocess(xzero, nobs, ddt=ddt, nrepl=nrepl)
359        return np.exp(lnx)
360
361    def exactdist(self, xzero, t):
362        expnt = np.exp(-self.lambd * t)
363        #TODO: check this is still wrong, just guessing
364        meant = np.log(xzero) * expnt + self._exactconst(expnt)
365        stdt = self._exactstd(expnt)
366        return stats.lognorm(loc=meant, scale=stdt)
367
368    def fitls(self, data, dt):
369        '''assumes data is 1d, univariate time series
370        formula from sitmo
371        '''
372        # brute force, no parameter estimation errors
373        nobs = len(data)-1
374        exog = np.column_stack((np.ones(nobs),np.log(data[:-1])))
375        parest, res, rank, sing = np.linalg.lstsq(exog, np.log(data[1:]), rcond=-1)
376        const, slope = parest
377        errvar = res/(nobs-2.)  #check denominator estimate, of sigma too low
378        kappa = -np.log(slope)/dt
379        sigma = np.sqrt(errvar * kappa / (1-np.exp(-2*kappa*dt)))
380        mu = const / (1-np.exp(-kappa*dt)) + sigma**2/2./kappa
381        if np.shape(mu)== (1,):
382            mu = mu[0]   # TODO: how to remove scalar array ?
383        if np.shape(sigma)== (1,):
384            sigma = sigma[0]
385        #mu, kappa are good, sigma too small
386        return mu, kappa, sigma
387
388
389
390class BrownianBridge(object):
391    def __init__(self):
392        pass
393
394    def simulate(self, x0, x1, nobs, nrepl=1, ddt=1., sigma=1.):
395        nobs=nobs+1
396        dt = ddt*1./nobs
397        t = np.linspace(dt, ddt-dt, nobs)
398        t = np.linspace(dt, ddt, nobs)
399        wm = [t/ddt, 1-t/ddt]
400        #wmi = wm[1]
401        #wm1 = x1*wm[0]
402        wmi = 1-dt/(ddt-t)
403        wm1 = x1*(dt/(ddt-t))
404        su = sigma* np.sqrt(t*(1-t)/ddt)
405        s = sigma* np.sqrt(dt*(ddt-t-dt)/(ddt-t))
406        x = np.zeros((nrepl, nobs))
407        x[:,0] = x0
408        rvs = s*np.random.normal(size=(nrepl,nobs))
409        for i in range(1,nobs):
410            x[:,i] = x[:,i-1]*wmi[i] + wm1[i] + rvs[:,i]
411        return x, t, su
412
413
414class CompoundPoisson(object):
415    '''nobs iid compound poisson distributions, not a process in time
416    '''
417    def __init__(self, lambd, randfn=np.random.normal):
418        if len(lambd) != len(randfn):
419            raise ValueError('lambd and randfn need to have the same number of elements')
420
421        self.nobj = len(lambd)
422        self.randfn = randfn
423        self.lambd = np.asarray(lambd)
424
425    def simulate(self, nobs, nrepl=1):
426        nobj = self.nobj
427        x = np.zeros((nrepl, nobs, nobj))
428        N = np.random.poisson(self.lambd[None,None,:], size=(nrepl,nobs,nobj))
429        for io in range(nobj):
430            randfnc = self.randfn[io]
431
432            nc = N[:,:,io]
433            #print nrepl,nobs,nc
434            #xio = randfnc(size=(nrepl,nobs,np.max(nc))).cumsum(-1)[np.arange(nrepl)[:,None],np.arange(nobs),nc-1]
435            rvs = randfnc(size=(nrepl,nobs,np.max(nc)))
436            print('rvs.sum()', rvs.sum(), rvs.shape)
437            xio = rvs.cumsum(-1)[np.arange(nrepl)[:,None],np.arange(nobs),nc-1]
438            #print xio.shape
439            x[:,:,io] = xio
440        x[N==0] = 0
441        return x, N
442
443
444
445
446
447
448
449
450
451'''
452randn('state',100)                                % set the state of randn
453T = 1; N = 500; dt = T/N; t = [dt:dt:1];
454
455M = 1000;                                         % M paths simultaneously
456dW = sqrt(dt)*randn(M,N);                         % increments
457W = cumsum(dW,2);                                 % cumulative sum
458U = exp(repmat(t,[M 1]) + 0.5*W);
459Umean = mean(U);
460plot([0,t],[1,Umean],'b-'), hold on               % plot mean over M paths
461plot([0,t],[ones(5,1),U(1:5,:)],'r--'), hold off  % plot 5 individual paths
462xlabel('t','FontSize',16)
463ylabel('U(t)','FontSize',16,'Rotation',0,'HorizontalAlignment','right')
464legend('mean of 1000 paths','5 individual paths',2)
465
466averr = norm((Umean - exp(9*t/8)),'inf')          % sample error
467'''
468
469if __name__ == '__main__':
470    doplot = 1
471    nrepl = 1000
472    examples = []#['all']
473
474    if 'all' in examples:
475        w = Diffusion()
476
477        # Wiener Process
478        # ^^^^^^^^^^^^^^
479
480        ws = w.simulateW(1000, nrepl=nrepl)
481        if doplot:
482            plt.figure()
483            tmp = plt.plot(ws[0].T)
484            tmp = plt.plot(ws[0].mean(0), linewidth=2)
485            plt.title('Standard Brownian Motion (Wiener Process)')
486
487        func = lambda t, W: np.exp(t + 0.5*W)
488        us = w.expectedsim(func, nobs=500, nrepl=nrepl)
489        if doplot:
490            plt.figure()
491            tmp = plt.plot(us[0].T)
492            tmp = plt.plot(us[1], linewidth=2)
493            plt.title('Brownian Motion - exp')
494        #plt.show()
495        averr = np.linalg.norm(us[1] - np.exp(9*us[2]/8.), np.inf)
496        print(averr)
497        #print us[1][:10]
498        #print np.exp(9.*us[2][:10]/8.)
499
500        # Geometric Brownian
501        # ^^^^^^^^^^^^^^^^^^
502
503        gb = GeometricBrownian(xzero=1., mu=0.01, sigma=0.5)
504        gbs = gb.simEM(nobs=100, nrepl=100)
505        if doplot:
506            plt.figure()
507            tmp = plt.plot(gbs.T)
508            tmp = plt.plot(gbs.mean(0), linewidth=2)
509            plt.title('Geometric Brownian')
510            plt.figure()
511            tmp = plt.plot(np.log(gbs).T)
512            tmp = plt.plot(np.log(gbs.mean(0)), linewidth=2)
513            plt.title('Geometric Brownian - log-transformed')
514
515        ab = ArithmeticBrownian(xzero=1, mu=0.05, sigma=1)
516        abs = ab.simEM(nobs=100, nrepl=100)
517        if doplot:
518            plt.figure()
519            tmp = plt.plot(abs.T)
520            tmp = plt.plot(abs.mean(0), linewidth=2)
521            plt.title('Arithmetic Brownian')
522
523        # Ornstein-Uhlenbeck
524        # ^^^^^^^^^^^^^^^^^^
525
526        ou = OUprocess(xzero=2, mu=1, lambd=0.5, sigma=0.1)
527        ous = ou.simEM()
528        oue = ou.exact(1, 1, np.random.normal(size=(5,10)))
529        ou.exact(0, np.linspace(0,10,10/0.1), 0)
530        ou.exactprocess(0,10)
531        print(ou.exactprocess(0,10, ddt=0.1,nrepl=10).mean(0))
532        #the following looks good, approaches mu
533        oues = ou.exactprocess(0,100, ddt=0.1,nrepl=100)
534        if doplot:
535            plt.figure()
536            tmp = plt.plot(oues.T)
537            tmp = plt.plot(oues.mean(0), linewidth=2)
538            plt.title('Ornstein-Uhlenbeck')
539
540        # SchwartsOne
541        # ^^^^^^^^^^^
542
543        so = SchwartzOne(xzero=0, mu=1, kappa=0.5, sigma=0.1)
544        sos = so.exactprocess(0,50, ddt=0.1,nrepl=100)
545        print(sos.mean(0))
546        print(np.log(sos.mean(0)))
547        doplot = 1
548        if doplot:
549            plt.figure()
550            tmp = plt.plot(sos.T)
551            tmp = plt.plot(sos.mean(0), linewidth=2)
552            plt.title('Schwartz One')
553        print(so.fitls(sos[0,:],dt=0.1))
554        sos2 = so.exactprocess(0,500, ddt=0.1,nrepl=5)
555        print('true: mu=1, kappa=0.5, sigma=0.1')
556        for i in range(5):
557            print(so.fitls(sos2[i],dt=0.1))
558
559
560
561        # Brownian Bridge
562        # ^^^^^^^^^^^^^^^
563
564        bb = BrownianBridge()
565        #bbs = bb.sample(x0, x1, nobs, nrepl=1, ddt=1., sigma=1.)
566        bbs, t, wm = bb.simulate(0, 0.5, 99, nrepl=500, ddt=1., sigma=0.1)
567        if doplot:
568            plt.figure()
569            tmp = plt.plot(bbs.T)
570            tmp = plt.plot(bbs.mean(0), linewidth=2)
571            plt.title('Brownian Bridge')
572            plt.figure()
573            plt.plot(wm,'r', label='theoretical')
574            plt.plot(bbs.std(0), label='simulated')
575            plt.title('Brownian Bridge - Variance')
576            plt.legend()
577
578    # Compound Poisson
579    # ^^^^^^^^^^^^^^^^
580    cp = CompoundPoisson([1,1], [np.random.normal,np.random.normal])
581    cps = cp.simulate(nobs=20000,nrepl=3)
582    print(cps[0].sum(-1).sum(-1))
583    print(cps[0].sum())
584    print(cps[0].mean(-1).mean(-1))
585    print(cps[0].mean())
586    print(cps[1].size)
587    print(cps[1].sum())
588    #Note Y = sum^{N} X is compound poisson of iid x, then
589    #E(Y) = E(N)*E(X)   eg. eq. (6.37) page 385 in http://ee.stanford.edu/~gray/sp.html
590
591
592    #plt.show()
593