1# -*- coding: utf-8 -*-
2"""Playing with correlation of DJ-30 stock returns
3
4this uses pickled data that needs to be created with findow.py
5to see graphs, uncomment plt.show()
6
7
8Created on Sat Jan 30 16:30:18 2010
9Author: josef-pktd
10"""
11import pickle
12
13import numpy as np
14import matplotlib.pyplot as plt
15import matplotlib as mpl
16
17import statsmodels.sandbox.tools as sbtools
18
19from statsmodels.graphics.correlation import plot_corr, plot_corr_grid
20
21try:
22    with open('dj30rr', 'rb') as fd:
23        rrdm = pickle.load(fd)
24except Exception: #blanket for any unpickling error
25    print("Error with unpickling, a new pickle file can be created with findow_1")
26    raise
27
28ticksym = rrdm.columns.tolist()
29rr = rrdm.values[1:400]
30
31rrcorr = np.corrcoef(rr, rowvar=0)
32
33
34plot_corr(rrcorr, xnames=ticksym)
35nvars = rrcorr.shape[0]
36plt.figure()
37plt.hist(rrcorr[np.triu_indices(nvars,1)])
38plt.title('Correlation Coefficients')
39
40xreda, facta, evaa, evea  = sbtools.pcasvd(rr)
41evallcs = (evaa).cumsum()
42print(evallcs/evallcs[-1])
43xred, fact, eva, eve  = sbtools.pcasvd(rr, keepdim=4)
44pcacorr = np.corrcoef(xred, rowvar=0)
45
46plot_corr(pcacorr, xnames=ticksym, title='Correlation PCA')
47
48resid = rr-xred
49residcorr = np.corrcoef(resid, rowvar=0)
50plot_corr(residcorr, xnames=ticksym, title='Correlation Residuals')
51
52plt.matshow(residcorr)
53plt.imshow(residcorr, cmap=plt.cm.jet, interpolation='nearest',
54           extent=(0,30,0,30), vmin=-1.0, vmax=1.0)
55plt.colorbar()
56
57normcolor = (0,1) #False #True
58fig = plt.figure()
59ax = fig.add_subplot(2,2,1)
60plot_corr(rrcorr, xnames=ticksym, normcolor=normcolor, ax=ax)
61ax2 = fig.add_subplot(2,2,3)
62#pcacorr = np.corrcoef(xred, rowvar=0)
63plot_corr(pcacorr, xnames=ticksym, title='Correlation PCA',
64          normcolor=normcolor, ax=ax2)
65ax3 = fig.add_subplot(2,2,4)
66plot_corr(residcorr, xnames=ticksym, title='Correlation Residuals',
67          normcolor=normcolor, ax=ax3)
68
69images = [c for fig_ax in fig.axes for c in fig_ax.get_children() if isinstance(c, mpl.image.AxesImage)]
70print(images)
71print(ax.get_children())
72#cax = fig.add_subplot(2,2,2)
73#[0.85, 0.1, 0.075, 0.8]
74fig. subplots_adjust(bottom=0.1, right=0.9, top=0.9)
75cax = fig.add_axes([0.9, 0.1, 0.025, 0.8])
76fig.colorbar(images[0], cax=cax)
77fig.savefig('corrmatrixgrid.png', dpi=120)
78
79has_sklearn = True
80try:
81    import sklearn  # noqa:F401
82except ImportError:
83    has_sklearn = False
84    print('sklearn not available')
85
86
87def cov2corr(cov):
88    std_ = np.sqrt(np.diag(cov))
89    corr = cov / np.outer(std_, std_)
90    return corr
91
92if has_sklearn:
93    from sklearn.covariance import LedoitWolf, OAS, MCD
94
95    lw = LedoitWolf(store_precision=False)
96    lw.fit(rr, assume_centered=False)
97    cov_lw = lw.covariance_
98    corr_lw = cov2corr(cov_lw)
99
100    oas = OAS(store_precision=False)
101    oas.fit(rr, assume_centered=False)
102    cov_oas = oas.covariance_
103    corr_oas = cov2corr(cov_oas)
104
105    mcd = MCD()#.fit(rr, reweight=None)
106    mcd.fit(rr, assume_centered=False)
107    cov_mcd = mcd.covariance_
108    corr_mcd = cov2corr(cov_mcd)
109
110    titles = ['raw correlation', 'lw', 'oas', 'mcd']
111    normcolor = None
112    fig = plt.figure()
113    for i, c in enumerate([rrcorr, corr_lw, corr_oas, corr_mcd]):
114    #for i, c in enumerate([np.cov(rr, rowvar=0), cov_lw, cov_oas, cov_mcd]):
115        ax = fig.add_subplot(2,2,i+1)
116        plot_corr(c, xnames=None, title=titles[i],
117              normcolor=normcolor, ax=ax)
118
119    images = [c for fig_ax in fig.axes for c in fig_ax.get_children() if isinstance(c, mpl.image.AxesImage)]
120    fig. subplots_adjust(bottom=0.1, right=0.9, top=0.9)
121    cax = fig.add_axes([0.9, 0.1, 0.025, 0.8])
122    fig.colorbar(images[0], cax=cax)
123
124    corrli = [rrcorr, corr_lw, corr_oas, corr_mcd, pcacorr]
125    diffssq = np.array([[((ci-cj)**2).sum() for ci in corrli]
126                            for cj in corrli])
127    diffsabs = np.array([[np.max(np.abs(ci-cj)) for ci in corrli]
128                            for cj in corrli])
129    print(diffssq)
130    print('\nmaxabs')
131    print(diffsabs)
132    fig.savefig('corrmatrix_sklearn.png', dpi=120)
133
134    fig2 = plot_corr_grid(corrli+[residcorr], ncols=3,
135                          titles=titles+['pca', 'pca-residual'],
136                          xnames=[], ynames=[])
137    fig2.savefig('corrmatrix_sklearn_2.png', dpi=120)
138
139#plt.show()
140#plt.close('all')
141