1# -*- coding: utf-8 -*- 2"""Playing with correlation of DJ-30 stock returns 3 4this uses pickled data that needs to be created with findow.py 5to see graphs, uncomment plt.show() 6 7 8Created on Sat Jan 30 16:30:18 2010 9Author: josef-pktd 10""" 11import pickle 12 13import numpy as np 14import matplotlib.pyplot as plt 15import matplotlib as mpl 16 17import statsmodels.sandbox.tools as sbtools 18 19from statsmodels.graphics.correlation import plot_corr, plot_corr_grid 20 21try: 22 with open('dj30rr', 'rb') as fd: 23 rrdm = pickle.load(fd) 24except Exception: #blanket for any unpickling error 25 print("Error with unpickling, a new pickle file can be created with findow_1") 26 raise 27 28ticksym = rrdm.columns.tolist() 29rr = rrdm.values[1:400] 30 31rrcorr = np.corrcoef(rr, rowvar=0) 32 33 34plot_corr(rrcorr, xnames=ticksym) 35nvars = rrcorr.shape[0] 36plt.figure() 37plt.hist(rrcorr[np.triu_indices(nvars,1)]) 38plt.title('Correlation Coefficients') 39 40xreda, facta, evaa, evea = sbtools.pcasvd(rr) 41evallcs = (evaa).cumsum() 42print(evallcs/evallcs[-1]) 43xred, fact, eva, eve = sbtools.pcasvd(rr, keepdim=4) 44pcacorr = np.corrcoef(xred, rowvar=0) 45 46plot_corr(pcacorr, xnames=ticksym, title='Correlation PCA') 47 48resid = rr-xred 49residcorr = np.corrcoef(resid, rowvar=0) 50plot_corr(residcorr, xnames=ticksym, title='Correlation Residuals') 51 52plt.matshow(residcorr) 53plt.imshow(residcorr, cmap=plt.cm.jet, interpolation='nearest', 54 extent=(0,30,0,30), vmin=-1.0, vmax=1.0) 55plt.colorbar() 56 57normcolor = (0,1) #False #True 58fig = plt.figure() 59ax = fig.add_subplot(2,2,1) 60plot_corr(rrcorr, xnames=ticksym, normcolor=normcolor, ax=ax) 61ax2 = fig.add_subplot(2,2,3) 62#pcacorr = np.corrcoef(xred, rowvar=0) 63plot_corr(pcacorr, xnames=ticksym, title='Correlation PCA', 64 normcolor=normcolor, ax=ax2) 65ax3 = fig.add_subplot(2,2,4) 66plot_corr(residcorr, xnames=ticksym, title='Correlation Residuals', 67 normcolor=normcolor, ax=ax3) 68 69images = [c for fig_ax in fig.axes for c in fig_ax.get_children() if isinstance(c, mpl.image.AxesImage)] 70print(images) 71print(ax.get_children()) 72#cax = fig.add_subplot(2,2,2) 73#[0.85, 0.1, 0.075, 0.8] 74fig. subplots_adjust(bottom=0.1, right=0.9, top=0.9) 75cax = fig.add_axes([0.9, 0.1, 0.025, 0.8]) 76fig.colorbar(images[0], cax=cax) 77fig.savefig('corrmatrixgrid.png', dpi=120) 78 79has_sklearn = True 80try: 81 import sklearn # noqa:F401 82except ImportError: 83 has_sklearn = False 84 print('sklearn not available') 85 86 87def cov2corr(cov): 88 std_ = np.sqrt(np.diag(cov)) 89 corr = cov / np.outer(std_, std_) 90 return corr 91 92if has_sklearn: 93 from sklearn.covariance import LedoitWolf, OAS, MCD 94 95 lw = LedoitWolf(store_precision=False) 96 lw.fit(rr, assume_centered=False) 97 cov_lw = lw.covariance_ 98 corr_lw = cov2corr(cov_lw) 99 100 oas = OAS(store_precision=False) 101 oas.fit(rr, assume_centered=False) 102 cov_oas = oas.covariance_ 103 corr_oas = cov2corr(cov_oas) 104 105 mcd = MCD()#.fit(rr, reweight=None) 106 mcd.fit(rr, assume_centered=False) 107 cov_mcd = mcd.covariance_ 108 corr_mcd = cov2corr(cov_mcd) 109 110 titles = ['raw correlation', 'lw', 'oas', 'mcd'] 111 normcolor = None 112 fig = plt.figure() 113 for i, c in enumerate([rrcorr, corr_lw, corr_oas, corr_mcd]): 114 #for i, c in enumerate([np.cov(rr, rowvar=0), cov_lw, cov_oas, cov_mcd]): 115 ax = fig.add_subplot(2,2,i+1) 116 plot_corr(c, xnames=None, title=titles[i], 117 normcolor=normcolor, ax=ax) 118 119 images = [c for fig_ax in fig.axes for c in fig_ax.get_children() if isinstance(c, mpl.image.AxesImage)] 120 fig. subplots_adjust(bottom=0.1, right=0.9, top=0.9) 121 cax = fig.add_axes([0.9, 0.1, 0.025, 0.8]) 122 fig.colorbar(images[0], cax=cax) 123 124 corrli = [rrcorr, corr_lw, corr_oas, corr_mcd, pcacorr] 125 diffssq = np.array([[((ci-cj)**2).sum() for ci in corrli] 126 for cj in corrli]) 127 diffsabs = np.array([[np.max(np.abs(ci-cj)) for ci in corrli] 128 for cj in corrli]) 129 print(diffssq) 130 print('\nmaxabs') 131 print(diffsabs) 132 fig.savefig('corrmatrix_sklearn.png', dpi=120) 133 134 fig2 = plot_corr_grid(corrli+[residcorr], ncols=3, 135 titles=titles+['pca', 'pca-residual'], 136 xnames=[], ynames=[]) 137 fig2.savefig('corrmatrix_sklearn_2.png', dpi=120) 138 139#plt.show() 140#plt.close('all') 141