1""" 2================================================= 3Example of pattern recognition on simulated data 4================================================= 5 6This example simulates data according to a very simple sketch of brain 7imaging data and applies machine learning techniques to predict output 8values. 9 10We use a very simple generating function to simulate data, as in `Michel 11et al. 2012 <http://dx.doi.org/10.1109/TMI.2011.2113378>`_ , a linear 12model with a random design matrix **X**: 13 14.. math:: 15 16 \\mathbf{y} = \\mathbf{X} \\mathbf{w} + \\mathbf{e} 17 18* **w**: the weights of the linear model correspond to the predictive 19 brain regions. Here, in the simulations, they form a 3D image with 5, four 20 of which in opposite corners and one in the middle, as plotted below. 21 22* **X**: the design matrix corresponds to the observed fMRI data. Here 23 we simulate random normal variables and smooth them as in Gaussian 24 fields. 25 26* **e** is random normal noise. 27 28 29""" 30 31# Licence : BSD 32 33print(__doc__) 34 35from time import time 36 37import numpy as np 38import matplotlib.pyplot as plt 39from scipy import linalg, ndimage 40 41from sklearn import linear_model, svm 42from sklearn.utils import check_random_state 43from sklearn.model_selection import KFold 44from sklearn.feature_selection import f_regression 45 46import nibabel 47 48from nilearn import decoding 49import nilearn.masking 50from nilearn.plotting import show 51 52 53############################################################################## 54# A function to generate data 55############################################################################## 56def create_simulation_data(snr=0, n_samples=2 * 100, size=12, random_state=1): 57 generator = check_random_state(random_state) 58 roi_size = 2 # size / 3 59 smooth_X = 1 60 # Coefs 61 w = np.zeros((size, size, size)) 62 w[0:roi_size, 0:roi_size, 0:roi_size] = -0.6 63 w[-roi_size:, -roi_size:, 0:roi_size] = 0.5 64 w[0:roi_size, -roi_size:, -roi_size:] = -0.6 65 w[-roi_size:, 0:roi_size:, -roi_size:] = 0.5 66 w[(size - roi_size) // 2:(size + roi_size) // 2, 67 (size - roi_size) // 2:(size + roi_size) // 2, 68 (size - roi_size) // 2:(size + roi_size) // 2] = 0.5 69 w = w.ravel() 70 # Generate smooth background noise 71 XX = generator.randn(n_samples, size, size, size) 72 noise = [] 73 for i in range(n_samples): 74 Xi = ndimage.filters.gaussian_filter(XX[i, :, :, :], smooth_X) 75 Xi = Xi.ravel() 76 noise.append(Xi) 77 noise = np.array(noise) 78 # Generate the signal y 79 y = generator.randn(n_samples) 80 X = np.dot(y[:, np.newaxis], w[np.newaxis]) 81 norm_noise = linalg.norm(X, 2) / np.exp(snr / 20.) 
##############################################################################
# A function to generate data
##############################################################################
def create_simulation_data(snr=0, n_samples=2 * 100, size=12, random_state=1):
    generator = check_random_state(random_state)
    roi_size = 2
    smooth_X = 1
    # Coefs
    w = np.zeros((size, size, size))
    w[0:roi_size, 0:roi_size, 0:roi_size] = -0.6
    w[-roi_size:, -roi_size:, 0:roi_size] = 0.5
    w[0:roi_size, -roi_size:, -roi_size:] = -0.6
    w[-roi_size:, 0:roi_size, -roi_size:] = 0.5
    w[(size - roi_size) // 2:(size + roi_size) // 2,
      (size - roi_size) // 2:(size + roi_size) // 2,
      (size - roi_size) // 2:(size + roi_size) // 2] = 0.5
    w = w.ravel()
    # Generate smooth background noise
    XX = generator.randn(n_samples, size, size, size)
    noise = []
    for i in range(n_samples):
        Xi = ndimage.gaussian_filter(XX[i, :, :, :], smooth_X)
        Xi = Xi.ravel()
        noise.append(Xi)
    noise = np.array(noise)
    # Generate the signal y
    y = generator.randn(n_samples)
    X = np.dot(y[:, np.newaxis], w[np.newaxis])
    # Scale the noise to reach the requested SNR, on the decibel scale
    norm_noise = linalg.norm(X, 2) / 10. ** (snr / 20.)
    noise_coef = norm_noise / linalg.norm(noise, 2)
    noise *= noise_coef
    snr = 20 * np.log10(linalg.norm(X, 2) / linalg.norm(noise, 2))
    print("SNR: %.1f dB" % snr)
    # Mixing of signal + noise and splitting into train/test
    X += noise
    X -= X.mean(axis=-1)[:, np.newaxis]
    X /= X.std(axis=-1)[:, np.newaxis]
    X_test = X[n_samples // 2:, :]
    X_train = X[:n_samples // 2, :]
    y_test = y[n_samples // 2:]
    y_train = y[:n_samples // 2]

    return X_train, X_test, y_train, y_test, snr, w, size


##############################################################################
# A simple function to plot slices
##############################################################################
def plot_slices(data, title=None):
    plt.figure(figsize=(5.5, 2.2))
    vmax = np.abs(data).max()
    for i in (0, 6, 11):
        plt.subplot(1, 3, i // 5 + 1)  # slices 0, 6, 11 -> subplots 1, 2, 3
        plt.imshow(data[:, :, i], vmin=-vmax, vmax=vmax,
                   interpolation="nearest", cmap=plt.cm.RdBu_r)
        plt.xticks(())
        plt.yticks(())
    plt.subplots_adjust(hspace=0.05, wspace=0.05, left=.03, right=.97, top=.9)
    if title is not None:
        plt.suptitle(title, y=.95)


###############################################################################
# Create data
###############################################################################
X_train, X_test, y_train, y_test, snr, coefs, size = \
    create_simulation_data(snr=-10, n_samples=100, size=12)

# Create masks for SearchLight. process_mask is the voxels where SearchLight
# computation is performed. It is a subset of the brain mask, just to reduce
# computation time.
mask = np.ones((size, size, size), dtype=bool)
mask_img = nibabel.Nifti1Image(mask.astype(int), np.eye(4))
process_mask = np.zeros((size, size, size), dtype=bool)
process_mask[:, :, 0] = True
process_mask[:, :, 6] = True
process_mask[:, :, 11] = True
process_mask_img = nibabel.Nifti1Image(process_mask.astype(int), np.eye(4))

coefs = np.reshape(coefs, [size, size, size])
plot_slices(coefs, title="Ground truth")

###############################################################################
# Run different estimators
###############################################################################
#
# We can now run different estimators and look at their prediction score,
# as well as the feature maps that they recover. Namely, we will use
#
# * A support vector regression (`SVM
#   <http://scikit-learn.org/stable/modules/svm.html>`_)
#
# * An `elastic-net
#   <http://scikit-learn.org/stable/modules/linear_model.html#elastic-net>`_
#
# * A *Bayesian* ridge estimator, i.e. a ridge estimator that sets its
#   regularization parameter according to a hyperprior
#
# * A ridge estimator that sets its parameter by cross-validation
#
# Note that `RidgeCV` and `ElasticNetCV` have names ending in `CV`, which
# stands for `cross-validation`: from the list of possible `alpha` values
# that they are given, they choose the best by cross-validation.
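
# To see this mechanism in isolation, we can fit one such estimator on the
# training data and inspect which `alpha` won: after fitting, scikit-learn
# `*CV` estimators expose it as the `alpha_` attribute. This is a small
# illustrative aside (`ridge_demo` is a throwaway name), separate from the
# comparison below.
ridge_demo = linear_model.RidgeCV(alphas=[100, 10, 1, 0.1], cv=5)
ridge_demo.fit(X_train, y_train)
print("RidgeCV selected alpha = %g" % ridge_demo.alpha_)
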
estimators = [
    # the old ``normalize=True`` option was removed from scikit-learn,
    # so the features enter the Bayesian ridge as-is
    ('bayesian_ridge', linear_model.BayesianRidge()),
    ('enet_cv', linear_model.ElasticNetCV(alphas=[5, 1, 0.5, 0.1],
                                          l1_ratio=0.05)),
    ('ridge_cv', linear_model.RidgeCV(alphas=[100, 10, 1, 0.1], cv=5)),
    ('svr', svm.SVR(kernel='linear', C=0.001)),
    ('searchlight', decoding.SearchLight(mask_img,
                                         process_mask_img=process_mask_img,
                                         radius=2.7,
                                         scoring='r2',
                                         estimator=svm.SVR(kernel="linear"),
                                         cv=KFold(n_splits=4),
                                         verbose=1,
                                         n_jobs=1,
                                         )
     )
]

###############################################################################
# Run the estimators
#
# As the estimators expose a fairly consistent API, we can fit them all in
# a for loop: they all have a `fit` method for fitting the data, a `score`
# method to retrieve the prediction score, and because they are all linear
# models, a `coef_` attribute that stores the estimated coefficients **w**.

for name, estimator in estimators:
    t1 = time()
    if name != "searchlight":
        estimator.fit(X_train, y_train)
    else:
        X = nilearn.masking.unmask(X_train, mask_img)
        estimator.fit(X, y_train)
        del X
    elapsed_time = time() - t1

    if name != 'searchlight':
        coefs = estimator.coef_
        coefs = np.reshape(coefs, [size, size, size])
        score = estimator.score(X_test, y_test)
        title = '%s: prediction score %.3f, training time: %.2fs' % (
            estimator.__class__.__name__, score,
            elapsed_time)

    else:  # Searchlight
        coefs = estimator.scores_
        title = '%s: training time: %.2fs' % (
            estimator.__class__.__name__,
            elapsed_time)

    # We use the plot_slices function provided in the example to
    # plot the results
    plot_slices(coefs, title=title)

    print(title)

f_values, p_values = f_regression(X_train, y_train)
p_values = np.reshape(p_values, (size, size, size))
p_values = -np.log10(p_values)
p_values[np.isnan(p_values)] = 0
p_values[p_values > 10] = 10
plot_slices(p_values, title="f_regression (-log10 p-values)")

show()

###############################################################################
# An exercise to go further
###############################################################################
#
# As an exercise, you can use recursive feature elimination (RFE) with
# the SVM.
#
# Read the object's documentation to find out how to use RFE.
#
# **Performance tip**: increase the `step` parameter, or it will be very
# slow.

from sklearn.feature_selection import RFE
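
# One possible sketch of this exercise, following the performance tip above;
# the parameter values (`n_features_to_select`, `step`) are illustrative
# guesses, not tuned choices.
rfe = RFE(estimator=svm.SVR(kernel='linear', C=0.001),
          n_features_to_select=size ** 3 // 10,
          step=0.25)  # drop 25% of the features at each iteration
rfe.fit(X_train, y_train)
print("RFE + SVR: prediction score %.3f" % rfe.score(X_test, y_test))

# Map the coefficients of the surviving voxels back into the full grid
# so that we can reuse plot_slices
coefs_rfe = np.zeros(size ** 3)
coefs_rfe[rfe.support_] = rfe.estimator_.coef_.ravel()
plot_slices(np.reshape(coefs_rfe, (size, size, size)), title="RFE + SVR")

show()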