1""" 2This module implements fMRI Design Matrix creation. 3 4Design matrices are represented by Pandas DataFrames 5Computations of the different parts of the design matrix are confined 6to the make_first_level_design_matrix function, that create a DataFrame 7All the others are ancillary functions. 8 9Design matrices contain three different types of regressors: 10 111. Task-related regressors, that result from the convolution 12 of the experimental paradigm regressors with hemodynamic models 13 A hemodynamic model is one of: 14 15 - 'spm' : linear filter used in the SPM software 16 - 'glover' : linear filter estimated by G.Glover 17 - 'spm + derivative', 'glover + derivative': the same linear models, 18 plus their time derivative (2 regressors per condition) 19 - 'spm + derivative + dispersion', 'glover + derivative + dispersion': 20 idem plus the derivative wrt the dispersion parameter of the hrf 21 (3 regressors per condition) 22 - 'fir' : finite impulse response model, generic linear filter 23 242. User-specified regressors, that represent information available on 25 the data, e.g. motion parameters, physiological data resampled at 26 the acquisition rate, or sinusoidal regressors that model the 27 signal at a frequency of interest. 28 293. Drift regressors, that represent low_frequency phenomena of no 30 interest in the data; they need to be included to reduce variance 31 estimates. 32 33Author: Bertrand Thirion, 2009-2015 34 35""" 36import sys 37from warnings import warn 38 39import numpy as np 40import pandas as pd 41 42from nilearn._utils.glm import full_rank 43from nilearn.glm.first_level.experimental_paradigm import check_events 44from nilearn.glm.first_level.hemodynamic_models import (_orthogonalize, 45 compute_regressor) 46 47###################################################################### 48# Ancillary functions 49###################################################################### 50 51 52def _poly_drift(order, frame_times): 53 """Create a polynomial drift matrix 54 55 Parameters 56 ---------- 57 order : int, 58 Number of polynomials in the drift model. 59 60 frame_times : array of shape(n_scans), 61 Time stamps used to sample polynomials. 62 63 Returns 64 ------- 65 pol : ndarray, shape(n_scans, order + 1) 66 Estimated polynomial drifts plus a constant regressor. 67 68 """ 69 order = int(order) 70 pol = np.zeros((np.size(frame_times), order + 1)) 71 tmax = float(frame_times.max()) 72 for k in range(order + 1): 73 pol[:, k] = (frame_times / tmax) ** k 74 pol = _orthogonalize(pol) 75 pol = np.hstack((pol[:, 1:], pol[:, :1])) 76 return pol 77 78 79def _cosine_drift(high_pass, frame_times): 80 """Create a cosine drift matrix with frequencies or equal to 81 high_pass. 82 83 Parameters 84 ---------- 85 high_pass : float 86 Cut frequency of the high-pass filter in Hz 87 88 frame_times : array of shape (n_scans,) 89 The sampling times in seconds 90 91 Returns 92 ------- 93 cosine_drift : array of shape(n_scans, n_drifts) 94 Cosine drifts plus a constant regressor at cosine_drift[:, -1] 95 96 References 97 ---------- 98 http://en.wikipedia.org/wiki/Discrete_cosine_transform DCT-II 99 100 """ 101 n_frames = len(frame_times) 102 n_times = np.arange(n_frames) 103 dt = (frame_times[-1] - frame_times[0]) / (n_frames - 1) 104 if high_pass * dt >= .5: 105 warn('High-pass filter will span all accessible frequencies ' 106 'and saturate the design matrix. ' 107 'You may want to reduce the high_pass value.' 108 'The provided value is {0} Hz'.format(high_pass)) 109 order = np.minimum(n_frames - 1, 110 int(np.floor(2 * n_frames * high_pass * dt))) 111 cosine_drift = np.zeros((n_frames, order + 1)) 112 normalizer = np.sqrt(2.0 / n_frames) 113 114 for k in range(1, order + 1): 115 cosine_drift[:, k - 1] = normalizer * np.cos( 116 (np.pi / n_frames) * (n_times + .5) * k) 117 118 cosine_drift[:, -1] = 1. 119 return cosine_drift 120 121 122def _none_drift(frame_times): 123 """ Create an intercept vector 124 125 Returns 126 ------- 127 np.ones_like(frame_times) 128 129 """ 130 return np.reshape(np.ones_like(frame_times), (np.size(frame_times), 1)) 131 132 133def _make_drift(drift_model, frame_times, order, high_pass): 134 """Create the drift matrix 135 136 Parameters 137 ---------- 138 drift_model : {'polynomial', 'cosine', None}, 139 string that specifies the desired drift model 140 141 frame_times : array of shape(n_scans), 142 list of values representing the desired TRs 143 144 order : int, optional, 145 order of the drift model (in case it is polynomial) 146 147 high_pass : float, optional, 148 high-pass frequency in case of a cosine model (in Hz) 149 150 Returns 151 ------- 152 drift : array of shape(n_scans, n_drifts), 153 the drift matrix 154 155 names : list of length(n_drifts), 156 the associated names 157 158 """ 159 if isinstance(drift_model, str): 160 drift_model = drift_model.lower() # for robust comparisons 161 if drift_model == 'polynomial': 162 drift = _poly_drift(order, frame_times) 163 elif drift_model == 'cosine': 164 drift = _cosine_drift(high_pass, frame_times) 165 elif drift_model is None: 166 drift = _none_drift(frame_times) 167 else: 168 raise NotImplementedError("Unknown drift model %r" % (drift_model)) 169 names = [] 170 for k in range(1, drift.shape[1]): 171 names.append('drift_%d' % k) 172 names.append('constant') 173 return drift, names 174 175 176def _convolve_regressors(events, hrf_model, frame_times, fir_delays=[0], 177 min_onset=-24, oversampling=50): 178 """ Creation of a matrix that comprises 179 the convolution of the conditions onset with a certain hrf model 180 181 Parameters 182 ---------- 183 events : DataFrame instance, 184 Events data describing the experimental paradigm 185 see nistats.experimental_paradigm to check the specification 186 for these to be valid paradigm descriptors 187 188 hrf_model : {'spm', 'spm + derivative', 'spm + derivative + dispersion', 189 'glover', 'glover + derivative', 'glover + derivative + dispersion', 190 'fir', None} 191 String that specifies the hemodynamic response function 192 193 frame_times : array of shape (n_scans,) 194 The targeted timing for the design matrix. 195 196 fir_delays : array-like of shape (n_onsets,), optional 197 In case of FIR design, yields the array of delays 198 used in the FIR model (in scans). Default=[0]. 199 200 min_onset : float, optional 201 Minimal onset relative to frame_times[0] (in seconds) events 202 that start before frame_times[0] + min_onset are not considered. 203 Default=-24. 204 205 oversampling : int, optional 206 Oversampling factor used in temporal convolutions. Default=50. 207 208 Returns 209 ------- 210 regressor_matrix : array of shape (n_scans, n_regressors), 211 Contains the convolved regressors associated with the 212 experimental conditions. 213 214 regressor_names : list of strings, 215 The regressor names, that depend on the hrf model used 216 if 'glover' or 'spm' then this is identical to the input names 217 if 'glover + derivative' or 'spm + derivative', a second name is output 218 i.e. '#name_derivative' 219 if 'spm + derivative + dispersion' or 220 'glover + derivative + dispersion', 221 a third name is used, i.e. '#name_dispersion' 222 if 'fir', the regressos are numbered according to '#name_#delay' 223 224 """ 225 regressor_names = [] 226 regressor_matrix = None 227 trial_type, onset, duration, modulation = check_events(events) 228 for condition in np.unique(trial_type): 229 condition_mask = (trial_type == condition) 230 exp_condition = (onset[condition_mask], 231 duration[condition_mask], 232 modulation[condition_mask]) 233 reg, names = compute_regressor( 234 exp_condition, hrf_model, frame_times, con_id=condition, 235 fir_delays=fir_delays, oversampling=oversampling, 236 min_onset=min_onset) 237 238 regressor_names += names 239 if regressor_matrix is None: 240 regressor_matrix = reg 241 else: 242 regressor_matrix = np.hstack((regressor_matrix, reg)) 243 return regressor_matrix, regressor_names 244 245 246###################################################################### 247# Design matrix creation 248###################################################################### 249 250 251def make_first_level_design_matrix( 252 frame_times, events=None, hrf_model='glover', 253 drift_model='cosine', high_pass=.01, drift_order=1, fir_delays=[0], 254 add_regs=None, add_reg_names=None, min_onset=-24, oversampling=50): 255 """Generate a design matrix from the input parameters 256 257 Parameters 258 ---------- 259 frame_times : array of shape (n_frames,) 260 The timing of acquisition of the scans in seconds. 261 262 events : DataFrame instance, optional 263 Events data that describes the experimental paradigm. 264 The DataFrame instance might have these keys: 265 'onset': column to specify the start time of each events in 266 seconds. An error is raised if this key is missing. 267 'trial_type': column to specify per-event experimental conditions 268 identifier. If missing each event are labelled 269 'dummy' and considered to form a unique condition. 270 'duration': column to specify the duration of each events in 271 seconds. If missing the duration of each events is set 272 to zero. 273 'modulation': column to specify the amplitude of each 274 events. If missing the default is set to 275 ones(n_events). 276 277 An experimental paradigm is valid if it has an 'onset' key 278 and a 'duration' key. 279 If these keys are missing an error will be raised. 280 For the others keys a warning will be displayed. 281 Particular attention should be given to the 'trial_type' key 282 which defines the different conditions in the experimental paradigm. 283 284 hrf_model : {'glover', 'spm', 'spm + derivative', \ 285 'spm + derivative + dispersion', 'glover + derivative', \ 286 'glover + derivative + dispersion', \ 287 'fir', None}, optional 288 Specifies the hemodynamic response function. Default='glover'. 289 290 drift_model : {'cosine', 'polynomial', None}, optional 291 Specifies the desired drift model. Default='cosine'. 292 293 high_pass : float, optional 294 High-pass frequency in case of a cosine model (in Hz). 295 Default=0.01. 296 297 drift_order : int, optional 298 Order of the drift model (in case it is polynomial). 299 Default=1. 300 301 fir_delays : array of shape(n_onsets) or list, optional 302 In case of FIR design, yields the array of delays used in the FIR 303 model (in scans). Default=[0]. 304 305 add_regs : array of shape(n_frames, n_add_reg) or \ 306 pandas DataFrame, optional 307 additional user-supplied regressors, e.g. data driven noise regressors 308 or seed based regressors. 309 310 add_reg_names : list of (n_add_reg,) strings, optional 311 If None, while add_regs was provided, these will be termed 312 'reg_%i', i = 0..n_add_reg - 1 313 If add_regs is a DataFrame, the corresponding column names are used 314 and add_reg_names is ignored. 315 316 min_onset : float, optional 317 Minimal onset relative to frame_times[0] (in seconds) 318 events that start before frame_times[0] + min_onset are not considered. 319 Default=-24. 320 321 oversampling : int, optional 322 Oversampling factor used in temporal convolutions. Default=50. 323 324 Returns 325 ------- 326 design_matrix : DataFrame instance, 327 holding the computed design matrix, the index being the frames_times 328 and each column a regressor. 329 330 """ 331 # check arguments 332 # check that additional regressor specification is correct 333 n_add_regs = 0 334 if add_regs is not None: 335 if isinstance(add_regs, pd.DataFrame): 336 add_regs_ = add_regs.values 337 add_reg_names = add_regs.columns.tolist() 338 else: 339 add_regs_ = np.atleast_2d(add_regs) 340 n_add_regs = add_regs_.shape[1] 341 assert add_regs_.shape[0] == np.size(frame_times), ValueError( 342 'Incorrect specification of additional regressors: ' 343 'length of regressors provided: %d, number of ' 344 'time-frames: %d' % (add_regs_.shape[0], np.size(frame_times))) 345 346 # check that additional regressor names are well specified 347 if add_reg_names is None: 348 add_reg_names = ['reg%d' % k for k in range(n_add_regs)] 349 elif len(add_reg_names) != n_add_regs: 350 raise ValueError( 351 'Incorrect number of additional regressor names was provided' 352 '(%d provided, %d expected' % (len(add_reg_names), 353 n_add_regs)) 354 355 # computation of the matrix 356 names = [] 357 matrix = None 358 359 # step 1: events-related regressors 360 if events is not None: 361 # create the condition-related regressors 362 if isinstance(hrf_model, str): 363 hrf_model = hrf_model.lower() 364 matrix, names = _convolve_regressors( 365 events, hrf_model, frame_times, fir_delays, min_onset, 366 oversampling) 367 368 # step 2: additional regressors 369 if add_regs is not None: 370 # add user-supplied regressors and corresponding names 371 if matrix is not None: 372 matrix = np.hstack((matrix, add_regs)) 373 else: 374 matrix = add_regs 375 names += add_reg_names 376 377 # step 3: drifts 378 drift, dnames = _make_drift(drift_model, frame_times, drift_order, 379 high_pass) 380 381 if matrix is not None: 382 matrix = np.hstack((matrix, drift)) 383 else: 384 matrix = drift 385 386 names += dnames 387 # check column names are all unique 388 if len(np.unique(names)) != len(names): 389 raise ValueError('Design matrix columns do not have unique names') 390 391 # step 4: Force the design matrix to be full rank at working precision 392 matrix, _ = full_rank(matrix) 393 394 design_matrix = pd.DataFrame( 395 matrix, columns=names, index=frame_times) 396 return design_matrix 397 398 399def check_design_matrix(design_matrix): 400 """Check that the provided DataFrame is indeed a valid design matrix 401 descriptor, and returns a triplet of fields 402 403 Parameters 404 ---------- 405 design matrix : pandas DataFrame, 406 Describes a design matrix. 407 408 Returns 409 ------- 410 frame_times : array of shape (n_frames,), 411 Sampling times of the design matrix in seconds. 412 413 matrix : array of shape (n_frames, n_regressors), dtype='f' 414 Numerical values for the design matrix. 415 416 names : array of shape (n_events,), dtype='f' 417 Per-event onset time (in seconds) 418 419 """ 420 names = [name for name in design_matrix.keys()] 421 frame_times = design_matrix.index 422 matrix = design_matrix.values 423 return frame_times, matrix, names 424 425 426def make_second_level_design_matrix(subjects_label, confounds=None): 427 """Sets up a second level design. 428 429 Construct a design matrix with an intercept and subject specific confounds. 430 431 Parameters 432 ---------- 433 subjects_label : list of str 434 Contain subject labels to extract confounders in the right order, 435 corresponding with the images, to create the design matrix. 436 437 confounds : pandas DataFrame, optional 438 If given, contains at least two columns, 'subject_label' and one 439 confound. The subjects list determines the rows to extract from 440 confounds thanks to its 'subject_label' column. All subjects must 441 have confounds specified. There should be only one row per subject. 442 443 Returns 444 ------- 445 design_matrix : pandas DataFrame 446 The second level design matrix. 447 448 """ 449 confounds_name = [] 450 if confounds is not None: 451 confounds_name = confounds.columns.tolist() 452 confounds_name.remove('subject_label') 453 454 design_columns = (confounds_name + ['intercept']) 455 # check column names are unique 456 if len(np.unique(design_columns)) != len(design_columns): 457 raise ValueError('Design matrix columns do not have unique names') 458 459 # float dtype necessary for linalg 460 design_matrix = pd.DataFrame(columns=design_columns, dtype=float) 461 for ridx, subject_label in enumerate(subjects_label): 462 design_matrix.loc[ridx] = [0] * len(design_columns) 463 design_matrix.loc[ridx, 'intercept'] = 1 464 if confounds is not None: 465 conrow = confounds['subject_label'] == subject_label 466 if np.sum(conrow) > 1: 467 raise ValueError('confounds contain more than one row for ' 468 'subject %s' % subject_label) 469 elif np.sum(conrow) == 0: 470 raise ValueError('confounds not specified for subject %s' % 471 subject_label) 472 for conf_name in confounds_name: 473 confounds_value = confounds[conrow][conf_name].values[0] 474 design_matrix.loc[ridx, conf_name] = confounds_value 475 476 # check design matrix is not singular 477 sys.float_info.epsilon 478 if np.linalg.cond(design_matrix.values) > design_matrix.size: 479 warn('Attention: Design matrix is singular. Aberrant estimates ' 480 'are expected.') 481 return design_matrix 482