1"""
2This module implements fMRI Design Matrix creation.
3
4Design matrices are represented by Pandas DataFrames
5Computations of the different parts of the design matrix are confined
6to the make_first_level_design_matrix function, that create a DataFrame
7All the others are ancillary functions.
8
9Design matrices contain three different types of regressors:
10
111. Task-related regressors, that result from the convolution
12   of the experimental paradigm regressors with hemodynamic models
13   A hemodynamic model is one of:
14
15         - 'spm' : linear filter used in the SPM software
16         - 'glover' : linear filter estimated by G.Glover
17         - 'spm + derivative', 'glover + derivative': the same linear models,
18            plus their time derivative (2 regressors per condition)
19         - 'spm + derivative + dispersion', 'glover + derivative + dispersion':
20            idem plus the derivative wrt the dispersion parameter of the hrf
21            (3 regressors per condition)
22         - 'fir' : finite impulse response model, generic linear filter
23
242. User-specified regressors, that represent information available on
25   the data, e.g. motion parameters, physiological data resampled at
26   the acquisition rate, or sinusoidal regressors that model the
27   signal at a frequency of interest.
28
293. Drift regressors, that represent low_frequency phenomena of no
30   interest in the data; they need to be included to reduce variance
31   estimates.
32
33Author: Bertrand Thirion, 2009-2015
34
35"""
36import sys
37from warnings import warn
38
39import numpy as np
40import pandas as pd
41
42from nilearn._utils.glm import full_rank
43from nilearn.glm.first_level.experimental_paradigm import check_events
44from nilearn.glm.first_level.hemodynamic_models import (_orthogonalize,
45                                                        compute_regressor)
46
47######################################################################
48# Ancillary functions
49######################################################################
50
51
52def _poly_drift(order, frame_times):
53    """Create a polynomial drift matrix
54
55    Parameters
56    ----------
57    order : int,
58        Number of polynomials in the drift model.
59
60    frame_times : array of shape(n_scans),
61        Time stamps used to sample polynomials.
62
63    Returns
64    -------
65    pol : ndarray, shape(n_scans, order + 1)
66         Estimated polynomial drifts plus a constant regressor.
67
68    """
69    order = int(order)
70    pol = np.zeros((np.size(frame_times), order + 1))
71    tmax = float(frame_times.max())
72    for k in range(order + 1):
73        pol[:, k] = (frame_times / tmax) ** k
74    pol = _orthogonalize(pol)
75    pol = np.hstack((pol[:, 1:], pol[:, :1]))
76    return pol
77
78
79def _cosine_drift(high_pass, frame_times):
80    """Create a cosine drift matrix with frequencies  or equal to
81    high_pass.
82
83    Parameters
84    ----------
85    high_pass : float
86        Cut frequency of the high-pass filter in Hz
87
88    frame_times : array of shape (n_scans,)
89        The sampling times in seconds
90
91    Returns
92    -------
93    cosine_drift : array of shape(n_scans, n_drifts)
94        Cosine drifts plus a constant regressor at cosine_drift[:, -1]
95
96    References
97    ----------
98    http://en.wikipedia.org/wiki/Discrete_cosine_transform DCT-II
99
100    """
101    n_frames = len(frame_times)
102    n_times = np.arange(n_frames)
103    dt = (frame_times[-1] - frame_times[0]) / (n_frames - 1)
104    if high_pass * dt >= .5:
105        warn('High-pass filter will span all accessible frequencies '
106             'and saturate the design matrix. '
107             'You may want to reduce the high_pass value.'
108             'The provided value is {0} Hz'.format(high_pass))
109    order = np.minimum(n_frames - 1,
110                       int(np.floor(2 * n_frames * high_pass * dt)))
111    cosine_drift = np.zeros((n_frames, order + 1))
112    normalizer = np.sqrt(2.0 / n_frames)
113
114    for k in range(1, order + 1):
115        cosine_drift[:, k - 1] = normalizer * np.cos(
116            (np.pi / n_frames) * (n_times + .5) * k)
117
118    cosine_drift[:, -1] = 1.
119    return cosine_drift
120
121
122def _none_drift(frame_times):
123    """ Create an intercept vector
124
125    Returns
126    -------
127    np.ones_like(frame_times)
128
129    """
130    return np.reshape(np.ones_like(frame_times), (np.size(frame_times), 1))
131
132
133def _make_drift(drift_model, frame_times, order, high_pass):
134    """Create the drift matrix
135
136    Parameters
137    ----------
138    drift_model : {'polynomial', 'cosine', None},
139        string that specifies the desired drift model
140
141    frame_times : array of shape(n_scans),
142        list of values representing the desired TRs
143
144    order : int, optional,
145        order of the drift model (in case it is polynomial)
146
147    high_pass : float, optional,
148        high-pass frequency in case of a cosine model (in Hz)
149
150    Returns
151    -------
152    drift : array of shape(n_scans, n_drifts),
153        the drift matrix
154
155    names : list of length(n_drifts),
156        the associated names
157
158    """
159    if isinstance(drift_model, str):
160        drift_model = drift_model.lower()  # for robust comparisons
161    if drift_model == 'polynomial':
162        drift = _poly_drift(order, frame_times)
163    elif drift_model == 'cosine':
164        drift = _cosine_drift(high_pass, frame_times)
165    elif drift_model is None:
166        drift = _none_drift(frame_times)
167    else:
168        raise NotImplementedError("Unknown drift model %r" % (drift_model))
169    names = []
170    for k in range(1, drift.shape[1]):
171        names.append('drift_%d' % k)
172    names.append('constant')
173    return drift, names
174
175
176def _convolve_regressors(events, hrf_model, frame_times, fir_delays=[0],
177                         min_onset=-24, oversampling=50):
178    """ Creation of  a matrix that comprises
179    the convolution of the conditions onset with a certain hrf model
180
181    Parameters
182    ----------
183    events : DataFrame instance,
184        Events data describing the experimental paradigm
185        see nistats.experimental_paradigm to check the specification
186        for these to be valid paradigm descriptors
187
188    hrf_model : {'spm', 'spm + derivative', 'spm + derivative + dispersion',
189        'glover', 'glover + derivative', 'glover + derivative + dispersion',
190        'fir', None}
191        String that specifies the hemodynamic response function
192
193    frame_times : array of shape (n_scans,)
194        The targeted timing for the design matrix.
195
196    fir_delays : array-like of shape (n_onsets,), optional
197        In case of FIR design, yields the array of delays
198        used in the FIR model (in scans). Default=[0].
199
200    min_onset : float, optional
201        Minimal onset relative to frame_times[0] (in seconds) events
202        that start before frame_times[0] + min_onset are not considered.
203        Default=-24.
204
205    oversampling : int, optional
206        Oversampling factor used in temporal convolutions. Default=50.
207
208    Returns
209    -------
210    regressor_matrix : array of shape (n_scans, n_regressors),
211        Contains the convolved regressors associated with the
212        experimental conditions.
213
214    regressor_names : list of strings,
215        The regressor names, that depend on the hrf model used
216        if 'glover' or 'spm' then this is identical to the input names
217        if 'glover + derivative' or 'spm + derivative', a second name is output
218            i.e. '#name_derivative'
219        if 'spm + derivative + dispersion' or
220            'glover + derivative + dispersion',
221            a third name is used, i.e. '#name_dispersion'
222        if 'fir', the regressos are numbered according to '#name_#delay'
223
224    """
225    regressor_names = []
226    regressor_matrix = None
227    trial_type, onset, duration, modulation = check_events(events)
228    for condition in np.unique(trial_type):
229        condition_mask = (trial_type == condition)
230        exp_condition = (onset[condition_mask],
231                         duration[condition_mask],
232                         modulation[condition_mask])
233        reg, names = compute_regressor(
234            exp_condition, hrf_model, frame_times, con_id=condition,
235            fir_delays=fir_delays, oversampling=oversampling,
236            min_onset=min_onset)
237
238        regressor_names += names
239        if regressor_matrix is None:
240            regressor_matrix = reg
241        else:
242            regressor_matrix = np.hstack((regressor_matrix, reg))
243    return regressor_matrix, regressor_names
244
245
246######################################################################
247# Design matrix creation
248######################################################################
249
250
251def make_first_level_design_matrix(
252        frame_times, events=None, hrf_model='glover',
253        drift_model='cosine', high_pass=.01, drift_order=1, fir_delays=[0],
254        add_regs=None, add_reg_names=None, min_onset=-24, oversampling=50):
255    """Generate a design matrix from the input parameters
256
257    Parameters
258    ----------
259    frame_times : array of shape (n_frames,)
260        The timing of acquisition of the scans in seconds.
261
262    events : DataFrame instance, optional
263        Events data that describes the experimental paradigm.
264         The DataFrame instance might have these keys:
265            'onset': column to specify the start time of each events in
266                     seconds. An error is raised if this key is missing.
267            'trial_type': column to specify per-event experimental conditions
268                          identifier. If missing each event are labelled
269                          'dummy' and considered to form a unique condition.
270            'duration': column to specify the duration of each events in
271                        seconds. If missing the duration of each events is set
272                        to zero.
273            'modulation': column to specify the amplitude of each
274                          events. If missing the default is set to
275                          ones(n_events).
276
277        An experimental paradigm is valid if it has an 'onset' key
278        and a 'duration' key.
279        If these keys are missing an error will be raised.
280        For the others keys a warning will be displayed.
281        Particular attention should be given to the 'trial_type' key
282        which defines the different conditions in the experimental paradigm.
283
284    hrf_model : {'glover', 'spm', 'spm + derivative', \
285            'spm + derivative + dispersion', 'glover + derivative', \
286            'glover + derivative + dispersion', \
287            'fir', None}, optional
288        Specifies the hemodynamic response function. Default='glover'.
289
290    drift_model : {'cosine', 'polynomial', None}, optional
291        Specifies the desired drift model. Default='cosine'.
292
293    high_pass : float, optional
294        High-pass frequency in case of a cosine model (in Hz).
295        Default=0.01.
296
297    drift_order : int, optional
298        Order of the drift model (in case it is polynomial).
299        Default=1.
300
301    fir_delays : array of shape(n_onsets) or list, optional
302        In case of FIR design, yields the array of delays used in the FIR
303        model (in scans). Default=[0].
304
305    add_regs : array of shape(n_frames, n_add_reg) or \
306            pandas DataFrame, optional
307        additional user-supplied regressors, e.g. data driven noise regressors
308        or seed based regressors.
309
310    add_reg_names : list of (n_add_reg,) strings, optional
311        If None, while add_regs was provided, these will be termed
312        'reg_%i', i = 0..n_add_reg - 1
313        If add_regs is a DataFrame, the corresponding column names are used
314        and add_reg_names is ignored.
315
316    min_onset : float, optional
317        Minimal onset relative to frame_times[0] (in seconds)
318        events that start before frame_times[0] + min_onset are not considered.
319        Default=-24.
320
321    oversampling : int, optional
322        Oversampling factor used in temporal convolutions. Default=50.
323
324    Returns
325    -------
326    design_matrix : DataFrame instance,
327        holding the computed design matrix, the index being the frames_times
328        and each column a regressor.
329
330    """
331    # check arguments
332    # check that additional regressor specification is correct
333    n_add_regs = 0
334    if add_regs is not None:
335        if isinstance(add_regs, pd.DataFrame):
336            add_regs_ = add_regs.values
337            add_reg_names = add_regs.columns.tolist()
338        else:
339            add_regs_ = np.atleast_2d(add_regs)
340        n_add_regs = add_regs_.shape[1]
341        assert add_regs_.shape[0] == np.size(frame_times), ValueError(
342            'Incorrect specification of additional regressors: '
343            'length of regressors provided: %d, number of '
344            'time-frames: %d' % (add_regs_.shape[0], np.size(frame_times)))
345
346    # check that additional regressor names are well specified
347    if add_reg_names is None:
348        add_reg_names = ['reg%d' % k for k in range(n_add_regs)]
349    elif len(add_reg_names) != n_add_regs:
350        raise ValueError(
351            'Incorrect number of additional regressor names was provided'
352            '(%d provided, %d expected' % (len(add_reg_names),
353                                           n_add_regs))
354
355    # computation of the matrix
356    names = []
357    matrix = None
358
359    # step 1: events-related regressors
360    if events is not None:
361        # create the condition-related regressors
362        if isinstance(hrf_model, str):
363            hrf_model = hrf_model.lower()
364        matrix, names = _convolve_regressors(
365            events, hrf_model, frame_times, fir_delays, min_onset,
366            oversampling)
367
368    # step 2: additional regressors
369    if add_regs is not None:
370        # add user-supplied regressors and corresponding names
371        if matrix is not None:
372            matrix = np.hstack((matrix, add_regs))
373        else:
374            matrix = add_regs
375        names += add_reg_names
376
377    # step 3: drifts
378    drift, dnames = _make_drift(drift_model, frame_times, drift_order,
379                                high_pass)
380
381    if matrix is not None:
382        matrix = np.hstack((matrix, drift))
383    else:
384        matrix = drift
385
386    names += dnames
387    # check column names are all unique
388    if len(np.unique(names)) != len(names):
389        raise ValueError('Design matrix columns do not have unique names')
390
391    # step 4: Force the design matrix to be full rank at working precision
392    matrix, _ = full_rank(matrix)
393
394    design_matrix = pd.DataFrame(
395        matrix, columns=names, index=frame_times)
396    return design_matrix
397
398
399def check_design_matrix(design_matrix):
400    """Check that the provided DataFrame is indeed a valid design matrix
401    descriptor, and returns a triplet of fields
402
403    Parameters
404    ----------
405    design matrix : pandas DataFrame,
406        Describes a design matrix.
407
408    Returns
409    -------
410    frame_times : array of shape (n_frames,),
411        Sampling times of the design matrix in seconds.
412
413    matrix : array of shape (n_frames, n_regressors), dtype='f'
414        Numerical values for the design matrix.
415
416    names : array of shape (n_events,), dtype='f'
417        Per-event onset time (in seconds)
418
419    """
420    names = [name for name in design_matrix.keys()]
421    frame_times = design_matrix.index
422    matrix = design_matrix.values
423    return frame_times, matrix, names
424
425
426def make_second_level_design_matrix(subjects_label, confounds=None):
427    """Sets up a second level design.
428
429    Construct a design matrix with an intercept and subject specific confounds.
430
431    Parameters
432    ----------
433    subjects_label : list of str
434        Contain subject labels to extract confounders in the right order,
435        corresponding with the images, to create the design matrix.
436
437    confounds : pandas DataFrame, optional
438        If given, contains at least two columns, 'subject_label' and one
439        confound. The subjects list determines the rows to extract from
440        confounds thanks to its 'subject_label' column. All subjects must
441        have confounds specified. There should be only one row per subject.
442
443    Returns
444    -------
445    design_matrix : pandas DataFrame
446        The second level design matrix.
447
448    """
449    confounds_name = []
450    if confounds is not None:
451        confounds_name = confounds.columns.tolist()
452        confounds_name.remove('subject_label')
453
454    design_columns = (confounds_name + ['intercept'])
455    # check column names are unique
456    if len(np.unique(design_columns)) != len(design_columns):
457        raise ValueError('Design matrix columns do not have unique names')
458
459    # float dtype necessary for linalg
460    design_matrix = pd.DataFrame(columns=design_columns, dtype=float)
461    for ridx, subject_label in enumerate(subjects_label):
462        design_matrix.loc[ridx] = [0] * len(design_columns)
463        design_matrix.loc[ridx, 'intercept'] = 1
464        if confounds is not None:
465            conrow = confounds['subject_label'] == subject_label
466            if np.sum(conrow) > 1:
467                raise ValueError('confounds contain more than one row for '
468                                 'subject %s' % subject_label)
469            elif np.sum(conrow) == 0:
470                raise ValueError('confounds not specified for subject %s' %
471                                 subject_label)
472            for conf_name in confounds_name:
473                confounds_value = confounds[conrow][conf_name].values[0]
474                design_matrix.loc[ridx, conf_name] = confounds_value
475
476    # check design matrix is not singular
477    sys.float_info.epsilon
478    if np.linalg.cond(design_matrix.values) > design_matrix.size:
479        warn('Attention: Design matrix is singular. Aberrant estimates '
480             'are expected.')
481    return design_matrix
482