1"""
2sklearn-compatible implementation of spatially structured learners (
3TV-L1, Graph-Net, etc.)
4
5"""
6# Author: DOHMATOB Elvis Dopgima,
7#         PIZARRO Gaspar,
8#         VAROQUAUX Gael,
9#         GRAMFORT Alexandre,
10#         EICKENBERG Michael,
11#         THIRION Bertrand
12# License: simplified BSD
13
import warnings
import numbers
import time
import sys
from functools import partial
import numpy as np
from scipy import stats, ndimage
from sklearn.utils.extmath import safe_sparse_dot
from sklearn.utils import check_array
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import (SelectPercentile, f_regression,
                                       f_classif)
from joblib import Memory, Parallel, delayed
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import accuracy_score
from ..input_data.masker_validation import check_embedded_nifti_masker
from .._utils.param_validation import _adjust_screening_percentile
from .._utils import fill_doc
from sklearn.utils import check_X_y
from sklearn.model_selection import check_cv
try:
    from sklearn.linear_model._base import _preprocess_data as center_data
except ImportError:
    # scikit-learn < 0.23
    from sklearn.linear_model.base import _preprocess_data as center_data
from .._utils.cache_mixin import CacheMixin
from nilearn.masking import _unmask_from_to_3d_array
from .space_net_solvers import (tvl1_solver, _graph_net_logistic,
                                _graph_net_squared_loss)
from nilearn.image import get_data


def _crop_mask(mask):
    """Crop the input mask to the tightest (i.e. smallest) bounding box
    with the same support (active voxels)."""
    idx = np.where(mask)
    if idx[0].size == 0:
        raise ValueError("Empty mask: if you have given a mask, it is "
                         "empty, and if you have not given a mask, the "
                         "mask-extraction routines have failed. Please "
                         "provide an appropriate mask.")
    i_min = max(idx[0].min() - 1, 0)
    i_max = idx[0].max()
    j_min = max(idx[1].min() - 1, 0)
    j_max = idx[1].max()
    k_min = max(idx[2].min() - 1, 0)
    k_max = idx[2].max()
    return mask[i_min:i_max + 1, j_min:j_max + 1, k_min:k_max + 1]


@fill_doc
def _univariate_feature_screening(
        X, y, mask, is_classif, screening_percentile, smoothing_fwhm=2.):
    """
    Select the most important features, via a univariate test.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Design matrix.

    y : ndarray, shape (n_samples,)
        Response vector.

    mask : ndarray of booleans, shape (nx, ny, nz)
        Mask defining brain ROIs.

    is_classif : bool
        Flag telling whether the learning task is classification or regression.

    screening_percentile : float in the closed interval [0., 100.]
        Only the `screening_percentile` percent most important voxels will
        be retained.
    %(smoothing_fwhm)s
        Default=2.

    Returns
    -------
    X_ : ndarray, shape (n_samples, n_features_)
        Reduced design matrix with only columns corresponding to the voxels
        retained after screening.

    mask_ : ndarray of booleans, shape (nx, ny, nz)
        Mask with support reduced to only contain voxels retained after
        screening.

    support : ndarray of ints, shape (n_features_,)
        Support of the screened mask, as a subset of the support of the
        original mask.
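
    Examples
    --------
    A minimal sketch on random toy data (the arrays below are illustrative
    placeholders, not real imaging data)::

        >>> import numpy as np
        >>> rng = np.random.RandomState(42)
        >>> mask = np.ones((5, 5, 5), dtype=bool)
        >>> X = rng.randn(20, int(mask.sum()))
        >>> y = rng.randn(20)
        >>> X_, mask_, support = _univariate_feature_screening(
        ...     X, y, mask, is_classif=False, screening_percentile=50.)
        >>> # X_, mask_ and support describe the voxels retained after
        >>> # screening and morphological cleaning of the mask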
103    """
104    # smooth the data (with isotropic Gaussian kernel) before screening
105    if smoothing_fwhm > 0.:
106        sX = np.empty(X.shape)
107        for sample in range(sX.shape[0]):
108            sX[sample] = ndimage.gaussian_filter(
109                _unmask_from_to_3d_array(X[sample].copy(),  # avoid modifying X
110                                         mask), (smoothing_fwhm, smoothing_fwhm,
111                                                 smoothing_fwhm))[mask]
112    else:
113        sX = X
114
115    # do feature screening proper
116    selector = SelectPercentile(f_classif if is_classif else f_regression,
117                                percentile=screening_percentile).fit(sX, y)
118    support = selector.get_support()
119
120    # erode and then dilate mask, thus obtaining a "cleaner" version of
121    # the mask on which a spatial prior actually makes sense
122    mask_ = mask.copy()
123    mask_[mask] = (support > 0)
124    mask_ = ndimage.binary_dilation(ndimage.binary_erosion(
125        mask_)).astype(bool)
126    mask_[np.logical_not(mask)] = 0
127    support = mask_[mask]
128    X = X[:, support]
129
130    return X, mask_, support
131
132
def _space_net_alpha_grid(X, y, eps=1e-3, n_alphas=10, l1_ratio=1.,
                          logistic=False):
    """Compute the grid of alpha values for TV-L1 and Graph-Net.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Training data (design matrix).

    y : ndarray, shape (n_samples,)
        Target / response vector.

    l1_ratio : float, optional
        The ElasticNet mixing parameter, with ``0 <= l1_ratio <= 1``.
        For ``l1_ratio = 0`` the penalty is purely a spatial prior
        (Graph-Net, TV, etc.). For ``l1_ratio = 1`` it is an L1 penalty.
        For ``0 < l1_ratio < 1``, the penalty is a combination of L1
        and a spatial prior.
        Default=1.

    eps : float, optional
        Length of the path. ``eps=1e-3`` means that
        ``alpha_min / alpha_max = 1e-3``.
        Default=1e-3.

    n_alphas : int, optional
        Number of alphas along the regularization path.
        Default=10.

    logistic : bool, optional
        Indicates whether the underlying loss function is logistic.
        Default=False.
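
    Examples
    --------
    A minimal sketch of how the grid is laid out, on toy data (the arrays
    below are illustrative placeholders)::

        >>> import numpy as np
        >>> X = np.eye(3)
        >>> y = np.array([1., 2., 3.])
        >>> grid = _space_net_alpha_grid(X, y, eps=1e-2, n_alphas=3)
        >>> # grid is [3., 0.3, 0.03]: alpha_max = max|X.T y| = 3,
        >>> # decreasing geometrically down to alpha_max * eps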
    """
    if logistic:
        # Computes the theoretical upper bound for the overall
        # regularization, as derived in "An Interior-Point Method for
        # Large-Scale l1-Regularized Logistic Regression", by Koh, Kim,
        # Boyd, in Journal of Machine Learning Research, 8:1519-1555,
        # July 2007.
        # url: http://www.stanford.edu/~boyd/papers/pdf/l1_logistic_reg.pdf
        m = float(y.size)
        m_plus = float(y[y == 1].size)
        m_minus = float(y[y == -1].size)
        b = np.zeros_like(y)
        b[y == 1] = m_minus / m
        b[y == -1] = - m_plus / m
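        # e.g. for y = [1, 1, -1]: m = 3, m_plus = 2, m_minus = 1, so
        # b = [1/3, 1/3, -2/3]; note that b sums to zero by construction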
        alpha_max = np.max(np.abs(X.T.dot(b)))

        # it may happen that b is in the kernel of X.T!
        if alpha_max == 0.:
            alpha_max = np.abs(np.dot(X.T, y)).max()
    else:
        alpha_max = np.abs(np.dot(X.T, y)).max()

    # prevent alpha_max from exploding when l1_ratio = 0
    if l1_ratio == 0.:
        l1_ratio = 1e-3
    alpha_max /= l1_ratio

    if n_alphas == 1:
        return np.array([alpha_max])

    alpha_min = alpha_max * eps
    return np.logspace(np.log10(alpha_min), np.log10(alpha_max),
                       num=n_alphas)[::-1]


class _EarlyStoppingCallback(object):
    """Out-of-bag early stopping.

    A callable that returns True when the test error starts
    rising. We use a Spearman correlation (between X_test.w and y_test)
    for scoring.
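
    Examples
    --------
    A minimal sketch on toy arrays (purely illustrative values)::

        >>> import numpy as np
        >>> rng = np.random.RandomState(0)
        >>> X_test, y_test = rng.randn(5, 4), rng.randn(5)
        >>> stopper = _EarlyStoppingCallback(X_test, y_test,
        ...                                  is_classif=False)
        >>> # the solver calls this after each iteration; it returns True
        >>> # once the out-of-bag score stops improving
        >>> stopper(dict(w=rng.randn(4)))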
207    """
208
    def __init__(self, X_test, y_test, is_classif, debias=False, verbose=0):
        self.X_test = X_test
        self.y_test = y_test
        self.is_classif = is_classif
        self.debias = debias
        self.verbose = verbose
        self.tol = -1e-4 if self.is_classif else -1e-2
        self.test_scores = []
        self.counter = 0.

    def __call__(self, variables):
        """The callback proper."""
        # misc
        if not isinstance(variables, dict):
            variables = dict(w=variables)
        self.counter += 1
        w = variables['w']

        # use Spearman score as stopping criterion
        score = self.test_score(w)[0]

        self.test_scores.append(score)
        if not (self.counter > 20 and (self.counter % 10) == 2):
            return

        # stop if the test score has not improved (beyond tolerance), on
        # average, over the last 5 iterations
        if len(self.test_scores) > 4:
            if np.mean(np.diff(self.test_scores[-5:][::-1])) >= self.tol:
                if self.verbose:
                    if self.verbose > 1:
                        print('Early stopping. Test score: %.8f %s' % (
                              score, 40 * '-'))
                    else:
                        sys.stderr.write('.')
                return True

        if self.verbose > 1:
            print('Test score: %.8f' % score)
        return False

    def _debias(self, w):
        """Debias w by rescaling the coefficients by a fixed factor.

        Precisely, the scaling factor is: <y_pred, y_test> / ||y_pred||^2.
        """
        y_pred = np.dot(self.X_test, w)
        scaling = np.dot(y_pred, y_pred)
        if scaling > 0.:
            scaling = np.dot(y_pred, self.y_test) / scaling
            w *= scaling
        return w

    def test_score(self, w):
        """Compute the test score of the model, given a weights map `w`.

        We use correlations between the linear prediction and the
        ground truth (y_test).

        We return 2 scores for model selection: one is the Spearman
        correlation, which captures ordering between input and
        output, but tends to have 'flat' regions. The other
        is the Pearson correlation, which we can use to disambiguate
        between regions with equivalent Spearman correlation.

        """
        if self.is_classif:
            w = w[:-1]
        if w.ptp() == 0:
            # constant map; nothing to score
            return (-np.inf, -np.inf)
        y_pred = np.dot(self.X_test, w)
        spearman_score = stats.spearmanr(y_pred, self.y_test)[0]
        pearson_score = np.corrcoef(y_pred, self.y_test)[1, 0]
        if self.is_classif:
            return spearman_score, pearson_score
        else:
            return pearson_score, spearman_score


@fill_doc
def path_scores(solver, X, y, mask, alphas, l1_ratios, train, test,
                solver_params, is_classif=False, n_alphas=10, eps=1E-3,
                key=None, debias=False, Xmean=None,
                screening_percentile=20., verbose=1):
    """Compute scores of different alphas in regression and
    classification, as used by CV objects.

    Parameters
    ----------
    X : 2D array of shape (n_samples, n_features)
        Design matrix, one row per sample point.

    y : 1D array of length n_samples
        Response vector; one value per sample.

    mask : 3D array of booleans
        Mask defining brain regions that we work on.

    alphas : list of floats
        List of regularization parameters being considered.

    train : array or list of integers
        List of indices for the train samples.

    test : array or list of integers
        List of indices for the test samples.

    l1_ratios : float or list of floats in the interval [0, 1]
        Constant that mixes L1 and TV (resp. Graph-Net) penalization.
        l1_ratio == 0: just smooth. l1_ratio == 1: just lasso.

    eps : float, optional (default 1e-3)
        Length of the path. For example, ``eps=1e-3`` means that
        ``alpha_min / alpha_max = 1e-3``.

    n_alphas : int, optional (default 10)
        Generate this number of alphas per regularization path.
        This parameter is mutually exclusive with the `alphas` parameter.

    solver : function handle
       See for example tv.TVl1Classifier documentation.

    solver_params : dict
       Dictionary of param-value pairs to be passed to solver.

    is_classif : bool, optional
        Indicates whether the loss is a classification loss or a
        regression loss. Default=False.

    Xmean : ??? TODO: Add description.

    key : ??? TODO: Add description.

    debias : bool, optional
        If set, then the estimated weights maps will be debiased.
        Default=False.

    screening_percentile : float in the interval [0, 100], optional
        Percentile value for ANOVA univariate feature selection. A value of
        100 means 'keep all features'. This percentile is expressed
        w.r.t the volume of a standard (MNI152) brain, and so is corrected
        at runtime to correspond to the volume of the user-supplied mask
        (which is typically smaller). If '100' is given, all the features
        are used, regardless of the number of voxels.
        Default=20.
    %(verbose)s

    """
    if l1_ratios is None:
        raise ValueError("l1_ratios must be specified!")

    # misc
    _, n_features = X.shape
    verbose = int(verbose if verbose is not None else 0)

    # Univariate feature screening. Note that if we have only as few as 100
    # features in the mask's support, then we should use all of them to
    # learn the model (i.e. disable this screening).
    do_screening = (n_features > 100) and screening_percentile < 100.
    if do_screening:
        X, mask, support = _univariate_feature_screening(
            X, y, mask, is_classif, screening_percentile)

    # crop the mask to have a tighter bounding box
    mask = _crop_mask(mask)

    # get train and test data
    X_train, y_train = X[train].copy(), y[train].copy()
    X_test, y_test = X[test].copy(), y[test].copy()

    # it is essential to center the data in regression
    X_train, y_train, _, y_train_mean, _ = center_data(
        X_train, y_train, fit_intercept=True, normalize=False,
        copy=False)

    # misc
    if isinstance(l1_ratios, numbers.Number):
        l1_ratios = [l1_ratios]
    l1_ratios = sorted(l1_ratios)[::-1]  # from large to small l1_ratios
    best_score = -np.inf
    best_secondary_score = -np.inf
    best_l1_ratio = l1_ratios[0]
    best_alpha = None
    best_init = None
    all_test_scores = []
    if len(test) > 0:
        # do l1_ratio path
        for l1_ratio in l1_ratios:
            this_test_scores = []

            # make alpha grid
            if alphas is None:
                alphas_ = _space_net_alpha_grid(
                    X_train, y_train, l1_ratio=l1_ratio, eps=eps,
                    n_alphas=n_alphas, logistic=is_classif)
            else:
                alphas_ = alphas
            alphas_ = sorted(alphas_)[::-1]  # from large to small alphas

            # do alpha path
            if best_alpha is None:
                best_alpha = alphas_[0]
            init = None
            path_solver_params = solver_params.copy()
            # Use a lighter tol during the path
            path_solver_params['tol'] = 2 * path_solver_params.get('tol', 1e-4)
            for alpha in alphas_:
                # setup callback mechanism for early stopping
                early_stopper = _EarlyStoppingCallback(
                    X_test, y_test, is_classif=is_classif, debias=debias,
                    verbose=verbose)
                w, _, init = solver(
                    X_train, y_train, alpha, l1_ratio, mask=mask, init=init,
                    callback=early_stopper, verbose=max(verbose - 1, 0.),
                    **path_solver_params)

                # We use 2 scores for model selection: the second one is to
                # disambiguate between regions of equivalent Spearman
                # correlations
                score, secondary_score = early_stopper.test_score(w)
                this_test_scores.append(score)
                if (np.isfinite(score) and
                        (score > best_score
                         or (score == best_score and
                             secondary_score > best_secondary_score))):
                    best_secondary_score = secondary_score
                    best_score = score
                    best_l1_ratio = l1_ratio
                    best_alpha = alpha
                    best_init = init.copy()
            all_test_scores.append(this_test_scores)
    else:
        if alphas is None:
            alphas_ = _space_net_alpha_grid(
                X_train, y_train, l1_ratio=best_l1_ratio, eps=eps,
                n_alphas=n_alphas, logistic=is_classif)
        else:
            alphas_ = alphas
        best_alpha = alphas_[0]

    # re-fit the best model to high precision (i.e. without early stopping,
    # etc.)
    best_w, _, init = solver(X_train, y_train, best_alpha, best_l1_ratio,
                             mask=mask, init=best_init,
                             verbose=max(verbose - 1, 0), **solver_params)
    if debias:
        best_w = _EarlyStoppingCallback(
            X_test, y_test, is_classif=is_classif, debias=debias,
            verbose=verbose)._debias(best_w)

    if len(test) == 0:
        all_test_scores.append(np.nan)

    # unmask univariate screening
    if do_screening:
        w_ = np.zeros(len(support))
        if is_classif:
            w_ = np.append(w_, best_w[-1])
            w_[:-1][support] = best_w[:-1]
        else:
            w_[support] = best_w
        best_w = w_

    if len(best_w) == n_features:
        if Xmean is None:
            Xmean = np.zeros(n_features)
        best_w = np.append(best_w, 0.)

    all_test_scores = np.array(all_test_scores)
    return (all_test_scores, best_w, best_alpha, best_l1_ratio, alphas_,
            y_train_mean, key)


@fill_doc
class BaseSpaceNet(LinearRegression, CacheMixin):
    """
    Regression and classification learners with sparsity and spatial priors.

    `SpaceNet` implements Graph-Net and TV-L1 priors / penalties. Thus,
    the penalty is a sum of an L1 term and a spatial term. The aim of
    such a hybrid prior is to obtain weights maps which are structured
    (due to the spatial prior) and sparse (enforced by the L1 norm).

    Parameters
    ----------
    penalty : string, optional (default 'graph-net')
        Penalty to be used in the model. Can be 'graph-net' or 'tv-l1'.

    loss : string, optional (default None)
        Loss to be used in the model. Must be one of "mse" or "logistic".

    is_classif : bool, optional (default False)
        Flag telling whether the learning task is classification or regression.

    l1_ratios : float or list of floats in the interval [0, 1]; optional (default .5)
        Constant that mixes L1 and spatial prior terms in penalization.
        l1_ratio == 1 corresponds to pure LASSO. The larger the value of this
        parameter, the sparser the estimated weights map. If a list is
        provided, then the best value will be selected by cross-validation.

    alphas : float or list of floats, optional (default None)
        Choices for the constant that scales the overall regularization term.
        This parameter is mutually exclusive with the `n_alphas` parameter.
        If None or a list of floats is provided, then the best value will be
        selected by cross-validation.

    n_alphas : int, optional (default 10)
        Generate this number of alphas per regularization path.
        This parameter is mutually exclusive with the `alphas` parameter.

    eps : float, optional (default 1e-3)
        Length of the path. For example, ``eps=1e-3`` means that
        ``alpha_min / alpha_max = 1e-3``.

    mask : filename, niimg, NiftiMasker instance, optional (default None)
        Mask to be used on data. If an instance of masker is passed,
        then its mask will be used. If no mask is given, it will be computed
        automatically by a NiftiMasker.
    %(target_affine)s
        An important use-case of this parameter is for downsampling the
        input data to a coarser resolution (to speed up the model fit).
    %(target_shape)s
    %(low_pass)s
    %(high_pass)s
    %(t_r)s
    screening_percentile : float in the interval [0, 100]; optional (default 20)
        Percentile value for ANOVA univariate feature selection. A value of
        100 means 'keep all features'. This percentile is expressed
        w.r.t the volume of a standard (MNI152) brain, and so is corrected
        at runtime to correspond to the volume of the user-supplied mask
        (which is typically smaller). If '100' is given, all the features
        are used, regardless of the number of voxels.

    standardize : bool, optional (default True)
        If set, then the data (X, y) are centered to have mean zero along
        axis 0. This is here because nearly all linear models will want
        their data to be centered.

    fit_intercept : bool, optional (default True)
        Fit or not an intercept.

    max_iter : int, optional (default 200)
        Defines the iterations for the solver.

    tol : float, optional (default 5e-4)
        Defines the tolerance for convergence of the backend FISTA solver.
    %(verbose)s
    %(n_jobs)s
    %(memory)s
    %(memory_level1)s
    cv : int, a cv generator instance, or None (default 8)
        The input specifying which cross-validation generator to use.
        It can be an integer, in which case it is the number of folds in a
        KFold, None, in which case 3-fold is used, or another object, that
        will then be used as a cv generator.

    debias : bool, optional (default False)
        If set, then the estimated weights maps will be debiased.

    Attributes
    ----------
    `all_coef_` : ndarray, shape (n_l1_ratios, n_folds, n_features)
        Coefficients for all folds and features.

    `alpha_grids_` : ndarray, shape (n_folds, n_alphas)
        Alpha values considered for selection of the best ones
        (saved in `best_model_params_`).

    `best_model_params_` : ndarray, shape (n_folds, n_parameter)
        Best model parameters (alpha, l1_ratio) saved for the different
        cross-validation folds.

    `classes_` : ndarray of labels (`n_classes_`)
        Labels of the classes (for classification problems).

    `n_classes_` : int
        Number of classes (for classification problems).

    `coef_` : ndarray, shape
        (1, n_features) for 2-class classification problems (i.e. n_classes = 2),
        (n_classes, n_features) for n_classes > 2.
        Coefficient of the features in the decision function.

    `coef_img_` : nifti image
        Masked model coefficients.

    `mask_` : ndarray 3D
        An array containing the values of the mask image.

    `masker_` : instance of NiftiMasker
        The nifti masker used to mask the data.

    `mask_img_` : Nifti-like image
        The mask of the data. If no mask was supplied by the user,
        this attribute is the mask image computed automatically from the
        data `X`.

    `memory_` : joblib memory cache

    `intercept_` : narray, shape
        (1,) for 2-class classification problems (i.e. n_classes = 2),
        (n_classes,) for n_classes > 2.
        Intercept (a.k.a. bias) added to the decision function.
        It is available only when the parameter `fit_intercept` is set
        to True.

    `cv_` : list of pairs of lists
        Each pair is the list of indices for the train and test samples
        for the corresponding fold.

    `cv_scores_` : ndarray, shape (n_folds, n_alphas) or (n_l1_ratios, n_folds, n_alphas)
        Scores (misclassification) for each alpha, and on each fold.

    `screening_percentile_` : float
        Screening percentile corrected according to the volume of the mask,
        relative to the volume of a standard brain.

    `w_` : ndarray, shape
        (1, n_features + 1) for 2-class classification problems (i.e. n_classes = 2),
        (n_classes, n_features + 1) for n_classes > 2, and (n_features,) for
        regression.
        Model weights.

    `ymean_` : array, shape (n_samples,)
        Mean of prediction targets.

    `Xmean_` : array, shape (n_features,)
        Mean of X across samples.

    `Xstd_` : array, shape (n_features,)
        Standard deviation of X across samples.
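
    Examples
    --------
    This base class is not meant to be used directly; a hedged sketch of
    typical usage via a concrete subclass (`niimgs` and `y` below are
    placeholders for your own Niimg-like data and targets)::

        >>> from nilearn.decoding import SpaceNetRegressor
        >>> model = SpaceNetRegressor(penalty="graph-net", l1_ratios=.5)
        >>> # model.fit(niimgs, y)
        >>> # y_pred = model.predict(niimgs)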
640    """
    SUPPORTED_PENALTIES = ["graph-net", "tv-l1"]
    SUPPORTED_LOSSES = ["mse", "logistic"]

    def __init__(self, penalty="graph-net", is_classif=False, loss=None,
                 l1_ratios=.5, alphas=None, n_alphas=10, mask=None,
                 target_affine=None, target_shape=None, low_pass=None,
                 high_pass=None, t_r=None, max_iter=200, tol=5e-4,
                 memory=None, memory_level=1, standardize=True, verbose=1,
                 mask_args=None,
                 n_jobs=1, eps=1e-3, cv=8, fit_intercept=True,
                 screening_percentile=20., debias=False):
        self.penalty = penalty
        self.is_classif = is_classif
        self.loss = loss
        self.n_alphas = n_alphas
        self.eps = eps
        self.l1_ratios = l1_ratios
        self.alphas = alphas
        self.mask = mask
        self.fit_intercept = fit_intercept
        self.memory = memory
        self.memory_level = memory_level
        self.max_iter = max_iter
        self.tol = tol
        self.verbose = verbose
        self.standardize = standardize
        self.n_jobs = n_jobs
        self.cv = cv
        self.screening_percentile = screening_percentile
        self.debias = debias
        self.low_pass = low_pass
        self.high_pass = high_pass
        self.t_r = t_r
        self.target_affine = target_affine
        self.target_shape = target_shape
        self.mask_args = mask_args

        # sanity check on params
        self.check_params()

    def check_params(self):
        """Makes sure parameters are sane."""
        if self.l1_ratios is not None:
            l1_ratios = self.l1_ratios
            if isinstance(l1_ratios, numbers.Number):
                l1_ratios = [l1_ratios]
            for l1_ratio in l1_ratios:
                if not 0 <= l1_ratio <= 1.:
                    raise ValueError(
                        "l1_ratio must be in the interval [0, 1]; got %g" % (
                            l1_ratio))
                elif l1_ratio == 0. or l1_ratio == 1.:
                    warnings.warn(
                        ("Specified l1_ratio = %g. It's advised to only "
                         "specify values of l1_ratio strictly between 0 "
                         "and 1." % l1_ratio))
        if not (0. <= self.screening_percentile <= 100.):
            raise ValueError(
                ("screening_percentile should be in the interval"
                 " [0, 100], got %g" % self.screening_percentile))
        if self.penalty not in self.SUPPORTED_PENALTIES:
            raise ValueError(
                "'penalty' parameter must be one of %s%s or %s; got %s" % (
                    ",".join(self.SUPPORTED_PENALTIES[:-1]), "," if len(
                        self.SUPPORTED_PENALTIES) > 2 else "",
                    self.SUPPORTED_PENALTIES[-1], self.penalty))
        if not (self.loss is None or self.loss in self.SUPPORTED_LOSSES):
            raise ValueError(
                "'loss' parameter must be one of %s%s or %s; got %s" % (
                    ",".join(self.SUPPORTED_LOSSES[:-1]), "," if len(
                        self.SUPPORTED_LOSSES) > 2 else "",
                    self.SUPPORTED_LOSSES[-1], self.loss))
        if self.loss is not None and not self.is_classif and (
                self.loss == "logistic"):
            raise ValueError(
                ("'logistic' loss is only available for classification "
                 "problems."))

    def _set_coef_and_intercept(self, w):
        """Sets the loadings vector (coef) and the intercept of the fitted
        model."""
        self.w_ = np.array(w)
        if self.w_.ndim == 1:
            self.w_ = self.w_[np.newaxis, :]
        self.coef_ = self.w_[:, :-1]
        if self.is_classif:
            self.intercept_ = self.w_[:, -1]
        else:
            self._set_intercept(self.Xmean_, self.ymean_, self.Xstd_)

    def fit(self, X, y):
        """Fit the learner.

        Parameters
        ----------
        X : list of Niimg-like objects
            See http://nilearn.github.io/manipulating_images/input_output.html
            Data on which the model is to be fitted. If this is a list,
            the affine is considered the same for all.

        y : array or list of length n_samples
            The dependent variable (age, sex, IQ, etc.).

        Returns
        -------
        self : `SpaceNet` object
            The fitted model. Model selection is via cross-validation
            with bagging.
        """
        # misc
        self.check_params()
        if self.memory is None or isinstance(self.memory, str):
            self.memory_ = Memory(self.memory,
                                  verbose=max(0, self.verbose - 1))
        else:
            self.memory_ = self.memory
        if self.verbose:
            tic = time.time()

        # nifti masking
        self.masker_ = check_embedded_nifti_masker(self, multi_subject=False)
        X = self.masker_.fit_transform(X)

        X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], dtype=float,
                         multi_output=True, y_numeric=not self.is_classif)

        if not self.is_classif and np.all(np.diff(y) == 0.):
            raise ValueError("The given input y must have at least 2 targets"
                             " to do regression analysis. You provided only"
                             " one target {0}".format(np.unique(y)))

        # misc
        self.Xmean_ = X.mean(axis=0)
        self.Xstd_ = X.std(axis=0)
        self.Xstd_[self.Xstd_ < 1e-8] = 1
        self.mask_img_ = self.masker_.mask_img_
        self.mask_ = get_data(self.mask_img_).astype(bool)
        n_samples, _ = X.shape
        y = np.array(y).copy()
        l1_ratios = self.l1_ratios
        if isinstance(l1_ratios, numbers.Number):
            l1_ratios = [l1_ratios]
        alphas = self.alphas
        if isinstance(alphas, numbers.Number):
            alphas = [alphas]
        if self.loss is not None:
            loss = self.loss
        elif self.is_classif:
            loss = "logistic"
        else:
            loss = "mse"

        # set backend solver
        if self.penalty.lower() == "graph-net":
            if not self.is_classif or loss == "mse":
                solver = _graph_net_squared_loss
            else:
                solver = _graph_net_logistic
        else:
            if not self.is_classif or loss == "mse":
                solver = partial(tvl1_solver, loss="mse")
            else:
                solver = partial(tvl1_solver, loss="logistic")

        # generate fold indices
        case1 = (None in [alphas, l1_ratios]) and self.n_alphas > 1
        case2 = (alphas is not None) and min(len(l1_ratios), len(alphas)) > 1
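        # case1: at least one of `alphas` / `l1_ratios` was left as None
        # (with n_alphas > 1), so a grid must be generated and searched;
        # case2: the user supplied parameter grids of length > 1; either
        # way, the best (alpha, l1_ratio) pair is selected by CV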
        if case1 or case2:
            self.cv_ = list(check_cv(
                self.cv, y=y, classifier=self.is_classif).split(X, y))
        else:
            # no cross-validation needed, user supplied all params
            self.cv_ = [(np.arange(n_samples), [])]
        n_folds = len(self.cv_)

        # number of problems to solve
        if self.is_classif:
            y = self._binarize_y(y)
        else:
            y = y[:, np.newaxis]
        if self.is_classif and self.n_classes_ > 2:
            n_problems = self.n_classes_
        else:
            n_problems = 1

        # standardize y
        self.ymean_ = np.zeros(y.shape[0])
        if n_problems == 1:
            y = y[:, 0]

        # scores & mean weights map over all folds
        self.cv_scores_ = [[] for i in range(n_problems)]
        w = np.zeros((n_problems, X.shape[1] + 1))
        self.all_coef_ = np.ndarray((n_problems, n_folds, X.shape[1]))

        self.screening_percentile_ = _adjust_screening_percentile(
            self.screening_percentile, self.mask_img_, verbose=self.verbose)

        # main loop: loop on classes and folds
        solver_params = dict(tol=self.tol, max_iter=self.max_iter)
        self.best_model_params_ = []
        self.alpha_grids_ = []
        for (test_scores, best_w, best_alpha, best_l1_ratio, alphas,
             y_train_mean, (cls, fold)) in Parallel(
            n_jobs=self.n_jobs, verbose=2 * self.verbose)(
                delayed(self._cache(path_scores, func_memory_level=2))(
                    solver, X, y[:, cls] if n_problems > 1 else y,
                    self.mask_, alphas, l1_ratios, self.cv_[fold][0],
                    self.cv_[fold][1], solver_params, n_alphas=self.n_alphas,
                    eps=self.eps, is_classif=self.loss == "logistic",
                    key=(cls, fold), debias=self.debias,
                    verbose=self.verbose,
                    screening_percentile=self.screening_percentile_,
                ) for cls in range(n_problems) for fold in range(n_folds)):
            self.best_model_params_.append((best_alpha, best_l1_ratio))
            self.alpha_grids_.append(alphas)
            self.ymean_[cls] += y_train_mean
            self.all_coef_[cls, fold] = best_w[:-1]
            if len(np.atleast_1d(l1_ratios)) == 1:
                test_scores = test_scores[0]
            self.cv_scores_[cls].append(test_scores)
            w[cls] += best_w

        # misc
        self.cv_scores_ = np.array(self.cv_scores_)
        self.best_model_params_ = np.array(self.best_model_params_)
        self.alpha_grids_ = np.array(self.alpha_grids_)
        self.ymean_ /= n_folds
        if not self.is_classif:
            self.all_coef_ = np.array(self.all_coef_)
            w = w[0]
            self.ymean_ = self.ymean_[0]

        # bagging: average best weights maps over folds
        w /= n_folds

        # set coefs and intercepts
        self._set_coef_and_intercept(w)

        # unmask weights map as a niimg
        self.coef_img_ = self.masker_.inverse_transform(self.coef_)

        # report time elapsed
        if self.verbose:
            duration = time.time() - tic
            print("Time Elapsed: %g seconds, %i minutes." % (
                duration, duration / 60.))

        return self

    def decision_function(self, X):
        """Predict confidence scores for samples.

        The confidence score for a sample is the signed distance of that
        sample to the hyperplane.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = (n_samples, n_features)
            Samples.

        Returns
        -------
        array, shape=(n_samples,) if n_classes == 2 else (n_samples, n_classes)
            Confidence scores per (sample, class) combination. In the binary
            case, confidence score for `self.classes_[1]`, where > 0 means
            this class would be predicted.
        """
        # decision_function only makes sense for classification
        if not self.is_classif:
            raise ValueError(
                "decision_function is only available for classification "
                "problems")

        X = check_array(X)
        n_features = self.coef_.shape[1]
        if X.shape[1] != n_features:
            raise ValueError("X has %d features per sample; expecting %d"
                             % (X.shape[1], n_features))

        scores = safe_sparse_dot(X, self.coef_.T,
                                 dense_output=True) + self.intercept_
        return scores.ravel() if scores.shape[1] == 1 else scores

    def predict(self, X):
        """Predict class labels for samples in X.

        Parameters
        ----------
        X : list of Niimg-like objects
            See http://nilearn.github.io/manipulating_images/input_output.html
            Data on which prediction is to be made. If this is a list,
            the affine is considered the same for all.

        Returns
        -------
        y_pred : ndarray, shape (n_samples,)
            Predicted class label per sample.
        """
        # cast X into the usual 2D array
        if not hasattr(self, "masker_"):
            raise RuntimeError("This %s instance is not fitted yet!" % (
                self.__class__.__name__))
        X = self.masker_.transform(X)

        # handle regression (least-squares loss)
        if not self.is_classif:
            return LinearRegression.predict(self, X)

        # prediction proper
        scores = self.decision_function(X)
        if len(scores.shape) == 1:
            indices = (scores > 0).astype(int)
        else:
            indices = scores.argmax(axis=1)
        return self.classes_[indices]


@fill_doc
class SpaceNetClassifier(BaseSpaceNet):
    """Classification learners with sparsity and spatial priors.

    `SpaceNetClassifier` implements Graph-Net and TV-L1
    priors / penalties for classification problems. Thus, the penalty
    is a sum of an L1 term and a spatial term. The aim of such a hybrid prior
    is to obtain weights maps which are structured (due to the spatial
    prior) and sparse (enforced by the L1 norm).

    Parameters
    ----------
    penalty : string, optional (default 'graph-net')
        Penalty to be used in the model. Can be 'graph-net' or 'tv-l1'.

    loss : string, optional (default "logistic")
        Loss to be used in the classifier. Must be one of "mse" or "logistic".

    l1_ratios : float or list of floats in the interval [0, 1]; optional (default .5)
        Constant that mixes L1 and spatial prior terms in penalization.
        l1_ratio == 1 corresponds to pure LASSO. The larger the value of this
        parameter, the sparser the estimated weights map. If a list is
        provided, then the best value will be selected by cross-validation.

    alphas : float or list of floats, optional (default None)
        Choices for the constant that scales the overall regularization term.
        This parameter is mutually exclusive with the `n_alphas` parameter.
        If None or a list of floats is provided, then the best value will be
        selected by cross-validation.

    n_alphas : int, optional (default 10)
        Generate this number of alphas per regularization path.
        This parameter is mutually exclusive with the `alphas` parameter.

    eps : float, optional (default 1e-3)
        Length of the path. For example, ``eps=1e-3`` means that
        ``alpha_min / alpha_max = 1e-3``.

    mask : filename, niimg, NiftiMasker instance, optional (default None)
        Mask to be used on data. If an instance of masker is passed,
        then its mask will be used. If no mask is given, it will be computed
        automatically by a MultiNiftiMasker with default parameters.
    %(target_affine)s
    %(target_shape)s
    %(low_pass)s
    %(high_pass)s
    %(t_r)s
    screening_percentile : float in the interval [0, 100]; optional (default 20)
        Percentile value for ANOVA univariate feature selection. A value of
        100 means 'keep all features'. This percentile is expressed
        w.r.t the volume of a standard (MNI152) brain, and so is corrected
        at runtime by premultiplying it with the ratio of the volume of the
        mask of the data and the volume of a standard brain. If '100' is
        given, all the features are used, regardless of the number of voxels.

    standardize : bool, optional (default True)
        If set, then the data (X, y) are centered to have mean zero along
        axis 0. This is here because nearly all linear models will want
        their data to be centered.

    fit_intercept : bool, optional (default True)
        Fit or not an intercept.

    max_iter : int, optional (default 200)
        Defines the iterations for the solver.

    tol : float, optional (default 1e-4)
        Defines the tolerance for convergence.
    %(verbose)s
    %(n_jobs)s
    %(memory)s
    %(memory_level1)s
    cv : int, a cv generator instance, or None (default 8)
        The input specifying which cross-validation generator to use.
        It can be an integer, in which case it is the number of folds in a
        KFold, None, in which case 3-fold is used, or another object, that
        will then be used as a cv generator.

    debias : bool, optional (default False)
        If set, then the estimated weights maps will be debiased.

    Attributes
    ----------
    `all_coef_` : ndarray, shape (n_l1_ratios, n_folds, n_features)
        Coefficients for all folds and features.

    `alpha_grids_` : ndarray, shape (n_folds, n_alphas)
        Alpha values considered for selection of the best ones
        (saved in `best_model_params_`).

    `best_model_params_` : ndarray, shape (n_folds, n_parameter)
        Best model parameters (alpha, l1_ratio) saved for the different
        cross-validation folds.

    `classes_` : ndarray of labels (`n_classes_`)
        Labels of the classes.

    `n_classes_` : int
        Number of classes.

    `coef_` : ndarray, shape
        (1, n_features) for 2-class classification problems (i.e. n_classes = 2),
        (n_classes, n_features) for n_classes > 2.
        Coefficient of the features in the decision function.

    `coef_img_` : nifti image
        Masked model coefficients.

    `mask_` : ndarray 3D
        An array containing the values of the mask image.

    `masker_` : instance of NiftiMasker
        The nifti masker used to mask the data.

    `mask_img_` : Nifti-like image
        The mask of the data. If no mask was supplied by the user,
        this attribute is the mask image computed automatically from the
        data `X`.

    `memory_` : joblib memory cache

    `intercept_` : narray, shape
        (1,) for 2-class classification problems (i.e. n_classes = 2),
        (n_classes,) for n_classes > 2.
        Intercept (a.k.a. bias) added to the decision function.
        It is available only when the parameter `fit_intercept` is set
        to True.

    `cv_` : list of pairs of lists
        Each pair is the list of indices for the train and test
        samples for the corresponding fold.

    `cv_scores_` : ndarray, shape (n_folds, n_alphas) or (n_l1_ratios, n_folds, n_alphas)
        Scores (misclassification) for each alpha, and on each fold.

    `screening_percentile_` : float
        Screening percentile corrected according to the volume of the mask,
        relative to the volume of a standard brain.

    `w_` : ndarray, shape
        (1, n_features + 1) for 2-class classification problems (i.e. n_classes = 2),
        (n_classes, n_features + 1) for n_classes > 2.
        Model weights.

    `ymean_` : array, shape (n_samples,)
        Mean of prediction targets.

    `Xmean_` : array, shape (n_features,)
        Mean of X across samples.

    `Xstd_` : array, shape (n_features,)
        Standard deviation of X across samples.

    See Also
    --------
    nilearn.decoding.SpaceNetRegressor: Graph-Net and TV-L1 priors/penalties

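    Examples
    --------
    A hedged sketch of typical usage (`func_imgs` and `labels` below are
    placeholders for your own Niimg-like data and class labels)::

        >>> from nilearn.decoding import SpaceNetClassifier
        >>> clf = SpaceNetClassifier(penalty="tv-l1")
        >>> # clf.fit(func_imgs, labels)
        >>> # accuracy = clf.score(func_imgs_test, labels_test)
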
1113    """
1114
    def __init__(self, penalty="graph-net", loss="logistic",
                 l1_ratios=.5, alphas=None, n_alphas=10, mask=None,
                 target_affine=None, target_shape=None, low_pass=None,
                 high_pass=None, t_r=None, max_iter=200, tol=1e-4,
                 memory=Memory(None), memory_level=1, standardize=True,
                 verbose=1, n_jobs=1, eps=1e-3,
                 cv=8, fit_intercept=True, screening_percentile=20.,
                 debias=False):
        super(SpaceNetClassifier, self).__init__(
            penalty=penalty, is_classif=True, l1_ratios=l1_ratios,
            alphas=alphas, n_alphas=n_alphas, target_shape=target_shape,
            low_pass=low_pass, high_pass=high_pass, mask=mask, t_r=t_r,
            max_iter=max_iter, tol=tol, memory=memory,
            memory_level=memory_level,
            n_jobs=n_jobs, eps=eps, cv=cv, debias=debias,
            fit_intercept=fit_intercept, standardize=standardize,
            screening_percentile=screening_percentile, loss=loss,
            target_affine=target_affine, verbose=verbose)

    def _binarize_y(self, y):
        """Helper function invoked just before fitting a classifier."""
        y = np.array(y)

        # encode target classes as -1 and 1
        self._enc = LabelBinarizer(pos_label=1, neg_label=-1)
        y = self._enc.fit_transform(y)
        self.classes_ = self._enc.classes_
        self.n_classes_ = len(self.classes_)
        return y

    def score(self, X, y):
        """Return the mean accuracy on the given test data and labels.

        Parameters
        ----------
        X : list of Niimg-like objects
            See http://nilearn.github.io/manipulating_images/input_output.html
            Data on which the model is to be tested. If this is a list,
            the affine is considered the same for all.

        y : array or list of length n_samples
            Labels.

        Returns
        -------
        score : float
            Mean accuracy of self.predict(X) w.r.t. y.
        """
        return accuracy_score(y, self.predict(X))


@fill_doc
class SpaceNetRegressor(BaseSpaceNet):
    """Regression learners with sparsity and spatial priors.

    `SpaceNetRegressor` implements Graph-Net and TV-L1 priors / penalties
    for regression problems. Thus, the penalty is a sum of an L1 term and a
    spatial term. The aim of such a hybrid prior is to obtain weights maps
    which are structured (due to the spatial prior) and sparse (enforced
    by the L1 norm).

    Parameters
    ----------
    penalty : string, optional (default 'graph-net')
        Penalty to be used in the model. Can be 'graph-net' or 'tv-l1'.

    l1_ratios : float or list of floats in the interval [0, 1]; optional (default .5)
        Constant that mixes L1 and spatial prior terms in penalization.
        l1_ratio == 1 corresponds to pure LASSO. The larger the value of this
        parameter, the sparser the estimated weights map. If a list is
        provided, then the best value will be selected by cross-validation.

    alphas : float or list of floats, optional (default None)
        Choices for the constant that scales the overall regularization term.
        This parameter is mutually exclusive with the `n_alphas` parameter.
        If None or a list of floats is provided, then the best value will be
        selected by cross-validation.

    n_alphas : int, optional (default 10)
        Generate this number of alphas per regularization path.
        This parameter is mutually exclusive with the `alphas` parameter.

    eps : float, optional (default 1e-3)
        Length of the path. For example, ``eps=1e-3`` means that
        ``alpha_min / alpha_max = 1e-3``.

    mask : filename, niimg, NiftiMasker instance, optional (default None)
        Mask to be used on data. If an instance of masker is passed,
        then its mask will be used. If no mask is given, it will be computed
        automatically by a MultiNiftiMasker with default parameters.
    %(target_affine)s
    %(target_shape)s
    %(low_pass)s
    %(high_pass)s
    %(t_r)s
    screening_percentile : float in the interval [0, 100]; optional (default 20)
        Percentile value for ANOVA univariate feature selection. A value of
        100 means 'keep all features'. This percentile is expressed
        w.r.t the volume of a standard (MNI152) brain, and so is corrected
        at runtime to correspond to the volume of the user-supplied mask
        (which is typically smaller).

    standardize : bool, optional (default True)
        If set, then the data (X, y) are centered to have mean zero along
        axis 0. This is here because nearly all linear models will want
        their data to be centered.

    fit_intercept : bool, optional (default True)
        Fit or not an intercept.

    max_iter : int, optional (default 200)
        Defines the iterations for the solver.

    tol : float, optional (default 1e-4)
        Defines the tolerance for convergence.
    %(verbose)s
    %(n_jobs)s
    %(memory)s
    %(memory_level1)s
    cv : int, a cv generator instance, or None (default 8)
        The input specifying which cross-validation generator to use.
        It can be an integer, in which case it is the number of folds in a
        KFold, None, in which case 3-fold is used, or another object, that
        will then be used as a cv generator.

    debias : bool, optional (default False)
        If set, then the estimated weights maps will be debiased.

    Attributes
    ----------
    `all_coef_` : ndarray, shape (n_l1_ratios, n_folds, n_features)
        Coefficients for all folds and features.

    `alpha_grids_` : ndarray, shape (n_folds, n_alphas)
        Alpha values considered for selection of the best ones
        (saved in `best_model_params_`).

    `best_model_params_` : ndarray, shape (n_folds, n_parameter)
        Best model parameters (alpha, l1_ratio) saved for the different
        cross-validation folds.

    `coef_` : ndarray, shape (n_features,)
        Coefficient of the features in the decision function.

    `coef_img_` : nifti image
        Masked model coefficients.

    `mask_` : ndarray 3D
        An array containing the values of the mask image.

    `masker_` : instance of NiftiMasker
        The nifti masker used to mask the data.

    `mask_img_` : Nifti-like image
        The mask of the data. If no mask was supplied by the user, this
        attribute is the mask image computed automatically from the data `X`.

    `memory_` : joblib memory cache

    `intercept_` : narray, shape (1,)
        Intercept (a.k.a. bias) added to the decision function.
        It is available only when the parameter `fit_intercept` is set
        to True.

    `cv_` : list of pairs of lists
        Each pair is the list of indices for the train and test
        samples for the corresponding fold.

    `cv_scores_` : ndarray, shape (n_folds, n_alphas) or (n_l1_ratios, n_folds, n_alphas)
        Scores for each alpha, and on each fold.

    `screening_percentile_` : float
        Screening percentile corrected according to the volume of the mask,
        relative to the volume of a standard brain.

    `w_` : ndarray, shape (n_features,)
        Model weights.

    `ymean_` : array, shape (n_samples,)
        Mean of prediction targets.

    `Xmean_` : array, shape (n_features,)
        Mean of X across samples.

    `Xstd_` : array, shape (n_features,)
        Standard deviation of X across samples.

    See Also
    --------
    nilearn.decoding.SpaceNetClassifier: Graph-Net and TV-L1 priors/penalties

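    Examples
    --------
    A hedged sketch of typical usage (`niimgs` and `age` below are
    placeholders for your own Niimg-like data and continuous targets)::

        >>> from nilearn.decoding import SpaceNetRegressor
        >>> model = SpaceNetRegressor(penalty="graph-net")
        >>> # model.fit(niimgs, age)
        >>> # age_pred = model.predict(niimgs_test)
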
1305    """
1306
    def __init__(self, penalty="graph-net", l1_ratios=.5, alphas=None,
                 n_alphas=10, mask=None, target_affine=None,
                 target_shape=None, low_pass=None, high_pass=None, t_r=None,
                 max_iter=200, tol=1e-4, memory=Memory(None), memory_level=1,
                 standardize=True, verbose=1, n_jobs=1, eps=1e-3, cv=8,
                 fit_intercept=True, screening_percentile=20., debias=False):
        super(SpaceNetRegressor, self).__init__(
            penalty=penalty, is_classif=False, l1_ratios=l1_ratios,
            alphas=alphas, n_alphas=n_alphas, target_shape=target_shape,
            low_pass=low_pass, high_pass=high_pass, mask=mask, t_r=t_r,
            max_iter=max_iter, tol=tol, memory=memory,
            memory_level=memory_level,
            n_jobs=n_jobs, eps=eps, cv=cv, debias=debias,
            fit_intercept=fit_intercept, standardize=standardize,
            screening_percentile=screening_percentile,
            target_affine=target_affine, verbose=verbose)