1"""
2The :mod:`sklearn.model_selection._validation` module includes classes and
3functions to validate the model.
4"""
5
6# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
7#         Gael Varoquaux <gael.varoquaux@normalesup.org>
8#         Olivier Grisel <olivier.grisel@ensta.org>
9#         Raghav RV <rvraghav93@gmail.com>
10#         Michal Karbownik <michakarbownik@gmail.com>
11# License: BSD 3 clause
12
13
14import warnings
15import numbers
16import time
17from traceback import format_exc
18from contextlib import suppress
19from collections import Counter
20
21import numpy as np
22import scipy.sparse as sp
23from joblib import Parallel, logger
24
25from ..base import is_classifier, clone
26from ..utils import indexable, check_random_state, _safe_indexing
27from ..utils.validation import _check_fit_params
28from ..utils.validation import _num_samples
29from ..utils.fixes import delayed
30from ..utils.metaestimators import _safe_split
31from ..metrics import check_scoring
32from ..metrics._scorer import _check_multimetric_scoring, _MultimetricScorer
33from ..exceptions import FitFailedWarning, NotFittedError
34from ._split import check_cv
35from ..preprocessing import LabelEncoder
36
37
# Public names of this module: the list below is what a star-import
# (`from <module> import *`) exposes.
__all__ = [
    "cross_validate",
    "cross_val_score",
    "cross_val_predict",
    "permutation_test_score",
    "learning_curve",
    "validation_curve",
]
46
47
def cross_validate(
    estimator,
    X,
    y=None,
    *,
    groups=None,
    scoring=None,
    cv=None,
    n_jobs=None,
    verbose=0,
    fit_params=None,
    pre_dispatch="2*n_jobs",
    return_train_score=False,
    return_estimator=False,
    error_score=np.nan,
):
    """Evaluate metric(s) by cross-validation and also record fit/score times.

    Read more in the :ref:`User Guide <multimetric_cross_validation>`.

    Parameters
    ----------
    estimator : estimator object implementing 'fit'
        The object to use to fit the data.

    X : array-like of shape (n_samples, n_features)
        The data to fit. Can be for example a list, or an array.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs), \
            default=None
        The target variable to try to predict in the case of
        supervised learning.

    groups : array-like of shape (n_samples,), default=None
        Group labels for the samples used while splitting the dataset into
        train/test set. Only used in conjunction with a "Group" :term:`cv`
        instance (e.g., :class:`GroupKFold`).

    scoring : str, callable, list, tuple, or dict, default=None
        Strategy to evaluate the performance of the cross-validated model on
        the test set.

        If `scoring` represents a single score, one can use:

        - a single string (see :ref:`scoring_parameter`);
        - a callable (see :ref:`scoring`) that returns a single value.

        If `scoring` represents multiple scores, one can use:

        - a list or tuple of unique strings;
        - a callable returning a dictionary where the keys are the metric
          names and the values are the metric scores;
        - a dictionary with metric names as keys and callables as values.

        See :ref:`multimetric_grid_search` for an example.

    cv : int, cross-validation generator or an iterable, default=None
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 5-fold cross validation,
        - int, to specify the number of folds in a `(Stratified)KFold`,
        - :term:`CV splitter`,
        - An iterable yielding (train, test) splits as arrays of indices.

        For int/None inputs, if the estimator is a classifier and ``y`` is
        either binary or multiclass, :class:`StratifiedKFold` is used. In all
        other cases, :class:`KFold` is used. These splitters are instantiated
        with `shuffle=False` so the splits will be the same across calls.

        Refer to the :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

        .. versionchanged:: 0.22
            ``cv`` default value if None changed from 3-fold to 5-fold.

    n_jobs : int, default=None
        Number of jobs to run in parallel. Training the estimator and computing
        the score are parallelized over the cross-validation splits.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    verbose : int, default=0
        The verbosity level.

    fit_params : dict, default=None
        Parameters to pass to the fit method of the estimator.

    pre_dispatch : int or str, default='2*n_jobs'
        Controls the number of jobs that get dispatched during parallel
        execution. Reducing this number can be useful to avoid an
        explosion of memory consumption when more jobs get dispatched
        than CPUs can process. This parameter can be:

            - None, in which case all the jobs are immediately
              created and spawned. Use this for lightweight and
              fast-running jobs, to avoid delays due to on-demand
              spawning of the jobs

            - An int, giving the exact number of total jobs that are
              spawned

            - A str, giving an expression as a function of n_jobs,
              as in '2*n_jobs'

    return_train_score : bool, default=False
        Whether to include train scores.
        Computing training scores is used to get insights on how different
        parameter settings impact the overfitting/underfitting trade-off.
        However computing the scores on the training set can be computationally
        expensive and is not strictly required to select the parameters that
        yield the best generalization performance.

        .. versionadded:: 0.19

        .. versionchanged:: 0.21
            Default value was changed from ``True`` to ``False``

    return_estimator : bool, default=False
        Whether to return the estimators fitted on each split.

        .. versionadded:: 0.20

    error_score : 'raise' or numeric, default=np.nan
        Value to assign to the score if an error occurs in estimator fitting.
        If set to 'raise', the error is raised.
        If a numeric value is given, FitFailedWarning is raised.

        .. versionadded:: 0.20

    Returns
    -------
    scores : dict of float arrays of shape (n_splits,)
        Array of scores of the estimator for each run of the cross validation.

        A dict of arrays containing the score/time arrays for each scorer is
        returned. The possible keys for this ``dict`` are:

            ``test_score``
                The score array for test scores on each cv split.
                Suffix ``_score`` in ``test_score`` changes to a specific
                metric like ``test_r2`` or ``test_auc`` if there are
                multiple scoring metrics in the scoring parameter.
            ``train_score``
                The score array for train scores on each cv split.
                Suffix ``_score`` in ``train_score`` changes to a specific
                metric like ``train_r2`` or ``train_auc`` if there are
                multiple scoring metrics in the scoring parameter.
                This is available only if ``return_train_score`` parameter
                is ``True``.
            ``fit_time``
                The time for fitting the estimator on the train
                set for each cv split.
            ``score_time``
                The time for scoring the estimator on the test set for each
                cv split. (Note time for scoring on the train set is not
                included even if ``return_train_score`` is set to ``True``.)
            ``estimator``
                The estimator objects for each cv split.
                This is available only if ``return_estimator`` parameter
                is set to ``True``.

    Examples
    --------
    >>> from sklearn import datasets, linear_model
    >>> from sklearn.model_selection import cross_validate
    >>> from sklearn.metrics import make_scorer
    >>> from sklearn.metrics import confusion_matrix
    >>> from sklearn.svm import LinearSVC
    >>> diabetes = datasets.load_diabetes()
    >>> X = diabetes.data[:150]
    >>> y = diabetes.target[:150]
    >>> lasso = linear_model.Lasso()

    Single metric evaluation using ``cross_validate``

    >>> cv_results = cross_validate(lasso, X, y, cv=3)
    >>> sorted(cv_results.keys())
    ['fit_time', 'score_time', 'test_score']
    >>> cv_results['test_score']
    array([0.33150734, 0.08022311, 0.03531764])

    Multiple metric evaluation using ``cross_validate``
    (please refer the ``scoring`` parameter doc for more information)

    >>> scores = cross_validate(lasso, X, y, cv=3,
    ...                         scoring=('r2', 'neg_mean_squared_error'),
    ...                         return_train_score=True)
    >>> print(scores['test_neg_mean_squared_error'])
    [-3635.5... -3573.3... -6114.7...]
    >>> print(scores['train_r2'])
    [0.28010158 0.39088426 0.22784852]

    See Also
    --------
    cross_val_score : Run cross-validation for single metric evaluation.

    cross_val_predict : Get predictions from each split of cross-validation for
        diagnostic purposes.

    sklearn.metrics.make_scorer : Make a scorer from a performance metric or
        loss function.

    """
    X, y, groups = indexable(X, y, groups)
    cv = check_cv(cv, y, classifier=is_classifier(estimator))

    # A callable is handed over untouched; a string/None resolves to a single
    # scorer; anything else is treated as a multimetric specification.
    scoring_is_callable = callable(scoring)
    if scoring_is_callable:
        scorers = scoring
    elif scoring is None or isinstance(scoring, str):
        scorers = check_scoring(estimator, scoring)
    else:
        scorers = _check_multimetric_scoring(estimator, scoring)

    # Keyword options that are the same for every split.
    per_fold_options = {
        "return_train_score": return_train_score,
        "return_times": True,
        "return_estimator": return_estimator,
        "error_score": error_score,
    }

    # We clone the estimator to make sure that all the folds are
    # independent, and that it is pickle-able.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
    fold_results = parallel(
        delayed(_fit_and_score)(
            clone(estimator),
            X,
            y,
            scorers,
            train_indices,
            test_indices,
            verbose,
            None,
            fit_params,
            **per_fold_options,
        )
        for train_indices, test_indices in cv.split(X, y, groups)
    )

    _warn_about_fit_failures(fold_results, error_score)

    # For callable scoring, the return type is only known after calling. If
    # the return type is a dictionary, the error scores can now be inserted
    # with the correct key.
    if scoring_is_callable:
        _insert_error_scores(fold_results, error_score)

    aggregated = _aggregate_score_dicts(fold_results)

    ret = {
        "fit_time": aggregated["fit_time"],
        "score_time": aggregated["score_time"],
    }
    if return_estimator:
        ret["estimator"] = aggregated["estimator"]

    test_scores_dict = _normalize_score_results(aggregated["test_scores"])
    if return_train_score:
        train_scores_dict = _normalize_score_results(aggregated["train_scores"])

    # One "test_<name>" entry per metric, followed by its "train_<name>"
    # counterpart when train scores were requested.
    for name, per_split_scores in test_scores_dict.items():
        ret["test_%s" % name] = per_split_scores
        if return_train_score:
            ret["train_%s" % name] = train_scores_dict[name]

    return ret
314
315
316def _insert_error_scores(results, error_score):
317    """Insert error in `results` by replacing them inplace with `error_score`.
318
319    This only applies to multimetric scores because `_fit_and_score` will
320    handle the single metric case.
321    """
322    successful_score = None
323    failed_indices = []
324    for i, result in enumerate(results):
325        if result["fit_error"] is not None:
326            failed_indices.append(i)
327        elif successful_score is None:
328            successful_score = result["test_scores"]
329
330    if successful_score is None:
331        raise NotFittedError("All estimators failed to fit")
332
333    if isinstance(successful_score, dict):
334        formatted_error = {name: error_score for name in successful_score}
335        for i in failed_indices:
336            results[i]["test_scores"] = formatted_error.copy()
337            if "train_scores" in results[i]:
338                results[i]["train_scores"] = formatted_error.copy()
339
340
341def _normalize_score_results(scores, scaler_score_key="score"):
342    """Creates a scoring dictionary based on the type of `scores`"""
343    if isinstance(scores[0], dict):
344        # multimetric scoring
345        return _aggregate_score_dicts(scores)
346    # scaler
347    return {scaler_score_key: scores}
348
349
350def _warn_about_fit_failures(results, error_score):
351    fit_errors = [
352        result["fit_error"] for result in results if result["fit_error"] is not None
353    ]
354    if fit_errors:
355        num_failed_fits = len(fit_errors)
356        num_fits = len(results)
357        fit_errors_counter = Counter(fit_errors)
358        delimiter = "-" * 80 + "\n"
359        fit_errors_summary = "\n".join(
360            f"{delimiter}{n} fits failed with the following error:\n{error}"
361            for error, n in fit_errors_counter.items()
362        )
363
364        some_fits_failed_message = (
365            f"\n{num_failed_fits} fits failed out of a total of {num_fits}.\n"
366            "The score on these train-test partitions for these parameters"
367            f" will be set to {error_score}.\n"
368            "If these failures are not expected, you can try to debug them "
369            "by setting error_score='raise'.\n\n"
370            f"Below are more details about the failures:\n{fit_errors_summary}"
371        )
372        warnings.warn(some_fits_failed_message, FitFailedWarning)
373
374
def cross_val_score(
    estimator,
    X,
    y=None,
    *,
    groups=None,
    scoring=None,
    cv=None,
    n_jobs=None,
    verbose=0,
    fit_params=None,
    pre_dispatch="2*n_jobs",
    error_score=np.nan,
):
    """Evaluate a score by cross-validation.

    Read more in the :ref:`User Guide <cross_validation>`.

    Parameters
    ----------
    estimator : estimator object implementing 'fit'
        The object to use to fit the data.

    X : array-like of shape (n_samples, n_features)
        The data to fit. Can be for example a list, or an array.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs), \
            default=None
        The target variable to try to predict in the case of
        supervised learning.

    groups : array-like of shape (n_samples,), default=None
        Group labels for the samples used while splitting the dataset into
        train/test set. Only used in conjunction with a "Group" :term:`cv`
        instance (e.g., :class:`GroupKFold`).

    scoring : str or callable, default=None
        A str (see model evaluation documentation) or
        a scorer callable object / function with signature
        ``scorer(estimator, X, y)`` which should return only
        a single value.

        Similar to :func:`cross_validate`
        but only a single metric is permitted.

        If `None`, the estimator's default scorer (if available) is used.

    cv : int, cross-validation generator or an iterable, default=None
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - `None`, to use the default 5-fold cross validation,
        - int, to specify the number of folds in a `(Stratified)KFold`,
        - :term:`CV splitter`,
        - An iterable that generates (train, test) splits as arrays of indices.

        For `int`/`None` inputs, if the estimator is a classifier and `y` is
        either binary or multiclass, :class:`StratifiedKFold` is used. In all
        other cases, :class:`KFold` is used. These splitters are instantiated
        with `shuffle=False` so the splits will be the same across calls.

        Refer to the :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

        .. versionchanged:: 0.22
            `cv` default value if `None` changed from 3-fold to 5-fold.

    n_jobs : int, default=None
        Number of jobs to run in parallel. Training the estimator and computing
        the score are parallelized over the cross-validation splits.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    verbose : int, default=0
        The verbosity level.

    fit_params : dict, default=None
        Parameters to pass to the fit method of the estimator.

    pre_dispatch : int or str, default='2*n_jobs'
        Controls the number of jobs that get dispatched during parallel
        execution. Reducing this number can be useful to avoid an
        explosion of memory consumption when more jobs get dispatched
        than CPUs can process. This parameter can be:

            - ``None``, in which case all the jobs are immediately
              created and spawned. Use this for lightweight and
              fast-running jobs, to avoid delays due to on-demand
              spawning of the jobs

            - An int, giving the exact number of total jobs that are
              spawned

            - A str, giving an expression as a function of n_jobs,
              as in '2*n_jobs'

    error_score : 'raise' or numeric, default=np.nan
        Value to assign to the score if an error occurs in estimator fitting.
        If set to 'raise', the error is raised.
        If a numeric value is given, FitFailedWarning is raised.

        .. versionadded:: 0.20

    Returns
    -------
    scores : ndarray of float of shape=(len(list(cv)),)
        Array of scores of the estimator for each run of the cross validation.

    Examples
    --------
    >>> from sklearn import datasets, linear_model
    >>> from sklearn.model_selection import cross_val_score
    >>> diabetes = datasets.load_diabetes()
    >>> X = diabetes.data[:150]
    >>> y = diabetes.target[:150]
    >>> lasso = linear_model.Lasso()
    >>> print(cross_val_score(lasso, X, y, cv=3))
    [0.33150734 0.08022311 0.03531764]

    See Also
    --------
    cross_validate : To run cross-validation on multiple metrics and also to
        return train scores, fit times and score times.

    cross_val_predict : Get predictions from each split of cross-validation for
        diagnostic purposes.

    sklearn.metrics.make_scorer : Make a scorer from a performance metric or
        loss function.
    """
    # To ensure multimetric format is not supported
    single_scorer = check_scoring(estimator, scoring=scoring)

    # Options forwarded unchanged to `cross_validate`.
    forwarded_options = {
        "groups": groups,
        "cv": cv,
        "n_jobs": n_jobs,
        "verbose": verbose,
        "fit_params": fit_params,
        "pre_dispatch": pre_dispatch,
        "error_score": error_score,
    }

    # Registering the scorer under the name "score" makes the result
    # available under the "test_score" key.
    cv_results = cross_validate(
        estimator=estimator,
        X=X,
        y=y,
        scoring={"score": single_scorer},
        **forwarded_options,
    )
    return cv_results["test_score"]
523
524
525def _fit_and_score(
526    estimator,
527    X,
528    y,
529    scorer,
530    train,
531    test,
532    verbose,
533    parameters,
534    fit_params,
535    return_train_score=False,
536    return_parameters=False,
537    return_n_test_samples=False,
538    return_times=False,
539    return_estimator=False,
540    split_progress=None,
541    candidate_progress=None,
542    error_score=np.nan,
543):
544
545    """Fit estimator and compute scores for a given dataset split.
546
547    Parameters
548    ----------
549    estimator : estimator object implementing 'fit'
550        The object to use to fit the data.
551
552    X : array-like of shape (n_samples, n_features)
553        The data to fit.
554
555    y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None
556        The target variable to try to predict in the case of
557        supervised learning.
558
559    scorer : A single callable or dict mapping scorer name to the callable
560        If it is a single callable, the return value for ``train_scores`` and
561        ``test_scores`` is a single float.
562
563        For a dict, it should be one mapping the scorer name to the scorer
564        callable object / function.
565
566        The callable object / fn should have signature
567        ``scorer(estimator, X, y)``.
568
569    train : array-like of shape (n_train_samples,)
570        Indices of training samples.
571
572    test : array-like of shape (n_test_samples,)
573        Indices of test samples.
574
575    verbose : int
576        The verbosity level.
577
578    error_score : 'raise' or numeric, default=np.nan
579        Value to assign to the score if an error occurs in estimator fitting.
580        If set to 'raise', the error is raised.
581        If a numeric value is given, FitFailedWarning is raised.
582
583    parameters : dict or None
584        Parameters to be set on the estimator.
585
586    fit_params : dict or None
587        Parameters that will be passed to ``estimator.fit``.
588
589    return_train_score : bool, default=False
590        Compute and return score on training set.
591
592    return_parameters : bool, default=False
593        Return parameters that has been used for the estimator.
594
595    split_progress : {list, tuple} of int, default=None
596        A list or tuple of format (<current_split_id>, <total_num_of_splits>).
597
598    candidate_progress : {list, tuple} of int, default=None
599        A list or tuple of format
600        (<current_candidate_id>, <total_number_of_candidates>).
601
602    return_n_test_samples : bool, default=False
603        Whether to return the ``n_test_samples``.
604
605    return_times : bool, default=False
606        Whether to return the fit/score times.
607
608    return_estimator : bool, default=False
609        Whether to return the fitted estimator.
610
611    Returns
612    -------
613    result : dict with the following attributes
614        train_scores : dict of scorer name -> float
615            Score on training set (for all the scorers),
616            returned only if `return_train_score` is `True`.
617        test_scores : dict of scorer name -> float
618            Score on testing set (for all the scorers).
619        n_test_samples : int
620            Number of test samples.
621        fit_time : float
622            Time spent for fitting in seconds.
623        score_time : float
624            Time spent for scoring in seconds.
625        parameters : dict or None
626            The parameters that have been evaluated.
627        estimator : estimator object
628            The fitted estimator.
629        fit_error : str or None
630            Traceback str if the fit failed, None if the fit succeeded.
631    """
632    if not isinstance(error_score, numbers.Number) and error_score != "raise":
633        raise ValueError(
634            "error_score must be the string 'raise' or a numeric value. "
635            "(Hint: if using 'raise', please make sure that it has been "
636            "spelled correctly.)"
637        )
638
639    progress_msg = ""
640    if verbose > 2:
641        if split_progress is not None:
642            progress_msg = f" {split_progress[0]+1}/{split_progress[1]}"
643        if candidate_progress and verbose > 9:
644            progress_msg += f"; {candidate_progress[0]+1}/{candidate_progress[1]}"
645
646    if verbose > 1:
647        if parameters is None:
648            params_msg = ""
649        else:
650            sorted_keys = sorted(parameters)  # Ensure deterministic o/p
651            params_msg = ", ".join(f"{k}={parameters[k]}" for k in sorted_keys)
652    if verbose > 9:
653        start_msg = f"[CV{progress_msg}] START {params_msg}"
654        print(f"{start_msg}{(80 - len(start_msg)) * '.'}")
655
656    # Adjust length of sample weights
657    fit_params = fit_params if fit_params is not None else {}
658    fit_params = _check_fit_params(X, fit_params, train)
659
660    if parameters is not None:
661        # clone after setting parameters in case any parameters
662        # are estimators (like pipeline steps)
663        # because pipeline doesn't clone steps in fit
664        cloned_parameters = {}
665        for k, v in parameters.items():
666            cloned_parameters[k] = clone(v, safe=False)
667
668        estimator = estimator.set_params(**cloned_parameters)
669
670    start_time = time.time()
671
672    X_train, y_train = _safe_split(estimator, X, y, train)
673    X_test, y_test = _safe_split(estimator, X, y, test, train)
674
675    result = {}
676    try:
677        if y_train is None:
678            estimator.fit(X_train, **fit_params)
679        else:
680            estimator.fit(X_train, y_train, **fit_params)
681
682    except Exception:
683        # Note fit time as time until error
684        fit_time = time.time() - start_time
685        score_time = 0.0
686        if error_score == "raise":
687            raise
688        elif isinstance(error_score, numbers.Number):
689            if isinstance(scorer, dict):
690                test_scores = {name: error_score for name in scorer}
691                if return_train_score:
692                    train_scores = test_scores.copy()
693            else:
694                test_scores = error_score
695                if return_train_score:
696                    train_scores = error_score
697        result["fit_error"] = format_exc()
698    else:
699        result["fit_error"] = None
700
701        fit_time = time.time() - start_time
702        test_scores = _score(estimator, X_test, y_test, scorer, error_score)
703        score_time = time.time() - start_time - fit_time
704        if return_train_score:
705            train_scores = _score(estimator, X_train, y_train, scorer, error_score)
706
707    if verbose > 1:
708        total_time = score_time + fit_time
709        end_msg = f"[CV{progress_msg}] END "
710        result_msg = params_msg + (";" if params_msg else "")
711        if verbose > 2:
712            if isinstance(test_scores, dict):
713                for scorer_name in sorted(test_scores):
714                    result_msg += f" {scorer_name}: ("
715                    if return_train_score:
716                        scorer_scores = train_scores[scorer_name]
717                        result_msg += f"train={scorer_scores:.3f}, "
718                    result_msg += f"test={test_scores[scorer_name]:.3f})"
719            else:
720                result_msg += ", score="
721                if return_train_score:
722                    result_msg += f"(train={train_scores:.3f}, test={test_scores:.3f})"
723                else:
724                    result_msg += f"{test_scores:.3f}"
725        result_msg += f" total time={logger.short_format_time(total_time)}"
726
727        # Right align the result_msg
728        end_msg += "." * (80 - len(end_msg) - len(result_msg))
729        end_msg += result_msg
730        print(end_msg)
731
732    result["test_scores"] = test_scores
733    if return_train_score:
734        result["train_scores"] = train_scores
735    if return_n_test_samples:
736        result["n_test_samples"] = _num_samples(X_test)
737    if return_times:
738        result["fit_time"] = fit_time
739        result["score_time"] = score_time
740    if return_parameters:
741        result["parameters"] = parameters
742    if return_estimator:
743        result["estimator"] = estimator
744    return result
745
746
747def _score(estimator, X_test, y_test, scorer, error_score="raise"):
748    """Compute the score(s) of an estimator on a given test set.
749
750    Will return a dict of floats if `scorer` is a dict, otherwise a single
751    float is returned.
752    """
753    if isinstance(scorer, dict):
754        # will cache method calls if needed. scorer() returns a dict
755        scorer = _MultimetricScorer(**scorer)
756
757    try:
758        if y_test is None:
759            scores = scorer(estimator, X_test)
760        else:
761            scores = scorer(estimator, X_test, y_test)
762    except Exception:
763        if error_score == "raise":
764            raise
765        else:
766            if isinstance(scorer, _MultimetricScorer):
767                scores = {name: error_score for name in scorer._scorers}
768            else:
769                scores = error_score
770            warnings.warn(
771                "Scoring failed. The score on this train-test partition for "
772                f"these parameters will be set to {error_score}. Details: \n"
773                f"{format_exc()}",
774                UserWarning,
775            )
776
777    error_msg = "scoring must return a number, got %s (%s) instead. (scorer=%s)"
778    if isinstance(scores, dict):
779        for name, score in scores.items():
780            if hasattr(score, "item"):
781                with suppress(ValueError):
782                    # e.g. unwrap memmapped scalars
783                    score = score.item()
784            if not isinstance(score, numbers.Number):
785                raise ValueError(error_msg % (score, type(score), name))
786            scores[name] = score
787    else:  # scalar
788        if hasattr(scores, "item"):
789            with suppress(ValueError):
790                # e.g. unwrap memmapped scalars
791                scores = scores.item()
792        if not isinstance(scores, numbers.Number):
793            raise ValueError(error_msg % (scores, type(scores), scorer))
794    return scores
795
796
def cross_val_predict(
    estimator,
    X,
    y=None,
    *,
    groups=None,
    cv=None,
    n_jobs=None,
    verbose=0,
    fit_params=None,
    pre_dispatch="2*n_jobs",
    method="predict",
):
    """Generate cross-validated estimates for each input data point.

    The ``cv`` parameter decides how the data is split. Every sample must
    land in exactly one test set, and the value returned for a sample is
    produced by an estimator fitted on the corresponding training set.

    Feeding these predictions to an evaluation metric is not necessarily a
    valid way of measuring generalization performance. Results can differ
    from :func:`cross_validate` and :func:`cross_val_score` unless all test
    sets have equal size and the metric decomposes over samples.

    Read more in the :ref:`User Guide <cross_validation>`.

    Parameters
    ----------
    estimator : estimator object implementing 'fit' and 'predict'
        The object to use to fit the data.

    X : array-like of shape (n_samples, n_features)
        The data to fit. Can be, for example a list, or an array at least 2d.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs), \
            default=None
        The target variable to try to predict in the case of
        supervised learning.

    groups : array-like of shape (n_samples,), default=None
        Group labels for the samples, used when splitting the dataset into
        train/test sets. Only used in conjunction with a "Group" :term:`cv`
        instance (e.g., :class:`GroupKFold`).

    cv : int, cross-validation generator or an iterable, default=None
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 5-fold cross validation,
        - int, to specify the number of folds in a `(Stratified)KFold`,
        - :term:`CV splitter`,
        - An iterable that generates (train, test) splits as arrays of indices.

        For int/None inputs, if the estimator is a classifier and ``y`` is
        either binary or multiclass, :class:`StratifiedKFold` is used. In all
        other cases, :class:`KFold` is used. These splitters are instantiated
        with `shuffle=False` so the splits will be the same across calls.

        Refer to the :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

        .. versionchanged:: 0.22
            ``cv`` default value if None changed from 3-fold to 5-fold.

    n_jobs : int, default=None
        Number of jobs to run in parallel. Training the estimator and
        predicting are parallelized over the cross-validation splits.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    verbose : int, default=0
        The verbosity level.

    fit_params : dict, default=None
        Parameters to pass to the fit method of the estimator.

    pre_dispatch : int or str, default='2*n_jobs'
        Controls the number of jobs that get dispatched during parallel
        execution. Reducing this number can be useful to avoid an
        explosion of memory consumption when more jobs get dispatched
        than CPUs can process. This parameter can be:

            - None, in which case all the jobs are immediately
              created and spawned. Use this for lightweight and
              fast-running jobs, to avoid delays due to on-demand
              spawning of the jobs

            - An int, giving the exact number of total jobs that are
              spawned

            - A str, giving an expression as a function of n_jobs,
              as in '2*n_jobs'

    method : {'predict', 'predict_proba', 'predict_log_proba', \
              'decision_function'}, default='predict'
        The method to be invoked by `estimator`.

    Returns
    -------
    predictions : ndarray
        This is the result of calling `method`. Shape:

            - When `method` is 'predict' and in special case where `method` is
              'decision_function' and the target is binary: (n_samples,)
            - When `method` is one of {'predict_proba', 'predict_log_proba',
              'decision_function'} (unless special case above):
              (n_samples, n_classes)
            - If `estimator` is :term:`multioutput`, an extra dimension
              'n_outputs' is added to the end of each shape above.

    Raises
    ------
    ValueError
        If the test sets produced by ``cv`` do not form a partition of the
        samples.

    See Also
    --------
    cross_val_score : Calculate score for each CV split.
    cross_validate : Calculate one or more scores and timings for each CV
        split.

    Notes
    -----
    In the case that one or more classes are absent in a training portion, a
    default score needs to be assigned to all instances for that class if
    ``method`` produces columns per class, as in {'decision_function',
    'predict_proba', 'predict_log_proba'}.  For ``predict_proba`` this value is
    0.  In order to ensure finite output, we approximate negative infinity by
    the minimum finite float value for the dtype in other cases.

    Examples
    --------
    >>> from sklearn import datasets, linear_model
    >>> from sklearn.model_selection import cross_val_predict
    >>> diabetes = datasets.load_diabetes()
    >>> X = diabetes.data[:150]
    >>> y = diabetes.target[:150]
    >>> lasso = linear_model.Lasso()
    >>> y_pred = cross_val_predict(lasso, X, y, cv=3)
    """
    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    # Materialize the folds: they are needed both for the partition check
    # and for dispatching the per-fold jobs.
    folds = list(cv.split(X, y, groups))

    all_test_idx = np.concatenate([fold_test for _, fold_test in folds])
    is_partition = _check_is_permutation(all_test_idx, _num_samples(X))
    if not is_partition:
        raise ValueError("cross_val_predict only works for partitions")

    # Methods that emit one column per class need integer-encoded targets so
    # that the column ordering is consistent from one fold to the next.
    per_class_methods = ("decision_function", "predict_proba", "predict_log_proba")
    encode = method in per_class_methods and y is not None
    if encode:
        y = np.asarray(y)
        if y.ndim == 1:
            y = LabelEncoder().fit_transform(y)
        elif y.ndim == 2:
            # Encode every output column independently.
            encoded = np.zeros_like(y, dtype=int)
            for column, column_labels in enumerate(y.T):
                encoded[:, column] = LabelEncoder().fit_transform(column_labels)
            y = encoded

    # Each fold receives its own clone of the estimator so that the folds are
    # independent of each other and the estimator is pickle-able.
    run_folds = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
    predictions = run_folds(
        delayed(_fit_and_predict)(
            clone(estimator), X, y, fold_train, fold_test, verbose, fit_params, method
        )
        for fold_train, fold_test in folds
    )

    # Inverse permutation: maps each original sample position to its row in
    # the fold-ordered concatenation built below.
    n_test = len(all_test_idx)
    restore_order = np.empty(n_test, dtype=int)
    restore_order[all_test_idx] = np.arange(n_test)

    first_fold = predictions[0]
    if sp.issparse(first_fold):
        predictions = sp.vstack(predictions, format=first_fold.format)
    elif encode and isinstance(first_fold, list):
        # Every fold returned a list of per-label outputs, i.e. this is a
        # multioutput-multiclass task: concatenate the folds label by label
        # into an `n_labels` long list.
        n_labels = y.shape[1]
        predictions = [
            np.concatenate([fold_pred[i_label] for fold_pred in predictions])
            for i_label in range(n_labels)
        ]
    else:
        predictions = np.concatenate(predictions)

    if isinstance(predictions, list):
        return [label_pred[restore_order] for label_pred in predictions]
    return predictions[restore_order]
992
993
def _fit_and_predict(estimator, X, y, train, test, verbose, fit_params, method):
    """Fit an estimator on one split's train part and predict its test part.

    Read more in the :ref:`User Guide <cross_validation>`.

    Parameters
    ----------
    estimator : estimator object implementing 'fit' and 'predict'
        The object to use to fit the data.

    X : array-like of shape (n_samples, n_features)
        The data to fit.

        .. versionchanged:: 0.20
            X is only required to be an object with finite length or shape now

    y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None
        The target variable to try to predict in the case of
        supervised learning.

    train : array-like of shape (n_train_samples,)
        Indices of training samples.

    test : array-like of shape (n_test_samples,)
        Indices of test samples.

    verbose : int
        The verbosity level. Accepted for signature compatibility; it is not
        read inside this function.

    fit_params : dict or None
        Parameters that will be passed to ``estimator.fit``.

    method : str
        Name of the estimator method that is called on the test data.

    Returns
    -------
    predictions : sequence
        Result of calling 'estimator.method'
    """
    # Adjust length of sample weights
    fit_params = {} if fit_params is None else fit_params
    fit_params = _check_fit_params(X, fit_params, train)

    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, _ = _safe_split(estimator, X, y, test, train)

    # Leave `y` out of the call entirely when there is no target.
    fit_args = (X_train,) if y_train is None else (X_train, y_train)
    estimator.fit(*fit_args, **fit_params)
    predictions = getattr(estimator, method)(X_test)

    per_class_methods = ("decision_function", "predict_proba", "predict_log_proba")
    needs_reordering = method in per_class_methods and y is not None
    if not needs_reordering:
        return predictions

    if isinstance(predictions, list):
        # One prediction array per output label: fix up each one against the
        # number of distinct values found in the matching column of `y`.
        return [
            _enforce_prediction_order(
                estimator.classes_[i_label],
                label_predictions,
                n_classes=len(set(y[:, i_label])),
                method=method,
            )
            for i_label, label_predictions in enumerate(predictions)
        ]

    # A 2D y array should be a binary label indicator matrix
    if y.ndim == 1:
        n_classes = len(set(y))
    else:
        n_classes = y.shape[1]
    return _enforce_prediction_order(
        estimator.classes_, predictions, n_classes, method
    )
1071
1072
1073def _enforce_prediction_order(classes, predictions, n_classes, method):
1074    """Ensure that prediction arrays have correct column order
1075
1076    When doing cross-validation, if one or more classes are
1077    not present in the subset of data used for training,
1078    then the output prediction array might not have the same
1079    columns as other folds. Use the list of class names
1080    (assumed to be ints) to enforce the correct column order.
1081
1082    Note that `classes` is the list of classes in this fold
1083    (a subset of the classes in the full training set)
1084    and `n_classes` is the number of classes in the full training set.
1085    """
1086    if n_classes != len(classes):
1087        recommendation = (
1088            "To fix this, use a cross-validation "
1089            "technique resulting in properly "
1090            "stratified folds"
1091        )
1092        warnings.warn(
1093            "Number of classes in training fold ({}) does "
1094            "not match total number of classes ({}). "
1095            "Results may not be appropriate for your use case. "
1096            "{}".format(len(classes), n_classes, recommendation),
1097            RuntimeWarning,
1098        )
1099        if method == "decision_function":
1100            if predictions.ndim == 2 and predictions.shape[1] != len(classes):
1101                # This handles the case when the shape of predictions
1102                # does not match the number of classes used to train
1103                # it with. This case is found when sklearn.svm.SVC is
1104                # set to `decision_function_shape='ovo'`.
1105                raise ValueError(
1106                    "Output shape {} of {} does not match "
1107                    "number of classes ({}) in fold. "
1108                    "Irregular decision_function outputs "
1109                    "are not currently supported by "
1110                    "cross_val_predict".format(predictions.shape, method, len(classes))
1111                )
1112            if len(classes) <= 2:
1113                # In this special case, `predictions` contains a 1D array.
1114                raise ValueError(
1115                    "Only {} class/es in training fold, but {} "
1116                    "in overall dataset. This "
1117                    "is not supported for decision_function "
1118                    "with imbalanced folds. {}".format(
1119                        len(classes), n_classes, recommendation
1120                    )
1121                )
1122
1123        float_min = np.finfo(predictions.dtype).min
1124        default_values = {
1125            "decision_function": float_min,
1126            "predict_log_proba": float_min,
1127            "predict_proba": 0,
1128        }
1129        predictions_for_all_classes = np.full(
1130            (_num_samples(predictions), n_classes),
1131            default_values[method],
1132            dtype=predictions.dtype,
1133        )
1134        predictions_for_all_classes[:, classes] = predictions
1135        predictions = predictions_for_all_classes
1136    return predictions
1137
1138
1139def _check_is_permutation(indices, n_samples):
1140    """Check whether indices is a reordering of the array np.arange(n_samples)
1141
1142    Parameters
1143    ----------
1144    indices : ndarray
1145        int array to test
1146    n_samples : int
1147        number of expected elements
1148
1149    Returns
1150    -------
1151    is_partition : bool
1152        True iff sorted(indices) is np.arange(n)
1153    """
1154    if len(indices) != n_samples:
1155        return False
1156    hit = np.zeros(n_samples, dtype=bool)
1157    hit[indices] = True
1158    if not np.all(hit):
1159        return False
1160    return True
1161
1162
def permutation_test_score(
    estimator,
    X,
    y,
    *,
    groups=None,
    cv=None,
    n_permutations=100,
    n_jobs=None,
    random_state=0,
    verbose=0,
    scoring=None,
    fit_params=None,
):
    """Evaluate the significance of a cross-validated score with permutations.

    The targets are permuted to generate 'randomized data', from which an
    empirical p-value is computed against the null hypothesis that features
    and targets are independent.

    The p-value is the fraction of randomized data sets on which the
    estimator performed as well as or better than on the original data. A
    small p-value suggests that there is a real dependency between features
    and targets which the estimator has used to give good predictions. A
    large p-value may be due to the lack of a real dependency between
    features and targets, or to the estimator not being able to use that
    dependency to give good predictions.

    Read more in the :ref:`User Guide <permutation_test_score>`.

    Parameters
    ----------
    estimator : estimator object implementing 'fit'
        The object to use to fit the data.

    X : array-like of shape at least 2D
        The data to fit.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None
        The target variable to try to predict in the case of
        supervised learning.

    groups : array-like of shape (n_samples,), default=None
        Labels to constrain permutation within groups, i.e. ``y`` values
        are permuted among samples with the same group identifier.
        When not specified, ``y`` values are permuted among all samples.

        When a grouped cross-validator is used, the group labels are
        also passed on to the ``split`` method of the cross-validator. The
        cross-validator uses them for grouping the samples while splitting
        the dataset into train/test set.

    scoring : str or callable, default=None
        A single str (see :ref:`scoring_parameter`) or a callable
        (see :ref:`scoring`) to evaluate the predictions on the test set.

        If `None` the estimator's score method is used.

    cv : int, cross-validation generator or an iterable, default=None
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - `None`, to use the default 5-fold cross validation,
        - int, to specify the number of folds in a `(Stratified)KFold`,
        - :term:`CV splitter`,
        - An iterable yielding (train, test) splits as arrays of indices.

        For `int`/`None` inputs, if the estimator is a classifier and `y` is
        either binary or multiclass, :class:`StratifiedKFold` is used. In all
        other cases, :class:`KFold` is used. These splitters are instantiated
        with `shuffle=False` so the splits will be the same across calls.

        Refer to the :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

        .. versionchanged:: 0.22
            `cv` default value if `None` changed from 3-fold to 5-fold.

    n_permutations : int, default=100
        Number of times to permute ``y``.

    n_jobs : int, default=None
        Number of jobs to run in parallel. Training the estimator and computing
        the cross-validated score are parallelized over the permutations.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    random_state : int, RandomState instance or None, default=0
        Pass an int for reproducible output for permutation of
        ``y`` values among samples. See :term:`Glossary <random_state>`.

    verbose : int, default=0
        The verbosity level.

    fit_params : dict, default=None
        Parameters to pass to the fit method of the estimator.

        .. versionadded:: 0.24

    Returns
    -------
    score : float
        The true score without permuting targets.

    permutation_scores : array of shape (n_permutations,)
        The scores obtained for each permutation.

    pvalue : float
        The p-value, which approximates the probability that the score would
        be obtained by chance. This is calculated as:

        `(C + 1) / (n_permutations + 1)`

        Where C is the number of permutations whose score >= the true score.

        The best possible p-value is 1/(n_permutations + 1), the worst is 1.0.

    Notes
    -----
    This function implements Test 1 in:

        Ojala and Garriga. `Permutation Tests for Studying Classifier
        Performance
        <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_. The
        Journal of Machine Learning Research (2010) vol. 11

    """
    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    scorer = check_scoring(estimator, scoring=scoring)
    rng = check_random_state(random_state)

    # Every evaluation works on a fresh clone of the estimator so that the
    # runs are independent of each other and the estimator is pickle-able.
    true_score = _permutation_test_score(
        clone(estimator), X, y, groups, cv, scorer, fit_params=fit_params
    )

    run_permutations = Parallel(n_jobs=n_jobs, verbose=verbose)
    null_scores = run_permutations(
        delayed(_permutation_test_score)(
            clone(estimator),
            X,
            _shuffle(y, groups, rng),
            groups,
            cv,
            scorer,
            fit_params=fit_params,
        )
        for _ in range(n_permutations)
    )
    null_scores = np.array(null_scores)

    # C in the docstring formula: permutations scoring at least as well as
    # the unpermuted data.
    n_at_least_as_good = np.sum(null_scores >= true_score)
    pvalue = (n_at_least_as_good + 1.0) / (n_permutations + 1)
    return true_score, null_scores, pvalue
1317
1318
def _permutation_test_score(estimator, X, y, groups, cv, scorer, fit_params):
    """Auxiliary function for permutation_test_score.

    Fits `estimator` on the training part of every split produced by
    ``cv.split(X, y, groups)``, scores it on the matching test part with
    `scorer`, and returns the mean of the per-split scores.

    Parameters
    ----------
    estimator : estimator object implementing 'fit'
        Estimator that is fitted in place on every split.

    X : array-like
        The data to fit.

    y : array-like
        The (possibly permuted) target.

    groups : array-like or None
        Group labels forwarded to ``cv.split``.

    cv : cross-validation splitter
        Object whose ``split(X, y, groups)`` yields (train, test) indices.

    scorer : callable
        Called as ``scorer(estimator, X_test, y_test)``.

    fit_params : dict or None
        Parameters passed to ``estimator.fit``. ``None`` is treated as an
        empty dict.

    Returns
    -------
    score : float
        Mean of the scores obtained on the test sets.
    """
    fit_params = fit_params if fit_params is not None else {}
    fold_scores = []
    for train, test in cv.split(X, y, groups):
        X_train, y_train = _safe_split(estimator, X, y, train)
        X_test, y_test = _safe_split(estimator, X, y, test, train)
        # Adjust length of sample weights. The result is kept in a per-fold
        # variable: rebinding `fit_params` here would hand the previous
        # fold's already-subset parameters to `_check_fit_params` on the
        # next iteration instead of the caller's original ones.
        fit_params_train = _check_fit_params(X, fit_params, train)
        estimator.fit(X_train, y_train, **fit_params_train)
        fold_scores.append(scorer(estimator, X_test, y_test))
    return np.mean(fold_scores)
1331
1332
def _shuffle(y, groups, random_state):
    """Return `y` randomly reordered, optionally only within each group.

    When `groups` is None a single permutation of all positions is drawn
    from `random_state`. Otherwise the positions are permuted separately
    among the samples that share the same value in `groups`.
    """
    if groups is None:
        order = random_state.permutation(len(y))
        return _safe_indexing(y, order)

    order = np.arange(len(groups))
    for label in np.unique(groups):
        in_group = groups == label
        # Shuffle the positions of this group among themselves only.
        order[in_group] = random_state.permutation(order[in_group])
    return _safe_indexing(y, order)
1343
1344
def learning_curve(
    estimator,
    X,
    y,
    *,
    groups=None,
    train_sizes=np.linspace(0.1, 1.0, 5),
    cv=None,
    scoring=None,
    exploit_incremental_learning=False,
    n_jobs=None,
    pre_dispatch="all",
    verbose=0,
    shuffle=False,
    random_state=None,
    error_score=np.nan,
    return_times=False,
    fit_params=None,
):
    """Learning curve.

    Determines cross-validated training and test scores for different training
    set sizes.

    A cross-validation generator splits the whole dataset k times into
    training and test data. Subsets of the training set with varying sizes
    are used to train the estimator, and a score is computed for each
    training subset size and for the test set. Afterwards, the scores are
    averaged over all k runs for each training subset size.

    Read more in the :ref:`User Guide <learning_curve>`.

    Parameters
    ----------
    estimator : object type that implements the "fit" and "predict" methods
        An object of that type which is cloned for each validation.

    X : array-like of shape (n_samples, n_features)
        Training vector, where `n_samples` is the number of samples and
        `n_features` is the number of features.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Target relative to X for classification or regression;
        None for unsupervised learning.

    groups : array-like of shape (n_samples,), default=None
        Group labels for the samples, used when splitting the dataset into
        train/test sets. Only used in conjunction with a "Group" :term:`cv`
        instance (e.g., :class:`GroupKFold`).

    train_sizes : array-like of shape (n_ticks,), \
            default=np.linspace(0.1, 1.0, 5)
        Relative or absolute numbers of training examples that will be used to
        generate the learning curve. If the dtype is float, it is regarded as a
        fraction of the maximum size of the training set (that is determined
        by the selected validation method), i.e. it has to be within (0, 1].
        Otherwise it is interpreted as absolute sizes of the training sets.
        Note that for classification the number of samples usually has to
        be big enough to contain at least one sample from each class.

    cv : int, cross-validation generator or an iterable, default=None
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 5-fold cross validation,
        - int, to specify the number of folds in a `(Stratified)KFold`,
        - :term:`CV splitter`,
        - An iterable yielding (train, test) splits as arrays of indices.

        For int/None inputs, if the estimator is a classifier and ``y`` is
        either binary or multiclass, :class:`StratifiedKFold` is used. In all
        other cases, :class:`KFold` is used. These splitters are instantiated
        with `shuffle=False` so the splits will be the same across calls.

        Refer to the :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

        .. versionchanged:: 0.22
            ``cv`` default value if None changed from 3-fold to 5-fold.

    scoring : str or callable, default=None
        A str (see model evaluation documentation) or
        a scorer callable object / function with signature
        ``scorer(estimator, X, y)``.

    exploit_incremental_learning : bool, default=False
        If the estimator supports incremental learning, this will be
        used to speed up fitting for different training set sizes.

    n_jobs : int, default=None
        Number of jobs to run in parallel. Training the estimator and computing
        the score are parallelized over the different training and test sets.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    pre_dispatch : int or str, default='all'
        Number of predispatched jobs for parallel execution (default is
        all). The option can reduce the allocated memory. The str can
        be an expression like '2*n_jobs'.

    verbose : int, default=0
        Controls the verbosity: the higher, the more messages.

    shuffle : bool, default=False
        Whether to shuffle training data before taking prefixes of it
        based on ``train_sizes``.

    random_state : int, RandomState instance or None, default=None
        Used when ``shuffle`` is True. Pass an int for reproducible
        output across multiple function calls.
        See :term:`Glossary <random_state>`.

    error_score : 'raise' or numeric, default=np.nan
        Value to assign to the score if an error occurs in estimator fitting.
        If set to 'raise', the error is raised.
        If a numeric value is given, FitFailedWarning is raised.

        .. versionadded:: 0.20

    return_times : bool, default=False
        Whether to return the fit and score times.

    fit_params : dict, default=None
        Parameters to pass to the fit method of the estimator.

        .. versionadded:: 0.24

    Returns
    -------
    train_sizes_abs : array of shape (n_unique_ticks,)
        Numbers of training examples that have been used to generate the
        learning curve. Note that the number of ticks might be less
        than n_ticks because duplicate entries will be removed.

    train_scores : array of shape (n_ticks, n_cv_folds)
        Scores on training sets.

    test_scores : array of shape (n_ticks, n_cv_folds)
        Scores on test set.

    fit_times : array of shape (n_ticks, n_cv_folds)
        Times spent for fitting in seconds. Only present if ``return_times``
        is True.

    score_times : array of shape (n_ticks, n_cv_folds)
        Times spent for scoring in seconds. Only present if ``return_times``
        is True.

    Raises
    ------
    ValueError
        If ``exploit_incremental_learning`` is True but the estimator has no
        ``partial_fit`` attribute.

    Notes
    -----
    See :ref:`examples/model_selection/plot_learning_curve.py
    <sphx_glr_auto_examples_model_selection_plot_learning_curve.py>`
    """
    if exploit_incremental_learning and not hasattr(estimator, "partial_fit"):
        raise ValueError(
            "An estimator must support the partial_fit interface "
            "to exploit incremental learning"
        )
    X, y, groups = indexable(X, y, groups)

    cv = check_cv(cv, y, classifier=is_classifier(estimator))
    # Keep the splits in a list: the first one is inspected right below and
    # all of them are iterated over afterwards.
    splits = list(cv.split(X, y, groups))

    scorer = check_scoring(estimator, scoring=scoring)

    # Because the lengths of folds can be significantly different, it is
    # not guaranteed that we use all of the available training data when we
    # use the first 'n_max_training_samples' samples.
    n_max_training_samples = len(splits[0][0])
    train_sizes_abs = _translate_train_sizes(train_sizes, n_max_training_samples)
    n_unique_ticks = train_sizes_abs.shape[0]
    if verbose > 0:
        print("[learning_curve] Training set sizes: " + str(train_sizes_abs))

    parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose)

    fold_iter = splits
    if shuffle:
        # Lazily permute the training indices of each split, so that the
        # size-limited prefixes taken later are random subsets.
        rng = check_random_state(random_state)
        fold_iter = (
            (rng.permutation(fold_train), fold_test)
            for fold_train, fold_test in splits
        )

    if exploit_incremental_learning:
        classes = np.unique(y) if is_classifier(estimator) else None
        per_fold = parallel(
            delayed(_incremental_fit_estimator)(
                clone(estimator),
                X,
                y,
                classes,
                fold_train,
                fold_test,
                train_sizes_abs,
                scorer,
                verbose,
                return_times,
                error_score=error_score,
                fit_params=fit_params,
            )
            for fold_train, fold_test in fold_iter
        )
        out = np.asarray(per_fold).transpose((2, 1, 0))
    else:
        # One job per (split, training size) pair; each job trains on the
        # leading `n_train_samples` training indices of its split.
        train_test_proportions = [
            (fold_train[:n_train_samples], fold_test)
            for fold_train, fold_test in fold_iter
            for n_train_samples in train_sizes_abs
        ]

        results = parallel(
            delayed(_fit_and_score)(
                clone(estimator),
                X,
                y,
                scorer,
                subset_train,
                subset_test,
                verbose,
                parameters=None,
                fit_params=fit_params,
                return_train_score=True,
                error_score=error_score,
                return_times=return_times,
            )
            for subset_train, subset_test in train_test_proportions
        )
        results = _aggregate_score_dicts(results)

        def _ticks_by_folds(key):
            # Jobs were queued split by split, so reshape to one row per
            # split and transpose to get one row per training size.
            return results[key].reshape(-1, n_unique_ticks).T

        out = [_ticks_by_folds("train_scores"), _ticks_by_folds("test_scores")]
        if return_times:
            out.extend([_ticks_by_folds("fit_time"), _ticks_by_folds("score_time")])

    if return_times:
        return train_sizes_abs, out[0], out[1], out[2], out[3]
    return train_sizes_abs, out[0], out[1]
1586
1587
1588def _translate_train_sizes(train_sizes, n_max_training_samples):
1589    """Determine absolute sizes of training subsets and validate 'train_sizes'.
1590
1591    Examples:
1592        _translate_train_sizes([0.5, 1.0], 10) -> [5, 10]
1593        _translate_train_sizes([5, 10], 10) -> [5, 10]
1594
1595    Parameters
1596    ----------
1597    train_sizes : array-like of shape (n_ticks,)
1598        Numbers of training examples that will be used to generate the
1599        learning curve. If the dtype is float, it is regarded as a
1600        fraction of 'n_max_training_samples', i.e. it has to be within (0, 1].
1601
1602    n_max_training_samples : int
1603        Maximum number of training samples (upper bound of 'train_sizes').
1604
1605    Returns
1606    -------
1607    train_sizes_abs : array of shape (n_unique_ticks,)
1608        Numbers of training examples that will be used to generate the
1609        learning curve. Note that the number of ticks might be less
1610        than n_ticks because duplicate entries will be removed.
1611    """
1612    train_sizes_abs = np.asarray(train_sizes)
1613    n_ticks = train_sizes_abs.shape[0]
1614    n_min_required_samples = np.min(train_sizes_abs)
1615    n_max_required_samples = np.max(train_sizes_abs)
1616    if np.issubdtype(train_sizes_abs.dtype, np.floating):
1617        if n_min_required_samples <= 0.0 or n_max_required_samples > 1.0:
1618            raise ValueError(
1619                "train_sizes has been interpreted as fractions "
1620                "of the maximum number of training samples and "
1621                "must be within (0, 1], but is within [%f, %f]."
1622                % (n_min_required_samples, n_max_required_samples)
1623            )
1624        train_sizes_abs = (train_sizes_abs * n_max_training_samples).astype(
1625            dtype=int, copy=False
1626        )
1627        train_sizes_abs = np.clip(train_sizes_abs, 1, n_max_training_samples)
1628    else:
1629        if (
1630            n_min_required_samples <= 0
1631            or n_max_required_samples > n_max_training_samples
1632        ):
1633            raise ValueError(
1634                "train_sizes has been interpreted as absolute "
1635                "numbers of training samples and must be within "
1636                "(0, %d], but is within [%d, %d]."
1637                % (
1638                    n_max_training_samples,
1639                    n_min_required_samples,
1640                    n_max_required_samples,
1641                )
1642            )
1643
1644    train_sizes_abs = np.unique(train_sizes_abs)
1645    if n_ticks > train_sizes_abs.shape[0]:
1646        warnings.warn(
1647            "Removed duplicate entries from 'train_sizes'. Number "
1648            "of ticks will be less than the size of "
1649            "'train_sizes': %d instead of %d." % (train_sizes_abs.shape[0], n_ticks),
1650            RuntimeWarning,
1651        )
1652
1653    return train_sizes_abs
1654
1655
def _incremental_fit_estimator(
    estimator,
    X,
    y,
    classes,
    train,
    test,
    train_sizes,
    scorer,
    verbose,
    return_times,
    error_score,
    fit_params,
):
    """Fit an estimator on growing training subsets through ``partial_fit``.

    For each tick in `train_sizes`, the estimator is updated with the slice
    of `train` lying between the previous tick and the current one, and is
    then scored on `test` and on all training indices up to the current tick.

    Parameters
    ----------
    estimator : object
        Estimator exposing ``partial_fit``; it is updated in place.
    X, y : indexable
        Data and targets, sliced with ``_safe_split``.
    classes : object
        Forwarded to ``partial_fit`` as the ``classes`` keyword.
    train, test : array of indices
        Training and test indices of the current split.
    train_sizes : sequence of int
        Cut points into `train`, one per tick.
    scorer : callable
        Scorer handed to ``_score``.
    verbose : int
        Not used by this function.
    return_times : bool
        Whether fit and score durations are part of the result.
    error_score : 'raise' or numeric
        Forwarded to ``_score``.
    fit_params : dict or None
        Extra keyword arguments for ``partial_fit``; None means no extras.

    Returns
    -------
    ndarray
        Transposed array of the collected per-tick lists: train scores and
        test scores, followed by fit times and score times when
        `return_times` is true.
    """
    extra_fit_kwargs = {} if fit_params is None else fit_params

    # Cutting `train` at every tick yields the new samples per tick; the
    # trailing piece past the last tick is never used for fitting.
    fresh_chunks = np.split(train, train_sizes)[:-1]

    scores_on_train, scores_on_test = [], []
    fit_durations, score_durations = [], []

    for tick, fresh_indices in zip(train_sizes, fresh_chunks):
        seen_indices = train[:tick]
        X_seen, y_seen = _safe_split(estimator, X, y, seen_indices)
        X_fresh, y_fresh = _safe_split(estimator, X, y, fresh_indices)
        X_held_out, y_held_out = _safe_split(estimator, X, y, test, seen_indices)

        fit_started = time.time()
        # Without targets, only the data is passed positionally.
        positional = (X_fresh,) if y_fresh is None else (X_fresh, y_fresh)
        estimator.partial_fit(*positional, classes=classes, **extra_fit_kwargs)
        fit_durations.append(time.time() - fit_started)

        score_started = time.time()
        scores_on_test.append(
            _score(estimator, X_held_out, y_held_out, scorer, error_score)
        )
        scores_on_train.append(_score(estimator, X_seen, y_seen, scorer, error_score))
        score_durations.append(time.time() - score_started)

    collected = [scores_on_train, scores_on_test]
    if return_times:
        collected += [fit_durations, score_durations]

    return np.array(collected).T
1705
1706
def validation_curve(
    estimator,
    X,
    y,
    *,
    param_name,
    param_range,
    groups=None,
    cv=None,
    scoring=None,
    n_jobs=None,
    pre_dispatch="all",
    verbose=0,
    error_score=np.nan,
    fit_params=None,
):
    """Validation curve.

    Compute training and test scores of an estimator for a range of values
    of one of its parameters.

    Every value in ``param_range`` is evaluated on every cross-validation
    split, much like a grid search over a single parameter. In contrast to a
    grid search, the training scores are computed as well, so this function
    is mainly a helper for plotting the results.

    Read more in the :ref:`User Guide <validation_curve>`.

    Parameters
    ----------
    estimator : object type that implements the "fit" and "predict" methods
        An object of that type which is cloned for each validation.

    X : array-like of shape (n_samples, n_features)
        Training vector, where `n_samples` is the number of samples and
        `n_features` is the number of features.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None
        Target relative to X for classification or regression;
        None for unsupervised learning.

    param_name : str
        Name of the parameter that will be varied.

    param_range : array-like of shape (n_values,)
        The values of the parameter that will be evaluated.

    groups : array-like of shape (n_samples,), default=None
        Group labels for the samples used while splitting the dataset into
        train/test set. Only used in conjunction with a "Group" :term:`cv`
        instance (e.g., :class:`GroupKFold`).

    cv : int, cross-validation generator or an iterable, default=None
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 5-fold cross validation,
        - int, to specify the number of folds in a `(Stratified)KFold`,
        - :term:`CV splitter`,
        - An iterable yielding (train, test) splits as arrays of indices.

        For int/None inputs, if the estimator is a classifier and ``y`` is
        either binary or multiclass, :class:`StratifiedKFold` is used. In all
        other cases, :class:`KFold` is used. These splitters are instantiated
        with `shuffle=False` so the splits will be the same across calls.

        Refer to the :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

        .. versionchanged:: 0.22
            ``cv`` default value if None changed from 3-fold to 5-fold.

    scoring : str or callable, default=None
        A str (see model evaluation documentation) or
        a scorer callable object / function with signature
        ``scorer(estimator, X, y)``.

    n_jobs : int, default=None
        Number of jobs to run in parallel. Training the estimator and computing
        the score are parallelized over the combinations of each parameter
        value and each cross-validation split.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    pre_dispatch : int or str, default='all'
        Number of predispatched jobs for parallel execution (default is
        all). The option can reduce the allocated memory. The str can
        be an expression like '2*n_jobs'.

    verbose : int, default=0
        Controls the verbosity: the higher, the more messages.

    error_score : 'raise' or numeric, default=np.nan
        Value to assign to the score if an error occurs in estimator fitting.
        If set to 'raise', the error is raised.
        If a numeric value is given, FitFailedWarning is raised.

        .. versionadded:: 0.20

    fit_params : dict, default=None
        Parameters to pass to the fit method of the estimator.

        .. versionadded:: 0.24

    Returns
    -------
    train_scores : array of shape (n_ticks, n_cv_folds)
        Scores on training sets, with one row per entry of ``param_range``.

    test_scores : array of shape (n_ticks, n_cv_folds)
        Scores on test set, with one row per entry of ``param_range``.

    Notes
    -----
    See :ref:`sphx_glr_auto_examples_model_selection_plot_validation_curve.py`

    """
    X, y, groups = indexable(X, y, groups)

    splitter = check_cv(cv, y, classifier=is_classifier(estimator))
    scorer = check_scoring(estimator, scoring=scoring)

    runner = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose)

    # NOTE: splits form the outer loop and parameter values the inner loop.
    # Do not swap them: one-time cv splitters can only be iterated once.
    jobs = (
        delayed(_fit_and_score)(
            clone(estimator),
            X,
            y,
            scorer,
            train_indices,
            test_indices,
            verbose,
            parameters={param_name: value},
            fit_params=fit_params,
            return_train_score=True,
            error_score=error_score,
        )
        for train_indices, test_indices in splitter.split(X, y, groups)
        for value in param_range
    )
    per_key = _aggregate_score_dicts(runner(jobs))

    # Results arrive split by split, each split covering all parameter
    # values; reshape to (n_splits, n_values), then put values on the rows.
    n_values = len(param_range)
    train_scores = per_key["train_scores"].reshape(-1, n_values).T
    test_scores = per_key["test_scores"].reshape(-1, n_values).T

    return train_scores, test_scores
1855
1856
1857def _aggregate_score_dicts(scores):
1858    """Aggregate the list of dict to dict of np ndarray
1859
1860    The aggregated output of _aggregate_score_dicts will be a list of dict
1861    of form [{'prec': 0.1, 'acc':1.0}, {'prec': 0.1, 'acc':1.0}, ...]
1862    Convert it to a dict of array {'prec': np.array([0.1 ...]), ...}
1863
1864    Parameters
1865    ----------
1866
1867    scores : list of dict
1868        List of dicts of the scores for all scorers. This is a flat list,
1869        assumed originally to be of row major order.
1870
1871    Example
1872    -------
1873
1874    >>> scores = [{'a': 1, 'b':10}, {'a': 2, 'b':2}, {'a': 3, 'b':3},
1875    ...           {'a': 10, 'b': 10}]                         # doctest: +SKIP
1876    >>> _aggregate_score_dicts(scores)                        # doctest: +SKIP
1877    {'a': array([1, 2, 3, 10]),
1878     'b': array([10, 2, 3, 10])}
1879    """
1880    return {
1881        key: np.asarray([score[key] for score in scores])
1882        if isinstance(scores[0][key], numbers.Number)
1883        else [score[key] for score in scores]
1884        for key in scores[0]
1885    }
1886