1""" 2The :mod:`sklearn.model_selection._validation` module includes classes and 3functions to validate the model. 4""" 5 6# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr> 7# Gael Varoquaux <gael.varoquaux@normalesup.org> 8# Olivier Grisel <olivier.grisel@ensta.org> 9# Raghav RV <rvraghav93@gmail.com> 10# Michal Karbownik <michakarbownik@gmail.com> 11# License: BSD 3 clause 12 13 14import warnings 15import numbers 16import time 17from traceback import format_exc 18from contextlib import suppress 19from collections import Counter 20 21import numpy as np 22import scipy.sparse as sp 23from joblib import Parallel, logger 24 25from ..base import is_classifier, clone 26from ..utils import indexable, check_random_state, _safe_indexing 27from ..utils.validation import _check_fit_params 28from ..utils.validation import _num_samples 29from ..utils.fixes import delayed 30from ..utils.metaestimators import _safe_split 31from ..metrics import check_scoring 32from ..metrics._scorer import _check_multimetric_scoring, _MultimetricScorer 33from ..exceptions import FitFailedWarning, NotFittedError 34from ._split import check_cv 35from ..preprocessing import LabelEncoder 36 37 38__all__ = [ 39 "cross_validate", 40 "cross_val_score", 41 "cross_val_predict", 42 "permutation_test_score", 43 "learning_curve", 44 "validation_curve", 45] 46 47 48def cross_validate( 49 estimator, 50 X, 51 y=None, 52 *, 53 groups=None, 54 scoring=None, 55 cv=None, 56 n_jobs=None, 57 verbose=0, 58 fit_params=None, 59 pre_dispatch="2*n_jobs", 60 return_train_score=False, 61 return_estimator=False, 62 error_score=np.nan, 63): 64 """Evaluate metric(s) by cross-validation and also record fit/score times. 65 66 Read more in the :ref:`User Guide <multimetric_cross_validation>`. 67 68 Parameters 69 ---------- 70 estimator : estimator object implementing 'fit' 71 The object to use to fit the data. 72 73 X : array-like of shape (n_samples, n_features) 74 The data to fit. Can be for example a list, or an array. 75 76 y : array-like of shape (n_samples,) or (n_samples, n_outputs), \ 77 default=None 78 The target variable to try to predict in the case of 79 supervised learning. 80 81 groups : array-like of shape (n_samples,), default=None 82 Group labels for the samples used while splitting the dataset into 83 train/test set. Only used in conjunction with a "Group" :term:`cv` 84 instance (e.g., :class:`GroupKFold`). 85 86 scoring : str, callable, list, tuple, or dict, default=None 87 Strategy to evaluate the performance of the cross-validated model on 88 the test set. 89 90 If `scoring` represents a single score, one can use: 91 92 - a single string (see :ref:`scoring_parameter`); 93 - a callable (see :ref:`scoring`) that returns a single value. 94 95 If `scoring` represents multiple scores, one can use: 96 97 - a list or tuple of unique strings; 98 - a callable returning a dictionary where the keys are the metric 99 names and the values are the metric scores; 100 - a dictionary with metric names as keys and callables a values. 101 102 See :ref:`multimetric_grid_search` for an example. 103 104 cv : int, cross-validation generator or an iterable, default=None 105 Determines the cross-validation splitting strategy. 106 Possible inputs for cv are: 107 108 - None, to use the default 5-fold cross validation, 109 - int, to specify the number of folds in a `(Stratified)KFold`, 110 - :term:`CV splitter`, 111 - An iterable yielding (train, test) splits as arrays of indices. 112 113 For int/None inputs, if the estimator is a classifier and ``y`` is 114 either binary or multiclass, :class:`StratifiedKFold` is used. In all 115 other cases, :class:`.Fold` is used. These splitters are instantiated 116 with `shuffle=False` so the splits will be the same across calls. 117 118 Refer :ref:`User Guide <cross_validation>` for the various 119 cross-validation strategies that can be used here. 120 121 .. versionchanged:: 0.22 122 ``cv`` default value if None changed from 3-fold to 5-fold. 123 124 n_jobs : int, default=None 125 Number of jobs to run in parallel. Training the estimator and computing 126 the score are parallelized over the cross-validation splits. 127 ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. 128 ``-1`` means using all processors. See :term:`Glossary <n_jobs>` 129 for more details. 130 131 verbose : int, default=0 132 The verbosity level. 133 134 fit_params : dict, default=None 135 Parameters to pass to the fit method of the estimator. 136 137 pre_dispatch : int or str, default='2*n_jobs' 138 Controls the number of jobs that get dispatched during parallel 139 execution. Reducing this number can be useful to avoid an 140 explosion of memory consumption when more jobs get dispatched 141 than CPUs can process. This parameter can be: 142 143 - None, in which case all the jobs are immediately 144 created and spawned. Use this for lightweight and 145 fast-running jobs, to avoid delays due to on-demand 146 spawning of the jobs 147 148 - An int, giving the exact number of total jobs that are 149 spawned 150 151 - A str, giving an expression as a function of n_jobs, 152 as in '2*n_jobs' 153 154 return_train_score : bool, default=False 155 Whether to include train scores. 156 Computing training scores is used to get insights on how different 157 parameter settings impact the overfitting/underfitting trade-off. 158 However computing the scores on the training set can be computationally 159 expensive and is not strictly required to select the parameters that 160 yield the best generalization performance. 161 162 .. versionadded:: 0.19 163 164 .. versionchanged:: 0.21 165 Default value was changed from ``True`` to ``False`` 166 167 return_estimator : bool, default=False 168 Whether to return the estimators fitted on each split. 169 170 .. versionadded:: 0.20 171 172 error_score : 'raise' or numeric, default=np.nan 173 Value to assign to the score if an error occurs in estimator fitting. 174 If set to 'raise', the error is raised. 175 If a numeric value is given, FitFailedWarning is raised. 176 177 .. versionadded:: 0.20 178 179 Returns 180 ------- 181 scores : dict of float arrays of shape (n_splits,) 182 Array of scores of the estimator for each run of the cross validation. 183 184 A dict of arrays containing the score/time arrays for each scorer is 185 returned. The possible keys for this ``dict`` are: 186 187 ``test_score`` 188 The score array for test scores on each cv split. 189 Suffix ``_score`` in ``test_score`` changes to a specific 190 metric like ``test_r2`` or ``test_auc`` if there are 191 multiple scoring metrics in the scoring parameter. 192 ``train_score`` 193 The score array for train scores on each cv split. 194 Suffix ``_score`` in ``train_score`` changes to a specific 195 metric like ``train_r2`` or ``train_auc`` if there are 196 multiple scoring metrics in the scoring parameter. 197 This is available only if ``return_train_score`` parameter 198 is ``True``. 199 ``fit_time`` 200 The time for fitting the estimator on the train 201 set for each cv split. 202 ``score_time`` 203 The time for scoring the estimator on the test set for each 204 cv split. (Note time for scoring on the train set is not 205 included even if ``return_train_score`` is set to ``True`` 206 ``estimator`` 207 The estimator objects for each cv split. 208 This is available only if ``return_estimator`` parameter 209 is set to ``True``. 210 211 Examples 212 -------- 213 >>> from sklearn import datasets, linear_model 214 >>> from sklearn.model_selection import cross_validate 215 >>> from sklearn.metrics import make_scorer 216 >>> from sklearn.metrics import confusion_matrix 217 >>> from sklearn.svm import LinearSVC 218 >>> diabetes = datasets.load_diabetes() 219 >>> X = diabetes.data[:150] 220 >>> y = diabetes.target[:150] 221 >>> lasso = linear_model.Lasso() 222 223 Single metric evaluation using ``cross_validate`` 224 225 >>> cv_results = cross_validate(lasso, X, y, cv=3) 226 >>> sorted(cv_results.keys()) 227 ['fit_time', 'score_time', 'test_score'] 228 >>> cv_results['test_score'] 229 array([0.33150734, 0.08022311, 0.03531764]) 230 231 Multiple metric evaluation using ``cross_validate`` 232 (please refer the ``scoring`` parameter doc for more information) 233 234 >>> scores = cross_validate(lasso, X, y, cv=3, 235 ... scoring=('r2', 'neg_mean_squared_error'), 236 ... return_train_score=True) 237 >>> print(scores['test_neg_mean_squared_error']) 238 [-3635.5... -3573.3... -6114.7...] 239 >>> print(scores['train_r2']) 240 [0.28010158 0.39088426 0.22784852] 241 242 See Also 243 --------- 244 cross_val_score : Run cross-validation for single metric evaluation. 245 246 cross_val_predict : Get predictions from each split of cross-validation for 247 diagnostic purposes. 248 249 sklearn.metrics.make_scorer : Make a scorer from a performance metric or 250 loss function. 251 252 """ 253 X, y, groups = indexable(X, y, groups) 254 255 cv = check_cv(cv, y, classifier=is_classifier(estimator)) 256 257 if callable(scoring): 258 scorers = scoring 259 elif scoring is None or isinstance(scoring, str): 260 scorers = check_scoring(estimator, scoring) 261 else: 262 scorers = _check_multimetric_scoring(estimator, scoring) 263 264 # We clone the estimator to make sure that all the folds are 265 # independent, and that it is pickle-able. 266 parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch) 267 results = parallel( 268 delayed(_fit_and_score)( 269 clone(estimator), 270 X, 271 y, 272 scorers, 273 train, 274 test, 275 verbose, 276 None, 277 fit_params, 278 return_train_score=return_train_score, 279 return_times=True, 280 return_estimator=return_estimator, 281 error_score=error_score, 282 ) 283 for train, test in cv.split(X, y, groups) 284 ) 285 286 _warn_about_fit_failures(results, error_score) 287 288 # For callabe scoring, the return type is only know after calling. If the 289 # return type is a dictionary, the error scores can now be inserted with 290 # the correct key. 291 if callable(scoring): 292 _insert_error_scores(results, error_score) 293 294 results = _aggregate_score_dicts(results) 295 296 ret = {} 297 ret["fit_time"] = results["fit_time"] 298 ret["score_time"] = results["score_time"] 299 300 if return_estimator: 301 ret["estimator"] = results["estimator"] 302 303 test_scores_dict = _normalize_score_results(results["test_scores"]) 304 if return_train_score: 305 train_scores_dict = _normalize_score_results(results["train_scores"]) 306 307 for name in test_scores_dict: 308 ret["test_%s" % name] = test_scores_dict[name] 309 if return_train_score: 310 key = "train_%s" % name 311 ret[key] = train_scores_dict[name] 312 313 return ret 314 315 316def _insert_error_scores(results, error_score): 317 """Insert error in `results` by replacing them inplace with `error_score`. 318 319 This only applies to multimetric scores because `_fit_and_score` will 320 handle the single metric case. 321 """ 322 successful_score = None 323 failed_indices = [] 324 for i, result in enumerate(results): 325 if result["fit_error"] is not None: 326 failed_indices.append(i) 327 elif successful_score is None: 328 successful_score = result["test_scores"] 329 330 if successful_score is None: 331 raise NotFittedError("All estimators failed to fit") 332 333 if isinstance(successful_score, dict): 334 formatted_error = {name: error_score for name in successful_score} 335 for i in failed_indices: 336 results[i]["test_scores"] = formatted_error.copy() 337 if "train_scores" in results[i]: 338 results[i]["train_scores"] = formatted_error.copy() 339 340 341def _normalize_score_results(scores, scaler_score_key="score"): 342 """Creates a scoring dictionary based on the type of `scores`""" 343 if isinstance(scores[0], dict): 344 # multimetric scoring 345 return _aggregate_score_dicts(scores) 346 # scaler 347 return {scaler_score_key: scores} 348 349 350def _warn_about_fit_failures(results, error_score): 351 fit_errors = [ 352 result["fit_error"] for result in results if result["fit_error"] is not None 353 ] 354 if fit_errors: 355 num_failed_fits = len(fit_errors) 356 num_fits = len(results) 357 fit_errors_counter = Counter(fit_errors) 358 delimiter = "-" * 80 + "\n" 359 fit_errors_summary = "\n".join( 360 f"{delimiter}{n} fits failed with the following error:\n{error}" 361 for error, n in fit_errors_counter.items() 362 ) 363 364 some_fits_failed_message = ( 365 f"\n{num_failed_fits} fits failed out of a total of {num_fits}.\n" 366 "The score on these train-test partitions for these parameters" 367 f" will be set to {error_score}.\n" 368 "If these failures are not expected, you can try to debug them " 369 "by setting error_score='raise'.\n\n" 370 f"Below are more details about the failures:\n{fit_errors_summary}" 371 ) 372 warnings.warn(some_fits_failed_message, FitFailedWarning) 373 374 375def cross_val_score( 376 estimator, 377 X, 378 y=None, 379 *, 380 groups=None, 381 scoring=None, 382 cv=None, 383 n_jobs=None, 384 verbose=0, 385 fit_params=None, 386 pre_dispatch="2*n_jobs", 387 error_score=np.nan, 388): 389 """Evaluate a score by cross-validation. 390 391 Read more in the :ref:`User Guide <cross_validation>`. 392 393 Parameters 394 ---------- 395 estimator : estimator object implementing 'fit' 396 The object to use to fit the data. 397 398 X : array-like of shape (n_samples, n_features) 399 The data to fit. Can be for example a list, or an array. 400 401 y : array-like of shape (n_samples,) or (n_samples, n_outputs), \ 402 default=None 403 The target variable to try to predict in the case of 404 supervised learning. 405 406 groups : array-like of shape (n_samples,), default=None 407 Group labels for the samples used while splitting the dataset into 408 train/test set. Only used in conjunction with a "Group" :term:`cv` 409 instance (e.g., :class:`GroupKFold`). 410 411 scoring : str or callable, default=None 412 A str (see model evaluation documentation) or 413 a scorer callable object / function with signature 414 ``scorer(estimator, X, y)`` which should return only 415 a single value. 416 417 Similar to :func:`cross_validate` 418 but only a single metric is permitted. 419 420 If `None`, the estimator's default scorer (if available) is used. 421 422 cv : int, cross-validation generator or an iterable, default=None 423 Determines the cross-validation splitting strategy. 424 Possible inputs for cv are: 425 426 - `None`, to use the default 5-fold cross validation, 427 - int, to specify the number of folds in a `(Stratified)KFold`, 428 - :term:`CV splitter`, 429 - An iterable that generates (train, test) splits as arrays of indices. 430 431 For `int`/`None` inputs, if the estimator is a classifier and `y` is 432 either binary or multiclass, :class:`StratifiedKFold` is used. In all 433 other cases, :class:`KFold` is used. These splitters are instantiated 434 with `shuffle=False` so the splits will be the same across calls. 435 436 Refer :ref:`User Guide <cross_validation>` for the various 437 cross-validation strategies that can be used here. 438 439 .. versionchanged:: 0.22 440 `cv` default value if `None` changed from 3-fold to 5-fold. 441 442 n_jobs : int, default=None 443 Number of jobs to run in parallel. Training the estimator and computing 444 the score are parallelized over the cross-validation splits. 445 ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. 446 ``-1`` means using all processors. See :term:`Glossary <n_jobs>` 447 for more details. 448 449 verbose : int, default=0 450 The verbosity level. 451 452 fit_params : dict, default=None 453 Parameters to pass to the fit method of the estimator. 454 455 pre_dispatch : int or str, default='2*n_jobs' 456 Controls the number of jobs that get dispatched during parallel 457 execution. Reducing this number can be useful to avoid an 458 explosion of memory consumption when more jobs get dispatched 459 than CPUs can process. This parameter can be: 460 461 - ``None``, in which case all the jobs are immediately 462 created and spawned. Use this for lightweight and 463 fast-running jobs, to avoid delays due to on-demand 464 spawning of the jobs 465 466 - An int, giving the exact number of total jobs that are 467 spawned 468 469 - A str, giving an expression as a function of n_jobs, 470 as in '2*n_jobs' 471 472 error_score : 'raise' or numeric, default=np.nan 473 Value to assign to the score if an error occurs in estimator fitting. 474 If set to 'raise', the error is raised. 475 If a numeric value is given, FitFailedWarning is raised. 476 477 .. versionadded:: 0.20 478 479 Returns 480 ------- 481 scores : ndarray of float of shape=(len(list(cv)),) 482 Array of scores of the estimator for each run of the cross validation. 483 484 Examples 485 -------- 486 >>> from sklearn import datasets, linear_model 487 >>> from sklearn.model_selection import cross_val_score 488 >>> diabetes = datasets.load_diabetes() 489 >>> X = diabetes.data[:150] 490 >>> y = diabetes.target[:150] 491 >>> lasso = linear_model.Lasso() 492 >>> print(cross_val_score(lasso, X, y, cv=3)) 493 [0.33150734 0.08022311 0.03531764] 494 495 See Also 496 --------- 497 cross_validate : To run cross-validation on multiple metrics and also to 498 return train scores, fit times and score times. 499 500 cross_val_predict : Get predictions from each split of cross-validation for 501 diagnostic purposes. 502 503 sklearn.metrics.make_scorer : Make a scorer from a performance metric or 504 loss function. 505 """ 506 # To ensure multimetric format is not supported 507 scorer = check_scoring(estimator, scoring=scoring) 508 509 cv_results = cross_validate( 510 estimator=estimator, 511 X=X, 512 y=y, 513 groups=groups, 514 scoring={"score": scorer}, 515 cv=cv, 516 n_jobs=n_jobs, 517 verbose=verbose, 518 fit_params=fit_params, 519 pre_dispatch=pre_dispatch, 520 error_score=error_score, 521 ) 522 return cv_results["test_score"] 523 524 525def _fit_and_score( 526 estimator, 527 X, 528 y, 529 scorer, 530 train, 531 test, 532 verbose, 533 parameters, 534 fit_params, 535 return_train_score=False, 536 return_parameters=False, 537 return_n_test_samples=False, 538 return_times=False, 539 return_estimator=False, 540 split_progress=None, 541 candidate_progress=None, 542 error_score=np.nan, 543): 544 545 """Fit estimator and compute scores for a given dataset split. 546 547 Parameters 548 ---------- 549 estimator : estimator object implementing 'fit' 550 The object to use to fit the data. 551 552 X : array-like of shape (n_samples, n_features) 553 The data to fit. 554 555 y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None 556 The target variable to try to predict in the case of 557 supervised learning. 558 559 scorer : A single callable or dict mapping scorer name to the callable 560 If it is a single callable, the return value for ``train_scores`` and 561 ``test_scores`` is a single float. 562 563 For a dict, it should be one mapping the scorer name to the scorer 564 callable object / function. 565 566 The callable object / fn should have signature 567 ``scorer(estimator, X, y)``. 568 569 train : array-like of shape (n_train_samples,) 570 Indices of training samples. 571 572 test : array-like of shape (n_test_samples,) 573 Indices of test samples. 574 575 verbose : int 576 The verbosity level. 577 578 error_score : 'raise' or numeric, default=np.nan 579 Value to assign to the score if an error occurs in estimator fitting. 580 If set to 'raise', the error is raised. 581 If a numeric value is given, FitFailedWarning is raised. 582 583 parameters : dict or None 584 Parameters to be set on the estimator. 585 586 fit_params : dict or None 587 Parameters that will be passed to ``estimator.fit``. 588 589 return_train_score : bool, default=False 590 Compute and return score on training set. 591 592 return_parameters : bool, default=False 593 Return parameters that has been used for the estimator. 594 595 split_progress : {list, tuple} of int, default=None 596 A list or tuple of format (<current_split_id>, <total_num_of_splits>). 597 598 candidate_progress : {list, tuple} of int, default=None 599 A list or tuple of format 600 (<current_candidate_id>, <total_number_of_candidates>). 601 602 return_n_test_samples : bool, default=False 603 Whether to return the ``n_test_samples``. 604 605 return_times : bool, default=False 606 Whether to return the fit/score times. 607 608 return_estimator : bool, default=False 609 Whether to return the fitted estimator. 610 611 Returns 612 ------- 613 result : dict with the following attributes 614 train_scores : dict of scorer name -> float 615 Score on training set (for all the scorers), 616 returned only if `return_train_score` is `True`. 617 test_scores : dict of scorer name -> float 618 Score on testing set (for all the scorers). 619 n_test_samples : int 620 Number of test samples. 621 fit_time : float 622 Time spent for fitting in seconds. 623 score_time : float 624 Time spent for scoring in seconds. 625 parameters : dict or None 626 The parameters that have been evaluated. 627 estimator : estimator object 628 The fitted estimator. 629 fit_error : str or None 630 Traceback str if the fit failed, None if the fit succeeded. 631 """ 632 if not isinstance(error_score, numbers.Number) and error_score != "raise": 633 raise ValueError( 634 "error_score must be the string 'raise' or a numeric value. " 635 "(Hint: if using 'raise', please make sure that it has been " 636 "spelled correctly.)" 637 ) 638 639 progress_msg = "" 640 if verbose > 2: 641 if split_progress is not None: 642 progress_msg = f" {split_progress[0]+1}/{split_progress[1]}" 643 if candidate_progress and verbose > 9: 644 progress_msg += f"; {candidate_progress[0]+1}/{candidate_progress[1]}" 645 646 if verbose > 1: 647 if parameters is None: 648 params_msg = "" 649 else: 650 sorted_keys = sorted(parameters) # Ensure deterministic o/p 651 params_msg = ", ".join(f"{k}={parameters[k]}" for k in sorted_keys) 652 if verbose > 9: 653 start_msg = f"[CV{progress_msg}] START {params_msg}" 654 print(f"{start_msg}{(80 - len(start_msg)) * '.'}") 655 656 # Adjust length of sample weights 657 fit_params = fit_params if fit_params is not None else {} 658 fit_params = _check_fit_params(X, fit_params, train) 659 660 if parameters is not None: 661 # clone after setting parameters in case any parameters 662 # are estimators (like pipeline steps) 663 # because pipeline doesn't clone steps in fit 664 cloned_parameters = {} 665 for k, v in parameters.items(): 666 cloned_parameters[k] = clone(v, safe=False) 667 668 estimator = estimator.set_params(**cloned_parameters) 669 670 start_time = time.time() 671 672 X_train, y_train = _safe_split(estimator, X, y, train) 673 X_test, y_test = _safe_split(estimator, X, y, test, train) 674 675 result = {} 676 try: 677 if y_train is None: 678 estimator.fit(X_train, **fit_params) 679 else: 680 estimator.fit(X_train, y_train, **fit_params) 681 682 except Exception: 683 # Note fit time as time until error 684 fit_time = time.time() - start_time 685 score_time = 0.0 686 if error_score == "raise": 687 raise 688 elif isinstance(error_score, numbers.Number): 689 if isinstance(scorer, dict): 690 test_scores = {name: error_score for name in scorer} 691 if return_train_score: 692 train_scores = test_scores.copy() 693 else: 694 test_scores = error_score 695 if return_train_score: 696 train_scores = error_score 697 result["fit_error"] = format_exc() 698 else: 699 result["fit_error"] = None 700 701 fit_time = time.time() - start_time 702 test_scores = _score(estimator, X_test, y_test, scorer, error_score) 703 score_time = time.time() - start_time - fit_time 704 if return_train_score: 705 train_scores = _score(estimator, X_train, y_train, scorer, error_score) 706 707 if verbose > 1: 708 total_time = score_time + fit_time 709 end_msg = f"[CV{progress_msg}] END " 710 result_msg = params_msg + (";" if params_msg else "") 711 if verbose > 2: 712 if isinstance(test_scores, dict): 713 for scorer_name in sorted(test_scores): 714 result_msg += f" {scorer_name}: (" 715 if return_train_score: 716 scorer_scores = train_scores[scorer_name] 717 result_msg += f"train={scorer_scores:.3f}, " 718 result_msg += f"test={test_scores[scorer_name]:.3f})" 719 else: 720 result_msg += ", score=" 721 if return_train_score: 722 result_msg += f"(train={train_scores:.3f}, test={test_scores:.3f})" 723 else: 724 result_msg += f"{test_scores:.3f}" 725 result_msg += f" total time={logger.short_format_time(total_time)}" 726 727 # Right align the result_msg 728 end_msg += "." * (80 - len(end_msg) - len(result_msg)) 729 end_msg += result_msg 730 print(end_msg) 731 732 result["test_scores"] = test_scores 733 if return_train_score: 734 result["train_scores"] = train_scores 735 if return_n_test_samples: 736 result["n_test_samples"] = _num_samples(X_test) 737 if return_times: 738 result["fit_time"] = fit_time 739 result["score_time"] = score_time 740 if return_parameters: 741 result["parameters"] = parameters 742 if return_estimator: 743 result["estimator"] = estimator 744 return result 745 746 747def _score(estimator, X_test, y_test, scorer, error_score="raise"): 748 """Compute the score(s) of an estimator on a given test set. 749 750 Will return a dict of floats if `scorer` is a dict, otherwise a single 751 float is returned. 752 """ 753 if isinstance(scorer, dict): 754 # will cache method calls if needed. scorer() returns a dict 755 scorer = _MultimetricScorer(**scorer) 756 757 try: 758 if y_test is None: 759 scores = scorer(estimator, X_test) 760 else: 761 scores = scorer(estimator, X_test, y_test) 762 except Exception: 763 if error_score == "raise": 764 raise 765 else: 766 if isinstance(scorer, _MultimetricScorer): 767 scores = {name: error_score for name in scorer._scorers} 768 else: 769 scores = error_score 770 warnings.warn( 771 "Scoring failed. The score on this train-test partition for " 772 f"these parameters will be set to {error_score}. Details: \n" 773 f"{format_exc()}", 774 UserWarning, 775 ) 776 777 error_msg = "scoring must return a number, got %s (%s) instead. (scorer=%s)" 778 if isinstance(scores, dict): 779 for name, score in scores.items(): 780 if hasattr(score, "item"): 781 with suppress(ValueError): 782 # e.g. unwrap memmapped scalars 783 score = score.item() 784 if not isinstance(score, numbers.Number): 785 raise ValueError(error_msg % (score, type(score), name)) 786 scores[name] = score 787 else: # scalar 788 if hasattr(scores, "item"): 789 with suppress(ValueError): 790 # e.g. unwrap memmapped scalars 791 scores = scores.item() 792 if not isinstance(scores, numbers.Number): 793 raise ValueError(error_msg % (scores, type(scores), scorer)) 794 return scores 795 796 797def cross_val_predict( 798 estimator, 799 X, 800 y=None, 801 *, 802 groups=None, 803 cv=None, 804 n_jobs=None, 805 verbose=0, 806 fit_params=None, 807 pre_dispatch="2*n_jobs", 808 method="predict", 809): 810 """Generate cross-validated estimates for each input data point. 811 812 The data is split according to the cv parameter. Each sample belongs 813 to exactly one test set, and its prediction is computed with an 814 estimator fitted on the corresponding training set. 815 816 Passing these predictions into an evaluation metric may not be a valid 817 way to measure generalization performance. Results can differ from 818 :func:`cross_validate` and :func:`cross_val_score` unless all tests sets 819 have equal size and the metric decomposes over samples. 820 821 Read more in the :ref:`User Guide <cross_validation>`. 822 823 Parameters 824 ---------- 825 estimator : estimator object implementing 'fit' and 'predict' 826 The object to use to fit the data. 827 828 X : array-like of shape (n_samples, n_features) 829 The data to fit. Can be, for example a list, or an array at least 2d. 830 831 y : array-like of shape (n_samples,) or (n_samples, n_outputs), \ 832 default=None 833 The target variable to try to predict in the case of 834 supervised learning. 835 836 groups : array-like of shape (n_samples,), default=None 837 Group labels for the samples used while splitting the dataset into 838 train/test set. Only used in conjunction with a "Group" :term:`cv` 839 instance (e.g., :class:`GroupKFold`). 840 841 cv : int, cross-validation generator or an iterable, default=None 842 Determines the cross-validation splitting strategy. 843 Possible inputs for cv are: 844 845 - None, to use the default 5-fold cross validation, 846 - int, to specify the number of folds in a `(Stratified)KFold`, 847 - :term:`CV splitter`, 848 - An iterable that generates (train, test) splits as arrays of indices. 849 850 For int/None inputs, if the estimator is a classifier and ``y`` is 851 either binary or multiclass, :class:`StratifiedKFold` is used. In all 852 other cases, :class:`KFold` is used. These splitters are instantiated 853 with `shuffle=False` so the splits will be the same across calls. 854 855 Refer :ref:`User Guide <cross_validation>` for the various 856 cross-validation strategies that can be used here. 857 858 .. versionchanged:: 0.22 859 ``cv`` default value if None changed from 3-fold to 5-fold. 860 861 n_jobs : int, default=None 862 Number of jobs to run in parallel. Training the estimator and 863 predicting are parallelized over the cross-validation splits. 864 ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. 865 ``-1`` means using all processors. See :term:`Glossary <n_jobs>` 866 for more details. 867 868 verbose : int, default=0 869 The verbosity level. 870 871 fit_params : dict, default=None 872 Parameters to pass to the fit method of the estimator. 873 874 pre_dispatch : int or str, default='2*n_jobs' 875 Controls the number of jobs that get dispatched during parallel 876 execution. Reducing this number can be useful to avoid an 877 explosion of memory consumption when more jobs get dispatched 878 than CPUs can process. This parameter can be: 879 880 - None, in which case all the jobs are immediately 881 created and spawned. Use this for lightweight and 882 fast-running jobs, to avoid delays due to on-demand 883 spawning of the jobs 884 885 - An int, giving the exact number of total jobs that are 886 spawned 887 888 - A str, giving an expression as a function of n_jobs, 889 as in '2*n_jobs' 890 891 method : {'predict', 'predict_proba', 'predict_log_proba', \ 892 'decision_function'}, default='predict' 893 The method to be invoked by `estimator`. 894 895 Returns 896 ------- 897 predictions : ndarray 898 This is the result of calling `method`. Shape: 899 900 - When `method` is 'predict' and in special case where `method` is 901 'decision_function' and the target is binary: (n_samples,) 902 - When `method` is one of {'predict_proba', 'predict_log_proba', 903 'decision_function'} (unless special case above): 904 (n_samples, n_classes) 905 - If `estimator` is :term:`multioutput`, an extra dimension 906 'n_outputs' is added to the end of each shape above. 907 908 See Also 909 -------- 910 cross_val_score : Calculate score for each CV split. 911 cross_validate : Calculate one or more scores and timings for each CV 912 split. 913 914 Notes 915 ----- 916 In the case that one or more classes are absent in a training portion, a 917 default score needs to be assigned to all instances for that class if 918 ``method`` produces columns per class, as in {'decision_function', 919 'predict_proba', 'predict_log_proba'}. For ``predict_proba`` this value is 920 0. In order to ensure finite output, we approximate negative infinity by 921 the minimum finite float value for the dtype in other cases. 922 923 Examples 924 -------- 925 >>> from sklearn import datasets, linear_model 926 >>> from sklearn.model_selection import cross_val_predict 927 >>> diabetes = datasets.load_diabetes() 928 >>> X = diabetes.data[:150] 929 >>> y = diabetes.target[:150] 930 >>> lasso = linear_model.Lasso() 931 >>> y_pred = cross_val_predict(lasso, X, y, cv=3) 932 """ 933 X, y, groups = indexable(X, y, groups) 934 935 cv = check_cv(cv, y, classifier=is_classifier(estimator)) 936 splits = list(cv.split(X, y, groups)) 937 938 test_indices = np.concatenate([test for _, test in splits]) 939 if not _check_is_permutation(test_indices, _num_samples(X)): 940 raise ValueError("cross_val_predict only works for partitions") 941 942 # If classification methods produce multiple columns of output, 943 # we need to manually encode classes to ensure consistent column ordering. 944 encode = ( 945 method in ["decision_function", "predict_proba", "predict_log_proba"] 946 and y is not None 947 ) 948 if encode: 949 y = np.asarray(y) 950 if y.ndim == 1: 951 le = LabelEncoder() 952 y = le.fit_transform(y) 953 elif y.ndim == 2: 954 y_enc = np.zeros_like(y, dtype=int) 955 for i_label in range(y.shape[1]): 956 y_enc[:, i_label] = LabelEncoder().fit_transform(y[:, i_label]) 957 y = y_enc 958 959 # We clone the estimator to make sure that all the folds are 960 # independent, and that it is pickle-able. 961 parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch) 962 predictions = parallel( 963 delayed(_fit_and_predict)( 964 clone(estimator), X, y, train, test, verbose, fit_params, method 965 ) 966 for train, test in splits 967 ) 968 969 inv_test_indices = np.empty(len(test_indices), dtype=int) 970 inv_test_indices[test_indices] = np.arange(len(test_indices)) 971 972 if sp.issparse(predictions[0]): 973 predictions = sp.vstack(predictions, format=predictions[0].format) 974 elif encode and isinstance(predictions[0], list): 975 # `predictions` is a list of method outputs from each fold. 976 # If each of those is also a list, then treat this as a 977 # multioutput-multiclass task. We need to separately concatenate 978 # the method outputs for each label into an `n_labels` long list. 979 n_labels = y.shape[1] 980 concat_pred = [] 981 for i_label in range(n_labels): 982 label_preds = np.concatenate([p[i_label] for p in predictions]) 983 concat_pred.append(label_preds) 984 predictions = concat_pred 985 else: 986 predictions = np.concatenate(predictions) 987 988 if isinstance(predictions, list): 989 return [p[inv_test_indices] for p in predictions] 990 else: 991 return predictions[inv_test_indices] 992 993 994def _fit_and_predict(estimator, X, y, train, test, verbose, fit_params, method): 995 """Fit estimator and predict values for a given dataset split. 996 997 Read more in the :ref:`User Guide <cross_validation>`. 998 999 Parameters 1000 ---------- 1001 estimator : estimator object implementing 'fit' and 'predict' 1002 The object to use to fit the data. 1003 1004 X : array-like of shape (n_samples, n_features) 1005 The data to fit. 1006 1007 .. versionchanged:: 0.20 1008 X is only required to be an object with finite length or shape now 1009 1010 y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None 1011 The target variable to try to predict in the case of 1012 supervised learning. 1013 1014 train : array-like of shape (n_train_samples,) 1015 Indices of training samples. 1016 1017 test : array-like of shape (n_test_samples,) 1018 Indices of test samples. 1019 1020 verbose : int 1021 The verbosity level. 1022 1023 fit_params : dict or None 1024 Parameters that will be passed to ``estimator.fit``. 1025 1026 method : str 1027 Invokes the passed method name of the passed estimator. 1028 1029 Returns 1030 ------- 1031 predictions : sequence 1032 Result of calling 'estimator.method' 1033 """ 1034 # Adjust length of sample weights 1035 fit_params = fit_params if fit_params is not None else {} 1036 fit_params = _check_fit_params(X, fit_params, train) 1037 1038 X_train, y_train = _safe_split(estimator, X, y, train) 1039 X_test, _ = _safe_split(estimator, X, y, test, train) 1040 1041 if y_train is None: 1042 estimator.fit(X_train, **fit_params) 1043 else: 1044 estimator.fit(X_train, y_train, **fit_params) 1045 func = getattr(estimator, method) 1046 predictions = func(X_test) 1047 1048 encode = ( 1049 method in ["decision_function", "predict_proba", "predict_log_proba"] 1050 and y is not None 1051 ) 1052 1053 if encode: 1054 if isinstance(predictions, list): 1055 predictions = [ 1056 _enforce_prediction_order( 1057 estimator.classes_[i_label], 1058 predictions[i_label], 1059 n_classes=len(set(y[:, i_label])), 1060 method=method, 1061 ) 1062 for i_label in range(len(predictions)) 1063 ] 1064 else: 1065 # A 2D y array should be a binary label indicator matrix 1066 n_classes = len(set(y)) if y.ndim == 1 else y.shape[1] 1067 predictions = _enforce_prediction_order( 1068 estimator.classes_, predictions, n_classes, method 1069 ) 1070 return predictions 1071 1072 1073def _enforce_prediction_order(classes, predictions, n_classes, method): 1074 """Ensure that prediction arrays have correct column order 1075 1076 When doing cross-validation, if one or more classes are 1077 not present in the subset of data used for training, 1078 then the output prediction array might not have the same 1079 columns as other folds. Use the list of class names 1080 (assumed to be ints) to enforce the correct column order. 1081 1082 Note that `classes` is the list of classes in this fold 1083 (a subset of the classes in the full training set) 1084 and `n_classes` is the number of classes in the full training set. 1085 """ 1086 if n_classes != len(classes): 1087 recommendation = ( 1088 "To fix this, use a cross-validation " 1089 "technique resulting in properly " 1090 "stratified folds" 1091 ) 1092 warnings.warn( 1093 "Number of classes in training fold ({}) does " 1094 "not match total number of classes ({}). " 1095 "Results may not be appropriate for your use case. " 1096 "{}".format(len(classes), n_classes, recommendation), 1097 RuntimeWarning, 1098 ) 1099 if method == "decision_function": 1100 if predictions.ndim == 2 and predictions.shape[1] != len(classes): 1101 # This handles the case when the shape of predictions 1102 # does not match the number of classes used to train 1103 # it with. This case is found when sklearn.svm.SVC is 1104 # set to `decision_function_shape='ovo'`. 1105 raise ValueError( 1106 "Output shape {} of {} does not match " 1107 "number of classes ({}) in fold. " 1108 "Irregular decision_function outputs " 1109 "are not currently supported by " 1110 "cross_val_predict".format(predictions.shape, method, len(classes)) 1111 ) 1112 if len(classes) <= 2: 1113 # In this special case, `predictions` contains a 1D array. 1114 raise ValueError( 1115 "Only {} class/es in training fold, but {} " 1116 "in overall dataset. This " 1117 "is not supported for decision_function " 1118 "with imbalanced folds. {}".format( 1119 len(classes), n_classes, recommendation 1120 ) 1121 ) 1122 1123 float_min = np.finfo(predictions.dtype).min 1124 default_values = { 1125 "decision_function": float_min, 1126 "predict_log_proba": float_min, 1127 "predict_proba": 0, 1128 } 1129 predictions_for_all_classes = np.full( 1130 (_num_samples(predictions), n_classes), 1131 default_values[method], 1132 dtype=predictions.dtype, 1133 ) 1134 predictions_for_all_classes[:, classes] = predictions 1135 predictions = predictions_for_all_classes 1136 return predictions 1137 1138 1139def _check_is_permutation(indices, n_samples): 1140 """Check whether indices is a reordering of the array np.arange(n_samples) 1141 1142 Parameters 1143 ---------- 1144 indices : ndarray 1145 int array to test 1146 n_samples : int 1147 number of expected elements 1148 1149 Returns 1150 ------- 1151 is_partition : bool 1152 True iff sorted(indices) is np.arange(n) 1153 """ 1154 if len(indices) != n_samples: 1155 return False 1156 hit = np.zeros(n_samples, dtype=bool) 1157 hit[indices] = True 1158 if not np.all(hit): 1159 return False 1160 return True 1161 1162 1163def permutation_test_score( 1164 estimator, 1165 X, 1166 y, 1167 *, 1168 groups=None, 1169 cv=None, 1170 n_permutations=100, 1171 n_jobs=None, 1172 random_state=0, 1173 verbose=0, 1174 scoring=None, 1175 fit_params=None, 1176): 1177 """Evaluate the significance of a cross-validated score with permutations 1178 1179 Permutes targets to generate 'randomized data' and compute the empirical 1180 p-value against the null hypothesis that features and targets are 1181 independent. 1182 1183 The p-value represents the fraction of randomized data sets where the 1184 estimator performed as well or better than in the original data. A small 1185 p-value suggests that there is a real dependency between features and 1186 targets which has been used by the estimator to give good predictions. 1187 A large p-value may be due to lack of real dependency between features 1188 and targets or the estimator was not able to use the dependency to 1189 give good predictions. 1190 1191 Read more in the :ref:`User Guide <permutation_test_score>`. 1192 1193 Parameters 1194 ---------- 1195 estimator : estimator object implementing 'fit' 1196 The object to use to fit the data. 1197 1198 X : array-like of shape at least 2D 1199 The data to fit. 1200 1201 y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None 1202 The target variable to try to predict in the case of 1203 supervised learning. 1204 1205 groups : array-like of shape (n_samples,), default=None 1206 Labels to constrain permutation within groups, i.e. ``y`` values 1207 are permuted among samples with the same group identifier. 1208 When not specified, ``y`` values are permuted among all samples. 1209 1210 When a grouped cross-validator is used, the group labels are 1211 also passed on to the ``split`` method of the cross-validator. The 1212 cross-validator uses them for grouping the samples while splitting 1213 the dataset into train/test set. 1214 1215 scoring : str or callable, default=None 1216 A single str (see :ref:`scoring_parameter`) or a callable 1217 (see :ref:`scoring`) to evaluate the predictions on the test set. 1218 1219 If `None` the estimator's score method is used. 1220 1221 cv : int, cross-validation generator or an iterable, default=None 1222 Determines the cross-validation splitting strategy. 1223 Possible inputs for cv are: 1224 1225 - `None`, to use the default 5-fold cross validation, 1226 - int, to specify the number of folds in a `(Stratified)KFold`, 1227 - :term:`CV splitter`, 1228 - An iterable yielding (train, test) splits as arrays of indices. 1229 1230 For `int`/`None` inputs, if the estimator is a classifier and `y` is 1231 either binary or multiclass, :class:`StratifiedKFold` is used. In all 1232 other cases, :class:`KFold` is used. These splitters are instantiated 1233 with `shuffle=False` so the splits will be the same across calls. 1234 1235 Refer :ref:`User Guide <cross_validation>` for the various 1236 cross-validation strategies that can be used here. 1237 1238 .. versionchanged:: 0.22 1239 `cv` default value if `None` changed from 3-fold to 5-fold. 1240 1241 n_permutations : int, default=100 1242 Number of times to permute ``y``. 1243 1244 n_jobs : int, default=None 1245 Number of jobs to run in parallel. Training the estimator and computing 1246 the cross-validated score are parallelized over the permutations. 1247 ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. 1248 ``-1`` means using all processors. See :term:`Glossary <n_jobs>` 1249 for more details. 1250 1251 random_state : int, RandomState instance or None, default=0 1252 Pass an int for reproducible output for permutation of 1253 ``y`` values among samples. See :term:`Glossary <random_state>`. 1254 1255 verbose : int, default=0 1256 The verbosity level. 1257 1258 fit_params : dict, default=None 1259 Parameters to pass to the fit method of the estimator. 1260 1261 .. versionadded:: 0.24 1262 1263 Returns 1264 ------- 1265 score : float 1266 The true score without permuting targets. 1267 1268 permutation_scores : array of shape (n_permutations,) 1269 The scores obtained for each permutations. 1270 1271 pvalue : float 1272 The p-value, which approximates the probability that the score would 1273 be obtained by chance. This is calculated as: 1274 1275 `(C + 1) / (n_permutations + 1)` 1276 1277 Where C is the number of permutations whose score >= the true score. 1278 1279 The best possible p-value is 1/(n_permutations + 1), the worst is 1.0. 1280 1281 Notes 1282 ----- 1283 This function implements Test 1 in: 1284 1285 Ojala and Garriga. `Permutation Tests for Studying Classifier 1286 Performance 1287 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_. The 1288 Journal of Machine Learning Research (2010) vol. 11 1289 1290 """ 1291 X, y, groups = indexable(X, y, groups) 1292 1293 cv = check_cv(cv, y, classifier=is_classifier(estimator)) 1294 scorer = check_scoring(estimator, scoring=scoring) 1295 random_state = check_random_state(random_state) 1296 1297 # We clone the estimator to make sure that all the folds are 1298 # independent, and that it is pickle-able. 1299 score = _permutation_test_score( 1300 clone(estimator), X, y, groups, cv, scorer, fit_params=fit_params 1301 ) 1302 permutation_scores = Parallel(n_jobs=n_jobs, verbose=verbose)( 1303 delayed(_permutation_test_score)( 1304 clone(estimator), 1305 X, 1306 _shuffle(y, groups, random_state), 1307 groups, 1308 cv, 1309 scorer, 1310 fit_params=fit_params, 1311 ) 1312 for _ in range(n_permutations) 1313 ) 1314 permutation_scores = np.array(permutation_scores) 1315 pvalue = (np.sum(permutation_scores >= score) + 1.0) / (n_permutations + 1) 1316 return score, permutation_scores, pvalue 1317 1318 1319def _permutation_test_score(estimator, X, y, groups, cv, scorer, fit_params): 1320 """Auxiliary function for permutation_test_score""" 1321 # Adjust length of sample weights 1322 fit_params = fit_params if fit_params is not None else {} 1323 avg_score = [] 1324 for train, test in cv.split(X, y, groups): 1325 X_train, y_train = _safe_split(estimator, X, y, train) 1326 X_test, y_test = _safe_split(estimator, X, y, test, train) 1327 fit_params = _check_fit_params(X, fit_params, train) 1328 estimator.fit(X_train, y_train, **fit_params) 1329 avg_score.append(scorer(estimator, X_test, y_test)) 1330 return np.mean(avg_score) 1331 1332 1333def _shuffle(y, groups, random_state): 1334 """Return a shuffled copy of y eventually shuffle among same groups.""" 1335 if groups is None: 1336 indices = random_state.permutation(len(y)) 1337 else: 1338 indices = np.arange(len(groups)) 1339 for group in np.unique(groups): 1340 this_mask = groups == group 1341 indices[this_mask] = random_state.permutation(indices[this_mask]) 1342 return _safe_indexing(y, indices) 1343 1344 1345def learning_curve( 1346 estimator, 1347 X, 1348 y, 1349 *, 1350 groups=None, 1351 train_sizes=np.linspace(0.1, 1.0, 5), 1352 cv=None, 1353 scoring=None, 1354 exploit_incremental_learning=False, 1355 n_jobs=None, 1356 pre_dispatch="all", 1357 verbose=0, 1358 shuffle=False, 1359 random_state=None, 1360 error_score=np.nan, 1361 return_times=False, 1362 fit_params=None, 1363): 1364 """Learning curve. 1365 1366 Determines cross-validated training and test scores for different training 1367 set sizes. 1368 1369 A cross-validation generator splits the whole dataset k times in training 1370 and test data. Subsets of the training set with varying sizes will be used 1371 to train the estimator and a score for each training subset size and the 1372 test set will be computed. Afterwards, the scores will be averaged over 1373 all k runs for each training subset size. 1374 1375 Read more in the :ref:`User Guide <learning_curve>`. 1376 1377 Parameters 1378 ---------- 1379 estimator : object type that implements the "fit" and "predict" methods 1380 An object of that type which is cloned for each validation. 1381 1382 X : array-like of shape (n_samples, n_features) 1383 Training vector, where `n_samples` is the number of samples and 1384 `n_features` is the number of features. 1385 1386 y : array-like of shape (n_samples,) or (n_samples, n_outputs) 1387 Target relative to X for classification or regression; 1388 None for unsupervised learning. 1389 1390 groups : array-like of shape (n_samples,), default=None 1391 Group labels for the samples used while splitting the dataset into 1392 train/test set. Only used in conjunction with a "Group" :term:`cv` 1393 instance (e.g., :class:`GroupKFold`). 1394 1395 train_sizes : array-like of shape (n_ticks,), \ 1396 default=np.linspace(0.1, 1.0, 5) 1397 Relative or absolute numbers of training examples that will be used to 1398 generate the learning curve. If the dtype is float, it is regarded as a 1399 fraction of the maximum size of the training set (that is determined 1400 by the selected validation method), i.e. it has to be within (0, 1]. 1401 Otherwise it is interpreted as absolute sizes of the training sets. 1402 Note that for classification the number of samples usually have to 1403 be big enough to contain at least one sample from each class. 1404 1405 cv : int, cross-validation generator or an iterable, default=None 1406 Determines the cross-validation splitting strategy. 1407 Possible inputs for cv are: 1408 1409 - None, to use the default 5-fold cross validation, 1410 - int, to specify the number of folds in a `(Stratified)KFold`, 1411 - :term:`CV splitter`, 1412 - An iterable yielding (train, test) splits as arrays of indices. 1413 1414 For int/None inputs, if the estimator is a classifier and ``y`` is 1415 either binary or multiclass, :class:`StratifiedKFold` is used. In all 1416 other cases, :class:`KFold` is used. These splitters are instantiated 1417 with `shuffle=False` so the splits will be the same across calls. 1418 1419 Refer :ref:`User Guide <cross_validation>` for the various 1420 cross-validation strategies that can be used here. 1421 1422 .. versionchanged:: 0.22 1423 ``cv`` default value if None changed from 3-fold to 5-fold. 1424 1425 scoring : str or callable, default=None 1426 A str (see model evaluation documentation) or 1427 a scorer callable object / function with signature 1428 ``scorer(estimator, X, y)``. 1429 1430 exploit_incremental_learning : bool, default=False 1431 If the estimator supports incremental learning, this will be 1432 used to speed up fitting for different training set sizes. 1433 1434 n_jobs : int, default=None 1435 Number of jobs to run in parallel. Training the estimator and computing 1436 the score are parallelized over the different training and test sets. 1437 ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. 1438 ``-1`` means using all processors. See :term:`Glossary <n_jobs>` 1439 for more details. 1440 1441 pre_dispatch : int or str, default='all' 1442 Number of predispatched jobs for parallel execution (default is 1443 all). The option can reduce the allocated memory. The str can 1444 be an expression like '2*n_jobs'. 1445 1446 verbose : int, default=0 1447 Controls the verbosity: the higher, the more messages. 1448 1449 shuffle : bool, default=False 1450 Whether to shuffle training data before taking prefixes of it 1451 based on``train_sizes``. 1452 1453 random_state : int, RandomState instance or None, default=None 1454 Used when ``shuffle`` is True. Pass an int for reproducible 1455 output across multiple function calls. 1456 See :term:`Glossary <random_state>`. 1457 1458 error_score : 'raise' or numeric, default=np.nan 1459 Value to assign to the score if an error occurs in estimator fitting. 1460 If set to 'raise', the error is raised. 1461 If a numeric value is given, FitFailedWarning is raised. 1462 1463 .. versionadded:: 0.20 1464 1465 return_times : bool, default=False 1466 Whether to return the fit and score times. 1467 1468 fit_params : dict, default=None 1469 Parameters to pass to the fit method of the estimator. 1470 1471 .. versionadded:: 0.24 1472 1473 Returns 1474 ------- 1475 train_sizes_abs : array of shape (n_unique_ticks,) 1476 Numbers of training examples that has been used to generate the 1477 learning curve. Note that the number of ticks might be less 1478 than n_ticks because duplicate entries will be removed. 1479 1480 train_scores : array of shape (n_ticks, n_cv_folds) 1481 Scores on training sets. 1482 1483 test_scores : array of shape (n_ticks, n_cv_folds) 1484 Scores on test set. 1485 1486 fit_times : array of shape (n_ticks, n_cv_folds) 1487 Times spent for fitting in seconds. Only present if ``return_times`` 1488 is True. 1489 1490 score_times : array of shape (n_ticks, n_cv_folds) 1491 Times spent for scoring in seconds. Only present if ``return_times`` 1492 is True. 1493 1494 Notes 1495 ----- 1496 See :ref:`examples/model_selection/plot_learning_curve.py 1497 <sphx_glr_auto_examples_model_selection_plot_learning_curve.py>` 1498 """ 1499 if exploit_incremental_learning and not hasattr(estimator, "partial_fit"): 1500 raise ValueError( 1501 "An estimator must support the partial_fit interface " 1502 "to exploit incremental learning" 1503 ) 1504 X, y, groups = indexable(X, y, groups) 1505 1506 cv = check_cv(cv, y, classifier=is_classifier(estimator)) 1507 # Store it as list as we will be iterating over the list multiple times 1508 cv_iter = list(cv.split(X, y, groups)) 1509 1510 scorer = check_scoring(estimator, scoring=scoring) 1511 1512 n_max_training_samples = len(cv_iter[0][0]) 1513 # Because the lengths of folds can be significantly different, it is 1514 # not guaranteed that we use all of the available training data when we 1515 # use the first 'n_max_training_samples' samples. 1516 train_sizes_abs = _translate_train_sizes(train_sizes, n_max_training_samples) 1517 n_unique_ticks = train_sizes_abs.shape[0] 1518 if verbose > 0: 1519 print("[learning_curve] Training set sizes: " + str(train_sizes_abs)) 1520 1521 parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose) 1522 1523 if shuffle: 1524 rng = check_random_state(random_state) 1525 cv_iter = ((rng.permutation(train), test) for train, test in cv_iter) 1526 1527 if exploit_incremental_learning: 1528 classes = np.unique(y) if is_classifier(estimator) else None 1529 out = parallel( 1530 delayed(_incremental_fit_estimator)( 1531 clone(estimator), 1532 X, 1533 y, 1534 classes, 1535 train, 1536 test, 1537 train_sizes_abs, 1538 scorer, 1539 verbose, 1540 return_times, 1541 error_score=error_score, 1542 fit_params=fit_params, 1543 ) 1544 for train, test in cv_iter 1545 ) 1546 out = np.asarray(out).transpose((2, 1, 0)) 1547 else: 1548 train_test_proportions = [] 1549 for train, test in cv_iter: 1550 for n_train_samples in train_sizes_abs: 1551 train_test_proportions.append((train[:n_train_samples], test)) 1552 1553 results = parallel( 1554 delayed(_fit_and_score)( 1555 clone(estimator), 1556 X, 1557 y, 1558 scorer, 1559 train, 1560 test, 1561 verbose, 1562 parameters=None, 1563 fit_params=fit_params, 1564 return_train_score=True, 1565 error_score=error_score, 1566 return_times=return_times, 1567 ) 1568 for train, test in train_test_proportions 1569 ) 1570 results = _aggregate_score_dicts(results) 1571 train_scores = results["train_scores"].reshape(-1, n_unique_ticks).T 1572 test_scores = results["test_scores"].reshape(-1, n_unique_ticks).T 1573 out = [train_scores, test_scores] 1574 1575 if return_times: 1576 fit_times = results["fit_time"].reshape(-1, n_unique_ticks).T 1577 score_times = results["score_time"].reshape(-1, n_unique_ticks).T 1578 out.extend([fit_times, score_times]) 1579 1580 ret = train_sizes_abs, out[0], out[1] 1581 1582 if return_times: 1583 ret = ret + (out[2], out[3]) 1584 1585 return ret 1586 1587 1588def _translate_train_sizes(train_sizes, n_max_training_samples): 1589 """Determine absolute sizes of training subsets and validate 'train_sizes'. 1590 1591 Examples: 1592 _translate_train_sizes([0.5, 1.0], 10) -> [5, 10] 1593 _translate_train_sizes([5, 10], 10) -> [5, 10] 1594 1595 Parameters 1596 ---------- 1597 train_sizes : array-like of shape (n_ticks,) 1598 Numbers of training examples that will be used to generate the 1599 learning curve. If the dtype is float, it is regarded as a 1600 fraction of 'n_max_training_samples', i.e. it has to be within (0, 1]. 1601 1602 n_max_training_samples : int 1603 Maximum number of training samples (upper bound of 'train_sizes'). 1604 1605 Returns 1606 ------- 1607 train_sizes_abs : array of shape (n_unique_ticks,) 1608 Numbers of training examples that will be used to generate the 1609 learning curve. Note that the number of ticks might be less 1610 than n_ticks because duplicate entries will be removed. 1611 """ 1612 train_sizes_abs = np.asarray(train_sizes) 1613 n_ticks = train_sizes_abs.shape[0] 1614 n_min_required_samples = np.min(train_sizes_abs) 1615 n_max_required_samples = np.max(train_sizes_abs) 1616 if np.issubdtype(train_sizes_abs.dtype, np.floating): 1617 if n_min_required_samples <= 0.0 or n_max_required_samples > 1.0: 1618 raise ValueError( 1619 "train_sizes has been interpreted as fractions " 1620 "of the maximum number of training samples and " 1621 "must be within (0, 1], but is within [%f, %f]." 1622 % (n_min_required_samples, n_max_required_samples) 1623 ) 1624 train_sizes_abs = (train_sizes_abs * n_max_training_samples).astype( 1625 dtype=int, copy=False 1626 ) 1627 train_sizes_abs = np.clip(train_sizes_abs, 1, n_max_training_samples) 1628 else: 1629 if ( 1630 n_min_required_samples <= 0 1631 or n_max_required_samples > n_max_training_samples 1632 ): 1633 raise ValueError( 1634 "train_sizes has been interpreted as absolute " 1635 "numbers of training samples and must be within " 1636 "(0, %d], but is within [%d, %d]." 1637 % ( 1638 n_max_training_samples, 1639 n_min_required_samples, 1640 n_max_required_samples, 1641 ) 1642 ) 1643 1644 train_sizes_abs = np.unique(train_sizes_abs) 1645 if n_ticks > train_sizes_abs.shape[0]: 1646 warnings.warn( 1647 "Removed duplicate entries from 'train_sizes'. Number " 1648 "of ticks will be less than the size of " 1649 "'train_sizes': %d instead of %d." % (train_sizes_abs.shape[0], n_ticks), 1650 RuntimeWarning, 1651 ) 1652 1653 return train_sizes_abs 1654 1655 1656def _incremental_fit_estimator( 1657 estimator, 1658 X, 1659 y, 1660 classes, 1661 train, 1662 test, 1663 train_sizes, 1664 scorer, 1665 verbose, 1666 return_times, 1667 error_score, 1668 fit_params, 1669): 1670 """Train estimator on training subsets incrementally and compute scores.""" 1671 train_scores, test_scores, fit_times, score_times = [], [], [], [] 1672 partitions = zip(train_sizes, np.split(train, train_sizes)[:-1]) 1673 if fit_params is None: 1674 fit_params = {} 1675 for n_train_samples, partial_train in partitions: 1676 train_subset = train[:n_train_samples] 1677 X_train, y_train = _safe_split(estimator, X, y, train_subset) 1678 X_partial_train, y_partial_train = _safe_split(estimator, X, y, partial_train) 1679 X_test, y_test = _safe_split(estimator, X, y, test, train_subset) 1680 start_fit = time.time() 1681 if y_partial_train is None: 1682 estimator.partial_fit(X_partial_train, classes=classes, **fit_params) 1683 else: 1684 estimator.partial_fit( 1685 X_partial_train, y_partial_train, classes=classes, **fit_params 1686 ) 1687 fit_time = time.time() - start_fit 1688 fit_times.append(fit_time) 1689 1690 start_score = time.time() 1691 1692 test_scores.append(_score(estimator, X_test, y_test, scorer, error_score)) 1693 train_scores.append(_score(estimator, X_train, y_train, scorer, error_score)) 1694 1695 score_time = time.time() - start_score 1696 score_times.append(score_time) 1697 1698 ret = ( 1699 (train_scores, test_scores, fit_times, score_times) 1700 if return_times 1701 else (train_scores, test_scores) 1702 ) 1703 1704 return np.array(ret).T 1705 1706 1707def validation_curve( 1708 estimator, 1709 X, 1710 y, 1711 *, 1712 param_name, 1713 param_range, 1714 groups=None, 1715 cv=None, 1716 scoring=None, 1717 n_jobs=None, 1718 pre_dispatch="all", 1719 verbose=0, 1720 error_score=np.nan, 1721 fit_params=None, 1722): 1723 """Validation curve. 1724 1725 Determine training and test scores for varying parameter values. 1726 1727 Compute scores for an estimator with different values of a specified 1728 parameter. This is similar to grid search with one parameter. However, this 1729 will also compute training scores and is merely a utility for plotting the 1730 results. 1731 1732 Read more in the :ref:`User Guide <validation_curve>`. 1733 1734 Parameters 1735 ---------- 1736 estimator : object type that implements the "fit" and "predict" methods 1737 An object of that type which is cloned for each validation. 1738 1739 X : array-like of shape (n_samples, n_features) 1740 Training vector, where `n_samples` is the number of samples and 1741 `n_features` is the number of features. 1742 1743 y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None 1744 Target relative to X for classification or regression; 1745 None for unsupervised learning. 1746 1747 param_name : str 1748 Name of the parameter that will be varied. 1749 1750 param_range : array-like of shape (n_values,) 1751 The values of the parameter that will be evaluated. 1752 1753 groups : array-like of shape (n_samples,), default=None 1754 Group labels for the samples used while splitting the dataset into 1755 train/test set. Only used in conjunction with a "Group" :term:`cv` 1756 instance (e.g., :class:`GroupKFold`). 1757 1758 cv : int, cross-validation generator or an iterable, default=None 1759 Determines the cross-validation splitting strategy. 1760 Possible inputs for cv are: 1761 1762 - None, to use the default 5-fold cross validation, 1763 - int, to specify the number of folds in a `(Stratified)KFold`, 1764 - :term:`CV splitter`, 1765 - An iterable yielding (train, test) splits as arrays of indices. 1766 1767 For int/None inputs, if the estimator is a classifier and ``y`` is 1768 either binary or multiclass, :class:`StratifiedKFold` is used. In all 1769 other cases, :class:`KFold` is used. These splitters are instantiated 1770 with `shuffle=False` so the splits will be the same across calls. 1771 1772 Refer :ref:`User Guide <cross_validation>` for the various 1773 cross-validation strategies that can be used here. 1774 1775 .. versionchanged:: 0.22 1776 ``cv`` default value if None changed from 3-fold to 5-fold. 1777 1778 scoring : str or callable, default=None 1779 A str (see model evaluation documentation) or 1780 a scorer callable object / function with signature 1781 ``scorer(estimator, X, y)``. 1782 1783 n_jobs : int, default=None 1784 Number of jobs to run in parallel. Training the estimator and computing 1785 the score are parallelized over the combinations of each parameter 1786 value and each cross-validation split. 1787 ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. 1788 ``-1`` means using all processors. See :term:`Glossary <n_jobs>` 1789 for more details. 1790 1791 pre_dispatch : int or str, default='all' 1792 Number of predispatched jobs for parallel execution (default is 1793 all). The option can reduce the allocated memory. The str can 1794 be an expression like '2*n_jobs'. 1795 1796 verbose : int, default=0 1797 Controls the verbosity: the higher, the more messages. 1798 1799 fit_params : dict, default=None 1800 Parameters to pass to the fit method of the estimator. 1801 1802 .. versionadded:: 0.24 1803 1804 error_score : 'raise' or numeric, default=np.nan 1805 Value to assign to the score if an error occurs in estimator fitting. 1806 If set to 'raise', the error is raised. 1807 If a numeric value is given, FitFailedWarning is raised. 1808 1809 .. versionadded:: 0.20 1810 1811 Returns 1812 ------- 1813 train_scores : array of shape (n_ticks, n_cv_folds) 1814 Scores on training sets. 1815 1816 test_scores : array of shape (n_ticks, n_cv_folds) 1817 Scores on test set. 1818 1819 Notes 1820 ----- 1821 See :ref:`sphx_glr_auto_examples_model_selection_plot_validation_curve.py` 1822 1823 """ 1824 X, y, groups = indexable(X, y, groups) 1825 1826 cv = check_cv(cv, y, classifier=is_classifier(estimator)) 1827 scorer = check_scoring(estimator, scoring=scoring) 1828 1829 parallel = Parallel(n_jobs=n_jobs, pre_dispatch=pre_dispatch, verbose=verbose) 1830 results = parallel( 1831 delayed(_fit_and_score)( 1832 clone(estimator), 1833 X, 1834 y, 1835 scorer, 1836 train, 1837 test, 1838 verbose, 1839 parameters={param_name: v}, 1840 fit_params=fit_params, 1841 return_train_score=True, 1842 error_score=error_score, 1843 ) 1844 # NOTE do not change order of iteration to allow one time cv splitters 1845 for train, test in cv.split(X, y, groups) 1846 for v in param_range 1847 ) 1848 n_params = len(param_range) 1849 1850 results = _aggregate_score_dicts(results) 1851 train_scores = results["train_scores"].reshape(-1, n_params).T 1852 test_scores = results["test_scores"].reshape(-1, n_params).T 1853 1854 return train_scores, test_scores 1855 1856 1857def _aggregate_score_dicts(scores): 1858 """Aggregate the list of dict to dict of np ndarray 1859 1860 The aggregated output of _aggregate_score_dicts will be a list of dict 1861 of form [{'prec': 0.1, 'acc':1.0}, {'prec': 0.1, 'acc':1.0}, ...] 1862 Convert it to a dict of array {'prec': np.array([0.1 ...]), ...} 1863 1864 Parameters 1865 ---------- 1866 1867 scores : list of dict 1868 List of dicts of the scores for all scorers. This is a flat list, 1869 assumed originally to be of row major order. 1870 1871 Example 1872 ------- 1873 1874 >>> scores = [{'a': 1, 'b':10}, {'a': 2, 'b':2}, {'a': 3, 'b':3}, 1875 ... {'a': 10, 'b': 10}] # doctest: +SKIP 1876 >>> _aggregate_score_dicts(scores) # doctest: +SKIP 1877 {'a': array([1, 2, 3, 10]), 1878 'b': array([10, 2, 3, 10])} 1879 """ 1880 return { 1881 key: np.asarray([score[key] for score in scores]) 1882 if isinstance(scores[0][key], numbers.Number) 1883 else [score[key] for score in scores] 1884 for key in scores[0] 1885 } 1886