1from logging import DEBUG
2from logging import INFO
3from logging import WARNING
4from numbers import Integral
5from numbers import Number
6from time import time
7from typing import Any
8from typing import Callable
9from typing import Dict
10from typing import Iterable
11from typing import List
12from typing import Mapping
13from typing import Optional
14from typing import Union
15
16import numpy as np
17import scipy as sp
18from scipy.sparse import spmatrix
19
20from optuna import distributions
21from optuna import logging
22from optuna import samplers
23from optuna import study as study_module
24from optuna import TrialPruned
25from optuna._experimental import experimental
26from optuna._imports import try_import
27from optuna.study import StudyDirection
28from optuna.trial import FrozenTrial
29from optuna.trial import Trial
30
31
32with try_import() as _imports:
33    import pandas as pd
34    import sklearn
35    from sklearn.base import BaseEstimator
36    from sklearn.base import clone
37    from sklearn.base import is_classifier
38    from sklearn.metrics import check_scoring
39    from sklearn.model_selection import BaseCrossValidator
40    from sklearn.model_selection import check_cv
41    from sklearn.model_selection import cross_validate
42    from sklearn.utils import check_random_state
43    from sklearn.utils.metaestimators import _safe_split
44
45    if sklearn.__version__ >= "0.22":
46        from sklearn.utils import _safe_indexing as sklearn_safe_indexing
47    else:
48        from sklearn.utils import safe_indexing as sklearn_safe_indexing
49    from sklearn.utils.validation import check_is_fitted
50
51if not _imports.is_successful():
52    BaseEstimator = object  # NOQA
53
54ArrayLikeType = Union[List, np.ndarray, "pd.Series", spmatrix]
55OneDimArrayLikeType = Union[List[float], np.ndarray, "pd.Series"]
56TwoDimArrayLikeType = Union[List[List[float]], np.ndarray, "pd.DataFrame", spmatrix]
57IterableType = Union[List, "pd.DataFrame", np.ndarray, "pd.Series", spmatrix, None]
58IndexableType = Union[Iterable, None]
59
60_logger = logging.get_logger(__name__)
61
62
63def _check_fit_params(
64    X: TwoDimArrayLikeType, fit_params: Dict, indices: OneDimArrayLikeType
65) -> Dict:
66
67    fit_params_validated = {}
68    for key, value in fit_params.items():
69
70        # NOTE Original implementation:
71        # https://github.com/scikit-learn/scikit-learn/blob/ \
72        # 2467e1b84aeb493a22533fa15ff92e0d7c05ed1c/sklearn/utils/validation.py#L1324-L1328
73        # Scikit-learn does not accept non-iterable inputs.
74        # This line is for keeping backward compatibility.
75        # (See: https://github.com/scikit-learn/scikit-learn/issues/15805)
76        if not _is_arraylike(value) or _num_samples(value) != _num_samples(X):
77            fit_params_validated[key] = value
78        else:
79            fit_params_validated[key] = _make_indexable(value)
80            fit_params_validated[key] = _safe_indexing(fit_params_validated[key], indices)
81    return fit_params_validated
82
83
84# NOTE Original implementation:
85# https://github.com/scikit-learn/scikit-learn/blob/ \
86# 8caa93889f85254fc3ca84caa0a24a1640eebdd1/sklearn/utils/validation.py#L131-L135
87def _is_arraylike(x: Any) -> bool:
88
89    return hasattr(x, "__len__") or hasattr(x, "shape") or hasattr(x, "__array__")
90
91
92# NOTE Original implementation:
93# https://github.com/scikit-learn/scikit-learn/blob/ \
94# 8caa93889f85254fc3ca84caa0a24a1640eebdd1/sklearn/utils/validation.py#L217-L234
95def _make_indexable(iterable: IterableType) -> IndexableType:
96
97    tocsr_func = getattr(iterable, "tocsr", None)
98    if tocsr_func is not None and sp.sparse.issparse(iterable):
99        return tocsr_func(iterable)
100    elif hasattr(iterable, "__getitem__") or hasattr(iterable, "iloc"):
101        return iterable
102    elif iterable is None:
103        return iterable
104    return np.array(iterable)
105
106
107def _num_samples(x: ArrayLikeType) -> int:
108
109    # NOTE For dask dataframes
110    # https://github.com/scikit-learn/scikit-learn/blob/ \
111    # 8caa93889f85254fc3ca84caa0a24a1640eebdd1/sklearn/utils/validation.py#L155-L158
112    x_shape = getattr(x, "shape", None)
113    if x_shape is not None:
114        if isinstance(x_shape[0], Integral):
115            return int(x_shape[0])
116
117    try:
118        return len(x)
119    except TypeError:
120        raise TypeError("Expected sequence or array-like, got %s." % type(x)) from None
121
122
123def _safe_indexing(
124    X: Union[OneDimArrayLikeType, TwoDimArrayLikeType], indices: OneDimArrayLikeType
125) -> Union[OneDimArrayLikeType, TwoDimArrayLikeType]:
126
127    if X is None:
128        return X
129
130    return sklearn_safe_indexing(X, indices)
131
132
133class _Objective(object):
134    """Callable that implements objective function.
135
136    Args:
137        estimator:
138            Object to use to fit the data. This is assumed to implement the
139            scikit-learn estimator interface. Either this needs to provide
140            ``score``, or ``scoring`` must be passed.
141
142        param_distributions:
143            Dictionary where keys are parameters and values are distributions.
144            Distributions are assumed to implement the optuna distribution
145            interface.
146
147        X:
148            Training data.
149
150        y:
151            Target variable.
152
153        cv:
154            Cross-validation strategy.
155
156        enable_pruning:
157            If :obj:`True`, pruning is performed in the case where the
158            underlying estimator supports ``partial_fit``.
159
160        error_score:
161            Value to assign to the score if an error occurs in fitting. If
162            'raise', the error is raised. If numeric,
163            ``sklearn.exceptions.FitFailedWarning`` is raised. This does not
164            affect the refit step, which will always raise the error.
165
166        fit_params:
167            Parameters passed to ``fit`` one the estimator.
168
169        groups:
170            Group labels for the samples used while splitting the dataset into
171            train/validation set.
172
173        max_iter:
174            Maximum number of epochs. This is only used if the underlying
175            estimator supports ``partial_fit``.
176
177        return_train_score:
178            If :obj:`True`, training scores will be included. Computing
179            training scores is used to get insights on how different
180            hyperparameter settings impact the overfitting/underfitting
181            trade-off. However computing training scores can be
182            computationally expensive and is not strictly required to select
183            the hyperparameters that yield the best generalization
184            performance.
185
186        scoring:
187            Scorer function.
188    """
189
190    def __init__(
191        self,
192        estimator: "BaseEstimator",
193        param_distributions: Mapping[str, distributions.BaseDistribution],
194        X: TwoDimArrayLikeType,
195        y: Optional[Union[OneDimArrayLikeType, TwoDimArrayLikeType]],
196        cv: "BaseCrossValidator",
197        enable_pruning: bool,
198        error_score: Union[Number, float, str],
199        fit_params: Dict[str, Any],
200        groups: Optional[OneDimArrayLikeType],
201        max_iter: int,
202        return_train_score: bool,
203        scoring: Callable[..., Number],
204    ) -> None:
205
206        self.cv = cv
207        self.enable_pruning = enable_pruning
208        self.error_score = error_score
209        self.estimator = estimator
210        self.fit_params = fit_params
211        self.groups = groups
212        self.max_iter = max_iter
213        self.param_distributions = param_distributions
214        self.return_train_score = return_train_score
215        self.scoring = scoring
216        self.X = X
217        self.y = y
218
219    def __call__(self, trial: Trial) -> float:
220
221        estimator = clone(self.estimator)
222        params = self._get_params(trial)
223
224        estimator.set_params(**params)
225
226        if self.enable_pruning:
227            scores = self._cross_validate_with_pruning(trial, estimator)
228        else:
229            scores = cross_validate(
230                estimator,
231                self.X,
232                self.y,
233                cv=self.cv,
234                error_score=self.error_score,
235                fit_params=self.fit_params,
236                groups=self.groups,
237                return_train_score=self.return_train_score,
238                scoring=self.scoring,
239            )
240
241        self._store_scores(trial, scores)
242
243        return trial.user_attrs["mean_test_score"]
244
245    def _cross_validate_with_pruning(
246        self, trial: Trial, estimator: "BaseEstimator"
247    ) -> Mapping[str, OneDimArrayLikeType]:
248
249        if is_classifier(estimator):
250            partial_fit_params = self.fit_params.copy()
251            classes = np.unique(self.y)
252
253            partial_fit_params.setdefault("classes", classes)
254
255        else:
256            partial_fit_params = self.fit_params
257
258        n_splits = self.cv.get_n_splits(self.X, self.y, groups=self.groups)
259        estimators = [clone(estimator) for _ in range(n_splits)]
260        scores = {
261            "fit_time": np.zeros(n_splits),
262            "score_time": np.zeros(n_splits),
263            "test_score": np.empty(n_splits),
264        }
265
266        if self.return_train_score:
267            scores["train_score"] = np.empty(n_splits)
268
269        for step in range(self.max_iter):
270            for i, (train, test) in enumerate(self.cv.split(self.X, self.y, groups=self.groups)):
271                out = self._partial_fit_and_score(estimators[i], train, test, partial_fit_params)
272
273                if self.return_train_score:
274                    scores["train_score"][i] = out.pop(0)
275
276                scores["test_score"][i] = out[0]
277                scores["fit_time"][i] += out[1]
278                scores["score_time"][i] += out[2]
279
280            intermediate_value = np.nanmean(scores["test_score"])
281
282            trial.report(intermediate_value, step=step)
283
284            if trial.should_prune():
285                self._store_scores(trial, scores)
286
287                raise TrialPruned("trial was pruned at iteration {}.".format(step))
288
289        return scores
290
291    def _get_params(self, trial: Trial) -> Dict[str, Any]:
292
293        return {
294            name: trial._suggest(name, distribution)
295            for name, distribution in self.param_distributions.items()
296        }
297
298    def _partial_fit_and_score(
299        self,
300        estimator: "BaseEstimator",
301        train: List[int],
302        test: List[int],
303        partial_fit_params: Dict[str, Any],
304    ) -> List[Number]:
305
306        X_train, y_train = _safe_split(estimator, self.X, self.y, train)
307        X_test, y_test = _safe_split(estimator, self.X, self.y, test, train_indices=train)
308
309        start_time = time()
310
311        try:
312            estimator.partial_fit(X_train, y_train, **partial_fit_params)
313
314        except Exception as e:
315            if self.error_score == "raise":
316                raise e
317
318            elif isinstance(self.error_score, Number):
319                fit_time = time() - start_time
320                test_score = self.error_score
321                score_time = 0.0
322
323                if self.return_train_score:
324                    train_score = self.error_score
325
326            else:
327                raise ValueError("error_score must be 'raise' or numeric.") from e
328
329        else:
330            fit_time = time() - start_time
331            test_score = self.scoring(estimator, X_test, y_test)
332            score_time = time() - fit_time - start_time
333
334            if self.return_train_score:
335                train_score = self.scoring(estimator, X_train, y_train)
336
337        # Required for type checking but is never expected to fail.
338        assert isinstance(fit_time, Number)
339        assert isinstance(score_time, Number)
340
341        ret = [test_score, fit_time, score_time]
342
343        if self.return_train_score:
344            ret.insert(0, train_score)
345
346        return ret
347
348    def _store_scores(self, trial: Trial, scores: Mapping[str, OneDimArrayLikeType]) -> None:
349
350        for name, array in scores.items():
351            if name in ["test_score", "train_score"]:
352                for i, score in enumerate(array):
353                    trial.set_user_attr("split{}_{}".format(i, name), score)
354
355            trial.set_user_attr("mean_{}".format(name), np.nanmean(array))
356            trial.set_user_attr("std_{}".format(name), np.nanstd(array))
357
358
359@experimental("0.17.0")
360class OptunaSearchCV(BaseEstimator):
361    """Hyperparameter search with cross-validation.
362
363    Args:
364        estimator:
365            Object to use to fit the data. This is assumed to implement the
366            scikit-learn estimator interface. Either this needs to provide
367            ``score``, or ``scoring`` must be passed.
368
369        param_distributions:
370            Dictionary where keys are parameters and values are distributions.
371            Distributions are assumed to implement the optuna distribution
372            interface.
373
374        cv:
375            Cross-validation strategy. Possible inputs for cv are:
376
377            - integer to specify the number of folds in a CV splitter,
378            - a CV splitter,
379            - an iterable yielding (train, validation) splits as arrays of indices.
380
381            For integer, if :obj:`estimator` is a classifier and :obj:`y` is
382            either binary or multiclass,
383            ``sklearn.model_selection.StratifiedKFold`` is used. otherwise,
384            ``sklearn.model_selection.KFold`` is used.
385
386        enable_pruning:
387            If :obj:`True`, pruning is performed in the case where the
388            underlying estimator supports ``partial_fit``.
389
390        error_score:
391            Value to assign to the score if an error occurs in fitting. If
392            'raise', the error is raised. If numeric,
393            ``sklearn.exceptions.FitFailedWarning`` is raised. This does not
394            affect the refit step, which will always raise the error.
395
396        max_iter:
397            Maximum number of epochs. This is only used if the underlying
398            estimator supports ``partial_fit``.
399
400        n_jobs:
401            Number of :obj:`threading` based parallel jobs. :obj:`-1` means
402            using the number is set to CPU count.
403
404                .. note::
405                    ``n_jobs`` allows parallelization using :obj:`threading` and may suffer from
406                    `Python's GIL <https://wiki.python.org/moin/GlobalInterpreterLock>`_.
407                    It is recommended to use :ref:`process-based parallelization<distributed>`
408                    if ``func`` is CPU bound.
409
410                .. warning::
411                    Deprecated in v2.7.0. This feature will be removed in the future.
412                    It is recommended to use :ref:`process-based parallelization<distributed>`.
413                    The removal of this feature is currently scheduled for v4.0.0, but this
414                    schedule is subject to change.
415                    See https://github.com/optuna/optuna/releases/tag/v2.7.0.
416
417        n_trials:
418            Number of trials. If :obj:`None`, there is no limitation on the
419            number of trials. If :obj:`timeout` is also set to :obj:`None`,
420            the study continues to create trials until it receives a
421            termination signal such as Ctrl+C or SIGTERM. This trades off
422            runtime vs quality of the solution.
423
424        random_state:
425            Seed of the pseudo random number generator. If int, this is the
426            seed used by the random number generator. If
427            ``numpy.random.RandomState`` object, this is the random number
428            generator. If :obj:`None`, the global random state from
429            ``numpy.random`` is used.
430
431        refit:
432            If :obj:`True`, refit the estimator with the best found
433            hyperparameters. The refitted estimator is made available at the
434            ``best_estimator_`` attribute and permits using ``predict``
435            directly.
436
437        return_train_score:
438            If :obj:`True`, training scores will be included. Computing
439            training scores is used to get insights on how different
440            hyperparameter settings impact the overfitting/underfitting
441            trade-off. However computing training scores can be
442            computationally expensive and is not strictly required to select
443            the hyperparameters that yield the best generalization
444            performance.
445
446        scoring:
447            String or callable to evaluate the predictions on the validation data.
448            If :obj:`None`, ``score`` on the estimator is used.
449
450        study:
451            Study corresponds to the optimization task. If :obj:`None`, a new
452            study is created.
453
454        subsample:
455            Proportion of samples that are used during hyperparameter search.
456
457            - If int, then draw ``subsample`` samples.
458            - If float, then draw ``subsample`` * ``X.shape[0]`` samples.
459
460        timeout:
461            Time limit in seconds for the search of appropriate models. If
462            :obj:`None`, the study is executed without time limitation. If
463            :obj:`n_trials` is also set to :obj:`None`, the study continues to
464            create trials until it receives a termination signal such as
465            Ctrl+C or SIGTERM. This trades off runtime vs quality of the
466            solution.
467
468        verbose:
469            Verbosity level. The higher, the more messages.
470
471    Attributes:
472        best_estimator_:
473            Estimator that was chosen by the search. This is present only if
474            ``refit`` is set to :obj:`True`.
475
476        n_splits_:
477            Number of cross-validation splits.
478
479        refit_time_:
480            Time for refitting the best estimator. This is present only if
481            ``refit`` is set to :obj:`True`.
482
483        sample_indices_:
484            Indices of samples that are used during hyperparameter search.
485
486        scorer_:
487            Scorer function.
488
489        study_:
490            Actual study.
491
492    Examples:
493
494        .. testcode::
495
496            import optuna
497            from sklearn.datasets import load_iris
498            from sklearn.svm import SVC
499
500            clf = SVC(gamma="auto")
501            param_distributions = {"C": optuna.distributions.LogUniformDistribution(1e-10, 1e10)}
502            optuna_search = optuna.integration.OptunaSearchCV(clf, param_distributions)
503            X, y = load_iris(return_X_y=True)
504            optuna_search.fit(X, y)
505            y_pred = optuna_search.predict(X)
506    """
507
508    _required_parameters = ["estimator", "param_distributions"]
509
510    @property
511    def _estimator_type(self) -> str:
512
513        return self.estimator._estimator_type
514
515    @property
516    def best_index_(self) -> int:
517        """Index which corresponds to the best candidate parameter setting."""
518
519        df = self.trials_dataframe()
520
521        return df["value"].idxmin()
522
523    @property
524    def best_params_(self) -> Dict[str, Any]:
525        """Parameters of the best trial in the :class:`~optuna.study.Study`."""
526
527        self._check_is_fitted()
528
529        return self.study_.best_params
530
531    @property
532    def best_score_(self) -> float:
533        """Mean cross-validated score of the best estimator."""
534
535        self._check_is_fitted()
536
537        return self.study_.best_value
538
539    @property
540    def best_trial_(self) -> FrozenTrial:
541        """Best trial in the :class:`~optuna.study.Study`."""
542
543        self._check_is_fitted()
544
545        return self.study_.best_trial
546
547    @property
548    def classes_(self) -> OneDimArrayLikeType:
549        """Class labels."""
550
551        self._check_is_fitted()
552
553        return self.best_estimator_.classes_
554
555    @property
556    def n_trials_(self) -> int:
557        """Actual number of trials."""
558
559        return len(self.trials_)
560
561    @property
562    def trials_(self) -> List[FrozenTrial]:
563        """All trials in the :class:`~optuna.study.Study`."""
564
565        self._check_is_fitted()
566
567        return self.study_.trials
568
569    @property
570    def user_attrs_(self) -> Dict[str, Any]:
571        """User attributes in the :class:`~optuna.study.Study`."""
572
573        self._check_is_fitted()
574
575        return self.study_.user_attrs
576
577    @property
578    def decision_function(self) -> Callable[..., Union[OneDimArrayLikeType, TwoDimArrayLikeType]]:
579        """Call ``decision_function`` on the best estimator.
580
581        This is available only if the underlying estimator supports
582        ``decision_function`` and ``refit`` is set to :obj:`True`.
583        """
584
585        self._check_is_fitted()
586
587        return self.best_estimator_.decision_function
588
589    @property
590    def inverse_transform(self) -> Callable[..., TwoDimArrayLikeType]:
591        """Call ``inverse_transform`` on the best estimator.
592
593        This is available only if the underlying estimator supports
594        ``inverse_transform`` and ``refit`` is set to :obj:`True`.
595        """
596
597        self._check_is_fitted()
598
599        return self.best_estimator_.inverse_transform
600
601    @property
602    def predict(self) -> Callable[..., Union[OneDimArrayLikeType, TwoDimArrayLikeType]]:
603        """Call ``predict`` on the best estimator.
604
605        This is available only if the underlying estimator supports ``predict``
606        and ``refit`` is set to :obj:`True`.
607        """
608
609        self._check_is_fitted()
610
611        return self.best_estimator_.predict
612
613    @property
614    def predict_log_proba(self) -> Callable[..., TwoDimArrayLikeType]:
615        """Call ``predict_log_proba`` on the best estimator.
616
617        This is available only if the underlying estimator supports
618        ``predict_log_proba`` and ``refit`` is set to :obj:`True`.
619        """
620
621        self._check_is_fitted()
622
623        return self.best_estimator_.predict_log_proba
624
625    @property
626    def predict_proba(self) -> Callable[..., TwoDimArrayLikeType]:
627        """Call ``predict_proba`` on the best estimator.
628
629        This is available only if the underlying estimator supports
630        ``predict_proba`` and ``refit`` is set to :obj:`True`.
631        """
632
633        self._check_is_fitted()
634
635        return self.best_estimator_.predict_proba
636
637    @property
638    def score_samples(self) -> Callable[..., OneDimArrayLikeType]:
639        """Call ``score_samples`` on the best estimator.
640
641        This is available only if the underlying estimator supports
642        ``score_samples`` and ``refit`` is set to :obj:`True`.
643        """
644
645        self._check_is_fitted()
646
647        return self.best_estimator_.score_samples
648
649    @property
650    def set_user_attr(self) -> Callable[..., None]:
651        """Call ``set_user_attr`` on the :class:`~optuna.study.Study`."""
652
653        self._check_is_fitted()
654
655        return self.study_.set_user_attr
656
657    @property
658    def transform(self) -> Callable[..., TwoDimArrayLikeType]:
659        """Call ``transform`` on the best estimator.
660
661        This is available only if the underlying estimator supports
662        ``transform`` and ``refit`` is set to :obj:`True`.
663        """
664
665        self._check_is_fitted()
666
667        return self.best_estimator_.transform
668
669    @property
670    def trials_dataframe(self) -> Callable[..., "pd.DataFrame"]:
671        """Call ``trials_dataframe`` on the :class:`~optuna.study.Study`."""
672
673        self._check_is_fitted()
674
675        return self.study_.trials_dataframe
676
677    def __init__(
678        self,
679        estimator: "BaseEstimator",
680        param_distributions: Mapping[str, distributions.BaseDistribution],
681        cv: Optional[Union["BaseCrossValidator", int]] = 5,
682        enable_pruning: bool = False,
683        error_score: Union[Number, float, str] = np.nan,
684        max_iter: int = 1000,
685        n_jobs: int = 1,
686        n_trials: int = 10,
687        random_state: Optional[Union[int, np.random.RandomState]] = None,
688        refit: bool = True,
689        return_train_score: bool = False,
690        scoring: Optional[Union[Callable[..., float], str]] = None,
691        study: Optional[study_module.Study] = None,
692        subsample: Union[float, int] = 1.0,
693        timeout: Optional[float] = None,
694        verbose: int = 0,
695    ) -> None:
696
697        _imports.check()
698
699        self.cv = cv
700        self.enable_pruning = enable_pruning
701        self.error_score = error_score
702        self.estimator = estimator
703        self.max_iter = max_iter
704        self.n_trials = n_trials
705        self.n_jobs = n_jobs
706        self.param_distributions = param_distributions
707        self.random_state = random_state
708        self.refit = refit
709        self.return_train_score = return_train_score
710        self.scoring = scoring
711        self.study = study
712        self.subsample = subsample
713        self.timeout = timeout
714        self.verbose = verbose
715
716    def _check_is_fitted(self) -> None:
717
718        attributes = ["n_splits_", "sample_indices_", "scorer_", "study_"]
719
720        if self.refit:
721            attributes += ["best_estimator_", "refit_time_"]
722
723        check_is_fitted(self, attributes)
724
725    def _check_params(self) -> None:
726
727        if not hasattr(self.estimator, "fit"):
728            raise ValueError("estimator must be a scikit-learn estimator.")
729
730        if type(self.param_distributions) is not dict:
731            raise ValueError("param_distributions must be a dictionary.")
732
733        for name, distribution in self.param_distributions.items():
734            if not isinstance(distribution, distributions.BaseDistribution):
735                raise ValueError("Value of {} must be a optuna distribution.".format(name))
736
737        if self.enable_pruning and not hasattr(self.estimator, "partial_fit"):
738            raise ValueError("estimator must support partial_fit.")
739
740        if self.max_iter <= 0:
741            raise ValueError("max_iter must be > 0, got {}.".format(self.max_iter))
742
743        if self.study is not None and self.study.direction != StudyDirection.MAXIMIZE:
744            raise ValueError("direction of study must be 'maximize'.")
745
746    def _more_tags(self) -> Dict[str, bool]:
747
748        return {"non_deterministic": True, "no_validation": True}
749
750    def _refit(
751        self,
752        X: TwoDimArrayLikeType,
753        y: Optional[Union[OneDimArrayLikeType, TwoDimArrayLikeType]] = None,
754        **fit_params: Any,
755    ) -> "OptunaSearchCV":
756
757        n_samples = _num_samples(X)
758
759        self.best_estimator_ = clone(self.estimator)
760
761        try:
762            self.best_estimator_.set_params(**self.study_.best_params)
763        except ValueError as e:
764            _logger.exception(e)
765
766        _logger.info("Refitting the estimator using {} samples...".format(n_samples))
767
768        start_time = time()
769
770        self.best_estimator_.fit(X, y, **fit_params)
771
772        self.refit_time_ = time() - start_time
773
774        _logger.info("Finished refitting! (elapsed time: {:.3f} sec.)".format(self.refit_time_))
775
776        return self
777
778    def fit(
779        self,
780        X: TwoDimArrayLikeType,
781        y: Optional[Union[OneDimArrayLikeType, TwoDimArrayLikeType]] = None,
782        groups: Optional[OneDimArrayLikeType] = None,
783        **fit_params: Any,
784    ) -> "OptunaSearchCV":
785        """Run fit with all sets of parameters.
786
787        Args:
788            X:
789                Training data.
790
791            y:
792                Target variable.
793
794            groups:
795                Group labels for the samples used while splitting the dataset
796                into train/validation set.
797
798            **fit_params:
799                Parameters passed to ``fit`` on the estimator.
800
801        Returns:
802            self:
803                Return self.
804        """
805
806        self._check_params()
807
808        random_state = check_random_state(self.random_state)
809        max_samples = self.subsample
810        n_samples = _num_samples(X)
811        old_level = _logger.getEffectiveLevel()
812
813        if self.verbose > 1:
814            _logger.setLevel(DEBUG)
815        elif self.verbose > 0:
816            _logger.setLevel(INFO)
817        else:
818            _logger.setLevel(WARNING)
819
820        self.sample_indices_ = np.arange(n_samples)
821
822        if type(max_samples) is float:
823            max_samples = int(max_samples * n_samples)
824
825        if max_samples < n_samples:
826            self.sample_indices_ = random_state.choice(
827                self.sample_indices_, max_samples, replace=False
828            )
829
830            self.sample_indices_.sort()
831
832        X_res = _safe_indexing(X, self.sample_indices_)
833        y_res = _safe_indexing(y, self.sample_indices_)
834        groups_res = _safe_indexing(groups, self.sample_indices_)
835        fit_params_res = fit_params
836
837        if fit_params_res is not None:
838            fit_params_res = _check_fit_params(X, fit_params, self.sample_indices_)
839
840        classifier = is_classifier(self.estimator)
841        cv = check_cv(self.cv, y_res, classifier=classifier)
842
843        self.n_splits_ = cv.get_n_splits(X_res, y_res, groups=groups_res)
844        self.scorer_ = check_scoring(self.estimator, scoring=self.scoring)
845
846        if self.study is None:
847            seed = random_state.randint(0, np.iinfo("int32").max)
848            sampler = samplers.TPESampler(seed=seed)
849
850            self.study_ = study_module.create_study(direction="maximize", sampler=sampler)
851
852        else:
853            self.study_ = self.study
854
855        objective = _Objective(
856            self.estimator,
857            self.param_distributions,
858            X_res,
859            y_res,
860            cv,
861            self.enable_pruning,
862            self.error_score,
863            fit_params_res,
864            groups_res,
865            self.max_iter,
866            self.return_train_score,
867            self.scorer_,
868        )
869
870        _logger.info(
871            "Searching the best hyperparameters using {} "
872            "samples...".format(_num_samples(self.sample_indices_))
873        )
874
875        self.study_.optimize(
876            objective, n_jobs=self.n_jobs, n_trials=self.n_trials, timeout=self.timeout
877        )
878
879        _logger.info("Finished hyperparemeter search!")
880
881        if self.refit:
882            self._refit(X, y, **fit_params)
883
884        _logger.setLevel(old_level)
885
886        return self
887
888    def score(
889        self,
890        X: TwoDimArrayLikeType,
891        y: Optional[Union[OneDimArrayLikeType, TwoDimArrayLikeType]] = None,
892    ) -> float:
893        """Return the score on the given data.
894
895        Args:
896            X:
897                Data.
898
899            y:
900                Target variable.
901
902        Returns:
903            score:
904                Scaler score.
905        """
906
907        return self.scorer_(self.best_estimator_, X, y)
908