from collections import OrderedDict
from typing import Any
from typing import Callable
from typing import Dict
from typing import Optional
from typing import Sequence
from typing import Union
import warnings

import numpy

from optuna import logging
from optuna._experimental import experimental
from optuna._imports import try_import
from optuna._transform import _SearchSpaceTransform
from optuna.distributions import BaseDistribution
from optuna.samplers import BaseSampler
from optuna.samplers import IntersectionSearchSpace
from optuna.samplers import RandomSampler
from optuna.study import Study
from optuna.study import StudyDirection
from optuna.trial import FrozenTrial
from optuna.trial import TrialState


with try_import() as _imports:
    from botorch.acquisition.monte_carlo import qExpectedImprovement
    from botorch.acquisition.multi_objective.monte_carlo import qExpectedHypervolumeImprovement
    from botorch.acquisition.multi_objective.objective import IdentityMCMultiOutputObjective
    from botorch.acquisition.objective import ConstrainedMCObjective
    from botorch.acquisition.objective import GenericMCObjective
    from botorch.fit import fit_gpytorch_model
    from botorch.models import SingleTaskGP
    from botorch.models.transforms.outcome import Standardize
    from botorch.optim import optimize_acqf
    from botorch.sampling.samplers import SobolQMCNormalSampler
    from botorch.utils.multi_objective.box_decompositions import NondominatedPartitioning
    from botorch.utils.multi_objective.scalarization import get_chebyshev_scalarization
    from botorch.utils.sampling import sample_simplex
    from botorch.utils.transforms import normalize
    from botorch.utils.transforms import unnormalize
    from gpytorch.mlls import ExactMarginalLogLikelihood
    import torch


_logger = logging.get_logger(__name__)


@experimental("2.4.0")
def qei_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based batch Expected Improvement (qEI).

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with single-objective optimization.

    Args:
        train_x:
            Previous parameter configurations. A ``torch.Tensor`` of shape
            ``(n_trials, n_params)``. ``n_trials`` is the number of already observed trials
            and ``n_params`` is the number of parameters. ``n_params`` may be larger than the
            actual number of parameters if categorical parameters are included in the search
            space, since these parameters are one-hot encoded.
            Values are not normalized.
        train_obj:
            Previously observed objectives. A ``torch.Tensor`` of shape
            ``(n_trials, n_objectives)``. ``n_trials`` is identical to that of ``train_x``.
            ``n_objectives`` is the number of objectives. Observations are not normalized.
        train_con:
            Objective constraints. A ``torch.Tensor`` of shape ``(n_trials, n_constraints)``.
            ``n_trials`` is identical to that of ``train_x``. ``n_constraints`` is the number of
            constraints. A constraint is violated if strictly larger than 0. If no constraints are
            involved in the optimization, this argument will be :obj:`None`.
        bounds:
            Search space bounds. A ``torch.Tensor`` of shape ``(2, n_params)``. ``n_params`` is
            identical to that of ``train_x``. The first and the second rows correspond to the
            lower and upper bounds for each parameter respectively.

    Returns:
        Next set of candidates. Usually the return value of BoTorch's ``optimize_acqf``.
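
    Example:

        A minimal sketch of passing this function explicitly to the sampler (the
        ``n_startup_trials`` value below is an arbitrary choice, not a tuned setting):

        .. code-block:: python

            import optuna
            from optuna.integration.botorch import BoTorchSampler
            from optuna.integration.botorch import qei_candidates_func

            sampler = BoTorchSampler(
                candidates_func=qei_candidates_func,
                n_startup_trials=10,
            )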

    """

    if train_obj.size(-1) != 1:
        raise ValueError("Objective may only contain single values with qEI.")
    if train_con is not None:
        train_y = torch.cat([train_obj, train_con], dim=-1)

        is_feas = (train_con <= 0).all(dim=-1)
        train_obj_feas = train_obj[is_feas]

        if train_obj_feas.numel() == 0:
            # TODO(hvy): Do not use 0 as the best observation.
            _logger.warning(
                "No objective values are feasible. Using 0 as the best objective in qEI."
            )
            best_f = torch.zeros(())
        else:
            best_f = train_obj_feas.max()

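        # Bind the loop index through a default argument so that each lambda reads
        # its own constraint column rather than the final value of ``i``.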
        constraints = []
        n_constraints = train_con.size(1)
        for i in range(n_constraints):
            constraints.append(lambda Z, i=i: Z[..., -n_constraints + i])
        objective = ConstrainedMCObjective(
            objective=lambda Z: Z[..., 0],
            constraints=constraints,
        )
    else:
        train_y = train_obj

        best_f = train_obj.max()

        objective = None  # Using the default identity objective.

    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1)))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)

    acqf = qExpectedImprovement(
        model=model,
        best_f=best_f,
        sampler=SobolQMCNormalSampler(num_samples=256),
        objective=objective,
    )

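    # Optimize the acquisition function within the unit cube; the inputs were
    # normalized above and the resulting candidates are unnormalized below.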
    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=10,
        raw_samples=512,
        options={"batch_limit": 5, "maxiter": 200},
        sequential=True,
    )

    candidates = unnormalize(candidates.detach(), bounds=bounds)

    return candidates


@experimental("2.4.0")
def qehvi_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based batch Expected Hypervolume Improvement (qEHVI).

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with multi-objective optimization when the number of objectives is three or fewer.

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value
        descriptions.
    """

    n_objectives = train_obj.size(-1)

    if train_con is not None:
        train_y = torch.cat([train_obj, train_con], dim=-1)

        is_feas = (train_con <= 0).all(dim=-1)
        train_obj_feas = train_obj[is_feas]

        constraints = []
        n_constraints = train_con.size(1)

        for i in range(n_constraints):
            constraints.append(lambda Z, i=i: Z[..., -n_constraints + i])
        additional_qehvi_kwargs = {
            "objective": IdentityMCMultiOutputObjective(outcomes=list(range(n_objectives))),
            "constraints": constraints,
        }
    else:
        train_y = train_obj

        train_obj_feas = train_obj

        additional_qehvi_kwargs = {}

    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.shape[-1]))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)

    # Approximate box decomposition similar to Ax when the number of objectives is large.
    # https://github.com/facebook/Ax/blob/master/ax/models/torch/botorch_moo_defaults
    if n_objectives > 2:
        alpha = 10 ** (-8 + n_objectives)
    else:
        alpha = 0.0

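    # Place the reference point slightly below the worst observed value of each
    # objective (BoTorch assumes maximization), so every observation dominates it.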
    ref_point = train_obj.min(dim=0).values - 1e-8

    partitioning = NondominatedPartitioning(ref_point=ref_point, Y=train_obj_feas, alpha=alpha)

    ref_point_list = ref_point.tolist()

    acqf = qExpectedHypervolumeImprovement(
        model=model,
        ref_point=ref_point_list,
        partitioning=partitioning,
        sampler=SobolQMCNormalSampler(num_samples=256),
        **additional_qehvi_kwargs,
    )

    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=20,
        raw_samples=1024,
        options={"batch_limit": 5, "maxiter": 200, "nonnegative": True},
        sequential=True,
    )

    candidates = unnormalize(candidates.detach(), bounds=bounds)

    return candidates


@experimental("2.4.0")
def qparego_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based extended ParEGO (qParEGO) for constrained multi-objective optimization.

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with multi-objective optimization when the number of objectives is larger than three.

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value
        descriptions.
    """

    n_objectives = train_obj.size(-1)

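    # Collapse the objectives into a single one via a random Chebyshev
    # scalarization, with weights drawn uniformly from the probability simplex.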
    weights = sample_simplex(n_objectives).squeeze()
    scalarization = get_chebyshev_scalarization(weights=weights, Y=train_obj)

    if train_con is not None:
        train_y = torch.cat([train_obj, train_con], dim=-1)

        constraints = []
        n_constraints = train_con.size(1)

        for i in range(n_constraints):
            constraints.append(lambda Z, i=i: Z[..., -n_constraints + i])

        objective = ConstrainedMCObjective(
            objective=lambda Z: scalarization(Z[..., :n_objectives]),
            constraints=constraints,
        )
    else:
        train_y = train_obj

        objective = GenericMCObjective(scalarization)

    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1)))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)

    acqf = qExpectedImprovement(
        model=model,
        best_f=objective(train_y).max(),
        sampler=SobolQMCNormalSampler(num_samples=256),
        objective=objective,
    )

    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=20,
        raw_samples=1024,
        options={"batch_limit": 5, "maxiter": 200},
        sequential=True,
    )

    candidates = unnormalize(candidates.detach(), bounds=bounds)

    return candidates


def _get_default_candidates_func(
    n_objectives: int,
) -> Callable[
    [
        "torch.Tensor",
        "torch.Tensor",
        Optional["torch.Tensor"],
        "torch.Tensor",
    ],
    "torch.Tensor",
]:
    if n_objectives > 3:
        return qparego_candidates_func
    elif n_objectives > 1:
        return qehvi_candidates_func
    else:
        return qei_candidates_func

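# A user-defined ``candidates_func`` only has to follow the same signature as the
# functions above. As a minimal sketch (``my_candidates_func`` is a hypothetical
# name), one could wrap the qEI default and keep its behavior:
#
#     def my_candidates_func(train_x, train_obj, train_con, bounds):
#         return qei_candidates_func(train_x, train_obj, train_con, bounds)
#
#     sampler = BoTorchSampler(candidates_func=my_candidates_func)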

# TODO(hvy): Allow utilizing GPUs via some parameter, without having to rewrite the
# callback functions.
@experimental("2.4.0")
class BoTorchSampler(BaseSampler):
    """A sampler that uses BoTorch, a Bayesian optimization library built on top of PyTorch.

    This sampler allows using BoTorch's optimization algorithms from Optuna to suggest parameter
    configurations. Parameters are transformed to continuous space and passed to BoTorch, and then
    transformed back to Optuna's representations. Categorical parameters are one-hot encoded.

    .. seealso::
        See an `example <https://github.com/optuna/optuna-examples/blob/main/multi_objective/
        botorch_simple.py>`_ of how to use the sampler.

    .. seealso::
        See the `BoTorch <https://botorch.org/>`_ homepage for details and for how to implement
        your own ``candidates_func``.

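    Example:

        A minimal sketch of running a study with this sampler. The objective and
        the constraint below are purely illustrative:

        .. code-block:: python

            import optuna


            def objective(trial):
                x = trial.suggest_float("x", -10, 10)
                y = trial.suggest_float("y", -10, 10)
                # Feasible if and only if the stored value is <= 0.
                trial.set_user_attr("constraint", (x + y - 10,))
                return x ** 2 + y ** 2


            def constraints(trial):
                return trial.user_attrs["constraint"]


            sampler = optuna.integration.BoTorchSampler(
                constraints_func=constraints,
                n_startup_trials=10,
            )
            study = optuna.create_study(direction="minimize", sampler=sampler)
            study.optimize(objective, n_trials=32)
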
    .. note::
        An instance of this sampler *should not be used with different studies* when used with
        constraints. Instead, a new instance should be created for each new study. The reason is
        that the sampler is stateful, keeping all the computed constraints.

    Args:
        candidates_func:
            An optional function that suggests the next candidates. It must take the training
            data, the objectives, the constraints, and the search space bounds, and return the
            next candidates. The arguments are of type ``torch.Tensor``. The return value must
            be a ``torch.Tensor``. However, if ``constraints_func`` is omitted, the constraints
            argument will be :obj:`None`. For any constraints that failed to compute, the tensor
            will contain NaN.

            If omitted, it is determined automatically based on the number of objectives. If the
            number of objectives is one, Quasi MC-based batch Expected Improvement (qEI) is used.
            If the number of objectives is either two or three, Quasi MC-based
            batch Expected Hypervolume Improvement (qEHVI) is used. Otherwise, for a larger
            number of objectives, the faster Quasi MC-based extended ParEGO (qParEGO) is used.

            The function should assume *maximization* of the objective.

            .. seealso::
                See :func:`optuna.integration.botorch.qei_candidates_func` for an example.
        constraints_func:
            An optional function that computes the objective constraints. It must take a
            :class:`~optuna.trial.FrozenTrial` and return the constraints. The return value must
            be a sequence of :obj:`float` s. A value strictly larger than 0 means that a
            constraint is violated. A value equal to or smaller than 0 is considered feasible.

            If omitted, no constraints will be passed to ``candidates_func`` nor taken into
            account during suggestion.
        n_startup_trials:
            Number of initial trials, that is, the number of trials for which parameters are
            sampled with the ``independent_sampler`` before the relative sampling starts.
        independent_sampler:
            An independent sampler to use for the initial trials and for parameters that are
            conditional.
    """

    def __init__(
        self,
        *,
        candidates_func: Optional[
            Callable[
                [
                    "torch.Tensor",
                    "torch.Tensor",
                    Optional["torch.Tensor"],
                    "torch.Tensor",
                ],
                "torch.Tensor",
            ]
        ] = None,
        constraints_func: Optional[Callable[[FrozenTrial], Sequence[float]]] = None,
        n_startup_trials: int = 10,
        independent_sampler: Optional[BaseSampler] = None,
    ):
        _imports.check()

        self._candidates_func = candidates_func
        self._constraints_func = constraints_func
        self._independent_sampler = independent_sampler or RandomSampler()
        self._n_startup_trials = n_startup_trials

        self._study_id: Optional[int] = None
        self._search_space = IntersectionSearchSpace()

    def infer_relative_search_space(
        self,
        study: Study,
        trial: FrozenTrial,
    ) -> Dict[str, BaseDistribution]:
        if self._study_id is None:
            self._study_id = study._study_id
        if self._study_id != study._study_id:
            # Note that the check below is meaningless when `InMemoryStorage` is used
            # because `InMemoryStorage.create_new_study` always returns the same study ID.
            raise RuntimeError("BoTorchSampler cannot handle multiple studies.")

        return self._search_space.calculate(study, ordered_dict=True)  # type: ignore

    def sample_relative(
        self,
        study: Study,
        trial: FrozenTrial,
        search_space: Dict[str, BaseDistribution],
    ) -> Dict[str, Any]:
        assert isinstance(search_space, OrderedDict)

        if len(search_space) == 0:
            return {}

        trials = [t for t in study.get_trials(deepcopy=False) if t.state == TrialState.COMPLETE]

        n_trials = len(trials)
        if n_trials < self._n_startup_trials:
            return {}

        trans = _SearchSpaceTransform(search_space)
        n_objectives = len(study.directions)
        values: Union[numpy.ndarray, torch.Tensor] = numpy.empty(
            (n_trials, n_objectives), dtype=numpy.float64
        )
        params: Union[numpy.ndarray, torch.Tensor]
        con: Optional[Union[numpy.ndarray, torch.Tensor]] = None
        bounds: Union[numpy.ndarray, torch.Tensor] = trans.bounds
        params = numpy.empty((n_trials, trans.bounds.shape[0]), dtype=numpy.float64)
        for trial_idx, trial in enumerate(trials):
            params[trial_idx] = trans.transform(trial.params)
            assert len(study.directions) == len(trial.values)

            for obj_idx, (direction, value) in enumerate(zip(study.directions, trial.values)):
                assert value is not None
                if direction == StudyDirection.MINIMIZE:  # BoTorch always assumes maximization.
                    value *= -1
                values[trial_idx, obj_idx] = value

            if self._constraints_func is not None:
                constraints = study._storage.get_trial_system_attrs(trial._trial_id).get(
                    "botorch:constraints"
                )
                if constraints is not None:
                    n_constraints = len(constraints)

                    if con is None:
                        con = numpy.full((n_trials, n_constraints), numpy.nan, dtype=numpy.float64)
                    elif n_constraints != con.shape[1]:
                        raise RuntimeError(
                            f"Expected {con.shape[1]} constraints but received {n_constraints}."
                        )

                    con[trial_idx] = constraints

        if self._constraints_func is not None:
            if con is None:
                warnings.warn(
                    "`constraints_func` was given but no call to it correctly computed "
                    "constraints. Constraints passed to `candidates_func` will be `None`."
                )
            elif numpy.isnan(con).any():
                warnings.warn(
                    "`constraints_func` was given but some calls to it did not correctly compute "
                    "constraints. Constraints passed to `candidates_func` will contain NaN."
                )

        values = torch.from_numpy(values)
        params = torch.from_numpy(params)
        if con is not None:
            con = torch.from_numpy(con)
        bounds = torch.from_numpy(bounds)

        if con is not None:
            if con.dim() == 1:
                con.unsqueeze_(-1)
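        # BoTorch expects ``bounds`` of shape ``(2, n_params)``; ``trans.bounds``
        # is ``(n_params, 2)``.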
        bounds.transpose_(0, 1)

        if self._candidates_func is None:
            self._candidates_func = _get_default_candidates_func(n_objectives=n_objectives)
        candidates = self._candidates_func(params, values, con, bounds)

        if not isinstance(candidates, torch.Tensor):
            raise TypeError("Candidates must be a torch.Tensor.")
        if candidates.dim() == 2:
            if candidates.size(0) != 1:
                raise ValueError(
                    "Candidates batch optimization is not supported and the first dimension must "
                    "have size 1 if candidates is a two-dimensional tensor. Actual: "
                    f"{candidates.size()}."
                )
            # Batch size is one. Get rid of the batch dimension.
            candidates = candidates.squeeze(0)
        if candidates.dim() != 1:
            raise ValueError("Candidates must be one or two-dimensional.")
        if candidates.size(0) != bounds.size(1):
            raise ValueError(
                "Candidates size must match with the given bounds. Actual candidates: "
                f"{candidates.size(0)}, bounds: {bounds.size(1)}."
            )

        return trans.untransform(candidates.numpy())

    def sample_independent(
        self,
        study: Study,
        trial: FrozenTrial,
        param_name: str,
        param_distribution: BaseDistribution,
    ) -> Any:
        return self._independent_sampler.sample_independent(
            study, trial, param_name, param_distribution
        )

    def reseed_rng(self) -> None:
        self._independent_sampler.reseed_rng()

    def after_trial(
        self,
        study: Study,
        trial: FrozenTrial,
        state: TrialState,
        values: Optional[Sequence[float]],
    ) -> None:
        if self._constraints_func is not None:
            constraints = None

            try:
                con = self._constraints_func(trial)
                if not isinstance(con, (tuple, list)):
                    warnings.warn(
                        f"Constraints should be a sequence of floats but got {type(con).__name__}."
                    )
                constraints = tuple(con)
            except Exception:
                raise
            finally:
                assert constraints is None or isinstance(constraints, tuple)

                study._storage.set_trial_system_attr(
                    trial._trial_id,
                    "botorch:constraints",
                    constraints,
                )
        self._independent_sampler.after_trial(study, trial, state, values)