1from collections import OrderedDict 2from typing import Any 3from typing import Callable 4from typing import Dict 5from typing import Optional 6from typing import Sequence 7from typing import Union 8import warnings 9 10import numpy 11 12from optuna import logging 13from optuna._experimental import experimental 14from optuna._imports import try_import 15from optuna._transform import _SearchSpaceTransform 16from optuna.distributions import BaseDistribution 17from optuna.samplers import BaseSampler 18from optuna.samplers import IntersectionSearchSpace 19from optuna.samplers import RandomSampler 20from optuna.study import Study 21from optuna.study import StudyDirection 22from optuna.trial import FrozenTrial 23from optuna.trial import TrialState 24 25 26with try_import() as _imports: 27 from botorch.acquisition.monte_carlo import qExpectedImprovement 28 from botorch.acquisition.multi_objective.monte_carlo import qExpectedHypervolumeImprovement 29 from botorch.acquisition.multi_objective.objective import IdentityMCMultiOutputObjective 30 from botorch.acquisition.objective import ConstrainedMCObjective 31 from botorch.acquisition.objective import GenericMCObjective 32 from botorch.fit import fit_gpytorch_model 33 from botorch.models import SingleTaskGP 34 from botorch.models.transforms.outcome import Standardize 35 from botorch.optim import optimize_acqf 36 from botorch.sampling.samplers import SobolQMCNormalSampler 37 from botorch.utils.multi_objective.box_decompositions import NondominatedPartitioning 38 from botorch.utils.multi_objective.scalarization import get_chebyshev_scalarization 39 from botorch.utils.sampling import sample_simplex 40 from botorch.utils.transforms import normalize 41 from botorch.utils.transforms import unnormalize 42 from gpytorch.mlls import ExactMarginalLogLikelihood 43 import torch 44 45 46_logger = logging.get_logger(__name__) 47 48 49@experimental("2.4.0") 50def qei_candidates_func( 51 train_x: "torch.Tensor", 52 train_obj: "torch.Tensor", 53 train_con: Optional["torch.Tensor"], 54 bounds: "torch.Tensor", 55) -> "torch.Tensor": 56 """Quasi MC-based batch Expected Improvement (qEI). 57 58 The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler` 59 with single-objective optimization. 60 61 Args: 62 train_x: 63 Previous parameter configurations. A ``torch.Tensor`` of shape 64 ``(n_trials, n_params)``. ``n_trials`` is the number of already observed trials 65 and ``n_params`` is the number of parameters. ``n_params`` may be larger than the 66 actual number of parameters if categorical parameters are included in the search 67 space, since these parameters are one-hot encoded. 68 Values are not normalized. 69 train_obj: 70 Previously observed objectives. A ``torch.Tensor`` of shape 71 ``(n_trials, n_objectives)``. ``n_trials`` is identical to that of ``train_x``. 72 ``n_objectives`` is the number of objectives. Observations are not normalized. 73 train_con: 74 Objective constraints. A ``torch.Tensor`` of shape ``(n_trials, n_constraints)``. 75 ``n_trials`` is identical to that of ``train_x``. ``n_constraints`` is the number of 76 constraints. A constraint is violated if strictly larger than 0. If no constraints are 77 involved in the optimization, this argument will be :obj:`None`. 78 bounds: 79 Search space bounds. A ``torch.Tensor`` of shape ``(2, n_params)``. ``n_params`` is 80 identical to that of ``train_x``. The first and the second rows correspond to the 81 lower and upper bounds for each parameter respectively. 82 83 Returns: 84 Next set of candidates. Usually the return value of BoTorch's ``optimize_acqf``. 85 86 """ 87 88 if train_obj.size(-1) != 1: 89 raise ValueError("Objective may only contain single values with qEI.") 90 if train_con is not None: 91 train_y = torch.cat([train_obj, train_con], dim=-1) 92 93 is_feas = (train_con <= 0).all(dim=-1) 94 train_obj_feas = train_obj[is_feas] 95 96 if train_obj_feas.numel() == 0: 97 # TODO(hvy): Do not use 0 as the best observation. 98 _logger.warning( 99 "No objective values are feasible. Using 0 as the best objective in qEI." 100 ) 101 best_f = torch.zeros(()) 102 else: 103 best_f = train_obj_feas.max() 104 105 constraints = [] 106 n_constraints = train_con.size(1) 107 for i in range(n_constraints): 108 constraints.append(lambda Z, i=i: Z[..., -n_constraints + i]) 109 objective = ConstrainedMCObjective( 110 objective=lambda Z: Z[..., 0], 111 constraints=constraints, 112 ) 113 else: 114 train_y = train_obj 115 116 best_f = train_obj.max() 117 118 objective = None # Using the default identity objective. 119 120 train_x = normalize(train_x, bounds=bounds) 121 122 model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1))) 123 mll = ExactMarginalLogLikelihood(model.likelihood, model) 124 fit_gpytorch_model(mll) 125 126 acqf = qExpectedImprovement( 127 model=model, 128 best_f=best_f, 129 sampler=SobolQMCNormalSampler(num_samples=256), 130 objective=objective, 131 ) 132 133 standard_bounds = torch.zeros_like(bounds) 134 standard_bounds[1] = 1 135 136 candidates, _ = optimize_acqf( 137 acq_function=acqf, 138 bounds=standard_bounds, 139 q=1, 140 num_restarts=10, 141 raw_samples=512, 142 options={"batch_limit": 5, "maxiter": 200}, 143 sequential=True, 144 ) 145 146 candidates = unnormalize(candidates.detach(), bounds=bounds) 147 148 return candidates 149 150 151@experimental("2.4.0") 152def qehvi_candidates_func( 153 train_x: "torch.Tensor", 154 train_obj: "torch.Tensor", 155 train_con: Optional["torch.Tensor"], 156 bounds: "torch.Tensor", 157) -> "torch.Tensor": 158 """Quasi MC-based batch Expected Hypervolume Improvement (qEHVI). 159 160 The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler` 161 with multi-objective optimization when the number of objectives is three or less. 162 163 .. seealso:: 164 :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value 165 descriptions. 166 """ 167 168 n_objectives = train_obj.size(-1) 169 170 if train_con is not None: 171 train_y = torch.cat([train_obj, train_con], dim=-1) 172 173 is_feas = (train_con <= 0).all(dim=-1) 174 train_obj_feas = train_obj[is_feas] 175 176 constraints = [] 177 n_constraints = train_con.size(1) 178 179 for i in range(n_constraints): 180 constraints.append(lambda Z, i=i: Z[..., -n_constraints + i]) 181 additional_qehvi_kwargs = { 182 "objective": IdentityMCMultiOutputObjective(outcomes=list(range(n_objectives))), 183 "constraints": constraints, 184 } 185 else: 186 train_y = train_obj 187 188 train_obj_feas = train_obj 189 190 additional_qehvi_kwargs = {} 191 192 train_x = normalize(train_x, bounds=bounds) 193 194 model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.shape[-1])) 195 mll = ExactMarginalLogLikelihood(model.likelihood, model) 196 fit_gpytorch_model(mll) 197 198 # Approximate box decomposition similar to Ax when the number of objectives is large. 199 # https://github.com/facebook/Ax/blob/master/ax/models/torch/botorch_moo_defaults 200 if n_objectives > 2: 201 alpha = 10 ** (-8 + n_objectives) 202 else: 203 alpha = 0.0 204 205 ref_point = train_obj.min(dim=0).values - 1e-8 206 207 partitioning = NondominatedPartitioning(ref_point=ref_point, Y=train_obj_feas, alpha=alpha) 208 209 ref_point_list = ref_point.tolist() 210 211 acqf = qExpectedHypervolumeImprovement( 212 model=model, 213 ref_point=ref_point_list, 214 partitioning=partitioning, 215 sampler=SobolQMCNormalSampler(num_samples=256), 216 **additional_qehvi_kwargs, 217 ) 218 219 standard_bounds = torch.zeros_like(bounds) 220 standard_bounds[1] = 1 221 222 candidates, _ = optimize_acqf( 223 acq_function=acqf, 224 bounds=standard_bounds, 225 q=1, 226 num_restarts=20, 227 raw_samples=1024, 228 options={"batch_limit": 5, "maxiter": 200, "nonnegative": True}, 229 sequential=True, 230 ) 231 232 candidates = unnormalize(candidates.detach(), bounds=bounds) 233 234 return candidates 235 236 237@experimental("2.4.0") 238def qparego_candidates_func( 239 train_x: "torch.Tensor", 240 train_obj: "torch.Tensor", 241 train_con: Optional["torch.Tensor"], 242 bounds: "torch.Tensor", 243) -> "torch.Tensor": 244 """Quasi MC-based extended ParEGO (qParEGO) for constrained multi-objective optimization. 245 246 The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler` 247 with multi-objective optimization when the number of objectives is larger than three. 248 249 .. seealso:: 250 :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value 251 descriptions. 252 """ 253 254 n_objectives = train_obj.size(-1) 255 256 weights = sample_simplex(n_objectives).squeeze() 257 scalarization = get_chebyshev_scalarization(weights=weights, Y=train_obj) 258 259 if train_con is not None: 260 train_y = torch.cat([train_obj, train_con], dim=-1) 261 262 constraints = [] 263 n_constraints = train_con.size(1) 264 265 for i in range(n_constraints): 266 constraints.append(lambda Z, i=i: Z[..., -n_constraints + i]) 267 268 objective = ConstrainedMCObjective( 269 objective=lambda Z: scalarization(Z[..., :n_objectives]), 270 constraints=constraints, 271 ) 272 else: 273 train_y = train_obj 274 275 objective = GenericMCObjective(scalarization) 276 277 train_x = normalize(train_x, bounds=bounds) 278 279 model = SingleTaskGP(train_x, train_y, outcome_transform=Standardize(m=train_y.size(-1))) 280 mll = ExactMarginalLogLikelihood(model.likelihood, model) 281 fit_gpytorch_model(mll) 282 283 acqf = qExpectedImprovement( 284 model=model, 285 best_f=objective(train_y).max(), 286 sampler=SobolQMCNormalSampler(num_samples=256), 287 objective=objective, 288 ) 289 290 standard_bounds = torch.zeros_like(bounds) 291 standard_bounds[1] = 1 292 293 candidates, _ = optimize_acqf( 294 acq_function=acqf, 295 bounds=standard_bounds, 296 q=1, 297 num_restarts=20, 298 raw_samples=1024, 299 options={"batch_limit": 5, "maxiter": 200}, 300 sequential=True, 301 ) 302 303 candidates = unnormalize(candidates.detach(), bounds=bounds) 304 305 return candidates 306 307 308def _get_default_candidates_func( 309 n_objectives: int, 310) -> Callable[ 311 [ 312 "torch.Tensor", 313 "torch.Tensor", 314 Optional["torch.Tensor"], 315 "torch.Tensor", 316 ], 317 "torch.Tensor", 318]: 319 if n_objectives > 3: 320 return qparego_candidates_func 321 elif n_objectives > 1: 322 return qehvi_candidates_func 323 else: 324 return qei_candidates_func 325 326 327# TODO(hvy): Allow utilizing GPUs via some parameter, not having to rewrite the callback 328# functions. 329@experimental("2.4.0") 330class BoTorchSampler(BaseSampler): 331 """A sampler that uses BoTorch, a Bayesian optimization library built on top of PyTorch. 332 333 This sampler allows using BoTorch's optimization algorithms from Optuna to suggest parameter 334 configurations. Parameters are transformed to continuous space and passed to BoTorch, and then 335 transformed back to Optuna's representations. Categorical parameters are one-hot encoded. 336 337 .. seealso:: 338 See an `example <https://github.com/optuna/optuna-examples/blob/main/multi_objective/ 339 botorch_simple.py>`_ how to use the sampler. 340 341 .. seealso:: 342 See the `BoTorch <https://botorch.org/>`_ homepage for details and for how to implement 343 your own ``candidates_func``. 344 345 .. note:: 346 An instance of this sampler *should not be used with different studies* when used with 347 constraints. Instead, a new instance should be created for each new study. The reason for 348 this is that the sampler is stateful keeping all the computed constraints. 349 350 Args: 351 candidates_func: 352 An optional function that suggests the next candidates. It must take the training 353 data, the objectives, the constraints, the search space bounds and return the next 354 candidates. The arguments are of type ``torch.Tensor``. The return value must be a 355 ``torch.Tensor``. However, if ``constraints_func`` is omitted, constraints will be 356 :obj:`None`. For any constraints that failed to compute, the tensor will contain 357 NaN. 358 359 If omitted, it is determined automatically based on the number of objectives. If the 360 number of objectives is one, Quasi MC-based batch Expected Improvement (qEI) is used. 361 If the number of objectives is either two or three, Quasi MC-based 362 batch Expected Hypervolume Improvement (qEHVI) is used. Otherwise, for larger number 363 of objectives, the faster Quasi MC-based extended ParEGO (qParEGO) is used. 364 365 The function should assume *maximization* of the objective. 366 367 .. seealso:: 368 See :func:`optuna.integration.botorch.qei_candidates_func` for an example. 369 constraints_func: 370 An optional function that computes the objective constraints. It must take a 371 :class:`~optuna.trial.FrozenTrial` and return the constraints. The return value must 372 be a sequence of :obj:`float` s. A value strictly larger than 0 means that a 373 constraint is violated. A value equal to or smaller than 0 is considered feasible. 374 375 If omitted, no constraints will be passed to ``candidates_func`` nor taken into 376 account during suggestion. 377 n_startup_trials: 378 Number of initial trials, that is the number of trials to resort to independent 379 sampling. 380 independent_sampler: 381 An independent sampler to use for the initial trials and for parameters that are 382 conditional. 383 """ 384 385 def __init__( 386 self, 387 *, 388 candidates_func: Callable[ 389 [ 390 "torch.Tensor", 391 "torch.Tensor", 392 Optional["torch.Tensor"], 393 "torch.Tensor", 394 ], 395 "torch.Tensor", 396 ] = None, 397 constraints_func: Optional[Callable[[FrozenTrial], Sequence[float]]] = None, 398 n_startup_trials: int = 10, 399 independent_sampler: Optional[BaseSampler] = None, 400 ): 401 _imports.check() 402 403 self._candidates_func = candidates_func 404 self._constraints_func = constraints_func 405 self._independent_sampler = independent_sampler or RandomSampler() 406 self._n_startup_trials = n_startup_trials 407 408 self._study_id: Optional[int] = None 409 self._search_space = IntersectionSearchSpace() 410 411 def infer_relative_search_space( 412 self, 413 study: Study, 414 trial: FrozenTrial, 415 ) -> Dict[str, BaseDistribution]: 416 if self._study_id is None: 417 self._study_id = study._study_id 418 if self._study_id != study._study_id: 419 # Note that the check below is meaningless when `InMemoryStorage` is used 420 # because `InMemoryStorage.create_new_study` always returns the same study ID. 421 raise RuntimeError("BoTorchSampler cannot handle multiple studies.") 422 423 return self._search_space.calculate(study, ordered_dict=True) # type: ignore 424 425 def sample_relative( 426 self, 427 study: Study, 428 trial: FrozenTrial, 429 search_space: Dict[str, BaseDistribution], 430 ) -> Dict[str, Any]: 431 assert isinstance(search_space, OrderedDict) 432 433 if len(search_space) == 0: 434 return {} 435 436 trials = [t for t in study.get_trials(deepcopy=False) if t.state == TrialState.COMPLETE] 437 438 n_trials = len(trials) 439 if n_trials < self._n_startup_trials: 440 return {} 441 442 trans = _SearchSpaceTransform(search_space) 443 n_objectives = len(study.directions) 444 values: Union[numpy.ndarray, torch.Tensor] = numpy.empty( 445 (n_trials, n_objectives), dtype=numpy.float64 446 ) 447 params: Union[numpy.ndarray, torch.Tensor] 448 con: Optional[Union[numpy.ndarray, torch.Tensor]] = None 449 bounds: Union[numpy.ndarray, torch.Tensor] = trans.bounds 450 params = numpy.empty((n_trials, trans.bounds.shape[0]), dtype=numpy.float64) 451 for trial_idx, trial in enumerate(trials): 452 params[trial_idx] = trans.transform(trial.params) 453 assert len(study.directions) == len(trial.values) 454 455 for obj_idx, (direction, value) in enumerate(zip(study.directions, trial.values)): 456 assert value is not None 457 if direction == StudyDirection.MINIMIZE: # BoTorch always assumes maximization. 458 value *= -1 459 values[trial_idx, obj_idx] = value 460 461 if self._constraints_func is not None: 462 constraints = study._storage.get_trial_system_attrs(trial._trial_id).get( 463 "botorch:constraints" 464 ) 465 if constraints is not None: 466 n_constraints = len(constraints) 467 468 if con is None: 469 con = numpy.full((n_trials, n_constraints), numpy.nan, dtype=numpy.float64) 470 elif n_constraints != con.shape[1]: 471 raise RuntimeError( 472 f"Expected {con.shape[1]} constraints but received {n_constraints}." 473 ) 474 475 con[trial_idx] = constraints 476 477 if self._constraints_func is not None: 478 if con is None: 479 warnings.warn( 480 "`constraints_func` was given but no call to it correctly computed " 481 "constraints. Constraints passed to `candidates_func` will be `None`." 482 ) 483 elif numpy.isnan(con).any(): 484 warnings.warn( 485 "`constraints_func` was given but some calls to it did not correctly compute " 486 "constraints. Constraints passed to `candidates_func` will contain NaN." 487 ) 488 489 values = torch.from_numpy(values) 490 params = torch.from_numpy(params) 491 if con is not None: 492 con = torch.from_numpy(con) 493 bounds = torch.from_numpy(bounds) 494 495 if con is not None: 496 if con.dim() == 1: 497 con.unsqueeze_(-1) 498 bounds.transpose_(0, 1) 499 500 if self._candidates_func is None: 501 self._candidates_func = _get_default_candidates_func(n_objectives=n_objectives) 502 candidates = self._candidates_func(params, values, con, bounds) 503 504 if not isinstance(candidates, torch.Tensor): 505 raise TypeError("Candidates must be a torch.Tensor.") 506 if candidates.dim() == 2: 507 if candidates.size(0) != 1: 508 raise ValueError( 509 "Candidates batch optimization is not supported and the first dimension must " 510 "have size 1 if candidates is a two-dimensional tensor. Actual: " 511 f"{candidates.size()}." 512 ) 513 # Batch size is one. Get rid of the batch dimension. 514 candidates = candidates.squeeze(0) 515 if candidates.dim() != 1: 516 raise ValueError("Candidates must be one or two-dimensional.") 517 if candidates.size(0) != bounds.size(1): 518 raise ValueError( 519 "Candidates size must match with the given bounds. Actual candidates: " 520 f"{candidates.size(0)}, bounds: {bounds.size(1)}." 521 ) 522 523 return trans.untransform(candidates.numpy()) 524 525 def sample_independent( 526 self, 527 study: Study, 528 trial: FrozenTrial, 529 param_name: str, 530 param_distribution: BaseDistribution, 531 ) -> Any: 532 return self._independent_sampler.sample_independent( 533 study, trial, param_name, param_distribution 534 ) 535 536 def reseed_rng(self) -> None: 537 self._independent_sampler.reseed_rng() 538 539 def after_trial( 540 self, 541 study: Study, 542 trial: FrozenTrial, 543 state: TrialState, 544 values: Optional[Sequence[float]], 545 ) -> None: 546 if self._constraints_func is not None: 547 constraints = None 548 549 try: 550 con = self._constraints_func(trial) 551 if not isinstance(con, (tuple, list)): 552 warnings.warn( 553 f"Constraints should be a sequence of floats but got {type(con).__name__}." 554 ) 555 constraints = tuple(con) 556 except Exception: 557 raise 558 finally: 559 assert constraints is None or isinstance(constraints, tuple) 560 561 study._storage.set_trial_system_attr( 562 trial._trial_id, 563 "botorch:constraints", 564 constraints, 565 ) 566 self._independent_sampler.after_trial(study, trial, state, values) 567