1# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2# 3# This source code is licensed under the MIT license found in the 4# LICENSE file in the root directory of this source tree. 5import os 6import math 7import logging 8import itertools 9from collections import deque 10import warnings 11import cma 12import numpy as np 13from bayes_opt import UtilityFunction 14from bayes_opt import BayesianOptimization 15import nevergrad.common.typing as tp 16from nevergrad.common import errors 17from nevergrad.parametrization import parameter as p 18from nevergrad.parametrization import transforms 19from nevergrad.parametrization import discretization 20from nevergrad.parametrization import _layering 21from nevergrad.parametrization import _datalayers 22from . import oneshot 23from . import base 24from . import mutations 25from .base import registry as registry 26from .base import addCompare # pylint: disable=unused-import 27from .base import IntOrParameter 28 29 30# families of optimizers 31# pylint: disable=unused-wildcard-import,wildcard-import,too-many-lines,too-many-arguments 32from .differentialevolution import * # type: ignore # noqa: F403 33from .es import * # type: ignore # noqa: F403 34from .oneshot import * # noqa: F403 35from .recastlib import * # noqa: F403 36 37try: 38 from .externalbo import HyperOpt # pylint: disable=unused-import 39except ModuleNotFoundError: 40 pass 41 42# run with LOGLEVEL=DEBUG for more debug information 43logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO")) 44logger = logging.getLogger(__name__) 45 46# # # # # optimizers # # # # # 47 48 49class _OnePlusOne(base.Optimizer): 50 """Simple but sometimes powerful optimization algorithm. 51 52 We use the one-fifth adaptation rule, going back to Schumer and Steiglitz (1968). 53 It was independently rediscovered by Devroye (1972) and Rechenberg (1973). 54 We use asynchronous updates, so that the 1+1 can actually be parallel and even 55 performs quite well in such a context - this is naturally close to 1+lambda. 56 57 Posssible mutations include gaussian and cauchy for the continuous case, and in the discrete case: 58 discrete, fastga, doublefastga, adaptive, portfolio, discreteBSO, doerr. 59 - discrete is the most classical discrete mutation operator, 60 - doubleFastGA is an adaptation of FastGA to arity > 2, Portfolio corresponds to random mutation rates, 61 - discreteBSO corresponds to a decreasing schedule of mutation rate. 62 - adaptive and doerr correspond to various self-adaptive mutation rates. 63 - coordinatewise_adaptive is the anisotropic counterpart of the adaptive version. 64 """ 65 66 def __init__( 67 self, 68 parametrization: IntOrParameter, 69 budget: tp.Optional[int] = None, 70 num_workers: int = 1, 71 *, 72 noise_handling: tp.Optional[tp.Union[str, tp.Tuple[str, float]]] = None, 73 mutation: str = "gaussian", 74 crossover: bool = False, 75 use_pareto: bool = False, 76 ) -> None: 77 super().__init__(parametrization, budget=budget, num_workers=num_workers) 78 self._sigma: float = 1 79 self._previous_best_loss = float("inf") 80 self.use_pareto = use_pareto 81 all_params = p.helpers.flatten(self.parametrization) 82 arity = max( 83 len(param.choices) if isinstance(param, p.TransitionChoice) else 500 for _, param in all_params 84 ) 85 self.arity_for_discrete_mutation = arity 86 # configuration 87 if noise_handling is not None: 88 if isinstance(noise_handling, str): 89 assert noise_handling in [ 90 "random", 91 "optimistic", 92 ], f"Unkwnown noise handling: '{noise_handling}'" 93 else: 94 assert isinstance( 95 noise_handling, tuple 96 ), "noise_handling must be a string or a tuple of type (strategy, factor)" 97 assert noise_handling[1] > 0.0, "the factor must be a float greater than 0" 98 assert noise_handling[0] in [ 99 "random", 100 "optimistic", 101 ], f"Unkwnown noise handling: '{noise_handling}'" 102 assert mutation in [ 103 "gaussian", 104 "cauchy", 105 "discrete", 106 "fastga", 107 "doublefastga", 108 "adaptive", 109 "coordinatewise_adaptive", 110 "portfolio", 111 "discreteBSO", 112 "lengler", 113 "doerr", 114 ], f"Unkwnown mutation: '{mutation}'" 115 if mutation == "adaptive": 116 self._adaptive_mr = 0.5 117 if mutation == "coordinatewise_adaptive": 118 self._velocity = self._rng.uniform(size=self.dimension) * arity / 4.0 119 self._modified_variables = np.array([True] * self.dimension) 120 self.noise_handling = noise_handling 121 self.mutation = mutation 122 self.crossover = crossover 123 if mutation == "doerr": 124 assert num_workers == 1, "Doerr mutation is implemented only in the sequential case." 125 self._doerr_mutation_rates = [1, 2] 126 self._doerr_mutation_rewards = [0.0, 0.0] 127 self._doerr_counters = [0.0, 0.0] 128 self._doerr_epsilon = 0.25 # self.dimension ** (-0.01) 129 self._doerr_gamma = 1 - 2 / self.dimension 130 self._doerr_current_best = float("inf") 131 i = 3 132 j = 2 133 self._doerr_index: int = -1 # Nothing has been mutated for now. 134 while i < self.dimension: 135 self._doerr_mutation_rates += [i] 136 self._doerr_mutation_rewards += [0.0] 137 self._doerr_counters += [0.0] 138 i += j 139 j += 2 140 141 def _internal_ask_candidate(self) -> p.Parameter: 142 # pylint: disable=too-many-return-statements, too-many-branches 143 noise_handling = self.noise_handling 144 if not self._num_ask: 145 out = self.parametrization.spawn_child() 146 out._meta["sigma"] = self._sigma 147 return out 148 # for noisy version 149 if noise_handling is not None: 150 limit = (0.05 if isinstance(noise_handling, str) else noise_handling[1]) * len(self.archive) ** 3 151 strategy = noise_handling if isinstance(noise_handling, str) else noise_handling[0] 152 if self._num_ask <= limit: 153 if strategy in ["cubic", "random"]: 154 idx = self._rng.choice(len(self.archive)) 155 return list(self.archive.values())[idx].parameter.spawn_child() # type: ignore 156 elif strategy == "optimistic": 157 return self.current_bests["optimistic"].parameter.spawn_child() 158 # crossover 159 mutator = mutations.Mutator(self._rng) 160 pessimistic = self.current_bests["pessimistic"].parameter.spawn_child() 161 if self.num_objectives > 1 and self.use_pareto: # multiobjective 162 # revert to using a sample of the pareto front (not "pessimistic" though) 163 pareto = ( 164 self.pareto_front() 165 ) # we can't use choice directly, because numpy does not like iterables 166 pessimistic = pareto[self._rng.choice(len(pareto))].spawn_child() 167 ref = self.parametrization 168 if self.crossover and self._num_ask % 2 == 1 and len(self.archive) > 2: 169 data = mutator.crossover( 170 pessimistic.get_standardized_data(reference=ref), mutator.get_roulette(self.archive, num=2) 171 ) 172 return pessimistic.set_standardized_data(data, reference=ref) 173 # mutating 174 175 mutation = self.mutation 176 if mutation in ("gaussian", "cauchy"): # standard case 177 step = ( 178 self._rng.normal(0, 1, self.dimension) 179 if mutation == "gaussian" 180 else self._rng.standard_cauchy(self.dimension) 181 ) 182 out = pessimistic.set_standardized_data(self._sigma * step) 183 out._meta["sigma"] = self._sigma 184 return out 185 else: 186 pessimistic_data = pessimistic.get_standardized_data(reference=ref) 187 if mutation == "crossover": 188 if self._num_ask % 2 == 0 or len(self.archive) < 3: 189 data = mutator.portfolio_discrete_mutation( 190 pessimistic_data, arity=self.arity_for_discrete_mutation 191 ) 192 else: 193 data = mutator.crossover(pessimistic_data, mutator.get_roulette(self.archive, num=2)) 194 elif mutation == "adaptive": 195 data = mutator.portfolio_discrete_mutation( 196 pessimistic_data, 197 intensity=max(1, int(self._adaptive_mr * self.dimension)), 198 arity=self.arity_for_discrete_mutation, 199 ) 200 elif mutation == "discreteBSO": 201 assert self.budget is not None, "DiscreteBSO needs a budget." 202 intensity = int(self.dimension - self._num_ask * self.dimension / self.budget) 203 if intensity < 1: 204 intensity = 1 205 data = mutator.portfolio_discrete_mutation( 206 pessimistic_data, intensity=intensity, arity=self.arity_for_discrete_mutation 207 ) 208 elif mutation == "coordinatewise_adaptive": 209 self._modified_variables = np.array([True] * self.dimension) 210 data = mutator.coordinatewise_mutation( 211 pessimistic_data, 212 self._velocity, 213 self._modified_variables, 214 arity=self.arity_for_discrete_mutation, 215 ) 216 elif mutation == "lengler": 217 alpha = 1.54468 218 intensity = int(max(1, self.dimension * (alpha * np.log(self.num_ask) / self.num_ask))) 219 data = mutator.portfolio_discrete_mutation( 220 pessimistic_data, intensity=intensity, arity=self.arity_for_discrete_mutation 221 ) 222 elif mutation == "doerr": 223 # Selection, either random, or greedy, or a mutation rate. 224 assert self._doerr_index == -1, "We should have used this index in tell." 225 if self._rng.uniform() < self._doerr_epsilon: 226 index = self._rng.choice(range(len(self._doerr_mutation_rates))) 227 self._doerr_index = index 228 else: 229 index = self._doerr_mutation_rewards.index(max(self._doerr_mutation_rewards)) 230 self._doerr_index = -1 231 intensity = self._doerr_mutation_rates[index] 232 data = mutator.portfolio_discrete_mutation( 233 pessimistic_data, intensity=intensity, arity=self.arity_for_discrete_mutation 234 ) 235 else: 236 func: tp.Any = { # type: ignore 237 "discrete": mutator.discrete_mutation, 238 "fastga": mutator.doerr_discrete_mutation, 239 "doublefastga": mutator.doubledoerr_discrete_mutation, 240 "portfolio": mutator.portfolio_discrete_mutation, 241 }[mutation] 242 data = func(pessimistic_data, arity=self.arity_for_discrete_mutation) 243 return pessimistic.set_standardized_data(data, reference=ref) 244 245 def _internal_tell(self, x: tp.ArrayLike, loss: tp.FloatLoss) -> None: 246 # only used for cauchy and gaussian 247 if self._previous_best_loss != loss: 248 self._sigma *= 2.0 if loss < self._previous_best_loss else 0.84 249 if self.mutation == "doerr" and self._doerr_current_best < float("inf") and self._doerr_index >= 0: 250 improvement = max(0.0, self._doerr_current_best - loss) 251 # Decay. 252 index = self._doerr_index 253 counter = self._doerr_counters[index] 254 self._doerr_mutation_rewards[index] = ( 255 self._doerr_gamma * counter * self._doerr_mutation_rewards[index] + improvement 256 ) / (self._doerr_gamma * counter + 1) 257 self._doerr_counters = [self._doerr_gamma * x for x in self._doerr_counters] 258 self._doerr_counters[index] += 1 259 self._doerr_index = -1 260 if self.mutation == "doerr": 261 self._doerr_current_best = min(self._doerr_current_best, loss) 262 if self.mutation == "adaptive": 263 factor = 1.2 if loss <= self._previous_best_loss else 0.731 # 0.731 = 1.2**(-np.exp(1)-1) 264 self._adaptive_mr = min(1.0, factor * self._adaptive_mr) 265 if self.mutation == "coordinatewise_adaptive": 266 factor = 1.2 if loss < self._previous_best_loss else 0.731 # 0.731 = 1.2**(-np.exp(1)-1) 267 inds = self._modified_variables 268 self._velocity[inds] = np.clip( 269 self._velocity[inds] * factor, 1.0, self.arity_for_discrete_mutation / 4.0 270 ) 271 self._previous_best_loss = self.current_bests["pessimistic"].mean # could be the current one 272 273 274class ParametrizedOnePlusOne(base.ConfiguredOptimizer): 275 """Simple but sometimes powerfull class of optimization algorithm. 276 This use asynchronous updates, so that (1+1) can actually be parallel and even 277 performs quite well in such a context - this is naturally close to (1+lambda). 278 279 280 Parameters 281 ---------- 282 noise_handling: str or Tuple[str, float] 283 Method for handling the noise. The name can be: 284 285 - `"random"`: a random point is reevaluated regularly, this uses the one-fifth adaptation rule, 286 going back to Schumer and Steiglitz (1968). It was independently rediscovered by Devroye (1972) and Rechenberg (1973). 287 - `"optimistic"`: the best optimistic point is reevaluated regularly, optimism in front of uncertainty 288 - a coefficient can to tune the regularity of these reevaluations (default .05) 289 mutation: str 290 One of the available mutations from: 291 292 - `"gaussian"`: standard mutation by adding a Gaussian random variable (with progressive 293 widening) to the best pessimistic point 294 - `"cauchy"`: same as Gaussian but with a Cauchy distribution. 295 - `"discrete"`: when a variable is mutated (which happens with probability 1/d in dimension d), it's just 296 randomly drawn. This means that on average, only one variable is mutated. 297 - `"discreteBSO"`: as in brainstorm optimization, we slowly decrease the mutation rate from 1 to 1/d. 298 - `"fastga"`: FastGA mutations from the current best 299 - `"doublefastga"`: double-FastGA mutations from the current best (Doerr et al, Fast Genetic Algorithms, 2017) 300 - `"portfolio"`: Random number of mutated bits (called niform mixing in 301 Dang & Lehre "Self-adaptation of Mutation Rates in Non-elitist Population", 2016) 302 - `"lengler"`: specific mutation rate chosen as a function of the dimension and iteration index. 303 crossover: bool 304 whether to add a genetic crossover step every other iteration. 305 use_pareto: bool 306 whether to restart from a random pareto element in multiobjective mode, instead of the last one added 307 308 Notes 309 ----- 310 After many papers advocared the mutation rate 1/d in the discrete (1+1) for the discrete case, 311 `it was proposed <https://arxiv.org/abs/1606.05551>`_ to use of a randomly 312 drawn mutation rate. `Fast genetic algorithms <https://arxiv.org/abs/1703.03334>`_ are based on a similar idea 313 These two simple methods perform quite well on a wide range of problems. 314 315 """ 316 317 # pylint: disable=unused-argument 318 def __init__( 319 self, 320 *, 321 noise_handling: tp.Optional[tp.Union[str, tp.Tuple[str, float]]] = None, 322 mutation: str = "gaussian", 323 crossover: bool = False, 324 use_pareto: bool = False, 325 ) -> None: 326 super().__init__(_OnePlusOne, locals()) 327 328 329OnePlusOne = ParametrizedOnePlusOne().set_name("OnePlusOne", register=True) 330NoisyOnePlusOne = ParametrizedOnePlusOne(noise_handling="random").set_name("NoisyOnePlusOne", register=True) 331DiscreteOnePlusOne = ParametrizedOnePlusOne(mutation="discrete").set_name("DiscreteOnePlusOne", register=True) 332PortfolioDiscreteOnePlusOne = ParametrizedOnePlusOne(mutation="portfolio").set_name( 333 "PortfolioDiscreteOnePlusOne", register=True 334) 335DiscreteLenglerOnePlusOne = ParametrizedOnePlusOne(mutation="lengler").set_name( 336 "DiscreteLenglerOnePlusOne", register=True 337) 338 339AdaptiveDiscreteOnePlusOne = ParametrizedOnePlusOne(mutation="adaptive").set_name( 340 "AdaptiveDiscreteOnePlusOne", register=True 341) 342AnisotropicAdaptiveDiscreteOnePlusOne = ParametrizedOnePlusOne(mutation="coordinatewise_adaptive").set_name( 343 "AnisotropicAdaptiveDiscreteOnePlusOne", register=True 344) 345 346DiscreteBSOOnePlusOne = ParametrizedOnePlusOne(mutation="discreteBSO").set_name( 347 "DiscreteBSOOnePlusOne", register=True 348) 349DiscreteDoerrOnePlusOne = ParametrizedOnePlusOne(mutation="doerr").set_name( 350 "DiscreteDoerrOnePlusOne", register=True 351) 352DiscreteDoerrOnePlusOne.no_parallelization = True 353CauchyOnePlusOne = ParametrizedOnePlusOne(mutation="cauchy").set_name("CauchyOnePlusOne", register=True) 354OptimisticNoisyOnePlusOne = ParametrizedOnePlusOne(noise_handling="optimistic").set_name( 355 "OptimisticNoisyOnePlusOne", register=True 356) 357OptimisticDiscreteOnePlusOne = ParametrizedOnePlusOne( 358 noise_handling="optimistic", mutation="discrete" 359).set_name("OptimisticDiscreteOnePlusOne", register=True) 360NoisyDiscreteOnePlusOne = ParametrizedOnePlusOne( 361 noise_handling=("random", 1.0), mutation="discrete" 362).set_name("NoisyDiscreteOnePlusOne", register=True) 363DoubleFastGADiscreteOnePlusOne = ParametrizedOnePlusOne(mutation="doublefastga").set_name( 364 "DoubleFastGADiscreteOnePlusOne", register=True 365) 366RecombiningPortfolioOptimisticNoisyDiscreteOnePlusOne = ParametrizedOnePlusOne( 367 crossover=True, mutation="portfolio", noise_handling="optimistic" 368).set_name("RecombiningPortfolioOptimisticNoisyDiscreteOnePlusOne", register=True) 369 370 371# pylint: too-many-arguments,too-many-instance-attributes 372class _CMA(base.Optimizer): 373 def __init__( 374 self, 375 parametrization: IntOrParameter, 376 budget: tp.Optional[int] = None, 377 num_workers: int = 1, 378 scale: float = 1.0, 379 elitist: bool = False, 380 popsize: tp.Optional[int] = None, 381 diagonal: bool = False, 382 fcmaes: bool = False, 383 random_init: bool = False, 384 ) -> None: 385 super().__init__(parametrization, budget=budget, num_workers=num_workers) 386 self._scale = scale 387 self._elitist = elitist 388 self._popsize = ( 389 max(self.num_workers, 4 + int(3 * np.log(self.dimension))) if popsize is None else popsize 390 ) 391 self._diagonal = diagonal 392 self._fcmaes = fcmaes 393 self._random_init = random_init 394 # internal attributes 395 self._to_be_asked: tp.Deque[np.ndarray] = deque() 396 self._to_be_told: tp.List[p.Parameter] = [] 397 self._num_spawners = self._popsize // 2 # experimental, for visualization 398 self._parents = [self.parametrization] 399 # delay initialization to ease implementation of variants 400 self._es: tp.Any = None 401 402 @property 403 def es(self) -> tp.Any: # typing not possible since cmaes not imported :( 404 if self._es is None: 405 if not self._fcmaes: 406 inopts = dict( 407 popsize=self._popsize, 408 randn=self._rng.randn, 409 CMA_diagonal=self._diagonal, 410 verbose=0, 411 seed=np.nan, 412 CMA_elitist=self._elitist, 413 ) 414 self._es = cma.CMAEvolutionStrategy( 415 x0=self._rng.normal(size=self.dimension) 416 if self._random_init 417 else np.zeros(self.dimension, dtype=np.float), 418 sigma0=self._scale, 419 inopts=inopts, 420 ) 421 else: 422 try: 423 from fcmaes import cmaes # pylint: disable=import-outside-toplevel 424 except ImportError as e: 425 raise ImportError( 426 "Please install fcmaes (pip install fcmaes) to use FCMA optimizers" 427 ) from e 428 self._es = cmaes.Cmaes( 429 x0=np.zeros(self.dimension, dtype=np.float), 430 input_sigma=self._scale, 431 popsize=self._popsize, 432 randn=self._rng.randn, 433 ) 434 return self._es 435 436 def _internal_ask_candidate(self) -> p.Parameter: 437 if not self._to_be_asked: 438 self._to_be_asked.extend(self.es.ask()) 439 data = self._to_be_asked.popleft() 440 parent = self._parents[self.num_ask % len(self._parents)] 441 candidate = parent.spawn_child().set_standardized_data(data, reference=self.parametrization) 442 return candidate 443 444 def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 445 self._to_be_told.append(candidate) 446 if len(self._to_be_told) >= self.es.popsize: 447 listx = [c.get_standardized_data(reference=self.parametrization) for c in self._to_be_told] 448 listy = [c.loss for c in self._to_be_told] 449 args = (listy, listx) if self._fcmaes else (listx, listy) 450 try: 451 self.es.tell(*args) 452 except RuntimeError: 453 pass 454 else: 455 self._parents = sorted(self._to_be_told, key=base._loss)[: self._num_spawners] 456 self._to_be_told = [] 457 458 def _internal_provide_recommendation(self) -> np.ndarray: 459 pessimistic = self.current_bests["pessimistic"].parameter.get_standardized_data( 460 reference=self.parametrization 461 ) 462 if self._es is None: 463 return pessimistic 464 cma_best: tp.Optional[np.ndarray] = self.es.best_x if self._fcmaes else self.es.result.xbest 465 if cma_best is None: 466 return pessimistic 467 return cma_best 468 469 470class ParametrizedCMA(base.ConfiguredOptimizer): 471 """CMA-ES optimizer, 472 This evolution strategy uses a Gaussian sampling, iteratively modified 473 for searching in the best directions. 474 This optimizer wraps an external implementation: https://github.com/CMA-ES/pycma 475 476 Parameters 477 ---------- 478 scale: float 479 scale of the search 480 elitist: bool 481 whether we switch to elitist mode, i.e. mode + instead of comma, 482 i.e. mode in which we always keep the best point in the population. 483 popsize: Optional[int] = None 484 population size, should be n * self.num_workers for int n >= 1. 485 default is max(self.num_workers, 4 + int(3 * np.log(self.dimension))) 486 487 diagonal: bool 488 use the diagonal version of CMA (advised in big dimension) 489 fcmaes: bool = False 490 use fast implementation, doesn't support diagonal=True. 491 produces equivalent results, preferable for high dimensions or 492 if objective function evaluation is fast. 493 """ 494 495 # pylint: disable=unused-argument 496 def __init__( 497 self, 498 *, 499 scale: float = 1.0, 500 elitist: bool = False, 501 popsize: tp.Optional[int] = None, 502 diagonal: bool = False, 503 fcmaes: bool = False, 504 random_init: bool = False, 505 ) -> None: 506 super().__init__(_CMA, locals()) 507 if fcmaes: 508 if diagonal: 509 raise RuntimeError("fcmaes doesn't support diagonal=True, use fcmaes=False") 510 511 512CMA = ParametrizedCMA().set_name("CMA", register=True) 513DiagonalCMA = ParametrizedCMA(diagonal=True).set_name("DiagonalCMA", register=True) 514FCMA = ParametrizedCMA(fcmaes=True).set_name("FCMA", register=True) 515 516 517class _PopulationSizeController: 518 """Population control scheme for TBPSA and EDA""" 519 520 def __init__(self, llambda: int, mu: int, dimension: int, num_workers: int = 1) -> None: 521 self.llambda = max(llambda, num_workers) 522 self.min_mu = min(mu, dimension) 523 self.mu = mu 524 self.dimension = dimension 525 self.num_workers = num_workers 526 self._loss_record: tp.List[float] = [] 527 528 def add_value(self, loss: tp.FloatLoss) -> None: 529 self._loss_record += [loss] 530 if len(self._loss_record) >= 5 * self.llambda: 531 first_fifth = self._loss_record[: self.llambda] 532 last_fifth = self._loss_record[-int(self.llambda) :] # casting to int to avoid pylint bug 533 means = [sum(fitnesses) / float(self.llambda) for fitnesses in [first_fifth, last_fifth]] 534 stds = [np.std(fitnesses) / np.sqrt(self.llambda - 1) for fitnesses in [first_fifth, last_fifth]] 535 z = (means[0] - means[1]) / (np.sqrt(stds[0] ** 2 + stds[1] ** 2)) 536 if z < 2.0: 537 self.mu *= 2 538 else: 539 self.mu = max(self.min_mu, int(self.mu * 0.84)) 540 self.llambda = 4 * self.mu 541 if self.num_workers > 1: 542 self.llambda = max(self.llambda, self.num_workers) 543 self.mu = self.llambda // 4 544 self._loss_record = [] 545 546 547# pylint: disable=too-many-instance-attributes 548@registry.register 549class EDA(base.Optimizer): 550 """Test-based population-size adaptation. 551 552 Population-size equal to lambda = 4 x dimension. 553 Test by comparing the first fifth and the last fifth of the 5lambda evaluations. 554 555 Caution 556 ------- 557 This optimizer is probably wrong. 558 """ 559 560 _POPSIZE_ADAPTATION = False 561 _COVARIANCE_MEMORY = False 562 563 def __init__( 564 self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1 565 ) -> None: 566 super().__init__(parametrization, budget=budget, num_workers=num_workers) 567 self.sigma = 1 568 self.covariance = np.identity(self.dimension) 569 dim = self.dimension 570 self.popsize = _PopulationSizeController( 571 llambda=4 * dim, mu=dim, dimension=dim, num_workers=num_workers 572 ) 573 self.current_center: np.ndarray = np.zeros(self.dimension) 574 # Population 575 self.children: tp.List[p.Parameter] = [] 576 self.parents: tp.List[p.Parameter] = [ 577 self.parametrization 578 ] # for transfering heritage (checkpoints in PBT) 579 580 def _internal_provide_recommendation( 581 self, 582 ) -> tp.ArrayLike: # This is NOT the naive version. We deal with noise. 583 return self.current_center 584 585 def _internal_ask_candidate(self) -> p.Parameter: 586 mutated_sigma = self.sigma * np.exp(self._rng.normal(0, 1) / np.sqrt(self.dimension)) 587 # TODO: is a sigma necessary here as well? given the covariance is estimated 588 assert len(self.current_center) == len(self.covariance), [ 589 self.dimension, 590 self.current_center, 591 self.covariance, 592 ] 593 data = self._rng.multivariate_normal(self.current_center, mutated_sigma * self.covariance) 594 parent = self.parents[self.num_ask % len(self.parents)] 595 candidate = parent.spawn_child().set_standardized_data(data, reference=self.parametrization) 596 if parent is self.parametrization: 597 candidate.heritage["lineage"] = candidate.uid # for tracking 598 candidate._meta["sigma"] = mutated_sigma 599 return candidate 600 601 def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 602 self.children.append(candidate) 603 if self._POPSIZE_ADAPTATION: 604 self.popsize.add_value(loss) 605 if len(self.children) >= self.popsize.llambda: 606 self.children = sorted(self.children, key=base._loss) 607 population_data = [c.get_standardized_data(reference=self.parametrization) for c in self.children] 608 mu = self.popsize.mu 609 arrays = population_data[:mu] 610 # covariance 611 # TODO: check actual covariance that should be used 612 centered_arrays = np.array([x - self.current_center for x in arrays]) 613 cov = centered_arrays.T.dot(centered_arrays) 614 # cov = np.cov(np.array(population_data).T) 615 mem_factor = 0.9 if self._COVARIANCE_MEMORY else 0 616 self.covariance *= mem_factor 617 self.covariance += (1 - mem_factor) * cov 618 # Computing the new parent 619 self.current_center = sum(arrays) / mu # type: ignore 620 self.sigma = np.exp(sum([np.log(c._meta["sigma"]) for c in self.children[:mu]]) / mu) 621 self.parents = self.children[:mu] 622 self.children = [] 623 624 def _internal_tell_not_asked(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 625 raise errors.TellNotAskedNotSupportedError 626 627 628class PCEDA(EDA): 629 _POPSIZE_ADAPTATION = True 630 _COVARIANCE_MEMORY = False 631 632 633class MPCEDA(EDA): 634 _POPSIZE_ADAPTATION = True 635 _COVARIANCE_MEMORY = True 636 637 638class MEDA(EDA): 639 _POPSIZE_ADAPTATION = False 640 _COVARIANCE_MEMORY = True 641 642 643class _TBPSA(base.Optimizer): 644 """Test-based population-size adaptation. 645 646 Population-size equal to lambda = 4 x dimension. 647 Test by comparing the first fifth and the last fifth of the 5lambda evaluations. 648 """ 649 650 # pylint: disable=too-many-instance-attributes 651 652 def __init__( 653 self, 654 parametrization: IntOrParameter, 655 budget: tp.Optional[int] = None, 656 num_workers: int = 1, 657 naive: bool = True, 658 initial_popsize: tp.Optional[int] = None, 659 ) -> None: 660 super().__init__(parametrization, budget=budget, num_workers=num_workers) 661 self.sigma = 1 662 self.naive = naive 663 if initial_popsize is None: 664 initial_popsize = self.dimension 665 self.popsize = _PopulationSizeController( 666 llambda=4 * initial_popsize, mu=initial_popsize, dimension=self.dimension, num_workers=num_workers 667 ) 668 self.current_center: np.ndarray = np.zeros(self.dimension) 669 # population 670 self.parents: tp.List[p.Parameter] = [ 671 self.parametrization 672 ] # for transfering heritage (checkpoints in PBT) 673 self.children: tp.List[p.Parameter] = [] 674 675 def recommend(self) -> p.Parameter: 676 if self.naive: 677 return self.current_bests["optimistic"].parameter 678 else: 679 # This is NOT the naive version. We deal with noise. 680 out = self.parametrization.spawn_child() 681 with p.helpers.deterministic_sampling(out): 682 out.set_standardized_data(self.current_center) 683 return out 684 685 def _internal_ask_candidate(self) -> p.Parameter: 686 mutated_sigma = self.sigma * np.exp(self._rng.normal(0, 1) / np.sqrt(self.dimension)) 687 individual = self.current_center + mutated_sigma * self._rng.normal(0, 1, self.dimension) 688 parent = self.parents[self.num_ask % len(self.parents)] 689 candidate = parent.spawn_child().set_standardized_data(individual, reference=self.parametrization) 690 if parent is self.parametrization: 691 candidate.heritage["lineage"] = candidate.uid # for tracking 692 candidate._meta["sigma"] = mutated_sigma 693 return candidate 694 695 def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 696 self.popsize.add_value(loss) 697 self.children.append(candidate) 698 if len(self.children) >= self.popsize.llambda: 699 # Sorting the population. 700 self.children.sort(key=base._loss) 701 # Computing the new parent. 702 703 self.parents = self.children[: self.popsize.mu] 704 self.children = [] 705 self.current_center = ( 706 sum( # type: ignore 707 c.get_standardized_data(reference=self.parametrization) for c in self.parents 708 ) 709 / self.popsize.mu 710 ) 711 self.sigma = np.exp(np.sum(np.log([c._meta["sigma"] for c in self.parents])) / self.popsize.mu) 712 713 def _internal_tell_not_asked(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 714 data = candidate.get_standardized_data(reference=self.parametrization) 715 sigma = np.linalg.norm(data - self.current_center) / np.sqrt(self.dimension) # educated guess 716 candidate._meta["sigma"] = sigma 717 self._internal_tell_candidate(candidate, loss) # go through standard pipeline 718 719 720class ParametrizedTBPSA(base.ConfiguredOptimizer): 721 """`Test-based population-size adaptation <https://homepages.fhv.at/hgb/New-Papers/PPSN16_HB16.pdf>`_ 722 This method, based on adapting the population size, performs the best in 723 many noisy optimization problems, even in large dimension 724 725 Parameters 726 ---------- 727 naive: bool 728 set to False for noisy problem, so that the best points will be an 729 average of the final population. 730 initial_popsize: Optional[int] 731 initial (and minimal) population size (default: 4 x dimension) 732 733 Note 734 ---- 735 Derived from: 736 Hellwig, Michael & Beyer, Hans-Georg. (2016). 737 Evolution under Strong Noise: A Self-Adaptive Evolution Strategy 738 Reaches the Lower Performance Bound -- the pcCMSA-ES. 739 https://homepages.fhv.at/hgb/New-Papers/PPSN16_HB16.pdf 740 """ 741 742 # pylint: disable=unused-argument 743 def __init__( 744 self, 745 *, 746 naive: bool = True, 747 initial_popsize: tp.Optional[int] = None, 748 ) -> None: 749 super().__init__(_TBPSA, locals()) 750 751 752TBPSA = ParametrizedTBPSA(naive=False).set_name("TBPSA", register=True) 753NaiveTBPSA = ParametrizedTBPSA().set_name("NaiveTBPSA", register=True) 754 755 756@registry.register 757class NoisyBandit(base.Optimizer): 758 """UCB. 759 This is upper confidence bound (adapted to minimization), 760 with very poor parametrization; in particular, the logarithmic term is set to zero. 761 Infinite arms: we add one arm when `20 * #ask >= #arms ** 3`. 762 """ 763 764 def _internal_ask(self) -> tp.ArrayLike: 765 if 20 * self._num_ask >= len(self.archive) ** 3: 766 return self._rng.normal(0, 1, self.dimension) # type: ignore 767 if self._rng.choice([True, False]): 768 # numpy does not accept choice on list of tuples, must choose index instead 769 idx = self._rng.choice(len(self.archive)) 770 return np.frombuffer(list(self.archive.bytesdict.keys())[idx]) # type: ignore 771 return self.current_bests["optimistic"].x 772 773 774class _PSO(base.Optimizer): 775 776 # pylint: disable=too-many-instance-attributes 777 def __init__( 778 self, 779 parametrization: IntOrParameter, 780 budget: tp.Optional[int] = None, 781 num_workers: int = 1, 782 config: tp.Optional["ConfiguredPSO"] = None, 783 ) -> None: 784 super().__init__(parametrization, budget=budget, num_workers=num_workers) 785 self._config = ConfiguredPSO() if config is None else config 786 if budget is not None and budget < 60: 787 warnings.warn("PSO is inefficient with budget < 60", errors.InefficientSettingsWarning) 788 cases: tp.Dict[str, tp.Tuple[tp.Optional[float], transforms.Transform]] = dict( 789 arctan=(0, transforms.ArctanBound(0, 1)), 790 identity=(None, transforms.Affine(1, 0)), 791 gaussian=(1e-10, transforms.CumulativeDensity()), 792 ) 793 # eps is used for clipping to make sure it is admissible 794 self._eps, self._transform = cases[self._config.transform] 795 self.llambda = max(40, num_workers) 796 if self._config.popsize is not None: 797 self.llambda = self._config.popsize 798 self._uid_queue = base.utils.UidQueue() 799 self.population: tp.Dict[str, p.Parameter] = {} 800 self._best = self.parametrization.spawn_child() 801 802 def _internal_ask_candidate(self) -> p.Parameter: 803 # population is increased only if queue is empty (otherwise tell_not_asked does not work well at the beginning) 804 if len(self.population) < self.llambda: 805 candidate = self.parametrization.sample() 806 self.population[candidate.uid] = candidate 807 dim = self.parametrization.dimension 808 candidate.heritage["speed"] = ( 809 self._rng.normal(size=dim) if self._eps is None else self._rng.uniform(-1, 1, dim) 810 ) 811 self._uid_queue.asked.add(candidate.uid) 812 return candidate 813 uid = self._uid_queue.ask() 814 candidate = self._spawn_mutated_particle(self.population[uid]) 815 candidate.heritage["lineage"] = uid # override in case it was a tell-not-asked 816 return candidate 817 818 def _get_boxed_data(self, particle: p.Parameter) -> np.ndarray: 819 if particle._frozen and "boxed_data" in particle._meta: 820 return particle._meta["boxed_data"] # type: ignore 821 boxed_data = self._transform.forward(particle.get_standardized_data(reference=self.parametrization)) 822 if particle._frozen: # only save is frozen 823 particle._meta["boxed_data"] = boxed_data 824 return boxed_data 825 826 def _spawn_mutated_particle(self, particle: p.Parameter) -> p.Parameter: 827 x = self._get_boxed_data(particle) 828 speed: np.ndarray = particle.heritage["speed"] 829 global_best_x = self._get_boxed_data(self._best) 830 parent_best_x = self._get_boxed_data(particle.heritage.get("best_parent", particle)) 831 rp = self._rng.uniform(0.0, 1.0, size=self.dimension) 832 rg = self._rng.uniform(0.0, 1.0, size=self.dimension) 833 speed = ( 834 self._config.omega * speed 835 + self._config.phip * rp * (parent_best_x - x) 836 + self._config.phig * rg * (global_best_x - x) 837 ) 838 data = speed + x 839 if self._eps is not None: 840 data = np.clip(data, self._eps, 1 - self._eps) 841 data = self._transform.backward(data) 842 new_part = particle.spawn_child().set_standardized_data(data, reference=self.parametrization) 843 new_part.heritage["speed"] = speed 844 return new_part 845 846 def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 847 uid = candidate.heritage["lineage"] 848 if uid not in self.population: 849 self._internal_tell_not_asked(candidate, loss) 850 return 851 self._uid_queue.tell(uid) 852 self.population[uid] = candidate 853 if self._best.loss is None or loss < self._best.loss: 854 self._best = candidate 855 if loss <= candidate.heritage.get("best_parent", candidate).loss: 856 candidate.heritage["best_parent"] = candidate 857 858 def _internal_tell_not_asked(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 859 # nearly same as DE 860 worst: tp.Optional[p.Parameter] = None 861 if not len(self.population) < self.llambda: 862 uid, worst = max(self.population.items(), key=lambda p: base._loss(p[1])) 863 if base._loss(worst) < loss: 864 return # no need to update 865 else: 866 del self.population[uid] 867 self._uid_queue.discard(uid) 868 if "speed" not in candidate.heritage: 869 candidate.heritage["speed"] = self._rng.uniform(-1.0, 1.0, self.parametrization.dimension) 870 self.population[candidate.uid] = candidate 871 self._uid_queue.tell(candidate.uid) 872 if loss < base._loss(self._best): 873 self._best = candidate 874 875 876class ConfiguredPSO(base.ConfiguredOptimizer): 877 """`Particle Swarm Optimization <https://en.wikipedia.org/wiki/Particle_swarm_optimization>`_ 878 is based on a set of particles with their inertia. 879 Wikipedia provides a beautiful illustration ;) (see link) 880 881 882 Parameters 883 ---------- 884 transform: str 885 name of the transform to use to map from PSO optimization space to R-space. 886 popsize: int 887 population size of the particle swarm. Defaults to max(40, num_workers) 888 omega: float 889 particle swarm optimization parameter 890 phip: float 891 particle swarm optimization parameter 892 phig: float 893 particle swarm optimization parameter 894 895 Note 896 ---- 897 - Using non-default "transform" and "wide" parameters can lead to extreme values 898 - Implementation partially following SPSO2011. However, no randomization of the population order. 899 - Reference: 900 M. Zambrano-Bigiarini, M. Clerc and R. Rojas, 901 Standard Particle Swarm Optimisation 2011 at CEC-2013: A baseline for future PSO improvements, 902 2013 IEEE Congress on Evolutionary Computation, Cancun, 2013, pp. 2337-2344. 903 https://ieeexplore.ieee.org/document/6557848 904 """ 905 906 # pylint: disable=unused-argument 907 def __init__( 908 self, 909 transform: str = "identity", 910 popsize: tp.Optional[int] = None, 911 omega: float = 0.5 / math.log(2.0), 912 phip: float = 0.5 + math.log(2.0), 913 phig: float = 0.5 + math.log(2.0), 914 ) -> None: 915 super().__init__(_PSO, locals(), as_config=True) 916 assert transform in ["arctan", "gaussian", "identity"] 917 self.transform = transform 918 self.popsize = popsize 919 self.omega = omega 920 self.phip = phip 921 self.phig = phig 922 923 924RealSpacePSO = ConfiguredPSO().set_name("RealSpacePSO", register=True) 925PSO = ConfiguredPSO(transform="arctan").set_name("PSO", register=True) 926 927 928@registry.register 929class SPSA(base.Optimizer): 930 # pylint: disable=too-many-instance-attributes 931 """The First order SPSA algorithm as shown in [1,2,3], with implementation details 932 from [4,5]. 933 934 1) https://en.wikipedia.org/wiki/Simultaneous_perturbation_stochastic_approximation 935 2) https://www.chessprogramming.org/SPSA 936 3) Spall, James C. "Multivariate stochastic approximation using a simultaneous perturbation gradient approximation." 937 IEEE transactions on automatic control 37.3 (1992): 332-341. 938 4) Section 7.5.2 in "Introduction to Stochastic Search and Optimization: Estimation, Simulation and Control" by James C. Spall. 939 5) Pushpendre Rastogi, Jingyi Zhu, James C. Spall CISS (2016). 940 Efficient implementation of Enhanced Adaptive Simultaneous Perturbation Algorithms. 941 """ 942 no_parallelization = True 943 944 def __init__( 945 self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1 946 ) -> None: 947 super().__init__(parametrization, budget=budget, num_workers=num_workers) 948 self.init = True 949 self.idx = 0 950 self.delta = float("nan") 951 self.ym: tp.Optional[np.ndarray] = None 952 self.yp: tp.Optional[np.ndarray] = None 953 self.t: np.ndarray = np.zeros(self.dimension) 954 self.avg: np.ndarray = np.zeros(self.dimension) 955 # Set A, a, c according to the practical implementation 956 # guidelines in the ISSO book. 957 self.A = 10 if budget is None else max(10, budget // 20) 958 # TODO: We should spend first 10-20 iterations 959 # to estimate the noise standard deviation and 960 # then set c = standard deviation. 1e-1 is arbitrary. 961 self.c = 1e-1 962 # TODO: We should chose a to be inversely proportional to 963 # the magnitude of gradient and proportional to (1+A)^0.602 964 # we should spend some burn-in iterations to estimate the 965 # magnitude of the gradient. 1e-5 is arbitrary. 966 self.a = 1e-5 967 968 def _ck(self, k: int) -> float: 969 "c_k determines the pertubation." 970 return self.c / (k // 2 + 1) ** 0.101 971 972 def _ak(self, k: int) -> float: 973 "a_k is the learning rate." 974 return self.a / (k // 2 + 1 + self.A) ** 0.602 975 976 def _internal_ask(self) -> tp.ArrayLike: 977 k = self.idx 978 if k % 2 == 0: 979 if not self.init: 980 assert self.yp is not None and self.ym is not None 981 self.t -= (self._ak(k) * (self.yp - self.ym) / 2 / self._ck(k)) * self.delta 982 self.avg += (self.t - self.avg) / (k // 2 + 1) 983 self.delta = 2 * self._rng.randint(2, size=self.dimension) - 1 984 return self.t - self._ck(k) * self.delta # type:ignore 985 return self.t + self._ck(k) * self.delta # type: ignore 986 987 def _internal_tell(self, x: tp.ArrayLike, loss: tp.FloatLoss) -> None: 988 setattr(self, ("ym" if self.idx % 2 == 0 else "yp"), np.array(loss, copy=True)) 989 self.idx += 1 990 if self.init and self.yp is not None and self.ym is not None: 991 self.init = False 992 993 def _internal_provide_recommendation(self) -> tp.ArrayLike: 994 return self.avg 995 996 997class _Rescaled(base.Optimizer): 998 """Proposes a version of a base optimizer which works at a different scale.""" 999 1000 def __init__( 1001 self, 1002 parametrization: IntOrParameter, 1003 budget: tp.Optional[int] = None, 1004 num_workers: int = 1, 1005 base_optimizer: base.OptCls = CMA, 1006 scale: tp.Optional[float] = None, 1007 ) -> None: 1008 super().__init__(parametrization, budget=budget, num_workers=num_workers) 1009 self._optimizer = base_optimizer(self.parametrization, budget=budget, num_workers=num_workers) 1010 self._subcandidates: tp.Dict[str, p.Parameter] = {} 1011 if scale is None: 1012 assert budget is not None, "Either scale or budget must be known in _Rescaled." 1013 scale = np.sqrt(np.log(self.budget) / self.dimension) 1014 self.scale = scale 1015 assert self.scale != 0.0, "scale should be non-zero in Rescaler." 1016 1017 def rescale_candidate(self, candidate: p.Parameter, inverse: bool = False) -> p.Parameter: 1018 data = candidate.get_standardized_data(reference=self.parametrization) 1019 scale = self.scale if not inverse else 1.0 / self.scale 1020 return self.parametrization.spawn_child().set_standardized_data(scale * data) 1021 1022 def _internal_ask_candidate(self) -> p.Parameter: 1023 candidate = self._optimizer.ask() 1024 sent_candidate = self.rescale_candidate(candidate) 1025 # We store the version corresponding to the underlying optimizer. 1026 self._subcandidates[sent_candidate.uid] = candidate 1027 return sent_candidate 1028 1029 def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 1030 self._optimizer.tell(self._subcandidates.pop(candidate.uid), loss) 1031 1032 def _internal_tell_not_asked(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 1033 candidate = self.rescale_candidate(candidate, inverse=True) 1034 self._optimizer.tell(candidate, loss) 1035 1036 1037class SplitOptimizer(base.Optimizer): 1038 """Combines optimizers, each of them working on their own variables. 1039 1040 Parameters 1041 --------- 1042 num_optims: int or None 1043 number of optimizers 1044 num_vars: int or None 1045 number of variable per optimizer. 1046 progressive: bool 1047 True if we want to progressively add optimizers during the optimization run. 1048 If progressive = True, the optimizer is forced at OptimisticNoisyOnePlusOne. 1049 1050 Example 1051 ------- 1052 for 5 optimizers, each of them working on 2 variables, one can use: 1053 1054 opt = SplitOptimizer(parametrization=10, num_workers=3, num_optims=5, num_vars=[2, 2, 2, 2, 2]) 1055 or equivalently: 1056 opt = SplitOptimizer(parametrization=10, num_workers=3, num_vars=[2, 2, 2, 2, 2]) 1057 Given that all optimizers have the same number of variables, one can also run: 1058 opt = SplitOptimizer(parametrization=10, num_workers=3, num_optims=5) 1059 1060 Note 1061 ---- 1062 By default, it uses CMA for multivariate groups and RandomSearch for monovariate groups. 1063 1064 Caution 1065 ------- 1066 The variables refer to the deep representation used by optimizers. 1067 For example, a categorical variable with 5 possible values becomes 5 continuous variables. 1068 """ 1069 1070 def __init__( 1071 self, 1072 parametrization: IntOrParameter, 1073 budget: tp.Optional[int] = None, 1074 num_workers: int = 1, 1075 num_optims: tp.Optional[int] = None, 1076 num_vars: tp.Optional[tp.List[int]] = None, 1077 multivariate_optimizer: base.OptCls = CMA, 1078 monovariate_optimizer: base.OptCls = OnePlusOne, 1079 progressive: bool = False, 1080 non_deterministic_descriptor: bool = True, 1081 ) -> None: 1082 super().__init__(parametrization, budget=budget, num_workers=num_workers) 1083 self._subcandidates: tp.Dict[str, tp.List[p.Parameter]] = {} 1084 self._progressive = progressive 1085 subparams: tp.List[p.Parameter] = [] 1086 if num_vars is not None: # The user has specified how are the splits (s)he wants. 1087 assert ( 1088 sum(num_vars) == self.dimension 1089 ), f"sum(num_vars)={sum(num_vars)} should be equal to the dimension {self.dimension}." 1090 if num_optims is None: # we deduce the number of splits. 1091 num_optims = len(num_vars) 1092 assert num_optims == len( 1093 num_vars 1094 ), f"The number {num_optims} of optimizers should match len(num_vars)={len(num_vars)}." 1095 elif num_optims is None: 1096 # if no num_vars and no num_optims, try to guess how to split. Otherwise, just assume 2. 1097 if isinstance(parametrization, p.Parameter): 1098 subparams = p.helpers.list_data(parametrization) # type: ignore 1099 if len(subparams) == 1: 1100 subparams.clear() 1101 num_optims = len(subparams) 1102 if not subparams: # Desperate situation: just split in 2. 1103 num_optims = 2 1104 if not subparams: 1105 # if num_vars not given: we will distribute variables equally. 1106 assert num_optims is not None 1107 num_optims = min(num_optims, self.dimension) 1108 num_vars = num_vars if num_vars else [] 1109 for i in range(num_optims): 1110 if len(num_vars) < i + 1: 1111 num_vars += [(self.dimension // num_optims) + (self.dimension % num_optims > i)] 1112 assert num_vars[i] >= 1, "At least one variable per optimizer." 1113 subparams += [p.Array(shape=(num_vars[i],))] 1114 if non_deterministic_descriptor: 1115 for param in subparams: 1116 param.function.deterministic = False 1117 # synchronize random state and create optimizers 1118 self.optims: tp.List[base.Optimizer] = [] 1119 mono, multi = monovariate_optimizer, multivariate_optimizer 1120 for param in subparams: 1121 param.random_state = self.parametrization.random_state 1122 self.optims.append((multi if param.dimension > 1 else mono)(param, budget, num_workers)) 1123 # final check for dimension 1124 assert ( 1125 sum(opt.dimension for opt in self.optims) == self.dimension 1126 ), "sum of sub-dimensions should be equal to the total dimension." 1127 1128 def _internal_ask_candidate(self) -> p.Parameter: 1129 candidates: tp.List[p.Parameter] = [] 1130 for i, opt in enumerate(self.optims): 1131 if self._progressive: 1132 assert self.budget is not None 1133 if i > 0 and i / len(self.optims) > np.sqrt(2.0 * self.num_ask / self.budget): 1134 candidates.append(opt.parametrization.spawn_child()) # unchanged 1135 continue 1136 candidates.append(opt.ask()) 1137 data = np.concatenate( 1138 [ 1139 c.get_standardized_data(reference=opt.parametrization) 1140 for c, opt in zip(candidates, self.optims) 1141 ], 1142 axis=0, 1143 ) 1144 cand = self.parametrization.spawn_child().set_standardized_data(data) 1145 self._subcandidates[cand.uid] = candidates 1146 return cand 1147 1148 def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 1149 candidates = self._subcandidates.pop(candidate.uid) 1150 for cand, opt in zip(candidates, self.optims): 1151 opt.tell(cand, loss) 1152 1153 def _internal_tell_not_asked(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 1154 data = candidate.get_standardized_data(reference=self.parametrization) 1155 start = 0 1156 for opt in self.optims: 1157 local_data = data[start : start + opt.dimension] 1158 start += opt.dimension 1159 local_candidate = opt.parametrization.spawn_child().set_standardized_data(local_data) 1160 opt.tell(local_candidate, loss) 1161 1162 1163class Rescaled(base.ConfiguredOptimizer): 1164 """Configured optimizer for creating rescaled optimization algorithms. 1165 1166 Parameters 1167 ---------- 1168 base_optimizer: base.OptCls 1169 optimization algorithm to be rescaled. 1170 scale: how much do we rescale. E.g. 0.001 if we want to focus on the center 1171 with std 0.001 (assuming the std of the domain is set to 1). 1172 """ 1173 1174 # pylint: disable=unused-argument 1175 def __init__( 1176 self, 1177 *, 1178 base_optimizer: base.OptCls = CMA, 1179 scale: tp.Optional[float] = None, 1180 ) -> None: 1181 super().__init__(_Rescaled, locals()) 1182 1183 1184RescaledCMA = Rescaled().set_name("RescaledCMA", register=True) 1185 1186 1187class ConfSplitOptimizer(base.ConfiguredOptimizer): 1188 """ "Combines optimizers, each of them working on their own variables. 1189 1190 Parameters 1191 ---------- 1192 num_optims: int 1193 number of optimizers 1194 num_vars: optional list of int 1195 number of variable per optimizer. 1196 progressive: optional bool 1197 whether we progressively add optimizers. 1198 non_deterministic_descriptor: bool 1199 subparts parametrization descriptor is set to noisy function. 1200 This can have an impact for optimizer selection for competence maps. 1201 """ 1202 1203 # pylint: disable=unused-argument 1204 def __init__( 1205 self, 1206 *, 1207 num_optims: tp.Optional[int] = None, 1208 num_vars: tp.Optional[tp.List[int]] = None, 1209 multivariate_optimizer: base.OptCls = CMA, 1210 monovariate_optimizer: base.OptCls = RandomSearch, 1211 progressive: bool = False, 1212 non_deterministic_descriptor: bool = True, 1213 ) -> None: 1214 super().__init__(SplitOptimizer, locals()) 1215 1216 1217@registry.register 1218class Portfolio(base.Optimizer): 1219 """Passive portfolio of CMA, 2-pt DE and Scr-Hammersley.""" 1220 1221 def __init__( 1222 self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1 1223 ) -> None: 1224 super().__init__(parametrization, budget=budget, num_workers=num_workers) 1225 assert budget is not None 1226 self.optims = [ 1227 CMA( 1228 self.parametrization, budget // 3 + (budget % 3 > 0), num_workers 1229 ), # share parametrization and its rng 1230 TwoPointsDE(self.parametrization, budget // 3 + (budget % 3 > 1), num_workers), # noqa: F405 1231 ScrHammersleySearch(self.parametrization, budget // 3, num_workers), 1232 ] # noqa: F405 1233 if budget < 12 * num_workers: 1234 self.optims = [ScrHammersleySearch(self.parametrization, budget, num_workers)] # noqa: F405 1235 1236 def _internal_ask_candidate(self) -> p.Parameter: 1237 optim_index = self._num_ask % len(self.optims) 1238 candidate = self.optims[optim_index].ask() 1239 candidate._meta["optim_index"] = optim_index 1240 return candidate 1241 1242 def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 1243 for opt in self.optims: 1244 try: 1245 opt.tell(candidate, loss) 1246 except errors.TellNotAskedNotSupportedError: 1247 pass 1248 # Presumably better than self.optims[optim_index].tell(candidate, value) 1249 1250 def _internal_tell_not_asked(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 1251 at_least_one_ok = False 1252 for opt in self.optims: 1253 try: 1254 opt.tell(candidate, loss) 1255 at_least_one_ok = True 1256 except errors.TellNotAskedNotSupportedError: 1257 pass 1258 if not at_least_one_ok: 1259 raise errors.TellNotAskedNotSupportedError 1260 1261 1262class InfiniteMetaModelOptimum(ValueError): 1263 """Sometimes the optimum of the metamodel is at infinity.""" 1264 1265 1266def learn_on_k_best(archive: utils.Archive[utils.MultiValue], k: int) -> tp.ArrayLike: 1267 """Approximate optimum learnt from the k best. 1268 1269 Parameters 1270 ---------- 1271 archive: utils.Archive[utils.Value] 1272 """ 1273 items = list(archive.items_as_arrays()) 1274 dimension = len(items[0][0]) 1275 1276 # Select the k best. 1277 first_k_individuals = [ 1278 x for x in sorted(items, key=lambda indiv: archive[indiv[0]].get_estimation("pessimistic"))[:k] 1279 ] 1280 assert len(first_k_individuals) == k 1281 1282 # Recenter the best. 1283 middle = np.array(sum(p[0] for p in first_k_individuals) / k) 1284 normalization = 1e-15 + np.sqrt(np.sum((first_k_individuals[-1][0] - first_k_individuals[0][0]) ** 2)) 1285 y = [archive[c[0]].get_estimation("pessimistic") for c in first_k_individuals] 1286 X = np.asarray([(c[0] - middle) / normalization for c in first_k_individuals]) 1287 1288 # We need SKLearn. 1289 from sklearn.linear_model import LinearRegression 1290 from sklearn.preprocessing import PolynomialFeatures 1291 1292 polynomial_features = PolynomialFeatures(degree=2) 1293 X2 = polynomial_features.fit_transform(X) 1294 1295 # Fit a linear model. 1296 model = LinearRegression() 1297 model.fit(X2, y) 1298 1299 try: 1300 for cls in Powell, DE: # Powell excellent here, DE as a backup for thread safety. 1301 optimizer = cls(parametrization=dimension, budget=45 * dimension + 30) 1302 try: 1303 minimum = optimizer.minimize( 1304 lambda x: float(model.predict(polynomial_features.fit_transform(x[None, :]))) 1305 ).value 1306 except RuntimeError: 1307 assert cls == Powell, "Only Powell is allowed to crash here." 1308 else: 1309 break 1310 except ValueError: 1311 raise InfiniteMetaModelOptimum("Infinite meta-model optimum in learn_on_k_best.") 1312 1313 if np.sum(minimum ** 2) > 1.0: 1314 raise InfiniteMetaModelOptimum("huge meta-model optimum in learn_on_k_best.") 1315 return middle + normalization * minimum 1316 1317 1318@registry.register 1319class MetaModel(base.Optimizer): 1320 """Adding a metamodel into CMA.""" 1321 1322 def __init__( 1323 self, 1324 parametrization: IntOrParameter, 1325 budget: tp.Optional[int] = None, 1326 num_workers: int = 1, 1327 multivariate_optimizer: tp.Optional[base.OptCls] = None, 1328 ) -> None: 1329 super().__init__(parametrization, budget=budget, num_workers=num_workers) 1330 if multivariate_optimizer is None: 1331 multivariate_optimizer = CMA if self.dimension > 1 else OnePlusOne 1332 self._optim = multivariate_optimizer( 1333 self.parametrization, budget, num_workers 1334 ) # share parametrization and its rng 1335 1336 def _internal_ask_candidate(self) -> p.Parameter: 1337 # We request a bit more points than what is really necessary for our dimensionality (+dimension). 1338 sample_size = int((self.dimension * (self.dimension - 1)) / 2 + 2 * self.dimension + 1) 1339 if ( 1340 self._num_ask % max(13, self.num_workers, self.dimension) == 0 1341 and len(self.archive) >= sample_size 1342 ): 1343 try: 1344 data = learn_on_k_best(self.archive, sample_size) 1345 candidate = self.parametrization.spawn_child().set_standardized_data(data) 1346 except InfiniteMetaModelOptimum: # The optimum is at infinity. Shit happens. 1347 candidate = self._optim.ask() 1348 else: 1349 candidate = self._optim.ask() 1350 return candidate 1351 1352 def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 1353 self._optim.tell(candidate, loss) 1354 1355 1356class ParaPortfolio(Portfolio): 1357 """Passive portfolio of CMA, 2-pt DE, PSO, SQP and Scr-Hammersley.""" 1358 1359 def __init__( 1360 self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1 1361 ) -> None: 1362 super().__init__(parametrization, budget=budget, num_workers=num_workers) 1363 assert budget is not None 1364 1365 def intshare(n: int, m: int) -> tp.Tuple[int, ...]: 1366 x = [n // m] * m 1367 i = 0 1368 while sum(x) < n: 1369 x[i] += 1 1370 i += 1 1371 return tuple(x) 1372 1373 nw1, nw2, nw3, nw4 = intshare(num_workers - 1, 4) 1374 self.which_optim = [0] * nw1 + [1] * nw2 + [2] * nw3 + [3] + [4] * nw4 1375 assert len(self.which_optim) == num_workers 1376 # b1, b2, b3, b4, b5 = intshare(budget, 5) 1377 self.optims: tp.List[base.Optimizer] = [ 1378 CMA(self.parametrization, num_workers=nw1), # share parametrization and its rng 1379 TwoPointsDE(self.parametrization, num_workers=nw2), # noqa: F405 1380 PSO(self.parametrization, num_workers=nw3), 1381 SQP(self.parametrization, num_workers=1), # noqa: F405 1382 ScrHammersleySearch( 1383 self.parametrization, budget=(budget // len(self.which_optim)) * nw4 1384 ), # noqa: F405 1385 ] 1386 1387 def _internal_ask_candidate(self) -> p.Parameter: 1388 optim_index = self.which_optim[self._num_ask % len(self.which_optim)] 1389 candidate = self.optims[optim_index].ask() 1390 candidate._meta["optim_index"] = optim_index 1391 return candidate 1392 1393 1394@registry.register 1395class SQPCMA(ParaPortfolio): 1396 """Passive portfolio of CMA and many SQP.""" 1397 1398 def __init__( 1399 self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1 1400 ) -> None: 1401 super().__init__(parametrization, budget=budget, num_workers=num_workers) 1402 assert budget is not None 1403 nw = num_workers // 2 1404 self.which_optim = [0] * nw 1405 for i in range(num_workers - nw): 1406 self.which_optim += [i + 1] 1407 assert len(self.which_optim) == num_workers 1408 # b1, b2, b3, b4, b5 = intshare(budget, 5) 1409 self.optims = [CMA(self.parametrization, num_workers=nw)] # share parametrization and its rng 1410 for i in range(num_workers - nw): 1411 self.optims += [SQP(self.parametrization, num_workers=1)] # noqa: F405 1412 if i > 0: 1413 self.optims[-1].initial_guess = self._rng.normal(0, 1, self.dimension) # type: ignore 1414 1415 1416@registry.register 1417class ASCMADEthird(Portfolio): 1418 """Algorithm selection, with CMA and Lhs-DE. Active selection at 1/3.""" 1419 1420 def __init__( 1421 self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1 1422 ) -> None: 1423 super().__init__(parametrization, budget=budget, num_workers=num_workers) 1424 assert budget is not None 1425 self.optims = [ 1426 CMA( 1427 self.parametrization, budget=None, num_workers=num_workers 1428 ), # share parametrization and its rng 1429 LhsDE(self.parametrization, budget=None, num_workers=num_workers), 1430 ] # noqa: F405 1431 self.budget_before_choosing = budget // 3 1432 self.best_optim = -1 1433 1434 def _internal_ask_candidate(self) -> p.Parameter: 1435 if self.budget_before_choosing > 0: 1436 self.budget_before_choosing -= 1 1437 optim_index = self._num_ask % len(self.optims) 1438 else: 1439 if self.best_optim is None: 1440 best_loss = float("inf") 1441 optim_index = -1 1442 for i, optim in enumerate(self.optims): 1443 val = optim.current_bests["pessimistic"].get_estimation("pessimistic") 1444 if not val > best_loss: 1445 optim_index = i 1446 best_loss = val 1447 self.best_optim = optim_index 1448 optim_index = self.best_optim 1449 candidate = self.optims[optim_index].ask() 1450 candidate._meta["optim_index"] = optim_index 1451 return candidate 1452 1453 1454@registry.register 1455class CMandAS2(ASCMADEthird): 1456 """Competence map, with algorithm selection in one of the cases (3 CMAs).""" 1457 1458 def __init__( 1459 self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1 1460 ) -> None: 1461 super().__init__(parametrization, budget=budget, num_workers=num_workers) 1462 self.optims = [TwoPointsDE(self.parametrization, budget=None, num_workers=num_workers)] # noqa: F405 1463 assert budget is not None 1464 self.budget_before_choosing = 2 * budget 1465 if budget < 201: 1466 self.optims = [OnePlusOne(self.parametrization, budget=None, num_workers=num_workers)] 1467 if budget > 50 * self.dimension or num_workers < 30: 1468 self.optims = [ 1469 MetaModel(self.parametrization, budget=None, num_workers=num_workers) for _ in range(3) 1470 ] 1471 self.budget_before_choosing = budget // 10 1472 1473 1474@registry.register 1475class CMandAS3(ASCMADEthird): 1476 """Competence map, with algorithm selection in one of the cases (3 CMAs).""" 1477 1478 def __init__( 1479 self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1 1480 ) -> None: 1481 super().__init__(parametrization, budget=budget, num_workers=num_workers) 1482 self.optims = [TwoPointsDE(self.parametrization, budget=None, num_workers=num_workers)] # noqa: F405 1483 assert budget is not None 1484 self.budget_before_choosing = 2 * budget 1485 if budget < 201: 1486 self.optims = [OnePlusOne(self.parametrization, budget=None, num_workers=num_workers)] 1487 if budget > 50 * self.dimension or num_workers < 30: 1488 if num_workers == 1: 1489 self.optims = [ 1490 ChainCMAPowell( 1491 self.parametrization, budget=None, num_workers=num_workers 1492 ), # share parametrization and its rng 1493 ChainCMAPowell(self.parametrization, budget=None, num_workers=num_workers), 1494 ChainCMAPowell(self.parametrization, budget=None, num_workers=num_workers), 1495 ] 1496 else: 1497 self.optims = [ 1498 CMA( 1499 self.parametrization, budget=None, num_workers=num_workers 1500 ), # share parametrization and its rng 1501 CMA(self.parametrization, budget=None, num_workers=num_workers), 1502 CMA(self.parametrization, budget=None, num_workers=num_workers), 1503 ] 1504 self.budget_before_choosing = budget // 10 1505 1506 1507@registry.register 1508class CM(CMandAS2): 1509 """Competence map, simplest.""" 1510 1511 def __init__( 1512 self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1 1513 ) -> None: 1514 super().__init__(parametrization, budget=budget, num_workers=num_workers) 1515 assert budget is not None 1516 # share parametrization and its random number generator between all underlying optimizers 1517 self.optims = [TwoPointsDE(self.parametrization, budget=None, num_workers=num_workers)] # noqa: F405 1518 self.budget_before_choosing = 2 * budget 1519 if budget < 201: 1520 self.optims = [OnePlusOne(self.parametrization, budget=None, num_workers=num_workers)] 1521 if budget > 50 * self.dimension: 1522 self.optims = [CMA(self.parametrization, budget=None, num_workers=num_workers)] 1523 1524 1525@registry.register 1526class MultiCMA(CM): 1527 """Combining 3 CMAs. Exactly identical. Active selection at 1/10 of the budget.""" 1528 1529 def __init__( 1530 self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1 1531 ) -> None: 1532 super().__init__(parametrization, budget=budget, num_workers=num_workers) 1533 assert budget is not None 1534 self.optims = [ 1535 CMA( 1536 self.parametrization, budget=None, num_workers=num_workers 1537 ), # share parametrization and its rng 1538 CMA(self.parametrization, budget=None, num_workers=num_workers), 1539 CMA(self.parametrization, budget=None, num_workers=num_workers), 1540 ] 1541 self.budget_before_choosing = budget // 10 1542 1543 1544@registry.register 1545class MultiDiscrete(CM): 1546 """Combining 3 Discrete(1+1). Exactly identical. Active selection at 1/10 of the budget.""" 1547 1548 def __init__( 1549 self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1 1550 ) -> None: 1551 super().__init__(parametrization, budget=budget, num_workers=num_workers) 1552 assert budget is not None 1553 self.optims = [ 1554 DiscreteOnePlusOne( 1555 self.parametrization, budget=budget // 12, num_workers=num_workers 1556 ), # share parametrization and its rng 1557 DiscreteBSOOnePlusOne(self.parametrization, budget=budget // 12, num_workers=num_workers), 1558 DoubleFastGADiscreteOnePlusOne( 1559 self.parametrization, budget=(budget // 4) - 2 * (budget // 12), num_workers=num_workers 1560 ), 1561 ] 1562 self.budget_before_choosing = budget // 4 1563 1564 1565@registry.register 1566class TripleCMA(CM): 1567 """Combining 3 CMAs. Exactly identical. Active selection at 1/3 of the budget.""" 1568 1569 def __init__( 1570 self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1 1571 ) -> None: 1572 super().__init__(parametrization, budget=budget, num_workers=num_workers) 1573 assert budget is not None 1574 self.optims = [ 1575 ParametrizedCMA(random_init=True)( 1576 self.parametrization, budget=None, num_workers=num_workers 1577 ), # share parametrization and its rng 1578 ParametrizedCMA(random_init=True)(self.parametrization, budget=None, num_workers=num_workers), 1579 ParametrizedCMA(random_init=True)(self.parametrization, budget=None, num_workers=num_workers), 1580 ] 1581 self.budget_before_choosing = budget // 3 1582 1583 1584@registry.register 1585class PolyCMA(CM): 1586 """Combining 20 CMAs. Exactly identical. Active selection at 1/3 of the budget.""" 1587 1588 def __init__( 1589 self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1 1590 ) -> None: 1591 super().__init__(parametrization, budget=budget, num_workers=num_workers) 1592 assert budget is not None 1593 self.optims = [ 1594 ParametrizedCMA(random_init=True)(self.parametrization, budget=None, num_workers=num_workers) 1595 for _ in range(20) 1596 ] 1597 1598 self.budget_before_choosing = budget // 3 1599 1600 1601@registry.register 1602class MultiScaleCMA(CM): 1603 """Combining 3 CMAs with different init scale. Active selection at 1/3 of the budget.""" 1604 1605 def __init__( 1606 self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1 1607 ) -> None: 1608 super().__init__(parametrization, budget=budget, num_workers=num_workers) 1609 self.optims = [ 1610 CMA( 1611 self.parametrization, budget=None, num_workers=num_workers 1612 ), # share parametrization and its rng 1613 ParametrizedCMA(scale=1e-3, random_init=True)( 1614 self.parametrization, budget=None, num_workers=num_workers 1615 ), 1616 ParametrizedCMA(scale=1e-6, random_init=True)( 1617 self.parametrization, budget=None, num_workers=num_workers 1618 ), 1619 ] 1620 assert budget is not None 1621 self.budget_before_choosing = budget // 3 1622 1623 1624class _FakeFunction: 1625 """Simple function that returns the loss which was registered just before. 1626 This is a hack for BO. 1627 """ 1628 1629 def __init__(self, num_digits: int) -> None: 1630 self.num_digits = num_digits 1631 self._registered: tp.List[tp.Tuple[np.ndarray, float]] = [] 1632 1633 def key(self, num: int) -> str: 1634 """Key corresponding to the array sample 1635 (uses zero-filling to keep order) 1636 """ 1637 return "x" + str(num).zfill(self.num_digits) 1638 1639 def register(self, x: np.ndarray, loss: tp.FloatLoss) -> None: 1640 if self._registered: 1641 raise RuntimeError("Only one call can be registered at a time") 1642 self._registered.append((x, loss)) 1643 1644 def __call__(self, **kwargs: float) -> float: 1645 if not self._registered: 1646 raise RuntimeError("Call must be registered first") 1647 x = [kwargs[self.key(i)] for i in range(len(kwargs))] 1648 xr, loss = self._registered[0] 1649 np.testing.assert_array_almost_equal(x, xr, err_msg="Call does not match registered") 1650 self._registered.clear() 1651 return loss 1652 1653 1654class _BO(base.Optimizer): 1655 def __init__( 1656 self, 1657 parametrization: IntOrParameter, 1658 budget: tp.Optional[int] = None, 1659 num_workers: int = 1, 1660 *, 1661 initialization: tp.Optional[str] = None, 1662 init_budget: tp.Optional[int] = None, 1663 middle_point: bool = False, 1664 utility_kind: str = "ucb", # bayes_opt default 1665 utility_kappa: float = 2.576, 1666 utility_xi: float = 0.0, 1667 gp_parameters: tp.Optional[tp.Dict[str, tp.Any]] = None, 1668 ) -> None: 1669 super().__init__(parametrization, budget=budget, num_workers=num_workers) 1670 self._transform = transforms.ArctanBound(0, 1) 1671 self._bo: tp.Optional[BayesianOptimization] = None 1672 self._fake_function = _FakeFunction(num_digits=len(str(self.dimension))) 1673 # initialization 1674 init = initialization 1675 self._init_budget = init_budget 1676 self._middle_point = middle_point 1677 if init is None: 1678 self._InitOpt: tp.Optional[base.ConfiguredOptimizer] = None 1679 elif init == "random": 1680 self._InitOpt = oneshot.RandomSearch 1681 else: 1682 self._InitOpt = oneshot.SamplingSearch(sampler=init, scrambled=init == "Hammersley") 1683 # configuration 1684 self.utility_kind = utility_kind 1685 self.utility_kappa = utility_kappa 1686 self.utility_xi = utility_xi 1687 self.gp_parameters = {} if gp_parameters is None else gp_parameters 1688 if isinstance(parametrization, p.Parameter) and self.gp_parameters.get("alpha", 0) == 0: 1689 analysis = p.helpers.analyze(parametrization) 1690 noisy = not analysis.deterministic 1691 cont = analysis.continuous 1692 if noisy or not cont: 1693 warnings.warn( 1694 "Dis-continuous and noisy parametrization require gp_parameters['alpha'] > 0 " 1695 "(for your parametrization, continuity={cont} and noisy={noisy}).\n" 1696 "Find more information on BayesianOptimization's github.\n" 1697 "You should then create a new instance of optimizerlib.ParametrizedBO with appropriate parametrization.", 1698 errors.InefficientSettingsWarning, 1699 ) 1700 1701 @property 1702 def bo(self) -> BayesianOptimization: 1703 if self._bo is None: 1704 bounds = {self._fake_function.key(i): (0.0, 1.0) for i in range(self.dimension)} 1705 self._bo = BayesianOptimization(self._fake_function, bounds, random_state=self._rng) 1706 init_budget = max( 1707 2, int(np.sqrt(self.budget) if self._init_budget is None else self._init_budget) 1708 ) 1709 if self.gp_parameters is not None: 1710 self._bo.set_gp_params(**self.gp_parameters) 1711 # init 1712 if self._middle_point: 1713 self._bo.probe([0.5] * self.dimension, lazy=True) 1714 init_budget -= 1 1715 if self._InitOpt is not None and init_budget > 0: 1716 param = p.Array(shape=(self.dimension,)).set_bounds(lower=0, upper=1.0) 1717 param.random_state = self._rng 1718 opt = self._InitOpt(param, budget=init_budget) 1719 for _ in range(init_budget): 1720 self._bo.probe(opt.ask().value, lazy=True) 1721 else: # default 1722 for _ in range(init_budget): 1723 self._bo.probe(self._bo._space.random_sample(), lazy=True) 1724 return self._bo 1725 1726 def _internal_ask_candidate(self) -> p.Parameter: 1727 util = UtilityFunction(kind=self.utility_kind, kappa=self.utility_kappa, xi=self.utility_xi) 1728 if self.bo._queue: 1729 x_probe = next(self.bo._queue) 1730 else: 1731 x_probe = self.bo.suggest(util) # this is time consuming 1732 x_probe = [x_probe[self._fake_function.key(i)] for i in range(len(x_probe))] 1733 data = self._transform.backward(np.array(x_probe, copy=False)) 1734 candidate = self.parametrization.spawn_child().set_standardized_data(data) 1735 candidate._meta["x_probe"] = x_probe 1736 return candidate 1737 1738 def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 1739 if "x_probe" in candidate._meta: 1740 y = candidate._meta["x_probe"] 1741 else: 1742 data = candidate.get_standardized_data(reference=self.parametrization) 1743 y = self._transform.forward(data) # tell not asked 1744 self._fake_function.register(y, -loss) # minimizing 1745 self.bo.probe(y, lazy=False) 1746 # for some unknown reasons, BO wants to evaluate twice the same point, 1747 # but since it keeps a cache of the values, the registered value is not used 1748 # so we should clean the "fake" function 1749 self._fake_function._registered.clear() 1750 1751 def _internal_provide_recommendation(self) -> tp.Optional[tp.ArrayLike]: 1752 if not self.archive: 1753 return None 1754 return self._transform.backward( 1755 np.array([self.bo.max["params"][self._fake_function.key(i)] for i in range(self.dimension)]) 1756 ) 1757 1758 1759class ParametrizedBO(base.ConfiguredOptimizer): 1760 """Bayesian optimization. 1761 Hyperparameter tuning method, based on statistical modeling of the objective function. 1762 This class is a wrapper over the `bayes_opt <https://github.com/fmfn/BayesianOptimization>`_ package. 1763 1764 Parameters 1765 ---------- 1766 initialization: str 1767 Initialization algorithms (None, "Hammersley", "random" or "LHS") 1768 init_budget: int or None 1769 Number of initialization algorithm steps 1770 middle_point: bool 1771 whether to sample the 0 point first 1772 utility_kind: str 1773 Type of utility function to use among "ucb", "ei" and "poi" 1774 utility_kappa: float 1775 Kappa parameter for the utility function 1776 utility_xi: float 1777 Xi parameter for the utility function 1778 gp_parameters: dict 1779 dictionnary of parameters for the gaussian process 1780 """ 1781 1782 no_parallelization = True 1783 1784 # pylint: disable=unused-argument 1785 def __init__( 1786 self, 1787 *, 1788 initialization: tp.Optional[str] = None, 1789 init_budget: tp.Optional[int] = None, 1790 middle_point: bool = False, 1791 utility_kind: str = "ucb", # bayes_opt default 1792 utility_kappa: float = 2.576, 1793 utility_xi: float = 0.0, 1794 gp_parameters: tp.Optional[tp.Dict[str, tp.Any]] = None, 1795 ) -> None: 1796 super().__init__(_BO, locals()) 1797 1798 1799BO = ParametrizedBO().set_name("BO", register=True) 1800 1801 1802class _Chain(base.Optimizer): 1803 def __init__( 1804 self, 1805 parametrization: IntOrParameter, 1806 budget: tp.Optional[int] = None, 1807 num_workers: int = 1, 1808 *, 1809 optimizers: tp.Sequence[tp.Union[base.ConfiguredOptimizer, tp.Type[base.Optimizer]]] = [ 1810 LHSSearch, 1811 DE, 1812 ], 1813 budgets: tp.Sequence[tp.Union[str, int]] = (10,), 1814 ) -> None: 1815 super().__init__(parametrization, budget=budget, num_workers=num_workers) 1816 # delayed initialization 1817 # Either we have the budget for each algorithm, or the last algorithm uses the rest of the budget, so: 1818 self.optimizers: tp.List[base.Optimizer] = [] 1819 converter = { 1820 "num_workers": self.num_workers, 1821 "dimension": self.dimension, 1822 "half": self.budget // 2 if self.budget else self.num_workers, 1823 "third": self.budget // 3 if self.budget else self.num_workers, 1824 "sqrt": int(np.sqrt(self.budget)) if self.budget else self.num_workers, 1825 } 1826 self.budgets = [max(1, converter[b]) if isinstance(b, str) else b for b in budgets] 1827 last_budget = None if self.budget is None else max(4, self.budget - sum(self.budgets)) 1828 assert len(optimizers) == len(self.budgets) + 1 1829 assert all( 1830 x in ("third", "half", "dimension", "num_workers", "sqrt") or x > 0 for x in self.budgets 1831 ), str(self.budgets) 1832 for opt, optbudget in zip(optimizers, self.budgets + [last_budget]): # type: ignore 1833 self.optimizers.append(opt(self.parametrization, budget=optbudget, num_workers=self.num_workers)) 1834 if self.name.startswith("chain"): 1835 warnings.warn( 1836 "Chain optimizers are renamed with a capital C for consistency. " 1837 "Eg: chainCMAPowell becomes ChainCMAPowell", 1838 errors.NevergradDeprecationWarning, 1839 ) 1840 1841 def _internal_ask_candidate(self) -> p.Parameter: 1842 # Which algorithm are we playing with ? 1843 sum_budget = 0.0 1844 opt = self.optimizers[0] 1845 for opt in self.optimizers: 1846 sum_budget += float("inf") if opt.budget is None else opt.budget 1847 if self.num_ask < sum_budget: 1848 break 1849 # if we are over budget, then use the last one... 1850 return opt.ask() 1851 1852 def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 1853 # Let us inform all concerned algorithms 1854 sum_budget = 0.0 1855 for opt in self.optimizers: 1856 sum_budget += float("inf") if opt.budget is None else opt.budget 1857 if self.num_tell < sum_budget: 1858 opt.tell(candidate, loss) 1859 1860 1861class Chaining(base.ConfiguredOptimizer): 1862 """ 1863 A chaining consists in running algorithm 1 during T1, then algorithm 2 during T2, then algorithm 3 during T3, etc. 1864 Each algorithm is fed with what happened before it. 1865 1866 Parameters 1867 ---------- 1868 optimizers: list of Optimizer classes 1869 the sequence of optimizers to use 1870 budgets: list of int 1871 the corresponding budgets for each optimizer but the last one 1872 1873 """ 1874 1875 # pylint: disable=unused-argument 1876 def __init__( 1877 self, 1878 optimizers: tp.Sequence[tp.Union[base.ConfiguredOptimizer, tp.Type[base.Optimizer]]], 1879 budgets: tp.Sequence[tp.Union[str, int]], 1880 ) -> None: 1881 super().__init__(_Chain, locals()) 1882 1883 1884# depreated: old names (need a capital letter for consistency 1885chainCMAPowell = Chaining([CMA, Powell], ["half"]).set_name("chainCMAPowell", register=True) 1886chainCMAPowell.no_parallelization = True 1887chainMetaModelSQP = Chaining([MetaModel, SQP], ["half"]).set_name("chainMetaModelSQP", register=True) 1888chainMetaModelSQP.no_parallelization = True 1889chainMetaModelPowell = Chaining([MetaModel, Powell], ["half"]).set_name("chainMetaModelPowell", register=True) 1890chainMetaModelPowell.no_parallelization = True 1891chainDiagonalCMAPowell = Chaining([DiagonalCMA, Powell], ["half"]).set_name( 1892 "chainDiagonalCMAPowell", register=True 1893) 1894chainDiagonalCMAPowell.no_parallelization = True 1895chainNaiveTBPSAPowell = Chaining([NaiveTBPSA, Powell], ["half"]).set_name( 1896 "chainNaiveTBPSAPowell", register=True 1897) 1898chainNaiveTBPSAPowell.no_parallelization = True 1899chainNaiveTBPSACMAPowell = Chaining([NaiveTBPSA, CMA, Powell], ["third", "third"]).set_name( 1900 "chainNaiveTBPSACMAPowell", register=True 1901) 1902chainNaiveTBPSACMAPowell.no_parallelization = True 1903 1904# new names 1905ChainCMAPowell = Chaining([CMA, Powell], ["half"]).set_name("ChainCMAPowell", register=True) 1906ChainCMAPowell.no_parallelization = True # TODO make this automatic 1907ChainMetaModelSQP = Chaining([MetaModel, SQP], ["half"]).set_name("ChainMetaModelSQP", register=True) 1908ChainMetaModelSQP.no_parallelization = True 1909ChainMetaModelPowell = Chaining([MetaModel, Powell], ["half"]).set_name("ChainMetaModelPowell", register=True) 1910ChainMetaModelPowell.no_parallelization = True 1911ChainDiagonalCMAPowell = Chaining([DiagonalCMA, Powell], ["half"]).set_name( 1912 "ChainDiagonalCMAPowell", register=True 1913) 1914ChainDiagonalCMAPowell.no_parallelization = True 1915ChainNaiveTBPSAPowell = Chaining([NaiveTBPSA, Powell], ["half"]).set_name( 1916 "ChainNaiveTBPSAPowell", register=True 1917) 1918ChainNaiveTBPSAPowell.no_parallelization = True 1919ChainNaiveTBPSACMAPowell = Chaining([NaiveTBPSA, CMA, Powell], ["third", "third"]).set_name( 1920 "ChainNaiveTBPSACMAPowell", register=True 1921) 1922ChainNaiveTBPSACMAPowell.no_parallelization = True 1923 1924 1925@registry.register 1926class cGA(base.Optimizer): 1927 """`Compact Genetic Algorithm <https://ieeexplore.ieee.org/document/797971>`_. 1928 A discrete optimization algorithm, introduced in and often used as a first baseline. 1929 """ 1930 1931 # pylint: disable=too-many-instance-attributes 1932 1933 def __init__( 1934 self, 1935 parametrization: IntOrParameter, 1936 budget: tp.Optional[int] = None, 1937 num_workers: int = 1, 1938 arity: tp.Optional[int] = None, 1939 ) -> None: 1940 super().__init__(parametrization, budget=budget, num_workers=num_workers) 1941 if arity is None: 1942 all_params = p.helpers.flatten(self.parametrization) 1943 arity = max( 1944 len(param.choices) if isinstance(param, p.TransitionChoice) else 500 1945 for _, param in all_params 1946 ) 1947 self._arity = arity 1948 self._penalize_cheap_violations = False # Not sure this is the optimal decision. 1949 # self.p[i][j] is the probability that the ith variable has value 0<=j< arity. 1950 self.p: np.ndarray = np.ones((self.dimension, arity)) / arity 1951 # Probability increments are of order 1./self.llambda 1952 # and lower bounded by something of order 1./self.llambda. 1953 self.llambda = max(num_workers, 40) # FIXME: no good heuristic ? 1954 # CGA generates a candidate, then a second candidate; 1955 # then updates depending on the comparison with the first one. We therefore have to store the previous candidate. 1956 self._previous_value_candidate: tp.Optional[tp.Tuple[float, np.ndarray]] = None 1957 1958 def _internal_ask_candidate(self) -> p.Parameter: 1959 # Multinomial. 1960 values: tp.List[int] = [ 1961 sum(self._rng.uniform() > cum_proba) for cum_proba in np.cumsum(self.p, axis=1) 1962 ] 1963 data = discretization.noisy_inverse_threshold_discretization(values, arity=self._arity, gen=self._rng) 1964 return self.parametrization.spawn_child().set_standardized_data(data) 1965 1966 def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 1967 data = candidate.get_standardized_data(reference=self.parametrization) 1968 if self._previous_value_candidate is None: 1969 self._previous_value_candidate = (loss, data) 1970 else: 1971 winner, loser = self._previous_value_candidate[1], data 1972 if self._previous_value_candidate[0] > loss: 1973 winner, loser = loser, winner 1974 winner_data = discretization.threshold_discretization(np.asarray(winner.data), arity=self._arity) 1975 loser_data = discretization.threshold_discretization(np.asarray(loser.data), arity=self._arity) 1976 for i, _ in enumerate(winner_data): 1977 if winner_data[i] != loser_data[i]: 1978 self.p[i][winner_data[i]] += 1.0 / self.llambda 1979 self.p[i][loser_data[i]] -= 1.0 / self.llambda 1980 for j in range(len(self.p[i])): 1981 self.p[i][j] = max(self.p[i][j], 1.0 / self.llambda) 1982 self.p[i] /= sum(self.p[i]) 1983 self._previous_value_candidate = None 1984 1985 1986class _EMNA(base.Optimizer): 1987 """Simple Estimation of Multivariate Normal Algorithm (EMNA).""" 1988 1989 # pylint: disable=too-many-instance-attributes 1990 1991 def __init__( 1992 self, 1993 parametrization: IntOrParameter, 1994 budget: tp.Optional[int] = None, 1995 num_workers: int = 1, 1996 isotropic: bool = True, 1997 naive: bool = True, 1998 population_size_adaptation: bool = False, 1999 initial_popsize: tp.Optional[int] = None, 2000 ) -> None: 2001 super().__init__(parametrization, budget=budget, num_workers=num_workers) 2002 self.isotropic: bool = isotropic 2003 self.naive: bool = naive 2004 self.population_size_adaptation = population_size_adaptation 2005 self.min_coef_parallel_context: int = 8 2006 # Sigma initialization 2007 self.sigma: tp.Union[float, np.ndarray] 2008 if initial_popsize is None: 2009 initial_popsize = self.dimension 2010 if self.isotropic: 2011 self.sigma = 1.0 2012 else: 2013 self.sigma = np.ones(self.dimension) 2014 # population size and parent size initializations 2015 self.popsize = _PopulationSizeController( 2016 llambda=4 * initial_popsize, mu=initial_popsize, dimension=self.dimension, num_workers=num_workers 2017 ) 2018 if not self.population_size_adaptation: 2019 self.popsize.mu = max(16, self.dimension) 2020 self.popsize.llambda = 4 * self.popsize.mu 2021 self.popsize.llambda = max(self.popsize.llambda, num_workers) 2022 if budget is not None and self.popsize.llambda > budget: 2023 self.popsize.llambda = budget 2024 self.popsize.mu = self.popsize.llambda // 4 2025 warnings.warn( 2026 "Budget may be too small in front of the dimension for EMNA", 2027 errors.InefficientSettingsWarning, 2028 ) 2029 self.current_center: np.ndarray = np.zeros(self.dimension) 2030 # population 2031 self.parents: tp.List[p.Parameter] = [self.parametrization] 2032 self.children: tp.List[p.Parameter] = [] 2033 2034 def recommend(self) -> p.Parameter: 2035 if self.naive: 2036 return self.current_bests["optimistic"].parameter 2037 else: 2038 # This is NOT the naive version. We deal with noise. 2039 out = self.parametrization.spawn_child() 2040 with p.helpers.deterministic_sampling(out): 2041 out.set_standardized_data(self.current_center) 2042 return out 2043 2044 def _internal_ask_candidate(self) -> p.Parameter: 2045 sigma_tmp = self.sigma 2046 if ( 2047 self.population_size_adaptation 2048 and self.popsize.llambda < self.min_coef_parallel_context * self.dimension 2049 ): 2050 sigma_tmp = self.sigma * np.exp(self._rng.normal(0, 1) / np.sqrt(self.dimension)) 2051 individual = self.current_center + sigma_tmp * self._rng.normal(0, 1, self.dimension) 2052 parent = self.parents[self.num_ask % len(self.parents)] 2053 candidate = parent.spawn_child().set_standardized_data(individual, reference=self.parametrization) 2054 if parent is self.parametrization: 2055 candidate.heritage["lineage"] = candidate.uid 2056 candidate._meta["sigma"] = sigma_tmp 2057 return candidate 2058 2059 def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 2060 if self.population_size_adaptation: 2061 self.popsize.add_value(loss) 2062 self.children.append(candidate) 2063 if len(self.children) >= self.popsize.llambda: 2064 # Sorting the population. 2065 self.children.sort(key=base._loss) 2066 # Computing the new parent. 2067 self.parents = self.children[: self.popsize.mu] 2068 self.children = [] 2069 self.current_center = ( 2070 sum( # type: ignore 2071 c.get_standardized_data(reference=self.parametrization) for c in self.parents 2072 ) 2073 / self.popsize.mu 2074 ) 2075 if self.population_size_adaptation: 2076 if ( 2077 self.popsize.llambda < self.min_coef_parallel_context * self.dimension 2078 ): # Population size not large enough for emna 2079 self.sigma = np.exp( 2080 np.sum( 2081 np.log([c._meta["sigma"] for c in self.parents]), 2082 axis=0 if self.isotropic else None, 2083 ) 2084 / self.popsize.mu 2085 ) 2086 else: 2087 stdd = [ 2088 ( 2089 self.parents[i].get_standardized_data(reference=self.parametrization) 2090 - self.current_center 2091 ) 2092 ** 2 2093 for i in range(self.popsize.mu) 2094 ] 2095 self.sigma = np.sqrt( 2096 np.sum(stdd) / (self.popsize.mu * (self.dimension if self.isotropic else 1)) 2097 ) 2098 else: 2099 # EMNA update 2100 stdd = [ 2101 ( 2102 self.parents[i].get_standardized_data(reference=self.parametrization) 2103 - self.current_center 2104 ) 2105 ** 2 2106 for i in range(self.popsize.mu) 2107 ] 2108 self.sigma = np.sqrt( 2109 np.sum(stdd, axis=0 if self.isotropic else None) 2110 / (self.popsize.mu * (self.dimension if self.isotropic else 1)) 2111 ) 2112 2113 if self.num_workers / self.dimension > 32: # faster decrease of sigma if large parallel context 2114 imp = max(1, (np.log(self.popsize.llambda) / 2) ** (1 / self.dimension)) 2115 self.sigma /= imp 2116 2117 def _internal_tell_not_asked(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 2118 raise errors.TellNotAskedNotSupportedError 2119 2120 2121class EMNA(base.ConfiguredOptimizer): 2122 """Estimation of Multivariate Normal Algorithm 2123 This algorithm is quite efficient in a parallel context, i.e. when 2124 the population size is large. 2125 2126 Parameters 2127 ---------- 2128 isotropic: bool 2129 isotropic version on EMNA if True, i.e. we have an 2130 identity matrix for the Gaussian, else we here consider the separable 2131 version, meaning we have a diagonal matrix for the Gaussian (anisotropic) 2132 naive: bool 2133 set to False for noisy problem, so that the best points will be an 2134 average of the final population. 2135 population_size_adaptation: bool 2136 population size automatically adapts to the landscape 2137 initial_popsize: Optional[int] 2138 initial (and minimal) population size (default: 4 x dimension) 2139 """ 2140 2141 # pylint: disable=unused-argument 2142 def __init__( 2143 self, 2144 *, 2145 isotropic: bool = True, 2146 naive: bool = True, 2147 population_size_adaptation: bool = False, 2148 initial_popsize: tp.Optional[int] = None, 2149 ) -> None: 2150 super().__init__(_EMNA, locals()) 2151 2152 2153NaiveIsoEMNA = EMNA().set_name("NaiveIsoEMNA", register=True) 2154 2155 2156# Discussions with Jialin Liu and Fabien Teytaud helped the following development. 2157# This includes discussion at Dagstuhl's 2019 seminars on randomized search heuristics and computational intelligence in games. 2158@registry.register 2159class NGOptBase(base.Optimizer): 2160 """Nevergrad optimizer by competence map.""" 2161 2162 # pylint: disable=too-many-branches 2163 def __init__( 2164 self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1 2165 ) -> None: 2166 super().__init__(parametrization, budget=budget, num_workers=num_workers) 2167 analysis = p.helpers.analyze(self.parametrization) 2168 funcinfo = self.parametrization.function 2169 self.has_noise = not (analysis.deterministic and funcinfo.deterministic) 2170 # The noise coming from discrete variables goes to 0. 2171 self.noise_from_instrumentation = self.has_noise and funcinfo.deterministic 2172 self.fully_continuous = analysis.continuous 2173 all_params = p.helpers.flatten(self.parametrization) 2174 # figure out if there is any discretization layers 2175 int_layers = list( 2176 itertools.chain.from_iterable([_layering.Int.filter_from(x) for _, x in all_params]) 2177 ) 2178 int_layers = [x for x in int_layers if x.arity is not None] # only "Choice" instances for now 2179 self.has_discrete_not_softmax = any( 2180 not isinstance(lay, _datalayers.SoftmaxSampling) for lay in int_layers 2181 ) 2182 self._has_discrete = bool(int_layers) 2183 self._arity: int = max((lay.arity for lay in int_layers), default=-1) # type: ignore 2184 if self.fully_continuous: 2185 self._arity = -1 2186 self._optim: tp.Optional[base.Optimizer] = None 2187 self._constraints_manager.update( 2188 max_trials=1000, 2189 penalty_factor=1.0, 2190 penalty_exponent=1.01, 2191 ) 2192 2193 @property 2194 def optim(self) -> base.Optimizer: 2195 if self._optim is None: 2196 self._optim = self._select_optimizer_cls()(self.parametrization, self.budget, self.num_workers) 2197 optim = self._optim if not isinstance(self._optim, NGOptBase) else self._optim.optim 2198 logger.debug("%s selected %s optimizer.", *(x.name for x in (self, optim))) 2199 return self._optim 2200 2201 def _select_optimizer_cls(self) -> base.OptCls: 2202 # pylint: disable=too-many-nested-blocks 2203 assert self.budget is not None 2204 if self.has_noise and self.has_discrete_not_softmax: 2205 # noise and discrete: let us merge evolution and bandits. 2206 cls: base.OptCls = DoubleFastGADiscreteOnePlusOne if self.dimension < 60 else CMA 2207 else: 2208 if self.has_noise and self.fully_continuous: 2209 # This is the real of population control. FIXME: should we pair with a bandit ? 2210 cls = TBPSA 2211 else: 2212 if ( 2213 self.has_discrete_not_softmax 2214 or not self.parametrization.function.metrizable 2215 or not self.fully_continuous 2216 ): 2217 cls = DoubleFastGADiscreteOnePlusOne 2218 else: 2219 if self.num_workers > self.budget / 5: 2220 if self.num_workers > self.budget / 2.0 or self.budget < self.dimension: 2221 cls = MetaRecentering 2222 else: 2223 cls = NaiveTBPSA 2224 else: 2225 # Possibly a good idea to go memetic for large budget, but something goes wrong for the moment. 2226 if ( 2227 self.num_workers == 1 and self.budget > 6000 and self.dimension > 7 2228 ): # Let us go memetic. 2229 cls = ChainCMAPowell 2230 else: 2231 if self.num_workers == 1 and self.budget < self.dimension * 30: 2232 # One plus one so good in large ratio "dimension / budget". 2233 cls = OnePlusOne if self.dimension > 30 else Cobyla 2234 else: 2235 # DE is great in such a case (?). 2236 cls = ( 2237 DE if self.dimension > 2000 else CMA if self.dimension > 1 else OnePlusOne 2238 ) 2239 return cls 2240 2241 def _internal_ask_candidate(self) -> p.Parameter: 2242 return self.optim.ask() 2243 2244 def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 2245 self.optim.tell(candidate, loss) 2246 2247 def recommend(self) -> p.Parameter: 2248 return self.optim.recommend() 2249 2250 def _internal_tell_not_asked(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 2251 self.optim.tell(candidate, loss) 2252 2253 2254@registry.register 2255class Shiwa(NGOptBase): 2256 """Nevergrad optimizer by competence map. You might modify this one for designing youe own competence map.""" 2257 2258 def _select_optimizer_cls(self) -> base.OptCls: 2259 optCls: base.OptCls = NGOptBase 2260 funcinfo = self.parametrization.function 2261 if self.has_noise and (self.has_discrete_not_softmax or not funcinfo.metrizable): 2262 optCls = RecombiningPortfolioOptimisticNoisyDiscreteOnePlusOne 2263 elif self.dimension >= 60 and not funcinfo.metrizable: 2264 optCls = CMA 2265 return optCls 2266 2267 2268@registry.register 2269class NGO(NGOptBase): # compatibility 2270 pass 2271 2272 2273@registry.register 2274class NGOpt4(NGOptBase): 2275 """Nevergrad optimizer by competence map. You might modify this one for designing youe own competence map.""" 2276 2277 def _select_optimizer_cls(self) -> base.OptCls: 2278 self.fully_continuous = ( 2279 self.fully_continuous and not self.has_discrete_not_softmax and self._arity < 0 2280 ) 2281 budget, num_workers = self.budget, self.num_workers 2282 funcinfo = self.parametrization.function 2283 assert budget is not None 2284 optimClass: base.OptCls 2285 if self.has_noise and (self.has_discrete_not_softmax or not funcinfo.metrizable): 2286 mutation = "portfolio" if budget > 1000 else "discrete" 2287 optimClass = ParametrizedOnePlusOne( 2288 crossover=True, mutation=mutation, noise_handling="optimistic" 2289 ) 2290 elif self._arity > 0: 2291 if self._arity == 2: 2292 optimClass = DiscreteOnePlusOne 2293 else: 2294 optimClass = AdaptiveDiscreteOnePlusOne if self._arity < 5 else CMandAS2 2295 else: 2296 # pylint: disable=too-many-nested-blocks 2297 if self.has_noise and self.fully_continuous and self.dimension > 100: 2298 # Waow, this is actually a discrete algorithm. 2299 optimClass = ConfSplitOptimizer( 2300 num_optims=13, progressive=True, multivariate_optimizer=OptimisticDiscreteOnePlusOne 2301 ) 2302 else: 2303 if self.has_noise and self.fully_continuous: 2304 if budget > 100: 2305 optimClass = ( 2306 OnePlusOne if self.noise_from_instrumentation or self.num_workers > 1 else SQP 2307 ) 2308 else: 2309 optimClass = OnePlusOne 2310 else: 2311 if self.has_discrete_not_softmax or not funcinfo.metrizable or not self.fully_continuous: 2312 optimClass = DoubleFastGADiscreteOnePlusOne 2313 else: 2314 if num_workers > budget / 5: 2315 if num_workers > budget / 2.0 or budget < self.dimension: 2316 optimClass = MetaTuneRecentering 2317 elif self.dimension < 5 and budget < 100: 2318 optimClass = DiagonalCMA 2319 elif self.dimension < 5 and budget < 500: 2320 optimClass = Chaining([DiagonalCMA, MetaModel], [100]) 2321 else: 2322 optimClass = NaiveTBPSA 2323 else: 2324 # Possibly a good idea to go memetic for large budget, but something goes wrong for the moment. 2325 if ( 2326 num_workers == 1 and budget > 6000 and self.dimension > 7 2327 ): # Let us go memetic. 2328 optimClass = ChainNaiveTBPSACMAPowell 2329 else: 2330 if num_workers == 1 and budget < self.dimension * 30: 2331 if ( 2332 self.dimension > 30 2333 ): # One plus one so good in large ratio "dimension / budget". 2334 optimClass = OnePlusOne 2335 elif self.dimension < 5: 2336 optimClass = MetaModel 2337 else: 2338 optimClass = Cobyla 2339 else: 2340 if self.dimension > 2000: # DE is great in such a case (?). 2341 optimClass = DE 2342 else: 2343 if self.dimension < 10 and budget < 500: 2344 optimClass = MetaModel 2345 else: 2346 if ( 2347 self.dimension > 40 2348 and num_workers > self.dimension 2349 and budget < 7 * self.dimension ** 2 2350 ): 2351 optimClass = DiagonalCMA 2352 elif ( 2353 3 * num_workers > self.dimension ** 2 2354 and budget > self.dimension ** 2 2355 ): 2356 optimClass = MetaModel 2357 else: 2358 optimClass = CMA 2359 return optimClass 2360 2361 2362@registry.register 2363class NGOpt8(NGOpt4): 2364 """Nevergrad optimizer by competence map. You might modify this one for designing youe own competence map.""" 2365 2366 def _select_optimizer_cls(self) -> base.OptCls: 2367 # Extracting info as far as possible. 2368 assert self.budget is not None 2369 funcinfo = self.parametrization.function 2370 optimClass: base.OptCls 2371 if self.has_noise and (self.has_discrete_not_softmax or not funcinfo.metrizable): 2372 if self.budget > 10000: 2373 optimClass = RecombiningPortfolioOptimisticNoisyDiscreteOnePlusOne 2374 else: 2375 optimClass = ParametrizedOnePlusOne( 2376 crossover=True, mutation="discrete", noise_handling="optimistic" 2377 ) 2378 elif self._arity > 0: 2379 if self.budget < 1000 and self.num_workers == 1: 2380 optimClass = DiscreteBSOOnePlusOne 2381 elif self.num_workers > 2: 2382 optimClass = CMandAS2 # type: ignore 2383 else: 2384 optimClass = super()._select_optimizer_cls() 2385 else: 2386 if ( 2387 not (self.has_noise and self.fully_continuous and self.dimension > 100) 2388 and not (self.has_noise and self.fully_continuous) 2389 and not (self.num_workers > self.budget / 5) 2390 and (self.num_workers == 1 and self.budget > 6000 and self.dimension > 7) 2391 ): 2392 optimClass = ChainMetaModelPowell 2393 else: 2394 optimClass = super()._select_optimizer_cls() 2395 2396 return optimClass 2397 2398 def _num_objectives_set_callback(self) -> None: 2399 super()._num_objectives_set_callback() 2400 if self.num_objectives > 1: 2401 if self.noise_from_instrumentation or not self.has_noise: 2402 # override at runtime 2403 self._optim = DE(self.parametrization, self.budget, self.num_workers) 2404 2405 2406@registry.register 2407class NGOpt10(NGOpt8): 2408 def _select_optimizer_cls(self) -> base.OptCls: 2409 if not self.has_noise and self._arity > 0: 2410 return DiscreteLenglerOnePlusOne 2411 else: 2412 return super()._select_optimizer_cls() 2413 2414 def recommend(self) -> p.Parameter: 2415 return base.Optimizer.recommend(self) 2416 2417 2418@registry.register 2419class NGOpt(NGOpt10): 2420 pass 2421 2422 2423class _MSR(CM): 2424 """This code applies multiple copies of NGOpt with random weights for the different objective functions. 2425 2426 Variants dedicated to multiobjective optimization by multiple singleobjective optimization. 2427 """ 2428 2429 def __init__( 2430 self, 2431 parametrization: IntOrParameter, 2432 budget: tp.Optional[int] = None, 2433 num_workers: int = 1, 2434 num_single_runs: int = 9, 2435 base_optimizer: base.OptCls = NGOpt, 2436 ) -> None: 2437 super().__init__(parametrization, budget=budget, num_workers=num_workers) 2438 self.num_optims = num_single_runs 2439 self.optims = [ 2440 base_optimizer( 2441 self.parametrization, 2442 budget=1 + (budget // self.num_optims) if budget is not None else None, 2443 num_workers=num_workers, 2444 ) 2445 for _ in range(self.num_optims) 2446 ] 2447 self.coeff: tp.Optional[tp.List[float]] = None 2448 2449 def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None: 2450 if self.coeff is None: 2451 self.coeffs = [ 2452 self.parametrization.random_state.uniform(size=self.num_objectives) 2453 for _ in range(self.num_optims) 2454 ] 2455 for coeffs, opt in zip(self.coeffs, self.optims): 2456 this_loss = np.sum(loss * coeffs) 2457 opt.tell(candidate, this_loss) 2458 2459 2460class MultipleSingleRuns(base.ConfiguredOptimizer): 2461 """Multiple single-objective runs, in particular for multi-objective optimization. 2462 Parameters 2463 ---------- 2464 num_single_runs: int 2465 number of single runs. 2466 """ 2467 2468 # pylint: disable=unused-argument 2469 def __init__( 2470 self, 2471 *, 2472 num_single_runs: int = 9, 2473 base_optimizer: base.OptCls = NGOpt, 2474 ) -> None: 2475 super().__init__(_MSR, locals()) 2476