1# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2#
3# This source code is licensed under the MIT license found in the
4# LICENSE file in the root directory of this source tree.
5import os
6import math
7import logging
8import itertools
9from collections import deque
10import warnings
11import cma
12import numpy as np
13from bayes_opt import UtilityFunction
14from bayes_opt import BayesianOptimization
15import nevergrad.common.typing as tp
16from nevergrad.common import errors
17from nevergrad.parametrization import parameter as p
18from nevergrad.parametrization import transforms
19from nevergrad.parametrization import discretization
20from nevergrad.parametrization import _layering
21from nevergrad.parametrization import _datalayers
22from . import oneshot
23from . import base
24from . import mutations
25from .base import registry as registry
26from .base import addCompare  # pylint: disable=unused-import
27from .base import IntOrParameter
28
29
30# families of optimizers
31# pylint: disable=unused-wildcard-import,wildcard-import,too-many-lines,too-many-arguments
32from .differentialevolution import *  # type: ignore  # noqa: F403
33from .es import *  # type: ignore  # noqa: F403
34from .oneshot import *  # noqa: F403
35from .recastlib import *  # noqa: F403
36
37try:
38    from .externalbo import HyperOpt  # pylint: disable=unused-import
39except ModuleNotFoundError:
40    pass
41
42# run with LOGLEVEL=DEBUG for more debug information
43logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
44logger = logging.getLogger(__name__)
45
46# # # # # optimizers # # # # #
47
48
49class _OnePlusOne(base.Optimizer):
50    """Simple but sometimes powerful optimization algorithm.
51
52    We use the one-fifth adaptation rule, going back to Schumer and Steiglitz (1968).
53    It was independently rediscovered by Devroye (1972) and Rechenberg (1973).
54    We use asynchronous updates, so that the 1+1 can actually be parallel and even
55    performs quite well in such a context - this is naturally close to 1+lambda.
56
57    Posssible mutations include gaussian and cauchy for the continuous case, and in the discrete case:
58    discrete, fastga, doublefastga, adaptive, portfolio, discreteBSO, doerr.
59    - discrete is the most classical discrete mutation operator,
60    - doubleFastGA is an adaptation of FastGA to arity > 2, Portfolio corresponds to random mutation rates,
61    - discreteBSO corresponds to a decreasing schedule of mutation rate.
62    - adaptive and doerr correspond to various self-adaptive mutation rates.
63    - coordinatewise_adaptive is the anisotropic counterpart of the adaptive version.
64    """
65
66    def __init__(
67        self,
68        parametrization: IntOrParameter,
69        budget: tp.Optional[int] = None,
70        num_workers: int = 1,
71        *,
72        noise_handling: tp.Optional[tp.Union[str, tp.Tuple[str, float]]] = None,
73        mutation: str = "gaussian",
74        crossover: bool = False,
75        use_pareto: bool = False,
76    ) -> None:
77        super().__init__(parametrization, budget=budget, num_workers=num_workers)
78        self._sigma: float = 1
79        self._previous_best_loss = float("inf")
80        self.use_pareto = use_pareto
81        all_params = p.helpers.flatten(self.parametrization)
82        arity = max(
83            len(param.choices) if isinstance(param, p.TransitionChoice) else 500 for _, param in all_params
84        )
85        self.arity_for_discrete_mutation = arity
86        # configuration
87        if noise_handling is not None:
88            if isinstance(noise_handling, str):
89                assert noise_handling in [
90                    "random",
91                    "optimistic",
92                ], f"Unkwnown noise handling: '{noise_handling}'"
93            else:
94                assert isinstance(
95                    noise_handling, tuple
96                ), "noise_handling must be a string or  a tuple of type (strategy, factor)"
97                assert noise_handling[1] > 0.0, "the factor must be a float greater than 0"
98                assert noise_handling[0] in [
99                    "random",
100                    "optimistic",
101                ], f"Unkwnown noise handling: '{noise_handling}'"
102        assert mutation in [
103            "gaussian",
104            "cauchy",
105            "discrete",
106            "fastga",
107            "doublefastga",
108            "adaptive",
109            "coordinatewise_adaptive",
110            "portfolio",
111            "discreteBSO",
112            "lengler",
113            "doerr",
114        ], f"Unkwnown mutation: '{mutation}'"
115        if mutation == "adaptive":
116            self._adaptive_mr = 0.5
117        if mutation == "coordinatewise_adaptive":
118            self._velocity = self._rng.uniform(size=self.dimension) * arity / 4.0
119            self._modified_variables = np.array([True] * self.dimension)
120        self.noise_handling = noise_handling
121        self.mutation = mutation
122        self.crossover = crossover
123        if mutation == "doerr":
124            assert num_workers == 1, "Doerr mutation is implemented only in the sequential case."
125            self._doerr_mutation_rates = [1, 2]
126            self._doerr_mutation_rewards = [0.0, 0.0]
127            self._doerr_counters = [0.0, 0.0]
128            self._doerr_epsilon = 0.25  # self.dimension ** (-0.01)
129            self._doerr_gamma = 1 - 2 / self.dimension
130            self._doerr_current_best = float("inf")
131            i = 3
132            j = 2
133            self._doerr_index: int = -1  # Nothing has been mutated for now.
134            while i < self.dimension:
135                self._doerr_mutation_rates += [i]
136                self._doerr_mutation_rewards += [0.0]
137                self._doerr_counters += [0.0]
138                i += j
139                j += 2
140
141    def _internal_ask_candidate(self) -> p.Parameter:
142        # pylint: disable=too-many-return-statements, too-many-branches
143        noise_handling = self.noise_handling
144        if not self._num_ask:
145            out = self.parametrization.spawn_child()
146            out._meta["sigma"] = self._sigma
147            return out
148        # for noisy version
149        if noise_handling is not None:
150            limit = (0.05 if isinstance(noise_handling, str) else noise_handling[1]) * len(self.archive) ** 3
151            strategy = noise_handling if isinstance(noise_handling, str) else noise_handling[0]
152            if self._num_ask <= limit:
153                if strategy in ["cubic", "random"]:
154                    idx = self._rng.choice(len(self.archive))
155                    return list(self.archive.values())[idx].parameter.spawn_child()  # type: ignore
156                elif strategy == "optimistic":
157                    return self.current_bests["optimistic"].parameter.spawn_child()
158        # crossover
159        mutator = mutations.Mutator(self._rng)
160        pessimistic = self.current_bests["pessimistic"].parameter.spawn_child()
161        if self.num_objectives > 1 and self.use_pareto:  # multiobjective
162            # revert to using a sample of the pareto front (not "pessimistic" though)
163            pareto = (
164                self.pareto_front()
165            )  # we can't use choice directly, because numpy does not like iterables
166            pessimistic = pareto[self._rng.choice(len(pareto))].spawn_child()
167        ref = self.parametrization
168        if self.crossover and self._num_ask % 2 == 1 and len(self.archive) > 2:
169            data = mutator.crossover(
170                pessimistic.get_standardized_data(reference=ref), mutator.get_roulette(self.archive, num=2)
171            )
172            return pessimistic.set_standardized_data(data, reference=ref)
173        # mutating
174
175        mutation = self.mutation
176        if mutation in ("gaussian", "cauchy"):  # standard case
177            step = (
178                self._rng.normal(0, 1, self.dimension)
179                if mutation == "gaussian"
180                else self._rng.standard_cauchy(self.dimension)
181            )
182            out = pessimistic.set_standardized_data(self._sigma * step)
183            out._meta["sigma"] = self._sigma
184            return out
185        else:
186            pessimistic_data = pessimistic.get_standardized_data(reference=ref)
187            if mutation == "crossover":
188                if self._num_ask % 2 == 0 or len(self.archive) < 3:
189                    data = mutator.portfolio_discrete_mutation(
190                        pessimistic_data, arity=self.arity_for_discrete_mutation
191                    )
192                else:
193                    data = mutator.crossover(pessimistic_data, mutator.get_roulette(self.archive, num=2))
194            elif mutation == "adaptive":
195                data = mutator.portfolio_discrete_mutation(
196                    pessimistic_data,
197                    intensity=max(1, int(self._adaptive_mr * self.dimension)),
198                    arity=self.arity_for_discrete_mutation,
199                )
200            elif mutation == "discreteBSO":
201                assert self.budget is not None, "DiscreteBSO needs a budget."
202                intensity = int(self.dimension - self._num_ask * self.dimension / self.budget)
203                if intensity < 1:
204                    intensity = 1
205                data = mutator.portfolio_discrete_mutation(
206                    pessimistic_data, intensity=intensity, arity=self.arity_for_discrete_mutation
207                )
208            elif mutation == "coordinatewise_adaptive":
209                self._modified_variables = np.array([True] * self.dimension)
210                data = mutator.coordinatewise_mutation(
211                    pessimistic_data,
212                    self._velocity,
213                    self._modified_variables,
214                    arity=self.arity_for_discrete_mutation,
215                )
216            elif mutation == "lengler":
217                alpha = 1.54468
218                intensity = int(max(1, self.dimension * (alpha * np.log(self.num_ask) / self.num_ask)))
219                data = mutator.portfolio_discrete_mutation(
220                    pessimistic_data, intensity=intensity, arity=self.arity_for_discrete_mutation
221                )
222            elif mutation == "doerr":
223                # Selection, either random, or greedy, or a mutation rate.
224                assert self._doerr_index == -1, "We should have used this index in tell."
225                if self._rng.uniform() < self._doerr_epsilon:
226                    index = self._rng.choice(range(len(self._doerr_mutation_rates)))
227                    self._doerr_index = index
228                else:
229                    index = self._doerr_mutation_rewards.index(max(self._doerr_mutation_rewards))
230                    self._doerr_index = -1
231                intensity = self._doerr_mutation_rates[index]
232                data = mutator.portfolio_discrete_mutation(
233                    pessimistic_data, intensity=intensity, arity=self.arity_for_discrete_mutation
234                )
235            else:
236                func: tp.Any = {  # type: ignore
237                    "discrete": mutator.discrete_mutation,
238                    "fastga": mutator.doerr_discrete_mutation,
239                    "doublefastga": mutator.doubledoerr_discrete_mutation,
240                    "portfolio": mutator.portfolio_discrete_mutation,
241                }[mutation]
242                data = func(pessimistic_data, arity=self.arity_for_discrete_mutation)
243            return pessimistic.set_standardized_data(data, reference=ref)
244
245    def _internal_tell(self, x: tp.ArrayLike, loss: tp.FloatLoss) -> None:
246        # only used for cauchy and gaussian
247        if self._previous_best_loss != loss:
248            self._sigma *= 2.0 if loss < self._previous_best_loss else 0.84
249        if self.mutation == "doerr" and self._doerr_current_best < float("inf") and self._doerr_index >= 0:
250            improvement = max(0.0, self._doerr_current_best - loss)
251            # Decay.
252            index = self._doerr_index
253            counter = self._doerr_counters[index]
254            self._doerr_mutation_rewards[index] = (
255                self._doerr_gamma * counter * self._doerr_mutation_rewards[index] + improvement
256            ) / (self._doerr_gamma * counter + 1)
257            self._doerr_counters = [self._doerr_gamma * x for x in self._doerr_counters]
258            self._doerr_counters[index] += 1
259            self._doerr_index = -1
260        if self.mutation == "doerr":
261            self._doerr_current_best = min(self._doerr_current_best, loss)
262        if self.mutation == "adaptive":
263            factor = 1.2 if loss <= self._previous_best_loss else 0.731  # 0.731 = 1.2**(-np.exp(1)-1)
264            self._adaptive_mr = min(1.0, factor * self._adaptive_mr)
265        if self.mutation == "coordinatewise_adaptive":
266            factor = 1.2 if loss < self._previous_best_loss else 0.731  # 0.731 = 1.2**(-np.exp(1)-1)
267            inds = self._modified_variables
268            self._velocity[inds] = np.clip(
269                self._velocity[inds] * factor, 1.0, self.arity_for_discrete_mutation / 4.0
270            )
271        self._previous_best_loss = self.current_bests["pessimistic"].mean  # could be the current one
272
273
274class ParametrizedOnePlusOne(base.ConfiguredOptimizer):
275    """Simple but sometimes powerfull class of optimization algorithm.
276    This use asynchronous updates, so that (1+1) can actually be parallel and even
277    performs quite well in such a context - this is naturally close to (1+lambda).
278
279
280    Parameters
281    ----------
282    noise_handling: str or Tuple[str, float]
283        Method for handling the noise. The name can be:
284
285        - `"random"`: a random point is reevaluated regularly, this uses the one-fifth adaptation rule,
286          going back to Schumer and Steiglitz (1968). It was independently rediscovered by Devroye (1972) and Rechenberg (1973).
287        - `"optimistic"`: the best optimistic point is reevaluated regularly, optimism in front of uncertainty
288        - a coefficient can to tune the regularity of these reevaluations (default .05)
289    mutation: str
290        One of the available mutations from:
291
292        - `"gaussian"`: standard mutation by adding a Gaussian random variable (with progressive
293          widening) to the best pessimistic point
294        - `"cauchy"`: same as Gaussian but with a Cauchy distribution.
295        - `"discrete"`: when a variable is mutated (which happens with probability 1/d in dimension d), it's just
296             randomly drawn. This means that on average, only one variable is mutated.
297        - `"discreteBSO"`: as in brainstorm optimization, we slowly decrease the mutation rate from 1 to 1/d.
298        - `"fastga"`: FastGA mutations from the current best
299        - `"doublefastga"`: double-FastGA mutations from the current best (Doerr et al, Fast Genetic Algorithms, 2017)
300        - `"portfolio"`: Random number of mutated bits (called niform mixing in
301          Dang & Lehre "Self-adaptation of Mutation Rates in Non-elitist Population", 2016)
302        - `"lengler"`: specific mutation rate chosen as a function of the dimension and iteration index.
303    crossover: bool
304        whether to add a genetic crossover step every other iteration.
305    use_pareto: bool
306        whether to restart from a random pareto element in multiobjective mode, instead of the last one added
307
308    Notes
309    -----
310    After many papers advocared the mutation rate 1/d in the discrete (1+1) for the discrete case,
311    `it was proposed <https://arxiv.org/abs/1606.05551>`_ to use of a randomly
312    drawn mutation rate. `Fast genetic algorithms <https://arxiv.org/abs/1703.03334>`_ are based on a similar idea
313    These two simple methods perform quite well on a wide range of problems.
314
315    """
316
317    # pylint: disable=unused-argument
318    def __init__(
319        self,
320        *,
321        noise_handling: tp.Optional[tp.Union[str, tp.Tuple[str, float]]] = None,
322        mutation: str = "gaussian",
323        crossover: bool = False,
324        use_pareto: bool = False,
325    ) -> None:
326        super().__init__(_OnePlusOne, locals())
327
328
329OnePlusOne = ParametrizedOnePlusOne().set_name("OnePlusOne", register=True)
330NoisyOnePlusOne = ParametrizedOnePlusOne(noise_handling="random").set_name("NoisyOnePlusOne", register=True)
331DiscreteOnePlusOne = ParametrizedOnePlusOne(mutation="discrete").set_name("DiscreteOnePlusOne", register=True)
332PortfolioDiscreteOnePlusOne = ParametrizedOnePlusOne(mutation="portfolio").set_name(
333    "PortfolioDiscreteOnePlusOne", register=True
334)
335DiscreteLenglerOnePlusOne = ParametrizedOnePlusOne(mutation="lengler").set_name(
336    "DiscreteLenglerOnePlusOne", register=True
337)
338
339AdaptiveDiscreteOnePlusOne = ParametrizedOnePlusOne(mutation="adaptive").set_name(
340    "AdaptiveDiscreteOnePlusOne", register=True
341)
342AnisotropicAdaptiveDiscreteOnePlusOne = ParametrizedOnePlusOne(mutation="coordinatewise_adaptive").set_name(
343    "AnisotropicAdaptiveDiscreteOnePlusOne", register=True
344)
345
346DiscreteBSOOnePlusOne = ParametrizedOnePlusOne(mutation="discreteBSO").set_name(
347    "DiscreteBSOOnePlusOne", register=True
348)
349DiscreteDoerrOnePlusOne = ParametrizedOnePlusOne(mutation="doerr").set_name(
350    "DiscreteDoerrOnePlusOne", register=True
351)
352DiscreteDoerrOnePlusOne.no_parallelization = True
353CauchyOnePlusOne = ParametrizedOnePlusOne(mutation="cauchy").set_name("CauchyOnePlusOne", register=True)
354OptimisticNoisyOnePlusOne = ParametrizedOnePlusOne(noise_handling="optimistic").set_name(
355    "OptimisticNoisyOnePlusOne", register=True
356)
357OptimisticDiscreteOnePlusOne = ParametrizedOnePlusOne(
358    noise_handling="optimistic", mutation="discrete"
359).set_name("OptimisticDiscreteOnePlusOne", register=True)
360NoisyDiscreteOnePlusOne = ParametrizedOnePlusOne(
361    noise_handling=("random", 1.0), mutation="discrete"
362).set_name("NoisyDiscreteOnePlusOne", register=True)
363DoubleFastGADiscreteOnePlusOne = ParametrizedOnePlusOne(mutation="doublefastga").set_name(
364    "DoubleFastGADiscreteOnePlusOne", register=True
365)
366RecombiningPortfolioOptimisticNoisyDiscreteOnePlusOne = ParametrizedOnePlusOne(
367    crossover=True, mutation="portfolio", noise_handling="optimistic"
368).set_name("RecombiningPortfolioOptimisticNoisyDiscreteOnePlusOne", register=True)
369
370
371# pylint: too-many-arguments,too-many-instance-attributes
372class _CMA(base.Optimizer):
373    def __init__(
374        self,
375        parametrization: IntOrParameter,
376        budget: tp.Optional[int] = None,
377        num_workers: int = 1,
378        scale: float = 1.0,
379        elitist: bool = False,
380        popsize: tp.Optional[int] = None,
381        diagonal: bool = False,
382        fcmaes: bool = False,
383        random_init: bool = False,
384    ) -> None:
385        super().__init__(parametrization, budget=budget, num_workers=num_workers)
386        self._scale = scale
387        self._elitist = elitist
388        self._popsize = (
389            max(self.num_workers, 4 + int(3 * np.log(self.dimension))) if popsize is None else popsize
390        )
391        self._diagonal = diagonal
392        self._fcmaes = fcmaes
393        self._random_init = random_init
394        # internal attributes
395        self._to_be_asked: tp.Deque[np.ndarray] = deque()
396        self._to_be_told: tp.List[p.Parameter] = []
397        self._num_spawners = self._popsize // 2  # experimental, for visualization
398        self._parents = [self.parametrization]
399        # delay initialization to ease implementation of variants
400        self._es: tp.Any = None
401
402    @property
403    def es(self) -> tp.Any:  # typing not possible since cmaes not imported :(
404        if self._es is None:
405            if not self._fcmaes:
406                inopts = dict(
407                    popsize=self._popsize,
408                    randn=self._rng.randn,
409                    CMA_diagonal=self._diagonal,
410                    verbose=0,
411                    seed=np.nan,
412                    CMA_elitist=self._elitist,
413                )
414                self._es = cma.CMAEvolutionStrategy(
415                    x0=self._rng.normal(size=self.dimension)
416                    if self._random_init
417                    else np.zeros(self.dimension, dtype=np.float),
418                    sigma0=self._scale,
419                    inopts=inopts,
420                )
421            else:
422                try:
423                    from fcmaes import cmaes  # pylint: disable=import-outside-toplevel
424                except ImportError as e:
425                    raise ImportError(
426                        "Please install fcmaes (pip install fcmaes) to use FCMA optimizers"
427                    ) from e
428                self._es = cmaes.Cmaes(
429                    x0=np.zeros(self.dimension, dtype=np.float),
430                    input_sigma=self._scale,
431                    popsize=self._popsize,
432                    randn=self._rng.randn,
433                )
434        return self._es
435
436    def _internal_ask_candidate(self) -> p.Parameter:
437        if not self._to_be_asked:
438            self._to_be_asked.extend(self.es.ask())
439        data = self._to_be_asked.popleft()
440        parent = self._parents[self.num_ask % len(self._parents)]
441        candidate = parent.spawn_child().set_standardized_data(data, reference=self.parametrization)
442        return candidate
443
444    def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
445        self._to_be_told.append(candidate)
446        if len(self._to_be_told) >= self.es.popsize:
447            listx = [c.get_standardized_data(reference=self.parametrization) for c in self._to_be_told]
448            listy = [c.loss for c in self._to_be_told]
449            args = (listy, listx) if self._fcmaes else (listx, listy)
450            try:
451                self.es.tell(*args)
452            except RuntimeError:
453                pass
454            else:
455                self._parents = sorted(self._to_be_told, key=base._loss)[: self._num_spawners]
456                self._to_be_told = []
457
458    def _internal_provide_recommendation(self) -> np.ndarray:
459        pessimistic = self.current_bests["pessimistic"].parameter.get_standardized_data(
460            reference=self.parametrization
461        )
462        if self._es is None:
463            return pessimistic
464        cma_best: tp.Optional[np.ndarray] = self.es.best_x if self._fcmaes else self.es.result.xbest
465        if cma_best is None:
466            return pessimistic
467        return cma_best
468
469
470class ParametrizedCMA(base.ConfiguredOptimizer):
471    """CMA-ES optimizer,
472    This evolution strategy uses a Gaussian sampling, iteratively modified
473    for searching in the best directions.
474    This optimizer wraps an external implementation: https://github.com/CMA-ES/pycma
475
476    Parameters
477    ----------
478    scale: float
479        scale of the search
480    elitist: bool
481        whether we switch to elitist mode, i.e. mode + instead of comma,
482        i.e. mode in which we always keep the best point in the population.
483    popsize: Optional[int] = None
484        population size, should be n * self.num_workers for int n >= 1.
485        default is max(self.num_workers, 4 + int(3 * np.log(self.dimension)))
486
487    diagonal: bool
488        use the diagonal version of CMA (advised in big dimension)
489    fcmaes: bool = False
490        use fast implementation, doesn't support diagonal=True.
491        produces equivalent results, preferable for high dimensions or
492        if objective function evaluation is fast.
493    """
494
495    # pylint: disable=unused-argument
496    def __init__(
497        self,
498        *,
499        scale: float = 1.0,
500        elitist: bool = False,
501        popsize: tp.Optional[int] = None,
502        diagonal: bool = False,
503        fcmaes: bool = False,
504        random_init: bool = False,
505    ) -> None:
506        super().__init__(_CMA, locals())
507        if fcmaes:
508            if diagonal:
509                raise RuntimeError("fcmaes doesn't support diagonal=True, use fcmaes=False")
510
511
512CMA = ParametrizedCMA().set_name("CMA", register=True)
513DiagonalCMA = ParametrizedCMA(diagonal=True).set_name("DiagonalCMA", register=True)
514FCMA = ParametrizedCMA(fcmaes=True).set_name("FCMA", register=True)
515
516
517class _PopulationSizeController:
518    """Population control scheme for TBPSA and EDA"""
519
520    def __init__(self, llambda: int, mu: int, dimension: int, num_workers: int = 1) -> None:
521        self.llambda = max(llambda, num_workers)
522        self.min_mu = min(mu, dimension)
523        self.mu = mu
524        self.dimension = dimension
525        self.num_workers = num_workers
526        self._loss_record: tp.List[float] = []
527
528    def add_value(self, loss: tp.FloatLoss) -> None:
529        self._loss_record += [loss]
530        if len(self._loss_record) >= 5 * self.llambda:
531            first_fifth = self._loss_record[: self.llambda]
532            last_fifth = self._loss_record[-int(self.llambda) :]  # casting to int to avoid pylint bug
533            means = [sum(fitnesses) / float(self.llambda) for fitnesses in [first_fifth, last_fifth]]
534            stds = [np.std(fitnesses) / np.sqrt(self.llambda - 1) for fitnesses in [first_fifth, last_fifth]]
535            z = (means[0] - means[1]) / (np.sqrt(stds[0] ** 2 + stds[1] ** 2))
536            if z < 2.0:
537                self.mu *= 2
538            else:
539                self.mu = max(self.min_mu, int(self.mu * 0.84))
540            self.llambda = 4 * self.mu
541            if self.num_workers > 1:
542                self.llambda = max(self.llambda, self.num_workers)
543                self.mu = self.llambda // 4
544            self._loss_record = []
545
546
547# pylint: disable=too-many-instance-attributes
548@registry.register
549class EDA(base.Optimizer):
550    """Test-based population-size adaptation.
551
552    Population-size equal to lambda = 4 x dimension.
553    Test by comparing the first fifth and the last fifth of the 5lambda evaluations.
554
555    Caution
556    -------
557    This optimizer is probably wrong.
558    """
559
560    _POPSIZE_ADAPTATION = False
561    _COVARIANCE_MEMORY = False
562
563    def __init__(
564        self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1
565    ) -> None:
566        super().__init__(parametrization, budget=budget, num_workers=num_workers)
567        self.sigma = 1
568        self.covariance = np.identity(self.dimension)
569        dim = self.dimension
570        self.popsize = _PopulationSizeController(
571            llambda=4 * dim, mu=dim, dimension=dim, num_workers=num_workers
572        )
573        self.current_center: np.ndarray = np.zeros(self.dimension)
574        # Population
575        self.children: tp.List[p.Parameter] = []
576        self.parents: tp.List[p.Parameter] = [
577            self.parametrization
578        ]  # for transfering heritage (checkpoints in PBT)
579
580    def _internal_provide_recommendation(
581        self,
582    ) -> tp.ArrayLike:  # This is NOT the naive version. We deal with noise.
583        return self.current_center
584
585    def _internal_ask_candidate(self) -> p.Parameter:
586        mutated_sigma = self.sigma * np.exp(self._rng.normal(0, 1) / np.sqrt(self.dimension))
587        # TODO: is a sigma necessary here as well? given the covariance is estimated
588        assert len(self.current_center) == len(self.covariance), [
589            self.dimension,
590            self.current_center,
591            self.covariance,
592        ]
593        data = self._rng.multivariate_normal(self.current_center, mutated_sigma * self.covariance)
594        parent = self.parents[self.num_ask % len(self.parents)]
595        candidate = parent.spawn_child().set_standardized_data(data, reference=self.parametrization)
596        if parent is self.parametrization:
597            candidate.heritage["lineage"] = candidate.uid  # for tracking
598        candidate._meta["sigma"] = mutated_sigma
599        return candidate
600
601    def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
602        self.children.append(candidate)
603        if self._POPSIZE_ADAPTATION:
604            self.popsize.add_value(loss)
605        if len(self.children) >= self.popsize.llambda:
606            self.children = sorted(self.children, key=base._loss)
607            population_data = [c.get_standardized_data(reference=self.parametrization) for c in self.children]
608            mu = self.popsize.mu
609            arrays = population_data[:mu]
610            # covariance
611            # TODO: check actual covariance that should be used
612            centered_arrays = np.array([x - self.current_center for x in arrays])
613            cov = centered_arrays.T.dot(centered_arrays)
614            # cov = np.cov(np.array(population_data).T)
615            mem_factor = 0.9 if self._COVARIANCE_MEMORY else 0
616            self.covariance *= mem_factor
617            self.covariance += (1 - mem_factor) * cov
618            # Computing the new parent
619            self.current_center = sum(arrays) / mu  # type: ignore
620            self.sigma = np.exp(sum([np.log(c._meta["sigma"]) for c in self.children[:mu]]) / mu)
621            self.parents = self.children[:mu]
622            self.children = []
623
624    def _internal_tell_not_asked(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
625        raise errors.TellNotAskedNotSupportedError
626
627
628class PCEDA(EDA):
629    _POPSIZE_ADAPTATION = True
630    _COVARIANCE_MEMORY = False
631
632
633class MPCEDA(EDA):
634    _POPSIZE_ADAPTATION = True
635    _COVARIANCE_MEMORY = True
636
637
638class MEDA(EDA):
639    _POPSIZE_ADAPTATION = False
640    _COVARIANCE_MEMORY = True
641
642
643class _TBPSA(base.Optimizer):
644    """Test-based population-size adaptation.
645
646    Population-size equal to lambda = 4 x dimension.
647    Test by comparing the first fifth and the last fifth of the 5lambda evaluations.
648    """
649
650    # pylint: disable=too-many-instance-attributes
651
652    def __init__(
653        self,
654        parametrization: IntOrParameter,
655        budget: tp.Optional[int] = None,
656        num_workers: int = 1,
657        naive: bool = True,
658        initial_popsize: tp.Optional[int] = None,
659    ) -> None:
660        super().__init__(parametrization, budget=budget, num_workers=num_workers)
661        self.sigma = 1
662        self.naive = naive
663        if initial_popsize is None:
664            initial_popsize = self.dimension
665        self.popsize = _PopulationSizeController(
666            llambda=4 * initial_popsize, mu=initial_popsize, dimension=self.dimension, num_workers=num_workers
667        )
668        self.current_center: np.ndarray = np.zeros(self.dimension)
669        # population
670        self.parents: tp.List[p.Parameter] = [
671            self.parametrization
672        ]  # for transfering heritage (checkpoints in PBT)
673        self.children: tp.List[p.Parameter] = []
674
675    def recommend(self) -> p.Parameter:
676        if self.naive:
677            return self.current_bests["optimistic"].parameter
678        else:
679            # This is NOT the naive version. We deal with noise.
680            out = self.parametrization.spawn_child()
681            with p.helpers.deterministic_sampling(out):
682                out.set_standardized_data(self.current_center)
683            return out
684
685    def _internal_ask_candidate(self) -> p.Parameter:
686        mutated_sigma = self.sigma * np.exp(self._rng.normal(0, 1) / np.sqrt(self.dimension))
687        individual = self.current_center + mutated_sigma * self._rng.normal(0, 1, self.dimension)
688        parent = self.parents[self.num_ask % len(self.parents)]
689        candidate = parent.spawn_child().set_standardized_data(individual, reference=self.parametrization)
690        if parent is self.parametrization:
691            candidate.heritage["lineage"] = candidate.uid  # for tracking
692        candidate._meta["sigma"] = mutated_sigma
693        return candidate
694
695    def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
696        self.popsize.add_value(loss)
697        self.children.append(candidate)
698        if len(self.children) >= self.popsize.llambda:
699            # Sorting the population.
700            self.children.sort(key=base._loss)
701            # Computing the new parent.
702
703            self.parents = self.children[: self.popsize.mu]
704            self.children = []
705            self.current_center = (
706                sum(  # type: ignore
707                    c.get_standardized_data(reference=self.parametrization) for c in self.parents
708                )
709                / self.popsize.mu
710            )
711            self.sigma = np.exp(np.sum(np.log([c._meta["sigma"] for c in self.parents])) / self.popsize.mu)
712
713    def _internal_tell_not_asked(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
714        data = candidate.get_standardized_data(reference=self.parametrization)
715        sigma = np.linalg.norm(data - self.current_center) / np.sqrt(self.dimension)  # educated guess
716        candidate._meta["sigma"] = sigma
717        self._internal_tell_candidate(candidate, loss)  # go through standard pipeline
718
719
720class ParametrizedTBPSA(base.ConfiguredOptimizer):
721    """`Test-based population-size adaptation <https://homepages.fhv.at/hgb/New-Papers/PPSN16_HB16.pdf>`_
722    This method, based on adapting the population size, performs the best in
723    many noisy optimization problems, even in large dimension
724
725    Parameters
726    ----------
727    naive: bool
728        set to False for noisy problem, so that the best points will be an
729        average of the final population.
730    initial_popsize: Optional[int]
731        initial (and minimal) population size (default: 4 x dimension)
732
733    Note
734    ----
735    Derived from:
736    Hellwig, Michael & Beyer, Hans-Georg. (2016).
737    Evolution under Strong Noise: A Self-Adaptive Evolution Strategy
738    Reaches the Lower Performance Bound -- the pcCMSA-ES.
739    https://homepages.fhv.at/hgb/New-Papers/PPSN16_HB16.pdf
740    """
741
742    # pylint: disable=unused-argument
743    def __init__(
744        self,
745        *,
746        naive: bool = True,
747        initial_popsize: tp.Optional[int] = None,
748    ) -> None:
749        super().__init__(_TBPSA, locals())
750
751
752TBPSA = ParametrizedTBPSA(naive=False).set_name("TBPSA", register=True)
753NaiveTBPSA = ParametrizedTBPSA().set_name("NaiveTBPSA", register=True)
754
755
756@registry.register
757class NoisyBandit(base.Optimizer):
758    """UCB.
759    This is upper confidence bound (adapted to minimization),
760    with very poor parametrization; in particular, the logarithmic term is set to zero.
761    Infinite arms: we add one arm when `20 * #ask >= #arms ** 3`.
762    """
763
764    def _internal_ask(self) -> tp.ArrayLike:
765        if 20 * self._num_ask >= len(self.archive) ** 3:
766            return self._rng.normal(0, 1, self.dimension)  # type: ignore
767        if self._rng.choice([True, False]):
768            # numpy does not accept choice on list of tuples, must choose index instead
769            idx = self._rng.choice(len(self.archive))
770            return np.frombuffer(list(self.archive.bytesdict.keys())[idx])  # type: ignore
771        return self.current_bests["optimistic"].x
772
773
774class _PSO(base.Optimizer):
775
776    # pylint: disable=too-many-instance-attributes
777    def __init__(
778        self,
779        parametrization: IntOrParameter,
780        budget: tp.Optional[int] = None,
781        num_workers: int = 1,
782        config: tp.Optional["ConfiguredPSO"] = None,
783    ) -> None:
784        super().__init__(parametrization, budget=budget, num_workers=num_workers)
785        self._config = ConfiguredPSO() if config is None else config
786        if budget is not None and budget < 60:
787            warnings.warn("PSO is inefficient with budget < 60", errors.InefficientSettingsWarning)
788        cases: tp.Dict[str, tp.Tuple[tp.Optional[float], transforms.Transform]] = dict(
789            arctan=(0, transforms.ArctanBound(0, 1)),
790            identity=(None, transforms.Affine(1, 0)),
791            gaussian=(1e-10, transforms.CumulativeDensity()),
792        )
793        # eps is used for clipping to make sure it is admissible
794        self._eps, self._transform = cases[self._config.transform]
795        self.llambda = max(40, num_workers)
796        if self._config.popsize is not None:
797            self.llambda = self._config.popsize
798        self._uid_queue = base.utils.UidQueue()
799        self.population: tp.Dict[str, p.Parameter] = {}
800        self._best = self.parametrization.spawn_child()
801
802    def _internal_ask_candidate(self) -> p.Parameter:
803        # population is increased only if queue is empty (otherwise tell_not_asked does not work well at the beginning)
804        if len(self.population) < self.llambda:
805            candidate = self.parametrization.sample()
806            self.population[candidate.uid] = candidate
807            dim = self.parametrization.dimension
808            candidate.heritage["speed"] = (
809                self._rng.normal(size=dim) if self._eps is None else self._rng.uniform(-1, 1, dim)
810            )
811            self._uid_queue.asked.add(candidate.uid)
812            return candidate
813        uid = self._uid_queue.ask()
814        candidate = self._spawn_mutated_particle(self.population[uid])
815        candidate.heritage["lineage"] = uid  # override in case it was a tell-not-asked
816        return candidate
817
818    def _get_boxed_data(self, particle: p.Parameter) -> np.ndarray:
819        if particle._frozen and "boxed_data" in particle._meta:
820            return particle._meta["boxed_data"]  # type: ignore
821        boxed_data = self._transform.forward(particle.get_standardized_data(reference=self.parametrization))
822        if particle._frozen:  # only save is frozen
823            particle._meta["boxed_data"] = boxed_data
824        return boxed_data
825
826    def _spawn_mutated_particle(self, particle: p.Parameter) -> p.Parameter:
827        x = self._get_boxed_data(particle)
828        speed: np.ndarray = particle.heritage["speed"]
829        global_best_x = self._get_boxed_data(self._best)
830        parent_best_x = self._get_boxed_data(particle.heritage.get("best_parent", particle))
831        rp = self._rng.uniform(0.0, 1.0, size=self.dimension)
832        rg = self._rng.uniform(0.0, 1.0, size=self.dimension)
833        speed = (
834            self._config.omega * speed
835            + self._config.phip * rp * (parent_best_x - x)
836            + self._config.phig * rg * (global_best_x - x)
837        )
838        data = speed + x
839        if self._eps is not None:
840            data = np.clip(data, self._eps, 1 - self._eps)
841        data = self._transform.backward(data)
842        new_part = particle.spawn_child().set_standardized_data(data, reference=self.parametrization)
843        new_part.heritage["speed"] = speed
844        return new_part
845
846    def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
847        uid = candidate.heritage["lineage"]
848        if uid not in self.population:
849            self._internal_tell_not_asked(candidate, loss)
850            return
851        self._uid_queue.tell(uid)
852        self.population[uid] = candidate
853        if self._best.loss is None or loss < self._best.loss:
854            self._best = candidate
855        if loss <= candidate.heritage.get("best_parent", candidate).loss:
856            candidate.heritage["best_parent"] = candidate
857
858    def _internal_tell_not_asked(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
859        # nearly same as DE
860        worst: tp.Optional[p.Parameter] = None
861        if not len(self.population) < self.llambda:
862            uid, worst = max(self.population.items(), key=lambda p: base._loss(p[1]))
863            if base._loss(worst) < loss:
864                return  # no need to update
865            else:
866                del self.population[uid]
867                self._uid_queue.discard(uid)
868        if "speed" not in candidate.heritage:
869            candidate.heritage["speed"] = self._rng.uniform(-1.0, 1.0, self.parametrization.dimension)
870        self.population[candidate.uid] = candidate
871        self._uid_queue.tell(candidate.uid)
872        if loss < base._loss(self._best):
873            self._best = candidate
874
875
876class ConfiguredPSO(base.ConfiguredOptimizer):
877    """`Particle Swarm Optimization <https://en.wikipedia.org/wiki/Particle_swarm_optimization>`_
878    is based on a set of particles with their inertia.
879    Wikipedia provides a beautiful illustration ;) (see link)
880
881
882    Parameters
883    ----------
884    transform: str
885        name of the transform to use to map from PSO optimization space to R-space.
886    popsize: int
887        population size of the particle swarm. Defaults to max(40, num_workers)
888    omega: float
889        particle swarm optimization parameter
890    phip: float
891        particle swarm optimization parameter
892    phig: float
893        particle swarm optimization parameter
894
895    Note
896    ----
897    - Using non-default "transform" and "wide" parameters can lead to extreme values
898    - Implementation partially following SPSO2011. However, no randomization of the population order.
899    - Reference:
900      M. Zambrano-Bigiarini, M. Clerc and R. Rojas,
901      Standard Particle Swarm Optimisation 2011 at CEC-2013: A baseline for future PSO improvements,
902      2013 IEEE Congress on Evolutionary Computation, Cancun, 2013, pp. 2337-2344.
903      https://ieeexplore.ieee.org/document/6557848
904    """
905
906    # pylint: disable=unused-argument
907    def __init__(
908        self,
909        transform: str = "identity",
910        popsize: tp.Optional[int] = None,
911        omega: float = 0.5 / math.log(2.0),
912        phip: float = 0.5 + math.log(2.0),
913        phig: float = 0.5 + math.log(2.0),
914    ) -> None:
915        super().__init__(_PSO, locals(), as_config=True)
916        assert transform in ["arctan", "gaussian", "identity"]
917        self.transform = transform
918        self.popsize = popsize
919        self.omega = omega
920        self.phip = phip
921        self.phig = phig
922
923
924RealSpacePSO = ConfiguredPSO().set_name("RealSpacePSO", register=True)
925PSO = ConfiguredPSO(transform="arctan").set_name("PSO", register=True)
926
927
928@registry.register
929class SPSA(base.Optimizer):
930    # pylint: disable=too-many-instance-attributes
931    """The First order SPSA algorithm as shown in [1,2,3], with implementation details
932    from [4,5].
933
934    1) https://en.wikipedia.org/wiki/Simultaneous_perturbation_stochastic_approximation
935    2) https://www.chessprogramming.org/SPSA
936    3) Spall, James C. "Multivariate stochastic approximation using a simultaneous perturbation gradient approximation."
937       IEEE transactions on automatic control 37.3 (1992): 332-341.
938    4) Section 7.5.2 in "Introduction to Stochastic Search and Optimization: Estimation, Simulation and Control" by James C. Spall.
939    5) Pushpendre Rastogi, Jingyi Zhu, James C. Spall CISS (2016).
940       Efficient implementation of Enhanced Adaptive Simultaneous Perturbation Algorithms.
941    """
942    no_parallelization = True
943
944    def __init__(
945        self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1
946    ) -> None:
947        super().__init__(parametrization, budget=budget, num_workers=num_workers)
948        self.init = True
949        self.idx = 0
950        self.delta = float("nan")
951        self.ym: tp.Optional[np.ndarray] = None
952        self.yp: tp.Optional[np.ndarray] = None
953        self.t: np.ndarray = np.zeros(self.dimension)
954        self.avg: np.ndarray = np.zeros(self.dimension)
955        # Set A, a, c according to the practical implementation
956        # guidelines in the ISSO book.
957        self.A = 10 if budget is None else max(10, budget // 20)
958        # TODO: We should spend first 10-20 iterations
959        # to estimate the noise standard deviation and
960        # then set c = standard deviation. 1e-1 is arbitrary.
961        self.c = 1e-1
962        # TODO: We should chose a to be inversely proportional to
963        # the magnitude of gradient and proportional to (1+A)^0.602
964        # we should spend some burn-in iterations to estimate the
965        # magnitude of the gradient. 1e-5 is arbitrary.
966        self.a = 1e-5
967
968    def _ck(self, k: int) -> float:
969        "c_k determines the pertubation."
970        return self.c / (k // 2 + 1) ** 0.101
971
972    def _ak(self, k: int) -> float:
973        "a_k is the learning rate."
974        return self.a / (k // 2 + 1 + self.A) ** 0.602
975
976    def _internal_ask(self) -> tp.ArrayLike:
977        k = self.idx
978        if k % 2 == 0:
979            if not self.init:
980                assert self.yp is not None and self.ym is not None
981                self.t -= (self._ak(k) * (self.yp - self.ym) / 2 / self._ck(k)) * self.delta
982                self.avg += (self.t - self.avg) / (k // 2 + 1)
983            self.delta = 2 * self._rng.randint(2, size=self.dimension) - 1
984            return self.t - self._ck(k) * self.delta  # type:ignore
985        return self.t + self._ck(k) * self.delta  # type: ignore
986
987    def _internal_tell(self, x: tp.ArrayLike, loss: tp.FloatLoss) -> None:
988        setattr(self, ("ym" if self.idx % 2 == 0 else "yp"), np.array(loss, copy=True))
989        self.idx += 1
990        if self.init and self.yp is not None and self.ym is not None:
991            self.init = False
992
993    def _internal_provide_recommendation(self) -> tp.ArrayLike:
994        return self.avg
995
996
997class _Rescaled(base.Optimizer):
998    """Proposes a version of a base optimizer which works at a different scale."""
999
1000    def __init__(
1001        self,
1002        parametrization: IntOrParameter,
1003        budget: tp.Optional[int] = None,
1004        num_workers: int = 1,
1005        base_optimizer: base.OptCls = CMA,
1006        scale: tp.Optional[float] = None,
1007    ) -> None:
1008        super().__init__(parametrization, budget=budget, num_workers=num_workers)
1009        self._optimizer = base_optimizer(self.parametrization, budget=budget, num_workers=num_workers)
1010        self._subcandidates: tp.Dict[str, p.Parameter] = {}
1011        if scale is None:
1012            assert budget is not None, "Either scale or budget must be known in _Rescaled."
1013            scale = np.sqrt(np.log(self.budget) / self.dimension)
1014        self.scale = scale
1015        assert self.scale != 0.0, "scale should be non-zero in Rescaler."
1016
1017    def rescale_candidate(self, candidate: p.Parameter, inverse: bool = False) -> p.Parameter:
1018        data = candidate.get_standardized_data(reference=self.parametrization)
1019        scale = self.scale if not inverse else 1.0 / self.scale
1020        return self.parametrization.spawn_child().set_standardized_data(scale * data)
1021
1022    def _internal_ask_candidate(self) -> p.Parameter:
1023        candidate = self._optimizer.ask()
1024        sent_candidate = self.rescale_candidate(candidate)
1025        # We store the version corresponding to the underlying optimizer.
1026        self._subcandidates[sent_candidate.uid] = candidate
1027        return sent_candidate
1028
1029    def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
1030        self._optimizer.tell(self._subcandidates.pop(candidate.uid), loss)
1031
1032    def _internal_tell_not_asked(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
1033        candidate = self.rescale_candidate(candidate, inverse=True)
1034        self._optimizer.tell(candidate, loss)
1035
1036
1037class SplitOptimizer(base.Optimizer):
1038    """Combines optimizers, each of them working on their own variables.
1039
1040    Parameters
1041    ---------
1042    num_optims: int or None
1043        number of optimizers
1044    num_vars: int or None
1045        number of variable per optimizer.
1046    progressive: bool
1047        True if we want to progressively add optimizers during the optimization run.
1048        If progressive = True, the optimizer is forced at OptimisticNoisyOnePlusOne.
1049
1050    Example
1051    -------
1052    for 5 optimizers, each of them working on 2 variables, one can use:
1053
1054    opt = SplitOptimizer(parametrization=10, num_workers=3, num_optims=5, num_vars=[2, 2, 2, 2, 2])
1055    or equivalently:
1056    opt = SplitOptimizer(parametrization=10, num_workers=3, num_vars=[2, 2, 2, 2, 2])
1057    Given that all optimizers have the same number of variables, one can also run:
1058    opt = SplitOptimizer(parametrization=10, num_workers=3, num_optims=5)
1059
1060    Note
1061    ----
1062    By default, it uses CMA for multivariate groups and RandomSearch for monovariate groups.
1063
1064    Caution
1065    -------
1066    The variables refer to the deep representation used by optimizers.
1067    For example, a categorical variable with 5 possible values becomes 5 continuous variables.
1068    """
1069
1070    def __init__(
1071        self,
1072        parametrization: IntOrParameter,
1073        budget: tp.Optional[int] = None,
1074        num_workers: int = 1,
1075        num_optims: tp.Optional[int] = None,
1076        num_vars: tp.Optional[tp.List[int]] = None,
1077        multivariate_optimizer: base.OptCls = CMA,
1078        monovariate_optimizer: base.OptCls = OnePlusOne,
1079        progressive: bool = False,
1080        non_deterministic_descriptor: bool = True,
1081    ) -> None:
1082        super().__init__(parametrization, budget=budget, num_workers=num_workers)
1083        self._subcandidates: tp.Dict[str, tp.List[p.Parameter]] = {}
1084        self._progressive = progressive
1085        subparams: tp.List[p.Parameter] = []
1086        if num_vars is not None:  # The user has specified how are the splits (s)he wants.
1087            assert (
1088                sum(num_vars) == self.dimension
1089            ), f"sum(num_vars)={sum(num_vars)} should be equal to the dimension {self.dimension}."
1090            if num_optims is None:  # we deduce the number of splits.
1091                num_optims = len(num_vars)
1092            assert num_optims == len(
1093                num_vars
1094            ), f"The number {num_optims} of optimizers should match len(num_vars)={len(num_vars)}."
1095        elif num_optims is None:
1096            # if no num_vars and no num_optims, try to guess how to split. Otherwise, just assume 2.
1097            if isinstance(parametrization, p.Parameter):
1098                subparams = p.helpers.list_data(parametrization)  # type: ignore
1099                if len(subparams) == 1:
1100                    subparams.clear()
1101                num_optims = len(subparams)
1102            if not subparams:  # Desperate situation: just split in 2.
1103                num_optims = 2
1104        if not subparams:
1105            # if num_vars not given: we will distribute variables equally.
1106            assert num_optims is not None
1107            num_optims = min(num_optims, self.dimension)
1108            num_vars = num_vars if num_vars else []
1109            for i in range(num_optims):
1110                if len(num_vars) < i + 1:
1111                    num_vars += [(self.dimension // num_optims) + (self.dimension % num_optims > i)]
1112                assert num_vars[i] >= 1, "At least one variable per optimizer."
1113                subparams += [p.Array(shape=(num_vars[i],))]
1114        if non_deterministic_descriptor:
1115            for param in subparams:
1116                param.function.deterministic = False
1117        # synchronize random state and create optimizers
1118        self.optims: tp.List[base.Optimizer] = []
1119        mono, multi = monovariate_optimizer, multivariate_optimizer
1120        for param in subparams:
1121            param.random_state = self.parametrization.random_state
1122            self.optims.append((multi if param.dimension > 1 else mono)(param, budget, num_workers))
1123        # final check for dimension
1124        assert (
1125            sum(opt.dimension for opt in self.optims) == self.dimension
1126        ), "sum of sub-dimensions should be equal to the total dimension."
1127
1128    def _internal_ask_candidate(self) -> p.Parameter:
1129        candidates: tp.List[p.Parameter] = []
1130        for i, opt in enumerate(self.optims):
1131            if self._progressive:
1132                assert self.budget is not None
1133                if i > 0 and i / len(self.optims) > np.sqrt(2.0 * self.num_ask / self.budget):
1134                    candidates.append(opt.parametrization.spawn_child())  # unchanged
1135                    continue
1136            candidates.append(opt.ask())
1137        data = np.concatenate(
1138            [
1139                c.get_standardized_data(reference=opt.parametrization)
1140                for c, opt in zip(candidates, self.optims)
1141            ],
1142            axis=0,
1143        )
1144        cand = self.parametrization.spawn_child().set_standardized_data(data)
1145        self._subcandidates[cand.uid] = candidates
1146        return cand
1147
1148    def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
1149        candidates = self._subcandidates.pop(candidate.uid)
1150        for cand, opt in zip(candidates, self.optims):
1151            opt.tell(cand, loss)
1152
1153    def _internal_tell_not_asked(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
1154        data = candidate.get_standardized_data(reference=self.parametrization)
1155        start = 0
1156        for opt in self.optims:
1157            local_data = data[start : start + opt.dimension]
1158            start += opt.dimension
1159            local_candidate = opt.parametrization.spawn_child().set_standardized_data(local_data)
1160            opt.tell(local_candidate, loss)
1161
1162
1163class Rescaled(base.ConfiguredOptimizer):
1164    """Configured optimizer for creating rescaled optimization algorithms.
1165
1166    Parameters
1167    ----------
1168    base_optimizer: base.OptCls
1169        optimization algorithm to be rescaled.
1170    scale: how much do we rescale. E.g. 0.001 if we want to focus on the center
1171        with std 0.001 (assuming the std of the domain is set to 1).
1172    """
1173
1174    # pylint: disable=unused-argument
1175    def __init__(
1176        self,
1177        *,
1178        base_optimizer: base.OptCls = CMA,
1179        scale: tp.Optional[float] = None,
1180    ) -> None:
1181        super().__init__(_Rescaled, locals())
1182
1183
1184RescaledCMA = Rescaled().set_name("RescaledCMA", register=True)
1185
1186
1187class ConfSplitOptimizer(base.ConfiguredOptimizer):
1188    """ "Combines optimizers, each of them working on their own variables.
1189
1190    Parameters
1191    ----------
1192    num_optims: int
1193        number of optimizers
1194    num_vars: optional list of int
1195        number of variable per optimizer.
1196    progressive: optional bool
1197        whether we progressively add optimizers.
1198    non_deterministic_descriptor: bool
1199        subparts parametrization descriptor is set to noisy function.
1200        This can have an impact for optimizer selection for competence maps.
1201    """
1202
1203    # pylint: disable=unused-argument
1204    def __init__(
1205        self,
1206        *,
1207        num_optims: tp.Optional[int] = None,
1208        num_vars: tp.Optional[tp.List[int]] = None,
1209        multivariate_optimizer: base.OptCls = CMA,
1210        monovariate_optimizer: base.OptCls = RandomSearch,
1211        progressive: bool = False,
1212        non_deterministic_descriptor: bool = True,
1213    ) -> None:
1214        super().__init__(SplitOptimizer, locals())
1215
1216
1217@registry.register
1218class Portfolio(base.Optimizer):
1219    """Passive portfolio of CMA, 2-pt DE and Scr-Hammersley."""
1220
1221    def __init__(
1222        self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1
1223    ) -> None:
1224        super().__init__(parametrization, budget=budget, num_workers=num_workers)
1225        assert budget is not None
1226        self.optims = [
1227            CMA(
1228                self.parametrization, budget // 3 + (budget % 3 > 0), num_workers
1229            ),  # share parametrization and its rng
1230            TwoPointsDE(self.parametrization, budget // 3 + (budget % 3 > 1), num_workers),  # noqa: F405
1231            ScrHammersleySearch(self.parametrization, budget // 3, num_workers),
1232        ]  # noqa: F405
1233        if budget < 12 * num_workers:
1234            self.optims = [ScrHammersleySearch(self.parametrization, budget, num_workers)]  # noqa: F405
1235
1236    def _internal_ask_candidate(self) -> p.Parameter:
1237        optim_index = self._num_ask % len(self.optims)
1238        candidate = self.optims[optim_index].ask()
1239        candidate._meta["optim_index"] = optim_index
1240        return candidate
1241
1242    def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
1243        for opt in self.optims:
1244            try:
1245                opt.tell(candidate, loss)
1246            except errors.TellNotAskedNotSupportedError:
1247                pass
1248        # Presumably better than self.optims[optim_index].tell(candidate, value)
1249
1250    def _internal_tell_not_asked(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
1251        at_least_one_ok = False
1252        for opt in self.optims:
1253            try:
1254                opt.tell(candidate, loss)
1255                at_least_one_ok = True
1256            except errors.TellNotAskedNotSupportedError:
1257                pass
1258        if not at_least_one_ok:
1259            raise errors.TellNotAskedNotSupportedError
1260
1261
1262class InfiniteMetaModelOptimum(ValueError):
1263    """Sometimes the optimum of the metamodel is at infinity."""
1264
1265
1266def learn_on_k_best(archive: utils.Archive[utils.MultiValue], k: int) -> tp.ArrayLike:
1267    """Approximate optimum learnt from the k best.
1268
1269    Parameters
1270    ----------
1271    archive: utils.Archive[utils.Value]
1272    """
1273    items = list(archive.items_as_arrays())
1274    dimension = len(items[0][0])
1275
1276    # Select the k best.
1277    first_k_individuals = [
1278        x for x in sorted(items, key=lambda indiv: archive[indiv[0]].get_estimation("pessimistic"))[:k]
1279    ]
1280    assert len(first_k_individuals) == k
1281
1282    # Recenter the best.
1283    middle = np.array(sum(p[0] for p in first_k_individuals) / k)
1284    normalization = 1e-15 + np.sqrt(np.sum((first_k_individuals[-1][0] - first_k_individuals[0][0]) ** 2))
1285    y = [archive[c[0]].get_estimation("pessimistic") for c in first_k_individuals]
1286    X = np.asarray([(c[0] - middle) / normalization for c in first_k_individuals])
1287
1288    # We need SKLearn.
1289    from sklearn.linear_model import LinearRegression
1290    from sklearn.preprocessing import PolynomialFeatures
1291
1292    polynomial_features = PolynomialFeatures(degree=2)
1293    X2 = polynomial_features.fit_transform(X)
1294
1295    # Fit a linear model.
1296    model = LinearRegression()
1297    model.fit(X2, y)
1298
1299    try:
1300        for cls in Powell, DE:  # Powell excellent here, DE as a backup for thread safety.
1301            optimizer = cls(parametrization=dimension, budget=45 * dimension + 30)
1302            try:
1303                minimum = optimizer.minimize(
1304                    lambda x: float(model.predict(polynomial_features.fit_transform(x[None, :])))
1305                ).value
1306            except RuntimeError:
1307                assert cls == Powell, "Only Powell is allowed to crash here."
1308            else:
1309                break
1310    except ValueError:
1311        raise InfiniteMetaModelOptimum("Infinite meta-model optimum in learn_on_k_best.")
1312
1313    if np.sum(minimum ** 2) > 1.0:
1314        raise InfiniteMetaModelOptimum("huge meta-model optimum in learn_on_k_best.")
1315    return middle + normalization * minimum
1316
1317
1318@registry.register
1319class MetaModel(base.Optimizer):
1320    """Adding a metamodel into CMA."""
1321
1322    def __init__(
1323        self,
1324        parametrization: IntOrParameter,
1325        budget: tp.Optional[int] = None,
1326        num_workers: int = 1,
1327        multivariate_optimizer: tp.Optional[base.OptCls] = None,
1328    ) -> None:
1329        super().__init__(parametrization, budget=budget, num_workers=num_workers)
1330        if multivariate_optimizer is None:
1331            multivariate_optimizer = CMA if self.dimension > 1 else OnePlusOne
1332        self._optim = multivariate_optimizer(
1333            self.parametrization, budget, num_workers
1334        )  # share parametrization and its rng
1335
1336    def _internal_ask_candidate(self) -> p.Parameter:
1337        # We request a bit more points than what is really necessary for our dimensionality (+dimension).
1338        sample_size = int((self.dimension * (self.dimension - 1)) / 2 + 2 * self.dimension + 1)
1339        if (
1340            self._num_ask % max(13, self.num_workers, self.dimension) == 0
1341            and len(self.archive) >= sample_size
1342        ):
1343            try:
1344                data = learn_on_k_best(self.archive, sample_size)
1345                candidate = self.parametrization.spawn_child().set_standardized_data(data)
1346            except InfiniteMetaModelOptimum:  # The optimum is at infinity. Shit happens.
1347                candidate = self._optim.ask()
1348        else:
1349            candidate = self._optim.ask()
1350        return candidate
1351
1352    def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
1353        self._optim.tell(candidate, loss)
1354
1355
1356class ParaPortfolio(Portfolio):
1357    """Passive portfolio of CMA, 2-pt DE, PSO, SQP and Scr-Hammersley."""
1358
1359    def __init__(
1360        self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1
1361    ) -> None:
1362        super().__init__(parametrization, budget=budget, num_workers=num_workers)
1363        assert budget is not None
1364
1365        def intshare(n: int, m: int) -> tp.Tuple[int, ...]:
1366            x = [n // m] * m
1367            i = 0
1368            while sum(x) < n:
1369                x[i] += 1
1370                i += 1
1371            return tuple(x)
1372
1373        nw1, nw2, nw3, nw4 = intshare(num_workers - 1, 4)
1374        self.which_optim = [0] * nw1 + [1] * nw2 + [2] * nw3 + [3] + [4] * nw4
1375        assert len(self.which_optim) == num_workers
1376        # b1, b2, b3, b4, b5 = intshare(budget, 5)
1377        self.optims: tp.List[base.Optimizer] = [
1378            CMA(self.parametrization, num_workers=nw1),  # share parametrization and its rng
1379            TwoPointsDE(self.parametrization, num_workers=nw2),  # noqa: F405
1380            PSO(self.parametrization, num_workers=nw3),
1381            SQP(self.parametrization, num_workers=1),  # noqa: F405
1382            ScrHammersleySearch(
1383                self.parametrization, budget=(budget // len(self.which_optim)) * nw4
1384            ),  # noqa: F405
1385        ]
1386
1387    def _internal_ask_candidate(self) -> p.Parameter:
1388        optim_index = self.which_optim[self._num_ask % len(self.which_optim)]
1389        candidate = self.optims[optim_index].ask()
1390        candidate._meta["optim_index"] = optim_index
1391        return candidate
1392
1393
1394@registry.register
1395class SQPCMA(ParaPortfolio):
1396    """Passive portfolio of CMA and many SQP."""
1397
1398    def __init__(
1399        self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1
1400    ) -> None:
1401        super().__init__(parametrization, budget=budget, num_workers=num_workers)
1402        assert budget is not None
1403        nw = num_workers // 2
1404        self.which_optim = [0] * nw
1405        for i in range(num_workers - nw):
1406            self.which_optim += [i + 1]
1407        assert len(self.which_optim) == num_workers
1408        # b1, b2, b3, b4, b5 = intshare(budget, 5)
1409        self.optims = [CMA(self.parametrization, num_workers=nw)]  # share parametrization and its rng
1410        for i in range(num_workers - nw):
1411            self.optims += [SQP(self.parametrization, num_workers=1)]  # noqa: F405
1412            if i > 0:
1413                self.optims[-1].initial_guess = self._rng.normal(0, 1, self.dimension)  # type: ignore
1414
1415
1416@registry.register
1417class ASCMADEthird(Portfolio):
1418    """Algorithm selection, with CMA and Lhs-DE. Active selection at 1/3."""
1419
1420    def __init__(
1421        self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1
1422    ) -> None:
1423        super().__init__(parametrization, budget=budget, num_workers=num_workers)
1424        assert budget is not None
1425        self.optims = [
1426            CMA(
1427                self.parametrization, budget=None, num_workers=num_workers
1428            ),  # share parametrization and its rng
1429            LhsDE(self.parametrization, budget=None, num_workers=num_workers),
1430        ]  # noqa: F405
1431        self.budget_before_choosing = budget // 3
1432        self.best_optim = -1
1433
1434    def _internal_ask_candidate(self) -> p.Parameter:
1435        if self.budget_before_choosing > 0:
1436            self.budget_before_choosing -= 1
1437            optim_index = self._num_ask % len(self.optims)
1438        else:
1439            if self.best_optim is None:
1440                best_loss = float("inf")
1441                optim_index = -1
1442                for i, optim in enumerate(self.optims):
1443                    val = optim.current_bests["pessimistic"].get_estimation("pessimistic")
1444                    if not val > best_loss:
1445                        optim_index = i
1446                        best_loss = val
1447                self.best_optim = optim_index
1448            optim_index = self.best_optim
1449        candidate = self.optims[optim_index].ask()
1450        candidate._meta["optim_index"] = optim_index
1451        return candidate
1452
1453
1454@registry.register
1455class CMandAS2(ASCMADEthird):
1456    """Competence map, with algorithm selection in one of the cases (3 CMAs)."""
1457
1458    def __init__(
1459        self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1
1460    ) -> None:
1461        super().__init__(parametrization, budget=budget, num_workers=num_workers)
1462        self.optims = [TwoPointsDE(self.parametrization, budget=None, num_workers=num_workers)]  # noqa: F405
1463        assert budget is not None
1464        self.budget_before_choosing = 2 * budget
1465        if budget < 201:
1466            self.optims = [OnePlusOne(self.parametrization, budget=None, num_workers=num_workers)]
1467        if budget > 50 * self.dimension or num_workers < 30:
1468            self.optims = [
1469                MetaModel(self.parametrization, budget=None, num_workers=num_workers) for _ in range(3)
1470            ]
1471            self.budget_before_choosing = budget // 10
1472
1473
1474@registry.register
1475class CMandAS3(ASCMADEthird):
1476    """Competence map, with algorithm selection in one of the cases (3 CMAs)."""
1477
1478    def __init__(
1479        self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1
1480    ) -> None:
1481        super().__init__(parametrization, budget=budget, num_workers=num_workers)
1482        self.optims = [TwoPointsDE(self.parametrization, budget=None, num_workers=num_workers)]  # noqa: F405
1483        assert budget is not None
1484        self.budget_before_choosing = 2 * budget
1485        if budget < 201:
1486            self.optims = [OnePlusOne(self.parametrization, budget=None, num_workers=num_workers)]
1487        if budget > 50 * self.dimension or num_workers < 30:
1488            if num_workers == 1:
1489                self.optims = [
1490                    ChainCMAPowell(
1491                        self.parametrization, budget=None, num_workers=num_workers
1492                    ),  # share parametrization and its rng
1493                    ChainCMAPowell(self.parametrization, budget=None, num_workers=num_workers),
1494                    ChainCMAPowell(self.parametrization, budget=None, num_workers=num_workers),
1495                ]
1496            else:
1497                self.optims = [
1498                    CMA(
1499                        self.parametrization, budget=None, num_workers=num_workers
1500                    ),  # share parametrization and its rng
1501                    CMA(self.parametrization, budget=None, num_workers=num_workers),
1502                    CMA(self.parametrization, budget=None, num_workers=num_workers),
1503                ]
1504            self.budget_before_choosing = budget // 10
1505
1506
1507@registry.register
1508class CM(CMandAS2):
1509    """Competence map, simplest."""
1510
1511    def __init__(
1512        self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1
1513    ) -> None:
1514        super().__init__(parametrization, budget=budget, num_workers=num_workers)
1515        assert budget is not None
1516        # share parametrization and its random number generator between all underlying optimizers
1517        self.optims = [TwoPointsDE(self.parametrization, budget=None, num_workers=num_workers)]  # noqa: F405
1518        self.budget_before_choosing = 2 * budget
1519        if budget < 201:
1520            self.optims = [OnePlusOne(self.parametrization, budget=None, num_workers=num_workers)]
1521        if budget > 50 * self.dimension:
1522            self.optims = [CMA(self.parametrization, budget=None, num_workers=num_workers)]
1523
1524
1525@registry.register
1526class MultiCMA(CM):
1527    """Combining 3 CMAs. Exactly identical. Active selection at 1/10 of the budget."""
1528
1529    def __init__(
1530        self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1
1531    ) -> None:
1532        super().__init__(parametrization, budget=budget, num_workers=num_workers)
1533        assert budget is not None
1534        self.optims = [
1535            CMA(
1536                self.parametrization, budget=None, num_workers=num_workers
1537            ),  # share parametrization and its rng
1538            CMA(self.parametrization, budget=None, num_workers=num_workers),
1539            CMA(self.parametrization, budget=None, num_workers=num_workers),
1540        ]
1541        self.budget_before_choosing = budget // 10
1542
1543
1544@registry.register
1545class MultiDiscrete(CM):
1546    """Combining 3 Discrete(1+1). Exactly identical. Active selection at 1/10 of the budget."""
1547
1548    def __init__(
1549        self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1
1550    ) -> None:
1551        super().__init__(parametrization, budget=budget, num_workers=num_workers)
1552        assert budget is not None
1553        self.optims = [
1554            DiscreteOnePlusOne(
1555                self.parametrization, budget=budget // 12, num_workers=num_workers
1556            ),  # share parametrization and its rng
1557            DiscreteBSOOnePlusOne(self.parametrization, budget=budget // 12, num_workers=num_workers),
1558            DoubleFastGADiscreteOnePlusOne(
1559                self.parametrization, budget=(budget // 4) - 2 * (budget // 12), num_workers=num_workers
1560            ),
1561        ]
1562        self.budget_before_choosing = budget // 4
1563
1564
1565@registry.register
1566class TripleCMA(CM):
1567    """Combining 3 CMAs. Exactly identical. Active selection at 1/3 of the budget."""
1568
1569    def __init__(
1570        self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1
1571    ) -> None:
1572        super().__init__(parametrization, budget=budget, num_workers=num_workers)
1573        assert budget is not None
1574        self.optims = [
1575            ParametrizedCMA(random_init=True)(
1576                self.parametrization, budget=None, num_workers=num_workers
1577            ),  # share parametrization and its rng
1578            ParametrizedCMA(random_init=True)(self.parametrization, budget=None, num_workers=num_workers),
1579            ParametrizedCMA(random_init=True)(self.parametrization, budget=None, num_workers=num_workers),
1580        ]
1581        self.budget_before_choosing = budget // 3
1582
1583
1584@registry.register
1585class PolyCMA(CM):
1586    """Combining 20 CMAs. Exactly identical. Active selection at 1/3 of the budget."""
1587
1588    def __init__(
1589        self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1
1590    ) -> None:
1591        super().__init__(parametrization, budget=budget, num_workers=num_workers)
1592        assert budget is not None
1593        self.optims = [
1594            ParametrizedCMA(random_init=True)(self.parametrization, budget=None, num_workers=num_workers)
1595            for _ in range(20)
1596        ]
1597
1598        self.budget_before_choosing = budget // 3
1599
1600
1601@registry.register
1602class MultiScaleCMA(CM):
1603    """Combining 3 CMAs with different init scale. Active selection at 1/3 of the budget."""
1604
1605    def __init__(
1606        self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1
1607    ) -> None:
1608        super().__init__(parametrization, budget=budget, num_workers=num_workers)
1609        self.optims = [
1610            CMA(
1611                self.parametrization, budget=None, num_workers=num_workers
1612            ),  # share parametrization and its rng
1613            ParametrizedCMA(scale=1e-3, random_init=True)(
1614                self.parametrization, budget=None, num_workers=num_workers
1615            ),
1616            ParametrizedCMA(scale=1e-6, random_init=True)(
1617                self.parametrization, budget=None, num_workers=num_workers
1618            ),
1619        ]
1620        assert budget is not None
1621        self.budget_before_choosing = budget // 3
1622
1623
1624class _FakeFunction:
1625    """Simple function that returns the loss which was registered just before.
1626    This is a hack for BO.
1627    """
1628
1629    def __init__(self, num_digits: int) -> None:
1630        self.num_digits = num_digits
1631        self._registered: tp.List[tp.Tuple[np.ndarray, float]] = []
1632
1633    def key(self, num: int) -> str:
1634        """Key corresponding to the array sample
1635        (uses zero-filling to keep order)
1636        """
1637        return "x" + str(num).zfill(self.num_digits)
1638
1639    def register(self, x: np.ndarray, loss: tp.FloatLoss) -> None:
1640        if self._registered:
1641            raise RuntimeError("Only one call can be registered at a time")
1642        self._registered.append((x, loss))
1643
1644    def __call__(self, **kwargs: float) -> float:
1645        if not self._registered:
1646            raise RuntimeError("Call must be registered first")
1647        x = [kwargs[self.key(i)] for i in range(len(kwargs))]
1648        xr, loss = self._registered[0]
1649        np.testing.assert_array_almost_equal(x, xr, err_msg="Call does not match registered")
1650        self._registered.clear()
1651        return loss
1652
1653
1654class _BO(base.Optimizer):
1655    def __init__(
1656        self,
1657        parametrization: IntOrParameter,
1658        budget: tp.Optional[int] = None,
1659        num_workers: int = 1,
1660        *,
1661        initialization: tp.Optional[str] = None,
1662        init_budget: tp.Optional[int] = None,
1663        middle_point: bool = False,
1664        utility_kind: str = "ucb",  # bayes_opt default
1665        utility_kappa: float = 2.576,
1666        utility_xi: float = 0.0,
1667        gp_parameters: tp.Optional[tp.Dict[str, tp.Any]] = None,
1668    ) -> None:
1669        super().__init__(parametrization, budget=budget, num_workers=num_workers)
1670        self._transform = transforms.ArctanBound(0, 1)
1671        self._bo: tp.Optional[BayesianOptimization] = None
1672        self._fake_function = _FakeFunction(num_digits=len(str(self.dimension)))
1673        # initialization
1674        init = initialization
1675        self._init_budget = init_budget
1676        self._middle_point = middle_point
1677        if init is None:
1678            self._InitOpt: tp.Optional[base.ConfiguredOptimizer] = None
1679        elif init == "random":
1680            self._InitOpt = oneshot.RandomSearch
1681        else:
1682            self._InitOpt = oneshot.SamplingSearch(sampler=init, scrambled=init == "Hammersley")
1683        # configuration
1684        self.utility_kind = utility_kind
1685        self.utility_kappa = utility_kappa
1686        self.utility_xi = utility_xi
1687        self.gp_parameters = {} if gp_parameters is None else gp_parameters
1688        if isinstance(parametrization, p.Parameter) and self.gp_parameters.get("alpha", 0) == 0:
1689            analysis = p.helpers.analyze(parametrization)
1690            noisy = not analysis.deterministic
1691            cont = analysis.continuous
1692            if noisy or not cont:
1693                warnings.warn(
1694                    "Dis-continuous and noisy parametrization require gp_parameters['alpha'] > 0 "
1695                    "(for your parametrization, continuity={cont} and noisy={noisy}).\n"
1696                    "Find more information on BayesianOptimization's github.\n"
1697                    "You should then create a new instance of optimizerlib.ParametrizedBO with appropriate parametrization.",
1698                    errors.InefficientSettingsWarning,
1699                )
1700
1701    @property
1702    def bo(self) -> BayesianOptimization:
1703        if self._bo is None:
1704            bounds = {self._fake_function.key(i): (0.0, 1.0) for i in range(self.dimension)}
1705            self._bo = BayesianOptimization(self._fake_function, bounds, random_state=self._rng)
1706            init_budget = max(
1707                2, int(np.sqrt(self.budget) if self._init_budget is None else self._init_budget)
1708            )
1709            if self.gp_parameters is not None:
1710                self._bo.set_gp_params(**self.gp_parameters)
1711            # init
1712            if self._middle_point:
1713                self._bo.probe([0.5] * self.dimension, lazy=True)
1714                init_budget -= 1
1715            if self._InitOpt is not None and init_budget > 0:
1716                param = p.Array(shape=(self.dimension,)).set_bounds(lower=0, upper=1.0)
1717                param.random_state = self._rng
1718                opt = self._InitOpt(param, budget=init_budget)
1719                for _ in range(init_budget):
1720                    self._bo.probe(opt.ask().value, lazy=True)
1721            else:  # default
1722                for _ in range(init_budget):
1723                    self._bo.probe(self._bo._space.random_sample(), lazy=True)
1724        return self._bo
1725
1726    def _internal_ask_candidate(self) -> p.Parameter:
1727        util = UtilityFunction(kind=self.utility_kind, kappa=self.utility_kappa, xi=self.utility_xi)
1728        if self.bo._queue:
1729            x_probe = next(self.bo._queue)
1730        else:
1731            x_probe = self.bo.suggest(util)  # this is time consuming
1732            x_probe = [x_probe[self._fake_function.key(i)] for i in range(len(x_probe))]
1733        data = self._transform.backward(np.array(x_probe, copy=False))
1734        candidate = self.parametrization.spawn_child().set_standardized_data(data)
1735        candidate._meta["x_probe"] = x_probe
1736        return candidate
1737
1738    def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
1739        if "x_probe" in candidate._meta:
1740            y = candidate._meta["x_probe"]
1741        else:
1742            data = candidate.get_standardized_data(reference=self.parametrization)
1743            y = self._transform.forward(data)  # tell not asked
1744        self._fake_function.register(y, -loss)  # minimizing
1745        self.bo.probe(y, lazy=False)
1746        # for some unknown reasons, BO wants to evaluate twice the same point,
1747        # but since it keeps a cache of the values, the registered value is not used
1748        # so we should clean the "fake" function
1749        self._fake_function._registered.clear()
1750
1751    def _internal_provide_recommendation(self) -> tp.Optional[tp.ArrayLike]:
1752        if not self.archive:
1753            return None
1754        return self._transform.backward(
1755            np.array([self.bo.max["params"][self._fake_function.key(i)] for i in range(self.dimension)])
1756        )
1757
1758
1759class ParametrizedBO(base.ConfiguredOptimizer):
1760    """Bayesian optimization.
1761    Hyperparameter tuning method, based on statistical modeling of the objective function.
1762    This class is a wrapper over the `bayes_opt <https://github.com/fmfn/BayesianOptimization>`_ package.
1763
1764    Parameters
1765    ----------
1766    initialization: str
1767        Initialization algorithms (None, "Hammersley", "random" or "LHS")
1768    init_budget: int or None
1769        Number of initialization algorithm steps
1770    middle_point: bool
1771        whether to sample the 0 point first
1772    utility_kind: str
1773        Type of utility function to use among "ucb", "ei" and "poi"
1774    utility_kappa: float
1775        Kappa parameter for the utility function
1776    utility_xi: float
1777        Xi parameter for the utility function
1778    gp_parameters: dict
1779        dictionnary of parameters for the gaussian process
1780    """
1781
1782    no_parallelization = True
1783
1784    # pylint: disable=unused-argument
1785    def __init__(
1786        self,
1787        *,
1788        initialization: tp.Optional[str] = None,
1789        init_budget: tp.Optional[int] = None,
1790        middle_point: bool = False,
1791        utility_kind: str = "ucb",  # bayes_opt default
1792        utility_kappa: float = 2.576,
1793        utility_xi: float = 0.0,
1794        gp_parameters: tp.Optional[tp.Dict[str, tp.Any]] = None,
1795    ) -> None:
1796        super().__init__(_BO, locals())
1797
1798
1799BO = ParametrizedBO().set_name("BO", register=True)
1800
1801
1802class _Chain(base.Optimizer):
1803    def __init__(
1804        self,
1805        parametrization: IntOrParameter,
1806        budget: tp.Optional[int] = None,
1807        num_workers: int = 1,
1808        *,
1809        optimizers: tp.Sequence[tp.Union[base.ConfiguredOptimizer, tp.Type[base.Optimizer]]] = [
1810            LHSSearch,
1811            DE,
1812        ],
1813        budgets: tp.Sequence[tp.Union[str, int]] = (10,),
1814    ) -> None:
1815        super().__init__(parametrization, budget=budget, num_workers=num_workers)
1816        # delayed initialization
1817        # Either we have the budget for each algorithm, or the last algorithm uses the rest of the budget, so:
1818        self.optimizers: tp.List[base.Optimizer] = []
1819        converter = {
1820            "num_workers": self.num_workers,
1821            "dimension": self.dimension,
1822            "half": self.budget // 2 if self.budget else self.num_workers,
1823            "third": self.budget // 3 if self.budget else self.num_workers,
1824            "sqrt": int(np.sqrt(self.budget)) if self.budget else self.num_workers,
1825        }
1826        self.budgets = [max(1, converter[b]) if isinstance(b, str) else b for b in budgets]
1827        last_budget = None if self.budget is None else max(4, self.budget - sum(self.budgets))
1828        assert len(optimizers) == len(self.budgets) + 1
1829        assert all(
1830            x in ("third", "half", "dimension", "num_workers", "sqrt") or x > 0 for x in self.budgets
1831        ), str(self.budgets)
1832        for opt, optbudget in zip(optimizers, self.budgets + [last_budget]):  # type: ignore
1833            self.optimizers.append(opt(self.parametrization, budget=optbudget, num_workers=self.num_workers))
1834        if self.name.startswith("chain"):
1835            warnings.warn(
1836                "Chain optimizers are renamed with a capital C for consistency. "
1837                "Eg: chainCMAPowell becomes ChainCMAPowell",
1838                errors.NevergradDeprecationWarning,
1839            )
1840
1841    def _internal_ask_candidate(self) -> p.Parameter:
1842        # Which algorithm are we playing with ?
1843        sum_budget = 0.0
1844        opt = self.optimizers[0]
1845        for opt in self.optimizers:
1846            sum_budget += float("inf") if opt.budget is None else opt.budget
1847            if self.num_ask < sum_budget:
1848                break
1849        # if we are over budget, then use the last one...
1850        return opt.ask()
1851
1852    def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
1853        # Let us inform all concerned algorithms
1854        sum_budget = 0.0
1855        for opt in self.optimizers:
1856            sum_budget += float("inf") if opt.budget is None else opt.budget
1857            if self.num_tell < sum_budget:
1858                opt.tell(candidate, loss)
1859
1860
1861class Chaining(base.ConfiguredOptimizer):
1862    """
1863    A chaining consists in running algorithm 1 during T1, then algorithm 2 during T2, then algorithm 3 during T3, etc.
1864    Each algorithm is fed with what happened before it.
1865
1866    Parameters
1867    ----------
1868    optimizers: list of Optimizer classes
1869        the sequence of optimizers to use
1870    budgets: list of int
1871        the corresponding budgets for each optimizer but the last one
1872
1873    """
1874
1875    # pylint: disable=unused-argument
1876    def __init__(
1877        self,
1878        optimizers: tp.Sequence[tp.Union[base.ConfiguredOptimizer, tp.Type[base.Optimizer]]],
1879        budgets: tp.Sequence[tp.Union[str, int]],
1880    ) -> None:
1881        super().__init__(_Chain, locals())
1882
1883
1884# depreated: old names (need a capital letter for consistency
1885chainCMAPowell = Chaining([CMA, Powell], ["half"]).set_name("chainCMAPowell", register=True)
1886chainCMAPowell.no_parallelization = True
1887chainMetaModelSQP = Chaining([MetaModel, SQP], ["half"]).set_name("chainMetaModelSQP", register=True)
1888chainMetaModelSQP.no_parallelization = True
1889chainMetaModelPowell = Chaining([MetaModel, Powell], ["half"]).set_name("chainMetaModelPowell", register=True)
1890chainMetaModelPowell.no_parallelization = True
1891chainDiagonalCMAPowell = Chaining([DiagonalCMA, Powell], ["half"]).set_name(
1892    "chainDiagonalCMAPowell", register=True
1893)
1894chainDiagonalCMAPowell.no_parallelization = True
1895chainNaiveTBPSAPowell = Chaining([NaiveTBPSA, Powell], ["half"]).set_name(
1896    "chainNaiveTBPSAPowell", register=True
1897)
1898chainNaiveTBPSAPowell.no_parallelization = True
1899chainNaiveTBPSACMAPowell = Chaining([NaiveTBPSA, CMA, Powell], ["third", "third"]).set_name(
1900    "chainNaiveTBPSACMAPowell", register=True
1901)
1902chainNaiveTBPSACMAPowell.no_parallelization = True
1903
1904# new names
1905ChainCMAPowell = Chaining([CMA, Powell], ["half"]).set_name("ChainCMAPowell", register=True)
1906ChainCMAPowell.no_parallelization = True  # TODO make this automatic
1907ChainMetaModelSQP = Chaining([MetaModel, SQP], ["half"]).set_name("ChainMetaModelSQP", register=True)
1908ChainMetaModelSQP.no_parallelization = True
1909ChainMetaModelPowell = Chaining([MetaModel, Powell], ["half"]).set_name("ChainMetaModelPowell", register=True)
1910ChainMetaModelPowell.no_parallelization = True
1911ChainDiagonalCMAPowell = Chaining([DiagonalCMA, Powell], ["half"]).set_name(
1912    "ChainDiagonalCMAPowell", register=True
1913)
1914ChainDiagonalCMAPowell.no_parallelization = True
1915ChainNaiveTBPSAPowell = Chaining([NaiveTBPSA, Powell], ["half"]).set_name(
1916    "ChainNaiveTBPSAPowell", register=True
1917)
1918ChainNaiveTBPSAPowell.no_parallelization = True
1919ChainNaiveTBPSACMAPowell = Chaining([NaiveTBPSA, CMA, Powell], ["third", "third"]).set_name(
1920    "ChainNaiveTBPSACMAPowell", register=True
1921)
1922ChainNaiveTBPSACMAPowell.no_parallelization = True
1923
1924
1925@registry.register
1926class cGA(base.Optimizer):
1927    """`Compact Genetic Algorithm <https://ieeexplore.ieee.org/document/797971>`_.
1928    A discrete optimization algorithm, introduced in and often used as a first baseline.
1929    """
1930
1931    # pylint: disable=too-many-instance-attributes
1932
1933    def __init__(
1934        self,
1935        parametrization: IntOrParameter,
1936        budget: tp.Optional[int] = None,
1937        num_workers: int = 1,
1938        arity: tp.Optional[int] = None,
1939    ) -> None:
1940        super().__init__(parametrization, budget=budget, num_workers=num_workers)
1941        if arity is None:
1942            all_params = p.helpers.flatten(self.parametrization)
1943            arity = max(
1944                len(param.choices) if isinstance(param, p.TransitionChoice) else 500
1945                for _, param in all_params
1946            )
1947        self._arity = arity
1948        self._penalize_cheap_violations = False  # Not sure this is the optimal decision.
1949        # self.p[i][j] is the probability that the ith variable has value 0<=j< arity.
1950        self.p: np.ndarray = np.ones((self.dimension, arity)) / arity
1951        # Probability increments are of order 1./self.llambda
1952        # and lower bounded by something of order 1./self.llambda.
1953        self.llambda = max(num_workers, 40)  # FIXME: no good heuristic ?
1954        # CGA generates a candidate, then a second candidate;
1955        # then updates depending on the comparison with the first one. We therefore have to store the previous candidate.
1956        self._previous_value_candidate: tp.Optional[tp.Tuple[float, np.ndarray]] = None
1957
1958    def _internal_ask_candidate(self) -> p.Parameter:
1959        # Multinomial.
1960        values: tp.List[int] = [
1961            sum(self._rng.uniform() > cum_proba) for cum_proba in np.cumsum(self.p, axis=1)
1962        ]
1963        data = discretization.noisy_inverse_threshold_discretization(values, arity=self._arity, gen=self._rng)
1964        return self.parametrization.spawn_child().set_standardized_data(data)
1965
1966    def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
1967        data = candidate.get_standardized_data(reference=self.parametrization)
1968        if self._previous_value_candidate is None:
1969            self._previous_value_candidate = (loss, data)
1970        else:
1971            winner, loser = self._previous_value_candidate[1], data
1972            if self._previous_value_candidate[0] > loss:
1973                winner, loser = loser, winner
1974            winner_data = discretization.threshold_discretization(np.asarray(winner.data), arity=self._arity)
1975            loser_data = discretization.threshold_discretization(np.asarray(loser.data), arity=self._arity)
1976            for i, _ in enumerate(winner_data):
1977                if winner_data[i] != loser_data[i]:
1978                    self.p[i][winner_data[i]] += 1.0 / self.llambda
1979                    self.p[i][loser_data[i]] -= 1.0 / self.llambda
1980                    for j in range(len(self.p[i])):
1981                        self.p[i][j] = max(self.p[i][j], 1.0 / self.llambda)
1982                    self.p[i] /= sum(self.p[i])
1983            self._previous_value_candidate = None
1984
1985
1986class _EMNA(base.Optimizer):
1987    """Simple Estimation of Multivariate Normal Algorithm (EMNA)."""
1988
1989    # pylint: disable=too-many-instance-attributes
1990
1991    def __init__(
1992        self,
1993        parametrization: IntOrParameter,
1994        budget: tp.Optional[int] = None,
1995        num_workers: int = 1,
1996        isotropic: bool = True,
1997        naive: bool = True,
1998        population_size_adaptation: bool = False,
1999        initial_popsize: tp.Optional[int] = None,
2000    ) -> None:
2001        super().__init__(parametrization, budget=budget, num_workers=num_workers)
2002        self.isotropic: bool = isotropic
2003        self.naive: bool = naive
2004        self.population_size_adaptation = population_size_adaptation
2005        self.min_coef_parallel_context: int = 8
2006        # Sigma initialization
2007        self.sigma: tp.Union[float, np.ndarray]
2008        if initial_popsize is None:
2009            initial_popsize = self.dimension
2010        if self.isotropic:
2011            self.sigma = 1.0
2012        else:
2013            self.sigma = np.ones(self.dimension)
2014        # population size and parent size initializations
2015        self.popsize = _PopulationSizeController(
2016            llambda=4 * initial_popsize, mu=initial_popsize, dimension=self.dimension, num_workers=num_workers
2017        )
2018        if not self.population_size_adaptation:
2019            self.popsize.mu = max(16, self.dimension)
2020            self.popsize.llambda = 4 * self.popsize.mu
2021            self.popsize.llambda = max(self.popsize.llambda, num_workers)
2022            if budget is not None and self.popsize.llambda > budget:
2023                self.popsize.llambda = budget
2024                self.popsize.mu = self.popsize.llambda // 4
2025                warnings.warn(
2026                    "Budget may be too small in front of the dimension for EMNA",
2027                    errors.InefficientSettingsWarning,
2028                )
2029        self.current_center: np.ndarray = np.zeros(self.dimension)
2030        # population
2031        self.parents: tp.List[p.Parameter] = [self.parametrization]
2032        self.children: tp.List[p.Parameter] = []
2033
2034    def recommend(self) -> p.Parameter:
2035        if self.naive:
2036            return self.current_bests["optimistic"].parameter
2037        else:
2038            # This is NOT the naive version. We deal with noise.
2039            out = self.parametrization.spawn_child()
2040            with p.helpers.deterministic_sampling(out):
2041                out.set_standardized_data(self.current_center)
2042            return out
2043
2044    def _internal_ask_candidate(self) -> p.Parameter:
2045        sigma_tmp = self.sigma
2046        if (
2047            self.population_size_adaptation
2048            and self.popsize.llambda < self.min_coef_parallel_context * self.dimension
2049        ):
2050            sigma_tmp = self.sigma * np.exp(self._rng.normal(0, 1) / np.sqrt(self.dimension))
2051        individual = self.current_center + sigma_tmp * self._rng.normal(0, 1, self.dimension)
2052        parent = self.parents[self.num_ask % len(self.parents)]
2053        candidate = parent.spawn_child().set_standardized_data(individual, reference=self.parametrization)
2054        if parent is self.parametrization:
2055            candidate.heritage["lineage"] = candidate.uid
2056        candidate._meta["sigma"] = sigma_tmp
2057        return candidate
2058
2059    def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
2060        if self.population_size_adaptation:
2061            self.popsize.add_value(loss)
2062        self.children.append(candidate)
2063        if len(self.children) >= self.popsize.llambda:
2064            # Sorting the population.
2065            self.children.sort(key=base._loss)
2066            # Computing the new parent.
2067            self.parents = self.children[: self.popsize.mu]
2068            self.children = []
2069            self.current_center = (
2070                sum(  # type: ignore
2071                    c.get_standardized_data(reference=self.parametrization) for c in self.parents
2072                )
2073                / self.popsize.mu
2074            )
2075            if self.population_size_adaptation:
2076                if (
2077                    self.popsize.llambda < self.min_coef_parallel_context * self.dimension
2078                ):  # Population size not large enough for emna
2079                    self.sigma = np.exp(
2080                        np.sum(
2081                            np.log([c._meta["sigma"] for c in self.parents]),
2082                            axis=0 if self.isotropic else None,
2083                        )
2084                        / self.popsize.mu
2085                    )
2086                else:
2087                    stdd = [
2088                        (
2089                            self.parents[i].get_standardized_data(reference=self.parametrization)
2090                            - self.current_center
2091                        )
2092                        ** 2
2093                        for i in range(self.popsize.mu)
2094                    ]
2095                    self.sigma = np.sqrt(
2096                        np.sum(stdd) / (self.popsize.mu * (self.dimension if self.isotropic else 1))
2097                    )
2098            else:
2099                # EMNA update
2100                stdd = [
2101                    (
2102                        self.parents[i].get_standardized_data(reference=self.parametrization)
2103                        - self.current_center
2104                    )
2105                    ** 2
2106                    for i in range(self.popsize.mu)
2107                ]
2108                self.sigma = np.sqrt(
2109                    np.sum(stdd, axis=0 if self.isotropic else None)
2110                    / (self.popsize.mu * (self.dimension if self.isotropic else 1))
2111                )
2112
2113            if self.num_workers / self.dimension > 32:  # faster decrease of sigma if large parallel context
2114                imp = max(1, (np.log(self.popsize.llambda) / 2) ** (1 / self.dimension))
2115                self.sigma /= imp
2116
2117    def _internal_tell_not_asked(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
2118        raise errors.TellNotAskedNotSupportedError
2119
2120
2121class EMNA(base.ConfiguredOptimizer):
2122    """Estimation of Multivariate Normal Algorithm
2123    This algorithm is quite efficient in a parallel context, i.e. when
2124    the population size is large.
2125
2126    Parameters
2127    ----------
2128    isotropic: bool
2129        isotropic version on EMNA if True, i.e. we have an
2130        identity matrix for the Gaussian, else  we here consider the separable
2131        version, meaning we have a diagonal matrix for the Gaussian (anisotropic)
2132    naive: bool
2133        set to False for noisy problem, so that the best points will be an
2134        average of the final population.
2135    population_size_adaptation: bool
2136        population size automatically adapts to the landscape
2137    initial_popsize: Optional[int]
2138        initial (and minimal) population size (default: 4 x dimension)
2139    """
2140
2141    # pylint: disable=unused-argument
2142    def __init__(
2143        self,
2144        *,
2145        isotropic: bool = True,
2146        naive: bool = True,
2147        population_size_adaptation: bool = False,
2148        initial_popsize: tp.Optional[int] = None,
2149    ) -> None:
2150        super().__init__(_EMNA, locals())
2151
2152
2153NaiveIsoEMNA = EMNA().set_name("NaiveIsoEMNA", register=True)
2154
2155
2156# Discussions with Jialin Liu and Fabien Teytaud helped the following development.
2157# This includes discussion at Dagstuhl's 2019 seminars on randomized search heuristics and computational intelligence in games.
2158@registry.register
2159class NGOptBase(base.Optimizer):
2160    """Nevergrad optimizer by competence map."""
2161
2162    # pylint: disable=too-many-branches
2163    def __init__(
2164        self, parametrization: IntOrParameter, budget: tp.Optional[int] = None, num_workers: int = 1
2165    ) -> None:
2166        super().__init__(parametrization, budget=budget, num_workers=num_workers)
2167        analysis = p.helpers.analyze(self.parametrization)
2168        funcinfo = self.parametrization.function
2169        self.has_noise = not (analysis.deterministic and funcinfo.deterministic)
2170        # The noise coming from discrete variables goes to 0.
2171        self.noise_from_instrumentation = self.has_noise and funcinfo.deterministic
2172        self.fully_continuous = analysis.continuous
2173        all_params = p.helpers.flatten(self.parametrization)
2174        # figure out if there is any discretization layers
2175        int_layers = list(
2176            itertools.chain.from_iterable([_layering.Int.filter_from(x) for _, x in all_params])
2177        )
2178        int_layers = [x for x in int_layers if x.arity is not None]  # only "Choice" instances for now
2179        self.has_discrete_not_softmax = any(
2180            not isinstance(lay, _datalayers.SoftmaxSampling) for lay in int_layers
2181        )
2182        self._has_discrete = bool(int_layers)
2183        self._arity: int = max((lay.arity for lay in int_layers), default=-1)  # type: ignore
2184        if self.fully_continuous:
2185            self._arity = -1
2186        self._optim: tp.Optional[base.Optimizer] = None
2187        self._constraints_manager.update(
2188            max_trials=1000,
2189            penalty_factor=1.0,
2190            penalty_exponent=1.01,
2191        )
2192
2193    @property
2194    def optim(self) -> base.Optimizer:
2195        if self._optim is None:
2196            self._optim = self._select_optimizer_cls()(self.parametrization, self.budget, self.num_workers)
2197            optim = self._optim if not isinstance(self._optim, NGOptBase) else self._optim.optim
2198            logger.debug("%s selected %s optimizer.", *(x.name for x in (self, optim)))
2199        return self._optim
2200
2201    def _select_optimizer_cls(self) -> base.OptCls:
2202        # pylint: disable=too-many-nested-blocks
2203        assert self.budget is not None
2204        if self.has_noise and self.has_discrete_not_softmax:
2205            # noise and discrete: let us merge evolution and bandits.
2206            cls: base.OptCls = DoubleFastGADiscreteOnePlusOne if self.dimension < 60 else CMA
2207        else:
2208            if self.has_noise and self.fully_continuous:
2209                # This is the real of population control. FIXME: should we pair with a bandit ?
2210                cls = TBPSA
2211            else:
2212                if (
2213                    self.has_discrete_not_softmax
2214                    or not self.parametrization.function.metrizable
2215                    or not self.fully_continuous
2216                ):
2217                    cls = DoubleFastGADiscreteOnePlusOne
2218                else:
2219                    if self.num_workers > self.budget / 5:
2220                        if self.num_workers > self.budget / 2.0 or self.budget < self.dimension:
2221                            cls = MetaRecentering
2222                        else:
2223                            cls = NaiveTBPSA
2224                    else:
2225                        # Possibly a good idea to go memetic for large budget, but something goes wrong for the moment.
2226                        if (
2227                            self.num_workers == 1 and self.budget > 6000 and self.dimension > 7
2228                        ):  # Let us go memetic.
2229                            cls = ChainCMAPowell
2230                        else:
2231                            if self.num_workers == 1 and self.budget < self.dimension * 30:
2232                                # One plus one so good in large ratio "dimension / budget".
2233                                cls = OnePlusOne if self.dimension > 30 else Cobyla
2234                            else:
2235                                # DE is great in such a case (?).
2236                                cls = (
2237                                    DE if self.dimension > 2000 else CMA if self.dimension > 1 else OnePlusOne
2238                                )
2239        return cls
2240
2241    def _internal_ask_candidate(self) -> p.Parameter:
2242        return self.optim.ask()
2243
2244    def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
2245        self.optim.tell(candidate, loss)
2246
2247    def recommend(self) -> p.Parameter:
2248        return self.optim.recommend()
2249
2250    def _internal_tell_not_asked(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
2251        self.optim.tell(candidate, loss)
2252
2253
2254@registry.register
2255class Shiwa(NGOptBase):
2256    """Nevergrad optimizer by competence map. You might modify this one for designing youe own competence map."""
2257
2258    def _select_optimizer_cls(self) -> base.OptCls:
2259        optCls: base.OptCls = NGOptBase
2260        funcinfo = self.parametrization.function
2261        if self.has_noise and (self.has_discrete_not_softmax or not funcinfo.metrizable):
2262            optCls = RecombiningPortfolioOptimisticNoisyDiscreteOnePlusOne
2263        elif self.dimension >= 60 and not funcinfo.metrizable:
2264            optCls = CMA
2265        return optCls
2266
2267
2268@registry.register
2269class NGO(NGOptBase):  # compatibility
2270    pass
2271
2272
2273@registry.register
2274class NGOpt4(NGOptBase):
2275    """Nevergrad optimizer by competence map. You might modify this one for designing youe own competence map."""
2276
2277    def _select_optimizer_cls(self) -> base.OptCls:
2278        self.fully_continuous = (
2279            self.fully_continuous and not self.has_discrete_not_softmax and self._arity < 0
2280        )
2281        budget, num_workers = self.budget, self.num_workers
2282        funcinfo = self.parametrization.function
2283        assert budget is not None
2284        optimClass: base.OptCls
2285        if self.has_noise and (self.has_discrete_not_softmax or not funcinfo.metrizable):
2286            mutation = "portfolio" if budget > 1000 else "discrete"
2287            optimClass = ParametrizedOnePlusOne(
2288                crossover=True, mutation=mutation, noise_handling="optimistic"
2289            )
2290        elif self._arity > 0:
2291            if self._arity == 2:
2292                optimClass = DiscreteOnePlusOne
2293            else:
2294                optimClass = AdaptiveDiscreteOnePlusOne if self._arity < 5 else CMandAS2
2295        else:
2296            # pylint: disable=too-many-nested-blocks
2297            if self.has_noise and self.fully_continuous and self.dimension > 100:
2298                # Waow, this is actually a discrete algorithm.
2299                optimClass = ConfSplitOptimizer(
2300                    num_optims=13, progressive=True, multivariate_optimizer=OptimisticDiscreteOnePlusOne
2301                )
2302            else:
2303                if self.has_noise and self.fully_continuous:
2304                    if budget > 100:
2305                        optimClass = (
2306                            OnePlusOne if self.noise_from_instrumentation or self.num_workers > 1 else SQP
2307                        )
2308                    else:
2309                        optimClass = OnePlusOne
2310                else:
2311                    if self.has_discrete_not_softmax or not funcinfo.metrizable or not self.fully_continuous:
2312                        optimClass = DoubleFastGADiscreteOnePlusOne
2313                    else:
2314                        if num_workers > budget / 5:
2315                            if num_workers > budget / 2.0 or budget < self.dimension:
2316                                optimClass = MetaTuneRecentering
2317                            elif self.dimension < 5 and budget < 100:
2318                                optimClass = DiagonalCMA
2319                            elif self.dimension < 5 and budget < 500:
2320                                optimClass = Chaining([DiagonalCMA, MetaModel], [100])
2321                            else:
2322                                optimClass = NaiveTBPSA
2323                        else:
2324                            # Possibly a good idea to go memetic for large budget, but something goes wrong for the moment.
2325                            if (
2326                                num_workers == 1 and budget > 6000 and self.dimension > 7
2327                            ):  # Let us go memetic.
2328                                optimClass = ChainNaiveTBPSACMAPowell
2329                            else:
2330                                if num_workers == 1 and budget < self.dimension * 30:
2331                                    if (
2332                                        self.dimension > 30
2333                                    ):  # One plus one so good in large ratio "dimension / budget".
2334                                        optimClass = OnePlusOne
2335                                    elif self.dimension < 5:
2336                                        optimClass = MetaModel
2337                                    else:
2338                                        optimClass = Cobyla
2339                                else:
2340                                    if self.dimension > 2000:  # DE is great in such a case (?).
2341                                        optimClass = DE
2342                                    else:
2343                                        if self.dimension < 10 and budget < 500:
2344                                            optimClass = MetaModel
2345                                        else:
2346                                            if (
2347                                                self.dimension > 40
2348                                                and num_workers > self.dimension
2349                                                and budget < 7 * self.dimension ** 2
2350                                            ):
2351                                                optimClass = DiagonalCMA
2352                                            elif (
2353                                                3 * num_workers > self.dimension ** 2
2354                                                and budget > self.dimension ** 2
2355                                            ):
2356                                                optimClass = MetaModel
2357                                            else:
2358                                                optimClass = CMA
2359        return optimClass
2360
2361
2362@registry.register
2363class NGOpt8(NGOpt4):
2364    """Nevergrad optimizer by competence map. You might modify this one for designing youe own competence map."""
2365
2366    def _select_optimizer_cls(self) -> base.OptCls:
2367        # Extracting info as far as possible.
2368        assert self.budget is not None
2369        funcinfo = self.parametrization.function
2370        optimClass: base.OptCls
2371        if self.has_noise and (self.has_discrete_not_softmax or not funcinfo.metrizable):
2372            if self.budget > 10000:
2373                optimClass = RecombiningPortfolioOptimisticNoisyDiscreteOnePlusOne
2374            else:
2375                optimClass = ParametrizedOnePlusOne(
2376                    crossover=True, mutation="discrete", noise_handling="optimistic"
2377                )
2378        elif self._arity > 0:
2379            if self.budget < 1000 and self.num_workers == 1:
2380                optimClass = DiscreteBSOOnePlusOne
2381            elif self.num_workers > 2:
2382                optimClass = CMandAS2  # type: ignore
2383            else:
2384                optimClass = super()._select_optimizer_cls()
2385        else:
2386            if (
2387                not (self.has_noise and self.fully_continuous and self.dimension > 100)
2388                and not (self.has_noise and self.fully_continuous)
2389                and not (self.num_workers > self.budget / 5)
2390                and (self.num_workers == 1 and self.budget > 6000 and self.dimension > 7)
2391            ):
2392                optimClass = ChainMetaModelPowell
2393            else:
2394                optimClass = super()._select_optimizer_cls()
2395
2396        return optimClass
2397
2398    def _num_objectives_set_callback(self) -> None:
2399        super()._num_objectives_set_callback()
2400        if self.num_objectives > 1:
2401            if self.noise_from_instrumentation or not self.has_noise:
2402                # override at runtime
2403                self._optim = DE(self.parametrization, self.budget, self.num_workers)
2404
2405
2406@registry.register
2407class NGOpt10(NGOpt8):
2408    def _select_optimizer_cls(self) -> base.OptCls:
2409        if not self.has_noise and self._arity > 0:
2410            return DiscreteLenglerOnePlusOne
2411        else:
2412            return super()._select_optimizer_cls()
2413
2414    def recommend(self) -> p.Parameter:
2415        return base.Optimizer.recommend(self)
2416
2417
2418@registry.register
2419class NGOpt(NGOpt10):
2420    pass
2421
2422
2423class _MSR(CM):
2424    """This code applies multiple copies of NGOpt with random weights for the different objective functions.
2425
2426    Variants dedicated to multiobjective optimization by multiple singleobjective optimization.
2427    """
2428
2429    def __init__(
2430        self,
2431        parametrization: IntOrParameter,
2432        budget: tp.Optional[int] = None,
2433        num_workers: int = 1,
2434        num_single_runs: int = 9,
2435        base_optimizer: base.OptCls = NGOpt,
2436    ) -> None:
2437        super().__init__(parametrization, budget=budget, num_workers=num_workers)
2438        self.num_optims = num_single_runs
2439        self.optims = [
2440            base_optimizer(
2441                self.parametrization,
2442                budget=1 + (budget // self.num_optims) if budget is not None else None,
2443                num_workers=num_workers,
2444            )
2445            for _ in range(self.num_optims)
2446        ]
2447        self.coeff: tp.Optional[tp.List[float]] = None
2448
2449    def _internal_tell_candidate(self, candidate: p.Parameter, loss: tp.FloatLoss) -> None:
2450        if self.coeff is None:
2451            self.coeffs = [
2452                self.parametrization.random_state.uniform(size=self.num_objectives)
2453                for _ in range(self.num_optims)
2454            ]
2455        for coeffs, opt in zip(self.coeffs, self.optims):
2456            this_loss = np.sum(loss * coeffs)
2457            opt.tell(candidate, this_loss)
2458
2459
2460class MultipleSingleRuns(base.ConfiguredOptimizer):
2461    """Multiple single-objective runs, in particular for multi-objective optimization.
2462    Parameters
2463    ----------
2464    num_single_runs: int
2465        number of single runs.
2466    """
2467
2468    # pylint: disable=unused-argument
2469    def __init__(
2470        self,
2471        *,
2472        num_single_runs: int = 9,
2473        base_optimizer: base.OptCls = NGOpt,
2474    ) -> None:
2475        super().__init__(_MSR, locals())
2476