1""" 2 Authors: 3 Original FORTRAN77 version of i4_sobol by Bennett Fox. 4 MATLAB version by John Burkardt. 5 PYTHON version by Corrado Chisari 6 7 Original Python version of is_prime by Corrado Chisari 8 9 Original MATLAB versions of other functions by John Burkardt. 10 PYTHON versions by Corrado Chisari 11 12 Modified Python version by Holger Nahrstaedt 13 14 Original code is available from 15 http://people.sc.fsu.edu/~jburkardt/py_src/sobol/sobol.html 16""" 17 18from __future__ import division 19 20import warnings 21 22import numpy as np 23from .base import InitialPointGenerator 24from ..space import Space 25from sklearn.utils import check_random_state 26 27 28class Sobol(InitialPointGenerator): 29 """Generates a new quasirandom Sobol' vector with each call. 30 31 Parameters 32 ---------- 33 skip : int 34 Skipped seed number. 35 36 randomize : bool, default=False 37 When set to True, random shift is applied. 38 39 Notes 40 ----- 41 Sobol' sequences [1]_ provide :math:`n=2^m` low discrepancy points in 42 :math:`[0,1)^{dim}`. Scrambling them makes them suitable for singular 43 integrands, provides a means of error estimation, and can improve their 44 rate of convergence. 45 46 There are many versions of Sobol' sequences depending on their 47 'direction numbers'. Here, the maximum number of dimension is 40. 48 49 The routine adapts the ideas of Antonov and Saleev [2]_. 50 51 .. warning:: 52 53 Sobol' sequences are a quadrature rule and they lose their balance 54 properties if one uses a sample size that is not a power of 2, or skips 55 the first point, or thins the sequence [5]_. 56 57 If :math:`n=2^m` points are not enough then one should take :math:`2^M` 58 points for :math:`M>m`. When scrambling, the number R of independent 59 replicates does not have to be a power of 2. 60 61 Sobol' sequences are generated to some number :math:`B` of bits. Then 62 after :math:`2^B` points have been generated, the sequence will repeat. 63 Currently :math:`B=30`. 64 65 References 66 ---------- 67 .. [1] I. M. Sobol. The distribution of points in a cube and the accurate 68 evaluation of integrals. Zh. Vychisl. Mat. i Mat. Phys., 7:784-802, 69 1967. 70 71 .. [2] Antonov, Saleev, 72 USSR Computational Mathematics and Mathematical Physics, 73 Volume 19, 1980, pages 252 - 256. 74 75 .. [3] Paul Bratley, Bennett Fox, 76 Algorithm 659: 77 Implementing Sobol's Quasirandom Sequence Generator, 78 ACM Transactions on Mathematical Software, 79 Volume 14, Number 1, pages 88-100, 1988. 80 81 .. [4] Bennett Fox, 82 Algorithm 647: 83 Implementation and Relative Efficiency of Quasirandom 84 Sequence Generators, 85 86 .. [5] Art B. Owen. On dropping the first Sobol' point. arXiv 2008.08051, 87 2020. 88 89 """ 90 def __init__(self, skip=0, randomize=True): 91 92 if not (skip & (skip - 1) == 0): 93 raise ValueError("The balance properties of Sobol' points require" 94 " skipping a power of 2.") 95 if skip != 0: 96 warnings.warn(f"{skip} points have been skipped: " 97 f"{skip} points can be generated before the " 98 f"sequence repeats.") 99 self.skip = skip 100 101 self.num_generated = 0 102 self.randomize = randomize 103 104 self.dim_max = 40 105 self.log_max = 30 106 self.atmost = 2 ** self.log_max - 1 107 self.lastq = None 108 self.maxcol = None 109 self.poly = None 110 self.recipd = None 111 self.seed_save = -1 112 self.v = np.zeros((self.dim_max, self.log_max)) 113 self.dim_num_save = -1 114 115 def init(self, dim_num): 116 self.dim_num_save = dim_num 117 self.v = np.zeros((self.dim_max, self.log_max)) 118 self.v[0:40, 0] = np.transpose([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 119 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 120 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 121 1, 1, 1, 1]) 122 123 self.v[2:40, 1] = np.transpose([1, 3, 1, 3, 1, 3, 3, 1, 3, 1, 3, 1, 124 3, 1, 1, 3, 1, 3, 1, 3, 1, 3, 3, 1, 125 3, 1, 3, 1, 3, 1, 1, 3, 1, 3, 1, 3, 126 1, 3]) 127 128 self.v[3:40, 2] = np.transpose([7, 5, 1, 3, 3, 7, 5, 5, 7, 7, 1, 129 3, 3, 7, 5, 1, 1, 5, 3, 3, 1, 7, 5, 130 1, 3, 3, 7, 5, 1, 1, 5, 7, 7, 5, 1, 131 3, 3]) 132 133 self.v[5:40, 3] = np.transpose([1, 7, 9, 13, 11, 1, 3, 7, 9, 134 5, 13, 13, 11, 3, 15, 5, 3, 15, 135 7, 9, 13, 9, 1, 11, 7, 5, 15, 136 1, 15, 11, 5, 3, 1, 7, 9]) 137 138 self.v[7:40, 4] = np.transpose([9, 3, 27, 15, 29, 21, 23, 19, 139 11, 25, 7, 13, 17, 1, 25, 29, 140 3, 31, 11, 5, 23, 27, 19, 21, 141 5, 1, 17, 13, 7, 15, 9, 31, 9]) 142 143 self.v[13:40, 5] = np.transpose([37, 33, 7, 5, 11, 39, 63, 27, 144 17, 15, 23, 29, 3, 21, 13, 31, 145 25, 9, 49, 33, 19, 29, 11, 19, 146 27, 15, 25]) 147 148 self.v[19:40, 6] = np.transpose([13, 33, 115, 41, 79, 17, 29, 119, 149 75, 73, 105, 7, 59, 65, 21, 3, 150 113, 61, 89, 45, 107]) 151 152 self.v[37:40, 7] = np.transpose([7, 23, 39]) 153 154 # Set POLY. 155 self.poly = [1, 3, 7, 11, 13, 19, 25, 37, 59, 47, 61, 55, 41, 67, 97, 156 91, 109, 103, 115, 131, 193, 137, 145, 143, 241, 157, 157 185, 167, 229, 171, 213, 191, 253, 203, 211, 239, 247, 158 285, 369, 299] 159 160 # Find the number of bits in ATMOST. 161 self.maxcol = _bit_hi1(self.atmost) 162 163 # Initialize row 1 of V. 164 self.v[0, 0:self.maxcol] = 1 165 166 # Check parameters. 167 if dim_num < 1 or self.dim_max < dim_num: 168 raise ValueError(f'I4_SOBOL - Fatal error!\n' 169 f' The spatial dimension DIM_NUM should ' 170 f'satisfy:\n' 171 f' 1 <= DIM_NUM <= {self.dim_max}\n' 172 f' But this input value is DIM_NUM = {dim_num}') 173 174 # Initialize the remaining rows of V. 175 for i in range(2, dim_num + 1): 176 177 # The bits of the integer POLY(I) gives the form of polynomial I. 178 # Find the degree of polynomial I from binary encoding. 179 j = self.poly[i - 1] 180 m = 0 181 j //= 2 182 while j > 0: 183 j //= 2 184 m += 1 185 186 # Expand this bit pattern to separate components 187 # of the logical array INCLUD. 188 j = self.poly[i - 1] 189 includ = np.zeros(m) 190 for k in range(m, 0, -1): 191 j2 = j // 2 192 includ[k - 1] = (j != 2 * j2) 193 j = j2 194 195 # Calculate the remaining elements of row I as explained 196 # in Bratley and Fox, section 2. 197 for j in range(m + 1, self.maxcol + 1): 198 newv = self.v[i - 1, j - m - 1] 199 p2 = 1 200 for k in range(1, m + 1): 201 p2 *= 2 202 if includ[k - 1]: 203 newv = np.bitwise_xor( 204 int(newv), int(p2 * self.v[i - 1, j - k - 1])) 205 self.v[i - 1, j - 1] = newv 206 # Multiply columns of V by appropriate power of 2. 207 p2 = 1 208 for j in range(self.maxcol - 1, 0, -1): 209 p2 *= 2 210 self.v[0:dim_num, j - 1] = self.v[0:dim_num, j - 1] * p2 211 212 # RECIPD is 1/(common denominator of the elements in V). 213 self.recipd = 1.0 / (2 * p2) 214 self.lastq = np.zeros(dim_num) 215 216 def generate(self, dimensions, n_samples, random_state=None): 217 """Creates samples from Sobol' set. 218 219 Parameters 220 ---------- 221 dimensions : list, shape (n_dims,) 222 List of search space dimensions. 223 Each search dimension can be defined either as 224 225 - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer` 226 dimensions), 227 - a `(lower_bound, upper_bound, "prior")` tuple (for `Real` 228 dimensions), 229 - as a list of categories (for `Categorical` dimensions), or 230 - an instance of a `Dimension` object (`Real`, `Integer` or 231 `Categorical`). 232 n_samples : int 233 The order of the Sobol' sequence. Defines the number of samples. 234 random_state : int, RandomState instance, or None (default) 235 Set random state to something other than None for reproducible 236 results. 237 238 Returns 239 ------- 240 sample : array_like (n_samples, dim) 241 Sobol' set. 242 243 """ 244 total_n_samples = self.num_generated + n_samples 245 if not (total_n_samples & (total_n_samples - 1) == 0): 246 warnings.warn("The balance properties of Sobol' points require " 247 "n to be a power of 2. {0} points have been " 248 "previously generated, then: n={0}+{1}={2}. " 249 .format(self.num_generated, n_samples, 250 total_n_samples)) 251 if self.skip != 0 and total_n_samples > self.skip: 252 raise ValueError(f"{self.skip} points have been skipped: " 253 f"generating " 254 f"{n_samples} more points would cause the " 255 f"sequence to repeat.") 256 257 rng = check_random_state(random_state) 258 space = Space(dimensions) 259 n_dim = space.n_dims 260 transformer = space.get_transformer() 261 space.set_transformer("normalize") 262 r = np.full((n_samples, n_dim), np.nan) 263 264 seed = self.skip 265 for j in range(n_samples): 266 r[j, 0:n_dim], seed = self._sobol(n_dim, seed) 267 268 if self.randomize: 269 r = _random_shift(r, rng) 270 271 r = space.inverse_transform(r) 272 space.set_transformer(transformer) 273 274 self.num_generated += n_samples 275 276 return r 277 278 def _sobol(self, dim_num, seed): 279 """Generates a new quasirandom Sobol' vector with each call. 280 281 Parameters 282 ---------- 283 dim_num : int 284 Number of spatial dimensions. 285 `dim_num` must satisfy 1 <= DIM_NUM <= 40. 286 287 seed : int 288 the `seed` for the sequence. 289 This is essentially the index in the sequence of the quasirandom 290 value to be generated. On output, `seed` has been set to the 291 appropriate next value, usually simply `seed`+1. 292 If `seed` is less than 0 on input, it is treated as though it were 0. 293 An input value of 0 requests the first (0-th) element of 294 the sequence. 295 296 Returns 297 ------- 298 vector, seed : np.array (n_dim,), int 299 The next quasirandom vector and the seed of its next vector. 300 301 """ 302 # Things to do only if the dimension changed. 303 if dim_num != self.dim_num_save: 304 self.init(dim_num) 305 306 seed = int(np.floor(seed)) 307 308 if seed < 0: 309 seed = 0 310 311 pos_lo0 = 1 312 if seed == 0: 313 self.lastq = np.zeros(dim_num) 314 315 elif seed == self.seed_save + 1: 316 317 # Find the position of the right-hand zero in SEED. 318 pos_lo0 = _bit_lo0(seed) 319 320 elif seed <= self.seed_save: 321 322 self.seed_save = 0 323 self.lastq = np.zeros(dim_num) 324 325 for seed_temp in range(int(self.seed_save), int(seed)): 326 pos_lo0 = _bit_lo0(seed_temp) 327 for i in range(1, dim_num + 1): 328 self.lastq[i - 1] = np.bitwise_xor( 329 int(self.lastq[i - 1]), 330 int(self.v[i - 1, pos_lo0 - 1])) 331 332 pos_lo0 = _bit_lo0(seed) 333 334 elif self.seed_save + 1 < seed: 335 336 for seed_temp in range(int(self.seed_save + 1), int(seed)): 337 pos_lo0 = _bit_lo0(seed_temp) 338 for i in range(1, dim_num + 1): 339 self.lastq[i - 1] = np.bitwise_xor( 340 int(self.lastq[i - 1]), 341 int(self.v[i - 1, pos_lo0 - 1])) 342 343 pos_lo0 = _bit_lo0(seed) 344 345 # Check that the user is not calling too many times! 346 if self.maxcol < pos_lo0: 347 raise ValueError(f'I4_SOBOL - Fatal error!\n' 348 f' Too many calls!\n' 349 f' MAXCOL = {self.maxcol}\n' 350 f' L = {pos_lo0}\n') 351 352 # Calculate the new components of QUASI. 353 quasi = np.zeros(dim_num) 354 for i in range(1, dim_num + 1): 355 quasi[i - 1] = self.lastq[i - 1] * self.recipd 356 self.lastq[i - 1] = np.bitwise_xor( 357 int(self.lastq[i - 1]), int(self.v[i - 1, pos_lo0 - 1])) 358 359 self.seed_save = seed 360 seed += 1 361 362 return [quasi, seed] 363 364 365def _bit_hi1(n): 366 """Returns the position of the high 1 bit base 2 in an integer. 367 368 Parameters 369 ---------- 370 n : int 371 Input, should be positive. 372 373 """ 374 bin_repr = np.binary_repr(n) 375 most_left_one = bin_repr.find('1') 376 if most_left_one == -1: 377 return 0 378 else: 379 return len(bin_repr) - most_left_one 380 381 382def _bit_lo0(n): 383 """Returns the position of the low 0 bit base 2 in an integer. 384 385 Parameters 386 ---------- 387 n : int 388 Input, should be positive. 389 390 """ 391 bin_repr = np.binary_repr(n) 392 most_right_zero = bin_repr[::-1].find('0') 393 if most_right_zero == -1: 394 most_right_zero = len(bin_repr) 395 return most_right_zero + 1 396 397 398def _random_shift(dm, random_state=None): 399 """Random shifting of a vector. 400 401 Randomization of the quasi-MC samples can be achieved in the easiest manner 402 by random shift (or the Cranley-Patterson rotation). 403 404 References 405 ----------- 406 .. [1] C. Lemieux, "Monte Carlo and Quasi-Monte Carlo Sampling," Springer 407 Series in Statistics 692, Springer Science+Business Media, New York, 408 2009 409 410 Parameters 411 ---------- 412 dm : array, shape(n, d) 413 Input matrix. 414 random_state : int, RandomState instance, or None (default) 415 Set random state to something other than None for reproducible 416 results. 417 418 Returns 419 ------- 420 dm : array, shape(n, d) 421 Randomized Sobol' design matrix. 422 423 """ 424 rng = check_random_state(random_state) 425 # Generate random shift matrix from uniform distribution 426 shift = np.repeat(rng.rand(1, dm.shape[1]), dm.shape[0], axis=0) 427 # Return the shifted Sobol' design 428 return (dm + shift) % 1 429