1"""
2  Authors:
3    Original FORTRAN77 version of i4_sobol by Bennett Fox.
4    MATLAB version by John Burkardt.
5    PYTHON version by Corrado Chisari
6
7    Original Python version of is_prime by Corrado Chisari
8
9    Original MATLAB versions of other functions by John Burkardt.
10    PYTHON versions by Corrado Chisari
11
12    Modified Python version by Holger Nahrstaedt
13
14    Original code is available from
15    http://people.sc.fsu.edu/~jburkardt/py_src/sobol/sobol.html
16"""
17
18from __future__ import division
19
20import warnings
21
22import numpy as np
23from .base import InitialPointGenerator
24from ..space import Space
25from sklearn.utils import check_random_state
26
27
28class Sobol(InitialPointGenerator):
29    """Generates a new quasirandom Sobol' vector with each call.
30
31    Parameters
32    ----------
33    skip : int
34        Skipped seed number.
35
36    randomize : bool, default=False
37        When set to True, random shift is applied.
38
39    Notes
40    -----
41    Sobol' sequences [1]_ provide :math:`n=2^m` low discrepancy points in
42    :math:`[0,1)^{dim}`. Scrambling them makes them suitable for singular
43    integrands, provides a means of error estimation, and can improve their
44    rate of convergence.
45
46    There are many versions of Sobol' sequences depending on their
47    'direction numbers'. Here, the maximum number of dimension is 40.
48
49    The routine adapts the ideas of Antonov and Saleev [2]_.
50
51    .. warning::
52
53       Sobol' sequences are a quadrature rule and they lose their balance
54       properties if one uses a sample size that is not a power of 2, or skips
55       the first point, or thins the sequence [5]_.
56
57       If :math:`n=2^m` points are not enough then one should take :math:`2^M`
58       points for :math:`M>m`. When scrambling, the number R of independent
59       replicates does not have to be a power of 2.
60
61       Sobol' sequences are generated to some number :math:`B` of bits. Then
62       after :math:`2^B` points have been generated, the sequence will repeat.
63       Currently :math:`B=30`.
64
65    References
66    ----------
67    .. [1] I. M. Sobol. The distribution of points in a cube and the accurate
68       evaluation of integrals. Zh. Vychisl. Mat. i Mat. Phys., 7:784-802,
69       1967.
70
71    .. [2] Antonov, Saleev,
72       USSR Computational Mathematics and Mathematical Physics,
73       Volume 19, 1980, pages 252 - 256.
74
75    .. [3] Paul Bratley, Bennett Fox,
76       Algorithm 659:
77       Implementing Sobol's Quasirandom Sequence Generator,
78       ACM Transactions on Mathematical Software,
79       Volume 14, Number 1, pages 88-100, 1988.
80
81    .. [4] Bennett Fox,
82       Algorithm 647:
83       Implementation and Relative Efficiency of Quasirandom
84       Sequence Generators,
85
86    .. [5] Art B. Owen. On dropping the first Sobol' point. arXiv 2008.08051,
87       2020.
88
89    """
90    def __init__(self, skip=0, randomize=True):
91
92        if not (skip & (skip - 1) == 0):
93            raise ValueError("The balance properties of Sobol' points require"
94                             " skipping a power of 2.")
95        if skip != 0:
96            warnings.warn(f"{skip} points have been skipped: "
97                          f"{skip} points can be generated before the "
98                          f"sequence repeats.")
99        self.skip = skip
100
101        self.num_generated = 0
102        self.randomize = randomize
103
104        self.dim_max = 40
105        self.log_max = 30
106        self.atmost = 2 ** self.log_max - 1
107        self.lastq = None
108        self.maxcol = None
109        self.poly = None
110        self.recipd = None
111        self.seed_save = -1
112        self.v = np.zeros((self.dim_max, self.log_max))
113        self.dim_num_save = -1
114
115    def init(self, dim_num):
116        self.dim_num_save = dim_num
117        self.v = np.zeros((self.dim_max, self.log_max))
118        self.v[0:40, 0] = np.transpose([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
119                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
120                                        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
121                                        1, 1, 1, 1])
122
123        self.v[2:40, 1] = np.transpose([1, 3, 1, 3, 1, 3, 3, 1, 3, 1, 3, 1,
124                                        3, 1, 1, 3, 1, 3, 1, 3, 1, 3, 3, 1,
125                                        3, 1, 3, 1, 3, 1, 1, 3, 1, 3, 1, 3,
126                                        1, 3])
127
128        self.v[3:40, 2] = np.transpose([7, 5, 1, 3, 3, 7, 5, 5, 7, 7, 1,
129                                        3, 3, 7, 5, 1, 1, 5, 3, 3, 1, 7, 5,
130                                        1, 3, 3, 7, 5, 1, 1, 5, 7, 7, 5, 1,
131                                        3, 3])
132
133        self.v[5:40, 3] = np.transpose([1, 7,  9,  13, 11, 1, 3,  7,  9,
134                                        5,  13, 13, 11, 3,  15, 5, 3, 15,
135                                        7,  9,  13, 9,  1,  11, 7, 5, 15,
136                                        1,  15, 11, 5,  3,  1,  7,  9])
137
138        self.v[7:40, 4] = np.transpose([9,  3,  27, 15, 29, 21, 23, 19,
139                                        11, 25, 7,  13, 17, 1,  25, 29,
140                                        3,  31, 11, 5,  23, 27, 19, 21,
141                                        5,  1,  17, 13, 7,  15, 9,  31, 9])
142
143        self.v[13:40, 5] = np.transpose([37, 33, 7,  5,  11, 39, 63, 27,
144                                         17, 15, 23, 29, 3,  21, 13, 31,
145                                         25, 9,  49, 33, 19, 29, 11, 19,
146                                         27, 15, 25])
147
148        self.v[19:40, 6] = np.transpose([13, 33, 115, 41, 79, 17, 29, 119,
149                                         75, 73, 105, 7,  59,  65, 21, 3,
150                                         113, 61,  89, 45, 107])
151
152        self.v[37:40, 7] = np.transpose([7, 23, 39])
153
154        #  Set POLY.
155        self.poly = [1, 3, 7, 11, 13, 19, 25, 37, 59, 47, 61, 55, 41, 67, 97,
156                     91, 109, 103, 115, 131, 193, 137, 145, 143, 241, 157,
157                     185, 167, 229, 171, 213, 191, 253, 203, 211, 239, 247,
158                     285, 369, 299]
159
160        #  Find the number of bits in ATMOST.
161        self.maxcol = _bit_hi1(self.atmost)
162
163        #  Initialize row 1 of V.
164        self.v[0, 0:self.maxcol] = 1
165
166        #  Check parameters.
167        if dim_num < 1 or self.dim_max < dim_num:
168            raise ValueError(f'I4_SOBOL - Fatal error!\n'
169                             f'  The spatial dimension DIM_NUM should '
170                             f'satisfy:\n'
171                             f'  1 <= DIM_NUM <= {self.dim_max}\n'
172                             f'  But this input value is DIM_NUM = {dim_num}')
173
174        #  Initialize the remaining rows of V.
175        for i in range(2, dim_num + 1):
176
177            #  The bits of the integer POLY(I) gives the form of polynomial I.
178            #  Find the degree of polynomial I from binary encoding.
179            j = self.poly[i - 1]
180            m = 0
181            j //= 2
182            while j > 0:
183                j //= 2
184                m += 1
185
186            #  Expand this bit pattern to separate components
187            #  of the logical array INCLUD.
188            j = self.poly[i - 1]
189            includ = np.zeros(m)
190            for k in range(m, 0, -1):
191                j2 = j // 2
192                includ[k - 1] = (j != 2 * j2)
193                j = j2
194
195            #  Calculate the remaining elements of row I as explained
196            #  in Bratley and Fox, section 2.
197            for j in range(m + 1, self.maxcol + 1):
198                newv = self.v[i - 1, j - m - 1]
199                p2 = 1
200                for k in range(1, m + 1):
201                    p2 *= 2
202                    if includ[k - 1]:
203                        newv = np.bitwise_xor(
204                            int(newv), int(p2 * self.v[i - 1, j - k - 1]))
205                self.v[i - 1, j - 1] = newv
206        #  Multiply columns of V by appropriate power of 2.
207        p2 = 1
208        for j in range(self.maxcol - 1, 0, -1):
209            p2 *= 2
210            self.v[0:dim_num, j - 1] = self.v[0:dim_num, j - 1] * p2
211
212        #  RECIPD is 1/(common denominator of the elements in V).
213        self.recipd = 1.0 / (2 * p2)
214        self.lastq = np.zeros(dim_num)
215
216    def generate(self, dimensions, n_samples, random_state=None):
217        """Creates samples from Sobol' set.
218
219        Parameters
220        ----------
221        dimensions : list, shape (n_dims,)
222            List of search space dimensions.
223            Each search dimension can be defined either as
224
225            - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
226              dimensions),
227            - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
228              dimensions),
229            - as a list of categories (for `Categorical` dimensions), or
230            - an instance of a `Dimension` object (`Real`, `Integer` or
231              `Categorical`).
232        n_samples : int
233            The order of the Sobol' sequence. Defines the number of samples.
234        random_state : int, RandomState instance, or None (default)
235            Set random state to something other than None for reproducible
236            results.
237
238        Returns
239        -------
240        sample : array_like (n_samples, dim)
241            Sobol' set.
242
243        """
244        total_n_samples = self.num_generated + n_samples
245        if not (total_n_samples & (total_n_samples - 1) == 0):
246            warnings.warn("The balance properties of Sobol' points require "
247                          "n to be a power of 2. {0} points have been "
248                          "previously generated, then: n={0}+{1}={2}. "
249                          .format(self.num_generated, n_samples,
250                                  total_n_samples))
251        if self.skip != 0 and total_n_samples > self.skip:
252            raise ValueError(f"{self.skip} points have been skipped: "
253                             f"generating "
254                             f"{n_samples} more points would cause the "
255                             f"sequence to repeat.")
256
257        rng = check_random_state(random_state)
258        space = Space(dimensions)
259        n_dim = space.n_dims
260        transformer = space.get_transformer()
261        space.set_transformer("normalize")
262        r = np.full((n_samples, n_dim), np.nan)
263
264        seed = self.skip
265        for j in range(n_samples):
266            r[j, 0:n_dim], seed = self._sobol(n_dim, seed)
267
268        if self.randomize:
269            r = _random_shift(r, rng)
270
271        r = space.inverse_transform(r)
272        space.set_transformer(transformer)
273
274        self.num_generated += n_samples
275
276        return r
277
278    def _sobol(self, dim_num, seed):
279        """Generates a new quasirandom Sobol' vector with each call.
280
281        Parameters
282        ----------
283        dim_num : int
284          Number of spatial dimensions.
285          `dim_num` must satisfy 1 <= DIM_NUM <= 40.
286
287        seed : int
288          the `seed` for the sequence.
289          This is essentially the index in the sequence of the quasirandom
290          value to be generated. On output, `seed` has been set to the
291          appropriate next value, usually simply `seed`+1.
292          If `seed` is less than 0 on input, it is treated as though it were 0.
293          An input value of 0 requests the first (0-th) element of
294          the sequence.
295
296        Returns
297        -------
298        vector, seed : np.array (n_dim,), int
299            The next quasirandom vector and the seed of its next vector.
300
301        """
302        #  Things to do only if the dimension changed.
303        if dim_num != self.dim_num_save:
304            self.init(dim_num)
305
306        seed = int(np.floor(seed))
307
308        if seed < 0:
309            seed = 0
310
311        pos_lo0 = 1
312        if seed == 0:
313            self.lastq = np.zeros(dim_num)
314
315        elif seed == self.seed_save + 1:
316
317            #  Find the position of the right-hand zero in SEED.
318            pos_lo0 = _bit_lo0(seed)
319
320        elif seed <= self.seed_save:
321
322            self.seed_save = 0
323            self.lastq = np.zeros(dim_num)
324
325            for seed_temp in range(int(self.seed_save), int(seed)):
326                pos_lo0 = _bit_lo0(seed_temp)
327                for i in range(1, dim_num + 1):
328                    self.lastq[i - 1] = np.bitwise_xor(
329                        int(self.lastq[i - 1]),
330                        int(self.v[i - 1, pos_lo0 - 1]))
331
332            pos_lo0 = _bit_lo0(seed)
333
334        elif self.seed_save + 1 < seed:
335
336            for seed_temp in range(int(self.seed_save + 1), int(seed)):
337                pos_lo0 = _bit_lo0(seed_temp)
338                for i in range(1, dim_num + 1):
339                    self.lastq[i - 1] = np.bitwise_xor(
340                        int(self.lastq[i - 1]),
341                        int(self.v[i - 1, pos_lo0 - 1]))
342
343            pos_lo0 = _bit_lo0(seed)
344
345        #  Check that the user is not calling too many times!
346        if self.maxcol < pos_lo0:
347            raise ValueError(f'I4_SOBOL - Fatal error!\n'
348                             f' Too many calls!\n'
349                             f' MAXCOL = {self.maxcol}\n'
350                             f' L =      {pos_lo0}\n')
351
352        #  Calculate the new components of QUASI.
353        quasi = np.zeros(dim_num)
354        for i in range(1, dim_num + 1):
355            quasi[i - 1] = self.lastq[i - 1] * self.recipd
356            self.lastq[i - 1] = np.bitwise_xor(
357                int(self.lastq[i - 1]), int(self.v[i - 1, pos_lo0 - 1]))
358
359        self.seed_save = seed
360        seed += 1
361
362        return [quasi, seed]
363
364
365def _bit_hi1(n):
366    """Returns the position of the high 1 bit base 2 in an integer.
367
368    Parameters
369    ----------
370    n : int
371        Input, should be positive.
372
373    """
374    bin_repr = np.binary_repr(n)
375    most_left_one = bin_repr.find('1')
376    if most_left_one == -1:
377        return 0
378    else:
379        return len(bin_repr) - most_left_one
380
381
382def _bit_lo0(n):
383    """Returns the position of the low 0 bit base 2 in an integer.
384
385    Parameters
386    ----------
387    n : int
388        Input, should be positive.
389
390    """
391    bin_repr = np.binary_repr(n)
392    most_right_zero = bin_repr[::-1].find('0')
393    if most_right_zero == -1:
394        most_right_zero = len(bin_repr)
395    return most_right_zero + 1
396
397
398def _random_shift(dm, random_state=None):
399    """Random shifting of a vector.
400
401    Randomization of the quasi-MC samples can be achieved in the easiest manner
402    by random shift (or the Cranley-Patterson rotation).
403
404    References
405    -----------
406    .. [1] C. Lemieux, "Monte Carlo and Quasi-Monte Carlo Sampling," Springer
407       Series in Statistics 692, Springer Science+Business Media, New York,
408       2009
409
410    Parameters
411    ----------
412    dm : array, shape(n, d)
413        Input matrix.
414    random_state : int, RandomState instance, or None (default)
415        Set random state to something other than None for reproducible
416        results.
417
418    Returns
419    -------
420    dm :  array, shape(n, d)
421        Randomized Sobol' design matrix.
422
423    """
424    rng = check_random_state(random_state)
425    # Generate random shift matrix from uniform distribution
426    shift = np.repeat(rng.rand(1, dm.shape[1]), dm.shape[0], axis=0)
427    # Return the shifted Sobol' design
428    return (dm + shift) % 1
429