1from .common import Benchmark
2
3import numpy as np
4
5
6class Histogram1D(Benchmark):
7    def setup(self):
8        self.d = np.linspace(0, 100, 100000)
9
10    def time_full_coverage(self):
11        np.histogram(self.d, 200, (0, 100))
12
13    def time_small_coverage(self):
14        np.histogram(self.d, 200, (50, 51))
15
16    def time_fine_binning(self):
17        np.histogram(self.d, 10000, (0, 100))
18
19
20class Histogram2D(Benchmark):
21    def setup(self):
22        self.d = np.linspace(0, 100, 200000).reshape((-1,2))
23
24    def time_full_coverage(self):
25        np.histogramdd(self.d, (200, 200), ((0, 100), (0, 100)))
26
27    def time_small_coverage(self):
28        np.histogramdd(self.d, (200, 200), ((50, 51), (50, 51)))
29
30    def time_fine_binning(self):
31        np.histogramdd(self.d, (10000, 10000), ((0, 100), (0, 100)))
32
33
34class Bincount(Benchmark):
35    def setup(self):
36        self.d = np.arange(80000, dtype=np.intp)
37        self.e = self.d.astype(np.float64)
38
39    def time_bincount(self):
40        np.bincount(self.d)
41
42    def time_weights(self):
43        np.bincount(self.d, weights=self.e)
44
45
46class Median(Benchmark):
47    def setup(self):
48        self.e = np.arange(10000, dtype=np.float32)
49        self.o = np.arange(10001, dtype=np.float32)
50
51    def time_even(self):
52        np.median(self.e)
53
54    def time_odd(self):
55        np.median(self.o)
56
57    def time_even_inplace(self):
58        np.median(self.e, overwrite_input=True)
59
60    def time_odd_inplace(self):
61        np.median(self.o, overwrite_input=True)
62
63    def time_even_small(self):
64        np.median(self.e[:500], overwrite_input=True)
65
66    def time_odd_small(self):
67        np.median(self.o[:500], overwrite_input=True)
68
69
70class Percentile(Benchmark):
71    def setup(self):
72        self.e = np.arange(10000, dtype=np.float32)
73        self.o = np.arange(10001, dtype=np.float32)
74
75    def time_quartile(self):
76        np.percentile(self.e, [25, 75])
77
78    def time_percentile(self):
79        np.percentile(self.e, [25, 35, 55, 65, 75])
80
81
82class Select(Benchmark):
83    def setup(self):
84        self.d = np.arange(20000)
85        self.e = self.d.copy()
86        self.cond = [(self.d > 4), (self.d < 2)]
87        self.cond_large = [(self.d > 4), (self.d < 2)] * 10
88
89    def time_select(self):
90        np.select(self.cond, [self.d, self.e])
91
92    def time_select_larger(self):
93        np.select(self.cond_large, ([self.d, self.e] * 10))
94
95
96def memoize(f):
97    _memoized = {}
98    def wrapped(*args):
99        if args not in _memoized:
100            _memoized[args] = f(*args)
101
102        return _memoized[args].copy()
103
104    return f
105
106
107class SortGenerator:
108    # The size of the unsorted area in the "random unsorted area"
109    # benchmarks
110    AREA_SIZE = 100
111    # The size of the "partially ordered" sub-arrays
112    BUBBLE_SIZE = 100
113
114    @staticmethod
115    @memoize
116    def random(size, dtype):
117        """
118        Returns a randomly-shuffled array.
119        """
120        arr = np.arange(size, dtype=dtype)
121        np.random.shuffle(arr)
122        return arr
123
124    @staticmethod
125    @memoize
126    def ordered(size, dtype):
127        """
128        Returns an ordered array.
129        """
130        return np.arange(size, dtype=dtype)
131
132    @staticmethod
133    @memoize
134    def reversed(size, dtype):
135        """
136        Returns an array that's in descending order.
137        """
138        return np.arange(size-1, -1, -1, dtype=dtype)
139
140    @staticmethod
141    @memoize
142    def uniform(size, dtype):
143        """
144        Returns an array that has the same value everywhere.
145        """
146        return np.ones(size, dtype=dtype)
147
148    @staticmethod
149    @memoize
150    def swapped_pair(size, dtype, swap_frac):
151        """
152        Returns an ordered array, but one that has ``swap_frac * size``
153        pairs swapped.
154        """
155        a = np.arange(size, dtype=dtype)
156        for _ in range(int(size * swap_frac)):
157            x, y = np.random.randint(0, size, 2)
158            a[x], a[y] = a[y], a[x]
159        return a
160
161    @staticmethod
162    @memoize
163    def sorted_block(size, dtype, block_size):
164        """
165        Returns an array with blocks that are all sorted.
166        """
167        a = np.arange(size, dtype=dtype)
168        b = []
169        if size < block_size:
170            return a
171        block_num = size // block_size
172        for i in range(block_num):
173            b.extend(a[i::block_num])
174        return np.array(b)
175
176    @classmethod
177    @memoize
178    def random_unsorted_area(cls, size, dtype, frac, area_size=None):
179        """
180        This type of array has random unsorted areas such that they
181        compose the fraction ``frac`` of the original array.
182        """
183        if area_size is None:
184            area_size = cls.AREA_SIZE
185
186        area_num = int(size * frac / area_size)
187        a = np.arange(size, dtype=dtype)
188        for _ in range(area_num):
189            start = np.random.randint(size-area_size)
190            end = start + area_size
191            np.random.shuffle(a[start:end])
192        return a
193
194    @classmethod
195    @memoize
196    def random_bubble(cls, size, dtype, bubble_num, bubble_size=None):
197        """
198        This type of array has ``bubble_num`` random unsorted areas.
199        """
200        if bubble_size is None:
201            bubble_size = cls.BUBBLE_SIZE
202        frac = bubble_size * bubble_num / size
203
204        return cls.random_unsorted_area(size, dtype, frac, bubble_size)
205
206
207class Sort(Benchmark):
208    """
209    This benchmark tests sorting performance with several
210    different types of arrays that are likely to appear in
211    real-world applications.
212    """
213    params = [
214        # In NumPy 1.17 and newer, 'merge' can be one of several
215        # stable sorts, it isn't necessarily merge sort.
216        ['quick', 'merge', 'heap'],
217        ['float64', 'int64', 'int16'],
218        [
219            ('random',),
220            ('ordered',),
221            ('reversed',),
222            ('uniform',),
223            ('sorted_block', 10),
224            ('sorted_block', 100),
225            ('sorted_block', 1000),
226            # ('swapped_pair', 0.01),
227            # ('swapped_pair', 0.1),
228            # ('swapped_pair', 0.5),
229            # ('random_unsorted_area', 0.5),
230            # ('random_unsorted_area', 0.1),
231            # ('random_unsorted_area', 0.01),
232            # ('random_bubble', 1),
233            # ('random_bubble', 5),
234            # ('random_bubble', 10),
235        ],
236    ]
237    param_names = ['kind', 'dtype', 'array_type']
238
239    # The size of the benchmarked arrays.
240    ARRAY_SIZE = 10000
241
242    def setup(self, kind, dtype, array_type):
243        np.random.seed(1234)
244        array_class = array_type[0]
245        self.arr = getattr(SortGenerator, array_class)(self.ARRAY_SIZE, dtype, *array_type[1:])
246
247    def time_sort(self, kind, dtype, array_type):
248        # Using np.sort(...) instead of arr.sort(...) because it makes a copy.
249        # This is important because the data is prepared once per benchmark, but
250        # used across multiple runs.
251        np.sort(self.arr, kind=kind)
252
253    def time_argsort(self, kind, dtype, array_type):
254        np.argsort(self.arr, kind=kind)
255
256
257class SortWorst(Benchmark):
258    def setup(self):
259        # quicksort median of 3 worst case
260        self.worst = np.arange(1000000)
261        x = self.worst
262        while x.size > 3:
263            mid = x.size // 2
264            x[mid], x[-2] = x[-2], x[mid]
265            x = x[:-2]
266
267    def time_sort_worst(self):
268        np.sort(self.worst)
269
270    # Retain old benchmark name for backward compatibility
271    time_sort_worst.benchmark_name = "bench_function_base.Sort.time_sort_worst"
272
273
274class Where(Benchmark):
275    def setup(self):
276        self.d = np.arange(20000)
277        self.e = self.d.copy()
278        self.cond = (self.d > 5000)
279
280    def time_1(self):
281        np.where(self.cond)
282
283    def time_2(self):
284        np.where(self.cond, self.d, self.e)
285
286    def time_2_broadcast(self):
287        np.where(self.cond, self.d, 0)
288