1from .common import Benchmark 2 3import numpy as np 4 5 6class Histogram1D(Benchmark): 7 def setup(self): 8 self.d = np.linspace(0, 100, 100000) 9 10 def time_full_coverage(self): 11 np.histogram(self.d, 200, (0, 100)) 12 13 def time_small_coverage(self): 14 np.histogram(self.d, 200, (50, 51)) 15 16 def time_fine_binning(self): 17 np.histogram(self.d, 10000, (0, 100)) 18 19 20class Histogram2D(Benchmark): 21 def setup(self): 22 self.d = np.linspace(0, 100, 200000).reshape((-1,2)) 23 24 def time_full_coverage(self): 25 np.histogramdd(self.d, (200, 200), ((0, 100), (0, 100))) 26 27 def time_small_coverage(self): 28 np.histogramdd(self.d, (200, 200), ((50, 51), (50, 51))) 29 30 def time_fine_binning(self): 31 np.histogramdd(self.d, (10000, 10000), ((0, 100), (0, 100))) 32 33 34class Bincount(Benchmark): 35 def setup(self): 36 self.d = np.arange(80000, dtype=np.intp) 37 self.e = self.d.astype(np.float64) 38 39 def time_bincount(self): 40 np.bincount(self.d) 41 42 def time_weights(self): 43 np.bincount(self.d, weights=self.e) 44 45 46class Median(Benchmark): 47 def setup(self): 48 self.e = np.arange(10000, dtype=np.float32) 49 self.o = np.arange(10001, dtype=np.float32) 50 51 def time_even(self): 52 np.median(self.e) 53 54 def time_odd(self): 55 np.median(self.o) 56 57 def time_even_inplace(self): 58 np.median(self.e, overwrite_input=True) 59 60 def time_odd_inplace(self): 61 np.median(self.o, overwrite_input=True) 62 63 def time_even_small(self): 64 np.median(self.e[:500], overwrite_input=True) 65 66 def time_odd_small(self): 67 np.median(self.o[:500], overwrite_input=True) 68 69 70class Percentile(Benchmark): 71 def setup(self): 72 self.e = np.arange(10000, dtype=np.float32) 73 self.o = np.arange(10001, dtype=np.float32) 74 75 def time_quartile(self): 76 np.percentile(self.e, [25, 75]) 77 78 def time_percentile(self): 79 np.percentile(self.e, [25, 35, 55, 65, 75]) 80 81 82class Select(Benchmark): 83 def setup(self): 84 self.d = np.arange(20000) 85 self.e = self.d.copy() 86 self.cond = [(self.d > 4), (self.d < 2)] 87 self.cond_large = [(self.d > 4), (self.d < 2)] * 10 88 89 def time_select(self): 90 np.select(self.cond, [self.d, self.e]) 91 92 def time_select_larger(self): 93 np.select(self.cond_large, ([self.d, self.e] * 10)) 94 95 96def memoize(f): 97 _memoized = {} 98 def wrapped(*args): 99 if args not in _memoized: 100 _memoized[args] = f(*args) 101 102 return _memoized[args].copy() 103 104 return f 105 106 107class SortGenerator: 108 # The size of the unsorted area in the "random unsorted area" 109 # benchmarks 110 AREA_SIZE = 100 111 # The size of the "partially ordered" sub-arrays 112 BUBBLE_SIZE = 100 113 114 @staticmethod 115 @memoize 116 def random(size, dtype): 117 """ 118 Returns a randomly-shuffled array. 119 """ 120 arr = np.arange(size, dtype=dtype) 121 np.random.shuffle(arr) 122 return arr 123 124 @staticmethod 125 @memoize 126 def ordered(size, dtype): 127 """ 128 Returns an ordered array. 129 """ 130 return np.arange(size, dtype=dtype) 131 132 @staticmethod 133 @memoize 134 def reversed(size, dtype): 135 """ 136 Returns an array that's in descending order. 137 """ 138 return np.arange(size-1, -1, -1, dtype=dtype) 139 140 @staticmethod 141 @memoize 142 def uniform(size, dtype): 143 """ 144 Returns an array that has the same value everywhere. 145 """ 146 return np.ones(size, dtype=dtype) 147 148 @staticmethod 149 @memoize 150 def swapped_pair(size, dtype, swap_frac): 151 """ 152 Returns an ordered array, but one that has ``swap_frac * size`` 153 pairs swapped. 154 """ 155 a = np.arange(size, dtype=dtype) 156 for _ in range(int(size * swap_frac)): 157 x, y = np.random.randint(0, size, 2) 158 a[x], a[y] = a[y], a[x] 159 return a 160 161 @staticmethod 162 @memoize 163 def sorted_block(size, dtype, block_size): 164 """ 165 Returns an array with blocks that are all sorted. 166 """ 167 a = np.arange(size, dtype=dtype) 168 b = [] 169 if size < block_size: 170 return a 171 block_num = size // block_size 172 for i in range(block_num): 173 b.extend(a[i::block_num]) 174 return np.array(b) 175 176 @classmethod 177 @memoize 178 def random_unsorted_area(cls, size, dtype, frac, area_size=None): 179 """ 180 This type of array has random unsorted areas such that they 181 compose the fraction ``frac`` of the original array. 182 """ 183 if area_size is None: 184 area_size = cls.AREA_SIZE 185 186 area_num = int(size * frac / area_size) 187 a = np.arange(size, dtype=dtype) 188 for _ in range(area_num): 189 start = np.random.randint(size-area_size) 190 end = start + area_size 191 np.random.shuffle(a[start:end]) 192 return a 193 194 @classmethod 195 @memoize 196 def random_bubble(cls, size, dtype, bubble_num, bubble_size=None): 197 """ 198 This type of array has ``bubble_num`` random unsorted areas. 199 """ 200 if bubble_size is None: 201 bubble_size = cls.BUBBLE_SIZE 202 frac = bubble_size * bubble_num / size 203 204 return cls.random_unsorted_area(size, dtype, frac, bubble_size) 205 206 207class Sort(Benchmark): 208 """ 209 This benchmark tests sorting performance with several 210 different types of arrays that are likely to appear in 211 real-world applications. 212 """ 213 params = [ 214 # In NumPy 1.17 and newer, 'merge' can be one of several 215 # stable sorts, it isn't necessarily merge sort. 216 ['quick', 'merge', 'heap'], 217 ['float64', 'int64', 'int16'], 218 [ 219 ('random',), 220 ('ordered',), 221 ('reversed',), 222 ('uniform',), 223 ('sorted_block', 10), 224 ('sorted_block', 100), 225 ('sorted_block', 1000), 226 # ('swapped_pair', 0.01), 227 # ('swapped_pair', 0.1), 228 # ('swapped_pair', 0.5), 229 # ('random_unsorted_area', 0.5), 230 # ('random_unsorted_area', 0.1), 231 # ('random_unsorted_area', 0.01), 232 # ('random_bubble', 1), 233 # ('random_bubble', 5), 234 # ('random_bubble', 10), 235 ], 236 ] 237 param_names = ['kind', 'dtype', 'array_type'] 238 239 # The size of the benchmarked arrays. 240 ARRAY_SIZE = 10000 241 242 def setup(self, kind, dtype, array_type): 243 np.random.seed(1234) 244 array_class = array_type[0] 245 self.arr = getattr(SortGenerator, array_class)(self.ARRAY_SIZE, dtype, *array_type[1:]) 246 247 def time_sort(self, kind, dtype, array_type): 248 # Using np.sort(...) instead of arr.sort(...) because it makes a copy. 249 # This is important because the data is prepared once per benchmark, but 250 # used across multiple runs. 251 np.sort(self.arr, kind=kind) 252 253 def time_argsort(self, kind, dtype, array_type): 254 np.argsort(self.arr, kind=kind) 255 256 257class SortWorst(Benchmark): 258 def setup(self): 259 # quicksort median of 3 worst case 260 self.worst = np.arange(1000000) 261 x = self.worst 262 while x.size > 3: 263 mid = x.size // 2 264 x[mid], x[-2] = x[-2], x[mid] 265 x = x[:-2] 266 267 def time_sort_worst(self): 268 np.sort(self.worst) 269 270 # Retain old benchmark name for backward compatibility 271 time_sort_worst.benchmark_name = "bench_function_base.Sort.time_sort_worst" 272 273 274class Where(Benchmark): 275 def setup(self): 276 self.d = np.arange(20000) 277 self.e = self.d.copy() 278 self.cond = (self.d > 5000) 279 280 def time_1(self): 281 np.where(self.cond) 282 283 def time_2(self): 284 np.where(self.cond, self.d, self.e) 285 286 def time_2_broadcast(self): 287 np.where(self.cond, self.d, 0) 288