1 /* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 // First include (the generated) my_config.h, to get correct platform defines.
24 #include "my_config.h"
25 #include <gtest/gtest.h>
26
27 #include "filesort_utils.h"
28
29 #include <algorithm>
30 #include <memory>
31 #include <vector>
32
33 namespace filesort_compare_unittest {
34
35 /*
36 Below are some performance microbenchmarks in order to compare our sorting
37 options:
38 my_qsort2 - requires no extra memory, uses insert sort on small ranges,
39 uses quicksort on larger ranges
40 radixsort - requires extra memory: array of n pointers,
41 seems to be quite fast on intel *when it is appliccable*:
42 if (size <= 20 && items >= 1000 && items < 100000)
43 std::sort - requires no extra memory,
44 typically implemented with introsort/insertion sort
45 std::stable_sort - requires extra memory: array of n pointers,
46 typically implemented with mergesort
47
48 The record format for filesort is constructed in such a way that we can
49 compare records byte-by-byte, without knowing the data types.
50 Nullable fields (maybe_null()) are pre-pended with an extra byte.
51 If we are sorting in descending mode, all the bytes are simply flipped.
52
53 This means that any variant of memcmp() can be used for comparing record.
54 Below we test different variants, including memcmp() itself.
55 */
56
57 // A simple helper function to determine array size.
58 template <class T, int size>
array_size(const T (&)[size])59 int array_size(const T (&)[size])
60 {
61 return size;
62 }
63
bytes_to_int(const uchar * s)64 inline int bytes_to_int(const uchar *s)
65 {
66 int val;
67 longget(val, s);
68 return val ^ 0x80000000;
69 }
70
int_to_bytes(uchar * s,int val)71 inline void int_to_bytes(uchar *s, int val)
72 {
73 val= val ^ 0x80000000;
74 longstore(s, val);
75 }
76
77
TEST(BufferAlignmentTest,IntsToBytesToInt)78 TEST(BufferAlignmentTest, IntsToBytesToInt)
79 {
80 uchar buf[10];
81 memset(buf, 0, sizeof(buf));
82 for (int ix= 0; ix < 6; ++ix)
83 {
84 int test_data[]= { INT_MIN32, -42, -1, 0, 1, 42, INT_MAX32 };
85 for (int iy= 0; iy < array_size(test_data); ++iy)
86 {
87 int val= test_data[iy];
88 int_to_bytes(buf+ix, val);
89 EXPECT_EQ(val, bytes_to_int(buf+ix));
90 }
91 }
92 }
93
94
95 class FileSortCompareTest : public ::testing::Test
96 {
97 protected:
98 // Do each sort algorithm this many times. Increase value for benchmarking!
99 static const int num_iterations= 1;
100 // Number of records.
101 static const int num_records= 100 * 1000;
102 // Number of keys in each record.
103 static const int keys_per_record= 4;
104 // Size of each record.
105 static const int record_size= keys_per_record * sizeof(int);
106
107 // Static buffer containing data to be sorted.
108 // (actually: we only sort the sort_keys below, data is stable).
109 static std::vector<int> test_data;
110
SetUpTestCase()111 static void SetUpTestCase()
112 {
113 test_data.reserve(num_records * keys_per_record);
114 union { int val; uchar buf[sizeof(int)]; } sort_str;
115
116 for (int ix= 0; ix < num_records * keys_per_record; ++ix)
117 {
118 int val= ix / (10 * keys_per_record);
119 if (ix % 10 == 0) val= -val;
120 int_to_bytes(sort_str.buf, val);
121 test_data.push_back(sort_str.val);
122 }
123 // Comment away shuffling for testing partially pre-sorted data.
124 std::random_shuffle(test_data.begin(), test_data.end());
125 }
126
TearDownTestCase()127 static void TearDownTestCase()
128 {
129 // Delete the data now, rather than during exit().
130 std::vector<int>().swap(test_data);
131 }
132
SetUp()133 virtual void SetUp()
134 {
135 sort_keys= new uchar* [num_records];
136 for (int ix= 0; ix < num_records; ++ix)
137 sort_keys[ix]=
138 static_cast<uchar*>(static_cast<void*>(&test_data[keys_per_record*ix]));
139 }
140
TearDown()141 virtual void TearDown()
142 {
143 delete[] sort_keys;
144 }
145
146 uchar **sort_keys;
147 };
148 std::vector<int> FileSortCompareTest::test_data;
149
150 /*
151 Some different mem_compare functions.
152 The first one seems to win on all platforms, except sparc,
153 where the builtin memcmp() wins.
154 */
mem_compare_1(const uchar * s1,const uchar * s2,size_t len)155 inline bool mem_compare_1(const uchar *s1, const uchar *s2, size_t len)
156 {
157 do {
158 if (*s1++ != *s2++)
159 return *--s1 < *--s2;
160 } while (--len != 0);
161 return false;
162 }
163
mem_compare_2(const uchar * s1,const uchar * s2,size_t len)164 inline bool mem_compare_2(const uchar *s1, const uchar *s2, size_t len)
165 {
166 int v= 0;
167 while (len-- > 0 && v == 0)
168 {
169 v= *(s1++) - *(s2++);
170 }
171 return v < 0;
172 }
173
mem_compare_3(const uchar * s1,const uchar * s2,size_t len)174 inline bool mem_compare_3(const uchar *s1, const uchar *s2, size_t len)
175 {
176 while (--len && (s1[0] == s2[0]))
177 {
178 ++s1; ++s2;
179 }
180 return s1[0] < s2[0];
181 }
182
183 #if defined(__WIN__)
184 #pragma intrinsic(memcmp)
185 #endif
186 // For gcc, __builtin_memcmp is actually *slower* than the library call:
187 // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
188
189
190 class Mem_compare_memcmp :
191 public std::binary_function<const uchar*, const uchar*, bool>
192 {
193 public:
Mem_compare_memcmp(size_t n)194 Mem_compare_memcmp(size_t n) : m_size(n) {}
operator ()(const uchar * s1,const uchar * s2)195 bool operator()(const uchar *s1, const uchar *s2)
196 {
197 return memcmp(s1, s2, m_size) < 0;
198 }
199 size_t m_size;
200 };
201
202
203 class Mem_compare_1 :
204 public std::binary_function<const uchar*, const uchar*, bool>
205 {
206 public:
Mem_compare_1(size_t n)207 Mem_compare_1(size_t n) : m_size(n) {}
operator ()(const uchar * s1,const uchar * s2)208 bool operator()(const uchar *s1, const uchar *s2)
209 {
210 return mem_compare_1(s1, s2, m_size);
211 }
212 size_t m_size;
213 };
214
215
216 class Mem_compare_2 :
217 public std::binary_function<const uchar*, const uchar*, bool>
218 {
219 public:
Mem_compare_2(size_t n)220 Mem_compare_2(size_t n) : m_size(n) {}
operator ()(const uchar * s1,const uchar * s2)221 bool operator()(const uchar *s1, const uchar *s2)
222 {
223 return mem_compare_2(s1, s2, m_size);
224 }
225 size_t m_size;
226 };
227
228
229 class Mem_compare_3 :
230 public std::binary_function<const uchar*, const uchar*, bool>
231 {
232 public:
Mem_compare_3(size_t n)233 Mem_compare_3(size_t n) : m_size(n) {}
operator ()(const uchar * s1,const uchar * s2)234 bool operator()(const uchar *s1, const uchar *s2)
235 {
236 return mem_compare_3(s1, s2, m_size);
237 }
238 size_t m_size;
239 };
240
241
242 #define COMPARE(N) if (s1[N] != s2[N]) return s1[N] < s2[N]
243
244 class Mem_compare_0 :
245 public std::binary_function<const uchar*, const uchar*, bool>
246 {
247 public:
Mem_compare_0(size_t n)248 Mem_compare_0(size_t n) : m_size(n) {}
operator ()(const uchar * s1,const uchar * s2)249 bool operator()(const uchar *s1, const uchar *s2)
250 {
251 size_t len= m_size;
252 while(len > 0)
253 {
254 COMPARE(0);
255 COMPARE(1);
256 COMPARE(2);
257 COMPARE(3);
258 len-= 4;
259 s1 += 4;
260 s2 += 4;
261 }
262 return false;
263 }
264 size_t m_size;
265 };
266
267
268 // This one works for any number of keys.
269 // We treat the first key as int, the rest byte-by-byte.
270 class Mem_compare_int :
271 public std::binary_function<const uchar*, const uchar*, bool>
272 {
273 public:
Mem_compare_int(size_t n)274 Mem_compare_int(size_t n) : m_size(n), rest(n - sizeof(int)) {}
operator ()(const uchar * s1,const uchar * s2)275 bool operator()(const uchar *s1, const uchar *s2)
276 {
277 int int1= bytes_to_int(s1);
278 int int2= bytes_to_int(s2);
279 if (int1 == int2)
280 return mem_compare_1(s1 + rest, s2 + rest, rest);
281 return int1 < int2;
282 }
283 private:
284 size_t m_size;
285 const size_t rest;
286 };
287
288 class Mem_compare_int_4 :
289 public std::binary_function<const uchar*, const uchar*, bool>
290 {
291 public:
Mem_compare_int_4(size_t)292 Mem_compare_int_4(size_t) : keyno(1) {}
operator ()(const uchar * s1,const uchar * s2)293 bool operator() (const uchar *s1, const uchar *s2)
294 {
295 int inta1= bytes_to_int(s1);
296 int intb1= bytes_to_int(s2);
297 if (keyno < 4 && inta1 == intb1)
298 {
299 ++keyno;
300 return operator()(s1 + sizeof(int), s2 + sizeof(int));
301 }
302 return inta1 < intb1;
303 }
304 int keyno;
305 };
306
307 /*
308 Several sorting tests below, each one runs num_iterations.
309 For each iteration we take a copy of the key pointers, and sort the copy.
310 Most of the tests below are run with std::sort and std::stable_sort.
311 Stable sort seems to be faster for all test cases, on all platforms.
312 */
TEST_F(FileSortCompareTest,SetUpOnly)313 TEST_F(FileSortCompareTest, SetUpOnly)
314 {
315 for (int ix= 0; ix < num_iterations; ++ix)
316 {
317 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
318 }
319 }
320
TEST_F(FileSortCompareTest,RadixSort)321 TEST_F(FileSortCompareTest, RadixSort)
322 {
323 for (int ix= 0; ix < num_iterations; ++ix)
324 {
325 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
326 std::pair<uchar**, ptrdiff_t> buffer=
327 std::get_temporary_buffer<uchar*>(num_records);
328 radixsort_for_str_ptr(&keys[0], num_records, record_size, buffer.first);
329 std::return_temporary_buffer(buffer.first);
330 }
331 }
332
TEST_F(FileSortCompareTest,MyQsort)333 TEST_F(FileSortCompareTest, MyQsort)
334 {
335 size_t size= record_size;
336 for (int ix= 0; ix < num_iterations; ++ix)
337 {
338 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
339 my_qsort2((uchar*) &keys[0], num_records, sizeof(uchar*),
340 get_ptr_compare(record_size), &size);
341 }
342 }
343
TEST_F(FileSortCompareTest,StdSortmemcmp)344 TEST_F(FileSortCompareTest, StdSortmemcmp)
345 {
346 for (int ix= 0; ix < num_iterations; ++ix)
347 {
348 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
349 std::sort(keys.begin(), keys.end(), Mem_compare_memcmp(record_size));
350 }
351 }
352
TEST_F(FileSortCompareTest,StdStableSortmemcmp)353 TEST_F(FileSortCompareTest, StdStableSortmemcmp)
354 {
355 for (int ix= 0; ix < num_iterations; ++ix)
356 {
357 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
358 std::stable_sort(keys.begin(), keys.end(),
359 Mem_compare_memcmp(record_size));
360 }
361 }
362
TEST_F(FileSortCompareTest,StdSortCompare1)363 TEST_F(FileSortCompareTest, StdSortCompare1)
364 {
365 for (int ix= 0; ix < num_iterations; ++ix)
366 {
367 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
368 std::sort(keys.begin(), keys.end(), Mem_compare_1(record_size));
369 }
370 }
371
TEST_F(FileSortCompareTest,StdStableSortCompare1)372 TEST_F(FileSortCompareTest, StdStableSortCompare1)
373 {
374 for (int ix= 0; ix < num_iterations; ++ix)
375 {
376 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
377 std::stable_sort(keys.begin(), keys.end(), Mem_compare_1(record_size));
378 }
379 }
380
TEST_F(FileSortCompareTest,StdSortCompare2)381 TEST_F(FileSortCompareTest, StdSortCompare2)
382 {
383 for (int ix= 0; ix < num_iterations; ++ix)
384 {
385 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
386 std::sort(keys.begin(), keys.end(), Mem_compare_2(record_size));
387 }
388 }
389
TEST_F(FileSortCompareTest,StdStableSortCompare2)390 TEST_F(FileSortCompareTest, StdStableSortCompare2)
391 {
392 for (int ix= 0; ix < num_iterations; ++ix)
393 {
394 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
395 std::stable_sort(keys.begin(), keys.end(), Mem_compare_2(record_size));
396 }
397 }
398
TEST_F(FileSortCompareTest,StdSortCompare3)399 TEST_F(FileSortCompareTest, StdSortCompare3)
400 {
401 for (int ix= 0; ix < num_iterations; ++ix)
402 {
403 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
404 std::sort(keys.begin(), keys.end(), Mem_compare_3(record_size));
405 }
406 }
407
TEST_F(FileSortCompareTest,StdStableSortCompare3)408 TEST_F(FileSortCompareTest, StdStableSortCompare3)
409 {
410 for (int ix= 0; ix < num_iterations; ++ix)
411 {
412 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
413 std::stable_sort(keys.begin(), keys.end(), Mem_compare_3(record_size));
414 }
415 }
416
TEST_F(FileSortCompareTest,StdSortCompare4)417 TEST_F(FileSortCompareTest, StdSortCompare4)
418 {
419 for (int ix= 0; ix < num_iterations; ++ix)
420 {
421 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
422 std::sort(keys.begin(), keys.end(), Mem_compare_0(record_size));
423 }
424 }
425
TEST_F(FileSortCompareTest,StdStableSortCompare4)426 TEST_F(FileSortCompareTest, StdStableSortCompare4)
427 {
428 for (int ix= 0; ix < num_iterations; ++ix)
429 {
430 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
431 std::stable_sort(keys.begin(), keys.end(), Mem_compare_0(record_size));
432 }
433 }
434
TEST_F(FileSortCompareTest,DISABLED_StdSortIntCompare)435 TEST_F(FileSortCompareTest, DISABLED_StdSortIntCompare)
436 {
437 for (int ix= 0; ix < num_iterations; ++ix)
438 {
439 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
440 std::sort(keys.begin(), keys.end(), Mem_compare_int(record_size));
441 }
442 }
443
TEST_F(FileSortCompareTest,DISABLED_StdStableSortIntCompare)444 TEST_F(FileSortCompareTest, DISABLED_StdStableSortIntCompare)
445 {
446 for (int ix= 0; ix < num_iterations; ++ix)
447 {
448 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
449 std::stable_sort(keys.begin(), keys.end(), Mem_compare_int(record_size));
450 }
451 }
452
TEST_F(FileSortCompareTest,DISABLED_StdSortIntIntIntInt)453 TEST_F(FileSortCompareTest, DISABLED_StdSortIntIntIntInt)
454 {
455 for (int ix= 0; ix < num_iterations; ++ix)
456 {
457 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
458 std::sort(keys.begin(), keys.end(),
459 Mem_compare_int_4(record_size));
460 }
461 }
462
TEST_F(FileSortCompareTest,DISABLED_StdStableSortIntIntIntInt)463 TEST_F(FileSortCompareTest, DISABLED_StdStableSortIntIntIntInt)
464 {
465 for (int ix= 0; ix < num_iterations; ++ix)
466 {
467 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
468 std::stable_sort(keys.begin(), keys.end(),
469 Mem_compare_int_4(record_size));
470 }
471 }
472
473 } // namespace
474