1 /* Copyright (c) 2012, 2021, Oracle and/or its affiliates.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 // First include (the generated) my_config.h, to get correct platform defines.
24 #include "my_config.h"
25 #include <gtest/gtest.h>
26
27 #include "filesort_utils.h"
28 #include "test_utils.h"
29
30 #include <algorithm>
31 #include <memory>
32 #include <vector>
33
34 namespace filesort_compare_unittest {
35
36 /*
37 Below are some performance microbenchmarks in order to compare our sorting
38 options:
39 my_qsort2 - requires no extra memory, uses insert sort on small ranges,
40 uses quicksort on larger ranges
41 radixsort - requires extra memory: array of n pointers,
42 seems to be quite fast on intel *when it is appliccable*:
43 if (size <= 20 && items >= 1000 && items < 100000)
44 std::sort - requires no extra memory,
45 typically implemented with introsort/insertion sort
46 std::stable_sort - requires extra memory: array of n pointers,
47 typically implemented with mergesort
48
49 The record format for filesort is constructed in such a way that we can
50 compare records byte-by-byte, without knowing the data types.
51 Nullable fields (maybe_null()) are pre-pended with an extra byte.
52 If we are sorting in descending mode, all the bytes are simply flipped.
53
54 This means that any variant of memcmp() can be used for comparing record.
55 Below we test different variants, including memcmp() itself.
56 */
57
58 // A simple helper function to determine array size.
59 template <class T, int size>
array_size(const T (&)[size])60 int array_size(const T (&)[size])
61 {
62 return size;
63 }
64
bytes_to_int(const uchar * s)65 inline int bytes_to_int(const uchar *s)
66 {
67 int val;
68 longget(&val, s);
69 return val ^ 0x80000000;
70 }
71
int_to_bytes(uchar * s,int val)72 inline void int_to_bytes(uchar *s, int val)
73 {
74 val= val ^ 0x80000000;
75 longstore(s, val);
76 }
77
78
TEST(BufferAlignmentTest,IntsToBytesToInt)79 TEST(BufferAlignmentTest, IntsToBytesToInt)
80 {
81 uchar buf[10];
82 memset(buf, 0, sizeof(buf));
83 for (int ix= 0; ix < 6; ++ix)
84 {
85 int test_data[]= { INT_MIN32, -42, -1, 0, 1, 42, INT_MAX32 };
86 for (int iy= 0; iy < array_size(test_data); ++iy)
87 {
88 int val= test_data[iy];
89 int_to_bytes(buf+ix, val);
90 EXPECT_EQ(val, bytes_to_int(buf+ix));
91 }
92 }
93 }
94
95
96 class FileSortCompareTest : public ::testing::Test
97 {
98 protected:
99 // Do each sort algorithm this many times. Increase value for benchmarking!
100 static const int num_iterations= 1;
101 // Number of records.
102 static const int num_records= 100 * 100;
103 // Number of keys in each record.
104 static const int keys_per_record= 4;
105 // Size of each record.
106 static const int record_size= keys_per_record * sizeof(int);
107
108 // Static buffer containing data to be sorted.
109 // (actually: we only sort the sort_keys below, data is stable).
110 static std::vector<int> test_data;
111
SetUpTestCase()112 static void SetUpTestCase()
113 {
114 test_data.reserve(num_records * keys_per_record);
115 union { int val; uchar buf[sizeof(int)]; } sort_str;
116
117 for (int ix= 0; ix < num_records * keys_per_record; ++ix)
118 {
119 int val= ix / (10 * keys_per_record);
120 if (ix % 10 == 0) val= -val;
121 int_to_bytes(sort_str.buf, val);
122 test_data.push_back(sort_str.val);
123 }
124 // Comment away shuffling for testing partially pre-sorted data.
125 // std::random_shuffle(test_data.begin(), test_data.end());
126 }
127
TearDownTestCase()128 static void TearDownTestCase()
129 {
130 // Delete the data now, rather than during exit().
131 std::vector<int>().swap(test_data);
132 }
133
SetUp()134 virtual void SetUp()
135 {
136 sort_keys= new uchar* [num_records];
137 for (int ix= 0; ix < num_records; ++ix)
138 sort_keys[ix]=
139 static_cast<uchar*>(static_cast<void*>(&test_data[keys_per_record*ix]));
140 }
141
TearDown()142 virtual void TearDown()
143 {
144 delete[] sort_keys;
145 }
146
147 uchar **sort_keys;
148 };
149 std::vector<int> FileSortCompareTest::test_data;
150
151 /*
152 Some different mem_compare functions.
153 The first one seems to win on all platforms, except sparc,
154 where the builtin memcmp() wins.
155 */
mem_compare_0(const uchar * s1,const uchar * s2,size_t len)156 inline bool mem_compare_0(const uchar *s1, const uchar *s2, size_t len)
157 {
158 do {
159 if (*s1++ != *s2++)
160 return *--s1 < *--s2;
161 } while (--len != 0);
162 return s1 > s2; // Return false for duplicate keys.
163 }
164
mem_compare_1(const uchar * s1,const uchar * s2,size_t len)165 inline bool mem_compare_1(const uchar *s1, const uchar *s2, size_t len)
166 {
167 do {
168 if (*s1++ != *s2++)
169 return *--s1 < *--s2;
170 } while (--len != 0);
171 return false;
172 }
173
mem_compare_2(const uchar * s1,const uchar * s2,size_t len)174 inline bool mem_compare_2(const uchar *s1, const uchar *s2, size_t len)
175 {
176 int v= 0;
177 while (len-- > 0 && v == 0)
178 {
179 v= *(s1++) - *(s2++);
180 }
181 return v < 0;
182 }
183
mem_compare_3(const uchar * s1,const uchar * s2,size_t len)184 inline bool mem_compare_3(const uchar *s1, const uchar *s2, size_t len)
185 {
186 while (--len && (s1[0] == s2[0]))
187 {
188 ++s1; ++s2;
189 }
190 return s1[0] < s2[0];
191 }
192
193 #if defined(_WIN32)
194 #pragma intrinsic(memcmp)
195 #endif
196 // For gcc, __builtin_memcmp is actually *slower* than the library call:
197 // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
198
199
200 class Mem_compare_memcmp :
201 public std::binary_function<const uchar*, const uchar*, bool>
202 {
203 public:
Mem_compare_memcmp(size_t n)204 Mem_compare_memcmp(size_t n) : m_size(n) {}
operator ()(const uchar * s1,const uchar * s2)205 bool operator()(const uchar *s1, const uchar *s2)
206 {
207 return memcmp(s1, s2, m_size) < 0;
208 }
209 size_t m_size;
210 };
211
212
213 class Mem_compare_0 :
214 public std::binary_function<const uchar*, const uchar*, bool>
215 {
216 public:
Mem_compare_0(size_t n)217 Mem_compare_0(size_t n) : m_size(n) {}
operator ()(const uchar * s1,const uchar * s2)218 bool operator()(const uchar *s1, const uchar *s2)
219 {
220 return mem_compare_0(s1, s2, m_size);
221 }
222 size_t m_size;
223 };
224
225
226 class Mem_compare_1 :
227 public std::binary_function<const uchar*, const uchar*, bool>
228 {
229 public:
Mem_compare_1(size_t n)230 Mem_compare_1(size_t n) : m_size(n) {}
operator ()(const uchar * s1,const uchar * s2)231 bool operator()(const uchar *s1, const uchar *s2)
232 {
233 return mem_compare_1(s1, s2, m_size);
234 }
235 size_t m_size;
236 };
237
238
239 class Mem_compare_2 :
240 public std::binary_function<const uchar*, const uchar*, bool>
241 {
242 public:
Mem_compare_2(size_t n)243 Mem_compare_2(size_t n) : m_size(n) {}
operator ()(const uchar * s1,const uchar * s2)244 bool operator()(const uchar *s1, const uchar *s2)
245 {
246 return mem_compare_2(s1, s2, m_size);
247 }
248 size_t m_size;
249 };
250
251
252 class Mem_compare_3 :
253 public std::binary_function<const uchar*, const uchar*, bool>
254 {
255 public:
Mem_compare_3(size_t n)256 Mem_compare_3(size_t n) : m_size(n) {}
operator ()(const uchar * s1,const uchar * s2)257 bool operator()(const uchar *s1, const uchar *s2)
258 {
259 return mem_compare_3(s1, s2, m_size);
260 }
261 size_t m_size;
262 };
263
264
265 #define COMPARE(N) if (s1[N] != s2[N]) return s1[N] < s2[N]
266
267 class Mem_compare_4 :
268 public std::binary_function<const uchar*, const uchar*, bool>
269 {
270 public:
Mem_compare_4(size_t n)271 Mem_compare_4(size_t n) : m_size(n) {}
operator ()(const uchar * s1,const uchar * s2)272 bool operator()(const uchar *s1, const uchar *s2)
273 {
274 size_t len= m_size;
275 while(len > 0)
276 {
277 COMPARE(0);
278 COMPARE(1);
279 COMPARE(2);
280 COMPARE(3);
281 len-= 4;
282 s1 += 4;
283 s2 += 4;
284 }
285 return false;
286 }
287 size_t m_size;
288 };
289
290
291 class Mem_compare_5 :
292 public std::binary_function<const uchar*, const uchar*, bool>
293 {
294 public:
Mem_compare_5(size_t n)295 Mem_compare_5(size_t n) : m_size(n) {}
operator ()(const uchar * s1,const uchar * s2)296 bool operator()(const uchar *s1, const uchar *s2)
297 {
298 COMPARE(0);
299 COMPARE(1);
300 COMPARE(2);
301 COMPARE(3);
302 return memcmp(s1 + 4, s2 + 4, m_size - 4) < 0;
303 }
304 size_t m_size;
305 };
306
307
308 // This one works for any number of keys.
309 // We treat the first key as int, the rest byte-by-byte.
310 class Mem_compare_int :
311 public std::binary_function<const uchar*, const uchar*, bool>
312 {
313 public:
Mem_compare_int(size_t n)314 Mem_compare_int(size_t n) : m_size(n), rest(n - sizeof(int)) {}
operator ()(const uchar * s1,const uchar * s2)315 bool operator()(const uchar *s1, const uchar *s2)
316 {
317 int int1= bytes_to_int(s1);
318 int int2= bytes_to_int(s2);
319 if (int1 == int2)
320 return mem_compare_1(s1 + rest, s2 + rest, rest);
321 return int1 < int2;
322 }
323 private:
324 size_t m_size;
325 const size_t rest;
326 };
327
328 class Mem_compare_int_4 :
329 public std::binary_function<const uchar*, const uchar*, bool>
330 {
331 public:
Mem_compare_int_4(size_t)332 Mem_compare_int_4(size_t) : keyno(1) {}
operator ()(const uchar * s1,const uchar * s2)333 bool operator() (const uchar *s1, const uchar *s2)
334 {
335 int inta1= bytes_to_int(s1);
336 int intb1= bytes_to_int(s2);
337 if (keyno < 4 && inta1 == intb1)
338 {
339 ++keyno;
340 return operator()(s1 + sizeof(int), s2 + sizeof(int));
341 }
342 return inta1 < intb1;
343 }
344 int keyno;
345 };
346
347 /*
348 Several sorting tests below, each one runs num_iterations.
349 For each iteration we take a copy of the key pointers, and sort the copy.
350 Most of the tests below are run with std::sort and std::stable_sort.
351 Stable sort seems to be faster for all test cases, on all platforms.
352 */
TEST_F(FileSortCompareTest,SetUpOnly)353 TEST_F(FileSortCompareTest, SetUpOnly)
354 {
355 for (int ix= 0; ix < num_iterations; ++ix)
356 {
357 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
358 }
359 }
360
361 // Disabled because radixsort takes forever when benchmarking.
TEST_F(FileSortCompareTest,DISABLED_RadixSort)362 TEST_F(FileSortCompareTest, DISABLED_RadixSort)
363 {
364 for (int ix= 0; ix < num_iterations; ++ix)
365 {
366 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
367 std::pair<uchar**, ptrdiff_t> buffer=
368 std::get_temporary_buffer<uchar*>(num_records);
369 radixsort_for_str_ptr(&keys[0], num_records, record_size, buffer.first);
370 std::return_temporary_buffer(buffer.first);
371 }
372 }
373
TEST_F(FileSortCompareTest,MyQsort)374 TEST_F(FileSortCompareTest, MyQsort)
375 {
376 size_t size= record_size;
377 for (int ix= 0; ix < num_iterations; ++ix)
378 {
379 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
380 my_qsort2((uchar*) &keys[0], num_records, sizeof(uchar*),
381 my_testing::get_ptr_compare(record_size), &size);
382 }
383 }
384
TEST_F(FileSortCompareTest,StdSortmemcmp)385 TEST_F(FileSortCompareTest, StdSortmemcmp)
386 {
387 for (int ix= 0; ix < num_iterations; ++ix)
388 {
389 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
390 std::sort(keys.begin(), keys.end(), Mem_compare_memcmp(record_size));
391 }
392 }
393
TEST_F(FileSortCompareTest,StdStableSortmemcmp)394 TEST_F(FileSortCompareTest, StdStableSortmemcmp)
395 {
396 for (int ix= 0; ix < num_iterations; ++ix)
397 {
398 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
399 std::stable_sort(keys.begin(), keys.end(),
400 Mem_compare_memcmp(record_size));
401 }
402 }
403
TEST_F(FileSortCompareTest,StdSortCompare0)404 TEST_F(FileSortCompareTest, StdSortCompare0)
405 {
406 for (int ix= 0; ix < num_iterations; ++ix)
407 {
408 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
409 std::sort(keys.begin(), keys.end(), Mem_compare_0(record_size));
410 }
411 }
412
TEST_F(FileSortCompareTest,StdStableSortCompare0)413 TEST_F(FileSortCompareTest, StdStableSortCompare0)
414 {
415 for (int ix= 0; ix < num_iterations; ++ix)
416 {
417 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
418 std::stable_sort(keys.begin(), keys.end(), Mem_compare_0(record_size));
419 }
420 }
421
TEST_F(FileSortCompareTest,StdSortCompare1)422 TEST_F(FileSortCompareTest, StdSortCompare1)
423 {
424 for (int ix= 0; ix < num_iterations; ++ix)
425 {
426 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
427 std::sort(keys.begin(), keys.end(), Mem_compare_1(record_size));
428 }
429 }
430
TEST_F(FileSortCompareTest,StdStableSortCompare1)431 TEST_F(FileSortCompareTest, StdStableSortCompare1)
432 {
433 for (int ix= 0; ix < num_iterations; ++ix)
434 {
435 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
436 std::stable_sort(keys.begin(), keys.end(), Mem_compare_1(record_size));
437 }
438 }
439
TEST_F(FileSortCompareTest,StdSortCompare2)440 TEST_F(FileSortCompareTest, StdSortCompare2)
441 {
442 for (int ix= 0; ix < num_iterations; ++ix)
443 {
444 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
445 std::sort(keys.begin(), keys.end(), Mem_compare_2(record_size));
446 }
447 }
448
TEST_F(FileSortCompareTest,StdStableSortCompare2)449 TEST_F(FileSortCompareTest, StdStableSortCompare2)
450 {
451 for (int ix= 0; ix < num_iterations; ++ix)
452 {
453 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
454 std::stable_sort(keys.begin(), keys.end(), Mem_compare_2(record_size));
455 }
456 }
457
TEST_F(FileSortCompareTest,StdSortCompare3)458 TEST_F(FileSortCompareTest, StdSortCompare3)
459 {
460 for (int ix= 0; ix < num_iterations; ++ix)
461 {
462 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
463 std::sort(keys.begin(), keys.end(), Mem_compare_3(record_size));
464 }
465 }
466
TEST_F(FileSortCompareTest,StdStableSortCompare3)467 TEST_F(FileSortCompareTest, StdStableSortCompare3)
468 {
469 for (int ix= 0; ix < num_iterations; ++ix)
470 {
471 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
472 std::stable_sort(keys.begin(), keys.end(), Mem_compare_3(record_size));
473 }
474 }
475
TEST_F(FileSortCompareTest,StdSortCompare4)476 TEST_F(FileSortCompareTest, StdSortCompare4)
477 {
478 for (int ix= 0; ix < num_iterations; ++ix)
479 {
480 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
481 std::sort(keys.begin(), keys.end(), Mem_compare_4(record_size));
482 }
483 }
484
TEST_F(FileSortCompareTest,StdStableSortCompare4)485 TEST_F(FileSortCompareTest, StdStableSortCompare4)
486 {
487 for (int ix= 0; ix < num_iterations; ++ix)
488 {
489 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
490 std::stable_sort(keys.begin(), keys.end(), Mem_compare_4(record_size));
491 }
492 }
493
TEST_F(FileSortCompareTest,StdSortCompare5)494 TEST_F(FileSortCompareTest, StdSortCompare5)
495 {
496 for (int ix= 0; ix < num_iterations; ++ix)
497 {
498 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
499 std::sort(keys.begin(), keys.end(), Mem_compare_5(record_size));
500 }
501 }
502
TEST_F(FileSortCompareTest,StdStableSortCompare5)503 TEST_F(FileSortCompareTest, StdStableSortCompare5)
504 {
505 for (int ix= 0; ix < num_iterations; ++ix)
506 {
507 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
508 std::stable_sort(keys.begin(), keys.end(), Mem_compare_5(record_size));
509 }
510 }
511
512 // Disabled: experimental.
TEST_F(FileSortCompareTest,DISABLED_StdSortIntCompare)513 TEST_F(FileSortCompareTest, DISABLED_StdSortIntCompare)
514 {
515 for (int ix= 0; ix < num_iterations; ++ix)
516 {
517 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
518 std::sort(keys.begin(), keys.end(), Mem_compare_int(record_size));
519 }
520 }
521
522 // Disabled: experimental.
TEST_F(FileSortCompareTest,DISABLED_StdStableSortIntCompare)523 TEST_F(FileSortCompareTest, DISABLED_StdStableSortIntCompare)
524 {
525 for (int ix= 0; ix < num_iterations; ++ix)
526 {
527 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
528 std::stable_sort(keys.begin(), keys.end(), Mem_compare_int(record_size));
529 }
530 }
531
532 // Disabled: experimental.
TEST_F(FileSortCompareTest,DISABLED_StdSortIntIntIntInt)533 TEST_F(FileSortCompareTest, DISABLED_StdSortIntIntIntInt)
534 {
535 for (int ix= 0; ix < num_iterations; ++ix)
536 {
537 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
538 std::sort(keys.begin(), keys.end(),
539 Mem_compare_int_4(record_size));
540 }
541 }
542
543 // Disabled: experimental.
TEST_F(FileSortCompareTest,DISABLED_StdStableSortIntIntIntInt)544 TEST_F(FileSortCompareTest, DISABLED_StdStableSortIntIntIntInt)
545 {
546 for (int ix= 0; ix < num_iterations; ++ix)
547 {
548 std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
549 std::stable_sort(keys.begin(), keys.end(),
550 Mem_compare_int_4(record_size));
551 }
552 }
553
554 } // namespace
555