1 /* Copyright (c) 2012, 2021, Oracle and/or its affiliates.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
22 
23 // First include (the generated) my_config.h, to get correct platform defines.
24 #include "my_config.h"
25 #include <gtest/gtest.h>
26 
27 #include "filesort_utils.h"
28 #include "test_utils.h"
29 
30 #include <algorithm>
31 #include <memory>
32 #include <vector>
33 
34 namespace filesort_compare_unittest {
35 
36 /*
37   Below are some performance microbenchmarks in order to compare our sorting
38   options:
39   my_qsort2        - requires no extra memory, uses insert sort on small ranges,
40                      uses quicksort on larger ranges
41   radixsort -        requires extra memory: array of n pointers,
42                      seems to be quite fast on intel *when it is appliccable*:
43                      if (size <= 20 && items >= 1000 && items < 100000)
44   std::sort -        requires no extra memory,
45                      typically implemented with introsort/insertion sort
46   std::stable_sort - requires extra memory: array of n pointers,
47                      typically implemented with mergesort
48 
49   The record format for filesort is constructed in such a way that we can
50   compare records byte-by-byte, without knowing the data types.
51   Nullable fields (maybe_null()) are pre-pended with an extra byte.
52   If we are sorting in descending mode, all the bytes are simply flipped.
53 
54   This means that any variant of memcmp() can be used for comparing record.
55   Below we test different variants, including memcmp() itself.
56 */
57 
58 // A simple helper function to determine array size.
59 template <class T, int size>
array_size(const T (&)[size])60 int array_size(const T (&)[size])
61 {
62   return size;
63 }
64 
bytes_to_int(const uchar * s)65 inline int bytes_to_int(const uchar *s)
66 {
67   int val;
68   longget(&val, s);
69   return val ^ 0x80000000;
70 }
71 
int_to_bytes(uchar * s,int val)72 inline void int_to_bytes(uchar *s, int val)
73 {
74   val= val ^ 0x80000000;
75   longstore(s, val);
76 }
77 
78 
TEST(BufferAlignmentTest,IntsToBytesToInt)79 TEST(BufferAlignmentTest, IntsToBytesToInt)
80 {
81   uchar buf[10];
82   memset(buf, 0, sizeof(buf));
83   for (int ix= 0; ix < 6; ++ix)
84   {
85     int test_data[]= { INT_MIN32, -42, -1, 0, 1, 42, INT_MAX32 };
86     for (int iy= 0; iy < array_size(test_data); ++iy)
87     {
88       int val= test_data[iy];
89       int_to_bytes(buf+ix, val);
90       EXPECT_EQ(val, bytes_to_int(buf+ix));
91     }
92   }
93 }
94 
95 
96 class FileSortCompareTest : public ::testing::Test
97 {
98 protected:
99   // Do each sort algorithm this many times. Increase value for benchmarking!
100   static const int num_iterations= 1;
101   // Number of records.
102   static const int num_records= 100 * 100;
103   // Number of keys in each record.
104   static const int keys_per_record= 4;
105   // Size of each record.
106   static const int record_size= keys_per_record * sizeof(int);
107 
108   // Static buffer containing data to be sorted.
109   // (actually: we only sort the sort_keys below, data is stable).
110   static std::vector<int> test_data;
111 
SetUpTestCase()112   static void SetUpTestCase()
113   {
114     test_data.reserve(num_records * keys_per_record);
115     union { int val; uchar buf[sizeof(int)]; } sort_str;
116 
117     for (int ix= 0; ix < num_records * keys_per_record; ++ix)
118     {
119       int val= ix / (10 * keys_per_record);
120       if (ix % 10 == 0) val= -val;
121       int_to_bytes(sort_str.buf, val);
122       test_data.push_back(sort_str.val);
123     }
124     // Comment away shuffling for testing partially pre-sorted data.
125     // std::random_shuffle(test_data.begin(), test_data.end());
126   }
127 
TearDownTestCase()128   static void TearDownTestCase()
129   {
130     // Delete the data now, rather than during exit().
131     std::vector<int>().swap(test_data);
132   }
133 
SetUp()134   virtual void SetUp()
135   {
136     sort_keys= new uchar* [num_records];
137     for (int ix= 0; ix < num_records; ++ix)
138       sort_keys[ix]=
139         static_cast<uchar*>(static_cast<void*>(&test_data[keys_per_record*ix]));
140   }
141 
TearDown()142   virtual void TearDown()
143   {
144     delete[] sort_keys;
145   }
146 
147   uchar **sort_keys;
148 };
149 std::vector<int> FileSortCompareTest::test_data;
150 
151 /*
152   Some different mem_compare functions.
153   The first one seems to win on all platforms, except sparc,
154   where the builtin memcmp() wins.
155  */
mem_compare_0(const uchar * s1,const uchar * s2,size_t len)156 inline bool mem_compare_0(const uchar *s1, const uchar *s2, size_t len)
157 {
158   do {
159     if (*s1++ != *s2++)
160       return *--s1 < *--s2;
161   } while (--len != 0);
162   return s1 > s2; // Return false for duplicate keys.
163 }
164 
mem_compare_1(const uchar * s1,const uchar * s2,size_t len)165 inline bool mem_compare_1(const uchar *s1, const uchar *s2, size_t len)
166 {
167   do {
168     if (*s1++ != *s2++)
169       return *--s1 < *--s2;
170   } while (--len != 0);
171   return false;
172 }
173 
mem_compare_2(const uchar * s1,const uchar * s2,size_t len)174 inline bool mem_compare_2(const uchar *s1, const uchar *s2, size_t len)
175 {
176   int v= 0;
177   while (len-- > 0 && v == 0)
178   {
179     v= *(s1++) - *(s2++);
180   }
181   return v < 0;
182 }
183 
mem_compare_3(const uchar * s1,const uchar * s2,size_t len)184 inline bool mem_compare_3(const uchar *s1, const uchar *s2, size_t len)
185 {
186   while (--len && (s1[0] == s2[0]))
187   {
188     ++s1; ++s2;
189   }
190   return s1[0] < s2[0];
191 }
192 
193 #if defined(_WIN32)
194 #pragma intrinsic(memcmp)
195 #endif
196 // For gcc, __builtin_memcmp is actually *slower* than the library call:
197 // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
198 
199 
200 class Mem_compare_memcmp :
201   public std::binary_function<const uchar*, const uchar*, bool>
202 {
203 public:
Mem_compare_memcmp(size_t n)204   Mem_compare_memcmp(size_t n) : m_size(n) {}
operator ()(const uchar * s1,const uchar * s2)205   bool operator()(const uchar *s1, const uchar *s2)
206   {
207     return memcmp(s1, s2, m_size) < 0;
208   }
209   size_t m_size;
210 };
211 
212 
213 class Mem_compare_0 :
214   public std::binary_function<const uchar*, const uchar*, bool>
215 {
216 public:
Mem_compare_0(size_t n)217   Mem_compare_0(size_t n) : m_size(n) {}
operator ()(const uchar * s1,const uchar * s2)218   bool operator()(const uchar *s1, const uchar *s2)
219   {
220     return mem_compare_0(s1, s2, m_size);
221   }
222   size_t m_size;
223 };
224 
225 
226 class Mem_compare_1 :
227   public std::binary_function<const uchar*, const uchar*, bool>
228 {
229 public:
Mem_compare_1(size_t n)230   Mem_compare_1(size_t n) : m_size(n) {}
operator ()(const uchar * s1,const uchar * s2)231   bool operator()(const uchar *s1, const uchar *s2)
232   {
233     return mem_compare_1(s1, s2, m_size);
234   }
235   size_t m_size;
236 };
237 
238 
239 class Mem_compare_2 :
240   public std::binary_function<const uchar*, const uchar*, bool>
241 {
242 public:
Mem_compare_2(size_t n)243   Mem_compare_2(size_t n) : m_size(n) {}
operator ()(const uchar * s1,const uchar * s2)244   bool operator()(const uchar *s1, const uchar *s2)
245   {
246     return mem_compare_2(s1, s2, m_size);
247   }
248   size_t m_size;
249 };
250 
251 
252 class Mem_compare_3 :
253   public std::binary_function<const uchar*, const uchar*, bool>
254 {
255 public:
Mem_compare_3(size_t n)256   Mem_compare_3(size_t n) : m_size(n) {}
operator ()(const uchar * s1,const uchar * s2)257   bool operator()(const uchar *s1, const uchar *s2)
258   {
259     return mem_compare_3(s1, s2, m_size);
260   }
261   size_t m_size;
262 };
263 
264 
265 #define COMPARE(N) if (s1[N] != s2[N]) return s1[N] < s2[N]
266 
267 class Mem_compare_4 :
268   public std::binary_function<const uchar*, const uchar*, bool>
269 {
270 public:
Mem_compare_4(size_t n)271   Mem_compare_4(size_t n) : m_size(n) {}
operator ()(const uchar * s1,const uchar * s2)272   bool operator()(const uchar *s1, const uchar *s2)
273   {
274     size_t len= m_size;
275     while(len > 0)
276     {
277       COMPARE(0);
278       COMPARE(1);
279       COMPARE(2);
280       COMPARE(3);
281       len-= 4;
282       s1 += 4;
283       s2 += 4;
284     }
285     return false;
286   }
287   size_t m_size;
288 };
289 
290 
291 class Mem_compare_5 :
292   public std::binary_function<const uchar*, const uchar*, bool>
293 {
294 public:
Mem_compare_5(size_t n)295   Mem_compare_5(size_t n) : m_size(n) {}
operator ()(const uchar * s1,const uchar * s2)296   bool operator()(const uchar *s1, const uchar *s2)
297   {
298     COMPARE(0);
299     COMPARE(1);
300     COMPARE(2);
301     COMPARE(3);
302     return memcmp(s1 + 4, s2 + 4, m_size - 4) < 0;
303   }
304   size_t m_size;
305 };
306 
307 
308 // This one works for any number of keys.
309 // We treat the first key as int, the rest byte-by-byte.
310 class Mem_compare_int :
311   public std::binary_function<const uchar*, const uchar*, bool>
312 {
313 public:
Mem_compare_int(size_t n)314   Mem_compare_int(size_t n) : m_size(n), rest(n - sizeof(int)) {}
operator ()(const uchar * s1,const uchar * s2)315   bool operator()(const uchar *s1, const uchar *s2)
316   {
317     int int1= bytes_to_int(s1);
318     int int2= bytes_to_int(s2);
319     if (int1 == int2)
320       return mem_compare_1(s1 + rest, s2 + rest, rest);
321     return int1 < int2;
322   }
323 private:
324   size_t m_size;
325   const size_t rest;
326 };
327 
328 class Mem_compare_int_4 :
329   public std::binary_function<const uchar*, const uchar*, bool>
330 {
331 public:
Mem_compare_int_4(size_t)332   Mem_compare_int_4(size_t) : keyno(1) {}
operator ()(const uchar * s1,const uchar * s2)333   bool operator() (const uchar *s1, const uchar *s2)
334   {
335     int inta1= bytes_to_int(s1);
336     int intb1= bytes_to_int(s2);
337     if (keyno < 4 && inta1 == intb1)
338     {
339       ++keyno;
340       return operator()(s1 + sizeof(int), s2 + sizeof(int));
341     }
342     return inta1 < intb1;
343   }
344   int keyno;
345 };
346 
347 /*
348   Several sorting tests below, each one runs num_iterations.
349   For each iteration we take a copy of the key pointers, and sort the copy.
350   Most of the tests below are run with std::sort and std::stable_sort.
351   Stable sort seems to be faster for all test cases, on all platforms.
352  */
TEST_F(FileSortCompareTest,SetUpOnly)353 TEST_F(FileSortCompareTest, SetUpOnly)
354 {
355   for (int ix= 0; ix < num_iterations; ++ix)
356   {
357     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
358   }
359 }
360 
361 // Disabled because radixsort takes forever when benchmarking.
TEST_F(FileSortCompareTest,DISABLED_RadixSort)362 TEST_F(FileSortCompareTest, DISABLED_RadixSort)
363 {
364   for (int ix= 0; ix < num_iterations; ++ix)
365   {
366     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
367     std::pair<uchar**, ptrdiff_t> buffer=
368       std::get_temporary_buffer<uchar*>(num_records);
369     radixsort_for_str_ptr(&keys[0], num_records, record_size, buffer.first);
370     std::return_temporary_buffer(buffer.first);
371   }
372 }
373 
TEST_F(FileSortCompareTest,MyQsort)374 TEST_F(FileSortCompareTest, MyQsort)
375 {
376   size_t size= record_size;
377   for (int ix= 0; ix < num_iterations; ++ix)
378   {
379     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
380     my_qsort2((uchar*) &keys[0], num_records, sizeof(uchar*),
381               my_testing::get_ptr_compare(record_size), &size);
382   }
383 }
384 
TEST_F(FileSortCompareTest,StdSortmemcmp)385 TEST_F(FileSortCompareTest, StdSortmemcmp)
386 {
387   for (int ix= 0; ix < num_iterations; ++ix)
388   {
389     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
390     std::sort(keys.begin(), keys.end(), Mem_compare_memcmp(record_size));
391   }
392 }
393 
TEST_F(FileSortCompareTest,StdStableSortmemcmp)394 TEST_F(FileSortCompareTest, StdStableSortmemcmp)
395 {
396   for (int ix= 0; ix < num_iterations; ++ix)
397   {
398     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
399     std::stable_sort(keys.begin(), keys.end(),
400                      Mem_compare_memcmp(record_size));
401   }
402 }
403 
TEST_F(FileSortCompareTest,StdSortCompare0)404 TEST_F(FileSortCompareTest, StdSortCompare0)
405 {
406   for (int ix= 0; ix < num_iterations; ++ix)
407   {
408     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
409     std::sort(keys.begin(), keys.end(), Mem_compare_0(record_size));
410   }
411 }
412 
TEST_F(FileSortCompareTest,StdStableSortCompare0)413 TEST_F(FileSortCompareTest, StdStableSortCompare0)
414 {
415   for (int ix= 0; ix < num_iterations; ++ix)
416   {
417     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
418     std::stable_sort(keys.begin(), keys.end(), Mem_compare_0(record_size));
419   }
420 }
421 
TEST_F(FileSortCompareTest,StdSortCompare1)422 TEST_F(FileSortCompareTest, StdSortCompare1)
423 {
424   for (int ix= 0; ix < num_iterations; ++ix)
425   {
426     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
427     std::sort(keys.begin(), keys.end(), Mem_compare_1(record_size));
428   }
429 }
430 
TEST_F(FileSortCompareTest,StdStableSortCompare1)431 TEST_F(FileSortCompareTest, StdStableSortCompare1)
432 {
433   for (int ix= 0; ix < num_iterations; ++ix)
434   {
435     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
436     std::stable_sort(keys.begin(), keys.end(), Mem_compare_1(record_size));
437   }
438 }
439 
TEST_F(FileSortCompareTest,StdSortCompare2)440 TEST_F(FileSortCompareTest, StdSortCompare2)
441 {
442   for (int ix= 0; ix < num_iterations; ++ix)
443   {
444     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
445     std::sort(keys.begin(), keys.end(), Mem_compare_2(record_size));
446   }
447 }
448 
TEST_F(FileSortCompareTest,StdStableSortCompare2)449 TEST_F(FileSortCompareTest, StdStableSortCompare2)
450 {
451   for (int ix= 0; ix < num_iterations; ++ix)
452   {
453     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
454     std::stable_sort(keys.begin(), keys.end(), Mem_compare_2(record_size));
455   }
456 }
457 
TEST_F(FileSortCompareTest,StdSortCompare3)458 TEST_F(FileSortCompareTest, StdSortCompare3)
459 {
460   for (int ix= 0; ix < num_iterations; ++ix)
461   {
462     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
463     std::sort(keys.begin(), keys.end(), Mem_compare_3(record_size));
464   }
465 }
466 
TEST_F(FileSortCompareTest,StdStableSortCompare3)467 TEST_F(FileSortCompareTest, StdStableSortCompare3)
468 {
469   for (int ix= 0; ix < num_iterations; ++ix)
470   {
471     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
472     std::stable_sort(keys.begin(), keys.end(), Mem_compare_3(record_size));
473   }
474 }
475 
TEST_F(FileSortCompareTest,StdSortCompare4)476 TEST_F(FileSortCompareTest, StdSortCompare4)
477 {
478   for (int ix= 0; ix < num_iterations; ++ix)
479   {
480     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
481     std::sort(keys.begin(), keys.end(), Mem_compare_4(record_size));
482   }
483 }
484 
TEST_F(FileSortCompareTest,StdStableSortCompare4)485 TEST_F(FileSortCompareTest, StdStableSortCompare4)
486 {
487   for (int ix= 0; ix < num_iterations; ++ix)
488   {
489     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
490     std::stable_sort(keys.begin(), keys.end(), Mem_compare_4(record_size));
491   }
492 }
493 
TEST_F(FileSortCompareTest,StdSortCompare5)494 TEST_F(FileSortCompareTest, StdSortCompare5)
495 {
496   for (int ix= 0; ix < num_iterations; ++ix)
497   {
498     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
499     std::sort(keys.begin(), keys.end(), Mem_compare_5(record_size));
500   }
501 }
502 
TEST_F(FileSortCompareTest,StdStableSortCompare5)503 TEST_F(FileSortCompareTest, StdStableSortCompare5)
504 {
505   for (int ix= 0; ix < num_iterations; ++ix)
506   {
507     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
508     std::stable_sort(keys.begin(), keys.end(), Mem_compare_5(record_size));
509   }
510 }
511 
512 // Disabled: experimental.
TEST_F(FileSortCompareTest,DISABLED_StdSortIntCompare)513 TEST_F(FileSortCompareTest, DISABLED_StdSortIntCompare)
514 {
515   for (int ix= 0; ix < num_iterations; ++ix)
516   {
517     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
518     std::sort(keys.begin(), keys.end(), Mem_compare_int(record_size));
519   }
520 }
521 
522 // Disabled: experimental.
TEST_F(FileSortCompareTest,DISABLED_StdStableSortIntCompare)523 TEST_F(FileSortCompareTest, DISABLED_StdStableSortIntCompare)
524 {
525   for (int ix= 0; ix < num_iterations; ++ix)
526   {
527     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
528     std::stable_sort(keys.begin(), keys.end(), Mem_compare_int(record_size));
529   }
530 }
531 
532 // Disabled: experimental.
TEST_F(FileSortCompareTest,DISABLED_StdSortIntIntIntInt)533 TEST_F(FileSortCompareTest, DISABLED_StdSortIntIntIntInt)
534 {
535   for (int ix= 0; ix < num_iterations; ++ix)
536   {
537     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
538     std::sort(keys.begin(), keys.end(),
539               Mem_compare_int_4(record_size));
540   }
541 }
542 
543 // Disabled: experimental.
TEST_F(FileSortCompareTest,DISABLED_StdStableSortIntIntIntInt)544 TEST_F(FileSortCompareTest, DISABLED_StdStableSortIntIntIntInt)
545 {
546   for (int ix= 0; ix < num_iterations; ++ix)
547   {
548     std::vector<uchar*> keys(sort_keys, sort_keys + num_records);
549     std::stable_sort(keys.begin(), keys.end(),
550                      Mem_compare_int_4(record_size));
551   }
552 }
553 
554 }  // namespace
555