1 #ifndef SQL_SORT_INCLUDED
2 #define SQL_SORT_INCLUDED
3 
4 /* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License, version 2.0,
8    as published by the Free Software Foundation.
9 
10    This program is also distributed with certain software (including
11    but not limited to OpenSSL) that is licensed under separate terms,
12    as designated in a particular file or component or in included license
13    documentation.  The authors of MySQL hereby grant you an additional
14    permission to link the program and your derivative works with the
15    separately licensed software that they have included with MySQL.
16 
17    This program is distributed in the hope that it will be useful,
18    but WITHOUT ANY WARRANTY; without even the implied warranty of
19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20    GNU General Public License, version 2.0, for more details.
21 
22    You should have received a copy of the GNU General Public License
23    along with this program; if not, write to the Free Software
24    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
25 
26 #include "my_global.h"                          /* uchar */
27 #include "my_base.h"                            /* ha_rows */
28 #include "sql_array.h"
29 #include "mysql_com.h"
30 #include "filesort_utils.h"
31 #include "sql_alloc.h"
32 #include <string.h>                             /* memset */
33 #include <vector>
34 
35 class Field;
36 class Item;
37 struct TABLE;
38 class Filesort;
39 
40 /* Defines used by filesort and uniques */
41 
42 #define MERGEBUFF		7
43 #define MERGEBUFF2		15
44 
45 /* Structs used when sorting */
46 
47 struct st_sort_field {
48   Field *field;				/* Field to sort */
49   Item	*item;				/* Item if not sorting fields */
50   uint	 length;			/* Length of sort field */
51   uint   suffix_length;                 /* Length suffix (0-4) */
52   Item_result result_type;		/* Type of item */
53   enum_field_types field_type;          /* Field type of the field or item */
54   bool reverse;				/* if descending sort */
55   bool need_strxnfrm;			/* If we have to use strxnfrm() */
56 };
57 
58 
59 /**
60   The structure Sort_addon_field describes the layout
61   for field values appended to sorted values in records to be sorted
62   in the sort buffer.
63 
64   Null bit maps for the appended values is placed before the values
65   themselves. Offsets are from the last sorted field.
66 
67   The structure is used to store values of the additional fields
68   in the sort buffer. It is used also when these values are read
69   from a temporary file/buffer in 'Filesort_info::unpack_addon_fields'.
70 */
71 
72 struct Sort_addon_field {/* Sort addon packed field */
73   Field *field;          /* Original field */
74   uint   offset;         /* Offset from the last sorted field */
75   uint   null_offset;    /* Offset to to null bit from the last sorted field */
76   uint   max_length;     /* Maximum length in the sort buffer */
77   uint8  null_bit;       /* Null bit mask for the field */
78 };
79 
80 struct Merge_chunk_compare_context
81 {
82   qsort_cmp2 key_compare;
83   const void *key_compare_arg;
84 };
85 
86 /**
87   Descriptor for a merge chunk to be sort-merged.
88   A merge chunk is a sequence of pre-sorted records, written to a
89   temporary file. A Merge_chunk instance describes where this chunk is stored
90   in the file, and where it is located when it is in memory.
91 
92   It is a POD because
93    - we read/write them from/to files.
94 
95   We have accessors (getters/setters) for all struct members.
96  */
97 struct Merge_chunk
98 {
99 public:
Merge_chunkMerge_chunk100   Merge_chunk()
101     : m_current_key(NULL),
102       m_file_position(0),
103       m_buffer_start(NULL),
104       m_buffer_end(NULL),
105       m_rowcount(0),
106       m_mem_count(0),
107       m_max_keys(0)
108   {}
109 
file_positionMerge_chunk110   my_off_t file_position() const { return m_file_position; }
set_file_positionMerge_chunk111   void set_file_position(my_off_t val) { m_file_position= val; }
advance_file_positionMerge_chunk112   void advance_file_position(my_off_t val) { m_file_position+= val; }
113 
buffer_startMerge_chunk114   uchar *buffer_start() { return m_buffer_start; }
buffer_endMerge_chunk115   const uchar *buffer_end() const { return m_buffer_end; }
116 
set_bufferMerge_chunk117   void set_buffer(uchar *start, uchar *end)
118   {
119     m_buffer_start= start;
120     m_buffer_end= end;
121   }
set_buffer_startMerge_chunk122   void set_buffer_start(uchar *start)
123   {
124     m_buffer_start= start;
125   }
set_buffer_endMerge_chunk126   void set_buffer_end(uchar *end)
127   {
128     DBUG_ASSERT(m_buffer_end == NULL || end <= m_buffer_end);
129     m_buffer_end= end;
130   }
131 
init_current_keyMerge_chunk132   void init_current_key() { m_current_key= m_buffer_start; }
current_keyMerge_chunk133   uchar *current_key() { return m_current_key; }
advance_current_keyMerge_chunk134   void advance_current_key(uint val) { m_current_key+= val; }
135 
decrement_rowcountMerge_chunk136   void decrement_rowcount(ha_rows val) { m_rowcount-= val; }
set_rowcountMerge_chunk137   void set_rowcount(ha_rows val)       { m_rowcount= val; }
rowcountMerge_chunk138   ha_rows rowcount() const             { return m_rowcount; }
139 
mem_countMerge_chunk140   ha_rows mem_count() const { return m_mem_count; }
set_mem_countMerge_chunk141   void set_mem_count(ha_rows val) { m_mem_count= val; }
decrement_mem_countMerge_chunk142   ha_rows decrement_mem_count() { return --m_mem_count; }
143 
max_keysMerge_chunk144   ha_rows max_keys() const { return m_max_keys; }
set_max_keysMerge_chunk145   void set_max_keys(ha_rows val) { m_max_keys= val; }
146 
buffer_sizeMerge_chunk147   size_t  buffer_size() const { return m_buffer_end - m_buffer_start; }
148 
149   /**
150     Tries to merge *this with *mc, returns true if successful.
151     The assumption is that *this is no longer in use,
152     and the space it has been allocated can be handed over to a
153     buffer which is adjacent to it.
154    */
merge_freed_buffMerge_chunk155   bool merge_freed_buff(Merge_chunk *mc) const
156   {
157     if (mc->m_buffer_end == m_buffer_start)
158     {
159       mc->m_buffer_end= m_buffer_end;
160       mc->m_max_keys+= m_max_keys;
161       return true;
162     }
163     else if (mc->m_buffer_start == m_buffer_end)
164     {
165       mc->m_buffer_start= m_buffer_start;
166       mc->m_max_keys+= m_max_keys;
167       return true;
168     }
169     return false;
170   }
171 
172 private:
173   uchar   *m_current_key;  /// The current key for this chunk.
174   my_off_t m_file_position;/// Current position in the file to be sorted.
175   uchar   *m_buffer_start; /// Start of main-memory buffer for this chunk.
176   uchar   *m_buffer_end;   /// End of main-memory buffer for this chunk.
177   ha_rows  m_rowcount;     /// Number of unread rows in this chunk.
178   ha_rows  m_mem_count;    /// Number of rows in the main-memory buffer.
179   ha_rows  m_max_keys;     /// If we have fixed-size rows:
180                            ///    max number of rows in buffer.
181 };
182 
183 typedef Bounds_checked_array<Sort_addon_field> Addon_fields_array;
184 typedef Bounds_checked_array<Merge_chunk>      Merge_chunk_array;
185 
186 /**
187   This class wraps information about usage of addon fields.
188   An Addon_fields object is used both during packing of data in the filesort
189   buffer, and later during unpacking in 'Filesort_info::unpack_addon_fields'.
190 
191   @see documentation for the Sort_addon_field struct.
192   @see documentation for get_addon_fields()
193  */
194 class Addon_fields {
195 public:
Addon_fields(Addon_fields_array arr)196   Addon_fields(Addon_fields_array arr)
197     : m_field_descriptors(arr),
198       m_addon_buf(NULL),
199       m_addon_buf_length(0),
200       m_using_packed_addons(false)
201   {
202     DBUG_ASSERT(!arr.is_null());
203   }
204 
begin()205   Sort_addon_field *begin() { return m_field_descriptors.begin(); }
end()206   Sort_addon_field *end()   { return m_field_descriptors.end(); }
num_field_descriptors()207   size_t num_field_descriptors() const { return m_field_descriptors.size(); }
208 
209   /// rr_unpack_from_tempfile needs an extra buffer when unpacking.
allocate_addon_buf(uint sz)210   uchar *allocate_addon_buf(uint sz)
211   {
212     if (m_addon_buf != NULL)
213     {
214       DBUG_ASSERT(m_addon_buf_length == sz);
215       return m_addon_buf;
216     }
217     m_addon_buf= static_cast<uchar*>(sql_alloc(sz));
218     if (m_addon_buf)
219       m_addon_buf_length= sz;
220     return m_addon_buf;
221   }
222 
get_addon_buf()223   uchar *get_addon_buf()              { return m_addon_buf; }
get_addon_buf_length()224   uint   get_addon_buf_length() const { return m_addon_buf_length; }
225 
set_using_packed_addons(bool val)226   void set_using_packed_addons(bool val)
227   {
228     m_using_packed_addons= val;
229   }
230 
using_packed_addons()231   bool using_packed_addons() const
232   {
233     return m_using_packed_addons;
234   }
235 
can_pack_addon_fields(uint record_length)236   static bool can_pack_addon_fields(uint record_length)
237   {
238     return (record_length <= (0xFFFF));
239   }
240 
241   /**
242     @returns Total number of bytes used for packed addon fields.
243     the size of the length field + size of null bits + sum of field sizes.
244    */
read_addon_length(uchar * p)245   static uint read_addon_length(uchar *p)
246   {
247     return size_of_length_field + uint2korr(p);
248   }
249 
250   /**
251     Stores the number of bytes used for packed addon fields.
252    */
store_addon_length(uchar * p,uint sz)253   static void store_addon_length(uchar *p, uint sz)
254   {
255     // We actually store the length of everything *after* the length field.
256     int2store(p, sz - size_of_length_field);
257   }
258 
259   static const uint size_of_length_field= 2;
260 
261 private:
262   Addon_fields_array m_field_descriptors;
263 
264   uchar    *m_addon_buf;            ///< Buffer for unpacking addon fields.
265   uint      m_addon_buf_length;     ///< Length of the buffer.
266   bool      m_using_packed_addons;  ///< Are we packing the addon fields?
267 };
268 
269 /**
270   There are two record formats for sorting:
271     |<key a><key b>...|<rowid>|
272     /  sort_length    / ref_l /
273 
274   or with "addon fields"
275     |<key a><key b>...|<null bits>|<field a><field b>...|
276     /  sort_length    /         addon_length            /
277 
278   The packed format for "addon fields"
279     |<key a><key b>...|<length>|<null bits>|<field a><field b>...|
280     /  sort_length    /         addon_length                     /
281 
282   <key>       Fields are fixed-size, specially encoded with
283               Field::make_sort_key() so we can do byte-by-byte compare.
284   <length>    Contains the *actual* packed length (after packing) of
285               everything after the sort keys.
286               The size of the length field is 2 bytes,
287               which should cover most use cases: addon data <= 65535 bytes.
288               This is the same as max record size in MySQL.
289   <null bits> One bit for each nullable field, indicating whether the field
290               is null or not. May have size zero if no fields are nullable.
291   <field xx>  Are stored with field->pack(), and retrieved with field->unpack().
292               Addon fields within a record are stored consecutively, with no
293               "holes" or padding. They will have zero size for NULL values.
294 
295  */
296 class Sort_param {
297 public:
298   uint rec_length;            // Length of sorted records.
299   uint sort_length;           // Length of sorted columns.
300   uint ref_length;            // Length of record ref.
301   uint addon_length;          // Length of added packed fields.
302   uint res_length;            // Length of records in final sorted file/buffer.
303   uint max_keys_per_buffer;   // Max keys / buffer.
304   ha_rows max_rows;           // Select limit, or HA_POS_ERROR if unlimited.
305   ha_rows examined_rows;      // Number of examined rows.
306   TABLE *sort_form;           // For quicker make_sortkey.
307   bool use_hash;              // Whether to use hash to distinguish cut JSON
308 
309   /**
310     ORDER BY list with some precalculated info for filesort.
311     Array is created and owned by a Filesort instance.
312    */
313   Bounds_checked_array<st_sort_field> local_sortorder;
314 
315   Addon_fields *addon_fields; ///< Descriptors for addon fields.
316   uchar *unique_buff;
317   bool not_killable;
318   bool using_pq;
319   char* tmp_buffer;
320 
321   // The fields below are used only by Unique class.
322   Merge_chunk_compare_context cmp_context;
323   typedef int (*chunk_compare_fun)(Merge_chunk_compare_context* ctx,
324                                    uchar* arg1, uchar* arg2);
325   chunk_compare_fun compare;
326 
Sort_param()327   Sort_param()
328   {
329     memset(this, 0, sizeof(*this));
330   }
331   /**
332     Initialize this struct for filesort() usage.
333     @see description of record layout above.
334     @param [in,out] file_sort Sorting information which may be re-used on
335                               subsequent invocations of filesort().
336     @param sortlen   Length of sorted columns.
337     @param table     Table to be sorted.
338     @param max_length_for_sort_data From thd->variables.
339     @param maxrows   HA_POS_ERROR or possible LIMIT value.
340     @param sort_positions @see documentation for the filesort() function.
341   */
342   void init_for_filesort(Filesort *file_sort,
343                          uint sortlen, TABLE *table,
344                          ulong max_length_for_sort_data,
345                          ha_rows maxrows, bool sort_positions);
346 
347   /// Enables the packing of addons if possible.
348   void try_to_pack_addons(ulong max_length_for_sort_data);
349 
350   /// Are we packing the "addon fields"?
using_packed_addons()351   bool using_packed_addons() const
352   {
353     DBUG_ASSERT(m_using_packed_addons ==
354                 (addon_fields != NULL && addon_fields->using_packed_addons()));
355     return m_using_packed_addons;
356   }
357 
358   /// Are we using "addon fields"?
using_addon_fields()359   bool using_addon_fields() const
360   {
361     return addon_fields != NULL;
362   }
363 
364   /**
365     Stores key fields in *to.
366     Then appends either *ref_pos (the <rowid>) or the "addon fields".
367     @param  to      out Where to store the result
368     @param  ref_pos in  Where to find the <rowid>
369     @returns Number of bytes stored.
370    */
371   uint make_sortkey(uchar *to, const uchar *ref_pos);
372 
373   /// @returns The number of bytes used for sorting.
compare_length()374   size_t compare_length() const {
375     return sort_length;
376   }
377 
378   /**
379     Getter for record length and result length.
380     @param record_start Pointer to record.
381     @param [out] recl   Store record length here.
382     @param [out] resl   Store result length here.
383    */
get_rec_and_res_len(uchar * record_start,uint * recl,uint * resl)384   void get_rec_and_res_len(uchar *record_start, uint *recl, uint *resl)
385   {
386     if (!using_packed_addons())
387     {
388       *recl= rec_length;
389       *resl= res_length;
390       return;
391     }
392     uchar *plen= record_start + sort_length;
393     *resl= Addon_fields::read_addon_length(plen);
394     DBUG_ASSERT(*resl <= res_length);
395     const uchar *record_end= plen + *resl;
396     *recl= static_cast<uint>(record_end - record_start);
397   }
398 
399 private:
400   uint m_packable_length;     ///< total length of fields which have a packable type
401   bool m_using_packed_addons; ///< caches the value of using_packed_addons()
402 
403   // Not copyable.
404   Sort_param(const Sort_param&);
405   Sort_param &operator=(const Sort_param&);
406 };
407 
408 
409 /**
410   A class wrapping misc buffers used for sorting.
411   It is part of 'struct TABLE' which is still initialized using memset(),
412   so do not add any virtual functions to this class.
413  */
414 class Filesort_info
415 {
416   /// Buffer for sorting keys.
417   Filesort_buffer filesort_buffer;
418 
419 public:
420   IO_CACHE *io_cache;             ///< If sorted through filesort
421   Merge_chunk_array merge_chunks; ///< Array of chunk descriptors
422 
423   Addon_fields *addon_fields;     ///< Addon field descriptors.
424 
425   /**
426     If the entire result of filesort fits in memory, we skip the merge phase.
427     We may leave the result in filesort_buffer
428     (indicated by sorted_result_in_fsbuf), or we may strip away
429     the sort keys, and copy the sorted result into a new buffer.
430     This new buffer is [sorted_result ... sorted_result_end]
431     @see save_index()
432    */
433   bool      sorted_result_in_fsbuf;
434   uchar     *sorted_result;
435   uchar     *sorted_result_end;
436 
437   ha_rows   found_records;        ///< How many records in sort.
438 
439   // Note that we use the default copy CTOR / assignment operator in filesort().
Filesort_info()440   Filesort_info()
441     : sorted_result_in_fsbuf(false),
442       sorted_result(NULL), sorted_result_end(NULL)
443   {};
444 
has_filesort_result_in_memory()445   bool has_filesort_result_in_memory() const
446   {
447     return sorted_result || sorted_result_in_fsbuf;
448   }
449 
has_filesort_result()450   bool has_filesort_result() const
451   {
452     return has_filesort_result_in_memory() ||
453       (io_cache && my_b_inited(io_cache));
454   }
455 
456   /** Sort filesort_buffer */
sort_buffer(Sort_param * param,uint count)457   void sort_buffer(Sort_param *param, uint count)
458   { filesort_buffer.sort_buffer(param, count); }
459 
460   /**
461     Copies (unpacks) values appended to sorted fields from a buffer back to
462     their regular positions specified by the Field::ptr pointers.
463     @param buff            Buffer which to unpack the value from
464   */
465   template<bool Packed_addon_fields>
466     inline void unpack_addon_fields(uchar *buff);
467 
468   /**
469     Reads 'count' number of chunk descriptors into the merge_chunks array.
470     In case of error, the merge_chunks array will be empty.
471     @param chunk_file File containing the descriptors.
472     @param count      Number of chunks to read.
473   */
474   void read_chunk_descriptors(IO_CACHE* chunk_file, uint count);
475 
476   /// Are we using "addon fields"?
using_addon_fields()477   bool using_addon_fields() const
478   {
479     return addon_fields != NULL;
480   }
481 
482   /// Are we using "packed addon fields"?
using_packed_addons()483   bool using_packed_addons() const
484   {
485     return addon_fields != NULL && addon_fields->using_packed_addons();
486   }
487 
488   /**
489     Accessors for filesort_buffer (@see Filesort_buffer for documentation).
490   */
space_used_for_data()491   size_t space_used_for_data() const
492   { return filesort_buffer.space_used_for_data(); }
493 
isfull()494   bool isfull() const
495   { return filesort_buffer.isfull(); }
496 
init_next_record_pointer()497   void init_next_record_pointer()
498   { filesort_buffer.init_next_record_pointer(); }
499 
get_next_record_pointer()500   uchar *get_next_record_pointer()
501   { return filesort_buffer.get_next_record_pointer(); }
502 
adjust_next_record_pointer(uint32 val)503   void adjust_next_record_pointer(uint32 val)
504   { filesort_buffer.adjust_next_record_pointer(val); }
505 
get_sorted_record(uint idx)506   uchar* get_sorted_record(uint idx)
507   { return filesort_buffer.get_sorted_record(idx); }
508 
get_sort_keys()509   uchar **get_sort_keys()
510   { return filesort_buffer.get_sort_keys(); }
511 
get_raw_buf()512   Bounds_checked_array<uchar> get_raw_buf()
513   { return filesort_buffer.get_raw_buf(); }
514 
alloc_sort_buffer(uint num_records,uint record_length)515   uchar *alloc_sort_buffer(uint num_records, uint record_length)
516   { return filesort_buffer.alloc_sort_buffer(num_records, record_length); }
517 
free_sort_buffer()518   void free_sort_buffer()
519   { filesort_buffer.free_sort_buffer(); }
520 
init_record_pointers()521   void init_record_pointers()
522   { filesort_buffer.init_record_pointers(); }
523 
sort_buffer_size()524   size_t sort_buffer_size() const
525   { return filesort_buffer.sort_buffer_size(); }
526 
get_sort_length()527   uint get_sort_length() const
528   { return filesort_buffer.get_sort_length(); }
529 
set_sort_length(uint val)530   void set_sort_length(uint val)
531   { filesort_buffer.set_sort_length(val); }
532 };
533 
534 typedef Bounds_checked_array<uchar> Sort_buffer;
535 
536 int merge_many_buff(Sort_param *param, Sort_buffer sort_buffer,
537 		    Merge_chunk_array chunk_array,
538 		    size_t *num_chunks, IO_CACHE *t_file);
539 uint read_to_buffer(IO_CACHE *fromfile, Merge_chunk *merge_chunk,
540                     Sort_param *param);
541 int merge_buffers(Sort_param *param,IO_CACHE *from_file,
542                   IO_CACHE *to_file, Sort_buffer sort_buffer,
543                   Merge_chunk *lastbuff,
544                   Merge_chunk_array chunk_array,
545                   int flag);
546 
547 /**
548   Put all room used by freed buffer to use in adjacent buffer.
549 
550   Note, that we can't simply distribute memory evenly between all buffers,
551   because new areas must not overlap with old ones.
552 */
553 template<typename Heap_type>
reuse_freed_buff(Merge_chunk * old_top,Heap_type * heap)554 void reuse_freed_buff(Merge_chunk *old_top, Heap_type *heap)
555 {
556   typename Heap_type::iterator it= heap->begin();
557   typename Heap_type::iterator end= heap->end();
558   for (; it != end; ++it)
559   {
560     if (old_top->merge_freed_buff(*it))
561       return;
562   }
563   DBUG_ASSERT(0);
564 }
565 
566 #endif /* SQL_SORT_INCLUDED */
567