1 #ifndef SQL_SORT_INCLUDED
2 #define SQL_SORT_INCLUDED
3
4 /* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is also distributed with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have included with MySQL.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License, version 2.0, for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
25
26 #include "my_global.h" /* uchar */
27 #include "my_base.h" /* ha_rows */
28 #include "sql_array.h"
29 #include "mysql_com.h"
30 #include "filesort_utils.h"
31 #include "sql_alloc.h"
32 #include <string.h> /* memset */
33 #include <vector>
34
35 class Field;
36 class Item;
37 struct TABLE;
38 class Filesort;
39
40 /* Defines used by filesort and uniques */
41
42 #define MERGEBUFF 7
43 #define MERGEBUFF2 15
44
45 /* Structs used when sorting */
46
47 struct st_sort_field {
48 Field *field; /* Field to sort */
49 Item *item; /* Item if not sorting fields */
50 uint length; /* Length of sort field */
51 uint suffix_length; /* Length suffix (0-4) */
52 Item_result result_type; /* Type of item */
53 enum_field_types field_type; /* Field type of the field or item */
54 bool reverse; /* if descending sort */
55 bool need_strxnfrm; /* If we have to use strxnfrm() */
56 };
57
58
59 /**
60 The structure Sort_addon_field describes the layout
61 for field values appended to sorted values in records to be sorted
62 in the sort buffer.
63
64 Null bit maps for the appended values is placed before the values
65 themselves. Offsets are from the last sorted field.
66
67 The structure is used to store values of the additional fields
68 in the sort buffer. It is used also when these values are read
69 from a temporary file/buffer in 'Filesort_info::unpack_addon_fields'.
70 */
71
72 struct Sort_addon_field {/* Sort addon packed field */
73 Field *field; /* Original field */
74 uint offset; /* Offset from the last sorted field */
75 uint null_offset; /* Offset to to null bit from the last sorted field */
76 uint max_length; /* Maximum length in the sort buffer */
77 uint8 null_bit; /* Null bit mask for the field */
78 };
79
80 struct Merge_chunk_compare_context
81 {
82 qsort_cmp2 key_compare;
83 const void *key_compare_arg;
84 };
85
86 /**
87 Descriptor for a merge chunk to be sort-merged.
88 A merge chunk is a sequence of pre-sorted records, written to a
89 temporary file. A Merge_chunk instance describes where this chunk is stored
90 in the file, and where it is located when it is in memory.
91
92 It is a POD because
93 - we read/write them from/to files.
94
95 We have accessors (getters/setters) for all struct members.
96 */
97 struct Merge_chunk
98 {
99 public:
Merge_chunkMerge_chunk100 Merge_chunk()
101 : m_current_key(NULL),
102 m_file_position(0),
103 m_buffer_start(NULL),
104 m_buffer_end(NULL),
105 m_rowcount(0),
106 m_mem_count(0),
107 m_max_keys(0)
108 {}
109
file_positionMerge_chunk110 my_off_t file_position() const { return m_file_position; }
set_file_positionMerge_chunk111 void set_file_position(my_off_t val) { m_file_position= val; }
advance_file_positionMerge_chunk112 void advance_file_position(my_off_t val) { m_file_position+= val; }
113
buffer_startMerge_chunk114 uchar *buffer_start() { return m_buffer_start; }
buffer_endMerge_chunk115 const uchar *buffer_end() const { return m_buffer_end; }
116
set_bufferMerge_chunk117 void set_buffer(uchar *start, uchar *end)
118 {
119 m_buffer_start= start;
120 m_buffer_end= end;
121 }
set_buffer_startMerge_chunk122 void set_buffer_start(uchar *start)
123 {
124 m_buffer_start= start;
125 }
set_buffer_endMerge_chunk126 void set_buffer_end(uchar *end)
127 {
128 DBUG_ASSERT(m_buffer_end == NULL || end <= m_buffer_end);
129 m_buffer_end= end;
130 }
131
init_current_keyMerge_chunk132 void init_current_key() { m_current_key= m_buffer_start; }
current_keyMerge_chunk133 uchar *current_key() { return m_current_key; }
advance_current_keyMerge_chunk134 void advance_current_key(uint val) { m_current_key+= val; }
135
decrement_rowcountMerge_chunk136 void decrement_rowcount(ha_rows val) { m_rowcount-= val; }
set_rowcountMerge_chunk137 void set_rowcount(ha_rows val) { m_rowcount= val; }
rowcountMerge_chunk138 ha_rows rowcount() const { return m_rowcount; }
139
mem_countMerge_chunk140 ha_rows mem_count() const { return m_mem_count; }
set_mem_countMerge_chunk141 void set_mem_count(ha_rows val) { m_mem_count= val; }
decrement_mem_countMerge_chunk142 ha_rows decrement_mem_count() { return --m_mem_count; }
143
max_keysMerge_chunk144 ha_rows max_keys() const { return m_max_keys; }
set_max_keysMerge_chunk145 void set_max_keys(ha_rows val) { m_max_keys= val; }
146
buffer_sizeMerge_chunk147 size_t buffer_size() const { return m_buffer_end - m_buffer_start; }
148
149 /**
150 Tries to merge *this with *mc, returns true if successful.
151 The assumption is that *this is no longer in use,
152 and the space it has been allocated can be handed over to a
153 buffer which is adjacent to it.
154 */
merge_freed_buffMerge_chunk155 bool merge_freed_buff(Merge_chunk *mc) const
156 {
157 if (mc->m_buffer_end == m_buffer_start)
158 {
159 mc->m_buffer_end= m_buffer_end;
160 mc->m_max_keys+= m_max_keys;
161 return true;
162 }
163 else if (mc->m_buffer_start == m_buffer_end)
164 {
165 mc->m_buffer_start= m_buffer_start;
166 mc->m_max_keys+= m_max_keys;
167 return true;
168 }
169 return false;
170 }
171
172 private:
173 uchar *m_current_key; /// The current key for this chunk.
174 my_off_t m_file_position;/// Current position in the file to be sorted.
175 uchar *m_buffer_start; /// Start of main-memory buffer for this chunk.
176 uchar *m_buffer_end; /// End of main-memory buffer for this chunk.
177 ha_rows m_rowcount; /// Number of unread rows in this chunk.
178 ha_rows m_mem_count; /// Number of rows in the main-memory buffer.
179 ha_rows m_max_keys; /// If we have fixed-size rows:
180 /// max number of rows in buffer.
181 };
182
183 typedef Bounds_checked_array<Sort_addon_field> Addon_fields_array;
184 typedef Bounds_checked_array<Merge_chunk> Merge_chunk_array;
185
186 /**
187 This class wraps information about usage of addon fields.
188 An Addon_fields object is used both during packing of data in the filesort
189 buffer, and later during unpacking in 'Filesort_info::unpack_addon_fields'.
190
191 @see documentation for the Sort_addon_field struct.
192 @see documentation for get_addon_fields()
193 */
194 class Addon_fields {
195 public:
Addon_fields(Addon_fields_array arr)196 Addon_fields(Addon_fields_array arr)
197 : m_field_descriptors(arr),
198 m_addon_buf(NULL),
199 m_addon_buf_length(0),
200 m_using_packed_addons(false)
201 {
202 DBUG_ASSERT(!arr.is_null());
203 }
204
begin()205 Sort_addon_field *begin() { return m_field_descriptors.begin(); }
end()206 Sort_addon_field *end() { return m_field_descriptors.end(); }
num_field_descriptors()207 size_t num_field_descriptors() const { return m_field_descriptors.size(); }
208
209 /// rr_unpack_from_tempfile needs an extra buffer when unpacking.
allocate_addon_buf(uint sz)210 uchar *allocate_addon_buf(uint sz)
211 {
212 if (m_addon_buf != NULL)
213 {
214 DBUG_ASSERT(m_addon_buf_length == sz);
215 return m_addon_buf;
216 }
217 m_addon_buf= static_cast<uchar*>(sql_alloc(sz));
218 if (m_addon_buf)
219 m_addon_buf_length= sz;
220 return m_addon_buf;
221 }
222
get_addon_buf()223 uchar *get_addon_buf() { return m_addon_buf; }
get_addon_buf_length()224 uint get_addon_buf_length() const { return m_addon_buf_length; }
225
set_using_packed_addons(bool val)226 void set_using_packed_addons(bool val)
227 {
228 m_using_packed_addons= val;
229 }
230
using_packed_addons()231 bool using_packed_addons() const
232 {
233 return m_using_packed_addons;
234 }
235
can_pack_addon_fields(uint record_length)236 static bool can_pack_addon_fields(uint record_length)
237 {
238 return (record_length <= (0xFFFF));
239 }
240
241 /**
242 @returns Total number of bytes used for packed addon fields.
243 the size of the length field + size of null bits + sum of field sizes.
244 */
read_addon_length(uchar * p)245 static uint read_addon_length(uchar *p)
246 {
247 return size_of_length_field + uint2korr(p);
248 }
249
250 /**
251 Stores the number of bytes used for packed addon fields.
252 */
store_addon_length(uchar * p,uint sz)253 static void store_addon_length(uchar *p, uint sz)
254 {
255 // We actually store the length of everything *after* the length field.
256 int2store(p, sz - size_of_length_field);
257 }
258
259 static const uint size_of_length_field= 2;
260
261 private:
262 Addon_fields_array m_field_descriptors;
263
264 uchar *m_addon_buf; ///< Buffer for unpacking addon fields.
265 uint m_addon_buf_length; ///< Length of the buffer.
266 bool m_using_packed_addons; ///< Are we packing the addon fields?
267 };
268
269 /**
270 There are two record formats for sorting:
271 |<key a><key b>...|<rowid>|
272 / sort_length / ref_l /
273
274 or with "addon fields"
275 |<key a><key b>...|<null bits>|<field a><field b>...|
276 / sort_length / addon_length /
277
278 The packed format for "addon fields"
279 |<key a><key b>...|<length>|<null bits>|<field a><field b>...|
280 / sort_length / addon_length /
281
282 <key> Fields are fixed-size, specially encoded with
283 Field::make_sort_key() so we can do byte-by-byte compare.
284 <length> Contains the *actual* packed length (after packing) of
285 everything after the sort keys.
286 The size of the length field is 2 bytes,
287 which should cover most use cases: addon data <= 65535 bytes.
288 This is the same as max record size in MySQL.
289 <null bits> One bit for each nullable field, indicating whether the field
290 is null or not. May have size zero if no fields are nullable.
291 <field xx> Are stored with field->pack(), and retrieved with field->unpack().
292 Addon fields within a record are stored consecutively, with no
293 "holes" or padding. They will have zero size for NULL values.
294
295 */
296 class Sort_param {
297 public:
298 uint rec_length; // Length of sorted records.
299 uint sort_length; // Length of sorted columns.
300 uint ref_length; // Length of record ref.
301 uint addon_length; // Length of added packed fields.
302 uint res_length; // Length of records in final sorted file/buffer.
303 uint max_keys_per_buffer; // Max keys / buffer.
304 ha_rows max_rows; // Select limit, or HA_POS_ERROR if unlimited.
305 ha_rows examined_rows; // Number of examined rows.
306 TABLE *sort_form; // For quicker make_sortkey.
307 bool use_hash; // Whether to use hash to distinguish cut JSON
308
309 /**
310 ORDER BY list with some precalculated info for filesort.
311 Array is created and owned by a Filesort instance.
312 */
313 Bounds_checked_array<st_sort_field> local_sortorder;
314
315 Addon_fields *addon_fields; ///< Descriptors for addon fields.
316 uchar *unique_buff;
317 bool not_killable;
318 bool using_pq;
319 char* tmp_buffer;
320
321 // The fields below are used only by Unique class.
322 Merge_chunk_compare_context cmp_context;
323 typedef int (*chunk_compare_fun)(Merge_chunk_compare_context* ctx,
324 uchar* arg1, uchar* arg2);
325 chunk_compare_fun compare;
326
Sort_param()327 Sort_param()
328 {
329 memset(this, 0, sizeof(*this));
330 }
331 /**
332 Initialize this struct for filesort() usage.
333 @see description of record layout above.
334 @param [in,out] file_sort Sorting information which may be re-used on
335 subsequent invocations of filesort().
336 @param sortlen Length of sorted columns.
337 @param table Table to be sorted.
338 @param max_length_for_sort_data From thd->variables.
339 @param maxrows HA_POS_ERROR or possible LIMIT value.
340 @param sort_positions @see documentation for the filesort() function.
341 */
342 void init_for_filesort(Filesort *file_sort,
343 uint sortlen, TABLE *table,
344 ulong max_length_for_sort_data,
345 ha_rows maxrows, bool sort_positions);
346
347 /// Enables the packing of addons if possible.
348 void try_to_pack_addons(ulong max_length_for_sort_data);
349
350 /// Are we packing the "addon fields"?
using_packed_addons()351 bool using_packed_addons() const
352 {
353 DBUG_ASSERT(m_using_packed_addons ==
354 (addon_fields != NULL && addon_fields->using_packed_addons()));
355 return m_using_packed_addons;
356 }
357
358 /// Are we using "addon fields"?
using_addon_fields()359 bool using_addon_fields() const
360 {
361 return addon_fields != NULL;
362 }
363
364 /**
365 Stores key fields in *to.
366 Then appends either *ref_pos (the <rowid>) or the "addon fields".
367 @param to out Where to store the result
368 @param ref_pos in Where to find the <rowid>
369 @returns Number of bytes stored.
370 */
371 uint make_sortkey(uchar *to, const uchar *ref_pos);
372
373 /// @returns The number of bytes used for sorting.
compare_length()374 size_t compare_length() const {
375 return sort_length;
376 }
377
378 /**
379 Getter for record length and result length.
380 @param record_start Pointer to record.
381 @param [out] recl Store record length here.
382 @param [out] resl Store result length here.
383 */
get_rec_and_res_len(uchar * record_start,uint * recl,uint * resl)384 void get_rec_and_res_len(uchar *record_start, uint *recl, uint *resl)
385 {
386 if (!using_packed_addons())
387 {
388 *recl= rec_length;
389 *resl= res_length;
390 return;
391 }
392 uchar *plen= record_start + sort_length;
393 *resl= Addon_fields::read_addon_length(plen);
394 DBUG_ASSERT(*resl <= res_length);
395 const uchar *record_end= plen + *resl;
396 *recl= static_cast<uint>(record_end - record_start);
397 }
398
399 private:
400 uint m_packable_length; ///< total length of fields which have a packable type
401 bool m_using_packed_addons; ///< caches the value of using_packed_addons()
402
403 // Not copyable.
404 Sort_param(const Sort_param&);
405 Sort_param &operator=(const Sort_param&);
406 };
407
408
409 /**
410 A class wrapping misc buffers used for sorting.
411 It is part of 'struct TABLE' which is still initialized using memset(),
412 so do not add any virtual functions to this class.
413 */
414 class Filesort_info
415 {
416 /// Buffer for sorting keys.
417 Filesort_buffer filesort_buffer;
418
419 public:
420 IO_CACHE *io_cache; ///< If sorted through filesort
421 Merge_chunk_array merge_chunks; ///< Array of chunk descriptors
422
423 Addon_fields *addon_fields; ///< Addon field descriptors.
424
425 /**
426 If the entire result of filesort fits in memory, we skip the merge phase.
427 We may leave the result in filesort_buffer
428 (indicated by sorted_result_in_fsbuf), or we may strip away
429 the sort keys, and copy the sorted result into a new buffer.
430 This new buffer is [sorted_result ... sorted_result_end]
431 @see save_index()
432 */
433 bool sorted_result_in_fsbuf;
434 uchar *sorted_result;
435 uchar *sorted_result_end;
436
437 ha_rows found_records; ///< How many records in sort.
438
439 // Note that we use the default copy CTOR / assignment operator in filesort().
Filesort_info()440 Filesort_info()
441 : sorted_result_in_fsbuf(false),
442 sorted_result(NULL), sorted_result_end(NULL)
443 {};
444
has_filesort_result_in_memory()445 bool has_filesort_result_in_memory() const
446 {
447 return sorted_result || sorted_result_in_fsbuf;
448 }
449
has_filesort_result()450 bool has_filesort_result() const
451 {
452 return has_filesort_result_in_memory() ||
453 (io_cache && my_b_inited(io_cache));
454 }
455
456 /** Sort filesort_buffer */
sort_buffer(Sort_param * param,uint count)457 void sort_buffer(Sort_param *param, uint count)
458 { filesort_buffer.sort_buffer(param, count); }
459
460 /**
461 Copies (unpacks) values appended to sorted fields from a buffer back to
462 their regular positions specified by the Field::ptr pointers.
463 @param buff Buffer which to unpack the value from
464 */
465 template<bool Packed_addon_fields>
466 inline void unpack_addon_fields(uchar *buff);
467
468 /**
469 Reads 'count' number of chunk descriptors into the merge_chunks array.
470 In case of error, the merge_chunks array will be empty.
471 @param chunk_file File containing the descriptors.
472 @param count Number of chunks to read.
473 */
474 void read_chunk_descriptors(IO_CACHE* chunk_file, uint count);
475
476 /// Are we using "addon fields"?
using_addon_fields()477 bool using_addon_fields() const
478 {
479 return addon_fields != NULL;
480 }
481
482 /// Are we using "packed addon fields"?
using_packed_addons()483 bool using_packed_addons() const
484 {
485 return addon_fields != NULL && addon_fields->using_packed_addons();
486 }
487
488 /**
489 Accessors for filesort_buffer (@see Filesort_buffer for documentation).
490 */
space_used_for_data()491 size_t space_used_for_data() const
492 { return filesort_buffer.space_used_for_data(); }
493
isfull()494 bool isfull() const
495 { return filesort_buffer.isfull(); }
496
init_next_record_pointer()497 void init_next_record_pointer()
498 { filesort_buffer.init_next_record_pointer(); }
499
get_next_record_pointer()500 uchar *get_next_record_pointer()
501 { return filesort_buffer.get_next_record_pointer(); }
502
adjust_next_record_pointer(uint32 val)503 void adjust_next_record_pointer(uint32 val)
504 { filesort_buffer.adjust_next_record_pointer(val); }
505
get_sorted_record(uint idx)506 uchar* get_sorted_record(uint idx)
507 { return filesort_buffer.get_sorted_record(idx); }
508
get_sort_keys()509 uchar **get_sort_keys()
510 { return filesort_buffer.get_sort_keys(); }
511
get_raw_buf()512 Bounds_checked_array<uchar> get_raw_buf()
513 { return filesort_buffer.get_raw_buf(); }
514
alloc_sort_buffer(uint num_records,uint record_length)515 uchar *alloc_sort_buffer(uint num_records, uint record_length)
516 { return filesort_buffer.alloc_sort_buffer(num_records, record_length); }
517
free_sort_buffer()518 void free_sort_buffer()
519 { filesort_buffer.free_sort_buffer(); }
520
init_record_pointers()521 void init_record_pointers()
522 { filesort_buffer.init_record_pointers(); }
523
sort_buffer_size()524 size_t sort_buffer_size() const
525 { return filesort_buffer.sort_buffer_size(); }
526
get_sort_length()527 uint get_sort_length() const
528 { return filesort_buffer.get_sort_length(); }
529
set_sort_length(uint val)530 void set_sort_length(uint val)
531 { filesort_buffer.set_sort_length(val); }
532 };
533
534 typedef Bounds_checked_array<uchar> Sort_buffer;
535
536 int merge_many_buff(Sort_param *param, Sort_buffer sort_buffer,
537 Merge_chunk_array chunk_array,
538 size_t *num_chunks, IO_CACHE *t_file);
539 uint read_to_buffer(IO_CACHE *fromfile, Merge_chunk *merge_chunk,
540 Sort_param *param);
541 int merge_buffers(Sort_param *param,IO_CACHE *from_file,
542 IO_CACHE *to_file, Sort_buffer sort_buffer,
543 Merge_chunk *lastbuff,
544 Merge_chunk_array chunk_array,
545 int flag);
546
547 /**
548 Put all room used by freed buffer to use in adjacent buffer.
549
550 Note, that we can't simply distribute memory evenly between all buffers,
551 because new areas must not overlap with old ones.
552 */
553 template<typename Heap_type>
reuse_freed_buff(Merge_chunk * old_top,Heap_type * heap)554 void reuse_freed_buff(Merge_chunk *old_top, Heap_type *heap)
555 {
556 typename Heap_type::iterator it= heap->begin();
557 typename Heap_type::iterator end= heap->end();
558 for (; it != end; ++it)
559 {
560 if (old_top->merge_freed_buff(*it))
561 return;
562 }
563 DBUG_ASSERT(0);
564 }
565
566 #endif /* SQL_SORT_INCLUDED */
567