1 /*
2 Copyright (c) 2000, 2021, Oracle and/or its affiliates.
3 Copyright (c) 2018, Percona and/or its affiliates. All rights reserved.
4 Copyright (c) 2009, 2015, MariaDB
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9
10 This program is also distributed with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation. The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have included with MySQL.
16
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License, version 2.0, for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with this program; if not, write to the Free Software
24 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
25
26
27 /**
28 @file
29
30 @brief
31 Sorts a database
32 */
33
34 #include "filesort.h"
35 #include <m_ctype.h>
36 #include "sql_sort.h"
37 #include "probes_mysql.h"
38 #include "opt_range.h" // QUICK
39 #include "bounded_queue.h"
40 #include "filesort_utils.h"
41 #include "sql_select.h"
42 #include "debug_sync.h"
43 #include "opt_trace.h"
44 #include "sql_optimizer.h" // JOIN
45 #include "sql_base.h"
46 #include "opt_costmodel.h"
47 #include "priority_queue.h"
48 #include "log.h"
49 #include "item_sum.h" // Item_sum
50 #include "json_dom.h" // Json_wrapper
51 #include "template_utils.h"
52
53 #include "pfs_file_provider.h"
54 #include "mysql/psi/mysql_file.h"
55
56 #include <algorithm>
57 #include <utility>
58 using std::max;
59 using std::min;
60
61 namespace {
62
63 struct Mem_compare
64 {
Mem_compare__anond7565cf50111::Mem_compare65 Mem_compare() : m_compare_length(0) {}
66
Mem_compare__anond7565cf50111::Mem_compare67 Mem_compare(const Mem_compare &that)
68 : m_compare_length(that.m_compare_length)
69 {
70 }
71
operator ()__anond7565cf50111::Mem_compare72 bool operator()(const uchar *s1, const uchar *s2) const
73 {
74 // memcmp(s1, s2, 0) is guaranteed to return zero.
75 return memcmp(s1, s2, m_compare_length) < 0;
76 }
77
78 size_t m_compare_length;
79 };
80 }
81
82
83 /* functions defined in this file */
84
85 static ha_rows find_all_keys(Sort_param *param, QEP_TAB *qep_tab,
86 Filesort_info *fs_info,
87 IO_CACHE *buffer_file,
88 IO_CACHE *chunk_file,
89 Bounded_queue<uchar *, uchar *, Sort_param,
90 Mem_compare> *pq,
91 ha_rows *found_rows);
92 static int write_keys(Sort_param *param, Filesort_info *fs_info,
93 uint count, IO_CACHE *buffer_file, IO_CACHE *tempfile);
94 static void register_used_fields(Sort_param *param);
95 static int merge_index(Sort_param *param,
96 Sort_buffer sort_buffer,
97 Merge_chunk_array chunk_array,
98 IO_CACHE *tempfile,
99 IO_CACHE *outfile);
100 static bool save_index(Sort_param *param, uint count,
101 Filesort_info *table_sort);
102 static uint suffix_length(ulong string_length);
103
104 static bool check_if_pq_applicable(Opt_trace_context *trace,
105 Sort_param *param, Filesort_info *info,
106 TABLE *table,
107 ha_rows records, ulong memory_available,
108 bool keep_addon_fields);
109
110
/**
  Set up this Sort_param for one filesort invocation.

  Decides whether full column values ("addon fields") can be appended to the
  sort keys — avoiding a second pass to re-fetch rows — or whether only row
  references are kept, and derives the record/result lengths accordingly.

  @param file_sort                 Filesort descriptor; supplies addon fields
  @param sortlen                   Length of the sort key itself
  @param table                     Table being sorted
  @param max_length_for_sort_data  Max allowed length of a full sort record
  @param maxrows                   Maximum number of rows to return (LIMIT)
  @param sort_positions            If true, force sorting by row position
*/
void Sort_param::init_for_filesort(Filesort *file_sort,
                                   uint sortlen, TABLE *table,
                                   ulong max_length_for_sort_data,
                                   ha_rows maxrows, bool sort_positions)
{
  assert(max_rows == 0);   // function should not be called twice
  sort_length= sortlen;
  ref_length= table->file->ref_length;
  // Addon fields are only worthwhile when re-reading rows is expensive and
  // the caller did not explicitly ask for position-based sorting.
  if (!(table->file->ha_table_flags() & HA_FAST_KEY_READ) &&
      !table->fulltext_searched && !sort_positions)
  {
    /*
      Get the descriptors of all fields whose values are appended
      to sorted fields and get its total length in addon_length.
    */
    addon_fields=
      file_sort->get_addon_fields(max_length_for_sort_data,
                                  table->field, sort_length, &addon_length,
                                  &m_packable_length);
  }
  if (using_addon_fields())
  {
    res_length= addon_length;
  }
  else
  {
    res_length= ref_length;
    /*
      The reference to the record is considered
      as an additional sorted field
    */
    sort_length+= ref_length;
  }
  /*
    Add hash at the end of sort key to order cut values correctly.
    Needed for GROUPing, rather than for ORDERing.
  */
  if (use_hash)
    sort_length+= sizeof(ulonglong);

  rec_length= sort_length + addon_length;
  max_rows= maxrows;
}
154
155
/**
  Decide whether to switch this sort to packed (variable-length) addon
  fields, and update all length bookkeeping if so.

  Packing is skipped when there are no addon fields, when they are already
  packed, when the result record cannot carry a length prefix, when the
  extra length field would exceed max_length_for_sort_data, or when the
  expected saving is too small to matter.
*/
void Sort_param::try_to_pack_addons(ulong max_length_for_sort_data)
{
  if (!using_addon_fields() ||  // no addons, or
      using_packed_addons())    // already packed
    return;

  if (!Addon_fields::can_pack_addon_fields(res_length))
    return;

  const uint sz= Addon_fields::size_of_length_field;
  if (rec_length + sz > max_length_for_sort_data)
    return;

  // Heuristic: skip packing if potential savings are less than 10 bytes.
  if (m_packable_length < (10 + sz))
    return;

  // Shift every addon field by the size of the new length prefix.
  Addon_fields_array::iterator addonf= addon_fields->begin();
  for ( ; addonf != addon_fields->end(); ++addonf)
  {
    addonf->offset+= sz;
    addonf->null_offset+= sz;
  }
  addon_fields->set_using_packed_addons(true);
  m_using_packed_addons= true;

  // Account for the length field in all record-size bookkeeping.
  addon_length+= sz;
  res_length+= sz;
  rec_length+= sz;
}
186
187
trace_filesort_information(Opt_trace_context * trace,const st_sort_field * sortorder,uint s_length)188 static void trace_filesort_information(Opt_trace_context *trace,
189 const st_sort_field *sortorder,
190 uint s_length)
191 {
192 if (!trace->is_started())
193 return;
194
195 Opt_trace_array trace_filesort(trace, "filesort_information");
196 for (; s_length-- ; sortorder++)
197 {
198 Opt_trace_object oto(trace);
199 oto.add_alnum("direction", sortorder->reverse ? "desc" : "asc");
200
201 if (sortorder->field)
202 {
203 if (strlen(sortorder->field->table->alias) != 0)
204 oto.add_utf8_table(sortorder->field->table->pos_in_table_list);
205 else
206 oto.add_alnum("table", "intermediate_tmp_table");
207 oto.add_alnum("field", sortorder->field->field_name ?
208 sortorder->field->field_name : "tmp_table_column");
209 }
210 else
211 oto.add("expression", sortorder->item);
212 }
213 }
214
215
216 /**
217 Sort a table.
218 Creates a set of pointers that can be used to read the rows
219 in sorted order. This should be done with the functions
220 in records.cc.
221
222 Before calling filesort, one must have done
223 table->file->info(HA_STATUS_VARIABLE)
224
225 The result set is stored in table->sort.io_cache or
226 table->sort.sorted_result, or left in the main filesort buffer.
227
228 @param thd Current thread
229 @param filesort Table and how to sort it
230 @param sort_positions Set to TRUE if we want to force sorting by position
231 (Needed by UPDATE/INSERT or ALTER TABLE or
232 when rowids are required by executor)
233 @param[out] examined_rows Store number of examined rows here
234 This is the number of found rows before
235 applying WHERE condition.
236 @param[out] found_rows Store the number of found rows here.
237 This is the number of found rows after
238 applying WHERE condition.
239 @param[out] returned_rows Number of rows in the result, could be less than
240 found_rows if LIMIT is provided.
241
242 @note
243 If we sort by position (like if sort_positions is 1) filesort() will
244 call table->prepare_for_position().
245
246 @returns False if success, true if error
247 */
248
filesort(THD * thd,Filesort * filesort,bool sort_positions,ha_rows * examined_rows,ha_rows * found_rows,ha_rows * returned_rows)249 bool filesort(THD *thd, Filesort *filesort, bool sort_positions,
250 ha_rows *examined_rows, ha_rows *found_rows,
251 ha_rows *returned_rows)
252 {
253 int error;
254 ulong memory_available= thd->variables.sortbuff_size;
255 size_t num_chunks;
256 ha_rows num_rows= HA_POS_ERROR;
257 IO_CACHE tempfile; // Temporary file for storing intermediate results.
258 IO_CACHE chunk_file; // For saving Merge_chunk structs.
259 IO_CACHE *outfile; // Contains the final, sorted result.
260 Sort_param param;
261 bool multi_byte_charset;
262 Bounded_queue<uchar *, uchar *, Sort_param, Mem_compare>
263 pq((Malloc_allocator<uchar*>
264 (key_memory_Filesort_info_record_pointers)));
265 Opt_trace_context * const trace= &thd->opt_trace;
266 QEP_TAB *const tab= filesort->tab;
267 TABLE *const table= tab->table();
268 ha_rows max_rows= filesort->limit;
269 uint s_length= 0;
270
271 DBUG_ENTER("filesort");
272
273 if (!(s_length= filesort->make_sortorder()))
274 DBUG_RETURN(true); /* purecov: inspected */
275
276 /*
277 We need a nameless wrapper, since we may be inside the "steps" of
278 "join_execution".
279 */
280 Opt_trace_object trace_wrapper(trace);
281 trace_filesort_information(trace, filesort->sortorder, s_length);
282
283 assert(!table->reginfo.join_tab);
284 assert(tab == table->reginfo.qep_tab);
285 Item_subselect *const subselect= tab && tab->join() ?
286 tab->join()->select_lex->master_unit()->item : NULL;
287
288 MYSQL_FILESORT_START(const_cast<char*>(table->s->db.str),
289 const_cast<char*>(table->s->table_name.str));
290 DEBUG_SYNC(thd, "filesort_start");
291
292 /*
293 Release InnoDB's adaptive hash index latch (if holding) before
294 running a sort.
295 */
296 ha_release_temporary_latches(thd);
297
298 /*
299 Don't use table->sort in filesort as it is also used by
300 QUICK_INDEX_MERGE_SELECT. Work with a copy and put it back at the end
301 when index_merge select has finished with it.
302 */
303 Filesort_info table_sort= table->sort;
304 table->sort.io_cache= NULL;
305 assert(table_sort.sorted_result == NULL);
306 table_sort.sorted_result_in_fsbuf= false;
307
308 outfile= table_sort.io_cache;
309 my_b_clear(&tempfile);
310 my_b_clear(&chunk_file);
311 error= 1;
312
313 param.init_for_filesort(filesort,
314 sortlength(thd, filesort->sortorder, s_length,
315 &multi_byte_charset,
316 ¶m.use_hash),
317 table,
318 thd->variables.max_length_for_sort_data,
319 max_rows, sort_positions);
320
321 table_sort.addon_fields= param.addon_fields;
322
323 if (tab->quick())
324 thd->inc_status_sort_range();
325 else
326 thd->inc_status_sort_scan();
327
328 thd->query_plan_flags|= QPLAN_FILESORT;
329
330 // If number of rows is not known, use as much of sort buffer as possible.
331 num_rows= table->file->estimate_rows_upper_bound();
332
333 if (multi_byte_charset &&
334 !(param.tmp_buffer= (char*) my_malloc(key_memory_Sort_param_tmp_buffer,
335 param.sort_length,MYF(MY_WME))))
336 goto err;
337
338 if (check_if_pq_applicable(trace, ¶m, &table_sort,
339 table, num_rows, memory_available,
340 subselect != NULL))
341 {
342 DBUG_PRINT("info", ("filesort PQ is applicable"));
343 /*
344 For PQ queries (with limit) we know exactly how many pointers/records
345 we have in the buffer, so to simplify things, we initialize
346 all pointers here. (We cannot pack fields anyways, so there is no
347 point in doing lazy initialization).
348 */
349 table_sort.init_record_pointers();
350
351 if (pq.init(param.max_rows,
352 ¶m, table_sort.get_sort_keys()))
353 {
354 /*
355 If we fail to init pq, we have to give up:
356 out of memory means my_malloc() will call my_error().
357 */
358 DBUG_PRINT("info", ("failed to allocate PQ"));
359 table_sort.free_sort_buffer();
360 assert(thd->is_error());
361 goto err;
362 }
363 filesort->using_pq= true;
364 param.using_pq= true;
365 }
366 else
367 {
368 DBUG_PRINT("info", ("filesort PQ is not applicable"));
369 filesort->using_pq= false;
370 param.using_pq= false;
371
372 /*
373 When sorting using priority queue, we cannot use packed addons.
374 Without PQ, we can try.
375 */
376 param.try_to_pack_addons(thd->variables.max_length_for_sort_data);
377
378 /*
379 We need space for at least one record from each merge chunk, i.e.
380 param->max_keys_per_buffer >= MERGEBUFF2
381 See merge_buffers()),
382 memory_available must be large enough for
383 param->max_keys_per_buffer * (record + record pointer) bytes
384 (the main sort buffer, see alloc_sort_buffer()).
385 Hence this minimum:
386 */
387 const ulong min_sort_memory=
388 max<ulong>(MIN_SORT_MEMORY,
389 ALIGN_SIZE(MERGEBUFF2 * (param.rec_length + sizeof(uchar*))));
390 /*
391 Cannot depend on num_rows. For external sort, space for upto MERGEBUFF2
392 rows is required.
393 */
394 if (num_rows < MERGEBUFF2)
395 num_rows= MERGEBUFF2;
396
397 while (memory_available >= min_sort_memory)
398 {
399 ha_rows keys= memory_available / (param.rec_length + sizeof(char*));
400 // If the table is empty, allocate space for one row.
401 param.max_keys_per_buffer= (uint) min(num_rows > 0 ? num_rows : 1, keys);
402
403 table_sort.alloc_sort_buffer(param.max_keys_per_buffer, param.rec_length);
404 if (table_sort.sort_buffer_size() > 0)
405 break;
406 ulong old_memory_available= memory_available;
407 memory_available= memory_available/4*3;
408 if (memory_available < min_sort_memory &&
409 old_memory_available > min_sort_memory)
410 memory_available= min_sort_memory;
411 }
412 if (memory_available < min_sort_memory)
413 {
414 my_error(ER_OUT_OF_SORTMEMORY,MYF(ME_ERRORLOG + ME_FATALERROR));
415 goto err;
416 }
417 }
418
419 if (open_cached_file(&chunk_file,mysql_tmpdir,TEMP_PREFIX,
420 DISK_BUFFER_SIZE, MYF(MY_WME)))
421 goto err;
422
423 param.sort_form= table;
424 param.local_sortorder=
425 Bounds_checked_array<st_sort_field>(filesort->sortorder, s_length);
426 // New scope, because subquery execution must be traced within an array.
427 {
428 Opt_trace_array ota(trace, "filesort_execution");
429 num_rows= find_all_keys(¶m, tab,
430 &table_sort,
431 &chunk_file,
432 &tempfile,
433 param.using_pq ? &pq : NULL,
434 found_rows);
435 if (num_rows == HA_POS_ERROR)
436 goto err;
437 }
438 DEBUG_SYNC(thd, "after_find_all_keys");
439
440 num_chunks= static_cast<size_t>(my_b_tell(&chunk_file)) /
441 sizeof(Merge_chunk);
442
443 Opt_trace_object(trace, "filesort_summary")
444 .add("rows", num_rows)
445 .add("examined_rows", param.examined_rows)
446 .add("number_of_tmp_files", num_chunks)
447 .add("sort_buffer_size", table_sort.sort_buffer_size())
448 .add_alnum("sort_mode",
449 param.using_packed_addons() ?
450 "<sort_key, packed_additional_fields>" :
451 param.using_addon_fields() ?
452 "<sort_key, additional_fields>" : "<sort_key, rowid>");
453
454 if (num_chunks == 0) // The whole set is in memory
455 {
456 if (save_index(¶m, (uint) num_rows, &table_sort))
457 goto err;
458 }
459 else
460 {
461 thd->query_plan_flags|= QPLAN_FILESORT_DISK;
462
463 // We will need an extra buffer in rr_unpack_from_tempfile()
464 if (table_sort.using_addon_fields() &&
465 !(table_sort.addon_fields->allocate_addon_buf(param.addon_length)))
466 goto err; /* purecov: inspected */
467
468 table_sort.read_chunk_descriptors(&chunk_file, num_chunks);
469 if (table_sort.merge_chunks.is_null())
470 goto err; /* purecov: inspected */
471
472 close_cached_file(&chunk_file);
473
474 /* Open cached file if it isn't open */
475 if (! my_b_inited(outfile) &&
476 open_cached_file(outfile,mysql_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER,
477 MYF(MY_WME)))
478 goto err;
479 if (reinit_io_cache(outfile,WRITE_CACHE,0L,0,0))
480 goto err;
481
482 /*
483 Use also the space previously used by string pointers in sort_buffer
484 for temporary key storage.
485 */
486 param.max_keys_per_buffer=
487 table_sort.sort_buffer_size() / param.rec_length;
488
489 if (merge_many_buff(¶m,
490 table_sort.get_raw_buf(),
491 table_sort.merge_chunks,
492 &num_chunks,
493 &tempfile))
494 goto err;
495 if (flush_io_cache(&tempfile) ||
496 reinit_io_cache(&tempfile,READ_CACHE,0L,0,0))
497 goto err;
498 if (merge_index(¶m,
499 table_sort.get_raw_buf(),
500 Merge_chunk_array(table_sort.merge_chunks.begin(),
501 num_chunks),
502 &tempfile,
503 outfile))
504 goto err;
505 }
506
507 if (num_rows > param.max_rows)
508 {
509 // If find_all_keys() produced more results than the query LIMIT.
510 num_rows= param.max_rows;
511 }
512 error= 0;
513
514 err:
515 my_free(param.tmp_buffer);
516 if (!subselect || !subselect->is_uncacheable())
517 {
518 if (!table_sort.sorted_result_in_fsbuf)
519 table_sort.free_sort_buffer();
520 my_free(table_sort.merge_chunks.array());
521 table_sort.merge_chunks= Merge_chunk_array(NULL, 0);
522 }
523 close_cached_file(&tempfile);
524 close_cached_file(&chunk_file);
525 if (my_b_inited(outfile))
526 {
527 if (flush_io_cache(outfile))
528 error=1;
529 {
530 my_off_t save_pos=outfile->pos_in_file;
531 /* For following reads */
532 if (reinit_io_cache(outfile,READ_CACHE,0L,0,0))
533 error=1;
534 outfile->end_of_file=save_pos;
535 }
536 }
537 if (error)
538 {
539 int kill_errno= thd->killed_errno();
540
541 assert(thd->is_error() || kill_errno);
542
543 /*
544 We replace the table->sort at the end.
545 Hence calling free_io_cache to make sure table->sort.io_cache
546 used for QUICK_INDEX_MERGE_SELECT is free.
547 */
548 free_io_cache(table);
549
550 /*
551 Guard against Bug#11745656 -- KILL QUERY should not send "server shutdown"
552 to client!
553 */
554 const char *cause= kill_errno
555 ? ((kill_errno == THD::KILL_CONNECTION && !abort_loop)
556 ? ER(THD::KILL_QUERY)
557 : ER(kill_errno))
558 : thd->get_stmt_da()->message_text();
559 const char *msg= ER_THD(thd, ER_FILSORT_ABORT);
560
561 my_printf_error(ER_FILSORT_ABORT,
562 "%s: %s",
563 MYF(0),
564 msg,
565 cause);
566
567 if (thd->is_fatal_error)
568 sql_print_information("%s, host: %s, user: %s, "
569 "thread: %u, error: %s, query: %-.4096s",
570 msg,
571 thd->security_context()->host_or_ip().str,
572 thd->security_context()->priv_user().str,
573 thd->thread_id(),
574 cause,
575 thd->query().str);
576 }
577 else
578 thd->inc_status_sort_rows(num_rows);
579 *examined_rows= param.examined_rows;
580 *returned_rows= num_rows;
581
582 /* table->sort.io_cache should be free by this time */
583 assert(NULL == table->sort.io_cache);
584
585 // Assign the copy back!
586 table->sort= table_sort;
587
588 DBUG_PRINT("exit",
589 ("num_rows: %ld examined_rows: %ld found_rows: %ld",
590 (long) num_rows, (long) *examined_rows, (long) *found_rows));
591 MYSQL_FILESORT_DONE(error, num_rows);
592 DBUG_RETURN(error);
593 } /* filesort */
594
595
filesort_free_buffers(TABLE * table,bool full)596 void filesort_free_buffers(TABLE *table, bool full)
597 {
598 DBUG_ENTER("filesort_free_buffers");
599 my_free(table->sort.sorted_result);
600 table->sort.sorted_result= NULL;
601 table->sort.sorted_result_in_fsbuf= false;
602
603 if (full)
604 {
605 table->sort.free_sort_buffer();
606 my_free(table->sort.merge_chunks.array());
607 table->sort.merge_chunks= Merge_chunk_array(NULL, 0);
608 }
609
610 table->sort.addon_fields= NULL;
611 DBUG_VOID_RETURN;
612 }
613
614
/**
  Expand the ORDER list into the st_sort_field array 'sortorder'.

  For each ORDER element, determines whether sorting is on a physical Field
  or on an Item expression, and records the sort direction.

  @returns Number of sort elements, or 0 on allocation failure.
*/
uint Filesort::make_sortorder()
{
  uint count;
  st_sort_field *sort,*pos;
  ORDER *ord;
  DBUG_ENTER("make_sortorder");


  count=0;
  for (ord = order; ord; ord= ord->next)
    count++;
  // Allocate one extra element as an end marker.
  if (!sortorder)
    sortorder= (st_sort_field*) sql_alloc(sizeof(st_sort_field) * (count + 1));
  pos= sort= sortorder;

  if (!pos)
    DBUG_RETURN(0);

  for (ord= order; ord; ord= ord->next, pos++)
  {
    Item *const item= ord->item[0], *const real_item= item->real_item();
    pos->field= 0; pos->item= 0;
    if (real_item->type() == Item::FIELD_ITEM)
    {
      /*
        Could be a field, or Item_direct_view_ref/Item_ref wrapping a field
        If it is an Item_outer_ref, only_full_group_by has been switched off.
      */
      assert
        (item->type() == Item::FIELD_ITEM ||
         (item->type() == Item::REF_ITEM &&
          (down_cast<Item_ref*>(item)->ref_type() == Item_ref::VIEW_REF
           || down_cast<Item_ref*>(item)->ref_type() == Item_ref::OUTER_REF
           || down_cast<Item_ref*>(item)->ref_type() == Item_ref::REF)
          ));
      pos->field= down_cast<Item_field*>(real_item)->field;
    }
    else if (real_item->type() == Item::SUM_FUNC_ITEM &&
             !real_item->const_item())
    {
      // Aggregate, or Item_aggregate_ref
      assert(item->type() == Item::SUM_FUNC_ITEM ||
             (item->type() == Item::REF_ITEM &&
              static_cast<Item_ref*>(item)->ref_type() ==
              Item_ref::AGGREGATE_REF));
      pos->field= item->get_tmp_table_field();
    }
    else if (real_item->type() == Item::COPY_STR_ITEM)
    {                                           // Blob patch
      pos->item= static_cast<Item_copy*>(real_item)->get_item();
    }
    else
      pos->item= item;
    pos->reverse= (ord->direction == ORDER::ORDER_DESC);
    assert(pos->field != NULL || pos->item != NULL);
  }
  DBUG_RETURN(count);
}
673
674
/**
  Read 'count' Merge_chunk descriptors from chunk_file into merge_chunks,
  (re)allocating the descriptor array as needed.

  On read failure the array is freed and merge_chunks is left empty;
  callers detect this via merge_chunks.is_null().
*/
void Filesort_info::read_chunk_descriptors(IO_CACHE *chunk_file, uint count)
{
  DBUG_ENTER("Filesort_info::read_chunk_descriptors");

  // If we already have a chunk array, we're doing sort in a subquery.
  if (!merge_chunks.is_null() &&
      merge_chunks.size() < count)
  {
    // The existing array is too small; drop it and allocate a bigger one.
    my_free(merge_chunks.array());               /* purecov: inspected */
    merge_chunks= Merge_chunk_array(NULL, 0);    /* purecov: inspected */
  }

  void *rawmem= merge_chunks.array();
  const size_t length= sizeof(Merge_chunk) * count;
  if (NULL == rawmem)
  {
    rawmem= my_malloc(key_memory_Filesort_info_merge, length, MYF(MY_WME));
    if (rawmem == NULL)
      DBUG_VOID_RETURN;                          /* purecov: inspected */
  }

  if (reinit_io_cache(chunk_file, READ_CACHE, 0L, 0, 0) ||
      my_b_read(chunk_file, static_cast<uchar*>(rawmem), length))
  {
    // Read failed: leave merge_chunks empty so the caller can detect it.
    my_free(rawmem);                             /* purecov: inspected */
    rawmem= NULL;                                /* purecov: inspected */
    count= 0;                                    /* purecov: inspected */
  }

  merge_chunks= Merge_chunk_array(static_cast<Merge_chunk*>(rawmem), count);
  DBUG_VOID_RETURN;
}
707
708 #ifndef NDEBUG
709 /*
710 Print a text, SQL-like record representation into dbug trace.
711
712 Note: this function is a work in progress: at the moment
713 - column read bitmap is ignored (can print garbage for unused columns)
714 - there is no quoting
715 */
dbug_print_record(TABLE * table,bool print_rowid)716 static void dbug_print_record(TABLE *table, bool print_rowid)
717 {
718 char buff[1024];
719 Field **pfield;
720 String tmp(buff,sizeof(buff),&my_charset_bin);
721 DBUG_LOCK_FILE;
722
723 fprintf(DBUG_FILE, "record (");
724 for (pfield= table->field; *pfield ; pfield++)
725 fprintf(DBUG_FILE, "%s%s", (*pfield)->field_name, (pfield[1])? ", ":"");
726 fprintf(DBUG_FILE, ") = ");
727
728 fprintf(DBUG_FILE, "(");
729 for (pfield= table->field; *pfield ; pfield++)
730 {
731 Field *field= *pfield;
732
733 if (field->is_null()) {
734 if (fwrite("NULL", sizeof(char), 4, DBUG_FILE) != 4) {
735 goto unlock_file_and_quit;
736 }
737 }
738
739 if (field->type() == MYSQL_TYPE_BIT)
740 (void) field->val_int_as_str(&tmp, 1);
741 else
742 field->val_str(&tmp);
743
744 if (fwrite(tmp.ptr(),sizeof(char),tmp.length(),DBUG_FILE) != tmp.length()) {
745 goto unlock_file_and_quit;
746 }
747
748 if (pfield[1]) {
749 if (fwrite(", ", sizeof(char), 2, DBUG_FILE) != 2) {
750 goto unlock_file_and_quit;
751 }
752 }
753 }
754 fprintf(DBUG_FILE, ")");
755 if (print_rowid)
756 {
757 fprintf(DBUG_FILE, " rowid ");
758 for (uint i=0; i < table->file->ref_length; i++)
759 {
760 fprintf(DBUG_FILE, "%x", table->file->ref[i]);
761 }
762 }
763 fprintf(DBUG_FILE, "\n");
764 unlock_file_and_quit:
765 DBUG_UNLOCK_FILE;
766 }
767 #endif
768
769
770 /// Error handler for filesort.
771 class Filesort_error_handler : public Internal_error_handler
772 {
773 THD *m_thd; ///< The THD in which filesort is executed.
774 bool m_seen_not_supported; ///< Has a not supported warning has been seen?
775 public:
776 /**
777 Create an error handler and push it onto the error handler
778 stack. The handler will be automatically popped from the error
779 handler stack when it is destroyed.
780 */
Filesort_error_handler(THD * thd)781 Filesort_error_handler(THD *thd)
782 : m_thd(thd), m_seen_not_supported(false)
783 {
784 thd->push_internal_handler(this);
785 }
786
787 /**
788 Pop the error handler from the error handler stack, and destroy
789 it.
790 */
~Filesort_error_handler()791 ~Filesort_error_handler()
792 {
793 m_thd->pop_internal_handler();
794 }
795
796 /**
797 Handle a condition.
798
799 The handler will make sure that no more than a single
800 ER_NOT_SUPPORTED_YET warning will be seen by the higher
801 layers. This warning is generated by Json_wrapper::make_sort_key()
802 for every value that it doesn't know how to create a sort key
803 for. It is sufficient for the higher layers to report this warning
804 only once per sort.
805 */
handle_condition(THD * thd,uint sql_errno,const char * sqlstate,Sql_condition::enum_severity_level * level,const char * msg)806 virtual bool handle_condition(THD *thd,
807 uint sql_errno,
808 const char* sqlstate,
809 Sql_condition::enum_severity_level *level,
810 const char* msg)
811 {
812 if (*level == Sql_condition::SL_WARNING &&
813 sql_errno == ER_NOT_SUPPORTED_YET)
814 {
815 if (m_seen_not_supported)
816 return true;
817 m_seen_not_supported= true;
818 }
819
820 return false;
821 }
822
823 };
824
825
// Walk mode used by the condition walks in find_all_keys(): visit items
// post-order and descend into subqueries as well.
static const Item::enum_walk walk_subquery=
  Item::enum_walk(Item::WALK_POSTFIX | Item::WALK_SUBQUERY);
828
829 /**
830 Search after sort_keys, and write them into tempfile
831 (if we run out of space in the sort buffer).
832 All produced sequences are guaranteed to be non-empty.
833
834 @param param Sorting parameter
835 @param select Use this to get source data
836 @param fs_info Struct containing sort buffer etc.
837 @param chunk_file File to write Merge_chunks describing sorted segments
838 in tempfile.
839 @param tempfile File to write sorted sequences of sortkeys to.
840 @param pq If !NULL, use it for keeping top N elements
841 @param [out] found_rows The number of FOUND_ROWS().
842 For a query with LIMIT, this value will typically
843 be larger than the function return value.
844
845 @note
846 Basic idea:
847 @verbatim
848 while (get_next_sortkey())
849 {
850 if (using priority queue)
851 push sort key into queue
852 else
853 {
854 if (no free space in sort buffer)
855 {
856 sort buffer;
857 dump sorted sequence to 'tempfile';
858 dump Merge_chunk describing sequence location into 'chunk_file';
859 }
860 put sort key into buffer;
861 if (key was packed)
862 tell sort buffer the actual number of bytes used;
863 }
864 }
865 if (buffer has some elements && dumped at least once)
866 sort-dump-dump as above;
867 else
868 don't sort, leave sort buffer to be sorted by caller.
869 @endverbatim
870
871 @returns
872 Number of records written on success.
873 @returns
874 HA_POS_ERROR on error.
875 */
876
static ha_rows find_all_keys(Sort_param *param, QEP_TAB *qep_tab,
                             Filesort_info *fs_info,
                             IO_CACHE *chunk_file,
                             IO_CACHE *tempfile,
                             Bounded_queue<uchar *, uchar *, Sort_param,
                                           Mem_compare> *pq,
                             ha_rows *found_rows)
{
  int error,flag;
  uint idx,indexpos,ref_length;
  uchar *ref_pos,*next_pos,ref_buff[MAX_REFLENGTH];
  my_off_t record;
  TABLE *sort_form;
  THD *thd= current_thd;
  // volatile: may be set asynchronously by KILL from another thread.
  volatile THD::killed_state *killed= &thd->killed;
  handler *file;
  MY_BITMAP *save_read_set, *save_write_set;
  bool skip_record;
  ha_rows num_records= 0;
  const bool packed_addon_fields= param->using_packed_addons();

  /*
    Set up an error handler for filesort. It is automatically pushed
    onto the internal error handler stack upon creation, and will be
    popped off the stack automatically when the handler goes out of
    scope.
  */
  Filesort_error_handler error_handler(thd);

  DBUG_ENTER("find_all_keys");
  DBUG_PRINT("info",("using: %s",
                     (qep_tab->condition() ? qep_tab->quick() ? "ranges" : "where":
                      "every row")));

  idx=indexpos=0;
  error= 0;
  sort_form=param->sort_form;
  file=sort_form->file;
  ref_length=param->ref_length;
  ref_pos= ref_buff;
  const bool quick_select= qep_tab->quick() != NULL;
  record=0;
  *found_rows= 0;
  // flag set: the handler fills file->ref itself (or we use a quick select);
  // flag clear: we compute the row position from the sequential record count.
  flag= ((file->ha_table_flags() & HA_REC_NOT_IN_SEQ) || quick_select);
  if (flag)
    ref_pos= &file->ref[0];
  next_pos=ref_pos;
  if (!quick_select)
  {
    next_pos=(uchar*) 0;  /* Find records in sequence */
    DBUG_EXECUTE_IF("bug14365043_1",
                    DBUG_SET("+d,ha_rnd_init_fail"););
    if ((error= file->ha_rnd_init(1)))
    {
      file->print_error(error, MYF(0));
      DBUG_RETURN(HA_POS_ERROR);
    }
    file->extra_opt(HA_EXTRA_CACHE,
                    current_thd->variables.read_buff_size);
  }

  if (quick_select)
  {
    if ((error= qep_tab->quick()->reset()))
    {
      file->print_error(error, MYF(0));
      DBUG_RETURN(HA_POS_ERROR);
    }
  }

  /* Remember original bitmaps */
  save_read_set= sort_form->read_set;
  save_write_set= sort_form->write_set;
  /*
    Set up temporary column read map for columns used by sort and verify
    it's not used
  */
  assert(sort_form->tmp_set.n_bits == 0 ||
         bitmap_is_clear_all(&sort_form->tmp_set));

  // Temporary set for register_used_fields and mark_field_in_map()
  sort_form->read_set= &sort_form->tmp_set;
  // Include fields used for sorting in the read_set.
  register_used_fields(param);

  // Include fields used by conditions in the read_set.
  if (qep_tab->condition())
  {
    Mark_field mf(sort_form, MARK_COLUMNS_TEMP);
    qep_tab->condition()->walk(&Item::mark_field_in_map,
                               walk_subquery, (uchar*) &mf);
  }
  // Include fields used by pushed conditions in the read_set.
  if (qep_tab->table()->file->pushed_idx_cond)
  {
    Mark_field mf(sort_form, MARK_COLUMNS_TEMP);
    qep_tab->table()->file->pushed_idx_cond->walk(&Item::mark_field_in_map,
                                                  walk_subquery,
                                                  (uchar*) &mf);
  }
  sort_form->column_bitmaps_set(&sort_form->tmp_set, &sort_form->tmp_set);

  DEBUG_SYNC(thd, "after_index_merge_phase1");
  // Main scan loop: fetch each row, apply the WHERE condition, and either
  // push the key into the priority queue or append it to the sort buffer
  // (flushing full buffers to tempfile as sorted runs).
  for (;;)
  {
    if (quick_select)
    {
      if ((error= qep_tab->quick()->get_next()))
        break;
      file->position(sort_form->record[0]);
      DBUG_EXECUTE_IF("debug_filesort", dbug_print_record(sort_form, TRUE););
    }
    else  /* Not quick-select */
    {
      DBUG_EXECUTE_IF("bug19656296", DBUG_SET("+d,ha_rnd_next_deadlock"););
      {
        error= file->ha_rnd_next(sort_form->record[0]);
        if (!flag)
        {
          my_store_ptr(ref_pos,ref_length,record); // Position to row
          record+= sort_form->s->db_record_offset;
        }
        else if (!error)
          file->position(sort_form->record[0]);
      }
      if (error && error != HA_ERR_RECORD_DELETED)
        break;
    }

    if (*killed)
    {
      DBUG_PRINT("info",("Sort killed by user"));
      if (!quick_select)
      {
        (void) file->extra(HA_EXTRA_NO_CACHE);
        file->ha_rnd_end();
      }
      num_records= HA_POS_ERROR;
      goto cleanup;
    }
    if (error == 0)
      param->examined_rows++;
    if (!error && !qep_tab->skip_record(thd, &skip_record) && !skip_record)
    {
      ++(*found_rows);
      if (pq)
        pq->push(ref_pos);
      else
      {
        if (fs_info->isfull())
        {
          // Buffer full: sort it and dump a run to tempfile.
          if (write_keys(param, fs_info, idx, chunk_file, tempfile))
          {
            num_records= HA_POS_ERROR;
            goto cleanup;
          }
          idx= 0;
          indexpos++;
        }
        if (idx == 0)
          fs_info->init_next_record_pointer();
        uchar *start_of_rec= fs_info->get_next_record_pointer();

        const uint rec_sz= param->make_sortkey(start_of_rec, ref_pos);
        // Packed addon records may be shorter than rec_length; reclaim the
        // unused tail of the slot.
        if (packed_addon_fields && rec_sz != param->rec_length)
          fs_info->adjust_next_record_pointer(rec_sz);

        idx++;
        num_records++;
      }
    }
    /*
      Don't try unlocking the row if skip_record reported an error since in
      this case the transaction might have been rolled back already.
    */
    else if (!thd->is_error())
      file->unlock_row();
    /* It does not make sense to read more keys in case of a fatal error */
    if (thd->is_error())
      break;
  }
  if (!quick_select)
  {
    (void) file->extra(HA_EXTRA_NO_CACHE);  /* End cacheing of records */
    if (!next_pos)
      file->ha_rnd_end();
  }

  if (thd->is_error())
  {
    num_records= HA_POS_ERROR;
    goto cleanup;
  }

  /* Signal we should use orignal column read and write maps */
  sort_form->column_bitmaps_set(save_read_set, save_write_set);

  DBUG_PRINT("test",("error: %d indexpos: %d",error,indexpos));
  if (error != HA_ERR_END_OF_FILE)
  {
    myf my_flags;
    // Deadlocks and lock-wait timeouts are expected conditions; do not
    // spam the server error log for them.
    switch (error) {
    case HA_ERR_LOCK_DEADLOCK:
    case HA_ERR_LOCK_WAIT_TIMEOUT:
      my_flags= MYF(0);
      break;
    default:
      my_flags= MYF(ME_ERRORLOG);
    }
    file->print_error(error, my_flags);
    num_records= HA_POS_ERROR;
    goto cleanup;
  }
  // If we already spilled runs to disk, flush the remaining partial buffer
  // as a final run so that all data is on disk for merging.
  if (indexpos && idx &&
      write_keys(param, fs_info, idx, chunk_file, tempfile))
  {
    num_records= HA_POS_ERROR;  // purecov: inspected
    goto cleanup;
  }

  if (pq)
    num_records= pq->num_elements();

cleanup:
  // Clear tmp_set so it can be used elsewhere
  bitmap_clear_all(&sort_form->tmp_set);

  DBUG_PRINT("info", ("find_all_keys return %lu", (ulong) num_records));

  DBUG_RETURN(num_records);
} /* find_all_keys */
1108
1109
1110 /**
1111 @details
1112 Sort the buffer and write:
1113 -# the sorted sequence to tempfile
1114 -# a Merge_chunk describing the sorted sequence position to chunk_file
1115
1116 @param param Sort parameters
1117 @param fs_info Contains the buffer to be sorted and written.
1118 @param count Number of records to write.
1119 @param chunk_file One 'Merge_chunk' struct will be written into this file.
1120 The Merge_chunk::{file_pos, count} will indicate where
1121 the sorted data was stored.
1122 @param tempfile The sorted sequence will be written into this file.
1123
1124 @returns
1125 0 OK
1126 @returns
1127 1 Error
1128 */
1129
1130 static int
write_keys(Sort_param * param,Filesort_info * fs_info,uint count,IO_CACHE * chunk_file,IO_CACHE * tempfile)1131 write_keys(Sort_param *param, Filesort_info *fs_info, uint count,
1132 IO_CACHE *chunk_file, IO_CACHE *tempfile)
1133 {
1134 Merge_chunk merge_chunk;
1135 DBUG_ENTER("write_keys");
1136
1137 fs_info->sort_buffer(param, count);
1138
1139 if (!my_b_inited(tempfile) &&
1140 open_cached_file(tempfile, mysql_tmpdir, TEMP_PREFIX, DISK_BUFFER_SIZE,
1141 MYF(MY_WME)))
1142 DBUG_RETURN(1); /* purecov: inspected */
1143
1144 // Check that we wont have more chunks than we can possibly keep in memory.
1145 if (my_b_tell(chunk_file) + sizeof(Merge_chunk) > (ulonglong)UINT_MAX)
1146 DBUG_RETURN(1); /* purecov: inspected */
1147
1148 merge_chunk.set_file_position(my_b_tell(tempfile));
1149 if (static_cast<ha_rows>(count) > param->max_rows)
1150 {
1151 // Write only SELECT LIMIT rows to the file
1152 count= static_cast<uint>(param->max_rows); /* purecov: inspected */
1153 }
1154 merge_chunk.set_rowcount(static_cast<ha_rows>(count));
1155
1156 const bool packed_addon_fields= param->using_packed_addons();
1157 for (uint ix= 0; ix < count; ++ix)
1158 {
1159 uint rec_length;
1160 uchar *record= fs_info->get_sorted_record(ix);
1161 if (packed_addon_fields)
1162 {
1163 rec_length= param->sort_length +
1164 Addon_fields::read_addon_length(record + param->sort_length);
1165 }
1166 else
1167 rec_length= param->rec_length;
1168
1169 if (my_b_write(tempfile, record, rec_length))
1170 DBUG_RETURN(1); /* purecov: inspected */
1171 }
1172
1173 if (my_b_write(chunk_file, reinterpret_cast<uchar*>(&merge_chunk),
1174 sizeof(merge_chunk)))
1175 DBUG_RETURN(1); /* purecov: inspected */
1176
1177 DBUG_RETURN(0);
1178 } /* write_keys */
1179
1180
1181 /**
1182 Store length as suffix in high-byte-first order.
1183 */
1184
store_length(uchar * to,size_t length,uint pack_length)1185 static inline void store_length(uchar *to, size_t length, uint pack_length)
1186 {
1187 switch (pack_length) {
1188 case 1:
1189 *to= (uchar) length;
1190 break;
1191 case 2:
1192 mi_int2store(to, length);
1193 break;
1194 case 3:
1195 mi_int3store(to, length);
1196 break;
1197 default:
1198 mi_int4store(to, length);
1199 break;
1200 }
1201 }
1202
1203
1204 #ifdef WORDS_BIGENDIAN
1205 const bool Is_big_endian= true;
1206 #else
1207 const bool Is_big_endian= false;
1208 #endif
copy_native_longlong(uchar * to,size_t to_length,longlong val,bool is_unsigned)1209 void copy_native_longlong(uchar *to, size_t to_length,
1210 longlong val, bool is_unsigned)
1211 {
1212 copy_integer<Is_big_endian>(to, to_length,
1213 static_cast<uchar*>(static_cast<void*>(&val)),
1214 sizeof(longlong),
1215 is_unsigned);
1216 }
1217
1218
1219 /**
1220 Make a sort key for the JSON value in an Item.
1221
1222 This function is called by Sort_param::make_sortkey(). We don't want
1223 it to be inlined, since that seemed to have a negative impact on
1224 some performance tests.
1225
1226 @param[in] item The item for which to create a sort key.
1227
1228 @param[out] to Pointer into the buffer to which the sort key should
1229 be written. It will point to where the data portion
1230 of the key should start. For nullable items, this
1231 means right after the NULL indicator byte. The NULL
1232 indicator byte (to[-1]) should be initialized by the
1233 caller to a value that indicates not NULL.
1234
1235 @param[in] length The length of the sort key, not including the NULL
1236 indicator byte at the beginning of the sort key for
1237 nullable items.
1238
1239 @param[in,out] hash The hash key of the JSON values in the current row.
1240 */
1241 static void MY_ATTRIBUTE((noinline))
make_json_sort_key(Item * item,uchar * to,size_t length,ulonglong * hash)1242 make_json_sort_key(Item *item, uchar *to, size_t length, ulonglong *hash)
1243 {
1244 assert(!item->maybe_null || to[-1] == 1);
1245
1246 Json_wrapper wr;
1247 if (item->val_json(&wr))
1248 {
1249 // An error happened when reading the JSON value. Give up.
1250 memset(to, 0, length);
1251 return;
1252 }
1253
1254 if (item->null_value)
1255 {
1256 /*
1257 Got NULL. The sort key should be all zeros. The caller has
1258 already tentatively set the NULL indicator byte at to[-1] to
1259 not-NULL, so we need to clear that byte too.
1260 */
1261 if (item->maybe_null)
1262 {
1263 memset(to - 1, 0, length + 1);
1264 }
1265 else
1266 {
1267 /* purecov: begin inspected */
1268 DBUG_PRINT("warning",
1269 ("Got null on something that shouldn't be null"));
1270 DBUG_ABORT();
1271 memset(to, 0, length);
1272 /* purecov: end */
1273 }
1274 }
1275 else
1276 {
1277 wr.make_sort_key(to, length);
1278 *hash= wr.make_hash_key(hash);
1279 }
1280 }
1281
1282
/**
  Create a sort key for one row: the concatenated, memcmp()-able images of
  all ORDER BY columns/expressions, optionally followed by a JSON hash,
  and then either the packed addon fields or the row reference.

  @param to       Buffer to write the key into.
  @param ref_pos  Row reference (position) to append when not using
                  addon fields.
  @return Total number of bytes written, starting at 'to'.
*/
uint Sort_param::make_sortkey(uchar *to, const uchar *ref_pos)
{
  uchar *orig_to= to;
  const st_sort_field *sort_field;
  ulonglong hash= 0;    // Accumulated hash of all JSON values in this row.

  for (sort_field= local_sortorder.begin() ;
       sort_field != local_sortorder.end() ;
       sort_field++)
  {
    bool maybe_null= false;
    if (sort_field->field)
    {
      // The sort key part is a plain column value.
      Field *field= sort_field->field;
      assert(sort_field->field_type == field->type());
      if (field->maybe_null())
      {
        if (field->is_null())
        {
          // NULL: fill indicator byte plus data with all-0 (or all-1 for
          // descending order, so NULLs sort last).
          if (sort_field->reverse)
            memset(to, 255, sort_field->length+1);
          else
            memset(to, 0, sort_field->length+1);
          to+= sort_field->length+1;
          continue;
        }
        else
          *to++=1;      // Not-NULL indicator byte.
      }
      field->make_sort_key(to, sort_field->length);
      if (sort_field->field_type == MYSQL_TYPE_JSON)
      {
        assert(use_hash);
        unique_hash(field, &hash);
      }
    }
    else
    { // Item
      Item *item=sort_field->item;
      maybe_null= item->maybe_null;
      assert(sort_field->field_type == item->field_type());
      switch (sort_field->result_type) {
      case STRING_RESULT:
      {
        if (maybe_null)
          *to++= 1;     // Tentative not-NULL indicator; cleared below if NULL.

        if (sort_field->field_type == MYSQL_TYPE_JSON)
        {
          assert(use_hash);
          /*
            We don't want the code for creating JSON sort keys to be
            inlined here, as increasing the size of the surrounding
            "else" branch seems to have a negative impact on some
            performance tests, even if those tests never execute the
            "else" branch.
          */
          make_json_sort_key(item, to, sort_field->length, &hash);
          break;
        }

        const CHARSET_INFO *cs=item->collation.collation;
        char fill_char= ((cs->state & MY_CS_BINSORT) ? (char) 0 : ' ');

        /* All item->str() to use some extra byte for end null.. */
        String tmp((char*) to,sort_field->length+4,cs);
        String *res= item->str_result(&tmp);
        if (!res)
        {
          if (maybe_null)
            memset(to-1, 0, sort_field->length+1);
          else
          {
            /* purecov: begin deadcode */
            /*
              This should only happen during extreme conditions if we run out
              of memory or have an item marked not null when it can be null.
              This code is here mainly to avoid a hard crash in this case.
            */
            assert(0);
            DBUG_PRINT("warning",
                       ("Got null on something that shouldn't be null"));
            memset(to, 0, sort_field->length); // Avoid crash
            /* purecov: end */
          }
          break;
        }
        size_t length= res->length();
        if (sort_field->need_strxnfrm)
        {
          char *from=(char*) res->ptr();
          size_t tmp_length MY_ATTRIBUTE((unused));
          if ((uchar*) from == to)
          {
            // Source and destination overlap: stage the value in tmp_buffer
            // before transforming it in place.
            assert(sort_field->length >= length);
            set_if_smaller(length,sort_field->length);
            memcpy(tmp_buffer, from, length);
            from= tmp_buffer;
          }
          tmp_length=
            cs->coll->strnxfrm(cs, to, sort_field->length,
                               item->max_char_length(),
                               (uchar*) from, length,
                               MY_STRXFRM_PAD_WITH_SPACE |
                               MY_STRXFRM_PAD_TO_MAXLEN);
          assert(tmp_length == sort_field->length);
        }
        else
        {
          // Binary-comparable charset: copy, optionally store the original
          // length as a suffix, and pad the rest with fill_char.
          size_t diff;
          uint sort_field_length= sort_field->length -
            sort_field->suffix_length;
          if (sort_field_length < length)
          {
            diff= 0;
            length= sort_field_length;
          }
          else
            diff= sort_field_length - length;
          if (sort_field->suffix_length)
          {
            /* Store length last in result_string */
            store_length(to + sort_field_length, length,
                         sort_field->suffix_length);
          }

          my_strnxfrm(cs, to,length,(const uchar*)res->ptr(),length);
          cs->cset->fill(cs, (char *)to+length,diff,fill_char);
        }
        break;
      }
      case INT_RESULT:
      {
        // TIME and date-typed temporals have special packed representations.
        longlong value= item->field_type() == MYSQL_TYPE_TIME ?
          item->val_time_temporal_result() :
          item->is_temporal_with_date() ?
          item->val_date_temporal_result() :
          item->val_int_result();
        if (maybe_null)
        {
          *to++=1;                              /* purecov: inspected */
          if (item->null_value)
          {
            if (maybe_null)
              memset(to-1, 0, sort_field->length+1);
            else
            {
              DBUG_PRINT("warning",
                         ("Got null on something that shouldn't be null"));
              memset(to, 0, sort_field->length);
            }
            break;
          }
        }
        copy_native_longlong(to, sort_field->length,
                             value, item->unsigned_flag);
        break;
      }
      case DECIMAL_RESULT:
      {
        my_decimal dec_buf, *dec_val= item->val_decimal_result(&dec_buf);
        if (maybe_null)
        {
          if (item->null_value)
          {
            memset(to, 0, sort_field->length+1);
            to++;
            break;
          }
          *to++=1;
        }
        if (sort_field->length < DECIMAL_MAX_FIELD_SIZE)
        {
          // The binary image may be longer than the key part; stage it in a
          // max-sized buffer and copy only the prefix we need.
          uchar buf[DECIMAL_MAX_FIELD_SIZE];
          my_decimal2binary(E_DEC_FATAL_ERROR, dec_val, buf,
                            item->max_length - (item->decimals ? 1:0),
                            item->decimals);
          memcpy(to, buf, sort_field->length);
        }
        else
        {
          my_decimal2binary(E_DEC_FATAL_ERROR, dec_val, to,
                            item->max_length - (item->decimals ? 1:0),
                            item->decimals);
        }
        break;
      }
      case REAL_RESULT:
      {
        double value= item->val_result();
        if (maybe_null)
        {
          if (item->null_value)
          {
            memset(to, 0, sort_field->length+1);
            to++;
            break;
          }
          *to++=1;
        }
        if (sort_field->length < sizeof(double))
        {
          // Same staging trick as for DECIMAL above.
          uchar buf[sizeof(double)];
          change_double_for_sort(value, buf);
          memcpy(to, buf, sort_field->length);
        }
        else
        {
          change_double_for_sort(value, to);
        }
        break;
      }
      case ROW_RESULT:
      default:
        // This case should never be chosen
        assert(0);
        break;
      }
    }
    if (sort_field->reverse)
    { /* Reverse key for descending order: complement every byte. */
      if (maybe_null)
        to[-1]= ~to[-1];
      uint length= sort_field->length;
      while (length--)
      {
        *to = (uchar) (~ *to);
        to++;
      }
    }
    else
      to+= sort_field->length;
  }

  if (use_hash)
  {
    // Append the accumulated 8-byte JSON hash to the key.
    int8store(to, hash);
    to+= 8;
  }

  if (using_addon_fields())
  {
    /*
      Save field values appended to sorted fields.
      First null bit indicators are appended then field values follow.
    */
    uchar *nulls= to;
    uchar *p_len= to;   // Where the total packed length is stored.

    Addon_fields_array::const_iterator addonf= addon_fields->begin();
    uint32 res_len= addonf->offset;
    const bool packed_addon_fields= addon_fields->using_packed_addons();
    memset(nulls, 0, addonf->offset);
    to+= addonf->offset;
    for ( ; addonf != addon_fields->end(); ++addonf)
    {
      Field *field= addonf->field;
      if (addonf->null_bit && field->is_null())
      {
        nulls[addonf->null_offset]|= addonf->null_bit;
        if (!packed_addon_fields)
          to+= addonf->max_length;
      }
      else
      {
        uchar *ptr= field->pack(to, field->ptr);
        int sz= static_cast<int>(ptr - to);
        res_len += sz;
        if (packed_addon_fields)
          to+= sz;
        else
          to+= addonf->max_length;
      }
    }
    if (packed_addon_fields)
      Addon_fields::store_addon_length(p_len, res_len);
    DBUG_PRINT("info", ("make_sortkey %p %u", orig_to, res_len));
  }
  else
  {
    /* Save filepos last */
    memcpy(to, ref_pos, ref_length);
    to+= ref_length;
  }
  return to - orig_to;
}
1569
1570
1571 /*
1572 Register fields used by sorting in the sorted table's read set
1573 */
1574
register_used_fields(Sort_param * param)1575 static void register_used_fields(Sort_param *param)
1576 {
1577 Bounds_checked_array<st_sort_field>::const_iterator sort_field;
1578 TABLE *table=param->sort_form;
1579 MY_BITMAP *bitmap= table->read_set;
1580 Mark_field mf(table, MARK_COLUMNS_TEMP);
1581
1582 for (sort_field= param->local_sortorder.begin() ;
1583 sort_field != param->local_sortorder.end() ;
1584 sort_field++)
1585 {
1586 Field *field;
1587 if ((field= sort_field->field))
1588 {
1589 if (field->table == table)
1590 {
1591 bitmap_set_bit(bitmap, field->field_index);
1592 if (field->is_virtual_gcol())
1593 table->mark_gcol_in_maps(field);
1594 }
1595 }
1596 else
1597 { // Item
1598 sort_field->item->walk(&Item::mark_field_in_map, walk_subquery,
1599 (uchar *)&mf);
1600 }
1601 }
1602
1603 if (param->using_addon_fields())
1604 {
1605 Addon_fields_array::const_iterator addonf= param->addon_fields->begin();
1606 for ( ; addonf != param->addon_fields->end(); ++addonf)
1607 {
1608 Field *field= addonf->field;
1609 bitmap_set_bit(bitmap, field->field_index);
1610 if (field->is_virtual_gcol())
1611 table->mark_gcol_in_maps(field);
1612 }
1613 }
1614 else
1615 {
1616 /* Save filepos last */
1617 table->prepare_for_position();
1618 }
1619 }
1620
1621 /**
1622 This function is used only if the entire result set fits in memory.
1623
1624 For addon fields, we keep the result in the filesort buffer.
1625 This saves us a lot of memcpy calls.
1626
1627 For row references, we copy the final sorted result into a buffer,
1628 but we do not copy the actual sort-keys, as they are no longer needed.
  We could have kept the result in the sort buffer here as well,
1630 but the new buffer - containing only row references - is probably a
1631 lot smaller.
1632
1633 The result data will be unpacked by rr_unpack_from_buffer()
1634 or rr_from_pointers()
1635
1636 @param [in] param Sort parameters.
1637 @param count Number of records
1638 @param [in,out] table_sort Information used by rr_unpack_from_buffer() /
1639 rr_from_pointers()
1640 */
save_index(Sort_param * param,uint count,Filesort_info * table_sort)1641 static bool save_index(Sort_param *param, uint count, Filesort_info *table_sort)
1642 {
1643 uchar *to;
1644 DBUG_ENTER("save_index");
1645
1646 table_sort->sort_buffer(param, count);
1647
1648 if (param->using_addon_fields())
1649 {
1650 table_sort->sorted_result_in_fsbuf= true;
1651 table_sort->set_sort_length(param->sort_length);
1652 DBUG_RETURN(0);
1653 }
1654
1655 table_sort->sorted_result_in_fsbuf= false;
1656 const size_t buf_size= param->res_length * count;
1657
1658 assert(table_sort->sorted_result == NULL);
1659 if (!(to= table_sort->sorted_result=
1660 static_cast<uchar*>(my_malloc(key_memory_Filesort_info_record_pointers,
1661 buf_size, MYF(MY_WME)))))
1662 DBUG_RETURN(1); /* purecov: inspected */
1663 table_sort->sorted_result_end=
1664 table_sort->sorted_result + buf_size;
1665
1666 uint res_length= param->res_length;
1667 uint offset= param->rec_length - res_length;
1668 for (uint ix= 0; ix < count; ++ix)
1669 {
1670 uchar *record= table_sort->get_sorted_record(ix);
1671 memcpy(to, record + offset, res_length);
1672 to+= res_length;
1673 }
1674 DBUG_RETURN(0);
1675 }
1676
1677
1678 /**
1679 Test whether priority queue is worth using to get top elements of an
1680 ordered result set. If it is, then allocates buffer for required amount of
1681 records
1682
1683 @param trace Current trace context.
1684 @param param Sort parameters.
1685 @param filesort_info Filesort information.
1686 @param table Table to sort.
1687 @param num_rows Estimate of number of rows in source record set.
1688 @param memory_available Memory available for sorting.
1689 @param keep_addon_fields Do not try to strip off addon fields.
1690
1691 DESCRIPTION
1692 Given a query like this:
1693 SELECT ... FROM t ORDER BY a1,...,an LIMIT max_rows;
1694 This function tests whether a priority queue should be used to keep
1695 the result. Necessary conditions are:
1696 - estimate that it is actually cheaper than merge-sort
1697 - enough memory to store the <max_rows> records.
1698
1699 If we don't have space for <max_rows> records, but we *do* have
1700 space for <max_rows> keys, we may rewrite 'table' to sort with
1701 references to records instead of additional data.
1702 (again, based on estimates that it will actually be cheaper).
1703
1704 @returns
1705 true - if it's ok to use PQ
1706 false - PQ will be slower than merge-sort, or there is not enough memory.
1707 */
1708
bool check_if_pq_applicable(Opt_trace_context *trace,
                            Sort_param *param,
                            Filesort_info *filesort_info,
                            TABLE *table, ha_rows num_rows,
                            ulong memory_available,
                            bool keep_addon_fields)
{
  DBUG_ENTER("check_if_pq_applicable");

  /*
    How much Priority Queue sort is slower than qsort.
    Measurements (see unit test) indicate that PQ is roughly 3 times slower.
  */
  const double PQ_slowness= 3.0;

  Opt_trace_object trace_filesort(trace,
                                  "filesort_priority_queue_optimization");
  // Without a LIMIT the queue would have to hold every row; a PQ is useless.
  if (param->max_rows == HA_POS_ERROR)
  {
    trace_filesort
      .add("usable", false)
      .add_alnum("cause", "not applicable (no LIMIT)");
    DBUG_RETURN(false);
  }

  trace_filesort
    .add("limit", param->max_rows)
    .add("rows_estimate", num_rows)
    .add("row_size", param->rec_length)
    .add("memory_available", memory_available);

  // max_keys_per_buffer below is a uint; guard against overflow.
  if (param->max_rows + 2 >= UINT_MAX)
  {
    trace_filesort.add("usable", false).add_alnum("cause", "limit too large");
    DBUG_RETURN(false);
  }

  ulong num_available_keys=
    memory_available / (param->rec_length + sizeof(char*));
  // We need 1 extra record in the buffer, when using PQ.
  param->max_keys_per_buffer= (uint) param->max_rows + 1;

  if (num_rows < num_available_keys)
  {
    // The whole source set fits into memory.
    if (param->max_rows < num_rows/PQ_slowness )
    {
      filesort_info->
        alloc_sort_buffer(param->max_keys_per_buffer, param->rec_length);
      trace_filesort.add("chosen", true);
      DBUG_RETURN(filesort_info->sort_buffer_size() > 0);
    }
    else
    {
      // PQ will be slower.
      trace_filesort.add("chosen", false)
        .add_alnum("cause", "quicksort_is_cheaper");
      DBUG_RETURN(false);
    }
  }

  // Do we have space for LIMIT rows in memory?
  if (param->max_keys_per_buffer < num_available_keys)
  {
    filesort_info->alloc_sort_buffer(param->max_keys_per_buffer,
                                     param->rec_length);
    trace_filesort.add("chosen", true);
    DBUG_RETURN(filesort_info->sort_buffer_size() > 0);
  }

  // Try to strip off addon fields.
  if (!keep_addon_fields && param->using_addon_fields())
  {
    // Without addon fields a row shrinks to sort key + row reference.
    const ulong row_length=
      param->sort_length + param->ref_length + sizeof(char*);
    num_available_keys= memory_available / row_length;

    Opt_trace_object trace_addon(trace, "strip_additional_fields");
    trace_addon.add("row_size", row_length);

    // Can we fit all the keys in memory?
    if (param->max_keys_per_buffer >= num_available_keys)
    {
      trace_addon.add("chosen", false).add_alnum("cause", "not_enough_space");
    }
    else
    {
      // Compare estimated cost of PQ (including the row lookups it forces
      // afterwards) against a regular sort-merge pass.
      const Cost_model_table *cost_model= table->cost_model();
      const double sort_merge_cost=
        get_merge_many_buffs_cost_fast(num_rows,
                                       num_available_keys,
                                       row_length, cost_model);
      trace_addon.add("sort_merge_cost", sort_merge_cost);
      /*
        PQ has cost:
        (insert + qsort) * log(queue size) * key_compare_cost() +
        cost of file lookup afterwards.
        The lookup cost is a bit pessimistic: we take table scan cost and
        assume that on average we find the row after scanning half of the file.
        A better estimate would be lookup cost, but note that we are doing
        random lookups here, rather than sequential scan.
      */
      const double pq_cpu_cost=
        (PQ_slowness * num_rows + param->max_keys_per_buffer) *
        cost_model->key_compare_cost(log((double) param->max_keys_per_buffer));
      const Cost_estimate scan_cost= table->file->table_scan_cost();
      const double pq_io_cost=
        param->max_rows * scan_cost.total_cost() / 2.0;
      const double pq_cost= pq_cpu_cost + pq_io_cost;
      trace_addon.add("priority_queue_cost", pq_cost);

      if (sort_merge_cost < pq_cost)
      {
        trace_addon.add("chosen", false);
        DBUG_RETURN(false);
      }

      trace_addon.add("chosen", true);
      filesort_info->alloc_sort_buffer(param->max_keys_per_buffer,
                                       param->sort_length + param->ref_length);
      if (filesort_info->sort_buffer_size() > 0)
      {
        // Make attached data to be references instead of fields.
        filesort_info->addon_fields= NULL;
        param->addon_fields= NULL;

        param->res_length= param->ref_length;
        param->sort_length+= param->ref_length;
        param->rec_length= param->sort_length;

        DBUG_RETURN(true);
      }
    }
  }
  DBUG_RETURN(false);
}
1845
1846
1847 /**
1848 Merges buffers to make < MERGEBUFF2 buffers.
1849
1850 @param param Sort parameters.
1851 @param sort_buffer The main memory buffer.
1852 @param chunk_array Array of chunk descriptors to merge.
  @param [out] p_num_chunks  The number of chunks left in the output file.
1855 @param t_file Where to store the result.
1856 */
merge_many_buff(Sort_param * param,Sort_buffer sort_buffer,Merge_chunk_array chunk_array,size_t * p_num_chunks,IO_CACHE * t_file)1857 int merge_many_buff(Sort_param *param, Sort_buffer sort_buffer,
1858 Merge_chunk_array chunk_array,
1859 size_t *p_num_chunks, IO_CACHE *t_file)
1860 {
1861 uint i;
1862 IO_CACHE t_file2,*from_file,*to_file,*temp;
1863 DBUG_ENTER("merge_many_buff");
1864
1865 size_t num_chunks= chunk_array.size();
1866 *p_num_chunks= num_chunks;
1867
1868 if (num_chunks <= MERGEBUFF2)
1869 DBUG_RETURN(0); /* purecov: inspected */
1870 if (flush_io_cache(t_file) ||
1871 open_cached_file(&t_file2,mysql_tmpdir,TEMP_PREFIX,DISK_BUFFER_SIZE,
1872 MYF(MY_WME)))
1873 DBUG_RETURN(1); /* purecov: inspected */
1874
1875 from_file= t_file ; to_file= &t_file2;
1876 while (num_chunks > MERGEBUFF2)
1877 {
1878 if (reinit_io_cache(from_file,READ_CACHE,0L,0,0))
1879 goto cleanup;
1880 if (reinit_io_cache(to_file,WRITE_CACHE,0L,0,0))
1881 goto cleanup;
1882 Merge_chunk *last_chunk= chunk_array.begin();;
1883 for (i=0 ; i < num_chunks - MERGEBUFF * 3 / 2 ; i+= MERGEBUFF)
1884 {
1885 if (merge_buffers(param, // param
1886 from_file, // from_file
1887 to_file, // to_file
1888 sort_buffer, // sort_buffer
1889 last_chunk++, // last_chunk [out]
1890 Merge_chunk_array(&chunk_array[i], MERGEBUFF),
1891 0)) // flag
1892 goto cleanup;
1893 }
1894 if (merge_buffers(param,
1895 from_file,
1896 to_file,
1897 sort_buffer,
1898 last_chunk++,
1899 Merge_chunk_array(&chunk_array[i], num_chunks - i),
1900 0))
1901 break; /* purecov: inspected */
1902 if (flush_io_cache(to_file))
1903 break; /* purecov: inspected */
1904 temp=from_file; from_file=to_file; to_file=temp;
1905 setup_io_cache(from_file);
1906 setup_io_cache(to_file);
1907 num_chunks= last_chunk - chunk_array.begin();
1908 }
1909 cleanup:
1910 close_cached_file(to_file); // This holds old result
1911 if (to_file == t_file)
1912 {
1913 *t_file=t_file2; // Copy result file
1914 setup_io_cache(t_file);
1915 }
1916
1917 *p_num_chunks= num_chunks;
1918 DBUG_RETURN(num_chunks > MERGEBUFF2); /* Return 1 if interrupted */
1919 } /* merge_many_buff */
1920
1921
1922 /**
1923 Read data to buffer.
1924
1925 @returns
1926 (uint)-1 if something goes wrong
1927 */
1928
read_to_buffer(IO_CACHE * fromfile,Merge_chunk * merge_chunk,Sort_param * param)1929 uint read_to_buffer(IO_CACHE *fromfile,
1930 Merge_chunk *merge_chunk,
1931 Sort_param *param)
1932 {
1933 DBUG_ENTER("read_to_buffer");
1934 uint rec_length= param->rec_length;
1935 ha_rows count;
1936
1937 if ((count= min(merge_chunk->max_keys(), merge_chunk->rowcount())))
1938 {
1939 size_t bytes_to_read;
1940 if (param->using_packed_addons())
1941 {
1942 count= merge_chunk->rowcount();
1943 bytes_to_read=
1944 min(merge_chunk->buffer_size(),
1945 static_cast<size_t>(fromfile->end_of_file -
1946 merge_chunk->file_position()));
1947 }
1948 else
1949 bytes_to_read= rec_length * static_cast<size_t>(count);
1950
1951 DBUG_PRINT("info", ("read_to_buffer %p at file_pos %llu bytes %llu",
1952 merge_chunk,
1953 static_cast<ulonglong>(merge_chunk->file_position()),
1954 static_cast<ulonglong>(bytes_to_read)));
1955 if (my_b_pread(fromfile, merge_chunk->buffer_start(), bytes_to_read,
1956 merge_chunk->file_position()))
1957 DBUG_RETURN((uint) -1); /* purecov: inspected */
1958
1959 size_t num_bytes_read;
1960 if (param->using_packed_addons())
1961 {
1962 /*
1963 The last record read is most likely not complete here.
1964 We need to loop through all the records, reading the length fields,
1965 and then "chop off" the final incomplete record.
1966 */
1967 uchar *record= merge_chunk->buffer_start();
1968 uint ix= 0;
1969 for (; ix < count; ++ix)
1970 {
1971 if (record + param->sort_length + Addon_fields::size_of_length_field >=
1972 merge_chunk->buffer_end())
1973 break; // Incomplete record.
1974 uchar *plen= record + param->sort_length;
1975 uint res_length= Addon_fields::read_addon_length(plen);
1976 if (plen + res_length >= merge_chunk->buffer_end())
1977 break; // Incomplete record.
1978 assert(res_length > 0);
1979 record+= param->sort_length;
1980 record+= res_length;
1981 }
1982 assert(ix > 0);
1983 count= ix;
1984 num_bytes_read= record - merge_chunk->buffer_start();
1985 DBUG_PRINT("info", ("read %llu bytes of complete records",
1986 static_cast<ulonglong>(bytes_to_read)));
1987 }
1988 else
1989 num_bytes_read= bytes_to_read;
1990
1991 merge_chunk->init_current_key();
1992 merge_chunk->advance_file_position(num_bytes_read);
1993 merge_chunk->decrement_rowcount(count);
1994 merge_chunk->set_mem_count(count);
1995 DBUG_RETURN(num_bytes_read);
1996 }
1997
1998 DBUG_RETURN (0);
1999 } /* read_to_buffer */
2000
2001
namespace {

/**
  Comparator for ordering chunks while merging, used both by filesort()
  and by Unique::get().
  For filesort() we use memcmp to compare rows.
  For Unique::get() we use the provided compare function.
  Returns "greater than" so that the priority queue pops the smallest
  key first (a min-heap built from a max-comparator).
*/
struct Merge_chunk_less
{
  size_t m_len;                        ///< Key length for memcmp (filesort).
  Sort_param::chunk_compare_fun m_fun; ///< Compare function (Unique::get).
  Merge_chunk_compare_context *m_arg;  ///< Context passed to m_fun.

  /// CTOR for filesort(): fixed-length memcmp comparison.
  explicit Merge_chunk_less(size_t len)
    : m_len(len), m_fun(NULL), m_arg(NULL)
  {}

  /// CTOR for Unique::get(): caller-provided compare function + context.
  Merge_chunk_less(Sort_param::chunk_compare_fun fun,
                   Merge_chunk_compare_context *arg)
    : m_len(0), m_fun(fun), m_arg(arg)
  {}

  bool operator()(Merge_chunk *a, Merge_chunk *b)
  {
    uchar *key1= a->current_key();
    uchar *key2= b->current_key();
    if (m_len)
      return memcmp(key1, key2, m_len) > 0;

    if (m_fun)
      return (*m_fun)(m_arg, key1, key2) > 0;

    // We can actually have zero-length sort key for filesort().
    return false;
  }
};

} // namespace
2042
2043
2044 /**
2045 Merge buffers to one buffer.
2046
2047 @param param Sort parameter
2048 @param from_file File with source data (Merge_chunks point to this file)
2049 @param to_file File to write the sorted result data.
2050 @param sort_buffer Buffer for data to store up to MERGEBUFF2 sort keys.
2051 @param [out] last_chunk Store here Merge_chunk describing data written to
2052 to_file.
2053 @param chunk_array Array of chunks to merge.
2054 @param flag
2055
2056 @returns
2057 0 OK
2058 @returns
2059 other error
2060 */
2061
merge_buffers(Sort_param * param,IO_CACHE * from_file,IO_CACHE * to_file,Sort_buffer sort_buffer,Merge_chunk * last_chunk,Merge_chunk_array chunk_array,int flag)2062 int merge_buffers(Sort_param *param, IO_CACHE *from_file,
2063 IO_CACHE *to_file, Sort_buffer sort_buffer,
2064 Merge_chunk *last_chunk,
2065 Merge_chunk_array chunk_array,
2066 int flag)
2067 {
2068 int error;
2069 uint rec_length,res_length;
2070 size_t sort_length;
2071 ha_rows maxcount;
2072 ha_rows max_rows,org_max_rows;
2073 my_off_t to_start_filepos;
2074 uchar *strpos;
2075 Merge_chunk *merge_chunk;
2076 Sort_param::chunk_compare_fun cmp;
2077 Merge_chunk_compare_context *first_cmp_arg;
2078 THD * const thd = current_thd;
2079 volatile THD::killed_state *killed= &thd->killed;
2080 THD::killed_state not_killable;
2081 DBUG_ENTER("merge_buffers");
2082
2083 thd->inc_status_sort_merge_passes();
2084 thd->query_plan_fsort_passes++;
2085 if (param->not_killable)
2086 {
2087 killed= ¬_killable;
2088 not_killable= THD::NOT_KILLED;
2089 }
2090
2091 error=0;
2092 rec_length= param->rec_length;
2093 res_length= param->res_length;
2094 sort_length= param->sort_length;
2095 const uint offset= (flag == 0) ? 0 : (rec_length - res_length);
2096 maxcount= (ulong) (param->max_keys_per_buffer / chunk_array.size());
2097 to_start_filepos= my_b_tell(to_file);
2098 strpos= sort_buffer.array();
2099 org_max_rows=max_rows= param->max_rows;
2100
2101 /* The following will fire if there is not enough space in sort_buffer */
2102 assert(maxcount!=0);
2103
2104 const bool doing_unique= (param->unique_buff != NULL);
2105 if (doing_unique)
2106 {
2107 cmp= param->compare;
2108 first_cmp_arg= ¶m->cmp_context;
2109 }
2110 else
2111 {
2112 cmp= NULL;
2113 first_cmp_arg= NULL;
2114 }
2115
2116 Merge_chunk_less mcl=
2117 doing_unique ?
2118 Merge_chunk_less(cmp, first_cmp_arg) :
2119 Merge_chunk_less(sort_length);
2120 Priority_queue<Merge_chunk*,
2121 std::vector<Merge_chunk*, Malloc_allocator<Merge_chunk*> >,
2122 Merge_chunk_less>
2123 queue(mcl,
2124 Malloc_allocator<Merge_chunk*>(key_memory_Filesort_info_merge));
2125
2126 if (queue.reserve(chunk_array.size()))
2127 DBUG_RETURN(1);
2128
2129 for (merge_chunk= chunk_array.begin() ;
2130 merge_chunk != chunk_array.end() ; merge_chunk++)
2131 {
2132 merge_chunk->set_buffer(strpos,
2133 strpos + (sort_buffer.size()/(chunk_array.size())));
2134 merge_chunk->set_max_keys(maxcount);
2135 strpos+=
2136 (uint) (error= (int)read_to_buffer(from_file, merge_chunk, param));
2137 merge_chunk->set_buffer_end(strpos);
2138 if (error == -1)
2139 DBUG_RETURN(error); /* purecov: inspected */
2140 // If less data in buffers than expected
2141 merge_chunk->set_max_keys(merge_chunk->mem_count());
2142 (void) queue.push(merge_chunk);
2143 }
2144
2145 if (doing_unique)
2146 {
2147 assert(!param->using_packed_addons());
2148 /*
2149 Called by Unique::get()
2150 Copy the first argument to param->unique_buff for unique removal.
2151 Store it also in 'to_file'.
2152 */
2153 merge_chunk= queue.top();
2154 memcpy(param->unique_buff, merge_chunk->current_key(), rec_length);
2155 if (my_b_write(to_file, merge_chunk->current_key(), rec_length))
2156 {
2157 DBUG_RETURN(1); /* purecov: inspected */
2158 }
2159 merge_chunk->advance_current_key(rec_length);
2160 merge_chunk->decrement_mem_count();
2161 if (!--max_rows)
2162 {
2163 error= 0; /* purecov: inspected */
2164 goto end; /* purecov: inspected */
2165 }
2166 // The top chunk may actually contain only a single element
2167 if (merge_chunk->mem_count() == 0)
2168 {
2169 if (!(error= (int) read_to_buffer(from_file, merge_chunk, param)))
2170 {
2171 queue.pop();
2172 reuse_freed_buff(merge_chunk, &queue);
2173 }
2174 else if (error == -1)
2175 DBUG_RETURN(error);
2176 }
2177 queue.update_top(); // Top element has been used
2178 }
2179
2180 while (queue.size() > 1)
2181 {
2182 if (*killed)
2183 {
2184 DBUG_RETURN(1); /* purecov: inspected */
2185 }
2186 for (;;)
2187 {
2188 merge_chunk= queue.top();
2189 if (doing_unique) // Remove duplicates
2190 {
2191 assert(!param->using_packed_addons());
2192 uchar *current_key= merge_chunk->current_key();
2193 if (!(*cmp)(first_cmp_arg, param->unique_buff, current_key))
2194 goto skip_duplicate;
2195 memcpy(param->unique_buff, merge_chunk->current_key(), rec_length);
2196 }
2197 {
2198 param->get_rec_and_res_len(merge_chunk->current_key(),
2199 &rec_length, &res_length);
2200 const uint bytes_to_write= (flag == 0) ? rec_length : res_length;
2201
2202 DBUG_PRINT("info", ("write record at %llu len %u",
2203 my_b_tell(to_file), bytes_to_write));
2204 if (my_b_write(to_file,
2205 merge_chunk->current_key() + offset, bytes_to_write))
2206 {
2207 DBUG_RETURN(1); /* purecov: inspected */
2208 }
2209 if (!--max_rows)
2210 {
2211 error= 0; /* purecov: inspected */
2212 goto end; /* purecov: inspected */
2213 }
2214 }
2215
2216 skip_duplicate:
2217 merge_chunk->advance_current_key(rec_length);
2218 merge_chunk->decrement_mem_count();
2219 if (0 == merge_chunk->mem_count())
2220 {
2221 if (!(error= (int) read_to_buffer(from_file, merge_chunk, param)))
2222 {
2223 queue.pop();
2224 reuse_freed_buff(merge_chunk, &queue);
2225 break; /* One buffer have been removed */
2226 }
2227 else if (error == -1)
2228 DBUG_RETURN(error); /* purecov: inspected */
2229 }
2230 /*
2231 The Merge_chunk at the queue's top had one of its keys consumed, thus
2232 it may now rank differently in the comparison order of the queue, so:
2233 */
2234 queue.update_top();
2235 }
2236 }
2237 merge_chunk= queue.top();
2238 merge_chunk->set_buffer(sort_buffer.array(),
2239 sort_buffer.array() + sort_buffer.size());
2240 merge_chunk->set_max_keys(param->max_keys_per_buffer);
2241
2242 /*
2243 As we know all entries in the buffer are unique, we only have to
2244 check if the first one is the same as the last one we wrote
2245 */
2246 if (doing_unique)
2247 {
2248 uchar *current_key= merge_chunk->current_key();
2249 if (!(*cmp)(first_cmp_arg, param->unique_buff, current_key))
2250 {
2251 merge_chunk->advance_current_key(rec_length); // Remove duplicate
2252 merge_chunk->decrement_mem_count();
2253 }
2254 }
2255
2256 do
2257 {
2258 if (merge_chunk->mem_count() > max_rows)
2259 {
2260 merge_chunk->set_mem_count(max_rows); /* Don't write too many records */
2261 merge_chunk->set_rowcount(0); /* Don't read more */
2262 }
2263 max_rows-= merge_chunk->mem_count();
2264
2265 for (uint ix= 0; ix < merge_chunk->mem_count(); ++ix)
2266 {
2267 param->get_rec_and_res_len(merge_chunk->current_key(),
2268 &rec_length, &res_length);
2269 const uint bytes_to_write= (flag == 0) ? rec_length : res_length;
2270 if (my_b_write(to_file,
2271 merge_chunk->current_key() + offset,
2272 bytes_to_write))
2273 {
2274 DBUG_RETURN(1); /* purecov: inspected */
2275 }
2276 merge_chunk->advance_current_key(rec_length);
2277 }
2278 }
2279 while ((error=(int) read_to_buffer(from_file, merge_chunk, param))
2280 != -1 && error != 0);
2281
2282 end:
2283 last_chunk->set_rowcount(min(org_max_rows-max_rows, param->max_rows));
2284 last_chunk->set_file_position(to_start_filepos);
2285
2286 DBUG_RETURN(error);
2287 } /* merge_buffers */
2288
2289
2290 /* Do a merge to output-file (save only positions) */
2291
merge_index(Sort_param * param,Sort_buffer sort_buffer,Merge_chunk_array chunk_array,IO_CACHE * tempfile,IO_CACHE * outfile)2292 static int merge_index(Sort_param *param, Sort_buffer sort_buffer,
2293 Merge_chunk_array chunk_array,
2294 IO_CACHE *tempfile, IO_CACHE *outfile)
2295 {
2296 DBUG_ENTER("merge_index");
2297 if (merge_buffers(param, // param
2298 tempfile, // from_file
2299 outfile, // to_file
2300 sort_buffer, // sort_buffer
2301 chunk_array.begin(), // last_chunk [out]
2302 chunk_array,
2303 1)) // flag
2304 DBUG_RETURN(1); /* purecov: inspected */
2305 DBUG_RETURN(0);
2306 } /* merge_index */
2307
2308
/**
  Number of bytes needed to store 'string_length' as a big-endian
  length suffix: 1 byte below 256, 2 below 256^2, 3 below 256^3,
  otherwise 4 (lengths beyond 4G cannot be sorted).
*/
static uint suffix_length(ulong string_length)
{
  uint num_bytes= 1;
  for (ulong limit= 256; num_bytes < 4 && string_length >= limit; limit*= 256)
    num_bytes++;
  return num_bytes;                     // Can't sort longer than 4G
}
2319
2320
2321
2322 /**
2323 Calculate length of sort key.
2324
2325 @param thd Thread handler
2326 @param sortorder Order of items to sort
2327 @param s_length Number of items to sort
2328 @param[out] multi_byte_charset Set to 1 if we are using multi-byte charset
2329 (In which case we have to use strxnfrm())
2330 @param[out] use_hash Set to true when make_sortkey should
2331 calculate and append hash for each sort key
2332 @see Sort_param::make_sortkey()
2333
2334 @note
2335 sortorder->length is updated for each sort item.
2336 @n
2337 sortorder->need_strxnfrm is set 1 if we have to use strxnfrm
2338
2339 @return
2340 Total length of sort buffer in bytes
2341 */
2342
2343 uint
sortlength(THD * thd,st_sort_field * sortorder,uint s_length,bool * multi_byte_charset,bool * use_hash)2344 sortlength(THD *thd, st_sort_field *sortorder, uint s_length,
2345 bool *multi_byte_charset, bool *use_hash)
2346 {
2347 uint total_length= 0;
2348 const CHARSET_INFO *cs;
2349 *multi_byte_charset= false;
2350 *use_hash= false;
2351
2352 for (; s_length-- ; sortorder++)
2353 {
2354 sortorder->need_strxnfrm= 0;
2355 sortorder->suffix_length= 0;
2356 if (sortorder->field)
2357 {
2358 cs= sortorder->field->sort_charset();
2359 sortorder->length= sortorder->field->sort_length();
2360
2361 if (use_strnxfrm((cs=sortorder->field->sort_charset())))
2362 {
2363 sortorder->need_strxnfrm= 1;
2364 *multi_byte_charset= 1;
2365 sortorder->length= cs->coll->strnxfrmlen(cs, sortorder->length);
2366 }
2367 if (sortorder->field->maybe_null())
2368 total_length++; // Place for NULL marker
2369
2370 if (sortorder->field->result_type() == STRING_RESULT &&
2371 !sortorder->field->is_temporal())
2372 {
2373 set_if_smaller(sortorder->length, thd->variables.max_sort_length);
2374 }
2375
2376 sortorder->field_type= sortorder->field->type();
2377 if (sortorder->field_type == MYSQL_TYPE_JSON)
2378 *use_hash= true;
2379 }
2380 else
2381 {
2382 sortorder->result_type= sortorder->item->result_type();
2383 sortorder->field_type= sortorder->item->field_type();
2384 if (sortorder->item->is_temporal())
2385 sortorder->result_type= INT_RESULT;
2386 switch (sortorder->result_type) {
2387 case STRING_RESULT:
2388 sortorder->length= sortorder->item->max_length;
2389 set_if_smaller(sortorder->length, thd->variables.max_sort_length);
2390 if (use_strnxfrm((cs=sortorder->item->collation.collation)))
2391 {
2392 sortorder->length= cs->coll->strnxfrmlen(cs, sortorder->length);
2393 sortorder->need_strxnfrm= 1;
2394 *multi_byte_charset= 1;
2395 }
2396 else if (cs == &my_charset_bin)
2397 {
2398 /* Store length last to be able to sort blob/varbinary */
2399 sortorder->suffix_length= suffix_length(sortorder->length);
2400 sortorder->length+= sortorder->suffix_length;
2401 }
2402 if (sortorder->field_type == MYSQL_TYPE_JSON)
2403 *use_hash= true;
2404 break;
2405 case INT_RESULT:
2406 #if SIZEOF_LONG_LONG > 4
2407 sortorder->length=8; // Size of intern longlong
2408 #else
2409 sortorder->length=4;
2410 #endif
2411 break;
2412 case DECIMAL_RESULT:
2413 sortorder->length=
2414 my_decimal_get_binary_size(sortorder->item->max_length -
2415 (sortorder->item->decimals ? 1 : 0),
2416 sortorder->item->decimals);
2417 break;
2418 case REAL_RESULT:
2419 sortorder->length=sizeof(double);
2420 break;
2421 case ROW_RESULT:
2422 default:
2423 // This case should never be choosen
2424 assert(0);
2425 break;
2426 }
2427 if (sortorder->item->maybe_null)
2428 total_length++; // Place for NULL marker
2429 }
2430 total_length+= sortorder->length;
2431 }
2432 sortorder->field= NULL; // end marker
2433 DBUG_PRINT("info",("sort_length: %u", total_length));
2434 return total_length;
2435 }
2436
2437
2438 /**
2439 Get descriptors of fields appended to sorted fields and
2440 calculate their total length.
2441
2442 The function first finds out what fields are used in the result set.
2443 Then it calculates the length of the buffer to store the values of
2444 these fields together with the value of sort values.
2445 If the calculated length is not greater than max_length_for_sort_data
2446 the function allocates memory for an array of descriptors containing
2447 layouts for the values of the non-sorted fields in the buffer and
2448 fills them.
2449
2450 @param max_length_for_sort_data Value of session variable.
2451 @param ptabfield Array of references to the table fields
2452 @param sortlength Total length of sorted fields
2453 @param[out] plength Total length of appended fields
2454 @param[out] ppackable_length Total length of appended fields having a
2455 packable type
2456
2457 @note
2458 The null bits for the appended values are supposed to be put together
2459 and stored into the buffer just ahead of the value of the first field.
2460
2461 @return
2462 Pointer to the layout descriptors for the appended fields, if any
2463 @returns
2464 NULL if we do not store field values with sort data.
2465 */
2466
Addon_fields *
Filesort::get_addon_fields(ulong max_length_for_sort_data,
                           Field **ptabfield, uint sortlength, uint *plength,
                           uint *ppackable_length)
{
  Field **pfield;
  Field *field;
  uint total_length= 0;       // Total bytes needed for the appended fields
  uint packable_length= 0;    // Bytes belonging to packable fields only
  uint num_fields= 0;         // Number of fields that will be appended
  uint null_fields= 0;        // Number of those fields that are nullable
  TABLE *const table= tab->table();
  MY_BITMAP *read_set= table->read_set;

  // Locate the effective index for the table to be sorted (if any)
  const uint index= tab->effective_index();
  /*
    filter_covering is true if access is via an index that is covering,
    regardless of whether the access is by the covering index or by
    index and base table, since the query has to be fulfilled with fields
    from that index only.
    This information is later used to filter out base columns for virtual
    generated columns, since these are only needed when reading the table.
    During sorting, trust that values for all generated columns have been
    materialized, which means that base columns are no longer necessary.
  */
  const bool filter_covering=
    index != MAX_KEY &&
    table->covering_keys.is_set(index) &&
    table->index_contains_some_virtual_gcol(index);

  /*
    If there is a reference to a field in the query, add it
    to the set of appended fields.
    Note for future refinement:
    This is too strong a condition.
    Actually we need only the fields referred in the
    result set. And for some of them it makes sense to use
    the values directly from sorted fields.
  */
  *plength= *ppackable_length= 0;

  /* First pass: decide whether addon fields can be used, and sum lengths. */
  for (pfield= ptabfield; (field= *pfield) ; pfield++)
  {
    if (!bitmap_is_set(read_set, field->field_index))
      continue;
    // part_of_key is empty for a BLOB, so apply this check before the next.
    if (field->flags & BLOB_FLAG)
    {
      // A BLOB column disables addon fields for the whole sort.
      assert(addon_fields == NULL);
      return NULL;
    }
    if (filter_covering && !field->part_of_key.is_set(index))
      continue;                       // See explanation above filter_covering

    const uint field_length= field->max_packed_col_length();
    total_length+= field_length;

    const enum_field_types field_type= field->type();
    // Nullable and variable-length string fields count as packable.
    if (field->maybe_null() ||
        field_type == MYSQL_TYPE_STRING ||
        field_type == MYSQL_TYPE_VARCHAR ||
        field_type == MYSQL_TYPE_VAR_STRING)
      packable_length+= field_length;
    if (field->maybe_null())
      null_fields++;
    num_fields++;
  }
  if (0 == num_fields)
    return NULL;

  // Null flags are packed into a bitmap stored ahead of the field values.
  total_length+= (null_fields + 7) / 8;

  *ppackable_length= packable_length;

  if (total_length + sortlength > max_length_for_sort_data)
  {
    // Too long to carry with the sort key; caller falls back to row ids.
    assert(addon_fields == NULL);
    return NULL;
  }

  if (addon_fields == NULL)
  {
    void *rawmem1= sql_alloc(sizeof(Addon_fields));
    void *rawmem2= sql_alloc(sizeof(Sort_addon_field) * num_fields);
    if (rawmem1 == NULL || rawmem2 == NULL)
      return NULL;                    /* purecov: inspected */
    Addon_fields_array
      addon_array(static_cast<Sort_addon_field*>(rawmem2), num_fields);
    addon_fields= new (rawmem1) Addon_fields(addon_array);
  }
  else
  {
    /*
      Allocate memory only once, reuse descriptor array and buffer.
      Set using_packed_addons here, and size/offset details below.
    */
    assert(num_fields == addon_fields->num_field_descriptors());
    addon_fields->set_using_packed_addons(false);
  }

  *plength= total_length;

  /* Second pass: fill in a layout descriptor for each appended field. */
  uint length= (null_fields + 7) / 8;   // Values start after the null bitmap
  null_fields= 0;
  Addon_fields_array::iterator addonf= addon_fields->begin();
  for (pfield= ptabfield; (field= *pfield) ; pfield++)
  {
    // Must mirror the filtering of the first pass exactly.
    if (!bitmap_is_set(read_set, field->field_index))
      continue;
    if (filter_covering && !field->part_of_key.is_set(index))
      continue;
    assert(addonf != addon_fields->end());

    addonf->field= field;
    addonf->offset= length;
    if (field->maybe_null())
    {
      addonf->null_offset= null_fields / 8;
      addonf->null_bit= 1 << (null_fields & 7);
      null_fields++;
    }
    else
    {
      addonf->null_offset= 0;
      addonf->null_bit= 0;
    }
    addonf->max_length= field->max_packed_col_length();
    DBUG_PRINT("info", ("addon_field %s max_length %u",
                        addonf->field->field_name, addonf->max_length));

    length+= addonf->max_length;
    addonf++;
  }

  DBUG_PRINT("info",("addon_length: %d",length));
  return addon_fields;
}
2605
2606
2607 /*
2608 ** functions to change a double or float to a sortable string
2609 ** The following should work for IEEE
2610 */
2611
2612 #define DBL_EXP_DIG (sizeof(double)*8-DBL_MANT_DIG)
2613
void change_double_for_sort(double nr,uchar *to)
{
  /*
    Encode the double 'nr' into 8 bytes at 'to' such that the byte strings
    compare with memcmp() in the same order as the doubles compare
    numerically.  Bytes are written most-significant first; negative
    values get all bits inverted, non-negative values get the sign bit
    set plus an exponent adjustment, so every non-negative encoding
    compares above every negative one.  Assumes IEEE format (see the
    comment above this function).  NOTE(review): 0.0 and -0.0 both map
    to the same "zero string" -- presumably intentional, as they compare
    equal numerically.
  */
  uchar *tmp= to;
  if (nr == 0.0)
  {						/* Change to zero string */
    tmp[0]=(uchar) 128;
    memset(tmp+1, 0, sizeof(nr)-1);
  }
  else
  {
#ifdef WORDS_BIGENDIAN
    /* Already big-endian in memory: sign/exponent land in tmp[0..1]. */
    memcpy(tmp, &nr, sizeof(nr));
#else
    {
      /* Byte-swap to big-endian so the sign/exponent end up in tmp[0..1]. */
      uchar *ptr= (uchar*) &nr;
#if defined(__FLOAT_WORD_ORDER) && (__FLOAT_WORD_ORDER == __BIG_ENDIAN)
      /* Mixed-endian FP layout: swap within each 32-bit word. */
      tmp[0]= ptr[3]; tmp[1]=ptr[2]; tmp[2]= ptr[1]; tmp[3]=ptr[0];
      tmp[4]= ptr[7]; tmp[5]=ptr[6]; tmp[6]= ptr[5]; tmp[7]=ptr[4];
#else
      tmp[0]= ptr[7]; tmp[1]=ptr[6]; tmp[2]= ptr[5]; tmp[3]=ptr[4];
      tmp[4]= ptr[3]; tmp[5]=ptr[2]; tmp[6]= ptr[1]; tmp[7]=ptr[0];
#endif
    }
#endif
    if (tmp[0] & 128)				/* Negative */
    {						/* make complement */
      /* Inverting every bit reverses the order of negative values. */
      uint i;
      for (i=0 ; i < sizeof(nr); i++)
	tmp[i]=tmp[i] ^ (uchar) 255;
    }
    else
    {					/* Set high and move exponent one up */
      ushort exp_part=(((ushort) tmp[0] << 8) | (ushort) tmp[1] |
		       (ushort) 32768);
      exp_part+= (ushort) 1 << (16-1-DBL_EXP_DIG);
      tmp[0]= (uchar) (exp_part >> 8);
      tmp[1]= (uchar) exp_part;
    }
  }
}
2654