1 /*
2    Copyright (c) 2000, 2021, Oracle and/or its affiliates.
3    Copyright (c) 2018, Percona and/or its affiliates. All rights reserved.
4    Copyright (c) 2009, 2015, MariaDB
5 
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License, version 2.0,
8    as published by the Free Software Foundation.
9 
10    This program is also distributed with certain software (including
11    but not limited to OpenSSL) that is licensed under separate terms,
12    as designated in a particular file or component or in included license
13    documentation.  The authors of MySQL hereby grant you an additional
14    permission to link the program and your derivative works with the
15    separately licensed software that they have included with MySQL.
16 
17    This program is distributed in the hope that it will be useful,
18    but WITHOUT ANY WARRANTY; without even the implied warranty of
19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20    GNU General Public License, version 2.0, for more details.
21 
22    You should have received a copy of the GNU General Public License
23    along with this program; if not, write to the Free Software
24    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
25 
26 
27 /**
28   @file
29 
30   @brief
31   Sorts a database
32 */
33 
34 #include "filesort.h"
35 #include <m_ctype.h>
36 #include "sql_sort.h"
37 #include "probes_mysql.h"
38 #include "opt_range.h"                          // QUICK
39 #include "bounded_queue.h"
40 #include "filesort_utils.h"
41 #include "sql_select.h"
42 #include "debug_sync.h"
43 #include "opt_trace.h"
44 #include "sql_optimizer.h"              // JOIN
45 #include "sql_base.h"
46 #include "opt_costmodel.h"
47 #include "priority_queue.h"
48 #include "log.h"
49 #include "item_sum.h"                   // Item_sum
50 #include "json_dom.h"                   // Json_wrapper
51 #include "template_utils.h"
52 
53 #include "pfs_file_provider.h"
54 #include "mysql/psi/mysql_file.h"
55 
56 #include <algorithm>
57 #include <utility>
58 using std::max;
59 using std::min;
60 
61 namespace {
62 
63 struct Mem_compare
64 {
Mem_compare__anond7565cf50111::Mem_compare65   Mem_compare() : m_compare_length(0) {}
66 
Mem_compare__anond7565cf50111::Mem_compare67   Mem_compare(const Mem_compare &that)
68     : m_compare_length(that.m_compare_length)
69   {
70   }
71 
operator ()__anond7565cf50111::Mem_compare72   bool operator()(const uchar *s1, const uchar *s2) const
73   {
74     // memcmp(s1, s2, 0) is guaranteed to return zero.
75     return memcmp(s1, s2, m_compare_length) < 0;
76   }
77 
78   size_t m_compare_length;
79 };
80 }
81 
82 
	/* functions defined in this file */

/*
  Scan the source rows, build sort keys and collect them either in the
  priority queue 'pq' (top-N/LIMIT optimization) or in the sort buffer,
  spilling sorted runs to disk when the buffer fills up.
  See the full doxygen comment at the definition.
*/
static ha_rows find_all_keys(Sort_param *param, QEP_TAB *qep_tab,
                             Filesort_info *fs_info,
                             IO_CACHE *buffer_file,
                             IO_CACHE *chunk_file,
                             Bounded_queue<uchar *, uchar *, Sort_param,
                                           Mem_compare> *pq,
                             ha_rows *found_rows);
// Writes a batch of 'count' keys from the sort buffer as one sorted run
// (definition not shown in this chunk).
static int write_keys(Sort_param *param, Filesort_info *fs_info,
                      uint count, IO_CACHE *buffer_file, IO_CACHE *tempfile);
// Presumably registers the columns the sort reads in the table's read set;
// definition not shown in this chunk -- verify there.
static void register_used_fields(Sort_param *param);
// Final merge pass: merges the sorted chunks read from 'tempfile' into
// 'outfile' (called from filesort() after merge_many_buff()).
static int merge_index(Sort_param *param,
                       Sort_buffer sort_buffer,
                       Merge_chunk_array chunk_array,
                       IO_CACHE *tempfile,
                       IO_CACHE *outfile);
// Saves an entirely in-memory sorted result (the no-disk-chunks case in
// filesort()).
static bool save_index(Sort_param *param, uint count,
                       Filesort_info *table_sort);
static uint suffix_length(ulong string_length);

// Decides whether the priority-queue (top-N/LIMIT) strategy should be used
// instead of a full sort; see the call site in filesort().
static bool check_if_pq_applicable(Opt_trace_context *trace,
                                   Sort_param *param, Filesort_info *info,
                                   TABLE *table,
                                   ha_rows records, ulong memory_available,
                                   bool keep_addon_fields);
109 
110 
/**
  Initialize this Sort_param for one filesort() run.

  Computes the layout of a sort record: the sort key length, the "result"
  part (either appended addon field values or the rowid), and the total
  record length.

  @param file_sort                 The Filesort this run belongs to; used to
                                   fetch the addon field descriptors.
  @param sortlen                   Length of the sort key proper.
  @param table                     Table being sorted.
  @param max_length_for_sort_data  Upper bound for records with addon fields.
  @param maxrows                   LIMIT, or HA_POS_ERROR if none.
  @param sort_positions            If true, force sorting by position (rowid),
                                   never by addon fields.
*/
void Sort_param::init_for_filesort(Filesort *file_sort,
                                   uint sortlen, TABLE *table,
                                   ulong max_length_for_sort_data,
                                   ha_rows maxrows, bool sort_positions)
{
  assert(max_rows == 0);   // function should not be called twice
  sort_length= sortlen;
  ref_length= table->file->ref_length;
  if (!(table->file->ha_table_flags() & HA_FAST_KEY_READ) &&
      !table->fulltext_searched && !sort_positions)
  {
    /*
      Get the descriptors of all fields whose values are appended
      to sorted fields and get its total length in addon_length.
    */
    addon_fields=
      file_sort->get_addon_fields(max_length_for_sort_data,
                                  table->field, sort_length, &addon_length,
                                  &m_packable_length);
  }
  if (using_addon_fields())
  {
    res_length= addon_length;
  }
  else
  {
    // No addon fields: the result part is the rowid, which also has to
    // participate in the sort so equal keys come out in a stable order.
    res_length= ref_length;
    /*
      The reference to the record is considered
      as an additional sorted field
    */
    sort_length+= ref_length;
  }
  /*
    Add hash at the end of sort key to order cut values correctly.
    Needed for GROUPing, rather than for ORDERing.
  */
  if (use_hash)
    sort_length+= sizeof(ulonglong);

  rec_length= sort_length + addon_length;
  max_rows= maxrows;
}
154 
155 
try_to_pack_addons(ulong max_length_for_sort_data)156 void Sort_param::try_to_pack_addons(ulong max_length_for_sort_data)
157 {
158   if (!using_addon_fields() ||                  // no addons, or
159       using_packed_addons())                    // already packed
160     return;
161 
162   if (!Addon_fields::can_pack_addon_fields(res_length))
163     return;
164 
165   const uint sz= Addon_fields::size_of_length_field;
166   if (rec_length + sz > max_length_for_sort_data)
167     return;
168 
169   // Heuristic: skip packing if potential savings are less than 10 bytes.
170   if (m_packable_length < (10 + sz))
171     return;
172 
173   Addon_fields_array::iterator addonf= addon_fields->begin();
174   for ( ; addonf != addon_fields->end(); ++addonf)
175   {
176     addonf->offset+= sz;
177     addonf->null_offset+= sz;
178   }
179   addon_fields->set_using_packed_addons(true);
180   m_using_packed_addons= true;
181 
182   addon_length+= sz;
183   res_length+= sz;
184   rec_length+= sz;
185 }
186 
187 
trace_filesort_information(Opt_trace_context * trace,const st_sort_field * sortorder,uint s_length)188 static void trace_filesort_information(Opt_trace_context *trace,
189                                        const st_sort_field *sortorder,
190                                        uint s_length)
191 {
192   if (!trace->is_started())
193     return;
194 
195   Opt_trace_array trace_filesort(trace, "filesort_information");
196   for (; s_length-- ; sortorder++)
197   {
198     Opt_trace_object oto(trace);
199     oto.add_alnum("direction", sortorder->reverse ? "desc" : "asc");
200 
201     if (sortorder->field)
202     {
203       if (strlen(sortorder->field->table->alias) != 0)
204         oto.add_utf8_table(sortorder->field->table->pos_in_table_list);
205       else
206         oto.add_alnum("table", "intermediate_tmp_table");
207       oto.add_alnum("field", sortorder->field->field_name ?
208                     sortorder->field->field_name : "tmp_table_column");
209     }
210     else
211       oto.add("expression", sortorder->item);
212   }
213 }
214 
215 
216 /**
217   Sort a table.
218   Creates a set of pointers that can be used to read the rows
219   in sorted order. This should be done with the functions
220   in records.cc.
221 
222   Before calling filesort, one must have done
223   table->file->info(HA_STATUS_VARIABLE)
224 
225   The result set is stored in table->sort.io_cache or
226   table->sort.sorted_result, or left in the main filesort buffer.
227 
228   @param      thd            Current thread
229   @param      filesort       Table and how to sort it
230   @param      sort_positions Set to TRUE if we want to force sorting by position
231                              (Needed by UPDATE/INSERT or ALTER TABLE or
232                               when rowids are required by executor)
233   @param[out] examined_rows  Store number of examined rows here
234                              This is the number of found rows before
235                              applying WHERE condition.
236   @param[out] found_rows     Store the number of found rows here.
237                              This is the number of found rows after
238                              applying WHERE condition.
239   @param[out] returned_rows  Number of rows in the result, could be less than
240                              found_rows if LIMIT is provided.
241 
242   @note
243     If we sort by position (like if sort_positions is 1) filesort() will
244     call table->prepare_for_position().
245 
246   @returns   False if success, true if error
247 */
248 
bool filesort(THD *thd, Filesort *filesort, bool sort_positions,
              ha_rows *examined_rows, ha_rows *found_rows,
              ha_rows *returned_rows)
{
  int error;
  ulong memory_available= thd->variables.sortbuff_size;
  size_t num_chunks;
  ha_rows num_rows= HA_POS_ERROR;
  IO_CACHE tempfile;   // Temporary file for storing intermediate results.
  IO_CACHE chunk_file; // For saving Merge_chunk structs.
  IO_CACHE *outfile;   // Contains the final, sorted result.
  Sort_param param;
  bool multi_byte_charset;
  // Priority queue for the top-N (LIMIT) strategy; only initialized when
  // check_if_pq_applicable() below decides it pays off.
  Bounded_queue<uchar *, uchar *, Sort_param, Mem_compare>
    pq((Malloc_allocator<uchar*>
        (key_memory_Filesort_info_record_pointers)));
  Opt_trace_context * const trace= &thd->opt_trace;
  QEP_TAB *const tab= filesort->tab;
  TABLE *const table= tab->table();
  ha_rows max_rows= filesort->limit;
  uint s_length= 0;

  DBUG_ENTER("filesort");

  if (!(s_length= filesort->make_sortorder()))
    DBUG_RETURN(true);  /* purecov: inspected */

  /*
    We need a nameless wrapper, since we may be inside the "steps" of
    "join_execution".
  */
  Opt_trace_object trace_wrapper(trace);
  trace_filesort_information(trace, filesort->sortorder, s_length);

  assert(!table->reginfo.join_tab);
  assert(tab == table->reginfo.qep_tab);
  Item_subselect *const subselect= tab && tab->join() ?
    tab->join()->select_lex->master_unit()->item : NULL;

  MYSQL_FILESORT_START(const_cast<char*>(table->s->db.str),
                       const_cast<char*>(table->s->table_name.str));
  DEBUG_SYNC(thd, "filesort_start");

  /*
   Release InnoDB's adaptive hash index latch (if holding) before
   running a sort.
  */
  ha_release_temporary_latches(thd);

  /*
    Don't use table->sort in filesort as it is also used by
    QUICK_INDEX_MERGE_SELECT. Work with a copy and put it back at the end
    when index_merge select has finished with it.
  */
  Filesort_info table_sort= table->sort;
  table->sort.io_cache= NULL;
  assert(table_sort.sorted_result == NULL);
  table_sort.sorted_result_in_fsbuf= false;

  outfile= table_sort.io_cache;
  my_b_clear(&tempfile);
  my_b_clear(&chunk_file);
  error= 1;

  // Compute the sort record layout (key length, addon fields, rowid).
  param.init_for_filesort(filesort,
                          sortlength(thd, filesort->sortorder, s_length,
                                     &multi_byte_charset,
                                     &param.use_hash),
                          table,
                          thd->variables.max_length_for_sort_data,
                          max_rows, sort_positions);

  table_sort.addon_fields= param.addon_fields;

  if (tab->quick())
    thd->inc_status_sort_range();
  else
    thd->inc_status_sort_scan();

  thd->query_plan_flags|= QPLAN_FILESORT;

  // If number of rows is not known, use as much of sort buffer as possible.
  num_rows= table->file->estimate_rows_upper_bound();

  if (multi_byte_charset &&
      !(param.tmp_buffer= (char*) my_malloc(key_memory_Sort_param_tmp_buffer,
                                            param.sort_length,MYF(MY_WME))))
    goto err;

  if (check_if_pq_applicable(trace, &param, &table_sort,
                             table, num_rows, memory_available,
                             subselect != NULL))
  {
    DBUG_PRINT("info", ("filesort PQ is applicable"));
    /*
      For PQ queries (with limit) we know exactly how many pointers/records
      we have in the buffer, so to simplify things, we initialize
      all pointers here. (We cannot pack fields anyways, so there is no
      point in doing lazy initialization).
     */
    table_sort.init_record_pointers();

    if (pq.init(param.max_rows,
                &param, table_sort.get_sort_keys()))
    {
      /*
       If we fail to init pq, we have to give up:
       out of memory means my_malloc() will call my_error().
      */
      DBUG_PRINT("info", ("failed to allocate PQ"));
      table_sort.free_sort_buffer();
      assert(thd->is_error());
      goto err;
    }
    filesort->using_pq= true;
    param.using_pq= true;
  }
  else
  {
    DBUG_PRINT("info", ("filesort PQ is not applicable"));
    filesort->using_pq= false;
    param.using_pq= false;

    /*
      When sorting using priority queue, we cannot use packed addons.
      Without PQ, we can try.
    */
    param.try_to_pack_addons(thd->variables.max_length_for_sort_data);

    /*
      We need space for at least one record from each merge chunk, i.e.
        param->max_keys_per_buffer >= MERGEBUFF2
      See merge_buffers()),
      memory_available must be large enough for
        param->max_keys_per_buffer * (record + record pointer) bytes
      (the main sort buffer, see alloc_sort_buffer()).
      Hence this minimum:
    */
    const ulong min_sort_memory=
      max<ulong>(MIN_SORT_MEMORY,
                 ALIGN_SIZE(MERGEBUFF2 * (param.rec_length + sizeof(uchar*))));
    /*
      Cannot depend on num_rows. For external sort, space for up to MERGEBUFF2
      rows is required.
    */
    if (num_rows < MERGEBUFF2)
      num_rows= MERGEBUFF2;

    // Try to allocate the sort buffer, shrinking the request by 25% per
    // failed attempt, but never below min_sort_memory.
    while (memory_available >= min_sort_memory)
    {
      ha_rows keys= memory_available / (param.rec_length + sizeof(char*));
      // If the table is empty, allocate space for one row.
      param.max_keys_per_buffer= (uint) min(num_rows > 0 ? num_rows : 1, keys);

      table_sort.alloc_sort_buffer(param.max_keys_per_buffer, param.rec_length);
      if (table_sort.sort_buffer_size() > 0)
        break;
      ulong old_memory_available= memory_available;
      memory_available= memory_available/4*3;
      if (memory_available < min_sort_memory &&
          old_memory_available > min_sort_memory)
        memory_available= min_sort_memory;
    }
    if (memory_available < min_sort_memory)
    {
      my_error(ER_OUT_OF_SORTMEMORY,MYF(ME_ERRORLOG + ME_FATALERROR));
      goto err;
    }
  }

  if (open_cached_file(&chunk_file,mysql_tmpdir,TEMP_PREFIX,
		       DISK_BUFFER_SIZE, MYF(MY_WME)))
    goto err;

  param.sort_form= table;
  param.local_sortorder=
    Bounds_checked_array<st_sort_field>(filesort->sortorder, s_length);
  // New scope, because subquery execution must be traced within an array.
  {
    Opt_trace_array ota(trace, "filesort_execution");
    num_rows= find_all_keys(&param, tab,
                            &table_sort,
                            &chunk_file,
                            &tempfile,
                            param.using_pq ? &pq : NULL,
                            found_rows);
    if (num_rows == HA_POS_ERROR)
      goto err;
  }
  DEBUG_SYNC(thd, "after_find_all_keys");

  // One Merge_chunk was appended to chunk_file per sorted run that was
  // spilled to disk; zero chunks means everything fit in memory.
  num_chunks= static_cast<size_t>(my_b_tell(&chunk_file)) /
    sizeof(Merge_chunk);

  Opt_trace_object(trace, "filesort_summary")
    .add("rows", num_rows)
    .add("examined_rows", param.examined_rows)
    .add("number_of_tmp_files", num_chunks)
    .add("sort_buffer_size", table_sort.sort_buffer_size())
    .add_alnum("sort_mode",
               param.using_packed_addons() ?
               "<sort_key, packed_additional_fields>" :
               param.using_addon_fields() ?
               "<sort_key, additional_fields>" : "<sort_key, rowid>");

  if (num_chunks == 0)                   // The whole set is in memory
  {
    if (save_index(&param, (uint) num_rows, &table_sort))
      goto err;
  }
  else
  {
    // The result spilled to disk: merge the sorted runs into 'outfile'.
    thd->query_plan_flags|= QPLAN_FILESORT_DISK;

    // We will need an extra buffer in rr_unpack_from_tempfile()
    if (table_sort.using_addon_fields() &&
        !(table_sort.addon_fields->allocate_addon_buf(param.addon_length)))
      goto err;                                 /* purecov: inspected */

    table_sort.read_chunk_descriptors(&chunk_file, num_chunks);
    if (table_sort.merge_chunks.is_null())
      goto err;                                 /* purecov: inspected */

    close_cached_file(&chunk_file);

    /* Open cached file if it isn't open */
    if (! my_b_inited(outfile) &&
	open_cached_file(outfile,mysql_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER,
			  MYF(MY_WME)))
      goto err;
    if (reinit_io_cache(outfile,WRITE_CACHE,0L,0,0))
      goto err;

    /*
      Use also the space previously used by string pointers in sort_buffer
      for temporary key storage.
    */
    param.max_keys_per_buffer=
      table_sort.sort_buffer_size() / param.rec_length;

    if (merge_many_buff(&param,
                        table_sort.get_raw_buf(),
                        table_sort.merge_chunks,
                        &num_chunks,
                        &tempfile))
      goto err;
    if (flush_io_cache(&tempfile) ||
	reinit_io_cache(&tempfile,READ_CACHE,0L,0,0))
      goto err;
    if (merge_index(&param,
                    table_sort.get_raw_buf(),
                    Merge_chunk_array(table_sort.merge_chunks.begin(),
                                      num_chunks),
                    &tempfile,
                    outfile))
      goto err;
  }

  if (num_rows > param.max_rows)
  {
    // If find_all_keys() produced more results than the query LIMIT.
    num_rows= param.max_rows;
  }
  error= 0;

 err:
  // Common exit path for both success (error == 0) and failure (error == 1):
  // release temporary resources, then report either the error or the stats.
  my_free(param.tmp_buffer);
  if (!subselect || !subselect->is_uncacheable())
  {
    if (!table_sort.sorted_result_in_fsbuf)
      table_sort.free_sort_buffer();
    my_free(table_sort.merge_chunks.array());
    table_sort.merge_chunks= Merge_chunk_array(NULL, 0);
  }
  close_cached_file(&tempfile);
  close_cached_file(&chunk_file);
  if (my_b_inited(outfile))
  {
    if (flush_io_cache(outfile))
      error=1;
    {
      my_off_t save_pos=outfile->pos_in_file;
      /* For following reads */
      if (reinit_io_cache(outfile,READ_CACHE,0L,0,0))
	error=1;
      outfile->end_of_file=save_pos;
    }
  }
  if (error)
  {
    int kill_errno= thd->killed_errno();

    assert(thd->is_error() || kill_errno);

    /*
      We replace the table->sort at the end.
      Hence calling free_io_cache to make sure table->sort.io_cache
      used for QUICK_INDEX_MERGE_SELECT is free.
    */
    free_io_cache(table);

    /*
      Guard against Bug#11745656 -- KILL QUERY should not send "server shutdown"
      to client!
    */
    const char *cause= kill_errno
                       ? ((kill_errno == THD::KILL_CONNECTION && !abort_loop)
                         ? ER(THD::KILL_QUERY)
                         : ER(kill_errno))
                       : thd->get_stmt_da()->message_text();
    const char *msg=   ER_THD(thd, ER_FILSORT_ABORT);

    my_printf_error(ER_FILSORT_ABORT,
                    "%s: %s",
                    MYF(0),
                    msg,
                    cause);

    if (thd->is_fatal_error)
      sql_print_information("%s, host: %s, user: %s, "
                            "thread: %u, error: %s, query: %-.4096s",
                            msg,
                            thd->security_context()->host_or_ip().str,
                            thd->security_context()->priv_user().str,
                            thd->thread_id(),
                            cause,
                            thd->query().str);
  }
  else
    thd->inc_status_sort_rows(num_rows);
  *examined_rows= param.examined_rows;
  *returned_rows= num_rows;

  /* table->sort.io_cache should be free by this time */
  assert(NULL == table->sort.io_cache);

  // Assign the copy back!
  table->sort= table_sort;

  DBUG_PRINT("exit",
             ("num_rows: %ld examined_rows: %ld found_rows: %ld",
              (long) num_rows, (long) *examined_rows, (long) *found_rows));
  MYSQL_FILESORT_DONE(error, num_rows);
  DBUG_RETURN(error);
} /* filesort */
594 
595 
filesort_free_buffers(TABLE * table,bool full)596 void filesort_free_buffers(TABLE *table, bool full)
597 {
598   DBUG_ENTER("filesort_free_buffers");
599   my_free(table->sort.sorted_result);
600   table->sort.sorted_result= NULL;
601   table->sort.sorted_result_in_fsbuf= false;
602 
603   if (full)
604   {
605     table->sort.free_sort_buffer();
606     my_free(table->sort.merge_chunks.array());
607     table->sort.merge_chunks= Merge_chunk_array(NULL, 0);
608   }
609 
610   table->sort.addon_fields= NULL;
611   DBUG_VOID_RETURN;
612 }
613 
614 
make_sortorder()615 uint Filesort::make_sortorder()
616 {
617   uint count;
618   st_sort_field *sort,*pos;
619   ORDER *ord;
620   DBUG_ENTER("make_sortorder");
621 
622 
623   count=0;
624   for (ord = order; ord; ord= ord->next)
625     count++;
626   if (!sortorder)
627     sortorder= (st_sort_field*) sql_alloc(sizeof(st_sort_field) * (count + 1));
628   pos= sort= sortorder;
629 
630   if (!pos)
631     DBUG_RETURN(0);
632 
633   for (ord= order; ord; ord= ord->next, pos++)
634   {
635     Item *const item= ord->item[0], *const real_item= item->real_item();
636     pos->field= 0; pos->item= 0;
637     if (real_item->type() == Item::FIELD_ITEM)
638     {
639       /*
640         Could be a field, or Item_direct_view_ref/Item_ref wrapping a field
641         If it is an Item_outer_ref, only_full_group_by has been switched off.
642       */
643       assert
644         (item->type() == Item::FIELD_ITEM ||
645          (item->type() == Item::REF_ITEM &&
646           (down_cast<Item_ref*>(item)->ref_type() == Item_ref::VIEW_REF
647            || down_cast<Item_ref*>(item)->ref_type() == Item_ref::OUTER_REF
648            || down_cast<Item_ref*>(item)->ref_type() == Item_ref::REF)
649           ));
650       pos->field= down_cast<Item_field*>(real_item)->field;
651     }
652     else if (real_item->type() == Item::SUM_FUNC_ITEM &&
653              !real_item->const_item())
654     {
655       // Aggregate, or Item_aggregate_ref
656       assert(item->type() == Item::SUM_FUNC_ITEM ||
657              (item->type() == Item::REF_ITEM &&
658               static_cast<Item_ref*>(item)->ref_type() ==
659               Item_ref::AGGREGATE_REF));
660       pos->field= item->get_tmp_table_field();
661     }
662     else if (real_item->type() == Item::COPY_STR_ITEM)
663     {						// Blob patch
664       pos->item= static_cast<Item_copy*>(real_item)->get_item();
665     }
666     else
667       pos->item= item;
668     pos->reverse= (ord->direction == ORDER::ORDER_DESC);
669     assert(pos->field != NULL || pos->item != NULL);
670   }
671   DBUG_RETURN(count);
672 }
673 
674 
/**
  Read the 'count' Merge_chunk descriptors accumulated in 'chunk_file'
  into the in-memory 'merge_chunks' array, (re)allocating it as needed.

  On read/allocation failure, 'merge_chunks' ends up as an empty array;
  callers detect this via merge_chunks.is_null() (see filesort()).
*/
void Filesort_info::read_chunk_descriptors(IO_CACHE *chunk_file, uint count)
{
  DBUG_ENTER("Filesort_info::read_chunk_descriptors");

  // If we already have a chunk array, we're doing sort in a subquery.
  if (!merge_chunks.is_null() &&
      merge_chunks.size() < count)
  {
    // The previous array is too small: drop it and allocate afresh below.
    my_free(merge_chunks.array());              /* purecov: inspected */
    merge_chunks= Merge_chunk_array(NULL, 0);   /* purecov: inspected */
  }

  void *rawmem= merge_chunks.array();
  const size_t length= sizeof(Merge_chunk) * count;
  if (NULL == rawmem)
  {
    rawmem= my_malloc(key_memory_Filesort_info_merge, length, MYF(MY_WME));
    if (rawmem == NULL)
      DBUG_VOID_RETURN;                         /* purecov: inspected */
  }

  // Rewind the chunk file and read all descriptors in a single call.
  if (reinit_io_cache(chunk_file, READ_CACHE, 0L, 0, 0) ||
      my_b_read(chunk_file, static_cast<uchar*>(rawmem), length))
  {
    my_free(rawmem);                            /* purecov: inspected */
    rawmem= NULL;                               /* purecov: inspected */
    count= 0;                                   /* purecov: inspected */
  }

  merge_chunks= Merge_chunk_array(static_cast<Merge_chunk*>(rawmem), count);
  DBUG_VOID_RETURN;
}
707 
708 #ifndef NDEBUG
709 /*
710   Print a text, SQL-like record representation into dbug trace.
711 
712   Note: this function is a work in progress: at the moment
713    - column read bitmap is ignored (can print garbage for unused columns)
714    - there is no quoting
715 */
dbug_print_record(TABLE * table,bool print_rowid)716 static void dbug_print_record(TABLE *table, bool print_rowid)
717 {
718   char buff[1024];
719   Field **pfield;
720   String tmp(buff,sizeof(buff),&my_charset_bin);
721   DBUG_LOCK_FILE;
722 
723   fprintf(DBUG_FILE, "record (");
724   for (pfield= table->field; *pfield ; pfield++)
725     fprintf(DBUG_FILE, "%s%s", (*pfield)->field_name, (pfield[1])? ", ":"");
726   fprintf(DBUG_FILE, ") = ");
727 
728   fprintf(DBUG_FILE, "(");
729   for (pfield= table->field; *pfield ; pfield++)
730   {
731     Field *field=  *pfield;
732 
733     if (field->is_null()) {
734       if (fwrite("NULL", sizeof(char), 4, DBUG_FILE) != 4) {
735         goto unlock_file_and_quit;
736       }
737     }
738 
739     if (field->type() == MYSQL_TYPE_BIT)
740       (void) field->val_int_as_str(&tmp, 1);
741     else
742       field->val_str(&tmp);
743 
744     if (fwrite(tmp.ptr(),sizeof(char),tmp.length(),DBUG_FILE) != tmp.length()) {
745       goto unlock_file_and_quit;
746     }
747 
748     if (pfield[1]) {
749       if (fwrite(", ", sizeof(char), 2, DBUG_FILE) != 2) {
750         goto unlock_file_and_quit;
751       }
752     }
753   }
754   fprintf(DBUG_FILE, ")");
755   if (print_rowid)
756   {
757     fprintf(DBUG_FILE, " rowid ");
758     for (uint i=0; i < table->file->ref_length; i++)
759     {
760       fprintf(DBUG_FILE, "%x", table->file->ref[i]);
761     }
762   }
763   fprintf(DBUG_FILE, "\n");
764 unlock_file_and_quit:
765   DBUG_UNLOCK_FILE;
766 }
767 #endif
768 
769 
770 /// Error handler for filesort.
771 class Filesort_error_handler : public Internal_error_handler
772 {
773   THD *m_thd;                ///< The THD in which filesort is executed.
774   bool m_seen_not_supported; ///< Has a not supported warning has been seen?
775 public:
776   /**
777     Create an error handler and push it onto the error handler
778     stack. The handler will be automatically popped from the error
779     handler stack when it is destroyed.
780   */
Filesort_error_handler(THD * thd)781   Filesort_error_handler(THD *thd)
782     : m_thd(thd), m_seen_not_supported(false)
783   {
784     thd->push_internal_handler(this);
785   }
786 
787   /**
788     Pop the error handler from the error handler stack, and destroy
789     it.
790   */
~Filesort_error_handler()791   ~Filesort_error_handler()
792   {
793     m_thd->pop_internal_handler();
794   }
795 
796   /**
797     Handle a condition.
798 
799     The handler will make sure that no more than a single
800     ER_NOT_SUPPORTED_YET warning will be seen by the higher
801     layers. This warning is generated by Json_wrapper::make_sort_key()
802     for every value that it doesn't know how to create a sort key
803     for. It is sufficient for the higher layers to report this warning
804     only once per sort.
805   */
handle_condition(THD * thd,uint sql_errno,const char * sqlstate,Sql_condition::enum_severity_level * level,const char * msg)806   virtual bool handle_condition(THD *thd,
807                                 uint sql_errno,
808                                 const char* sqlstate,
809                                 Sql_condition::enum_severity_level *level,
810                                 const char* msg)
811   {
812     if (*level == Sql_condition::SL_WARNING &&
813         sql_errno == ER_NOT_SUPPORTED_YET)
814     {
815       if (m_seen_not_supported)
816         return true;
817       m_seen_not_supported= true;
818     }
819 
820     return false;
821   }
822 
823 };
824 
825 
// Item tree traversal mode: post-order (children before parents), also
// descending into subqueries.
static const Item::enum_walk walk_subquery=
  Item::enum_walk(Item::WALK_POSTFIX | Item::WALK_SUBQUERY);
828 
829 /**
  Search for sort keys, and write them into tempfile
831   (if we run out of space in the sort buffer).
832   All produced sequences are guaranteed to be non-empty.
833 
834   @param param             Sorting parameter
  @param qep_tab           Use this to get source data
836   @param fs_info           Struct containing sort buffer etc.
837   @param chunk_file        File to write Merge_chunks describing sorted segments
838                            in tempfile.
839   @param tempfile          File to write sorted sequences of sortkeys to.
840   @param pq                If !NULL, use it for keeping top N elements
841   @param [out] found_rows  The number of FOUND_ROWS().
842                            For a query with LIMIT, this value will typically
843                            be larger than the function return value.
844 
845   @note
846     Basic idea:
847     @verbatim
848      while (get_next_sortkey())
849      {
850        if (using priority queue)
851          push sort key into queue
852        else
853        {
854          if (no free space in sort buffer)
855          {
856            sort buffer;
857            dump sorted sequence to 'tempfile';
858            dump Merge_chunk describing sequence location into 'chunk_file';
859          }
860          put sort key into buffer;
861          if (key was packed)
862            tell sort buffer the actual number of bytes used;
863        }
864      }
865      if (buffer has some elements && dumped at least once)
866        sort-dump-dump as above;
867      else
868        don't sort, leave sort buffer to be sorted by caller.
869   @endverbatim
870 
871   @returns
872     Number of records written on success.
873   @returns
874     HA_POS_ERROR on error.
875 */
876 
static ha_rows find_all_keys(Sort_param *param, QEP_TAB *qep_tab,
                             Filesort_info *fs_info,
                             IO_CACHE *chunk_file,
                             IO_CACHE *tempfile,
                             Bounded_queue<uchar *, uchar *, Sort_param,
                                           Mem_compare> *pq,
                             ha_rows *found_rows)
{
  int error,flag;
  // idx: number of rows currently held in the sort buffer.
  // indexpos: number of sorted chunks already dumped to 'tempfile'.
  uint idx,indexpos,ref_length;
  uchar *ref_pos,*next_pos,ref_buff[MAX_REFLENGTH];
  my_off_t record;                      // Sequential record counter (non-quick scan)
  TABLE *sort_form;
  THD *thd= current_thd;
  volatile THD::killed_state *killed= &thd->killed;
  handler *file;
  MY_BITMAP *save_read_set, *save_write_set;
  bool skip_record;
  ha_rows num_records= 0;
  const bool packed_addon_fields= param->using_packed_addons();

  /*
    Set up an error handler for filesort. It is automatically pushed
    onto the internal error handler stack upon creation, and will be
    popped off the stack automatically when the handler goes out of
    scope.
  */
  Filesort_error_handler error_handler(thd);

  DBUG_ENTER("find_all_keys");
  DBUG_PRINT("info",("using: %s",
                     (qep_tab->condition() ? qep_tab->quick() ? "ranges" : "where":
                      "every row")));

  idx=indexpos=0;
  error= 0;
  sort_form=param->sort_form;
  file=sort_form->file;
  ref_length=param->ref_length;
  ref_pos= ref_buff;
  const bool quick_select= qep_tab->quick() != NULL;
  record=0;
  *found_rows= 0;
  /*
    'flag' set: the row reference must be fetched from handler::position()
    (the table has no sequential record numbers, or we read via a quick
    select). Otherwise rows are addressed by their sequential position.
  */
  flag= ((file->ha_table_flags() & HA_REC_NOT_IN_SEQ) || quick_select);
  if (flag)
    ref_pos= &file->ref[0];
  next_pos=ref_pos;
  if (!quick_select)
  {
    next_pos=(uchar*) 0;			/* Find records in sequence */
    DBUG_EXECUTE_IF("bug14365043_1",
                    DBUG_SET("+d,ha_rnd_init_fail"););
    if ((error= file->ha_rnd_init(1)))
    {
      file->print_error(error, MYF(0));
      DBUG_RETURN(HA_POS_ERROR);
    }
    file->extra_opt(HA_EXTRA_CACHE,
		    current_thd->variables.read_buff_size);
  }

  if (quick_select)
  {
    if ((error= qep_tab->quick()->reset()))
    {
      file->print_error(error, MYF(0));
      DBUG_RETURN(HA_POS_ERROR);
    }
  }

  /* Remember original bitmaps */
  save_read_set=  sort_form->read_set;
  save_write_set= sort_form->write_set;
  /*
    Set up temporary column read map for columns used by sort and verify
    it's not used
  */
  assert(sort_form->tmp_set.n_bits == 0 ||
         bitmap_is_clear_all(&sort_form->tmp_set));

  // Temporary set for register_used_fields and mark_field_in_map()
  sort_form->read_set= &sort_form->tmp_set;
  // Include fields used for sorting in the read_set.
  register_used_fields(param);

  // Include fields used by conditions in the read_set.
  if (qep_tab->condition())
  {
    Mark_field mf(sort_form, MARK_COLUMNS_TEMP);
    qep_tab->condition()->walk(&Item::mark_field_in_map,
                               walk_subquery, (uchar*) &mf);
  }
  // Include fields used by pushed conditions in the read_set.
  if (qep_tab->table()->file->pushed_idx_cond)
  {
    Mark_field mf(sort_form, MARK_COLUMNS_TEMP);
    qep_tab->table()->file->pushed_idx_cond->walk(&Item::mark_field_in_map,
                                                  walk_subquery,
                                                  (uchar*) &mf);
  }
  sort_form->column_bitmaps_set(&sort_form->tmp_set, &sort_form->tmp_set);

  DEBUG_SYNC(thd, "after_index_merge_phase1");
  // Main fetch loop: read rows, filter, and collect sort keys until EOF,
  // error, or kill.
  for (;;)
  {
    if (quick_select)
    {
      if ((error= qep_tab->quick()->get_next()))
        break;
      file->position(sort_form->record[0]);
      DBUG_EXECUTE_IF("debug_filesort", dbug_print_record(sort_form, TRUE););
    }
    else					/* Not quick-select */
    {
      DBUG_EXECUTE_IF("bug19656296", DBUG_SET("+d,ha_rnd_next_deadlock"););
      {
	error= file->ha_rnd_next(sort_form->record[0]);
	if (!flag)
	{
	  my_store_ptr(ref_pos,ref_length,record); // Position to row
	  record+= sort_form->s->db_record_offset;
	}
	else if (!error)
	  file->position(sort_form->record[0]);
      }
      if (error && error != HA_ERR_RECORD_DELETED)
	break;
    }

    // Abort promptly if the statement was killed (KILL QUERY etc.).
    if (*killed)
    {
      DBUG_PRINT("info",("Sort killed by user"));
      if (!quick_select)
      {
        (void) file->extra(HA_EXTRA_NO_CACHE);
        file->ha_rnd_end();
      }
      num_records= HA_POS_ERROR;
      goto cleanup;
    }
    if (error == 0)
      param->examined_rows++;
    if (!error && !qep_tab->skip_record(thd, &skip_record) && !skip_record)
    {
      ++(*found_rows);
      if (pq)
        pq->push(ref_pos);
      else
      {
        // Sort buffer full: sort it and spill one chunk to tempfile.
        if (fs_info->isfull())
        {
          if (write_keys(param, fs_info, idx, chunk_file, tempfile))
          {
            num_records= HA_POS_ERROR;
            goto cleanup;
          }
          idx= 0;
          indexpos++;
        }
        if (idx == 0)
          fs_info->init_next_record_pointer();
        uchar *start_of_rec= fs_info->get_next_record_pointer();

        const uint rec_sz= param->make_sortkey(start_of_rec, ref_pos);
        // Packed addon fields may use less than rec_length; tell the
        // buffer how many bytes were actually consumed.
        if (packed_addon_fields && rec_sz != param->rec_length)
          fs_info->adjust_next_record_pointer(rec_sz);

        idx++;
        num_records++;
      }
    }
    /*
      Don't try unlocking the row if skip_record reported an error since in
      this case the transaction might have been rolled back already.
    */
    else if (!thd->is_error())
      file->unlock_row();
    /* It does not make sense to read more keys in case of a fatal error */
    if (thd->is_error())
      break;
  }
  if (!quick_select)
  {
    (void) file->extra(HA_EXTRA_NO_CACHE);	/* End caching of records */
    if (!next_pos)
      file->ha_rnd_end();
  }

  if (thd->is_error())
  {
    num_records= HA_POS_ERROR;
    goto cleanup;
  }

  /* Signal we should use original column read and write maps */
  sort_form->column_bitmaps_set(save_read_set, save_write_set);

  DBUG_PRINT("test",("error: %d  indexpos: %d",error,indexpos));
  if (error != HA_ERR_END_OF_FILE)
  {
    myf my_flags;
    switch (error) {
    case HA_ERR_LOCK_DEADLOCK:
    case HA_ERR_LOCK_WAIT_TIMEOUT:
      // Expected concurrency errors: report to client, don't spam the log.
      my_flags= MYF(0);
      break;
    default:
      my_flags= MYF(ME_ERRORLOG);
    }
    file->print_error(error, my_flags);
    num_records= HA_POS_ERROR;
    goto cleanup;
  }
  // Flush the final partial chunk only if we spilled earlier; otherwise
  // the caller sorts the in-memory buffer itself.
  if (indexpos && idx &&
      write_keys(param, fs_info, idx, chunk_file, tempfile))
  {
    num_records= HA_POS_ERROR;                            // purecov: inspected
    goto cleanup;
  }

  if (pq)
    num_records= pq->num_elements();

cleanup:
  // Clear tmp_set so it can be used elsewhere
  bitmap_clear_all(&sort_form->tmp_set);

  DBUG_PRINT("info", ("find_all_keys return %lu", (ulong) num_records));

  DBUG_RETURN(num_records);
} /* find_all_keys */
1108 
1109 
1110 /**
1111   @details
1112   Sort the buffer and write:
1113   -# the sorted sequence to tempfile
1114   -# a Merge_chunk describing the sorted sequence position to chunk_file
1115 
1116   @param param          Sort parameters
1117   @param fs_info        Contains the buffer to be sorted and written.
1118   @param count          Number of records to write.
1119   @param chunk_file     One 'Merge_chunk' struct will be written into this file.
1120                         The Merge_chunk::{file_pos, count} will indicate where
1121                         the sorted data was stored.
1122   @param tempfile       The sorted sequence will be written into this file.
1123 
1124   @returns
1125     0 OK
1126   @returns
1127     1 Error
1128 */
1129 
1130 static int
write_keys(Sort_param * param,Filesort_info * fs_info,uint count,IO_CACHE * chunk_file,IO_CACHE * tempfile)1131 write_keys(Sort_param *param, Filesort_info *fs_info, uint count,
1132            IO_CACHE *chunk_file, IO_CACHE *tempfile)
1133 {
1134   Merge_chunk merge_chunk;
1135   DBUG_ENTER("write_keys");
1136 
1137   fs_info->sort_buffer(param, count);
1138 
1139   if (!my_b_inited(tempfile) &&
1140       open_cached_file(tempfile, mysql_tmpdir, TEMP_PREFIX, DISK_BUFFER_SIZE,
1141                        MYF(MY_WME)))
1142     DBUG_RETURN(1);                             /* purecov: inspected */
1143 
1144   // Check that we wont have more chunks than we can possibly keep in memory.
1145   if (my_b_tell(chunk_file) + sizeof(Merge_chunk) > (ulonglong)UINT_MAX)
1146     DBUG_RETURN(1);                             /* purecov: inspected */
1147 
1148   merge_chunk.set_file_position(my_b_tell(tempfile));
1149   if (static_cast<ha_rows>(count) > param->max_rows)
1150   {
1151     // Write only SELECT LIMIT rows to the file
1152     count= static_cast<uint>(param->max_rows);  /* purecov: inspected */
1153   }
1154   merge_chunk.set_rowcount(static_cast<ha_rows>(count));
1155 
1156   const bool packed_addon_fields= param->using_packed_addons();
1157   for (uint ix= 0; ix < count; ++ix)
1158   {
1159     uint rec_length;
1160     uchar *record= fs_info->get_sorted_record(ix);
1161     if (packed_addon_fields)
1162     {
1163       rec_length= param->sort_length +
1164         Addon_fields::read_addon_length(record + param->sort_length);
1165     }
1166     else
1167       rec_length= param->rec_length;
1168 
1169     if (my_b_write(tempfile, record, rec_length))
1170       DBUG_RETURN(1);                           /* purecov: inspected */
1171   }
1172 
1173   if (my_b_write(chunk_file, reinterpret_cast<uchar*>(&merge_chunk),
1174                  sizeof(merge_chunk)))
1175     DBUG_RETURN(1);                             /* purecov: inspected */
1176 
1177   DBUG_RETURN(0);
1178 } /* write_keys */
1179 
1180 
1181 /**
1182   Store length as suffix in high-byte-first order.
1183 */
1184 
store_length(uchar * to,size_t length,uint pack_length)1185 static inline void store_length(uchar *to, size_t length, uint pack_length)
1186 {
1187   switch (pack_length) {
1188   case 1:
1189     *to= (uchar) length;
1190     break;
1191   case 2:
1192     mi_int2store(to, length);
1193     break;
1194   case 3:
1195     mi_int3store(to, length);
1196     break;
1197   default:
1198     mi_int4store(to, length);
1199     break;
1200   }
1201 }
1202 
1203 
// Compile-time endianness of this build, used to pick the right byte
// order when converting integers into memcmp-able sort keys.
#ifdef WORDS_BIGENDIAN
const bool Is_big_endian= true;
#else
const bool Is_big_endian= false;
#endif
/**
  Copy the native longlong 'val' into 'to' as a memcmp-able,
  high-byte-first sort key of 'to_length' bytes.

  @param to           Destination buffer.
  @param to_length    Number of bytes to produce.
  @param val          The value to encode.
  @param is_unsigned  True if 'val' should be compared as unsigned.
*/
void copy_native_longlong(uchar *to, size_t to_length,
                          longlong val, bool is_unsigned)
{
  copy_integer<Is_big_endian>(to, to_length,
                              static_cast<uchar*>(static_cast<void*>(&val)),
                              sizeof(longlong),
                              is_unsigned);
}
1217 
1218 
1219 /**
1220   Make a sort key for the JSON value in an Item.
1221 
1222   This function is called by Sort_param::make_sortkey(). We don't want
1223   it to be inlined, since that seemed to have a negative impact on
1224   some performance tests.
1225 
1226   @param[in]     item    The item for which to create a sort key.
1227 
1228   @param[out]    to      Pointer into the buffer to which the sort key should
1229                          be written. It will point to where the data portion
1230                          of the key should start. For nullable items, this
1231                          means right after the NULL indicator byte. The NULL
1232                          indicator byte (to[-1]) should be initialized by the
1233                          caller to a value that indicates not NULL.
1234 
1235   @param[in]     length  The length of the sort key, not including the NULL
1236                          indicator byte at the beginning of the sort key for
1237                          nullable items.
1238 
1239   @param[in,out] hash    The hash key of the JSON values in the current row.
1240 */
1241 static void MY_ATTRIBUTE((noinline))
make_json_sort_key(Item * item,uchar * to,size_t length,ulonglong * hash)1242 make_json_sort_key(Item *item, uchar *to, size_t length, ulonglong *hash)
1243 {
1244   assert(!item->maybe_null || to[-1] == 1);
1245 
1246   Json_wrapper wr;
1247   if (item->val_json(&wr))
1248   {
1249     // An error happened when reading the JSON value. Give up.
1250     memset(to, 0, length);
1251     return;
1252   }
1253 
1254   if (item->null_value)
1255   {
1256     /*
1257       Got NULL. The sort key should be all zeros. The caller has
1258       already tentatively set the NULL indicator byte at to[-1] to
1259       not-NULL, so we need to clear that byte too.
1260     */
1261     if (item->maybe_null)
1262     {
1263       memset(to - 1, 0, length + 1);
1264     }
1265     else
1266     {
1267       /* purecov: begin inspected */
1268       DBUG_PRINT("warning",
1269                  ("Got null on something that shouldn't be null"));
1270       DBUG_ABORT();
1271       memset(to, 0, length);
1272       /* purecov: end */
1273     }
1274   }
1275   else
1276   {
1277     wr.make_sort_key(to, length);
1278     *hash= wr.make_hash_key(hash);
1279   }
1280 }
1281 
1282 
/**
  Build the complete sort record for the current row in 'to':
  one memcmp-able key per sort field, then (when JSON keys are present)
  an 8-byte hash, then either the addon field values or the row
  reference 'ref_pos'.

  @param to       Destination buffer.
  @param ref_pos  Row reference appended when addon fields are not used.

  @return Number of bytes written to 'to'.
*/
uint Sort_param::make_sortkey(uchar *to, const uchar *ref_pos)
{
  uchar *orig_to= to;
  const st_sort_field *sort_field;
  ulonglong hash= 0;      // Accumulated hash over all JSON values in this row

  for (sort_field= local_sortorder.begin() ;
       sort_field != local_sortorder.end() ;
       sort_field++)
  {
    bool maybe_null= false;
    if (sort_field->field)
    {
      // Sorting on a plain column: the Field knows how to make its key.
      Field *field= sort_field->field;
      assert(sort_field->field_type == field->type());
      if (field->maybe_null())
      {
	if (field->is_null())
	{
	  // NULL key: all-zero bytes (or all-0xFF when reversed) covering
	  // the indicator byte plus the key body.
	  if (sort_field->reverse)
	    memset(to, 255, sort_field->length+1);
	  else
	    memset(to, 0, sort_field->length+1);
	  to+= sort_field->length+1;
	  continue;
	}
	else
	  *to++=1;                              // "not NULL" indicator byte
      }
      field->make_sort_key(to, sort_field->length);
      if (sort_field->field_type == MYSQL_TYPE_JSON)
      {
        assert(use_hash);
        unique_hash(field, &hash);
      }
    }
    else
    {						// Item
      Item *item=sort_field->item;
      maybe_null= item->maybe_null;
      assert(sort_field->field_type == item->field_type());
      switch (sort_field->result_type) {
      case STRING_RESULT:
      {
        if (maybe_null)
          *to++= 1;                             // Tentative "not NULL" marker

        if (sort_field->field_type == MYSQL_TYPE_JSON)
        {
          assert(use_hash);
          /*
            We don't want the code for creating JSON sort keys to be
            inlined here, as increasing the size of the surrounding
            "else" branch seems to have a negative impact on some
            performance tests, even if those tests never execute the
            "else" branch.
          */
          make_json_sort_key(item, to, sort_field->length, &hash);
          break;
        }

        const CHARSET_INFO *cs=item->collation.collation;
        char fill_char= ((cs->state & MY_CS_BINSORT) ? (char) 0 : ' ');

        /* All item->str() to use some extra byte for end null.. */
        String tmp((char*) to,sort_field->length+4,cs);
        String *res= item->str_result(&tmp);
        if (!res)
        {
          // str_result() returned NULL: store an all-zero key (and clear
          // the indicator byte we wrote above, when nullable).
          if (maybe_null)
            memset(to-1, 0, sort_field->length+1);
          else
          {
            /* purecov: begin deadcode */
            /*
              This should only happen during extreme conditions if we run out
              of memory or have an item marked not null when it can be null.
              This code is here mainly to avoid a hard crash in this case.
            */
            assert(0);
            DBUG_PRINT("warning",
                       ("Got null on something that shouldn't be null"));
            memset(to, 0, sort_field->length);	// Avoid crash
            /* purecov: end */
          }
          break;
        }
        size_t length= res->length();
        if (sort_field->need_strxnfrm)
        {
          // Collation requires a strnxfrm transform to become memcmp-able.
          char *from=(char*) res->ptr();
          size_t tmp_length MY_ATTRIBUTE((unused));
          if ((uchar*) from == to)
          {
            // Source and destination overlap (item rendered straight into
            // 'to'); copy through tmp_buffer before transforming.
            assert(sort_field->length >= length);
            set_if_smaller(length,sort_field->length);
            memcpy(tmp_buffer, from, length);
            from= tmp_buffer;
          }
          tmp_length=
            cs->coll->strnxfrm(cs, to, sort_field->length,
                               item->max_char_length(),
                               (uchar*) from, length,
                               MY_STRXFRM_PAD_WITH_SPACE |
                               MY_STRXFRM_PAD_TO_MAXLEN);
          assert(tmp_length == sort_field->length);
        }
        else
        {
          // Binary-comparable collation: copy, optionally append the value
          // length as a suffix, and pad the remainder with fill_char.
          size_t diff;
          uint sort_field_length= sort_field->length -
            sort_field->suffix_length;
          if (sort_field_length < length)
          {
            diff= 0;
            length= sort_field_length;
          }
          else
            diff= sort_field_length - length;
          if (sort_field->suffix_length)
          {
            /* Store length last in result_string */
            store_length(to + sort_field_length, length,
                         sort_field->suffix_length);
          }

          my_strnxfrm(cs, to,length,(const uchar*)res->ptr(),length);
          cs->cset->fill(cs, (char *)to+length,diff,fill_char);
        }
        break;
      }
      case INT_RESULT:
	{
          // Temporal items need their packed temporal representation so
          // that byte comparison orders them correctly.
          longlong value= item->field_type() == MYSQL_TYPE_TIME ?
                          item->val_time_temporal_result() :
                          item->is_temporal_with_date() ?
                          item->val_date_temporal_result() :
                          item->val_int_result();
          if (maybe_null)
          {
	    *to++=1;				/* purecov: inspected */
            if (item->null_value)
            {
              if (maybe_null)
                memset(to-1, 0, sort_field->length+1);
              else
              {
                // NOTE: unreachable — we are already inside
                // "if (maybe_null)"; kept for symmetry with other branches.
                DBUG_PRINT("warning",
                           ("Got null on something that shouldn't be null"));
                memset(to, 0, sort_field->length);
              }
              break;
            }
          }
          copy_native_longlong(to, sort_field->length,
                               value, item->unsigned_flag);
	  break;
	}
      case DECIMAL_RESULT:
        {
          my_decimal dec_buf, *dec_val= item->val_decimal_result(&dec_buf);
          if (maybe_null)
          {
            if (item->null_value)
            {
              memset(to, 0, sort_field->length+1);
              to++;
              break;
            }
            *to++=1;
          }
          if (sort_field->length < DECIMAL_MAX_FIELD_SIZE)
          {
            // Key is shorter than the full binary decimal: render into a
            // scratch buffer and copy only the leading key bytes.
            uchar buf[DECIMAL_MAX_FIELD_SIZE];
            my_decimal2binary(E_DEC_FATAL_ERROR, dec_val, buf,
                              item->max_length - (item->decimals ? 1:0),
                              item->decimals);
            memcpy(to, buf, sort_field->length);
          }
          else
          {
            my_decimal2binary(E_DEC_FATAL_ERROR, dec_val, to,
                              item->max_length - (item->decimals ? 1:0),
                              item->decimals);
          }
         break;
        }
      case REAL_RESULT:
	{
          double value= item->val_result();
	  if (maybe_null)
          {
            if (item->null_value)
            {
              memset(to, 0, sort_field->length+1);
              to++;
              break;
            }
	    *to++=1;
          }
          if (sort_field->length < sizeof(double))
          {
            // Same scratch-buffer trick as for decimals above.
            uchar buf[sizeof(double)];
            change_double_for_sort(value, buf);
            memcpy(to, buf, sort_field->length);
          }
          else
          {
            change_double_for_sort(value, to);
          }
	  break;
	}
      case ROW_RESULT:
      default:
	// This case should never be chosen
	assert(0);
	break;
      }
    }
    if (sort_field->reverse)
    {							/* Reverse key */
      // DESC order: complement every key byte (and the NULL indicator)
      // so that ascending memcmp yields descending value order.
      if (maybe_null)
        to[-1]= ~to[-1];
      uint length= sort_field->length;
      while (length--)
      {
	*to = (uchar) (~ *to);
	to++;
      }
    }
    else
      to+= sort_field->length;
  }

  if (use_hash)
  {
    // Append the accumulated JSON hash so rows with equal key prefixes
    // still compare deterministically.
    int8store(to, hash);
    to+= 8;
  }

  if (using_addon_fields())
  {
    /*
      Save field values appended to sorted fields.
      First null bit indicators are appended then field values follow.
    */
    uchar *nulls= to;
    uchar *p_len= to;

    Addon_fields_array::const_iterator addonf= addon_fields->begin();
    uint32 res_len= addonf->offset;
    const bool packed_addon_fields= addon_fields->using_packed_addons();
    memset(nulls, 0, addonf->offset);
    to+= addonf->offset;
    for ( ; addonf != addon_fields->end(); ++addonf)
    {
      Field *field= addonf->field;
      if (addonf->null_bit && field->is_null())
      {
        nulls[addonf->null_offset]|= addonf->null_bit;
        // Fixed layout reserves max_length even for NULL values.
        if (!packed_addon_fields)
          to+= addonf->max_length;
      }
      else
      {
        uchar *ptr= field->pack(to, field->ptr);
        int sz= static_cast<int>(ptr - to);
        res_len += sz;
        if (packed_addon_fields)
          to+= sz;
        else
          to+= addonf->max_length;
      }
    }
    // For packed addons, the total length is stored up front so readers
    // know how far to advance.
    if (packed_addon_fields)
      Addon_fields::store_addon_length(p_len, res_len);
    DBUG_PRINT("info", ("make_sortkey %p %u", orig_to, res_len));
  }
  else
  {
    /* Save filepos last */
    memcpy(to, ref_pos, ref_length);
    to+= ref_length;
  }
  return to - orig_to;
}
1569 
1570 
1571 /*
1572   Register fields used by sorting in the sorted table's read set
1573 */
1574 
register_used_fields(Sort_param * param)1575 static void register_used_fields(Sort_param *param)
1576 {
1577   Bounds_checked_array<st_sort_field>::const_iterator sort_field;
1578   TABLE *table=param->sort_form;
1579   MY_BITMAP *bitmap= table->read_set;
1580   Mark_field mf(table, MARK_COLUMNS_TEMP);
1581 
1582   for (sort_field= param->local_sortorder.begin() ;
1583        sort_field != param->local_sortorder.end() ;
1584        sort_field++)
1585   {
1586     Field *field;
1587     if ((field= sort_field->field))
1588     {
1589       if (field->table == table)
1590       {
1591         bitmap_set_bit(bitmap, field->field_index);
1592         if (field->is_virtual_gcol())
1593           table->mark_gcol_in_maps(field);
1594       }
1595     }
1596     else
1597     {						// Item
1598       sort_field->item->walk(&Item::mark_field_in_map, walk_subquery,
1599                              (uchar *)&mf);
1600     }
1601   }
1602 
1603   if (param->using_addon_fields())
1604   {
1605     Addon_fields_array::const_iterator addonf= param->addon_fields->begin();
1606     for ( ; addonf != param->addon_fields->end(); ++addonf)
1607     {
1608       Field *field= addonf->field;
1609       bitmap_set_bit(bitmap, field->field_index);
1610       if (field->is_virtual_gcol())
1611           table->mark_gcol_in_maps(field);
1612     }
1613   }
1614   else
1615   {
1616     /* Save filepos last */
1617     table->prepare_for_position();
1618   }
1619 }
1620 
1621 /**
1622   This function is used only if the entire result set fits in memory.
1623 
1624   For addon fields, we keep the result in the filesort buffer.
1625   This saves us a lot of memcpy calls.
1626 
1627   For row references, we copy the final sorted result into a buffer,
1628   but we do not copy the actual sort-keys, as they are no longer needed.
  We could have kept the result in the sort buffer here as well,
1630   but the new buffer - containing only row references - is probably a
1631   lot smaller.
1632 
1633   The result data will be unpacked by rr_unpack_from_buffer()
1634   or rr_from_pointers()
1635 
1636   @param [in]     param      Sort parameters.
1637   @param          count      Number of records
1638   @param [in,out] table_sort Information used by rr_unpack_from_buffer() /
1639                              rr_from_pointers()
1640  */
save_index(Sort_param * param,uint count,Filesort_info * table_sort)1641 static bool save_index(Sort_param *param, uint count, Filesort_info *table_sort)
1642 {
1643   uchar *to;
1644   DBUG_ENTER("save_index");
1645 
1646   table_sort->sort_buffer(param, count);
1647 
1648   if (param->using_addon_fields())
1649   {
1650     table_sort->sorted_result_in_fsbuf= true;
1651     table_sort->set_sort_length(param->sort_length);
1652     DBUG_RETURN(0);
1653   }
1654 
1655   table_sort->sorted_result_in_fsbuf= false;
1656   const size_t buf_size= param->res_length * count;
1657 
1658   assert(table_sort->sorted_result == NULL);
1659   if (!(to= table_sort->sorted_result=
1660         static_cast<uchar*>(my_malloc(key_memory_Filesort_info_record_pointers,
1661                                       buf_size, MYF(MY_WME)))))
1662     DBUG_RETURN(1);                 /* purecov: inspected */
1663   table_sort->sorted_result_end=
1664     table_sort->sorted_result + buf_size;
1665 
1666   uint res_length= param->res_length;
1667   uint offset= param->rec_length - res_length;
1668   for (uint ix= 0; ix < count; ++ix)
1669   {
1670     uchar *record= table_sort->get_sorted_record(ix);
1671     memcpy(to, record + offset, res_length);
1672     to+= res_length;
1673   }
1674   DBUG_RETURN(0);
1675 }
1676 
1677 
1678 /**
1679   Test whether priority queue is worth using to get top elements of an
1680   ordered result set. If it is, then allocates buffer for required amount of
1681   records
1682 
1683   @param trace            Current trace context.
1684   @param param            Sort parameters.
1685   @param filesort_info    Filesort information.
1686   @param table            Table to sort.
1687   @param num_rows         Estimate of number of rows in source record set.
1688   @param memory_available Memory available for sorting.
1689   @param keep_addon_fields Do not try to strip off addon fields.
1690 
1691   DESCRIPTION
1692     Given a query like this:
1693       SELECT ... FROM t ORDER BY a1,...,an LIMIT max_rows;
1694     This function tests whether a priority queue should be used to keep
1695     the result. Necessary conditions are:
1696     - estimate that it is actually cheaper than merge-sort
1697     - enough memory to store the <max_rows> records.
1698 
1699     If we don't have space for <max_rows> records, but we *do* have
1700     space for <max_rows> keys, we may rewrite 'table' to sort with
1701     references to records instead of additional data.
1702     (again, based on estimates that it will actually be cheaper).
1703 
1704    @returns
1705     true  - if it's ok to use PQ
1706     false - PQ will be slower than merge-sort, or there is not enough memory.
1707 */
1708 
check_if_pq_applicable(Opt_trace_context * trace,Sort_param * param,Filesort_info * filesort_info,TABLE * table,ha_rows num_rows,ulong memory_available,bool keep_addon_fields)1709 bool check_if_pq_applicable(Opt_trace_context *trace,
1710                             Sort_param *param,
1711                             Filesort_info *filesort_info,
1712                             TABLE *table, ha_rows num_rows,
1713                             ulong memory_available,
1714                             bool keep_addon_fields)
1715 {
1716   DBUG_ENTER("check_if_pq_applicable");
1717 
1718   /*
1719     How much Priority Queue sort is slower than qsort.
1720     Measurements (see unit test) indicate that PQ is roughly 3 times slower.
1721   */
1722   const double PQ_slowness= 3.0;
1723 
1724   Opt_trace_object trace_filesort(trace,
1725                                   "filesort_priority_queue_optimization");
1726   if (param->max_rows == HA_POS_ERROR)
1727   {
1728     trace_filesort
1729       .add("usable", false)
1730       .add_alnum("cause", "not applicable (no LIMIT)");
1731     DBUG_RETURN(false);
1732   }
1733 
1734   trace_filesort
1735     .add("limit", param->max_rows)
1736     .add("rows_estimate", num_rows)
1737     .add("row_size", param->rec_length)
1738     .add("memory_available", memory_available);
1739 
1740   if (param->max_rows + 2 >= UINT_MAX)
1741   {
1742     trace_filesort.add("usable", false).add_alnum("cause", "limit too large");
1743     DBUG_RETURN(false);
1744   }
1745 
1746   ulong num_available_keys=
1747     memory_available / (param->rec_length + sizeof(char*));
1748   // We need 1 extra record in the buffer, when using PQ.
1749   param->max_keys_per_buffer= (uint) param->max_rows + 1;
1750 
1751   if (num_rows < num_available_keys)
1752   {
1753     // The whole source set fits into memory.
1754     if (param->max_rows < num_rows/PQ_slowness )
1755     {
1756       filesort_info->
1757         alloc_sort_buffer(param->max_keys_per_buffer, param->rec_length);
1758       trace_filesort.add("chosen", true);
1759       DBUG_RETURN(filesort_info->sort_buffer_size() > 0);
1760     }
1761     else
1762     {
1763       // PQ will be slower.
1764       trace_filesort.add("chosen", false)
1765         .add_alnum("cause", "quicksort_is_cheaper");
1766       DBUG_RETURN(false);
1767     }
1768   }
1769 
1770   // Do we have space for LIMIT rows in memory?
1771   if (param->max_keys_per_buffer < num_available_keys)
1772   {
1773     filesort_info->alloc_sort_buffer(param->max_keys_per_buffer,
1774                                      param->rec_length);
1775     trace_filesort.add("chosen", true);
1776     DBUG_RETURN(filesort_info->sort_buffer_size() > 0);
1777   }
1778 
1779   // Try to strip off addon fields.
1780   if (!keep_addon_fields && param->using_addon_fields())
1781   {
1782     const ulong row_length=
1783       param->sort_length + param->ref_length + sizeof(char*);
1784     num_available_keys= memory_available / row_length;
1785 
1786     Opt_trace_object trace_addon(trace, "strip_additional_fields");
1787     trace_addon.add("row_size", row_length);
1788 
1789     // Can we fit all the keys in memory?
1790     if (param->max_keys_per_buffer >= num_available_keys)
1791     {
1792       trace_addon.add("chosen", false).add_alnum("cause", "not_enough_space");
1793     }
1794     else
1795     {
1796       const Cost_model_table *cost_model= table->cost_model();
1797       const double sort_merge_cost=
1798         get_merge_many_buffs_cost_fast(num_rows,
1799                                        num_available_keys,
1800                                        row_length, cost_model);
1801       trace_addon.add("sort_merge_cost", sort_merge_cost);
1802       /*
1803         PQ has cost:
1804         (insert + qsort) * log(queue size) * key_compare_cost() +
1805         cost of file lookup afterwards.
1806         The lookup cost is a bit pessimistic: we take table scan cost and
1807         assume that on average we find the row after scanning half of the file.
1808         A better estimate would be lookup cost, but note that we are doing
1809         random lookups here, rather than sequential scan.
1810       */
1811       const double pq_cpu_cost=
1812         (PQ_slowness * num_rows + param->max_keys_per_buffer) *
1813         cost_model->key_compare_cost(log((double) param->max_keys_per_buffer));
1814       const Cost_estimate scan_cost= table->file->table_scan_cost();
1815       const double pq_io_cost=
1816         param->max_rows * scan_cost.total_cost() / 2.0;
1817       const double pq_cost= pq_cpu_cost + pq_io_cost;
1818       trace_addon.add("priority_queue_cost", pq_cost);
1819 
1820       if (sort_merge_cost < pq_cost)
1821       {
1822         trace_addon.add("chosen", false);
1823         DBUG_RETURN(false);
1824       }
1825 
1826       trace_addon.add("chosen", true);
1827       filesort_info->alloc_sort_buffer(param->max_keys_per_buffer,
1828                                        param->sort_length + param->ref_length);
1829       if (filesort_info->sort_buffer_size() > 0)
1830       {
1831         // Make attached data to be references instead of fields.
1832         filesort_info->addon_fields= NULL;
1833         param->addon_fields= NULL;
1834 
1835         param->res_length= param->ref_length;
1836         param->sort_length+= param->ref_length;
1837         param->rec_length= param->sort_length;
1838 
1839         DBUG_RETURN(true);
1840       }
1841     }
1842   }
1843   DBUG_RETURN(false);
1844 }
1845 
1846 
1847 /**
1848   Merges buffers to make < MERGEBUFF2 buffers.
1849 
1850   @param param        Sort parameters.
1851   @param sort_buffer  The main memory buffer.
1852   @param chunk_array  Array of chunk descriptors to merge.
1853   @param p_num_chunks [out]
1854                       output: the number of chunks left in the output file.
1855   @param t_file       Where to store the result.
1856 */
merge_many_buff(Sort_param * param,Sort_buffer sort_buffer,Merge_chunk_array chunk_array,size_t * p_num_chunks,IO_CACHE * t_file)1857 int merge_many_buff(Sort_param *param, Sort_buffer sort_buffer,
1858                     Merge_chunk_array chunk_array,
1859                     size_t *p_num_chunks, IO_CACHE *t_file)
1860 {
1861   uint i;
1862   IO_CACHE t_file2,*from_file,*to_file,*temp;
1863   DBUG_ENTER("merge_many_buff");
1864 
1865   size_t num_chunks= chunk_array.size();
1866   *p_num_chunks= num_chunks;
1867 
1868   if (num_chunks <= MERGEBUFF2)
1869     DBUG_RETURN(0);				/* purecov: inspected */
1870   if (flush_io_cache(t_file) ||
1871       open_cached_file(&t_file2,mysql_tmpdir,TEMP_PREFIX,DISK_BUFFER_SIZE,
1872                        MYF(MY_WME)))
1873     DBUG_RETURN(1);				/* purecov: inspected */
1874 
1875   from_file= t_file ; to_file= &t_file2;
1876   while (num_chunks > MERGEBUFF2)
1877   {
1878     if (reinit_io_cache(from_file,READ_CACHE,0L,0,0))
1879       goto cleanup;
1880     if (reinit_io_cache(to_file,WRITE_CACHE,0L,0,0))
1881       goto cleanup;
1882     Merge_chunk *last_chunk= chunk_array.begin();;
1883     for (i=0 ; i < num_chunks - MERGEBUFF * 3 / 2 ; i+= MERGEBUFF)
1884     {
1885       if (merge_buffers(param,                  // param
1886                         from_file,              // from_file
1887                         to_file,                // to_file
1888                         sort_buffer,            // sort_buffer
1889                         last_chunk++,           // last_chunk [out]
1890                         Merge_chunk_array(&chunk_array[i], MERGEBUFF),
1891                         0))                     // flag
1892       goto cleanup;
1893     }
1894     if (merge_buffers(param,
1895                       from_file,
1896                       to_file,
1897                       sort_buffer,
1898                       last_chunk++,
1899                       Merge_chunk_array(&chunk_array[i], num_chunks - i),
1900                       0))
1901       break;					/* purecov: inspected */
1902     if (flush_io_cache(to_file))
1903       break;					/* purecov: inspected */
1904     temp=from_file; from_file=to_file; to_file=temp;
1905     setup_io_cache(from_file);
1906     setup_io_cache(to_file);
1907     num_chunks= last_chunk - chunk_array.begin();
1908   }
1909 cleanup:
1910   close_cached_file(to_file);			// This holds old result
1911   if (to_file == t_file)
1912   {
1913     *t_file=t_file2;				// Copy result file
1914     setup_io_cache(t_file);
1915   }
1916 
1917   *p_num_chunks= num_chunks;
1918   DBUG_RETURN(num_chunks > MERGEBUFF2);  /* Return 1 if interrupted */
1919 } /* merge_many_buff */
1920 
1921 
1922 /**
1923   Read data to buffer.
1924 
1925   @returns
1926     (uint)-1 if something goes wrong
1927 */
1928 
read_to_buffer(IO_CACHE * fromfile,Merge_chunk * merge_chunk,Sort_param * param)1929 uint read_to_buffer(IO_CACHE *fromfile,
1930                     Merge_chunk *merge_chunk,
1931                     Sort_param *param)
1932 {
1933   DBUG_ENTER("read_to_buffer");
1934   uint rec_length= param->rec_length;
1935   ha_rows count;
1936 
1937   if ((count= min(merge_chunk->max_keys(), merge_chunk->rowcount())))
1938   {
1939     size_t bytes_to_read;
1940     if (param->using_packed_addons())
1941     {
1942       count= merge_chunk->rowcount();
1943       bytes_to_read=
1944         min(merge_chunk->buffer_size(),
1945             static_cast<size_t>(fromfile->end_of_file -
1946                                 merge_chunk->file_position()));
1947     }
1948     else
1949       bytes_to_read= rec_length * static_cast<size_t>(count);
1950 
1951     DBUG_PRINT("info", ("read_to_buffer %p at file_pos %llu bytes %llu",
1952                         merge_chunk,
1953                         static_cast<ulonglong>(merge_chunk->file_position()),
1954                         static_cast<ulonglong>(bytes_to_read)));
1955     if (my_b_pread(fromfile, merge_chunk->buffer_start(), bytes_to_read,
1956                    merge_chunk->file_position()))
1957       DBUG_RETURN((uint) -1);			/* purecov: inspected */
1958 
1959     size_t num_bytes_read;
1960     if (param->using_packed_addons())
1961     {
1962       /*
1963         The last record read is most likely not complete here.
1964         We need to loop through all the records, reading the length fields,
1965         and then "chop off" the final incomplete record.
1966        */
1967       uchar *record= merge_chunk->buffer_start();
1968       uint ix= 0;
1969       for (; ix < count; ++ix)
1970       {
1971         if (record + param->sort_length + Addon_fields::size_of_length_field >=
1972             merge_chunk->buffer_end())
1973           break;                                // Incomplete record.
1974         uchar *plen= record + param->sort_length;
1975         uint res_length= Addon_fields::read_addon_length(plen);
1976         if (plen + res_length >= merge_chunk->buffer_end())
1977           break;                                // Incomplete record.
1978         assert(res_length > 0);
1979         record+= param->sort_length;
1980         record+= res_length;
1981       }
1982       assert(ix > 0);
1983       count= ix;
1984       num_bytes_read= record - merge_chunk->buffer_start();
1985       DBUG_PRINT("info", ("read %llu bytes of complete records",
1986                           static_cast<ulonglong>(bytes_to_read)));
1987     }
1988     else
1989       num_bytes_read= bytes_to_read;
1990 
1991     merge_chunk->init_current_key();
1992     merge_chunk->advance_file_position(num_bytes_read);
1993     merge_chunk->decrement_rowcount(count);
1994     merge_chunk->set_mem_count(count);
1995     DBUG_RETURN(num_bytes_read);
1996   }
1997 
1998   DBUG_RETURN (0);
1999 } /* read_to_buffer */
2000 
2001 
2002 namespace {
2003 
2004 /**
2005   This struct is used for merging chunks for filesort() and for Unique::get().
2006   For filesort() we use memcmp to compare rows.
2007   For Unique::get() we use the provided compare function.
2008  */
2009 struct Merge_chunk_less
2010 {
2011   size_t m_len;
2012   Sort_param::chunk_compare_fun m_fun;
2013   Merge_chunk_compare_context *m_arg;
2014 
2015   // CTOR for filesort()
Merge_chunk_less__anond7565cf50211::Merge_chunk_less2016   explicit Merge_chunk_less(size_t len)
2017     : m_len(len), m_fun(NULL), m_arg(NULL)
2018   {}
2019 
2020   // CTOR for Unique::get()
Merge_chunk_less__anond7565cf50211::Merge_chunk_less2021   Merge_chunk_less(Sort_param::chunk_compare_fun fun,
2022                    Merge_chunk_compare_context *arg)
2023     : m_len(0), m_fun(fun), m_arg(arg)
2024   {}
2025 
operator ()__anond7565cf50211::Merge_chunk_less2026   bool operator()(Merge_chunk *a, Merge_chunk *b)
2027   {
2028     uchar *key1= a->current_key();
2029     uchar *key2= b->current_key();
2030     if (m_len)
2031       return memcmp(key1, key2, m_len) > 0;
2032 
2033     if (m_fun)
2034       return (*m_fun)(m_arg, key1, key2) > 0;
2035 
2036     // We can actually have zero-length sort key for filesort().
2037     return false;
2038   }
2039 };
2040 
2041 } // namespace
2042 
2043 
/**
  Merge buffers to one buffer.

  @param param          Sort parameter
  @param from_file      File with source data (Merge_chunks point to this file)
  @param to_file        File to write the sorted result data.
  @param sort_buffer    Buffer for data to store up to MERGEBUFF2 sort keys.
  @param [out] last_chunk Store here Merge_chunk describing data written to
                        to_file.
  @param chunk_array    Array of chunks to merge.
  @param flag           0: write whole records (rec_length bytes each);
                        non-zero: write only the res_length result bytes
                        located at the tail of each record.

  @returns
    0      OK
  @returns
    other  error
*/

int merge_buffers(Sort_param *param, IO_CACHE *from_file,
                  IO_CACHE *to_file, Sort_buffer sort_buffer,
                  Merge_chunk *last_chunk,
                  Merge_chunk_array chunk_array,
                  int flag)
{
  int error;
  uint rec_length,res_length;
  size_t sort_length;
  ha_rows maxcount;
  ha_rows max_rows,org_max_rows;
  my_off_t to_start_filepos;
  uchar *strpos;
  Merge_chunk *merge_chunk;
  Sort_param::chunk_compare_fun cmp;
  Merge_chunk_compare_context *first_cmp_arg;
  THD * const thd = current_thd;
  volatile THD::killed_state *killed= &thd->killed;
  THD::killed_state not_killable;
  DBUG_ENTER("merge_buffers");

  thd->inc_status_sort_merge_passes();
  thd->query_plan_fsort_passes++;
  // Point 'killed' at a dummy so the kill checks below never fire.
  if (param->not_killable)
  {
    killed= &not_killable;
    not_killable= THD::NOT_KILLED;
  }

  error=0;
  rec_length= param->rec_length;
  res_length= param->res_length;
  sort_length= param->sort_length;
  // With non-zero flag we skip past the sort key and write only the result.
  const uint offset= (flag == 0) ? 0 : (rec_length - res_length);
  // Each chunk gets an equal share of the in-memory key slots.
  maxcount= (ulong) (param->max_keys_per_buffer / chunk_array.size());
  to_start_filepos= my_b_tell(to_file);
  strpos= sort_buffer.array();
  org_max_rows=max_rows= param->max_rows;

  /* The following will fire if there is not enough space in sort_buffer */
  assert(maxcount!=0);

  // Unique::get() passes a unique_buff for duplicate elimination.
  const bool doing_unique= (param->unique_buff != NULL);
  if (doing_unique)
  {
    cmp= param->compare;
    first_cmp_arg= &param->cmp_context;
  }
  else
  {
    cmp= NULL;
    first_cmp_arg= NULL;
  }

  /*
    Merge_chunk_less compares with '>', so the chunk whose current key is
    smallest ends up on top of the priority queue.
  */
  Merge_chunk_less mcl=
    doing_unique ?
    Merge_chunk_less(cmp, first_cmp_arg) :
    Merge_chunk_less(sort_length);
  Priority_queue<Merge_chunk*,
                 std::vector<Merge_chunk*, Malloc_allocator<Merge_chunk*> >,
                 Merge_chunk_less>
    queue(mcl,
          Malloc_allocator<Merge_chunk*>(key_memory_Filesort_info_merge));

  if (queue.reserve(chunk_array.size()))
    DBUG_RETURN(1);

  // Prime every chunk: give it a slice of sort_buffer, fill it, enqueue it.
  for (merge_chunk= chunk_array.begin() ;
       merge_chunk != chunk_array.end() ; merge_chunk++)
  {
    merge_chunk->set_buffer(strpos,
                            strpos + (sort_buffer.size()/(chunk_array.size())));
    merge_chunk->set_max_keys(maxcount);
    strpos+=
      (uint) (error= (int)read_to_buffer(from_file, merge_chunk, param));
    merge_chunk->set_buffer_end(strpos);
    if (error == -1)
      DBUG_RETURN(error);     /* purecov: inspected */
    // If less data in buffers than expected
    merge_chunk->set_max_keys(merge_chunk->mem_count());
    (void) queue.push(merge_chunk);
  }

  if (doing_unique)
  {
    assert(!param->using_packed_addons());
    /*
       Called by Unique::get()
       Copy the first argument to param->unique_buff for unique removal.
       Store it also in 'to_file'.
    */
    merge_chunk= queue.top();
    memcpy(param->unique_buff, merge_chunk->current_key(), rec_length);
    if (my_b_write(to_file, merge_chunk->current_key(), rec_length))
    {
      DBUG_RETURN(1);                         /* purecov: inspected */
    }
    merge_chunk->advance_current_key(rec_length);
    merge_chunk->decrement_mem_count();
    if (!--max_rows)
    {
      error= 0;                                       /* purecov: inspected */
      goto end;                                       /* purecov: inspected */
    }
    // The top chunk may actually contain only a single element
    if (merge_chunk->mem_count() == 0)
    {
      if (!(error= (int) read_to_buffer(from_file, merge_chunk, param)))
      {
        // Chunk exhausted: remove it and hand its buffer to a sibling.
        queue.pop();
        reuse_freed_buff(merge_chunk, &queue);
      }
      else if (error == -1)
        DBUG_RETURN(error);
    }
    queue.update_top();                   // Top element has been used
  }

  // Main merge loop: keep emitting the smallest current key until only
  // one chunk is left.
  while (queue.size() > 1)
  {
    if (*killed)
    {
      DBUG_RETURN(1);                         /* purecov: inspected */
    }
    for (;;)
    {
      merge_chunk= queue.top();
      if (doing_unique)                         // Remove duplicates
      {
        assert(!param->using_packed_addons());
        uchar *current_key= merge_chunk->current_key();
        // Equal to the last written key: skip it without writing.
        if (!(*cmp)(first_cmp_arg, param->unique_buff, current_key))
          goto skip_duplicate;
        memcpy(param->unique_buff, merge_chunk->current_key(), rec_length);
      }
      {
        param->get_rec_and_res_len(merge_chunk->current_key(),
                                   &rec_length, &res_length);
        const uint bytes_to_write= (flag == 0) ? rec_length : res_length;

        DBUG_PRINT("info", ("write record at %llu len %u",
                            my_b_tell(to_file), bytes_to_write));
        if (my_b_write(to_file,
                       merge_chunk->current_key() + offset, bytes_to_write))
        {
          DBUG_RETURN(1);                     /* purecov: inspected */
        }
        if (!--max_rows)
        {
          error= 0;                             /* purecov: inspected */
          goto end;                             /* purecov: inspected */
        }
      }

    skip_duplicate:
      merge_chunk->advance_current_key(rec_length);
      merge_chunk->decrement_mem_count();
      if (0 == merge_chunk->mem_count())
      {
        if (!(error= (int) read_to_buffer(from_file, merge_chunk, param)))
        {
          queue.pop();
          reuse_freed_buff(merge_chunk, &queue);
          break;                        /* One buffer have been removed */
        }
        else if (error == -1)
          DBUG_RETURN(error);                 /* purecov: inspected */
      }
      /*
        The Merge_chunk at the queue's top had one of its keys consumed, thus
        it may now rank differently in the comparison order of the queue, so:
      */
      queue.update_top();
    }
  }
  // Only one chunk left: it may use the whole sort buffer from now on.
  merge_chunk= queue.top();
  merge_chunk->set_buffer(sort_buffer.array(),
                          sort_buffer.array() + sort_buffer.size());
  merge_chunk->set_max_keys(param->max_keys_per_buffer);

  /*
    As we know all entries in the buffer are unique, we only have to
    check if the first one is the same as the last one we wrote
  */
  if (doing_unique)
  {
    uchar *current_key= merge_chunk->current_key();
    if (!(*cmp)(first_cmp_arg, param->unique_buff, current_key))
    {
      merge_chunk->advance_current_key(rec_length); // Remove duplicate
      merge_chunk->decrement_mem_count();
    }
  }

  // Drain the last chunk: write its buffered rows, refill, repeat until
  // the file side of the chunk is exhausted (or max_rows is reached).
  do
  {
    if (merge_chunk->mem_count() > max_rows)
    {
      merge_chunk->set_mem_count(max_rows); /* Don't write too many records */
      merge_chunk->set_rowcount(0);         /* Don't read more */
    }
    max_rows-= merge_chunk->mem_count();

    for (uint ix= 0; ix < merge_chunk->mem_count(); ++ix)
    {
      param->get_rec_and_res_len(merge_chunk->current_key(),
                                 &rec_length, &res_length);
      const uint bytes_to_write= (flag == 0) ? rec_length : res_length;
      if (my_b_write(to_file,
                     merge_chunk->current_key() + offset,
                     bytes_to_write))
      {
        DBUG_RETURN(1);                       /* purecov: inspected */
      }
      merge_chunk->advance_current_key(rec_length);
    }
  }
  while ((error=(int) read_to_buffer(from_file, merge_chunk, param))
         != -1 && error != 0);

end:
  // Describe the merged output for the caller: row count and start offset.
  last_chunk->set_rowcount(min(org_max_rows-max_rows, param->max_rows));
  last_chunk->set_file_position(to_start_filepos);

  DBUG_RETURN(error);
} /* merge_buffers */
2288 
2289 
2290 	/* Do a merge to output-file (save only positions) */
2291 
merge_index(Sort_param * param,Sort_buffer sort_buffer,Merge_chunk_array chunk_array,IO_CACHE * tempfile,IO_CACHE * outfile)2292 static int merge_index(Sort_param *param, Sort_buffer sort_buffer,
2293                        Merge_chunk_array chunk_array,
2294                        IO_CACHE *tempfile, IO_CACHE *outfile)
2295 {
2296   DBUG_ENTER("merge_index");
2297   if (merge_buffers(param,                    // param
2298                     tempfile,                 // from_file
2299                     outfile,                  // to_file
2300                     sort_buffer,              // sort_buffer
2301                     chunk_array.begin(),      // last_chunk [out]
2302                     chunk_array,
2303                     1))                       // flag
2304     DBUG_RETURN(1);                           /* purecov: inspected */
2305   DBUG_RETURN(0);
2306 } /* merge_index */
2307 
2308 
/**
  Number of bytes needed to store 'string_length' as a binary length
  suffix: one byte per 256-fold of magnitude, capped at 4 bytes
  (can't sort longer than 4G).
*/
static uint suffix_length(ulong string_length)
{
  uint num_bytes= 1;
  for (ulong limit= 256 ;
       num_bytes < 4 && string_length >= limit ;
       limit*= 256)
    num_bytes++;
  return num_bytes;
}
2319 
2320 
2321 
2322 /**
2323   Calculate length of sort key.
2324 
2325   @param thd			  Thread handler
2326   @param sortorder		  Order of items to sort
2327   @param s_length	          Number of items to sort
2328   @param[out] multi_byte_charset Set to 1 if we are using multi-byte charset
2329                                  (In which case we have to use strnxfrm())
2330   @param[out] use_hash           Set to true when make_sortkey should
2331                                  calculate and append hash for each sort key
2332                                  @see Sort_param::make_sortkey()
2333 
2334   @note
2335     sortorder->length is updated for each sort item.
2336   @n
2337     sortorder->need_strxnfrm is set 1 if we have to use strxnfrm
2338 
2339   @return
2340     Total length of sort buffer in bytes
2341 */
2342 
uint
sortlength(THD *thd, st_sort_field *sortorder, uint s_length,
           bool *multi_byte_charset, bool *use_hash)
{
  uint total_length= 0;
  const CHARSET_INFO *cs;
  *multi_byte_charset= false;
  *use_hash= false;

  for (; s_length-- ; sortorder++)
  {
    sortorder->need_strxnfrm= 0;
    sortorder->suffix_length= 0;
    if (sortorder->field)
    {
      // Sorting a table column: the Field knows its own sort length/charset.
      cs= sortorder->field->sort_charset();
      sortorder->length= sortorder->field->sort_length();

      if (use_strnxfrm((cs=sortorder->field->sort_charset())))
      {
        // Charset needs strnxfrm(): the transformed key may be longer.
        sortorder->need_strxnfrm= 1;
        *multi_byte_charset= 1;
        sortorder->length= cs->coll->strnxfrmlen(cs, sortorder->length);
      }
      if (sortorder->field->maybe_null())
        total_length++;                       // Place for NULL marker

      // Truncate long (non-temporal) string keys to max_sort_length.
      if (sortorder->field->result_type() == STRING_RESULT &&
          !sortorder->field->is_temporal())
      {
        set_if_smaller(sortorder->length, thd->variables.max_sort_length);
      }

      sortorder->field_type= sortorder->field->type();
      // JSON keys are truncated, so a hash is appended to disambiguate.
      if (sortorder->field_type == MYSQL_TYPE_JSON)
        *use_hash= true;
    }
    else
    {
      // Sorting an expression (Item): derive length from its result type.
      sortorder->result_type= sortorder->item->result_type();
      sortorder->field_type= sortorder->item->field_type();
      if (sortorder->item->is_temporal())
        sortorder->result_type= INT_RESULT;
      switch (sortorder->result_type) {
      case STRING_RESULT:
	sortorder->length= sortorder->item->max_length;
        set_if_smaller(sortorder->length, thd->variables.max_sort_length);
	if (use_strnxfrm((cs=sortorder->item->collation.collation)))
	{
          sortorder->length= cs->coll->strnxfrmlen(cs, sortorder->length);
	  sortorder->need_strxnfrm= 1;
	  *multi_byte_charset= 1;
	}
        else if (cs == &my_charset_bin)
        {
          /* Store length last to be able to sort blob/varbinary */
          sortorder->suffix_length= suffix_length(sortorder->length);
          sortorder->length+= sortorder->suffix_length;
        }
        if (sortorder->field_type == MYSQL_TYPE_JSON)
          *use_hash= true;
	break;
      case INT_RESULT:
#if SIZEOF_LONG_LONG > 4
	sortorder->length=8;			// Size of intern longlong
#else
	sortorder->length=4;
#endif
	break;
      case DECIMAL_RESULT:
        sortorder->length=
          my_decimal_get_binary_size(sortorder->item->max_length -
                                     (sortorder->item->decimals ? 1 : 0),
                                     sortorder->item->decimals);
        break;
      case REAL_RESULT:
	sortorder->length=sizeof(double);
	break;
      case ROW_RESULT:
      default:
	// This case should never be chosen
	assert(0);
	break;
      }
      if (sortorder->item->maybe_null)
        total_length++;                       // Place for NULL marker
    }
    total_length+= sortorder->length;
  }
  sortorder->field= NULL;                       // end marker
  DBUG_PRINT("info",("sort_length: %u", total_length));
  return total_length;
}
2436 
2437 
2438 /**
2439   Get descriptors of fields appended to sorted fields and
2440   calculate their total length.
2441 
2442   The function first finds out what fields are used in the result set.
2443   Then it calculates the length of the buffer to store the values of
2444   these fields together with the value of sort values.
2445   If the calculated length is not greater than max_length_for_sort_data
2446   the function allocates memory for an array of descriptors containing
2447   layouts for the values of the non-sorted fields in the buffer and
2448   fills them.
2449 
2450   @param max_length_for_sort_data Value of session variable.
2451   @param ptabfield             Array of references to the table fields
2452   @param sortlength            Total length of sorted fields
2453   @param[out] plength          Total length of appended fields
2454   @param[out] ppackable_length Total length of appended fields having a
2455                                packable type
2456 
2457   @note
2458     The null bits for the appended values are supposed to be put together
2459     and stored into the buffer just ahead of the value of the first field.
2460 
2461   @return
2462     Pointer to the layout descriptors for the appended fields, if any
2463   @returns
2464     NULL   if we do not store field values with sort data.
2465 */
2466 
Addon_fields *
Filesort::get_addon_fields(ulong max_length_for_sort_data,
                           Field **ptabfield, uint sortlength, uint *plength,
                           uint *ppackable_length)
{
  Field **pfield;
  Field *field;
  uint total_length= 0;
  uint packable_length= 0;
  uint num_fields= 0;
  uint null_fields= 0;
  TABLE *const table= tab->table();
  MY_BITMAP *read_set= table->read_set;

  // Locate the effective index for the table to be sorted (if any)
  const uint index= tab->effective_index();
  /*
    filter_covering is true if access is via an index that is covering,
    regardless of whether the access is by the covering index or by
    index and base table, since the query has to be fulfilled with fields
    from that index only.
    This information is later used to filter out base columns for virtual
    generated columns, since these are only needed when reading the table.
    During sorting, trust that values for all generated columns have been
    materialized, which means that base columns are no longer necessary.
  */
  const bool filter_covering=
    index != MAX_KEY &&
    table->covering_keys.is_set(index) &&
    table->index_contains_some_virtual_gcol(index);

  /*
    If there is a reference to a field in the query add it
    to the set of appended fields.
    Note for future refinement:
    This is too strong a condition.
    Actually we need only the fields referred in the
    result set. And for some of them it makes sense to use
    the values directly from sorted fields.
  */
  *plength= *ppackable_length= 0;

  // First pass: count the fields to append and sum up their lengths.
  for (pfield= ptabfield; (field= *pfield) ; pfield++)
  {
    if (!bitmap_is_set(read_set, field->field_index))
      continue;
    // part_of_key is empty for a BLOB, so apply this check before the next.
    if (field->flags & BLOB_FLAG)
    {
      // BLOBs disqualify the addon-field optimization entirely.
      assert(addon_fields == NULL);
      return NULL;
    }
    if (filter_covering && !field->part_of_key.is_set(index))
      continue;                   // See explanation above filter_covering

    const uint field_length= field->max_packed_col_length();
    total_length+= field_length;

    // Nullable and variable-length string fields can be packed.
    const enum_field_types field_type= field->type();
    if (field->maybe_null() ||
        field_type == MYSQL_TYPE_STRING ||
        field_type == MYSQL_TYPE_VARCHAR ||
        field_type == MYSQL_TYPE_VAR_STRING)
      packable_length+= field_length;
    if (field->maybe_null())
      null_fields++;
    num_fields++;
  }
  if (0 == num_fields)
    return NULL;

  // One bit per nullable field, rounded up to whole bytes.
  total_length+= (null_fields + 7) / 8;

  *ppackable_length= packable_length;

  if (total_length + sortlength > max_length_for_sort_data)
  {
    // Rows would be too wide; caller must fall back to sorting row ids.
    assert(addon_fields == NULL);
    return NULL;
  }

  if (addon_fields == NULL)
  {
    void *rawmem1= sql_alloc(sizeof(Addon_fields));
    void *rawmem2= sql_alloc(sizeof(Sort_addon_field) * num_fields);
    if (rawmem1 == NULL || rawmem2 == NULL)
      return NULL;                            /* purecov: inspected */
    Addon_fields_array
      addon_array(static_cast<Sort_addon_field*>(rawmem2), num_fields);
    addon_fields= new (rawmem1) Addon_fields(addon_array);
  }
  else
  {
    /*
      Allocate memory only once, reuse descriptor array and buffer.
      Set using_packed_addons here, and size/offset details below.
     */
    assert(num_fields == addon_fields->num_field_descriptors());
    addon_fields->set_using_packed_addons(false);
  }

  *plength= total_length;

  /*
    Second pass: fill in one descriptor per field. Must select exactly
    the same fields as the counting pass above.
  */
  uint length= (null_fields + 7) / 8;
  null_fields= 0;
  Addon_fields_array::iterator addonf= addon_fields->begin();
  for (pfield= ptabfield; (field= *pfield) ; pfield++)
  {
    if (!bitmap_is_set(read_set, field->field_index))
      continue;
    if (filter_covering && !field->part_of_key.is_set(index))
      continue;
    assert(addonf != addon_fields->end());

    addonf->field= field;
    addonf->offset= length;
    if (field->maybe_null())
    {
      // Assign this field its bit in the shared NULL-bit prefix.
      addonf->null_offset= null_fields / 8;
      addonf->null_bit= 1 << (null_fields & 7);
      null_fields++;
    }
    else
    {
      addonf->null_offset= 0;
      addonf->null_bit= 0;
    }
    addonf->max_length= field->max_packed_col_length();
    DBUG_PRINT("info", ("addon_field %s max_length %u",
                        addonf->field->field_name, addonf->max_length));

    length+= addonf->max_length;
    addonf++;
  }

  DBUG_PRINT("info",("addon_length: %d",length));
  return addon_fields;
}
2605 
2606 
2607 /*
2608 ** functions to change a double or float to a sortable string
2609 ** The following should work for IEEE
2610 */
2611 
2612 #define DBL_EXP_DIG (sizeof(double)*8-DBL_MANT_DIG)
2613 
/**
  Convert a double to a byte string that sorts correctly with memcmp.
  Writes sizeof(double) bytes to 'to': big-endian bytes, with negative
  values bit-complemented and non-negative values given the high bit
  plus an exponent bump, so the unsigned byte order matches numeric order.
  Assumes IEEE representation (see comment above).
*/
void change_double_for_sort(double nr,uchar *to)
{
  uchar *tmp= to;
  if (nr == 0.0)
  {						/* Change to zero string */
    // Canonical zero: avoids distinct encodings for +0.0 and -0.0.
    tmp[0]=(uchar) 128;
    memset(tmp+1, 0, sizeof(nr)-1);
  }
  else
  {
#ifdef WORDS_BIGENDIAN
    memcpy(tmp, &nr, sizeof(nr));
#else
    {
      // Little-endian host: reverse bytes so tmp[] is big-endian.
      uchar *ptr= (uchar*) &nr;
#if defined(__FLOAT_WORD_ORDER) && (__FLOAT_WORD_ORDER == __BIG_ENDIAN)
      // Mixed-endian doubles: words in big-endian order, bytes swapped.
      tmp[0]= ptr[3]; tmp[1]=ptr[2]; tmp[2]= ptr[1]; tmp[3]=ptr[0];
      tmp[4]= ptr[7]; tmp[5]=ptr[6]; tmp[6]= ptr[5]; tmp[7]=ptr[4];
#else
      tmp[0]= ptr[7]; tmp[1]=ptr[6]; tmp[2]= ptr[5]; tmp[3]=ptr[4];
      tmp[4]= ptr[3]; tmp[5]=ptr[2]; tmp[6]= ptr[1]; tmp[7]=ptr[0];
#endif
    }
#endif
    if (tmp[0] & 128)				/* Negative */
    {						/* make complement */
      // Inverting all bits reverses the order of negative values.
      uint i;
      for (i=0 ; i < sizeof(nr); i++)
	tmp[i]=tmp[i] ^ (uchar) 255;
    }
    else
    {					/* Set high and move exponent one up */
      // Positive values must sort above all negatives: set the top bit
      // and shift the exponent up by one so they can't collide.
      ushort exp_part=(((ushort) tmp[0] << 8) | (ushort) tmp[1] |
		       (ushort) 32768);
      exp_part+= (ushort) 1 << (16-1-DBL_EXP_DIG);
      tmp[0]= (uchar) (exp_part >> 8);
      tmp[1]= (uchar) exp_part;
    }
  }
}
2654