1 /*
2    Copyright (c) 2000, 2020, Oracle and/or its affiliates.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
23 
24 
25 /**
26   @file
27 
28   @brief
29   Sorts a database
30 */
31 
32 #include "sql_priv.h"
33 #include "filesort.h"
34 #include "unireg.h"                      // REQUIRED by other includes
35 #ifdef HAVE_STDDEF_H
36 #include <stddef.h>			/* for macro offsetof */
37 #endif
38 #include <m_ctype.h>
39 #include "sql_sort.h"
40 #include "probes_mysql.h"
41 #include "opt_range.h"                          // SQL_SELECT
42 #include "bounded_queue.h"
43 #include "filesort_utils.h"
44 #include "sql_select.h"
45 #include "debug_sync.h"
46 #include "opt_trace.h"
47 #include "sql_optimizer.h"              // JOIN
48 #include "sql_base.h"
49 
50 #include <algorithm>
51 #include <utility>
52 using std::max;
53 using std::min;
54 
55 	/* functions defined in this file */
56 
57 static uchar *read_buffpek_from_file(IO_CACHE *buffer_file, uint count,
58                                      uchar *buf);
59 static ha_rows find_all_keys(Sort_param *param,SQL_SELECT *select,
60                              Filesort_info *fs_info,
61                              IO_CACHE *buffer_file,
62                              IO_CACHE *tempfile,
63                              Bounded_queue<uchar, uchar> *pq,
64                              ha_rows *found_rows);
65 static int write_keys(Sort_param *param, Filesort_info *fs_info,
66                       uint count, IO_CACHE *buffer_file, IO_CACHE *tempfile);
67 static void register_used_fields(Sort_param *param);
68 static int merge_index(Sort_param *param,uchar *sort_buffer,
69                        BUFFPEK *buffpek,
70                        uint maxbuffer,IO_CACHE *tempfile,
71                        IO_CACHE *outfile);
72 static bool save_index(Sort_param *param, uint count,
73                        Filesort_info *table_sort);
74 static uint suffix_length(ulong string_length);
75 static SORT_ADDON_FIELD *get_addon_fields(ulong max_length_for_sort_data,
76                                           TABLE *const table,
77                                           Field **ptabfield,
78                                           uint sortlength, uint *plength);
79 static void unpack_addon_fields(struct st_sort_addon_field *addon_field,
80                                 uchar *buff);
81 static bool check_if_pq_applicable(Opt_trace_context *trace,
82                                    Sort_param *param, Filesort_info *info,
83                                    TABLE *table,
84                                    ha_rows records, ulong memory_available);
85 
86 
init_for_filesort(uint sortlen,TABLE * table,ulong max_length_for_sort_data,ha_rows maxrows,bool sort_positions)87 void Sort_param::init_for_filesort(uint sortlen, TABLE *table,
88                                    ulong max_length_for_sort_data,
89                                    ha_rows maxrows, bool sort_positions)
90 {
91   sort_length= sortlen;
92   ref_length= table->file->ref_length;
93   if (!(table->file->ha_table_flags() & HA_FAST_KEY_READ) &&
94       !table->fulltext_searched && !sort_positions)
95   {
96     /*
97       Get the descriptors of all fields whose values are appended
98       to sorted fields and get its total length in addon_length.
99     */
100     addon_field= get_addon_fields(max_length_for_sort_data,
101                                   table,
102                                   table->field, sort_length, &addon_length);
103   }
104   if (addon_field)
105     res_length= addon_length;
106   else
107   {
108     res_length= ref_length;
109     /*
110       The reference to the record is considered
111       as an additional sorted field
112     */
113     sort_length+= ref_length;
114   }
115   rec_length= sort_length + addon_length;
116   max_rows= maxrows;
117 }
118 
119 
trace_filesort_information(Opt_trace_context * trace,const SORT_FIELD * sortorder,uint s_length)120 static void trace_filesort_information(Opt_trace_context *trace,
121                                        const SORT_FIELD *sortorder,
122                                        uint s_length)
123 {
124   if (!trace->is_started())
125     return;
126 
127   Opt_trace_array trace_filesort(trace, "filesort_information");
128   for (; s_length-- ; sortorder++)
129   {
130     Opt_trace_object oto(trace);
131     oto.add_alnum("direction", sortorder->reverse ? "desc" : "asc");
132 
133     if (sortorder->field)
134     {
135       if (strlen(sortorder->field->table->alias) != 0)
136         oto.add_utf8_table(sortorder->field->table);
137       else
138         oto.add_alnum("table", "intermediate_tmp_table");
139       oto.add_alnum("field", sortorder->field->field_name ?
140                     sortorder->field->field_name : "tmp_table_column");
141     }
142     else
143       oto.add("expression", sortorder->item);
144   }
145 }
146 
147 
148 /**
149   Sort a table.
150   Creates a set of pointers that can be used to read the rows
151   in sorted order. This should be done with the functions
152   in records.cc.
153 
154   Before calling filesort, one must have done
155   table->file->info(HA_STATUS_VARIABLE)
156 
157   The result set is stored in table->io_cache or
158   table->record_pointers.
159 
160   @param      thd            Current thread
161   @param      table          Table to sort
162   @param      filesort       How to sort the table
163   @param      sort_positions Set to TRUE if we want to force sorting by position
164                              (Needed by UPDATE/INSERT or ALTER TABLE or
165                               when rowids are required by executor)
166   @param[out] examined_rows  Store number of examined rows here
167                              This is the number of found rows before
168                              applying WHERE condition.
169   @param[out] found_rows     Store the number of found rows here.
170                              This is the number of found rows after
171                              applying WHERE condition.
172 
173   @note
174     If we sort by position (like if sort_positions is 1) filesort() will
175     call table->prepare_for_position().
176 
177   @retval
178     HA_POS_ERROR	Error
179   @retval
180     \#			Number of rows in the result, could be less than
181                         found_rows if LIMIT were provided.
182 */
183 
filesort(THD * thd,TABLE * table,Filesort * filesort,bool sort_positions,ha_rows * examined_rows,ha_rows * found_rows)184 ha_rows filesort(THD *thd, TABLE *table, Filesort *filesort,
185                  bool sort_positions, ha_rows *examined_rows,
186                  ha_rows *found_rows)
187 {
188   int error;
189   ulong memory_available= thd->variables.sortbuff_size;
190   uint maxbuffer;
191   BUFFPEK *buffpek;
192   ha_rows num_rows= HA_POS_ERROR;
193   IO_CACHE tempfile, buffpek_pointers, *outfile;
194   Sort_param param;
195   bool multi_byte_charset;
196   Bounded_queue<uchar, uchar> pq;
197   Opt_trace_context * const trace= &thd->opt_trace;
198   SQL_SELECT *const select= filesort->select;
199   ha_rows max_rows= filesort->limit;
200   uint s_length= 0;
201 
202   DBUG_ENTER("filesort");
203 
204   if (!(s_length= filesort->make_sortorder()))
205     DBUG_RETURN(HA_POS_ERROR);  /* purecov: inspected */
206 
207   /*
208     We need a nameless wrapper, since we may be inside the "steps" of
209     "join_execution".
210   */
211   Opt_trace_object trace_wrapper(trace);
212   trace_filesort_information(trace, filesort->sortorder, s_length);
213 
214 #ifdef SKIP_DBUG_IN_FILESORT
215   DBUG_PUSH("");		/* No DBUG here */
216 #endif
217   Item_subselect *subselect= table->reginfo.join_tab ?
218      table->reginfo.join_tab->join->select_lex->master_unit()->item :
219      NULL;
220 
221   MYSQL_FILESORT_START(table->s->db.str, table->s->table_name.str);
222   DEBUG_SYNC(thd, "filesort_start");
223 
224   /*
225    Release InnoDB's adaptive hash index latch (if holding) before
226    running a sort.
227   */
228   ha_release_temporary_latches(thd);
229 
230   /*
231     Don't use table->sort in filesort as it is also used by
232     QUICK_INDEX_MERGE_SELECT. Work with a copy and put it back at the end
233     when index_merge select has finished with it.
234   */
235   Filesort_info table_sort= table->sort;
236   table->sort.io_cache= NULL;
237   DBUG_ASSERT(table_sort.record_pointers == NULL);
238 
239   outfile= table_sort.io_cache;
240   my_b_clear(&tempfile);
241   my_b_clear(&buffpek_pointers);
242   buffpek=0;
243   error= 1;
244 
245   param.init_for_filesort(sortlength(thd, filesort->sortorder, s_length,
246                                      &multi_byte_charset),
247                           table,
248                           thd->variables.max_length_for_sort_data,
249                           max_rows, sort_positions);
250 
251   table_sort.addon_buf= 0;
252   table_sort.addon_length= param.addon_length;
253   table_sort.addon_field= param.addon_field;
254   table_sort.unpack= unpack_addon_fields;
255   if (param.addon_field &&
256       !(table_sort.addon_buf=
257         (uchar *) my_malloc(param.addon_length, MYF(MY_WME))))
258       goto err;
259 
260   if (select && select->quick)
261     thd->inc_status_sort_range();
262   else
263     thd->inc_status_sort_scan();
264 
265   // If number of rows is not known, use as much of sort buffer as possible.
266   num_rows= table->file->estimate_rows_upper_bound();
267 
268   if (multi_byte_charset &&
269       !(param.tmp_buffer= (char*) my_malloc(param.sort_length,MYF(MY_WME))))
270     goto err;
271 
272   if (check_if_pq_applicable(trace, &param, &table_sort,
273                              table, num_rows, memory_available))
274   {
275     DBUG_PRINT("info", ("filesort PQ is applicable"));
276     const size_t compare_length= param.sort_length;
277     if (pq.init(param.max_rows,
278                 true,                           // max_at_top
279                 NULL,                           // compare_function
280                 compare_length,
281                 &make_sortkey, &param, table_sort.get_sort_keys()))
282     {
283       /*
284        If we fail to init pq, we have to give up:
285        out of memory means my_malloc() will call my_error().
286       */
287       DBUG_PRINT("info", ("failed to allocate PQ"));
288       table_sort.free_sort_buffer();
289       DBUG_ASSERT(thd->is_error());
290       goto err;
291     }
292     // For PQ queries (with limit) we initialize all pointers.
293     table_sort.init_record_pointers();
294     filesort->using_pq= true;
295   }
296   else
297   {
298     DBUG_PRINT("info", ("filesort PQ is not applicable"));
299 
300     /*
301       We need space for at least one record from each merge chunk, i.e.
302         param->max_keys_per_buffer >= MERGEBUFF2
303       See merge_buffers()),
304       memory_available must be large enough for
305         param->max_keys_per_buffer * (record + record pointer) bytes
306       (the main sort buffer, see alloc_sort_buffer()).
307       Hence this minimum:
308     */
309     const ulong min_sort_memory=
310       max<ulong>(MIN_SORT_MEMORY,
311                  ALIGN_SIZE(MERGEBUFF2 * (param.rec_length + sizeof(uchar*))));
312     /*
313       Cannot depend on num_rows. For external sort, space for upto MERGEBUFF2
314       rows is required.
315     */
316     if (num_rows < MERGEBUFF2)
317       num_rows= MERGEBUFF2;
318 
319     while (memory_available >= min_sort_memory)
320     {
321       ha_rows keys= memory_available / (param.rec_length + sizeof(char*));
322       param.max_keys_per_buffer= (uint) min(num_rows, keys);
323 
324       table_sort.alloc_sort_buffer(param.max_keys_per_buffer, param.rec_length);
325       if (table_sort.sort_buffer_size() > 0 && table_sort.get_sort_keys())
326         break;
327       ulong old_memory_available= memory_available;
328       memory_available= memory_available/4*3;
329       if (memory_available < min_sort_memory &&
330           old_memory_available > min_sort_memory)
331         memory_available= min_sort_memory;
332     }
333     if (memory_available < min_sort_memory)
334     {
335       my_error(ER_OUT_OF_SORTMEMORY,MYF(ME_ERROR + ME_FATALERROR));
336       goto err;
337     }
338   }
339 
340   if (open_cached_file(&buffpek_pointers,mysql_tmpdir,TEMP_PREFIX,
341 		       DISK_BUFFER_SIZE, MYF(MY_WME)))
342     goto err;
343 
344   param.sort_form= table;
345   param.end= (param.local_sortorder= filesort->sortorder) + s_length;
346   // New scope, because subquery execution must be traced within an array.
347   {
348     Opt_trace_array ota(trace, "filesort_execution");
349     num_rows= find_all_keys(&param, select,
350                             &table_sort,
351                             &buffpek_pointers,
352                             &tempfile,
353                             pq.is_initialized() ? &pq : NULL,
354                             found_rows);
355     if (num_rows == HA_POS_ERROR)
356       goto err;
357   }
358 
359   maxbuffer= (uint) (my_b_tell(&buffpek_pointers)/sizeof(*buffpek));
360 
361   Opt_trace_object(trace, "filesort_summary")
362     .add("rows", num_rows)
363     .add("examined_rows", param.examined_rows)
364     .add("number_of_tmp_files", maxbuffer)
365     .add("sort_buffer_size", table_sort.sort_buffer_size())
366     .add_alnum("sort_mode",
367                param.addon_field ?
368                "<sort_key, additional_fields>" : "<sort_key, rowid>");
369 
370   if (maxbuffer == 0)			// The whole set is in memory
371   {
372     if (save_index(&param, (uint) num_rows, &table_sort))
373       goto err;
374   }
375   else
376   {
377     if (table_sort.buffpek && table_sort.buffpek_len < maxbuffer)
378     {
379       my_free(table_sort.buffpek);
380       table_sort.buffpek= 0;
381     }
382     if (!(table_sort.buffpek=
383           (uchar *) read_buffpek_from_file(&buffpek_pointers, maxbuffer,
384                                  table_sort.buffpek)))
385       goto err;
386     buffpek= (BUFFPEK *) table_sort.buffpek;
387     table_sort.buffpek_len= maxbuffer;
388     close_cached_file(&buffpek_pointers);
389 	/* Open cached file if it isn't open */
390     if (! my_b_inited(outfile) &&
391 	open_cached_file(outfile,mysql_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER,
392 			  MYF(MY_WME)))
393       goto err;
394     if (reinit_io_cache(outfile,WRITE_CACHE,0L,0,0))
395       goto err;
396 
397     /*
398       Use also the space previously used by string pointers in sort_buffer
399       for temporary key storage.
400     */
401     param.max_keys_per_buffer= table_sort.sort_buffer_size() / param.rec_length;
402     maxbuffer--;				// Offset from 0
403     if (merge_many_buff(&param,
404                         (uchar*) table_sort.get_sort_keys(),
405                         buffpek,&maxbuffer,
406 			&tempfile))
407       goto err;
408     if (flush_io_cache(&tempfile) ||
409 	reinit_io_cache(&tempfile,READ_CACHE,0L,0,0))
410       goto err;
411     if (merge_index(&param,
412                     (uchar*) table_sort.get_sort_keys(),
413                     buffpek,
414                     maxbuffer,
415                     &tempfile,
416 		    outfile))
417       goto err;
418   }
419 
420   if (num_rows > param.max_rows)
421   {
422     // If find_all_keys() produced more results than the query LIMIT.
423     num_rows= param.max_rows;
424   }
425   error= 0;
426 
427  err:
428   my_free(param.tmp_buffer);
429   if (!subselect || !subselect->is_uncacheable())
430   {
431     table_sort.free_sort_buffer();
432     my_free(buffpek);
433     table_sort.buffpek= 0;
434     table_sort.buffpek_len= 0;
435   }
436   close_cached_file(&tempfile);
437   close_cached_file(&buffpek_pointers);
438 
439   /* free resources allocated  for QUICK_INDEX_MERGE_SELECT */
440   free_io_cache(table);
441 
442   if (my_b_inited(outfile))
443   {
444     if (flush_io_cache(outfile))
445       error=1;
446     {
447       my_off_t save_pos=outfile->pos_in_file;
448       /* For following reads */
449       if (reinit_io_cache(outfile,READ_CACHE,0L,0,0))
450 	error=1;
451       outfile->end_of_file=save_pos;
452     }
453   }
454   if (error)
455   {
456     int kill_errno= thd->killed_errno();
457     DBUG_ASSERT(thd->is_error() || kill_errno);
458     my_printf_error(ER_FILSORT_ABORT,
459                     "%s: %s",
460                     MYF(ME_ERROR + ME_WAITTANG),
461                     ER_THD(thd, ER_FILSORT_ABORT),
462                     kill_errno ? ((kill_errno == THD::KILL_CONNECTION &&
463                                  !shutdown_in_progress) ? ER(THD::KILL_QUERY) :
464                                                           ER(kill_errno)) :
465                                  thd->get_stmt_da()->message());
466 
467     if (log_warnings > 1)
468     {
469       sql_print_warning("%s, host: %s, user: %s, thread: %lu, query: %-.4096s",
470                         ER_THD(thd, ER_FILSORT_ABORT),
471                         thd->security_ctx->host_or_ip,
472                         &thd->security_ctx->priv_user[0],
473                         (ulong) thd->thread_id,
474                         thd->query());
475     }
476   }
477   else
478     thd->inc_status_sort_rows(num_rows);
479   *examined_rows= param.examined_rows;
480 #ifdef SKIP_DBUG_IN_FILESORT
481   DBUG_POP();			/* Ok to DBUG */
482 #endif
483 
484   // Assign the copy back!
485   table->sort= table_sort;
486 
487   DBUG_PRINT("exit",
488              ("num_rows: %ld examined_rows: %ld found_rows: %ld",
489               (long) num_rows, (long) *examined_rows, (long) *found_rows));
490   MYSQL_FILESORT_DONE(error, num_rows);
491   DBUG_RETURN(error ? HA_POS_ERROR : num_rows);
492 } /* filesort */
493 
494 
filesort_free_buffers(TABLE * table,bool full)495 void filesort_free_buffers(TABLE *table, bool full)
496 {
497   DBUG_ENTER("filesort_free_buffers");
498   my_free(table->sort.record_pointers);
499   table->sort.record_pointers= NULL;
500 
501   if (full)
502   {
503     table->sort.free_sort_buffer();
504     my_free(table->sort.buffpek);
505     table->sort.buffpek= NULL;
506     table->sort.buffpek_len= 0;
507   }
508 
509   my_free(table->sort.addon_buf);
510   my_free(table->sort.addon_field);
511   table->sort.addon_buf= NULL;
512   table->sort.addon_field= NULL;
513   DBUG_VOID_RETURN;
514 }
515 
cleanup()516 void Filesort::cleanup()
517 {
518   if (select && own_select)
519   {
520     select->cleanup();
521     select= NULL;
522   }
523 }
524 
make_sortorder()525 uint Filesort::make_sortorder()
526 {
527   uint count;
528   SORT_FIELD *sort,*pos;
529   ORDER *ord;
530   DBUG_ENTER("make_sortorder");
531 
532 
533   count=0;
534   for (ord = order; ord; ord= ord->next)
535     count++;
536   if (!sortorder)
537     sortorder= (SORT_FIELD*) sql_alloc(sizeof(SORT_FIELD) * (count + 1));
538   pos= sort= sortorder;
539 
540   if (!pos)
541     DBUG_RETURN(0);
542 
543   for (ord= order; ord; ord= ord->next, pos++)
544   {
545     Item *const item= ord->item[0], *const real_item= item->real_item();
546     pos->field= 0; pos->item= 0;
547     if (real_item->type() == Item::FIELD_ITEM)
548     {
549       // Could be a field, or Item_direct_view_ref wrapping a field
550       DBUG_ASSERT(item->type() == Item::FIELD_ITEM ||
551                   (item->type() == Item::REF_ITEM &&
552                    static_cast<Item_ref*>(item)->ref_type() ==
553                    Item_ref::VIEW_REF));
554       pos->field= static_cast<Item_field*>(real_item)->field;
555     }
556     else if (real_item->type() == Item::SUM_FUNC_ITEM &&
557              !real_item->const_item())
558     {
559       // Aggregate, or Item_aggregate_ref
560       DBUG_ASSERT(item->type() == Item::SUM_FUNC_ITEM ||
561                   (item->type() == Item::REF_ITEM &&
562                    static_cast<Item_ref*>(item)->ref_type() ==
563                    Item_ref::AGGREGATE_REF));
564       pos->field= item->get_tmp_table_field();
565     }
566     else if (real_item->type() == Item::COPY_STR_ITEM)
567     {						// Blob patch
568       pos->item= static_cast<Item_copy*>(real_item)->get_item();
569     }
570     else
571       pos->item= item;
572     pos->reverse= (ord->direction == ORDER::ORDER_DESC);
573     DBUG_ASSERT(pos->field != NULL || pos->item != NULL);
574   }
575   DBUG_RETURN(count);
576 }
577 
578 
579 /**
580   Makes an array of string pointers for info->sort_keys.
581 
582   @param info         Filesort_info struct owning the allocated array.
583   @param num_records  Number of records.
584   @param length       Length of each record.
585 */
586 
587 /** Read 'count' number of buffer pointers into memory. */
588 
read_buffpek_from_file(IO_CACHE * buffpek_pointers,uint count,uchar * buf)589 static uchar *read_buffpek_from_file(IO_CACHE *buffpek_pointers, uint count,
590                                      uchar *buf)
591 {
592   ulong length= sizeof(BUFFPEK)*count;
593   uchar *tmp= buf;
594   DBUG_ENTER("read_buffpek_from_file");
595   if (count > UINT_MAX/sizeof(BUFFPEK))
596     return 0; /* sizeof(BUFFPEK)*count will overflow */
597   if (!tmp)
598     tmp= (uchar *)my_malloc(length, MYF(MY_WME));
599   if (tmp)
600   {
601     if (reinit_io_cache(buffpek_pointers,READ_CACHE,0L,0,0) ||
602 	my_b_read(buffpek_pointers, (uchar*) tmp, length))
603     {
604       my_free(tmp);
605       tmp=0;
606     }
607   }
608   DBUG_RETURN(tmp);
609 }
610 
611 #ifndef DBUG_OFF
612 /*
613   Print a text, SQL-like record representation into dbug trace.
614 
615   Note: this function is a work in progress: at the moment
616    - column read bitmap is ignored (can print garbage for unused columns)
617    - there is no quoting
618 */
dbug_print_record(TABLE * table,bool print_rowid)619 static void dbug_print_record(TABLE *table, bool print_rowid)
620 {
621   char buff[1024];
622   Field **pfield;
623   String tmp(buff,sizeof(buff),&my_charset_bin);
624   DBUG_LOCK_FILE;
625 
626   fprintf(DBUG_FILE, "record (");
627   for (pfield= table->field; *pfield ; pfield++)
628     fprintf(DBUG_FILE, "%s%s", (*pfield)->field_name, (pfield[1])? ", ":"");
629   fprintf(DBUG_FILE, ") = ");
630 
631   fprintf(DBUG_FILE, "(");
632   for (pfield= table->field; *pfield ; pfield++)
633   {
634     Field *field=  *pfield;
635 
636     if (field->is_null())
637       fwrite("NULL", sizeof(char), 4, DBUG_FILE);
638 
639     if (field->type() == MYSQL_TYPE_BIT)
640       (void) field->val_int_as_str(&tmp, 1);
641     else
642       field->val_str(&tmp);
643 
644     fwrite(tmp.ptr(),sizeof(char),tmp.length(),DBUG_FILE);
645     if (pfield[1])
646       fwrite(", ", sizeof(char), 2, DBUG_FILE);
647   }
648   fprintf(DBUG_FILE, ")");
649   if (print_rowid)
650   {
651     fprintf(DBUG_FILE, " rowid ");
652     for (uint i=0; i < table->file->ref_length; i++)
653     {
654       fprintf(DBUG_FILE, "%x", (uchar)table->file->ref[i]);
655     }
656   }
657   fprintf(DBUG_FILE, "\n");
658   DBUG_UNLOCK_FILE;
659 }
660 #endif
661 
662 /**
663   Search after sort_keys, and write them into tempfile
664   (if we run out of space in the sort_keys buffer).
665   All produced sequences are guaranteed to be non-empty.
666 
667   @param param             Sorting parameter
668   @param select            Use this to get source data
669   @param sort_keys         Array of pointers to sort key + addon buffers.
670   @param buffpek_pointers  File to write BUFFPEKs describing sorted segments
671                            in tempfile.
672   @param tempfile          File to write sorted sequences of sortkeys to.
673   @param pq                If !NULL, use it for keeping top N elements
674   @param [out] found_rows  The number of FOUND_ROWS().
675                            For a query with LIMIT, this value will typically
676                            be larger than the function return value.
677 
678   @note
679     Basic idea:
680     @verbatim
681      while (get_next_sortkey())
682      {
683        if (using priority queue)
684          push sort key into queue
685        else
686        {
687          if (no free space in sort_keys buffers)
688          {
689            sort sort_keys buffer;
690            dump sorted sequence to 'tempfile';
691            dump BUFFPEK describing sequence location into 'buffpek_pointers';
692          }
693          put sort key into 'sort_keys';
694        }
695      }
696      if (sort_keys has some elements && dumped at least once)
697        sort-dump-dump as above;
698      else
699        don't sort, leave sort_keys array to be sorted by caller.
700   @endverbatim
701 
702   @retval
703     Number of records written on success.
704   @retval
705     HA_POS_ERROR on error.
706 */
707 
find_all_keys(Sort_param * param,SQL_SELECT * select,Filesort_info * fs_info,IO_CACHE * buffpek_pointers,IO_CACHE * tempfile,Bounded_queue<uchar,uchar> * pq,ha_rows * found_rows)708 static ha_rows find_all_keys(Sort_param *param, SQL_SELECT *select,
709                              Filesort_info *fs_info,
710                              IO_CACHE *buffpek_pointers,
711                              IO_CACHE *tempfile,
712                              Bounded_queue<uchar, uchar> *pq,
713                              ha_rows *found_rows)
714 {
715   int error,flag,quick_select;
716   uint idx,indexpos,ref_length;
717   uchar *ref_pos,*next_pos,ref_buff[MAX_REFLENGTH];
718   my_off_t record;
719   TABLE *sort_form;
720   THD *thd= current_thd;
721   volatile THD::killed_state *killed= &thd->killed;
722   handler *file;
723   MY_BITMAP *save_read_set, *save_write_set;
724   bool skip_record;
725 
726   DBUG_ENTER("find_all_keys");
727   DBUG_PRINT("info",("using: %s",
728                      (select ? select->quick ? "ranges" : "where":
729                       "every row")));
730 
731   idx=indexpos=0;
732   error=quick_select=0;
733   sort_form=param->sort_form;
734   file=sort_form->file;
735   ref_length=param->ref_length;
736   ref_pos= ref_buff;
737   quick_select=select && select->quick;
738   record=0;
739   *found_rows= 0;
740   flag= ((file->ha_table_flags() & HA_REC_NOT_IN_SEQ) || quick_select);
741   if (flag)
742     ref_pos= &file->ref[0];
743   next_pos=ref_pos;
744   if (!quick_select)
745   {
746     next_pos=(uchar*) 0;			/* Find records in sequence */
747     DBUG_EXECUTE_IF("bug14365043_1",
748                     DBUG_SET("+d,ha_rnd_init_fail"););
749     if ((error= file->ha_rnd_init(1)))
750     {
751       file->print_error(error, MYF(0));
752       DBUG_RETURN(HA_POS_ERROR);
753     }
754     file->extra_opt(HA_EXTRA_CACHE,
755 		    current_thd->variables.read_buff_size);
756   }
757 
758   if (quick_select)
759   {
760     if (select->quick->reset())
761       DBUG_RETURN(HA_POS_ERROR);
762   }
763 
764   /* Remember original bitmaps */
765   save_read_set=  sort_form->read_set;
766   save_write_set= sort_form->write_set;
767   /* Set up temporary column read map for columns used by sort */
768   bitmap_clear_all(&sort_form->tmp_set);
769   /* Temporary set for register_used_fields and register_field_in_read_map */
770   sort_form->read_set= &sort_form->tmp_set;
771   // Include fields used for sorting in the read_set.
772   register_used_fields(param);
773 
774   // Include fields used by conditions in the read_set.
775   if (select && select->cond)
776     select->cond->walk(&Item::register_field_in_read_map, 1,
777                        (uchar*) sort_form);
778 
779   // Include fields used by pushed conditions in the read_set.
780   if (select && select->icp_cond)
781     select->icp_cond->walk(&Item::register_field_in_read_map, 1,
782                            (uchar*) sort_form);
783 
784   sort_form->column_bitmaps_set(&sort_form->tmp_set, &sort_form->tmp_set);
785 
786   DEBUG_SYNC(thd, "after_index_merge_phase1");
787 
788   for (;;)
789   {
790     if (quick_select)
791     {
792       if ((error= select->quick->get_next()))
793         break;
794       file->position(sort_form->record[0]);
795       DBUG_EXECUTE_IF("debug_filesort", dbug_print_record(sort_form, TRUE););
796     }
797     else					/* Not quick-select */
798     {
799       {
800 	error= file->ha_rnd_next(sort_form->record[0]);
801 	if (!flag)
802 	{
803 	  my_store_ptr(ref_pos,ref_length,record); // Position to row
804 	  record+= sort_form->s->db_record_offset;
805 	}
806 	else if (!error)
807 	  file->position(sort_form->record[0]);
808       }
809       if (error && error != HA_ERR_RECORD_DELETED)
810 	break;
811     }
812 
813     if (*killed)
814     {
815       DBUG_PRINT("info",("Sort killed by user"));
816       if (!quick_select)
817       {
818         (void) file->extra(HA_EXTRA_NO_CACHE);
819         file->ha_rnd_end();
820       }
821       DBUG_RETURN(HA_POS_ERROR);		/* purecov: inspected */
822     }
823     if (error == 0)
824       param->examined_rows++;
825     if (!error && (!select ||
826                    (!select->skip_record(thd, &skip_record) && !skip_record)))
827     {
828       ++(*found_rows);
829       if (pq)
830       {
831         pq->push(ref_pos);
832         idx= pq->num_elements();
833       }
834       else
835       {
836         if (idx == param->max_keys_per_buffer)
837         {
838           if (write_keys(param, fs_info, idx, buffpek_pointers, tempfile))
839              DBUG_RETURN(HA_POS_ERROR);
840           idx= 0;
841           indexpos++;
842         }
843         make_sortkey(param, fs_info->get_record_buffer(idx++), ref_pos);
844       }
845     }
846     /*
847       Don't try unlocking the row if skip_record reported an error since in
848       this case the transaction might have been rolled back already.
849     */
850     else if (!thd->is_error())
851       file->unlock_row();
852     /* It does not make sense to read more keys in case of a fatal error */
853     if (thd->is_error())
854       break;
855   }
856   if (!quick_select)
857   {
858     (void) file->extra(HA_EXTRA_NO_CACHE);	/* End cacheing of records */
859     if (!next_pos)
860       file->ha_rnd_end();
861   }
862 
863   if (thd->is_error())
864     DBUG_RETURN(HA_POS_ERROR);
865 
866   /* Signal we should use orignal column read and write maps */
867   sort_form->column_bitmaps_set(save_read_set, save_write_set);
868 
869   DBUG_PRINT("test",("error: %d  indexpos: %d",error,indexpos));
870   if (error != HA_ERR_END_OF_FILE)
871   {
872     file->print_error(error,MYF(ME_ERROR | ME_WAITTANG)); // purecov: inspected
873     DBUG_RETURN(HA_POS_ERROR);			/* purecov: inspected */
874   }
875   if (indexpos && idx &&
876       write_keys(param, fs_info, idx, buffpek_pointers, tempfile))
877     DBUG_RETURN(HA_POS_ERROR);			/* purecov: inspected */
878   const ha_rows retval=
879     my_b_inited(tempfile) ?
880     (ha_rows) (my_b_tell(tempfile)/param->rec_length) : idx;
881   DBUG_PRINT("info", ("find_all_keys return %u", (uint) retval));
882   DBUG_RETURN(retval);
883 } /* find_all_keys */
884 
885 
886 /**
887   @details
888   Sort the buffer and write:
889   -# the sorted sequence to tempfile
890   -# a BUFFPEK describing the sorted sequence position to buffpek_pointers
891 
892     (was: Skriver en buffert med nycklar till filen)
893 
894   @param param             Sort parameters
895   @param sort_keys         Array of pointers to keys to sort
896   @param count             Number of elements in sort_keys array
897   @param buffpek_pointers  One 'BUFFPEK' struct will be written into this file.
898                            The BUFFPEK::{file_pos, count} will indicate where
899                            the sorted data was stored.
900   @param tempfile          The sorted sequence will be written into this file.
901 
902   @retval
903     0 OK
904   @retval
905     1 Error
906 */
907 
908 static int
write_keys(Sort_param * param,Filesort_info * fs_info,uint count,IO_CACHE * buffpek_pointers,IO_CACHE * tempfile)909 write_keys(Sort_param *param, Filesort_info *fs_info, uint count,
910            IO_CACHE *buffpek_pointers, IO_CACHE *tempfile)
911 {
912   size_t rec_length;
913   uchar **end;
914   BUFFPEK buffpek;
915   DBUG_ENTER("write_keys");
916 
917   rec_length= param->rec_length;
918   uchar **sort_keys= fs_info->get_sort_keys();
919 
920   fs_info->sort_buffer(param, count);
921 
922   if (!my_b_inited(tempfile) &&
923       open_cached_file(tempfile, mysql_tmpdir, TEMP_PREFIX, DISK_BUFFER_SIZE,
924                        MYF(MY_WME)))
925     goto err;                                   /* purecov: inspected */
926   /* check we won't have more buffpeks than we can possibly keep in memory */
927   if (my_b_tell(buffpek_pointers) + sizeof(BUFFPEK) > (ulonglong)UINT_MAX)
928     goto err;
929   buffpek.file_pos= my_b_tell(tempfile);
930   if ((ha_rows) count > param->max_rows)
931     count=(uint) param->max_rows;               /* purecov: inspected */
932   buffpek.count=(ha_rows) count;
933   for (end=sort_keys+count ; sort_keys != end ; sort_keys++)
934     if (my_b_write(tempfile, (uchar*) *sort_keys, (uint) rec_length))
935       goto err;
936   if (my_b_write(buffpek_pointers, (uchar*) &buffpek, sizeof(buffpek)))
937     goto err;
938   DBUG_RETURN(0);
939 
940 err:
941   DBUG_RETURN(1);
942 } /* write_keys */
943 
944 
945 /**
946   Store length as suffix in high-byte-first order.
947 */
948 
store_length(uchar * to,uint length,uint pack_length)949 static inline void store_length(uchar *to, uint length, uint pack_length)
950 {
951   switch (pack_length) {
952   case 1:
953     *to= (uchar) length;
954     break;
955   case 2:
956     mi_int2store(to, length);
957     break;
958   case 3:
959     mi_int3store(to, length);
960     break;
961   default:
962     mi_int4store(to, length);
963     break;
964   }
965 }
966 
967 
968 #ifdef WORDS_BIGENDIAN
969 const bool Is_big_endian= true;
970 #else
971 const bool Is_big_endian= false;
972 #endif
copy_native_longlong(uchar * to,int to_length,longlong val,bool is_unsigned)973 void copy_native_longlong(uchar *to, int to_length,
974                           longlong val, bool is_unsigned)
975 {
976   copy_integer<Is_big_endian>(to, to_length,
977                               static_cast<uchar*>(static_cast<void*>(&val)),
978                               sizeof(longlong),
979                               is_unsigned);
980 }
981 
982 
983 /** Make a sort-key from record. */
984 
make_sortkey(Sort_param * param,uchar * to,uchar * ref_pos)985 void make_sortkey(Sort_param *param, uchar *to, uchar *ref_pos)
986 {
987   SORT_FIELD *sort_field;
988 
989   for (sort_field= param->local_sortorder ;
990        sort_field != param->end ;
991        sort_field++)
992   {
993     bool maybe_null= false;
994     if (sort_field->field)
995     {
996       Field *field= sort_field->field;
997       if (field->maybe_null())
998       {
999 	if (field->is_null())
1000 	{
1001 	  if (sort_field->reverse)
1002 	    memset(to, 255, sort_field->length+1);
1003 	  else
1004 	    memset(to, 0, sort_field->length+1);
1005 	  to+= sort_field->length+1;
1006 	  continue;
1007 	}
1008 	else
1009 	  *to++=1;
1010       }
1011       field->make_sort_key(to, sort_field->length);
1012     }
1013     else
1014     {						// Item
1015       Item *item=sort_field->item;
1016       maybe_null= item->maybe_null;
1017       switch (sort_field->result_type) {
1018       case STRING_RESULT:
1019       {
1020         const CHARSET_INFO *cs=item->collation.collation;
1021         char fill_char= ((cs->state & MY_CS_BINSORT) ? (char) 0 : ' ');
1022 
1023         if (maybe_null)
1024           *to++=1;
1025         /* All item->str() to use some extra byte for end null.. */
1026         String tmp((char*) to,sort_field->length+4,cs);
1027         String *res= item->str_result(&tmp);
1028         if (!res)
1029         {
1030           if (maybe_null)
1031             memset(to-1, 0, sort_field->length+1);
1032           else
1033           {
1034             /* purecov: begin deadcode */
1035             /*
1036               This should only happen during extreme conditions if we run out
1037               of memory or have an item marked not null when it can be null.
1038               This code is here mainly to avoid a hard crash in this case.
1039             */
1040             DBUG_ASSERT(0);
1041             DBUG_PRINT("warning",
1042                        ("Got null on something that shouldn't be null"));
1043             memset(to, 0, sort_field->length);	// Avoid crash
1044             /* purecov: end */
1045           }
1046           break;
1047         }
1048         uint length= res->length();
1049         if (sort_field->need_strxnfrm)
1050         {
1051           char *from=(char*) res->ptr();
1052           uint tmp_length MY_ATTRIBUTE((unused));
1053           if ((uchar*) from == to)
1054           {
1055             DBUG_ASSERT(sort_field->length >= length);
1056             set_if_smaller(length,sort_field->length);
1057             memcpy(param->tmp_buffer,from,length);
1058             from=param->tmp_buffer;
1059           }
1060           tmp_length= cs->coll->strnxfrm(cs, to, sort_field->length,
1061                                          item->max_char_length(),
1062                                          (uchar*) from, length,
1063                                          MY_STRXFRM_PAD_WITH_SPACE |
1064                                          MY_STRXFRM_PAD_TO_MAXLEN);
1065           DBUG_ASSERT(tmp_length == sort_field->length);
1066         }
1067         else
1068         {
1069           uint diff;
1070           uint sort_field_length= sort_field->length -
1071             sort_field->suffix_length;
1072           if (sort_field_length < length)
1073           {
1074             diff= 0;
1075             length= sort_field_length;
1076           }
1077           else
1078             diff= sort_field_length - length;
1079           if (sort_field->suffix_length)
1080           {
1081             /* Store length last in result_string */
1082             store_length(to + sort_field_length, length,
1083                          sort_field->suffix_length);
1084           }
1085 
1086           my_strnxfrm(cs,(uchar*)to,length,(const uchar*)res->ptr(),length);
1087           cs->cset->fill(cs, (char *)to+length,diff,fill_char);
1088         }
1089         break;
1090       }
1091       case INT_RESULT:
1092 	{
1093           longlong value= item->field_type() == MYSQL_TYPE_TIME ?
1094                           item->val_time_temporal_result() :
1095                           item->is_temporal_with_date() ?
1096                           item->val_date_temporal_result() :
1097                           item->val_int_result();
1098           if (maybe_null)
1099           {
1100 	    *to++=1;				/* purecov: inspected */
1101             if (item->null_value)
1102             {
1103               if (maybe_null)
1104                 memset(to-1, 0, sort_field->length+1);
1105               else
1106               {
1107                 DBUG_PRINT("warning",
1108                            ("Got null on something that shouldn't be null"));
1109                 memset(to, 0, sort_field->length);
1110               }
1111               break;
1112             }
1113           }
1114           copy_native_longlong(to, sort_field->length,
1115                                value, item->unsigned_flag);
1116 	  break;
1117 	}
1118       case DECIMAL_RESULT:
1119         {
1120           my_decimal dec_buf, *dec_val= item->val_decimal_result(&dec_buf);
1121           if (maybe_null)
1122           {
1123             if (item->null_value)
1124             {
1125               memset(to, 0, sort_field->length+1);
1126               to++;
1127               break;
1128             }
1129             *to++=1;
1130           }
1131           if (sort_field->length < DECIMAL_MAX_FIELD_SIZE)
1132           {
1133             uchar buf[DECIMAL_MAX_FIELD_SIZE];
1134             my_decimal2binary(E_DEC_FATAL_ERROR, dec_val, buf,
1135                               item->max_length - (item->decimals ? 1:0),
1136                               item->decimals);
1137             memcpy(to, buf, sort_field->length);
1138           }
1139           else
1140           {
1141             my_decimal2binary(E_DEC_FATAL_ERROR, dec_val, to,
1142                               item->max_length - (item->decimals ? 1:0),
1143                               item->decimals);
1144           }
1145          break;
1146         }
1147       case REAL_RESULT:
1148 	{
1149           double value= item->val_result();
1150 	  if (maybe_null)
1151           {
1152             if (item->null_value)
1153             {
1154               memset(to, 0, sort_field->length+1);
1155               to++;
1156               break;
1157             }
1158 	    *to++=1;
1159           }
1160           if (sort_field->length < sizeof(double))
1161           {
1162             uchar buf[sizeof(double)];
1163             change_double_for_sort(value, buf);
1164             memcpy(to, buf, sort_field->length);
1165           }
1166           else
1167           {
1168             change_double_for_sort(value, (uchar*) to);
1169           }
1170 	  break;
1171 	}
1172       case ROW_RESULT:
1173       default:
1174 	// This case should never be choosen
1175 	DBUG_ASSERT(0);
1176 	break;
1177       }
1178     }
1179     if (sort_field->reverse)
1180     {							/* Revers key */
1181       if (maybe_null)
1182         to[-1]= ~to[-1];
1183       uint length= sort_field->length;
1184       while (length--)
1185       {
1186 	*to = (uchar) (~ *to);
1187 	to++;
1188       }
1189     }
1190     else
1191       to+= sort_field->length;
1192   }
1193 
1194   if (param->addon_field)
1195   {
1196     /*
1197       Save field values appended to sorted fields.
1198       First null bit indicators are appended then field values follow.
1199       In this implementation we use fixed layout for field values -
1200       the same for all records.
1201     */
1202     SORT_ADDON_FIELD *addonf= param->addon_field;
1203     uchar *nulls= to;
1204     DBUG_ASSERT(addonf != 0);
1205     memset(nulls, 0, addonf->offset);
1206     to+= addonf->offset;
1207     for (Field* field; (field= addonf->field) ; addonf++)
1208     {
1209       if (addonf->null_bit && field->is_null())
1210       {
1211         nulls[addonf->null_offset]|= addonf->null_bit;
1212       }
1213       else
1214       {
1215         (void) field->pack(to, field->ptr);
1216       }
1217       to+= addonf->length;
1218     }
1219   }
1220   else
1221   {
1222     /* Save filepos last */
1223     memcpy((uchar*) to, ref_pos, (size_t) param->ref_length);
1224   }
1225   return;
1226 }
1227 
1228 
1229 /*
1230   Register fields used by sorting in the sorted table's read set
1231 */
1232 
register_used_fields(Sort_param * param)1233 static void register_used_fields(Sort_param *param)
1234 {
1235   reg1 SORT_FIELD *sort_field;
1236   TABLE *table=param->sort_form;
1237   MY_BITMAP *bitmap= table->read_set;
1238 
1239   for (sort_field= param->local_sortorder ;
1240        sort_field != param->end ;
1241        sort_field++)
1242   {
1243     Field *field;
1244     if ((field= sort_field->field))
1245     {
1246       if (field->table == table)
1247       bitmap_set_bit(bitmap, field->field_index);
1248     }
1249     else
1250     {						// Item
1251       sort_field->item->walk(&Item::register_field_in_read_map, 1,
1252                              (uchar *) table);
1253     }
1254   }
1255 
1256   if (param->addon_field)
1257   {
1258     SORT_ADDON_FIELD *addonf= param->addon_field;
1259     Field *field;
1260     for ( ; (field= addonf->field) ; addonf++)
1261       bitmap_set_bit(bitmap, field->field_index);
1262   }
1263   else
1264   {
1265     /* Save filepos last */
1266     table->prepare_for_position();
1267   }
1268 }
1269 
save_index(Sort_param * param,uint count,Filesort_info * table_sort)1270 static bool save_index(Sort_param *param, uint count, Filesort_info *table_sort)
1271 {
1272   uint offset,res_length;
1273   uchar *to;
1274   DBUG_ENTER("save_index");
1275 
1276   table_sort->sort_buffer(param, count);
1277   res_length= param->res_length;
1278   offset= param->rec_length-res_length;
1279   if (!(to= table_sort->record_pointers=
1280         (uchar*) my_malloc(res_length*count, MYF(MY_WME))))
1281     DBUG_RETURN(1);                 /* purecov: inspected */
1282   uchar **sort_keys= table_sort->get_sort_keys();
1283   for (uchar **end= sort_keys+count ; sort_keys != end ; sort_keys++)
1284   {
1285     memcpy(to, *sort_keys+offset, res_length);
1286     to+= res_length;
1287   }
1288   DBUG_RETURN(0);
1289 }
1290 
1291 
1292 /**
1293   Test whether priority queue is worth using to get top elements of an
1294   ordered result set. If it is, then allocates buffer for required amount of
1295   records
1296 
1297   @param trace            Current trace context.
1298   @param param            Sort parameters.
1299   @param filesort_info    Filesort information.
1300   @param table            Table to sort.
1301   @param num_rows         Estimate of number of rows in source record set.
1302   @param memory_available Memory available for sorting.
1303 
1304   DESCRIPTION
1305     Given a query like this:
1306       SELECT ... FROM t ORDER BY a1,...,an LIMIT max_rows;
1307     This function tests whether a priority queue should be used to keep
1308     the result. Necessary conditions are:
1309     - estimate that it is actually cheaper than merge-sort
1310     - enough memory to store the <max_rows> records.
1311 
1312     If we don't have space for <max_rows> records, but we *do* have
1313     space for <max_rows> keys, we may rewrite 'table' to sort with
1314     references to records instead of additional data.
1315     (again, based on estimates that it will actually be cheaper).
1316 
1317    @retval
1318     true  - if it's ok to use PQ
1319     false - PQ will be slower than merge-sort, or there is not enough memory.
1320 */
1321 
check_if_pq_applicable(Opt_trace_context * trace,Sort_param * param,Filesort_info * filesort_info,TABLE * table,ha_rows num_rows,ulong memory_available)1322 bool check_if_pq_applicable(Opt_trace_context *trace,
1323                             Sort_param *param,
1324                             Filesort_info *filesort_info,
1325                             TABLE *table, ha_rows num_rows,
1326                             ulong memory_available)
1327 {
1328   DBUG_ENTER("check_if_pq_applicable");
1329 
1330   /*
1331     How much Priority Queue sort is slower than qsort.
1332     Measurements (see unit test) indicate that PQ is roughly 3 times slower.
1333   */
1334   const double PQ_slowness= 3.0;
1335 
1336   Opt_trace_object trace_filesort(trace,
1337                                   "filesort_priority_queue_optimization");
1338   if (param->max_rows == HA_POS_ERROR)
1339   {
1340     trace_filesort
1341       .add("usable", false)
1342       .add_alnum("cause", "not applicable (no LIMIT)");
1343     DBUG_RETURN(false);
1344   }
1345 
1346   trace_filesort
1347     .add("limit", param->max_rows)
1348     .add("rows_estimate", num_rows)
1349     .add("row_size", param->rec_length)
1350     .add("memory_available", memory_available);
1351 
1352   if (param->max_rows + 2 >= UINT_MAX)
1353   {
1354     trace_filesort.add("usable", false).add_alnum("cause", "limit too large");
1355     DBUG_RETURN(false);
1356   }
1357 
1358   ulong num_available_keys=
1359     memory_available / (param->rec_length + sizeof(char*));
1360   // We need 1 extra record in the buffer, when using PQ.
1361   param->max_keys_per_buffer= (uint) param->max_rows + 1;
1362 
1363   if (num_rows < num_available_keys)
1364   {
1365     // The whole source set fits into memory.
1366     if (param->max_rows < num_rows/PQ_slowness )
1367     {
1368       filesort_info->alloc_sort_buffer(param->max_keys_per_buffer,
1369                                        param->rec_length);
1370       trace_filesort.add("chosen", true);
1371       DBUG_RETURN(filesort_info->get_sort_keys() != NULL);
1372     }
1373     else
1374     {
1375       // PQ will be slower.
1376       trace_filesort.add("chosen", false)
1377         .add_alnum("cause", "quicksort_is_cheaper");
1378       DBUG_RETURN(false);
1379     }
1380   }
1381 
1382   // Do we have space for LIMIT rows in memory?
1383   if (param->max_keys_per_buffer < num_available_keys)
1384   {
1385     filesort_info->alloc_sort_buffer(param->max_keys_per_buffer,
1386                                      param->rec_length);
1387     trace_filesort.add("chosen", true);
1388     DBUG_RETURN(filesort_info->get_sort_keys() != NULL);
1389   }
1390 
1391   // Try to strip off addon fields.
1392   if (param->addon_field)
1393   {
1394     const ulong row_length=
1395       param->sort_length + param->ref_length + sizeof(char*);
1396     num_available_keys= memory_available / row_length;
1397 
1398     Opt_trace_object trace_addon(trace, "strip_additional_fields");
1399     trace_addon.add("row_size", row_length);
1400 
1401     // Can we fit all the keys in memory?
1402     if (param->max_keys_per_buffer >= num_available_keys)
1403     {
1404       trace_addon.add("chosen", false).add_alnum("cause", "not_enough_space");
1405     }
1406     else
1407     {
1408       const double sort_merge_cost=
1409         get_merge_many_buffs_cost_fast(num_rows,
1410                                        num_available_keys,
1411                                        row_length);
1412       trace_addon.add("sort_merge_cost", sort_merge_cost);
1413       /*
1414         PQ has cost:
1415         (insert + qsort) * log(queue size) * ROWID_COMPARE_COST +
1416         cost of file lookup afterwards.
1417         The lookup cost is a bit pessimistic: we take scan_time and assume
1418         that on average we find the row after scanning half of the file.
1419         A better estimate would be lookup cost, but note that we are doing
1420         random lookups here, rather than sequential scan.
1421       */
1422       const double pq_cpu_cost=
1423         (PQ_slowness * num_rows + param->max_keys_per_buffer) *
1424         log((double) param->max_keys_per_buffer) * ROWID_COMPARE_COST;
1425       const double pq_io_cost=
1426         param->max_rows * table->file->scan_time() / 2.0;
1427       const double pq_cost= pq_cpu_cost + pq_io_cost;
1428       trace_addon.add("priority_queue_cost", pq_cost);
1429 
1430       if (sort_merge_cost < pq_cost)
1431       {
1432         trace_addon.add("chosen", false);
1433         DBUG_RETURN(false);
1434       }
1435 
1436       trace_addon.add("chosen", true);
1437       filesort_info->alloc_sort_buffer(param->max_keys_per_buffer,
1438                                        param->sort_length + param->ref_length);
1439       if (filesort_info->get_sort_keys())
1440       {
1441         // Make attached data to be references instead of fields.
1442         my_free(filesort_info->addon_buf);
1443         my_free(filesort_info->addon_field);
1444         filesort_info->addon_buf= NULL;
1445         filesort_info->addon_field= NULL;
1446         param->addon_field= NULL;
1447         param->addon_length= 0;
1448 
1449         param->res_length= param->ref_length;
1450         param->sort_length+= param->ref_length;
1451         param->rec_length= param->sort_length;
1452 
1453         DBUG_RETURN(true);
1454       }
1455     }
1456   }
1457   DBUG_RETURN(false);
1458 }
1459 
1460 
1461 /** Merge buffers to make < MERGEBUFF2 buffers. */
1462 
merge_many_buff(Sort_param * param,uchar * sort_buffer,BUFFPEK * buffpek,uint * maxbuffer,IO_CACHE * t_file)1463 int merge_many_buff(Sort_param *param, uchar *sort_buffer,
1464                     BUFFPEK *buffpek, uint *maxbuffer, IO_CACHE *t_file)
1465 {
1466   register uint i;
1467   IO_CACHE t_file2,*from_file,*to_file,*temp;
1468   BUFFPEK *lastbuff;
1469   DBUG_ENTER("merge_many_buff");
1470 
1471   if (*maxbuffer < MERGEBUFF2)
1472     DBUG_RETURN(0);				/* purecov: inspected */
1473   if (flush_io_cache(t_file) ||
1474       open_cached_file(&t_file2,mysql_tmpdir,TEMP_PREFIX,DISK_BUFFER_SIZE,
1475 			MYF(MY_WME)))
1476     DBUG_RETURN(1);				/* purecov: inspected */
1477 
1478   from_file= t_file ; to_file= &t_file2;
1479   while (*maxbuffer >= MERGEBUFF2)
1480   {
1481     if (reinit_io_cache(from_file,READ_CACHE,0L,0,0))
1482       goto cleanup;
1483     if (reinit_io_cache(to_file,WRITE_CACHE,0L,0,0))
1484       goto cleanup;
1485     lastbuff=buffpek;
1486     for (i=0 ; i <= *maxbuffer-MERGEBUFF*3/2 ; i+=MERGEBUFF)
1487     {
1488       if (merge_buffers(param,from_file,to_file,sort_buffer,lastbuff++,
1489 			buffpek+i,buffpek+i+MERGEBUFF-1,0))
1490       goto cleanup;
1491     }
1492     if (merge_buffers(param,from_file,to_file,sort_buffer,lastbuff++,
1493 		      buffpek+i,buffpek+ *maxbuffer,0))
1494       break;					/* purecov: inspected */
1495     if (flush_io_cache(to_file))
1496       break;					/* purecov: inspected */
1497     temp=from_file; from_file=to_file; to_file=temp;
1498     setup_io_cache(from_file);
1499     setup_io_cache(to_file);
1500     *maxbuffer= (uint) (lastbuff-buffpek)-1;
1501   }
1502 cleanup:
1503   close_cached_file(to_file);			// This holds old result
1504   if (to_file == t_file)
1505   {
1506     *t_file=t_file2;				// Copy result file
1507     setup_io_cache(t_file);
1508   }
1509 
1510   DBUG_RETURN(*maxbuffer >= MERGEBUFF2);	/* Return 1 if interrupted */
1511 } /* merge_many_buff */
1512 
1513 
1514 /**
1515   Read data to buffer.
1516 
1517   @retval
1518     (uint)-1 if something goes wrong
1519 */
1520 
read_to_buffer(IO_CACHE * fromfile,BUFFPEK * buffpek,uint rec_length)1521 uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
1522 		    uint rec_length)
1523 {
1524   register uint count;
1525   uint length;
1526 
1527   if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count)))
1528   {
1529     if (mysql_file_pread(fromfile->file, (uchar*) buffpek->base,
1530                          (length= rec_length*count),
1531                          buffpek->file_pos, MYF_RW))
1532       return((uint) -1);			/* purecov: inspected */
1533     buffpek->key=buffpek->base;
1534     buffpek->file_pos+= length;			/* New filepos */
1535     buffpek->count-=	count;
1536     buffpek->mem_count= count;
1537   }
1538   return (count*rec_length);
1539 } /* read_to_buffer */
1540 
1541 
1542 /**
1543   Put all room used by freed buffer to use in adjacent buffer.
1544 
1545   Note, that we can't simply distribute memory evenly between all buffers,
1546   because new areas must not overlap with old ones.
1547 
1548   @param[in] queue      list of non-empty buffers, without freed buffer
1549   @param[in] reuse      empty buffer
1550   @param[in] key_length key length
1551 */
1552 
reuse_freed_buff(QUEUE * queue,BUFFPEK * reuse,uint key_length)1553 void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length)
1554 {
1555   uchar *reuse_end= reuse->base + reuse->max_keys * key_length;
1556   for (uint i= 0; i < queue->elements; ++i)
1557   {
1558     BUFFPEK *bp= (BUFFPEK *) queue_element(queue, i);
1559     if (bp->base + bp->max_keys * key_length == reuse->base)
1560     {
1561       bp->max_keys+= reuse->max_keys;
1562       return;
1563     }
1564     else if (bp->base == reuse_end)
1565     {
1566       bp->base= reuse->base;
1567       bp->max_keys+= reuse->max_keys;
1568       return;
1569     }
1570   }
1571   DBUG_ASSERT(0);
1572 }
1573 
1574 
1575 /**
1576   Merge buffers to one buffer.
1577 
1578   @param param        Sort parameter
1579   @param from_file    File with source data (BUFFPEKs point to this file)
1580   @param to_file      File to write the sorted result data.
1581   @param sort_buffer  Buffer for data to store up to MERGEBUFF2 sort keys.
1582   @param lastbuff     OUT Store here BUFFPEK describing data written to to_file
1583   @param Fb           First element in source BUFFPEKs array
1584   @param Tb           Last element in source BUFFPEKs array
1585   @param flag
1586 
1587   @retval
1588     0      OK
1589   @retval
1590     other  error
1591 */
1592 
merge_buffers(Sort_param * param,IO_CACHE * from_file,IO_CACHE * to_file,uchar * sort_buffer,BUFFPEK * lastbuff,BUFFPEK * Fb,BUFFPEK * Tb,int flag)1593 int merge_buffers(Sort_param *param, IO_CACHE *from_file,
1594                   IO_CACHE *to_file, uchar *sort_buffer,
1595                   BUFFPEK *lastbuff, BUFFPEK *Fb, BUFFPEK *Tb,
1596                   int flag)
1597 {
1598   int error;
1599   uint rec_length,res_length,offset;
1600   size_t sort_length;
1601   ulong maxcount;
1602   ha_rows max_rows,org_max_rows;
1603   my_off_t to_start_filepos;
1604   uchar *strpos;
1605   BUFFPEK *buffpek;
1606   QUEUE queue;
1607   qsort2_cmp cmp;
1608   void *first_cmp_arg;
1609   volatile THD::killed_state *killed= &current_thd->killed;
1610   THD::killed_state not_killable;
1611   DBUG_ENTER("merge_buffers");
1612 
1613   current_thd->inc_status_sort_merge_passes();
1614   if (param->not_killable)
1615   {
1616     killed= &not_killable;
1617     not_killable= THD::NOT_KILLED;
1618   }
1619 
1620   error=0;
1621   rec_length= param->rec_length;
1622   res_length= param->res_length;
1623   sort_length= param->sort_length;
1624   offset= rec_length-res_length;
1625   maxcount= (ulong) (param->max_keys_per_buffer / ((uint) (Tb-Fb) +1));
1626   to_start_filepos= my_b_tell(to_file);
1627   strpos= (uchar*) sort_buffer;
1628   org_max_rows=max_rows= param->max_rows;
1629 
1630   /* The following will fire if there is not enough space in sort_buffer */
1631   DBUG_ASSERT(maxcount!=0);
1632 
1633   if (param->unique_buff)
1634   {
1635     cmp= param->compare;
1636     first_cmp_arg= (void *) &param->cmp_context;
1637   }
1638   else
1639   {
1640     cmp= get_ptr_compare(sort_length);
1641     first_cmp_arg= (void*) &sort_length;
1642   }
1643   if (init_queue(&queue, (uint) (Tb-Fb)+1, offsetof(BUFFPEK,key), 0,
1644                  (queue_compare) cmp, first_cmp_arg))
1645     DBUG_RETURN(1);                                /* purecov: inspected */
1646   for (buffpek= Fb ; buffpek <= Tb ; buffpek++)
1647   {
1648     buffpek->base= strpos;
1649     buffpek->max_keys= maxcount;
1650     strpos+=
1651       (uint) (error= (int)read_to_buffer(from_file, buffpek, rec_length));
1652     if (error == -1)
1653       goto err;					/* purecov: inspected */
1654     buffpek->max_keys= buffpek->mem_count;	// If less data in buffers than expected
1655     queue_insert(&queue, (uchar*) buffpek);
1656   }
1657 
1658   if (param->unique_buff)
1659   {
1660     /*
1661        Called by Unique::get()
1662        Copy the first argument to param->unique_buff for unique removal.
1663        Store it also in 'to_file'.
1664 
1665        This is safe as we know that there is always more than one element
1666        in each block to merge (This is guaranteed by the Unique:: algorithm
1667     */
1668     buffpek= (BUFFPEK*) queue_top(&queue);
1669     memcpy(param->unique_buff, buffpek->key, rec_length);
1670     if (my_b_write(to_file, (uchar*) buffpek->key, rec_length))
1671     {
1672       error=1; goto err;                        /* purecov: inspected */
1673     }
1674     buffpek->key+= rec_length;
1675     buffpek->mem_count--;
1676     if (!--max_rows)
1677     {
1678       error= 0;                                       /* purecov: inspected */
1679       goto end;                                       /* purecov: inspected */
1680     }
1681     queue_replaced(&queue);                        // Top element has been used
1682   }
1683   else
1684     cmp= 0;                                        // Not unique
1685 
1686   while (queue.elements > 1)
1687   {
1688     if (*killed)
1689     {
1690       error= 1; goto err;                        /* purecov: inspected */
1691     }
1692     for (;;)
1693     {
1694       buffpek= (BUFFPEK*) queue_top(&queue);
1695       if (cmp)                                        // Remove duplicates
1696       {
1697         if (!(*cmp)(first_cmp_arg, &(param->unique_buff),
1698                     (uchar**) &buffpek->key))
1699               goto skip_duplicate;
1700             memcpy(param->unique_buff, (uchar*) buffpek->key, rec_length);
1701       }
1702       if (flag == 0)
1703       {
1704         if (my_b_write(to_file,(uchar*) buffpek->key, rec_length))
1705         {
1706           error=1; goto err;                        /* purecov: inspected */
1707         }
1708       }
1709       else
1710       {
1711         if (my_b_write(to_file, (uchar*) buffpek->key+offset, res_length))
1712         {
1713           error=1; goto err;                        /* purecov: inspected */
1714         }
1715       }
1716       if (!--max_rows)
1717       {
1718         error= 0;                               /* purecov: inspected */
1719         goto end;                               /* purecov: inspected */
1720       }
1721 
1722     skip_duplicate:
1723       buffpek->key+= rec_length;
1724       if (! --buffpek->mem_count)
1725       {
1726         if (!(error= (int) read_to_buffer(from_file,buffpek,
1727                                           rec_length)))
1728         {
1729           (void) queue_remove(&queue,0);
1730           reuse_freed_buff(&queue, buffpek, rec_length);
1731           break;                        /* One buffer have been removed */
1732         }
1733         else if (error == -1)
1734           goto err;                        /* purecov: inspected */
1735       }
1736       queue_replaced(&queue);              /* Top element has been replaced */
1737     }
1738   }
1739   buffpek= (BUFFPEK*) queue_top(&queue);
1740   buffpek->base= sort_buffer;
1741   buffpek->max_keys= param->max_keys_per_buffer;
1742 
1743   /*
1744     As we know all entries in the buffer are unique, we only have to
1745     check if the first one is the same as the last one we wrote
1746   */
1747   if (cmp)
1748   {
1749     if (!(*cmp)(first_cmp_arg, &(param->unique_buff), (uchar**) &buffpek->key))
1750     {
1751       buffpek->key+= rec_length;         // Remove duplicate
1752       --buffpek->mem_count;
1753     }
1754   }
1755 
1756   do
1757   {
1758     if ((ha_rows) buffpek->mem_count > max_rows)
1759     {                                        /* Don't write too many records */
1760       buffpek->mem_count= (uint) max_rows;
1761       buffpek->count= 0;                        /* Don't read more */
1762     }
1763     max_rows-= buffpek->mem_count;
1764     if (flag == 0)
1765     {
1766       if (my_b_write(to_file,(uchar*) buffpek->key,
1767                      (rec_length*buffpek->mem_count)))
1768       {
1769         error= 1; goto err;                        /* purecov: inspected */
1770       }
1771     }
1772     else
1773     {
1774       register uchar *end;
1775       strpos= buffpek->key+offset;
1776       for (end= strpos+buffpek->mem_count*rec_length ;
1777            strpos != end ;
1778            strpos+= rec_length)
1779       {
1780         if (my_b_write(to_file, (uchar *) strpos, res_length))
1781         {
1782           error=1; goto err;
1783         }
1784       }
1785     }
1786   }
1787   while ((error=(int) read_to_buffer(from_file,buffpek, rec_length))
1788          != -1 && error != 0);
1789 
1790 end:
1791   lastbuff->count= min(org_max_rows-max_rows, param->max_rows);
1792   lastbuff->file_pos= to_start_filepos;
1793 err:
1794   delete_queue(&queue);
1795   DBUG_RETURN(error);
1796 } /* merge_buffers */
1797 
1798 
1799 	/* Do a merge to output-file (save only positions) */
1800 
merge_index(Sort_param * param,uchar * sort_buffer,BUFFPEK * buffpek,uint maxbuffer,IO_CACHE * tempfile,IO_CACHE * outfile)1801 static int merge_index(Sort_param *param, uchar *sort_buffer,
1802                        BUFFPEK *buffpek, uint maxbuffer,
1803                        IO_CACHE *tempfile, IO_CACHE *outfile)
1804 {
1805   DBUG_ENTER("merge_index");
1806   if (merge_buffers(param,tempfile,outfile,sort_buffer,buffpek,buffpek,
1807 		    buffpek+maxbuffer,1))
1808     DBUG_RETURN(1);				/* purecov: inspected */
1809   DBUG_RETURN(0);
1810 } /* merge_index */
1811 
1812 
suffix_length(ulong string_length)1813 static uint suffix_length(ulong string_length)
1814 {
1815   if (string_length < 256)
1816     return 1;
1817   if (string_length < 256L*256L)
1818     return 2;
1819   if (string_length < 256L*256L*256L)
1820     return 3;
1821   return 4;                                     // Can't sort longer than 4G
1822 }
1823 
1824 
1825 
1826 /**
1827   Calculate length of sort key.
1828 
1829   @param thd			  Thread handler
1830   @param sortorder		  Order of items to sort
1831   @param s_length	          Number of items to sort
1832   @param[out] multi_byte_charset Set to 1 if we are using multi-byte charset
1833                                  (In which case we have to use strxnfrm())
1834 
1835   @note
1836     sortorder->length is updated for each sort item.
1837   @n
1838     sortorder->need_strxnfrm is set 1 if we have to use strxnfrm
1839 
1840   @return
1841     Total length of sort buffer in bytes
1842 */
1843 
1844 uint
sortlength(THD * thd,SORT_FIELD * sortorder,uint s_length,bool * multi_byte_charset)1845 sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
1846            bool *multi_byte_charset)
1847 {
1848   uint total_length= 0;
1849   const CHARSET_INFO *cs;
1850   *multi_byte_charset= false;
1851 
1852   for (; s_length-- ; sortorder++)
1853   {
1854     sortorder->need_strxnfrm= 0;
1855     sortorder->suffix_length= 0;
1856     if (sortorder->field)
1857     {
1858       cs= sortorder->field->sort_charset();
1859       sortorder->length= sortorder->field->sort_length();
1860 
1861       if (use_strnxfrm((cs=sortorder->field->sort_charset())))
1862       {
1863         sortorder->need_strxnfrm= 1;
1864         *multi_byte_charset= 1;
1865         sortorder->length= cs->coll->strnxfrmlen(cs, sortorder->length);
1866       }
1867       if (sortorder->field->maybe_null())
1868         total_length++;                       // Place for NULL marker
1869 
1870       if (sortorder->field->result_type() == STRING_RESULT &&
1871           !sortorder->field->is_temporal())
1872       {
1873         set_if_smaller(sortorder->length, thd->variables.max_sort_length);
1874       }
1875     }
1876     else
1877     {
1878       sortorder->result_type= sortorder->item->result_type();
1879       if (sortorder->item->is_temporal())
1880         sortorder->result_type= INT_RESULT;
1881       switch (sortorder->result_type) {
1882       case STRING_RESULT:
1883 	sortorder->length= sortorder->item->max_length;
1884         set_if_smaller(sortorder->length, thd->variables.max_sort_length);
1885 	if (use_strnxfrm((cs=sortorder->item->collation.collation)))
1886 	{
1887           sortorder->length= cs->coll->strnxfrmlen(cs, sortorder->length);
1888 	  sortorder->need_strxnfrm= 1;
1889 	  *multi_byte_charset= 1;
1890 	}
1891         else if (cs == &my_charset_bin)
1892         {
1893           /* Store length last to be able to sort blob/varbinary */
1894           sortorder->suffix_length= suffix_length(sortorder->length);
1895           sortorder->length+= sortorder->suffix_length;
1896         }
1897 	break;
1898       case INT_RESULT:
1899 #if SIZEOF_LONG_LONG > 4
1900 	sortorder->length=8;			// Size of intern longlong
1901 #else
1902 	sortorder->length=4;
1903 #endif
1904 	break;
1905       case DECIMAL_RESULT:
1906         sortorder->length=
1907           my_decimal_get_binary_size(sortorder->item->max_length -
1908                                      (sortorder->item->decimals ? 1 : 0),
1909                                      sortorder->item->decimals);
1910         break;
1911       case REAL_RESULT:
1912 	sortorder->length=sizeof(double);
1913 	break;
1914       case ROW_RESULT:
1915       default:
1916 	// This case should never be choosen
1917 	DBUG_ASSERT(0);
1918 	break;
1919       }
1920       if (sortorder->item->maybe_null)
1921         total_length++;                       // Place for NULL marker
1922     }
1923     total_length+= sortorder->length;
1924   }
1925   sortorder->field= NULL;                       // end marker
1926   DBUG_PRINT("info",("sort_length: %u", total_length));
1927   return total_length;
1928 }
1929 
1930 
1931 /**
1932   Get descriptors of fields appended to sorted fields and
1933   calculate its total length.
1934 
1935   The function first finds out what fields are used in the result set.
1936   Then it calculates the length of the buffer to store the values of
1937   these fields together with the value of sort values.
1938   If the calculated length is not greater than max_length_for_sort_data
1939   the function allocates memory for an array of descriptors containing
1940   layouts for the values of the non-sorted fields in the buffer and
1941   fills them.
1942 
1943   @param thd                 Current thread
1944   @param ptabfield           Array of references to the table fields
1945   @param sortlength          Total length of sorted fields
1946   @param[out] plength        Total length of appended fields
1947 
1948   @note
1949     The null bits for the appended values are supposed to be put together
1950     and stored the buffer just ahead of the value of the first field.
1951 
1952   @return
1953     Pointer to the layout descriptors for the appended fields, if any
1954   @retval
1955     NULL   if we do not store field values with sort data.
1956 */
1957 
1958 static SORT_ADDON_FIELD *
get_addon_fields(ulong max_length_for_sort_data,TABLE * const table,Field ** ptabfield,uint sortlength,uint * plength)1959 get_addon_fields(ulong max_length_for_sort_data,
1960                  TABLE *const table,
1961                  Field **ptabfield, uint sortlength, uint *plength)
1962 {
1963   Field **pfield;
1964   Field *field;
1965   SORT_ADDON_FIELD *addonf;
1966   uint length= 0;
1967   uint fields= 0;
1968   uint null_fields= 0;
1969   MY_BITMAP *read_set= table->read_set;
1970 
1971   /*
1972     If there is a reference to a field in the query add it
1973     to the the set of appended fields.
1974     Note for future refinement:
1975     This this a too strong condition.
1976     Actually we need only the fields referred in the
1977     result set. And for some of them it makes sense to use
1978     the values directly from sorted fields.
1979   */
1980   *plength= 0;
1981 
1982   for (pfield= ptabfield; (field= *pfield) ; pfield++)
1983   {
1984     if (!bitmap_is_set(read_set, field->field_index))
1985       continue;
1986     if (field->flags & BLOB_FLAG)
1987       return 0;
1988     length+= field->max_packed_col_length(field->pack_length());
1989     if (field->maybe_null())
1990       null_fields++;
1991     fields++;
1992   }
1993   if (!fields)
1994     return 0;
1995   length+= (null_fields+7)/8;
1996 
1997   if (length+sortlength > max_length_for_sort_data ||
1998       !(addonf= (SORT_ADDON_FIELD *) my_malloc(sizeof(SORT_ADDON_FIELD)*
1999                                                (fields+1), MYF(MY_WME))))
2000     return 0;
2001 
2002   *plength= length;
2003   length= (null_fields+7)/8;
2004   null_fields= 0;
2005   for (pfield= ptabfield; (field= *pfield) ; pfield++)
2006   {
2007     if (!bitmap_is_set(read_set, field->field_index))
2008       continue;
2009     addonf->field= field;
2010     addonf->offset= length;
2011     if (field->maybe_null())
2012     {
2013       addonf->null_offset= null_fields/8;
2014       addonf->null_bit= 1<<(null_fields & 7);
2015       null_fields++;
2016     }
2017     else
2018     {
2019       addonf->null_offset= 0;
2020       addonf->null_bit= 0;
2021     }
2022     addonf->length= field->max_packed_col_length(field->pack_length());
2023     length+= addonf->length;
2024     addonf++;
2025   }
2026   addonf->field= 0;     // Put end marker
2027 
2028   DBUG_PRINT("info",("addon_length: %d",length));
2029   return (addonf-fields);
2030 }
2031 
2032 
2033 /**
2034   Copy (unpack) values appended to sorted fields from a buffer back to
2035   their regular positions specified by the Field::ptr pointers.
2036 
2037   @param addon_field     Array of descriptors for appended fields
2038   @param buff            Buffer which to unpack the value from
2039 
2040   @note
2041     The function is supposed to be used only as a callback function
2042     when getting field values for the sorted result set.
2043 
2044   @return
2045     void.
2046 */
2047 
2048 static void
unpack_addon_fields(struct st_sort_addon_field * addon_field,uchar * buff)2049 unpack_addon_fields(struct st_sort_addon_field *addon_field, uchar *buff)
2050 {
2051   Field *field;
2052   SORT_ADDON_FIELD *addonf= addon_field;
2053 
2054   for ( ; (field= addonf->field) ; addonf++)
2055   {
2056     if (addonf->null_bit && (addonf->null_bit & buff[addonf->null_offset]))
2057     {
2058       field->set_null();
2059       continue;
2060     }
2061     field->set_notnull();
2062     field->unpack(field->ptr, buff + addonf->offset);
2063   }
2064 }
2065 
2066 /*
2067 ** functions to change a double or float to a sortable string
2068 ** The following should work for IEEE
2069 */
2070 
2071 #define DBL_EXP_DIG (sizeof(double)*8-DBL_MANT_DIG)
2072 
change_double_for_sort(double nr,uchar * to)2073 void change_double_for_sort(double nr,uchar *to)
2074 {
2075   uchar *tmp=(uchar*) to;
2076   if (nr == 0.0)
2077   {						/* Change to zero string */
2078     tmp[0]=(uchar) 128;
2079     memset(tmp+1, 0, sizeof(nr)-1);
2080   }
2081   else
2082   {
2083 #ifdef WORDS_BIGENDIAN
2084     memcpy(tmp, &nr, sizeof(nr));
2085 #else
2086     {
2087       uchar *ptr= (uchar*) &nr;
2088 #if defined(__FLOAT_WORD_ORDER) && (__FLOAT_WORD_ORDER == __BIG_ENDIAN)
2089       tmp[0]= ptr[3]; tmp[1]=ptr[2]; tmp[2]= ptr[1]; tmp[3]=ptr[0];
2090       tmp[4]= ptr[7]; tmp[5]=ptr[6]; tmp[6]= ptr[5]; tmp[7]=ptr[4];
2091 #else
2092       tmp[0]= ptr[7]; tmp[1]=ptr[6]; tmp[2]= ptr[5]; tmp[3]=ptr[4];
2093       tmp[4]= ptr[3]; tmp[5]=ptr[2]; tmp[6]= ptr[1]; tmp[7]=ptr[0];
2094 #endif
2095     }
2096 #endif
2097     if (tmp[0] & 128)				/* Negative */
2098     {						/* make complement */
2099       uint i;
2100       for (i=0 ; i < sizeof(nr); i++)
2101 	tmp[i]=tmp[i] ^ (uchar) 255;
2102     }
2103     else
2104     {					/* Set high and move exponent one up */
2105       ushort exp_part=(((ushort) tmp[0] << 8) | (ushort) tmp[1] |
2106 		       (ushort) 32768);
2107       exp_part+= (ushort) 1 << (16-1-DBL_EXP_DIG);
2108       tmp[0]= (uchar) (exp_part >> 8);
2109       tmp[1]= (uchar) exp_part;
2110     }
2111   }
2112 }
2113