1 /*
2    Copyright (c) 2000, 2020, Oracle and/or its affiliates.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License, version 2.0,
6    as published by the Free Software Foundation.
7 
8    This program is also distributed with certain software (including
9    but not limited to OpenSSL) that is licensed under separate terms,
10    as designated in a particular file or component or in included license
11    documentation.  The authors of MySQL hereby grant you an additional
12    permission to link the program and your derivative works with the
13    separately licensed software that they have included with MySQL.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License, version 2.0, for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to the Free Software
22    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
23 
24 
25 /**
26   @file
27 
28   @brief
29   Sorts a database
30 */
31 
32 #include "sql_priv.h"
33 #include "filesort.h"
34 #include "unireg.h"                      // REQUIRED by other includes
35 #ifdef HAVE_STDDEF_H
36 #include <stddef.h>			/* for macro offsetof */
37 #endif
38 #include <m_ctype.h>
39 #include "sql_sort.h"
40 #include "probes_mysql.h"
41 #include "opt_range.h"                          // SQL_SELECT
42 #include "bounded_queue.h"
43 #include "filesort_utils.h"
44 #include "sql_select.h"
45 #include "debug_sync.h"
46 #include "opt_trace.h"
47 #include "sql_optimizer.h"              // JOIN
48 #include "sql_base.h"
49 
50 #include <algorithm>
51 #include <utility>
52 using std::max;
53 using std::min;
54 
55 	/* functions defined in this file */
56 
57 static uchar *read_buffpek_from_file(IO_CACHE *buffer_file, uint count,
58                                      uchar *buf);
59 static ha_rows find_all_keys(Sort_param *param,SQL_SELECT *select,
60                              Filesort_info *fs_info,
61                              IO_CACHE *buffer_file,
62                              IO_CACHE *tempfile,
63                              Bounded_queue<uchar, uchar> *pq,
64                              ha_rows *found_rows);
65 static int write_keys(Sort_param *param, Filesort_info *fs_info,
66                       uint count, IO_CACHE *buffer_file, IO_CACHE *tempfile);
67 static void register_used_fields(Sort_param *param);
68 static int merge_index(Sort_param *param,uchar *sort_buffer,
69                        BUFFPEK *buffpek,
70                        uint maxbuffer,IO_CACHE *tempfile,
71                        IO_CACHE *outfile);
72 static bool save_index(Sort_param *param, uint count,
73                        Filesort_info *table_sort);
74 static uint suffix_length(ulong string_length);
75 static SORT_ADDON_FIELD *get_addon_fields(ulong max_length_for_sort_data,
76                                           TABLE *const table,
77                                           Field **ptabfield,
78                                           uint sortlength, uint *plength);
79 static void unpack_addon_fields(struct st_sort_addon_field *addon_field,
80                                 uchar *buff);
81 static bool check_if_pq_applicable(Opt_trace_context *trace,
82                                    Sort_param *param, Filesort_info *info,
83                                    TABLE *table,
84                                    ha_rows records, ulong memory_available);
85 
86 
init_for_filesort(uint sortlen,TABLE * table,ulong max_length_for_sort_data,ha_rows maxrows,bool sort_positions)87 void Sort_param::init_for_filesort(uint sortlen, TABLE *table,
88                                    ulong max_length_for_sort_data,
89                                    ha_rows maxrows, bool sort_positions)
90 {
91   sort_length= sortlen;
92   ref_length= table->file->ref_length;
93   if (!(table->file->ha_table_flags() & HA_FAST_KEY_READ) &&
94       !table->fulltext_searched && !sort_positions)
95   {
96     /*
97       Get the descriptors of all fields whose values are appended
98       to sorted fields and get its total length in addon_length.
99     */
100     addon_field= get_addon_fields(max_length_for_sort_data,
101                                   table,
102                                   table->field, sort_length, &addon_length);
103   }
104   if (addon_field)
105     res_length= addon_length;
106   else
107   {
108     res_length= ref_length;
109     /*
110       The reference to the record is considered
111       as an additional sorted field
112     */
113     sort_length+= ref_length;
114   }
115   rec_length= sort_length + addon_length;
116   max_rows= maxrows;
117 }
118 
119 
trace_filesort_information(Opt_trace_context * trace,const SORT_FIELD * sortorder,uint s_length)120 static void trace_filesort_information(Opt_trace_context *trace,
121                                        const SORT_FIELD *sortorder,
122                                        uint s_length)
123 {
124   if (!trace->is_started())
125     return;
126 
127   Opt_trace_array trace_filesort(trace, "filesort_information");
128   for (; s_length-- ; sortorder++)
129   {
130     Opt_trace_object oto(trace);
131     oto.add_alnum("direction", sortorder->reverse ? "desc" : "asc");
132 
133     if (sortorder->field)
134     {
135       if (strlen(sortorder->field->table->alias) != 0)
136         oto.add_utf8_table(sortorder->field->table);
137       else
138         oto.add_alnum("table", "intermediate_tmp_table");
139       oto.add_alnum("field", sortorder->field->field_name ?
140                     sortorder->field->field_name : "tmp_table_column");
141     }
142     else
143       oto.add("expression", sortorder->item);
144   }
145 }
146 
147 
148 /**
149   Sort a table.
150   Creates a set of pointers that can be used to read the rows
151   in sorted order. This should be done with the functions
152   in records.cc.
153 
154   Before calling filesort, one must have done
155   table->file->info(HA_STATUS_VARIABLE)
156 
157   The result set is stored in table->io_cache or
158   table->record_pointers.
159 
160   @param      thd            Current thread
161   @param      table          Table to sort
162   @param      filesort       How to sort the table
163   @param      sort_positions Set to TRUE if we want to force sorting by position
164                              (Needed by UPDATE/INSERT or ALTER TABLE or
165                               when rowids are required by executor)
166   @param[out] examined_rows  Store number of examined rows here
167                              This is the number of found rows before
168                              applying WHERE condition.
169   @param[out] found_rows     Store the number of found rows here.
170                              This is the number of found rows after
171                              applying WHERE condition.
172 
173   @note
174     If we sort by position (like if sort_positions is 1) filesort() will
175     call table->prepare_for_position().
176 
177   @retval
178     HA_POS_ERROR	Error
179   @retval
180     \#			Number of rows in the result, could be less than
181                         found_rows if LIMIT were provided.
182 */
183 
filesort(THD * thd,TABLE * table,Filesort * filesort,bool sort_positions,ha_rows * examined_rows,ha_rows * found_rows)184 ha_rows filesort(THD *thd, TABLE *table, Filesort *filesort,
185                  bool sort_positions, ha_rows *examined_rows,
186                  ha_rows *found_rows)
187 {
188   int error;
189   ulong memory_available= thd->variables.sortbuff_size;
190   uint maxbuffer;
191   BUFFPEK *buffpek;
192   ha_rows num_rows= HA_POS_ERROR;
193   IO_CACHE tempfile, buffpek_pointers, *outfile;
194   Sort_param param;
195   bool multi_byte_charset;
196   Bounded_queue<uchar, uchar> pq;
197   Opt_trace_context * const trace= &thd->opt_trace;
198   SQL_SELECT *const select= filesort->select;
199   ha_rows max_rows= filesort->limit;
200   uint s_length= 0;
201 
202   DBUG_ENTER("filesort");
203 
204   if (!(s_length= filesort->make_sortorder()))
205     DBUG_RETURN(HA_POS_ERROR);  /* purecov: inspected */
206 
207   /*
208     We need a nameless wrapper, since we may be inside the "steps" of
209     "join_execution".
210   */
211   Opt_trace_object trace_wrapper(trace);
212   trace_filesort_information(trace, filesort->sortorder, s_length);
213 
214 #ifdef SKIP_DBUG_IN_FILESORT
215   DBUG_PUSH("");		/* No DBUG here */
216 #endif
217   Item_subselect *subselect= table->reginfo.join_tab ?
218      table->reginfo.join_tab->join->select_lex->master_unit()->item :
219      NULL;
220 
221   MYSQL_FILESORT_START(table->s->db.str, table->s->table_name.str);
222   DEBUG_SYNC(thd, "filesort_start");
223 
224   /*
225    Release InnoDB's adaptive hash index latch (if holding) before
226    running a sort.
227   */
228   ha_release_temporary_latches(thd);
229 
230   /*
231     Don't use table->sort in filesort as it is also used by
232     QUICK_INDEX_MERGE_SELECT. Work with a copy and put it back at the end
233     when index_merge select has finished with it.
234   */
235   Filesort_info table_sort= table->sort;
236   table->sort.io_cache= NULL;
237   DBUG_ASSERT(table_sort.record_pointers == NULL);
238 
239   outfile= table_sort.io_cache;
240   my_b_clear(&tempfile);
241   my_b_clear(&buffpek_pointers);
242   buffpek=0;
243   error= 1;
244 
245   param.init_for_filesort(sortlength(thd, filesort->sortorder, s_length,
246                                      &multi_byte_charset),
247                           table,
248                           thd->variables.max_length_for_sort_data,
249                           max_rows, sort_positions);
250 
251   table_sort.addon_buf= 0;
252   table_sort.addon_length= param.addon_length;
253   table_sort.addon_field= param.addon_field;
254   table_sort.unpack= unpack_addon_fields;
255   if (param.addon_field &&
256       !(table_sort.addon_buf=
257         (uchar *) my_malloc(param.addon_length, MYF(MY_WME))))
258       goto err;
259 
260   if (select && select->quick)
261     thd->inc_status_sort_range();
262   else
263     thd->inc_status_sort_scan();
264 
265   thd->query_plan_flags|= QPLAN_FILESORT;
266 
267   // If number of rows is not known, use as much of sort buffer as possible.
268   num_rows= table->file->estimate_rows_upper_bound();
269 
270   if (multi_byte_charset &&
271       !(param.tmp_buffer= (char*) my_malloc(param.sort_length,MYF(MY_WME))))
272     goto err;
273 
274   if (check_if_pq_applicable(trace, &param, &table_sort,
275                              table, num_rows, memory_available))
276   {
277     DBUG_PRINT("info", ("filesort PQ is applicable"));
278     const size_t compare_length= param.sort_length;
279     if (pq.init(param.max_rows,
280                 true,                           // max_at_top
281                 NULL,                           // compare_function
282                 compare_length,
283                 &make_sortkey, &param, table_sort.get_sort_keys()))
284     {
285       /*
286        If we fail to init pq, we have to give up:
287        out of memory means my_malloc() will call my_error().
288       */
289       DBUG_PRINT("info", ("failed to allocate PQ"));
290       table_sort.free_sort_buffer();
291       DBUG_ASSERT(thd->is_error());
292       goto err;
293     }
294     // For PQ queries (with limit) we initialize all pointers.
295     table_sort.init_record_pointers();
296     filesort->using_pq= true;
297   }
298   else
299   {
300     DBUG_PRINT("info", ("filesort PQ is not applicable"));
301 
302     /*
303       We need space for at least one record from each merge chunk, i.e.
304         param->max_keys_per_buffer >= MERGEBUFF2
305       See merge_buffers()),
306       memory_available must be large enough for
307         param->max_keys_per_buffer * (record + record pointer) bytes
308       (the main sort buffer, see alloc_sort_buffer()).
309       Hence this minimum:
310     */
311     const ulong min_sort_memory=
312       max<ulong>(MIN_SORT_MEMORY,
313                  ALIGN_SIZE(MERGEBUFF2 * (param.rec_length + sizeof(uchar*))));
314     /*
315       Cannot depend on num_rows. For external sort, space for upto MERGEBUFF2
316       rows is required.
317     */
318     if (num_rows < MERGEBUFF2)
319       num_rows= MERGEBUFF2;
320 
321     while (memory_available >= min_sort_memory)
322     {
323       ha_rows keys= memory_available / (param.rec_length + sizeof(char*));
324       param.max_keys_per_buffer= (uint) min(num_rows, keys);
325 
326       table_sort.alloc_sort_buffer(param.max_keys_per_buffer, param.rec_length);
327       if (table_sort.sort_buffer_size() > 0 && table_sort.get_sort_keys())
328         break;
329       ulong old_memory_available= memory_available;
330       memory_available= memory_available/4*3;
331       if (memory_available < min_sort_memory &&
332           old_memory_available > min_sort_memory)
333         memory_available= min_sort_memory;
334     }
335     if (memory_available < min_sort_memory)
336     {
337       my_error(ER_OUT_OF_SORTMEMORY,MYF(ME_ERROR + ME_FATALERROR));
338       goto err;
339     }
340   }
341 
342   if (open_cached_file(&buffpek_pointers,mysql_tmpdir,TEMP_PREFIX,
343 		       DISK_BUFFER_SIZE, MYF(MY_WME)))
344     goto err;
345 
346   param.sort_form= table;
347   param.end= (param.local_sortorder= filesort->sortorder) + s_length;
348   // New scope, because subquery execution must be traced within an array.
349   {
350     Opt_trace_array ota(trace, "filesort_execution");
351     num_rows= find_all_keys(&param, select,
352                             &table_sort,
353                             &buffpek_pointers,
354                             &tempfile,
355                             pq.is_initialized() ? &pq : NULL,
356                             found_rows);
357     if (num_rows == HA_POS_ERROR)
358       goto err;
359   }
360 
361   maxbuffer= (uint) (my_b_tell(&buffpek_pointers)/sizeof(*buffpek));
362 
363   Opt_trace_object(trace, "filesort_summary")
364     .add("rows", num_rows)
365     .add("examined_rows", param.examined_rows)
366     .add("number_of_tmp_files", maxbuffer)
367     .add("sort_buffer_size", table_sort.sort_buffer_size())
368     .add_alnum("sort_mode",
369                param.addon_field ?
370                "<sort_key, additional_fields>" : "<sort_key, rowid>");
371 
372   if (maxbuffer == 0)			// The whole set is in memory
373   {
374     if (save_index(&param, (uint) num_rows, &table_sort))
375       goto err;
376   }
377   else
378   {
379     thd->query_plan_flags|= QPLAN_FILESORT_DISK;
380     if (table_sort.buffpek && table_sort.buffpek_len < maxbuffer)
381     {
382       my_free(table_sort.buffpek);
383       table_sort.buffpek= 0;
384     }
385     if (!(table_sort.buffpek=
386           (uchar *) read_buffpek_from_file(&buffpek_pointers, maxbuffer,
387                                  table_sort.buffpek)))
388       goto err;
389     buffpek= (BUFFPEK *) table_sort.buffpek;
390     table_sort.buffpek_len= maxbuffer;
391     close_cached_file(&buffpek_pointers);
392 	/* Open cached file if it isn't open */
393     if (! my_b_inited(outfile) &&
394 	open_cached_file(outfile,mysql_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER,
395 			  MYF(MY_WME)))
396       goto err;
397     if (reinit_io_cache(outfile,WRITE_CACHE,0L,0,0))
398       goto err;
399 
400     /*
401       Use also the space previously used by string pointers in sort_buffer
402       for temporary key storage.
403     */
404     param.max_keys_per_buffer= table_sort.sort_buffer_size() / param.rec_length;
405     maxbuffer--;				// Offset from 0
406     if (merge_many_buff(&param,
407                         (uchar*) table_sort.get_sort_keys(),
408                         buffpek,&maxbuffer,
409 			&tempfile))
410       goto err;
411     if (flush_io_cache(&tempfile) ||
412 	reinit_io_cache(&tempfile,READ_CACHE,0L,0,0))
413       goto err;
414     if (merge_index(&param,
415                     (uchar*) table_sort.get_sort_keys(),
416                     buffpek,
417                     maxbuffer,
418                     &tempfile,
419 		    outfile))
420       goto err;
421   }
422 
423   if (num_rows > param.max_rows)
424   {
425     // If find_all_keys() produced more results than the query LIMIT.
426     num_rows= param.max_rows;
427   }
428   error= 0;
429 
430  err:
431   my_free(param.tmp_buffer);
432   if (!subselect || !subselect->is_uncacheable())
433   {
434     table_sort.free_sort_buffer();
435     my_free(buffpek);
436     table_sort.buffpek= 0;
437     table_sort.buffpek_len= 0;
438   }
439   close_cached_file(&tempfile);
440   close_cached_file(&buffpek_pointers);
441 
442   /* free resources allocated  for QUICK_INDEX_MERGE_SELECT */
443   free_io_cache(table);
444 
445   if (my_b_inited(outfile))
446   {
447     if (flush_io_cache(outfile))
448       error=1;
449     {
450       my_off_t save_pos=outfile->pos_in_file;
451       /* For following reads */
452       if (reinit_io_cache(outfile,READ_CACHE,0L,0,0))
453 	error=1;
454       outfile->end_of_file=save_pos;
455     }
456   }
457   if (error)
458   {
459     int kill_errno= thd->killed_errno();
460     DBUG_ASSERT(thd->is_error() || kill_errno);
461     my_printf_error(ER_FILSORT_ABORT,
462                     "%s: %s",
463                     MYF(ME_ERROR + ME_WAITTANG),
464                     ER_THD(thd, ER_FILSORT_ABORT),
465                     kill_errno ? ((kill_errno == THD::KILL_CONNECTION &&
466                                  !shutdown_in_progress) ? ER(THD::KILL_QUERY) :
467                                                           ER(kill_errno)) :
468                                  thd->get_stmt_da()->message());
469 
470     if (log_warnings > 1)
471     {
472       sql_print_warning("%s, host: %s, user: %s, thread: %lu, query: %-.4096s",
473                         ER_THD(thd, ER_FILSORT_ABORT),
474                         thd->security_ctx->host_or_ip,
475                         &thd->security_ctx->priv_user[0],
476                         (ulong) thd->thread_id,
477                         thd->query());
478     }
479   }
480   else
481     thd->inc_status_sort_rows(num_rows);
482   *examined_rows= param.examined_rows;
483 #ifdef SKIP_DBUG_IN_FILESORT
484   DBUG_POP();			/* Ok to DBUG */
485 #endif
486 
487   // Assign the copy back!
488   table->sort= table_sort;
489 
490   DBUG_PRINT("exit",
491              ("num_rows: %ld examined_rows: %ld found_rows: %ld",
492               (long) num_rows, (long) *examined_rows, (long) *found_rows));
493   MYSQL_FILESORT_DONE(error, num_rows);
494   DBUG_RETURN(error ? HA_POS_ERROR : num_rows);
495 } /* filesort */
496 
497 
filesort_free_buffers(TABLE * table,bool full)498 void filesort_free_buffers(TABLE *table, bool full)
499 {
500   DBUG_ENTER("filesort_free_buffers");
501   my_free(table->sort.record_pointers);
502   table->sort.record_pointers= NULL;
503 
504   if (full)
505   {
506     table->sort.free_sort_buffer();
507     my_free(table->sort.buffpek);
508     table->sort.buffpek= NULL;
509     table->sort.buffpek_len= 0;
510   }
511 
512   my_free(table->sort.addon_buf);
513   my_free(table->sort.addon_field);
514   table->sort.addon_buf= NULL;
515   table->sort.addon_field= NULL;
516   DBUG_VOID_RETURN;
517 }
518 
cleanup()519 void Filesort::cleanup()
520 {
521   if (select && own_select)
522   {
523     select->cleanup();
524     select= NULL;
525   }
526 }
527 
make_sortorder()528 uint Filesort::make_sortorder()
529 {
530   uint count;
531   SORT_FIELD *sort,*pos;
532   ORDER *ord;
533   DBUG_ENTER("make_sortorder");
534 
535 
536   count=0;
537   for (ord = order; ord; ord= ord->next)
538     count++;
539   if (!sortorder)
540     sortorder= (SORT_FIELD*) sql_alloc(sizeof(SORT_FIELD) * (count + 1));
541   pos= sort= sortorder;
542 
543   if (!pos)
544     DBUG_RETURN(0);
545 
546   for (ord= order; ord; ord= ord->next, pos++)
547   {
548     Item *const item= ord->item[0], *const real_item= item->real_item();
549     pos->field= 0; pos->item= 0;
550     if (real_item->type() == Item::FIELD_ITEM)
551     {
552       // Could be a field, or Item_direct_view_ref wrapping a field
553       DBUG_ASSERT(item->type() == Item::FIELD_ITEM ||
554                   (item->type() == Item::REF_ITEM &&
555                    static_cast<Item_ref*>(item)->ref_type() ==
556                    Item_ref::VIEW_REF));
557       pos->field= static_cast<Item_field*>(real_item)->field;
558     }
559     else if (real_item->type() == Item::SUM_FUNC_ITEM &&
560              !real_item->const_item())
561     {
562       // Aggregate, or Item_aggregate_ref
563       DBUG_ASSERT(item->type() == Item::SUM_FUNC_ITEM ||
564                   (item->type() == Item::REF_ITEM &&
565                    static_cast<Item_ref*>(item)->ref_type() ==
566                    Item_ref::AGGREGATE_REF));
567       pos->field= item->get_tmp_table_field();
568     }
569     else if (real_item->type() == Item::COPY_STR_ITEM)
570     {						// Blob patch
571       pos->item= static_cast<Item_copy*>(real_item)->get_item();
572     }
573     else
574       pos->item= item;
575     pos->reverse= (ord->direction == ORDER::ORDER_DESC);
576     DBUG_ASSERT(pos->field != NULL || pos->item != NULL);
577   }
578   DBUG_RETURN(count);
579 }
580 
581 
582 /**
583   Makes an array of string pointers for info->sort_keys.
584 
585   @param info         Filesort_info struct owning the allocated array.
586   @param num_records  Number of records.
587   @param length       Length of each record.
588 */
589 
590 /** Read 'count' number of buffer pointers into memory. */
591 
read_buffpek_from_file(IO_CACHE * buffpek_pointers,uint count,uchar * buf)592 static uchar *read_buffpek_from_file(IO_CACHE *buffpek_pointers, uint count,
593                                      uchar *buf)
594 {
595   ulong length= sizeof(BUFFPEK)*count;
596   uchar *tmp= buf;
597   DBUG_ENTER("read_buffpek_from_file");
598   if (count > UINT_MAX/sizeof(BUFFPEK))
599     return 0; /* sizeof(BUFFPEK)*count will overflow */
600   if (!tmp)
601     tmp= (uchar *)my_malloc(length, MYF(MY_WME));
602   if (tmp)
603   {
604     if (reinit_io_cache(buffpek_pointers,READ_CACHE,0L,0,0) ||
605 	my_b_read(buffpek_pointers, (uchar*) tmp, length))
606     {
607       my_free(tmp);
608       tmp=0;
609     }
610   }
611   DBUG_RETURN(tmp);
612 }
613 
614 #ifndef DBUG_OFF
615 /*
616   Print a text, SQL-like record representation into dbug trace.
617 
618   Note: this function is a work in progress: at the moment
619    - column read bitmap is ignored (can print garbage for unused columns)
620    - there is no quoting
621 */
dbug_print_record(TABLE * table,bool print_rowid)622 static void dbug_print_record(TABLE *table, bool print_rowid)
623 {
624   char buff[1024];
625   Field **pfield;
626   String tmp(buff,sizeof(buff),&my_charset_bin);
627   DBUG_LOCK_FILE;
628 
629   fprintf(DBUG_FILE, "record (");
630   for (pfield= table->field; *pfield ; pfield++)
631     fprintf(DBUG_FILE, "%s%s", (*pfield)->field_name, (pfield[1])? ", ":"");
632   fprintf(DBUG_FILE, ") = ");
633 
634   fprintf(DBUG_FILE, "(");
635   for (pfield= table->field; *pfield ; pfield++)
636   {
637     Field *field=  *pfield;
638 
639     if (field->is_null())
640       fwrite("NULL", sizeof(char), 4, DBUG_FILE);
641 
642     if (field->type() == MYSQL_TYPE_BIT)
643       (void) field->val_int_as_str(&tmp, 1);
644     else
645       field->val_str(&tmp);
646 
647     fwrite(tmp.ptr(),sizeof(char),tmp.length(),DBUG_FILE);
648     if (pfield[1])
649       fwrite(", ", sizeof(char), 2, DBUG_FILE);
650   }
651   fprintf(DBUG_FILE, ")");
652   if (print_rowid)
653   {
654     fprintf(DBUG_FILE, " rowid ");
655     for (uint i=0; i < table->file->ref_length; i++)
656     {
657       fprintf(DBUG_FILE, "%x", (uchar)table->file->ref[i]);
658     }
659   }
660   fprintf(DBUG_FILE, "\n");
661   DBUG_UNLOCK_FILE;
662 }
663 #endif
664 
665 /**
666   Search after sort_keys, and write them into tempfile
667   (if we run out of space in the sort_keys buffer).
668   All produced sequences are guaranteed to be non-empty.
669 
670   @param param             Sorting parameter
671   @param select            Use this to get source data
672   @param sort_keys         Array of pointers to sort key + addon buffers.
673   @param buffpek_pointers  File to write BUFFPEKs describing sorted segments
674                            in tempfile.
675   @param tempfile          File to write sorted sequences of sortkeys to.
676   @param pq                If !NULL, use it for keeping top N elements
677   @param [out] found_rows  The number of FOUND_ROWS().
678                            For a query with LIMIT, this value will typically
679                            be larger than the function return value.
680 
681   @note
682     Basic idea:
683     @verbatim
684      while (get_next_sortkey())
685      {
686        if (using priority queue)
687          push sort key into queue
688        else
689        {
690          if (no free space in sort_keys buffers)
691          {
692            sort sort_keys buffer;
693            dump sorted sequence to 'tempfile';
694            dump BUFFPEK describing sequence location into 'buffpek_pointers';
695          }
696          put sort key into 'sort_keys';
697        }
698      }
699      if (sort_keys has some elements && dumped at least once)
700        sort-dump-dump as above;
701      else
702        don't sort, leave sort_keys array to be sorted by caller.
703   @endverbatim
704 
705   @retval
706     Number of records written on success.
707   @retval
708     HA_POS_ERROR on error.
709 */
710 
find_all_keys(Sort_param * param,SQL_SELECT * select,Filesort_info * fs_info,IO_CACHE * buffpek_pointers,IO_CACHE * tempfile,Bounded_queue<uchar,uchar> * pq,ha_rows * found_rows)711 static ha_rows find_all_keys(Sort_param *param, SQL_SELECT *select,
712                              Filesort_info *fs_info,
713                              IO_CACHE *buffpek_pointers,
714                              IO_CACHE *tempfile,
715                              Bounded_queue<uchar, uchar> *pq,
716                              ha_rows *found_rows)
717 {
718   int error,flag,quick_select;
719   uint idx,indexpos,ref_length;
720   uchar *ref_pos,*next_pos,ref_buff[MAX_REFLENGTH];
721   my_off_t record;
722   TABLE *sort_form;
723   THD *thd= current_thd;
724   volatile THD::killed_state *killed= &thd->killed;
725   handler *file;
726   MY_BITMAP *save_read_set, *save_write_set;
727   bool skip_record;
728 
729   DBUG_ENTER("find_all_keys");
730   DBUG_PRINT("info",("using: %s",
731                      (select ? select->quick ? "ranges" : "where":
732                       "every row")));
733 
734   idx=indexpos=0;
735   error=quick_select=0;
736   sort_form=param->sort_form;
737   file=sort_form->file;
738   ref_length=param->ref_length;
739   ref_pos= ref_buff;
740   quick_select=select && select->quick;
741   record=0;
742   *found_rows= 0;
743   flag= ((file->ha_table_flags() & HA_REC_NOT_IN_SEQ) || quick_select);
744   if (flag)
745     ref_pos= &file->ref[0];
746   next_pos=ref_pos;
747   if (!quick_select)
748   {
749     next_pos=(uchar*) 0;			/* Find records in sequence */
750     DBUG_EXECUTE_IF("bug14365043_1",
751                     DBUG_SET("+d,ha_rnd_init_fail"););
752     if ((error= file->ha_rnd_init(1)))
753     {
754       file->print_error(error, MYF(0));
755       DBUG_RETURN(HA_POS_ERROR);
756     }
757     file->extra_opt(HA_EXTRA_CACHE,
758 		    current_thd->variables.read_buff_size);
759   }
760 
761   if (quick_select)
762   {
763     if (select->quick->reset())
764       DBUG_RETURN(HA_POS_ERROR);
765   }
766 
767   /* Remember original bitmaps */
768   save_read_set=  sort_form->read_set;
769   save_write_set= sort_form->write_set;
770   /* Set up temporary column read map for columns used by sort */
771   bitmap_clear_all(&sort_form->tmp_set);
772   /* Temporary set for register_used_fields and register_field_in_read_map */
773   sort_form->read_set= &sort_form->tmp_set;
774   // Include fields used for sorting in the read_set.
775   register_used_fields(param);
776 
777   // Include fields used by conditions in the read_set.
778   if (select && select->cond)
779     select->cond->walk(&Item::register_field_in_read_map, 1,
780                        (uchar*) sort_form);
781 
782   // Include fields used by pushed conditions in the read_set.
783   if (select && select->icp_cond)
784     select->icp_cond->walk(&Item::register_field_in_read_map, 1,
785                            (uchar*) sort_form);
786 
787   sort_form->column_bitmaps_set(&sort_form->tmp_set, &sort_form->tmp_set);
788 
789   DEBUG_SYNC(thd, "after_index_merge_phase1");
790 
791   for (;;)
792   {
793     if (quick_select)
794     {
795       if ((error= select->quick->get_next()))
796         break;
797       file->position(sort_form->record[0]);
798       DBUG_EXECUTE_IF("debug_filesort", dbug_print_record(sort_form, TRUE););
799     }
800     else					/* Not quick-select */
801     {
802       {
803 	error= file->ha_rnd_next(sort_form->record[0]);
804 	if (!flag)
805 	{
806 	  my_store_ptr(ref_pos,ref_length,record); // Position to row
807 	  record+= sort_form->s->db_record_offset;
808 	}
809 	else if (!error)
810 	  file->position(sort_form->record[0]);
811       }
812       if (error && error != HA_ERR_RECORD_DELETED)
813 	break;
814     }
815 
816     if (*killed)
817     {
818       DBUG_PRINT("info",("Sort killed by user"));
819       if (!quick_select)
820       {
821         (void) file->extra(HA_EXTRA_NO_CACHE);
822         file->ha_rnd_end();
823       }
824       DBUG_RETURN(HA_POS_ERROR);		/* purecov: inspected */
825     }
826     if (error == 0)
827       param->examined_rows++;
828     if (!error && (!select ||
829                    (!select->skip_record(thd, &skip_record) && !skip_record)))
830     {
831       ++(*found_rows);
832       if (pq)
833       {
834         pq->push(ref_pos);
835         idx= pq->num_elements();
836       }
837       else
838       {
839         if (idx == param->max_keys_per_buffer)
840         {
841           if (write_keys(param, fs_info, idx, buffpek_pointers, tempfile))
842              DBUG_RETURN(HA_POS_ERROR);
843           idx= 0;
844           indexpos++;
845         }
846         make_sortkey(param, fs_info->get_record_buffer(idx++), ref_pos);
847       }
848     }
849     /*
850       Don't try unlocking the row if skip_record reported an error since in
851       this case the transaction might have been rolled back already.
852     */
853     else if (!thd->is_error())
854       file->unlock_row();
855     /* It does not make sense to read more keys in case of a fatal error */
856     if (thd->is_error())
857       break;
858   }
859   if (!quick_select)
860   {
861     (void) file->extra(HA_EXTRA_NO_CACHE);	/* End cacheing of records */
862     if (!next_pos)
863       file->ha_rnd_end();
864   }
865 
866   if (thd->is_error())
867     DBUG_RETURN(HA_POS_ERROR);
868 
869   /* Signal we should use orignal column read and write maps */
870   sort_form->column_bitmaps_set(save_read_set, save_write_set);
871 
872   DBUG_PRINT("test",("error: %d  indexpos: %d",error,indexpos));
873   if (error != HA_ERR_END_OF_FILE)
874   {
875     file->print_error(error,MYF(ME_ERROR | ME_WAITTANG)); // purecov: inspected
876     DBUG_RETURN(HA_POS_ERROR);			/* purecov: inspected */
877   }
878   if (indexpos && idx &&
879       write_keys(param, fs_info, idx, buffpek_pointers, tempfile))
880     DBUG_RETURN(HA_POS_ERROR);			/* purecov: inspected */
881   const ha_rows retval=
882     my_b_inited(tempfile) ?
883     (ha_rows) (my_b_tell(tempfile)/param->rec_length) : idx;
884   DBUG_PRINT("info", ("find_all_keys return %u", (uint) retval));
885   DBUG_RETURN(retval);
886 } /* find_all_keys */
887 
888 
889 /**
890   @details
891   Sort the buffer and write:
892   -# the sorted sequence to tempfile
893   -# a BUFFPEK describing the sorted sequence position to buffpek_pointers
894 
895     (was: Skriver en buffert med nycklar till filen)
896 
897   @param param             Sort parameters
898   @param sort_keys         Array of pointers to keys to sort
899   @param count             Number of elements in sort_keys array
900   @param buffpek_pointers  One 'BUFFPEK' struct will be written into this file.
901                            The BUFFPEK::{file_pos, count} will indicate where
902                            the sorted data was stored.
903   @param tempfile          The sorted sequence will be written into this file.
904 
905   @retval
906     0 OK
907   @retval
908     1 Error
909 */
910 
911 static int
write_keys(Sort_param * param,Filesort_info * fs_info,uint count,IO_CACHE * buffpek_pointers,IO_CACHE * tempfile)912 write_keys(Sort_param *param, Filesort_info *fs_info, uint count,
913            IO_CACHE *buffpek_pointers, IO_CACHE *tempfile)
914 {
915   size_t rec_length;
916   uchar **end;
917   BUFFPEK buffpek;
918   DBUG_ENTER("write_keys");
919 
920   rec_length= param->rec_length;
921   uchar **sort_keys= fs_info->get_sort_keys();
922 
923   fs_info->sort_buffer(param, count);
924 
925   if (!my_b_inited(tempfile) &&
926       open_cached_file(tempfile, mysql_tmpdir, TEMP_PREFIX, DISK_BUFFER_SIZE,
927                        MYF(MY_WME)))
928     goto err;                                   /* purecov: inspected */
929   /* check we won't have more buffpeks than we can possibly keep in memory */
930   if (my_b_tell(buffpek_pointers) + sizeof(BUFFPEK) > (ulonglong)UINT_MAX)
931     goto err;
932   buffpek.file_pos= my_b_tell(tempfile);
933   if ((ha_rows) count > param->max_rows)
934     count=(uint) param->max_rows;               /* purecov: inspected */
935   buffpek.count=(ha_rows) count;
936   for (end=sort_keys+count ; sort_keys != end ; sort_keys++)
937     if (my_b_write(tempfile, (uchar*) *sort_keys, (uint) rec_length))
938       goto err;
939   if (my_b_write(buffpek_pointers, (uchar*) &buffpek, sizeof(buffpek)))
940     goto err;
941   DBUG_RETURN(0);
942 
943 err:
944   DBUG_RETURN(1);
945 } /* write_keys */
946 
947 
948 /**
949   Store length as suffix in high-byte-first order.
950 */
951 
store_length(uchar * to,uint length,uint pack_length)952 static inline void store_length(uchar *to, uint length, uint pack_length)
953 {
954   switch (pack_length) {
955   case 1:
956     *to= (uchar) length;
957     break;
958   case 2:
959     mi_int2store(to, length);
960     break;
961   case 3:
962     mi_int3store(to, length);
963     break;
964   default:
965     mi_int4store(to, length);
966     break;
967   }
968 }
969 
970 
971 #ifdef WORDS_BIGENDIAN
972 const bool Is_big_endian= true;
973 #else
974 const bool Is_big_endian= false;
975 #endif
copy_native_longlong(uchar * to,int to_length,longlong val,bool is_unsigned)976 void copy_native_longlong(uchar *to, int to_length,
977                           longlong val, bool is_unsigned)
978 {
979   copy_integer<Is_big_endian>(to, to_length,
980                               static_cast<uchar*>(static_cast<void*>(&val)),
981                               sizeof(longlong),
982                               is_unsigned);
983 }
984 
985 
986 /** Make a sort-key from record. */
987 
make_sortkey(Sort_param * param,uchar * to,uchar * ref_pos)988 void make_sortkey(Sort_param *param, uchar *to, uchar *ref_pos)
989 {
990   SORT_FIELD *sort_field;
991 
992   for (sort_field= param->local_sortorder ;
993        sort_field != param->end ;
994        sort_field++)
995   {
996     bool maybe_null= false;
997     if (sort_field->field)
998     {
999       Field *field= sort_field->field;
1000       if (field->maybe_null())
1001       {
1002 	if (field->is_null())
1003 	{
1004 	  if (sort_field->reverse)
1005 	    memset(to, 255, sort_field->length+1);
1006 	  else
1007 	    memset(to, 0, sort_field->length+1);
1008 	  to+= sort_field->length+1;
1009 	  continue;
1010 	}
1011 	else
1012 	  *to++=1;
1013       }
1014       field->make_sort_key(to, sort_field->length);
1015     }
1016     else
1017     {						// Item
1018       Item *item=sort_field->item;
1019       maybe_null= item->maybe_null;
1020       switch (sort_field->result_type) {
1021       case STRING_RESULT:
1022       {
1023         const CHARSET_INFO *cs=item->collation.collation;
1024         char fill_char= ((cs->state & MY_CS_BINSORT) ? (char) 0 : ' ');
1025 
1026         if (maybe_null)
1027           *to++=1;
1028         /* All item->str() to use some extra byte for end null.. */
1029         String tmp((char*) to,sort_field->length+4,cs);
1030         String *res= item->str_result(&tmp);
1031         if (!res)
1032         {
1033           if (maybe_null)
1034             memset(to-1, 0, sort_field->length+1);
1035           else
1036           {
1037             /* purecov: begin deadcode */
1038             /*
1039               This should only happen during extreme conditions if we run out
1040               of memory or have an item marked not null when it can be null.
1041               This code is here mainly to avoid a hard crash in this case.
1042             */
1043             DBUG_ASSERT(0);
1044             DBUG_PRINT("warning",
1045                        ("Got null on something that shouldn't be null"));
1046             memset(to, 0, sort_field->length);	// Avoid crash
1047             /* purecov: end */
1048           }
1049           break;
1050         }
1051         uint length= res->length();
1052         if (sort_field->need_strxnfrm)
1053         {
1054           char *from=(char*) res->ptr();
1055           uint tmp_length MY_ATTRIBUTE((unused));
1056           if ((uchar*) from == to)
1057           {
1058             DBUG_ASSERT(sort_field->length >= length);
1059             set_if_smaller(length,sort_field->length);
1060             memcpy(param->tmp_buffer,from,length);
1061             from=param->tmp_buffer;
1062           }
1063           tmp_length= cs->coll->strnxfrm(cs, to, sort_field->length,
1064                                          item->max_char_length(),
1065                                          (uchar*) from, length,
1066                                          MY_STRXFRM_PAD_WITH_SPACE |
1067                                          MY_STRXFRM_PAD_TO_MAXLEN);
1068           DBUG_ASSERT(tmp_length == sort_field->length);
1069         }
1070         else
1071         {
1072           uint diff;
1073           uint sort_field_length= sort_field->length -
1074             sort_field->suffix_length;
1075           if (sort_field_length < length)
1076           {
1077             diff= 0;
1078             length= sort_field_length;
1079           }
1080           else
1081             diff= sort_field_length - length;
1082           if (sort_field->suffix_length)
1083           {
1084             /* Store length last in result_string */
1085             store_length(to + sort_field_length, length,
1086                          sort_field->suffix_length);
1087           }
1088 
1089           my_strnxfrm(cs,(uchar*)to,length,(const uchar*)res->ptr(),length);
1090           cs->cset->fill(cs, (char *)to+length,diff,fill_char);
1091         }
1092         break;
1093       }
1094       case INT_RESULT:
1095 	{
1096           longlong value= item->field_type() == MYSQL_TYPE_TIME ?
1097                           item->val_time_temporal_result() :
1098                           item->is_temporal_with_date() ?
1099                           item->val_date_temporal_result() :
1100                           item->val_int_result();
1101           if (maybe_null)
1102           {
1103 	    *to++=1;				/* purecov: inspected */
1104             if (item->null_value)
1105             {
1106               if (maybe_null)
1107                 memset(to-1, 0, sort_field->length+1);
1108               else
1109               {
1110                 DBUG_PRINT("warning",
1111                            ("Got null on something that shouldn't be null"));
1112                 memset(to, 0, sort_field->length);
1113               }
1114               break;
1115             }
1116           }
1117           copy_native_longlong(to, sort_field->length,
1118                                value, item->unsigned_flag);
1119 	  break;
1120 	}
1121       case DECIMAL_RESULT:
1122         {
1123           my_decimal dec_buf, *dec_val= item->val_decimal_result(&dec_buf);
1124           if (maybe_null)
1125           {
1126             if (item->null_value)
1127             {
1128               memset(to, 0, sort_field->length+1);
1129               to++;
1130               break;
1131             }
1132             *to++=1;
1133           }
1134           if (sort_field->length < DECIMAL_MAX_FIELD_SIZE)
1135           {
1136             uchar buf[DECIMAL_MAX_FIELD_SIZE];
1137             my_decimal2binary(E_DEC_FATAL_ERROR, dec_val, buf,
1138                               item->max_length - (item->decimals ? 1:0),
1139                               item->decimals);
1140             memcpy(to, buf, sort_field->length);
1141           }
1142           else
1143           {
1144             my_decimal2binary(E_DEC_FATAL_ERROR, dec_val, to,
1145                               item->max_length - (item->decimals ? 1:0),
1146                               item->decimals);
1147           }
1148          break;
1149         }
1150       case REAL_RESULT:
1151 	{
1152           double value= item->val_result();
1153 	  if (maybe_null)
1154           {
1155             if (item->null_value)
1156             {
1157               memset(to, 0, sort_field->length+1);
1158               to++;
1159               break;
1160             }
1161 	    *to++=1;
1162           }
1163           if (sort_field->length < sizeof(double))
1164           {
1165             uchar buf[sizeof(double)];
1166             change_double_for_sort(value, buf);
1167             memcpy(to, buf, sort_field->length);
1168           }
1169           else
1170           {
1171             change_double_for_sort(value, (uchar*) to);
1172           }
1173 	  break;
1174 	}
1175       case ROW_RESULT:
1176       default:
1177 	// This case should never be choosen
1178 	DBUG_ASSERT(0);
1179 	break;
1180       }
1181     }
1182     if (sort_field->reverse)
1183     {							/* Revers key */
1184       if (maybe_null)
1185         to[-1]= ~to[-1];
1186       uint length= sort_field->length;
1187       while (length--)
1188       {
1189 	*to = (uchar) (~ *to);
1190 	to++;
1191       }
1192     }
1193     else
1194       to+= sort_field->length;
1195   }
1196 
1197   if (param->addon_field)
1198   {
1199     /*
1200       Save field values appended to sorted fields.
1201       First null bit indicators are appended then field values follow.
1202       In this implementation we use fixed layout for field values -
1203       the same for all records.
1204     */
1205     SORT_ADDON_FIELD *addonf= param->addon_field;
1206     uchar *nulls= to;
1207     DBUG_ASSERT(addonf != 0);
1208     memset(nulls, 0, addonf->offset);
1209     to+= addonf->offset;
1210     for (Field* field; (field= addonf->field) ; addonf++)
1211     {
1212       if (addonf->null_bit && field->is_null())
1213       {
1214         nulls[addonf->null_offset]|= addonf->null_bit;
1215       }
1216       else
1217       {
1218         (void) field->pack(to, field->ptr);
1219       }
1220       to+= addonf->length;
1221     }
1222   }
1223   else
1224   {
1225     /* Save filepos last */
1226     memcpy((uchar*) to, ref_pos, (size_t) param->ref_length);
1227   }
1228   return;
1229 }
1230 
1231 
1232 /*
1233   Register fields used by sorting in the sorted table's read set
1234 */
1235 
register_used_fields(Sort_param * param)1236 static void register_used_fields(Sort_param *param)
1237 {
1238   SORT_FIELD *sort_field;
1239   TABLE *table=param->sort_form;
1240   MY_BITMAP *bitmap= table->read_set;
1241 
1242   for (sort_field= param->local_sortorder ;
1243        sort_field != param->end ;
1244        sort_field++)
1245   {
1246     Field *field;
1247     if ((field= sort_field->field))
1248     {
1249       if (field->table == table)
1250       bitmap_set_bit(bitmap, field->field_index);
1251     }
1252     else
1253     {						// Item
1254       sort_field->item->walk(&Item::register_field_in_read_map, 1,
1255                              (uchar *) table);
1256     }
1257   }
1258 
1259   if (param->addon_field)
1260   {
1261     SORT_ADDON_FIELD *addonf= param->addon_field;
1262     Field *field;
1263     for ( ; (field= addonf->field) ; addonf++)
1264       bitmap_set_bit(bitmap, field->field_index);
1265   }
1266   else
1267   {
1268     /* Save filepos last */
1269     table->prepare_for_position();
1270   }
1271 }
1272 
save_index(Sort_param * param,uint count,Filesort_info * table_sort)1273 static bool save_index(Sort_param *param, uint count, Filesort_info *table_sort)
1274 {
1275   uint offset,res_length;
1276   uchar *to;
1277   DBUG_ENTER("save_index");
1278 
1279   table_sort->sort_buffer(param, count);
1280   res_length= param->res_length;
1281   offset= param->rec_length-res_length;
1282   if (!(to= table_sort->record_pointers=
1283         (uchar*) my_malloc(res_length*count, MYF(MY_WME))))
1284     DBUG_RETURN(1);                 /* purecov: inspected */
1285   uchar **sort_keys= table_sort->get_sort_keys();
1286   for (uchar **end= sort_keys+count ; sort_keys != end ; sort_keys++)
1287   {
1288     memcpy(to, *sort_keys+offset, res_length);
1289     to+= res_length;
1290   }
1291   DBUG_RETURN(0);
1292 }
1293 
1294 
1295 /**
1296   Test whether priority queue is worth using to get top elements of an
1297   ordered result set. If it is, then allocates buffer for required amount of
1298   records
1299 
1300   @param trace            Current trace context.
1301   @param param            Sort parameters.
1302   @param filesort_info    Filesort information.
1303   @param table            Table to sort.
1304   @param num_rows         Estimate of number of rows in source record set.
1305   @param memory_available Memory available for sorting.
1306 
1307   DESCRIPTION
1308     Given a query like this:
1309       SELECT ... FROM t ORDER BY a1,...,an LIMIT max_rows;
1310     This function tests whether a priority queue should be used to keep
1311     the result. Necessary conditions are:
1312     - estimate that it is actually cheaper than merge-sort
1313     - enough memory to store the <max_rows> records.
1314 
1315     If we don't have space for <max_rows> records, but we *do* have
1316     space for <max_rows> keys, we may rewrite 'table' to sort with
1317     references to records instead of additional data.
1318     (again, based on estimates that it will actually be cheaper).
1319 
1320    @retval
1321     true  - if it's ok to use PQ
1322     false - PQ will be slower than merge-sort, or there is not enough memory.
1323 */
1324 
check_if_pq_applicable(Opt_trace_context * trace,Sort_param * param,Filesort_info * filesort_info,TABLE * table,ha_rows num_rows,ulong memory_available)1325 bool check_if_pq_applicable(Opt_trace_context *trace,
1326                             Sort_param *param,
1327                             Filesort_info *filesort_info,
1328                             TABLE *table, ha_rows num_rows,
1329                             ulong memory_available)
1330 {
1331   DBUG_ENTER("check_if_pq_applicable");
1332 
1333   /*
1334     How much Priority Queue sort is slower than qsort.
1335     Measurements (see unit test) indicate that PQ is roughly 3 times slower.
1336   */
1337   const double PQ_slowness= 3.0;
1338 
1339   Opt_trace_object trace_filesort(trace,
1340                                   "filesort_priority_queue_optimization");
1341   if (param->max_rows == HA_POS_ERROR)
1342   {
1343     trace_filesort
1344       .add("usable", false)
1345       .add_alnum("cause", "not applicable (no LIMIT)");
1346     DBUG_RETURN(false);
1347   }
1348 
1349   trace_filesort
1350     .add("limit", param->max_rows)
1351     .add("rows_estimate", num_rows)
1352     .add("row_size", param->rec_length)
1353     .add("memory_available", memory_available);
1354 
1355   if (param->max_rows + 2 >= UINT_MAX)
1356   {
1357     trace_filesort.add("usable", false).add_alnum("cause", "limit too large");
1358     DBUG_RETURN(false);
1359   }
1360 
1361   ulong num_available_keys=
1362     memory_available / (param->rec_length + sizeof(char*));
1363   // We need 1 extra record in the buffer, when using PQ.
1364   param->max_keys_per_buffer= (uint) param->max_rows + 1;
1365 
1366   if (num_rows < num_available_keys)
1367   {
1368     // The whole source set fits into memory.
1369     if (param->max_rows < num_rows/PQ_slowness )
1370     {
1371       filesort_info->alloc_sort_buffer(param->max_keys_per_buffer,
1372                                        param->rec_length);
1373       trace_filesort.add("chosen", true);
1374       DBUG_RETURN(filesort_info->get_sort_keys() != NULL);
1375     }
1376     else
1377     {
1378       // PQ will be slower.
1379       trace_filesort.add("chosen", false)
1380         .add_alnum("cause", "quicksort_is_cheaper");
1381       DBUG_RETURN(false);
1382     }
1383   }
1384 
1385   // Do we have space for LIMIT rows in memory?
1386   if (param->max_keys_per_buffer < num_available_keys)
1387   {
1388     filesort_info->alloc_sort_buffer(param->max_keys_per_buffer,
1389                                      param->rec_length);
1390     trace_filesort.add("chosen", true);
1391     DBUG_RETURN(filesort_info->get_sort_keys() != NULL);
1392   }
1393 
1394   // Try to strip off addon fields.
1395   if (param->addon_field)
1396   {
1397     const ulong row_length=
1398       param->sort_length + param->ref_length + sizeof(char*);
1399     num_available_keys= memory_available / row_length;
1400 
1401     Opt_trace_object trace_addon(trace, "strip_additional_fields");
1402     trace_addon.add("row_size", row_length);
1403 
1404     // Can we fit all the keys in memory?
1405     if (param->max_keys_per_buffer >= num_available_keys)
1406     {
1407       trace_addon.add("chosen", false).add_alnum("cause", "not_enough_space");
1408     }
1409     else
1410     {
1411       const double sort_merge_cost=
1412         get_merge_many_buffs_cost_fast(num_rows,
1413                                        num_available_keys,
1414                                        row_length);
1415       trace_addon.add("sort_merge_cost", sort_merge_cost);
1416       /*
1417         PQ has cost:
1418         (insert + qsort) * log(queue size) * ROWID_COMPARE_COST +
1419         cost of file lookup afterwards.
1420         The lookup cost is a bit pessimistic: we take scan_time and assume
1421         that on average we find the row after scanning half of the file.
1422         A better estimate would be lookup cost, but note that we are doing
1423         random lookups here, rather than sequential scan.
1424       */
1425       const double pq_cpu_cost=
1426         (PQ_slowness * num_rows + param->max_keys_per_buffer) *
1427         log((double) param->max_keys_per_buffer) * ROWID_COMPARE_COST;
1428       const double pq_io_cost=
1429         param->max_rows * table->file->scan_time() / 2.0;
1430       const double pq_cost= pq_cpu_cost + pq_io_cost;
1431       trace_addon.add("priority_queue_cost", pq_cost);
1432 
1433       if (sort_merge_cost < pq_cost)
1434       {
1435         trace_addon.add("chosen", false);
1436         DBUG_RETURN(false);
1437       }
1438 
1439       trace_addon.add("chosen", true);
1440       filesort_info->alloc_sort_buffer(param->max_keys_per_buffer,
1441                                        param->sort_length + param->ref_length);
1442       if (filesort_info->get_sort_keys())
1443       {
1444         // Make attached data to be references instead of fields.
1445         my_free(filesort_info->addon_buf);
1446         my_free(filesort_info->addon_field);
1447         filesort_info->addon_buf= NULL;
1448         filesort_info->addon_field= NULL;
1449         param->addon_field= NULL;
1450         param->addon_length= 0;
1451 
1452         param->res_length= param->ref_length;
1453         param->sort_length+= param->ref_length;
1454         param->rec_length= param->sort_length;
1455 
1456         DBUG_RETURN(true);
1457       }
1458     }
1459   }
1460   DBUG_RETURN(false);
1461 }
1462 
1463 
1464 /** Merge buffers to make < MERGEBUFF2 buffers. */
1465 
merge_many_buff(Sort_param * param,uchar * sort_buffer,BUFFPEK * buffpek,uint * maxbuffer,IO_CACHE * t_file)1466 int merge_many_buff(Sort_param *param, uchar *sort_buffer,
1467                     BUFFPEK *buffpek, uint *maxbuffer, IO_CACHE *t_file)
1468 {
1469   uint i;
1470   IO_CACHE t_file2,*from_file,*to_file,*temp;
1471   BUFFPEK *lastbuff;
1472   DBUG_ENTER("merge_many_buff");
1473 
1474   if (*maxbuffer < MERGEBUFF2)
1475     DBUG_RETURN(0);				/* purecov: inspected */
1476   if (flush_io_cache(t_file) ||
1477       open_cached_file(&t_file2,mysql_tmpdir,TEMP_PREFIX,DISK_BUFFER_SIZE,
1478 			MYF(MY_WME)))
1479     DBUG_RETURN(1);				/* purecov: inspected */
1480 
1481   from_file= t_file ; to_file= &t_file2;
1482   while (*maxbuffer >= MERGEBUFF2)
1483   {
1484     if (reinit_io_cache(from_file,READ_CACHE,0L,0,0))
1485       goto cleanup;
1486     if (reinit_io_cache(to_file,WRITE_CACHE,0L,0,0))
1487       goto cleanup;
1488     lastbuff=buffpek;
1489     for (i=0 ; i <= *maxbuffer-MERGEBUFF*3/2 ; i+=MERGEBUFF)
1490     {
1491       if (merge_buffers(param,from_file,to_file,sort_buffer,lastbuff++,
1492 			buffpek+i,buffpek+i+MERGEBUFF-1,0))
1493       goto cleanup;
1494     }
1495     if (merge_buffers(param,from_file,to_file,sort_buffer,lastbuff++,
1496 		      buffpek+i,buffpek+ *maxbuffer,0))
1497       break;					/* purecov: inspected */
1498     if (flush_io_cache(to_file))
1499       break;					/* purecov: inspected */
1500     temp=from_file; from_file=to_file; to_file=temp;
1501     setup_io_cache(from_file);
1502     setup_io_cache(to_file);
1503     *maxbuffer= (uint) (lastbuff-buffpek)-1;
1504   }
1505 cleanup:
1506   close_cached_file(to_file);			// This holds old result
1507   if (to_file == t_file)
1508   {
1509     *t_file=t_file2;				// Copy result file
1510     setup_io_cache(t_file);
1511   }
1512 
1513   DBUG_RETURN(*maxbuffer >= MERGEBUFF2);	/* Return 1 if interrupted */
1514 } /* merge_many_buff */
1515 
1516 
1517 /**
1518   Read data to buffer.
1519 
1520   @retval
1521     (uint)-1 if something goes wrong
1522 */
1523 
read_to_buffer(IO_CACHE * fromfile,BUFFPEK * buffpek,uint rec_length)1524 uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
1525 		    uint rec_length)
1526 {
1527   uint count;
1528   uint length;
1529 
1530   if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count)))
1531   {
1532     if (mysql_file_pread(fromfile->file, (uchar*) buffpek->base,
1533                          (length= rec_length*count),
1534                          buffpek->file_pos, MYF_RW))
1535       return((uint) -1);			/* purecov: inspected */
1536     buffpek->key=buffpek->base;
1537     buffpek->file_pos+= length;			/* New filepos */
1538     buffpek->count-=	count;
1539     buffpek->mem_count= count;
1540   }
1541   return (count*rec_length);
1542 } /* read_to_buffer */
1543 
1544 
1545 /**
1546   Put all room used by freed buffer to use in adjacent buffer.
1547 
1548   Note, that we can't simply distribute memory evenly between all buffers,
1549   because new areas must not overlap with old ones.
1550 
1551   @param[in] queue      list of non-empty buffers, without freed buffer
1552   @param[in] reuse      empty buffer
1553   @param[in] key_length key length
1554 */
1555 
reuse_freed_buff(QUEUE * queue,BUFFPEK * reuse,uint key_length)1556 void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length)
1557 {
1558   uchar *reuse_end= reuse->base + reuse->max_keys * key_length;
1559   for (uint i= 0; i < queue->elements; ++i)
1560   {
1561     BUFFPEK *bp= (BUFFPEK *) queue_element(queue, i);
1562     if (bp->base + bp->max_keys * key_length == reuse->base)
1563     {
1564       bp->max_keys+= reuse->max_keys;
1565       return;
1566     }
1567     else if (bp->base == reuse_end)
1568     {
1569       bp->base= reuse->base;
1570       bp->max_keys+= reuse->max_keys;
1571       return;
1572     }
1573   }
1574   DBUG_ASSERT(0);
1575 }
1576 
1577 
1578 /**
1579   Merge buffers to one buffer.
1580 
1581   @param param        Sort parameter
1582   @param from_file    File with source data (BUFFPEKs point to this file)
1583   @param to_file      File to write the sorted result data.
1584   @param sort_buffer  Buffer for data to store up to MERGEBUFF2 sort keys.
1585   @param lastbuff     OUT Store here BUFFPEK describing data written to to_file
1586   @param Fb           First element in source BUFFPEKs array
1587   @param Tb           Last element in source BUFFPEKs array
1588   @param flag
1589 
1590   @retval
1591     0      OK
1592   @retval
1593     other  error
1594 */
1595 
merge_buffers(Sort_param * param,IO_CACHE * from_file,IO_CACHE * to_file,uchar * sort_buffer,BUFFPEK * lastbuff,BUFFPEK * Fb,BUFFPEK * Tb,int flag)1596 int merge_buffers(Sort_param *param, IO_CACHE *from_file,
1597                   IO_CACHE *to_file, uchar *sort_buffer,
1598                   BUFFPEK *lastbuff, BUFFPEK *Fb, BUFFPEK *Tb,
1599                   int flag)
1600 {
1601   int error;
1602   uint rec_length,res_length,offset;
1603   size_t sort_length;
1604   ulong maxcount;
1605   ha_rows max_rows,org_max_rows;
1606   my_off_t to_start_filepos;
1607   uchar *strpos;
1608   BUFFPEK *buffpek;
1609   QUEUE queue;
1610   qsort2_cmp cmp;
1611   void *first_cmp_arg;
1612   THD * const thd = current_thd;
1613   volatile THD::killed_state *killed= &thd->killed;
1614   THD::killed_state not_killable;
1615   DBUG_ENTER("merge_buffers");
1616 
1617   thd->inc_status_sort_merge_passes();
1618   thd->query_plan_fsort_passes++;
1619   if (param->not_killable)
1620   {
1621     killed= &not_killable;
1622     not_killable= THD::NOT_KILLED;
1623   }
1624 
1625   error=0;
1626   rec_length= param->rec_length;
1627   res_length= param->res_length;
1628   sort_length= param->sort_length;
1629   offset= rec_length-res_length;
1630   maxcount= (ulong) (param->max_keys_per_buffer / ((uint) (Tb-Fb) +1));
1631   to_start_filepos= my_b_tell(to_file);
1632   strpos= (uchar*) sort_buffer;
1633   org_max_rows=max_rows= param->max_rows;
1634 
1635   /* The following will fire if there is not enough space in sort_buffer */
1636   DBUG_ASSERT(maxcount!=0);
1637 
1638   if (param->unique_buff)
1639   {
1640     cmp= param->compare;
1641     first_cmp_arg= (void *) &param->cmp_context;
1642   }
1643   else
1644   {
1645     cmp= get_ptr_compare(sort_length);
1646     first_cmp_arg= (void*) &sort_length;
1647   }
1648   if (init_queue(&queue, (uint) (Tb-Fb)+1, offsetof(BUFFPEK,key), 0,
1649                  (queue_compare) cmp, first_cmp_arg))
1650     DBUG_RETURN(1);                                /* purecov: inspected */
1651   for (buffpek= Fb ; buffpek <= Tb ; buffpek++)
1652   {
1653     buffpek->base= strpos;
1654     buffpek->max_keys= maxcount;
1655     strpos+=
1656       (uint) (error= (int)read_to_buffer(from_file, buffpek, rec_length));
1657     if (error == -1)
1658       goto err;					/* purecov: inspected */
1659     buffpek->max_keys= buffpek->mem_count;	// If less data in buffers than expected
1660     queue_insert(&queue, (uchar*) buffpek);
1661   }
1662 
1663   if (param->unique_buff)
1664   {
1665     /*
1666        Called by Unique::get()
1667        Copy the first argument to param->unique_buff for unique removal.
1668        Store it also in 'to_file'.
1669 
1670        This is safe as we know that there is always more than one element
1671        in each block to merge (This is guaranteed by the Unique:: algorithm
1672     */
1673     buffpek= (BUFFPEK*) queue_top(&queue);
1674     memcpy(param->unique_buff, buffpek->key, rec_length);
1675     if (my_b_write(to_file, (uchar*) buffpek->key, rec_length))
1676     {
1677       error=1; goto err;                        /* purecov: inspected */
1678     }
1679     buffpek->key+= rec_length;
1680     buffpek->mem_count--;
1681     if (!--max_rows)
1682     {
1683       error= 0;                                       /* purecov: inspected */
1684       goto end;                                       /* purecov: inspected */
1685     }
1686     queue_replaced(&queue);                        // Top element has been used
1687   }
1688   else
1689     cmp= 0;                                        // Not unique
1690 
1691   while (queue.elements > 1)
1692   {
1693     if (*killed)
1694     {
1695       error= 1; goto err;                        /* purecov: inspected */
1696     }
1697     for (;;)
1698     {
1699       buffpek= (BUFFPEK*) queue_top(&queue);
1700       if (cmp)                                        // Remove duplicates
1701       {
1702         if (!(*cmp)(first_cmp_arg, &(param->unique_buff),
1703                     (uchar**) &buffpek->key))
1704               goto skip_duplicate;
1705             memcpy(param->unique_buff, (uchar*) buffpek->key, rec_length);
1706       }
1707       if (flag == 0)
1708       {
1709         if (my_b_write(to_file,(uchar*) buffpek->key, rec_length))
1710         {
1711           error=1; goto err;                        /* purecov: inspected */
1712         }
1713       }
1714       else
1715       {
1716         if (my_b_write(to_file, (uchar*) buffpek->key+offset, res_length))
1717         {
1718           error=1; goto err;                        /* purecov: inspected */
1719         }
1720       }
1721       if (!--max_rows)
1722       {
1723         error= 0;                               /* purecov: inspected */
1724         goto end;                               /* purecov: inspected */
1725       }
1726 
1727     skip_duplicate:
1728       buffpek->key+= rec_length;
1729       if (! --buffpek->mem_count)
1730       {
1731         if (!(error= (int) read_to_buffer(from_file,buffpek,
1732                                           rec_length)))
1733         {
1734           (void) queue_remove(&queue,0);
1735           reuse_freed_buff(&queue, buffpek, rec_length);
1736           break;                        /* One buffer have been removed */
1737         }
1738         else if (error == -1)
1739           goto err;                        /* purecov: inspected */
1740       }
1741       queue_replaced(&queue);              /* Top element has been replaced */
1742     }
1743   }
1744   buffpek= (BUFFPEK*) queue_top(&queue);
1745   buffpek->base= sort_buffer;
1746   buffpek->max_keys= param->max_keys_per_buffer;
1747 
1748   /*
1749     As we know all entries in the buffer are unique, we only have to
1750     check if the first one is the same as the last one we wrote
1751   */
1752   if (cmp)
1753   {
1754     if (!(*cmp)(first_cmp_arg, &(param->unique_buff), (uchar**) &buffpek->key))
1755     {
1756       buffpek->key+= rec_length;         // Remove duplicate
1757       --buffpek->mem_count;
1758     }
1759   }
1760 
1761   do
1762   {
1763     if ((ha_rows) buffpek->mem_count > max_rows)
1764     {                                        /* Don't write too many records */
1765       buffpek->mem_count= (uint) max_rows;
1766       buffpek->count= 0;                        /* Don't read more */
1767     }
1768     max_rows-= buffpek->mem_count;
1769     if (flag == 0)
1770     {
1771       if (my_b_write(to_file,(uchar*) buffpek->key,
1772                      (rec_length*buffpek->mem_count)))
1773       {
1774         error= 1; goto err;                        /* purecov: inspected */
1775       }
1776     }
1777     else
1778     {
1779       uchar *end;
1780       strpos= buffpek->key+offset;
1781       for (end= strpos+buffpek->mem_count*rec_length ;
1782            strpos != end ;
1783            strpos+= rec_length)
1784       {
1785         if (my_b_write(to_file, (uchar *) strpos, res_length))
1786         {
1787           error=1; goto err;
1788         }
1789       }
1790     }
1791   }
1792   while ((error=(int) read_to_buffer(from_file,buffpek, rec_length))
1793          != -1 && error != 0);
1794 
1795 end:
1796   lastbuff->count= min(org_max_rows-max_rows, param->max_rows);
1797   lastbuff->file_pos= to_start_filepos;
1798 err:
1799   delete_queue(&queue);
1800   DBUG_RETURN(error);
1801 } /* merge_buffers */
1802 
1803 
1804 	/* Do a merge to output-file (save only positions) */
1805 
merge_index(Sort_param * param,uchar * sort_buffer,BUFFPEK * buffpek,uint maxbuffer,IO_CACHE * tempfile,IO_CACHE * outfile)1806 static int merge_index(Sort_param *param, uchar *sort_buffer,
1807                        BUFFPEK *buffpek, uint maxbuffer,
1808                        IO_CACHE *tempfile, IO_CACHE *outfile)
1809 {
1810   DBUG_ENTER("merge_index");
1811   if (merge_buffers(param,tempfile,outfile,sort_buffer,buffpek,buffpek,
1812 		    buffpek+maxbuffer,1))
1813     DBUG_RETURN(1);				/* purecov: inspected */
1814   DBUG_RETURN(0);
1815 } /* merge_index */
1816 
1817 
suffix_length(ulong string_length)1818 static uint suffix_length(ulong string_length)
1819 {
1820   if (string_length < 256)
1821     return 1;
1822   if (string_length < 256L*256L)
1823     return 2;
1824   if (string_length < 256L*256L*256L)
1825     return 3;
1826   return 4;                                     // Can't sort longer than 4G
1827 }
1828 
1829 
1830 
1831 /**
1832   Calculate length of sort key.
1833 
1834   @param thd			  Thread handler
1835   @param sortorder		  Order of items to sort
1836   @param s_length	          Number of items to sort
1837   @param[out] multi_byte_charset Set to 1 if we are using multi-byte charset
1838                                  (In which case we have to use strxnfrm())
1839 
1840   @note
1841     sortorder->length is updated for each sort item.
1842   @n
1843     sortorder->need_strxnfrm is set 1 if we have to use strxnfrm
1844 
1845   @return
1846     Total length of sort buffer in bytes
1847 */
1848 
1849 uint
sortlength(THD * thd,SORT_FIELD * sortorder,uint s_length,bool * multi_byte_charset)1850 sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
1851            bool *multi_byte_charset)
1852 {
1853   uint total_length= 0;
1854   const CHARSET_INFO *cs;
1855   *multi_byte_charset= false;
1856 
1857   for (; s_length-- ; sortorder++)
1858   {
1859     sortorder->need_strxnfrm= 0;
1860     sortorder->suffix_length= 0;
1861     if (sortorder->field)
1862     {
1863       cs= sortorder->field->sort_charset();
1864       sortorder->length= sortorder->field->sort_length();
1865 
1866       if (use_strnxfrm((cs=sortorder->field->sort_charset())))
1867       {
1868         sortorder->need_strxnfrm= 1;
1869         *multi_byte_charset= 1;
1870         sortorder->length= cs->coll->strnxfrmlen(cs, sortorder->length);
1871       }
1872       if (sortorder->field->maybe_null())
1873         total_length++;                       // Place for NULL marker
1874 
1875       if (sortorder->field->result_type() == STRING_RESULT &&
1876           !sortorder->field->is_temporal())
1877       {
1878         set_if_smaller(sortorder->length, thd->variables.max_sort_length);
1879       }
1880     }
1881     else
1882     {
1883       sortorder->result_type= sortorder->item->result_type();
1884       if (sortorder->item->is_temporal())
1885         sortorder->result_type= INT_RESULT;
1886       switch (sortorder->result_type) {
1887       case STRING_RESULT:
1888 	sortorder->length= sortorder->item->max_length;
1889         set_if_smaller(sortorder->length, thd->variables.max_sort_length);
1890 	if (use_strnxfrm((cs=sortorder->item->collation.collation)))
1891 	{
1892           sortorder->length= cs->coll->strnxfrmlen(cs, sortorder->length);
1893 	  sortorder->need_strxnfrm= 1;
1894 	  *multi_byte_charset= 1;
1895 	}
1896         else if (cs == &my_charset_bin)
1897         {
1898           /* Store length last to be able to sort blob/varbinary */
1899           sortorder->suffix_length= suffix_length(sortorder->length);
1900           sortorder->length+= sortorder->suffix_length;
1901         }
1902 	break;
1903       case INT_RESULT:
1904 #if SIZEOF_LONG_LONG > 4
1905 	sortorder->length=8;			// Size of intern longlong
1906 #else
1907 	sortorder->length=4;
1908 #endif
1909 	break;
1910       case DECIMAL_RESULT:
1911         sortorder->length=
1912           my_decimal_get_binary_size(sortorder->item->max_length -
1913                                      (sortorder->item->decimals ? 1 : 0),
1914                                      sortorder->item->decimals);
1915         break;
1916       case REAL_RESULT:
1917 	sortorder->length=sizeof(double);
1918 	break;
1919       case ROW_RESULT:
1920       default:
1921 	// This case should never be choosen
1922 	DBUG_ASSERT(0);
1923 	break;
1924       }
1925       if (sortorder->item->maybe_null)
1926         total_length++;                       // Place for NULL marker
1927     }
1928     total_length+= sortorder->length;
1929   }
1930   sortorder->field= NULL;                       // end marker
1931   DBUG_PRINT("info",("sort_length: %u", total_length));
1932   return total_length;
1933 }
1934 
1935 
1936 /**
1937   Get descriptors of fields appended to sorted fields and
1938   calculate its total length.
1939 
1940   The function first finds out what fields are used in the result set.
1941   Then it calculates the length of the buffer to store the values of
1942   these fields together with the value of sort values.
1943   If the calculated length is not greater than max_length_for_sort_data
1944   the function allocates memory for an array of descriptors containing
1945   layouts for the values of the non-sorted fields in the buffer and
1946   fills them.
1947 
1948   @param thd                 Current thread
1949   @param ptabfield           Array of references to the table fields
1950   @param sortlength          Total length of sorted fields
1951   @param[out] plength        Total length of appended fields
1952 
1953   @note
1954     The null bits for the appended values are supposed to be put together
1955     and stored the buffer just ahead of the value of the first field.
1956 
1957   @return
1958     Pointer to the layout descriptors for the appended fields, if any
1959   @retval
1960     NULL   if we do not store field values with sort data.
1961 */
1962 
1963 static SORT_ADDON_FIELD *
get_addon_fields(ulong max_length_for_sort_data,TABLE * const table,Field ** ptabfield,uint sortlength,uint * plength)1964 get_addon_fields(ulong max_length_for_sort_data,
1965                  TABLE *const table,
1966                  Field **ptabfield, uint sortlength, uint *plength)
1967 {
1968   Field **pfield;
1969   Field *field;
1970   SORT_ADDON_FIELD *addonf;
1971   uint length= 0;
1972   uint fields= 0;
1973   uint null_fields= 0;
1974   MY_BITMAP *read_set= table->read_set;
1975 
1976   /*
1977     If there is a reference to a field in the query add it
1978     to the the set of appended fields.
1979     Note for future refinement:
1980     This this a too strong condition.
1981     Actually we need only the fields referred in the
1982     result set. And for some of them it makes sense to use
1983     the values directly from sorted fields.
1984   */
1985   *plength= 0;
1986 
1987   for (pfield= ptabfield; (field= *pfield) ; pfield++)
1988   {
1989     if (!bitmap_is_set(read_set, field->field_index))
1990       continue;
1991     if (field->flags & BLOB_FLAG)
1992       return 0;
1993     length+= field->max_packed_col_length(field->pack_length());
1994     if (field->maybe_null())
1995       null_fields++;
1996     fields++;
1997   }
1998   if (!fields)
1999     return 0;
2000   length+= (null_fields+7)/8;
2001 
2002   if (length+sortlength > max_length_for_sort_data ||
2003       !(addonf= (SORT_ADDON_FIELD *) my_malloc(sizeof(SORT_ADDON_FIELD)*
2004                                                (fields+1), MYF(MY_WME))))
2005     return 0;
2006 
2007   *plength= length;
2008   length= (null_fields+7)/8;
2009   null_fields= 0;
2010   for (pfield= ptabfield; (field= *pfield) ; pfield++)
2011   {
2012     if (!bitmap_is_set(read_set, field->field_index))
2013       continue;
2014     addonf->field= field;
2015     addonf->offset= length;
2016     if (field->maybe_null())
2017     {
2018       addonf->null_offset= null_fields/8;
2019       addonf->null_bit= 1<<(null_fields & 7);
2020       null_fields++;
2021     }
2022     else
2023     {
2024       addonf->null_offset= 0;
2025       addonf->null_bit= 0;
2026     }
2027     addonf->length= field->max_packed_col_length(field->pack_length());
2028     length+= addonf->length;
2029     addonf++;
2030   }
2031   addonf->field= 0;     // Put end marker
2032 
2033   DBUG_PRINT("info",("addon_length: %d",length));
2034   return (addonf-fields);
2035 }
2036 
2037 
2038 /**
2039   Copy (unpack) values appended to sorted fields from a buffer back to
2040   their regular positions specified by the Field::ptr pointers.
2041 
2042   @param addon_field     Array of descriptors for appended fields
2043   @param buff            Buffer which to unpack the value from
2044 
2045   @note
2046     The function is supposed to be used only as a callback function
2047     when getting field values for the sorted result set.
2048 
2049   @return
2050     void.
2051 */
2052 
2053 static void
unpack_addon_fields(struct st_sort_addon_field * addon_field,uchar * buff)2054 unpack_addon_fields(struct st_sort_addon_field *addon_field, uchar *buff)
2055 {
2056   Field *field;
2057   SORT_ADDON_FIELD *addonf= addon_field;
2058 
2059   for ( ; (field= addonf->field) ; addonf++)
2060   {
2061     if (addonf->null_bit && (addonf->null_bit & buff[addonf->null_offset]))
2062     {
2063       field->set_null();
2064       continue;
2065     }
2066     field->set_notnull();
2067     field->unpack(field->ptr, buff + addonf->offset);
2068   }
2069 }
2070 
2071 /*
2072 ** functions to change a double or float to a sortable string
2073 ** The following should work for IEEE
2074 */
2075 
2076 #define DBL_EXP_DIG (sizeof(double)*8-DBL_MANT_DIG)
2077 
change_double_for_sort(double nr,uchar * to)2078 void change_double_for_sort(double nr,uchar *to)
2079 {
2080   uchar *tmp=(uchar*) to;
2081   if (nr == 0.0)
2082   {						/* Change to zero string */
2083     tmp[0]=(uchar) 128;
2084     memset(tmp+1, 0, sizeof(nr)-1);
2085   }
2086   else
2087   {
2088 #ifdef WORDS_BIGENDIAN
2089     memcpy(tmp, &nr, sizeof(nr));
2090 #else
2091     {
2092       uchar *ptr= (uchar*) &nr;
2093 #if defined(__FLOAT_WORD_ORDER) && (__FLOAT_WORD_ORDER == __BIG_ENDIAN)
2094       tmp[0]= ptr[3]; tmp[1]=ptr[2]; tmp[2]= ptr[1]; tmp[3]=ptr[0];
2095       tmp[4]= ptr[7]; tmp[5]=ptr[6]; tmp[6]= ptr[5]; tmp[7]=ptr[4];
2096 #else
2097       tmp[0]= ptr[7]; tmp[1]=ptr[6]; tmp[2]= ptr[5]; tmp[3]=ptr[4];
2098       tmp[4]= ptr[3]; tmp[5]=ptr[2]; tmp[6]= ptr[1]; tmp[7]=ptr[0];
2099 #endif
2100     }
2101 #endif
2102     if (tmp[0] & 128)				/* Negative */
2103     {						/* make complement */
2104       uint i;
2105       for (i=0 ; i < sizeof(nr); i++)
2106 	tmp[i]=tmp[i] ^ (uchar) 255;
2107     }
2108     else
2109     {					/* Set high and move exponent one up */
2110       ushort exp_part=(((ushort) tmp[0] << 8) | (ushort) tmp[1] |
2111 		       (ushort) 32768);
2112       exp_part+= (ushort) 1 << (16-1-DBL_EXP_DIG);
2113       tmp[0]= (uchar) (exp_part >> 8);
2114       tmp[1]= (uchar) exp_part;
2115     }
2116   }
2117 }
2118