1 /* Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 #ifndef SORT_PARAM_INCLUDED
24 #define SORT_PARAM_INCLUDED
25 
26 #include "field_types.h"   // enum_field_types
27 #include "my_base.h"       // ha_rows
28 #include "my_byteorder.h"  // uint4korr
29 #include "my_dbug.h"
30 #include "my_inttypes.h"
31 #include "my_io.h"          // mysql_com.h needs my_socket
32 #include "mysql_com.h"      // Item_result
33 #include "sql/sql_array.h"  // Bounds_checked_array
34 #include "sql/sql_const.h"
35 #include "sql/sql_sort.h"  // Filesort_info
36 #include "sql/thr_malloc.h"
37 #include "sql_string.h"
38 
39 class Field;
40 class Filesort;
41 class Item;
42 struct TABLE;
43 
44 enum class Addon_fields_status {
45   unknown_status,
46   using_addon_fields,
47 
48   // The remainder are reasons why we are _not_ using addon fields.
49   fulltext_searched,
50   keep_rowid,
51   row_not_packable,
52   row_contains_blob,
53   skip_heuristic,
54   using_priority_queue
55 };
56 
addon_fields_text(Addon_fields_status afs)57 inline const char *addon_fields_text(Addon_fields_status afs) {
58   switch (afs) {
59     default:
60       return "unknown";
61     case Addon_fields_status::using_addon_fields:
62       return "using_addon_fields";
63     case Addon_fields_status::fulltext_searched:
64       return "fulltext_searched";
65     case Addon_fields_status::keep_rowid:
66       return "keep_rowid";
67     case Addon_fields_status::row_not_packable:
68       return "row_not_packable";
69     case Addon_fields_status::row_contains_blob:
70       return "row_contains_blob";
71     case Addon_fields_status::skip_heuristic:
72       return "skip_heuristic";
73     case Addon_fields_status::using_priority_queue:
74       return "using_priority_queue";
75   }
76 }
77 
78 /* Structs used when sorting */
79 
80 /// Struct that holds information about a sort field.
81 struct st_sort_field {
82   Item *item;  ///< Item to sort
83 
84   /// Length of sort field. Beware, can be 0xFFFFFFFFu (infinite)!
85   uint length;
86 
87   Item_result result_type;      ///< Type of item
88   enum_field_types field_type;  ///< Field type of the item
89   bool reverse;                 ///< if descending sort
90   bool is_varlen;               ///< If key part has variable length
91   bool maybe_null;              ///< If key part is nullable
92 };
93 
94 /**
95   The structure Sort_addon_field describes the layout
96   for field values appended to sorted values in records to be sorted
97   in the sort buffer.
98 
99   Null bit maps for the appended values is placed before the values
100   themselves. Offsets are from the last sorted field.
101 
102   The structure is used to store values of the additional fields
103   in the sort buffer. It is used also when these values are read
104   from a temporary file/buffer in 'Filesort_info::unpack_addon_fields'.
105 */
106 
107 struct Sort_addon_field { /* Sort addon packed field */
108   Field *field;           /* Original field */
109   uint offset;            /* Offset from the last sorted field */
110   uint null_offset;       /* Offset to to null bit from the last sorted field */
111   uint max_length;        /* Maximum length in the sort buffer */
112   uint8 null_bit;         /* Null bit mask for the field */
113 };
114 
115 typedef Bounds_checked_array<Sort_addon_field> Addon_fields_array;
116 
117 /**
118   This class wraps information about usage of addon fields.
119   An Addon_fields object is used both during packing of data in the filesort
120   buffer, and later during unpacking in 'Filesort_info::unpack_addon_fields'.
121 
122   @see documentation for the Sort_addon_field struct.
123   @see documentation for get_addon_fields()
124  */
125 class Addon_fields {
126  public:
Addon_fields(Addon_fields_array arr)127   Addon_fields(Addon_fields_array arr)
128       : m_field_descriptors(arr),
129         m_addon_buf(nullptr),
130         m_addon_buf_length(0),
131         m_using_packed_addons(false) {
132     DBUG_ASSERT(!arr.is_null());
133   }
134 
begin()135   Sort_addon_field *begin() { return m_field_descriptors.begin(); }
end()136   Sort_addon_field *end() { return m_field_descriptors.end(); }
num_field_descriptors()137   size_t num_field_descriptors() const { return m_field_descriptors.size(); }
138 
139   /// SortFileIterator needs an extra buffer when unpacking.
allocate_addon_buf(uint sz)140   uchar *allocate_addon_buf(uint sz) {
141     if (m_addon_buf != nullptr) {
142       DBUG_ASSERT(m_addon_buf_length == sz);
143       return m_addon_buf;
144     }
145     m_addon_buf = static_cast<uchar *>((*THR_MALLOC)->Alloc(sz));
146     if (m_addon_buf) m_addon_buf_length = sz;
147     return m_addon_buf;
148   }
149 
get_addon_buf()150   uchar *get_addon_buf() { return m_addon_buf; }
get_addon_buf_length()151   uint get_addon_buf_length() const { return m_addon_buf_length; }
152 
set_using_packed_addons(bool val)153   void set_using_packed_addons(bool val) { m_using_packed_addons = val; }
154 
using_packed_addons()155   bool using_packed_addons() const { return m_using_packed_addons; }
156 
157   /**
158     @returns Total number of bytes used for packed addon fields.
159     the size of the length field + size of null bits + sum of field sizes.
160    */
read_addon_length(uchar * p)161   static uint read_addon_length(uchar *p) {
162     return size_of_length_field + uint4korr(p);
163   }
164 
165   /**
166     Stores the number of bytes used for packed addon fields.
167    */
store_addon_length(uchar * p,uint sz)168   static void store_addon_length(uchar *p, uint sz) {
169     // We actually store the length of everything *after* the length field.
170     int4store(p, sz - size_of_length_field);
171   }
172 
173   static const uint size_of_length_field = 4;
174 
175  private:
176   Addon_fields_array m_field_descriptors;
177 
178   uchar *m_addon_buf;          ///< Buffer for unpacking addon fields.
179   uint m_addon_buf_length;     ///< Length of the buffer.
180   bool m_using_packed_addons;  ///< Are we packing the addon fields?
181 };
182 
183 /**
184   There are several record formats for sorting:
185 @verbatim
186     |<key a><key b>...    | <rowid> |
187     / m_fixed_sort_length / ref_len /
188 @endverbatim
189 
190   or with "addon fields"
191 @verbatim
192     |<key a><key b>...    |<null bits>|<field a><field b>...|
193     / m_fixed_sort_length /         addon_length            /
194 @endverbatim
195 
196   The packed format for "addon fields"
197 @verbatim
198     |<key a><key b>...    |<length>|<null bits>|<field a><field b>...|
199     / m_fixed_sort_length /         addon_length                     /
200 @endverbatim
201 
202   All the formats above have fixed-size keys, with appropriate padding.
203   Fixed-size keys can be compared/sorted using memcmp().
204 
205   The packed (variable length) format for keys:
206 @verbatim
207     |<keylen>|<varkey a><key b>...<hash>|<rowid>  or <addons>     |
208     / 4 bytes/   keylen bytes           / ref_len or addon_length /
209 @endverbatim
210 
211   This format is currently only used if we are sorting JSON data.
212   Variable-size keys must be compared piece-by-piece, using type information
213   about each individual key part, @see cmp_varlen_keys.
214 
215   All the record formats consist of a (possibly composite) key,
216   followed by a (possibly composite) payload.
217   The key is used for sorting data. Once sorting is done, the payload is
218   stored in some buffer, and read by some RowIterator.
219 
220   For fixed-size keys, with @<rowid@> payload, the @<rowid@> is also
221   considered to be part of the key.
222 
223 <dl>
224 <dt>@<key@>
225           <dd>  Fields are fixed-size, specially encoded with
226                 Field::make_sort_key() so we can do byte-by-byte compare.
227 <dt>@<length@>
228           <dd>  Contains the *actual* packed length (after packing) of
229                 everything after the sort keys.
230                 The size of the length field is 2 bytes,
231                 which should cover most use cases: addon data <= 65535 bytes.
232                 This is the same as max record size in MySQL.
233 <dt>@<null bits@>
234           <dd>  One bit for each nullable field, indicating whether the field
235                 is null or not. May have size zero if no fields are nullable.
236 <dt>@<field xx@>
237           <dd>  Are stored with field->pack(), and retrieved with
238                 field->unpack().
239                 Addon fields within a record are stored consecutively, with no
240                 "holes" or padding. They will have zero size for NULL values.
241 <dt>@<keylen@>
242           <dd>  Contains the *actual* packed length of all the keys.
243                 We may have an arbitrary mix of fixed and variable-sized keys.
244 <dt>@<hash@>
245           <dd>  Optional 8 byte hash, used for GROUPing of JSON values.
246 <dt>@<varkey@>
247           <dd>  Used for JSON and variable-length string values, the format is:
248 </dl>
249 @verbatim
250                 |<null value>|<key length>|<sort key>        |
251                 / 1 byte     /   4 bytes  / key length bytes /
252 @endverbatim
253 <dl>
254 <dt>@<null value@>
255           <dd>  0x00 for NULL. 0xff for NULL under DESC sort. 0x01 for NOT NULL.
256 <dt>@<key length@>
257           <dd>  The length of the sort key, *including* the four bytes for the
258                 key length. Does not exist if the field is NULL.
259 </dl>
260  */
261 class Sort_param {
262   uint m_fixed_rec_length{0};   ///< Maximum length of a record, see above.
263   uint m_fixed_sort_length{0};  ///< Maximum number of bytes used for sorting.
264  public:
265   uint ref_length{0};        // Length of record ref.
266   uint m_addon_length{0};    // Length of added packed fields.
267   uint fixed_res_length{0};  // Length of records in final sorted file/buffer.
268   uint max_rows_per_buffer{0};  // Max (unpacked) rows / buffer.
269   ha_rows max_rows{0};          // Select limit, or HA_POS_ERROR if unlimited.
270   TABLE *sort_form{nullptr};    // For quicker make_sortkey.
271   bool use_hash{false};         // Whether to use hash to distinguish cut JSON
272   bool m_force_stable_sort{false};  // Keep relative order of equal elements
273   bool m_remove_duplicates{
274       false};  ///< Whether we want to remove duplicate rows
275 
276   /// If we are removing duplicate rows and merging, contains a buffer where we
277   /// can store the last key seen.
278   uchar *m_last_key_seen{nullptr};
279 
280   /**
281     ORDER BY list with some precalculated info for filesort.
282     Array is created and owned by a Filesort instance.
283    */
284   Bounds_checked_array<st_sort_field> local_sortorder;
285 
286   Addon_fields *addon_fields{nullptr};  ///< Descriptors for addon fields.
287   bool using_pq{false};
288   StringBuffer<STRING_BUFFER_USUAL_SIZE> tmp_buffer;
289 
290   /// Decide whether we are to use addon fields (sort rows instead of sorting
291   /// row IDs or not). See using_addon_fields().
292   ///
293   /// Note that currently, this function must _not_ be called from the Filesort
294   /// constructor, as the read sets are not fully set up at that time
295   /// (see filter_virtual_gcol_base_cols(), which runs very late in
296   /// optimization). If we want to change this, we can probably have
297   /// make_sortkey() check the read set at runtime, at the cost of slightly less
298   /// precise estimation of packed row size.
299   void decide_addon_fields(Filesort *file_sort, TABLE *table,
300                            bool sort_positions);
301 
302   /**
303     Initialize this struct for filesort() usage.
304     @see description of record layout above
305     @param [in,out] file_sort sorting information which may be re-used on
306                               subsequent invocations of filesort()
307     @param sf_array  initialization value for local_sortorder
308     @param sortlen   length of sorted columns
309     @param table     table to be sorted
310     @param maxrows   HA_POS_ERROR or possible LIMIT value
311     @param remove_duplicates if true, items with duplicate keys will be removed
312   */
313   void init_for_filesort(Filesort *file_sort,
314                          Bounds_checked_array<st_sort_field> sf_array,
315                          uint sortlen, TABLE *table, ha_rows maxrows,
316                          bool remove_duplicates);
317 
318   /// Enables the packing of addons if possible.
319   void try_to_pack_addons();
320 
321   /// Are we packing the "addon fields"?
using_packed_addons()322   bool using_packed_addons() const {
323     DBUG_ASSERT(m_using_packed_addons == (addon_fields != nullptr &&
324                                           addon_fields->using_packed_addons()));
325     return m_using_packed_addons;
326   }
327 
328   /// Are we using varlen key fields?
using_varlen_keys()329   bool using_varlen_keys() const { return m_num_varlen_keys > 0; }
330 
331   /// Are we using any JSON key fields?
using_json_keys()332   bool using_json_keys() const { return m_num_json_keys > 0; }
333 
334   /// Are we using "addon fields"? Note that decide_addon_fields() or
335   /// init_for_filesort() must be called before checking this.
using_addon_fields()336   bool using_addon_fields() const { return addon_fields != nullptr; }
337 
338   /**
339     Stores key fields in *dst.
340     Then appends either *ref_pos (the @<rowid@>) or the "addon fields".
341     @param  [out] dst   Where to store the result
342     @param  ref_pos     Where to find the @<rowid@>
343     @param  [in,out] longest_addons
344        The longest addon field row (sum of all addon fields for any single
345        given row) found.
346     @returns Number of bytes stored, or UINT_MAX if the result could not
347       provably fit within the destination buffer.
348    */
349   uint make_sortkey(Bounds_checked_array<uchar> dst, const uchar *ref_pos,
350                     size_t *longest_addons);
351 
352   // Adapter for Bounded_queue.
make_sortkey(uchar * dst,size_t dst_len,const uchar * ref_pos)353   uint make_sortkey(uchar *dst, size_t dst_len, const uchar *ref_pos) {
354     size_t longest_addons = 0;  // Unused.
355     return make_sortkey(Bounds_checked_array<uchar>(dst, dst_len), ref_pos,
356                         &longest_addons);
357   }
358 
359   /// Stores the length of a variable-sized key.
store_varlen_key_length(uchar * p,uint sz)360   static void store_varlen_key_length(uchar *p, uint sz) { int4store(p, sz); }
361 
362   /// Skips the key part, and returns address of payload.
get_start_of_payload(uchar * p)363   uchar *get_start_of_payload(uchar *p) const {
364     size_t offset = using_varlen_keys() ? uint4korr(p) : max_compare_length();
365     if (!using_addon_fields() && !using_varlen_keys())
366       offset -= ref_length;  // The reference is also part of the sort key.
367     return p + offset;
368   }
369 
370   /**
371     Skips the key part, and returns address of payload.
372     For SortBufferIterator, which does not have access to Sort_param.
373    */
get_start_of_payload(uint default_val,bool is_varlen,uchar * p)374   static uchar *get_start_of_payload(uint default_val, bool is_varlen,
375                                      uchar *p) {
376     size_t offset = is_varlen ? uint4korr(p) : default_val;
377     return p + offset;
378   }
379 
380   /// @returns The number of bytes used for sorting of fixed-size keys.
max_compare_length()381   uint max_compare_length() const { return m_fixed_sort_length; }
382 
set_max_compare_length(uint len)383   void set_max_compare_length(uint len) { m_fixed_sort_length = len; }
384 
385   /// @returns The actual size of a record (key + addons)
386   size_t get_record_length(uchar *p) const;
387 
388   /// @returns The maximum size of a record (key + addons)
max_record_length()389   uint max_record_length() const { return m_fixed_rec_length; }
390 
set_max_record_length(uint len)391   void set_max_record_length(uint len) { m_fixed_rec_length = len; }
392 
393   /**
394     Getter for record length and result length.
395     @param record_start Pointer to record.
396     @param [out] recl   Store record length here.
397     @param [out] resl   Store result length here.
398    */
399   void get_rec_and_res_len(uchar *record_start, uint *recl, uint *resl);
400 
401   enum enum_sort_algorithm {
402     FILESORT_ALG_NONE,
403     FILESORT_ALG_STD_SORT,
404     FILESORT_ALG_STD_STABLE
405   };
406   enum_sort_algorithm m_sort_algorithm{FILESORT_ALG_NONE};
407 
408   Addon_fields_status m_addon_fields_status{
409       Addon_fields_status::unknown_status};
410 
411   static const uint size_of_varlength_field = 4;
412 
413  private:
414   /// Counts number of varlen keys
415   int count_varlen_keys() const;
416   /// Counts number of JSON keys
417   int count_json_keys() const;
418 
419   /// total length of fields which have a packable type
420   uint m_packable_length{0};
421   /// caches the value of using_packed_addons()
422   bool m_using_packed_addons{false};
423   int m_num_varlen_keys{0};  ///< number of varlen keys
424   int m_num_json_keys{0};    ///< number of JSON keys
425 
426  public:
427   Sort_param() = default;
428   // Not copyable.
429   Sort_param(const Sort_param &) = delete;
430   Sort_param &operator=(const Sort_param &) = delete;
431 };
432 
get_start_of_payload(const Filesort_info * fsi,uchar * p)433 inline uchar *get_start_of_payload(const Filesort_info *fsi, uchar *p) {
434   return Sort_param::get_start_of_payload(fsi->sort_length(),
435                                           fsi->using_varlen_keys(), p);
436 }
437 
438 /// Are we using "packed addon fields"?
using_packed_addons(const Filesort_info * fsi)439 inline bool using_packed_addons(const Filesort_info *fsi) {
440   return fsi->addon_fields != nullptr &&
441          fsi->addon_fields->using_packed_addons();
442 }
443 
444 #endif  // SORT_PARAM_INCLUDED
445