1 /* Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License, version 2.0,
5 as published by the Free Software Foundation.
6
7 This program is also distributed with certain software (including
8 but not limited to OpenSSL) that is licensed under separate terms,
9 as designated in a particular file or component or in included license
10 documentation. The authors of MySQL hereby grant you an additional
11 permission to link the program and your derivative works with the
12 separately licensed software that they have included with MySQL.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License, version 2.0, for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
22
23 #ifndef SORT_PARAM_INCLUDED
24 #define SORT_PARAM_INCLUDED
25
26 #include "field_types.h" // enum_field_types
27 #include "my_base.h" // ha_rows
28 #include "my_byteorder.h" // uint4korr
29 #include "my_dbug.h"
30 #include "my_inttypes.h"
31 #include "my_io.h" // mysql_com.h needs my_socket
32 #include "mysql_com.h" // Item_result
33 #include "sql/sql_array.h" // Bounds_checked_array
34 #include "sql/sql_const.h"
35 #include "sql/sql_sort.h" // Filesort_info
36 #include "sql/thr_malloc.h"
37 #include "sql_string.h"
38
39 class Field;
40 class Filesort;
41 class Item;
42 struct TABLE;
43
44 enum class Addon_fields_status {
45 unknown_status,
46 using_addon_fields,
47
48 // The remainder are reasons why we are _not_ using addon fields.
49 fulltext_searched,
50 keep_rowid,
51 row_not_packable,
52 row_contains_blob,
53 skip_heuristic,
54 using_priority_queue
55 };
56
addon_fields_text(Addon_fields_status afs)57 inline const char *addon_fields_text(Addon_fields_status afs) {
58 switch (afs) {
59 default:
60 return "unknown";
61 case Addon_fields_status::using_addon_fields:
62 return "using_addon_fields";
63 case Addon_fields_status::fulltext_searched:
64 return "fulltext_searched";
65 case Addon_fields_status::keep_rowid:
66 return "keep_rowid";
67 case Addon_fields_status::row_not_packable:
68 return "row_not_packable";
69 case Addon_fields_status::row_contains_blob:
70 return "row_contains_blob";
71 case Addon_fields_status::skip_heuristic:
72 return "skip_heuristic";
73 case Addon_fields_status::using_priority_queue:
74 return "using_priority_queue";
75 }
76 }
77
78 /* Structs used when sorting */
79
80 /// Struct that holds information about a sort field.
81 struct st_sort_field {
82 Item *item; ///< Item to sort
83
84 /// Length of sort field. Beware, can be 0xFFFFFFFFu (infinite)!
85 uint length;
86
87 Item_result result_type; ///< Type of item
88 enum_field_types field_type; ///< Field type of the item
89 bool reverse; ///< if descending sort
90 bool is_varlen; ///< If key part has variable length
91 bool maybe_null; ///< If key part is nullable
92 };
93
94 /**
95 The structure Sort_addon_field describes the layout
96 for field values appended to sorted values in records to be sorted
97 in the sort buffer.
98
99 Null bit maps for the appended values is placed before the values
100 themselves. Offsets are from the last sorted field.
101
102 The structure is used to store values of the additional fields
103 in the sort buffer. It is used also when these values are read
104 from a temporary file/buffer in 'Filesort_info::unpack_addon_fields'.
105 */
106
107 struct Sort_addon_field { /* Sort addon packed field */
108 Field *field; /* Original field */
109 uint offset; /* Offset from the last sorted field */
110 uint null_offset; /* Offset to to null bit from the last sorted field */
111 uint max_length; /* Maximum length in the sort buffer */
112 uint8 null_bit; /* Null bit mask for the field */
113 };
114
115 typedef Bounds_checked_array<Sort_addon_field> Addon_fields_array;
116
117 /**
118 This class wraps information about usage of addon fields.
119 An Addon_fields object is used both during packing of data in the filesort
120 buffer, and later during unpacking in 'Filesort_info::unpack_addon_fields'.
121
122 @see documentation for the Sort_addon_field struct.
123 @see documentation for get_addon_fields()
124 */
125 class Addon_fields {
126 public:
Addon_fields(Addon_fields_array arr)127 Addon_fields(Addon_fields_array arr)
128 : m_field_descriptors(arr),
129 m_addon_buf(nullptr),
130 m_addon_buf_length(0),
131 m_using_packed_addons(false) {
132 DBUG_ASSERT(!arr.is_null());
133 }
134
begin()135 Sort_addon_field *begin() { return m_field_descriptors.begin(); }
end()136 Sort_addon_field *end() { return m_field_descriptors.end(); }
num_field_descriptors()137 size_t num_field_descriptors() const { return m_field_descriptors.size(); }
138
139 /// SortFileIterator needs an extra buffer when unpacking.
allocate_addon_buf(uint sz)140 uchar *allocate_addon_buf(uint sz) {
141 if (m_addon_buf != nullptr) {
142 DBUG_ASSERT(m_addon_buf_length == sz);
143 return m_addon_buf;
144 }
145 m_addon_buf = static_cast<uchar *>((*THR_MALLOC)->Alloc(sz));
146 if (m_addon_buf) m_addon_buf_length = sz;
147 return m_addon_buf;
148 }
149
get_addon_buf()150 uchar *get_addon_buf() { return m_addon_buf; }
get_addon_buf_length()151 uint get_addon_buf_length() const { return m_addon_buf_length; }
152
set_using_packed_addons(bool val)153 void set_using_packed_addons(bool val) { m_using_packed_addons = val; }
154
using_packed_addons()155 bool using_packed_addons() const { return m_using_packed_addons; }
156
157 /**
158 @returns Total number of bytes used for packed addon fields.
159 the size of the length field + size of null bits + sum of field sizes.
160 */
read_addon_length(uchar * p)161 static uint read_addon_length(uchar *p) {
162 return size_of_length_field + uint4korr(p);
163 }
164
165 /**
166 Stores the number of bytes used for packed addon fields.
167 */
store_addon_length(uchar * p,uint sz)168 static void store_addon_length(uchar *p, uint sz) {
169 // We actually store the length of everything *after* the length field.
170 int4store(p, sz - size_of_length_field);
171 }
172
173 static const uint size_of_length_field = 4;
174
175 private:
176 Addon_fields_array m_field_descriptors;
177
178 uchar *m_addon_buf; ///< Buffer for unpacking addon fields.
179 uint m_addon_buf_length; ///< Length of the buffer.
180 bool m_using_packed_addons; ///< Are we packing the addon fields?
181 };
182
183 /**
184 There are several record formats for sorting:
185 @verbatim
186 |<key a><key b>... | <rowid> |
187 / m_fixed_sort_length / ref_len /
188 @endverbatim
189
190 or with "addon fields"
191 @verbatim
192 |<key a><key b>... |<null bits>|<field a><field b>...|
193 / m_fixed_sort_length / addon_length /
194 @endverbatim
195
196 The packed format for "addon fields"
197 @verbatim
198 |<key a><key b>... |<length>|<null bits>|<field a><field b>...|
199 / m_fixed_sort_length / addon_length /
200 @endverbatim
201
202 All the formats above have fixed-size keys, with appropriate padding.
203 Fixed-size keys can be compared/sorted using memcmp().
204
205 The packed (variable length) format for keys:
206 @verbatim
207 |<keylen>|<varkey a><key b>...<hash>|<rowid> or <addons> |
208 / 4 bytes/ keylen bytes / ref_len or addon_length /
209 @endverbatim
210
211 This format is currently only used if we are sorting JSON data.
212 Variable-size keys must be compared piece-by-piece, using type information
213 about each individual key part, @see cmp_varlen_keys.
214
215 All the record formats consist of a (possibly composite) key,
216 followed by a (possibly composite) payload.
217 The key is used for sorting data. Once sorting is done, the payload is
218 stored in some buffer, and read by some RowIterator.
219
220 For fixed-size keys, with @<rowid@> payload, the @<rowid@> is also
221 considered to be part of the key.
222
223 <dl>
224 <dt>@<key@>
225 <dd> Fields are fixed-size, specially encoded with
226 Field::make_sort_key() so we can do byte-by-byte compare.
227 <dt>@<length@>
228 <dd> Contains the *actual* packed length (after packing) of
229 everything after the sort keys.
230 The size of the length field is 2 bytes,
231 which should cover most use cases: addon data <= 65535 bytes.
232 This is the same as max record size in MySQL.
233 <dt>@<null bits@>
234 <dd> One bit for each nullable field, indicating whether the field
235 is null or not. May have size zero if no fields are nullable.
236 <dt>@<field xx@>
237 <dd> Are stored with field->pack(), and retrieved with
238 field->unpack().
239 Addon fields within a record are stored consecutively, with no
240 "holes" or padding. They will have zero size for NULL values.
241 <dt>@<keylen@>
242 <dd> Contains the *actual* packed length of all the keys.
243 We may have an arbitrary mix of fixed and variable-sized keys.
244 <dt>@<hash@>
245 <dd> Optional 8 byte hash, used for GROUPing of JSON values.
246 <dt>@<varkey@>
247 <dd> Used for JSON and variable-length string values, the format is:
248 </dl>
249 @verbatim
250 |<null value>|<key length>|<sort key> |
251 / 1 byte / 4 bytes / key length bytes /
252 @endverbatim
253 <dl>
254 <dt>@<null value@>
255 <dd> 0x00 for NULL. 0xff for NULL under DESC sort. 0x01 for NOT NULL.
256 <dt>@<key length@>
257 <dd> The length of the sort key, *including* the four bytes for the
258 key length. Does not exist if the field is NULL.
259 </dl>
260 */
261 class Sort_param {
262 uint m_fixed_rec_length{0}; ///< Maximum length of a record, see above.
263 uint m_fixed_sort_length{0}; ///< Maximum number of bytes used for sorting.
264 public:
265 uint ref_length{0}; // Length of record ref.
266 uint m_addon_length{0}; // Length of added packed fields.
267 uint fixed_res_length{0}; // Length of records in final sorted file/buffer.
268 uint max_rows_per_buffer{0}; // Max (unpacked) rows / buffer.
269 ha_rows max_rows{0}; // Select limit, or HA_POS_ERROR if unlimited.
270 TABLE *sort_form{nullptr}; // For quicker make_sortkey.
271 bool use_hash{false}; // Whether to use hash to distinguish cut JSON
272 bool m_force_stable_sort{false}; // Keep relative order of equal elements
273 bool m_remove_duplicates{
274 false}; ///< Whether we want to remove duplicate rows
275
276 /// If we are removing duplicate rows and merging, contains a buffer where we
277 /// can store the last key seen.
278 uchar *m_last_key_seen{nullptr};
279
280 /**
281 ORDER BY list with some precalculated info for filesort.
282 Array is created and owned by a Filesort instance.
283 */
284 Bounds_checked_array<st_sort_field> local_sortorder;
285
286 Addon_fields *addon_fields{nullptr}; ///< Descriptors for addon fields.
287 bool using_pq{false};
288 StringBuffer<STRING_BUFFER_USUAL_SIZE> tmp_buffer;
289
290 /// Decide whether we are to use addon fields (sort rows instead of sorting
291 /// row IDs or not). See using_addon_fields().
292 ///
293 /// Note that currently, this function must _not_ be called from the Filesort
294 /// constructor, as the read sets are not fully set up at that time
295 /// (see filter_virtual_gcol_base_cols(), which runs very late in
296 /// optimization). If we want to change this, we can probably have
297 /// make_sortkey() check the read set at runtime, at the cost of slightly less
298 /// precise estimation of packed row size.
299 void decide_addon_fields(Filesort *file_sort, TABLE *table,
300 bool sort_positions);
301
302 /**
303 Initialize this struct for filesort() usage.
304 @see description of record layout above
305 @param [in,out] file_sort sorting information which may be re-used on
306 subsequent invocations of filesort()
307 @param sf_array initialization value for local_sortorder
308 @param sortlen length of sorted columns
309 @param table table to be sorted
310 @param maxrows HA_POS_ERROR or possible LIMIT value
311 @param remove_duplicates if true, items with duplicate keys will be removed
312 */
313 void init_for_filesort(Filesort *file_sort,
314 Bounds_checked_array<st_sort_field> sf_array,
315 uint sortlen, TABLE *table, ha_rows maxrows,
316 bool remove_duplicates);
317
318 /// Enables the packing of addons if possible.
319 void try_to_pack_addons();
320
321 /// Are we packing the "addon fields"?
using_packed_addons()322 bool using_packed_addons() const {
323 DBUG_ASSERT(m_using_packed_addons == (addon_fields != nullptr &&
324 addon_fields->using_packed_addons()));
325 return m_using_packed_addons;
326 }
327
328 /// Are we using varlen key fields?
using_varlen_keys()329 bool using_varlen_keys() const { return m_num_varlen_keys > 0; }
330
331 /// Are we using any JSON key fields?
using_json_keys()332 bool using_json_keys() const { return m_num_json_keys > 0; }
333
334 /// Are we using "addon fields"? Note that decide_addon_fields() or
335 /// init_for_filesort() must be called before checking this.
using_addon_fields()336 bool using_addon_fields() const { return addon_fields != nullptr; }
337
338 /**
339 Stores key fields in *dst.
340 Then appends either *ref_pos (the @<rowid@>) or the "addon fields".
341 @param [out] dst Where to store the result
342 @param ref_pos Where to find the @<rowid@>
343 @param [in,out] longest_addons
344 The longest addon field row (sum of all addon fields for any single
345 given row) found.
346 @returns Number of bytes stored, or UINT_MAX if the result could not
347 provably fit within the destination buffer.
348 */
349 uint make_sortkey(Bounds_checked_array<uchar> dst, const uchar *ref_pos,
350 size_t *longest_addons);
351
352 // Adapter for Bounded_queue.
make_sortkey(uchar * dst,size_t dst_len,const uchar * ref_pos)353 uint make_sortkey(uchar *dst, size_t dst_len, const uchar *ref_pos) {
354 size_t longest_addons = 0; // Unused.
355 return make_sortkey(Bounds_checked_array<uchar>(dst, dst_len), ref_pos,
356 &longest_addons);
357 }
358
359 /// Stores the length of a variable-sized key.
store_varlen_key_length(uchar * p,uint sz)360 static void store_varlen_key_length(uchar *p, uint sz) { int4store(p, sz); }
361
362 /// Skips the key part, and returns address of payload.
get_start_of_payload(uchar * p)363 uchar *get_start_of_payload(uchar *p) const {
364 size_t offset = using_varlen_keys() ? uint4korr(p) : max_compare_length();
365 if (!using_addon_fields() && !using_varlen_keys())
366 offset -= ref_length; // The reference is also part of the sort key.
367 return p + offset;
368 }
369
370 /**
371 Skips the key part, and returns address of payload.
372 For SortBufferIterator, which does not have access to Sort_param.
373 */
get_start_of_payload(uint default_val,bool is_varlen,uchar * p)374 static uchar *get_start_of_payload(uint default_val, bool is_varlen,
375 uchar *p) {
376 size_t offset = is_varlen ? uint4korr(p) : default_val;
377 return p + offset;
378 }
379
380 /// @returns The number of bytes used for sorting of fixed-size keys.
max_compare_length()381 uint max_compare_length() const { return m_fixed_sort_length; }
382
set_max_compare_length(uint len)383 void set_max_compare_length(uint len) { m_fixed_sort_length = len; }
384
385 /// @returns The actual size of a record (key + addons)
386 size_t get_record_length(uchar *p) const;
387
388 /// @returns The maximum size of a record (key + addons)
max_record_length()389 uint max_record_length() const { return m_fixed_rec_length; }
390
set_max_record_length(uint len)391 void set_max_record_length(uint len) { m_fixed_rec_length = len; }
392
393 /**
394 Getter for record length and result length.
395 @param record_start Pointer to record.
396 @param [out] recl Store record length here.
397 @param [out] resl Store result length here.
398 */
399 void get_rec_and_res_len(uchar *record_start, uint *recl, uint *resl);
400
401 enum enum_sort_algorithm {
402 FILESORT_ALG_NONE,
403 FILESORT_ALG_STD_SORT,
404 FILESORT_ALG_STD_STABLE
405 };
406 enum_sort_algorithm m_sort_algorithm{FILESORT_ALG_NONE};
407
408 Addon_fields_status m_addon_fields_status{
409 Addon_fields_status::unknown_status};
410
411 static const uint size_of_varlength_field = 4;
412
413 private:
414 /// Counts number of varlen keys
415 int count_varlen_keys() const;
416 /// Counts number of JSON keys
417 int count_json_keys() const;
418
419 /// total length of fields which have a packable type
420 uint m_packable_length{0};
421 /// caches the value of using_packed_addons()
422 bool m_using_packed_addons{false};
423 int m_num_varlen_keys{0}; ///< number of varlen keys
424 int m_num_json_keys{0}; ///< number of JSON keys
425
426 public:
427 Sort_param() = default;
428 // Not copyable.
429 Sort_param(const Sort_param &) = delete;
430 Sort_param &operator=(const Sort_param &) = delete;
431 };
432
get_start_of_payload(const Filesort_info * fsi,uchar * p)433 inline uchar *get_start_of_payload(const Filesort_info *fsi, uchar *p) {
434 return Sort_param::get_start_of_payload(fsi->sort_length(),
435 fsi->using_varlen_keys(), p);
436 }
437
438 /// Are we using "packed addon fields"?
using_packed_addons(const Filesort_info * fsi)439 inline bool using_packed_addons(const Filesort_info *fsi) {
440 return fsi->addon_fields != nullptr &&
441 fsi->addon_fields->using_packed_addons();
442 }
443
444 #endif // SORT_PARAM_INCLUDED
445