1 /* Copyright (c) 2010, 2021, Oracle and/or its affiliates.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 #ifndef FILESORT_UTILS_INCLUDED
24 #define FILESORT_UTILS_INCLUDED
25 
26 #include "my_global.h"
27 #include "my_base.h"
28 #include "sql_array.h"
29 #include "my_sys.h"
30 
31 #include <algorithm>
32 #include <utility>
33 
34 class Cost_model_table;
35 class Sort_param;
36 /*
37   Calculate cost of merge sort
38 
39     @param num_rows            Total number of rows.
40     @param num_keys_per_buffer Number of keys per buffer.
41     @param elem_size           Size of each element.
42     @param cost_model          Cost model object that provides cost data.
43 
44     Calculates cost of merge sort by simulating call to merge_many_buff().
45 
46   @returns
47     Computed cost of merge sort in disk seeks.
48 
49   @note
50     Declared here in order to be able to unit test it,
51     since library dependencies have not been sorted out yet.
52 
53     See also comments get_merge_many_buffs_cost().
54 */
55 
56 double get_merge_many_buffs_cost_fast(ha_rows num_rows,
57                                       ha_rows num_keys_per_buffer,
58                                       uint elem_size,
59                                       const Cost_model_table *cost_model);
60 
61 
62 /**
63   A wrapper class around the buffer used by filesort().
64   The sort buffer is a contiguous chunk of memory,
65   containing both records to be sorted, and pointers to said records:
66 
67   <start of buffer       | still unused   |                      end of buffer>
68   |rec 0|record 1  |rec 2|  ............  |ptr to rec2|ptr to rec1|ptr to rec0|
69 
70   Records will be inserted "left-to-right". Records are not necessarily
71   fixed-size, they can be packed and stored without any "gaps".
72 
73   Record pointers will be inserted "right-to-left", as a side-effect
74   of inserting the actual records.
75 
76   We wrap the buffer in order to be able to do lazy initialization of the
77   pointers: the buffer is often much larger than what we actually need.
78 
79   With this allocation scheme, and lazy initialization of the pointers,
80   we are able to pack variable-sized records in the buffer,
81   and thus possibly have space for more records than we initially estimated.
82 
83   The buffer must be kept available for multiple executions of the
84   same sort operation, so we have explicit allocate and free functions,
85   rather than doing alloc/free in CTOR/DTOR.
86  */
87 class Filesort_buffer
88 {
89 public:
Filesort_buffer()90   Filesort_buffer() :
91     m_next_rec_ptr(NULL), m_rawmem(NULL), m_record_pointers(NULL),
92     m_sort_keys(NULL),
93     m_num_records(0), m_record_length(0),
94     m_sort_length(0),
95     m_size_in_bytes(0), m_idx(0)
96   {}
97 
98   /** Sort me... */
99   void sort_buffer(const Sort_param *param, uint count);
100 
101   /**
102     Reverses the record pointer array, to avoid recording new results for
103     non-deterministic mtr tests.
104   */
reverse_record_pointers()105   void reverse_record_pointers()
106   {
107     if (m_idx < 2) // There is nothing to swap.
108       return;
109     uchar **keys= get_sort_keys();
110     const longlong count= m_idx - 1;
111     for (longlong ix= 0; ix <= count/2; ++ix)
112       std::swap(keys[ix], keys[count - ix]);
113   }
114 
115   /**
116     Initializes all the record pointers.
117   */
init_record_pointers()118   void init_record_pointers()
119   {
120     init_next_record_pointer();
121     while (m_idx < m_num_records)
122       (void) get_next_record_pointer();
123     reverse_record_pointers();
124   }
125 
126   /**
127     Prepares the buffer for the next batch of records to process.
128    */
init_next_record_pointer()129   void init_next_record_pointer()
130   {
131     m_idx= 0;
132     m_next_rec_ptr= m_rawmem;
133     m_sort_keys= NULL;
134   }
135 
136   /**
137     @returns the number of bytes currently in use for data.
138    */
space_used_for_data()139   size_t space_used_for_data() const
140   {
141     return m_next_rec_ptr ? m_next_rec_ptr - m_rawmem : 0;
142   }
143 
144   /**
145     @returns the number of bytes left in the buffer.
146   */
spaceleft()147   size_t spaceleft() const
148   {
149     assert(m_next_rec_ptr >= m_rawmem);
150     const size_t spaceused=
151       (m_next_rec_ptr - m_rawmem) +
152       (static_cast<size_t>(m_idx) * sizeof(uchar*));
153     return m_size_in_bytes - spaceused;
154   }
155 
156   /**
157     Is the buffer full?
158   */
isfull()159   bool isfull() const
160   {
161     if (m_idx < m_num_records)
162       return false;
163     return spaceleft() < (m_record_length + sizeof(uchar*));
164   }
165 
166   /**
167     Where should the next record be stored?
168    */
get_next_record_pointer()169   uchar *get_next_record_pointer()
170   {
171     uchar *retval= m_next_rec_ptr;
172     // Save the return value in the record pointer array.
173     m_record_pointers[-m_idx]= m_next_rec_ptr;
174     // Prepare for the subsequent request.
175     m_idx++;
176     m_next_rec_ptr+= m_record_length;
177     return retval;
178   }
179 
180   /**
181     Adjusts for actual record length. get_next_record_pointer() above was
182     pessimistic, and assumed that the record could not be packed.
183    */
adjust_next_record_pointer(uint val)184   void adjust_next_record_pointer(uint val)
185   {
186     m_next_rec_ptr-= (m_record_length - val);
187   }
188 
189   /**
190     @returns total size of buffer: pointer array + record buffers.
191   */
sort_buffer_size()192   size_t sort_buffer_size() const
193   {
194     return m_size_in_bytes;
195   }
196 
197   /**
198     Allocates the buffer, but does *not* initialize pointers.
199     Total size = (num_records * record_length) + (num_records * sizeof(pointer))
200                   space for records               space for pointer to records
201     Caller is responsible for raising an error if allocation fails.
202 
203     @param num_records   Number of records.
204     @param record_length (maximum) size of each record.
205     @returns Pointer to allocated area, or NULL in case of out-of-memory.
206   */
207   uchar *alloc_sort_buffer(uint num_records, uint record_length);
208 
209   /// Frees the buffer.
free_sort_buffer()210   void free_sort_buffer()
211   {
212     my_free(m_rawmem);
213     *this= Filesort_buffer();
214   }
215 
216   /**
217     Used to access the "right-to-left" array of record pointers as an ordinary
218     "left-to-right" array, so that we can pass it directly on to std::sort().
219   */
get_sort_keys()220   uchar **get_sort_keys()
221   {
222     if (m_idx == 0)
223       return NULL;
224     return &m_record_pointers[1 - m_idx];
225   }
226 
227   /**
228     Gets sorted record number ix. @see get_sort_keys()
229     Only valid after buffer has been sorted!
230   */
get_sorted_record(uint ix)231   uchar *get_sorted_record(uint ix)
232   {
233     return m_sort_keys[ix];
234   }
235 
236   /**
237     @returns The entire buffer, as a character array.
238     This is for reusing the memory for merge buffers.
239    */
get_raw_buf()240   Bounds_checked_array<uchar> get_raw_buf()
241   {
242     return Bounds_checked_array<uchar>(m_rawmem, m_size_in_bytes);
243   }
244 
245   /**
246     We need an assignment operator, see filesort().
247     This happens to have the same semantics as the one that would be
248     generated by the compiler. We still implement it here, to show shallow
249     assignment explicitly: we have two objects sharing the same array.
250   */
251   Filesort_buffer &operator=(const Filesort_buffer &rhs)
252   {
253     m_next_rec_ptr= rhs.m_next_rec_ptr;
254     m_rawmem= rhs.m_rawmem;
255     m_record_pointers= rhs.m_record_pointers;
256     m_sort_keys= rhs.m_sort_keys;
257     m_num_records= rhs.m_num_records;
258     m_record_length= rhs.m_record_length;
259     m_sort_length= rhs.m_sort_length;
260     m_size_in_bytes= rhs.m_size_in_bytes;
261     m_idx= rhs.m_idx;
262     return *this;
263   }
264 
get_sort_length()265   uint get_sort_length() const { return m_sort_length; }
set_sort_length(uint val)266   void set_sort_length(uint val) { m_sort_length= val; }
267 
268 private:
269   uchar  *m_next_rec_ptr;    /// The next record will be inserted here.
270   uchar  *m_rawmem;          /// The raw memory buffer.
271   uchar **m_record_pointers; /// The "right-to-left" array of record pointers.
272   uchar **m_sort_keys;       /// Caches the value of get_sort_keys()
273   uint    m_num_records;     /// Saved value from alloc_sort_buffer()
274   uint    m_record_length;   /// Saved value from alloc_sort_buffer()
275   uint    m_sort_length;     /// The length of the sort key.
276   size_t  m_size_in_bytes;   /// Size of raw buffer, in bytes.
277 
278   /**
279     This is the index in the "right-to-left" array of the next record to
280     be inserted into the buffer. It is signed, because we use it in signed
281     expressions like:
282         m_record_pointers[-m_idx];
283     It is longlong rather than int, to ensure that it covers UINT_MAX32
284     without any casting/warning.
285   */
286   longlong m_idx;
287 };
288 
289 #endif  // FILESORT_UTILS_INCLUDED
290