1 /* Copyright (c) 2010, 2020, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 #ifndef FILESORT_UTILS_INCLUDED
24 #define FILESORT_UTILS_INCLUDED
25 
26 #include <stddef.h>
27 #include <sys/types.h>
28 #include <memory>
29 #include <utility>
30 #include <vector>
31 
32 #include "map_helpers.h"
33 #include "my_base.h"  // ha_rows
34 #include "my_dbug.h"
35 #include "my_inttypes.h"
36 #include "mysql/service_mysql_alloc.h"  // my_free
37 #include "sql/sql_array.h"              // Bounds_checked_array
38 
39 class Cost_model_table;
40 class Sort_param;
41 
42 /**
43   Buffer used for storing records to be sorted. The records are stored in
44   a series of buffers that are allocated incrementally, growing 50% each
45   time, similar to how a MEM_ROOT works. This allows the user to set a large
46   maximum buffer size without getting huge allocations for sorting small result
47   sets. It means that if you actually _do_ use the entire buffer, there will be
48   more allocations than one large allocation up-front, but this is a worthwhile
49   tradeoff (those allocation will tend to disappear into the cost of actually
50   getting all the rows and sorting them).
51 
52   In addition, Filesort_buffer stores a vector of pointers to the beginning of
53   each record. It is these pointers that are actually sorted in filesort.
54   If the records are small, this can add up to overhead on top of the amount of
55   memory the user expected to use. We _do_ take already allocated pointers into
56   account when calculating how big a new block can be, so the amount of badness
57   is bounded:
58 
59   Assume that we have set maximum record size to infinity, but that in
60   practice, they are are about the smallest size possible (4-byte sort key plus
61   4-byte rowid) and that we are on a 64-bit system. Then, the worst possible
62   overhead is that we use as much space on pointers as the size of the last
63   (largest) block. We can look at the two possible extremes:
64 
65     - Smallest possible sort buffer (32 kB): 32 kB overhead.
66     - A huge sort buffer (x kB): If the last block is y kB, the total size will
67       be y + 2/3y + (2/3)²y + ... = 3y, which means the last block is 1/3 of the
68       total size. Thus, pointer overhead will be no worse than 33%.
69 
70   In most practical cases, it will be much better than this. In particular,
71   even when allocating a block (where we don't yet know how many records will
72   fit), we allow space for the record pointers we'd need given maximum-sized
73   rows.
74 
75   The buffer must be kept available for multiple executions of the
76   same sort operation, so one can call reset() for reuse. Again similar
77   to MEM_ROOT, this keeps the last (largest) block and discards the others.
78 */
79 class Filesort_buffer {
80  public:
Filesort_buffer()81   Filesort_buffer()
82       : m_next_rec_ptr(nullptr),
83         m_current_block_end(nullptr),
84         m_max_size_in_bytes(0),
85         m_current_block_size(0),
86         m_space_used_other_blocks(0) {}
87 
88   /** Sort me...
89     @return Number of records, after any deduplication
90    */
91   unsigned sort_buffer(Sort_param *param, uint count);
92 
93   /**
94     Prepares the buffer for the next batch of records to process.
95    */
96   void reset();
97 
98   /**
99     Where should the next record be stored?
100 
101     If a block is returned, it is always at least "min_size" bytes long.
102     If the returned block is not large enough for your purposes,
103     call get_next_record_pointer() again with a larger value of min_size than
104     the size you got back. Just increasing the size by one byte is fine;
105     the class will still try to make exponentially larger blocks each time.
106 
107     If there's no room for a record of the given size, returns nullptr.
108 
109     After you've written data to the given record, call commit_used_memory()
110     with the number of bytes you've actually written. This ensures it will
111     not get reused for subsequent records.
112   */
get_next_record_pointer(size_t min_size)113   Bounds_checked_array<uchar> get_next_record_pointer(size_t min_size) {
114     DBUG_ASSERT(min_size != 0xFFFFFFFFu);
115     // See if we need to allocate a new block.
116     if (m_next_rec_ptr == nullptr ||
117         m_next_rec_ptr + min_size > m_current_block_end) {
118       if (allocate_block(min_size)) return Bounds_checked_array<uchar>();
119     }
120 
121     // Allocate space within the current block.
122     return Bounds_checked_array<uchar>(m_next_rec_ptr,
123                                        m_current_block_end - m_next_rec_ptr);
124   }
125 
commit_used_memory(size_t num_bytes)126   void commit_used_memory(size_t num_bytes) {
127     m_record_pointers.push_back(m_next_rec_ptr);
128     m_next_rec_ptr += num_bytes;
129   }
130 
131   /**
132     Removes any existing rows and allocates `num_records` maximum-sized rows
133     (call get_sorted_record() to get their pointers). This is somewhat more
134     efficient than calling reset() and then get_next_record_pointer()
135     repeatedly, as it guarantees that at most one allocation is needed.
136 
137     @returns true on memory allocation error, including if the allocated
138     size would exceed max_size_in_bytes().
139   */
140   bool preallocate_records(size_t num_records);
141 
max_size_in_bytes()142   size_t max_size_in_bytes() const { return m_max_size_in_bytes; }
143 
144   /**
145     How much memory has been allocated (counting both the sort buffer and the
146     record pointers) at most since last call to clear_peak_memory_used().
147     Note in particular that reset() and free_sort_buffer() does _not_ zero this
148     counter.
149   */
peak_memory_used()150   size_t peak_memory_used() const {
151     update_peak_memory_used();
152     return m_peak_memory_used;
153   }
154 
155   /// See peak_memory_used.
clear_peak_memory_used()156   void clear_peak_memory_used() { m_peak_memory_used = 0; }
157 
158   /**
159     Set the memory limit for the sort buffer before starting to add records.
160     If trying to allocate space for a new row (in get_next_record_pointer)
161     would take us past the set limit, allocation will fail. Note that we can go
162     a bit over this limit due to having to store record pointers; see the class
163     comment.
164 
165     @param max_size       Maximum size of the sort buffer, in bytes.
166     @param record_length  Worst-case size of each record, in bytes.
167   */
set_max_size(size_t max_size,size_t record_length)168   void set_max_size(size_t max_size, size_t record_length) {
169     m_max_size_in_bytes = max_size;
170     m_max_record_length = record_length;
171   }
172 
173   /**
174     Frees all memory. Unlike reset(), which keeps one block for future use,
175     this actually releases all blocks. It is intended to release memory
176     in an error situation, for final shutdown, or if even the largest block
177     will not be large enough for future allocations.
178 
179     You do not need to call this if you are destroying the object anyway.
180   */
181   void free_sort_buffer();
182 
183   /**
184     Get the list of record pointers as a contiguous array. Will be invalidated
185     by calling get_next_record_pointer() or otherwise changing the number of
186     records.
187   */
get_sort_keys()188   uchar **get_sort_keys() {
189     if (m_record_pointers.empty()) return nullptr;
190     return m_record_pointers.data();
191   }
192 
193   /**
194     Gets sorted record number ix. @see get_sort_keys()
195     Only valid after buffer has been sorted!
196   */
get_sorted_record(size_t ix)197   uchar *get_sorted_record(size_t ix) {
198     DBUG_ASSERT(ix < m_record_pointers.size());
199     return m_record_pointers[ix];
200   }
201 
202   /**
203     Clears all rows, then returns a contiguous buffer of maximum size.
204     (This may or may not involve allocation.) This is for reusing the memory
205     for merge buffers, which requires the memory to be a single contiguous
206     chunk; one could in theory adjust merging to allow using multiple buffers
207     like sorting does, but once we need to merge, that means we've hit disk
208     anyway (or at the very least, need to talk to the OS' buffer cache),
209     and the cost of a single allocation is small compared to I/O.
210 
211     If you use this memory area, you cannot also use the Filesort_buffer to
212     store sort records (get_next_record_pointer etc.); that would use the
213     same memory.
214 
215     Can return nullptr, if allocation fails.
216   */
217   Bounds_checked_array<uchar> get_contiguous_buffer();
218 
219  private:
220   /**
221     Allocate a new block with space for at least `num_bytes` bytes.
222 
223     @returns true if the allocation failed (including if m_max_size_in_bytes
224     was exceeded).
225   */
226   bool allocate_block(size_t num_bytes);
227 
228   /**
229     Allocate a new block of exactly `block_size` bytes, and sets it
230     as the current block. Does not check m_max_size_in_bytes.
231 
232     @returns true if the allocation failed
233   */
234   bool allocate_sized_block(size_t num_bytes);
235 
236   /// See m_peak_memory_used.
237   void update_peak_memory_used() const;
238 
239   uchar *m_next_rec_ptr;       ///< The next record will be inserted here.
240   uchar *m_current_block_end;  ///< The limit of the current block, exclusive.
241 
242   /// The memory blocks used for the actual data.
243   std::vector<unique_ptr_my_free<uchar[]>> m_blocks;
244 
245   /// Pointer to the beginning of each record.
246   std::vector<uchar *> m_record_pointers;
247 
248   size_t m_max_record_length;  ///< Worst-case length of each record.
249 
250   /// Maximum number of bytes we are allowed to allocate in all.
251   size_t m_max_size_in_bytes;
252 
253   /**
254     The size of the current memory block (m_blocks.back()), in bytes
255     (or 0 if no block). If nothing has been allocated from the block yet,
256     the invariant m_next_rec_ptr + m_current_block_size == m_current_block_end
257     holds.
258   */
259   size_t m_current_block_size;
260 
261   /**
262     The total size of all blocks except the current one, not including
263     record pointers. Used for bookkeeping how far away we are from
264     reaching m_max_size_in_bytes.
265   */
266   size_t m_space_used_other_blocks;
267 
268   /**
269     The largest amount of total memory we've been using since last call to
270     clear_peak_memory_used(). This is updated lazily so that we don't need
271     to do the calculations for every record (and thus is mutable). The only
272     point where it _must_ be explicitly updated (by calling
273     update_peak_memory_used()), except when being asked for the value,
274     is right before we deallocate memory, as otherwise, there could be a peak
275     we had forgotten.
276   */
277   mutable size_t m_peak_memory_used{0};
278 
279   // Privately movable, but not copyable.
280   Filesort_buffer &operator=(const Filesort_buffer &rhs) = delete;
281   Filesort_buffer &operator=(Filesort_buffer &&rhs) = default;
282 };
283 
284 #endif  // FILESORT_UTILS_INCLUDED
285