1 /* Copyright (c) 2010, 2020, Oracle and/or its affiliates. All rights reserved. 2 3 This program is free software; you can redistribute it and/or modify 4 it under the terms of the GNU General Public License, version 2.0, 5 as published by the Free Software Foundation. 6 7 This program is also distributed with certain software (including 8 but not limited to OpenSSL) that is licensed under separate terms, 9 as designated in a particular file or component or in included license 10 documentation. The authors of MySQL hereby grant you an additional 11 permission to link the program and your derivative works with the 12 separately licensed software that they have included with MySQL. 13 14 This program is distributed in the hope that it will be useful, 15 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 GNU General Public License, version 2.0, for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with this program; if not, write to the Free Software 21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ 22 23 #ifndef FILESORT_UTILS_INCLUDED 24 #define FILESORT_UTILS_INCLUDED 25 26 #include <stddef.h> 27 #include <sys/types.h> 28 #include <memory> 29 #include <utility> 30 #include <vector> 31 32 #include "map_helpers.h" 33 #include "my_base.h" // ha_rows 34 #include "my_dbug.h" 35 #include "my_inttypes.h" 36 #include "mysql/service_mysql_alloc.h" // my_free 37 #include "sql/sql_array.h" // Bounds_checked_array 38 39 class Cost_model_table; 40 class Sort_param; 41 42 /** 43 Buffer used for storing records to be sorted. The records are stored in 44 a series of buffers that are allocated incrementally, growing 50% each 45 time, similar to how a MEM_ROOT works. This allows the user to set a large 46 maximum buffer size without getting huge allocations for sorting small result 47 sets. It means that if you actually _do_ use the entire buffer, there will be 48 more allocations than one large allocation up-front, but this is a worthwhile 49 tradeoff (those allocation will tend to disappear into the cost of actually 50 getting all the rows and sorting them). 51 52 In addition, Filesort_buffer stores a vector of pointers to the beginning of 53 each record. It is these pointers that are actually sorted in filesort. 54 If the records are small, this can add up to overhead on top of the amount of 55 memory the user expected to use. We _do_ take already allocated pointers into 56 account when calculating how big a new block can be, so the amount of badness 57 is bounded: 58 59 Assume that we have set maximum record size to infinity, but that in 60 practice, they are are about the smallest size possible (4-byte sort key plus 61 4-byte rowid) and that we are on a 64-bit system. Then, the worst possible 62 overhead is that we use as much space on pointers as the size of the last 63 (largest) block. We can look at the two possible extremes: 64 65 - Smallest possible sort buffer (32 kB): 32 kB overhead. 66 - A huge sort buffer (x kB): If the last block is y kB, the total size will 67 be y + 2/3y + (2/3)²y + ... = 3y, which means the last block is 1/3 of the 68 total size. Thus, pointer overhead will be no worse than 33%. 69 70 In most practical cases, it will be much better than this. In particular, 71 even when allocating a block (where we don't yet know how many records will 72 fit), we allow space for the record pointers we'd need given maximum-sized 73 rows. 74 75 The buffer must be kept available for multiple executions of the 76 same sort operation, so one can call reset() for reuse. Again similar 77 to MEM_ROOT, this keeps the last (largest) block and discards the others. 78 */ 79 class Filesort_buffer { 80 public: Filesort_buffer()81 Filesort_buffer() 82 : m_next_rec_ptr(nullptr), 83 m_current_block_end(nullptr), 84 m_max_size_in_bytes(0), 85 m_current_block_size(0), 86 m_space_used_other_blocks(0) {} 87 88 /** Sort me... 89 @return Number of records, after any deduplication 90 */ 91 unsigned sort_buffer(Sort_param *param, uint count); 92 93 /** 94 Prepares the buffer for the next batch of records to process. 95 */ 96 void reset(); 97 98 /** 99 Where should the next record be stored? 100 101 If a block is returned, it is always at least "min_size" bytes long. 102 If the returned block is not large enough for your purposes, 103 call get_next_record_pointer() again with a larger value of min_size than 104 the size you got back. Just increasing the size by one byte is fine; 105 the class will still try to make exponentially larger blocks each time. 106 107 If there's no room for a record of the given size, returns nullptr. 108 109 After you've written data to the given record, call commit_used_memory() 110 with the number of bytes you've actually written. This ensures it will 111 not get reused for subsequent records. 112 */ get_next_record_pointer(size_t min_size)113 Bounds_checked_array<uchar> get_next_record_pointer(size_t min_size) { 114 DBUG_ASSERT(min_size != 0xFFFFFFFFu); 115 // See if we need to allocate a new block. 116 if (m_next_rec_ptr == nullptr || 117 m_next_rec_ptr + min_size > m_current_block_end) { 118 if (allocate_block(min_size)) return Bounds_checked_array<uchar>(); 119 } 120 121 // Allocate space within the current block. 122 return Bounds_checked_array<uchar>(m_next_rec_ptr, 123 m_current_block_end - m_next_rec_ptr); 124 } 125 commit_used_memory(size_t num_bytes)126 void commit_used_memory(size_t num_bytes) { 127 m_record_pointers.push_back(m_next_rec_ptr); 128 m_next_rec_ptr += num_bytes; 129 } 130 131 /** 132 Removes any existing rows and allocates `num_records` maximum-sized rows 133 (call get_sorted_record() to get their pointers). This is somewhat more 134 efficient than calling reset() and then get_next_record_pointer() 135 repeatedly, as it guarantees that at most one allocation is needed. 136 137 @returns true on memory allocation error, including if the allocated 138 size would exceed max_size_in_bytes(). 139 */ 140 bool preallocate_records(size_t num_records); 141 max_size_in_bytes()142 size_t max_size_in_bytes() const { return m_max_size_in_bytes; } 143 144 /** 145 How much memory has been allocated (counting both the sort buffer and the 146 record pointers) at most since last call to clear_peak_memory_used(). 147 Note in particular that reset() and free_sort_buffer() does _not_ zero this 148 counter. 149 */ peak_memory_used()150 size_t peak_memory_used() const { 151 update_peak_memory_used(); 152 return m_peak_memory_used; 153 } 154 155 /// See peak_memory_used. clear_peak_memory_used()156 void clear_peak_memory_used() { m_peak_memory_used = 0; } 157 158 /** 159 Set the memory limit for the sort buffer before starting to add records. 160 If trying to allocate space for a new row (in get_next_record_pointer) 161 would take us past the set limit, allocation will fail. Note that we can go 162 a bit over this limit due to having to store record pointers; see the class 163 comment. 164 165 @param max_size Maximum size of the sort buffer, in bytes. 166 @param record_length Worst-case size of each record, in bytes. 167 */ set_max_size(size_t max_size,size_t record_length)168 void set_max_size(size_t max_size, size_t record_length) { 169 m_max_size_in_bytes = max_size; 170 m_max_record_length = record_length; 171 } 172 173 /** 174 Frees all memory. Unlike reset(), which keeps one block for future use, 175 this actually releases all blocks. It is intended to release memory 176 in an error situation, for final shutdown, or if even the largest block 177 will not be large enough for future allocations. 178 179 You do not need to call this if you are destroying the object anyway. 180 */ 181 void free_sort_buffer(); 182 183 /** 184 Get the list of record pointers as a contiguous array. Will be invalidated 185 by calling get_next_record_pointer() or otherwise changing the number of 186 records. 187 */ get_sort_keys()188 uchar **get_sort_keys() { 189 if (m_record_pointers.empty()) return nullptr; 190 return m_record_pointers.data(); 191 } 192 193 /** 194 Gets sorted record number ix. @see get_sort_keys() 195 Only valid after buffer has been sorted! 196 */ get_sorted_record(size_t ix)197 uchar *get_sorted_record(size_t ix) { 198 DBUG_ASSERT(ix < m_record_pointers.size()); 199 return m_record_pointers[ix]; 200 } 201 202 /** 203 Clears all rows, then returns a contiguous buffer of maximum size. 204 (This may or may not involve allocation.) This is for reusing the memory 205 for merge buffers, which requires the memory to be a single contiguous 206 chunk; one could in theory adjust merging to allow using multiple buffers 207 like sorting does, but once we need to merge, that means we've hit disk 208 anyway (or at the very least, need to talk to the OS' buffer cache), 209 and the cost of a single allocation is small compared to I/O. 210 211 If you use this memory area, you cannot also use the Filesort_buffer to 212 store sort records (get_next_record_pointer etc.); that would use the 213 same memory. 214 215 Can return nullptr, if allocation fails. 216 */ 217 Bounds_checked_array<uchar> get_contiguous_buffer(); 218 219 private: 220 /** 221 Allocate a new block with space for at least `num_bytes` bytes. 222 223 @returns true if the allocation failed (including if m_max_size_in_bytes 224 was exceeded). 225 */ 226 bool allocate_block(size_t num_bytes); 227 228 /** 229 Allocate a new block of exactly `block_size` bytes, and sets it 230 as the current block. Does not check m_max_size_in_bytes. 231 232 @returns true if the allocation failed 233 */ 234 bool allocate_sized_block(size_t num_bytes); 235 236 /// See m_peak_memory_used. 237 void update_peak_memory_used() const; 238 239 uchar *m_next_rec_ptr; ///< The next record will be inserted here. 240 uchar *m_current_block_end; ///< The limit of the current block, exclusive. 241 242 /// The memory blocks used for the actual data. 243 std::vector<unique_ptr_my_free<uchar[]>> m_blocks; 244 245 /// Pointer to the beginning of each record. 246 std::vector<uchar *> m_record_pointers; 247 248 size_t m_max_record_length; ///< Worst-case length of each record. 249 250 /// Maximum number of bytes we are allowed to allocate in all. 251 size_t m_max_size_in_bytes; 252 253 /** 254 The size of the current memory block (m_blocks.back()), in bytes 255 (or 0 if no block). If nothing has been allocated from the block yet, 256 the invariant m_next_rec_ptr + m_current_block_size == m_current_block_end 257 holds. 258 */ 259 size_t m_current_block_size; 260 261 /** 262 The total size of all blocks except the current one, not including 263 record pointers. Used for bookkeeping how far away we are from 264 reaching m_max_size_in_bytes. 265 */ 266 size_t m_space_used_other_blocks; 267 268 /** 269 The largest amount of total memory we've been using since last call to 270 clear_peak_memory_used(). This is updated lazily so that we don't need 271 to do the calculations for every record (and thus is mutable). The only 272 point where it _must_ be explicitly updated (by calling 273 update_peak_memory_used()), except when being asked for the value, 274 is right before we deallocate memory, as otherwise, there could be a peak 275 we had forgotten. 276 */ 277 mutable size_t m_peak_memory_used{0}; 278 279 // Privately movable, but not copyable. 280 Filesort_buffer &operator=(const Filesort_buffer &rhs) = delete; 281 Filesort_buffer &operator=(Filesort_buffer &&rhs) = default; 282 }; 283 284 #endif // FILESORT_UTILS_INCLUDED 285