1 /* Copyright (c) 2010, 2021, Oracle and/or its affiliates. 2 3 This program is free software; you can redistribute it and/or modify 4 it under the terms of the GNU General Public License, version 2.0, 5 as published by the Free Software Foundation. 6 7 This program is also distributed with certain software (including 8 but not limited to OpenSSL) that is licensed under separate terms, 9 as designated in a particular file or component or in included license 10 documentation. The authors of MySQL hereby grant you an additional 11 permission to link the program and your derivative works with the 12 separately licensed software that they have included with MySQL. 13 14 This program is distributed in the hope that it will be useful, 15 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 GNU General Public License, version 2.0, for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with this program; if not, write to the Free Software 21 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ 22 23 #ifndef FILESORT_UTILS_INCLUDED 24 #define FILESORT_UTILS_INCLUDED 25 26 #include "my_global.h" 27 #include "my_base.h" 28 #include "sql_array.h" 29 #include "my_sys.h" 30 31 #include <algorithm> 32 #include <utility> 33 34 class Cost_model_table; 35 class Sort_param; 36 /* 37 Calculate cost of merge sort 38 39 @param num_rows Total number of rows. 40 @param num_keys_per_buffer Number of keys per buffer. 41 @param elem_size Size of each element. 42 @param cost_model Cost model object that provides cost data. 43 44 Calculates cost of merge sort by simulating call to merge_many_buff(). 45 46 @returns 47 Computed cost of merge sort in disk seeks. 48 49 @note 50 Declared here in order to be able to unit test it, 51 since library dependencies have not been sorted out yet. 52 53 See also comments get_merge_many_buffs_cost(). 54 */ 55 56 double get_merge_many_buffs_cost_fast(ha_rows num_rows, 57 ha_rows num_keys_per_buffer, 58 uint elem_size, 59 const Cost_model_table *cost_model); 60 61 62 /** 63 A wrapper class around the buffer used by filesort(). 64 The sort buffer is a contiguous chunk of memory, 65 containing both records to be sorted, and pointers to said records: 66 67 <start of buffer | still unused | end of buffer> 68 |rec 0|record 1 |rec 2| ............ |ptr to rec2|ptr to rec1|ptr to rec0| 69 70 Records will be inserted "left-to-right". Records are not necessarily 71 fixed-size, they can be packed and stored without any "gaps". 72 73 Record pointers will be inserted "right-to-left", as a side-effect 74 of inserting the actual records. 75 76 We wrap the buffer in order to be able to do lazy initialization of the 77 pointers: the buffer is often much larger than what we actually need. 78 79 With this allocation scheme, and lazy initialization of the pointers, 80 we are able to pack variable-sized records in the buffer, 81 and thus possibly have space for more records than we initially estimated. 82 83 The buffer must be kept available for multiple executions of the 84 same sort operation, so we have explicit allocate and free functions, 85 rather than doing alloc/free in CTOR/DTOR. 86 */ 87 class Filesort_buffer 88 { 89 public: Filesort_buffer()90 Filesort_buffer() : 91 m_next_rec_ptr(NULL), m_rawmem(NULL), m_record_pointers(NULL), 92 m_sort_keys(NULL), 93 m_num_records(0), m_record_length(0), 94 m_sort_length(0), 95 m_size_in_bytes(0), m_idx(0) 96 {} 97 98 /** Sort me... */ 99 void sort_buffer(const Sort_param *param, uint count); 100 101 /** 102 Reverses the record pointer array, to avoid recording new results for 103 non-deterministic mtr tests. 104 */ reverse_record_pointers()105 void reverse_record_pointers() 106 { 107 if (m_idx < 2) // There is nothing to swap. 108 return; 109 uchar **keys= get_sort_keys(); 110 const longlong count= m_idx - 1; 111 for (longlong ix= 0; ix <= count/2; ++ix) 112 std::swap(keys[ix], keys[count - ix]); 113 } 114 115 /** 116 Initializes all the record pointers. 117 */ init_record_pointers()118 void init_record_pointers() 119 { 120 init_next_record_pointer(); 121 while (m_idx < m_num_records) 122 (void) get_next_record_pointer(); 123 reverse_record_pointers(); 124 } 125 126 /** 127 Prepares the buffer for the next batch of records to process. 128 */ init_next_record_pointer()129 void init_next_record_pointer() 130 { 131 m_idx= 0; 132 m_next_rec_ptr= m_rawmem; 133 m_sort_keys= NULL; 134 } 135 136 /** 137 @returns the number of bytes currently in use for data. 138 */ space_used_for_data()139 size_t space_used_for_data() const 140 { 141 return m_next_rec_ptr ? m_next_rec_ptr - m_rawmem : 0; 142 } 143 144 /** 145 @returns the number of bytes left in the buffer. 146 */ spaceleft()147 size_t spaceleft() const 148 { 149 assert(m_next_rec_ptr >= m_rawmem); 150 const size_t spaceused= 151 (m_next_rec_ptr - m_rawmem) + 152 (static_cast<size_t>(m_idx) * sizeof(uchar*)); 153 return m_size_in_bytes - spaceused; 154 } 155 156 /** 157 Is the buffer full? 158 */ isfull()159 bool isfull() const 160 { 161 if (m_idx < m_num_records) 162 return false; 163 return spaceleft() < (m_record_length + sizeof(uchar*)); 164 } 165 166 /** 167 Where should the next record be stored? 168 */ get_next_record_pointer()169 uchar *get_next_record_pointer() 170 { 171 uchar *retval= m_next_rec_ptr; 172 // Save the return value in the record pointer array. 173 m_record_pointers[-m_idx]= m_next_rec_ptr; 174 // Prepare for the subsequent request. 175 m_idx++; 176 m_next_rec_ptr+= m_record_length; 177 return retval; 178 } 179 180 /** 181 Adjusts for actual record length. get_next_record_pointer() above was 182 pessimistic, and assumed that the record could not be packed. 183 */ adjust_next_record_pointer(uint val)184 void adjust_next_record_pointer(uint val) 185 { 186 m_next_rec_ptr-= (m_record_length - val); 187 } 188 189 /** 190 @returns total size of buffer: pointer array + record buffers. 191 */ sort_buffer_size()192 size_t sort_buffer_size() const 193 { 194 return m_size_in_bytes; 195 } 196 197 /** 198 Allocates the buffer, but does *not* initialize pointers. 199 Total size = (num_records * record_length) + (num_records * sizeof(pointer)) 200 space for records space for pointer to records 201 Caller is responsible for raising an error if allocation fails. 202 203 @param num_records Number of records. 204 @param record_length (maximum) size of each record. 205 @returns Pointer to allocated area, or NULL in case of out-of-memory. 206 */ 207 uchar *alloc_sort_buffer(uint num_records, uint record_length); 208 209 /// Frees the buffer. free_sort_buffer()210 void free_sort_buffer() 211 { 212 my_free(m_rawmem); 213 *this= Filesort_buffer(); 214 } 215 216 /** 217 Used to access the "right-to-left" array of record pointers as an ordinary 218 "left-to-right" array, so that we can pass it directly on to std::sort(). 219 */ get_sort_keys()220 uchar **get_sort_keys() 221 { 222 if (m_idx == 0) 223 return NULL; 224 return &m_record_pointers[1 - m_idx]; 225 } 226 227 /** 228 Gets sorted record number ix. @see get_sort_keys() 229 Only valid after buffer has been sorted! 230 */ get_sorted_record(uint ix)231 uchar *get_sorted_record(uint ix) 232 { 233 return m_sort_keys[ix]; 234 } 235 236 /** 237 @returns The entire buffer, as a character array. 238 This is for reusing the memory for merge buffers. 239 */ get_raw_buf()240 Bounds_checked_array<uchar> get_raw_buf() 241 { 242 return Bounds_checked_array<uchar>(m_rawmem, m_size_in_bytes); 243 } 244 245 /** 246 We need an assignment operator, see filesort(). 247 This happens to have the same semantics as the one that would be 248 generated by the compiler. We still implement it here, to show shallow 249 assignment explicitly: we have two objects sharing the same array. 250 */ 251 Filesort_buffer &operator=(const Filesort_buffer &rhs) 252 { 253 m_next_rec_ptr= rhs.m_next_rec_ptr; 254 m_rawmem= rhs.m_rawmem; 255 m_record_pointers= rhs.m_record_pointers; 256 m_sort_keys= rhs.m_sort_keys; 257 m_num_records= rhs.m_num_records; 258 m_record_length= rhs.m_record_length; 259 m_sort_length= rhs.m_sort_length; 260 m_size_in_bytes= rhs.m_size_in_bytes; 261 m_idx= rhs.m_idx; 262 return *this; 263 } 264 get_sort_length()265 uint get_sort_length() const { return m_sort_length; } set_sort_length(uint val)266 void set_sort_length(uint val) { m_sort_length= val; } 267 268 private: 269 uchar *m_next_rec_ptr; /// The next record will be inserted here. 270 uchar *m_rawmem; /// The raw memory buffer. 271 uchar **m_record_pointers; /// The "right-to-left" array of record pointers. 272 uchar **m_sort_keys; /// Caches the value of get_sort_keys() 273 uint m_num_records; /// Saved value from alloc_sort_buffer() 274 uint m_record_length; /// Saved value from alloc_sort_buffer() 275 uint m_sort_length; /// The length of the sort key. 276 size_t m_size_in_bytes; /// Size of raw buffer, in bytes. 277 278 /** 279 This is the index in the "right-to-left" array of the next record to 280 be inserted into the buffer. It is signed, because we use it in signed 281 expressions like: 282 m_record_pointers[-m_idx]; 283 It is longlong rather than int, to ensure that it covers UINT_MAX32 284 without any casting/warning. 285 */ 286 longlong m_idx; 287 }; 288 289 #endif // FILESORT_UTILS_INCLUDED 290