1 /***************************************************************************** 2 3 Copyright (c) 2014, 2020, Oracle and/or its affiliates. All Rights Reserved. 4 5 This program is free software; you can redistribute it and/or modify it under 6 the terms of the GNU General Public License, version 2.0, as published by the 7 Free Software Foundation. 8 9 This program is also distributed with certain software (including but not 10 limited to OpenSSL) that is licensed under separate terms, as designated in a 11 particular file or component or in included license documentation. The authors 12 of MySQL hereby grant you an additional permission to link the program and 13 your derivative works with the separately licensed software that they have 14 included with MySQL. 15 16 This program is distributed in the hope that it will be useful, but WITHOUT 17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0, 19 for more details. 20 21 You should have received a copy of the GNU General Public License along with 22 this program; if not, write to the Free Software Foundation, Inc., 23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 24 25 *****************************************************************************/ 26 27 /** @file include/btr0bulk.h 28 The B-tree bulk load 29 30 Created 03/11/2014 Shaohua Wang 31 *************************************************************************/ 32 33 #ifndef btr0bulk_h 34 #define btr0bulk_h 35 36 #include <stddef.h> 37 #include <vector> 38 39 #include "dict0dict.h" 40 #include "page0cur.h" 41 #include "ut0class_life_cycle.h" 42 #include "ut0new.h" 43 44 /** Innodb B-tree index fill factor for bulk load. */ 45 extern long innobase_fill_factor; 46 47 /* 48 The proper function call sequence of PageBulk is as below: 49 -- PageBulk::init 50 -- PageBulk::insert 51 -- PageBulk::finish 52 -- PageBulk::compress(COMPRESSED table only) 53 -- PageBulk::pageSplit(COMPRESSED table only) 54 -- PageBulk::commit 55 */ 56 57 class PageBulk : private ut::Non_copyable { 58 public: 59 /** Page split point descriptor. */ 60 struct SplitPoint { 61 /** Record being the point of split. 62 * All records before this record should stay on current on page. 63 * This record and all following records should be moved to new page. */ 64 rec_t *m_rec; 65 /** Number of records before this record. */ 66 ulint m_n_rec_before; 67 }; 68 69 /** Constructor 70 @param[in] index B-tree index 71 @param[in] page_no page number 72 @param[in] level page level 73 @param[in] trx_id transaction id 74 @param[in] observer flush observer */ PageBulk(dict_index_t * index,trx_id_t trx_id,page_no_t page_no,ulint level,FlushObserver * observer)75 PageBulk(dict_index_t *index, trx_id_t trx_id, page_no_t page_no, ulint level, 76 FlushObserver *observer) 77 : m_heap(nullptr), 78 m_index(index), 79 m_mtr(nullptr), 80 m_trx_id(trx_id), 81 m_block(nullptr), 82 m_page(nullptr), 83 m_page_zip(nullptr), 84 m_cur_rec(nullptr), 85 m_page_no(page_no), 86 m_level(level), 87 m_is_comp(dict_table_is_comp(index->table)), 88 m_heap_top(nullptr), 89 m_rec_no(0), 90 m_free_space(0), 91 m_reserved_space(0), 92 m_padding_space(0), 93 #ifdef UNIV_DEBUG 94 m_total_data(0), 95 #endif /* UNIV_DEBUG */ 96 m_modify_clock(0), 97 m_flush_observer(observer), 98 m_last_slotted_rec(nullptr), 99 m_slotted_rec_no(0), 100 m_modified(false) { 101 ut_ad(!dict_index_is_spatial(m_index)); 102 } 103 104 /** Destructor */ ~PageBulk()105 ~PageBulk() { 106 if (m_heap) { 107 /* mtr is allocated using heap. */ 108 if (m_mtr != nullptr) { 109 m_mtr->~mtr_t(); 110 } 111 mem_heap_free(m_heap); 112 } 113 } 114 115 /** Initialize members and allocate page if needed and start mtr. 116 @note Must be called and only once right after constructor. 117 @return error code */ 118 dberr_t init() MY_ATTRIBUTE((warn_unused_result)); 119 120 /** Insert a tuple in the page. 121 @param[in] tuple tuple to insert 122 @param[in] big_rec external record 123 @param[in] rec_size record size 124 @return error code */ 125 dberr_t insert(const dtuple_t *tuple, const big_rec_t *big_rec, 126 ulint rec_size) MY_ATTRIBUTE((warn_unused_result)); 127 128 /** Mark end of insertion to the page. Scan records to set page dirs, 129 and set page header members. The scan is incremental (slots and records 130 which assignment could be "finalized" are not checked again. Check the 131 m_slotted_rec_no usage, note it could be reset in some cases like 132 during split. 133 Note: we refer to page_copy_rec_list_end_to_created_page. */ 134 void finish(); 135 136 /** Commit mtr for a page 137 @param[in] success Flag whether all inserts succeed. */ 138 void commit(bool success); 139 140 /** Compress if it is compressed table 141 @return true compress successfully or no need to compress 142 @return false compress failed. */ 143 bool compress() MY_ATTRIBUTE((warn_unused_result)); 144 145 /** Check whether the record needs to be stored externally. 146 @return false if the entire record can be stored locally on the page */ 147 bool needExt(const dtuple_t *tuple, ulint rec_size) const 148 MY_ATTRIBUTE((warn_unused_result)); 149 150 /** Get node pointer 151 @return node pointer */ 152 dtuple_t *getNodePtr(); 153 154 /** Split the page records between this and given bulk. 155 * @param new_page_bulk The new bulk to store split records. */ 156 void split(PageBulk &new_page_bulk); 157 158 /** Copy all records from page. 159 @param[in] src_page Page with records to copy. */ 160 void copyAll(const page_t *src_page); 161 162 /** Set next page 163 @param[in] next_page_no next page no */ 164 void setNext(page_no_t next_page_no); 165 166 /** Set previous page 167 @param[in] prev_page_no previous page no */ 168 void setPrev(page_no_t prev_page_no); 169 170 /** Release block by committing mtr */ 171 inline void release(); 172 173 /** Start mtr and latch block */ 174 inline void latch(); 175 176 /** Check if required space is available in the page for the rec 177 to be inserted. We check fill factor & padding here. 178 @param[in] rec_size required space 179 @return true if space is available */ 180 inline bool isSpaceAvailable(ulint rec_size) const; 181 182 /** Get page no */ getPageNo()183 page_no_t getPageNo() const { return (m_page_no); } 184 185 /** Get page level */ getLevel()186 ulint getLevel() const { return (m_level); } 187 188 /** Get record no */ getRecNo()189 ulint getRecNo() const { return (m_rec_no); } 190 191 /** Get page */ getPage()192 const page_t *getPage() const { return (m_page); } 193 194 /** Check if table is compressed. 195 @return true if table is compressed, false otherwise. */ isTableCompressed()196 bool isTableCompressed() const { return (m_page_zip != nullptr); } 197 198 #ifdef UNIV_DEBUG 199 /** Check if index is X locked */ 200 bool isIndexXLocked(); 201 #endif // UNIV_DEBUG 202 203 private: 204 /** Get page split point. We split a page in half when compression 205 fails, and the split record and all following records should be copied 206 to the new page. 207 @return split record descriptor */ 208 SplitPoint getSplitRec(); 209 210 /** Copy given and all following records. 211 @param[in] first_rec first record to copy */ 212 void copyRecords(const rec_t *first_rec); 213 214 /** Remove all records after split rec including itself. 215 @param[in] split_point split point descriptor */ 216 void splitTrim(const SplitPoint &split_point); 217 218 /** Insert a record in the page. 219 @param[in] rec record 220 @param[in] offsets record offsets */ 221 void insert(const rec_t *rec, ulint *offsets); 222 223 /** Store external record 224 Since the record is not logged yet, so we don't log update to the record. 225 the blob data is logged first, then the record is logged in bulk mode. 226 @param[in] big_rec external record 227 @param[in] offsets record offsets 228 @return error code */ 229 dberr_t storeExt(const big_rec_t *big_rec, ulint *offsets) 230 MY_ATTRIBUTE((warn_unused_result)); 231 232 /** Memory heap for internal allocation */ 233 mem_heap_t *m_heap; 234 235 /** The index B-tree */ 236 dict_index_t *m_index; 237 238 /** The min-transaction */ 239 mtr_t *m_mtr; 240 241 /** The transaction id */ 242 trx_id_t m_trx_id; 243 244 /** The buffer block */ 245 buf_block_t *m_block; 246 247 /** The page */ 248 page_t *m_page; 249 250 /** The page zip descriptor */ 251 page_zip_des_t *m_page_zip; 252 253 /** The current rec, just before the next insert rec */ 254 rec_t *m_cur_rec; 255 256 /** The page no */ 257 page_no_t m_page_no; 258 259 /** The page level in B-tree */ 260 ulint m_level; 261 262 /** Flag: is page in compact format */ 263 const bool m_is_comp; 264 265 /** The heap top in page for next insert */ 266 byte *m_heap_top; 267 268 /** User record no */ 269 ulint m_rec_no; 270 271 /** The free space left in the page */ 272 ulint m_free_space; 273 274 /** The reserved space for fill factor */ 275 ulint m_reserved_space; 276 277 /** The padding space for compressed page */ 278 ulint m_padding_space; 279 280 #ifdef UNIV_DEBUG 281 /** Total data in the page */ 282 ulint m_total_data; 283 #endif /* UNIV_DEBUG */ 284 285 /** The modify clock value of the buffer block 286 when the block is re-pinned */ 287 ib_uint64_t m_modify_clock; 288 289 /** Flush observer */ 290 FlushObserver *m_flush_observer; 291 292 /** Last record assigned to a slot. */ 293 rec_t *m_last_slotted_rec; 294 295 /** Number of records assigned to slots. */ 296 ulint m_slotted_rec_no; 297 298 /** Page modified flag. */ 299 bool m_modified; 300 }; 301 302 class BtrBulk { 303 public: 304 using page_bulk_vector = std::vector<PageBulk *, ut_allocator<PageBulk *>>; 305 306 /** Constructor 307 @param[in] index B-tree index 308 @param[in] trx_id transaction id 309 @param[in] observer flush observer */ 310 BtrBulk(dict_index_t *index, trx_id_t trx_id, FlushObserver *observer); 311 312 /** Destructor */ 313 ~BtrBulk(); 314 315 /** Initialization 316 @note Must be called right after constructor. */ 317 dberr_t init() MY_ATTRIBUTE((warn_unused_result)); 318 319 /** Insert a tuple 320 @param[in] tuple tuple to insert. 321 @return error code */ insert(dtuple_t * tuple)322 dberr_t insert(dtuple_t *tuple) MY_ATTRIBUTE((warn_unused_result)) { 323 return (insert(tuple, 0)); 324 } 325 326 /** Btree bulk load finish. We commit the last page in each level 327 and copy the last page in top level to the root page of the index 328 if no error occurs. 329 @param[in] err whether bulk load was successful until now 330 @return error code */ 331 dberr_t finish(dberr_t err) MY_ATTRIBUTE((warn_unused_result)); 332 333 /** Release all latches */ 334 void release(); 335 336 /** Re-latch all latches */ 337 void latch(); 338 339 private: 340 /** Insert a tuple to a page in a level 341 @param[in] tuple tuple to insert 342 @param[in] level B-tree level 343 @return error code */ 344 dberr_t insert(dtuple_t *tuple, ulint level) 345 MY_ATTRIBUTE((warn_unused_result)); 346 347 /** Split a page 348 @param[in] page_bulk page to split 349 @param[in] next_page_bulk next page 350 @return error code */ 351 dberr_t pageSplit(PageBulk *page_bulk, PageBulk *next_page_bulk) 352 MY_ATTRIBUTE((warn_unused_result)); 353 354 /** Commit(finish) a page. We set next/prev page no, compress a page of 355 compressed table and split the page if compression fails, insert a node 356 pointer to father page if needed, and commit mini-transaction. 357 @param[in] page_bulk page to commit 358 @param[in] next_page_bulk next page 359 @param[in] insert_father flag whether need to insert node ptr 360 @return error code */ 361 dberr_t pageCommit(PageBulk *page_bulk, PageBulk *next_page_bulk, 362 bool insert_father) MY_ATTRIBUTE((warn_unused_result)); 363 364 /** Abort a page when an error occurs 365 @param[in] page_bulk page bulk object 366 @note We should call pageAbort for a PageBulk object, which is not in 367 m_page_bulks after pageCommit, and we will commit or abort PageBulk 368 objects in function "finish". */ pageAbort(PageBulk * page_bulk)369 void pageAbort(PageBulk *page_bulk) { page_bulk->commit(false); } 370 371 /** Prepare space to insert a tuple. 372 @param[in,out] page_bulk page bulk that will be used to store the record. 373 It may be replaced if there is not enough space 374 to hold the record. 375 @param[in] level B-tree level 376 @param[in] rec_size record size 377 @return error code */ 378 dberr_t prepareSpace(PageBulk *&page_bulk, ulint level, ulint rec_size) 379 MY_ATTRIBUTE((warn_unused_result)); 380 381 /** Insert a tuple to a page. 382 @param[in] page_bulk page bulk object 383 @param[in] tuple tuple to insert 384 @param[in] big_rec big record vector, maybe NULL if there is no 385 data to be stored externally. 386 @param[in] rec_size record size 387 @return error code */ 388 dberr_t insert(PageBulk *page_bulk, dtuple_t *tuple, big_rec_t *big_rec, 389 ulint rec_size) MY_ATTRIBUTE((warn_unused_result)); 390 391 /** Log free check */ 392 void logFreeCheck(); 393 394 /** Btree page bulk load finish. Commits the last page in each level 395 if no error occurs. Also releases all page bulks. 396 @param[in] err whether bulk load was successful until now 397 @param[out] last_page_no last page number 398 @return error code */ 399 dberr_t finishAllPageBulks(dberr_t err, page_no_t &last_page_no) 400 MY_ATTRIBUTE((warn_unused_result)); 401 402 private: 403 /** B-tree index */ 404 dict_index_t *m_index; 405 406 /** Transaction id */ 407 trx_id_t m_trx_id; 408 409 /** Root page level */ 410 ulint m_root_level; 411 412 /** Flush observer */ 413 FlushObserver *m_flush_observer; 414 415 /** Page cursor vector for all level */ 416 page_bulk_vector *m_page_bulks; 417 418 #ifdef UNIV_DEBUG 419 /** State of the index. Used for asserting at the end of a 420 bulk load operation to ensure that the online status of the 421 index does not change */ 422 unsigned m_index_online; 423 #endif // UNIV_DEBUG 424 }; 425 426 #endif 427