1 /***************************************************************************** 2 3 Copyright (c) 2014, 2015, Oracle and/or its affiliates. All Rights Reserved. 4 Copyright (c) 2019, 2020, MariaDB Corporation. 5 6 This program is free software; you can redistribute it and/or modify it under 7 the terms of the GNU General Public License as published by the Free Software 8 Foundation; version 2 of the License. 9 10 This program is distributed in the hope that it will be useful, but WITHOUT 11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License along with 15 this program; if not, write to the Free Software Foundation, Inc., 16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA 17 18 *****************************************************************************/ 19 20 /********************************************************************//** 21 @file include/btr0bulk.h 22 The B-tree bulk load 23 24 Created 03/11/2014 Shaohua Wang 25 *************************************************************************/ 26 27 #ifndef btr0bulk_h 28 #define btr0bulk_h 29 30 #include "dict0dict.h" 31 #include "rem0types.h" 32 #include "page0cur.h" 33 34 #include <vector> 35 36 /** Innodb B-tree index fill factor for bulk load. */ 37 extern uint innobase_fill_factor; 38 /** whether to reduce redo logging during ALTER TABLE */ 39 extern my_bool innodb_log_optimize_ddl; 40 41 /* 42 The proper function call sequence of PageBulk is as below: 43 -- PageBulk::init 44 -- PageBulk::insert 45 -- PageBulk::finish 46 -- PageBulk::compress(COMPRESSED table only) 47 -- PageBulk::pageSplit(COMPRESSED table only) 48 -- PageBulk::commit 49 */ 50 51 class PageBulk 52 { 53 public: 54 /** Constructor 55 @param[in] index B-tree index 56 @param[in] page_no page number 57 @param[in] level page level 58 @param[in] trx_id transaction id 59 @param[in] observer flush observer */ PageBulk(dict_index_t * index,trx_id_t trx_id,ulint page_no,ulint level,FlushObserver * observer)60 PageBulk( 61 dict_index_t* index, 62 trx_id_t trx_id, 63 ulint page_no, 64 ulint level, 65 FlushObserver* observer) 66 : 67 m_heap(NULL), 68 m_index(index), 69 m_mtr(), 70 m_trx_id(trx_id), 71 m_block(NULL), 72 m_page(NULL), 73 m_page_zip(NULL), 74 m_cur_rec(NULL), 75 m_page_no(page_no), 76 m_level(level), 77 m_is_comp(dict_table_is_comp(index->table)), 78 m_heap_top(NULL), 79 m_rec_no(0), 80 m_free_space(0), 81 m_reserved_space(0), 82 #ifdef UNIV_DEBUG 83 m_total_data(0), 84 #endif /* UNIV_DEBUG */ 85 m_modify_clock(0), 86 m_flush_observer(observer), 87 m_err(DB_SUCCESS) 88 { 89 ut_ad(!dict_index_is_spatial(m_index)); 90 ut_ad(!m_index->table->is_temporary()); 91 } 92 93 /** Deconstructor */ ~PageBulk()94 ~PageBulk() 95 { 96 mem_heap_free(m_heap); 97 } 98 99 /** Initialize members and allocate page if needed and start mtr. 100 Note: must be called and only once right after constructor. 101 @return error code */ 102 dberr_t init(); 103 104 /** Insert a record in the page. 105 @param[in] rec record 106 @param[in] offsets record offsets */ 107 void insert(const rec_t* rec, rec_offs* offsets); 108 109 /** Mark end of insertion to the page. Scan all records to set page 110 dirs, and set page header members. */ 111 void finish(); 112 113 /** @return whether finish() actually needs to do something */ 114 inline bool needs_finish() const; 115 116 /** Commit mtr for a page 117 @param[in] success Flag whether all inserts succeed. */ 118 void commit(bool success); 119 120 /** Compress if it is compressed table 121 @return true compress successfully or no need to compress 122 @return false compress failed. */ 123 bool compress(); 124 125 /** Check whether the record needs to be stored externally. 126 @return true 127 @return false */ 128 bool needExt(const dtuple_t* tuple, ulint rec_size); 129 130 /** Store external record 131 @param[in] big_rec external recrod 132 @param[in] offsets record offsets 133 @return error code */ 134 dberr_t storeExt(const big_rec_t* big_rec, rec_offs* offsets); 135 136 /** Get node pointer 137 @return node pointer */ 138 dtuple_t* getNodePtr(); 139 140 /** Get split rec in the page. We split a page in half when compresssion 141 fails, and the split rec should be copied to the new page. 142 @return split rec */ 143 rec_t* getSplitRec(); 144 145 /** Copy all records after split rec including itself. 146 @param[in] rec split rec */ 147 void copyIn(rec_t* split_rec); 148 149 /** Remove all records after split rec including itself. 150 @param[in] rec split rec */ 151 void copyOut(rec_t* split_rec); 152 153 /** Set next page 154 @param[in] next_page_no next page no */ 155 inline void setNext(ulint next_page_no); 156 157 /** Set previous page 158 @param[in] prev_page_no previous page no */ 159 inline void setPrev(ulint prev_page_no); 160 161 /** Release block by commiting mtr */ 162 inline void release(); 163 164 /** Start mtr and latch block */ 165 inline dberr_t latch(); 166 167 /** Check if required space is available in the page for the rec 168 to be inserted. We check fill factor & padding here. 169 @param[in] length required length 170 @return true if space is available */ 171 inline bool isSpaceAvailable(ulint rec_size); 172 173 /** Get page no */ getPageNo()174 ulint getPageNo() 175 { 176 return(m_page_no); 177 } 178 179 /** Get page level */ getLevel()180 ulint getLevel() 181 { 182 return(m_level); 183 } 184 185 /** Get record no */ getRecNo()186 ulint getRecNo() 187 { 188 return(m_rec_no); 189 } 190 191 /** Get page */ getPage()192 page_t* getPage() 193 { 194 return(m_page); 195 } 196 197 /** Get page zip */ getPageZip()198 page_zip_des_t* getPageZip() 199 { 200 return(m_page_zip); 201 } 202 getError()203 dberr_t getError() 204 { 205 return(m_err); 206 } 207 208 /* Memory heap for internal allocation */ 209 mem_heap_t* m_heap; 210 211 private: 212 /** The index B-tree */ 213 dict_index_t* m_index; 214 215 /** The mini-transaction */ 216 mtr_t m_mtr; 217 218 /** The transaction id */ 219 trx_id_t m_trx_id; 220 221 /** The buffer block */ 222 buf_block_t* m_block; 223 224 /** The page */ 225 page_t* m_page; 226 227 /** The page zip descriptor */ 228 page_zip_des_t* m_page_zip; 229 230 /** The current rec, just before the next insert rec */ 231 rec_t* m_cur_rec; 232 233 /** The page no */ 234 ulint m_page_no; 235 236 /** The page level in B-tree */ 237 ulint m_level; 238 239 /** Flag: is page in compact format */ 240 const bool m_is_comp; 241 242 /** The heap top in page for next insert */ 243 byte* m_heap_top; 244 245 /** User record no */ 246 ulint m_rec_no; 247 248 /** The free space left in the page */ 249 ulint m_free_space; 250 251 /** The reserved space for fill factor */ 252 ulint m_reserved_space; 253 254 /** The padding space for compressed page */ 255 ulint m_padding_space; 256 257 #ifdef UNIV_DEBUG 258 /** Total data in the page */ 259 ulint m_total_data; 260 #endif /* UNIV_DEBUG */ 261 262 /** The modify clock value of the buffer block 263 when the block is re-pinned */ 264 ib_uint64_t m_modify_clock; 265 266 /** Flush observer, or NULL if redo logging is enabled */ 267 FlushObserver* m_flush_observer; 268 269 /** Operation result DB_SUCCESS or error code */ 270 dberr_t m_err; 271 }; 272 273 typedef std::vector<PageBulk*, ut_allocator<PageBulk*> > 274 page_bulk_vector; 275 276 class BtrBulk 277 { 278 public: 279 /** Constructor 280 @param[in] index B-tree index 281 @param[in] trx transaction 282 @param[in] observer flush observer */ BtrBulk(dict_index_t * index,const trx_t * trx,FlushObserver * observer)283 BtrBulk( 284 dict_index_t* index, 285 const trx_t* trx, 286 FlushObserver* observer) 287 : 288 m_index(index), 289 m_trx(trx), 290 m_flush_observer(observer) 291 { 292 ut_ad(!dict_index_is_spatial(index)); 293 #ifdef UNIV_DEBUG 294 if (m_flush_observer) 295 m_index->table->space->redo_skipped_count++; 296 #endif /* UNIV_DEBUG */ 297 } 298 299 /** Destructor */ ~BtrBulk()300 ~BtrBulk() 301 { 302 #ifdef UNIV_DEBUG 303 if (m_flush_observer) 304 m_index->table->space->redo_skipped_count--; 305 #endif /* UNIV_DEBUG */ 306 } 307 308 /** Insert a tuple 309 @param[in] tuple tuple to insert. 310 @return error code */ insert(dtuple_t * tuple)311 dberr_t insert(dtuple_t* tuple) 312 { 313 return(insert(tuple, 0)); 314 } 315 316 /** Btree bulk load finish. We commit the last page in each level 317 and copy the last page in top level to the root page of the index 318 if no error occurs. 319 @param[in] err whether bulk load was successful until now 320 @return error code */ 321 dberr_t finish(dberr_t err); 322 323 /** Release all latches */ 324 void release(); 325 326 /** Re-latch all latches */ 327 void latch(); 328 table_name()329 table_name_t table_name() { return m_index->table->name; } 330 331 private: 332 /** Insert a tuple to a page in a level 333 @param[in] tuple tuple to insert 334 @param[in] level B-tree level 335 @return error code */ 336 dberr_t insert(dtuple_t* tuple, ulint level); 337 338 /** Split a page 339 @param[in] page_bulk page to split 340 @param[in] next_page_bulk next page 341 @return error code */ 342 dberr_t pageSplit(PageBulk* page_bulk, 343 PageBulk* next_page_bulk); 344 345 /** Commit(finish) a page. We set next/prev page no, compress a page of 346 compressed table and split the page if compression fails, insert a node 347 pointer to father page if needed, and commit mini-transaction. 348 @param[in] page_bulk page to commit 349 @param[in] next_page_bulk next page 350 @param[in] insert_father flag whether need to insert node ptr 351 @return error code */ 352 dberr_t pageCommit(PageBulk* page_bulk, 353 PageBulk* next_page_bulk, 354 bool insert_father); 355 356 /** Abort a page when an error occurs 357 @param[in] page_bulk page bulk object 358 Note: we should call pageAbort for a PageBulk object, which is not in 359 m_page_bulks after pageCommit, and we will commit or abort PageBulk 360 objects in function "finish". */ pageAbort(PageBulk * page_bulk)361 void pageAbort(PageBulk* page_bulk) 362 { 363 page_bulk->commit(false); 364 } 365 366 /** Log free check */ 367 inline void logFreeCheck(); 368 369 private: 370 /** B-tree index */ 371 dict_index_t*const m_index; 372 373 /** Transaction */ 374 const trx_t*const m_trx; 375 376 /** Root page level */ 377 ulint m_root_level; 378 379 /** Flush observer, or NULL if redo logging is enabled */ 380 FlushObserver*const m_flush_observer; 381 382 /** Page cursor vector for all level */ 383 page_bulk_vector m_page_bulks; 384 }; 385 386 #endif 387