/*****************************************************************************

Copyright (c) 2014, 2020, Oracle and/or its affiliates. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License, version 2.0, as published by the
Free Software Foundation.

This program is also distributed with certain software (including but not
limited to OpenSSL) that is licensed under separate terms, as designated in a
particular file or component or in included license documentation. The authors
of MySQL hereby grant you an additional permission to link the program and
your derivative works with the separately licensed software that they have
included with MySQL.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA

*****************************************************************************/

/** @file include/btr0bulk.h
 The B-tree bulk load

 Created 03/11/2014 Shaohua Wang
 *************************************************************************/
32 
33 #ifndef btr0bulk_h
34 #define btr0bulk_h
35 
36 #include <stddef.h>
37 #include <vector>
38 
39 #include "dict0dict.h"
40 #include "page0cur.h"
41 #include "ut0class_life_cycle.h"
42 #include "ut0new.h"
43 
44 /** Innodb B-tree index fill factor for bulk load. */
45 extern long innobase_fill_factor;
46 
47 /*
48 The proper function call sequence of PageBulk is as below:
49 -- PageBulk::init
50 -- PageBulk::insert
51 -- PageBulk::finish
52 -- PageBulk::compress(COMPRESSED table only)
53 -- PageBulk::pageSplit(COMPRESSED table only)
54 -- PageBulk::commit
55 */
56 
57 class PageBulk : private ut::Non_copyable {
58  public:
59   /** Page split point descriptor. */
60   struct SplitPoint {
61     /** Record being the point of split.
62      * All records before this record should stay on current on page.
63      * This record and all following records should be moved to new page. */
64     rec_t *m_rec;
65     /** Number of records before this record. */
66     ulint m_n_rec_before;
67   };
68 
69   /** Constructor
70   @param[in]	index		B-tree index
71   @param[in]	page_no		page number
72   @param[in]	level		page level
73   @param[in]	trx_id		transaction id
74   @param[in]	observer	flush observer */
PageBulk(dict_index_t * index,trx_id_t trx_id,page_no_t page_no,ulint level,FlushObserver * observer)75   PageBulk(dict_index_t *index, trx_id_t trx_id, page_no_t page_no, ulint level,
76            FlushObserver *observer)
77       : m_heap(nullptr),
78         m_index(index),
79         m_mtr(nullptr),
80         m_trx_id(trx_id),
81         m_block(nullptr),
82         m_page(nullptr),
83         m_page_zip(nullptr),
84         m_cur_rec(nullptr),
85         m_page_no(page_no),
86         m_level(level),
87         m_is_comp(dict_table_is_comp(index->table)),
88         m_heap_top(nullptr),
89         m_rec_no(0),
90         m_free_space(0),
91         m_reserved_space(0),
92         m_padding_space(0),
93 #ifdef UNIV_DEBUG
94         m_total_data(0),
95 #endif /* UNIV_DEBUG */
96         m_modify_clock(0),
97         m_flush_observer(observer),
98         m_last_slotted_rec(nullptr),
99         m_slotted_rec_no(0),
100         m_modified(false) {
101     ut_ad(!dict_index_is_spatial(m_index));
102   }
103 
104   /** Destructor */
~PageBulk()105   ~PageBulk() {
106     if (m_heap) {
107       /* mtr is allocated using heap. */
108       if (m_mtr != nullptr) {
109         m_mtr->~mtr_t();
110       }
111       mem_heap_free(m_heap);
112     }
113   }
114 
115   /** Initialize members and allocate page if needed and start mtr.
116   @note Must be called and only once right after constructor.
117   @return error code */
118   dberr_t init() MY_ATTRIBUTE((warn_unused_result));
119 
120   /** Insert a tuple in the page.
121   @param[in]  tuple     tuple to insert
122   @param[in]  big_rec   external record
123   @param[in]  rec_size  record size
124   @return error code */
125   dberr_t insert(const dtuple_t *tuple, const big_rec_t *big_rec,
126                  ulint rec_size) MY_ATTRIBUTE((warn_unused_result));
127 
128   /** Mark end of insertion to the page. Scan records to set page dirs,
129   and set page header members. The scan is incremental (slots and records
130   which assignment could be "finalized" are not checked again. Check the
131   m_slotted_rec_no usage, note it could be reset in some cases like
132   during split.
133   Note: we refer to page_copy_rec_list_end_to_created_page. */
134   void finish();
135 
136   /** Commit mtr for a page
137   @param[in]	success		Flag whether all inserts succeed. */
138   void commit(bool success);
139 
140   /** Compress if it is compressed table
141   @return	true	compress successfully or no need to compress
142   @return	false	compress failed. */
143   bool compress() MY_ATTRIBUTE((warn_unused_result));
144 
145   /** Check whether the record needs to be stored externally.
146   @return false if the entire record can be stored locally on the page */
147   bool needExt(const dtuple_t *tuple, ulint rec_size) const
148       MY_ATTRIBUTE((warn_unused_result));
149 
150   /** Get node pointer
151   @return node pointer */
152   dtuple_t *getNodePtr();
153 
154   /** Split the page records between this and given bulk.
155    * @param new_page_bulk  The new bulk to store split records. */
156   void split(PageBulk &new_page_bulk);
157 
158   /** Copy all records from page.
159   @param[in]  src_page  Page with records to copy. */
160   void copyAll(const page_t *src_page);
161 
162   /** Set next page
163   @param[in]	next_page_no	next page no */
164   void setNext(page_no_t next_page_no);
165 
166   /** Set previous page
167   @param[in]	prev_page_no	previous page no */
168   void setPrev(page_no_t prev_page_no);
169 
170   /** Release block by committing mtr */
171   inline void release();
172 
173   /** Start mtr and latch block */
174   inline void latch();
175 
176   /** Check if required space is available in the page for the rec
177   to be inserted.	We check fill factor & padding here.
178   @param[in]	rec_size	required space
179   @return true	if space is available */
180   inline bool isSpaceAvailable(ulint rec_size) const;
181 
182   /** Get page no */
getPageNo()183   page_no_t getPageNo() const { return (m_page_no); }
184 
185   /** Get page level */
getLevel()186   ulint getLevel() const { return (m_level); }
187 
188   /** Get record no */
getRecNo()189   ulint getRecNo() const { return (m_rec_no); }
190 
191   /** Get page */
getPage()192   const page_t *getPage() const { return (m_page); }
193 
194   /** Check if table is compressed.
195   @return true if table is compressed, false otherwise. */
isTableCompressed()196   bool isTableCompressed() const { return (m_page_zip != nullptr); }
197 
198 #ifdef UNIV_DEBUG
199   /** Check if index is X locked */
200   bool isIndexXLocked();
201 #endif  // UNIV_DEBUG
202 
203  private:
204   /** Get page split point. We split a page in half when compression
205   fails, and the split record and all following records should be copied
206   to the new page.
207   @return split record descriptor */
208   SplitPoint getSplitRec();
209 
210   /** Copy given and all following records.
211   @param[in]  first_rec  first record to copy */
212   void copyRecords(const rec_t *first_rec);
213 
214   /** Remove all records after split rec including itself.
215   @param[in]  split_point  split point descriptor */
216   void splitTrim(const SplitPoint &split_point);
217 
218   /** Insert a record in the page.
219   @param[in]  rec   record
220   @param[in]  offsets   record offsets */
221   void insert(const rec_t *rec, ulint *offsets);
222 
223   /** Store external record
224   Since the record is not logged yet, so we don't log update to the record.
225   the blob data is logged first, then the record is logged in bulk mode.
226   @param[in]  big_rec   external record
227   @param[in]  offsets   record offsets
228   @return error code */
229   dberr_t storeExt(const big_rec_t *big_rec, ulint *offsets)
230       MY_ATTRIBUTE((warn_unused_result));
231 
232   /** Memory heap for internal allocation */
233   mem_heap_t *m_heap;
234 
235   /** The index B-tree */
236   dict_index_t *m_index;
237 
238   /** The min-transaction */
239   mtr_t *m_mtr;
240 
241   /** The transaction id */
242   trx_id_t m_trx_id;
243 
244   /** The buffer block */
245   buf_block_t *m_block;
246 
247   /** The page */
248   page_t *m_page;
249 
250   /** The page zip descriptor */
251   page_zip_des_t *m_page_zip;
252 
253   /** The current rec, just before the next insert rec */
254   rec_t *m_cur_rec;
255 
256   /** The page no */
257   page_no_t m_page_no;
258 
259   /** The page level in B-tree */
260   ulint m_level;
261 
262   /** Flag: is page in compact format */
263   const bool m_is_comp;
264 
265   /** The heap top in page for next insert */
266   byte *m_heap_top;
267 
268   /** User record no */
269   ulint m_rec_no;
270 
271   /** The free space left in the page */
272   ulint m_free_space;
273 
274   /** The reserved space for fill factor */
275   ulint m_reserved_space;
276 
277   /** The padding space for compressed page */
278   ulint m_padding_space;
279 
280 #ifdef UNIV_DEBUG
281   /** Total data in the page */
282   ulint m_total_data;
283 #endif /* UNIV_DEBUG */
284 
285   /** The modify clock value of the buffer block
286   when the block is re-pinned */
287   ib_uint64_t m_modify_clock;
288 
289   /** Flush observer */
290   FlushObserver *m_flush_observer;
291 
292   /** Last record assigned to a slot. */
293   rec_t *m_last_slotted_rec;
294 
295   /** Number of records assigned to slots. */
296   ulint m_slotted_rec_no;
297 
298   /** Page modified flag. */
299   bool m_modified;
300 };
301 
302 class BtrBulk {
303  public:
304   using page_bulk_vector = std::vector<PageBulk *, ut_allocator<PageBulk *>>;
305 
306   /** Constructor
307   @param[in]	index		B-tree index
308   @param[in]	trx_id		transaction id
309   @param[in]	observer	flush observer */
310   BtrBulk(dict_index_t *index, trx_id_t trx_id, FlushObserver *observer);
311 
312   /** Destructor */
313   ~BtrBulk();
314 
315   /** Initialization
316   @note Must be called right after constructor. */
317   dberr_t init() MY_ATTRIBUTE((warn_unused_result));
318 
319   /** Insert a tuple
320   @param[in]	tuple	tuple to insert.
321   @return error code */
insert(dtuple_t * tuple)322   dberr_t insert(dtuple_t *tuple) MY_ATTRIBUTE((warn_unused_result)) {
323     return (insert(tuple, 0));
324   }
325 
326   /** Btree bulk load finish. We commit the last page in each level
327   and copy the last page in top level to the root page of the index
328   if no error occurs.
329   @param[in]	err	whether bulk load was successful until now
330   @return error code  */
331   dberr_t finish(dberr_t err) MY_ATTRIBUTE((warn_unused_result));
332 
333   /** Release all latches */
334   void release();
335 
336   /** Re-latch all latches */
337   void latch();
338 
339  private:
340   /** Insert a tuple to a page in a level
341   @param[in]	tuple	tuple to insert
342   @param[in]	level	B-tree level
343   @return error code */
344   dberr_t insert(dtuple_t *tuple, ulint level)
345       MY_ATTRIBUTE((warn_unused_result));
346 
347   /** Split a page
348   @param[in]	page_bulk	page to split
349   @param[in]	next_page_bulk	next page
350   @return	error code */
351   dberr_t pageSplit(PageBulk *page_bulk, PageBulk *next_page_bulk)
352       MY_ATTRIBUTE((warn_unused_result));
353 
354   /** Commit(finish) a page. We set next/prev page no, compress a page of
355   compressed table and split the page if compression fails, insert a node
356   pointer to father page if needed, and commit mini-transaction.
357   @param[in]	page_bulk	page to commit
358   @param[in]	next_page_bulk	next page
359   @param[in]	insert_father	flag whether need to insert node ptr
360   @return	error code */
361   dberr_t pageCommit(PageBulk *page_bulk, PageBulk *next_page_bulk,
362                      bool insert_father) MY_ATTRIBUTE((warn_unused_result));
363 
364   /** Abort a page when an error occurs
365   @param[in]	page_bulk	page bulk object
366   @note We should call pageAbort for a PageBulk object, which is not in
367   m_page_bulks after pageCommit, and we will commit or abort PageBulk
368   objects in function "finish". */
pageAbort(PageBulk * page_bulk)369   void pageAbort(PageBulk *page_bulk) { page_bulk->commit(false); }
370 
371   /** Prepare space to insert a tuple.
372   @param[in,out]  page_bulk   page bulk that will be used to store the record.
373                               It may be replaced if there is not enough space
374                               to hold the record.
375   @param[in]  level           B-tree level
376   @param[in]  rec_size        record size
377   @return error code */
378   dberr_t prepareSpace(PageBulk *&page_bulk, ulint level, ulint rec_size)
379       MY_ATTRIBUTE((warn_unused_result));
380 
381   /** Insert a tuple to a page.
382   @param[in]  page_bulk   page bulk object
383   @param[in]  tuple       tuple to insert
384   @param[in]  big_rec     big record vector, maybe NULL if there is no
385                           data to be stored externally.
386   @param[in]  rec_size    record size
387   @return error code */
388   dberr_t insert(PageBulk *page_bulk, dtuple_t *tuple, big_rec_t *big_rec,
389                  ulint rec_size) MY_ATTRIBUTE((warn_unused_result));
390 
391   /** Log free check */
392   void logFreeCheck();
393 
394   /** Btree page bulk load finish. Commits the last page in each level
395   if no error occurs. Also releases all page bulks.
396   @param[in]  err           whether bulk load was successful until now
397   @param[out] last_page_no  last page number
398   @return error code  */
399   dberr_t finishAllPageBulks(dberr_t err, page_no_t &last_page_no)
400       MY_ATTRIBUTE((warn_unused_result));
401 
402  private:
403   /** B-tree index */
404   dict_index_t *m_index;
405 
406   /** Transaction id */
407   trx_id_t m_trx_id;
408 
409   /** Root page level */
410   ulint m_root_level;
411 
412   /** Flush observer */
413   FlushObserver *m_flush_observer;
414 
415   /** Page cursor vector for all level */
416   page_bulk_vector *m_page_bulks;
417 
418 #ifdef UNIV_DEBUG
419   /** State of the index. Used for asserting at the end of a
420   bulk load operation to ensure that the online status of the
421   index does not change */
422   unsigned m_index_online;
423 #endif  // UNIV_DEBUG
424 };
425 
426 #endif
427