1 /*****************************************************************************
2 
3 Copyright (c) 1995, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License, version 2.0, as published by the
7 Free Software Foundation.
8 
9 This program is also distributed with certain software (including but not
10 limited to OpenSSL) that is licensed under separate terms, as designated in a
11 particular file or component or in included license documentation. The authors
12 of MySQL hereby grant you an additional permission to link the program and
13 your derivative works with the separately licensed software that they have
14 included with MySQL.
15 
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19 for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
24 
25 *****************************************************************************/
26 
27 /** @file fsp/fsp0fsp.cc
28  File space management
29 
30  Created 11/29/1995 Heikki Tuuri
31  ***********************************************************************/
32 
33 #include "fsp0fsp.h"
34 #include "buf0buf.h"
35 #include "fil0fil.h"
36 #include "ha_prototypes.h"
37 #include "mtr0log.h"
38 
39 #include "my_dbug.h"
40 
41 #include "page0page.h"
42 #include "page0zip.h"
43 #include "ut0byte.h"
44 #ifdef UNIV_HOTBACKUP
45 #include "fut0lst.h"
46 #endif /* UNIV_HOTBACKUP */
47 #include <my_aes.h>
48 
49 #ifndef UNIV_HOTBACKUP
50 #include <debug_sync.h>
51 #include "btr0btr.h"
52 #include "btr0sea.h"
53 #include "dict0boot.h"
54 #include "dict0dd.h"
55 #include "fut0fut.h"
56 #include "ibuf0ibuf.h"
57 #include "log0log.h"
58 #include "srv0srv.h"
59 #endif /* !UNIV_HOTBACKUP */
60 #include "dict0mem.h"
61 #include "fsp0sysspace.h"
62 #include "srv0start.h"
63 #include "trx0purge.h"
64 
65 #ifndef UNIV_HOTBACKUP
66 
67 #include "dd/types/tablespace.h"
68 #include "dict0dd.h"
69 #include "sql_backup_lock.h"
70 #include "sql_thd_internal_api.h"
71 #include "thd_raii.h"
72 #include "transaction.h"
73 #include "ut0stage.h"
74 
75 /** DDL records for tablespace (un)encryption. */
76 std::vector<DDL_Record *> ts_encrypt_ddl_records;
77 
78 /** Group of pages to be marked dirty together during (un)encryption. */
79 #define PAGE_GROUP_SIZE 1
80 
81 /** Returns an extent to the free list of a space.
82 @param[in]	page_id		page id in the extent
83 @param[in]	page_size	page size
84 @param[in,out]	mtr		mini-transaction */
85 static void fsp_free_extent(const page_id_t &page_id,
86                             const page_size_t &page_size, mtr_t *mtr);
87 
88 /** Determine if extent belongs to a given segment.
89 @param[in]	descr	extent descriptor
90 @param[in]	seg_id	segment identifier
91 @param[in]	mtr	mini-transaction
92 @return	true if extent is part of the segment, false otherwise */
93 static bool xdes_in_segment(const xdes_t *descr, ib_id_t seg_id, mtr_t *mtr);
94 
95 /** Marks a page used. The page must reside within the extents of the given
96 segment.
97 @param[in]   space_id   tablespace identifier
98 @param[in]   page_size  Size of each page in the tablespace.
99 @param[in]   seg_inode  the file segment inode pointer
100 @param[in]   page       the page number to be marked as used.
101 @param[in]   descr      extent descriptor containing information about page.
102 @param[in]   mtr        mini transaction context for modification. */
103 static void fseg_mark_page_used(space_id_t space_id,
104                                 const page_size_t &page_size,
105                                 fseg_inode_t *seg_inode, page_no_t page,
106                                 xdes_t *descr, mtr_t *mtr);
107 
108 /** Returns the first extent descriptor for a segment.
109 We think of the extent lists of the segment catenated in the order
110 FSEG_FULL -> FSEG_NOT_FULL -> FSEG_FREE.
111 @param[in]	inode		segment inode
112 @param[in]	space_id	space id
113 @param[in]	page_size	page size
114 @param[in,out]	mtr		mini-transaction
115 @return the first extent descriptor, or NULL if none */
116 static xdes_t *fseg_get_first_extent(fseg_inode_t *inode, space_id_t space_id,
117                                      const page_size_t &page_size, mtr_t *mtr);
118 
119 /** Put new extents to the free list if there are free extents above the free
120 limit. If an extent happens to contain an extent descriptor page, the extent
121 is put to the FSP_FREE_FRAG list with the page marked as used.
122 @param[in]	init_space	true if this is a single-table tablespace
123 and we are only initializing the first extent and the first bitmap pages;
124 then we will not allocate more extents
125 @param[in,out]	space		tablespace
126 @param[in,out]	header		tablespace header
127 @param[in,out]	mtr		mini-transaction */
128 static UNIV_COLD void fsp_fill_free_list(bool init_space, fil_space_t *space,
129                                          fsp_header_t *header, mtr_t *mtr);
130 
131 /** Allocates a single free page from a segment.
132 This function implements the intelligent allocation strategy which tries
133 to minimize file space fragmentation.
134 @param[in,out]	space			tablespace
135 @param[in]	page_size		page size
136 @param[in,out]	seg_inode		segment inode
137 @param[in]	hint			hint of which page would be desirable
138 @param[in]	direction		if the new page is needed because of
139 an index page split, and records are inserted there in order, into which
140 direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR
141 @param[in]	rw_latch		RW_SX_LATCH, RW_X_LATCH
142 @param[in,out]	mtr			mini-transaction
143 @param[in,out]	init_mtr		mtr or another mini-transaction in
144 which the page should be initialized. If init_mtr != mtr, but the page is
145 already latched in mtr, do not initialize the page */
146 #ifdef UNIV_DEBUG
147 /**
148 @param[in]	has_done_reservation	TRUE if the space has already been
149 reserved, in this case we will never return NULL */
150 #endif /* UNIV_DEBUG */
151 /**
152 @retval NULL	if no page could be allocated
153 @retval block	rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
154 (init_mtr == mtr, or the page was not previously freed in mtr)
155 @retval block	(not allocated or initialized) otherwise */
156 static buf_block_t *fseg_alloc_free_page_low(
157     fil_space_t *space, const page_size_t &page_size, fseg_inode_t *seg_inode,
158     page_no_t hint, byte direction, rw_lock_type_t rw_latch, mtr_t *mtr,
159     mtr_t *init_mtr
160 #ifdef UNIV_DEBUG
161     ,
162     ibool has_done_reservation
163 #endif /* UNIV_DEBUG */
164     ) MY_ATTRIBUTE((warn_unused_result));
165 #endif /* !UNIV_HOTBACKUP */
166 
167 /** Get the segment identifier to which the extent belongs to.
168 @param[in]	descr	extent descriptor
169 @return	the segment identifier */
xdes_get_segment_id(const xdes_t * descr)170 inline ib_id_t xdes_get_segment_id(const xdes_t *descr) {
171   return (mach_read_from_8(descr + XDES_ID));
172 }
173 
174 /** Get the segment identifier to which the extent belongs to.
175 @param[in]	descr	extent descriptor
176 @param[in]	mtr	mini-transaction
177 @return	the segment identifier */
xdes_get_segment_id(const xdes_t * descr,mtr_t * mtr)178 inline ib_id_t xdes_get_segment_id(const xdes_t *descr, mtr_t *mtr) {
179 #ifndef UNIV_HOTBACKUP
180   ut_ad(mtr_memo_contains_page_flagged(
181       mtr, descr,
182       MTR_MEMO_PAGE_S_FIX | MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX));
183 #endif /* !UNIV_HOTBACKUP */
184 
185   return (xdes_get_segment_id(descr));
186 }
187 
188 #ifndef UNIV_HOTBACKUP
189 /** Gets a pointer to the space header and x-locks its page.
190 @param[in]	id		space id
191 @param[in]	page_size	page size
192 @param[in,out]	mtr		mini-transaction
193 @return pointer to the space header, page x-locked */
fsp_get_space_header(space_id_t id,const page_size_t & page_size,mtr_t * mtr)194 fsp_header_t *fsp_get_space_header(space_id_t id, const page_size_t &page_size,
195                                    mtr_t *mtr) {
196   buf_block_t *block;
197   fsp_header_t *header;
198 
199   ut_ad(id != 0 || !page_size.is_compressed());
200 
201   block = buf_page_get(page_id_t(id, 0), page_size, RW_SX_LATCH, mtr);
202   header = FSP_HEADER_OFFSET + buf_block_get_frame(block);
203   buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
204 
205   ut_ad(id == mach_read_from_4(FSP_SPACE_ID + header));
206 #ifdef UNIV_DEBUG
207   const uint32_t flags = mach_read_from_4(FSP_SPACE_FLAGS + header);
208   ut_ad(page_size_t(flags).equals_to(page_size));
209 #endif /* UNIV_DEBUG */
210   return (header);
211 }
212 
213 /** Convert a 32 bit integer tablespace flags to the 32 bit table flags.
214 This can only be done for a tablespace that was built as a file-per-table
215 tablespace. Note that the fsp_flags cannot show the difference between a
216 Compact and Redundant table, so an extra Compact boolean must be supplied.
217                         Low order bit
218                     | REDUNDANT | COMPACT | COMPRESSED | DYNAMIC
219 fil_space_t::flags  |     0     |    0    |     1      |    1
220 dict_table_t::flags |     0     |    1    |     1      |    1
221 @param[in]	fsp_flags	fil_space_t::flags
222 @param[in]	compact		true if not Redundant row format
223 @return tablespace flags (fil_space_t::flags) */
fsp_flags_to_dict_tf(uint32_t fsp_flags,bool compact)224 uint32_t fsp_flags_to_dict_tf(uint32_t fsp_flags, bool compact) {
225   /* If the table in this file-per-table tablespace is Compact
226   row format, the low order bit will not indicate Compact. */
227   bool post_antelope = FSP_FLAGS_GET_POST_ANTELOPE(fsp_flags);
228   ulint zip_ssize = FSP_FLAGS_GET_ZIP_SSIZE(fsp_flags);
229   bool atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(fsp_flags);
230   bool data_dir = FSP_FLAGS_HAS_DATA_DIR(fsp_flags);
231   bool shared_space = FSP_FLAGS_GET_SHARED(fsp_flags);
232   /* FSP_FLAGS_GET_TEMPORARY(fsp_flags) does not have an equivalent
233   flag position in the table flags. But it would go into flags2 if
234   any code is created where that is needed. */
235 
236   uint32_t flags = dict_tf_init(post_antelope | compact, zip_ssize,
237                                 atomic_blobs, data_dir, shared_space);
238 
239   return (flags);
240 }
241 #endif /* !UNIV_HOTBACKUP */
242 
243 /** Check if tablespace is dd tablespace.
244 @param[in]      space_id        tablespace ID
245 @return true if tablespace is dd tablespace. */
fsp_is_dd_tablespace(space_id_t space_id)246 bool fsp_is_dd_tablespace(space_id_t space_id) {
247   return (space_id == dict_sys_t::s_space_id);
248 }
249 
250 /** Check whether a space id is an undo tablespace ID
251 Undo tablespaces have space_id's starting 1 less than the redo logs.
252 They are numbered down from this.  Since rseg_id=0 always refers to the
253 system tablespace, undo_space_num values start at 1.  The current limit
254 is 127. The translation from an undo_space_num is:
255    undo space_id = log_first_space_id - undo_space_num
256 @param[in]	space_id	space id to check
257 @return true if it is undo tablespace else false. */
fsp_is_undo_tablespace(space_id_t space_id)258 bool fsp_is_undo_tablespace(space_id_t space_id) {
259   /* Starting with v8, undo space_ids have a unique range. */
260   if (space_id >= dict_sys_t::s_min_undo_space_id &&
261       space_id <= dict_sys_t::s_max_undo_space_id) {
262     return (true);
263   }
264 
265   /* If upgrading from 5.7, there may be a list of old-style
266   undo tablespaces.  Search them. */
267   if (trx_sys_undo_spaces != nullptr) {
268     return (trx_sys_undo_spaces->contains(space_id));
269   }
270 
271   return (false);
272 }
273 
274 /** Check if tablespace is global temporary.
275 @param[in]	space_id	tablespace ID
276 @return true if tablespace is global temporary. */
fsp_is_global_temporary(space_id_t space_id)277 bool fsp_is_global_temporary(space_id_t space_id) {
278   return (space_id == srv_tmp_space.space_id());
279 }
280 
281 /** Check if the tablespace is session temporary.
282 @param[in]      space_id        tablespace ID
283 @return true if tablespace is a session temporary tablespace. */
fsp_is_session_temporary(space_id_t space_id)284 bool fsp_is_session_temporary(space_id_t space_id) {
285   return (space_id > dict_sys_t::s_min_temp_space_id &&
286           space_id <= dict_sys_t::s_max_temp_space_id);
287 }
288 
289 /** Check if tablespace is system temporary.
290 @param[in]	space_id	tablespace ID
291 @return true if tablespace is system temporary. */
fsp_is_system_temporary(space_id_t space_id)292 bool fsp_is_system_temporary(space_id_t space_id) {
293   return (fsp_is_global_temporary(space_id) ||
294           fsp_is_session_temporary(space_id));
295 }
296 
297 /** Check if checksum is disabled for the given space.
298 @param[in]	space_id	tablespace ID
299 @return true if checksum is disabled for given space. */
fsp_is_checksum_disabled(space_id_t space_id)300 bool fsp_is_checksum_disabled(space_id_t space_id) {
301   return (fsp_is_system_temporary(space_id));
302 }
303 
304 #ifndef UNIV_HOTBACKUP
305 #ifdef UNIV_DEBUG
306 
307 /** Skip some of the sanity checks that are time consuming even in debug mode
308 and can affect frequent verification runs that are done to ensure stability of
309 the product.
310 @return true if check should be skipped for given space. */
fsp_skip_sanity_check(space_id_t space_id)311 bool fsp_skip_sanity_check(space_id_t space_id) {
312   return (srv_skip_temp_table_checks_debug &&
313           fsp_is_system_temporary(space_id));
314 }
315 
316 #endif /* UNIV_DEBUG */
317 
318 /** Gets a descriptor bit of a page.
319  @return true if free */
320 UNIV_INLINE
xdes_mtr_get_bit(const xdes_t * descr,ulint bit,page_no_t offset,mtr_t * mtr)321 ibool xdes_mtr_get_bit(const xdes_t *descr, /*!< in: descriptor */
322                        ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
323                        page_no_t offset, /*!< in: page offset within extent:
324                                          0 ... FSP_EXTENT_SIZE - 1 */
325                        mtr_t *mtr)       /*!< in: mini-transaction */
326 {
327   ut_ad(mtr->is_active());
328   ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
329 
330   return (xdes_get_bit(descr, bit, offset));
331 }
332 
333 /** Sets a descriptor bit of a page. */
334 UNIV_INLINE
xdes_set_bit(xdes_t * descr,ulint bit,page_no_t offset,ibool val,mtr_t * mtr)335 void xdes_set_bit(xdes_t *descr,    /*!< in: descriptor */
336                   ulint bit,        /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
337                   page_no_t offset, /*!< in: page offset within extent:
338                                     0 ... FSP_EXTENT_SIZE - 1 */
339                   ibool val,        /*!< in: bit value */
340                   mtr_t *mtr)       /*!< in/out: mini-transaction */
341 {
342   ulint index;
343   ulint byte_index;
344   ulint bit_index;
345   ulint descr_byte;
346 
347   ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
348   ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
349   ut_ad(offset < FSP_EXTENT_SIZE);
350 
351   index = bit + XDES_BITS_PER_PAGE * offset;
352 
353   byte_index = index / 8;
354   bit_index = index % 8;
355 
356   descr_byte = mach_read_from_1(descr + XDES_BITMAP + byte_index);
357   descr_byte = ut_bit_set_nth(descr_byte, bit_index, val);
358 
359   mlog_write_ulint(descr + XDES_BITMAP + byte_index, descr_byte, MLOG_1BYTE,
360                    mtr);
361 }
362 
363 /** Looks for a descriptor bit having the desired value. Starts from hint
364  and scans upward; at the end of the extent the search is wrapped to
365  the start of the extent.
366  @return bit index of the bit, ULINT_UNDEFINED if not found */
367 UNIV_INLINE
xdes_find_bit(xdes_t * descr,ulint bit,ibool val,page_no_t hint,mtr_t * mtr)368 page_no_t xdes_find_bit(xdes_t *descr, /*!< in: descriptor */
369                         ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
370                         ibool val, /*!< in: desired bit value */
371                         page_no_t hint, /*!< in: hint of which bit position
372                                         would be desirable */
373                         mtr_t *mtr)     /*!< in/out: mini-transaction */
374 {
375   page_no_t i;
376 
377   ut_ad(descr && mtr);
378   ut_ad(val <= TRUE);
379   ut_ad(hint < FSP_EXTENT_SIZE);
380   ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
381   for (i = hint; i < FSP_EXTENT_SIZE; i++) {
382     if (val == xdes_mtr_get_bit(descr, bit, i, mtr)) {
383       return (i);
384     }
385   }
386 
387   for (i = 0; i < hint; i++) {
388     if (val == xdes_mtr_get_bit(descr, bit, i, mtr)) {
389       return (i);
390     }
391   }
392 
393   return (FIL_NULL);
394 }
395 
396 /** Returns the number of used pages in a descriptor.
397  @return number of pages used */
398 UNIV_INLINE
xdes_get_n_used(const xdes_t * descr,mtr_t * mtr)399 page_no_t xdes_get_n_used(const xdes_t *descr, /*!< in: descriptor */
400                           mtr_t *mtr)          /*!< in/out: mini-transaction */
401 {
402   page_no_t count = 0;
403 
404   ut_ad(descr && mtr);
405   ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
406   for (page_no_t i = 0; i < FSP_EXTENT_SIZE; ++i) {
407     if (FALSE == xdes_mtr_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
408       count++;
409     }
410   }
411 
412   return (count);
413 }
414 
415 #ifdef UNIV_DEBUG
416 /** Check if the state of extent descriptor is valid.
417 @param[in]	state	the extent descriptor state
418 @return	true if state is valid, false otherwise */
xdes_state_is_valid(ulint state)419 bool xdes_state_is_valid(ulint state) {
420   switch (state) {
421     case XDES_NOT_INITED:
422     case XDES_FREE:
423     case XDES_FREE_FRAG:
424     case XDES_FULL_FRAG:
425     case XDES_FSEG:
426     case XDES_FSEG_FRAG:
427       return (true);
428   }
429   return (false);
430 }
431 #endif /* UNIV_DEBUG */
432 
433 /** Returns true if extent contains no used pages.
434  @return true if totally free */
435 UNIV_INLINE
xdes_is_free(const xdes_t * descr,mtr_t * mtr)436 ibool xdes_is_free(const xdes_t *descr, /*!< in: descriptor */
437                    mtr_t *mtr)          /*!< in/out: mini-transaction */
438 {
439   if (0 == xdes_get_n_used(descr, mtr)) {
440     ut_ad(xdes_get_state(descr, mtr) != XDES_FSEG_FRAG);
441 
442     return (TRUE);
443   }
444 
445   return (FALSE);
446 }
447 
448 /** Returns true if extent contains no free pages.
449  @return true if full */
450 UNIV_INLINE
xdes_is_full(const xdes_t * descr,mtr_t * mtr)451 ibool xdes_is_full(const xdes_t *descr, /*!< in: descriptor */
452                    mtr_t *mtr)          /*!< in/out: mini-transaction */
453 {
454   if (FSP_EXTENT_SIZE == xdes_get_n_used(descr, mtr)) {
455     return (TRUE);
456   }
457 
458   return (FALSE);
459 }
460 
461 /** Sets the state of an xdes. */
462 UNIV_INLINE
xdes_set_state(xdes_t * descr,xdes_state_t state,mtr_t * mtr)463 void xdes_set_state(xdes_t *descr,      /*!< in/out: descriptor */
464                     xdes_state_t state, /*!< in: state to set */
465                     mtr_t *mtr)         /*!< in/out: mini-transaction */
466 {
467   ut_ad(descr && mtr);
468   ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
469 
470 #ifdef UNIV_DEBUG
471   switch (xdes_get_state(descr, mtr)) {
472     case XDES_FREE:
473       ut_ad(state == XDES_FSEG || state == XDES_FREE_FRAG);
474       break;
475     case XDES_FREE_FRAG:
476       ut_ad(state == XDES_FULL_FRAG || state == XDES_FSEG_FRAG ||
477             state == XDES_FREE);
478       break;
479     case XDES_FULL_FRAG:
480       ut_ad(state == XDES_FREE_FRAG);
481       break;
482     case XDES_FSEG:
483       ut_ad(state == XDES_FREE);
484       break;
485     case XDES_FSEG_FRAG:
486       ut_ad(state == XDES_FREE_FRAG || state == XDES_FULL_FRAG ||
487             state == XDES_FREE);
488       break;
489     case XDES_NOT_INITED:
490       /* The state is not yet initialized. */
491       ut_ad(state == XDES_FREE);
492       break;
493   }
494 #endif /* UNIV_DEBUG */
495 
496   mlog_write_ulint(descr + XDES_STATE, state, MLOG_4BYTES, mtr);
497 }
498 
499 /** Update the segment identifier to which the extent belongs to.
500 @param[in,out]	descr	extent descriptor
501 @param[in,out]	seg_id	segment identifier
502 @param[in]	state	state of the extent.
503 @param[in,out]	mtr	mini-transaction. */
xdes_set_segment_id(xdes_t * descr,const ib_id_t seg_id,xdes_state_t state,mtr_t * mtr)504 inline void xdes_set_segment_id(xdes_t *descr, const ib_id_t seg_id,
505                                 xdes_state_t state, mtr_t *mtr) {
506   ut_ad(mtr != nullptr);
507   mlog_write_ull(descr + XDES_ID, seg_id, mtr);
508   xdes_set_state(descr, state, mtr);
509 }
510 
511 /** Inits an extent descriptor to the free and clean state. */
512 UNIV_INLINE
xdes_init(xdes_t * descr,mtr_t * mtr)513 void xdes_init(xdes_t *descr, /*!< in: descriptor */
514                mtr_t *mtr)    /*!< in/out: mini-transaction */
515 {
516   ulint i;
517 
518   ut_ad(descr && mtr);
519   ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
520   ut_ad((XDES_SIZE - XDES_BITMAP) % 4 == 0);
521 
522   xdes_set_segment_id(descr, 0, XDES_FREE, mtr);
523   flst_write_addr(descr + XDES_FLST_NODE + FLST_PREV, fil_addr_null, mtr);
524   flst_write_addr(descr + XDES_FLST_NODE + FLST_NEXT, fil_addr_null, mtr);
525 
526   for (i = XDES_BITMAP; i < XDES_SIZE; i += 4) {
527     mlog_write_ulint(descr + i, 0xFFFFFFFFUL, MLOG_4BYTES, mtr);
528   }
529 }
530 
531 /** Get pointer to a the extent descriptor of a page.
532 @param[in,out]	sp_header	tablespace header page, x-latched
533 @param[in]	space		tablespace identifier
534 @param[in]	offset		page offset
535 @param[in,out]	mtr		mini-transaction
536 @param[in]	init_space	whether the tablespace is being initialized
537 @param[out]	desc_block	descriptor block, or NULL if it is
538 the same as the tablespace header
539 @return pointer to the extent descriptor, NULL if the page does not
540 exist in the space or if the offset exceeds free limit */
UNIV_INLINE(warn_unused_result)541 UNIV_INLINE MY_ATTRIBUTE((warn_unused_result)) xdes_t
542     *xdes_get_descriptor_with_space_hdr(fsp_header_t *sp_header,
543                                         space_id_t space, page_no_t offset,
544                                         mtr_t *mtr, bool init_space = false,
545                                         buf_block_t **desc_block = nullptr) {
546   ulint limit;
547   ulint size;
548   page_no_t descr_page_no;
549   uint32_t flags;
550   page_t *descr_page;
551 #ifdef UNIV_DEBUG
552   const fil_space_t *fspace = fil_space_get(space);
553   ut_ad(fspace != nullptr);
554 #endif /* UNIV_DEBUG */
555   ut_ad(mtr_memo_contains(mtr, &fspace->latch, MTR_MEMO_X_LOCK));
556   ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_SX_FIX));
557   ut_ad(page_offset(sp_header) == FSP_HEADER_OFFSET);
558   /* Read free limit and space size */
559   limit = mach_read_from_4(sp_header + FSP_FREE_LIMIT);
560   size = mach_read_from_4(sp_header + FSP_SIZE);
561   flags = mach_read_from_4(sp_header + FSP_SPACE_FLAGS);
562   ut_ad(limit == fspace->free_limit ||
563         (fspace->free_limit == 0 &&
564          (init_space || fspace->purpose == FIL_TYPE_TEMPORARY ||
565           (srv_startup_is_before_trx_rollback_phase &&
566            fsp_is_undo_tablespace(fspace->id)))));
567   ut_ad(size == fspace->size_in_header);
568 #ifdef UNIV_DEBUG
569   /* Exclude Encryption flag as it might have been changed In Memory flags but
570   not on disk. */
571   ut_ad(!((flags ^ fspace->flags) & ~(FSP_FLAGS_MASK_ENCRYPTION)));
572 #endif /* UNIV_DEBUG */
573 
574   if ((offset >= size) || (offset >= limit)) {
575     return (nullptr);
576   }
577 
578   const page_size_t page_size(flags);
579 
580   descr_page_no = xdes_calc_descriptor_page(page_size, offset);
581 
582   buf_block_t *block;
583 
584   if (descr_page_no == 0) {
585     /* It is on the space header page */
586 
587     descr_page = page_align(sp_header);
588     block = nullptr;
589   } else {
590     block = buf_page_get(page_id_t(space, descr_page_no), page_size,
591                          RW_SX_LATCH, mtr);
592 
593     buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
594 
595     descr_page = buf_block_get_frame(block);
596   }
597 
598   if (desc_block != nullptr) {
599     *desc_block = block;
600   }
601 
602   return (descr_page + XDES_ARR_OFFSET +
603           XDES_SIZE * xdes_calc_descriptor_index(page_size, offset));
604 }
605 
606 /** Gets pointer to a the extent descriptor of a page.
607 The page where the extent descriptor resides is x-locked. If the page offset
608 is equal to the free limit of the space, adds new extents from above the free
609 limit to the space free list, if not free limit == space size. This adding
610 is necessary to make the descriptor defined, as they are uninitialized
611 above the free limit.
612 @param[in]	space_id	space id
613 @param[in]	offset		page offset; if equal to the free limit, we
614 try to add new extents to the space free list
615 @param[in]	page_size	page size
616 @param[in,out]	mtr		mini-transaction
617 @return pointer to the extent descriptor, NULL if the page does not
618 exist in the space or if the offset exceeds the free limit */
xdes_get_descriptor(space_id_t space_id,page_no_t offset,const page_size_t & page_size,mtr_t * mtr)619 static MY_ATTRIBUTE((warn_unused_result)) xdes_t *xdes_get_descriptor(
620     space_id_t space_id, page_no_t offset, const page_size_t &page_size,
621     mtr_t *mtr) {
622   buf_block_t *block;
623   fsp_header_t *sp_header;
624 
625   block = buf_page_get(page_id_t(space_id, 0), page_size, RW_SX_LATCH, mtr);
626 
627   buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
628 
629   sp_header = FSP_HEADER_OFFSET + buf_block_get_frame(block);
630   return (xdes_get_descriptor_with_space_hdr(sp_header, space_id, offset, mtr));
631 }
632 
633 /** Gets pointer to a the extent descriptor if the file address of the
634 descriptor list node is known. The page where the extent descriptor resides
635 is x-locked.
636 @param[in]	space		space id
637 @param[in]	page_size	page size
638 @param[in]	lst_node	file address of the list node contained in the
639                                 descriptor
640 @param[in,out]	mtr		mini-transaction
641 @return pointer to the extent descriptor */
642 UNIV_INLINE
xdes_lst_get_descriptor(space_id_t space,const page_size_t & page_size,fil_addr_t lst_node,mtr_t * mtr)643 xdes_t *xdes_lst_get_descriptor(space_id_t space, const page_size_t &page_size,
644                                 fil_addr_t lst_node, mtr_t *mtr) {
645   xdes_t *descr;
646 
647   ut_ad(mtr);
648   ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space), MTR_MEMO_X_LOCK));
649 
650   descr = fut_get_ptr(space, page_size, lst_node, RW_SX_LATCH, mtr) -
651           XDES_FLST_NODE;
652 
653   return (descr);
654 }
655 
656 /** Returns page offset of the first page in extent described by a descriptor.
657  @return offset of the first page in extent */
658 UNIV_INLINE
xdes_get_offset(const xdes_t * descr)659 page_no_t xdes_get_offset(const xdes_t *descr) /*!< in: extent descriptor */
660 {
661   ut_ad(descr);
662 
663   return (page_get_page_no(page_align(descr)) +
664           static_cast<page_no_t>(
665               ((page_offset(descr) - XDES_ARR_OFFSET) / XDES_SIZE) *
666               FSP_EXTENT_SIZE));
667 }
668 #endif /* !UNIV_HOTBACKUP */
669 
670 /** Inits a file page whose prior contents should be ignored. */
fsp_init_file_page_low(buf_block_t * block)671 static void fsp_init_file_page_low(
672     buf_block_t *block) /*!< in: pointer to a page */
673 {
674   page_t *page = buf_block_get_frame(block);
675   page_zip_des_t *page_zip = buf_block_get_page_zip(block);
676 
677   if (!fsp_is_system_temporary(block->page.id.space())) {
678     memset(page, 0, UNIV_PAGE_SIZE);
679   }
680 
681   mach_write_to_4(page + FIL_PAGE_OFFSET, block->page.id.page_no());
682   mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
683                   block->page.id.space());
684 
685   /* Reset FRAME LSN, which otherwise points to the LSN of the last
686   page that used this buffer block. This is needed by CLONE for
687   tracking dirty pages. */
688   memset(page + FIL_PAGE_LSN, 0, 8);
689 
690   if (page_zip) {
691     memset(page_zip->data, 0, page_zip_get_size(page_zip));
692     memcpy(page_zip->data + FIL_PAGE_OFFSET, page + FIL_PAGE_OFFSET, 4);
693     memcpy(page_zip->data + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
694            page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 4);
695     memcpy(page_zip->data + FIL_PAGE_LSN, page + FIL_PAGE_LSN, 8);
696   }
697 }
698 
699 #ifndef UNIV_HOTBACKUP
700 #ifdef UNIV_DEBUG
701 /** Assert that the mini-transaction is compatible with
702 updating an allocation bitmap page.
703 @param[in]	id	tablespace identifier
704 @param[in]	mtr	mini-transaction */
fsp_space_modify_check(space_id_t id,const mtr_t * mtr)705 static void fsp_space_modify_check(space_id_t id, const mtr_t *mtr) {
706   ut_ad(mtr);
707   switch (mtr->get_log_mode()) {
708     case MTR_LOG_SHORT_INSERTS:
709       /* These modes are only allowed within a non-bitmap page
710       when there is a higher-level redo log record written. */
711       break;
712 
713     case MTR_LOG_NONE:
714       /* We allow MTR_LOG_NONE to be set over MTR_LOG_NO_REDO. */
715       if (!mtr_t::s_logging.is_enabled()) {
716         return;
717       }
718       break;
719 
720     case MTR_LOG_NO_REDO:
721 #ifdef UNIV_DEBUG
722     {
723       const fil_type_t type = fil_space_get_type(id);
724       ut_a(fsp_is_system_temporary(id) || !mtr_t::s_logging.is_enabled() ||
725            fil_space_get_flags(id) == UINT32_UNDEFINED ||
726            type == FIL_TYPE_TEMPORARY || type == FIL_TYPE_IMPORT ||
727            fil_space_is_redo_skipped(id) || !undo::is_active(id, false));
728     }
729 #endif /* UNIV_DEBUG */
730       return;
731     case MTR_LOG_ALL:
732       /* We must not write redo log for the shared temporary
733       tablespace. */
734       ut_ad(!fsp_is_system_temporary(id));
735       /* If we write redo log, the tablespace must exist. */
736       ut_ad(fil_space_get_type(id) == FIL_TYPE_TABLESPACE);
737       return;
738 
739     default:
740       break;
741   }
742 
743   ut_ad(0);
744 }
745 #endif /* UNIV_DEBUG */
746 
747 /** Initialize a file page.
748 @param[in,out]	block	file page
749 @param[in,out]	mtr	mini-transaction */
fsp_init_file_page(buf_block_t * block,mtr_t * mtr)750 static void fsp_init_file_page(buf_block_t *block, mtr_t *mtr) {
751   fsp_init_file_page_low(block);
752 
753   ut_d(fsp_space_modify_check(block->page.id.space(), mtr));
754   mlog_write_initial_log_record(buf_block_get_frame(block),
755                                 MLOG_INIT_FILE_PAGE2, mtr);
756 }
757 #endif /* !UNIV_HOTBACKUP */
758 
759 /** Parses a redo log record of a file page init.
760  @return end of log record or NULL */
fsp_parse_init_file_page(byte * ptr,byte * end_ptr MY_ATTRIBUTE ((unused)),buf_block_t * block)761 byte *fsp_parse_init_file_page(
762     byte *ptr,                            /*!< in: buffer */
763     byte *end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */
764     buf_block_t *block)                   /*!< in: block or NULL */
765 {
766   ut_ad(ptr != nullptr);
767   ut_ad(end_ptr != nullptr);
768 
769   if (block) {
770     fsp_init_file_page_low(block);
771   }
772 
773   return (ptr);
774 }
775 
776 /** Initializes the fsp system. */
fsp_init()777 void fsp_init() {
778   /* FSP_EXTENT_SIZE must be a multiple of page & zip size */
779   ut_a(UNIV_PAGE_SIZE > 0);
780   ut_a(0 == (UNIV_PAGE_SIZE % FSP_EXTENT_SIZE));
781 
782   static_assert(!(UNIV_PAGE_SIZE_MAX % FSP_EXTENT_SIZE_MAX),
783                 "UNIV_PAGE_SIZE_MAX % FSP_EXTENT_SIZE_MAX != 0");
784 
785   static_assert(!(UNIV_ZIP_SIZE_MIN % FSP_EXTENT_SIZE_MIN),
786                 "UNIV_ZIP_SIZE_MIN % FSP_EXTENT_SIZE_MIN != 0");
787 
788   /* Does nothing at the moment */
789 }
790 
791 /** Writes the space id and flags to a tablespace header.  The flags contain
792  row type, physical/compressed page size, and logical/uncompressed page
793  size of the tablespace. */
fsp_header_init_fields(page_t * page,space_id_t space_id,uint32_t flags)794 void fsp_header_init_fields(
795     page_t *page,        /*!< in/out: first page in the space */
796     space_id_t space_id, /*!< in: space id */
797     uint32_t flags)      /*!< in: tablespace flags
798                       (FSP_SPACE_FLAGS) */
799 {
800   ut_a(fsp_flags_is_valid(flags));
801 
802   mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page, space_id);
803   mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page, flags);
804 }
805 
806 /** Get the offset of encrytion information in page 0.
807 @param[in]	page_size	page size.
808 @return	offset on success, otherwise 0. */
fsp_header_get_encryption_offset(const page_size_t & page_size)809 ulint fsp_header_get_encryption_offset(const page_size_t &page_size) {
810   ulint offset;
811 #ifdef UNIV_DEBUG
812   ulint left_size;
813 #endif
814 
815   offset = XDES_ARR_OFFSET + XDES_SIZE * xdes_arr_size(page_size);
816 #ifdef UNIV_DEBUG
817   left_size =
818       page_size.physical() - FSP_HEADER_OFFSET - offset - FIL_PAGE_DATA_END;
819 
820   ut_ad(left_size >= Encryption::INFO_SIZE);
821 #endif
822 
823   return offset;
824 }
825 
826 #ifndef UNIV_HOTBACKUP
827 /** Write the (un)encryption progress info into the space header.
828 @param[in]      space_id		tablespace id
829 @param[in]      space_flags		tablespace flags
830 @param[in]      progress_info		max pages (un)encrypted
831 @param[in]      operation_type		Type of operation
832 @param[in]      update_operation_type   is operation to be updated
833 @param[in,out]	mtr			mini-transaction
834 @return true if success. */
fsp_header_write_encryption_progress(space_id_t space_id,uint32_t space_flags,ulint progress_info,byte operation_type,bool update_operation_type,mtr_t * mtr)835 bool fsp_header_write_encryption_progress(
836     space_id_t space_id, uint32_t space_flags, ulint progress_info,
837     byte operation_type, bool update_operation_type, mtr_t *mtr) {
838   buf_block_t *block;
839   ulint offset;
840 
841   const page_size_t page_size(space_flags);
842 
843   /* Save the encryption info to the page 0. */
844   block = buf_page_get(page_id_t(space_id, 0), page_size, RW_SX_LATCH, mtr);
845 
846   if (block == nullptr) {
847     return false;
848   }
849 
850   buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
851   ut_ad(space_id == page_get_space_id(buf_block_get_frame(block)));
852 
853   offset = fsp_header_get_encryption_progress_offset(page_size);
854   ut_ad(offset != 0 && offset < UNIV_PAGE_SIZE);
855 
856   page_t *page = buf_block_get_frame(block);
857 
858   /* Write operation type if needed */
859   if (update_operation_type) {
860     mlog_write_ulint(page + offset, operation_type, MLOG_1BYTE, mtr);
861   }
862 
863   mlog_write_ulint(page + offset + Encryption::OPERATION_INFO_SIZE,
864                    progress_info, MLOG_4BYTES, mtr);
865   return (true);
866 }
867 
868 /** Get encryption operation type in progress from the first
869 page of a tablespace.
870 @param[in]	page		first page of a tablespace
871 @param[in]	page_size	tablespace page size
872 @return encryption operation
873 */
fsp_header_encryption_op_type_in_progress(const page_t * page,page_size_t page_size)874 encryption_op_type fsp_header_encryption_op_type_in_progress(
875     const page_t *page, page_size_t page_size) {
876   ulint offset;
877   encryption_op_type op;
878   offset = fsp_header_get_encryption_progress_offset(page_size);
879   ut_ad(offset != 0 && offset < UNIV_PAGE_SIZE);
880 
881   /* Read operation type (1 byte) */
882   byte operation = mach_read_from_1(page + offset);
883   switch (operation) {
884     case Encryption::ENCRYPT_IN_PROGRESS:
885       op = ENCRYPTION;
886       break;
887     case Encryption::DECRYPT_IN_PROGRESS:
888       op = DECRYPTION;
889       break;
890     default:
891       op = NONE;
892       break;
893   }
894 
895   return (op);
896 }
897 
898 /** Write the encryption info into the space header.
899 @param[in]      space_id		tablespace id
900 @param[in]      space_flags		tablespace flags
901 @param[in]      encrypt_info		buffer for re-encrypt key
902 @param[in]      update_fsp_flags	if it need to update the space flags
903 @param[in]      rotate_encryption	if it is called during key rotation
904 @param[in,out]	mtr			mini-transaction
905 @return true if success. */
fsp_header_write_encryption(space_id_t space_id,uint32_t space_flags,byte * encrypt_info,bool update_fsp_flags,bool rotate_encryption,mtr_t * mtr)906 bool fsp_header_write_encryption(space_id_t space_id, uint32_t space_flags,
907                                  byte *encrypt_info, bool update_fsp_flags,
908                                  bool rotate_encryption, mtr_t *mtr) {
909   buf_block_t *block;
910   ulint offset;
911   page_t *page;
912   uint32_t master_key_id;
913 
914   const page_size_t page_size(space_flags);
915 
916   /* Save the encryption info to the page 0. */
917   block = buf_page_get(page_id_t(space_id, 0), page_size, RW_SX_LATCH, mtr);
918   if (block == nullptr) {
919     return (false);
920   }
921 
922   buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
923   ut_ad(space_id == page_get_space_id(buf_block_get_frame(block)));
924 
925   offset = fsp_header_get_encryption_offset(page_size);
926   ut_ad(offset != 0 && offset < UNIV_PAGE_SIZE);
927 
928   page = buf_block_get_frame(block);
929 
930   /* Write the new fsp flags into be update to the header if needed */
931   if (update_fsp_flags) {
932     mlog_write_ulint(page + FSP_HEADER_OFFSET + FSP_SPACE_FLAGS, space_flags,
933                      MLOG_4BYTES, mtr);
934   }
935 
936   if (rotate_encryption) {
937     /* If called during recovery, skip all tablespaces which have updated
938     master_key_id. */
939     master_key_id = mach_read_from_4(page + offset + Encryption::MAGIC_SIZE);
940     if (srv_is_being_started &&
941         master_key_id == Encryption::get_master_key_id()) {
942       ut_ad(memcmp(page + offset, Encryption::KEY_MAGIC_V1,
943                    Encryption::MAGIC_SIZE) == 0 ||
944             memcmp(page + offset, Encryption::KEY_MAGIC_V2,
945                    Encryption::MAGIC_SIZE) == 0 ||
946             memcmp(page + offset, Encryption::KEY_MAGIC_V3,
947                    Encryption::MAGIC_SIZE) == 0);
948       return (true);
949     }
950   }
951 
952   /* Write encryption info passed */
953   mlog_write_string(page + offset, encrypt_info, Encryption::INFO_SIZE, mtr);
954 
955   return (true);
956 }
957 
958 /** Rotate the encryption info in the space header.
959 @param[in]	space		tablespace
960 @param[in]      encrypt_info	buffer for re-encrypt key.
961 @param[in,out]	mtr		mini-transaction
962 @return true if success. */
fsp_header_rotate_encryption(fil_space_t * space,byte * encrypt_info,mtr_t * mtr)963 bool fsp_header_rotate_encryption(fil_space_t *space, byte *encrypt_info,
964                                   mtr_t *mtr) {
965   ut_ad(mtr);
966   ut_ad(space->encryption_type != Encryption::NONE);
967 
968   DBUG_EXECUTE_IF("fsp_header_rotate_encryption_failure", return (false););
969 
970   /* Fill encryption info. */
971   if (!Encryption::fill_encryption_info(space->encryption_key,
972                                         space->encryption_iv, encrypt_info,
973                                         false, true)) {
974     return (false);
975   }
976 
977   /* Write encryption info into space header. */
978   return (fsp_header_write_encryption(space->id, space->flags, encrypt_info,
979                                       false, true, mtr));
980 }
981 
982 /** Read the server version number from the DD tablespace header.
983 @param[out]     version server version from tablespace header
984 @return false if success. */
fsp_header_dict_get_server_version(uint * version)985 bool fsp_header_dict_get_server_version(uint *version) {
986   fil_space_t *space = fil_space_acquire(dict_sys_t::s_space_id);
987 
988   if (space == nullptr) {
989     return (true);
990   }
991 
992   buf_block_t *block;
993   page_t *page;
994   mtr_t mtr;
995 
996   const page_size_t page_size(space->flags);
997 
998   mtr_start(&mtr);
999   block = buf_page_get(page_id_t(dict_sys_t::s_space_id, 0), page_size,
1000                        RW_SX_LATCH, &mtr);
1001   page = buf_block_get_frame(block);
1002   *version = fsp_header_get_server_version(page);
1003 
1004   mtr_commit(&mtr);
1005   fil_space_release(space);
1006 
1007   return (false);
1008 }
1009 
1010 /** Initializes the space header of a new created space and creates also the
1011 insert buffer tree root if space == 0.
1012 @param[in]	space_id	space id
1013 @param[in]	size		current size in blocks
1014 @param[in,out]	mtr		min-transaction
1015 @param[in]	is_boot		if it's for bootstrap
1016 @return	true on success, otherwise false. */
fsp_header_init(space_id_t space_id,page_no_t size,mtr_t * mtr,bool is_boot)1017 bool fsp_header_init(space_id_t space_id, page_no_t size, mtr_t *mtr,
1018                      bool is_boot) {
1019   fsp_header_t *header;
1020   buf_block_t *block;
1021   page_t *page;
1022 
1023   ut_ad(mtr);
1024 
1025   fil_space_t *space = fil_space_get(space_id);
1026   ut_ad(space != nullptr);
1027 
1028   mtr_x_lock_space(space, mtr);
1029 
1030   const page_id_t page_id(space_id, 0);
1031   const page_size_t page_size(space->flags);
1032 
1033   block = buf_page_create(page_id, page_size, RW_SX_LATCH, mtr);
1034   buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
1035 
1036   space->size_in_header = size;
1037   space->free_len = 0;
1038   space->free_limit = 0;
1039 
1040   /* The prior contents of the file page should be ignored */
1041 
1042   fsp_init_file_page(block, mtr);
1043   page = buf_block_get_frame(block);
1044 
1045   mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_FSP_HDR, MLOG_2BYTES,
1046                    mtr);
1047 
1048   mlog_write_ulint(page + FIL_PAGE_SRV_VERSION, DD_SPACE_CURRENT_SRV_VERSION,
1049                    MLOG_4BYTES, mtr);
1050   mlog_write_ulint(page + FIL_PAGE_SPACE_VERSION,
1051                    DD_SPACE_CURRENT_SPACE_VERSION, MLOG_4BYTES, mtr);
1052 
1053   header = FSP_HEADER_OFFSET + page;
1054 
1055   mlog_write_ulint(header + FSP_SPACE_ID, space_id, MLOG_4BYTES, mtr);
1056   mlog_write_ulint(header + FSP_NOT_USED, 0, MLOG_4BYTES, mtr);
1057 
1058   fsp_header_size_update(header, size, mtr);
1059   mlog_write_ulint(header + FSP_FREE_LIMIT, 0, MLOG_4BYTES, mtr);
1060   mlog_write_ulint(header + FSP_SPACE_FLAGS, space->flags, MLOG_4BYTES, mtr);
1061   mlog_write_ulint(header + FSP_FRAG_N_USED, 0, MLOG_4BYTES, mtr);
1062 
1063   flst_init(header + FSP_FREE, mtr);
1064   flst_init(header + FSP_FREE_FRAG, mtr);
1065   flst_init(header + FSP_FULL_FRAG, mtr);
1066   flst_init(header + FSP_SEG_INODES_FULL, mtr);
1067   flst_init(header + FSP_SEG_INODES_FREE, mtr);
1068 
1069   mlog_write_ull(header + FSP_SEG_ID, 1, mtr);
1070 
1071   fsp_fill_free_list(
1072       !fsp_is_system_tablespace(space_id) && !fsp_is_global_temporary(space_id),
1073       space, header, mtr);
1074 
1075   /* For encryption tablespace, we need to save the encryption
1076   info to the page 0. */
1077   if (FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
1078     ulint offset = fsp_header_get_encryption_offset(page_size);
1079     byte encryption_info[Encryption::INFO_SIZE];
1080 
1081     if (offset == 0) return (false);
1082 
1083     if (!Encryption::fill_encryption_info(space->encryption_key,
1084                                           space->encryption_iv, encryption_info,
1085                                           is_boot, true)) {
1086       space->encryption_type = Encryption::NONE;
1087       memset(space->encryption_key, 0, Encryption::KEY_LEN);
1088       memset(space->encryption_iv, 0, Encryption::KEY_LEN);
1089       return (false);
1090     }
1091 
1092     mlog_write_string(page + offset, encryption_info, Encryption::INFO_SIZE,
1093                       mtr);
1094   }
1095   space->encryption_op_in_progress = NONE;
1096 
1097   if (space_id == TRX_SYS_SPACE) {
1098     if (btr_create(DICT_CLUSTERED | DICT_IBUF, 0, univ_page_size,
1099                    DICT_IBUF_ID_MIN + space_id, dict_ind_redundant,
1100                    mtr) == FIL_NULL) {
1101       return (false);
1102     }
1103   }
1104 
1105   return (true);
1106 }
1107 #endif /* !UNIV_HOTBACKUP */
1108 
1109 /** Reads the space id from the first page of a tablespace.
1110  @return space id, ULINT UNDEFINED if error */
fsp_header_get_space_id(const page_t * page)1111 space_id_t fsp_header_get_space_id(
1112     const page_t *page) /*!< in: first page of a tablespace */
1113 {
1114   space_id_t fsp_id;
1115   space_id_t id;
1116 
1117   fsp_id = mach_read_from_4(FSP_HEADER_OFFSET + page + FSP_SPACE_ID);
1118 
1119   id = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
1120 
1121   DBUG_EXECUTE_IF("fsp_header_get_space_id_failure", id = SPACE_UNKNOWN;);
1122 
1123   if (id != fsp_id) {
1124     ib::error(ER_IB_MSG_414) << "Space ID in fsp header is " << fsp_id
1125                              << ", but in the page header it is " << id << ".";
1126     return (SPACE_UNKNOWN);
1127   }
1128 
1129   return (id);
1130 }
1131 
1132 /** Reads the page size from the first page of a tablespace.
1133 @param[in]	page	first page of a tablespace
1134 @return page size */
fsp_header_get_page_size(const page_t * page)1135 page_size_t fsp_header_get_page_size(const page_t *page) {
1136   return (page_size_t(fsp_header_get_flags(page)));
1137 }
1138 
1139 /** Reads the encryption key from the first page of a tablespace.
1140 @param[in]	fsp_flags	tablespace flags
1141 @param[in,out]	key		tablespace key
1142 @param[in,out]	iv		tablespace iv
1143 @param[in]	page	first page of a tablespace
1144 @return true if success */
fsp_header_get_encryption_key(uint32_t fsp_flags,byte * key,byte * iv,page_t * page)1145 bool fsp_header_get_encryption_key(uint32_t fsp_flags, byte *key, byte *iv,
1146                                    page_t *page) {
1147   ulint offset;
1148   const page_size_t page_size(fsp_flags);
1149 
1150   offset = fsp_header_get_encryption_offset(page_size);
1151   if (offset == 0) {
1152     return (false);
1153   }
1154 
1155   return (Encryption::decode_encryption_info(key, iv, page + offset, true));
1156 }
1157 
1158 #ifndef UNIV_HOTBACKUP
1159 /** Increases the space size field of a space. */
fsp_header_inc_size(space_id_t space_id,page_no_t size_inc,mtr_t * mtr)1160 void fsp_header_inc_size(space_id_t space_id, /*!< in: space id */
1161                          page_no_t size_inc, /*!< in: size increment in pages */
1162                          mtr_t *mtr)         /*!< in/out: mini-transaction */
1163 {
1164   fil_space_t *space = fil_space_get(space_id);
1165 
1166   mtr_x_lock_space(space, mtr);
1167 
1168   ut_d(fsp_space_modify_check(space_id, mtr));
1169 
1170   fsp_header_t *header;
1171 
1172   header = fsp_get_space_header(space_id, page_size_t(space->flags), mtr);
1173 
1174   page_no_t size;
1175 
1176   size = mach_read_from_4(header + FSP_SIZE);
1177   ut_ad(size == space->size_in_header);
1178 
1179   size += size_inc;
1180 
1181   fsp_header_size_update(header, size, mtr);
1182   space->size_in_header = size;
1183 }
1184 
1185 /** Gets the size of the system tablespace from the tablespace header.  If
1186  we do not have an auto-extending data file, this should be equal to
1187  the size of the data files.  If there is an auto-extending data file,
1188  this can be smaller.
1189  @return size in pages */
fsp_header_get_tablespace_size(void)1190 page_no_t fsp_header_get_tablespace_size(void) {
1191   fil_space_t *space = fil_space_get_sys_space();
1192 
1193   mtr_t mtr;
1194 
1195   mtr_start(&mtr);
1196 
1197   mtr_x_lock_space(space, &mtr);
1198 
1199   fsp_header_t *header;
1200 
1201   header = fsp_get_space_header(TRX_SYS_SPACE, univ_page_size, &mtr);
1202 
1203   page_no_t size;
1204 
1205   size = mach_read_from_4(header + FSP_SIZE);
1206 
1207   ut_ad(space->size_in_header == size);
1208 
1209   mtr_commit(&mtr);
1210 
1211   return (size);
1212 }
1213 
1214 /** Try to extend a single-table tablespace so that a page would fit in the
1215 data file.
1216 @param[in,out]	space	tablespace
1217 @param[in]	page_no	page number
1218 @param[in,out]	header	tablespace header
1219 @param[in,out]	mtr	mini-transaction
1220 @return true if success */
UNIV_COLD(warn_unused_result)1221 static UNIV_COLD
1222 MY_ATTRIBUTE((warn_unused_result)) bool fsp_try_extend_data_file_with_pages(
1223     fil_space_t *space, page_no_t page_no, fsp_header_t *header, mtr_t *mtr) {
1224   DBUG_TRACE;
1225 
1226   ut_ad(!fsp_is_system_tablespace(space->id));
1227   ut_ad(!fsp_is_global_temporary(space->id));
1228   ut_d(fsp_space_modify_check(space->id, mtr));
1229 
1230   page_no_t size = mach_read_from_4(header + FSP_SIZE);
1231   ut_ad(size == space->size_in_header);
1232 
1233   ut_a(page_no >= size);
1234 
1235   bool success = fil_space_extend(space, page_no + 1);
1236 
1237   /* The size may be less than we wanted if we ran out of disk space. */
1238   fsp_header_size_update(header, space->size, mtr);
1239   space->size_in_header = space->size;
1240 
1241   return success;
1242 }
1243 
1244 /** Try to extend the last data file of a tablespace if it is auto-extending.
1245 @param[in,out]	space	tablespace
1246 @param[in,out]	header	tablespace header
1247 @param[in,out]	mtr	mini-transaction
1248 @return whether the tablespace was extended */
fsp_try_extend_data_file(fil_space_t * space,fsp_header_t * header,mtr_t * mtr)1249 static UNIV_COLD ulint fsp_try_extend_data_file(fil_space_t *space,
1250                                                 fsp_header_t *header,
1251                                                 mtr_t *mtr) {
1252   page_no_t size;          /* current number of pages
1253                            in the datafile */
1254   page_no_t size_increase; /* number of pages to extend
1255                            this file */
1256   const char *OUT_OF_SPACE_MSG =
1257       "ran out of space. Please add another file or use"
1258       " 'autoextend' for the last file in setting";
1259   DBUG_TRACE;
1260 
1261   ut_d(fsp_space_modify_check(space->id, mtr));
1262 
1263   if (space->id == TRX_SYS_SPACE &&
1264       !srv_sys_space.can_auto_extend_last_file()) {
1265     /* We print the error message only once to avoid
1266     spamming the error log. Note that we don't need
1267     to reset the flag to false as dealing with this
1268     error requires server restart. */
1269     if (!srv_sys_space.get_tablespace_full_status()) {
1270       ib::error(ER_IB_MSG_415) << "Tablespace " << srv_sys_space.name() << " "
1271                                << OUT_OF_SPACE_MSG << " innodb_data_file_path.";
1272       srv_sys_space.set_tablespace_full_status(true);
1273     }
1274     return false;
1275   } else if (fsp_is_global_temporary(space->id) &&
1276              !srv_tmp_space.can_auto_extend_last_file()) {
1277     /* We print the error message only once to avoid
1278     spamming the error log. Note that we don't need
1279     to reset the flag to false as dealing with this
1280     error requires server restart. */
1281     if (!srv_tmp_space.get_tablespace_full_status()) {
1282       ib::error(ER_IB_MSG_416)
1283           << "Tablespace " << srv_tmp_space.name() << " " << OUT_OF_SPACE_MSG
1284           << " innodb_temp_data_file_path.";
1285       srv_tmp_space.set_tablespace_full_status(true);
1286     }
1287     return false;
1288   }
1289 
1290   size = mach_read_from_4(header + FSP_SIZE);
1291   ut_ad(size == space->size_in_header);
1292 
1293   const page_size_t page_size(mach_read_from_4(header + FSP_SPACE_FLAGS));
1294 
1295   if (space->id == TRX_SYS_SPACE) {
1296     size_increase = srv_sys_space.get_increment();
1297 
1298   } else if (fsp_is_global_temporary(space->id)) {
1299     size_increase = srv_tmp_space.get_increment();
1300 
1301   } else {
1302     page_no_t extent_pages = fsp_get_extent_size_in_pages(page_size);
1303     if (size < extent_pages) {
1304       /* Let us first extend the file to extent_size */
1305       if (!fsp_try_extend_data_file_with_pages(space, extent_pages - 1, header,
1306                                                mtr)) {
1307         return false;
1308       }
1309 
1310       size = extent_pages;
1311     }
1312 
1313     size_increase = fsp_get_pages_to_extend_ibd(page_size, size);
1314   }
1315 
1316   if (size_increase == 0) {
1317     return false;
1318   }
1319 
1320   if (!fil_space_extend(space, size + size_increase)) {
1321     return false;
1322   }
1323 
1324   /* We ignore any fragments of a full megabyte when storing the size
1325   to the space header */
1326 
1327   space->size_in_header =
1328       ut_calc_align_down(space->size, (1024 * 1024) / page_size.physical());
1329 
1330   fsp_header_size_update(header, space->size_in_header, mtr);
1331 
1332   return true;
1333 }
1334 
1335 /** Calculate the number of pages to extend a datafile.
1336 We extend single-table and general tablespaces first one extent at a time,
1337 but 4 at a time for bigger tablespaces. It is not enough to extend always
1338 by one extent, because we need to add at least one extent to FSP_FREE.
1339 A single extent descriptor page will track many extents. And the extent
1340 that uses its extent descriptor page is put onto the FSP_FREE_FRAG list.
1341 Extents that do not use their extent descriptor page are added to FSP_FREE.
1342 The physical page size is used to determine how many extents are tracked
1343 on one extent descriptor page. See xdes_calc_descriptor_page().
1344 @param[in]	page_size	page_size of the datafile
1345 @param[in]	size		current number of pages in the datafile
1346 @return number of pages to extend the file. */
fsp_get_pages_to_extend_ibd(const page_size_t & page_size,page_no_t size)1347 page_no_t fsp_get_pages_to_extend_ibd(const page_size_t &page_size,
1348                                       page_no_t size) {
1349   page_no_t size_increase; /* number of pages to extend this file */
1350   page_no_t extent_size;   /* one megabyte, in pages */
1351   page_no_t threshold;     /* The size of the tablespace (in number
1352                            of pages) where we start allocating more
1353                            than one extent at a time. */
1354 
1355   extent_size = fsp_get_extent_size_in_pages(page_size);
1356 
1357   /* The threshold is set at 32MiB except when the physical page
1358   size is small enough that it must be done sooner. */
1359   threshold =
1360       std::min(32 * extent_size, static_cast<page_no_t>(page_size.physical()));
1361 
1362   if (size < threshold) {
1363     size_increase = extent_size;
1364   } else {
1365     /* Below in fsp_fill_free_list() we assume
1366     that we add at most FSP_FREE_ADD extents at
1367     a time */
1368     size_increase = FSP_FREE_ADD * extent_size;
1369   }
1370 
1371   return (size_increase);
1372 }
1373 
1374 /** Initialize a fragment extent and puts it into the free fragment list.
1375 @param[in,out]	header	tablespace header
1376 @param[in,out]	descr	extent descriptor
1377 @param[in,out]	mtr	mini-transaction */
fsp_init_xdes_free_frag(fsp_header_t * header,xdes_t * descr,mtr_t * mtr)1378 static void fsp_init_xdes_free_frag(fsp_header_t *header, xdes_t *descr,
1379                                     mtr_t *mtr) {
1380   ulint n_used;
1381 
1382   /* The first page in the extent is a extent descriptor page
1383   and the second is an ibuf bitmap page: mark them used */
1384   xdes_set_bit(descr, XDES_FREE_BIT, FSP_XDES_OFFSET, FALSE, mtr);
1385   xdes_set_bit(descr, XDES_FREE_BIT, FSP_IBUF_BITMAP_OFFSET, FALSE, mtr);
1386 
1387   xdes_set_segment_id(descr, 0, XDES_FREE_FRAG, mtr);
1388   flst_add_last(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, mtr);
1389 
1390   n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, mtr);
1391   mlog_write_ulint(header + FSP_FRAG_N_USED, n_used + XDES_FRAG_N_USED,
1392                    MLOG_4BYTES, mtr);
1393 }
1394 
1395 /** Put new extents to the free list if there are free extents above the free
1396 limit. If an extent happens to contain an extent descriptor page, the extent
1397 is put to the FSP_FREE_FRAG list with the page marked as used.
1398 @param[in]	init_space	true if this is a single-table tablespace
1399 and we are only initializing the first extent and the first bitmap pages;
1400 then we will not allocate more extents
1401 @param[in,out]	space		tablespace
1402 @param[in,out]	header		tablespace header
1403 @param[in,out]	mtr		mini-transaction */
fsp_fill_free_list(bool init_space,fil_space_t * space,fsp_header_t * header,mtr_t * mtr)1404 static void fsp_fill_free_list(bool init_space, fil_space_t *space,
1405                                fsp_header_t *header, mtr_t *mtr) {
1406   page_no_t limit;
1407   page_no_t size;
1408   uint32_t flags;
1409   xdes_t *descr;
1410   ulint count = 0;
1411   page_no_t i;
1412 
1413   ut_ad(page_offset(header) == FSP_HEADER_OFFSET);
1414   ut_d(fsp_space_modify_check(space->id, mtr));
1415 
1416   /* Check if we can fill free list from above the free list limit */
1417   size = mach_read_from_4(header + FSP_SIZE);
1418   limit = mach_read_from_4(header + FSP_FREE_LIMIT);
1419   flags = mach_read_from_4(header + FSP_SPACE_FLAGS);
1420 
1421   ut_ad(size == space->size_in_header);
1422   ut_ad(limit == space->free_limit);
1423 
1424   /* Exclude Encryption flag as it might have been changed In Memory flags but
1425   not on disk. */
1426   ut_ad(!((flags ^ space->flags) & ~(FSP_FLAGS_MASK_ENCRYPTION)));
1427 
1428   const page_size_t page_size(flags);
1429 
1430   if (size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) {
1431     if ((!init_space && !fsp_is_system_tablespace(space->id) &&
1432          !fsp_is_global_temporary(space->id)) ||
1433         (space->id == TRX_SYS_SPACE &&
1434          srv_sys_space.can_auto_extend_last_file()) ||
1435         (fsp_is_global_temporary(space->id) &&
1436          srv_tmp_space.can_auto_extend_last_file())) {
1437       fsp_try_extend_data_file(space, header, mtr);
1438       size = space->size_in_header;
1439     }
1440   }
1441 
1442   i = limit;
1443 
1444   while ((init_space && i < 1) ||
1445          ((i + FSP_EXTENT_SIZE <= size) && (count < FSP_FREE_ADD))) {
1446     bool init_xdes = (ut_2pow_remainder(i, page_size.physical()) == 0);
1447 
1448     space->free_limit = i + FSP_EXTENT_SIZE;
1449     mlog_write_ulint(header + FSP_FREE_LIMIT, i + FSP_EXTENT_SIZE, MLOG_4BYTES,
1450                      mtr);
1451 
1452     if (init_xdes) {
1453       buf_block_t *block;
1454 
1455       /* We are going to initialize a new descriptor page
1456       and a new ibuf bitmap page: the prior contents of the
1457       pages should be ignored. */
1458 
1459       if (i > 0) {
1460         const page_id_t page_id(space->id, i);
1461 
1462         block = buf_page_create(page_id, page_size, RW_SX_LATCH, mtr);
1463 
1464         buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
1465 
1466         fsp_init_file_page(block, mtr);
1467         mlog_write_ulint(buf_block_get_frame(block) + FIL_PAGE_TYPE,
1468                          FIL_PAGE_TYPE_XDES, MLOG_2BYTES, mtr);
1469       }
1470 
1471       /* Initialize the ibuf bitmap page in a separate
1472       mini-transaction because it is low in the latching
1473       order, and we must be able to release its latch.
1474       Note: Insert-Buffering is disabled for tables that
1475       reside in the temp-tablespace. */
1476       if (!fsp_is_system_temporary(space->id)) {
1477         mtr_t ibuf_mtr;
1478 
1479         mtr_start(&ibuf_mtr);
1480 
1481         if (space->purpose == FIL_TYPE_TEMPORARY) {
1482           mtr_set_log_mode(&ibuf_mtr, MTR_LOG_NO_REDO);
1483         }
1484 
1485         const page_id_t page_id(space->id, i + FSP_IBUF_BITMAP_OFFSET);
1486 
1487         block = buf_page_create(page_id, page_size, RW_SX_LATCH, &ibuf_mtr);
1488 
1489         buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
1490 
1491         fsp_init_file_page(block, &ibuf_mtr);
1492 
1493         ibuf_bitmap_page_init(block, &ibuf_mtr);
1494 
1495         mtr_commit(&ibuf_mtr);
1496       }
1497     }
1498 
1499     buf_block_t *desc_block = nullptr;
1500     descr = xdes_get_descriptor_with_space_hdr(header, space->id, i, mtr,
1501                                                init_space, &desc_block);
1502     if (desc_block != nullptr) {
1503       fil_block_check_type(desc_block, FIL_PAGE_TYPE_XDES, mtr);
1504     }
1505     xdes_init(descr, mtr);
1506 
1507     if (init_xdes) {
1508       fsp_init_xdes_free_frag(header, descr, mtr);
1509     } else {
1510       flst_add_last(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
1511       count++;
1512     }
1513 
1514     i += FSP_EXTENT_SIZE;
1515   }
1516   ut_a(count < std::numeric_limits<uint32_t>::max());
1517   space->free_len += (uint32_t)count;
1518 }
1519 
1520 /** Allocates a new free extent.
1521 @param[in]	space_id	tablespace identifier
1522 @param[in]	page_size	page size
1523 @param[in]	hint		hint of which extent would be desirable: any
1524 page offset in the extent goes; the hint must not be > FSP_FREE_LIMIT
1525 @param[in,out]	mtr		mini-transaction
1526 @return extent descriptor, NULL if cannot be allocated */
fsp_alloc_free_extent(space_id_t space_id,const page_size_t & page_size,page_no_t hint,mtr_t * mtr)1527 static xdes_t *fsp_alloc_free_extent(space_id_t space_id,
1528                                      const page_size_t &page_size,
1529                                      page_no_t hint, mtr_t *mtr) {
1530   fsp_header_t *header;
1531   fil_addr_t first;
1532   xdes_t *descr;
1533   buf_block_t *desc_block = nullptr;
1534 
1535   header = fsp_get_space_header(space_id, page_size, mtr);
1536 
1537   descr = xdes_get_descriptor_with_space_hdr(header, space_id, hint, mtr, false,
1538                                              &desc_block);
1539 
1540   fil_space_t *space = fil_space_get(space_id);
1541   ut_a(space != nullptr);
1542 
1543   if (desc_block != nullptr) {
1544     fil_block_check_type(desc_block, FIL_PAGE_TYPE_XDES, mtr);
1545   }
1546 
1547   if (descr && (xdes_get_state(descr, mtr) == XDES_FREE)) {
1548     /* Ok, we can take this extent */
1549   } else {
1550     /* Take the first extent in the free list */
1551     first = flst_get_first(header + FSP_FREE, mtr);
1552 
1553     if (fil_addr_is_null(first)) {
1554       fsp_fill_free_list(false, space, header, mtr);
1555 
1556       first = flst_get_first(header + FSP_FREE, mtr);
1557     }
1558 
1559     if (fil_addr_is_null(first)) {
1560       return (nullptr); /* No free extents left */
1561     }
1562 
1563     descr = xdes_lst_get_descriptor(space_id, page_size, first, mtr);
1564   }
1565 
1566   flst_remove(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
1567   space->free_len--;
1568 
1569   return (descr);
1570 }
1571 
1572 /** Allocates a single free page from a space. */
fsp_alloc_from_free_frag(fsp_header_t * header,xdes_t * descr,page_no_t bit,mtr_t * mtr)1573 static void fsp_alloc_from_free_frag(
1574     fsp_header_t *header, /*!< in/out: tablespace header */
1575     xdes_t *descr,        /*!< in/out: extent descriptor */
1576     page_no_t bit,        /*!< in: slot to allocate in the extent */
1577     mtr_t *mtr)           /*!< in/out: mini-transaction */
1578 {
1579   ulint frag_n_used;
1580 
1581   ut_ad(xdes_get_state(descr, mtr) == XDES_FREE_FRAG);
1582   ut_a(xdes_mtr_get_bit(descr, XDES_FREE_BIT, bit, mtr));
1583   xdes_set_bit(descr, XDES_FREE_BIT, bit, FALSE, mtr);
1584 
1585   /* Update the FRAG_N_USED field */
1586   frag_n_used = mach_read_from_4(header + FSP_FRAG_N_USED);
1587   frag_n_used++;
1588   mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES, mtr);
1589   if (xdes_is_full(descr, mtr)) {
1590     /* The fragment is full: move it to another list */
1591     flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, mtr);
1592     xdes_set_state(descr, XDES_FULL_FRAG, mtr);
1593 
1594     flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE, mtr);
1595     mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used - FSP_EXTENT_SIZE,
1596                      MLOG_4BYTES, mtr);
1597   }
1598 }
1599 
1600 /** Gets a buffer block for an allocated page.
1601 NOTE: If init_mtr != mtr, the block will only be initialized if it was
1602 not previously x-latched. It is assumed that the block has been
1603 x-latched only by mtr, and freed in mtr in that case.
1604 @param[in]	page_id		page id of the allocated page
1605 @param[in]	page_size	page size of the allocated page
1606 @param[in]	rw_latch	RW_SX_LATCH, RW_X_LATCH
1607 @param[in,out]	mtr		mini-transaction of the allocation
1608 @param[in,out]	init_mtr	mini-transaction for initializing the page
1609 @return block, initialized if init_mtr==mtr
1610 or rw_lock_x_lock_count(&block->lock) == 1 */
fsp_page_create(const page_id_t & page_id,const page_size_t & page_size,rw_lock_type_t rw_latch,mtr_t * mtr,mtr_t * init_mtr)1611 static buf_block_t *fsp_page_create(const page_id_t &page_id,
1612                                     const page_size_t &page_size,
1613                                     rw_lock_type_t rw_latch, mtr_t *mtr,
1614                                     mtr_t *init_mtr) {
1615   ut_ad(rw_latch == RW_X_LATCH || rw_latch == RW_SX_LATCH);
1616   buf_block_t *block = buf_page_create(page_id, page_size, rw_latch, init_mtr);
1617 
1618   if (init_mtr == mtr ||
1619       (rw_latch == RW_X_LATCH ? rw_lock_get_x_lock_count(&block->lock) == 1
1620                               : rw_lock_get_sx_lock_count(&block->lock) == 1)) {
1621     /* Initialize the page, unless it was already
1622     SX-latched in mtr. (In this case, we would want to
1623     allocate another page that has not been freed in mtr.) */
1624     ut_ad(init_mtr == mtr ||
1625           !mtr_memo_contains_flagged(
1626               mtr, block, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX));
1627 
1628     fsp_init_file_page(block, init_mtr);
1629   }
1630 
1631   return (block);
1632 }
1633 
1634 /** Allocates a single free page from a space.
1635 The page is marked as used.
1636 @param[in]	space		space id
1637 @param[in]	page_size	page size
1638 @param[in]	hint		hint of which page would be desirable
1639 @param[in]	rw_latch	RW_SX_LATCH, RW_X_LATCH
1640 @param[in,out]	mtr		mini-transaction
1641 @param[in,out]	init_mtr	mini-transaction in which the page should be
1642 initialized (may be the same as mtr)
1643 @retval NULL	if no page could be allocated
1644 @retval block	rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
1645 (init_mtr == mtr, or the page was not previously freed in mtr)
1646 @retval block	(not allocated or initialized) otherwise */
fsp_alloc_free_page(space_id_t space,const page_size_t & page_size,page_no_t hint,rw_lock_type_t rw_latch,mtr_t * mtr,mtr_t * init_mtr)1647 static MY_ATTRIBUTE((warn_unused_result)) buf_block_t *fsp_alloc_free_page(
1648     space_id_t space, const page_size_t &page_size, page_no_t hint,
1649     rw_lock_type_t rw_latch, mtr_t *mtr, mtr_t *init_mtr) {
1650   fsp_header_t *header;
1651   fil_addr_t first;
1652   xdes_t *descr;
1653   page_no_t free;
1654   page_no_t page_no;
1655   page_no_t space_size;
1656 
1657   ut_ad(mtr);
1658   ut_ad(init_mtr);
1659 
1660   ut_d(fsp_space_modify_check(space, mtr));
1661   header = fsp_get_space_header(space, page_size, mtr);
1662 
1663   /* Get the hinted descriptor */
1664   descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);
1665 
1666   if (descr && (xdes_get_state(descr, mtr) == XDES_FREE_FRAG)) {
1667     /* Ok, we can take this extent */
1668   } else {
1669     /* Else take the first extent in free_frag list */
1670     first = flst_get_first(header + FSP_FREE_FRAG, mtr);
1671 
1672     if (fil_addr_is_null(first)) {
1673       /* There are no partially full fragments: allocate
1674       a free extent and add it to the FREE_FRAG list. NOTE
1675       that the allocation may have as a side-effect that an
1676       extent containing a descriptor page is added to the
1677       FREE_FRAG list. But we will allocate our page from the
1678       the free extent anyway. */
1679 
1680       descr = fsp_alloc_free_extent(space, page_size, hint, mtr);
1681 
1682       if (descr == nullptr) {
1683         /* No free space left */
1684 
1685         return (nullptr);
1686       }
1687 
1688       xdes_set_state(descr, XDES_FREE_FRAG, mtr);
1689       flst_add_last(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, mtr);
1690     } else {
1691       descr = xdes_lst_get_descriptor(space, page_size, first, mtr);
1692     }
1693 
1694     /* Reset the hint */
1695     hint = 0;
1696   }
1697 
1698   /* Now we have in descr an extent with at least one free page. Look
1699   for a free page in the extent. */
1700 
1701   free = xdes_find_bit(descr, XDES_FREE_BIT, TRUE, hint % FSP_EXTENT_SIZE, mtr);
1702   if (free == FIL_NULL) {
1703     ut_print_buf(stderr, ((byte *)descr) - 500, 1000);
1704     putc('\n', stderr);
1705 
1706     ut_error;
1707   }
1708 
1709   page_no = xdes_get_offset(descr) + free;
1710 
1711   space_size = mach_read_from_4(header + FSP_SIZE);
1712   ut_ad(space_size == fil_space_get(space)->size_in_header ||
1713         (space == TRX_SYS_SPACE && srv_startup_is_before_trx_rollback_phase));
1714 
1715   if (space_size <= page_no) {
1716     /* It must be that we are extending a single-table tablespace
1717     whose size is still < 64 pages */
1718 
1719     ut_a(!fsp_is_system_tablespace(space));
1720     ut_a(!fsp_is_global_temporary(space));
1721     if (page_no >= FSP_EXTENT_SIZE) {
1722       ib::error(ER_IB_MSG_417) << "Trying to extend a single-table"
1723                                   " tablespace "
1724                                << space
1725                                << " , by single"
1726                                   " page(s) though the space size "
1727                                << space_size << ". Page no " << page_no << ".";
1728       return (nullptr);
1729     }
1730 
1731     fil_space_t *fspace = fil_space_get(space);
1732 
1733     if (!fsp_try_extend_data_file_with_pages(fspace, page_no, header, mtr)) {
1734       /* No disk space left */
1735       return (nullptr);
1736     }
1737   }
1738 
1739   fsp_alloc_from_free_frag(header, descr, free, mtr);
1740   return (fsp_page_create(page_id_t(space, page_no), page_size, rw_latch, mtr,
1741                           init_mtr));
1742 }
1743 
1744 /** Frees a single page of a space.
1745 The page is marked as free and clean.
1746 @param[in]	page_id		page id
1747 @param[in]	page_size	page size
1748 @param[in,out]	mtr		mini-transaction */
fsp_free_page(const page_id_t & page_id,const page_size_t & page_size,mtr_t * mtr)1749 static void fsp_free_page(const page_id_t &page_id,
1750                           const page_size_t &page_size, mtr_t *mtr) {
1751   fsp_header_t *header;
1752   xdes_t *descr;
1753   ulint state;
1754   ulint frag_n_used;
1755 
1756   ut_ad(mtr);
1757   ut_d(fsp_space_modify_check(page_id.space(), mtr));
1758 
1759   /* fprintf(stderr, "Freeing page %lu in space %lu\n", page, space); */
1760 
1761   header = fsp_get_space_header(page_id.space(), page_size, mtr);
1762 
1763   descr = xdes_get_descriptor_with_space_hdr(header, page_id.space(),
1764                                              page_id.page_no(), mtr);
1765 
1766   state = xdes_get_state(descr, mtr);
1767 
1768   if (state != XDES_FREE_FRAG && state != XDES_FULL_FRAG) {
1769     ib::error(ER_IB_MSG_418) << "File space extent descriptor of page "
1770                              << page_id << " has state " << state;
1771     fputs("InnoDB: Dump of descriptor: ", stderr);
1772     ut_print_buf(stderr, ((byte *)descr) - 50, 200);
1773     putc('\n', stderr);
1774     /* Crash in debug version, so that we get a core dump
1775     of this corruption. */
1776     ut_ad(0);
1777 
1778     if (state == XDES_FREE) {
1779       /* We put here some fault tolerance: if the page
1780       is already free, return without doing anything! */
1781 
1782       return;
1783     }
1784 
1785     ut_error;
1786   }
1787 
1788   if (xdes_mtr_get_bit(descr, XDES_FREE_BIT,
1789                        page_id.page_no() % FSP_EXTENT_SIZE, mtr)) {
1790     ib::error(ER_IB_MSG_419)
1791         << "File space extent descriptor of page " << page_id
1792         << " says it is free. Dump of descriptor: ";
1793     ut_print_buf(stderr, ((byte *)descr) - 50, 200);
1794     putc('\n', stderr);
1795     /* Crash in debug version, so that we get a core dump
1796     of this corruption. */
1797     ut_ad(0);
1798 
1799     /* We put here some fault tolerance: if the page
1800     is already free, return without doing anything! */
1801 
1802     return;
1803   }
1804 
1805   const page_no_t bit = page_id.page_no() % FSP_EXTENT_SIZE;
1806 
1807   xdes_set_bit(descr, XDES_FREE_BIT, bit, TRUE, mtr);
1808   xdes_set_bit(descr, XDES_CLEAN_BIT, bit, TRUE, mtr);
1809 
1810   frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, mtr);
1811   if (state == XDES_FULL_FRAG) {
1812     /* The fragment was full: move it to another list */
1813     flst_remove(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE, mtr);
1814     xdes_set_state(descr, XDES_FREE_FRAG, mtr);
1815     flst_add_last(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, mtr);
1816     mlog_write_ulint(header + FSP_FRAG_N_USED,
1817                      frag_n_used + FSP_EXTENT_SIZE - 1, MLOG_4BYTES, mtr);
1818   } else {
1819     ut_a(frag_n_used > 0);
1820     mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used - 1, MLOG_4BYTES,
1821                      mtr);
1822   }
1823 
1824   if (xdes_is_free(descr, mtr)) {
1825     /* The extent has become free: move it to another list */
1826     flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, mtr);
1827     fsp_free_extent(page_id, page_size, mtr);
1828   }
1829 }
1830 
1831 /** Returns an extent to the free list of a space.
1832 @param[in]	page_id		page id in the extent
1833 @param[in]	page_size	page size
1834 @param[in,out]	mtr		mini-transaction */
fsp_free_extent(const page_id_t & page_id,const page_size_t & page_size,mtr_t * mtr)1835 static void fsp_free_extent(const page_id_t &page_id,
1836                             const page_size_t &page_size, mtr_t *mtr) {
1837   fsp_header_t *header;
1838   xdes_t *descr;
1839 
1840   ut_ad(mtr);
1841 
1842   header = fsp_get_space_header(page_id.space(), page_size, mtr);
1843 
1844   descr = xdes_get_descriptor_with_space_hdr(header, page_id.space(),
1845                                              page_id.page_no(), mtr);
1846 
1847   switch (xdes_get_state(descr, mtr)) {
1848     case XDES_FSEG_FRAG:
1849       /* The extent is being returned to the FSP_FREE_FRAG list. */
1850       xdes_init(descr, mtr);
1851       fsp_init_xdes_free_frag(header, descr, mtr);
1852       break;
1853     case XDES_FSEG:
1854     case XDES_FREE_FRAG:
1855     case XDES_FULL_FRAG:
1856 
1857       xdes_init(descr, mtr);
1858 
1859       flst_add_last(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
1860 
1861       fil_space_t *space;
1862 
1863       space = fil_space_get(page_id.space());
1864 
1865       ++space->free_len;
1866 
1867       break;
1868 
1869     case XDES_FREE:
1870     case XDES_NOT_INITED:
1871       ut_error;
1872   }
1873 }
1874 
1875 /** Returns the nth inode slot on an inode page.
1876 @param[in]	page		segment inode page
1877 @param[in]	i		inode index on page
1878 @param[in]	page_size	page size
1879 @param[in,out]	mtr		mini-transaction
1880 @return segment inode */
1881 UNIV_INLINE
fsp_seg_inode_page_get_nth_inode(page_t * page,page_no_t i,const page_size_t & page_size,mtr_t * mtr)1882 fseg_inode_t *fsp_seg_inode_page_get_nth_inode(page_t *page, page_no_t i,
1883                                                const page_size_t &page_size,
1884                                                mtr_t *mtr) {
1885   ut_ad(i < FSP_SEG_INODES_PER_PAGE(page_size));
1886   ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_SX_FIX));
1887 
1888   return (page + FSEG_ARR_OFFSET + FSEG_INODE_SIZE * i);
1889 }
1890 
1891 /** Looks for a used segment inode on a segment inode page.
1892 @param[in]	page		segment inode page
1893 @param[in]	page_size	page size
1894 @param[in,out]	mtr		mini-transaction
1895 @return segment inode index, or FIL_NULL if not found */
fsp_seg_inode_page_find_used(page_t * page,const page_size_t & page_size,mtr_t * mtr)1896 static page_no_t fsp_seg_inode_page_find_used(page_t *page,
1897                                               const page_size_t &page_size,
1898                                               mtr_t *mtr) {
1899   page_no_t i;
1900   fseg_inode_t *inode;
1901 
1902   for (i = 0; i < FSP_SEG_INODES_PER_PAGE(page_size); i++) {
1903     inode = fsp_seg_inode_page_get_nth_inode(page, i, page_size, mtr);
1904 
1905     if (mach_read_from_8(inode + FSEG_ID)) {
1906       /* This is used */
1907 
1908       ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
1909       return (i);
1910     }
1911   }
1912 
1913   return (FIL_NULL);
1914 }
1915 
1916 /** Looks for an unused segment inode on a segment inode page.
1917 @param[in]	page		segment inode page
1918 @param[in]	i		search forward starting from this index
1919 @param[in]	page_size	page size
1920 @param[in,out]	mtr		mini-transaction
1921 @return segment inode index, or FIL_NULL if not found */
fsp_seg_inode_page_find_free(page_t * page,page_no_t i,const page_size_t & page_size,mtr_t * mtr)1922 static page_no_t fsp_seg_inode_page_find_free(page_t *page, page_no_t i,
1923                                               const page_size_t &page_size,
1924                                               mtr_t *mtr) {
1925   for (; i < FSP_SEG_INODES_PER_PAGE(page_size); i++) {
1926     fseg_inode_t *inode;
1927 
1928     inode = fsp_seg_inode_page_get_nth_inode(page, i, page_size, mtr);
1929 
1930     if (!mach_read_from_8(inode + FSEG_ID)) {
1931       /* This is unused */
1932       return (i);
1933     }
1934 
1935     ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
1936   }
1937 
1938   return (FIL_NULL);
1939 }
1940 
1941 /** Allocates a new file segment inode page.
1942  @return true if could be allocated */
fsp_alloc_seg_inode_page(fsp_header_t * space_header,mtr_t * mtr)1943 static ibool fsp_alloc_seg_inode_page(
1944     fsp_header_t *space_header, /*!< in: space header */
1945     mtr_t *mtr)                 /*!< in/out: mini-transaction */
1946 {
1947   fseg_inode_t *inode;
1948   buf_block_t *block;
1949   page_t *page;
1950   space_id_t space;
1951 
1952   ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET);
1953 
1954   space = page_get_space_id(page_align(space_header));
1955 
1956   const page_size_t page_size(mach_read_from_4(FSP_SPACE_FLAGS + space_header));
1957 
1958   block = fsp_alloc_free_page(space, page_size, 0, RW_SX_LATCH, mtr, mtr);
1959 
1960   if (block == nullptr) {
1961     return (FALSE);
1962   }
1963 
1964   buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
1965   ut_ad(rw_lock_get_sx_lock_count(&block->lock) == 1);
1966 
1967   page = buf_block_get_frame(block);
1968 
1969   mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_INODE, MLOG_2BYTES, mtr);
1970 
1971   for (page_no_t i = 0; i < FSP_SEG_INODES_PER_PAGE(page_size); i++) {
1972     inode = fsp_seg_inode_page_get_nth_inode(page, i, page_size, mtr);
1973 
1974     mlog_write_ull(inode + FSEG_ID, 0, mtr);
1975   }
1976 
1977   flst_add_last(space_header + FSP_SEG_INODES_FREE, page + FSEG_INODE_PAGE_NODE,
1978                 mtr);
1979 
1980   return (TRUE);
1981 }
1982 
1983 /** Allocates a new file segment inode.
1984  @return segment inode, or NULL if not enough space */
fsp_alloc_seg_inode(fsp_header_t * space_header,mtr_t * mtr)1985 static fseg_inode_t *fsp_alloc_seg_inode(
1986     fsp_header_t *space_header, /*!< in: space header */
1987     mtr_t *mtr)                 /*!< in/out: mini-transaction */
1988 {
1989   buf_block_t *block;
1990   page_t *page;
1991   fseg_inode_t *inode;
1992   page_no_t n;
1993 
1994   ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET);
1995 
1996   /* Allocate a new segment inode page if needed. */
1997   if (flst_get_len(space_header + FSP_SEG_INODES_FREE) == 0 &&
1998       !fsp_alloc_seg_inode_page(space_header, mtr)) {
1999     return (nullptr);
2000   }
2001 
2002   const page_size_t page_size(mach_read_from_4(FSP_SPACE_FLAGS + space_header));
2003 
2004   const page_id_t page_id(
2005       page_get_space_id(page_align(space_header)),
2006       flst_get_first(space_header + FSP_SEG_INODES_FREE, mtr).page);
2007 
2008   block = buf_page_get(page_id, page_size, RW_SX_LATCH, mtr);
2009   buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
2010   fil_block_check_type(block, FIL_PAGE_INODE, mtr);
2011 
2012   page = buf_block_get_frame(block);
2013 
2014   n = fsp_seg_inode_page_find_free(page, 0, page_size, mtr);
2015 
2016   ut_a(n != FIL_NULL);
2017 
2018   inode = fsp_seg_inode_page_get_nth_inode(page, n, page_size, mtr);
2019 
2020   if (FIL_NULL == fsp_seg_inode_page_find_free(page, n + 1, page_size, mtr)) {
2021     /* There are no other unused headers left on the page: move it
2022     to another list */
2023 
2024     flst_remove(space_header + FSP_SEG_INODES_FREE, page + FSEG_INODE_PAGE_NODE,
2025                 mtr);
2026 
2027     flst_add_last(space_header + FSP_SEG_INODES_FULL,
2028                   page + FSEG_INODE_PAGE_NODE, mtr);
2029   }
2030 
2031   ut_ad(!mach_read_from_8(inode + FSEG_ID) ||
2032         mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
2033   return (inode);
2034 }
2035 
2036 /** Frees a file segment inode.
2037 @param[in]	space		space id
2038 @param[in]	page_size	page size
2039 @param[in,out]	inode		segment inode
2040 @param[in,out]	mtr		mini-transaction */
fsp_free_seg_inode(space_id_t space,const page_size_t & page_size,fseg_inode_t * inode,mtr_t * mtr)2041 static void fsp_free_seg_inode(space_id_t space, const page_size_t &page_size,
2042                                fseg_inode_t *inode, mtr_t *mtr) {
2043   page_t *page;
2044   fsp_header_t *space_header;
2045 
2046   ut_d(fsp_space_modify_check(space, mtr));
2047 
2048   page = page_align(inode);
2049 
2050   space_header = fsp_get_space_header(space, page_size, mtr);
2051 
2052   ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
2053 
2054   if (FIL_NULL == fsp_seg_inode_page_find_free(page, 0, page_size, mtr)) {
2055     /* Move the page to another list */
2056 
2057     flst_remove(space_header + FSP_SEG_INODES_FULL, page + FSEG_INODE_PAGE_NODE,
2058                 mtr);
2059 
2060     flst_add_last(space_header + FSP_SEG_INODES_FREE,
2061                   page + FSEG_INODE_PAGE_NODE, mtr);
2062   }
2063 
2064   mlog_write_ull(inode + FSEG_ID, 0, mtr);
2065   mlog_write_ulint(inode + FSEG_MAGIC_N, 0xfa051ce3, MLOG_4BYTES, mtr);
2066 
2067   if (FIL_NULL == fsp_seg_inode_page_find_used(page, page_size, mtr)) {
2068     /* There are no other used headers left on the page: free it */
2069 
2070     flst_remove(space_header + FSP_SEG_INODES_FREE, page + FSEG_INODE_PAGE_NODE,
2071                 mtr);
2072 
2073     fsp_free_page(page_id_t(space, page_get_page_no(page)), page_size, mtr);
2074   }
2075 }
2076 
2077 /** Returns the file segment inode, page x-latched.
2078 @param[in]	header		segment header
2079 @param[in]	space		space id
2080 @param[in]	page_size	page size
2081 @param[in,out]	mtr		mini-transaction
2082 @param[out]	block		inode block, or NULL to ignore
2083 @return segment inode, page x-latched; NULL if the inode is free */
fseg_inode_try_get(fseg_header_t * header,space_id_t space,const page_size_t & page_size,mtr_t * mtr,buf_block_t ** block)2084 static fseg_inode_t *fseg_inode_try_get(fseg_header_t *header, space_id_t space,
2085                                         const page_size_t &page_size,
2086                                         mtr_t *mtr, buf_block_t **block) {
2087   fil_addr_t inode_addr;
2088   fseg_inode_t *inode;
2089 
2090   inode_addr.page = mach_read_from_4(header + FSEG_HDR_PAGE_NO);
2091   inode_addr.boffset = mach_read_from_2(header + FSEG_HDR_OFFSET);
2092   ut_ad(space == mach_read_from_4(header + FSEG_HDR_SPACE));
2093 
2094   inode = fut_get_ptr(space, page_size, inode_addr, RW_SX_LATCH, mtr, block);
2095 
2096   if (UNIV_UNLIKELY(!mach_read_from_8(inode + FSEG_ID))) {
2097     inode = nullptr;
2098   } else {
2099     ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
2100   }
2101 
2102   return (inode);
2103 }
2104 
2105 /** Returns the file segment inode, page x-latched.
2106 @param[in]	header		segment header
2107 @param[in]	space		space id
2108 @param[in]	page_size	page size
2109 @param[in,out]	mtr		mini-transaction
2110 @param[out]	block		inode block
2111 @return segment inode, page x-latched */
fseg_inode_get(fseg_header_t * header,space_id_t space,const page_size_t & page_size,mtr_t * mtr,buf_block_t ** block=nullptr)2112 static fseg_inode_t *fseg_inode_get(fseg_header_t *header, space_id_t space,
2113                                     const page_size_t &page_size, mtr_t *mtr,
2114                                     buf_block_t **block = nullptr) {
2115   fseg_inode_t *inode =
2116       fseg_inode_try_get(header, space, page_size, mtr, block);
2117   ut_a(inode);
2118   return (inode);
2119 }
2120 
2121 /** Gets the page number from the nth fragment page slot.
2122  @return page number, FIL_NULL if not in use */
2123 UNIV_INLINE
fseg_get_nth_frag_page_no(fseg_inode_t * inode,ulint n,mtr_t * mtr MY_ATTRIBUTE ((unused)))2124 page_no_t fseg_get_nth_frag_page_no(
2125     fseg_inode_t *inode, /*!< in: segment inode */
2126     ulint n,             /*!< in: slot index */
2127     mtr_t *mtr MY_ATTRIBUTE((unused)))
2128 /*!< in/out: mini-transaction */
2129 {
2130   ut_ad(inode && mtr);
2131   ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
2132   ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_SX_FIX));
2133   ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
2134   return (mach_read_from_4(inode + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE));
2135 }
2136 
2137 /** Sets the page number in the nth fragment page slot. */
2138 UNIV_INLINE
fseg_set_nth_frag_page_no(fseg_inode_t * inode,ulint n,page_no_t page_no,mtr_t * mtr)2139 void fseg_set_nth_frag_page_no(fseg_inode_t *inode, /*!< in: segment inode */
2140                                ulint n,             /*!< in: slot index */
2141                                page_no_t page_no, /*!< in: page number to set */
2142                                mtr_t *mtr) /*!< in/out: mini-transaction */
2143 {
2144   ut_ad(inode && mtr);
2145   ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
2146   ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_SX_FIX));
2147   ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
2148 
2149   mlog_write_ulint(inode + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE, page_no,
2150                    MLOG_4BYTES, mtr);
2151 }
2152 
2153 /** Finds a fragment page slot which is free.
2154  @return slot index; ULINT_UNDEFINED if none found */
fseg_find_free_frag_page_slot(fseg_inode_t * inode,mtr_t * mtr)2155 static ulint fseg_find_free_frag_page_slot(
2156     fseg_inode_t *inode, /*!< in: segment inode */
2157     mtr_t *mtr)          /*!< in/out: mini-transaction */
2158 {
2159   ulint i;
2160   page_no_t page_no;
2161 
2162   ut_ad(inode && mtr);
2163 
2164   for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
2165     page_no = fseg_get_nth_frag_page_no(inode, i, mtr);
2166 
2167     if (page_no == FIL_NULL) {
2168       return (i);
2169     }
2170   }
2171 
2172   return (ULINT_UNDEFINED);
2173 }
2174 
2175 /** Finds a fragment page slot which is used and last in the array.
2176  @return slot index; ULINT_UNDEFINED if none found */
fseg_find_last_used_frag_page_slot(fseg_inode_t * inode,mtr_t * mtr)2177 static ulint fseg_find_last_used_frag_page_slot(
2178     fseg_inode_t *inode, /*!< in: segment inode */
2179     mtr_t *mtr)          /*!< in/out: mini-transaction */
2180 {
2181   ulint i;
2182   page_no_t page_no;
2183 
2184   ut_ad(inode && mtr);
2185 
2186   for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
2187     page_no =
2188         fseg_get_nth_frag_page_no(inode, FSEG_FRAG_ARR_N_SLOTS - i - 1, mtr);
2189 
2190     if (page_no != FIL_NULL) {
2191       return (FSEG_FRAG_ARR_N_SLOTS - i - 1);
2192     }
2193   }
2194 
2195   return (ULINT_UNDEFINED);
2196 }
2197 
2198 /** Calculates reserved fragment page slots.
2199  @return number of fragment pages */
fseg_get_n_frag_pages(fseg_inode_t * inode,mtr_t * mtr)2200 static ulint fseg_get_n_frag_pages(
2201     fseg_inode_t *inode, /*!< in: segment inode */
2202     mtr_t *mtr)          /*!< in/out: mini-transaction */
2203 {
2204   ulint i;
2205   ulint count = 0;
2206 
2207   ut_ad(inode && mtr);
2208 
2209   for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
2210     if (FIL_NULL != fseg_get_nth_frag_page_no(inode, i, mtr)) {
2211       count++;
2212     }
2213   }
2214 
2215   return (count);
2216 }
2217 
2218 /** Creates a new segment.
2219  @return the block where the segment header is placed, x-latched, NULL
2220  if could not create segment because of lack of space */
fseg_create_general(space_id_t space_id,page_no_t page,ulint byte_offset,ibool has_done_reservation,mtr_t * mtr)2221 buf_block_t *fseg_create_general(
2222     space_id_t space_id, /*!< in: space id */
2223     page_no_t page,      /*!< in: page where the segment header is
2224                          placed: if this is != 0, the page must belong
2225                          to another segment, if this is 0, a new page
2226                          will be allocated and it will belong to the
2227                          created segment */
2228     ulint byte_offset,   /*!< in: byte offset of the created segment header
2229                     on the page */
2230     ibool has_done_reservation, /*!< in: TRUE if the caller has already
2231                   done the reservation for the pages with
2232                   fsp_reserve_free_extents (at least 2 extents: one for
2233                   the inode and the other for the segment) then there is
2234                   no need to do the check for this individual
2235                   operation */
2236     mtr_t *mtr)                 /*!< in/out: mini-transaction */
2237 {
2238   fsp_header_t *space_header;
2239   fseg_inode_t *inode;
2240   ib_id_t seg_id;
2241   buf_block_t *block = nullptr;    /* remove warning */
2242   fseg_header_t *header = nullptr; /* remove warning */
2243   ulint n_reserved = 0;
2244   ulint i;
2245 
2246   DBUG_TRACE;
2247 
2248   ut_ad(byte_offset + FSEG_HEADER_SIZE <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END);
2249   ut_d(fsp_space_modify_check(space_id, mtr));
2250 
2251   fil_space_t *space = fil_space_get(space_id);
2252 
2253   mtr_x_lock_space(space, mtr);
2254 
2255   const page_size_t page_size(space->flags);
2256 
2257   if (page != 0) {
2258     block =
2259         buf_page_get(page_id_t(space_id, page), page_size, RW_SX_LATCH, mtr);
2260 
2261     header = byte_offset + buf_block_get_frame(block);
2262 
2263     const ulint type = space_id == TRX_SYS_SPACE && page == TRX_SYS_PAGE_NO
2264                            ? FIL_PAGE_TYPE_TRX_SYS
2265                            : FIL_PAGE_TYPE_SYS;
2266 
2267     fil_block_check_type(block, type, mtr);
2268   }
2269 
2270   if (rw_lock_get_x_lock_count(&space->latch) == 1) {
2271     /* This thread did not own the latch before this call: free
2272     excess pages from the insert buffer free list */
2273 
2274     if (space_id == IBUF_SPACE_ID) {
2275       ibuf_free_excess_pages();
2276     }
2277   }
2278 
2279   if (!has_done_reservation &&
2280       !fsp_reserve_free_extents(&n_reserved, space_id, 2, FSP_NORMAL, mtr)) {
2281     return nullptr;
2282   }
2283 
2284   space_header = fsp_get_space_header(space_id, page_size, mtr);
2285 
2286   inode = fsp_alloc_seg_inode(space_header, mtr);
2287 
2288   if (inode == nullptr) {
2289     goto funct_exit;
2290   }
2291 
2292   /* Read the next segment id from space header and increment the
2293   value in space header */
2294 
2295   seg_id = mach_read_from_8(space_header + FSP_SEG_ID);
2296 
2297   mlog_write_ull(space_header + FSP_SEG_ID, seg_id + 1, mtr);
2298 
2299   mlog_write_ull(inode + FSEG_ID, seg_id, mtr);
2300 
2301   { /* Introducing a new scope to localize this object. Otherwise, I have to
2302        declare this object before the goto statement above. */
2303     File_segment_inode fseg_inode(space_id, page_size, inode, mtr);
2304     fseg_inode.write_not_full_n_used(0);
2305   }
2306 
2307   flst_init(inode + FSEG_FREE, mtr);
2308   flst_init(inode + FSEG_NOT_FULL, mtr);
2309   flst_init(inode + FSEG_FULL, mtr);
2310 
2311   mlog_write_ulint(inode + FSEG_MAGIC_N, FSEG_MAGIC_N_VALUE, MLOG_4BYTES, mtr);
2312   for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
2313     fseg_set_nth_frag_page_no(inode, i, FIL_NULL, mtr);
2314   }
2315 
2316   if (page == 0) {
2317     block = fseg_alloc_free_page_low(space, page_size, inode, 0, FSP_UP,
2318                                      RW_SX_LATCH, mtr, mtr
2319 #ifdef UNIV_DEBUG
2320                                      ,
2321                                      has_done_reservation
2322 #endif /* UNIV_DEBUG */
2323     );
2324 
2325     /* The allocation cannot fail if we have already reserved a
2326     space for the page. */
2327     ut_ad(!has_done_reservation || block != nullptr);
2328 
2329     if (block == nullptr) {
2330       fsp_free_seg_inode(space_id, page_size, inode, mtr);
2331 
2332       goto funct_exit;
2333     }
2334 
2335     ut_ad(rw_lock_get_sx_lock_count(&block->lock) == 1);
2336 
2337     header = byte_offset + buf_block_get_frame(block);
2338     mlog_write_ulint(buf_block_get_frame(block) + FIL_PAGE_TYPE,
2339                      FIL_PAGE_TYPE_SYS, MLOG_2BYTES, mtr);
2340   }
2341 
2342   mlog_write_ulint(header + FSEG_HDR_OFFSET, page_offset(inode), MLOG_2BYTES,
2343                    mtr);
2344 
2345   mlog_write_ulint(header + FSEG_HDR_PAGE_NO,
2346                    page_get_page_no(page_align(inode)), MLOG_4BYTES, mtr);
2347 
2348   mlog_write_ulint(header + FSEG_HDR_SPACE, space_id, MLOG_4BYTES, mtr);
2349 
2350 funct_exit:
2351   if (!has_done_reservation) {
2352     fil_space_release_free_extents(space_id, n_reserved);
2353   }
2354 
2355   return block;
2356 }
2357 
2358 /** Creates a new segment.
2359  @return the block where the segment header is placed, x-latched, NULL
2360  if could not create segment because of lack of space */
fseg_create(space_id_t space,page_no_t page,ulint byte_offset,mtr_t * mtr)2361 buf_block_t *fseg_create(
2362     space_id_t space,  /*!< in: space id */
2363     page_no_t page,    /*!< in: page where the segment header is
2364                        placed: if this is != 0, the page must belong
2365                        to another segment, if this is 0, a new page
2366                        will be allocated and it will belong to the
2367                        created segment */
2368     ulint byte_offset, /*!< in: byte offset of the created
2369                        segment header on the page */
2370     mtr_t *mtr)        /*!< in/out: mini-transaction */
2371 {
2372   return (fseg_create_general(space, page, byte_offset, FALSE, mtr));
2373 }
2374 
2375 /** Calculates the number of pages reserved by a segment, and how many
2376 pages are currently used.
2377 @param[in]   space_id   unique tablespace identifier
2378 @param[in]   page_size  Size of each page in the tablespace.
2379 @param[in]     inode     file segment inode pointer
2380 @param[out]    used      number of pages used (not more than reserved)
2381 @param[in,out] mtr       the mini transaction
2382 @return number of reserved pages */
fseg_n_reserved_pages_low(space_id_t space_id,const page_size_t & page_size,fseg_inode_t * inode,ulint * used,mtr_t * mtr)2383 static ulint fseg_n_reserved_pages_low(space_id_t space_id,
2384                                        const page_size_t &page_size,
2385                                        fseg_inode_t *inode, ulint *used,
2386                                        mtr_t *mtr) {
2387   ulint ret;
2388   ut_ad(inode && used && mtr);
2389   ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_SX_FIX));
2390 
2391   File_segment_inode fseg_inode(space_id, page_size, inode, mtr);
2392 
2393   /* number of used segment pages in the FSEG_NOT_FULL list */
2394   uint32_t n_used_not_full = fseg_inode.read_not_full_n_used();
2395 
2396   /* total number of segment pages in the FSEG_NOT_FULL list */
2397   ulint n_total_not_full =
2398       FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_NOT_FULL);
2399 
2400   /* n_used can be zero only if n_total is zero. */
2401   ut_ad(n_used_not_full > 0 || n_total_not_full == 0);
2402   ut_ad((n_used_not_full < n_total_not_full) ||
2403         ((n_used_not_full == 0) && (n_total_not_full == 0)));
2404 
2405   /* total number of pages in FSEG_FULL list. */
2406   ulint n_total_full = FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL);
2407 
2408   /* total number of pages in FSEG_FREE list. */
2409   ulint n_total_free = FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FREE);
2410 
2411   /* Number of fragment pages in the segment. */
2412   ulint n_frags = fseg_get_n_frag_pages(inode, mtr);
2413 
2414   *used = n_frags + n_total_full + n_used_not_full;
2415   ret = n_frags + n_total_full + n_total_free + n_total_not_full;
2416 
2417   ut_ad(*used <= ret);
2418   ut_ad((*used < ret) || ((n_used_not_full == 0) && (n_total_not_full == 0) &&
2419                           (n_total_free == 0)));
2420 
2421   return (ret);
2422 }
2423 
2424 /** Calculates the number of pages reserved by a segment, and how many pages are
2425  currently used.
2426  @return number of reserved pages */
fseg_n_reserved_pages(fseg_header_t * header,ulint * used,mtr_t * mtr)2427 ulint fseg_n_reserved_pages(
2428     fseg_header_t *header, /*!< in: segment header */
2429     ulint *used,           /*!< out: number of pages used (<= reserved) */
2430     mtr_t *mtr)            /*!< in/out: mini-transaction */
2431 {
2432   space_id_t space_id;
2433 
2434   space_id = page_get_space_id(page_align(header));
2435 
2436   fil_space_t *space = fil_space_get(space_id);
2437 
2438   mtr_x_lock_space(space, mtr);
2439 
2440   const page_size_t page_size(space->flags);
2441 
2442   fseg_inode_t *inode;
2443 
2444   inode = fseg_inode_get(header, space_id, page_size, mtr);
2445 
2446   return (fseg_n_reserved_pages_low(space_id, page_size, inode, used, mtr));
2447 }
2448 
2449 /** Tries to fill the free list of a segment with consecutive free extents.
2450 This happens if the segment is big enough to allow extents in the free list,
2451 the free list is empty, and the extents can be allocated consecutively from
2452 the hint onward.
2453 @param[in]	inode		segment inode
2454 @param[in]	space		space id
2455 @param[in]	page_size	page size
2456 @param[in]	hint		hint which extent would be good as the first
2457 extent
2458 @param[in,out]	mtr		mini-transaction */
fseg_fill_free_list(fseg_inode_t * inode,space_id_t space,const page_size_t & page_size,page_no_t hint,mtr_t * mtr)2459 static void fseg_fill_free_list(fseg_inode_t *inode, space_id_t space,
2460                                 const page_size_t &page_size, page_no_t hint,
2461                                 mtr_t *mtr) {
2462   xdes_t *descr;
2463   page_no_t i;
2464   ib_id_t seg_id;
2465   ulint reserved;
2466   ulint used;
2467 
2468   ut_ad(inode && mtr);
2469   ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
2470   ut_d(fsp_space_modify_check(space, mtr));
2471 
2472   reserved = fseg_n_reserved_pages_low(space, page_size, inode, &used, mtr);
2473 
2474   if (reserved < FSEG_FREE_LIST_LIMIT * FSP_EXTENT_SIZE) {
2475     /* The segment is too small to allow extents in free list */
2476 
2477     return;
2478   }
2479 
2480   if (flst_get_len(inode + FSEG_FREE) > 0) {
2481     /* Free list is not empty */
2482 
2483     return;
2484   }
2485 
2486   for (i = 0; i < FSEG_FREE_LIST_MAX_LEN; i++) {
2487     descr = xdes_get_descriptor(space, hint, page_size, mtr);
2488 
2489     if ((descr == nullptr) || (XDES_FREE != xdes_get_state(descr, mtr))) {
2490       /* We cannot allocate the desired extent: stop */
2491 
2492       return;
2493     }
2494 
2495     descr = fsp_alloc_free_extent(space, page_size, hint, mtr);
2496 
2497     seg_id = mach_read_from_8(inode + FSEG_ID);
2498     ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
2499     xdes_set_segment_id(descr, seg_id, XDES_FSEG, mtr);
2500 
2501     flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr);
2502     hint += FSP_EXTENT_SIZE;
2503   }
2504 }
2505 
2506 /** A fragment extent can be leased if it is the special kind that has a
2507 descriptor page and no other pages are being used except the descriptor
2508 and ibuf bitmap pages.  The number of used pages will be equal to
2509 XDES_FRAG_N_USED.
2510 @param[in]	descr		extent descriptor
2511 @param[in]	page_size	the page size
2512 @param[in,out]	mtr		mini transaction
2513 @return	true if the extent is leasable, false otherwise. */
2514 UNIV_INLINE
xdes_is_leasable(const xdes_t * descr,const page_size_t & page_size,mtr_t * mtr)2515 bool xdes_is_leasable(const xdes_t *descr, const page_size_t &page_size,
2516                       mtr_t *mtr) {
2517   ut_ad(descr && mtr);
2518   ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
2519 
2520   const page_no_t page_no = xdes_get_offset(descr);
2521   const bool has_xdes_page = !ut_2pow_remainder(page_no, page_size.physical());
2522 
2523   if (!has_xdes_page) {
2524     return (false);
2525   }
2526   /* Page 0 and 1 must not be free */
2527   if (xdes_mtr_get_bit(descr, XDES_FREE_BIT, 0, mtr) ||
2528       xdes_mtr_get_bit(descr, XDES_FREE_BIT, 1, mtr)) {
2529     return (false);
2530   }
2531 
2532   /* All other pages must be free */
2533   for (page_no_t i = 2; i < FSP_EXTENT_SIZE; ++i) {
2534     if (!xdes_mtr_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
2535       return (false);
2536     }
2537   }
2538 
2539   return (true);
2540 }
2541 
2542 /** Get the extent descriptor of the last fragmented extent from the
2543 free_frag list.
2544 @param[in]	header		tablespace header
2545 @param[in]	page_size	page size
2546 @param[in,out]	mtr		mini-transaction
2547 @return	the extent descriptor, or NULL if none */
fsp_get_last_free_frag_extent(fsp_header_t * header,const page_size_t & page_size,mtr_t * mtr)2548 static xdes_t *fsp_get_last_free_frag_extent(fsp_header_t *header,
2549                                              const page_size_t &page_size,
2550                                              mtr_t *mtr) {
2551   space_id_t space;
2552   fil_addr_t node;
2553   xdes_t *descr;
2554 
2555   node = flst_get_last(header + FSP_FREE_FRAG, mtr);
2556 
2557   if (fil_addr_is_null(node)) {
2558     return (nullptr);
2559   }
2560 
2561   space = mach_read_from_4(header + FSEG_HDR_SPACE);
2562   descr = xdes_lst_get_descriptor(space, page_size, node, mtr);
2563   ut_ad(xdes_get_state(descr, mtr) == XDES_FREE_FRAG);
2564 
2565   return (descr);
2566 }
2567 
2568 /** Allocate an extent from free fragment extent to a segment.
2569 @param[in]	space		space id
2570 @param[in,out]	inode		segment to which extent is leased
2571 @param[in]	page_size	page size
2572 @param[in,out]	mtr		mini-transaction
2573 @return	extent descriptor or NULL */
fsp_alloc_xdes_free_frag(space_id_t space,fseg_inode_t * inode,const page_size_t & page_size,mtr_t * mtr)2574 static xdes_t *fsp_alloc_xdes_free_frag(space_id_t space, fseg_inode_t *inode,
2575                                         const page_size_t &page_size,
2576                                         mtr_t *mtr) {
2577   xdes_t *descr;
2578   ib_id_t seg_id;
2579   ulint n_used;
2580 
2581   ut_ad(mtr);
2582   ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space), MTR_MEMO_X_LOCK));
2583 
2584   fsp_header_t *header = fsp_get_space_header(space, page_size, mtr);
2585 
2586   /* If available, take an extent from the free_frag list. */
2587   if (!(descr = fsp_get_last_free_frag_extent(header, page_size, mtr))) {
2588     return (nullptr);
2589   }
2590 
2591   if (!xdes_is_leasable(descr, page_size, mtr)) {
2592     return (nullptr);
2593   }
2594   ut_ad(xdes_get_n_used(descr, mtr) == XDES_FRAG_N_USED);
2595 
2596   /* Remove from the FSP_FREE_FRAG list */
2597   flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, mtr);
2598   n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, mtr);
2599   mlog_write_ulint(header + FSP_FRAG_N_USED, n_used - XDES_FRAG_N_USED,
2600                    MLOG_4BYTES, mtr);
2601 
2602   /* Transition the extent (and its ownership) to the segment. */
2603   seg_id = mach_read_from_8(inode + FSEG_ID);
2604   xdes_set_segment_id(descr, seg_id, XDES_FSEG_FRAG, mtr);
2605 
2606   /* Add to the end of FSEG_NOT_FULL list. */
2607   flst_add_last(inode + FSEG_NOT_FULL, descr + XDES_FLST_NODE, mtr);
2608 
2609   File_segment_inode fseg_inode(space, page_size, inode, mtr);
2610   n_used = fseg_inode.read_not_full_n_used();
2611   fseg_inode.write_not_full_n_used(
2612       static_cast<uint32_t>(n_used + XDES_FRAG_N_USED));
2613 
2614   return (descr);
2615 }
2616 
2617 /** Allocates a free extent for the segment: looks first in the free list of
2618 the segment, then tries to allocate from the space free list.
2619 NOTE that the extent returned still resides in the segment free list, it is
2620 not yet taken off it!
2621 @param[in]	inode		segment inode
2622 @param[in]	space		space id
2623 @param[in]	page_size	page size
2624 @param[in,out]	mtr		mini-transaction
2625 @retval NULL	if no page could be allocated
2626 @retval block	rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
2627 (init_mtr == mtr, or the page was not previously freed in mtr)
2628 @retval block	(not allocated or initialized) otherwise */
fseg_alloc_free_extent(fseg_inode_t * inode,space_id_t space,const page_size_t & page_size,mtr_t * mtr)2629 static xdes_t *fseg_alloc_free_extent(fseg_inode_t *inode, space_id_t space,
2630                                       const page_size_t &page_size,
2631                                       mtr_t *mtr) {
2632   xdes_t *descr;
2633   ib_id_t seg_id;
2634   fil_addr_t first;
2635 
2636   ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
2637   ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
2638   ut_d(fsp_space_modify_check(space, mtr));
2639 
2640   if (flst_get_len(inode + FSEG_FREE) > 0) {
2641     /* Segment free list is not empty, allocate from it */
2642 
2643     first = flst_get_first(inode + FSEG_FREE, mtr);
2644 
2645     descr = xdes_lst_get_descriptor(space, page_size, first, mtr);
2646   } else {
2647     /* Segment free list was empty. */
2648 
2649     /* Check if we can allocate an extent from free frag
2650     list of tablespace. */
2651     descr = fsp_alloc_xdes_free_frag(space, inode, page_size, mtr);
2652 
2653     if (descr != nullptr) {
2654       return (descr);
2655     }
2656 
2657     /* Allocate from space */
2658     descr = fsp_alloc_free_extent(space, page_size, 0, mtr);
2659 
2660     if (descr == nullptr) {
2661       return (nullptr);
2662     }
2663 
2664     seg_id = mach_read_from_8(inode + FSEG_ID);
2665 
2666     xdes_set_segment_id(descr, seg_id, XDES_FSEG, mtr);
2667     flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr);
2668 
2669     /* Try to fill the segment free list */
2670     fseg_fill_free_list(inode, space, page_size,
2671                         xdes_get_offset(descr) + FSP_EXTENT_SIZE, mtr);
2672   }
2673 
2674   return (descr);
2675 }
2676 
/** Allocates a single free page from a segment.
This function implements the intelligent allocation strategy which tries to
minimize file space fragmentation.

The page is chosen by the first matching rule of the if-else chain in the
body (the numbers match the comments there):
 1. the hinted page, if its extent belongs to the segment and the page is
    free;
 2. the hinted page, after taking its whole XDES_FREE extent for the segment
    (only when the segment is filled enough and has reached FSEG_FRAG_LIMIT);
 3. a page at the low or high end of any free extent, depending on the
    direction (same fill conditions as 2, and direction != FSP_NO_DIR);
 4. another free page in the extent of the hinted page, if that extent
    belongs to the segment and is not full;
 5. any unused page of the segment, from FSEG_NOT_FULL or FSEG_FREE;
 6. an individual fragment page from the tablespace, if the segment uses
    fewer than FSEG_FRAG_LIMIT pages;
 7. the first free page of a newly allocated extent.

@param[in,out]	space			tablespace
@param[in]	page_size		page size
@param[in,out]	seg_inode		segment inode
@param[in]	hint			hint of which page would be desirable
@param[in]	direction		if the new page is needed because of
an index page split, and records are inserted there in order, into which
direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR
@param[in]	rw_latch		RW_SX_LATCH, RW_X_LATCH
@param[in,out]	mtr			mini-transaction
@param[in,out]	init_mtr		mtr or another mini-transaction in
which the page should be initialized. If init_mtr != mtr, but the page is
already latched in mtr, do not initialize the page */
#ifdef UNIV_DEBUG
/**
@param[in]	has_done_reservation	TRUE if the space has already been
reserved, in this case we will never return NULL. The parameter exists only
in UNIV_DEBUG builds and is only referenced inside ut_ad() assertions. */
#endif /* UNIV_DEBUG */
/**
@retval NULL	if no page could be allocated
@retval block	rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
(init_mtr == mtr, or the page was not previously freed in mtr)
@retval block	(not allocated or initialized) otherwise */
static buf_block_t *fseg_alloc_free_page_low(fil_space_t *space,
                                             const page_size_t &page_size,
                                             fseg_inode_t *seg_inode,
                                             page_no_t hint, byte direction,
                                             rw_lock_type_t rw_latch,
                                             mtr_t *mtr, mtr_t *init_mtr
#ifdef UNIV_DEBUG
                                             ,
                                             ibool has_done_reservation
#endif /* UNIV_DEBUG */
) {
  fsp_header_t *space_header;
  ib_id_t seg_id;
  ulint used;
  ulint reserved;
  xdes_t *descr;      /*!< extent of the hinted page */
  page_no_t ret_page; /*!< the allocated page offset, FIL_NULL
                      if could not be allocated */
  xdes_t *ret_descr;  /*!< the extent of the allocated page */
  ulint n;
  const space_id_t space_id = space->id;

  ut_ad(mtr);
  ut_ad((direction >= FSP_UP) && (direction <= FSP_NO_DIR));
  ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
  ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
  ut_ad(space->purpose == FIL_TYPE_TEMPORARY ||
        space->purpose == FIL_TYPE_TABLESPACE);

  seg_id = mach_read_from_8(seg_inode + FSEG_ID);

  ut_ad(seg_id);
  ut_d(fsp_space_modify_check(space_id, mtr));
  ut_ad(fil_page_get_type(page_align(seg_inode)) == FIL_PAGE_INODE);

  /* The reserved and used page counts of the segment drive the choice
  between the allocation strategies below. */
  reserved =
      fseg_n_reserved_pages_low(space_id, page_size, seg_inode, &used, mtr);

  space_header = fsp_get_space_header(space_id, page_size, mtr);

  descr = xdes_get_descriptor_with_space_hdr(space_header, space_id, hint, mtr);
  if (descr == nullptr) {
    /* Hint outside space or too high above free limit: reset
    hint */
    /* The file space header page is always allocated. */
    hint = 0;
    descr = xdes_get_descriptor(space_id, hint, page_size, mtr);
  }

  /* In the big if-else below we look for ret_page and ret_descr */
  /*-------------------------------------------------------------*/
  if (xdes_in_segment(descr, seg_id, mtr) &&
      (xdes_mtr_get_bit(descr, XDES_FREE_BIT, hint % FSP_EXTENT_SIZE, mtr) ==
       TRUE)) {
  take_hinted_page:
    /* 1. We can take the hinted page
    =================================*/
    /* This label is also the jump target of case 2 below, once the
    extent of the hinted page has been assigned to the segment. */
    ret_descr = descr;
    ret_page = hint;
    /* Skip the check for extending the tablespace. If the
    page hint were not within the size of the tablespace,
    we would have got (descr == NULL) above and reset the hint. */
    goto got_hinted_page;
    /*-----------------------------------------------------------*/
  } else if (xdes_get_state(descr, mtr) == XDES_FREE &&
             reserved - used < reserved / FSEG_FILLFACTOR &&
             used >= FSEG_FRAG_LIMIT) {
    /* 2. We allocate the free extent from space and can take
    =========================================================
    the hinted page
    ===============*/
    ret_descr = fsp_alloc_free_extent(space_id, page_size, hint, mtr);

    /* The extent handed out must be the one that holds the hint. */
    ut_a(ret_descr == descr);

    xdes_set_segment_id(ret_descr, seg_id, XDES_FSEG, mtr);
    flst_add_last(seg_inode + FSEG_FREE, ret_descr + XDES_FLST_NODE, mtr);

    /* Try to fill the segment free list */
    fseg_fill_free_list(seg_inode, space_id, page_size, hint + FSP_EXTENT_SIZE,
                        mtr);
    goto take_hinted_page;
    /*-----------------------------------------------------------*/
  } else if ((direction != FSP_NO_DIR) &&
             ((reserved - used) < reserved / FSEG_FILLFACTOR) &&
             (used >= FSEG_FRAG_LIMIT) &&
             (!!(ret_descr = fseg_alloc_free_extent(seg_inode, space_id,
                                                    page_size, mtr)))) {
    /* 3. We take any free extent (which was already assigned above
    ===============================================================
    in the if-condition to ret_descr) and take the lowest or
    ========================================================
    highest page in it, depending on the direction
    ==============================================*/
    ret_page = xdes_get_offset(ret_descr);

    if (direction == FSP_DOWN) {
      /* Take the last page of the extent. */
      ret_page += FSP_EXTENT_SIZE - 1;
    } else if (xdes_get_state(ret_descr, mtr) == XDES_FSEG_FRAG) {
      /* In an XDES_FSEG_FRAG extent search for the first free page
      instead of assuming that the first page of the extent is free. */
      ret_page += xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE, 0, mtr);
    }

    ut_ad(!has_done_reservation || ret_page != FIL_NULL);
    /*-----------------------------------------------------------*/
  } else if (xdes_in_segment(descr, seg_id, mtr) &&
             (!xdes_is_full(descr, mtr))) {
    /* 4. We can take the page from the same extent as the
    ======================================================
    hinted page (and the extent already belongs to the
    ==================================================
    segment)
    ========*/
    ret_descr = descr;
    /* The search for a free page starts at the position of the hint
    within the extent. */
    ret_page = xdes_get_offset(ret_descr) +
               xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
                             hint % FSP_EXTENT_SIZE, mtr);
    ut_ad(!has_done_reservation || ret_page != FIL_NULL);
    /*-----------------------------------------------------------*/
  } else if (used < reserved) {
    /* 5. We take any unused page from the segment
    ==============================================*/
    fil_addr_t first;

    /* Prefer a partially used extent over a completely free one. */
    if (flst_get_len(seg_inode + FSEG_NOT_FULL) > 0) {
      first = flst_get_first(seg_inode + FSEG_NOT_FULL, mtr);
    } else if (flst_get_len(seg_inode + FSEG_FREE) > 0) {
      first = flst_get_first(seg_inode + FSEG_FREE, mtr);
    } else {
      ut_ad(!has_done_reservation);
      return (nullptr);
    }

    ret_descr = xdes_lst_get_descriptor(space_id, page_size, first, mtr);
    ret_page = xdes_get_offset(ret_descr) +
               xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE, 0, mtr);
    ut_ad(!has_done_reservation || ret_page != FIL_NULL);
    /*-----------------------------------------------------------*/
  } else if (used < FSEG_FRAG_LIMIT) {
    /* 6. We allocate an individual page from the space
    ===================================================*/
    buf_block_t *block =
        fsp_alloc_free_page(space_id, page_size, hint, rw_latch, mtr, init_mtr);

    ut_ad(!has_done_reservation || block != nullptr);

    if (block != nullptr) {
      /* Put the page in the fragment page array of the
      segment */
      n = fseg_find_free_frag_page_slot(seg_inode, mtr);
      ut_a(n != ULINT_UNDEFINED);

      fseg_set_nth_frag_page_no(seg_inode, n, block->page.id.page_no(), mtr);
    }

    /* fsp_alloc_free_page() invoked fsp_init_file_page()
    already. */
    /* This case returns directly: the extent bookkeeping and the page
    creation at the end of the function are not executed for it. */
    return (block);
    /*-----------------------------------------------------------*/
  } else {
    /* 7. We allocate a new extent and take its first page
    ======================================================*/
    ret_descr = fseg_alloc_free_extent(seg_inode, space_id, page_size, mtr);

    if (ret_descr == nullptr) {
      ret_page = FIL_NULL;
      ut_ad(!has_done_reservation);
    } else {
      const xdes_state_t state = xdes_get_state(ret_descr, mtr);
      ret_page = xdes_get_offset(ret_descr);

      /* In an XDES_FSEG_FRAG extent search for the first free page
      instead of assuming that the first page of the extent is free. */
      if (state == XDES_FSEG_FRAG) {
        ret_page += xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE, 0, mtr);
      }

      ut_ad(!has_done_reservation || ret_page != FIL_NULL);
    }
  }

  /* Cases 3, 4, 5 and 7 continue here. Cases 1 and 2 jump directly to
  got_hinted_page and case 6 has already returned. */
  if (ret_page == FIL_NULL) {
    /* Page could not be allocated */

    ut_ad(!has_done_reservation);
    return (nullptr);
  }

  if (space->size <= ret_page && !fsp_is_system_or_temp_tablespace(space_id)) {
    /* It must be that we are extending a single-table
    tablespace whose size is still < 64 pages */

    /* Growing the file page by page is refused once the chosen page
    lies outside the first extent. */
    if (ret_page >= FSP_EXTENT_SIZE) {
      ib::error(ER_IB_MSG_420)
          << "Error (2): trying to extend"
             " a single-table tablespace "
          << space_id << " by single page(s) though the"
          << " space size " << space->size << ". Page no " << ret_page << ".";
      ut_ad(!has_done_reservation);
      return (nullptr);
    }

    if (!fsp_try_extend_data_file_with_pages(space, ret_page, space_header,
                                             mtr)) {
      /* No disk space left */
      ut_ad(!has_done_reservation);
      return (nullptr);
    }
  }

got_hinted_page:
  /* ret_descr == NULL if the block was allocated from free_frag
  (XDES_FREE_FRAG) */
  /* NOTE(review): on the paths visible in this function ret_descr appears
  to be always non-NULL here (case 6 returns early, and a NULL extent in
  case 7 returns through the FIL_NULL check), so this test looks purely
  defensive - confirm. */
  if (ret_descr != nullptr) {
    /* At this point we know the extent and the page offset.
    The extent is still in the appropriate list (FSEG_NOT_FULL
    or FSEG_FREE), and the page is not yet marked as used. */

    ut_ad(xdes_get_descriptor(space_id, ret_page, page_size, mtr) == ret_descr);

    ut_ad(xdes_mtr_get_bit(ret_descr, XDES_FREE_BIT, ret_page % FSP_EXTENT_SIZE,
                           mtr));

    fseg_mark_page_used(space_id, page_size, seg_inode, ret_page, ret_descr,
                        mtr);
  }

  /* Exclude Encryption flag as it might have been changed In Memory flags but
  not on disk. */
  /* Apart from that flag, the in-memory space flags must agree with the
  flags stored at FSP_SPACE_FLAGS in the tablespace header. */
  ut_ad(!((space->flags ^ mach_read_from_4(FSP_SPACE_FLAGS + space_header)) &
          ~(FSP_FLAGS_MASK_ENCRYPTION)));

  /* Finally create the page, latched as requested by rw_latch. */
  return (fsp_page_create(page_id_t(space_id, ret_page), page_size, rw_latch,
                          mtr, init_mtr));
}
2934 
2935 /** Allocates a single free page from a segment. This function implements
2936  the intelligent allocation strategy which tries to minimize file space
2937  fragmentation.
2938  @retval NULL if no page could be allocated
2939  @retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
2940  (init_mtr == mtr, or the page was not previously freed in mtr)
2941  @retval block (not allocated or initialized) otherwise */
fseg_alloc_free_page_general(fseg_header_t * seg_header,page_no_t hint,byte direction,ibool has_done_reservation,mtr_t * mtr,mtr_t * init_mtr)2942 buf_block_t *fseg_alloc_free_page_general(
2943     fseg_header_t *seg_header,  /*!< in/out: segment header */
2944     page_no_t hint,             /*!< in: hint of which page would be
2945                                 desirable */
2946     byte direction,             /*!< in: if the new page is needed because
2947                               of an index page split, and records are
2948                               inserted there in order, into which
2949                               direction they go alphabetically: FSP_DOWN,
2950                               FSP_UP, FSP_NO_DIR */
2951     ibool has_done_reservation, /*!< in: TRUE if the caller has
2952                   already done the reservation for the page
2953                   with fsp_reserve_free_extents, then there
2954                   is no need to do the check for this individual
2955                   page */
2956     mtr_t *mtr,                 /*!< in/out: mini-transaction */
2957     mtr_t *init_mtr)            /*!< in/out: mtr or another mini-transaction
2958                                in which the page should be initialized.
2959                                If init_mtr!=mtr, but the page is already
2960                                latched in mtr, do not initialize the page. */
2961 {
2962   fseg_inode_t *inode;
2963   space_id_t space_id;
2964   buf_block_t *iblock;
2965   buf_block_t *block;
2966   ulint n_reserved = 0;
2967 
2968   space_id = page_get_space_id(page_align(seg_header));
2969 
2970   fil_space_t *space = fil_space_get(space_id);
2971 
2972   mtr_x_lock_space(space, mtr);
2973 
2974   const page_size_t page_size(space->flags);
2975 
2976   if (rw_lock_get_x_lock_count(&space->latch) == 1) {
2977     /* This thread did not own the latch before this call: free
2978     excess pages from the insert buffer free list */
2979 
2980     if (space_id == IBUF_SPACE_ID) {
2981       ibuf_free_excess_pages();
2982     }
2983   }
2984 
2985   inode = fseg_inode_get(seg_header, space_id, page_size, mtr, &iblock);
2986   fil_block_check_type(iblock, FIL_PAGE_INODE, mtr);
2987 
2988   if (!has_done_reservation &&
2989       !fsp_reserve_free_extents(&n_reserved, space_id, 2, FSP_NORMAL, mtr)) {
2990     return (nullptr);
2991   }
2992 
2993   block = fseg_alloc_free_page_low(space, page_size, inode, hint, direction,
2994                                    RW_X_LATCH, mtr, init_mtr
2995 #ifdef UNIV_DEBUG
2996                                    ,
2997                                    has_done_reservation
2998 #endif /* UNIV_DEBUG */
2999   );
3000 
3001   /* The allocation cannot fail if we have already reserved a
3002   space for the page. */
3003   ut_ad(!has_done_reservation || block != nullptr);
3004 
3005   if (!has_done_reservation) {
3006     fil_space_release_free_extents(space_id, n_reserved);
3007   }
3008 
3009   return (block);
3010 }
3011 
3012 /** Check that we have at least n_pages frag pages free in the first extent
3013 of a single-table tablespace, and they are also physically initialized to
3014 the data file. That is we have already extended the data file so that those
3015 pages are inside the data file. If not, this function extends the tablespace
3016 with pages.
3017 @param[in,out]	space		tablespace
3018 @param[in,out]	space_header	tablespace header, x-latched
3019 @param[in]	size		size of the tablespace in pages,
3020 must be less than FSP_EXTENT_SIZE
3021 @param[in,out]	mtr		mini-transaction
3022 @param[in]	n_pages		number of pages to reserve
3023 @return true if there were at least n_pages free pages, or we were able
3024 to extend */
fsp_reserve_free_pages(fil_space_t * space,fsp_header_t * space_header,page_no_t size,mtr_t * mtr,page_no_t n_pages)3025 static bool fsp_reserve_free_pages(fil_space_t *space,
3026                                    fsp_header_t *space_header, page_no_t size,
3027                                    mtr_t *mtr, page_no_t n_pages) {
3028   xdes_t *descr;
3029 
3030   ut_a(!fsp_is_system_tablespace(space->id));
3031   ut_a(!fsp_is_global_temporary(space->id));
3032   ut_a(size < FSP_EXTENT_SIZE);
3033 
3034   descr = xdes_get_descriptor_with_space_hdr(space_header, space->id, 0, mtr);
3035   page_no_t n_used = xdes_get_n_used(descr, mtr);
3036 
3037   ut_a(n_used <= size);
3038 
3039   return (size >= n_used + n_pages ||
3040           fsp_try_extend_data_file_with_pages(space, n_used + n_pages - 1,
3041                                               space_header, mtr));
3042 }
3043 
3044 /** Reserves free pages from a tablespace. All mini-transactions which may
3045 use several pages from the tablespace should call this function beforehand
3046 and reserve enough free extents so that they certainly will be able
3047 to do their operation, like a B-tree page split, fully. Reservations
3048 must be released with function fil_space_release_free_extents!
3049 
3050 The alloc_type below has the following meaning: FSP_NORMAL means an
3051 operation which will probably result in more space usage, like an
3052 insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
3053 deleting rows, then this allocation will in the long run result in
3054 less space usage (after a purge); FSP_CLEANING means allocation done
3055 in a physical record delete (like in a purge) or other cleaning operation
3056 which will result in less space usage in the long run. We prefer the latter
3057 two types of allocation: when space is scarce, FSP_NORMAL allocations
3058 will not succeed, but the latter two allocations will succeed, if possible.
3059 The purpose is to avoid dead end where the database is full but the
3060 user cannot free any space because these freeing operations temporarily
3061 reserve some space.
3062 
3063 Single-table tablespaces whose size is < FSP_EXTENT_SIZE pages are a special
3064 case. In this function we would liberally reserve several extents for
3065 every page split or merge in a B-tree. But we do not want to waste disk space
3066 if the table only occupies < FSP_EXTENT_SIZE pages. That is why we apply
3067 different rules in that special case, just ensuring that there are n_pages
3068 free pages available.
3069 
3070 @param[out]	n_reserved	number of extents actually reserved; if we
3071                                 return true and the tablespace size is <
3072                                 FSP_EXTENT_SIZE pages, then this can be 0,
3073                                 otherwise it is n_ext
3074 @param[in]	space_id	tablespace identifier
3075 @param[in]	n_ext		number of extents to reserve
3076 @param[in]	alloc_type	page reservation type (FSP_BLOB, etc)
3077 @param[in,out]	mtr		the mini transaction
3078 @param[in]	n_pages		for small tablespaces (tablespace size is
3079                                 less than FSP_EXTENT_SIZE), number of free
3080                                 pages to reserve.
3081 @return true if we were able to make the reservation */
fsp_reserve_free_extents(ulint * n_reserved,space_id_t space_id,ulint n_ext,fsp_reserve_t alloc_type,mtr_t * mtr,page_no_t n_pages)3082 bool fsp_reserve_free_extents(ulint *n_reserved, space_id_t space_id,
3083                               ulint n_ext, fsp_reserve_t alloc_type, mtr_t *mtr,
3084                               page_no_t n_pages) {
3085   fsp_header_t *space_header;
3086   ulint n_free_list_ext;
3087   page_no_t free_limit;
3088   page_no_t size;
3089   ulint n_free;
3090   ulint n_free_up;
3091   ulint reserve;
3092   DBUG_TRACE;
3093 
3094   *n_reserved = n_ext;
3095 
3096   fil_space_t *space = fil_space_get(space_id);
3097 
3098   mtr_x_lock_space(space, mtr);
3099 
3100   const page_size_t page_size(space->flags);
3101 
3102   space_header = fsp_get_space_header(space_id, page_size, mtr);
3103 try_again:
3104   size = mach_read_from_4(space_header + FSP_SIZE);
3105   ut_ad(size == space->size_in_header);
3106 
3107   if (size < FSP_EXTENT_SIZE && n_pages < FSP_EXTENT_SIZE / 2) {
3108     /* Use different rules for small single-table tablespaces */
3109     *n_reserved = 0;
3110     return fsp_reserve_free_pages(space, space_header, size, mtr, n_pages);
3111   }
3112 
3113   n_free_list_ext = flst_get_len(space_header + FSP_FREE);
3114   ut_ad(space->free_len == n_free_list_ext);
3115 
3116   free_limit = mtr_read_ulint(space_header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
3117   ut_ad(space->free_limit == free_limit);
3118 
3119   /* Below we play safe when counting free extents above the free limit:
3120   some of them will contain extent descriptor pages, and therefore
3121   will not be free extents */
3122 
3123   if (size >= free_limit) {
3124     n_free_up = (size - free_limit) / FSP_EXTENT_SIZE;
3125   } else {
3126     ut_ad(alloc_type == FSP_BLOB);
3127     n_free_up = 0;
3128   }
3129 
3130   if (n_free_up > 0) {
3131     n_free_up--;
3132     n_free_up -= n_free_up / (page_size.physical() / FSP_EXTENT_SIZE);
3133   }
3134 
3135   n_free = n_free_list_ext + n_free_up;
3136 
3137   switch (alloc_type) {
3138     case FSP_NORMAL:
3139       /* We reserve 1 extent + 0.5 % of the space size to undo logs
3140       and 1 extent + 0.5 % to cleaning operations; NOTE: this source
3141       code is duplicated in the function below! */
3142 
3143       reserve = 2 + ((size / FSP_EXTENT_SIZE) * 2) / 200;
3144 
3145       if (n_free <= reserve + n_ext) {
3146         goto try_to_extend;
3147       }
3148       break;
3149     case FSP_UNDO:
3150       /* We reserve 0.5 % of the space size to cleaning operations */
3151 
3152       reserve = 1 + ((size / FSP_EXTENT_SIZE) * 1) / 200;
3153 
3154       if (n_free <= reserve + n_ext) {
3155         goto try_to_extend;
3156       }
3157       break;
3158     case FSP_CLEANING:
3159     case FSP_BLOB:
3160       break;
3161     default:
3162       ut_error;
3163   }
3164 
3165   if (fil_space_reserve_free_extents(space_id, n_free, n_ext)) {
3166     return true;
3167   }
3168 try_to_extend:
3169   if (fsp_try_extend_data_file(space, space_header, mtr)) {
3170     goto try_again;
3171   }
3172 
3173   return false;
3174 }
3175 
3176 /** Calculate how many KiB of new data we will be able to insert to the
3177 tablespace without running out of space.
3178 @param[in]	space_id	tablespace ID
3179 @return available space in KiB
3180 @retval UINTMAX_MAX if unknown */
fsp_get_available_space_in_free_extents(space_id_t space_id)3181 uintmax_t fsp_get_available_space_in_free_extents(space_id_t space_id) {
3182   fil_space_t *space = fil_space_acquire(space_id);
3183 
3184   if (space == nullptr) {
3185     return (UINTMAX_MAX);
3186   }
3187 
3188   auto n_free_extents = fsp_get_available_space_in_free_extents(space);
3189 
3190   fil_space_release(space);
3191 
3192   return (n_free_extents);
3193 }
3194 
3195 /** Calculate how many KiB of new data we will be able to insert to the
3196 tablespace without running out of space. Start with a space object that has
3197 been acquired by the caller who holds it for the calculation,
3198 @param[in]	space		tablespace object from fil_space_acquire()
3199 @return available space in KiB */
fsp_get_available_space_in_free_extents(const fil_space_t * space)3200 uintmax_t fsp_get_available_space_in_free_extents(const fil_space_t *space) {
3201   ut_ad(space->n_pending_ops > 0);
3202 
3203   ulint size_in_header = space->size_in_header;
3204   if (size_in_header < FSP_EXTENT_SIZE) {
3205     return (0); /* TODO: count free frag pages and
3206                 return a value based on that */
3207   }
3208 
3209   /* Below we play safe when counting free extents above the free limit:
3210   some of them will contain extent descriptor pages, and therefore
3211   will not be free extents */
3212   ut_ad(size_in_header >= space->free_limit);
3213   ulint n_free_up = (size_in_header - space->free_limit) / FSP_EXTENT_SIZE;
3214 
3215   page_size_t page_size(space->flags);
3216   if (n_free_up > 0) {
3217     n_free_up--;
3218     n_free_up -= n_free_up / (page_size.physical() / FSP_EXTENT_SIZE);
3219   }
3220 
3221   /* We reserve 1 extent + 0.5 % of the space size to undo logs
3222   and 1 extent + 0.5 % to cleaning operations; NOTE: this source
3223   code is duplicated in the function above! */
3224 
3225   ulint reserve = 2 + ((size_in_header / FSP_EXTENT_SIZE) * 2) / 200;
3226   ulint n_free = space->free_len + n_free_up;
3227 
3228   if (reserve > n_free) {
3229     return (0);
3230   }
3231 
3232   return (static_cast<uintmax_t>(n_free - reserve) * FSP_EXTENT_SIZE *
3233           (page_size.physical() / 1024));
3234 }
3235 
fseg_mark_page_used(space_id_t space_id,const page_size_t & page_size,fseg_inode_t * seg_inode,page_no_t page,xdes_t * descr,mtr_t * mtr)3236 static void fseg_mark_page_used(space_id_t space_id,
3237                                 const page_size_t &page_size,
3238                                 fseg_inode_t *seg_inode, page_no_t page,
3239                                 xdes_t *descr, mtr_t *mtr) {
3240   uint32_t not_full_n_used;
3241 
3242   ut_ad(fil_page_get_type(page_align(seg_inode)) == FIL_PAGE_INODE);
3243   ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
3244   ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
3245 
3246   ut_ad(mach_read_from_8(seg_inode + FSEG_ID) ==
3247         xdes_get_segment_id(descr, mtr));
3248 
3249   if (xdes_is_free(descr, mtr)) {
3250     /* We move the extent from the free list to the
3251     NOT_FULL list */
3252     flst_remove(seg_inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr);
3253     flst_add_last(seg_inode + FSEG_NOT_FULL, descr + XDES_FLST_NODE, mtr);
3254   }
3255 
3256   ut_ad(xdes_mtr_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr));
3257 
3258   /* We mark the page as used */
3259   xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, FALSE, mtr);
3260 
3261   File_segment_inode fseg_inode(space_id, page_size, seg_inode, mtr);
3262 
3263   not_full_n_used = fseg_inode.read_not_full_n_used();
3264   not_full_n_used++;
3265   fseg_inode.write_not_full_n_used(not_full_n_used);
3266 
3267   if (xdes_is_full(descr, mtr)) {
3268     /* We move the extent from the NOT_FULL list to the
3269     FULL list */
3270     flst_remove(seg_inode + FSEG_NOT_FULL, descr + XDES_FLST_NODE, mtr);
3271     flst_add_last(seg_inode + FSEG_FULL, descr + XDES_FLST_NODE, mtr);
3272 
3273     ut_ad(not_full_n_used >= FSP_EXTENT_SIZE);
3274     fseg_inode.write_not_full_n_used(not_full_n_used - FSP_EXTENT_SIZE);
3275   }
3276 }
3277 
3278 /** Frees a single page of a segment.
3279 @param[in]	seg_inode	segment inode
3280 @param[in]	page_id		page id
3281 @param[in]	page_size	page size
3282 @param[in]	ahi		whether we may need to drop the adaptive
3283 hash index
3284 @param[in,out]	mtr		mini-transaction */
fseg_free_page_low(fseg_inode_t * seg_inode,const page_id_t & page_id,const page_size_t & page_size,bool ahi,mtr_t * mtr)3285 static void fseg_free_page_low(fseg_inode_t *seg_inode,
3286                                const page_id_t &page_id,
3287                                const page_size_t &page_size, bool ahi,
3288                                mtr_t *mtr) {
3289   xdes_t *descr;
3290   uint32_t not_full_n_used;
3291   ib_id_t descr_id;
3292   ib_id_t seg_id;
3293   ulint i;
3294   DBUG_TRACE;
3295 
3296   ut_ad(seg_inode != nullptr);
3297   ut_ad(mtr != nullptr);
3298   ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
3299   ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
3300   ut_d(fsp_space_modify_check(page_id.space(), mtr));
3301 
3302   /* Drop search system page hash index if the page is found in
3303   the pool and is hashed */
3304 
3305   if (ahi) {
3306     btr_search_drop_page_hash_when_freed(page_id, page_size);
3307   }
3308 
3309   descr =
3310       xdes_get_descriptor(page_id.space(), page_id.page_no(), page_size, mtr);
3311 
3312   if (xdes_mtr_get_bit(descr, XDES_FREE_BIT,
3313                        page_id.page_no() % FSP_EXTENT_SIZE, mtr)) {
3314     fputs("InnoDB: Dump of the tablespace extent descriptor: ", stderr);
3315     ut_print_buf(stderr, descr, 40);
3316 
3317     ib::error(ER_IB_MSG_421) << "InnoDB is trying to free page " << page_id
3318                              << " though it is already marked as free in the"
3319                                 " tablespace! The tablespace free space info is"
3320                                 " corrupt. You may need to dump your tables and"
3321                                 " recreate the whole database!";
3322   crash:
3323     ib::fatal(ER_IB_MSG_422) << FORCE_RECOVERY_MSG;
3324   }
3325 
3326   xdes_state_t state = xdes_get_state(descr, mtr);
3327 
3328   switch (state) {
3329     case XDES_FSEG:
3330     case XDES_FSEG_FRAG:
3331       /* The page belongs to a segment */
3332       break;
3333     case XDES_FREE_FRAG:
3334     case XDES_FULL_FRAG:
3335       /* The page is in the fragment pages of the segment */
3336 
3337       for (i = 0;; i++) {
3338         if (fseg_get_nth_frag_page_no(seg_inode, i, mtr) == page_id.page_no()) {
3339           fseg_set_nth_frag_page_no(seg_inode, i, FIL_NULL, mtr);
3340           break;
3341         }
3342       }
3343 
3344       fsp_free_page(page_id, page_size, mtr);
3345 
3346       return;
3347     case XDES_FREE:
3348     case XDES_NOT_INITED:
3349       ut_error;
3350   }
3351 
3352   /* If we get here, the page is in some extent of the segment */
3353   File_segment_inode fseg_inode(page_id.space(), page_size, seg_inode, mtr);
3354 
3355   descr_id = xdes_get_segment_id(descr);
3356   seg_id = mach_read_from_8(seg_inode + FSEG_ID);
3357 
3358   if (UNIV_UNLIKELY(descr_id != seg_id)) {
3359     fputs("InnoDB: Dump of the tablespace extent descriptor: ", stderr);
3360     ut_print_buf(stderr, descr, 40);
3361     fputs("\nInnoDB: Dump of the segment inode: ", stderr);
3362     ut_print_buf(stderr, seg_inode, 40);
3363     putc('\n', stderr);
3364 
3365     ib::error(ER_IB_MSG_423)
3366         << "InnoDB is trying to free page " << page_id
3367         << ", which does not belong to segment " << descr_id
3368         << " but belongs to segment " << seg_id << ".";
3369     goto crash;
3370   }
3371 
3372   not_full_n_used = fseg_inode.read_not_full_n_used();
3373   if (xdes_is_full(descr, mtr)) {
3374     /* The fragment is full: move it to another list */
3375     flst_remove(seg_inode + FSEG_FULL, descr + XDES_FLST_NODE, mtr);
3376     flst_add_last(seg_inode + FSEG_NOT_FULL, descr + XDES_FLST_NODE, mtr);
3377     not_full_n_used += FSP_EXTENT_SIZE - 1;
3378   } else {
3379     ut_a(not_full_n_used > 0);
3380     not_full_n_used -= 1;
3381   }
3382 
3383   const page_no_t bit = page_id.page_no() % FSP_EXTENT_SIZE;
3384 
3385   xdes_set_bit(descr, XDES_FREE_BIT, bit, TRUE, mtr);
3386   xdes_set_bit(descr, XDES_CLEAN_BIT, bit, TRUE, mtr);
3387 
3388   page_no_t n_used = xdes_get_n_used(descr, mtr);
3389 
3390   ut_ad(state != XDES_FSEG_FRAG || (bit != 0 && bit != 1));
3391   ut_ad(state != XDES_FSEG_FRAG || n_used > 1);
3392   ut_ad(xdes_is_leasable(descr, page_size, mtr) ==
3393         (state == XDES_FSEG_FRAG && n_used == XDES_FRAG_N_USED));
3394 
3395   /* A leased fragment extent might have no more pages belonging to
3396   the segment.*/
3397   if (state == XDES_FSEG_FRAG && n_used == XDES_FRAG_N_USED) {
3398     n_used = 0;
3399 
3400     ut_ad(not_full_n_used >= XDES_FRAG_N_USED);
3401     not_full_n_used -= XDES_FRAG_N_USED;
3402   }
3403 
3404   if (n_used == 0) {
3405     /* The extent has become free: free it to space */
3406     flst_remove(seg_inode + FSEG_NOT_FULL, descr + XDES_FLST_NODE, mtr);
3407     fsp_free_extent(page_id, page_size, mtr);
3408   }
3409 
3410   /* Update the FSEG_NOT_FULL_N_USED field after modifying the list. */
3411   fseg_inode.write_not_full_n_used(not_full_n_used);
3412 }
3413 
3414 /** Frees a single page of a segment. */
fseg_free_page(fseg_header_t * seg_header,space_id_t space_id,page_no_t page,bool ahi,mtr_t * mtr)3415 void fseg_free_page(fseg_header_t *seg_header, /*!< in: segment header */
3416                     space_id_t space_id,       /*!< in: space id */
3417                     page_no_t page,            /*!< in: page offset */
3418                     bool ahi,   /*!< in: whether we may need to drop
3419                                 the adaptive hash index */
3420                     mtr_t *mtr) /*!< in/out: mini-transaction */
3421 {
3422   DBUG_TRACE;
3423   fseg_inode_t *seg_inode;
3424   buf_block_t *iblock;
3425 
3426   fil_space_t *space = fil_space_get(space_id);
3427 
3428   mtr_x_lock_space(space, mtr);
3429 
3430   const page_size_t page_size(space->flags);
3431 
3432   DBUG_LOG("fseg_free_page", "space_id: " << space_id << ", page_no: " << page);
3433 
3434   seg_inode = fseg_inode_get(seg_header, space_id, page_size, mtr, &iblock);
3435   fil_block_check_type(iblock, FIL_PAGE_INODE, mtr);
3436 
3437   const page_id_t page_id(space_id, page);
3438 
3439   fseg_free_page_low(seg_inode, page_id, page_size, ahi, mtr);
3440 
3441   ut_d(buf_page_set_file_page_was_freed(page_id));
3442 }
3443 
3444 /** Checks if a single page of a segment is free.
3445  @return true if free */
fseg_page_is_free(fseg_header_t * seg_header,space_id_t space_id,page_no_t page)3446 bool fseg_page_is_free(fseg_header_t *seg_header, /*!< in: segment header */
3447                        space_id_t space_id,       /*!< in: space id */
3448                        page_no_t page)            /*!< in: page offset */
3449 {
3450   mtr_t mtr;
3451   ibool is_free;
3452   xdes_t *descr;
3453   fseg_inode_t *seg_inode;
3454 
3455   fil_space_t *space = fil_space_get(space_id);
3456 
3457   mtr_start(&mtr);
3458 
3459   mtr_x_lock_space(space, &mtr);
3460 
3461   const page_size_t page_size(space->flags);
3462 
3463   seg_inode = fseg_inode_get(seg_header, space_id, page_size, &mtr);
3464 
3465   ut_a(seg_inode);
3466   ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
3467   ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
3468 
3469   descr = xdes_get_descriptor(space_id, page, page_size, &mtr);
3470   ut_a(descr);
3471 
3472   is_free =
3473       xdes_mtr_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, &mtr);
3474 
3475   mtr_commit(&mtr);
3476 
3477   return (is_free);
3478 }
3479 
3480 /** Frees an extent of a segment to the space free list.
3481 @param[in]	seg_inode	segment inode
3482 @param[in]	space		space id
3483 @param[in]	page		a page in the extent
3484 @param[in]	page_size	page size
3485 @param[in]	ahi		whether we may need to drop the adaptive hash
3486                                 index
3487 @param[in,out]	mtr		mini-transaction */
fseg_free_extent(fseg_inode_t * seg_inode,space_id_t space,const page_size_t & page_size,page_no_t page,bool ahi,mtr_t * mtr)3488 static void fseg_free_extent(fseg_inode_t *seg_inode, space_id_t space,
3489                              const page_size_t &page_size, page_no_t page,
3490                              bool ahi, mtr_t *mtr) {
3491   page_no_t first_page_in_extent;
3492   xdes_t *descr;
3493   page_no_t i;
3494   File_segment_inode fseg_inode(space, page_size, seg_inode, mtr);
3495 
3496   ut_ad(seg_inode != nullptr);
3497   ut_ad(mtr != nullptr);
3498 
3499   descr = xdes_get_descriptor(space, page, page_size, mtr);
3500 
3501   const xdes_state_t state = xdes_get_state(descr, mtr);
3502   ut_a(state == XDES_FSEG || state == XDES_FSEG_FRAG);
3503 
3504   ut_a(!memcmp(descr + XDES_ID, seg_inode + FSEG_ID, 8));
3505   ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
3506   ut_d(fsp_space_modify_check(space, mtr));
3507 
3508   first_page_in_extent = page - (page % FSP_EXTENT_SIZE);
3509 
3510   if (ahi) {
3511     for (i = state == XDES_FSEG ? 0 : XDES_FRAG_N_USED; i < FSP_EXTENT_SIZE;
3512          i++) {
3513       if (!xdes_mtr_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
3514         /* Drop search system page hash index
3515         if the page is found in the pool and
3516         is hashed */
3517 
3518         btr_search_drop_page_hash_when_freed(
3519             page_id_t(space, first_page_in_extent + i), page_size);
3520       }
3521     }
3522   }
3523 
3524   if (xdes_is_full(descr, mtr)) {
3525     flst_remove(seg_inode + FSEG_FULL, descr + XDES_FLST_NODE, mtr);
3526   } else if (xdes_is_free(descr, mtr)) {
3527     flst_remove(seg_inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr);
3528   } else {
3529     flst_remove(seg_inode + FSEG_NOT_FULL, descr + XDES_FLST_NODE, mtr);
3530 
3531     page_no_t not_full_n_used = fseg_inode.read_not_full_n_used();
3532 
3533     page_no_t descr_n_used = xdes_get_n_used(descr, mtr);
3534     ut_a(not_full_n_used >= descr_n_used);
3535     fseg_inode.write_not_full_n_used(not_full_n_used - descr_n_used);
3536   }
3537 
3538   fsp_free_extent(page_id_t(space, page), page_size, mtr);
3539 
3540 #ifdef UNIV_DEBUG
3541   for (i = state == XDES_FSEG ? 0 : XDES_FRAG_N_USED; i < FSP_EXTENT_SIZE;
3542        i++) {
3543     buf_page_set_file_page_was_freed(
3544         page_id_t(space, first_page_in_extent + i));
3545   }
3546 #endif /* UNIV_DEBUG */
3547 }
3548 
3549 /** Frees part of a segment. This function can be used to free a segment by
3550  repeatedly calling this function in different mini-transactions. Doing
3551  the freeing in a single mini-transaction might result in too big a
3552  mini-transaction.
3553  @return true if freeing completed */
fseg_free_step(fseg_header_t * header,bool ahi,mtr_t * mtr)3554 ibool fseg_free_step(
3555     fseg_header_t *header, /*!< in, own: segment header; NOTE: if the header
3556                            resides on the first page of the frag list
3557                            of the segment, this pointer becomes obsolete
3558                            after the last freeing step */
3559     bool ahi,              /*!< in: whether we may need to drop
3560                            the adaptive hash index */
3561     mtr_t *mtr)            /*!< in/out: mini-transaction */
3562 {
3563   ulint n;
3564   page_no_t page;
3565   xdes_t *descr;
3566   fseg_inode_t *inode;
3567   space_id_t space_id;
3568   page_no_t header_page;
3569 
3570   DBUG_TRACE;
3571 
3572   space_id = page_get_space_id(page_align(header));
3573   header_page = page_get_page_no(page_align(header));
3574 
3575   fil_space_t *space = fil_space_get(space_id);
3576 
3577   mtr_x_lock_space(space, mtr);
3578 
3579   const page_size_t page_size(space->flags);
3580 
3581   descr = xdes_get_descriptor(space_id, header_page, page_size, mtr);
3582 
3583   /* Check that the header resides on a page which has not been
3584   freed yet */
3585 
3586   ut_a(xdes_mtr_get_bit(descr, XDES_FREE_BIT, header_page % FSP_EXTENT_SIZE,
3587                         mtr) == FALSE);
3588   buf_block_t *iblock;
3589 
3590   inode = fseg_inode_try_get(header, space_id, page_size, mtr, &iblock);
3591 
3592   if (inode == nullptr) {
3593     ib::info(ER_IB_MSG_424)
3594         << "Double free of inode from " << page_id_t(space_id, header_page);
3595     return TRUE;
3596   }
3597 
3598   fil_block_check_type(iblock, FIL_PAGE_INODE, mtr);
3599   descr = fseg_get_first_extent(inode, space_id, page_size, mtr);
3600 
3601   if (descr != nullptr) {
3602     /* Free the extent held by the segment */
3603     page = xdes_get_offset(descr);
3604 
3605     fseg_free_extent(inode, space_id, page_size, page, ahi, mtr);
3606 
3607     return FALSE;
3608   }
3609 
3610   /* Free a frag page */
3611   n = fseg_find_last_used_frag_page_slot(inode, mtr);
3612 
3613   if (n == ULINT_UNDEFINED) {
3614     /* Freeing completed: free the segment inode */
3615     fsp_free_seg_inode(space_id, page_size, inode, mtr);
3616 
3617     return TRUE;
3618   }
3619 
3620   fseg_free_page_low(
3621       inode, page_id_t(space_id, fseg_get_nth_frag_page_no(inode, n, mtr)),
3622       page_size, ahi, mtr);
3623 
3624   n = fseg_find_last_used_frag_page_slot(inode, mtr);
3625 
3626   if (n == ULINT_UNDEFINED) {
3627     /* Freeing completed: free the segment inode */
3628     fsp_free_seg_inode(space_id, page_size, inode, mtr);
3629 
3630     return TRUE;
3631   }
3632 
3633   return FALSE;
3634 }
3635 
3636 /** Frees part of a segment. Differs from fseg_free_step because this function
3637  leaves the header page unfreed.
3638  @return true if freeing completed, except the header page */
fseg_free_step_not_header(fseg_header_t * header,bool ahi,mtr_t * mtr)3639 ibool fseg_free_step_not_header(
3640     fseg_header_t *header, /*!< in: segment header which must reside on
3641                            the first fragment page of the segment */
3642     bool ahi,              /*!< in: whether we may need to drop
3643                            the adaptive hash index */
3644     mtr_t *mtr)            /*!< in/out: mini-transaction */
3645 {
3646   ulint n;
3647   xdes_t *descr;
3648   fseg_inode_t *inode;
3649   space_id_t space_id;
3650   page_no_t page_no;
3651 
3652   space_id = page_get_space_id(page_align(header));
3653 
3654   fil_space_t *space = fil_space_get(space_id);
3655 
3656   mtr_x_lock_space(space, mtr);
3657 
3658   const page_size_t page_size(space->flags);
3659   buf_block_t *iblock;
3660 
3661   inode = fseg_inode_get(header, space_id, page_size, mtr, &iblock);
3662   fil_block_check_type(iblock, FIL_PAGE_INODE, mtr);
3663 
3664   descr = fseg_get_first_extent(inode, space_id, page_size, mtr);
3665 
3666   if (descr != nullptr) {
3667     /* Free the extent held by the segment */
3668     page_no = xdes_get_offset(descr);
3669 
3670     fseg_free_extent(inode, space_id, page_size, page_no, ahi, mtr);
3671 
3672     return (FALSE);
3673   }
3674 
3675   /* Free a frag page */
3676 
3677   n = fseg_find_last_used_frag_page_slot(inode, mtr);
3678 
3679   if (n == ULINT_UNDEFINED) {
3680     ut_error;
3681   }
3682 
3683   page_no = fseg_get_nth_frag_page_no(inode, n, mtr);
3684 
3685   if (page_no == page_get_page_no(page_align(header))) {
3686     return (TRUE);
3687   }
3688 
3689   fseg_free_page_low(inode, page_id_t(space_id, page_no), page_size, ahi, mtr);
3690 
3691   return (FALSE);
3692 }
3693 
3694 /** Returns the first extent descriptor for a segment.
3695 We think of the extent lists of the segment catenated in the order
3696 FSEG_FULL -> FSEG_NOT_FULL -> FSEG_FREE.
3697 @param[in]	inode		segment inode
3698 @param[in]	space_id	space id
3699 @param[in]	page_size	page size
3700 @param[in,out]	mtr		mini-transaction
3701 @return the first extent descriptor, or NULL if none */
fseg_get_first_extent(fseg_inode_t * inode,space_id_t space_id,const page_size_t & page_size,mtr_t * mtr)3702 static xdes_t *fseg_get_first_extent(fseg_inode_t *inode, space_id_t space_id,
3703                                      const page_size_t &page_size, mtr_t *mtr) {
3704   fil_addr_t first;
3705   xdes_t *descr;
3706 
3707   ut_ad(inode && mtr);
3708 
3709   ut_ad(space_id == page_get_space_id(page_align(inode)));
3710   ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
3711 
3712   first = fil_addr_null;
3713 
3714   if (flst_get_len(inode + FSEG_FULL) > 0) {
3715     first = flst_get_first(inode + FSEG_FULL, mtr);
3716 
3717   } else if (flst_get_len(inode + FSEG_NOT_FULL) > 0) {
3718     first = flst_get_first(inode + FSEG_NOT_FULL, mtr);
3719 
3720   } else if (flst_get_len(inode + FSEG_FREE) > 0) {
3721     first = flst_get_first(inode + FSEG_FREE, mtr);
3722   }
3723 
3724   if (first.page == FIL_NULL) {
3725     return (nullptr);
3726   }
3727   descr = xdes_lst_get_descriptor(space_id, page_size, first, mtr);
3728 
3729   return (descr);
3730 }
3731 
3732 #ifdef UNIV_BTR_PRINT
3733 /** Writes info of a segment. */
fseg_print_low(space_id_t space_id,const page_size_t & page_size,fseg_inode_t * inode,mtr_t * mtr)3734 static void fseg_print_low(space_id_t space_id, const page_size_t &page_size,
3735                            fseg_inode_t *inode, /*!< in: segment inode */
3736                            mtr_t *mtr)          /*!< in/out: mini-transaction */
3737 {
3738   space_id_t space;
3739   ulint n_used;
3740   ulint n_frag;
3741   ulint n_free;
3742   ulint n_not_full;
3743   ulint n_full;
3744   ulint reserved;
3745   ulint used;
3746   page_no_t page_no;
3747   ib_id_t seg_id;
3748   File_segment_inode fseg_inode(space_id, page_size, inode, mtr);
3749 
3750   ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_SX_FIX));
3751   space = page_get_space_id(page_align(inode));
3752   page_no = page_get_page_no(page_align(inode));
3753 
3754   reserved = fseg_n_reserved_pages_low(space_id, page_size, inode, &used, mtr);
3755 
3756   seg_id = mach_read_from_8(inode + FSEG_ID);
3757 
3758   n_used = fseg_inode.read_not_full_n_used();
3759   n_frag = fseg_get_n_frag_pages(inode, mtr);
3760   n_free = flst_get_len(inode + FSEG_FREE);
3761   n_not_full = flst_get_len(inode + FSEG_NOT_FULL);
3762   n_full = flst_get_len(inode + FSEG_FULL);
3763 
3764   ib::info(ER_IB_MSG_425) << "SEGMENT id " << seg_id << " space " << space
3765                           << ";"
3766                           << " page " << page_no << ";"
3767                           << " res " << reserved << " used " << used << ";"
3768                           << " full ext " << n_full << ";"
3769                           << " fragm pages " << n_frag << ";"
3770                           << " free extents " << n_free << ";"
3771                           << " not full extents " << n_not_full << ": pages "
3772                           << n_used;
3773 
3774   ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
3775 }
3776 
3777 /** Writes info of a segment. */
fseg_print(fseg_header_t * header,mtr_t * mtr)3778 void fseg_print(fseg_header_t *header, /*!< in: segment header */
3779                 mtr_t *mtr)            /*!< in/out: mini-transaction */
3780 {
3781   fseg_inode_t *inode;
3782   space_id_t space_id;
3783 
3784   space_id = page_get_space_id(page_align(header));
3785 
3786   fil_space_t *space = fil_space_get();
3787 
3788   mtr_x_lock_space(space, mtr);
3789 
3790   const page_size_t page_size(space->flags);
3791 
3792   inode = fseg_inode_get(header, space_id, page_size, mtr);
3793 
3794   fseg_print_low(space_id, page_size, inode, mtr);
3795 }
3796 #endif /* UNIV_BTR_PRINT */
3797 
3798 /** Retrieve tablespace dictionary index root page number stored in the
3799 page 0
3800 @param[in]	space		tablespace id
3801 @param[in]	page_size	page size
3802 @param[in,out]	mtr		mini-transaction
3803 @return root page num of the tablspace dictionary index copy */
fsp_sdi_get_root_page_num(space_id_t space,const page_size_t & page_size,mtr_t * mtr)3804 page_no_t fsp_sdi_get_root_page_num(space_id_t space,
3805                                     const page_size_t &page_size, mtr_t *mtr) {
3806   ut_ad(mtr != nullptr);
3807 
3808   buf_block_t *block =
3809       buf_page_get(page_id_t(space, 0), page_size, RW_S_LATCH, mtr);
3810   buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
3811 
3812   page_t *page = buf_block_get_frame(block);
3813 
3814   ulint sdi_offset = fsp_header_get_sdi_offset(page_size);
3815 
3816   uint32_t sdi_ver = mach_read_from_4(page + sdi_offset);
3817 
3818   if (sdi_ver != SDI_VERSION) {
3819     ib::warn(ER_IB_MSG_426) << "SDI version mismatch. Expected: " << SDI_VERSION
3820                             << " Current version: " << sdi_ver;
3821   }
3822   ut_ad(sdi_ver == SDI_VERSION);
3823 
3824   page_no_t root = mach_read_from_4(page + sdi_offset + 4);
3825 
3826   ut_ad(root > 2);
3827 
3828   return (root);
3829 }
3830 
3831 /** Write SDI Index root page num to page 0 of tablespace.
3832 @param[in,out]	page		page 0 frame
3833 @param[in]	page_size	size of page
3834 @param[in]	root_page_num	root page number of SDI
3835 @param[in,out]	mtr		mini-transaction */
fsp_sdi_write_root_to_page(page_t * page,const page_size_t & page_size,page_no_t root_page_num,mtr_t * mtr)3836 void fsp_sdi_write_root_to_page(page_t *page, const page_size_t &page_size,
3837                                 page_no_t root_page_num, mtr_t *mtr) {
3838   ut_ad(page_get_page_no(page) == 0);
3839 
3840   ulint sdi_offset = fsp_header_get_sdi_offset(page_size);
3841 
3842   /* Write SDI version here. */
3843   mlog_write_ulint(page + sdi_offset, SDI_VERSION, MLOG_4BYTES, mtr);
3844 
3845   /* Write SDI root page number */
3846   mlog_write_ulint(page + sdi_offset + 4, root_page_num, MLOG_4BYTES, mtr);
3847 }
3848 
3849 #ifdef UNIV_DEBUG
3850 /** Print the file segment header to the given output stream.
3851 @param[in]	out	the output stream into which the object is printed.
3852 @retval	the output stream into which the object was printed. */
to_stream(std::ostream & out) const3853 std::ostream &fseg_header::to_stream(std::ostream &out) const {
3854   const space_id_t space =
3855       mtr_read_ulint(m_header + FSEG_HDR_SPACE, MLOG_4BYTES, m_mtr);
3856 
3857   const page_no_t page_no =
3858       mtr_read_ulint(m_header + FSEG_HDR_PAGE_NO, MLOG_4BYTES, m_mtr);
3859 
3860   const ulint offset =
3861       mtr_read_ulint(m_header + FSEG_HDR_OFFSET, MLOG_2BYTES, m_mtr);
3862 
3863   out << "[fseg_header_t: space=" << space << ", page=" << page_no
3864       << ", offset=" << offset << "]";
3865 
3866   return (out);
3867 }
3868 #endif /* UNIV_DEBUG */
3869 
3870 /** Determine if extent belongs to a given segment.
3871 @param[in]	descr	extent descriptor
3872 @param[in]	seg_id	segment identifier
3873 @param[in]	mtr	mini-transaction
3874 @return	true if extent is part of the segment, false otherwise */
xdes_in_segment(const xdes_t * descr,ib_id_t seg_id,mtr_t * mtr)3875 static bool xdes_in_segment(const xdes_t *descr, ib_id_t seg_id, mtr_t *mtr) {
3876   const xdes_state_t state = xdes_get_state(descr, mtr);
3877   return ((state == XDES_FSEG || state == XDES_FSEG_FRAG) &&
3878           xdes_get_segment_id(descr, mtr) == seg_id);
3879 }
3880 
3881 #ifdef UNIV_DEBUG
/** Build an in-memory snapshot of a tablespace header.
The scalar fields are decoded straight from the header bytes with
mach_read_from_4() / mach_read_from_8(). Each list member is constructed
from the address of its base node inside the header together with the
mini-transaction.
@param[in]	header	pointer to the tablespace header
@param[in]	mtr	mini-transaction handed to the list members */
fsp_header_mem_t::fsp_header_mem_t(const fsp_header_t *header, mtr_t *mtr)
    : m_space_id(mach_read_from_4(header + FSP_SPACE_ID)),
      /* Not read from the header; always initialised to 0. */
      m_notused(0),
      m_fsp_size(mach_read_from_4(header + FSP_SIZE)),
      m_free_limit(mach_read_from_4(header + FSP_FREE_LIMIT)),
      m_flags(mach_read_from_4(header + FSP_SPACE_FLAGS)),
      m_fsp_frag_n_used(mach_read_from_4(header + FSP_FRAG_N_USED)),
      /* The three extent lists anchored in the header. */
      m_fsp_free(header + FSP_FREE, mtr),
      m_free_frag(header + FSP_FREE_FRAG, mtr),
      m_full_frag(header + FSP_FULL_FRAG, mtr),
      /* The only 8-byte field decoded here. */
      m_segid(mach_read_from_8(header + FSP_SEG_ID)),
      /* The two segment inode page lists anchored in the header. */
      m_inodes_full(header + FSP_SEG_INODES_FULL, mtr),
      m_inodes_free(header + FSP_SEG_INODES_FREE, mtr) {}
3895 
print(std::ostream & out) const3896 std::ostream &fsp_header_mem_t::print(std::ostream &out) const {
3897   out << "[fsp_header_t: "
3898       << "m_space_id=" << m_space_id << ", m_fsp_size=" << m_fsp_size
3899       << ", m_free_limit=" << m_free_limit << ", m_flags=" << m_flags
3900       << ", m_fsp_frag_n_used=" << m_fsp_frag_n_used
3901       << ", m_fsp_free=" << m_fsp_free << ", m_free_frag=" << m_free_frag
3902       << ", m_full_frag=" << m_full_frag << ", m_segid=" << m_segid
3903       << ", m_inodes_full=" << m_inodes_full
3904       << ", m_inodes_free=" << m_inodes_free << "]";
3905   return (out);
3906 }
3907 
3908 /** Print the extent descriptor page in user-friendly format.
3909 @param[in]	out	the output file stream
3910 @param[in]	xdes	the extent descriptor page
3911 @param[in]	page_no	the page number of xdes page
3912 @param[in]	mtr	the mini transaction.
3913 @return None. */
xdes_page_print(std::ostream & out,const page_t * xdes,page_no_t page_no,mtr_t * mtr)3914 std::ostream &xdes_page_print(std::ostream &out, const page_t *xdes,
3915                               page_no_t page_no, mtr_t *mtr) {
3916   out << "[Extent Descriptor Page: page_no=" << page_no << "\n";
3917 
3918   if (page_no == 0) {
3919     const fsp_header_t *tmp = xdes + FSP_HEADER_OFFSET;
3920     fsp_header_mem_t header(tmp, mtr);
3921     out << header << "\n";
3922   }
3923 
3924   ulint N = UNIV_PAGE_SIZE / FSP_EXTENT_SIZE;
3925 
3926   for (ulint i = 0; i < N; ++i) {
3927     const byte *desc = xdes + XDES_ARR_OFFSET + (i * XDES_SIZE);
3928     xdes_mem_t x(desc);
3929 
3930     if (x.is_valid()) {
3931       out << x << "\n";
3932     }
3933   }
3934   out << "]\n";
3935   return (out);
3936 }
3937 
print(std::ostream & out) const3938 std::ostream &xdes_mem_t::print(std::ostream &out) const {
3939   ut_ad(m_xdes != nullptr);
3940 
3941   const page_no_t page_no = xdes_get_offset(m_xdes);
3942   const ib_id_t seg_id = xdes_get_segment_id(m_xdes);
3943 
3944   out << "[xdes_t: segid=" << seg_id << ",page=" << page_no
3945       << ",state=" << state_name() << ",bitmap=[";
3946   for (page_no_t i = 0; i < FSP_EXTENT_SIZE; ++i) {
3947     const bool is_free = xdes_get_bit(m_xdes, XDES_FREE_BIT, i);
3948     out << (is_free ? "." : "+");
3949   }
3950   out << "]]";
3951   return (out);
3952 }
3953 
3954 /** Check if the tablespace size information is valid.
3955 @param[in]	space_id	the tablespace identifier
3956 @return true if valid, false if invalid. */
fsp_check_tablespace_size(space_id_t space_id)3957 bool fsp_check_tablespace_size(space_id_t space_id) {
3958   mtr_t mtr;
3959 
3960   mtr_start(&mtr);
3961 
3962   fil_space_t *space = fil_space_get(space_id);
3963 
3964   mtr_x_lock_space(space, &mtr);
3965 
3966   const page_size_t page_size(space->flags);
3967 
3968   fsp_header_t *space_header = fsp_get_space_header(space_id, page_size, &mtr);
3969 
3970   xdes_t *descr =
3971       xdes_get_descriptor_with_space_hdr(space_header, space->id, 0, &mtr);
3972 
3973   ulint n_used = xdes_get_n_used(descr, &mtr);
3974   ulint size = mach_read_from_4(space_header + FSP_SIZE);
3975   ut_a(n_used <= size);
3976 
3977   mtr_commit(&mtr);
3978 
3979   return (true);
3980 }
3981 #endif /* UNIV_DEBUG */
3982 
3983 /** Determine if the tablespace has SDI.
3984 @param[in]	space_id	Tablespace id
3985 @return DB_SUCCESS if SDI is present else DB_ERROR
3986 or DB_TABLESPACE_NOT_FOUND */
fsp_has_sdi(space_id_t space_id)3987 dberr_t fsp_has_sdi(space_id_t space_id) {
3988   fil_space_t *space = fil_space_acquire_silent(space_id);
3989   if (space == nullptr) {
3990     DBUG_EXECUTE_IF(
3991         "ib_sdi", ib::warn(ER_IB_MSG_427)
3992                       << "Tablespace doesn't exist for space_id: " << space_id;
3993         ib::warn(ER_IB_MSG_428) << "Is the tablespace dropped or discarded";);
3994     return (DB_TABLESPACE_NOT_FOUND);
3995   }
3996 
3997 #if defined(UNIV_DEBUG) && !defined(XTRABACKUP)
3998   mtr_t mtr;
3999   mtr.start();
4000   ut_ad(fsp_sdi_get_root_page_num(space_id, page_size_t(space->flags), &mtr) !=
4001         0);
4002   mtr.commit();
4003 #endif /* UNIV_DEBUG && !XTRABACKUP */
4004 
4005   fil_space_release(space);
4006   DBUG_EXECUTE_IF(
4007       "ib_sdi", if (!FSP_FLAGS_HAS_SDI(space->flags)) {
4008         ib::warn(ER_IB_MSG_429)
4009             << "SDI doesn't exist in tablespace: " << space->name;
4010       });
4011   return (FSP_FLAGS_HAS_SDI(space->flags) ? DB_SUCCESS : DB_ERROR);
4012 }
4013 
/** Mark all pages in tablespace dirty.
Pages from_page .. total_pages - 1 are visited in groups of at most
PAGE_GROUP_SIZE. Within a group every page that can be fetched and that
looks initialized gets its FIL_PAGE_SPACE_ID field rewritten with space_id
inside one mini-transaction. After each group a second mini-transaction
records the last visited page number as (un)encryption progress via
fsp_header_write_encryption_progress().
@param[in]	thd		current thread (used only for DEBUG_SYNC)
@param[in]	space_id	tablespace id
@param[in]	space_flags	tablespace flags
@param[in]	total_pages	total pages in tablespace
@param[in]	from_page	page number from where to start the operation;
				must not be 0 */
static void mark_all_page_dirty_in_tablespace(THD *thd, space_id_t space_id,
                                              uint32_t space_flags,
                                              page_no_t total_pages,
                                              page_no_t from_page) {
#ifdef HAVE_PSI_STAGE_INTERFACE
  ut_stage_alter_ts progress_monitor;
#endif
  page_size_t pageSize(space_flags);
  page_no_t current_page = from_page;
  mtr_t mtr;

  /* Page 0 is never encrypted */
  ut_ad(current_page != 0);

#ifdef HAVE_PSI_STAGE_INTERFACE
  progress_monitor.init(srv_stage_alter_tablespace_encryption.m_key);
  /* NOTE(review): the estimate assumes from_page <= total_pages - confirm
  that callers guarantee this. */
  progress_monitor.set_estimate(total_pages - current_page);
#endif

  while (current_page < total_pages) {
    /* Mark group of PAGE_GROUP_SIZE pages dirty */
    mtr_start(&mtr);
    /* Number of pages visited in this group, including the ones skipped by
    the 'continue' statements below. */
    page_no_t inner_count = 0;
    for (; inner_count < PAGE_GROUP_SIZE && current_page < total_pages;
         inner_count++, current_page++) {
      /* As we are trying to read each and every page of
      tablespace, there might be few pages which are freed.
      Take them into consideration. */
      buf_block_t *block = buf_page_get_gen(
          page_id_t(space_id, current_page), pageSize, RW_X_LATCH, nullptr,
          Page_fetch::POSSIBLY_FREED, __FILE__, __LINE__, &mtr);

      /* No block was returned for this page: skip it. */
      if (block == nullptr) {
        continue;
      }

      page_t *page = buf_block_get_frame(block);
      page_zip_des_t *page_zip = buf_block_get_page_zip(block);

      /* If page is not initialized */
      if (page_get_space_id(page) == 0 || page_get_page_no(page) == 0) {
        continue;
      }

      /* Rewrite the space id stored on the page with space_id. For an index
      page that has a compressed page descriptor the write goes through
      page_zip_write_header(); every other page uses mlog_write_ulint(). */
      if (page_zip != nullptr &&
          fil_page_type_is_index(fil_page_get_type(page))) {
        mach_write_to_4(page + FIL_PAGE_SPACE_ID, space_id);
        page_zip_write_header(page_zip, page + FIL_PAGE_SPACE_ID, 4, &mtr);
      } else {
        mlog_write_ulint(page + FIL_PAGE_SPACE_ID, space_id, MLOG_4BYTES, &mtr);
      }

      /* NOTE(review): current_page has not been incremented yet here, so the
      value passed is one less than the page just processed - confirm this
      is what the crash-injection tests expect. */
      DBUG_INJECT_CRASH_WITH_LOG_FLUSH("alter_encrypt_tablespace_inner_page",
                                       current_page - 1);
    }
    mtr_commit(&mtr);

    /* From here on current_page is one past the last page of the group, so
    current_page - 1 is the last page that was visited. */
    mtr_start(&mtr);
    /* Write (Un)Encryption progress on page 0 */
    fsp_header_write_encryption_progress(space_id, space_flags,
                                         current_page - 1, 0, false, &mtr);
    mtr_commit(&mtr);

#ifdef HAVE_PSI_STAGE_INTERFACE
    /* Update progress stats */
    progress_monitor.update_work(inner_count);
#endif

    DBUG_EXECUTE_IF("alter_encrypt_tablespace_insert_delay", sleep(1););

    DBUG_INJECT_CRASH_WITH_LOG_FLUSH("alter_encrypt_tablespace_page",
                                     current_page - 1);

#ifdef UNIV_DEBUG
    /* Debug sync point reached when the last visited page of a group is
    page 5. */
    if ((current_page - 1) == 5) {
      DEBUG_SYNC(thd, "alter_encrypt_tablespace_wait_after_page5");
    }
#endif /* UNIV_DEBUG */

    DBUG_EXECUTE_IF("flush_each_dirtied_page",
                    buf_LRU_flush_or_remove_pages(
                        space_id, BUF_REMOVE_FLUSH_WRITE, nullptr, false););
  }

#ifdef HAVE_PSI_STAGE_INTERFACE
  /* Confirm that all pages are covered. */
  ut_ad(progress_monitor.is_completed());
#endif
}
4109 
/** Encrypt/Unencrypt a tablespace.
Outline of what the function does:
 1. Not in recovery: write the operation type and a progress of 0 (and, for
    encryption, the encryption information and the new flags) to page 0,
    flush the redo log and the tablespace pages, then update the in-memory
    state of the tablespace.
    In recovery: only re-establish the in-memory flags; if from_page equals
    the tablespace size, skip directly to step 3.
 2. Mark every page from from_page onwards dirty and flush the tablespace.
 3. For unencryption, erase the encryption information from page 0 and reset
    the in-memory encryption. Then clear the in-progress marker, update the
    DD flags (not in recovery), erase operation type and progress from
    page 0 and flush once more.
@param[in]	thd		current thread
@param[in]	space_id	Tablespace id
@param[in]	from_page	page id from where operation to be done;
				must not be 0
@param[in]	to_encrypt	true if to encrypt, false if to unencrypt
@param[in]	in_recovery	true if its called after recovery
@param[in,out]	dd_space_in	dd tablespace object (a dd::Tablespace
				passed as void *); only used when
				in_recovery is false
@return DB_SUCCESS for success, otherwise the error code returned by
fil_set_encryption() or fil_reset_encryption() */
dberr_t fsp_alter_encrypt_tablespace(THD *thd, space_id_t space_id,
                                     page_no_t from_page, bool to_encrypt,
                                     bool in_recovery, void *dd_space_in) {
  dberr_t err = DB_SUCCESS;
  /* NOTE(review): the result of fil_space_get() is dereferenced below
  without a null check - presumably callers guarantee the tablespace
  exists; confirm. */
  fil_space_t *space = fil_space_get(space_id);
  uint32_t space_flags = 0;
  page_no_t total_pages = 0;
  dd::Tablespace *dd_space = reinterpret_cast<dd::Tablespace *>(dd_space_in);
  byte operation_type = 0;
  /* Stays all-zero unless it is filled in by the "encrypt, not in recovery"
  branch below; the unencryption path relies on that to erase the
  information on page 0. */
  byte encryption_info[Encryption::INFO_SIZE];
  memset(encryption_info, 0, Encryption::INFO_SIZE);
  mtr_t mtr;

  DBUG_TRACE;

  /* Page 0 is never encrypted */
  ut_ad(from_page != 0);

  operation_type |= (to_encrypt) ? Encryption::ENCRYPT_IN_PROGRESS
                                 : Encryption::DECRYPT_IN_PROGRESS;

  if (!in_recovery) { /* NOT IN RECOVERY */
    ut_ad(space->encryption_op_in_progress == NONE);
    if (to_encrypt) {
      /* Assert that tablespace is not encrypted */
      ut_ad(!FSP_FLAGS_GET_ENCRYPTION(space->flags));

      /* Fill key, iv and prepare encryption_info to be written in page 0 */
      byte key[Encryption::KEY_LEN];
      byte iv[Encryption::KEY_LEN];

      Encryption::random_value(key);
      Encryption::random_value(iv);

      /* Prepare encrypted encryption information to be written on page 0.
      NOTE(review): a failure is only reported through ut_ad(0); presumably
      that is compiled out in non-debug builds, in which case execution
      simply continues - confirm. */
      if (!Encryption::fill_encryption_info(key, iv, encryption_info, false,
                                            true)) {
        ut_ad(0);
      }

      /* Write Encryption information and space flags now on page 0
      NOTE : Not modifying space->flags as of now, because we want to persist
      the changes on disk and then modify in memory flags. */
      mtr_start(&mtr);
      if (!fsp_header_write_encryption(space_id,
                                       space->flags | FSP_FLAGS_MASK_ENCRYPTION,
                                       encryption_info, true, false, &mtr)) {
        ut_ad(0);
      }

      /* Write on page 0
              - Operation type (Encryption/Unencryption)
              - Write (Un)Encryption progress (0 now) */
      fsp_header_write_encryption_progress(space_id, space->flags, 0,
                                           operation_type, true, &mtr);
      mtr_commit(&mtr);

      /* Make sure REDO logs are flushed till this point */
      log_buffer_flush_to_disk();

      /* As DMLs are allowed in parallel, pass false for 'strict' */
      buf_LRU_flush_or_remove_pages(space_id, BUF_REMOVE_FLUSH_WRITE, nullptr,
                                    false);

      /* Set encryption for tablespace */
      rw_lock_x_lock(&space->latch);
      err = fil_set_encryption(space_id, Encryption::AES, key, iv);
      rw_lock_x_unlock(&space->latch);
      ut_ad(err == DB_SUCCESS);

      /* Set encryption operation in progress flag */
      space->encryption_op_in_progress = ENCRYPTION;

      /* Update Encryption flag for tablespace */
      fsp_flags_set_encryption(space->flags);
    } else {
      /* Assert that tablespace is encrypted */
      ut_ad(FSP_FLAGS_GET_ENCRYPTION(space->flags));

      mtr_start(&mtr);
      /* Write on page 0
              - Operation type (Encryption/Unencryption)
              - Write (Un)Encryption progress (0 now) */
      fsp_header_write_encryption_progress(space_id, space->flags, 0,
                                           operation_type, true, &mtr);
      mtr_commit(&mtr);

      /* Make sure REDO logs are flushed till this point */
      log_buffer_flush_to_disk();

      /* As DMLs are allowed in parallel, pass false for 'strict' */
      buf_LRU_flush_or_remove_pages(space_id, BUF_REMOVE_FLUSH_WRITE, nullptr,
                                    false);

      /* Set encryption operation in progress flag */
      space->encryption_op_in_progress = DECRYPTION;

      /* Update Encryption flag for tablespace */
      fsp_flags_unset_encryption(space->flags);

      /* Don't erase Encryption info from page 0 yet */
    }

    /* Till this point,
            - ddl_log entry has been made.
            For encryption :
             - In-mem Encryption information set for tablespace.
             - In-mem Tablespace flags have been updated.
             - Encryption Info, Tablespace updated flags have been
               written to page 0.
             - Page 0 has been updated to indicate operation type.
            For Unencryption :
             - In-mem Tablespace flags have been updated.
             - Page 0 has been updated to indicate operation type.

    Now, read tablespace pages one by one and mark them dirty. */
  } else { /* IN RECOVERY */

    /* A corner case when crash happened after last page was processed but
    page 0 wasn't updated with this information. Nothing is left to be
    marked dirty, so go straight to the finishing steps. */
    if (from_page == space->size) {
      goto all_done;
    }

    /* If in recovery, update Tablespace Encryption flag again now
    as DD flags wouldn't have been updated before crash. */
    if (to_encrypt) {
      /* Tablespace Encryption flag were written on page 0
      before crash. */
      ut_ad(FSP_FLAGS_GET_ENCRYPTION(space->flags));

      /* It should have already been set */
      ut_ad(space->encryption_op_in_progress == ENCRYPTION);
    } else {
      /* The cleared Tablespace Encryption flag was not written on page 0
      before crash, so the flags are still expected to show the tablespace
      as encrypted. */
      ut_ad(FSP_FLAGS_GET_ENCRYPTION(space->flags));

      /* It should have already been set */
      ut_ad(space->encryption_op_in_progress == DECRYPTION);

      /* Update Encryption flag for tablespace */
      fsp_flags_unset_encryption(space->flags);

      /* Don't erase Encryption information from page 0 yet */
    }
  }

  /* Snapshot flags and size; these values are used for the whole pass over
  the pages. */
  space_flags = space->flags;
  total_pages = space->size;

  /* Mark all pages in tablespace dirty */
  mark_all_page_dirty_in_tablespace(thd, space_id, space_flags, total_pages,
                                    from_page);

  /* As DMLs are allowed in parallel, pass false for 'strict' */
  buf_LRU_flush_or_remove_pages(space_id, BUF_REMOVE_FLUSH_WRITE, nullptr,
                                false);

  /* Till this point, all pages in tablespace have been marked dirty and
  flushed to disk. */

all_done:
  /* For unencryption, if server crashed before tablespace flags were flushed
  on disk, set them now. This is what clears the flag when the corner-case
  'goto' above skipped the regular recovery branch. */
  if (in_recovery && !to_encrypt) {
    fsp_flags_unset_encryption(space->flags);
  }

  /* If it was an Unencryption operation */
  if (!to_encrypt) {
    /* Crash before updating tablespace flags on page 0 */
    DBUG_EXECUTE_IF("alter_encrypt_tablespace_crash_before_updating_flags",
                    log_buffer_flush_to_disk();
                    DBUG_SUICIDE(););

    ut_ad(!FSP_FLAGS_GET_ENCRYPTION(space->flags));
#ifdef UNIV_DEBUG
    /* encryption_info must still be all zeroes on this path. */
    byte buf[Encryption::INFO_SIZE];
    memset(buf, 0, Encryption::INFO_SIZE);
    ut_ad(memcmp(encryption_info, buf, Encryption::INFO_SIZE) == 0);
#endif
    /* Now on page 0
            - erase Encryption information
            - write updated Tablespace flag */
    mtr_start(&mtr);
    if (!fsp_header_write_encryption(space_id, space->flags, encryption_info,
                                     true, false, &mtr)) {
      ut_ad(0);
    }
    mtr_commit(&mtr);

    rw_lock_x_lock(&space->latch);
    /* Reset In-mem encryption for tablespace */
    err = fil_reset_encryption(space_id);
    rw_lock_x_unlock(&space->latch);
    ut_ad(err == DB_SUCCESS);
  }

  /* Reset encryption in progress flag */
  space->encryption_op_in_progress = NONE;

  if (!in_recovery) {
    ut_ad(dd_space != nullptr);
    /* Update DD flags for tablespace */
    dd_space->se_private_data().set(dd_space_key_strings[DD_SPACE_FLAGS],
                                    static_cast<uint32>(space->flags));
  }

  /* Crash before resetting progress on page 0 */
  DBUG_EXECUTE_IF("alter_encrypt_tablespace_crash_before_resetting_progress",
                  log_buffer_flush_to_disk();
                  DBUG_SUICIDE(););

  /* Erase Operation type and encryption progress from page 0 */
  mtr_start(&mtr);
  fsp_header_write_encryption_progress(space_id, space->flags, 0, 0, true,
                                       &mtr);
  mtr_commit(&mtr);

  /* Crash before flushing page 0 on disk */
  DBUG_EXECUTE_IF("alter_encrypt_tablespace_crash_before_flushing_page_0",
                  log_buffer_flush_to_disk();
                  DBUG_SUICIDE(););

  /* As DMLs are allowed in parallel, pass false for 'strict' */
  buf_LRU_flush_or_remove_pages(space_id, BUF_REMOVE_FLUSH_WRITE, nullptr,
                                false);

  /* Crash after flushing page 0 on disk */
  DBUG_EXECUTE_IF("alter_encrypt_tablespace_crash_after_flushing_page_0",
                  log_buffer_flush_to_disk();
                  DBUG_SUICIDE(););

  return err;
}
4354 
4355 #ifdef UNIV_DEBUG
4356 /** Validate tablespace encryption settings. */
validate_tablespace_encryption(fil_space_t * space)4357 static void validate_tablespace_encryption(fil_space_t *space) {
4358   byte buf[Encryption::KEY_LEN];
4359   memset(buf, 0, Encryption::KEY_LEN);
4360 
4361   if (FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
4362     ut_ad(memcmp(space->encryption_key, buf, Encryption::KEY_LEN) != 0);
4363     ut_ad(memcmp(space->encryption_iv, buf, Encryption::KEY_LEN) != 0);
4364     ut_ad(space->encryption_klen != 0);
4365     ut_ad(space->encryption_type == Encryption::AES);
4366   } else {
4367     ut_ad(memcmp(space->encryption_key, buf, Encryption::KEY_LEN) == 0);
4368     ut_ad(memcmp(space->encryption_iv, buf, Encryption::KEY_LEN) == 0);
4369     ut_ad(space->encryption_klen == 0);
4370     ut_ad(space->encryption_type == Encryption::NONE);
4371   }
4372   ut_ad(space->encryption_op_in_progress == NONE);
4373 }
4374 #endif
4375 
4376 /** Resume Encrypt/Unencrypt for tablespace(s) post recovery.
4377 If an error occurs while processing any tablespace needing encryption,
4378 post an error for that space and keep going.
4379 @param[in]	thd	background thread */
resume_alter_encrypt_tablespace(THD * thd)4380 static void resume_alter_encrypt_tablespace(THD *thd) {
4381   dberr_t err = DB_SUCCESS;
4382   mtr_t mtr;
4383   char operation_name[3][20] = {"NONE", "ENCRYPTION", "DECRYPTION"};
4384   /* List of MDLs taken. One for each tablespace. */
4385   std::list<MDL_ticket *> shared_mdl_list;
4386 
4387   Disable_autocommit_guard autocommit_guard(thd);
4388   dd::cache::Dictionary_client *client = dd::get_dd_client(thd);
4389   dd::cache::Dictionary_client::Auto_releaser releaser(client);
4390   dd::Tablespace *recv_dd_space = nullptr;
4391 
4392   /* Take a SHARED MDL to make sure no one could run any DDL on it and DMLs
4393   are allowed. */
4394   for (auto it : ts_encrypt_ddl_records) {
4395     /* Get the space_id and then read page0 to get
4396     (un)encryption progress */
4397     space_id_t space_id = it->get_space_id();
4398     fil_space_t *space = fil_space_get(space_id);
4399     if (space == nullptr) {
4400       continue;
4401     }
4402 
4403     MDL_ticket *mdl_ticket;
4404     if (dd::acquire_shared_tablespace_mdl(thd, space->name, false, &mdl_ticket,
4405                                           false)) {
4406       ut_a(false);
4407     }
4408     shared_mdl_list.push_back(mdl_ticket);
4409   }
4410 
4411   /* Let the startup thread proceed now */
4412   mysql_cond_signal(&resume_encryption_cond);
4413 
4414   /* In following loop :
4415       - traverse every tablespace one by one and roll forward (un)encryption
4416         operation.
4417       - remove EXPLICIT MDL taken on tablespace explicitly */
4418   std::list<MDL_ticket *>::iterator mdl_it = shared_mdl_list.begin();
4419   for (auto it : ts_encrypt_ddl_records) {
4420     /* Get the space_id and then read page 0 to get (un)encryption progress */
4421     space_id_t space_id = it->get_space_id();
4422     fil_space_t *space = fil_space_get(space_id);
4423     if (space == nullptr) {
4424       ib::error(ER_IB_MSG_1277)
4425           << "Tablespace is missing for tablespace id" << space_id
4426           << ". Skipping (un)encryption resume operation.";
4427       continue;
4428     }
4429 
4430     /* MDL list must not be empty */
4431     ut_ad(mdl_it != shared_mdl_list.end());
4432 
4433     page_size_t pageSize(space->flags);
4434 
4435     mtr_start(&mtr);
4436     buf_block_t *block =
4437         buf_page_get(page_id_t(space_id, 0), pageSize, RW_X_LATCH, &mtr);
4438 
4439     page_t *page = buf_block_get_frame(block);
4440 
4441     /* Get the offset of Encryption progress information */
4442     ulint offset = fsp_header_get_encryption_progress_offset(pageSize);
4443 
4444     /* Read operation type (1 byte) */
4445     byte operation = mach_read_from_1(page + offset);
4446 
4447     /* Read maximum pages (4 byte) */
4448     uint progress =
4449         mach_read_from_4(page + offset + Encryption::OPERATION_INFO_SIZE);
4450     mtr_commit(&mtr);
4451 
4452     if (!(operation & Encryption::ENCRYPT_IN_PROGRESS) &&
4453         !(operation & Encryption::DECRYPT_IN_PROGRESS)) {
4454       /* There are two possibilities:
4455       1. Crash happened even before operation/progress
4456          was written to page 0. Nothing to do.
4457       2. Crash happened after (un)encryption was done and progress/operation
4458          was reset but before DD is updated.
4459       Update DD in that case. */
4460       ib::info(ER_IB_MSG_NO_ENCRYPT_PROGRESS_FOUND)
4461           << "No operation/progress found. Updating DD for tablespace "
4462           << space->name << ":" << space_id << ".";
4463       goto update_dd;
4464     }
4465 
4466     ib::info(ER_IB_MSG_RESUME_OP_FOR_SPACE)
4467         << "Resuming " << operation_name[operation] << " for tablespace "
4468         << space->name << ":" << space_id << " from page " << progress + 1;
4469 
4470     /* Resume (Un)Encryption operation next page onwards */
4471     err = fsp_alter_encrypt_tablespace(
4472         thd, space_id, progress + 1,
4473         (operation & Encryption::ENCRYPT_IN_PROGRESS) ? true : false, true,
4474         recv_dd_space);
4475 
4476     if (err != DB_SUCCESS) {
4477       ib::error(ER_IB_MSG_1280)
4478           << operation_name[operation] << " for tablespace " << space->name
4479           << ":" << space_id << " could not be done successfully.";
4480       continue;
4481     }
4482 
4483   update_dd:
4484     /* At this point, encryption/unencryption process would have been
4485     finished and all pages in tablespace should have been written
4486     correctly and flushed to disk. Now :
4487     - Set/Update tablespace flags encryption.
4488     - Remove In-mem encryption info from tablespace (If Unencrypted).
4489     - Reset operation in progress to NONE. */
4490     mtr_start(&mtr);
4491     block = buf_page_get(page_id_t(space_id, 0), pageSize, RW_X_LATCH, &mtr);
4492     page = buf_block_get_frame(block);
4493     uint32_t latest_fsp_flags = fsp_header_get_flags(page);
4494     if (FSP_FLAGS_GET_ENCRYPTION(latest_fsp_flags)) {
4495       fsp_flags_set_encryption(space->flags);
4496     } else {
4497       fsp_flags_unset_encryption(space->flags);
4498     }
4499     ut_ad(space->flags == latest_fsp_flags);
4500     mtr_commit(&mtr);
4501 
4502     if (!FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
4503       /* Reset In-mem encryption for tablespace */
4504       err = fil_reset_encryption(space_id);
4505       ut_ad(err == DB_SUCCESS);
4506     }
4507 
4508     space->encryption_op_in_progress = NONE;
4509 
4510     /* In case of crash/recovery, following has to be set explicitly
4511         - DD tablespace flags.
4512         - DD encryption option value. */
4513     while (acquire_shared_backup_lock(thd, thd->variables.lock_wait_timeout)) {
4514       os_thread_sleep(20);
4515     }
4516 
4517     while (dd::acquire_exclusive_tablespace_mdl(thd, space->name, false)) {
4518       os_thread_sleep(20);
4519     }
4520 
4521     while (client->acquire_for_modification<dd::Tablespace>(space->name,
4522                                                             &recv_dd_space)) {
4523       os_thread_sleep(20);
4524     }
4525 
4526     if (!FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
4527       /* Update DD Option value, for Unencryption */
4528       recv_dd_space->options().set("encryption", "N");
4529 
4530     } else {
4531       /* Update DD Option value, for Encryption */
4532       recv_dd_space->options().set("encryption", "Y");
4533     }
4534 
4535     /* Update DD flags for tablespace */
4536     recv_dd_space->se_private_data().set(dd_space_key_strings[DD_SPACE_FLAGS],
4537                                          static_cast<uint32>(space->flags));
4538 
4539     /* Validate tablespace In-mem representation */
4540     ut_d(validate_tablespace_encryption(space));
4541 
4542     /* Pass 'true' for 'release_mdl_on_commit' parameter because we want
4543     transactional locks to be released only in case of successful commit */
4544     while (dd::commit_or_rollback_tablespace_change(thd, recv_dd_space, false,
4545                                                     true)) {
4546       os_thread_sleep(20);
4547     }
4548 
4549     ib::info(ER_IB_MSG_1281)
4550         << "Finished " << operation_name[operation] << " for tablespace "
4551         << space->name << ":" << space_id << ".";
4552 
4553     /* Release MDL on tablespace explicitly */
4554     dd_release_mdl((*mdl_it));
4555     mdl_it = shared_mdl_list.erase(mdl_it);
4556   }
4557 
4558   DBUG_EXECUTE_IF("DDL_Log_remove_inject_startup_error_1",
4559                   srv_inject_too_many_concurrent_trxs = true;);
4560 
4561   /* Delete DDL logs now */
4562   err = log_ddl->post_ts_encryption(ts_encrypt_ddl_records);
4563 
4564   /* Abort post recovery startup if this is not successful since
4565   it would leave the DDL Log in an indeterminate state. */
4566   if (err != DB_SUCCESS) {
4567     ib::fatal(ER_IB_MSG_POST_RECOVER_POST_TS_ENCRYPT);
4568   }
4569 
4570   ts_encrypt_ddl_records.clear();
4571   /* All MDLs should have been released and removed from list by now */
4572   ut_ad(shared_mdl_list.empty());
4573   shared_mdl_list.clear();
4574 }
4575 
4576 /* Initiate roll-forward of alter encrypt in background thread */
fsp_init_resume_alter_encrypt_tablespace()4577 void fsp_init_resume_alter_encrypt_tablespace() {
4578 #ifdef UNIV_PFS_THREAD
4579   THD *thd =
4580       create_thd(false, true, true, srv_ts_alter_encrypt_thread_key.m_value);
4581 #else
4582   THD *thd = create_thd(false, true, true, 0);
4583 #endif
4584 
4585   resume_alter_encrypt_tablespace(thd);
4586 
4587   destroy_thd(thd);
4588 }
4589 
write_not_full_n_used(uint32_t n_used)4590 void File_segment_inode::write_not_full_n_used(uint32_t n_used) {
4591 #ifdef UNIV_DEBUG
4592   ut_ad(m_mtr != nullptr);
4593   ut_ad(mtr_memo_contains_page_flagged(
4594       m_mtr, m_fseg_inode, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX));
4595 
4596   if (n_used > 0) {
4597     uint32_t old_value = read_not_full_n_used();
4598     if (n_used > old_value) {
4599       uint32_t incr = n_used - old_value;
4600       ut_ad(incr == 1 || incr == XDES_FRAG_N_USED ||
4601             incr == (FSP_EXTENT_SIZE - 1));
4602     } else {
4603       uint32_t decr = old_value - n_used;
4604       ut_ad(decr == FSP_EXTENT_SIZE || decr == 1 ||
4605             decr == (XDES_FRAG_N_USED + 1) ||
4606             (n_used == calculate_not_full_n_used()));
4607     }
4608   }
4609 #endif /* UNIV_DEBUG */
4610 
4611   mlog_write_ulint(m_fseg_inode + FSEG_NOT_FULL_N_USED, n_used, MLOG_4BYTES,
4612                    m_mtr);
4613 
4614   ut_ad(n_used == 0 || verify_not_full_n_used());
4615 }
4616 
4617 #ifdef UNIV_DEBUG
verify_not_full_n_used()4618 bool File_segment_inode::verify_not_full_n_used() {
4619   if (!do_verify()) {
4620     return (true);
4621   }
4622   uint32_t not_full_n_used_1 = read_not_full_n_used();
4623   uint32_t not_full_n_used_2 = calculate_not_full_n_used();
4624   ut_ad(not_full_n_used_1 == not_full_n_used_2);
4625   return (not_full_n_used_1 == not_full_n_used_2);
4626 }
4627 
calculate_not_full_n_used()4628 page_no_t File_segment_inode::calculate_not_full_n_used() {
4629   page_no_t n_used = 0;
4630   xdes_t *descr;
4631   fil_addr_t xdes_addr = flst_get_first(m_fseg_inode + FSEG_NOT_FULL, m_mtr);
4632 
4633   while (!xdes_addr.is_null()) {
4634     descr = xdes_lst_get_descriptor(m_space_id, m_page_size, xdes_addr, m_mtr);
4635     n_used += xdes_get_n_used(descr, m_mtr);
4636     xdes_addr = flst_get_next_addr(descr + XDES_FLST_NODE, m_mtr);
4637   }
4638 
4639   return (n_used);
4640 }
4641 #endif /* UNIV_DEBUG */
4642 
read_not_full_n_used() const4643 uint32_t File_segment_inode::read_not_full_n_used() const {
4644   uint32_t n_used =
4645       mtr_read_ulint(m_fseg_inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, m_mtr);
4646   return (n_used);
4647 }
4648 
print(std::ostream & out) const4649 std::ostream &File_segment_inode::print(std::ostream &out) const {
4650   out << "[File_segment_inode: FSEG_ID=" << get_seg_id()
4651       << ", FSEG_NOT_FULL_N_USED=" << read_not_full_n_used() << "]";
4652 
4653   return (out);
4654 }
4655 
4656 #endif /* !UNIV_HOTBACKUP */
4657