1 /*****************************************************************************
2
3 Copyright (c) 1995, 2020, Oracle and/or its affiliates. All Rights Reserved.
4
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License, version 2.0, as published by the
7 Free Software Foundation.
8
9 This program is also distributed with certain software (including but not
10 limited to OpenSSL) that is licensed under separate terms, as designated in a
11 particular file or component or in included license documentation. The authors
12 of MySQL hereby grant you an additional permission to link the program and
13 your derivative works with the separately licensed software that they have
14 included with MySQL.
15
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19 for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24
25 *****************************************************************************/
26
27 /** @file fsp/fsp0fsp.cc
28 File space management
29
30 Created 11/29/1995 Heikki Tuuri
31 ***********************************************************************/
32
33 #include "fsp0fsp.h"
34 #include "buf0buf.h"
35 #include "fil0fil.h"
36 #include "ha_prototypes.h"
37 #include "mtr0log.h"
38
39 #include "my_dbug.h"
40
41 #include "page0page.h"
42 #include "page0zip.h"
43 #include "ut0byte.h"
44 #ifdef UNIV_HOTBACKUP
45 #include "fut0lst.h"
46 #endif /* UNIV_HOTBACKUP */
47 #include <my_aes.h>
48
49 #ifndef UNIV_HOTBACKUP
50 #include <debug_sync.h>
51 #include "btr0btr.h"
52 #include "btr0sea.h"
53 #include "dict0boot.h"
54 #include "dict0dd.h"
55 #include "fut0fut.h"
56 #include "ibuf0ibuf.h"
57 #include "log0log.h"
58 #include "srv0srv.h"
59 #endif /* !UNIV_HOTBACKUP */
60 #include "dict0mem.h"
61 #include "fsp0sysspace.h"
62 #include "srv0start.h"
63 #include "trx0purge.h"
64
65 #ifndef UNIV_HOTBACKUP
66
67 #include "dd/types/tablespace.h"
68 #include "dict0dd.h"
69 #include "sql_backup_lock.h"
70 #include "sql_thd_internal_api.h"
71 #include "thd_raii.h"
72 #include "transaction.h"
73 #include "ut0stage.h"
74
/** DDL records for tablespace (un)encryption.
The vector stores raw DDL_Record pointers; which code owns and releases the
pointed-to records is not visible from this declaration. */
std::vector<DDL_Record *> ts_encrypt_ddl_records;

/** Group of pages to be marked dirty together during (un)encryption.
The value is a number of pages. */
#define PAGE_GROUP_SIZE 1
80
/** Returns an extent to the free list of a space.
This is only a forward declaration of a file-local (static) function, so that
it can be referenced before its definition.
@param[in]	page_id		page id in the extent
@param[in]	page_size	page size
@param[in,out]	mtr		mini-transaction */
static void fsp_free_extent(const page_id_t &page_id,
                            const page_size_t &page_size, mtr_t *mtr);
87
/** Determine if extent belongs to a given segment.
This is only a forward declaration of a file-local (static) function.
@param[in]	descr	extent descriptor
@param[in]	seg_id	segment identifier
@param[in]	mtr	mini-transaction
@return true if extent is part of the segment, false otherwise */
static bool xdes_in_segment(const xdes_t *descr, ib_id_t seg_id, mtr_t *mtr);
94
/** Marks a page used. The page must reside within the extents of the given
segment.
This is only a forward declaration of a file-local (static) function.
@param[in]	space_id	tablespace identifier
@param[in]	page_size	size of each page in the tablespace
@param[in]	seg_inode	the file segment inode pointer
@param[in]	page		the page number to be marked as used
@param[in]	descr		extent descriptor containing information about
                                the page
@param[in]	mtr		mini-transaction context for the modification */
static void fseg_mark_page_used(space_id_t space_id,
                                const page_size_t &page_size,
                                fseg_inode_t *seg_inode, page_no_t page,
                                xdes_t *descr, mtr_t *mtr);
107
/** Returns the first extent descriptor for a segment.
The extent lists of the segment are treated as if they were concatenated in
the order FSEG_FULL -> FSEG_NOT_FULL -> FSEG_FREE.
This is only a forward declaration of a file-local (static) function.
@param[in]	inode		segment inode
@param[in]	space_id	space id
@param[in]	page_size	page size
@param[in,out]	mtr		mini-transaction
@return the first extent descriptor, or NULL if none */
static xdes_t *fseg_get_first_extent(fseg_inode_t *inode, space_id_t space_id,
                                     const page_size_t &page_size, mtr_t *mtr);
118
/** Put new extents to the free list if there are free extents above the free
limit. If an extent happens to contain an extent descriptor page, the extent
is put to the FSP_FREE_FRAG list with the page marked as used.
This is only a forward declaration of a file-local (static) function; it is
annotated UNIV_COLD.
@param[in]	init_space	true if this is a single-table tablespace
and we are only initializing the first extent and the first bitmap pages;
then we will not allocate more extents
@param[in,out]	space		tablespace
@param[in,out]	header		tablespace header
@param[in,out]	mtr		mini-transaction */
static UNIV_COLD void fsp_fill_free_list(bool init_space, fil_space_t *space,
                                         fsp_header_t *header, mtr_t *mtr);
130
/** Allocates a single free page from a segment.
This function implements the intelligent allocation strategy which tries
to minimize file space fragmentation.
This is only a forward declaration of a file-local (static) function. Note
that the parameter list differs between debug and non-debug builds: the
trailing has_done_reservation parameter exists only under UNIV_DEBUG.
@param[in,out]	space		tablespace
@param[in]	page_size	page size
@param[in,out]	seg_inode	segment inode
@param[in]	hint		hint of which page would be desirable
@param[in]	direction	if the new page is needed because of
an index page split, and records are inserted there in order, into which
direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR
@param[in]	rw_latch	RW_SX_LATCH, RW_X_LATCH
@param[in,out]	mtr		mini-transaction
@param[in,out]	init_mtr	mtr or another mini-transaction in
which the page should be initialized. If init_mtr != mtr, but the page is
already latched in mtr, do not initialize the page */
#ifdef UNIV_DEBUG
/**
@param[in]	has_done_reservation	TRUE if the space has already been
reserved; in this case we will never return NULL */
#endif /* UNIV_DEBUG */
/**
@retval NULL if no page could be allocated
@retval block rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
(init_mtr == mtr, or the page was not previously freed in mtr)
@retval block (not allocated or initialized) otherwise */
static buf_block_t *fseg_alloc_free_page_low(
    fil_space_t *space, const page_size_t &page_size, fseg_inode_t *seg_inode,
    page_no_t hint, byte direction, rw_lock_type_t rw_latch, mtr_t *mtr,
    mtr_t *init_mtr
#ifdef UNIV_DEBUG
    ,
    ibool has_done_reservation
#endif /* UNIV_DEBUG */
    ) MY_ATTRIBUTE((warn_unused_result));
165 #endif /* !UNIV_HOTBACKUP */
166
167 /** Get the segment identifier to which the extent belongs to.
168 @param[in] descr extent descriptor
169 @return the segment identifier */
xdes_get_segment_id(const xdes_t * descr)170 inline ib_id_t xdes_get_segment_id(const xdes_t *descr) {
171 return (mach_read_from_8(descr + XDES_ID));
172 }
173
174 /** Get the segment identifier to which the extent belongs to.
175 @param[in] descr extent descriptor
176 @param[in] mtr mini-transaction
177 @return the segment identifier */
xdes_get_segment_id(const xdes_t * descr,mtr_t * mtr)178 inline ib_id_t xdes_get_segment_id(const xdes_t *descr, mtr_t *mtr) {
179 #ifndef UNIV_HOTBACKUP
180 ut_ad(mtr_memo_contains_page_flagged(
181 mtr, descr,
182 MTR_MEMO_PAGE_S_FIX | MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX));
183 #endif /* !UNIV_HOTBACKUP */
184
185 return (xdes_get_segment_id(descr));
186 }
187
188 #ifndef UNIV_HOTBACKUP
189 /** Gets a pointer to the space header and x-locks its page.
190 @param[in] id space id
191 @param[in] page_size page size
192 @param[in,out] mtr mini-transaction
193 @return pointer to the space header, page x-locked */
fsp_get_space_header(space_id_t id,const page_size_t & page_size,mtr_t * mtr)194 fsp_header_t *fsp_get_space_header(space_id_t id, const page_size_t &page_size,
195 mtr_t *mtr) {
196 buf_block_t *block;
197 fsp_header_t *header;
198
199 ut_ad(id != 0 || !page_size.is_compressed());
200
201 block = buf_page_get(page_id_t(id, 0), page_size, RW_SX_LATCH, mtr);
202 header = FSP_HEADER_OFFSET + buf_block_get_frame(block);
203 buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
204
205 ut_ad(id == mach_read_from_4(FSP_SPACE_ID + header));
206 #ifdef UNIV_DEBUG
207 const uint32_t flags = mach_read_from_4(FSP_SPACE_FLAGS + header);
208 ut_ad(page_size_t(flags).equals_to(page_size));
209 #endif /* UNIV_DEBUG */
210 return (header);
211 }
212
213 /** Convert a 32 bit integer tablespace flags to the 32 bit table flags.
214 This can only be done for a tablespace that was built as a file-per-table
215 tablespace. Note that the fsp_flags cannot show the difference between a
216 Compact and Redundant table, so an extra Compact boolean must be supplied.
217 Low order bit
218 | REDUNDANT | COMPACT | COMPRESSED | DYNAMIC
219 fil_space_t::flags | 0 | 0 | 1 | 1
220 dict_table_t::flags | 0 | 1 | 1 | 1
221 @param[in] fsp_flags fil_space_t::flags
222 @param[in] compact true if not Redundant row format
223 @return tablespace flags (fil_space_t::flags) */
fsp_flags_to_dict_tf(uint32_t fsp_flags,bool compact)224 uint32_t fsp_flags_to_dict_tf(uint32_t fsp_flags, bool compact) {
225 /* If the table in this file-per-table tablespace is Compact
226 row format, the low order bit will not indicate Compact. */
227 bool post_antelope = FSP_FLAGS_GET_POST_ANTELOPE(fsp_flags);
228 ulint zip_ssize = FSP_FLAGS_GET_ZIP_SSIZE(fsp_flags);
229 bool atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(fsp_flags);
230 bool data_dir = FSP_FLAGS_HAS_DATA_DIR(fsp_flags);
231 bool shared_space = FSP_FLAGS_GET_SHARED(fsp_flags);
232 /* FSP_FLAGS_GET_TEMPORARY(fsp_flags) does not have an equivalent
233 flag position in the table flags. But it would go into flags2 if
234 any code is created where that is needed. */
235
236 uint32_t flags = dict_tf_init(post_antelope | compact, zip_ssize,
237 atomic_blobs, data_dir, shared_space);
238
239 return (flags);
240 }
241 #endif /* !UNIV_HOTBACKUP */
242
243 /** Check if tablespace is dd tablespace.
244 @param[in] space_id tablespace ID
245 @return true if tablespace is dd tablespace. */
fsp_is_dd_tablespace(space_id_t space_id)246 bool fsp_is_dd_tablespace(space_id_t space_id) {
247 return (space_id == dict_sys_t::s_space_id);
248 }
249
250 /** Check whether a space id is an undo tablespace ID
251 Undo tablespaces have space_id's starting 1 less than the redo logs.
252 They are numbered down from this. Since rseg_id=0 always refers to the
253 system tablespace, undo_space_num values start at 1. The current limit
254 is 127. The translation from an undo_space_num is:
255 undo space_id = log_first_space_id - undo_space_num
256 @param[in] space_id space id to check
257 @return true if it is undo tablespace else false. */
fsp_is_undo_tablespace(space_id_t space_id)258 bool fsp_is_undo_tablespace(space_id_t space_id) {
259 /* Starting with v8, undo space_ids have a unique range. */
260 if (space_id >= dict_sys_t::s_min_undo_space_id &&
261 space_id <= dict_sys_t::s_max_undo_space_id) {
262 return (true);
263 }
264
265 /* If upgrading from 5.7, there may be a list of old-style
266 undo tablespaces. Search them. */
267 if (trx_sys_undo_spaces != nullptr) {
268 return (trx_sys_undo_spaces->contains(space_id));
269 }
270
271 return (false);
272 }
273
274 /** Check if tablespace is global temporary.
275 @param[in] space_id tablespace ID
276 @return true if tablespace is global temporary. */
fsp_is_global_temporary(space_id_t space_id)277 bool fsp_is_global_temporary(space_id_t space_id) {
278 return (space_id == srv_tmp_space.space_id());
279 }
280
281 /** Check if the tablespace is session temporary.
282 @param[in] space_id tablespace ID
283 @return true if tablespace is a session temporary tablespace. */
fsp_is_session_temporary(space_id_t space_id)284 bool fsp_is_session_temporary(space_id_t space_id) {
285 return (space_id > dict_sys_t::s_min_temp_space_id &&
286 space_id <= dict_sys_t::s_max_temp_space_id);
287 }
288
289 /** Check if tablespace is system temporary.
290 @param[in] space_id tablespace ID
291 @return true if tablespace is system temporary. */
fsp_is_system_temporary(space_id_t space_id)292 bool fsp_is_system_temporary(space_id_t space_id) {
293 return (fsp_is_global_temporary(space_id) ||
294 fsp_is_session_temporary(space_id));
295 }
296
297 /** Check if checksum is disabled for the given space.
298 @param[in] space_id tablespace ID
299 @return true if checksum is disabled for given space. */
fsp_is_checksum_disabled(space_id_t space_id)300 bool fsp_is_checksum_disabled(space_id_t space_id) {
301 return (fsp_is_system_temporary(space_id));
302 }
303
304 #ifndef UNIV_HOTBACKUP
305 #ifdef UNIV_DEBUG
306
307 /** Skip some of the sanity checks that are time consuming even in debug mode
308 and can affect frequent verification runs that are done to ensure stability of
309 the product.
310 @return true if check should be skipped for given space. */
fsp_skip_sanity_check(space_id_t space_id)311 bool fsp_skip_sanity_check(space_id_t space_id) {
312 return (srv_skip_temp_table_checks_debug &&
313 fsp_is_system_temporary(space_id));
314 }
315
316 #endif /* UNIV_DEBUG */
317
318 /** Gets a descriptor bit of a page.
319 @return true if free */
320 UNIV_INLINE
xdes_mtr_get_bit(const xdes_t * descr,ulint bit,page_no_t offset,mtr_t * mtr)321 ibool xdes_mtr_get_bit(const xdes_t *descr, /*!< in: descriptor */
322 ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
323 page_no_t offset, /*!< in: page offset within extent:
324 0 ... FSP_EXTENT_SIZE - 1 */
325 mtr_t *mtr) /*!< in: mini-transaction */
326 {
327 ut_ad(mtr->is_active());
328 ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
329
330 return (xdes_get_bit(descr, bit, offset));
331 }
332
333 /** Sets a descriptor bit of a page. */
334 UNIV_INLINE
xdes_set_bit(xdes_t * descr,ulint bit,page_no_t offset,ibool val,mtr_t * mtr)335 void xdes_set_bit(xdes_t *descr, /*!< in: descriptor */
336 ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
337 page_no_t offset, /*!< in: page offset within extent:
338 0 ... FSP_EXTENT_SIZE - 1 */
339 ibool val, /*!< in: bit value */
340 mtr_t *mtr) /*!< in/out: mini-transaction */
341 {
342 ulint index;
343 ulint byte_index;
344 ulint bit_index;
345 ulint descr_byte;
346
347 ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
348 ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
349 ut_ad(offset < FSP_EXTENT_SIZE);
350
351 index = bit + XDES_BITS_PER_PAGE * offset;
352
353 byte_index = index / 8;
354 bit_index = index % 8;
355
356 descr_byte = mach_read_from_1(descr + XDES_BITMAP + byte_index);
357 descr_byte = ut_bit_set_nth(descr_byte, bit_index, val);
358
359 mlog_write_ulint(descr + XDES_BITMAP + byte_index, descr_byte, MLOG_1BYTE,
360 mtr);
361 }
362
363 /** Looks for a descriptor bit having the desired value. Starts from hint
364 and scans upward; at the end of the extent the search is wrapped to
365 the start of the extent.
366 @return bit index of the bit, ULINT_UNDEFINED if not found */
367 UNIV_INLINE
xdes_find_bit(xdes_t * descr,ulint bit,ibool val,page_no_t hint,mtr_t * mtr)368 page_no_t xdes_find_bit(xdes_t *descr, /*!< in: descriptor */
369 ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
370 ibool val, /*!< in: desired bit value */
371 page_no_t hint, /*!< in: hint of which bit position
372 would be desirable */
373 mtr_t *mtr) /*!< in/out: mini-transaction */
374 {
375 page_no_t i;
376
377 ut_ad(descr && mtr);
378 ut_ad(val <= TRUE);
379 ut_ad(hint < FSP_EXTENT_SIZE);
380 ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
381 for (i = hint; i < FSP_EXTENT_SIZE; i++) {
382 if (val == xdes_mtr_get_bit(descr, bit, i, mtr)) {
383 return (i);
384 }
385 }
386
387 for (i = 0; i < hint; i++) {
388 if (val == xdes_mtr_get_bit(descr, bit, i, mtr)) {
389 return (i);
390 }
391 }
392
393 return (FIL_NULL);
394 }
395
396 /** Returns the number of used pages in a descriptor.
397 @return number of pages used */
398 UNIV_INLINE
xdes_get_n_used(const xdes_t * descr,mtr_t * mtr)399 page_no_t xdes_get_n_used(const xdes_t *descr, /*!< in: descriptor */
400 mtr_t *mtr) /*!< in/out: mini-transaction */
401 {
402 page_no_t count = 0;
403
404 ut_ad(descr && mtr);
405 ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
406 for (page_no_t i = 0; i < FSP_EXTENT_SIZE; ++i) {
407 if (FALSE == xdes_mtr_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
408 count++;
409 }
410 }
411
412 return (count);
413 }
414
415 #ifdef UNIV_DEBUG
416 /** Check if the state of extent descriptor is valid.
417 @param[in] state the extent descriptor state
418 @return true if state is valid, false otherwise */
xdes_state_is_valid(ulint state)419 bool xdes_state_is_valid(ulint state) {
420 switch (state) {
421 case XDES_NOT_INITED:
422 case XDES_FREE:
423 case XDES_FREE_FRAG:
424 case XDES_FULL_FRAG:
425 case XDES_FSEG:
426 case XDES_FSEG_FRAG:
427 return (true);
428 }
429 return (false);
430 }
431 #endif /* UNIV_DEBUG */
432
433 /** Returns true if extent contains no used pages.
434 @return true if totally free */
435 UNIV_INLINE
xdes_is_free(const xdes_t * descr,mtr_t * mtr)436 ibool xdes_is_free(const xdes_t *descr, /*!< in: descriptor */
437 mtr_t *mtr) /*!< in/out: mini-transaction */
438 {
439 if (0 == xdes_get_n_used(descr, mtr)) {
440 ut_ad(xdes_get_state(descr, mtr) != XDES_FSEG_FRAG);
441
442 return (TRUE);
443 }
444
445 return (FALSE);
446 }
447
448 /** Returns true if extent contains no free pages.
449 @return true if full */
450 UNIV_INLINE
xdes_is_full(const xdes_t * descr,mtr_t * mtr)451 ibool xdes_is_full(const xdes_t *descr, /*!< in: descriptor */
452 mtr_t *mtr) /*!< in/out: mini-transaction */
453 {
454 if (FSP_EXTENT_SIZE == xdes_get_n_used(descr, mtr)) {
455 return (TRUE);
456 }
457
458 return (FALSE);
459 }
460
461 /** Sets the state of an xdes. */
462 UNIV_INLINE
xdes_set_state(xdes_t * descr,xdes_state_t state,mtr_t * mtr)463 void xdes_set_state(xdes_t *descr, /*!< in/out: descriptor */
464 xdes_state_t state, /*!< in: state to set */
465 mtr_t *mtr) /*!< in/out: mini-transaction */
466 {
467 ut_ad(descr && mtr);
468 ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
469
470 #ifdef UNIV_DEBUG
471 switch (xdes_get_state(descr, mtr)) {
472 case XDES_FREE:
473 ut_ad(state == XDES_FSEG || state == XDES_FREE_FRAG);
474 break;
475 case XDES_FREE_FRAG:
476 ut_ad(state == XDES_FULL_FRAG || state == XDES_FSEG_FRAG ||
477 state == XDES_FREE);
478 break;
479 case XDES_FULL_FRAG:
480 ut_ad(state == XDES_FREE_FRAG);
481 break;
482 case XDES_FSEG:
483 ut_ad(state == XDES_FREE);
484 break;
485 case XDES_FSEG_FRAG:
486 ut_ad(state == XDES_FREE_FRAG || state == XDES_FULL_FRAG ||
487 state == XDES_FREE);
488 break;
489 case XDES_NOT_INITED:
490 /* The state is not yet initialized. */
491 ut_ad(state == XDES_FREE);
492 break;
493 }
494 #endif /* UNIV_DEBUG */
495
496 mlog_write_ulint(descr + XDES_STATE, state, MLOG_4BYTES, mtr);
497 }
498
499 /** Update the segment identifier to which the extent belongs to.
500 @param[in,out] descr extent descriptor
501 @param[in,out] seg_id segment identifier
502 @param[in] state state of the extent.
503 @param[in,out] mtr mini-transaction. */
xdes_set_segment_id(xdes_t * descr,const ib_id_t seg_id,xdes_state_t state,mtr_t * mtr)504 inline void xdes_set_segment_id(xdes_t *descr, const ib_id_t seg_id,
505 xdes_state_t state, mtr_t *mtr) {
506 ut_ad(mtr != nullptr);
507 mlog_write_ull(descr + XDES_ID, seg_id, mtr);
508 xdes_set_state(descr, state, mtr);
509 }
510
511 /** Inits an extent descriptor to the free and clean state. */
512 UNIV_INLINE
xdes_init(xdes_t * descr,mtr_t * mtr)513 void xdes_init(xdes_t *descr, /*!< in: descriptor */
514 mtr_t *mtr) /*!< in/out: mini-transaction */
515 {
516 ulint i;
517
518 ut_ad(descr && mtr);
519 ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
520 ut_ad((XDES_SIZE - XDES_BITMAP) % 4 == 0);
521
522 xdes_set_segment_id(descr, 0, XDES_FREE, mtr);
523 flst_write_addr(descr + XDES_FLST_NODE + FLST_PREV, fil_addr_null, mtr);
524 flst_write_addr(descr + XDES_FLST_NODE + FLST_NEXT, fil_addr_null, mtr);
525
526 for (i = XDES_BITMAP; i < XDES_SIZE; i += 4) {
527 mlog_write_ulint(descr + i, 0xFFFFFFFFUL, MLOG_4BYTES, mtr);
528 }
529 }
530
531 /** Get pointer to a the extent descriptor of a page.
532 @param[in,out] sp_header tablespace header page, x-latched
533 @param[in] space tablespace identifier
534 @param[in] offset page offset
535 @param[in,out] mtr mini-transaction
536 @param[in] init_space whether the tablespace is being initialized
537 @param[out] desc_block descriptor block, or NULL if it is
538 the same as the tablespace header
539 @return pointer to the extent descriptor, NULL if the page does not
540 exist in the space or if the offset exceeds free limit */
UNIV_INLINE(warn_unused_result)541 UNIV_INLINE MY_ATTRIBUTE((warn_unused_result)) xdes_t
542 *xdes_get_descriptor_with_space_hdr(fsp_header_t *sp_header,
543 space_id_t space, page_no_t offset,
544 mtr_t *mtr, bool init_space = false,
545 buf_block_t **desc_block = nullptr) {
546 ulint limit;
547 ulint size;
548 page_no_t descr_page_no;
549 uint32_t flags;
550 page_t *descr_page;
551 #ifdef UNIV_DEBUG
552 const fil_space_t *fspace = fil_space_get(space);
553 ut_ad(fspace != nullptr);
554 #endif /* UNIV_DEBUG */
555 ut_ad(mtr_memo_contains(mtr, &fspace->latch, MTR_MEMO_X_LOCK));
556 ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_SX_FIX));
557 ut_ad(page_offset(sp_header) == FSP_HEADER_OFFSET);
558 /* Read free limit and space size */
559 limit = mach_read_from_4(sp_header + FSP_FREE_LIMIT);
560 size = mach_read_from_4(sp_header + FSP_SIZE);
561 flags = mach_read_from_4(sp_header + FSP_SPACE_FLAGS);
562 ut_ad(limit == fspace->free_limit ||
563 (fspace->free_limit == 0 &&
564 (init_space || fspace->purpose == FIL_TYPE_TEMPORARY ||
565 (srv_startup_is_before_trx_rollback_phase &&
566 fsp_is_undo_tablespace(fspace->id)))));
567 ut_ad(size == fspace->size_in_header);
568 #ifdef UNIV_DEBUG
569 /* Exclude Encryption flag as it might have been changed In Memory flags but
570 not on disk. */
571 ut_ad(!((flags ^ fspace->flags) & ~(FSP_FLAGS_MASK_ENCRYPTION)));
572 #endif /* UNIV_DEBUG */
573
574 if ((offset >= size) || (offset >= limit)) {
575 return (nullptr);
576 }
577
578 const page_size_t page_size(flags);
579
580 descr_page_no = xdes_calc_descriptor_page(page_size, offset);
581
582 buf_block_t *block;
583
584 if (descr_page_no == 0) {
585 /* It is on the space header page */
586
587 descr_page = page_align(sp_header);
588 block = nullptr;
589 } else {
590 block = buf_page_get(page_id_t(space, descr_page_no), page_size,
591 RW_SX_LATCH, mtr);
592
593 buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
594
595 descr_page = buf_block_get_frame(block);
596 }
597
598 if (desc_block != nullptr) {
599 *desc_block = block;
600 }
601
602 return (descr_page + XDES_ARR_OFFSET +
603 XDES_SIZE * xdes_calc_descriptor_index(page_size, offset));
604 }
605
606 /** Gets pointer to a the extent descriptor of a page.
607 The page where the extent descriptor resides is x-locked. If the page offset
608 is equal to the free limit of the space, adds new extents from above the free
609 limit to the space free list, if not free limit == space size. This adding
610 is necessary to make the descriptor defined, as they are uninitialized
611 above the free limit.
612 @param[in] space_id space id
613 @param[in] offset page offset; if equal to the free limit, we
614 try to add new extents to the space free list
615 @param[in] page_size page size
616 @param[in,out] mtr mini-transaction
617 @return pointer to the extent descriptor, NULL if the page does not
618 exist in the space or if the offset exceeds the free limit */
xdes_get_descriptor(space_id_t space_id,page_no_t offset,const page_size_t & page_size,mtr_t * mtr)619 static MY_ATTRIBUTE((warn_unused_result)) xdes_t *xdes_get_descriptor(
620 space_id_t space_id, page_no_t offset, const page_size_t &page_size,
621 mtr_t *mtr) {
622 buf_block_t *block;
623 fsp_header_t *sp_header;
624
625 block = buf_page_get(page_id_t(space_id, 0), page_size, RW_SX_LATCH, mtr);
626
627 buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
628
629 sp_header = FSP_HEADER_OFFSET + buf_block_get_frame(block);
630 return (xdes_get_descriptor_with_space_hdr(sp_header, space_id, offset, mtr));
631 }
632
633 /** Gets pointer to a the extent descriptor if the file address of the
634 descriptor list node is known. The page where the extent descriptor resides
635 is x-locked.
636 @param[in] space space id
637 @param[in] page_size page size
638 @param[in] lst_node file address of the list node contained in the
639 descriptor
640 @param[in,out] mtr mini-transaction
641 @return pointer to the extent descriptor */
642 UNIV_INLINE
xdes_lst_get_descriptor(space_id_t space,const page_size_t & page_size,fil_addr_t lst_node,mtr_t * mtr)643 xdes_t *xdes_lst_get_descriptor(space_id_t space, const page_size_t &page_size,
644 fil_addr_t lst_node, mtr_t *mtr) {
645 xdes_t *descr;
646
647 ut_ad(mtr);
648 ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space), MTR_MEMO_X_LOCK));
649
650 descr = fut_get_ptr(space, page_size, lst_node, RW_SX_LATCH, mtr) -
651 XDES_FLST_NODE;
652
653 return (descr);
654 }
655
656 /** Returns page offset of the first page in extent described by a descriptor.
657 @return offset of the first page in extent */
658 UNIV_INLINE
xdes_get_offset(const xdes_t * descr)659 page_no_t xdes_get_offset(const xdes_t *descr) /*!< in: extent descriptor */
660 {
661 ut_ad(descr);
662
663 return (page_get_page_no(page_align(descr)) +
664 static_cast<page_no_t>(
665 ((page_offset(descr) - XDES_ARR_OFFSET) / XDES_SIZE) *
666 FSP_EXTENT_SIZE));
667 }
668 #endif /* !UNIV_HOTBACKUP */
669
670 /** Inits a file page whose prior contents should be ignored. */
fsp_init_file_page_low(buf_block_t * block)671 static void fsp_init_file_page_low(
672 buf_block_t *block) /*!< in: pointer to a page */
673 {
674 page_t *page = buf_block_get_frame(block);
675 page_zip_des_t *page_zip = buf_block_get_page_zip(block);
676
677 if (!fsp_is_system_temporary(block->page.id.space())) {
678 memset(page, 0, UNIV_PAGE_SIZE);
679 }
680
681 mach_write_to_4(page + FIL_PAGE_OFFSET, block->page.id.page_no());
682 mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
683 block->page.id.space());
684
685 /* Reset FRAME LSN, which otherwise points to the LSN of the last
686 page that used this buffer block. This is needed by CLONE for
687 tracking dirty pages. */
688 memset(page + FIL_PAGE_LSN, 0, 8);
689
690 if (page_zip) {
691 memset(page_zip->data, 0, page_zip_get_size(page_zip));
692 memcpy(page_zip->data + FIL_PAGE_OFFSET, page + FIL_PAGE_OFFSET, 4);
693 memcpy(page_zip->data + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
694 page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 4);
695 memcpy(page_zip->data + FIL_PAGE_LSN, page + FIL_PAGE_LSN, 8);
696 }
697 }
698
699 #ifndef UNIV_HOTBACKUP
700 #ifdef UNIV_DEBUG
701 /** Assert that the mini-transaction is compatible with
702 updating an allocation bitmap page.
703 @param[in] id tablespace identifier
704 @param[in] mtr mini-transaction */
fsp_space_modify_check(space_id_t id,const mtr_t * mtr)705 static void fsp_space_modify_check(space_id_t id, const mtr_t *mtr) {
706 ut_ad(mtr);
707 switch (mtr->get_log_mode()) {
708 case MTR_LOG_SHORT_INSERTS:
709 /* These modes are only allowed within a non-bitmap page
710 when there is a higher-level redo log record written. */
711 break;
712
713 case MTR_LOG_NONE:
714 /* We allow MTR_LOG_NONE to be set over MTR_LOG_NO_REDO. */
715 if (!mtr_t::s_logging.is_enabled()) {
716 return;
717 }
718 break;
719
720 case MTR_LOG_NO_REDO:
721 #ifdef UNIV_DEBUG
722 {
723 const fil_type_t type = fil_space_get_type(id);
724 ut_a(fsp_is_system_temporary(id) || !mtr_t::s_logging.is_enabled() ||
725 fil_space_get_flags(id) == UINT32_UNDEFINED ||
726 type == FIL_TYPE_TEMPORARY || type == FIL_TYPE_IMPORT ||
727 fil_space_is_redo_skipped(id) || !undo::is_active(id, false));
728 }
729 #endif /* UNIV_DEBUG */
730 return;
731 case MTR_LOG_ALL:
732 /* We must not write redo log for the shared temporary
733 tablespace. */
734 ut_ad(!fsp_is_system_temporary(id));
735 /* If we write redo log, the tablespace must exist. */
736 ut_ad(fil_space_get_type(id) == FIL_TYPE_TABLESPACE);
737 return;
738
739 default:
740 break;
741 }
742
743 ut_ad(0);
744 }
745 #endif /* UNIV_DEBUG */
746
747 /** Initialize a file page.
748 @param[in,out] block file page
749 @param[in,out] mtr mini-transaction */
fsp_init_file_page(buf_block_t * block,mtr_t * mtr)750 static void fsp_init_file_page(buf_block_t *block, mtr_t *mtr) {
751 fsp_init_file_page_low(block);
752
753 ut_d(fsp_space_modify_check(block->page.id.space(), mtr));
754 mlog_write_initial_log_record(buf_block_get_frame(block),
755 MLOG_INIT_FILE_PAGE2, mtr);
756 }
757 #endif /* !UNIV_HOTBACKUP */
758
759 /** Parses a redo log record of a file page init.
760 @return end of log record or NULL */
fsp_parse_init_file_page(byte * ptr,byte * end_ptr MY_ATTRIBUTE ((unused)),buf_block_t * block)761 byte *fsp_parse_init_file_page(
762 byte *ptr, /*!< in: buffer */
763 byte *end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */
764 buf_block_t *block) /*!< in: block or NULL */
765 {
766 ut_ad(ptr != nullptr);
767 ut_ad(end_ptr != nullptr);
768
769 if (block) {
770 fsp_init_file_page_low(block);
771 }
772
773 return (ptr);
774 }
775
776 /** Initializes the fsp system. */
fsp_init()777 void fsp_init() {
778 /* FSP_EXTENT_SIZE must be a multiple of page & zip size */
779 ut_a(UNIV_PAGE_SIZE > 0);
780 ut_a(0 == (UNIV_PAGE_SIZE % FSP_EXTENT_SIZE));
781
782 static_assert(!(UNIV_PAGE_SIZE_MAX % FSP_EXTENT_SIZE_MAX),
783 "UNIV_PAGE_SIZE_MAX % FSP_EXTENT_SIZE_MAX != 0");
784
785 static_assert(!(UNIV_ZIP_SIZE_MIN % FSP_EXTENT_SIZE_MIN),
786 "UNIV_ZIP_SIZE_MIN % FSP_EXTENT_SIZE_MIN != 0");
787
788 /* Does nothing at the moment */
789 }
790
791 /** Writes the space id and flags to a tablespace header. The flags contain
792 row type, physical/compressed page size, and logical/uncompressed page
793 size of the tablespace. */
fsp_header_init_fields(page_t * page,space_id_t space_id,uint32_t flags)794 void fsp_header_init_fields(
795 page_t *page, /*!< in/out: first page in the space */
796 space_id_t space_id, /*!< in: space id */
797 uint32_t flags) /*!< in: tablespace flags
798 (FSP_SPACE_FLAGS) */
799 {
800 ut_a(fsp_flags_is_valid(flags));
801
802 mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page, space_id);
803 mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page, flags);
804 }
805
806 /** Get the offset of encrytion information in page 0.
807 @param[in] page_size page size.
808 @return offset on success, otherwise 0. */
fsp_header_get_encryption_offset(const page_size_t & page_size)809 ulint fsp_header_get_encryption_offset(const page_size_t &page_size) {
810 ulint offset;
811 #ifdef UNIV_DEBUG
812 ulint left_size;
813 #endif
814
815 offset = XDES_ARR_OFFSET + XDES_SIZE * xdes_arr_size(page_size);
816 #ifdef UNIV_DEBUG
817 left_size =
818 page_size.physical() - FSP_HEADER_OFFSET - offset - FIL_PAGE_DATA_END;
819
820 ut_ad(left_size >= Encryption::INFO_SIZE);
821 #endif
822
823 return offset;
824 }
825
826 #ifndef UNIV_HOTBACKUP
827 /** Write the (un)encryption progress info into the space header.
828 @param[in] space_id tablespace id
829 @param[in] space_flags tablespace flags
830 @param[in] progress_info max pages (un)encrypted
831 @param[in] operation_type Type of operation
832 @param[in] update_operation_type is operation to be updated
833 @param[in,out] mtr mini-transaction
834 @return true if success. */
fsp_header_write_encryption_progress(space_id_t space_id,uint32_t space_flags,ulint progress_info,byte operation_type,bool update_operation_type,mtr_t * mtr)835 bool fsp_header_write_encryption_progress(
836 space_id_t space_id, uint32_t space_flags, ulint progress_info,
837 byte operation_type, bool update_operation_type, mtr_t *mtr) {
838 buf_block_t *block;
839 ulint offset;
840
841 const page_size_t page_size(space_flags);
842
843 /* Save the encryption info to the page 0. */
844 block = buf_page_get(page_id_t(space_id, 0), page_size, RW_SX_LATCH, mtr);
845
846 if (block == nullptr) {
847 return false;
848 }
849
850 buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
851 ut_ad(space_id == page_get_space_id(buf_block_get_frame(block)));
852
853 offset = fsp_header_get_encryption_progress_offset(page_size);
854 ut_ad(offset != 0 && offset < UNIV_PAGE_SIZE);
855
856 page_t *page = buf_block_get_frame(block);
857
858 /* Write operation type if needed */
859 if (update_operation_type) {
860 mlog_write_ulint(page + offset, operation_type, MLOG_1BYTE, mtr);
861 }
862
863 mlog_write_ulint(page + offset + Encryption::OPERATION_INFO_SIZE,
864 progress_info, MLOG_4BYTES, mtr);
865 return (true);
866 }
867
868 /** Get encryption operation type in progress from the first
869 page of a tablespace.
870 @param[in] page first page of a tablespace
871 @param[in] page_size tablespace page size
872 @return encryption operation
873 */
fsp_header_encryption_op_type_in_progress(const page_t * page,page_size_t page_size)874 encryption_op_type fsp_header_encryption_op_type_in_progress(
875 const page_t *page, page_size_t page_size) {
876 ulint offset;
877 encryption_op_type op;
878 offset = fsp_header_get_encryption_progress_offset(page_size);
879 ut_ad(offset != 0 && offset < UNIV_PAGE_SIZE);
880
881 /* Read operation type (1 byte) */
882 byte operation = mach_read_from_1(page + offset);
883 switch (operation) {
884 case Encryption::ENCRYPT_IN_PROGRESS:
885 op = ENCRYPTION;
886 break;
887 case Encryption::DECRYPT_IN_PROGRESS:
888 op = DECRYPTION;
889 break;
890 default:
891 op = NONE;
892 break;
893 }
894
895 return (op);
896 }
897
898 /** Write the encryption info into the space header.
899 @param[in] space_id tablespace id
900 @param[in] space_flags tablespace flags
901 @param[in] encrypt_info buffer for re-encrypt key
902 @param[in] update_fsp_flags if it need to update the space flags
903 @param[in] rotate_encryption if it is called during key rotation
904 @param[in,out] mtr mini-transaction
905 @return true if success. */
fsp_header_write_encryption(space_id_t space_id,uint32_t space_flags,byte * encrypt_info,bool update_fsp_flags,bool rotate_encryption,mtr_t * mtr)906 bool fsp_header_write_encryption(space_id_t space_id, uint32_t space_flags,
907 byte *encrypt_info, bool update_fsp_flags,
908 bool rotate_encryption, mtr_t *mtr) {
909 buf_block_t *block;
910 ulint offset;
911 page_t *page;
912 uint32_t master_key_id;
913
914 const page_size_t page_size(space_flags);
915
916 /* Save the encryption info to the page 0. */
917 block = buf_page_get(page_id_t(space_id, 0), page_size, RW_SX_LATCH, mtr);
918 if (block == nullptr) {
919 return (false);
920 }
921
922 buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
923 ut_ad(space_id == page_get_space_id(buf_block_get_frame(block)));
924
925 offset = fsp_header_get_encryption_offset(page_size);
926 ut_ad(offset != 0 && offset < UNIV_PAGE_SIZE);
927
928 page = buf_block_get_frame(block);
929
930 /* Write the new fsp flags into be update to the header if needed */
931 if (update_fsp_flags) {
932 mlog_write_ulint(page + FSP_HEADER_OFFSET + FSP_SPACE_FLAGS, space_flags,
933 MLOG_4BYTES, mtr);
934 }
935
936 if (rotate_encryption) {
937 /* If called during recovery, skip all tablespaces which have updated
938 master_key_id. */
939 master_key_id = mach_read_from_4(page + offset + Encryption::MAGIC_SIZE);
940 if (srv_is_being_started &&
941 master_key_id == Encryption::get_master_key_id()) {
942 ut_ad(memcmp(page + offset, Encryption::KEY_MAGIC_V1,
943 Encryption::MAGIC_SIZE) == 0 ||
944 memcmp(page + offset, Encryption::KEY_MAGIC_V2,
945 Encryption::MAGIC_SIZE) == 0 ||
946 memcmp(page + offset, Encryption::KEY_MAGIC_V3,
947 Encryption::MAGIC_SIZE) == 0);
948 return (true);
949 }
950 }
951
952 /* Write encryption info passed */
953 mlog_write_string(page + offset, encrypt_info, Encryption::INFO_SIZE, mtr);
954
955 return (true);
956 }
957
958 /** Rotate the encryption info in the space header.
959 @param[in] space tablespace
960 @param[in] encrypt_info buffer for re-encrypt key.
961 @param[in,out] mtr mini-transaction
962 @return true if success. */
fsp_header_rotate_encryption(fil_space_t * space,byte * encrypt_info,mtr_t * mtr)963 bool fsp_header_rotate_encryption(fil_space_t *space, byte *encrypt_info,
964 mtr_t *mtr) {
965 ut_ad(mtr);
966 ut_ad(space->encryption_type != Encryption::NONE);
967
968 DBUG_EXECUTE_IF("fsp_header_rotate_encryption_failure", return (false););
969
970 /* Fill encryption info. */
971 if (!Encryption::fill_encryption_info(space->encryption_key,
972 space->encryption_iv, encrypt_info,
973 false, true)) {
974 return (false);
975 }
976
977 /* Write encryption info into space header. */
978 return (fsp_header_write_encryption(space->id, space->flags, encrypt_info,
979 false, true, mtr));
980 }
981
982 /** Read the server version number from the DD tablespace header.
983 @param[out] version server version from tablespace header
984 @return false if success. */
fsp_header_dict_get_server_version(uint * version)985 bool fsp_header_dict_get_server_version(uint *version) {
986 fil_space_t *space = fil_space_acquire(dict_sys_t::s_space_id);
987
988 if (space == nullptr) {
989 return (true);
990 }
991
992 buf_block_t *block;
993 page_t *page;
994 mtr_t mtr;
995
996 const page_size_t page_size(space->flags);
997
998 mtr_start(&mtr);
999 block = buf_page_get(page_id_t(dict_sys_t::s_space_id, 0), page_size,
1000 RW_SX_LATCH, &mtr);
1001 page = buf_block_get_frame(block);
1002 *version = fsp_header_get_server_version(page);
1003
1004 mtr_commit(&mtr);
1005 fil_space_release(space);
1006
1007 return (false);
1008 }
1009
/** Initializes the space header of a newly created space and creates also
the insert buffer tree root if space_id == TRX_SYS_SPACE.
Page 0 of the tablespace is created and formatted as the FSP header page,
the in-memory size/free-list bookkeeping of the tablespace is reset, the
free lists are filled, and for an encrypted tablespace the encryption
information is stored on page 0.
@param[in]      space_id  space id
@param[in]      size      current size in blocks
@param[in,out]  mtr       mini-transaction
@param[in]      is_boot   if it's for bootstrap
@return true on success, otherwise false. Changes already made through mtr
are not undone by this function when it returns false. */
bool fsp_header_init(space_id_t space_id, page_no_t size, mtr_t *mtr,
                     bool is_boot) {
  fsp_header_t *header;
  buf_block_t *block;
  page_t *page;

  ut_ad(mtr);

  fil_space_t *space = fil_space_get(space_id);
  ut_ad(space != nullptr);

  mtr_x_lock_space(space, mtr);

  const page_id_t page_id(space_id, 0);
  const page_size_t page_size(space->flags);

  block = buf_page_create(page_id, page_size, RW_SX_LATCH, mtr);
  buf_block_dbg_add_level(block, SYNC_FSP_PAGE);

  /* Reset the in-memory copies of the header fields; the matching on-page
  fields are written further below. */
  space->size_in_header = size;
  space->free_len = 0;
  space->free_limit = 0;

  /* The prior contents of the file page should be ignored */

  fsp_init_file_page(block, mtr);
  page = buf_block_get_frame(block);

  mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_FSP_HDR, MLOG_2BYTES,
                   mtr);

  /* Stamp the page with the current server and space versions. */
  mlog_write_ulint(page + FIL_PAGE_SRV_VERSION, DD_SPACE_CURRENT_SRV_VERSION,
                   MLOG_4BYTES, mtr);
  mlog_write_ulint(page + FIL_PAGE_SPACE_VERSION,
                   DD_SPACE_CURRENT_SPACE_VERSION, MLOG_4BYTES, mtr);

  header = FSP_HEADER_OFFSET + page;

  mlog_write_ulint(header + FSP_SPACE_ID, space_id, MLOG_4BYTES, mtr);
  mlog_write_ulint(header + FSP_NOT_USED, 0, MLOG_4BYTES, mtr);

  fsp_header_size_update(header, size, mtr);
  mlog_write_ulint(header + FSP_FREE_LIMIT, 0, MLOG_4BYTES, mtr);
  mlog_write_ulint(header + FSP_SPACE_FLAGS, space->flags, MLOG_4BYTES, mtr);
  mlog_write_ulint(header + FSP_FRAG_N_USED, 0, MLOG_4BYTES, mtr);

  /* Start with empty extent and segment inode lists. */
  flst_init(header + FSP_FREE, mtr);
  flst_init(header + FSP_FREE_FRAG, mtr);
  flst_init(header + FSP_FULL_FRAG, mtr);
  flst_init(header + FSP_SEG_INODES_FULL, mtr);
  flst_init(header + FSP_SEG_INODES_FREE, mtr);

  /* The segment id counter starts at 1. */
  mlog_write_ull(header + FSP_SEG_ID, 1, mtr);

  /* init_space is true for every tablespace except the system tablespace
  and the global temporary tablespace. */
  fsp_fill_free_list(
      !fsp_is_system_tablespace(space_id) && !fsp_is_global_temporary(space_id),
      space, header, mtr);

  /* For encryption tablespace, we need to save the encryption
  info to the page 0. */
  if (FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
    ulint offset = fsp_header_get_encryption_offset(page_size);
    byte encryption_info[Encryption::INFO_SIZE];

    if (offset == 0) return (false);

    if (!Encryption::fill_encryption_info(space->encryption_key,
                                          space->encryption_iv, encryption_info,
                                          is_boot, true)) {
      /* The info could not be built: drop the in-memory encryption state
      of the tablespace and report failure. */
      space->encryption_type = Encryption::NONE;
      memset(space->encryption_key, 0, Encryption::KEY_LEN);
      memset(space->encryption_iv, 0, Encryption::KEY_LEN);
      return (false);
    }

    mlog_write_string(page + offset, encryption_info, Encryption::INFO_SIZE,
                      mtr);
  }
  space->encryption_op_in_progress = NONE;

  /* Only the system tablespace gets the insert buffer tree root. */
  if (space_id == TRX_SYS_SPACE) {
    if (btr_create(DICT_CLUSTERED | DICT_IBUF, 0, univ_page_size,
                   DICT_IBUF_ID_MIN + space_id, dict_ind_redundant,
                   mtr) == FIL_NULL) {
      return (false);
    }
  }

  return (true);
}
1107 #endif /* !UNIV_HOTBACKUP */
1108
1109 /** Reads the space id from the first page of a tablespace.
1110 @return space id, ULINT UNDEFINED if error */
fsp_header_get_space_id(const page_t * page)1111 space_id_t fsp_header_get_space_id(
1112 const page_t *page) /*!< in: first page of a tablespace */
1113 {
1114 space_id_t fsp_id;
1115 space_id_t id;
1116
1117 fsp_id = mach_read_from_4(FSP_HEADER_OFFSET + page + FSP_SPACE_ID);
1118
1119 id = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
1120
1121 DBUG_EXECUTE_IF("fsp_header_get_space_id_failure", id = SPACE_UNKNOWN;);
1122
1123 if (id != fsp_id) {
1124 ib::error(ER_IB_MSG_414) << "Space ID in fsp header is " << fsp_id
1125 << ", but in the page header it is " << id << ".";
1126 return (SPACE_UNKNOWN);
1127 }
1128
1129 return (id);
1130 }
1131
1132 /** Reads the page size from the first page of a tablespace.
1133 @param[in] page first page of a tablespace
1134 @return page size */
fsp_header_get_page_size(const page_t * page)1135 page_size_t fsp_header_get_page_size(const page_t *page) {
1136 return (page_size_t(fsp_header_get_flags(page)));
1137 }
1138
1139 /** Reads the encryption key from the first page of a tablespace.
1140 @param[in] fsp_flags tablespace flags
1141 @param[in,out] key tablespace key
1142 @param[in,out] iv tablespace iv
1143 @param[in] page first page of a tablespace
1144 @return true if success */
fsp_header_get_encryption_key(uint32_t fsp_flags,byte * key,byte * iv,page_t * page)1145 bool fsp_header_get_encryption_key(uint32_t fsp_flags, byte *key, byte *iv,
1146 page_t *page) {
1147 ulint offset;
1148 const page_size_t page_size(fsp_flags);
1149
1150 offset = fsp_header_get_encryption_offset(page_size);
1151 if (offset == 0) {
1152 return (false);
1153 }
1154
1155 return (Encryption::decode_encryption_info(key, iv, page + offset, true));
1156 }
1157
1158 #ifndef UNIV_HOTBACKUP
1159 /** Increases the space size field of a space. */
fsp_header_inc_size(space_id_t space_id,page_no_t size_inc,mtr_t * mtr)1160 void fsp_header_inc_size(space_id_t space_id, /*!< in: space id */
1161 page_no_t size_inc, /*!< in: size increment in pages */
1162 mtr_t *mtr) /*!< in/out: mini-transaction */
1163 {
1164 fil_space_t *space = fil_space_get(space_id);
1165
1166 mtr_x_lock_space(space, mtr);
1167
1168 ut_d(fsp_space_modify_check(space_id, mtr));
1169
1170 fsp_header_t *header;
1171
1172 header = fsp_get_space_header(space_id, page_size_t(space->flags), mtr);
1173
1174 page_no_t size;
1175
1176 size = mach_read_from_4(header + FSP_SIZE);
1177 ut_ad(size == space->size_in_header);
1178
1179 size += size_inc;
1180
1181 fsp_header_size_update(header, size, mtr);
1182 space->size_in_header = size;
1183 }
1184
1185 /** Gets the size of the system tablespace from the tablespace header. If
1186 we do not have an auto-extending data file, this should be equal to
1187 the size of the data files. If there is an auto-extending data file,
1188 this can be smaller.
1189 @return size in pages */
fsp_header_get_tablespace_size(void)1190 page_no_t fsp_header_get_tablespace_size(void) {
1191 fil_space_t *space = fil_space_get_sys_space();
1192
1193 mtr_t mtr;
1194
1195 mtr_start(&mtr);
1196
1197 mtr_x_lock_space(space, &mtr);
1198
1199 fsp_header_t *header;
1200
1201 header = fsp_get_space_header(TRX_SYS_SPACE, univ_page_size, &mtr);
1202
1203 page_no_t size;
1204
1205 size = mach_read_from_4(header + FSP_SIZE);
1206
1207 ut_ad(space->size_in_header == size);
1208
1209 mtr_commit(&mtr);
1210
1211 return (size);
1212 }
1213
1214 /** Try to extend a single-table tablespace so that a page would fit in the
1215 data file.
1216 @param[in,out] space tablespace
1217 @param[in] page_no page number
1218 @param[in,out] header tablespace header
1219 @param[in,out] mtr mini-transaction
1220 @return true if success */
UNIV_COLD(warn_unused_result)1221 static UNIV_COLD
1222 MY_ATTRIBUTE((warn_unused_result)) bool fsp_try_extend_data_file_with_pages(
1223 fil_space_t *space, page_no_t page_no, fsp_header_t *header, mtr_t *mtr) {
1224 DBUG_TRACE;
1225
1226 ut_ad(!fsp_is_system_tablespace(space->id));
1227 ut_ad(!fsp_is_global_temporary(space->id));
1228 ut_d(fsp_space_modify_check(space->id, mtr));
1229
1230 page_no_t size = mach_read_from_4(header + FSP_SIZE);
1231 ut_ad(size == space->size_in_header);
1232
1233 ut_a(page_no >= size);
1234
1235 bool success = fil_space_extend(space, page_no + 1);
1236
1237 /* The size may be less than we wanted if we ran out of disk space. */
1238 fsp_header_size_update(header, space->size, mtr);
1239 space->size_in_header = space->size;
1240
1241 return success;
1242 }
1243
1244 /** Try to extend the last data file of a tablespace if it is auto-extending.
1245 @param[in,out] space tablespace
1246 @param[in,out] header tablespace header
1247 @param[in,out] mtr mini-transaction
1248 @return whether the tablespace was extended */
fsp_try_extend_data_file(fil_space_t * space,fsp_header_t * header,mtr_t * mtr)1249 static UNIV_COLD ulint fsp_try_extend_data_file(fil_space_t *space,
1250 fsp_header_t *header,
1251 mtr_t *mtr) {
1252 page_no_t size; /* current number of pages
1253 in the datafile */
1254 page_no_t size_increase; /* number of pages to extend
1255 this file */
1256 const char *OUT_OF_SPACE_MSG =
1257 "ran out of space. Please add another file or use"
1258 " 'autoextend' for the last file in setting";
1259 DBUG_TRACE;
1260
1261 ut_d(fsp_space_modify_check(space->id, mtr));
1262
1263 if (space->id == TRX_SYS_SPACE &&
1264 !srv_sys_space.can_auto_extend_last_file()) {
1265 /* We print the error message only once to avoid
1266 spamming the error log. Note that we don't need
1267 to reset the flag to false as dealing with this
1268 error requires server restart. */
1269 if (!srv_sys_space.get_tablespace_full_status()) {
1270 ib::error(ER_IB_MSG_415) << "Tablespace " << srv_sys_space.name() << " "
1271 << OUT_OF_SPACE_MSG << " innodb_data_file_path.";
1272 srv_sys_space.set_tablespace_full_status(true);
1273 }
1274 return false;
1275 } else if (fsp_is_global_temporary(space->id) &&
1276 !srv_tmp_space.can_auto_extend_last_file()) {
1277 /* We print the error message only once to avoid
1278 spamming the error log. Note that we don't need
1279 to reset the flag to false as dealing with this
1280 error requires server restart. */
1281 if (!srv_tmp_space.get_tablespace_full_status()) {
1282 ib::error(ER_IB_MSG_416)
1283 << "Tablespace " << srv_tmp_space.name() << " " << OUT_OF_SPACE_MSG
1284 << " innodb_temp_data_file_path.";
1285 srv_tmp_space.set_tablespace_full_status(true);
1286 }
1287 return false;
1288 }
1289
1290 size = mach_read_from_4(header + FSP_SIZE);
1291 ut_ad(size == space->size_in_header);
1292
1293 const page_size_t page_size(mach_read_from_4(header + FSP_SPACE_FLAGS));
1294
1295 if (space->id == TRX_SYS_SPACE) {
1296 size_increase = srv_sys_space.get_increment();
1297
1298 } else if (fsp_is_global_temporary(space->id)) {
1299 size_increase = srv_tmp_space.get_increment();
1300
1301 } else {
1302 page_no_t extent_pages = fsp_get_extent_size_in_pages(page_size);
1303 if (size < extent_pages) {
1304 /* Let us first extend the file to extent_size */
1305 if (!fsp_try_extend_data_file_with_pages(space, extent_pages - 1, header,
1306 mtr)) {
1307 return false;
1308 }
1309
1310 size = extent_pages;
1311 }
1312
1313 size_increase = fsp_get_pages_to_extend_ibd(page_size, size);
1314 }
1315
1316 if (size_increase == 0) {
1317 return false;
1318 }
1319
1320 if (!fil_space_extend(space, size + size_increase)) {
1321 return false;
1322 }
1323
1324 /* We ignore any fragments of a full megabyte when storing the size
1325 to the space header */
1326
1327 space->size_in_header =
1328 ut_calc_align_down(space->size, (1024 * 1024) / page_size.physical());
1329
1330 fsp_header_size_update(header, space->size_in_header, mtr);
1331
1332 return true;
1333 }
1334
1335 /** Calculate the number of pages to extend a datafile.
1336 We extend single-table and general tablespaces first one extent at a time,
1337 but 4 at a time for bigger tablespaces. It is not enough to extend always
1338 by one extent, because we need to add at least one extent to FSP_FREE.
1339 A single extent descriptor page will track many extents. And the extent
1340 that uses its extent descriptor page is put onto the FSP_FREE_FRAG list.
1341 Extents that do not use their extent descriptor page are added to FSP_FREE.
1342 The physical page size is used to determine how many extents are tracked
1343 on one extent descriptor page. See xdes_calc_descriptor_page().
1344 @param[in] page_size page_size of the datafile
1345 @param[in] size current number of pages in the datafile
1346 @return number of pages to extend the file. */
fsp_get_pages_to_extend_ibd(const page_size_t & page_size,page_no_t size)1347 page_no_t fsp_get_pages_to_extend_ibd(const page_size_t &page_size,
1348 page_no_t size) {
1349 page_no_t size_increase; /* number of pages to extend this file */
1350 page_no_t extent_size; /* one megabyte, in pages */
1351 page_no_t threshold; /* The size of the tablespace (in number
1352 of pages) where we start allocating more
1353 than one extent at a time. */
1354
1355 extent_size = fsp_get_extent_size_in_pages(page_size);
1356
1357 /* The threshold is set at 32MiB except when the physical page
1358 size is small enough that it must be done sooner. */
1359 threshold =
1360 std::min(32 * extent_size, static_cast<page_no_t>(page_size.physical()));
1361
1362 if (size < threshold) {
1363 size_increase = extent_size;
1364 } else {
1365 /* Below in fsp_fill_free_list() we assume
1366 that we add at most FSP_FREE_ADD extents at
1367 a time */
1368 size_increase = FSP_FREE_ADD * extent_size;
1369 }
1370
1371 return (size_increase);
1372 }
1373
1374 /** Initialize a fragment extent and puts it into the free fragment list.
1375 @param[in,out] header tablespace header
1376 @param[in,out] descr extent descriptor
1377 @param[in,out] mtr mini-transaction */
fsp_init_xdes_free_frag(fsp_header_t * header,xdes_t * descr,mtr_t * mtr)1378 static void fsp_init_xdes_free_frag(fsp_header_t *header, xdes_t *descr,
1379 mtr_t *mtr) {
1380 ulint n_used;
1381
1382 /* The first page in the extent is a extent descriptor page
1383 and the second is an ibuf bitmap page: mark them used */
1384 xdes_set_bit(descr, XDES_FREE_BIT, FSP_XDES_OFFSET, FALSE, mtr);
1385 xdes_set_bit(descr, XDES_FREE_BIT, FSP_IBUF_BITMAP_OFFSET, FALSE, mtr);
1386
1387 xdes_set_segment_id(descr, 0, XDES_FREE_FRAG, mtr);
1388 flst_add_last(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, mtr);
1389
1390 n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, mtr);
1391 mlog_write_ulint(header + FSP_FRAG_N_USED, n_used + XDES_FRAG_N_USED,
1392 MLOG_4BYTES, mtr);
1393 }
1394
/** Put new extents to the free list if there are free extents above the free
limit. If an extent happens to contain an extent descriptor page, the extent
is put to the FSP_FREE_FRAG list with the page marked as used.
At most FSP_FREE_ADD extents are added to FSP_FREE per call; extents that go
to FSP_FREE_FRAG are not counted against that limit.
@param[in]      init_space  true if this is a single-table tablespace
and we are only initializing the first extent and the first bitmap pages;
then we will not allocate more extents
@param[in,out]  space       tablespace
@param[in,out]  header      tablespace header
@param[in,out]  mtr         mini-transaction */
static void fsp_fill_free_list(bool init_space, fil_space_t *space,
                               fsp_header_t *header, mtr_t *mtr) {
  page_no_t limit;
  page_no_t size;
  uint32_t flags;
  xdes_t *descr;
  ulint count = 0; /* number of extents added to FSP_FREE by this call */
  page_no_t i;     /* first page number of the extent being processed */

  ut_ad(page_offset(header) == FSP_HEADER_OFFSET);
  ut_d(fsp_space_modify_check(space->id, mtr));

  /* Check if we can fill free list from above the free list limit */
  size = mach_read_from_4(header + FSP_SIZE);
  limit = mach_read_from_4(header + FSP_FREE_LIMIT);
  flags = mach_read_from_4(header + FSP_SPACE_FLAGS);

  ut_ad(size == space->size_in_header);
  ut_ad(limit == space->free_limit);

  /* Exclude the encryption flag from the comparison, as it might have been
  changed in the in-memory flags but not yet on disk. */
  ut_ad(!((flags ^ space->flags) & ~(FSP_FLAGS_MASK_ENCRYPTION)));

  const page_size_t page_size(flags);

  /* If fewer than FSP_FREE_ADD extents lie between the free limit and the
  end of the space, try to extend the data file first. This is attempted
  for tablespaces other than the system and global temporary ones unless
  init_space is set, and for those two only if their last file can
  auto-extend. */
  if (size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) {
    if ((!init_space && !fsp_is_system_tablespace(space->id) &&
         !fsp_is_global_temporary(space->id)) ||
        (space->id == TRX_SYS_SPACE &&
         srv_sys_space.can_auto_extend_last_file()) ||
        (fsp_is_global_temporary(space->id) &&
         srv_tmp_space.can_auto_extend_last_file())) {
      /* The result is deliberately not checked here: whatever size the
      attempt left in size_in_header is used below. */
      fsp_try_extend_data_file(space, header, mtr);
      size = space->size_in_header;
    }
  }

  i = limit;

  /* With init_space set, the extent starting at page 0 is processed even if
  the file does not yet cover a whole extent. Otherwise whole extents below
  the space size are processed until FSP_FREE_ADD of them have been added to
  FSP_FREE. */
  while ((init_space && i < 1) ||
         ((i + FSP_EXTENT_SIZE <= size) && (count < FSP_FREE_ADD))) {
    /* True when this extent begins with an extent descriptor page. */
    bool init_xdes = (ut_2pow_remainder(i, page_size.physical()) == 0);

    /* Advance the free limit, in memory and on the page, past this
    extent. */
    space->free_limit = i + FSP_EXTENT_SIZE;
    mlog_write_ulint(header + FSP_FREE_LIMIT, i + FSP_EXTENT_SIZE, MLOG_4BYTES,
                     mtr);

    if (init_xdes) {
      buf_block_t *block;

      /* We are going to initialize a new descriptor page
      and a new ibuf bitmap page: the prior contents of the
      pages should be ignored. */

      /* Page 0 is not created here; fsp_header_init() creates it as the
      FSP header page. */
      if (i > 0) {
        const page_id_t page_id(space->id, i);

        block = buf_page_create(page_id, page_size, RW_SX_LATCH, mtr);

        buf_block_dbg_add_level(block, SYNC_FSP_PAGE);

        fsp_init_file_page(block, mtr);
        mlog_write_ulint(buf_block_get_frame(block) + FIL_PAGE_TYPE,
                         FIL_PAGE_TYPE_XDES, MLOG_2BYTES, mtr);
      }

      /* Initialize the ibuf bitmap page in a separate
      mini-transaction because it is low in the latching
      order, and we must be able to release its latch.
      Note: Insert-Buffering is disabled for tables that
      reside in the temp-tablespace. */
      if (!fsp_is_system_temporary(space->id)) {
        mtr_t ibuf_mtr;

        mtr_start(&ibuf_mtr);

        /* No redo logging for temporary tablespaces. */
        if (space->purpose == FIL_TYPE_TEMPORARY) {
          mtr_set_log_mode(&ibuf_mtr, MTR_LOG_NO_REDO);
        }

        const page_id_t page_id(space->id, i + FSP_IBUF_BITMAP_OFFSET);

        block = buf_page_create(page_id, page_size, RW_SX_LATCH, &ibuf_mtr);

        buf_block_dbg_add_level(block, SYNC_FSP_PAGE);

        fsp_init_file_page(block, &ibuf_mtr);

        ibuf_bitmap_page_init(block, &ibuf_mtr);

        mtr_commit(&ibuf_mtr);
      }
    }

    /* Fetch and reset the descriptor of this extent. */
    buf_block_t *desc_block = nullptr;
    descr = xdes_get_descriptor_with_space_hdr(header, space->id, i, mtr,
                                               init_space, &desc_block);
    if (desc_block != nullptr) {
      fil_block_check_type(desc_block, FIL_PAGE_TYPE_XDES, mtr);
    }
    xdes_init(descr, mtr);

    if (init_xdes) {
      /* The extent holds a descriptor page, so it becomes a fragment
      extent and is not counted in count. */
      fsp_init_xdes_free_frag(header, descr, mtr);
    } else {
      flst_add_last(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
      count++;
    }

    i += FSP_EXTENT_SIZE;
  }
  /* count is narrowed to 32 bits below; make sure that is lossless. */
  ut_a(count < std::numeric_limits<uint32_t>::max());
  space->free_len += (uint32_t)count;
}
1519
1520 /** Allocates a new free extent.
1521 @param[in] space_id tablespace identifier
1522 @param[in] page_size page size
1523 @param[in] hint hint of which extent would be desirable: any
1524 page offset in the extent goes; the hint must not be > FSP_FREE_LIMIT
1525 @param[in,out] mtr mini-transaction
1526 @return extent descriptor, NULL if cannot be allocated */
fsp_alloc_free_extent(space_id_t space_id,const page_size_t & page_size,page_no_t hint,mtr_t * mtr)1527 static xdes_t *fsp_alloc_free_extent(space_id_t space_id,
1528 const page_size_t &page_size,
1529 page_no_t hint, mtr_t *mtr) {
1530 fsp_header_t *header;
1531 fil_addr_t first;
1532 xdes_t *descr;
1533 buf_block_t *desc_block = nullptr;
1534
1535 header = fsp_get_space_header(space_id, page_size, mtr);
1536
1537 descr = xdes_get_descriptor_with_space_hdr(header, space_id, hint, mtr, false,
1538 &desc_block);
1539
1540 fil_space_t *space = fil_space_get(space_id);
1541 ut_a(space != nullptr);
1542
1543 if (desc_block != nullptr) {
1544 fil_block_check_type(desc_block, FIL_PAGE_TYPE_XDES, mtr);
1545 }
1546
1547 if (descr && (xdes_get_state(descr, mtr) == XDES_FREE)) {
1548 /* Ok, we can take this extent */
1549 } else {
1550 /* Take the first extent in the free list */
1551 first = flst_get_first(header + FSP_FREE, mtr);
1552
1553 if (fil_addr_is_null(first)) {
1554 fsp_fill_free_list(false, space, header, mtr);
1555
1556 first = flst_get_first(header + FSP_FREE, mtr);
1557 }
1558
1559 if (fil_addr_is_null(first)) {
1560 return (nullptr); /* No free extents left */
1561 }
1562
1563 descr = xdes_lst_get_descriptor(space_id, page_size, first, mtr);
1564 }
1565
1566 flst_remove(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
1567 space->free_len--;
1568
1569 return (descr);
1570 }
1571
1572 /** Allocates a single free page from a space. */
fsp_alloc_from_free_frag(fsp_header_t * header,xdes_t * descr,page_no_t bit,mtr_t * mtr)1573 static void fsp_alloc_from_free_frag(
1574 fsp_header_t *header, /*!< in/out: tablespace header */
1575 xdes_t *descr, /*!< in/out: extent descriptor */
1576 page_no_t bit, /*!< in: slot to allocate in the extent */
1577 mtr_t *mtr) /*!< in/out: mini-transaction */
1578 {
1579 ulint frag_n_used;
1580
1581 ut_ad(xdes_get_state(descr, mtr) == XDES_FREE_FRAG);
1582 ut_a(xdes_mtr_get_bit(descr, XDES_FREE_BIT, bit, mtr));
1583 xdes_set_bit(descr, XDES_FREE_BIT, bit, FALSE, mtr);
1584
1585 /* Update the FRAG_N_USED field */
1586 frag_n_used = mach_read_from_4(header + FSP_FRAG_N_USED);
1587 frag_n_used++;
1588 mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES, mtr);
1589 if (xdes_is_full(descr, mtr)) {
1590 /* The fragment is full: move it to another list */
1591 flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, mtr);
1592 xdes_set_state(descr, XDES_FULL_FRAG, mtr);
1593
1594 flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE, mtr);
1595 mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used - FSP_EXTENT_SIZE,
1596 MLOG_4BYTES, mtr);
1597 }
1598 }
1599
1600 /** Gets a buffer block for an allocated page.
1601 NOTE: If init_mtr != mtr, the block will only be initialized if it was
1602 not previously x-latched. It is assumed that the block has been
1603 x-latched only by mtr, and freed in mtr in that case.
1604 @param[in] page_id page id of the allocated page
1605 @param[in] page_size page size of the allocated page
1606 @param[in] rw_latch RW_SX_LATCH, RW_X_LATCH
1607 @param[in,out] mtr mini-transaction of the allocation
1608 @param[in,out] init_mtr mini-transaction for initializing the page
1609 @return block, initialized if init_mtr==mtr
1610 or rw_lock_x_lock_count(&block->lock) == 1 */
fsp_page_create(const page_id_t & page_id,const page_size_t & page_size,rw_lock_type_t rw_latch,mtr_t * mtr,mtr_t * init_mtr)1611 static buf_block_t *fsp_page_create(const page_id_t &page_id,
1612 const page_size_t &page_size,
1613 rw_lock_type_t rw_latch, mtr_t *mtr,
1614 mtr_t *init_mtr) {
1615 ut_ad(rw_latch == RW_X_LATCH || rw_latch == RW_SX_LATCH);
1616 buf_block_t *block = buf_page_create(page_id, page_size, rw_latch, init_mtr);
1617
1618 if (init_mtr == mtr ||
1619 (rw_latch == RW_X_LATCH ? rw_lock_get_x_lock_count(&block->lock) == 1
1620 : rw_lock_get_sx_lock_count(&block->lock) == 1)) {
1621 /* Initialize the page, unless it was already
1622 SX-latched in mtr. (In this case, we would want to
1623 allocate another page that has not been freed in mtr.) */
1624 ut_ad(init_mtr == mtr ||
1625 !mtr_memo_contains_flagged(
1626 mtr, block, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX));
1627
1628 fsp_init_file_page(block, init_mtr);
1629 }
1630
1631 return (block);
1632 }
1633
/** Allocates a single free page from a space.
The page is marked as used.
The page is taken from the fragment extent the hint points into if there is
one, otherwise from the first extent on FSP_FREE_FRAG, otherwise from a
freshly allocated free extent that is turned into a fragment extent.
@param[in]      space      space id
@param[in]      page_size  page size
@param[in]      hint       hint of which page would be desirable
@param[in]      rw_latch   RW_SX_LATCH, RW_X_LATCH
@param[in,out]  mtr        mini-transaction
@param[in,out]  init_mtr   mini-transaction in which the page should be
initialized (may be the same as mtr)
@retval NULL if no page could be allocated
@retval block rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
(init_mtr == mtr, or the page was not previously freed in mtr)
@retval block (not allocated or initialized) otherwise */
static MY_ATTRIBUTE((warn_unused_result)) buf_block_t *fsp_alloc_free_page(
    space_id_t space, const page_size_t &page_size, page_no_t hint,
    rw_lock_type_t rw_latch, mtr_t *mtr, mtr_t *init_mtr) {
  fsp_header_t *header;
  fil_addr_t first;
  xdes_t *descr;
  page_no_t free;       /* index of the free page within the extent */
  page_no_t page_no;    /* page number of the allocated page */
  page_no_t space_size; /* FSP_SIZE as stored in the header */

  ut_ad(mtr);
  ut_ad(init_mtr);

  ut_d(fsp_space_modify_check(space, mtr));
  header = fsp_get_space_header(space, page_size, mtr);

  /* Get the hinted descriptor */
  descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);

  if (descr && (xdes_get_state(descr, mtr) == XDES_FREE_FRAG)) {
    /* Ok, we can take this extent */
  } else {
    /* Else take the first extent in free_frag list */
    first = flst_get_first(header + FSP_FREE_FRAG, mtr);

    if (fil_addr_is_null(first)) {
      /* There are no partially full fragments: allocate
      a free extent and add it to the FREE_FRAG list. NOTE
      that the allocation may have as a side-effect that an
      extent containing a descriptor page is added to the
      FREE_FRAG list. But we will allocate our page from
      the free extent anyway. */

      descr = fsp_alloc_free_extent(space, page_size, hint, mtr);

      if (descr == nullptr) {
        /* No free space left */

        return (nullptr);
      }

      xdes_set_state(descr, XDES_FREE_FRAG, mtr);
      flst_add_last(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, mtr);
    } else {
      descr = xdes_lst_get_descriptor(space, page_size, first, mtr);
    }

    /* Reset the hint: the extent in use is not the hinted one, so the
    search for a free page starts at the beginning of the extent. */
    hint = 0;
  }

  /* Now we have in descr an extent with at least one free page. Look
  for a free page in the extent. */

  free = xdes_find_bit(descr, XDES_FREE_BIT, TRUE, hint % FSP_EXTENT_SIZE, mtr);
  if (free == FIL_NULL) {
    /* The extent is in a fragment state but has no free page: dump the
    memory around the descriptor for diagnosis and abort. */
    ut_print_buf(stderr, ((byte *)descr) - 500, 1000);
    putc('\n', stderr);

    ut_error;
  }

  page_no = xdes_get_offset(descr) + free;

  space_size = mach_read_from_4(header + FSP_SIZE);
  ut_ad(space_size == fil_space_get(space)->size_in_header ||
        (space == TRX_SYS_SPACE && srv_startup_is_before_trx_rollback_phase));

  if (space_size <= page_no) {
    /* It must be that we are extending a single-table tablespace
    whose size is still < 64 pages */

    ut_a(!fsp_is_system_tablespace(space));
    ut_a(!fsp_is_global_temporary(space));
    /* Page-by-page extension is only allowed within the first extent. */
    if (page_no >= FSP_EXTENT_SIZE) {
      ib::error(ER_IB_MSG_417) << "Trying to extend a single-table"
                                  " tablespace "
                               << space
                               << " , by single"
                                  " page(s) though the space size "
                               << space_size << ". Page no " << page_no << ".";
      return (nullptr);
    }

    fil_space_t *fspace = fil_space_get(space);

    if (!fsp_try_extend_data_file_with_pages(fspace, page_no, header, mtr)) {
      /* No disk space left */
      return (nullptr);
    }
  }

  /* Mark the page used in the extent descriptor, then create (and possibly
  initialize) its buffer block. */
  fsp_alloc_from_free_frag(header, descr, free, mtr);
  return (fsp_page_create(page_id_t(space, page_no), page_size, rw_latch, mtr,
                          init_mtr));
}
1743
1744 /** Frees a single page of a space.
1745 The page is marked as free and clean.
1746 @param[in] page_id page id
1747 @param[in] page_size page size
1748 @param[in,out] mtr mini-transaction */
fsp_free_page(const page_id_t & page_id,const page_size_t & page_size,mtr_t * mtr)1749 static void fsp_free_page(const page_id_t &page_id,
1750 const page_size_t &page_size, mtr_t *mtr) {
1751 fsp_header_t *header;
1752 xdes_t *descr;
1753 ulint state;
1754 ulint frag_n_used;
1755
1756 ut_ad(mtr);
1757 ut_d(fsp_space_modify_check(page_id.space(), mtr));
1758
1759 /* fprintf(stderr, "Freeing page %lu in space %lu\n", page, space); */
1760
1761 header = fsp_get_space_header(page_id.space(), page_size, mtr);
1762
1763 descr = xdes_get_descriptor_with_space_hdr(header, page_id.space(),
1764 page_id.page_no(), mtr);
1765
1766 state = xdes_get_state(descr, mtr);
1767
1768 if (state != XDES_FREE_FRAG && state != XDES_FULL_FRAG) {
1769 ib::error(ER_IB_MSG_418) << "File space extent descriptor of page "
1770 << page_id << " has state " << state;
1771 fputs("InnoDB: Dump of descriptor: ", stderr);
1772 ut_print_buf(stderr, ((byte *)descr) - 50, 200);
1773 putc('\n', stderr);
1774 /* Crash in debug version, so that we get a core dump
1775 of this corruption. */
1776 ut_ad(0);
1777
1778 if (state == XDES_FREE) {
1779 /* We put here some fault tolerance: if the page
1780 is already free, return without doing anything! */
1781
1782 return;
1783 }
1784
1785 ut_error;
1786 }
1787
1788 if (xdes_mtr_get_bit(descr, XDES_FREE_BIT,
1789 page_id.page_no() % FSP_EXTENT_SIZE, mtr)) {
1790 ib::error(ER_IB_MSG_419)
1791 << "File space extent descriptor of page " << page_id
1792 << " says it is free. Dump of descriptor: ";
1793 ut_print_buf(stderr, ((byte *)descr) - 50, 200);
1794 putc('\n', stderr);
1795 /* Crash in debug version, so that we get a core dump
1796 of this corruption. */
1797 ut_ad(0);
1798
1799 /* We put here some fault tolerance: if the page
1800 is already free, return without doing anything! */
1801
1802 return;
1803 }
1804
1805 const page_no_t bit = page_id.page_no() % FSP_EXTENT_SIZE;
1806
1807 xdes_set_bit(descr, XDES_FREE_BIT, bit, TRUE, mtr);
1808 xdes_set_bit(descr, XDES_CLEAN_BIT, bit, TRUE, mtr);
1809
1810 frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, mtr);
1811 if (state == XDES_FULL_FRAG) {
1812 /* The fragment was full: move it to another list */
1813 flst_remove(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE, mtr);
1814 xdes_set_state(descr, XDES_FREE_FRAG, mtr);
1815 flst_add_last(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, mtr);
1816 mlog_write_ulint(header + FSP_FRAG_N_USED,
1817 frag_n_used + FSP_EXTENT_SIZE - 1, MLOG_4BYTES, mtr);
1818 } else {
1819 ut_a(frag_n_used > 0);
1820 mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used - 1, MLOG_4BYTES,
1821 mtr);
1822 }
1823
1824 if (xdes_is_free(descr, mtr)) {
1825 /* The extent has become free: move it to another list */
1826 flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, mtr);
1827 fsp_free_extent(page_id, page_size, mtr);
1828 }
1829 }
1830
1831 /** Returns an extent to the free list of a space.
1832 @param[in] page_id page id in the extent
1833 @param[in] page_size page size
1834 @param[in,out] mtr mini-transaction */
fsp_free_extent(const page_id_t & page_id,const page_size_t & page_size,mtr_t * mtr)1835 static void fsp_free_extent(const page_id_t &page_id,
1836 const page_size_t &page_size, mtr_t *mtr) {
1837 fsp_header_t *header;
1838 xdes_t *descr;
1839
1840 ut_ad(mtr);
1841
1842 header = fsp_get_space_header(page_id.space(), page_size, mtr);
1843
1844 descr = xdes_get_descriptor_with_space_hdr(header, page_id.space(),
1845 page_id.page_no(), mtr);
1846
1847 switch (xdes_get_state(descr, mtr)) {
1848 case XDES_FSEG_FRAG:
1849 /* The extent is being returned to the FSP_FREE_FRAG list. */
1850 xdes_init(descr, mtr);
1851 fsp_init_xdes_free_frag(header, descr, mtr);
1852 break;
1853 case XDES_FSEG:
1854 case XDES_FREE_FRAG:
1855 case XDES_FULL_FRAG:
1856
1857 xdes_init(descr, mtr);
1858
1859 flst_add_last(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
1860
1861 fil_space_t *space;
1862
1863 space = fil_space_get(page_id.space());
1864
1865 ++space->free_len;
1866
1867 break;
1868
1869 case XDES_FREE:
1870 case XDES_NOT_INITED:
1871 ut_error;
1872 }
1873 }
1874
1875 /** Returns the nth inode slot on an inode page.
1876 @param[in] page segment inode page
1877 @param[in] i inode index on page
1878 @param[in] page_size page size
1879 @param[in,out] mtr mini-transaction
1880 @return segment inode */
1881 UNIV_INLINE
fsp_seg_inode_page_get_nth_inode(page_t * page,page_no_t i,const page_size_t & page_size,mtr_t * mtr)1882 fseg_inode_t *fsp_seg_inode_page_get_nth_inode(page_t *page, page_no_t i,
1883 const page_size_t &page_size,
1884 mtr_t *mtr) {
1885 ut_ad(i < FSP_SEG_INODES_PER_PAGE(page_size));
1886 ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_SX_FIX));
1887
1888 return (page + FSEG_ARR_OFFSET + FSEG_INODE_SIZE * i);
1889 }
1890
1891 /** Looks for a used segment inode on a segment inode page.
1892 @param[in] page segment inode page
1893 @param[in] page_size page size
1894 @param[in,out] mtr mini-transaction
1895 @return segment inode index, or FIL_NULL if not found */
fsp_seg_inode_page_find_used(page_t * page,const page_size_t & page_size,mtr_t * mtr)1896 static page_no_t fsp_seg_inode_page_find_used(page_t *page,
1897 const page_size_t &page_size,
1898 mtr_t *mtr) {
1899 page_no_t i;
1900 fseg_inode_t *inode;
1901
1902 for (i = 0; i < FSP_SEG_INODES_PER_PAGE(page_size); i++) {
1903 inode = fsp_seg_inode_page_get_nth_inode(page, i, page_size, mtr);
1904
1905 if (mach_read_from_8(inode + FSEG_ID)) {
1906 /* This is used */
1907
1908 ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
1909 return (i);
1910 }
1911 }
1912
1913 return (FIL_NULL);
1914 }
1915
1916 /** Looks for an unused segment inode on a segment inode page.
1917 @param[in] page segment inode page
1918 @param[in] i search forward starting from this index
1919 @param[in] page_size page size
1920 @param[in,out] mtr mini-transaction
1921 @return segment inode index, or FIL_NULL if not found */
fsp_seg_inode_page_find_free(page_t * page,page_no_t i,const page_size_t & page_size,mtr_t * mtr)1922 static page_no_t fsp_seg_inode_page_find_free(page_t *page, page_no_t i,
1923 const page_size_t &page_size,
1924 mtr_t *mtr) {
1925 for (; i < FSP_SEG_INODES_PER_PAGE(page_size); i++) {
1926 fseg_inode_t *inode;
1927
1928 inode = fsp_seg_inode_page_get_nth_inode(page, i, page_size, mtr);
1929
1930 if (!mach_read_from_8(inode + FSEG_ID)) {
1931 /* This is unused */
1932 return (i);
1933 }
1934
1935 ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
1936 }
1937
1938 return (FIL_NULL);
1939 }
1940
1941 /** Allocates a new file segment inode page.
1942 @return true if could be allocated */
fsp_alloc_seg_inode_page(fsp_header_t * space_header,mtr_t * mtr)1943 static ibool fsp_alloc_seg_inode_page(
1944 fsp_header_t *space_header, /*!< in: space header */
1945 mtr_t *mtr) /*!< in/out: mini-transaction */
1946 {
1947 fseg_inode_t *inode;
1948 buf_block_t *block;
1949 page_t *page;
1950 space_id_t space;
1951
1952 ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET);
1953
1954 space = page_get_space_id(page_align(space_header));
1955
1956 const page_size_t page_size(mach_read_from_4(FSP_SPACE_FLAGS + space_header));
1957
1958 block = fsp_alloc_free_page(space, page_size, 0, RW_SX_LATCH, mtr, mtr);
1959
1960 if (block == nullptr) {
1961 return (FALSE);
1962 }
1963
1964 buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
1965 ut_ad(rw_lock_get_sx_lock_count(&block->lock) == 1);
1966
1967 page = buf_block_get_frame(block);
1968
1969 mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_INODE, MLOG_2BYTES, mtr);
1970
1971 for (page_no_t i = 0; i < FSP_SEG_INODES_PER_PAGE(page_size); i++) {
1972 inode = fsp_seg_inode_page_get_nth_inode(page, i, page_size, mtr);
1973
1974 mlog_write_ull(inode + FSEG_ID, 0, mtr);
1975 }
1976
1977 flst_add_last(space_header + FSP_SEG_INODES_FREE, page + FSEG_INODE_PAGE_NODE,
1978 mtr);
1979
1980 return (TRUE);
1981 }
1982
1983 /** Allocates a new file segment inode.
1984 @return segment inode, or NULL if not enough space */
fsp_alloc_seg_inode(fsp_header_t * space_header,mtr_t * mtr)1985 static fseg_inode_t *fsp_alloc_seg_inode(
1986 fsp_header_t *space_header, /*!< in: space header */
1987 mtr_t *mtr) /*!< in/out: mini-transaction */
1988 {
1989 buf_block_t *block;
1990 page_t *page;
1991 fseg_inode_t *inode;
1992 page_no_t n;
1993
1994 ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET);
1995
1996 /* Allocate a new segment inode page if needed. */
1997 if (flst_get_len(space_header + FSP_SEG_INODES_FREE) == 0 &&
1998 !fsp_alloc_seg_inode_page(space_header, mtr)) {
1999 return (nullptr);
2000 }
2001
2002 const page_size_t page_size(mach_read_from_4(FSP_SPACE_FLAGS + space_header));
2003
2004 const page_id_t page_id(
2005 page_get_space_id(page_align(space_header)),
2006 flst_get_first(space_header + FSP_SEG_INODES_FREE, mtr).page);
2007
2008 block = buf_page_get(page_id, page_size, RW_SX_LATCH, mtr);
2009 buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
2010 fil_block_check_type(block, FIL_PAGE_INODE, mtr);
2011
2012 page = buf_block_get_frame(block);
2013
2014 n = fsp_seg_inode_page_find_free(page, 0, page_size, mtr);
2015
2016 ut_a(n != FIL_NULL);
2017
2018 inode = fsp_seg_inode_page_get_nth_inode(page, n, page_size, mtr);
2019
2020 if (FIL_NULL == fsp_seg_inode_page_find_free(page, n + 1, page_size, mtr)) {
2021 /* There are no other unused headers left on the page: move it
2022 to another list */
2023
2024 flst_remove(space_header + FSP_SEG_INODES_FREE, page + FSEG_INODE_PAGE_NODE,
2025 mtr);
2026
2027 flst_add_last(space_header + FSP_SEG_INODES_FULL,
2028 page + FSEG_INODE_PAGE_NODE, mtr);
2029 }
2030
2031 ut_ad(!mach_read_from_8(inode + FSEG_ID) ||
2032 mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
2033 return (inode);
2034 }
2035
2036 /** Frees a file segment inode.
2037 @param[in] space space id
2038 @param[in] page_size page size
2039 @param[in,out] inode segment inode
2040 @param[in,out] mtr mini-transaction */
fsp_free_seg_inode(space_id_t space,const page_size_t & page_size,fseg_inode_t * inode,mtr_t * mtr)2041 static void fsp_free_seg_inode(space_id_t space, const page_size_t &page_size,
2042 fseg_inode_t *inode, mtr_t *mtr) {
2043 page_t *page;
2044 fsp_header_t *space_header;
2045
2046 ut_d(fsp_space_modify_check(space, mtr));
2047
2048 page = page_align(inode);
2049
2050 space_header = fsp_get_space_header(space, page_size, mtr);
2051
2052 ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
2053
2054 if (FIL_NULL == fsp_seg_inode_page_find_free(page, 0, page_size, mtr)) {
2055 /* Move the page to another list */
2056
2057 flst_remove(space_header + FSP_SEG_INODES_FULL, page + FSEG_INODE_PAGE_NODE,
2058 mtr);
2059
2060 flst_add_last(space_header + FSP_SEG_INODES_FREE,
2061 page + FSEG_INODE_PAGE_NODE, mtr);
2062 }
2063
2064 mlog_write_ull(inode + FSEG_ID, 0, mtr);
2065 mlog_write_ulint(inode + FSEG_MAGIC_N, 0xfa051ce3, MLOG_4BYTES, mtr);
2066
2067 if (FIL_NULL == fsp_seg_inode_page_find_used(page, page_size, mtr)) {
2068 /* There are no other used headers left on the page: free it */
2069
2070 flst_remove(space_header + FSP_SEG_INODES_FREE, page + FSEG_INODE_PAGE_NODE,
2071 mtr);
2072
2073 fsp_free_page(page_id_t(space, page_get_page_no(page)), page_size, mtr);
2074 }
2075 }
2076
2077 /** Returns the file segment inode, page x-latched.
2078 @param[in] header segment header
2079 @param[in] space space id
2080 @param[in] page_size page size
2081 @param[in,out] mtr mini-transaction
2082 @param[out] block inode block, or NULL to ignore
2083 @return segment inode, page x-latched; NULL if the inode is free */
fseg_inode_try_get(fseg_header_t * header,space_id_t space,const page_size_t & page_size,mtr_t * mtr,buf_block_t ** block)2084 static fseg_inode_t *fseg_inode_try_get(fseg_header_t *header, space_id_t space,
2085 const page_size_t &page_size,
2086 mtr_t *mtr, buf_block_t **block) {
2087 fil_addr_t inode_addr;
2088 fseg_inode_t *inode;
2089
2090 inode_addr.page = mach_read_from_4(header + FSEG_HDR_PAGE_NO);
2091 inode_addr.boffset = mach_read_from_2(header + FSEG_HDR_OFFSET);
2092 ut_ad(space == mach_read_from_4(header + FSEG_HDR_SPACE));
2093
2094 inode = fut_get_ptr(space, page_size, inode_addr, RW_SX_LATCH, mtr, block);
2095
2096 if (UNIV_UNLIKELY(!mach_read_from_8(inode + FSEG_ID))) {
2097 inode = nullptr;
2098 } else {
2099 ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
2100 }
2101
2102 return (inode);
2103 }
2104
2105 /** Returns the file segment inode, page x-latched.
2106 @param[in] header segment header
2107 @param[in] space space id
2108 @param[in] page_size page size
2109 @param[in,out] mtr mini-transaction
2110 @param[out] block inode block
2111 @return segment inode, page x-latched */
fseg_inode_get(fseg_header_t * header,space_id_t space,const page_size_t & page_size,mtr_t * mtr,buf_block_t ** block=nullptr)2112 static fseg_inode_t *fseg_inode_get(fseg_header_t *header, space_id_t space,
2113 const page_size_t &page_size, mtr_t *mtr,
2114 buf_block_t **block = nullptr) {
2115 fseg_inode_t *inode =
2116 fseg_inode_try_get(header, space, page_size, mtr, block);
2117 ut_a(inode);
2118 return (inode);
2119 }
2120
2121 /** Gets the page number from the nth fragment page slot.
2122 @return page number, FIL_NULL if not in use */
2123 UNIV_INLINE
fseg_get_nth_frag_page_no(fseg_inode_t * inode,ulint n,mtr_t * mtr MY_ATTRIBUTE ((unused)))2124 page_no_t fseg_get_nth_frag_page_no(
2125 fseg_inode_t *inode, /*!< in: segment inode */
2126 ulint n, /*!< in: slot index */
2127 mtr_t *mtr MY_ATTRIBUTE((unused)))
2128 /*!< in/out: mini-transaction */
2129 {
2130 ut_ad(inode && mtr);
2131 ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
2132 ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_SX_FIX));
2133 ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
2134 return (mach_read_from_4(inode + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE));
2135 }
2136
2137 /** Sets the page number in the nth fragment page slot. */
2138 UNIV_INLINE
fseg_set_nth_frag_page_no(fseg_inode_t * inode,ulint n,page_no_t page_no,mtr_t * mtr)2139 void fseg_set_nth_frag_page_no(fseg_inode_t *inode, /*!< in: segment inode */
2140 ulint n, /*!< in: slot index */
2141 page_no_t page_no, /*!< in: page number to set */
2142 mtr_t *mtr) /*!< in/out: mini-transaction */
2143 {
2144 ut_ad(inode && mtr);
2145 ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
2146 ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_SX_FIX));
2147 ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
2148
2149 mlog_write_ulint(inode + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE, page_no,
2150 MLOG_4BYTES, mtr);
2151 }
2152
2153 /** Finds a fragment page slot which is free.
2154 @return slot index; ULINT_UNDEFINED if none found */
fseg_find_free_frag_page_slot(fseg_inode_t * inode,mtr_t * mtr)2155 static ulint fseg_find_free_frag_page_slot(
2156 fseg_inode_t *inode, /*!< in: segment inode */
2157 mtr_t *mtr) /*!< in/out: mini-transaction */
2158 {
2159 ulint i;
2160 page_no_t page_no;
2161
2162 ut_ad(inode && mtr);
2163
2164 for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
2165 page_no = fseg_get_nth_frag_page_no(inode, i, mtr);
2166
2167 if (page_no == FIL_NULL) {
2168 return (i);
2169 }
2170 }
2171
2172 return (ULINT_UNDEFINED);
2173 }
2174
2175 /** Finds a fragment page slot which is used and last in the array.
2176 @return slot index; ULINT_UNDEFINED if none found */
fseg_find_last_used_frag_page_slot(fseg_inode_t * inode,mtr_t * mtr)2177 static ulint fseg_find_last_used_frag_page_slot(
2178 fseg_inode_t *inode, /*!< in: segment inode */
2179 mtr_t *mtr) /*!< in/out: mini-transaction */
2180 {
2181 ulint i;
2182 page_no_t page_no;
2183
2184 ut_ad(inode && mtr);
2185
2186 for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
2187 page_no =
2188 fseg_get_nth_frag_page_no(inode, FSEG_FRAG_ARR_N_SLOTS - i - 1, mtr);
2189
2190 if (page_no != FIL_NULL) {
2191 return (FSEG_FRAG_ARR_N_SLOTS - i - 1);
2192 }
2193 }
2194
2195 return (ULINT_UNDEFINED);
2196 }
2197
2198 /** Calculates reserved fragment page slots.
2199 @return number of fragment pages */
fseg_get_n_frag_pages(fseg_inode_t * inode,mtr_t * mtr)2200 static ulint fseg_get_n_frag_pages(
2201 fseg_inode_t *inode, /*!< in: segment inode */
2202 mtr_t *mtr) /*!< in/out: mini-transaction */
2203 {
2204 ulint i;
2205 ulint count = 0;
2206
2207 ut_ad(inode && mtr);
2208
2209 for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
2210 if (FIL_NULL != fseg_get_nth_frag_page_no(inode, i, mtr)) {
2211 count++;
2212 }
2213 }
2214
2215 return (count);
2216 }
2217
2218 /** Creates a new segment.
2219 @return the block where the segment header is placed, x-latched, NULL
2220 if could not create segment because of lack of space */
fseg_create_general(space_id_t space_id,page_no_t page,ulint byte_offset,ibool has_done_reservation,mtr_t * mtr)2221 buf_block_t *fseg_create_general(
2222 space_id_t space_id, /*!< in: space id */
2223 page_no_t page, /*!< in: page where the segment header is
2224 placed: if this is != 0, the page must belong
2225 to another segment, if this is 0, a new page
2226 will be allocated and it will belong to the
2227 created segment */
2228 ulint byte_offset, /*!< in: byte offset of the created segment header
2229 on the page */
2230 ibool has_done_reservation, /*!< in: TRUE if the caller has already
2231 done the reservation for the pages with
2232 fsp_reserve_free_extents (at least 2 extents: one for
2233 the inode and the other for the segment) then there is
2234 no need to do the check for this individual
2235 operation */
2236 mtr_t *mtr) /*!< in/out: mini-transaction */
2237 {
2238 fsp_header_t *space_header;
2239 fseg_inode_t *inode;
2240 ib_id_t seg_id;
2241 buf_block_t *block = nullptr; /* remove warning */
2242 fseg_header_t *header = nullptr; /* remove warning */
2243 ulint n_reserved = 0;
2244 ulint i;
2245
2246 DBUG_TRACE;
2247
2248 ut_ad(byte_offset + FSEG_HEADER_SIZE <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END);
2249 ut_d(fsp_space_modify_check(space_id, mtr));
2250
2251 fil_space_t *space = fil_space_get(space_id);
2252
2253 mtr_x_lock_space(space, mtr);
2254
2255 const page_size_t page_size(space->flags);
2256
2257 if (page != 0) {
2258 block =
2259 buf_page_get(page_id_t(space_id, page), page_size, RW_SX_LATCH, mtr);
2260
2261 header = byte_offset + buf_block_get_frame(block);
2262
2263 const ulint type = space_id == TRX_SYS_SPACE && page == TRX_SYS_PAGE_NO
2264 ? FIL_PAGE_TYPE_TRX_SYS
2265 : FIL_PAGE_TYPE_SYS;
2266
2267 fil_block_check_type(block, type, mtr);
2268 }
2269
2270 if (rw_lock_get_x_lock_count(&space->latch) == 1) {
2271 /* This thread did not own the latch before this call: free
2272 excess pages from the insert buffer free list */
2273
2274 if (space_id == IBUF_SPACE_ID) {
2275 ibuf_free_excess_pages();
2276 }
2277 }
2278
2279 if (!has_done_reservation &&
2280 !fsp_reserve_free_extents(&n_reserved, space_id, 2, FSP_NORMAL, mtr)) {
2281 return nullptr;
2282 }
2283
2284 space_header = fsp_get_space_header(space_id, page_size, mtr);
2285
2286 inode = fsp_alloc_seg_inode(space_header, mtr);
2287
2288 if (inode == nullptr) {
2289 goto funct_exit;
2290 }
2291
2292 /* Read the next segment id from space header and increment the
2293 value in space header */
2294
2295 seg_id = mach_read_from_8(space_header + FSP_SEG_ID);
2296
2297 mlog_write_ull(space_header + FSP_SEG_ID, seg_id + 1, mtr);
2298
2299 mlog_write_ull(inode + FSEG_ID, seg_id, mtr);
2300
2301 { /* Introducing a new scope to localize this object. Otherwise, I have to
2302 declare this object before the goto statement above. */
2303 File_segment_inode fseg_inode(space_id, page_size, inode, mtr);
2304 fseg_inode.write_not_full_n_used(0);
2305 }
2306
2307 flst_init(inode + FSEG_FREE, mtr);
2308 flst_init(inode + FSEG_NOT_FULL, mtr);
2309 flst_init(inode + FSEG_FULL, mtr);
2310
2311 mlog_write_ulint(inode + FSEG_MAGIC_N, FSEG_MAGIC_N_VALUE, MLOG_4BYTES, mtr);
2312 for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
2313 fseg_set_nth_frag_page_no(inode, i, FIL_NULL, mtr);
2314 }
2315
2316 if (page == 0) {
2317 block = fseg_alloc_free_page_low(space, page_size, inode, 0, FSP_UP,
2318 RW_SX_LATCH, mtr, mtr
2319 #ifdef UNIV_DEBUG
2320 ,
2321 has_done_reservation
2322 #endif /* UNIV_DEBUG */
2323 );
2324
2325 /* The allocation cannot fail if we have already reserved a
2326 space for the page. */
2327 ut_ad(!has_done_reservation || block != nullptr);
2328
2329 if (block == nullptr) {
2330 fsp_free_seg_inode(space_id, page_size, inode, mtr);
2331
2332 goto funct_exit;
2333 }
2334
2335 ut_ad(rw_lock_get_sx_lock_count(&block->lock) == 1);
2336
2337 header = byte_offset + buf_block_get_frame(block);
2338 mlog_write_ulint(buf_block_get_frame(block) + FIL_PAGE_TYPE,
2339 FIL_PAGE_TYPE_SYS, MLOG_2BYTES, mtr);
2340 }
2341
2342 mlog_write_ulint(header + FSEG_HDR_OFFSET, page_offset(inode), MLOG_2BYTES,
2343 mtr);
2344
2345 mlog_write_ulint(header + FSEG_HDR_PAGE_NO,
2346 page_get_page_no(page_align(inode)), MLOG_4BYTES, mtr);
2347
2348 mlog_write_ulint(header + FSEG_HDR_SPACE, space_id, MLOG_4BYTES, mtr);
2349
2350 funct_exit:
2351 if (!has_done_reservation) {
2352 fil_space_release_free_extents(space_id, n_reserved);
2353 }
2354
2355 return block;
2356 }
2357
2358 /** Creates a new segment.
2359 @return the block where the segment header is placed, x-latched, NULL
2360 if could not create segment because of lack of space */
fseg_create(space_id_t space,page_no_t page,ulint byte_offset,mtr_t * mtr)2361 buf_block_t *fseg_create(
2362 space_id_t space, /*!< in: space id */
2363 page_no_t page, /*!< in: page where the segment header is
2364 placed: if this is != 0, the page must belong
2365 to another segment, if this is 0, a new page
2366 will be allocated and it will belong to the
2367 created segment */
2368 ulint byte_offset, /*!< in: byte offset of the created
2369 segment header on the page */
2370 mtr_t *mtr) /*!< in/out: mini-transaction */
2371 {
2372 return (fseg_create_general(space, page, byte_offset, FALSE, mtr));
2373 }
2374
2375 /** Calculates the number of pages reserved by a segment, and how many
2376 pages are currently used.
2377 @param[in] space_id unique tablespace identifier
2378 @param[in] page_size Size of each page in the tablespace.
2379 @param[in] inode file segment inode pointer
2380 @param[out] used number of pages used (not more than reserved)
2381 @param[in,out] mtr the mini transaction
2382 @return number of reserved pages */
fseg_n_reserved_pages_low(space_id_t space_id,const page_size_t & page_size,fseg_inode_t * inode,ulint * used,mtr_t * mtr)2383 static ulint fseg_n_reserved_pages_low(space_id_t space_id,
2384 const page_size_t &page_size,
2385 fseg_inode_t *inode, ulint *used,
2386 mtr_t *mtr) {
2387 ulint ret;
2388 ut_ad(inode && used && mtr);
2389 ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_SX_FIX));
2390
2391 File_segment_inode fseg_inode(space_id, page_size, inode, mtr);
2392
2393 /* number of used segment pages in the FSEG_NOT_FULL list */
2394 uint32_t n_used_not_full = fseg_inode.read_not_full_n_used();
2395
2396 /* total number of segment pages in the FSEG_NOT_FULL list */
2397 ulint n_total_not_full =
2398 FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_NOT_FULL);
2399
2400 /* n_used can be zero only if n_total is zero. */
2401 ut_ad(n_used_not_full > 0 || n_total_not_full == 0);
2402 ut_ad((n_used_not_full < n_total_not_full) ||
2403 ((n_used_not_full == 0) && (n_total_not_full == 0)));
2404
2405 /* total number of pages in FSEG_FULL list. */
2406 ulint n_total_full = FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL);
2407
2408 /* total number of pages in FSEG_FREE list. */
2409 ulint n_total_free = FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FREE);
2410
2411 /* Number of fragment pages in the segment. */
2412 ulint n_frags = fseg_get_n_frag_pages(inode, mtr);
2413
2414 *used = n_frags + n_total_full + n_used_not_full;
2415 ret = n_frags + n_total_full + n_total_free + n_total_not_full;
2416
2417 ut_ad(*used <= ret);
2418 ut_ad((*used < ret) || ((n_used_not_full == 0) && (n_total_not_full == 0) &&
2419 (n_total_free == 0)));
2420
2421 return (ret);
2422 }
2423
2424 /** Calculates the number of pages reserved by a segment, and how many pages are
2425 currently used.
2426 @return number of reserved pages */
fseg_n_reserved_pages(fseg_header_t * header,ulint * used,mtr_t * mtr)2427 ulint fseg_n_reserved_pages(
2428 fseg_header_t *header, /*!< in: segment header */
2429 ulint *used, /*!< out: number of pages used (<= reserved) */
2430 mtr_t *mtr) /*!< in/out: mini-transaction */
2431 {
2432 space_id_t space_id;
2433
2434 space_id = page_get_space_id(page_align(header));
2435
2436 fil_space_t *space = fil_space_get(space_id);
2437
2438 mtr_x_lock_space(space, mtr);
2439
2440 const page_size_t page_size(space->flags);
2441
2442 fseg_inode_t *inode;
2443
2444 inode = fseg_inode_get(header, space_id, page_size, mtr);
2445
2446 return (fseg_n_reserved_pages_low(space_id, page_size, inode, used, mtr));
2447 }
2448
2449 /** Tries to fill the free list of a segment with consecutive free extents.
2450 This happens if the segment is big enough to allow extents in the free list,
2451 the free list is empty, and the extents can be allocated consecutively from
2452 the hint onward.
2453 @param[in] inode segment inode
2454 @param[in] space space id
2455 @param[in] page_size page size
2456 @param[in] hint hint which extent would be good as the first
2457 extent
2458 @param[in,out] mtr mini-transaction */
fseg_fill_free_list(fseg_inode_t * inode,space_id_t space,const page_size_t & page_size,page_no_t hint,mtr_t * mtr)2459 static void fseg_fill_free_list(fseg_inode_t *inode, space_id_t space,
2460 const page_size_t &page_size, page_no_t hint,
2461 mtr_t *mtr) {
2462 xdes_t *descr;
2463 page_no_t i;
2464 ib_id_t seg_id;
2465 ulint reserved;
2466 ulint used;
2467
2468 ut_ad(inode && mtr);
2469 ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
2470 ut_d(fsp_space_modify_check(space, mtr));
2471
2472 reserved = fseg_n_reserved_pages_low(space, page_size, inode, &used, mtr);
2473
2474 if (reserved < FSEG_FREE_LIST_LIMIT * FSP_EXTENT_SIZE) {
2475 /* The segment is too small to allow extents in free list */
2476
2477 return;
2478 }
2479
2480 if (flst_get_len(inode + FSEG_FREE) > 0) {
2481 /* Free list is not empty */
2482
2483 return;
2484 }
2485
2486 for (i = 0; i < FSEG_FREE_LIST_MAX_LEN; i++) {
2487 descr = xdes_get_descriptor(space, hint, page_size, mtr);
2488
2489 if ((descr == nullptr) || (XDES_FREE != xdes_get_state(descr, mtr))) {
2490 /* We cannot allocate the desired extent: stop */
2491
2492 return;
2493 }
2494
2495 descr = fsp_alloc_free_extent(space, page_size, hint, mtr);
2496
2497 seg_id = mach_read_from_8(inode + FSEG_ID);
2498 ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
2499 xdes_set_segment_id(descr, seg_id, XDES_FSEG, mtr);
2500
2501 flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr);
2502 hint += FSP_EXTENT_SIZE;
2503 }
2504 }
2505
2506 /** A fragment extent can be leased if it is the special kind that has a
2507 descriptor page and no other pages are being used except the descriptor
2508 and ibuf bitmap pages. The number of used pages will be equal to
2509 XDES_FRAG_N_USED.
2510 @param[in] descr extent descriptor
2511 @param[in] page_size the page size
2512 @param[in,out] mtr mini transaction
2513 @return true if the extent is leasable, false otherwise. */
2514 UNIV_INLINE
xdes_is_leasable(const xdes_t * descr,const page_size_t & page_size,mtr_t * mtr)2515 bool xdes_is_leasable(const xdes_t *descr, const page_size_t &page_size,
2516 mtr_t *mtr) {
2517 ut_ad(descr && mtr);
2518 ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_SX_FIX));
2519
2520 const page_no_t page_no = xdes_get_offset(descr);
2521 const bool has_xdes_page = !ut_2pow_remainder(page_no, page_size.physical());
2522
2523 if (!has_xdes_page) {
2524 return (false);
2525 }
2526 /* Page 0 and 1 must not be free */
2527 if (xdes_mtr_get_bit(descr, XDES_FREE_BIT, 0, mtr) ||
2528 xdes_mtr_get_bit(descr, XDES_FREE_BIT, 1, mtr)) {
2529 return (false);
2530 }
2531
2532 /* All other pages must be free */
2533 for (page_no_t i = 2; i < FSP_EXTENT_SIZE; ++i) {
2534 if (!xdes_mtr_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
2535 return (false);
2536 }
2537 }
2538
2539 return (true);
2540 }
2541
2542 /** Get the extent descriptor of the last fragmented extent from the
2543 free_frag list.
2544 @param[in] header tablespace header
2545 @param[in] page_size page size
2546 @param[in,out] mtr mini-transaction
2547 @return the extent descriptor, or NULL if none */
fsp_get_last_free_frag_extent(fsp_header_t * header,const page_size_t & page_size,mtr_t * mtr)2548 static xdes_t *fsp_get_last_free_frag_extent(fsp_header_t *header,
2549 const page_size_t &page_size,
2550 mtr_t *mtr) {
2551 space_id_t space;
2552 fil_addr_t node;
2553 xdes_t *descr;
2554
2555 node = flst_get_last(header + FSP_FREE_FRAG, mtr);
2556
2557 if (fil_addr_is_null(node)) {
2558 return (nullptr);
2559 }
2560
2561 space = mach_read_from_4(header + FSEG_HDR_SPACE);
2562 descr = xdes_lst_get_descriptor(space, page_size, node, mtr);
2563 ut_ad(xdes_get_state(descr, mtr) == XDES_FREE_FRAG);
2564
2565 return (descr);
2566 }
2567
2568 /** Allocate an extent from free fragment extent to a segment.
2569 @param[in] space space id
2570 @param[in,out] inode segment to which extent is leased
2571 @param[in] page_size page size
2572 @param[in,out] mtr mini-transaction
2573 @return extent descriptor or NULL */
fsp_alloc_xdes_free_frag(space_id_t space,fseg_inode_t * inode,const page_size_t & page_size,mtr_t * mtr)2574 static xdes_t *fsp_alloc_xdes_free_frag(space_id_t space, fseg_inode_t *inode,
2575 const page_size_t &page_size,
2576 mtr_t *mtr) {
2577 xdes_t *descr;
2578 ib_id_t seg_id;
2579 ulint n_used;
2580
2581 ut_ad(mtr);
2582 ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space), MTR_MEMO_X_LOCK));
2583
2584 fsp_header_t *header = fsp_get_space_header(space, page_size, mtr);
2585
2586 /* If available, take an extent from the free_frag list. */
2587 if (!(descr = fsp_get_last_free_frag_extent(header, page_size, mtr))) {
2588 return (nullptr);
2589 }
2590
2591 if (!xdes_is_leasable(descr, page_size, mtr)) {
2592 return (nullptr);
2593 }
2594 ut_ad(xdes_get_n_used(descr, mtr) == XDES_FRAG_N_USED);
2595
2596 /* Remove from the FSP_FREE_FRAG list */
2597 flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, mtr);
2598 n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, mtr);
2599 mlog_write_ulint(header + FSP_FRAG_N_USED, n_used - XDES_FRAG_N_USED,
2600 MLOG_4BYTES, mtr);
2601
2602 /* Transition the extent (and its ownership) to the segment. */
2603 seg_id = mach_read_from_8(inode + FSEG_ID);
2604 xdes_set_segment_id(descr, seg_id, XDES_FSEG_FRAG, mtr);
2605
2606 /* Add to the end of FSEG_NOT_FULL list. */
2607 flst_add_last(inode + FSEG_NOT_FULL, descr + XDES_FLST_NODE, mtr);
2608
2609 File_segment_inode fseg_inode(space, page_size, inode, mtr);
2610 n_used = fseg_inode.read_not_full_n_used();
2611 fseg_inode.write_not_full_n_used(
2612 static_cast<uint32_t>(n_used + XDES_FRAG_N_USED));
2613
2614 return (descr);
2615 }
2616
2617 /** Allocates a free extent for the segment: looks first in the free list of
2618 the segment, then tries to allocate from the space free list.
2619 NOTE that the extent returned still resides in the segment free list, it is
2620 not yet taken off it!
2621 @param[in] inode segment inode
2622 @param[in] space space id
2623 @param[in] page_size page size
2624 @param[in,out] mtr mini-transaction
2625 @retval NULL if no page could be allocated
2626 @retval block rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
2627 (init_mtr == mtr, or the page was not previously freed in mtr)
2628 @retval block (not allocated or initialized) otherwise */
fseg_alloc_free_extent(fseg_inode_t * inode,space_id_t space,const page_size_t & page_size,mtr_t * mtr)2629 static xdes_t *fseg_alloc_free_extent(fseg_inode_t *inode, space_id_t space,
2630 const page_size_t &page_size,
2631 mtr_t *mtr) {
2632 xdes_t *descr;
2633 ib_id_t seg_id;
2634 fil_addr_t first;
2635
2636 ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
2637 ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
2638 ut_d(fsp_space_modify_check(space, mtr));
2639
2640 if (flst_get_len(inode + FSEG_FREE) > 0) {
2641 /* Segment free list is not empty, allocate from it */
2642
2643 first = flst_get_first(inode + FSEG_FREE, mtr);
2644
2645 descr = xdes_lst_get_descriptor(space, page_size, first, mtr);
2646 } else {
2647 /* Segment free list was empty. */
2648
2649 /* Check if we can allocate an extent from free frag
2650 list of tablespace. */
2651 descr = fsp_alloc_xdes_free_frag(space, inode, page_size, mtr);
2652
2653 if (descr != nullptr) {
2654 return (descr);
2655 }
2656
2657 /* Allocate from space */
2658 descr = fsp_alloc_free_extent(space, page_size, 0, mtr);
2659
2660 if (descr == nullptr) {
2661 return (nullptr);
2662 }
2663
2664 seg_id = mach_read_from_8(inode + FSEG_ID);
2665
2666 xdes_set_segment_id(descr, seg_id, XDES_FSEG, mtr);
2667 flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr);
2668
2669 /* Try to fill the segment free list */
2670 fseg_fill_free_list(inode, space, page_size,
2671 xdes_get_offset(descr) + FSP_EXTENT_SIZE, mtr);
2672 }
2673
2674 return (descr);
2675 }
2676
2677 /** Allocates a single free page from a segment.
2678 This function implements the intelligent allocation strategy which tries to
2679 minimize file space fragmentation.
2680 @param[in,out] space tablespace
2681 @param[in] page_size page size
2682 @param[in,out] seg_inode segment inode
2683 @param[in] hint hint of which page would be desirable
2684 @param[in] direction if the new page is needed because of
2685 an index page split, and records are inserted there in order, into which
2686 direction they go alphabetically: FSP_DOWN, FSP_UP, FSP_NO_DIR
2687 @param[in] rw_latch RW_SX_LATCH, RW_X_LATCH
2688 @param[in,out] mtr mini-transaction
2689 @param[in,out] init_mtr mtr or another mini-transaction in
2690 which the page should be initialized. If init_mtr != mtr, but the page is
2691 already latched in mtr, do not initialize the page */
2692 #ifdef UNIV_DEBUG
2693 /**
2694 @param[in] has_done_reservation TRUE if the space has already been
2695 reserved, in this case we will never return NULL */
2696 #endif /* UNIV_DEBUG */
2697 /**
2698 @retval NULL if no page could be allocated
2699 @retval block rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
2700 (init_mtr == mtr, or the page was not previously freed in mtr)
2701 @retval block (not allocated or initialized) otherwise */
fseg_alloc_free_page_low(fil_space_t * space,const page_size_t & page_size,fseg_inode_t * seg_inode,page_no_t hint,byte direction,rw_lock_type_t rw_latch,mtr_t * mtr,mtr_t * init_mtr,ibool has_done_reservation)2702 static buf_block_t *fseg_alloc_free_page_low(fil_space_t *space,
2703 const page_size_t &page_size,
2704 fseg_inode_t *seg_inode,
2705 page_no_t hint, byte direction,
2706 rw_lock_type_t rw_latch,
2707 mtr_t *mtr, mtr_t *init_mtr
2708 #ifdef UNIV_DEBUG
2709 ,
2710 ibool has_done_reservation
2711 #endif /* UNIV_DEBUG */
2712 ) {
2713 fsp_header_t *space_header;
2714 ib_id_t seg_id;
2715 ulint used;
2716 ulint reserved;
2717 xdes_t *descr; /*!< extent of the hinted page */
2718 page_no_t ret_page; /*!< the allocated page offset, FIL_NULL
2719 if could not be allocated */
2720 xdes_t *ret_descr; /*!< the extent of the allocated page */
2721 ulint n;
2722 const space_id_t space_id = space->id;
2723
2724 ut_ad(mtr);
2725 ut_ad((direction >= FSP_UP) && (direction <= FSP_NO_DIR));
2726 ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
2727 ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
2728 ut_ad(space->purpose == FIL_TYPE_TEMPORARY ||
2729 space->purpose == FIL_TYPE_TABLESPACE);
2730
2731 seg_id = mach_read_from_8(seg_inode + FSEG_ID);
2732
2733 ut_ad(seg_id);
2734 ut_d(fsp_space_modify_check(space_id, mtr));
2735 ut_ad(fil_page_get_type(page_align(seg_inode)) == FIL_PAGE_INODE);
2736
2737 reserved =
2738 fseg_n_reserved_pages_low(space_id, page_size, seg_inode, &used, mtr);
2739
2740 space_header = fsp_get_space_header(space_id, page_size, mtr);
2741
2742 descr = xdes_get_descriptor_with_space_hdr(space_header, space_id, hint, mtr);
2743 if (descr == nullptr) {
2744 /* Hint outside space or too high above free limit: reset
2745 hint */
2746 /* The file space header page is always allocated. */
2747 hint = 0;
2748 descr = xdes_get_descriptor(space_id, hint, page_size, mtr);
2749 }
2750
2751 /* In the big if-else below we look for ret_page and ret_descr */
2752 /*-------------------------------------------------------------*/
2753 if (xdes_in_segment(descr, seg_id, mtr) &&
2754 (xdes_mtr_get_bit(descr, XDES_FREE_BIT, hint % FSP_EXTENT_SIZE, mtr) ==
2755 TRUE)) {
2756 take_hinted_page:
2757 /* 1. We can take the hinted page
2758 =================================*/
2759 ret_descr = descr;
2760 ret_page = hint;
2761 /* Skip the check for extending the tablespace. If the
2762 page hint were not within the size of the tablespace,
2763 we would have got (descr == NULL) above and reset the hint. */
2764 goto got_hinted_page;
2765 /*-----------------------------------------------------------*/
2766 } else if (xdes_get_state(descr, mtr) == XDES_FREE &&
2767 reserved - used < reserved / FSEG_FILLFACTOR &&
2768 used >= FSEG_FRAG_LIMIT) {
2769 /* 2. We allocate the free extent from space and can take
2770 =========================================================
2771 the hinted page
2772 ===============*/
2773 ret_descr = fsp_alloc_free_extent(space_id, page_size, hint, mtr);
2774
2775 ut_a(ret_descr == descr);
2776
2777 xdes_set_segment_id(ret_descr, seg_id, XDES_FSEG, mtr);
2778 flst_add_last(seg_inode + FSEG_FREE, ret_descr + XDES_FLST_NODE, mtr);
2779
2780 /* Try to fill the segment free list */
2781 fseg_fill_free_list(seg_inode, space_id, page_size, hint + FSP_EXTENT_SIZE,
2782 mtr);
2783 goto take_hinted_page;
2784 /*-----------------------------------------------------------*/
2785 } else if ((direction != FSP_NO_DIR) &&
2786 ((reserved - used) < reserved / FSEG_FILLFACTOR) &&
2787 (used >= FSEG_FRAG_LIMIT) &&
2788 (!!(ret_descr = fseg_alloc_free_extent(seg_inode, space_id,
2789 page_size, mtr)))) {
2790 /* 3. We take any free extent (which was already assigned above
2791 ===============================================================
2792 in the if-condition to ret_descr) and take the lowest or
2793 ========================================================
2794 highest page in it, depending on the direction
2795 ==============================================*/
2796 ret_page = xdes_get_offset(ret_descr);
2797
2798 if (direction == FSP_DOWN) {
2799 ret_page += FSP_EXTENT_SIZE - 1;
2800 } else if (xdes_get_state(ret_descr, mtr) == XDES_FSEG_FRAG) {
2801 ret_page += xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE, 0, mtr);
2802 }
2803
2804 ut_ad(!has_done_reservation || ret_page != FIL_NULL);
2805 /*-----------------------------------------------------------*/
2806 } else if (xdes_in_segment(descr, seg_id, mtr) &&
2807 (!xdes_is_full(descr, mtr))) {
2808 /* 4. We can take the page from the same extent as the
2809 ======================================================
2810 hinted page (and the extent already belongs to the
2811 ==================================================
2812 segment)
2813 ========*/
2814 ret_descr = descr;
2815 ret_page = xdes_get_offset(ret_descr) +
2816 xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
2817 hint % FSP_EXTENT_SIZE, mtr);
2818 ut_ad(!has_done_reservation || ret_page != FIL_NULL);
2819 /*-----------------------------------------------------------*/
2820 } else if (used < reserved) {
2821 /* 5. We take any unused page from the segment
2822 ==============================================*/
2823 fil_addr_t first;
2824
2825 if (flst_get_len(seg_inode + FSEG_NOT_FULL) > 0) {
2826 first = flst_get_first(seg_inode + FSEG_NOT_FULL, mtr);
2827 } else if (flst_get_len(seg_inode + FSEG_FREE) > 0) {
2828 first = flst_get_first(seg_inode + FSEG_FREE, mtr);
2829 } else {
2830 ut_ad(!has_done_reservation);
2831 return (nullptr);
2832 }
2833
2834 ret_descr = xdes_lst_get_descriptor(space_id, page_size, first, mtr);
2835 ret_page = xdes_get_offset(ret_descr) +
2836 xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE, 0, mtr);
2837 ut_ad(!has_done_reservation || ret_page != FIL_NULL);
2838 /*-----------------------------------------------------------*/
2839 } else if (used < FSEG_FRAG_LIMIT) {
2840 /* 6. We allocate an individual page from the space
2841 ===================================================*/
2842 buf_block_t *block =
2843 fsp_alloc_free_page(space_id, page_size, hint, rw_latch, mtr, init_mtr);
2844
2845 ut_ad(!has_done_reservation || block != nullptr);
2846
2847 if (block != nullptr) {
2848 /* Put the page in the fragment page array of the
2849 segment */
2850 n = fseg_find_free_frag_page_slot(seg_inode, mtr);
2851 ut_a(n != ULINT_UNDEFINED);
2852
2853 fseg_set_nth_frag_page_no(seg_inode, n, block->page.id.page_no(), mtr);
2854 }
2855
2856 /* fsp_alloc_free_page() invoked fsp_init_file_page()
2857 already. */
2858 return (block);
2859 /*-----------------------------------------------------------*/
2860 } else {
2861 /* 7. We allocate a new extent and take its first page
2862 ======================================================*/
2863 ret_descr = fseg_alloc_free_extent(seg_inode, space_id, page_size, mtr);
2864
2865 if (ret_descr == nullptr) {
2866 ret_page = FIL_NULL;
2867 ut_ad(!has_done_reservation);
2868 } else {
2869 const xdes_state_t state = xdes_get_state(ret_descr, mtr);
2870 ret_page = xdes_get_offset(ret_descr);
2871
2872 if (state == XDES_FSEG_FRAG) {
2873 ret_page += xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE, 0, mtr);
2874 }
2875
2876 ut_ad(!has_done_reservation || ret_page != FIL_NULL);
2877 }
2878 }
2879
2880 if (ret_page == FIL_NULL) {
2881 /* Page could not be allocated */
2882
2883 ut_ad(!has_done_reservation);
2884 return (nullptr);
2885 }
2886
2887 if (space->size <= ret_page && !fsp_is_system_or_temp_tablespace(space_id)) {
2888 /* It must be that we are extending a single-table
2889 tablespace whose size is still < 64 pages */
2890
2891 if (ret_page >= FSP_EXTENT_SIZE) {
2892 ib::error(ER_IB_MSG_420)
2893 << "Error (2): trying to extend"
2894 " a single-table tablespace "
2895 << space_id << " by single page(s) though the"
2896 << " space size " << space->size << ". Page no " << ret_page << ".";
2897 ut_ad(!has_done_reservation);
2898 return (nullptr);
2899 }
2900
2901 if (!fsp_try_extend_data_file_with_pages(space, ret_page, space_header,
2902 mtr)) {
2903 /* No disk space left */
2904 ut_ad(!has_done_reservation);
2905 return (nullptr);
2906 }
2907 }
2908
2909 got_hinted_page:
2910 /* ret_descr == NULL if the block was allocated from free_frag
2911 (XDES_FREE_FRAG) */
2912 if (ret_descr != nullptr) {
2913 /* At this point we know the extent and the page offset.
2914 The extent is still in the appropriate list (FSEG_NOT_FULL
2915 or FSEG_FREE), and the page is not yet marked as used. */
2916
2917 ut_ad(xdes_get_descriptor(space_id, ret_page, page_size, mtr) == ret_descr);
2918
2919 ut_ad(xdes_mtr_get_bit(ret_descr, XDES_FREE_BIT, ret_page % FSP_EXTENT_SIZE,
2920 mtr));
2921
2922 fseg_mark_page_used(space_id, page_size, seg_inode, ret_page, ret_descr,
2923 mtr);
2924 }
2925
2926 /* Exclude Encryption flag as it might have been changed In Memory flags but
2927 not on disk. */
2928 ut_ad(!((space->flags ^ mach_read_from_4(FSP_SPACE_FLAGS + space_header)) &
2929 ~(FSP_FLAGS_MASK_ENCRYPTION)));
2930
2931 return (fsp_page_create(page_id_t(space_id, ret_page), page_size, rw_latch,
2932 mtr, init_mtr));
2933 }
2934
2935 /** Allocates a single free page from a segment. This function implements
2936 the intelligent allocation strategy which tries to minimize file space
2937 fragmentation.
2938 @retval NULL if no page could be allocated
2939 @retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
2940 (init_mtr == mtr, or the page was not previously freed in mtr)
2941 @retval block (not allocated or initialized) otherwise */
fseg_alloc_free_page_general(fseg_header_t * seg_header,page_no_t hint,byte direction,ibool has_done_reservation,mtr_t * mtr,mtr_t * init_mtr)2942 buf_block_t *fseg_alloc_free_page_general(
2943 fseg_header_t *seg_header, /*!< in/out: segment header */
2944 page_no_t hint, /*!< in: hint of which page would be
2945 desirable */
2946 byte direction, /*!< in: if the new page is needed because
2947 of an index page split, and records are
2948 inserted there in order, into which
2949 direction they go alphabetically: FSP_DOWN,
2950 FSP_UP, FSP_NO_DIR */
2951 ibool has_done_reservation, /*!< in: TRUE if the caller has
2952 already done the reservation for the page
2953 with fsp_reserve_free_extents, then there
2954 is no need to do the check for this individual
2955 page */
2956 mtr_t *mtr, /*!< in/out: mini-transaction */
2957 mtr_t *init_mtr) /*!< in/out: mtr or another mini-transaction
2958 in which the page should be initialized.
2959 If init_mtr!=mtr, but the page is already
2960 latched in mtr, do not initialize the page. */
2961 {
2962 fseg_inode_t *inode;
2963 space_id_t space_id;
2964 buf_block_t *iblock;
2965 buf_block_t *block;
2966 ulint n_reserved = 0;
2967
2968 space_id = page_get_space_id(page_align(seg_header));
2969
2970 fil_space_t *space = fil_space_get(space_id);
2971
2972 mtr_x_lock_space(space, mtr);
2973
2974 const page_size_t page_size(space->flags);
2975
2976 if (rw_lock_get_x_lock_count(&space->latch) == 1) {
2977 /* This thread did not own the latch before this call: free
2978 excess pages from the insert buffer free list */
2979
2980 if (space_id == IBUF_SPACE_ID) {
2981 ibuf_free_excess_pages();
2982 }
2983 }
2984
2985 inode = fseg_inode_get(seg_header, space_id, page_size, mtr, &iblock);
2986 fil_block_check_type(iblock, FIL_PAGE_INODE, mtr);
2987
2988 if (!has_done_reservation &&
2989 !fsp_reserve_free_extents(&n_reserved, space_id, 2, FSP_NORMAL, mtr)) {
2990 return (nullptr);
2991 }
2992
2993 block = fseg_alloc_free_page_low(space, page_size, inode, hint, direction,
2994 RW_X_LATCH, mtr, init_mtr
2995 #ifdef UNIV_DEBUG
2996 ,
2997 has_done_reservation
2998 #endif /* UNIV_DEBUG */
2999 );
3000
3001 /* The allocation cannot fail if we have already reserved a
3002 space for the page. */
3003 ut_ad(!has_done_reservation || block != nullptr);
3004
3005 if (!has_done_reservation) {
3006 fil_space_release_free_extents(space_id, n_reserved);
3007 }
3008
3009 return (block);
3010 }
3011
3012 /** Check that we have at least n_pages frag pages free in the first extent
3013 of a single-table tablespace, and they are also physically initialized to
3014 the data file. That is we have already extended the data file so that those
3015 pages are inside the data file. If not, this function extends the tablespace
3016 with pages.
3017 @param[in,out] space tablespace
3018 @param[in,out] space_header tablespace header, x-latched
3019 @param[in] size size of the tablespace in pages,
3020 must be less than FSP_EXTENT_SIZE
3021 @param[in,out] mtr mini-transaction
3022 @param[in] n_pages number of pages to reserve
3023 @return true if there were at least n_pages free pages, or we were able
3024 to extend */
fsp_reserve_free_pages(fil_space_t * space,fsp_header_t * space_header,page_no_t size,mtr_t * mtr,page_no_t n_pages)3025 static bool fsp_reserve_free_pages(fil_space_t *space,
3026 fsp_header_t *space_header, page_no_t size,
3027 mtr_t *mtr, page_no_t n_pages) {
3028 xdes_t *descr;
3029
3030 ut_a(!fsp_is_system_tablespace(space->id));
3031 ut_a(!fsp_is_global_temporary(space->id));
3032 ut_a(size < FSP_EXTENT_SIZE);
3033
3034 descr = xdes_get_descriptor_with_space_hdr(space_header, space->id, 0, mtr);
3035 page_no_t n_used = xdes_get_n_used(descr, mtr);
3036
3037 ut_a(n_used <= size);
3038
3039 return (size >= n_used + n_pages ||
3040 fsp_try_extend_data_file_with_pages(space, n_used + n_pages - 1,
3041 space_header, mtr));
3042 }
3043
3044 /** Reserves free pages from a tablespace. All mini-transactions which may
3045 use several pages from the tablespace should call this function beforehand
3046 and reserve enough free extents so that they certainly will be able
3047 to do their operation, like a B-tree page split, fully. Reservations
3048 must be released with function fil_space_release_free_extents!
3049
3050 The alloc_type below has the following meaning: FSP_NORMAL means an
3051 operation which will probably result in more space usage, like an
3052 insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
3053 deleting rows, then this allocation will in the long run result in
3054 less space usage (after a purge); FSP_CLEANING means allocation done
3055 in a physical record delete (like in a purge) or other cleaning operation
3056 which will result in less space usage in the long run. We prefer the latter
3057 two types of allocation: when space is scarce, FSP_NORMAL allocations
3058 will not succeed, but the latter two allocations will succeed, if possible.
3059 The purpose is to avoid dead end where the database is full but the
3060 user cannot free any space because these freeing operations temporarily
3061 reserve some space.
3062
3063 Single-table tablespaces whose size is < FSP_EXTENT_SIZE pages are a special
3064 case. In this function we would liberally reserve several extents for
3065 every page split or merge in a B-tree. But we do not want to waste disk space
3066 if the table only occupies < FSP_EXTENT_SIZE pages. That is why we apply
3067 different rules in that special case, just ensuring that there are n_pages
3068 free pages available.
3069
3070 @param[out] n_reserved number of extents actually reserved; if we
3071 return true and the tablespace size is <
3072 FSP_EXTENT_SIZE pages, then this can be 0,
3073 otherwise it is n_ext
3074 @param[in] space_id tablespace identifier
3075 @param[in] n_ext number of extents to reserve
3076 @param[in] alloc_type page reservation type (FSP_BLOB, etc)
3077 @param[in,out] mtr the mini transaction
3078 @param[in] n_pages for small tablespaces (tablespace size is
3079 less than FSP_EXTENT_SIZE), number of free
3080 pages to reserve.
3081 @return true if we were able to make the reservation */
fsp_reserve_free_extents(ulint * n_reserved,space_id_t space_id,ulint n_ext,fsp_reserve_t alloc_type,mtr_t * mtr,page_no_t n_pages)3082 bool fsp_reserve_free_extents(ulint *n_reserved, space_id_t space_id,
3083 ulint n_ext, fsp_reserve_t alloc_type, mtr_t *mtr,
3084 page_no_t n_pages) {
3085 fsp_header_t *space_header;
3086 ulint n_free_list_ext;
3087 page_no_t free_limit;
3088 page_no_t size;
3089 ulint n_free;
3090 ulint n_free_up;
3091 ulint reserve;
3092 DBUG_TRACE;
3093
3094 *n_reserved = n_ext;
3095
3096 fil_space_t *space = fil_space_get(space_id);
3097
3098 mtr_x_lock_space(space, mtr);
3099
3100 const page_size_t page_size(space->flags);
3101
3102 space_header = fsp_get_space_header(space_id, page_size, mtr);
3103 try_again:
3104 size = mach_read_from_4(space_header + FSP_SIZE);
3105 ut_ad(size == space->size_in_header);
3106
3107 if (size < FSP_EXTENT_SIZE && n_pages < FSP_EXTENT_SIZE / 2) {
3108 /* Use different rules for small single-table tablespaces */
3109 *n_reserved = 0;
3110 return fsp_reserve_free_pages(space, space_header, size, mtr, n_pages);
3111 }
3112
3113 n_free_list_ext = flst_get_len(space_header + FSP_FREE);
3114 ut_ad(space->free_len == n_free_list_ext);
3115
3116 free_limit = mtr_read_ulint(space_header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
3117 ut_ad(space->free_limit == free_limit);
3118
3119 /* Below we play safe when counting free extents above the free limit:
3120 some of them will contain extent descriptor pages, and therefore
3121 will not be free extents */
3122
3123 if (size >= free_limit) {
3124 n_free_up = (size - free_limit) / FSP_EXTENT_SIZE;
3125 } else {
3126 ut_ad(alloc_type == FSP_BLOB);
3127 n_free_up = 0;
3128 }
3129
3130 if (n_free_up > 0) {
3131 n_free_up--;
3132 n_free_up -= n_free_up / (page_size.physical() / FSP_EXTENT_SIZE);
3133 }
3134
3135 n_free = n_free_list_ext + n_free_up;
3136
3137 switch (alloc_type) {
3138 case FSP_NORMAL:
3139 /* We reserve 1 extent + 0.5 % of the space size to undo logs
3140 and 1 extent + 0.5 % to cleaning operations; NOTE: this source
3141 code is duplicated in the function below! */
3142
3143 reserve = 2 + ((size / FSP_EXTENT_SIZE) * 2) / 200;
3144
3145 if (n_free <= reserve + n_ext) {
3146 goto try_to_extend;
3147 }
3148 break;
3149 case FSP_UNDO:
3150 /* We reserve 0.5 % of the space size to cleaning operations */
3151
3152 reserve = 1 + ((size / FSP_EXTENT_SIZE) * 1) / 200;
3153
3154 if (n_free <= reserve + n_ext) {
3155 goto try_to_extend;
3156 }
3157 break;
3158 case FSP_CLEANING:
3159 case FSP_BLOB:
3160 break;
3161 default:
3162 ut_error;
3163 }
3164
3165 if (fil_space_reserve_free_extents(space_id, n_free, n_ext)) {
3166 return true;
3167 }
3168 try_to_extend:
3169 if (fsp_try_extend_data_file(space, space_header, mtr)) {
3170 goto try_again;
3171 }
3172
3173 return false;
3174 }
3175
3176 /** Calculate how many KiB of new data we will be able to insert to the
3177 tablespace without running out of space.
3178 @param[in] space_id tablespace ID
3179 @return available space in KiB
3180 @retval UINTMAX_MAX if unknown */
fsp_get_available_space_in_free_extents(space_id_t space_id)3181 uintmax_t fsp_get_available_space_in_free_extents(space_id_t space_id) {
3182 fil_space_t *space = fil_space_acquire(space_id);
3183
3184 if (space == nullptr) {
3185 return (UINTMAX_MAX);
3186 }
3187
3188 auto n_free_extents = fsp_get_available_space_in_free_extents(space);
3189
3190 fil_space_release(space);
3191
3192 return (n_free_extents);
3193 }
3194
3195 /** Calculate how many KiB of new data we will be able to insert to the
3196 tablespace without running out of space. Start with a space object that has
3197 been acquired by the caller who holds it for the calculation,
3198 @param[in] space tablespace object from fil_space_acquire()
3199 @return available space in KiB */
fsp_get_available_space_in_free_extents(const fil_space_t * space)3200 uintmax_t fsp_get_available_space_in_free_extents(const fil_space_t *space) {
3201 ut_ad(space->n_pending_ops > 0);
3202
3203 ulint size_in_header = space->size_in_header;
3204 if (size_in_header < FSP_EXTENT_SIZE) {
3205 return (0); /* TODO: count free frag pages and
3206 return a value based on that */
3207 }
3208
3209 /* Below we play safe when counting free extents above the free limit:
3210 some of them will contain extent descriptor pages, and therefore
3211 will not be free extents */
3212 ut_ad(size_in_header >= space->free_limit);
3213 ulint n_free_up = (size_in_header - space->free_limit) / FSP_EXTENT_SIZE;
3214
3215 page_size_t page_size(space->flags);
3216 if (n_free_up > 0) {
3217 n_free_up--;
3218 n_free_up -= n_free_up / (page_size.physical() / FSP_EXTENT_SIZE);
3219 }
3220
3221 /* We reserve 1 extent + 0.5 % of the space size to undo logs
3222 and 1 extent + 0.5 % to cleaning operations; NOTE: this source
3223 code is duplicated in the function above! */
3224
3225 ulint reserve = 2 + ((size_in_header / FSP_EXTENT_SIZE) * 2) / 200;
3226 ulint n_free = space->free_len + n_free_up;
3227
3228 if (reserve > n_free) {
3229 return (0);
3230 }
3231
3232 return (static_cast<uintmax_t>(n_free - reserve) * FSP_EXTENT_SIZE *
3233 (page_size.physical() / 1024));
3234 }
3235
fseg_mark_page_used(space_id_t space_id,const page_size_t & page_size,fseg_inode_t * seg_inode,page_no_t page,xdes_t * descr,mtr_t * mtr)3236 static void fseg_mark_page_used(space_id_t space_id,
3237 const page_size_t &page_size,
3238 fseg_inode_t *seg_inode, page_no_t page,
3239 xdes_t *descr, mtr_t *mtr) {
3240 uint32_t not_full_n_used;
3241
3242 ut_ad(fil_page_get_type(page_align(seg_inode)) == FIL_PAGE_INODE);
3243 ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
3244 ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
3245
3246 ut_ad(mach_read_from_8(seg_inode + FSEG_ID) ==
3247 xdes_get_segment_id(descr, mtr));
3248
3249 if (xdes_is_free(descr, mtr)) {
3250 /* We move the extent from the free list to the
3251 NOT_FULL list */
3252 flst_remove(seg_inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr);
3253 flst_add_last(seg_inode + FSEG_NOT_FULL, descr + XDES_FLST_NODE, mtr);
3254 }
3255
3256 ut_ad(xdes_mtr_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr));
3257
3258 /* We mark the page as used */
3259 xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, FALSE, mtr);
3260
3261 File_segment_inode fseg_inode(space_id, page_size, seg_inode, mtr);
3262
3263 not_full_n_used = fseg_inode.read_not_full_n_used();
3264 not_full_n_used++;
3265 fseg_inode.write_not_full_n_used(not_full_n_used);
3266
3267 if (xdes_is_full(descr, mtr)) {
3268 /* We move the extent from the NOT_FULL list to the
3269 FULL list */
3270 flst_remove(seg_inode + FSEG_NOT_FULL, descr + XDES_FLST_NODE, mtr);
3271 flst_add_last(seg_inode + FSEG_FULL, descr + XDES_FLST_NODE, mtr);
3272
3273 ut_ad(not_full_n_used >= FSP_EXTENT_SIZE);
3274 fseg_inode.write_not_full_n_used(not_full_n_used - FSP_EXTENT_SIZE);
3275 }
3276 }
3277
3278 /** Frees a single page of a segment.
3279 @param[in] seg_inode segment inode
3280 @param[in] page_id page id
3281 @param[in] page_size page size
3282 @param[in] ahi whether we may need to drop the adaptive
3283 hash index
3284 @param[in,out] mtr mini-transaction */
fseg_free_page_low(fseg_inode_t * seg_inode,const page_id_t & page_id,const page_size_t & page_size,bool ahi,mtr_t * mtr)3285 static void fseg_free_page_low(fseg_inode_t *seg_inode,
3286 const page_id_t &page_id,
3287 const page_size_t &page_size, bool ahi,
3288 mtr_t *mtr) {
3289 xdes_t *descr;
3290 uint32_t not_full_n_used;
3291 ib_id_t descr_id;
3292 ib_id_t seg_id;
3293 ulint i;
3294 DBUG_TRACE;
3295
3296 ut_ad(seg_inode != nullptr);
3297 ut_ad(mtr != nullptr);
3298 ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
3299 ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
3300 ut_d(fsp_space_modify_check(page_id.space(), mtr));
3301
3302 /* Drop search system page hash index if the page is found in
3303 the pool and is hashed */
3304
3305 if (ahi) {
3306 btr_search_drop_page_hash_when_freed(page_id, page_size);
3307 }
3308
3309 descr =
3310 xdes_get_descriptor(page_id.space(), page_id.page_no(), page_size, mtr);
3311
3312 if (xdes_mtr_get_bit(descr, XDES_FREE_BIT,
3313 page_id.page_no() % FSP_EXTENT_SIZE, mtr)) {
3314 fputs("InnoDB: Dump of the tablespace extent descriptor: ", stderr);
3315 ut_print_buf(stderr, descr, 40);
3316
3317 ib::error(ER_IB_MSG_421) << "InnoDB is trying to free page " << page_id
3318 << " though it is already marked as free in the"
3319 " tablespace! The tablespace free space info is"
3320 " corrupt. You may need to dump your tables and"
3321 " recreate the whole database!";
3322 crash:
3323 ib::fatal(ER_IB_MSG_422) << FORCE_RECOVERY_MSG;
3324 }
3325
3326 xdes_state_t state = xdes_get_state(descr, mtr);
3327
3328 switch (state) {
3329 case XDES_FSEG:
3330 case XDES_FSEG_FRAG:
3331 /* The page belongs to a segment */
3332 break;
3333 case XDES_FREE_FRAG:
3334 case XDES_FULL_FRAG:
3335 /* The page is in the fragment pages of the segment */
3336
3337 for (i = 0;; i++) {
3338 if (fseg_get_nth_frag_page_no(seg_inode, i, mtr) == page_id.page_no()) {
3339 fseg_set_nth_frag_page_no(seg_inode, i, FIL_NULL, mtr);
3340 break;
3341 }
3342 }
3343
3344 fsp_free_page(page_id, page_size, mtr);
3345
3346 return;
3347 case XDES_FREE:
3348 case XDES_NOT_INITED:
3349 ut_error;
3350 }
3351
3352 /* If we get here, the page is in some extent of the segment */
3353 File_segment_inode fseg_inode(page_id.space(), page_size, seg_inode, mtr);
3354
3355 descr_id = xdes_get_segment_id(descr);
3356 seg_id = mach_read_from_8(seg_inode + FSEG_ID);
3357
3358 if (UNIV_UNLIKELY(descr_id != seg_id)) {
3359 fputs("InnoDB: Dump of the tablespace extent descriptor: ", stderr);
3360 ut_print_buf(stderr, descr, 40);
3361 fputs("\nInnoDB: Dump of the segment inode: ", stderr);
3362 ut_print_buf(stderr, seg_inode, 40);
3363 putc('\n', stderr);
3364
3365 ib::error(ER_IB_MSG_423)
3366 << "InnoDB is trying to free page " << page_id
3367 << ", which does not belong to segment " << descr_id
3368 << " but belongs to segment " << seg_id << ".";
3369 goto crash;
3370 }
3371
3372 not_full_n_used = fseg_inode.read_not_full_n_used();
3373 if (xdes_is_full(descr, mtr)) {
3374 /* The fragment is full: move it to another list */
3375 flst_remove(seg_inode + FSEG_FULL, descr + XDES_FLST_NODE, mtr);
3376 flst_add_last(seg_inode + FSEG_NOT_FULL, descr + XDES_FLST_NODE, mtr);
3377 not_full_n_used += FSP_EXTENT_SIZE - 1;
3378 } else {
3379 ut_a(not_full_n_used > 0);
3380 not_full_n_used -= 1;
3381 }
3382
3383 const page_no_t bit = page_id.page_no() % FSP_EXTENT_SIZE;
3384
3385 xdes_set_bit(descr, XDES_FREE_BIT, bit, TRUE, mtr);
3386 xdes_set_bit(descr, XDES_CLEAN_BIT, bit, TRUE, mtr);
3387
3388 page_no_t n_used = xdes_get_n_used(descr, mtr);
3389
3390 ut_ad(state != XDES_FSEG_FRAG || (bit != 0 && bit != 1));
3391 ut_ad(state != XDES_FSEG_FRAG || n_used > 1);
3392 ut_ad(xdes_is_leasable(descr, page_size, mtr) ==
3393 (state == XDES_FSEG_FRAG && n_used == XDES_FRAG_N_USED));
3394
3395 /* A leased fragment extent might have no more pages belonging to
3396 the segment.*/
3397 if (state == XDES_FSEG_FRAG && n_used == XDES_FRAG_N_USED) {
3398 n_used = 0;
3399
3400 ut_ad(not_full_n_used >= XDES_FRAG_N_USED);
3401 not_full_n_used -= XDES_FRAG_N_USED;
3402 }
3403
3404 if (n_used == 0) {
3405 /* The extent has become free: free it to space */
3406 flst_remove(seg_inode + FSEG_NOT_FULL, descr + XDES_FLST_NODE, mtr);
3407 fsp_free_extent(page_id, page_size, mtr);
3408 }
3409
3410 /* Update the FSEG_NOT_FULL_N_USED field after modifying the list. */
3411 fseg_inode.write_not_full_n_used(not_full_n_used);
3412 }
3413
3414 /** Frees a single page of a segment. */
fseg_free_page(fseg_header_t * seg_header,space_id_t space_id,page_no_t page,bool ahi,mtr_t * mtr)3415 void fseg_free_page(fseg_header_t *seg_header, /*!< in: segment header */
3416 space_id_t space_id, /*!< in: space id */
3417 page_no_t page, /*!< in: page offset */
3418 bool ahi, /*!< in: whether we may need to drop
3419 the adaptive hash index */
3420 mtr_t *mtr) /*!< in/out: mini-transaction */
3421 {
3422 DBUG_TRACE;
3423 fseg_inode_t *seg_inode;
3424 buf_block_t *iblock;
3425
3426 fil_space_t *space = fil_space_get(space_id);
3427
3428 mtr_x_lock_space(space, mtr);
3429
3430 const page_size_t page_size(space->flags);
3431
3432 DBUG_LOG("fseg_free_page", "space_id: " << space_id << ", page_no: " << page);
3433
3434 seg_inode = fseg_inode_get(seg_header, space_id, page_size, mtr, &iblock);
3435 fil_block_check_type(iblock, FIL_PAGE_INODE, mtr);
3436
3437 const page_id_t page_id(space_id, page);
3438
3439 fseg_free_page_low(seg_inode, page_id, page_size, ahi, mtr);
3440
3441 ut_d(buf_page_set_file_page_was_freed(page_id));
3442 }
3443
3444 /** Checks if a single page of a segment is free.
3445 @return true if free */
fseg_page_is_free(fseg_header_t * seg_header,space_id_t space_id,page_no_t page)3446 bool fseg_page_is_free(fseg_header_t *seg_header, /*!< in: segment header */
3447 space_id_t space_id, /*!< in: space id */
3448 page_no_t page) /*!< in: page offset */
3449 {
3450 mtr_t mtr;
3451 ibool is_free;
3452 xdes_t *descr;
3453 fseg_inode_t *seg_inode;
3454
3455 fil_space_t *space = fil_space_get(space_id);
3456
3457 mtr_start(&mtr);
3458
3459 mtr_x_lock_space(space, &mtr);
3460
3461 const page_size_t page_size(space->flags);
3462
3463 seg_inode = fseg_inode_get(seg_header, space_id, page_size, &mtr);
3464
3465 ut_a(seg_inode);
3466 ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
3467 ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
3468
3469 descr = xdes_get_descriptor(space_id, page, page_size, &mtr);
3470 ut_a(descr);
3471
3472 is_free =
3473 xdes_mtr_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, &mtr);
3474
3475 mtr_commit(&mtr);
3476
3477 return (is_free);
3478 }
3479
3480 /** Frees an extent of a segment to the space free list.
3481 @param[in] seg_inode segment inode
3482 @param[in] space space id
3483 @param[in] page a page in the extent
3484 @param[in] page_size page size
3485 @param[in] ahi whether we may need to drop the adaptive hash
3486 index
3487 @param[in,out] mtr mini-transaction */
fseg_free_extent(fseg_inode_t * seg_inode,space_id_t space,const page_size_t & page_size,page_no_t page,bool ahi,mtr_t * mtr)3488 static void fseg_free_extent(fseg_inode_t *seg_inode, space_id_t space,
3489 const page_size_t &page_size, page_no_t page,
3490 bool ahi, mtr_t *mtr) {
3491 page_no_t first_page_in_extent;
3492 xdes_t *descr;
3493 page_no_t i;
3494 File_segment_inode fseg_inode(space, page_size, seg_inode, mtr);
3495
3496 ut_ad(seg_inode != nullptr);
3497 ut_ad(mtr != nullptr);
3498
3499 descr = xdes_get_descriptor(space, page, page_size, mtr);
3500
3501 const xdes_state_t state = xdes_get_state(descr, mtr);
3502 ut_a(state == XDES_FSEG || state == XDES_FSEG_FRAG);
3503
3504 ut_a(!memcmp(descr + XDES_ID, seg_inode + FSEG_ID, 8));
3505 ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
3506 ut_d(fsp_space_modify_check(space, mtr));
3507
3508 first_page_in_extent = page - (page % FSP_EXTENT_SIZE);
3509
3510 if (ahi) {
3511 for (i = state == XDES_FSEG ? 0 : XDES_FRAG_N_USED; i < FSP_EXTENT_SIZE;
3512 i++) {
3513 if (!xdes_mtr_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
3514 /* Drop search system page hash index
3515 if the page is found in the pool and
3516 is hashed */
3517
3518 btr_search_drop_page_hash_when_freed(
3519 page_id_t(space, first_page_in_extent + i), page_size);
3520 }
3521 }
3522 }
3523
3524 if (xdes_is_full(descr, mtr)) {
3525 flst_remove(seg_inode + FSEG_FULL, descr + XDES_FLST_NODE, mtr);
3526 } else if (xdes_is_free(descr, mtr)) {
3527 flst_remove(seg_inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr);
3528 } else {
3529 flst_remove(seg_inode + FSEG_NOT_FULL, descr + XDES_FLST_NODE, mtr);
3530
3531 page_no_t not_full_n_used = fseg_inode.read_not_full_n_used();
3532
3533 page_no_t descr_n_used = xdes_get_n_used(descr, mtr);
3534 ut_a(not_full_n_used >= descr_n_used);
3535 fseg_inode.write_not_full_n_used(not_full_n_used - descr_n_used);
3536 }
3537
3538 fsp_free_extent(page_id_t(space, page), page_size, mtr);
3539
3540 #ifdef UNIV_DEBUG
3541 for (i = state == XDES_FSEG ? 0 : XDES_FRAG_N_USED; i < FSP_EXTENT_SIZE;
3542 i++) {
3543 buf_page_set_file_page_was_freed(
3544 page_id_t(space, first_page_in_extent + i));
3545 }
3546 #endif /* UNIV_DEBUG */
3547 }
3548
3549 /** Frees part of a segment. This function can be used to free a segment by
3550 repeatedly calling this function in different mini-transactions. Doing
3551 the freeing in a single mini-transaction might result in too big a
3552 mini-transaction.
3553 @return true if freeing completed */
fseg_free_step(fseg_header_t * header,bool ahi,mtr_t * mtr)3554 ibool fseg_free_step(
3555 fseg_header_t *header, /*!< in, own: segment header; NOTE: if the header
3556 resides on the first page of the frag list
3557 of the segment, this pointer becomes obsolete
3558 after the last freeing step */
3559 bool ahi, /*!< in: whether we may need to drop
3560 the adaptive hash index */
3561 mtr_t *mtr) /*!< in/out: mini-transaction */
3562 {
3563 ulint n;
3564 page_no_t page;
3565 xdes_t *descr;
3566 fseg_inode_t *inode;
3567 space_id_t space_id;
3568 page_no_t header_page;
3569
3570 DBUG_TRACE;
3571
3572 space_id = page_get_space_id(page_align(header));
3573 header_page = page_get_page_no(page_align(header));
3574
3575 fil_space_t *space = fil_space_get(space_id);
3576
3577 mtr_x_lock_space(space, mtr);
3578
3579 const page_size_t page_size(space->flags);
3580
3581 descr = xdes_get_descriptor(space_id, header_page, page_size, mtr);
3582
3583 /* Check that the header resides on a page which has not been
3584 freed yet */
3585
3586 ut_a(xdes_mtr_get_bit(descr, XDES_FREE_BIT, header_page % FSP_EXTENT_SIZE,
3587 mtr) == FALSE);
3588 buf_block_t *iblock;
3589
3590 inode = fseg_inode_try_get(header, space_id, page_size, mtr, &iblock);
3591
3592 if (inode == nullptr) {
3593 ib::info(ER_IB_MSG_424)
3594 << "Double free of inode from " << page_id_t(space_id, header_page);
3595 return TRUE;
3596 }
3597
3598 fil_block_check_type(iblock, FIL_PAGE_INODE, mtr);
3599 descr = fseg_get_first_extent(inode, space_id, page_size, mtr);
3600
3601 if (descr != nullptr) {
3602 /* Free the extent held by the segment */
3603 page = xdes_get_offset(descr);
3604
3605 fseg_free_extent(inode, space_id, page_size, page, ahi, mtr);
3606
3607 return FALSE;
3608 }
3609
3610 /* Free a frag page */
3611 n = fseg_find_last_used_frag_page_slot(inode, mtr);
3612
3613 if (n == ULINT_UNDEFINED) {
3614 /* Freeing completed: free the segment inode */
3615 fsp_free_seg_inode(space_id, page_size, inode, mtr);
3616
3617 return TRUE;
3618 }
3619
3620 fseg_free_page_low(
3621 inode, page_id_t(space_id, fseg_get_nth_frag_page_no(inode, n, mtr)),
3622 page_size, ahi, mtr);
3623
3624 n = fseg_find_last_used_frag_page_slot(inode, mtr);
3625
3626 if (n == ULINT_UNDEFINED) {
3627 /* Freeing completed: free the segment inode */
3628 fsp_free_seg_inode(space_id, page_size, inode, mtr);
3629
3630 return TRUE;
3631 }
3632
3633 return FALSE;
3634 }
3635
3636 /** Frees part of a segment. Differs from fseg_free_step because this function
3637 leaves the header page unfreed.
3638 @return true if freeing completed, except the header page */
fseg_free_step_not_header(fseg_header_t * header,bool ahi,mtr_t * mtr)3639 ibool fseg_free_step_not_header(
3640 fseg_header_t *header, /*!< in: segment header which must reside on
3641 the first fragment page of the segment */
3642 bool ahi, /*!< in: whether we may need to drop
3643 the adaptive hash index */
3644 mtr_t *mtr) /*!< in/out: mini-transaction */
3645 {
3646 ulint n;
3647 xdes_t *descr;
3648 fseg_inode_t *inode;
3649 space_id_t space_id;
3650 page_no_t page_no;
3651
3652 space_id = page_get_space_id(page_align(header));
3653
3654 fil_space_t *space = fil_space_get(space_id);
3655
3656 mtr_x_lock_space(space, mtr);
3657
3658 const page_size_t page_size(space->flags);
3659 buf_block_t *iblock;
3660
3661 inode = fseg_inode_get(header, space_id, page_size, mtr, &iblock);
3662 fil_block_check_type(iblock, FIL_PAGE_INODE, mtr);
3663
3664 descr = fseg_get_first_extent(inode, space_id, page_size, mtr);
3665
3666 if (descr != nullptr) {
3667 /* Free the extent held by the segment */
3668 page_no = xdes_get_offset(descr);
3669
3670 fseg_free_extent(inode, space_id, page_size, page_no, ahi, mtr);
3671
3672 return (FALSE);
3673 }
3674
3675 /* Free a frag page */
3676
3677 n = fseg_find_last_used_frag_page_slot(inode, mtr);
3678
3679 if (n == ULINT_UNDEFINED) {
3680 ut_error;
3681 }
3682
3683 page_no = fseg_get_nth_frag_page_no(inode, n, mtr);
3684
3685 if (page_no == page_get_page_no(page_align(header))) {
3686 return (TRUE);
3687 }
3688
3689 fseg_free_page_low(inode, page_id_t(space_id, page_no), page_size, ahi, mtr);
3690
3691 return (FALSE);
3692 }
3693
3694 /** Returns the first extent descriptor for a segment.
3695 We think of the extent lists of the segment catenated in the order
3696 FSEG_FULL -> FSEG_NOT_FULL -> FSEG_FREE.
3697 @param[in] inode segment inode
3698 @param[in] space_id space id
3699 @param[in] page_size page size
3700 @param[in,out] mtr mini-transaction
3701 @return the first extent descriptor, or NULL if none */
fseg_get_first_extent(fseg_inode_t * inode,space_id_t space_id,const page_size_t & page_size,mtr_t * mtr)3702 static xdes_t *fseg_get_first_extent(fseg_inode_t *inode, space_id_t space_id,
3703 const page_size_t &page_size, mtr_t *mtr) {
3704 fil_addr_t first;
3705 xdes_t *descr;
3706
3707 ut_ad(inode && mtr);
3708
3709 ut_ad(space_id == page_get_space_id(page_align(inode)));
3710 ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
3711
3712 first = fil_addr_null;
3713
3714 if (flst_get_len(inode + FSEG_FULL) > 0) {
3715 first = flst_get_first(inode + FSEG_FULL, mtr);
3716
3717 } else if (flst_get_len(inode + FSEG_NOT_FULL) > 0) {
3718 first = flst_get_first(inode + FSEG_NOT_FULL, mtr);
3719
3720 } else if (flst_get_len(inode + FSEG_FREE) > 0) {
3721 first = flst_get_first(inode + FSEG_FREE, mtr);
3722 }
3723
3724 if (first.page == FIL_NULL) {
3725 return (nullptr);
3726 }
3727 descr = xdes_lst_get_descriptor(space_id, page_size, first, mtr);
3728
3729 return (descr);
3730 }
3731
3732 #ifdef UNIV_BTR_PRINT
3733 /** Writes info of a segment. */
fseg_print_low(space_id_t space_id,const page_size_t & page_size,fseg_inode_t * inode,mtr_t * mtr)3734 static void fseg_print_low(space_id_t space_id, const page_size_t &page_size,
3735 fseg_inode_t *inode, /*!< in: segment inode */
3736 mtr_t *mtr) /*!< in/out: mini-transaction */
3737 {
3738 space_id_t space;
3739 ulint n_used;
3740 ulint n_frag;
3741 ulint n_free;
3742 ulint n_not_full;
3743 ulint n_full;
3744 ulint reserved;
3745 ulint used;
3746 page_no_t page_no;
3747 ib_id_t seg_id;
3748 File_segment_inode fseg_inode(space_id, page_size, inode, mtr);
3749
3750 ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_SX_FIX));
3751 space = page_get_space_id(page_align(inode));
3752 page_no = page_get_page_no(page_align(inode));
3753
3754 reserved = fseg_n_reserved_pages_low(space_id, page_size, inode, &used, mtr);
3755
3756 seg_id = mach_read_from_8(inode + FSEG_ID);
3757
3758 n_used = fseg_inode.read_not_full_n_used();
3759 n_frag = fseg_get_n_frag_pages(inode, mtr);
3760 n_free = flst_get_len(inode + FSEG_FREE);
3761 n_not_full = flst_get_len(inode + FSEG_NOT_FULL);
3762 n_full = flst_get_len(inode + FSEG_FULL);
3763
3764 ib::info(ER_IB_MSG_425) << "SEGMENT id " << seg_id << " space " << space
3765 << ";"
3766 << " page " << page_no << ";"
3767 << " res " << reserved << " used " << used << ";"
3768 << " full ext " << n_full << ";"
3769 << " fragm pages " << n_frag << ";"
3770 << " free extents " << n_free << ";"
3771 << " not full extents " << n_not_full << ": pages "
3772 << n_used;
3773
3774 ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
3775 }
3776
3777 /** Writes info of a segment. */
fseg_print(fseg_header_t * header,mtr_t * mtr)3778 void fseg_print(fseg_header_t *header, /*!< in: segment header */
3779 mtr_t *mtr) /*!< in/out: mini-transaction */
3780 {
3781 fseg_inode_t *inode;
3782 space_id_t space_id;
3783
3784 space_id = page_get_space_id(page_align(header));
3785
3786 fil_space_t *space = fil_space_get();
3787
3788 mtr_x_lock_space(space, mtr);
3789
3790 const page_size_t page_size(space->flags);
3791
3792 inode = fseg_inode_get(header, space_id, page_size, mtr);
3793
3794 fseg_print_low(space_id, page_size, inode, mtr);
3795 }
3796 #endif /* UNIV_BTR_PRINT */
3797
3798 /** Retrieve tablespace dictionary index root page number stored in the
3799 page 0
3800 @param[in] space tablespace id
3801 @param[in] page_size page size
3802 @param[in,out] mtr mini-transaction
3803 @return root page num of the tablspace dictionary index copy */
fsp_sdi_get_root_page_num(space_id_t space,const page_size_t & page_size,mtr_t * mtr)3804 page_no_t fsp_sdi_get_root_page_num(space_id_t space,
3805 const page_size_t &page_size, mtr_t *mtr) {
3806 ut_ad(mtr != nullptr);
3807
3808 buf_block_t *block =
3809 buf_page_get(page_id_t(space, 0), page_size, RW_S_LATCH, mtr);
3810 buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
3811
3812 page_t *page = buf_block_get_frame(block);
3813
3814 ulint sdi_offset = fsp_header_get_sdi_offset(page_size);
3815
3816 uint32_t sdi_ver = mach_read_from_4(page + sdi_offset);
3817
3818 if (sdi_ver != SDI_VERSION) {
3819 ib::warn(ER_IB_MSG_426) << "SDI version mismatch. Expected: " << SDI_VERSION
3820 << " Current version: " << sdi_ver;
3821 }
3822 ut_ad(sdi_ver == SDI_VERSION);
3823
3824 page_no_t root = mach_read_from_4(page + sdi_offset + 4);
3825
3826 ut_ad(root > 2);
3827
3828 return (root);
3829 }
3830
3831 /** Write SDI Index root page num to page 0 of tablespace.
3832 @param[in,out] page page 0 frame
3833 @param[in] page_size size of page
3834 @param[in] root_page_num root page number of SDI
3835 @param[in,out] mtr mini-transaction */
fsp_sdi_write_root_to_page(page_t * page,const page_size_t & page_size,page_no_t root_page_num,mtr_t * mtr)3836 void fsp_sdi_write_root_to_page(page_t *page, const page_size_t &page_size,
3837 page_no_t root_page_num, mtr_t *mtr) {
3838 ut_ad(page_get_page_no(page) == 0);
3839
3840 ulint sdi_offset = fsp_header_get_sdi_offset(page_size);
3841
3842 /* Write SDI version here. */
3843 mlog_write_ulint(page + sdi_offset, SDI_VERSION, MLOG_4BYTES, mtr);
3844
3845 /* Write SDI root page number */
3846 mlog_write_ulint(page + sdi_offset + 4, root_page_num, MLOG_4BYTES, mtr);
3847 }
3848
3849 #ifdef UNIV_DEBUG
3850 /** Print the file segment header to the given output stream.
3851 @param[in] out the output stream into which the object is printed.
3852 @retval the output stream into which the object was printed. */
to_stream(std::ostream & out) const3853 std::ostream &fseg_header::to_stream(std::ostream &out) const {
3854 const space_id_t space =
3855 mtr_read_ulint(m_header + FSEG_HDR_SPACE, MLOG_4BYTES, m_mtr);
3856
3857 const page_no_t page_no =
3858 mtr_read_ulint(m_header + FSEG_HDR_PAGE_NO, MLOG_4BYTES, m_mtr);
3859
3860 const ulint offset =
3861 mtr_read_ulint(m_header + FSEG_HDR_OFFSET, MLOG_2BYTES, m_mtr);
3862
3863 out << "[fseg_header_t: space=" << space << ", page=" << page_no
3864 << ", offset=" << offset << "]";
3865
3866 return (out);
3867 }
3868 #endif /* UNIV_DEBUG */
3869
3870 /** Determine if extent belongs to a given segment.
3871 @param[in] descr extent descriptor
3872 @param[in] seg_id segment identifier
3873 @param[in] mtr mini-transaction
3874 @return true if extent is part of the segment, false otherwise */
xdes_in_segment(const xdes_t * descr,ib_id_t seg_id,mtr_t * mtr)3875 static bool xdes_in_segment(const xdes_t *descr, ib_id_t seg_id, mtr_t *mtr) {
3876 const xdes_state_t state = xdes_get_state(descr, mtr);
3877 return ((state == XDES_FSEG || state == XDES_FSEG_FRAG) &&
3878 xdes_get_segment_id(descr, mtr) == seg_id);
3879 }
3880
3881 #ifdef UNIV_DEBUG
/** Builds an in-memory copy of a tablespace header.
The scalar members are read directly from the header bytes (4-byte fields,
and an 8-byte field for the segment id); the list members are each
constructed from the address of the corresponding list base node inside the
header together with the mini-transaction. m_notused is set to 0.
@param[in]  header  the tablespace header to read
@param[in]  mtr     mini-transaction passed on to the list members */
fsp_header_mem_t::fsp_header_mem_t(const fsp_header_t *header, mtr_t *mtr)
    : m_space_id(mach_read_from_4(header + FSP_SPACE_ID)),
      m_notused(0),
      m_fsp_size(mach_read_from_4(header + FSP_SIZE)),
      m_free_limit(mach_read_from_4(header + FSP_FREE_LIMIT)),
      m_flags(mach_read_from_4(header + FSP_SPACE_FLAGS)),
      m_fsp_frag_n_used(mach_read_from_4(header + FSP_FRAG_N_USED)),
      m_fsp_free(header + FSP_FREE, mtr),
      m_free_frag(header + FSP_FREE_FRAG, mtr),
      m_full_frag(header + FSP_FULL_FRAG, mtr),
      m_segid(mach_read_from_8(header + FSP_SEG_ID)),
      m_inodes_full(header + FSP_SEG_INODES_FULL, mtr),
      m_inodes_free(header + FSP_SEG_INODES_FREE, mtr) {}
3895
print(std::ostream & out) const3896 std::ostream &fsp_header_mem_t::print(std::ostream &out) const {
3897 out << "[fsp_header_t: "
3898 << "m_space_id=" << m_space_id << ", m_fsp_size=" << m_fsp_size
3899 << ", m_free_limit=" << m_free_limit << ", m_flags=" << m_flags
3900 << ", m_fsp_frag_n_used=" << m_fsp_frag_n_used
3901 << ", m_fsp_free=" << m_fsp_free << ", m_free_frag=" << m_free_frag
3902 << ", m_full_frag=" << m_full_frag << ", m_segid=" << m_segid
3903 << ", m_inodes_full=" << m_inodes_full
3904 << ", m_inodes_free=" << m_inodes_free << "]";
3905 return (out);
3906 }
3907
3908 /** Print the extent descriptor page in user-friendly format.
3909 @param[in] out the output file stream
3910 @param[in] xdes the extent descriptor page
3911 @param[in] page_no the page number of xdes page
3912 @param[in] mtr the mini transaction.
3913 @return None. */
xdes_page_print(std::ostream & out,const page_t * xdes,page_no_t page_no,mtr_t * mtr)3914 std::ostream &xdes_page_print(std::ostream &out, const page_t *xdes,
3915 page_no_t page_no, mtr_t *mtr) {
3916 out << "[Extent Descriptor Page: page_no=" << page_no << "\n";
3917
3918 if (page_no == 0) {
3919 const fsp_header_t *tmp = xdes + FSP_HEADER_OFFSET;
3920 fsp_header_mem_t header(tmp, mtr);
3921 out << header << "\n";
3922 }
3923
3924 ulint N = UNIV_PAGE_SIZE / FSP_EXTENT_SIZE;
3925
3926 for (ulint i = 0; i < N; ++i) {
3927 const byte *desc = xdes + XDES_ARR_OFFSET + (i * XDES_SIZE);
3928 xdes_mem_t x(desc);
3929
3930 if (x.is_valid()) {
3931 out << x << "\n";
3932 }
3933 }
3934 out << "]\n";
3935 return (out);
3936 }
3937
print(std::ostream & out) const3938 std::ostream &xdes_mem_t::print(std::ostream &out) const {
3939 ut_ad(m_xdes != nullptr);
3940
3941 const page_no_t page_no = xdes_get_offset(m_xdes);
3942 const ib_id_t seg_id = xdes_get_segment_id(m_xdes);
3943
3944 out << "[xdes_t: segid=" << seg_id << ",page=" << page_no
3945 << ",state=" << state_name() << ",bitmap=[";
3946 for (page_no_t i = 0; i < FSP_EXTENT_SIZE; ++i) {
3947 const bool is_free = xdes_get_bit(m_xdes, XDES_FREE_BIT, i);
3948 out << (is_free ? "." : "+");
3949 }
3950 out << "]]";
3951 return (out);
3952 }
3953
3954 /** Check if the tablespace size information is valid.
3955 @param[in] space_id the tablespace identifier
3956 @return true if valid, false if invalid. */
fsp_check_tablespace_size(space_id_t space_id)3957 bool fsp_check_tablespace_size(space_id_t space_id) {
3958 mtr_t mtr;
3959
3960 mtr_start(&mtr);
3961
3962 fil_space_t *space = fil_space_get(space_id);
3963
3964 mtr_x_lock_space(space, &mtr);
3965
3966 const page_size_t page_size(space->flags);
3967
3968 fsp_header_t *space_header = fsp_get_space_header(space_id, page_size, &mtr);
3969
3970 xdes_t *descr =
3971 xdes_get_descriptor_with_space_hdr(space_header, space->id, 0, &mtr);
3972
3973 ulint n_used = xdes_get_n_used(descr, &mtr);
3974 ulint size = mach_read_from_4(space_header + FSP_SIZE);
3975 ut_a(n_used <= size);
3976
3977 mtr_commit(&mtr);
3978
3979 return (true);
3980 }
3981 #endif /* UNIV_DEBUG */
3982
3983 /** Determine if the tablespace has SDI.
3984 @param[in] space_id Tablespace id
3985 @return DB_SUCCESS if SDI is present else DB_ERROR
3986 or DB_TABLESPACE_NOT_FOUND */
fsp_has_sdi(space_id_t space_id)3987 dberr_t fsp_has_sdi(space_id_t space_id) {
3988 fil_space_t *space = fil_space_acquire_silent(space_id);
3989 if (space == nullptr) {
3990 DBUG_EXECUTE_IF(
3991 "ib_sdi", ib::warn(ER_IB_MSG_427)
3992 << "Tablespace doesn't exist for space_id: " << space_id;
3993 ib::warn(ER_IB_MSG_428) << "Is the tablespace dropped or discarded";);
3994 return (DB_TABLESPACE_NOT_FOUND);
3995 }
3996
3997 #if defined(UNIV_DEBUG) && !defined(XTRABACKUP)
3998 mtr_t mtr;
3999 mtr.start();
4000 ut_ad(fsp_sdi_get_root_page_num(space_id, page_size_t(space->flags), &mtr) !=
4001 0);
4002 mtr.commit();
4003 #endif /* UNIV_DEBUG && !XTRABACKUP */
4004
4005 fil_space_release(space);
4006 DBUG_EXECUTE_IF(
4007 "ib_sdi", if (!FSP_FLAGS_HAS_SDI(space->flags)) {
4008 ib::warn(ER_IB_MSG_429)
4009 << "SDI doesn't exist in tablespace: " << space->name;
4010 });
4011 return (FSP_FLAGS_HAS_SDI(space->flags) ? DB_SUCCESS : DB_ERROR);
4012 }
4013
4014 /** Mark all pages in tablespace dirty
4015 @param[in] thd current thread
4016 @param[in] space_id tablespace id
4017 @param[in] space_flags tablespace flags
4018 @param[in] total_pages total pages in tablespace
4019 @param[in] from_page page number from where to start the operation */
mark_all_page_dirty_in_tablespace(THD * thd,space_id_t space_id,uint32_t space_flags,page_no_t total_pages,page_no_t from_page)4020 static void mark_all_page_dirty_in_tablespace(THD *thd, space_id_t space_id,
4021 uint32_t space_flags,
4022 page_no_t total_pages,
4023 page_no_t from_page) {
4024 #ifdef HAVE_PSI_STAGE_INTERFACE
4025 ut_stage_alter_ts progress_monitor;
4026 #endif
4027 page_size_t pageSize(space_flags);
4028 page_no_t current_page = from_page;
4029 mtr_t mtr;
4030
4031 /* Page 0 is never encrypted */
4032 ut_ad(current_page != 0);
4033
4034 #ifdef HAVE_PSI_STAGE_INTERFACE
4035 progress_monitor.init(srv_stage_alter_tablespace_encryption.m_key);
4036 progress_monitor.set_estimate(total_pages - current_page);
4037 #endif
4038
4039 while (current_page < total_pages) {
4040 /* Mark group of PAGE_GROUP_SIZE pages dirty */
4041 mtr_start(&mtr);
4042 page_no_t inner_count = 0;
4043 for (; inner_count < PAGE_GROUP_SIZE && current_page < total_pages;
4044 inner_count++, current_page++) {
4045 /* As we are trying to read each and every page of
4046 tablespace, there might be few pages which are freed.
4047 Take them into consideration. */
4048 buf_block_t *block = buf_page_get_gen(
4049 page_id_t(space_id, current_page), pageSize, RW_X_LATCH, nullptr,
4050 Page_fetch::POSSIBLY_FREED, __FILE__, __LINE__, &mtr);
4051
4052 if (block == nullptr) {
4053 continue;
4054 }
4055
4056 page_t *page = buf_block_get_frame(block);
4057 page_zip_des_t *page_zip = buf_block_get_page_zip(block);
4058
4059 /* If page is not initialized */
4060 if (page_get_space_id(page) == 0 || page_get_page_no(page) == 0) {
4061 continue;
4062 }
4063
4064 if (page_zip != nullptr &&
4065 fil_page_type_is_index(fil_page_get_type(page))) {
4066 mach_write_to_4(page + FIL_PAGE_SPACE_ID, space_id);
4067 page_zip_write_header(page_zip, page + FIL_PAGE_SPACE_ID, 4, &mtr);
4068 } else {
4069 mlog_write_ulint(page + FIL_PAGE_SPACE_ID, space_id, MLOG_4BYTES, &mtr);
4070 }
4071
4072 DBUG_INJECT_CRASH_WITH_LOG_FLUSH("alter_encrypt_tablespace_inner_page",
4073 current_page - 1);
4074 }
4075 mtr_commit(&mtr);
4076
4077 mtr_start(&mtr);
4078 /* Write (Un)Encryption progress on page 0 */
4079 fsp_header_write_encryption_progress(space_id, space_flags,
4080 current_page - 1, 0, false, &mtr);
4081 mtr_commit(&mtr);
4082
4083 #ifdef HAVE_PSI_STAGE_INTERFACE
4084 /* Update progress stats */
4085 progress_monitor.update_work(inner_count);
4086 #endif
4087
4088 DBUG_EXECUTE_IF("alter_encrypt_tablespace_insert_delay", sleep(1););
4089
4090 DBUG_INJECT_CRASH_WITH_LOG_FLUSH("alter_encrypt_tablespace_page",
4091 current_page - 1);
4092
4093 #ifdef UNIV_DEBUG
4094 if ((current_page - 1) == 5) {
4095 DEBUG_SYNC(thd, "alter_encrypt_tablespace_wait_after_page5");
4096 }
4097 #endif /* UNIV_DEBUG */
4098
4099 DBUG_EXECUTE_IF("flush_each_dirtied_page",
4100 buf_LRU_flush_or_remove_pages(
4101 space_id, BUF_REMOVE_FLUSH_WRITE, nullptr, false););
4102 }
4103
4104 #ifdef HAVE_PSI_STAGE_INTERFACE
4105 /* Confirm that all pages are covered. */
4106 ut_ad(progress_monitor.is_completed());
4107 #endif
4108 }
4109
/** Encrypt/Unencrypt a tablespace.
The function works in three phases:
  1. Preparation. Outside recovery, the operation type (and, for encryption,
     the encryption information and the new flags) is written to page 0 and
     flushed before the in-memory tablespace state is changed. In recovery
     only the in-memory state is checked/adjusted.
  2. Every page from from_page onwards is marked dirty and flushed.
  3. Completion (label all_done). For unencryption the encryption information
     is erased from page 0 and the in-memory encryption is reset; then the
     in-progress marker is cleared, the DD flags are updated (outside
     recovery) and the progress information on page 0 is erased.
@param[in]      thd          current thread
@param[in]      space_id     Tablespace id
@param[in]      from_page    page number from where the operation is to be
                             done; must not be 0
@param[in]      to_encrypt   true if to encrypt, false if to unencrypt
@param[in]      in_recovery  true if it is called after recovery
@param[in,out]  dd_space_in  dd tablespace object (a dd::Tablespace *); must
                             not be null when in_recovery is false
@return DB_SUCCESS, or the error code returned by fil_set_encryption() or
fil_reset_encryption() */
dberr_t fsp_alter_encrypt_tablespace(THD *thd, space_id_t space_id,
                                     page_no_t from_page, bool to_encrypt,
                                     bool in_recovery, void *dd_space_in) {
  dberr_t err = DB_SUCCESS;
  fil_space_t *space = fil_space_get(space_id);
  uint32_t space_flags = 0;
  page_no_t total_pages = 0;
  dd::Tablespace *dd_space = reinterpret_cast<dd::Tablespace *>(dd_space_in);
  byte operation_type = 0;
  /* Stays all-zero unless an encryption is started outside recovery; the
  unencryption path writes this zeroed buffer to page 0 to erase the stored
  encryption information. */
  byte encryption_info[Encryption::INFO_SIZE];
  memset(encryption_info, 0, Encryption::INFO_SIZE);
  mtr_t mtr;

  DBUG_TRACE;

  /* Page 0 is never encrypted */
  ut_ad(from_page != 0);

  operation_type |= (to_encrypt) ? Encryption::ENCRYPT_IN_PROGRESS
                                 : Encryption::DECRYPT_IN_PROGRESS;

  if (!in_recovery) { /* NOT IN RECOVERY */
    ut_ad(space->encryption_op_in_progress == NONE);
    if (to_encrypt) {
      /* Assert that tablespace is not encrypted */
      ut_ad(!FSP_FLAGS_GET_ENCRYPTION(space->flags));

      /* Fill key, iv and prepare encryption_info to be written in page 0 */
      byte key[Encryption::KEY_LEN];
      byte iv[Encryption::KEY_LEN];

      Encryption::random_value(key);
      Encryption::random_value(iv);

      /* Prepare encrypted encryption information to be written on page 0. */
      if (!Encryption::fill_encryption_info(key, iv, encryption_info, false,
                                            true)) {
        ut_ad(0);
      }

      /* Write Encryption information and space flags now on page 0
      NOTE : Not modifying space->flags as of now, because we want to persist
      the changes on disk and then modify in memory flags. */
      mtr_start(&mtr);
      if (!fsp_header_write_encryption(space_id,
                                       space->flags | FSP_FLAGS_MASK_ENCRYPTION,
                                       encryption_info, true, false, &mtr)) {
        ut_ad(0);
      }

      /* Write on page 0
              - Operation type (Encryption/Unencryption)
              - Write (Un)Encryption progress (0 now) */
      fsp_header_write_encryption_progress(space_id, space->flags, 0,
                                           operation_type, true, &mtr);
      mtr_commit(&mtr);

      /* Make sure REDO logs are flushed till this point */
      log_buffer_flush_to_disk();

      /* As DMLs are allowed in parallel, pass false for 'strict' */
      buf_LRU_flush_or_remove_pages(space_id, BUF_REMOVE_FLUSH_WRITE, nullptr,
                                    false);

      /* Set encryption for tablespace */
      rw_lock_x_lock(&space->latch);
      err = fil_set_encryption(space_id, Encryption::AES, key, iv);
      rw_lock_x_unlock(&space->latch);
      ut_ad(err == DB_SUCCESS);

      /* Set encryption operation in progress flag */
      space->encryption_op_in_progress = ENCRYPTION;

      /* Update Encryption flag for tablespace */
      fsp_flags_set_encryption(space->flags);
    } else {
      /* Assert that tablespace is encrypted */
      ut_ad(FSP_FLAGS_GET_ENCRYPTION(space->flags));

      mtr_start(&mtr);
      /* Write on page 0
              - Operation type (Encryption/Unencryption)
              - Write (Un)Encryption progress (0 now) */
      fsp_header_write_encryption_progress(space_id, space->flags, 0,
                                           operation_type, true, &mtr);
      mtr_commit(&mtr);

      /* Make sure REDO logs are flushed till this point */
      log_buffer_flush_to_disk();

      /* As DMLs are allowed in parallel, pass false for 'strict' */
      buf_LRU_flush_or_remove_pages(space_id, BUF_REMOVE_FLUSH_WRITE, nullptr,
                                    false);

      /* Set encryption operation in progress flag */
      space->encryption_op_in_progress = DECRYPTION;

      /* Update Encryption flag for tablespace */
      fsp_flags_unset_encryption(space->flags);

      /* Don't erase Encryption info from page 0 yet */
    }

    /* Till this point,
      - ddl_log entry has been made.
      For encryption :
        - In-mem Encryption information set for tablespace.
        - In-mem Tablespace flags have been updated.
        - Encryption Info, Tablespace updated flags have been
          written to page 0.
        - Page 0 has been updated to indicate operation type.
      For Unencryption :
        - In-mem Tablespace flags have been updated.
        - Page 0 has been updated to indicate operation type.

    Now, read tablespace pages one by one and mark them dirty. */
  } else { /* IN RECOVERY */

    /* A corner case when crash happened after last page was processed but
    page 0 wasn't updated with this information. Nothing is left to mark
    dirty, so go straight to the completion phase. */
    if (from_page == space->size) {
      goto all_done;
    }

    /* If in recovery, update Tablespace Encryption flag again now
    as DD flags wouldn't have been updated before crash. */
    if (to_encrypt) {
      /* Tablespace Encryption flag were written on page 0
      before crash. */
      ut_ad(FSP_FLAGS_GET_ENCRYPTION(space->flags));

      /* It should have already been set */
      ut_ad(space->encryption_op_in_progress == ENCRYPTION);
    } else {
      /* Tablespace Encryption flag were not written on page 0
      before crash, so the flags are expected to still carry the encryption
      bit here. */
      ut_ad(FSP_FLAGS_GET_ENCRYPTION(space->flags));

      /* It should have already been set */
      ut_ad(space->encryption_op_in_progress == DECRYPTION);

      /* Update Encryption flag for tablespace */
      fsp_flags_unset_encryption(space->flags);

      /* Don't erase Encryption information from page 0 yet */
    }
  }

  /* Snapshot the flags and the size that the page walk below works with. */
  space_flags = space->flags;
  total_pages = space->size;

  /* Mark all pages in tablespace dirty */
  mark_all_page_dirty_in_tablespace(thd, space_id, space_flags, total_pages,
                                    from_page);

  /* As DMLs are allowed in parallel, pass false for 'strict' */
  buf_LRU_flush_or_remove_pages(space_id, BUF_REMOVE_FLUSH_WRITE, nullptr,
                                false);

  /* Till this point, all pages in tablespace have been marked dirty and
  flushed to disk. */

all_done:
  /* For unencryption, if server crashed, before tablespace flags were flushed
  on disk. Set them now. */
  if (in_recovery && !to_encrypt) {
    fsp_flags_unset_encryption(space->flags);
  }

  /* If it was an Unencryption operation */
  if (!to_encrypt) {
    /* Crash before updating tablespace flags on page 0 */
    DBUG_EXECUTE_IF("alter_encrypt_tablespace_crash_before_updating_flags",
                    log_buffer_flush_to_disk();
                    DBUG_SUICIDE(););

    ut_ad(!FSP_FLAGS_GET_ENCRYPTION(space->flags));
#ifdef UNIV_DEBUG
    /* encryption_info must still be the zeroed buffer set up at the top. */
    byte buf[Encryption::INFO_SIZE];
    memset(buf, 0, Encryption::INFO_SIZE);
    ut_ad(memcmp(encryption_info, buf, Encryption::INFO_SIZE) == 0);
#endif
    /* Now on page 0
            - erase Encryption information
            - write updated Tablespace flag */
    mtr_start(&mtr);
    if (!fsp_header_write_encryption(space_id, space->flags, encryption_info,
                                     true, false, &mtr)) {
      ut_ad(0);
    }
    mtr_commit(&mtr);

    rw_lock_x_lock(&space->latch);
    /* Reset In-mem encryption for tablespace */
    err = fil_reset_encryption(space_id);
    rw_lock_x_unlock(&space->latch);
    ut_ad(err == DB_SUCCESS);
  }

  /* Reset encryption in progress flag */
  space->encryption_op_in_progress = NONE;

  if (!in_recovery) {
    ut_ad(dd_space != nullptr);
    /* Update DD flags for tablespace */
    dd_space->se_private_data().set(dd_space_key_strings[DD_SPACE_FLAGS],
                                    static_cast<uint32>(space->flags));
  }

  /* Crash before resetting progress on page 0 */
  DBUG_EXECUTE_IF("alter_encrypt_tablespace_crash_before_resetting_progress",
                  log_buffer_flush_to_disk();
                  DBUG_SUICIDE(););

  /* Erase Operation type and encryption progress from page 0 */
  mtr_start(&mtr);
  fsp_header_write_encryption_progress(space_id, space->flags, 0, 0, true,
                                       &mtr);
  mtr_commit(&mtr);

  /* Crash before flushing page 0 on disk */
  DBUG_EXECUTE_IF("alter_encrypt_tablespace_crash_before_flushing_page_0",
                  log_buffer_flush_to_disk();
                  DBUG_SUICIDE(););

  /* As DMLs are allowed in parallel, pass false for 'strict' */
  buf_LRU_flush_or_remove_pages(space_id, BUF_REMOVE_FLUSH_WRITE, nullptr,
                                false);

  /* Crash after flushing page 0 on disk */
  DBUG_EXECUTE_IF("alter_encrypt_tablespace_crash_after_flushing_page_0",
                  log_buffer_flush_to_disk();
                  DBUG_SUICIDE(););

  return err;
}
4354
4355 #ifdef UNIV_DEBUG
4356 /** Validate tablespace encryption settings. */
validate_tablespace_encryption(fil_space_t * space)4357 static void validate_tablespace_encryption(fil_space_t *space) {
4358 byte buf[Encryption::KEY_LEN];
4359 memset(buf, 0, Encryption::KEY_LEN);
4360
4361 if (FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
4362 ut_ad(memcmp(space->encryption_key, buf, Encryption::KEY_LEN) != 0);
4363 ut_ad(memcmp(space->encryption_iv, buf, Encryption::KEY_LEN) != 0);
4364 ut_ad(space->encryption_klen != 0);
4365 ut_ad(space->encryption_type == Encryption::AES);
4366 } else {
4367 ut_ad(memcmp(space->encryption_key, buf, Encryption::KEY_LEN) == 0);
4368 ut_ad(memcmp(space->encryption_iv, buf, Encryption::KEY_LEN) == 0);
4369 ut_ad(space->encryption_klen == 0);
4370 ut_ad(space->encryption_type == Encryption::NONE);
4371 }
4372 ut_ad(space->encryption_op_in_progress == NONE);
4373 }
4374 #endif
4375
4376 /** Resume Encrypt/Unencrypt for tablespace(s) post recovery.
4377 If an error occurs while processing any tablespace needing encryption,
4378 post an error for that space and keep going.
4379 @param[in] thd background thread */
resume_alter_encrypt_tablespace(THD * thd)4380 static void resume_alter_encrypt_tablespace(THD *thd) {
4381 dberr_t err = DB_SUCCESS;
4382 mtr_t mtr;
4383 char operation_name[3][20] = {"NONE", "ENCRYPTION", "DECRYPTION"};
4384 /* List of MDLs taken. One for each tablespace. */
4385 std::list<MDL_ticket *> shared_mdl_list;
4386
4387 Disable_autocommit_guard autocommit_guard(thd);
4388 dd::cache::Dictionary_client *client = dd::get_dd_client(thd);
4389 dd::cache::Dictionary_client::Auto_releaser releaser(client);
4390 dd::Tablespace *recv_dd_space = nullptr;
4391
4392 /* Take a SHARED MDL to make sure no one could run any DDL on it and DMLs
4393 are allowed. */
4394 for (auto it : ts_encrypt_ddl_records) {
4395 /* Get the space_id and then read page0 to get
4396 (un)encryption progress */
4397 space_id_t space_id = it->get_space_id();
4398 fil_space_t *space = fil_space_get(space_id);
4399 if (space == nullptr) {
4400 continue;
4401 }
4402
4403 MDL_ticket *mdl_ticket;
4404 if (dd::acquire_shared_tablespace_mdl(thd, space->name, false, &mdl_ticket,
4405 false)) {
4406 ut_a(false);
4407 }
4408 shared_mdl_list.push_back(mdl_ticket);
4409 }
4410
4411 /* Let the startup thread proceed now */
4412 mysql_cond_signal(&resume_encryption_cond);
4413
4414 /* In following loop :
4415 - traverse every tablespace one by one and roll forward (un)encryption
4416 operation.
4417 - remove EXPLICIT MDL taken on tablespace explicitly */
4418 std::list<MDL_ticket *>::iterator mdl_it = shared_mdl_list.begin();
4419 for (auto it : ts_encrypt_ddl_records) {
4420 /* Get the space_id and then read page 0 to get (un)encryption progress */
4421 space_id_t space_id = it->get_space_id();
4422 fil_space_t *space = fil_space_get(space_id);
4423 if (space == nullptr) {
4424 ib::error(ER_IB_MSG_1277)
4425 << "Tablespace is missing for tablespace id" << space_id
4426 << ". Skipping (un)encryption resume operation.";
4427 continue;
4428 }
4429
4430 /* MDL list must not be empty */
4431 ut_ad(mdl_it != shared_mdl_list.end());
4432
4433 page_size_t pageSize(space->flags);
4434
4435 mtr_start(&mtr);
4436 buf_block_t *block =
4437 buf_page_get(page_id_t(space_id, 0), pageSize, RW_X_LATCH, &mtr);
4438
4439 page_t *page = buf_block_get_frame(block);
4440
4441 /* Get the offset of Encryption progress information */
4442 ulint offset = fsp_header_get_encryption_progress_offset(pageSize);
4443
4444 /* Read operation type (1 byte) */
4445 byte operation = mach_read_from_1(page + offset);
4446
4447 /* Read maximum pages (4 byte) */
4448 uint progress =
4449 mach_read_from_4(page + offset + Encryption::OPERATION_INFO_SIZE);
4450 mtr_commit(&mtr);
4451
4452 if (!(operation & Encryption::ENCRYPT_IN_PROGRESS) &&
4453 !(operation & Encryption::DECRYPT_IN_PROGRESS)) {
4454 /* There are two possibilities:
4455 1. Crash happened even before operation/progress
4456 was written to page 0. Nothing to do.
4457 2. Crash happened after (un)encryption was done and progress/operation
4458 was reset but before DD is updated.
4459 Update DD in that case. */
4460 ib::info(ER_IB_MSG_NO_ENCRYPT_PROGRESS_FOUND)
4461 << "No operation/progress found. Updating DD for tablespace "
4462 << space->name << ":" << space_id << ".";
4463 goto update_dd;
4464 }
4465
4466 ib::info(ER_IB_MSG_RESUME_OP_FOR_SPACE)
4467 << "Resuming " << operation_name[operation] << " for tablespace "
4468 << space->name << ":" << space_id << " from page " << progress + 1;
4469
4470 /* Resume (Un)Encryption operation next page onwards */
4471 err = fsp_alter_encrypt_tablespace(
4472 thd, space_id, progress + 1,
4473 (operation & Encryption::ENCRYPT_IN_PROGRESS) ? true : false, true,
4474 recv_dd_space);
4475
4476 if (err != DB_SUCCESS) {
4477 ib::error(ER_IB_MSG_1280)
4478 << operation_name[operation] << " for tablespace " << space->name
4479 << ":" << space_id << " could not be done successfully.";
4480 continue;
4481 }
4482
4483 update_dd:
4484 /* At this point, encryption/unencryption process would have been
4485 finished and all pages in tablespace should have been written
4486 correctly and flushed to disk. Now :
4487 - Set/Update tablespace flags encryption.
4488 - Remove In-mem encryption info from tablespace (If Unencrypted).
4489 - Reset operation in progress to NONE. */
4490 mtr_start(&mtr);
4491 block = buf_page_get(page_id_t(space_id, 0), pageSize, RW_X_LATCH, &mtr);
4492 page = buf_block_get_frame(block);
4493 uint32_t latest_fsp_flags = fsp_header_get_flags(page);
4494 if (FSP_FLAGS_GET_ENCRYPTION(latest_fsp_flags)) {
4495 fsp_flags_set_encryption(space->flags);
4496 } else {
4497 fsp_flags_unset_encryption(space->flags);
4498 }
4499 ut_ad(space->flags == latest_fsp_flags);
4500 mtr_commit(&mtr);
4501
4502 if (!FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
4503 /* Reset In-mem encryption for tablespace */
4504 err = fil_reset_encryption(space_id);
4505 ut_ad(err == DB_SUCCESS);
4506 }
4507
4508 space->encryption_op_in_progress = NONE;
4509
4510 /* In case of crash/recovery, following has to be set explicitly
4511 - DD tablespace flags.
4512 - DD encryption option value. */
4513 while (acquire_shared_backup_lock(thd, thd->variables.lock_wait_timeout)) {
4514 os_thread_sleep(20);
4515 }
4516
4517 while (dd::acquire_exclusive_tablespace_mdl(thd, space->name, false)) {
4518 os_thread_sleep(20);
4519 }
4520
4521 while (client->acquire_for_modification<dd::Tablespace>(space->name,
4522 &recv_dd_space)) {
4523 os_thread_sleep(20);
4524 }
4525
4526 if (!FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
4527 /* Update DD Option value, for Unencryption */
4528 recv_dd_space->options().set("encryption", "N");
4529
4530 } else {
4531 /* Update DD Option value, for Encryption */
4532 recv_dd_space->options().set("encryption", "Y");
4533 }
4534
4535 /* Update DD flags for tablespace */
4536 recv_dd_space->se_private_data().set(dd_space_key_strings[DD_SPACE_FLAGS],
4537 static_cast<uint32>(space->flags));
4538
4539 /* Validate tablespace In-mem representation */
4540 ut_d(validate_tablespace_encryption(space));
4541
4542 /* Pass 'true' for 'release_mdl_on_commit' parameter because we want
4543 transactional locks to be released only in case of successful commit */
4544 while (dd::commit_or_rollback_tablespace_change(thd, recv_dd_space, false,
4545 true)) {
4546 os_thread_sleep(20);
4547 }
4548
4549 ib::info(ER_IB_MSG_1281)
4550 << "Finished " << operation_name[operation] << " for tablespace "
4551 << space->name << ":" << space_id << ".";
4552
4553 /* Release MDL on tablespace explicitly */
4554 dd_release_mdl((*mdl_it));
4555 mdl_it = shared_mdl_list.erase(mdl_it);
4556 }
4557
4558 DBUG_EXECUTE_IF("DDL_Log_remove_inject_startup_error_1",
4559 srv_inject_too_many_concurrent_trxs = true;);
4560
4561 /* Delete DDL logs now */
4562 err = log_ddl->post_ts_encryption(ts_encrypt_ddl_records);
4563
4564 /* Abort post recovery startup if this is not successful since
4565 it would leave the DDL Log in an indeterminate state. */
4566 if (err != DB_SUCCESS) {
4567 ib::fatal(ER_IB_MSG_POST_RECOVER_POST_TS_ENCRYPT);
4568 }
4569
4570 ts_encrypt_ddl_records.clear();
4571 /* All MDLs should have been released and removed from list by now */
4572 ut_ad(shared_mdl_list.empty());
4573 shared_mdl_list.clear();
4574 }
4575
4576 /* Initiate roll-forward of alter encrypt in background thread */
fsp_init_resume_alter_encrypt_tablespace()4577 void fsp_init_resume_alter_encrypt_tablespace() {
4578 #ifdef UNIV_PFS_THREAD
4579 THD *thd =
4580 create_thd(false, true, true, srv_ts_alter_encrypt_thread_key.m_value);
4581 #else
4582 THD *thd = create_thd(false, true, true, 0);
4583 #endif
4584
4585 resume_alter_encrypt_tablespace(thd);
4586
4587 destroy_thd(thd);
4588 }
4589
write_not_full_n_used(uint32_t n_used)4590 void File_segment_inode::write_not_full_n_used(uint32_t n_used) {
4591 #ifdef UNIV_DEBUG
4592 ut_ad(m_mtr != nullptr);
4593 ut_ad(mtr_memo_contains_page_flagged(
4594 m_mtr, m_fseg_inode, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX));
4595
4596 if (n_used > 0) {
4597 uint32_t old_value = read_not_full_n_used();
4598 if (n_used > old_value) {
4599 uint32_t incr = n_used - old_value;
4600 ut_ad(incr == 1 || incr == XDES_FRAG_N_USED ||
4601 incr == (FSP_EXTENT_SIZE - 1));
4602 } else {
4603 uint32_t decr = old_value - n_used;
4604 ut_ad(decr == FSP_EXTENT_SIZE || decr == 1 ||
4605 decr == (XDES_FRAG_N_USED + 1) ||
4606 (n_used == calculate_not_full_n_used()));
4607 }
4608 }
4609 #endif /* UNIV_DEBUG */
4610
4611 mlog_write_ulint(m_fseg_inode + FSEG_NOT_FULL_N_USED, n_used, MLOG_4BYTES,
4612 m_mtr);
4613
4614 ut_ad(n_used == 0 || verify_not_full_n_used());
4615 }
4616
4617 #ifdef UNIV_DEBUG
verify_not_full_n_used()4618 bool File_segment_inode::verify_not_full_n_used() {
4619 if (!do_verify()) {
4620 return (true);
4621 }
4622 uint32_t not_full_n_used_1 = read_not_full_n_used();
4623 uint32_t not_full_n_used_2 = calculate_not_full_n_used();
4624 ut_ad(not_full_n_used_1 == not_full_n_used_2);
4625 return (not_full_n_used_1 == not_full_n_used_2);
4626 }
4627
calculate_not_full_n_used()4628 page_no_t File_segment_inode::calculate_not_full_n_used() {
4629 page_no_t n_used = 0;
4630 xdes_t *descr;
4631 fil_addr_t xdes_addr = flst_get_first(m_fseg_inode + FSEG_NOT_FULL, m_mtr);
4632
4633 while (!xdes_addr.is_null()) {
4634 descr = xdes_lst_get_descriptor(m_space_id, m_page_size, xdes_addr, m_mtr);
4635 n_used += xdes_get_n_used(descr, m_mtr);
4636 xdes_addr = flst_get_next_addr(descr + XDES_FLST_NODE, m_mtr);
4637 }
4638
4639 return (n_used);
4640 }
4641 #endif /* UNIV_DEBUG */
4642
read_not_full_n_used() const4643 uint32_t File_segment_inode::read_not_full_n_used() const {
4644 uint32_t n_used =
4645 mtr_read_ulint(m_fseg_inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, m_mtr);
4646 return (n_used);
4647 }
4648
print(std::ostream & out) const4649 std::ostream &File_segment_inode::print(std::ostream &out) const {
4650 out << "[File_segment_inode: FSEG_ID=" << get_seg_id()
4651 << ", FSEG_NOT_FULL_N_USED=" << read_not_full_n_used() << "]";
4652
4653 return (out);
4654 }
4655
4656 #endif /* !UNIV_HOTBACKUP */
4657