// Copyright (c) 2014, Google Inc.
// Copyright (c) 2017, 2021, MariaDB Corporation.

/**************************************************//**
@file btr/btr0scrub.cc
Scrubbing of btree pages

*******************************************************/

#include "btr0btr.h"
#include "btr0cur.h"
#include "btr0scrub.h"
#include "ibuf0ibuf.h"
#include "fsp0fsp.h"
#include "dict0dict.h"
#include "mtr0mtr.h"

/* used when trying to acquire dict-lock */
UNIV_INTERN bool fil_crypt_is_closing(ulint space);

/**
* scrub data at delete time (e.g. purge thread)
*/
my_bool srv_immediate_scrub_data_uncompressed = false;

/**
* background scrub uncompressed data
*
* if srv_immediate_scrub_data_uncompressed is enabled
* this is only needed to handle "old" data
*/
my_bool srv_background_scrub_data_uncompressed = false;

/**
* background scrub compressed data
*
* reorganize compressed page for scrubbing
* (only way to scrub compressed data)
*/
my_bool srv_background_scrub_data_compressed = false;

/* check spaces once per hour */
UNIV_INTERN uint srv_background_scrub_data_check_interval = (60 * 60);

/* default to scrub spaces that haven't been scrubbed in a week */
UNIV_INTERN uint srv_background_scrub_data_interval = (7 * 24 * 60 * 60);

/**
* statistics for scrubbing by background threads
*/
static btr_scrub_stat_t scrub_stat;
static ib_mutex_t scrub_stat_mutex;
#ifdef UNIV_PFS_MUTEX
UNIV_INTERN mysql_pfs_key_t scrub_stat_mutex_key;
#endif

#ifdef UNIV_DEBUG
/**
* srv_scrub_force_testing
*
* - force scrubbing using background threads even for uncompressed tables
* - force pessimistic scrubbing (page split) even if not needed
*   (see test_pessimistic_scrub_pct)
*/
my_bool srv_scrub_force_testing = true;

/**
* Force pessimistic scrubbing in 50% of the cases (UNIV_DEBUG only)
*/
static int test_pessimistic_scrub_pct = 50;

#endif
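/* compression level passed to btr_page_reorganize_low() when
scrubbing; initialized from the global page_zip_level */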
static uint scrub_compression_level = page_zip_level;

/**************************************************************//**
Log a scrubbing failure */
static
void
log_scrub_failure(
/*===============*/
	dict_index_t* index,     /*!< in: index */
	btr_scrub_t* scrub_data, /*!< in: data to store statistics on */
	buf_block_t* block,	 /*!< in: block */
	dberr_t err)             /*!< in: error */
{
	const char* reason = "unknown";
	switch(err) {
	case DB_UNDERFLOW:
		reason = "too few records on page";
		scrub_data->scrub_stat.page_split_failures_underflow++;
		break;
	case DB_INDEX_CORRUPT:
		reason = "unable to find index!";
		scrub_data->scrub_stat.page_split_failures_missing_index++;
		break;
	case DB_OUT_OF_FILE_SPACE:
		reason = "out of filespace";
		scrub_data->scrub_stat.page_split_failures_out_of_filespace++;
		break;
	default:
		ut_ad(0);
		reason = "unknown";
		scrub_data->scrub_stat.page_split_failures_unknown++;
	}

	ib::warn() << "Failed to scrub index " << index->name
		   << " of table " << index->table->name
		   << " page " << block->page.id << ": " << reason;
}

/****************************************************************
Lock dict mutexes */
static
bool
btr_scrub_lock_dict_func(ulint space_id, bool lock_to_close_table,
			 const char * file, uint line)
{
	time_t start = time(0);
	time_t last = start;

	/* FIXME: this is not the proper way of doing things. The
	dict_sys.mutex should not be held by any thread for longer
	than a few microseconds. It must not be held during I/O,
	for example. So, what is the purpose for this busy-waiting?
	This function should be rewritten as part of MDEV-8139:
	Fix scrubbing tests. */

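	/* busy-wait until dict_sys.mutex is acquired:
	mutex_enter_nowait() returns nonzero while the lock attempt
	fails (see the mutex_own() assertion after the loop) */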
	while (mutex_enter_nowait(&dict_sys.mutex)) {
		/* if we lock to close a table, we wait forever
		* if we don't lock to close a table, we check if space
		* is closing, and then instead give up
		*/
		if (lock_to_close_table) {
		} else if (fil_space_t* space = fil_space_acquire(space_id)) {
			bool stopping = space->is_stopping();
			space->release();
			if (stopping) {
				return false;
			}
		} else {
			return false;
		}

		os_thread_sleep(250000);

		time_t now = time(0);

		if (now >= last + 30) {
			fprintf(stderr,
				"WARNING: %s:%u waited %ld seconds for"
				" dict_sys lock, space: " ULINTPF
				" lock_to_close_table: %d\n",
				file, line, long(now - start), space_id,
				lock_to_close_table);

			last = now;
		}
	}

	ut_ad(mutex_own(&dict_sys.mutex));
	return true;
}

#define btr_scrub_lock_dict(space, lock_to_close_table)			\
	btr_scrub_lock_dict_func(space, lock_to_close_table, __FILE__, __LINE__)

/****************************************************************
Unlock dict mutexes */
static
void
btr_scrub_unlock_dict()
{
	dict_mutex_exit_for_mysql();
}

/****************************************************************
Release reference to table
*/
static
void
btr_scrub_table_close(
/*==================*/
	dict_table_t* table)  /*!< in: table */
{
	bool dict_locked = true;
	bool try_drop = false;
	table->stats_bg_flag &= ~BG_SCRUB_IN_PROGRESS;
	dict_table_close(table, dict_locked, try_drop);
}

/****************************************************************
Release reference to table
*/
static
void
btr_scrub_table_close_for_thread(
	btr_scrub_t *scrub_data)
{
	if (scrub_data->current_table == NULL) {
		return;
	}

	if (fil_space_t* space = fil_space_acquire(scrub_data->space)) {
		/* If tablespace is not marked as stopping perform
		the actual close. */
		if (!space->is_stopping()) {
			mutex_enter(&dict_sys.mutex);
			/* perform the actual closing */
			btr_scrub_table_close(scrub_data->current_table);
			mutex_exit(&dict_sys.mutex);
		}
		space->release();
	}

	scrub_data->current_table = NULL;
	scrub_data->current_index = NULL;
}

/**************************************************************//**
Check if scrubbing is turned ON or OFF */
static
bool
check_scrub_setting(
/*=====================*/
	btr_scrub_t*	scrub_data) /*!< in: scrub data  */
{
	if (scrub_data->compressed)
		return srv_background_scrub_data_compressed;
	else
		return srv_background_scrub_data_uncompressed;
}

#define IBUF_INDEX_ID (DICT_IBUF_ID_MIN + IBUF_SPACE_ID)
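/* IBUF_INDEX_ID identifies the change buffer (ibuf) tree in the
system tablespace; btr_page_needs_scrubbing() below skips its pages */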

/**************************************************************//**
Check if a page needs scrubbing */
UNIV_INTERN
int
btr_page_needs_scrubbing(
/*=====================*/
	btr_scrub_t*	scrub_data, /*!< in: scrub data  */
	buf_block_t*	block,	    /*!< in: block to check, latched */
	btr_scrub_page_allocation_status_t allocated)  /*!< in: is block known
						       to be allocated */
{
	/**
	* Check if scrubbing has been turned OFF.
	*
	* at start of space, we check if scrubbing is ON or OFF
	* here we only check if scrubbing is turned OFF.
	*
	* Motivation is that it's only valuable to have a full table (space)
	* scrubbed.
	*/
	if (!check_scrub_setting(scrub_data)) {
		bool before_value = scrub_data->scrubbing;
		scrub_data->scrubbing = false;

		if (before_value == true) {
			/* we toggled scrubbing from on to off */
			return BTR_SCRUB_TURNED_OFF;
		}
	}

	if (scrub_data->scrubbing == false) {
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	const page_t*	page = buf_block_get_frame(block);

	if (allocated == BTR_SCRUB_PAGE_ALLOCATED) {
		if (fil_page_get_type(page) != FIL_PAGE_INDEX) {
			/* this function is called from fil-crypt-threads.
			* these threads iterate all pages of all tablespaces
			* and don't know about fil_page_type.
			* But scrubbing is only needed for index-pages. */

			/**
			* NOTE: scrubbing is also needed for UNDO pages,
			* but they are scrubbed at purge-time, since they are
			* uncompressed
			*/

			/* if encountering a page type not needing scrubbing
			release reference to table object */
			return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
		}

		if (!page_has_garbage(page)) {
			/* no garbage (from deleted/shrunken records) */
			return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
		}

	} else if (allocated == BTR_SCRUB_PAGE_FREE ||
		   allocated == BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN) {

		switch (fil_page_get_type(page)) {
		case FIL_PAGE_INDEX:
		case FIL_PAGE_TYPE_ZBLOB:
		case FIL_PAGE_TYPE_ZBLOB2:
			break;
		default:
			/**
			* If this is a dropped page, we also need to scrub
			* BLOB pages
			*/

			/* if encountering a page type not needing scrubbing
			release reference to table object */
			return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
		}
	}

	if (block->page.id.space() == TRX_SYS_SPACE
	    && btr_page_get_index_id(page) == IBUF_INDEX_ID) {
		/* skip ibuf */
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	return BTR_SCRUB_PAGE;
}

/****************************************************************
Handle a skipped page
*/
UNIV_INTERN
void
btr_scrub_skip_page(
/*==================*/
	btr_scrub_t* scrub_data, /*!< in: data with scrub state */
	int needs_scrubbing)     /*!< in: return code from
				 btr_page_needs_scrubbing */
{
	switch(needs_scrubbing) {
	case BTR_SCRUB_SKIP_PAGE:
		/* nothing to do */
		return;
	case BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE:
		btr_scrub_table_close_for_thread(scrub_data);
		return;
	case BTR_SCRUB_TURNED_OFF:
	case BTR_SCRUB_SKIP_PAGE_AND_COMPLETE_SPACE:
		btr_scrub_complete_space(scrub_data);
		return;
	}

	/* unknown value. should not happen */
	ut_a(0);
}

/****************************************************************
Try to scrub a page using btr_page_reorganize_low
return DB_SUCCESS on success or DB_OVERFLOW on failure */
static
dberr_t
btr_optimistic_scrub(
/*==================*/
	btr_scrub_t* scrub_data, /*!< in: data with scrub state */
	buf_block_t* block,      /*!< in: block to scrub */
	dict_index_t* index,     /*!< in: index */
	mtr_t* mtr)              /*!< in: mtr */
{
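	/* testing hook: with srv_scrub_force_testing set, report a
	random share of optimistic scrubs as DB_OVERFLOW so that the
	pessimistic (page-split) path is also exercised;
	see test_pessimistic_scrub_pct above */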
#ifdef UNIV_DEBUG
	if (srv_scrub_force_testing &&
	    page_get_n_recs(buf_block_get_frame(block)) > 2 &&
	    (rand() % 100) < test_pessimistic_scrub_pct) {

		log_scrub_failure(index, scrub_data, block, DB_OVERFLOW);
		return DB_OVERFLOW;
	}
#endif

	page_cur_t cur;
	page_cur_set_before_first(block, &cur);
	bool recovery = false;
	if (!btr_page_reorganize_low(recovery, scrub_compression_level,
				     &cur, index, mtr)) {
		return DB_OVERFLOW;
	}

	/* We play safe and reset the free bits */
	if (!dict_index_is_clust(index) &&
	    block != NULL) {
		buf_frame_t* frame = buf_block_get_frame(block);
		if (frame &&
		    page_is_leaf(frame)) {

			ibuf_reset_free_bits(block);
		}
	}

	scrub_data->scrub_stat.page_reorganizations++;

	return DB_SUCCESS;
}

/****************************************************************
Try to scrub a page by splitting it
return DB_SUCCESS on success
DB_UNDERFLOW if page has too few records
DB_OUT_OF_FILE_SPACE if we can't find space for split */
static
dberr_t
btr_pessimistic_scrub(
/*==================*/
	btr_scrub_t* scrub_data, /*!< in: data with scrub state */
	buf_block_t* block,      /*!< in: block to scrub */
	dict_index_t* index,     /*!< in: index */
	mtr_t* mtr)              /*!< in: mtr */
{
	page_t*	page = buf_block_get_frame(block);

	if (page_get_n_recs(page) < 2) {
		/**
		* There is no way we can split a page with < 2 records
		*/
		log_scrub_failure(index, scrub_data, block, DB_UNDERFLOW);
		return DB_UNDERFLOW;
	}

	/**
	* Splitting a page needs free space; reserve it here
	* so that the split won't fail for lack of space */
	ulint n_extents = 3;
	ulint n_reserved = 0;
	if (!fsp_reserve_free_extents(&n_reserved, index->table->space,
				      n_extents, FSP_NORMAL, mtr)) {
		log_scrub_failure(index, scrub_data, block,
				  DB_OUT_OF_FILE_SPACE);
		return DB_OUT_OF_FILE_SPACE;
	}
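	/* the extents reserved above are handed back through
	release_free_extents(n_reserved) at the end of this function */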

	/* read block variables */
	const uint32_t page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
	const uint32_t left_page_no = btr_page_get_prev(page);
	const uint32_t right_page_no = btr_page_get_next(page);
	const ulint zip_size = index->table->space->zip_size();

	/**
	* When splitting a page, we need X-latches on left/right brothers
	* see e.g. btr_cur_latch_leaves
	*/

	if (left_page_no != FIL_NULL) {
		/**
		* pages need to be locked left-to-right, release block
		* and re-lock. We still have x-lock on index
		* so this should be safe
		*/
		mtr->release_block_at_savepoint(scrub_data->savepoint, block);

		btr_block_get(
			page_id_t(index->table->space_id, left_page_no),
			zip_size, RW_X_LATCH, index, mtr);

		/**
		* Refetch block and re-initialize page
		*/
		block = btr_block_get(
			page_id_t(index->table->space_id, page_no),
			zip_size, RW_X_LATCH, index, mtr);

		page = buf_block_get_frame(block);

		/**
		* structure should be unchanged
		*/
		ut_a(left_page_no == btr_page_get_prev(page));
		ut_a(right_page_no == btr_page_get_next(page));
	}

	if (right_page_no != FIL_NULL) {
		btr_block_get(
			page_id_t(index->table->space_id, right_page_no),
			zip_size, RW_X_LATCH, index, mtr);
	}

	/* arguments to btr_page_split_and_insert */
	mem_heap_t* heap = NULL;
	dtuple_t* entry = NULL;
	rec_offs* offsets = NULL;
	ulint n_ext = 0;
	ulint flags = BTR_MODIFY_TREE;

	/**
	* position a cursor on the first record on the page
	*/
	rec_t* rec = page_rec_get_next(page_get_infimum_rec(page));
	btr_cur_t cursor;
	btr_cur_position(index, rec, block, &cursor);

	/**
	* call split page with NULL as argument for entry to insert
	*/
	if (dict_index_get_page(index) == page_no) {
		/* The page is the root page
		* NOTE: ibuf_reset_free_bits is called inside
		* btr_root_raise_and_insert */
		rec = btr_root_raise_and_insert(
			flags, &cursor, &offsets, &heap, entry, n_ext, mtr);
	} else {
		/* We play safe and reset the free bits
		* NOTE: need to call this prior to btr_page_split_and_insert */
		if (!dict_index_is_clust(index) &&
		    block != NULL) {
			buf_frame_t* frame = buf_block_get_frame(block);
			if (frame &&
			    page_is_leaf(frame)) {

				ibuf_reset_free_bits(block);
			}
		}

		rec = btr_page_split_and_insert(
			flags, &cursor, &offsets, &heap, entry, n_ext, mtr);
	}

	if (heap) {
		mem_heap_free(heap);
	}

	index->table->space->release_free_extents(n_reserved);
	scrub_data->scrub_stat.page_splits++;
	return DB_SUCCESS;
}

/****************************************************************
Locate index by id for a table
return index or NULL */
static
dict_index_t*
find_index(
/*========*/
	dict_table_t* table, /*!< in: table */
	index_id_t index_id) /*!< in: index id */
{
	if (table != NULL) {
		dict_index_t* index = dict_table_get_first_index(table);
		while (index != NULL) {
			if (index->id == index_id)
				return index;
			index = dict_table_get_next_index(index);
		}
	}

	return NULL;
}

/****************************************************************
Check if table should be scrubbed
*/
static
bool
btr_scrub_table_needs_scrubbing(
/*============================*/
	dict_table_t* table) /*!< in: table */
{
	if (table == NULL)
		return false;

	if (table->stats_bg_flag & BG_STAT_SHOULD_QUIT) {
		return false;
	}

	if (table->to_be_dropped) {
		return false;
	}

	if (!table->is_readable()) {
		return false;
	}

	return true;
}

/****************************************************************
Check if index should be scrubbed
*/
static
bool
btr_scrub_index_needs_scrubbing(
/*============================*/
	dict_index_t* index) /*!< in: index */
{
	if (index == NULL)
		return false;

	if (dict_index_is_ibuf(index)) {
		return false;
	}

	if (dict_index_is_online_ddl(index)) {
		return false;
	}

	return true;
}

/****************************************************************
Get table and index and store them on scrub_data
*/
static
void
btr_scrub_get_table_and_index(
/*=========================*/
	btr_scrub_t* scrub_data, /*!< in/out: scrub data */
	index_id_t index_id)     /*!< in: index id */
{
	/* first check if it's an index of the current table */
	scrub_data->current_index = find_index(scrub_data->current_table,
					       index_id);

	if (scrub_data->current_index != NULL) {
		/* yes it was */
		return;
	}

	if (!btr_scrub_lock_dict(scrub_data->space, false)) {
		btr_scrub_complete_space(scrub_data);
		return;
	}

	/* close current table (if any) */
	if (scrub_data->current_table != NULL) {
		btr_scrub_table_close(scrub_data->current_table);
		scrub_data->current_table = NULL;
	}

	/* open table based on index_id */
	dict_table_t* table = dict_table_open_on_index_id(index_id);

	if (table != NULL) {
		/* mark table as being scrubbed */
		table->stats_bg_flag |= BG_SCRUB_IN_PROGRESS;

		if (!btr_scrub_table_needs_scrubbing(table)) {
			btr_scrub_table_close(table);
			btr_scrub_unlock_dict();
			return;
		}
	}

	btr_scrub_unlock_dict();
	scrub_data->current_table = table;
	scrub_data->current_index = find_index(table, index_id);
}

/****************************************************************
Handle free page */
UNIV_INTERN
int
btr_scrub_free_page(
/*====================*/
	btr_scrub_t* scrub_data,  /*!< in/out: scrub data */
	buf_block_t* block,       /*!< in: block to scrub */
	mtr_t* mtr)               /*!< in: mtr */
{
	// TODO(jonaso): scrub only what is actually needed

	{
		/* note: perform both the memset and setting of FIL_PAGE_TYPE
		* w/o logging, so that if we crash before the page is flushed
		* it will be found by the scrubbing thread again
		*/
		memset(buf_block_get_frame(block) + PAGE_HEADER, 0,
		       srv_page_size - PAGE_HEADER);

		mach_write_to_2(buf_block_get_frame(block) + FIL_PAGE_TYPE,
				FIL_PAGE_TYPE_ALLOCATED);
	}

	page_create(block, mtr,
		    dict_table_is_comp(scrub_data->current_table),
		    dict_index_is_spatial(scrub_data->current_index));
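	/* note: unlike the raw writes above, page_create() executes
	within the mtr and is redo-logged, so the re-initialized empty
	page is made durable via crash recovery */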

	mtr_commit(mtr);

	/* page doesn't need further processing => SKIP
	* and close table/index so that we don't keep references too long */
	return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
}

/****************************************************************
Recheck if a page needs scrubbing, and if it does load the appropriate
table and index */
UNIV_INTERN
int
btr_scrub_recheck_page(
/*====================*/
	btr_scrub_t* scrub_data,  /*!< in/out: scrub data */
	buf_block_t* block,       /*!< in: block */
	btr_scrub_page_allocation_status_t allocated, /*!< in: is block
						      allocated or free */
	mtr_t* mtr)               /*!< in: mtr */
{
	/* recheck if page needs scrubbing (knowing allocation status) */
	int needs_scrubbing = btr_page_needs_scrubbing(
		scrub_data, block, allocated);

	if (needs_scrubbing != BTR_SCRUB_PAGE) {
		mtr_commit(mtr);
		return needs_scrubbing;
	}

	if (allocated == BTR_SCRUB_PAGE_FREE) {
		/** we don't need to load table/index for free pages
		* so scrub directly here */
		/* mtr is committed inside btr_scrub_free_page */
		return btr_scrub_free_page(scrub_data,
					   block,
					   mtr);
	}

	page_t*	page = buf_block_get_frame(block);
	index_id_t index_id = btr_page_get_index_id(page);

	if (scrub_data->current_index == NULL ||
	    scrub_data->current_index->id != index_id) {

		/**
		* commit mtr (i.e. release locks on block)
		* and try to get table and index, potentially loading
		* them from disk
		*/
		mtr_commit(mtr);
		btr_scrub_get_table_and_index(scrub_data, index_id);
	} else {
		/* we already have the correct index
		* commit mtr so that we can lock index before fetching page
		*/
		mtr_commit(mtr);
	}

	/* check if table is about to be dropped */
	if (!btr_scrub_table_needs_scrubbing(scrub_data->current_table)) {
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	/* check if index is scrubbable */
	if (!btr_scrub_index_needs_scrubbing(scrub_data->current_index)) {
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	mtr_start(mtr);
	mtr_x_lock_index(scrub_data->current_index, mtr);
	/** set savepoint for X-latch of block */
	scrub_data->savepoint = mtr_set_savepoint(mtr);
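	/* btr_pessimistic_scrub() uses this savepoint to release the
	block so that page latches can be re-acquired left-to-right */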
	return BTR_SCRUB_PAGE;
}

/****************************************************************
Perform actual scrubbing of page */
UNIV_INTERN
int
btr_scrub_page(
/*============*/
	btr_scrub_t* scrub_data,  /*!< in/out: scrub data */
	buf_block_t* block,       /*!< in: block */
	btr_scrub_page_allocation_status_t allocated, /*!< in: is block
						      allocated or free */
	mtr_t* mtr)               /*!< in: mtr */
{
	/* recheck if page needs scrubbing (knowing allocation status) */
	int needs_scrubbing = BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;

	if (block) {
		needs_scrubbing = btr_page_needs_scrubbing(
			scrub_data, block, allocated);
	}

	if (!block || needs_scrubbing != BTR_SCRUB_PAGE) {
		mtr_commit(mtr);
		return needs_scrubbing;
	}

	if (allocated == BTR_SCRUB_PAGE_FREE) {
		/* mtr is committed inside btr_scrub_free_page */
		return btr_scrub_free_page(scrub_data,
					   block,
					   mtr);
	}

	/* check that table/index still match now that they are loaded */

	if (!scrub_data->current_table->space
	    || scrub_data->current_table->space_id != scrub_data->space) {
		/* this is truncate table */
		mtr_commit(mtr);
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	if (scrub_data->current_index->table != scrub_data->current_table) {
		/* this is truncate table */
		mtr_commit(mtr);
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	if (scrub_data->current_index->page == FIL_NULL) {
		/* this is truncate table */
		mtr_commit(mtr);
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	buf_frame_t* frame = buf_block_get_frame(block);

	if (!frame || btr_page_get_index_id(frame) !=
	    scrub_data->current_index->id) {
		/* page has been reallocated to a new index */
		mtr_commit(mtr);
		return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
	}

	/* check if we can scrub (reorganize) the page w/o overflow */
	if (btr_optimistic_scrub(scrub_data,
				 block,
				 scrub_data->current_index,
				 mtr) != DB_SUCCESS) {

		/**
		* Can't reorganize page...need to split it
		*/
		btr_pessimistic_scrub(scrub_data,
				      block,
				      scrub_data->current_index,
				      mtr);
	}
	mtr_commit(mtr);

	return BTR_SCRUB_SKIP_PAGE; // no further action needed
}

/**************************************************************//**
Start iterating a space */
bool btr_scrub_start_space(const fil_space_t &space, btr_scrub_t *scrub_data)
{
	scrub_data->space = space.id;
	scrub_data->current_table = NULL;
	scrub_data->current_index = NULL;
	scrub_data->compressed = FSP_FLAGS_GET_ZIP_SSIZE(space.flags) != 0;
	scrub_data->scrubbing = check_scrub_setting(scrub_data);
	return scrub_data->scrubbing;
}

/***********************************************************************
Update global statistics with thread statistics */
static
void
btr_scrub_update_total_stat(btr_scrub_t *scrub_data)
{
	mutex_enter(&scrub_stat_mutex);
	scrub_stat.page_reorganizations +=
		scrub_data->scrub_stat.page_reorganizations;
	scrub_stat.page_splits +=
		scrub_data->scrub_stat.page_splits;
	scrub_stat.page_split_failures_underflow +=
		scrub_data->scrub_stat.page_split_failures_underflow;
	scrub_stat.page_split_failures_out_of_filespace +=
		scrub_data->scrub_stat.page_split_failures_out_of_filespace;
	scrub_stat.page_split_failures_missing_index +=
		scrub_data->scrub_stat.page_split_failures_missing_index;
	scrub_stat.page_split_failures_unknown +=
		scrub_data->scrub_stat.page_split_failures_unknown;
	mutex_exit(&scrub_stat_mutex);

	// clear stat
	memset(&scrub_data->scrub_stat, 0, sizeof(scrub_data->scrub_stat));
}

/** Complete iterating a space.
@param[in,out]	scrub_data	 scrub data */
UNIV_INTERN
void
btr_scrub_complete_space(btr_scrub_t* scrub_data)
{
	ut_ad(scrub_data->scrubbing);
	btr_scrub_table_close_for_thread(scrub_data);
	btr_scrub_update_total_stat(scrub_data);
}

/*********************************************************************
Return scrub statistics */
void
btr_scrub_total_stat(btr_scrub_stat_t *stat)
{
	mutex_enter(&scrub_stat_mutex);
	*stat = scrub_stat;
	mutex_exit(&scrub_stat_mutex);
}

/*********************************************************************
Init global variables */
UNIV_INTERN
void
btr_scrub_init()
{
	mutex_create(LATCH_ID_SCRUB_STAT_MUTEX, &scrub_stat_mutex);

	memset(&scrub_stat, 0, sizeof(scrub_stat));
}

/*********************************************************************
Cleanup globals */
UNIV_INTERN
void
btr_scrub_cleanup()
{
	mutex_free(&scrub_stat_mutex);
}