1 /*****************************************************************************
2 
3 Copyright (c) 1997, 2021, Oracle and/or its affiliates.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file row/row0purge.cc
29 Purge obsolete records
30 
31 Created 3/14/1997 Heikki Tuuri
32 *******************************************************/
33 
34 #include <debug_sync.h>
35 
36 #include "row0purge.h"
37 
38 #ifdef UNIV_NONINL
39 #include "row0purge.ic"
40 #endif
41 
42 #include "fsp0fsp.h"
43 #include "mach0data.h"
44 #include "trx0rseg.h"
45 #include "trx0trx.h"
46 #include "trx0roll.h"
47 #include "trx0undo.h"
48 #include "trx0purge.h"
49 #include "trx0rec.h"
50 #include "que0que.h"
51 #include "row0row.h"
52 #include "row0upd.h"
53 #include "row0vers.h"
54 #include "row0mysql.h"
55 #include "row0log.h"
56 #include "log0log.h"
57 #include "srv0mon.h"
58 #include "srv0start.h"
59 #include "handler.h"
60 #include "ha_innodb.h"
61 #include "fil0fil.h"
62 
/*************************************************************************
IMPORTANT NOTE: Any operation that generates redo MUST check that there
is enough space in the redo log for that operation before it starts. This
is done by calling log_free_check(). The reason for checking the
availability of the redo log space before the start of the operation is
that we MUST NOT hold any synchronization objects when performing the
check.
If you make a change in this module make sure that no codepath is
introduced where a call to log_free_check() is bypassed. */
72 
73 /** Create a purge node to a query graph.
74 @param[in]	parent	parent node, i.e., a thr node
75 @param[in]	heap	memory heap where created
76 @return own: purge node */
77 purge_node_t*
row_purge_node_create(que_thr_t * parent,mem_heap_t * heap)78 row_purge_node_create(
79 	que_thr_t*	parent,
80 	mem_heap_t*	heap)
81 {
82 	ut_ad(parent != NULL);
83 	ut_ad(heap != NULL);
84 
85 	purge_node_t*	node;
86 
87 	node = static_cast<purge_node_t*>(
88 		mem_heap_zalloc(heap, sizeof(*node)));
89 
90 	node->common.type = QUE_NODE_PURGE;
91 	node->common.parent = parent;
92 	node->done = true;
93 
94 	node->heap = mem_heap_create(256);
95 
96 	node->recs = NULL;
97 
98 	return(node);
99 }
100 
101 /***********************************************************//**
102 Repositions the pcur in the purge node on the clustered index record,
103 if found. If the record is not found, close pcur.
104 @return TRUE if the record was found */
105 static
106 ibool
row_purge_reposition_pcur(ulint mode,purge_node_t * node,mtr_t * mtr)107 row_purge_reposition_pcur(
108 /*======================*/
109 	ulint		mode,	/*!< in: latching mode */
110 	purge_node_t*	node,	/*!< in: row purge node */
111 	mtr_t*		mtr)	/*!< in: mtr */
112 {
113 	if (node->found_clust) {
114 		ut_ad(node->validate_pcur());
115 
116 		node->found_clust =
117 		    btr_pcur_restore_position(mode, &node->pcur, mtr);
118 
119 	} else {
120 		node->found_clust = row_search_on_row_ref(
121 			&node->pcur, mode, node->table, node->ref, mtr);
122 
123 		if (node->found_clust) {
124 			btr_pcur_store_position(&node->pcur, mtr);
125 		}
126 	}
127 
128 	/* Close the current cursor if we fail to position it correctly. */
129 	if (!node->found_clust) {
130 		btr_pcur_close(&node->pcur);
131 	}
132 
133 	return(node->found_clust);
134 }
135 
/***********************************************************//**
Removes a delete marked clustered index record if possible.
One attempt is made inside a single mini-transaction: the cursor
node->pcur is positioned on the record, and the record is deleted only if
its roll pointer still equals node->roll_ptr. With BTR_MODIFY_LEAF an
optimistic delete is tried; otherwise a pessimistic delete is performed.
@retval true if the row was not found, if it was modified after the
delete marking (its roll pointer differs, so nothing is removed), or if
it was successfully removed
@retval false if the optimistic delete failed, or the pessimistic delete
returned DB_OUT_OF_FILE_SPACE */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
bool
row_purge_remove_clust_if_poss_low(
/*===============================*/
	purge_node_t*	node,	/*!< in/out: row purge node */
	ulint		mode)	/*!< in: BTR_MODIFY_LEAF or
				BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE */
{
	dict_index_t*		index;
	bool			success		= true;
	mtr_t			mtr;
	rec_t*			rec;
	mem_heap_t*		heap		= NULL;
	ulint*			offsets;
	ulint			offsets_[REC_OFFS_NORMAL_SIZE];
	rec_offs_init(offsets_);

	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S));

	index = dict_table_get_first_index(node->table);

	/* The redo log space check must precede the mini-transaction
	(see the note at the top of this module). */
	log_free_check();
	mtr_start(&mtr);
	mtr.set_named_space(index->space);

	if (!row_purge_reposition_pcur(mode, node, &mtr)) {
		/* The record was already removed. */
		goto func_exit;
	}

	rec = btr_pcur_get_rec(&node->pcur);

	/* rec_get_offsets() may allocate from heap; it is freed at
	func_exit if it was created. */
	offsets = rec_get_offsets(
		rec, index, offsets_, ULINT_UNDEFINED, &heap);

	if (node->roll_ptr != row_get_rec_roll_ptr(rec, index, offsets)) {
		/* Someone else has modified the record later: do not remove */
		goto func_exit;
	}

	ut_ad(rec_get_deleted_flag(rec, rec_offs_comp(offsets)));

	if (mode == BTR_MODIFY_LEAF) {
		success = btr_cur_optimistic_delete(
			btr_pcur_get_btr_cur(&node->pcur), 0, &mtr);
	} else {
		dberr_t	err;
		ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE));

		DBUG_EXECUTE_IF("pessimistic_row_purge_clust", {
			const char act[] =
				"now SIGNAL pessimistic_row_purge_clust_pause "
				"WAIT_FOR pessimistic_row_purge_clust_continue";
			assert(opt_debug_sync_timeout > 0);
			assert(!debug_sync_set_action(
				       current_thd, STRING_WITH_LEN(act)));
		});

		btr_cur_pessimistic_delete(
			&err, FALSE, btr_pcur_get_btr_cur(&node->pcur), 0,
			false, &mtr);

		/* Only running out of file space is reported to the
		caller; any other error is fatal. */
		switch (err) {
		case DB_SUCCESS:
			break;
		case DB_OUT_OF_FILE_SPACE:
			success = false;
			break;
		default:
			ut_error;
		}
	}

func_exit:
	if (heap) {
		mem_heap_free(heap);
	}

	/* Persistent cursor is closed if reposition fails. */
	if (node->found_clust) {
		btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
	} else {
		mtr_commit(&mtr);
	}

	return(success);
}
226 
227 /***********************************************************//**
228 Removes a clustered index record if it has not been modified after the delete
229 marking.
230 @retval true if the row was not found, or it was successfully removed
231 @retval false the purge needs to be suspended because of running out
232 of file space. */
233 static MY_ATTRIBUTE((nonnull, warn_unused_result))
234 bool
row_purge_remove_clust_if_poss(purge_node_t * node)235 row_purge_remove_clust_if_poss(
236 /*===========================*/
237 	purge_node_t*	node)	/*!< in/out: row purge node */
238 {
239 	if (row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF)) {
240 		return(true);
241 	}
242 
243 	for (ulint n_tries = 0;
244 	     n_tries < BTR_CUR_RETRY_DELETE_N_TIMES;
245 	     n_tries++) {
246 		if (row_purge_remove_clust_if_poss_low(
247 			    node, BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE)) {
248 			return(true);
249 		}
250 
251 		os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
252 	}
253 
254 	return(false);
255 }
256 
257 /***********************************************************//**
258 Determines if it is possible to remove a secondary index entry.
259 Removal is possible if the secondary index entry does not refer to any
260 not delete marked version of a clustered index record where DB_TRX_ID
261 is newer than the purge view.
262 
263 NOTE: This function should only be called by the purge thread, only
264 while holding a latch on the leaf page of the secondary index entry
265 (or keeping the buffer pool watch on the page).  It is possible that
266 this function first returns true and then false, if a user transaction
267 inserts a record that the secondary index entry would refer to.
268 However, in that case, the user transaction would also re-insert the
269 secondary index entry after purge has removed it and released the leaf
270 page latch.
271 @return true if the secondary index record can be purged */
272 bool
row_purge_poss_sec(purge_node_t * node,dict_index_t * index,const dtuple_t * entry)273 row_purge_poss_sec(
274 /*===============*/
275 	purge_node_t*	node,	/*!< in/out: row purge node */
276 	dict_index_t*	index,	/*!< in: secondary index */
277 	const dtuple_t*	entry)	/*!< in: secondary index entry */
278 {
279 	bool	can_delete;
280 	mtr_t	mtr;
281 	row_prebuilt_t* prebuilt =
282 		static_cast<que_thr_t*>(node->common.parent)->prebuilt;
283 
284 	ut_ad(!dict_index_is_clust(index));
285 	mtr_start(&mtr);
286 
287 	can_delete = !row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr)
288 		|| !row_vers_old_has_index_entry(TRUE,
289 						 btr_pcur_get_rec(&node->pcur),
290 						 &mtr, index, entry,
291 						 node->roll_ptr, node->trx_id,
292 						 prebuilt);
293 
294 	/* Persistent cursor is closed if reposition fails. */
295 	if (node->found_clust) {
296 		btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
297 	} else {
298 		mtr_commit(&mtr);
299 	}
300 
301 	return(can_delete);
302 }
303 
/***************************************************************
Removes a secondary index entry if possible, by modifying the
index tree.  Does not try to buffer the delete.
The entry is searched for with BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE and,
if row_purge_poss_sec() allows it, removed with a pessimistic delete.
Nothing is done when the index is uncommitted and still in online DDL.
@return TRUE if success or if not found; FALSE if the pessimistic delete
returned DB_OUT_OF_FILE_SPACE */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
ibool
row_purge_remove_sec_if_poss_tree(
/*==============================*/
	purge_node_t*	node,	/*!< in: row purge node */
	dict_index_t*	index,	/*!< in: index */
	const dtuple_t*	entry)	/*!< in: index entry */
{
	btr_pcur_t		pcur;
	btr_cur_t*		btr_cur;
	ibool			success	= TRUE;
	dberr_t			err;
	mtr_t			mtr;
	enum row_search_result	search_result;

	/* The redo log space check must precede the mini-transaction
	(see the note at the top of this module). */
	log_free_check();
	mtr_start(&mtr);
	mtr.set_named_space(index->space);

	if (!index->is_committed()) {
		/* The index->online_status may change if the index is
		or was being created online, but not committed yet. It
		is protected by index->lock. */
		mtr_sx_lock(dict_index_get_lock(index), &mtr);

		if (dict_index_is_online_ddl(index)) {
			/* Online secondary index creation will not
			copy any delete-marked records. Therefore
			there is nothing to be purged. We must also
			skip the purge when a completed index is
			dropped by rollback_inplace_alter_table(). */
			goto func_exit_no_pcur;
		}
	} else {
		/* For secondary indexes,
		index->online_status==ONLINE_INDEX_COMPLETE if
		index->is_committed(). */
		ut_ad(!dict_index_is_online_ddl(index));
	}

	search_result = row_search_index_entry(
				index, entry,
				BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
				&pcur, &mtr);

	switch (search_result) {
	case ROW_NOT_FOUND:
		/* Not found.  This is a legitimate condition.  In a
		rollback, InnoDB will remove secondary recs that would
		be purged anyway.  Then the actual purge will not find
		the secondary index record.  Also, the purge itself is
		eager: if it comes to consider a secondary index
		record, and notices it does not need to exist in the
		index, it will remove it.  Then if/when the purge
		comes to consider the secondary index record a second
		time, it will not exist any more in the index. */

		/* fputs("PURGE:........sec entry not found\n", stderr); */
		/* dtuple_print(stderr, entry); */
		goto func_exit;
	case ROW_FOUND:
		break;
	case ROW_BUFFERED:
	case ROW_NOT_DELETED_REF:
		/* These are invalid outcomes, because the mode passed
		to row_search_index_entry() did not include any of the
		flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
		ut_error;
	}

	btr_cur = btr_pcur_get_btr_cur(&pcur);

	/* We should remove the index record if no later version of the row,
	which cannot be purged yet, requires its existence. If some requires,
	we should do nothing. */

	if (row_purge_poss_sec(node, index, entry)) {
		/* Remove the index record, which should have been
		marked for deletion. */
		if (!rec_get_deleted_flag(btr_cur_get_rec(btr_cur),
					  dict_table_is_comp(index->table))) {
			/* Report the inconsistency and leave the record
			in place; debug builds assert below. */
			ib::error()
				<< "tried to purge non-delete-marked record"
				" in index " << index->name
				<< " of table " << index->table->name
				<< ": tuple: " << *entry
				<< ", record: " << rec_index_print(
					btr_cur_get_rec(btr_cur), index);

			ut_ad(0);

			goto func_exit;
		}

		btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
					   false, &mtr);
		/* Only running out of file space is reported to the
		caller; any other error is fatal. */
		switch (UNIV_EXPECT(err, DB_SUCCESS)) {
		case DB_SUCCESS:
			break;
		case DB_OUT_OF_FILE_SPACE:
			success = FALSE;
			break;
		default:
			ut_error;
		}
	}

func_exit:
	btr_pcur_close(&pcur);
func_exit_no_pcur:
	/* Reached directly when pcur was never opened. */
	mtr_commit(&mtr);

	return(success);
}
422 
/***************************************************************
Removes a secondary index entry without modifying the index tree,
if possible.
The entry is searched for in a BTR_MODIFY_LEAF mode (with BTR_DELETE added
unless the table is temporary or, for a committed index, the index is
spatial) and, if found and row_purge_poss_sec() allows it, removed with an
optimistic delete. Nothing is done for an uncommitted index that is
spatial or still in online DDL.
@retval true if success or if not found
@retval false if row_purge_remove_sec_if_poss_tree() should be invoked */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
bool
row_purge_remove_sec_if_poss_leaf(
/*==============================*/
	purge_node_t*	node,	/*!< in: row purge node */
	dict_index_t*	index,	/*!< in: index */
	const dtuple_t*	entry)	/*!< in: index entry */
{
	mtr_t			mtr;
	btr_pcur_t		pcur;
	ulint			mode;
	enum row_search_result	search_result;
	bool			success	= true;

	/* The redo log space check must precede the mini-transaction
	(see the note at the top of this module). */
	log_free_check();

	mtr_start(&mtr);
	mtr.set_named_space(index->space);

	if (!index->is_committed()) {
		/* For uncommitted spatial index, we also skip the purge. */
		if (dict_index_is_spatial(index)) {
			goto func_exit_no_pcur;
		}

		/* The index->online_status may change if the
		index is or was being created online, but not
		committed yet. It is protected by index->lock. */
		mtr_s_lock(dict_index_get_lock(index), &mtr);

		if (dict_index_is_online_ddl(index)) {
			/* Online secondary index creation will not
			copy any delete-marked records. Therefore
			there is nothing to be purged. We must also
			skip the purge when a completed index is
			dropped by rollback_inplace_alter_table(). */
			goto func_exit_no_pcur;
		}

		/* Change buffering is disabled for temporary tables. */
		mode = (dict_table_is_temporary(index->table))
			? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
			: BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
			| BTR_DELETE;
	} else {
		/* For secondary indexes,
		index->online_status==ONLINE_INDEX_COMPLETE if
		index->is_committed(). */
		ut_ad(!dict_index_is_online_ddl(index));

		/* Change buffering is disabled for temporary tables
		and spatial index. */
		mode = (dict_table_is_temporary(index->table)
			|| dict_index_is_spatial(index))
			? BTR_MODIFY_LEAF
			: BTR_MODIFY_LEAF | BTR_DELETE;
	}

	/* Set the purge node for the call to row_purge_poss_sec(). */
	pcur.btr_cur.purge_node = node;
	if (dict_index_is_spatial(index)) {
		/* For a spatial index the index lock is held in SX mode
		only for the duration of the search below. */
		rw_lock_sx_lock(dict_index_get_lock(index));
		pcur.btr_cur.thr = NULL;
	} else {
		/* Set the query thread, so that ibuf_insert_low() will be
		able to invoke thd_get_trx(). */
		pcur.btr_cur.thr = static_cast<que_thr_t*>(
			que_node_get_parent(node));
	}

	search_result = row_search_index_entry(
		index, entry, mode, &pcur, &mtr);

	if (dict_index_is_spatial(index)) {
		rw_lock_sx_unlock(dict_index_get_lock(index));
	}

	switch (search_result) {
	case ROW_FOUND:
		/* Before attempting to purge a record, check
		if it is safe to do so. */
		if (row_purge_poss_sec(node, index, entry)) {
			btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);

			/* Only delete-marked records should be purged. */
			if (!rec_get_deleted_flag(
				btr_cur_get_rec(btr_cur),
				dict_table_is_comp(index->table))) {

				/* Report the inconsistency and leave the
				record in place; success stays true, so
				the caller does not retry. */
				ib::error()
					<< "tried to purge non-delete-marked"
					" record" " in index " << index->name
					<< " of table " << index->table->name
					<< ": tuple: " << *entry
					<< ", record: "
					<< rec_index_print(
						btr_cur_get_rec(btr_cur),
						index);
				ut_ad(0);

				btr_pcur_close(&pcur);

				goto func_exit_no_pcur;
			}

			if (dict_index_is_spatial(index)) {
				const page_t*   page;
				const trx_t*	trx = NULL;

				if (btr_cur->rtr_info != NULL
				    && btr_cur->rtr_info->thr != NULL) {
					trx = thr_get_trx(
						btr_cur->rtr_info->thr);
				}

				page = btr_cur_get_page(btr_cur);

				if (!lock_test_prdt_page_lock(
					trx,
					page_get_space_id(page),
					page_get_page_no(page))
				     && page_get_n_recs(page) < 2
				     && page_get_page_no(page) !=
					dict_index_get_page(index)) {
					/* this is the last record on page,
					and it has a "page" lock on it,
					which mean search is still depending
					on it, so do not delete */
#ifdef UNIV_DEBUG
					ib::info() << "skip purging last"
						" record on page "
						<< page_get_page_no(page)
						<< ".";
#endif /* UNIV_DEBUG */

					btr_pcur_close(&pcur);
					mtr_commit(&mtr);
					return(success);
				}
			}

			if (!btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {

				/* The index entry could not be deleted. */
				success = false;
			}
		}
		// fallthrough
		// (the index entry is still needed, or the deletion succeeded)
	case ROW_NOT_DELETED_REF:
		/* The index entry is still needed. */
	case ROW_BUFFERED:
		/* The deletion was buffered. */
	case ROW_NOT_FOUND:
		/* The index entry does not exist, nothing to do. */
		btr_pcur_close(&pcur);
func_exit_no_pcur:
		/* Also reached by goto from the paths above on which pcur
		is not open (never opened, or already closed). */
		mtr_commit(&mtr);
		return(success);
	}

	/* Every row_search_result value returns inside the switch. */
	ut_error;
	return(false);
}
592 
593 /***********************************************************//**
594 Removes a secondary index entry if possible. */
595 UNIV_INLINE MY_ATTRIBUTE((nonnull(1,2)))
596 void
row_purge_remove_sec_if_poss(purge_node_t * node,dict_index_t * index,const dtuple_t * entry)597 row_purge_remove_sec_if_poss(
598 /*=========================*/
599 	purge_node_t*	node,	/*!< in: row purge node */
600 	dict_index_t*	index,	/*!< in: index */
601 	const dtuple_t*	entry)	/*!< in: index entry */
602 {
603 	ibool	success;
604 	ulint	n_tries		= 0;
605 
606 	/*	fputs("Purge: Removing secondary record\n", stderr); */
607 
608 	if (!entry) {
609 		/* The node->row must have lacked some fields of this
610 		index. This is possible when the undo log record was
611 		written before this index was created. */
612 		return;
613 	}
614 
615 	if (row_purge_remove_sec_if_poss_leaf(node, index, entry)) {
616 
617 		return;
618 	}
619 retry:
620 	success = row_purge_remove_sec_if_poss_tree(node, index, entry);
621 	/* The delete operation may fail if we have little
622 	file space left: TODO: easiest to crash the database
623 	and restart with more file space */
624 
625 	if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
626 
627 		n_tries++;
628 
629 		os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
630 
631 		goto retry;
632 	}
633 
634 	ut_a(success);
635 }
636 
637 /** Skip uncommitted virtual indexes on newly added virtual column.
638 @param[in,out]	index	dict index object */
639 static
640 inline
641 void
row_purge_skip_uncommitted_virtual_index(dict_index_t * & index)642 row_purge_skip_uncommitted_virtual_index(
643 	dict_index_t*&	index)
644 {
645 	/* We need to skip virtual indexes which is not
646 	committed yet. It's safe because these indexes are
647 	newly created by alter table, and because we do
648 	not support LOCK=NONE when adding an index on newly
649 	added virtual column.*/
650 	while (index != NULL && dict_index_has_virtual(index)
651 	       && !index->is_committed() && index->has_new_v_col) {
652 		index = dict_table_get_next_index(index);
653 	}
654 }
655 
656 /***********************************************************//**
657 Purges a delete marking of a record.
658 @retval true if the row was not found, or it was successfully removed
659 @retval false the purge needs to be suspended because of
660 running out of file space */
661 static MY_ATTRIBUTE((nonnull, warn_unused_result))
662 bool
row_purge_del_mark(purge_node_t * node)663 row_purge_del_mark(
664 /*===============*/
665 	purge_node_t*	node)	/*!< in/out: row purge node */
666 {
667 	mem_heap_t*	heap;
668 
669 	heap = mem_heap_create(1024);
670 
671 	while (node->index != NULL) {
672 		/* skip corrupted secondary index */
673 		dict_table_skip_corrupt_index(node->index);
674 
675 		row_purge_skip_uncommitted_virtual_index(node->index);
676 
677 		if (!node->index) {
678 			break;
679 		}
680 
681 		if (node->index->type != DICT_FTS) {
682 			dtuple_t*	entry = row_build_index_entry_low(
683 				node->row, NULL, node->index,
684 				heap, ROW_BUILD_FOR_PURGE);
685 			row_purge_remove_sec_if_poss(node, node->index, entry);
686 			mem_heap_empty(heap);
687 		}
688 
689 		node->index = dict_table_get_next_index(node->index);
690 	}
691 
692 	mem_heap_free(heap);
693 
694 	return(row_purge_remove_clust_if_poss(node));
695 }
696 
/***********************************************************//**
Purges an update of an existing record. Also purges an update of a delete
marked record if that record contained an externally stored field.
Two phases: (1) unless the record type is TRX_UNDO_UPD_DEL_REC or
node->cmpl_info has UPD_NODE_NO_ORD_CHANGE set, the old entry is removed
from every remaining index whose ordering fields the update changed;
(2) every externally stored field found in node->update is freed.
Use the row_purge_upd_exist_or_extern() macro to call this function: the
thr parameter exists only in UNIV_DEBUG builds. */
static
void
row_purge_upd_exist_or_extern_func(
/*===============================*/
#ifdef UNIV_DEBUG
	const que_thr_t*thr,		/*!< in: query thread */
#endif /* UNIV_DEBUG */
	purge_node_t*	node,		/*!< in: row purge node */
	trx_undo_rec_t*	undo_rec)	/*!< in: record to purge */
{
	mem_heap_t*	heap;

	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S));

	if (node->rec_type == TRX_UNDO_UPD_DEL_REC
	    || (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {

		goto skip_secondaries;
	}

	/* Scratch heap for the index entries; emptied after each index. */
	heap = mem_heap_create(1024);

	while (node->index != NULL) {
		dict_table_skip_corrupt_index(node->index);

		row_purge_skip_uncommitted_virtual_index(node->index);

		if (!node->index) {
			break;
		}

		if (row_upd_changes_ord_field_binary(node->index, node->update,
						     thr, NULL, NULL)) {
			/* Build the older version of the index entry */
			dtuple_t*	entry = row_build_index_entry_low(
				node->row, NULL, node->index,
				heap, ROW_BUILD_FOR_PURGE);
			row_purge_remove_sec_if_poss(node, node->index, entry);
			mem_heap_empty(heap);
		}

		node->index = dict_table_get_next_index(node->index);
	}

	mem_heap_free(heap);

skip_secondaries:
	/* Free possible externally stored fields */
	for (ulint i = 0; i < upd_get_n_fields(node->update); i++) {

		const upd_field_t*	ufield
			= upd_get_nth_field(node->update, i);

		if (dfield_is_ext(&ufield->new_val)) {
			trx_rseg_t*	rseg;
			buf_block_t*	block;
			ulint		internal_offset;
			byte*		data_field;
			dict_index_t*	index;
			ibool		is_insert;
			ulint		rseg_id;
			ulint		page_no;
			ulint		offset;
			mtr_t		mtr;

			/* We use the fact that new_val points to
			undo_rec and get thus the offset of
			dfield data inside the undo record. Then we
			can calculate from node->roll_ptr the file
			address of the new_val data */

			internal_offset
				= ((const byte*)
				   dfield_get_data(&ufield->new_val))
				- undo_rec;

			ut_a(internal_offset < UNIV_PAGE_SIZE);

			trx_undo_decode_roll_ptr(node->roll_ptr,
						 &is_insert, &rseg_id,
						 &page_no, &offset);

			/* If table is temp then it can't have its undo log
			residing in rollback segment with REDO log enabled. */
			bool is_redo_rseg =
				dict_table_is_temporary(node->table)
				? false : true;
			rseg = trx_sys_get_nth_rseg(
				trx_sys, rseg_id, is_redo_rseg);

			ut_a(rseg != NULL);
			ut_a(rseg->id == rseg_id);

			/* Each externally stored field is freed in a
			mini-transaction of its own. */
			mtr_start(&mtr);

			/* We have to acquire an SX-latch to the clustered
			index tree (exclude other tree changes) */

			index = dict_table_get_first_index(node->table);
			mtr_sx_lock(dict_index_get_lock(index), &mtr);

			mtr.set_named_space(index->space);

			/* NOTE: we must also acquire an X-latch to the
			root page of the tree. We will need it when we
			free pages from the tree. If the tree is of height 1,
			the tree X-latch does NOT protect the root page,
			because it is also a leaf page. Since we will have a
			latch on an undo log page, we would break the
			latching order if we would only later latch the
			root page of such a tree! */

			btr_root_get(index, &mtr);

			/* X-latch the undo log page that holds the
			undo record. */
			block = buf_page_get(
				page_id_t(rseg->space, page_no),
				univ_page_size, RW_X_LATCH, &mtr);

			buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);

			/* Address of the field data inside the undo page. */
			data_field = buf_block_get_frame(block)
				+ offset + internal_offset;

			/* The BTR_EXTERN_FIELD_REF_SIZE bytes at the end of
			the field are passed on as the field reference. */
			ut_a(dfield_get_len(&ufield->new_val)
			     >= BTR_EXTERN_FIELD_REF_SIZE);
			btr_free_externally_stored_field(
				index,
				data_field + dfield_get_len(&ufield->new_val)
				- BTR_EXTERN_FIELD_REF_SIZE,
				NULL, NULL, NULL, 0, false, &mtr);
			mtr_commit(&mtr);
		}
	}
}

/* Wrapper that hides the debug-only thr parameter of
row_purge_upd_exist_or_extern_func(): thr is passed on in UNIV_DEBUG
builds and dropped otherwise. */
#ifdef UNIV_DEBUG
# define row_purge_upd_exist_or_extern(thr,node,undo_rec)	\
	row_purge_upd_exist_or_extern_func(thr,node,undo_rec)
#else /* UNIV_DEBUG */
# define row_purge_upd_exist_or_extern(thr,node,undo_rec)	\
	row_purge_upd_exist_or_extern_func(node,undo_rec)
#endif /* UNIV_DEBUG */
842 
843 /***********************************************************//**
844 Parses the row reference and other info in a modify undo log record.
845 @return true if purge operation required */
846 static
847 bool
row_purge_parse_undo_rec(purge_node_t * node,trx_undo_rec_t * undo_rec,bool * updated_extern,que_thr_t * thr)848 row_purge_parse_undo_rec(
849 /*=====================*/
850 	purge_node_t*		node,		/*!< in: row undo node */
851 	trx_undo_rec_t*		undo_rec,	/*!< in: record to purge */
852 	bool*			updated_extern, /*!< out: true if an externally
853 						stored field was updated */
854 	que_thr_t*		thr)		/*!< in: query thread */
855 {
856 	dict_index_t*	clust_index;
857 	byte*		ptr;
858 	trx_t*		trx;
859 	undo_no_t	undo_no;
860 	table_id_t	table_id;
861 	trx_id_t	trx_id;
862 	roll_ptr_t	roll_ptr;
863 	ulint		info_bits;
864 	ulint		type;
865 
866 	ut_ad(node != NULL);
867 	ut_ad(thr != NULL);
868 
869 	ptr = trx_undo_rec_get_pars(
870 		undo_rec, &type, &node->cmpl_info,
871 		updated_extern, &undo_no, &table_id);
872 
873 	node->rec_type = type;
874 
875 	if (type == TRX_UNDO_UPD_DEL_REC && !*updated_extern) {
876 
877 		return(false);
878 	}
879 
880 	ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
881 					       &info_bits);
882 	node->table = NULL;
883 	node->trx_id = trx_id;
884 
885 	/* Prevent DROP TABLE etc. from running when we are doing the purge
886 	for this row */
887 
888 try_again:
889 	rw_lock_s_lock_inline(dict_operation_lock, 0, __FILE__, __LINE__);
890 
891 	node->table = dict_table_open_on_id(
892 		table_id, FALSE, DICT_TABLE_OP_NORMAL);
893 
894 	if (node->table == NULL) {
895 		/* The table has been dropped: no need to do purge */
896 		goto err_exit;
897 	}
898 
899 	if (fil_space_is_being_truncated(node->table->space)) {
900 
901 #if UNIV_DEBUG
902 		ib::info() << "Record with space id "
903 			   << node->table->space
904 			   << " belongs to table which is being truncated"
905 			   << " therefore skipping this undo record.";
906 #endif
907 		ut_ad(dict_table_is_file_per_table(node->table));
908 		dict_table_close(node->table, FALSE, FALSE);
909 		node->table = NULL;
910 		goto err_exit;
911 	}
912 
913 	if (node->table->n_v_cols && !node->table->vc_templ
914 	    && dict_table_has_indexed_v_cols(node->table)) {
915 		/* Need server fully up for virtual column computation */
916 		if (!mysqld_server_started) {
917 
918 			dict_table_close(node->table, FALSE, FALSE);
919 			rw_lock_s_unlock(dict_operation_lock);
920 			if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
921 				return(false);
922 			}
923 			os_thread_sleep(1000000);
924 			goto try_again;
925 		}
926 
927 		/* Initialize the template for the table */
928 		innobase_init_vc_templ(node->table);
929 	}
930 
931 	/* Disable purging for temp-tables as they are short-lived
932 	and no point in re-organzing such short lived tables */
933 	if (dict_table_is_temporary(node->table)) {
934 		goto close_exit;
935 	}
936 
937 	if (node->table->file_unreadable) {
938 		/* We skip purge of missing .ibd files */
939 
940 		dict_table_close(node->table, FALSE, FALSE);
941 
942 		node->table = NULL;
943 
944 		goto err_exit;
945 	}
946 
947 	clust_index = dict_table_get_first_index(node->table);
948 
949 	if (clust_index == NULL
950 	    || dict_index_is_corrupted(clust_index)) {
951 		/* The table was corrupt in the data dictionary.
952 		dict_set_corrupted() works on an index, and
953 		we do not have an index to call it with. */
954 close_exit:
955 		dict_table_close(node->table, FALSE, FALSE);
956 err_exit:
957 		rw_lock_s_unlock(dict_operation_lock);
958 		return(false);
959 	}
960 
961 	if (type == TRX_UNDO_UPD_EXIST_REC
962 	    && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)
963 	    && !*updated_extern) {
964 
965 		/* Purge requires no changes to indexes: we may return */
966 		goto close_exit;
967 	}
968 
969 	ptr = trx_undo_rec_get_row_ref(
970 		ptr, clust_index, &(node->ref), node->heap);
971 
972 	trx = thr_get_trx(thr);
973 
974 	ptr = trx_undo_update_rec_get_update(
975 		ptr, clust_index, type, trx_id, roll_ptr, info_bits, trx,
976 		node->heap, &(node->update));
977 
978 	/* Read to the partial row the fields that occur in indexes */
979 
980 	if (!(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
981 		ptr = trx_undo_rec_get_partial_row(
982 			ptr, clust_index, &node->row,
983 			type == TRX_UNDO_UPD_DEL_REC,
984 			node->heap);
985 	}
986 
987 	return(true);
988 }
989 
990 /***********************************************************//**
991 Purges the parsed record.
992 @return true if purged, false if skipped */
993 static MY_ATTRIBUTE((nonnull, warn_unused_result))
994 bool
row_purge_record_func(purge_node_t * node,trx_undo_rec_t * undo_rec,const que_thr_t * thr,bool updated_extern)995 row_purge_record_func(
996 /*==================*/
997 	purge_node_t*	node,		/*!< in: row purge node */
998 	trx_undo_rec_t*	undo_rec,	/*!< in: record to purge */
999 #ifdef UNIV_DEBUG
1000 	const que_thr_t*thr,		/*!< in: query thread */
1001 #endif /* UNIV_DEBUG */
1002 	bool		updated_extern)	/*!< in: whether external columns
1003 					were updated */
1004 {
1005 	dict_index_t*	clust_index;
1006 	bool		purged		= true;
1007 
1008 	ut_ad(!node->found_clust);
1009 
1010 	clust_index = dict_table_get_first_index(node->table);
1011 
1012 	node->index = dict_table_get_next_index(clust_index);
1013 	ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr));
1014 
1015 	switch (node->rec_type) {
1016 	case TRX_UNDO_DEL_MARK_REC:
1017 		purged = row_purge_del_mark(node);
1018 		if (!purged) {
1019 			break;
1020 		}
1021 		MONITOR_INC(MONITOR_N_DEL_ROW_PURGE);
1022 		break;
1023 	default:
1024 		if (!updated_extern) {
1025 			break;
1026 		}
1027 		/* fall through */
1028 	case TRX_UNDO_UPD_EXIST_REC:
1029 		row_purge_upd_exist_or_extern(thr, node, undo_rec);
1030 		MONITOR_INC(MONITOR_N_UPD_EXIST_EXTERN);
1031 		break;
1032 	}
1033 
1034 	if (node->found_clust) {
1035 		btr_pcur_close(&node->pcur);
1036 		node->found_clust = FALSE;
1037 	}
1038 
1039 	if (node->table != NULL) {
1040 		dict_table_close(node->table, FALSE, FALSE);
1041 		node->table = NULL;
1042 	}
1043 
1044 	return(purged);
1045 }
1046 
/* Call-site wrapper for row_purge_record_func(). The query thread
parameter of that function is declared only when UNIV_DEBUG is defined,
so the thr argument is dropped in non-debug builds. */
#ifndef UNIV_DEBUG
# define row_purge_record(node,undo_rec,thr,updated_extern)	\
	row_purge_record_func(node,undo_rec,updated_extern)
#else /* !UNIV_DEBUG */
# define row_purge_record(node,undo_rec,thr,updated_extern)	\
	row_purge_record_func(node,undo_rec,thr,updated_extern)
#endif /* !UNIV_DEBUG */
1054 
1055 /***********************************************************//**
1056 Fetches an undo log record and does the purge for the recorded operation.
1057 If none left, or the current purge completed, returns the control to the
1058 parent node, which is always a query thread node. */
1059 static
1060 void
row_purge(purge_node_t * node,trx_undo_rec_t * undo_rec,que_thr_t * thr)1061 row_purge(
1062 /*======*/
1063 	purge_node_t*	node,		/*!< in: row purge node */
1064 	trx_undo_rec_t*	undo_rec,	/*!< in: record to purge */
1065 	que_thr_t*	thr)		/*!< in: query thread */
1066 {
1067 	bool	updated_extern;
1068 
1069 	while (row_purge_parse_undo_rec(node, undo_rec, &updated_extern, thr)) {
1070 
1071 		bool purged;
1072 
1073 		purged = row_purge_record(node, undo_rec, thr, updated_extern);
1074 
1075 		rw_lock_s_unlock(dict_operation_lock);
1076 
1077 		if (purged || srv_shutdown_state != SRV_SHUTDOWN_NONE) {
1078 			return;
1079 		}
1080 
1081 		/* Retry the purge in a second. */
1082 		os_thread_sleep(1000000);
1083 	}
1084 }
1085 
/** Run the destructor of an object in place without releasing its storage.
The call goes through this helper to get around Clang bug#12350.
@param[in,out]	p		Instance on which to call the destructor */
template<typename T>
void
call_destructor(T* p)
{
	T&	instance = *p;

	instance.~T();
}
1094 
1095 /** Reset the purge query thread.
1096 @param[in,out]	thr		The query thread to execute */
1097 static
1098 void
row_purge_end(que_thr_t * thr)1099 row_purge_end(que_thr_t* thr)
1100 {
1101 	purge_node_t*	node;
1102 
1103 	node = static_cast<purge_node_t*>(thr->run_node);
1104 
1105 	ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
1106 
1107 	thr->run_node = que_node_get_parent(node);
1108 
1109 	if (node->recs != NULL) {
1110 
1111 		ut_ad(node->recs->empty());
1112 
1113 		/* Note: We call the destructor explicitly here, but don't
1114 		want to free the memory. The Recs (and rows contained within)
1115 		were allocated from the purge_sys->heap */
1116 
1117 		call_destructor(node->recs);
1118 
1119 		node->recs = NULL;
1120 	}
1121 
1122 	node->done = true;
1123 
1124 	ut_a(thr->run_node != NULL);
1125 
1126 	mem_heap_empty(node->heap);
1127 }
1128 
1129 /** Does the purge operation for a single undo log record. This is a high-level
1130 function used in an SQL execution graph.
1131 @param[in,out]	thr		The query thread to execute
1132 @return query thread to run next or NULL */
1133 que_thr_t*
row_purge_step(que_thr_t * thr)1134 row_purge_step(que_thr_t* thr)
1135 {
1136 	purge_node_t*	node;
1137 
1138 	node = static_cast<purge_node_t*>(thr->run_node);
1139 
1140 	node->table = NULL;
1141 	node->row = NULL;
1142 	node->ref = NULL;
1143 	node->index = NULL;
1144 	node->update = NULL;
1145 	node->found_clust = FALSE;
1146 	node->rec_type = ULINT_UNDEFINED;
1147 	node->cmpl_info = ULINT_UNDEFINED;
1148 
1149 	ut_a(!node->done);
1150 
1151 	ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
1152 
1153 	if (node->recs != NULL && !node->recs->empty()) {
1154 		purge_node_t::rec_t	rec;
1155 
1156 		rec = node->recs->back();
1157 		node->recs->pop_back();
1158 
1159 		node->roll_ptr = rec.roll_ptr;
1160 
1161 		row_purge(node, rec.undo_rec, thr);
1162 
1163 		if (node->recs->empty()) {
1164 			row_purge_end(thr);
1165 		} else {
1166 			thr->run_node = node;
1167 		}
1168 	} else {
1169 		row_purge_end(thr);
1170 	}
1171 
1172 	if (thr->prebuilt !=0 && thr->prebuilt->compress_heap != 0) {
1173 		mem_heap_empty(thr->prebuilt->compress_heap);
1174 	}
1175 
1176 	return(thr);
1177 }
1178 
#ifdef UNIV_DEBUG
/***********************************************************//**
Validate the persistent cursor. The purge node has two references
to the clustered index record - one via the ref member, and the
other via the persistent cursor.  These two references must match
each other if the found_clust flag is set. The check is skipped (and
reported as consistent) when found_clust is not set, when there is no
current index, when the current index is an FTS index, or when the
cursor has no stored old position.
@return true if the stored copy of persistent cursor is consistent
with the ref member.*/
bool
purge_node_t::validate_pcur()
{
	/* Nothing to compare in any of these cases. */
	if (!found_clust
	    || index == NULL
	    || index->type == DICT_FTS
	    || !pcur.old_stored) {

		return(true);
	}

	dict_index_t*	cursor_index = pcur.btr_cur.index;

	ulint*	stored_offsets = rec_get_offsets(
		pcur.old_rec, cursor_index, NULL, pcur.old_n_fields, &heap);

	/* Here we are comparing the purge ref record and the stored initial
	part in persistent cursor. Both cases we store n_uniq fields of the
	cluster index and so it is fine to do the comparison. We note this
	dependency here as pcur and ref belong to different modules. */
	if (cmp_dtuple_rec(ref, pcur.old_rec, stored_offsets) == 0) {
		return(true);
	}

	/* Mismatch: report both sides of the comparison. */
	ib::error() << "Purge node pcur validation failed";
	ib::error() << rec_printer(ref).str();
	ib::error() << rec_printer(pcur.old_rec, stored_offsets).str();

	return(false);
}
#endif /* UNIV_DEBUG */
1227