1 /*****************************************************************************
2 
3 Copyright (c) 1997, 2021, Oracle and/or its affiliates.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file row/row0purge.cc
29 Purge obsolete records
30 
31 Created 3/14/1997 Heikki Tuuri
32 *******************************************************/
33 
34 #include <debug_sync.h>
35 
36 #include "row0purge.h"
37 
38 #ifdef UNIV_NONINL
39 #include "row0purge.ic"
40 #endif
41 
42 #include "fsp0fsp.h"
43 #include "mach0data.h"
44 #include "trx0rseg.h"
45 #include "trx0trx.h"
46 #include "trx0roll.h"
47 #include "trx0undo.h"
48 #include "trx0purge.h"
49 #include "trx0rec.h"
50 #include "que0que.h"
51 #include "row0row.h"
52 #include "row0upd.h"
53 #include "row0vers.h"
54 #include "row0mysql.h"
55 #include "row0log.h"
56 #include "log0log.h"
57 #include "srv0mon.h"
58 #include "srv0start.h"
59 #include "handler.h"
60 #include "ha_innodb.h"
61 #include "fil0fil.h"
62 
/*************************************************************************
IMPORTANT NOTE: Any operation that generates redo MUST check that there
is enough space in the redo log before starting that operation. This is
done by calling log_free_check(). The reason for checking the
availability of the redo log space before the start of the operation is
that we MUST NOT hold any synchronization objects when performing the
check.
If you make a change in this module make sure that no codepath is
introduced where a call to log_free_check() is bypassed. */
72 
73 /** Create a purge node to a query graph.
74 @param[in]	parent	parent node, i.e., a thr node
75 @param[in]	heap	memory heap where created
76 @return own: purge node */
77 purge_node_t*
row_purge_node_create(que_thr_t * parent,mem_heap_t * heap)78 row_purge_node_create(
79 	que_thr_t*	parent,
80 	mem_heap_t*	heap)
81 {
82 	purge_node_t*	node;
83 
84 	ut_ad(parent != NULL);
85 	ut_ad(heap != NULL);
86 
87 	node = static_cast<purge_node_t*>(
88 		mem_heap_zalloc(heap, sizeof(*node)));
89 
90 	node->common.type = QUE_NODE_PURGE;
91 	node->common.parent = parent;
92 	node->done = TRUE;
93 	node->heap = mem_heap_create(256);
94 
95 	return(node);
96 }
97 
98 /***********************************************************//**
99 Repositions the pcur in the purge node on the clustered index record,
100 if found. If the record is not found, close pcur.
101 @return TRUE if the record was found */
102 static
103 ibool
row_purge_reposition_pcur(ulint mode,purge_node_t * node,mtr_t * mtr)104 row_purge_reposition_pcur(
105 /*======================*/
106 	ulint		mode,	/*!< in: latching mode */
107 	purge_node_t*	node,	/*!< in: row purge node */
108 	mtr_t*		mtr)	/*!< in: mtr */
109 {
110 	if (node->found_clust) {
111 		ut_ad(node->validate_pcur());
112 
113 		node->found_clust = btr_pcur_restore_position(mode, &node->pcur, mtr);
114 
115 	} else {
116 		node->found_clust = row_search_on_row_ref(
117 			&node->pcur, mode, node->table, node->ref, mtr);
118 
119 		if (node->found_clust) {
120 			btr_pcur_store_position(&node->pcur, mtr);
121 		}
122 	}
123 
124 	/* Close the current cursor if we fail to position it correctly. */
125 	if (!node->found_clust) {
126 		btr_pcur_close(&node->pcur);
127 	}
128 
129 	return(node->found_clust);
130 }
131 
132 /***********************************************************//**
133 Removes a delete marked clustered index record if possible.
134 @retval true if the row was not found, or it was successfully removed
135 @retval false if the row was modified after the delete marking */
136 static MY_ATTRIBUTE((nonnull, warn_unused_result))
137 bool
row_purge_remove_clust_if_poss_low(purge_node_t * node,ulint mode)138 row_purge_remove_clust_if_poss_low(
139 /*===============================*/
140 	purge_node_t*	node,	/*!< in/out: row purge node */
141 	ulint		mode)	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
142 {
143 	dict_index_t*		index;
144 	bool			success		= true;
145 	mtr_t			mtr;
146 	rec_t*			rec;
147 	mem_heap_t*		heap		= NULL;
148 	ulint*			offsets;
149 	ulint			offsets_[REC_OFFS_NORMAL_SIZE];
150 	rec_offs_init(offsets_);
151 
152 	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S));
153 
154 	index = dict_table_get_first_index(node->table);
155 
156 	log_free_check();
157 	mtr_start(&mtr);
158 	mtr.set_named_space(index->space);
159 
160 	if (!row_purge_reposition_pcur(mode, node, &mtr)) {
161 		/* The record was already removed. */
162 		goto func_exit;
163 	}
164 
165 	rec = btr_pcur_get_rec(&node->pcur);
166 
167 	offsets = rec_get_offsets(
168 		rec, index, offsets_, ULINT_UNDEFINED, &heap);
169 
170 	if (node->roll_ptr != row_get_rec_roll_ptr(rec, index, offsets)) {
171 		/* Someone else has modified the record later: do not remove */
172 		goto func_exit;
173 	}
174 
175 	ut_ad(rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
176 
177 	if (mode == BTR_MODIFY_LEAF) {
178 		success = btr_cur_optimistic_delete(
179 			btr_pcur_get_btr_cur(&node->pcur), 0, &mtr);
180 	} else {
181 		dberr_t	err;
182 		ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE));
183 
184 		DBUG_EXECUTE_IF("pessimistic_row_purge_clust", {
185 			const char act[] =
186 				"now SIGNAL pessimistic_row_purge_clust_pause "
187 				"WAIT_FOR pessimistic_row_purge_clust_continue";
188 			assert(opt_debug_sync_timeout > 0);
189 			assert(!debug_sync_set_action(
190 				       current_thd, STRING_WITH_LEN(act)));
191 		});
192 
193 		btr_cur_pessimistic_delete(
194 			&err, FALSE, btr_pcur_get_btr_cur(&node->pcur), 0,
195 			false, &mtr);
196 
197 		switch (err) {
198 		case DB_SUCCESS:
199 			break;
200 		case DB_OUT_OF_FILE_SPACE:
201 			success = false;
202 			break;
203 		default:
204 			ut_error;
205 		}
206 	}
207 
208 func_exit:
209 	if (heap) {
210 		mem_heap_free(heap);
211 	}
212 
213 	/* Persistent cursor is closed if reposition fails. */
214 	if (node->found_clust) {
215 		btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
216 	} else {
217 		mtr_commit(&mtr);
218 	}
219 
220 	return(success);
221 }
222 
223 /***********************************************************//**
224 Removes a clustered index record if it has not been modified after the delete
225 marking.
226 @retval true if the row was not found, or it was successfully removed
227 @retval false the purge needs to be suspended because of running out
228 of file space. */
229 static MY_ATTRIBUTE((nonnull, warn_unused_result))
230 bool
row_purge_remove_clust_if_poss(purge_node_t * node)231 row_purge_remove_clust_if_poss(
232 /*===========================*/
233 	purge_node_t*	node)	/*!< in/out: row purge node */
234 {
235 	if (row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF)) {
236 		return(true);
237 	}
238 
239 	for (ulint n_tries = 0;
240 	     n_tries < BTR_CUR_RETRY_DELETE_N_TIMES;
241 	     n_tries++) {
242 		if (row_purge_remove_clust_if_poss_low(
243 			    node, BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE)) {
244 			return(true);
245 		}
246 
247 		os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
248 	}
249 
250 	return(false);
251 }
252 
253 /***********************************************************//**
254 Determines if it is possible to remove a secondary index entry.
255 Removal is possible if the secondary index entry does not refer to any
256 not delete marked version of a clustered index record where DB_TRX_ID
257 is newer than the purge view.
258 
259 NOTE: This function should only be called by the purge thread, only
260 while holding a latch on the leaf page of the secondary index entry
261 (or keeping the buffer pool watch on the page).  It is possible that
262 this function first returns true and then false, if a user transaction
263 inserts a record that the secondary index entry would refer to.
264 However, in that case, the user transaction would also re-insert the
265 secondary index entry after purge has removed it and released the leaf
266 page latch.
267 @return true if the secondary index record can be purged */
268 bool
row_purge_poss_sec(purge_node_t * node,dict_index_t * index,const dtuple_t * entry)269 row_purge_poss_sec(
270 /*===============*/
271 	purge_node_t*	node,	/*!< in/out: row purge node */
272 	dict_index_t*	index,	/*!< in: secondary index */
273 	const dtuple_t*	entry)	/*!< in: secondary index entry */
274 {
275 	bool	can_delete;
276 	mtr_t	mtr;
277 
278 	ut_ad(!dict_index_is_clust(index));
279 	mtr_start(&mtr);
280 
281 	can_delete = !row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr)
282 		|| !row_vers_old_has_index_entry(TRUE,
283 						 btr_pcur_get_rec(&node->pcur),
284 						 &mtr, index, entry,
285 						 node->roll_ptr, node->trx_id);
286 
287 	/* Persistent cursor is closed if reposition fails. */
288 	if (node->found_clust) {
289 		btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
290 	} else {
291 		mtr_commit(&mtr);
292 	}
293 
294 	return(can_delete);
295 }
296 
297 /***************************************************************
298 Removes a secondary index entry if possible, by modifying the
299 index tree.  Does not try to buffer the delete.
300 @return TRUE if success or if not found */
301 static MY_ATTRIBUTE((nonnull, warn_unused_result))
302 ibool
row_purge_remove_sec_if_poss_tree(purge_node_t * node,dict_index_t * index,const dtuple_t * entry)303 row_purge_remove_sec_if_poss_tree(
304 /*==============================*/
305 	purge_node_t*	node,	/*!< in: row purge node */
306 	dict_index_t*	index,	/*!< in: index */
307 	const dtuple_t*	entry)	/*!< in: index entry */
308 {
309 	btr_pcur_t		pcur;
310 	btr_cur_t*		btr_cur;
311 	ibool			success	= TRUE;
312 	dberr_t			err;
313 	mtr_t			mtr;
314 	enum row_search_result	search_result;
315 
316 	log_free_check();
317 	mtr_start(&mtr);
318 	mtr.set_named_space(index->space);
319 
320 	if (!index->is_committed()) {
321 		/* The index->online_status may change if the index is
322 		or was being created online, but not committed yet. It
323 		is protected by index->lock. */
324 		mtr_sx_lock(dict_index_get_lock(index), &mtr);
325 
326 		if (dict_index_is_online_ddl(index)) {
327 			/* Online secondary index creation will not
328 			copy any delete-marked records. Therefore
329 			there is nothing to be purged. We must also
330 			skip the purge when a completed index is
331 			dropped by rollback_inplace_alter_table(). */
332 			goto func_exit_no_pcur;
333 		}
334 	} else {
335 		/* For secondary indexes,
336 		index->online_status==ONLINE_INDEX_COMPLETE if
337 		index->is_committed(). */
338 		ut_ad(!dict_index_is_online_ddl(index));
339 	}
340 
341 	search_result = row_search_index_entry(
342 				index, entry,
343 				BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
344 				&pcur, &mtr);
345 
346 	switch (search_result) {
347 	case ROW_NOT_FOUND:
348 		/* Not found.  This is a legitimate condition.  In a
349 		rollback, InnoDB will remove secondary recs that would
350 		be purged anyway.  Then the actual purge will not find
351 		the secondary index record.  Also, the purge itself is
352 		eager: if it comes to consider a secondary index
353 		record, and notices it does not need to exist in the
354 		index, it will remove it.  Then if/when the purge
355 		comes to consider the secondary index record a second
356 		time, it will not exist any more in the index. */
357 
358 		/* fputs("PURGE:........sec entry not found\n", stderr); */
359 		/* dtuple_print(stderr, entry); */
360 		goto func_exit;
361 	case ROW_FOUND:
362 		break;
363 	case ROW_BUFFERED:
364 	case ROW_NOT_DELETED_REF:
365 		/* These are invalid outcomes, because the mode passed
366 		to row_search_index_entry() did not include any of the
367 		flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
368 		ut_error;
369 	}
370 
371 	btr_cur = btr_pcur_get_btr_cur(&pcur);
372 
373 	/* We should remove the index record if no later version of the row,
374 	which cannot be purged yet, requires its existence. If some requires,
375 	we should do nothing. */
376 
377 	if (row_purge_poss_sec(node, index, entry)) {
378 		/* Remove the index record, which should have been
379 		marked for deletion. */
380 		if (!rec_get_deleted_flag(btr_cur_get_rec(btr_cur),
381 					  dict_table_is_comp(index->table))) {
382 			ib::error()
383 				<< "tried to purge non-delete-marked record"
384 				" in index " << index->name
385 				<< " of table " << index->table->name
386 				<< ": tuple: " << *entry
387 				<< ", record: " << rec_index_print(
388 					btr_cur_get_rec(btr_cur), index);
389 
390 			ut_ad(0);
391 
392 			goto func_exit;
393 		}
394 
395 		btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
396 					   false, &mtr);
397 		switch (UNIV_EXPECT(err, DB_SUCCESS)) {
398 		case DB_SUCCESS:
399 			break;
400 		case DB_OUT_OF_FILE_SPACE:
401 			success = FALSE;
402 			break;
403 		default:
404 			ut_error;
405 		}
406 	}
407 
408 func_exit:
409 	btr_pcur_close(&pcur);
410 func_exit_no_pcur:
411 	mtr_commit(&mtr);
412 
413 	return(success);
414 }
415 
416 /***************************************************************
417 Removes a secondary index entry without modifying the index tree,
418 if possible.
419 @retval true if success or if not found
420 @retval false if row_purge_remove_sec_if_poss_tree() should be invoked */
421 static MY_ATTRIBUTE((nonnull, warn_unused_result))
422 bool
row_purge_remove_sec_if_poss_leaf(purge_node_t * node,dict_index_t * index,const dtuple_t * entry)423 row_purge_remove_sec_if_poss_leaf(
424 /*==============================*/
425 	purge_node_t*	node,	/*!< in: row purge node */
426 	dict_index_t*	index,	/*!< in: index */
427 	const dtuple_t*	entry)	/*!< in: index entry */
428 {
429 	mtr_t			mtr;
430 	btr_pcur_t		pcur;
431 	ulint			mode;
432 	enum row_search_result	search_result;
433 	bool			success	= true;
434 
435 	log_free_check();
436 
437 	mtr_start(&mtr);
438 	mtr.set_named_space(index->space);
439 
440 	if (!index->is_committed()) {
441 		/* For uncommitted spatial index, we also skip the purge. */
442 		if (dict_index_is_spatial(index)) {
443 			goto func_exit_no_pcur;
444 		}
445 
446 		/* The index->online_status may change if the the
447 		index is or was being created online, but not
448 		committed yet. It is protected by index->lock. */
449 		mtr_s_lock(dict_index_get_lock(index), &mtr);
450 
451 		if (dict_index_is_online_ddl(index)) {
452 			/* Online secondary index creation will not
453 			copy any delete-marked records. Therefore
454 			there is nothing to be purged. We must also
455 			skip the purge when a completed index is
456 			dropped by rollback_inplace_alter_table(). */
457 			goto func_exit_no_pcur;
458 		}
459 
460 		/* Change buffering is disabled for temporary tables. */
461 		mode = (dict_table_is_temporary(index->table))
462 			? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
463 			: BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
464 			| BTR_DELETE;
465 	} else {
466 		/* For secondary indexes,
467 		index->online_status==ONLINE_INDEX_COMPLETE if
468 		index->is_committed(). */
469 		ut_ad(!dict_index_is_online_ddl(index));
470 
471 		/* Change buffering is disabled for temporary tables
472 		and spatial index. */
473 		mode = (dict_table_is_temporary(index->table)
474 			|| dict_index_is_spatial(index))
475 			? BTR_MODIFY_LEAF
476 			: BTR_MODIFY_LEAF | BTR_DELETE;
477 	}
478 
479 	/* Set the purge node for the call to row_purge_poss_sec(). */
480 	pcur.btr_cur.purge_node = node;
481 	if (dict_index_is_spatial(index)) {
482 		rw_lock_sx_lock(dict_index_get_lock(index));
483 		pcur.btr_cur.thr = NULL;
484 	} else {
485 		/* Set the query thread, so that ibuf_insert_low() will be
486 		able to invoke thd_get_trx(). */
487 		pcur.btr_cur.thr = static_cast<que_thr_t*>(
488 			que_node_get_parent(node));
489 	}
490 
491 	search_result = row_search_index_entry(
492 		index, entry, mode, &pcur, &mtr);
493 
494 	if (dict_index_is_spatial(index)) {
495 		rw_lock_sx_unlock(dict_index_get_lock(index));
496 	}
497 
498 	switch (search_result) {
499 	case ROW_FOUND:
500 		/* Before attempting to purge a record, check
501 		if it is safe to do so. */
502 		if (row_purge_poss_sec(node, index, entry)) {
503 			btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
504 
505 			/* Only delete-marked records should be purged. */
506 			if (!rec_get_deleted_flag(
507 				btr_cur_get_rec(btr_cur),
508 				dict_table_is_comp(index->table))) {
509 
510 				ib::error()
511 					<< "tried to purge non-delete-marked"
512 					" record" " in index " << index->name
513 					<< " of table " << index->table->name
514 					<< ": tuple: " << *entry
515 					<< ", record: "
516 					<< rec_index_print(
517 						btr_cur_get_rec(btr_cur),
518 						index);
519 				ut_ad(0);
520 
521 				btr_pcur_close(&pcur);
522 
523 				goto func_exit_no_pcur;
524 			}
525 
526 			if (dict_index_is_spatial(index)) {
527 				const page_t*   page;
528 				const trx_t*	trx = NULL;
529 
530 				if (btr_cur->rtr_info != NULL
531 				    && btr_cur->rtr_info->thr != NULL) {
532 					trx = thr_get_trx(
533 						btr_cur->rtr_info->thr);
534 				}
535 
536 				page = btr_cur_get_page(btr_cur);
537 
538 				if (!lock_test_prdt_page_lock(
539 					trx,
540 					page_get_space_id(page),
541 					page_get_page_no(page))
542 				     && page_get_n_recs(page) < 2
543 				     && page_get_page_no(page) !=
544 					dict_index_get_page(index)) {
545 					/* this is the last record on page,
546 					and it has a "page" lock on it,
547 					which mean search is still depending
548 					on it, so do not delete */
549 #ifdef UNIV_DEBUG
550 					ib::info() << "skip purging last"
551 						" record on page "
552 						<< page_get_page_no(page)
553 						<< ".";
554 #endif /* UNIV_DEBUG */
555 
556 					btr_pcur_close(&pcur);
557 					mtr_commit(&mtr);
558 					return(success);
559 				}
560 			}
561 
562 			if (!btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {
563 
564 				/* The index entry could not be deleted. */
565 				success = false;
566 			}
567 		}
568 		/* fall through (the index entry is still needed,
569 		or the deletion succeeded) */
570 	case ROW_NOT_DELETED_REF:
571 		/* The index entry is still needed. */
572 	case ROW_BUFFERED:
573 		/* The deletion was buffered. */
574 	case ROW_NOT_FOUND:
575 		/* The index entry does not exist, nothing to do. */
576 		btr_pcur_close(&pcur);
577 func_exit_no_pcur:
578 		mtr_commit(&mtr);
579 		return(success);
580 	}
581 
582 	ut_error;
583 	return(false);
584 }
585 
586 /***********************************************************//**
587 Removes a secondary index entry if possible. */
588 UNIV_INLINE MY_ATTRIBUTE((nonnull(1,2)))
589 void
row_purge_remove_sec_if_poss(purge_node_t * node,dict_index_t * index,const dtuple_t * entry)590 row_purge_remove_sec_if_poss(
591 /*=========================*/
592 	purge_node_t*	node,	/*!< in: row purge node */
593 	dict_index_t*	index,	/*!< in: index */
594 	const dtuple_t*	entry)	/*!< in: index entry */
595 {
596 	ibool	success;
597 	ulint	n_tries		= 0;
598 
599 	/*	fputs("Purge: Removing secondary record\n", stderr); */
600 
601 	if (!entry) {
602 		/* The node->row must have lacked some fields of this
603 		index. This is possible when the undo log record was
604 		written before this index was created. */
605 		return;
606 	}
607 
608 	if (row_purge_remove_sec_if_poss_leaf(node, index, entry)) {
609 
610 		return;
611 	}
612 retry:
613 	success = row_purge_remove_sec_if_poss_tree(node, index, entry);
614 	/* The delete operation may fail if we have little
615 	file space left: TODO: easiest to crash the database
616 	and restart with more file space */
617 
618 	if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
619 
620 		n_tries++;
621 
622 		os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
623 
624 		goto retry;
625 	}
626 
627 	ut_a(success);
628 }
629 
630 /** Skip uncommitted virtual indexes on newly added virtual column.
631 @param[in,out]	index	dict index object */
632 static
633 inline
634 void
row_purge_skip_uncommitted_virtual_index(dict_index_t * & index)635 row_purge_skip_uncommitted_virtual_index(
636 	dict_index_t*&	index)
637 {
638 	/* We need to skip virtual indexes which is not
639 	committed yet. It's safe because these indexes are
640 	newly created by alter table, and because we do
641 	not support LOCK=NONE when adding an index on newly
642 	added virtual column.*/
643 	while (index != NULL && dict_index_has_virtual(index)
644 	       && !index->is_committed() && index->has_new_v_col) {
645 		index = dict_table_get_next_index(index);
646 	}
647 }
648 
649 /***********************************************************//**
650 Purges a delete marking of a record.
651 @retval true if the row was not found, or it was successfully removed
652 @retval false the purge needs to be suspended because of
653 running out of file space */
654 static MY_ATTRIBUTE((nonnull, warn_unused_result))
655 bool
row_purge_del_mark(purge_node_t * node)656 row_purge_del_mark(
657 /*===============*/
658 	purge_node_t*	node)	/*!< in/out: row purge node */
659 {
660 	mem_heap_t*	heap;
661 
662 	heap = mem_heap_create(1024);
663 
664 	while (node->index != NULL) {
665 		/* skip corrupted secondary index */
666 		dict_table_skip_corrupt_index(node->index);
667 
668 		row_purge_skip_uncommitted_virtual_index(node->index);
669 
670 		if (!node->index) {
671 			break;
672 		}
673 
674 		if (node->index->type != DICT_FTS) {
675 			dtuple_t*	entry = row_build_index_entry_low(
676 				node->row, NULL, node->index,
677 				heap, ROW_BUILD_FOR_PURGE);
678 			row_purge_remove_sec_if_poss(node, node->index, entry);
679 			mem_heap_empty(heap);
680 		}
681 
682 		node->index = dict_table_get_next_index(node->index);
683 	}
684 
685 	mem_heap_free(heap);
686 
687 	return(row_purge_remove_clust_if_poss(node));
688 }
689 
690 /***********************************************************//**
691 Purges an update of an existing record. Also purges an update of a delete
692 marked record if that record contained an externally stored field. */
693 static
694 void
row_purge_upd_exist_or_extern_func(const que_thr_t * thr,purge_node_t * node,trx_undo_rec_t * undo_rec)695 row_purge_upd_exist_or_extern_func(
696 /*===============================*/
697 #ifdef UNIV_DEBUG
698 	const que_thr_t*thr,		/*!< in: query thread */
699 #endif /* UNIV_DEBUG */
700 	purge_node_t*	node,		/*!< in: row purge node */
701 	trx_undo_rec_t*	undo_rec)	/*!< in: record to purge */
702 {
703 	mem_heap_t*	heap;
704 
705 	ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S));
706 
707 	if (node->rec_type == TRX_UNDO_UPD_DEL_REC
708 	    || (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
709 
710 		goto skip_secondaries;
711 	}
712 
713 	heap = mem_heap_create(1024);
714 
715 	while (node->index != NULL) {
716 		dict_table_skip_corrupt_index(node->index);
717 
718 		row_purge_skip_uncommitted_virtual_index(node->index);
719 
720 		if (!node->index) {
721 			break;
722 		}
723 
724 		if (row_upd_changes_ord_field_binary(node->index, node->update,
725 						     thr, NULL, NULL)) {
726 			/* Build the older version of the index entry */
727 			dtuple_t*	entry = row_build_index_entry_low(
728 				node->row, NULL, node->index,
729 				heap, ROW_BUILD_FOR_PURGE);
730 			row_purge_remove_sec_if_poss(node, node->index, entry);
731 			mem_heap_empty(heap);
732 		}
733 
734 		node->index = dict_table_get_next_index(node->index);
735 	}
736 
737 	mem_heap_free(heap);
738 
739 skip_secondaries:
740 	/* Free possible externally stored fields */
741 	for (ulint i = 0; i < upd_get_n_fields(node->update); i++) {
742 
743 		const upd_field_t*	ufield
744 			= upd_get_nth_field(node->update, i);
745 
746 		if (dfield_is_ext(&ufield->new_val)) {
747 			trx_rseg_t*	rseg;
748 			buf_block_t*	block;
749 			ulint		internal_offset;
750 			byte*		data_field;
751 			dict_index_t*	index;
752 			ibool		is_insert;
753 			ulint		rseg_id;
754 			ulint		page_no;
755 			ulint		offset;
756 			mtr_t		mtr;
757 
758 			/* We use the fact that new_val points to
759 			undo_rec and get thus the offset of
760 			dfield data inside the undo record. Then we
761 			can calculate from node->roll_ptr the file
762 			address of the new_val data */
763 
764 			internal_offset
765 				= ((const byte*)
766 				   dfield_get_data(&ufield->new_val))
767 				- undo_rec;
768 
769 			ut_a(internal_offset < UNIV_PAGE_SIZE);
770 
771 			trx_undo_decode_roll_ptr(node->roll_ptr,
772 						 &is_insert, &rseg_id,
773 						 &page_no, &offset);
774 
775 			/* If table is temp then it can't have its undo log
776 			residing in rollback segment with REDO log enabled. */
777 			bool is_redo_rseg =
778 				dict_table_is_temporary(node->table)
779 				? false : true;
780 			rseg = trx_sys_get_nth_rseg(
781 				trx_sys, rseg_id, is_redo_rseg);
782 
783 			ut_a(rseg != NULL);
784 			ut_a(rseg->id == rseg_id);
785 
786 			mtr_start(&mtr);
787 
788 			/* We have to acquire an SX-latch to the clustered
789 			index tree (exclude other tree changes) */
790 
791 			index = dict_table_get_first_index(node->table);
792 			mtr_sx_lock(dict_index_get_lock(index), &mtr);
793 
794 			mtr.set_named_space(index->space);
795 
796 			/* NOTE: we must also acquire an X-latch to the
797 			root page of the tree. We will need it when we
798 			free pages from the tree. If the tree is of height 1,
799 			the tree X-latch does NOT protect the root page,
800 			because it is also a leaf page. Since we will have a
801 			latch on an undo log page, we would break the
802 			latching order if we would only later latch the
803 			root page of such a tree! */
804 
805 			btr_root_get(index, &mtr);
806 
807 			block = buf_page_get(
808 				page_id_t(rseg->space, page_no),
809 				univ_page_size, RW_X_LATCH, &mtr);
810 
811 			buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
812 
813 			data_field = buf_block_get_frame(block)
814 				+ offset + internal_offset;
815 
816 			ut_a(dfield_get_len(&ufield->new_val)
817 			     >= BTR_EXTERN_FIELD_REF_SIZE);
818 			btr_free_externally_stored_field(
819 				index,
820 				data_field + dfield_get_len(&ufield->new_val)
821 				- BTR_EXTERN_FIELD_REF_SIZE,
822 				NULL, NULL, NULL, 0, false, &mtr);
823 			mtr_commit(&mtr);
824 		}
825 	}
826 }
827 
/* Call wrapper for row_purge_upd_exist_or_extern_func(). The function takes
the query thread argument only when UNIV_DEBUG is defined, so outside of
debug builds the wrapper drops thr from the argument list. */
#ifdef UNIV_DEBUG
# define row_purge_upd_exist_or_extern(thr, node, undo_rec)	\
	row_purge_upd_exist_or_extern_func(thr, node, undo_rec)
#else /* UNIV_DEBUG */
# define row_purge_upd_exist_or_extern(thr, node, undo_rec)	\
	row_purge_upd_exist_or_extern_func(node, undo_rec)
#endif /* UNIV_DEBUG */
835 
836 /***********************************************************//**
837 Parses the row reference and other info in a modify undo log record.
838 @return true if purge operation required */
839 static
840 bool
row_purge_parse_undo_rec(purge_node_t * node,trx_undo_rec_t * undo_rec,bool * updated_extern,que_thr_t * thr)841 row_purge_parse_undo_rec(
842 /*=====================*/
843 	purge_node_t*		node,		/*!< in: row undo node */
844 	trx_undo_rec_t*		undo_rec,	/*!< in: record to purge */
845 	bool*			updated_extern, /*!< out: true if an externally
846 						stored field was updated */
847 	que_thr_t*		thr)		/*!< in: query thread */
848 {
849 	dict_index_t*	clust_index;
850 	byte*		ptr;
851 	trx_t*		trx;
852 	undo_no_t	undo_no;
853 	table_id_t	table_id;
854 	trx_id_t	trx_id;
855 	roll_ptr_t	roll_ptr;
856 	ulint		info_bits;
857 	ulint		type;
858 
859 	ut_ad(node != NULL);
860 	ut_ad(thr != NULL);
861 
862 	ptr = trx_undo_rec_get_pars(
863 		undo_rec, &type, &node->cmpl_info,
864 		updated_extern, &undo_no, &table_id);
865 
866 	node->rec_type = type;
867 
868 	if (type == TRX_UNDO_UPD_DEL_REC && !*updated_extern) {
869 
870 		return(false);
871 	}
872 
873 	ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
874 					       &info_bits);
875 	node->table = NULL;
876 	node->trx_id = trx_id;
877 
878 	/* Prevent DROP TABLE etc. from running when we are doing the purge
879 	for this row */
880 
881 try_again:
882 	rw_lock_s_lock_inline(dict_operation_lock, 0, __FILE__, __LINE__);
883 
884 	node->table = dict_table_open_on_id(
885 		table_id, FALSE, DICT_TABLE_OP_NORMAL);
886 
887 	if (node->table == NULL) {
888 		/* The table has been dropped: no need to do purge */
889 		goto err_exit;
890 	}
891 
892 	if (fil_space_is_being_truncated(node->table->space)) {
893 
894 #if UNIV_DEBUG
895 		ib::info() << "Record with space id "
896 			   << node->table->space
897 			   << " belongs to table which is being truncated"
898 			   << " therefore skipping this undo record.";
899 #endif
900 		ut_ad(dict_table_is_file_per_table(node->table));
901 		dict_table_close(node->table, FALSE, FALSE);
902 		node->table = NULL;
903 		goto err_exit;
904 	}
905 
906 	if (node->table->n_v_cols && !node->table->vc_templ
907 	    && dict_table_has_indexed_v_cols(node->table)) {
908 		/* Need server fully up for virtual column computation */
909 		if (!mysqld_server_started) {
910 
911 			dict_table_close(node->table, FALSE, FALSE);
912 			rw_lock_s_unlock(dict_operation_lock);
913 			if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
914 				return(false);
915 			}
916 			os_thread_sleep(1000000);
917 			goto try_again;
918 		}
919 
920 		/* Initialize the template for the table */
921 		innobase_init_vc_templ(node->table);
922 	}
923 
924 	/* Disable purging for temp-tables as they are short-lived
925 	and no point in re-organzing such short lived tables */
926 	if (dict_table_is_temporary(node->table)) {
927 		goto close_exit;
928 	}
929 
930 	if (node->table->ibd_file_missing) {
931 		/* We skip purge of missing .ibd files */
932 
933 		dict_table_close(node->table, FALSE, FALSE);
934 
935 		node->table = NULL;
936 
937 		goto err_exit;
938 	}
939 
940 	clust_index = dict_table_get_first_index(node->table);
941 
942 	if (clust_index == NULL
943 	    || dict_index_is_corrupted(clust_index)) {
944 		/* The table was corrupt in the data dictionary.
945 		dict_set_corrupted() works on an index, and
946 		we do not have an index to call it with. */
947 close_exit:
948 		dict_table_close(node->table, FALSE, FALSE);
949 err_exit:
950 		rw_lock_s_unlock(dict_operation_lock);
951 		return(false);
952 	}
953 
954 	if (type == TRX_UNDO_UPD_EXIST_REC
955 	    && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)
956 	    && !*updated_extern) {
957 
958 		/* Purge requires no changes to indexes: we may return */
959 		goto close_exit;
960 	}
961 
962 	ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
963 				       node->heap);
964 
965 	trx = thr_get_trx(thr);
966 
967 	ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
968 					     roll_ptr, info_bits, trx,
969 					     node->heap, &(node->update));
970 
971 	/* Read to the partial row the fields that occur in indexes */
972 
973 	if (!(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
974 		ptr = trx_undo_rec_get_partial_row(
975 			ptr, clust_index, &node->row,
976 			type == TRX_UNDO_UPD_DEL_REC,
977 			node->heap);
978 	}
979 
980 	return(true);
981 }
982 
983 /***********************************************************//**
984 Purges the parsed record.
985 @return true if purged, false if skipped */
986 static MY_ATTRIBUTE((nonnull, warn_unused_result))
987 bool
row_purge_record_func(purge_node_t * node,trx_undo_rec_t * undo_rec,const que_thr_t * thr,bool updated_extern)988 row_purge_record_func(
989 /*==================*/
990 	purge_node_t*	node,		/*!< in: row purge node */
991 	trx_undo_rec_t*	undo_rec,	/*!< in: record to purge */
992 #ifdef UNIV_DEBUG
993 	const que_thr_t*thr,		/*!< in: query thread */
994 #endif /* UNIV_DEBUG */
995 	bool		updated_extern)	/*!< in: whether external columns
996 					were updated */
997 {
998 	dict_index_t*	clust_index;
999 	bool		purged		= true;
1000 
1001 	ut_ad(!node->found_clust);
1002 
1003 	clust_index = dict_table_get_first_index(node->table);
1004 
1005 	node->index = dict_table_get_next_index(clust_index);
1006 	ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr));
1007 
1008 	switch (node->rec_type) {
1009 	case TRX_UNDO_DEL_MARK_REC:
1010 		purged = row_purge_del_mark(node);
1011 		if (!purged) {
1012 			break;
1013 		}
1014 		MONITOR_INC(MONITOR_N_DEL_ROW_PURGE);
1015 		break;
1016 	default:
1017 		if (!updated_extern) {
1018 			break;
1019 		}
1020 		/* fall through */
1021 	case TRX_UNDO_UPD_EXIST_REC:
1022 		row_purge_upd_exist_or_extern(thr, node, undo_rec);
1023 		MONITOR_INC(MONITOR_N_UPD_EXIST_EXTERN);
1024 		break;
1025 	}
1026 
1027 	if (node->found_clust) {
1028 		btr_pcur_close(&node->pcur);
1029 		node->found_clust = FALSE;
1030 	}
1031 
1032 	if (node->table != NULL) {
1033 		dict_table_close(node->table, FALSE, FALSE);
1034 		node->table = NULL;
1035 	}
1036 
1037 	return(purged);
1038 }
1039 
/* row_purge_record() always takes a thr argument, but
row_purge_record_func() has that parameter only when UNIV_DEBUG is
defined. In other builds the macro drops thr before the call. */
#ifndef UNIV_DEBUG
# define row_purge_record(node,undo_rec,thr,updated_extern)	\
	row_purge_record_func(node,undo_rec,updated_extern)
#else /* !UNIV_DEBUG */
# define row_purge_record(node,undo_rec,thr,updated_extern)	\
	row_purge_record_func(node,undo_rec,thr,updated_extern)
#endif /* !UNIV_DEBUG */
1047 
1048 /***********************************************************//**
1049 Fetches an undo log record and does the purge for the recorded operation.
1050 If none left, or the current purge completed, returns the control to the
1051 parent node, which is always a query thread node. */
1052 static MY_ATTRIBUTE((nonnull))
1053 void
row_purge(purge_node_t * node,trx_undo_rec_t * undo_rec,que_thr_t * thr)1054 row_purge(
1055 /*======*/
1056 	purge_node_t*	node,		/*!< in: row purge node */
1057 	trx_undo_rec_t*	undo_rec,	/*!< in: record to purge */
1058 	que_thr_t*	thr)		/*!< in: query thread */
1059 {
1060 	if (undo_rec != &trx_purge_dummy_rec) {
1061 		bool	updated_extern;
1062 
1063 		while (row_purge_parse_undo_rec(
1064 			       node, undo_rec, &updated_extern, thr)) {
1065 
1066 			bool purged = row_purge_record(
1067 				node, undo_rec, thr, updated_extern);
1068 
1069 			rw_lock_s_unlock(dict_operation_lock);
1070 
1071 			if (purged
1072 			    || srv_shutdown_state != SRV_SHUTDOWN_NONE) {
1073 				return;
1074 			}
1075 
1076 			/* Retry the purge in a second. */
1077 			os_thread_sleep(1000000);
1078 		}
1079 	}
1080 }
1081 
1082 /***********************************************************//**
1083 Reset the purge query thread. */
1084 UNIV_INLINE
1085 void
row_purge_end(que_thr_t * thr)1086 row_purge_end(
1087 /*==========*/
1088 	que_thr_t*	thr)	/*!< in: query thread */
1089 {
1090 	purge_node_t*	node;
1091 
1092 	ut_ad(thr);
1093 
1094 	node = static_cast<purge_node_t*>(thr->run_node);
1095 
1096 	ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
1097 
1098 	thr->run_node = que_node_get_parent(node);
1099 
1100 	node->undo_recs = NULL;
1101 
1102 	node->done = TRUE;
1103 
1104 	ut_a(thr->run_node != NULL);
1105 
1106 	mem_heap_empty(node->heap);
1107 }
1108 
1109 /***********************************************************//**
1110 Does the purge operation for a single undo log record. This is a high-level
1111 function used in an SQL execution graph.
1112 @return query thread to run next or NULL */
1113 que_thr_t*
row_purge_step(que_thr_t * thr)1114 row_purge_step(
1115 /*===========*/
1116 	que_thr_t*	thr)	/*!< in: query thread */
1117 {
1118 	purge_node_t*	node;
1119 
1120 	ut_ad(thr);
1121 
1122 	node = static_cast<purge_node_t*>(thr->run_node);
1123 
1124 	node->table = NULL;
1125 	node->row = NULL;
1126 	node->ref = NULL;
1127 	node->index = NULL;
1128 	node->update = NULL;
1129 	node->found_clust = FALSE;
1130 	node->rec_type = ULINT_UNDEFINED;
1131 	node->cmpl_info = ULINT_UNDEFINED;
1132 
1133 	ut_a(!node->done);
1134 
1135 	ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
1136 
1137 	if (!(node->undo_recs == NULL || ib_vector_is_empty(node->undo_recs))) {
1138 		trx_purge_rec_t*purge_rec;
1139 
1140 		purge_rec = static_cast<trx_purge_rec_t*>(
1141 			ib_vector_pop(node->undo_recs));
1142 
1143 		node->roll_ptr = purge_rec->roll_ptr;
1144 
1145 		row_purge(node, purge_rec->undo_rec, thr);
1146 
1147 		if (ib_vector_is_empty(node->undo_recs)) {
1148 			row_purge_end(thr);
1149 		} else {
1150 			thr->run_node = node;
1151 		}
1152 	} else {
1153 		row_purge_end(thr);
1154 	}
1155 
1156 	return(thr);
1157 }
1158 
#ifdef UNIV_DEBUG
/***********************************************************//**
Validate the persistent cursor. The purge node has two references
to the clustered index record - one via the ref member, and the
other via the persistent cursor.  These two references must match
each other if the found_clust flag is set. The check is skipped
(and passes) when found_clust is not set, when index is NULL or an
FTS index, or when the cursor has no stored old record.
@return true if the stored copy of persistent cursor is consistent
with the ref member.*/
bool
purge_node_t::validate_pcur()
{
	/* Cases in which there is nothing to compare. */
	if (!found_clust
	    || index == NULL
	    || index->type == DICT_FTS
	    || !pcur.old_stored) {

		return(true);
	}

	dict_index_t*	clust_index = pcur.btr_cur.index;

	ulint*	offsets = rec_get_offsets(
		pcur.old_rec, clust_index, NULL, pcur.old_n_fields, &heap);

	/* Here we are comparing the purge ref record and the stored initial
	part in persistent cursor. Both cases we store n_uniq fields of the
	cluster index and so it is fine to do the comparison. We note this
	dependency here as pcur and ref belong to different modules. */
	if (cmp_dtuple_rec(ref, pcur.old_rec, offsets) == 0) {
		return(true);
	}

	/* Mismatch: report both records. */
	ib::error() << "Purge node pcur validation failed";
	ib::error() << rec_printer(ref).str();
	ib::error() << rec_printer(pcur.old_rec, offsets).str();

	return(false);
}
#endif /* UNIV_DEBUG */
1207