1 /*****************************************************************************
2
3 Copyright (c) 1997, 2021, Oracle and/or its affiliates.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation. The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file row/row0purge.cc
29 Purge obsolete records
30
31 Created 3/14/1997 Heikki Tuuri
32 *******************************************************/
33
34 #include <debug_sync.h>
35
36 #include "row0purge.h"
37
38 #ifdef UNIV_NONINL
39 #include "row0purge.ic"
40 #endif
41
42 #include "fsp0fsp.h"
43 #include "mach0data.h"
44 #include "trx0rseg.h"
45 #include "trx0trx.h"
46 #include "trx0roll.h"
47 #include "trx0undo.h"
48 #include "trx0purge.h"
49 #include "trx0rec.h"
50 #include "que0que.h"
51 #include "row0row.h"
52 #include "row0upd.h"
53 #include "row0vers.h"
54 #include "row0mysql.h"
55 #include "row0log.h"
56 #include "log0log.h"
57 #include "srv0mon.h"
58 #include "srv0start.h"
59 #include "handler.h"
60 #include "ha_innodb.h"
61 #include "fil0fil.h"
62
/*************************************************************************
IMPORTANT NOTE: Any operation that generates redo MUST check that there
is enough space in the redo log before starting that operation. This is
done by calling log_free_check(). The reason for checking the
availability of the redo log space before the start of the operation is
that we MUST not hold any synchronization objects when performing the
check.
If you make a change in this module make sure that no codepath is
introduced where a call to log_free_check() is bypassed. */
72
73 /** Create a purge node to a query graph.
74 @param[in] parent parent node, i.e., a thr node
75 @param[in] heap memory heap where created
76 @return own: purge node */
77 purge_node_t*
row_purge_node_create(que_thr_t * parent,mem_heap_t * heap)78 row_purge_node_create(
79 que_thr_t* parent,
80 mem_heap_t* heap)
81 {
82 purge_node_t* node;
83
84 ut_ad(parent != NULL);
85 ut_ad(heap != NULL);
86
87 node = static_cast<purge_node_t*>(
88 mem_heap_zalloc(heap, sizeof(*node)));
89
90 node->common.type = QUE_NODE_PURGE;
91 node->common.parent = parent;
92 node->done = TRUE;
93 node->heap = mem_heap_create(256);
94
95 return(node);
96 }
97
98 /***********************************************************//**
99 Repositions the pcur in the purge node on the clustered index record,
100 if found. If the record is not found, close pcur.
101 @return TRUE if the record was found */
102 static
103 ibool
row_purge_reposition_pcur(ulint mode,purge_node_t * node,mtr_t * mtr)104 row_purge_reposition_pcur(
105 /*======================*/
106 ulint mode, /*!< in: latching mode */
107 purge_node_t* node, /*!< in: row purge node */
108 mtr_t* mtr) /*!< in: mtr */
109 {
110 if (node->found_clust) {
111 ut_ad(node->validate_pcur());
112
113 node->found_clust = btr_pcur_restore_position(mode, &node->pcur, mtr);
114
115 } else {
116 node->found_clust = row_search_on_row_ref(
117 &node->pcur, mode, node->table, node->ref, mtr);
118
119 if (node->found_clust) {
120 btr_pcur_store_position(&node->pcur, mtr);
121 }
122 }
123
124 /* Close the current cursor if we fail to position it correctly. */
125 if (!node->found_clust) {
126 btr_pcur_close(&node->pcur);
127 }
128
129 return(node->found_clust);
130 }
131
132 /***********************************************************//**
133 Removes a delete marked clustered index record if possible.
134 @retval true if the row was not found, or it was successfully removed
135 @retval false if the row was modified after the delete marking */
136 static MY_ATTRIBUTE((nonnull, warn_unused_result))
137 bool
row_purge_remove_clust_if_poss_low(purge_node_t * node,ulint mode)138 row_purge_remove_clust_if_poss_low(
139 /*===============================*/
140 purge_node_t* node, /*!< in/out: row purge node */
141 ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
142 {
143 dict_index_t* index;
144 bool success = true;
145 mtr_t mtr;
146 rec_t* rec;
147 mem_heap_t* heap = NULL;
148 ulint* offsets;
149 ulint offsets_[REC_OFFS_NORMAL_SIZE];
150 rec_offs_init(offsets_);
151
152 ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S));
153
154 index = dict_table_get_first_index(node->table);
155
156 log_free_check();
157 mtr_start(&mtr);
158 mtr.set_named_space(index->space);
159
160 if (!row_purge_reposition_pcur(mode, node, &mtr)) {
161 /* The record was already removed. */
162 goto func_exit;
163 }
164
165 rec = btr_pcur_get_rec(&node->pcur);
166
167 offsets = rec_get_offsets(
168 rec, index, offsets_, ULINT_UNDEFINED, &heap);
169
170 if (node->roll_ptr != row_get_rec_roll_ptr(rec, index, offsets)) {
171 /* Someone else has modified the record later: do not remove */
172 goto func_exit;
173 }
174
175 ut_ad(rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
176
177 if (mode == BTR_MODIFY_LEAF) {
178 success = btr_cur_optimistic_delete(
179 btr_pcur_get_btr_cur(&node->pcur), 0, &mtr);
180 } else {
181 dberr_t err;
182 ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE));
183
184 DBUG_EXECUTE_IF("pessimistic_row_purge_clust", {
185 const char act[] =
186 "now SIGNAL pessimistic_row_purge_clust_pause "
187 "WAIT_FOR pessimistic_row_purge_clust_continue";
188 assert(opt_debug_sync_timeout > 0);
189 assert(!debug_sync_set_action(
190 current_thd, STRING_WITH_LEN(act)));
191 });
192
193 btr_cur_pessimistic_delete(
194 &err, FALSE, btr_pcur_get_btr_cur(&node->pcur), 0,
195 false, &mtr);
196
197 switch (err) {
198 case DB_SUCCESS:
199 break;
200 case DB_OUT_OF_FILE_SPACE:
201 success = false;
202 break;
203 default:
204 ut_error;
205 }
206 }
207
208 func_exit:
209 if (heap) {
210 mem_heap_free(heap);
211 }
212
213 /* Persistent cursor is closed if reposition fails. */
214 if (node->found_clust) {
215 btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
216 } else {
217 mtr_commit(&mtr);
218 }
219
220 return(success);
221 }
222
223 /***********************************************************//**
224 Removes a clustered index record if it has not been modified after the delete
225 marking.
226 @retval true if the row was not found, or it was successfully removed
227 @retval false the purge needs to be suspended because of running out
228 of file space. */
229 static MY_ATTRIBUTE((nonnull, warn_unused_result))
230 bool
row_purge_remove_clust_if_poss(purge_node_t * node)231 row_purge_remove_clust_if_poss(
232 /*===========================*/
233 purge_node_t* node) /*!< in/out: row purge node */
234 {
235 if (row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF)) {
236 return(true);
237 }
238
239 for (ulint n_tries = 0;
240 n_tries < BTR_CUR_RETRY_DELETE_N_TIMES;
241 n_tries++) {
242 if (row_purge_remove_clust_if_poss_low(
243 node, BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE)) {
244 return(true);
245 }
246
247 os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
248 }
249
250 return(false);
251 }
252
253 /***********************************************************//**
254 Determines if it is possible to remove a secondary index entry.
255 Removal is possible if the secondary index entry does not refer to any
256 not delete marked version of a clustered index record where DB_TRX_ID
257 is newer than the purge view.
258
259 NOTE: This function should only be called by the purge thread, only
260 while holding a latch on the leaf page of the secondary index entry
261 (or keeping the buffer pool watch on the page). It is possible that
262 this function first returns true and then false, if a user transaction
263 inserts a record that the secondary index entry would refer to.
264 However, in that case, the user transaction would also re-insert the
265 secondary index entry after purge has removed it and released the leaf
266 page latch.
267 @return true if the secondary index record can be purged */
268 bool
row_purge_poss_sec(purge_node_t * node,dict_index_t * index,const dtuple_t * entry)269 row_purge_poss_sec(
270 /*===============*/
271 purge_node_t* node, /*!< in/out: row purge node */
272 dict_index_t* index, /*!< in: secondary index */
273 const dtuple_t* entry) /*!< in: secondary index entry */
274 {
275 bool can_delete;
276 mtr_t mtr;
277
278 ut_ad(!dict_index_is_clust(index));
279 mtr_start(&mtr);
280
281 can_delete = !row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr)
282 || !row_vers_old_has_index_entry(TRUE,
283 btr_pcur_get_rec(&node->pcur),
284 &mtr, index, entry,
285 node->roll_ptr, node->trx_id);
286
287 /* Persistent cursor is closed if reposition fails. */
288 if (node->found_clust) {
289 btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
290 } else {
291 mtr_commit(&mtr);
292 }
293
294 return(can_delete);
295 }
296
297 /***************************************************************
298 Removes a secondary index entry if possible, by modifying the
299 index tree. Does not try to buffer the delete.
300 @return TRUE if success or if not found */
301 static MY_ATTRIBUTE((nonnull, warn_unused_result))
302 ibool
row_purge_remove_sec_if_poss_tree(purge_node_t * node,dict_index_t * index,const dtuple_t * entry)303 row_purge_remove_sec_if_poss_tree(
304 /*==============================*/
305 purge_node_t* node, /*!< in: row purge node */
306 dict_index_t* index, /*!< in: index */
307 const dtuple_t* entry) /*!< in: index entry */
308 {
309 btr_pcur_t pcur;
310 btr_cur_t* btr_cur;
311 ibool success = TRUE;
312 dberr_t err;
313 mtr_t mtr;
314 enum row_search_result search_result;
315
316 log_free_check();
317 mtr_start(&mtr);
318 mtr.set_named_space(index->space);
319
320 if (!index->is_committed()) {
321 /* The index->online_status may change if the index is
322 or was being created online, but not committed yet. It
323 is protected by index->lock. */
324 mtr_sx_lock(dict_index_get_lock(index), &mtr);
325
326 if (dict_index_is_online_ddl(index)) {
327 /* Online secondary index creation will not
328 copy any delete-marked records. Therefore
329 there is nothing to be purged. We must also
330 skip the purge when a completed index is
331 dropped by rollback_inplace_alter_table(). */
332 goto func_exit_no_pcur;
333 }
334 } else {
335 /* For secondary indexes,
336 index->online_status==ONLINE_INDEX_COMPLETE if
337 index->is_committed(). */
338 ut_ad(!dict_index_is_online_ddl(index));
339 }
340
341 search_result = row_search_index_entry(
342 index, entry,
343 BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
344 &pcur, &mtr);
345
346 switch (search_result) {
347 case ROW_NOT_FOUND:
348 /* Not found. This is a legitimate condition. In a
349 rollback, InnoDB will remove secondary recs that would
350 be purged anyway. Then the actual purge will not find
351 the secondary index record. Also, the purge itself is
352 eager: if it comes to consider a secondary index
353 record, and notices it does not need to exist in the
354 index, it will remove it. Then if/when the purge
355 comes to consider the secondary index record a second
356 time, it will not exist any more in the index. */
357
358 /* fputs("PURGE:........sec entry not found\n", stderr); */
359 /* dtuple_print(stderr, entry); */
360 goto func_exit;
361 case ROW_FOUND:
362 break;
363 case ROW_BUFFERED:
364 case ROW_NOT_DELETED_REF:
365 /* These are invalid outcomes, because the mode passed
366 to row_search_index_entry() did not include any of the
367 flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
368 ut_error;
369 }
370
371 btr_cur = btr_pcur_get_btr_cur(&pcur);
372
373 /* We should remove the index record if no later version of the row,
374 which cannot be purged yet, requires its existence. If some requires,
375 we should do nothing. */
376
377 if (row_purge_poss_sec(node, index, entry)) {
378 /* Remove the index record, which should have been
379 marked for deletion. */
380 if (!rec_get_deleted_flag(btr_cur_get_rec(btr_cur),
381 dict_table_is_comp(index->table))) {
382 ib::error()
383 << "tried to purge non-delete-marked record"
384 " in index " << index->name
385 << " of table " << index->table->name
386 << ": tuple: " << *entry
387 << ", record: " << rec_index_print(
388 btr_cur_get_rec(btr_cur), index);
389
390 ut_ad(0);
391
392 goto func_exit;
393 }
394
395 btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
396 false, &mtr);
397 switch (UNIV_EXPECT(err, DB_SUCCESS)) {
398 case DB_SUCCESS:
399 break;
400 case DB_OUT_OF_FILE_SPACE:
401 success = FALSE;
402 break;
403 default:
404 ut_error;
405 }
406 }
407
408 func_exit:
409 btr_pcur_close(&pcur);
410 func_exit_no_pcur:
411 mtr_commit(&mtr);
412
413 return(success);
414 }
415
416 /***************************************************************
417 Removes a secondary index entry without modifying the index tree,
418 if possible.
419 @retval true if success or if not found
420 @retval false if row_purge_remove_sec_if_poss_tree() should be invoked */
421 static MY_ATTRIBUTE((nonnull, warn_unused_result))
422 bool
row_purge_remove_sec_if_poss_leaf(purge_node_t * node,dict_index_t * index,const dtuple_t * entry)423 row_purge_remove_sec_if_poss_leaf(
424 /*==============================*/
425 purge_node_t* node, /*!< in: row purge node */
426 dict_index_t* index, /*!< in: index */
427 const dtuple_t* entry) /*!< in: index entry */
428 {
429 mtr_t mtr;
430 btr_pcur_t pcur;
431 ulint mode;
432 enum row_search_result search_result;
433 bool success = true;
434
435 log_free_check();
436
437 mtr_start(&mtr);
438 mtr.set_named_space(index->space);
439
440 if (!index->is_committed()) {
441 /* For uncommitted spatial index, we also skip the purge. */
442 if (dict_index_is_spatial(index)) {
443 goto func_exit_no_pcur;
444 }
445
446 /* The index->online_status may change if the the
447 index is or was being created online, but not
448 committed yet. It is protected by index->lock. */
449 mtr_s_lock(dict_index_get_lock(index), &mtr);
450
451 if (dict_index_is_online_ddl(index)) {
452 /* Online secondary index creation will not
453 copy any delete-marked records. Therefore
454 there is nothing to be purged. We must also
455 skip the purge when a completed index is
456 dropped by rollback_inplace_alter_table(). */
457 goto func_exit_no_pcur;
458 }
459
460 /* Change buffering is disabled for temporary tables. */
461 mode = (dict_table_is_temporary(index->table))
462 ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
463 : BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
464 | BTR_DELETE;
465 } else {
466 /* For secondary indexes,
467 index->online_status==ONLINE_INDEX_COMPLETE if
468 index->is_committed(). */
469 ut_ad(!dict_index_is_online_ddl(index));
470
471 /* Change buffering is disabled for temporary tables
472 and spatial index. */
473 mode = (dict_table_is_temporary(index->table)
474 || dict_index_is_spatial(index))
475 ? BTR_MODIFY_LEAF
476 : BTR_MODIFY_LEAF | BTR_DELETE;
477 }
478
479 /* Set the purge node for the call to row_purge_poss_sec(). */
480 pcur.btr_cur.purge_node = node;
481 if (dict_index_is_spatial(index)) {
482 rw_lock_sx_lock(dict_index_get_lock(index));
483 pcur.btr_cur.thr = NULL;
484 } else {
485 /* Set the query thread, so that ibuf_insert_low() will be
486 able to invoke thd_get_trx(). */
487 pcur.btr_cur.thr = static_cast<que_thr_t*>(
488 que_node_get_parent(node));
489 }
490
491 search_result = row_search_index_entry(
492 index, entry, mode, &pcur, &mtr);
493
494 if (dict_index_is_spatial(index)) {
495 rw_lock_sx_unlock(dict_index_get_lock(index));
496 }
497
498 switch (search_result) {
499 case ROW_FOUND:
500 /* Before attempting to purge a record, check
501 if it is safe to do so. */
502 if (row_purge_poss_sec(node, index, entry)) {
503 btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
504
505 /* Only delete-marked records should be purged. */
506 if (!rec_get_deleted_flag(
507 btr_cur_get_rec(btr_cur),
508 dict_table_is_comp(index->table))) {
509
510 ib::error()
511 << "tried to purge non-delete-marked"
512 " record" " in index " << index->name
513 << " of table " << index->table->name
514 << ": tuple: " << *entry
515 << ", record: "
516 << rec_index_print(
517 btr_cur_get_rec(btr_cur),
518 index);
519 ut_ad(0);
520
521 btr_pcur_close(&pcur);
522
523 goto func_exit_no_pcur;
524 }
525
526 if (dict_index_is_spatial(index)) {
527 const page_t* page;
528 const trx_t* trx = NULL;
529
530 if (btr_cur->rtr_info != NULL
531 && btr_cur->rtr_info->thr != NULL) {
532 trx = thr_get_trx(
533 btr_cur->rtr_info->thr);
534 }
535
536 page = btr_cur_get_page(btr_cur);
537
538 if (!lock_test_prdt_page_lock(
539 trx,
540 page_get_space_id(page),
541 page_get_page_no(page))
542 && page_get_n_recs(page) < 2
543 && page_get_page_no(page) !=
544 dict_index_get_page(index)) {
545 /* this is the last record on page,
546 and it has a "page" lock on it,
547 which mean search is still depending
548 on it, so do not delete */
549 #ifdef UNIV_DEBUG
550 ib::info() << "skip purging last"
551 " record on page "
552 << page_get_page_no(page)
553 << ".";
554 #endif /* UNIV_DEBUG */
555
556 btr_pcur_close(&pcur);
557 mtr_commit(&mtr);
558 return(success);
559 }
560 }
561
562 if (!btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {
563
564 /* The index entry could not be deleted. */
565 success = false;
566 }
567 }
568 /* fall through (the index entry is still needed,
569 or the deletion succeeded) */
570 case ROW_NOT_DELETED_REF:
571 /* The index entry is still needed. */
572 case ROW_BUFFERED:
573 /* The deletion was buffered. */
574 case ROW_NOT_FOUND:
575 /* The index entry does not exist, nothing to do. */
576 btr_pcur_close(&pcur);
577 func_exit_no_pcur:
578 mtr_commit(&mtr);
579 return(success);
580 }
581
582 ut_error;
583 return(false);
584 }
585
586 /***********************************************************//**
587 Removes a secondary index entry if possible. */
588 UNIV_INLINE MY_ATTRIBUTE((nonnull(1,2)))
589 void
row_purge_remove_sec_if_poss(purge_node_t * node,dict_index_t * index,const dtuple_t * entry)590 row_purge_remove_sec_if_poss(
591 /*=========================*/
592 purge_node_t* node, /*!< in: row purge node */
593 dict_index_t* index, /*!< in: index */
594 const dtuple_t* entry) /*!< in: index entry */
595 {
596 ibool success;
597 ulint n_tries = 0;
598
599 /* fputs("Purge: Removing secondary record\n", stderr); */
600
601 if (!entry) {
602 /* The node->row must have lacked some fields of this
603 index. This is possible when the undo log record was
604 written before this index was created. */
605 return;
606 }
607
608 if (row_purge_remove_sec_if_poss_leaf(node, index, entry)) {
609
610 return;
611 }
612 retry:
613 success = row_purge_remove_sec_if_poss_tree(node, index, entry);
614 /* The delete operation may fail if we have little
615 file space left: TODO: easiest to crash the database
616 and restart with more file space */
617
618 if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
619
620 n_tries++;
621
622 os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
623
624 goto retry;
625 }
626
627 ut_a(success);
628 }
629
630 /** Skip uncommitted virtual indexes on newly added virtual column.
631 @param[in,out] index dict index object */
632 static
633 inline
634 void
row_purge_skip_uncommitted_virtual_index(dict_index_t * & index)635 row_purge_skip_uncommitted_virtual_index(
636 dict_index_t*& index)
637 {
638 /* We need to skip virtual indexes which is not
639 committed yet. It's safe because these indexes are
640 newly created by alter table, and because we do
641 not support LOCK=NONE when adding an index on newly
642 added virtual column.*/
643 while (index != NULL && dict_index_has_virtual(index)
644 && !index->is_committed() && index->has_new_v_col) {
645 index = dict_table_get_next_index(index);
646 }
647 }
648
649 /***********************************************************//**
650 Purges a delete marking of a record.
651 @retval true if the row was not found, or it was successfully removed
652 @retval false the purge needs to be suspended because of
653 running out of file space */
654 static MY_ATTRIBUTE((nonnull, warn_unused_result))
655 bool
row_purge_del_mark(purge_node_t * node)656 row_purge_del_mark(
657 /*===============*/
658 purge_node_t* node) /*!< in/out: row purge node */
659 {
660 mem_heap_t* heap;
661
662 heap = mem_heap_create(1024);
663
664 while (node->index != NULL) {
665 /* skip corrupted secondary index */
666 dict_table_skip_corrupt_index(node->index);
667
668 row_purge_skip_uncommitted_virtual_index(node->index);
669
670 if (!node->index) {
671 break;
672 }
673
674 if (node->index->type != DICT_FTS) {
675 dtuple_t* entry = row_build_index_entry_low(
676 node->row, NULL, node->index,
677 heap, ROW_BUILD_FOR_PURGE);
678 row_purge_remove_sec_if_poss(node, node->index, entry);
679 mem_heap_empty(heap);
680 }
681
682 node->index = dict_table_get_next_index(node->index);
683 }
684
685 mem_heap_free(heap);
686
687 return(row_purge_remove_clust_if_poss(node));
688 }
689
690 /***********************************************************//**
691 Purges an update of an existing record. Also purges an update of a delete
692 marked record if that record contained an externally stored field. */
693 static
694 void
row_purge_upd_exist_or_extern_func(const que_thr_t * thr,purge_node_t * node,trx_undo_rec_t * undo_rec)695 row_purge_upd_exist_or_extern_func(
696 /*===============================*/
697 #ifdef UNIV_DEBUG
698 const que_thr_t*thr, /*!< in: query thread */
699 #endif /* UNIV_DEBUG */
700 purge_node_t* node, /*!< in: row purge node */
701 trx_undo_rec_t* undo_rec) /*!< in: record to purge */
702 {
703 mem_heap_t* heap;
704
705 ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S));
706
707 if (node->rec_type == TRX_UNDO_UPD_DEL_REC
708 || (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
709
710 goto skip_secondaries;
711 }
712
713 heap = mem_heap_create(1024);
714
715 while (node->index != NULL) {
716 dict_table_skip_corrupt_index(node->index);
717
718 row_purge_skip_uncommitted_virtual_index(node->index);
719
720 if (!node->index) {
721 break;
722 }
723
724 if (row_upd_changes_ord_field_binary(node->index, node->update,
725 thr, NULL, NULL)) {
726 /* Build the older version of the index entry */
727 dtuple_t* entry = row_build_index_entry_low(
728 node->row, NULL, node->index,
729 heap, ROW_BUILD_FOR_PURGE);
730 row_purge_remove_sec_if_poss(node, node->index, entry);
731 mem_heap_empty(heap);
732 }
733
734 node->index = dict_table_get_next_index(node->index);
735 }
736
737 mem_heap_free(heap);
738
739 skip_secondaries:
740 /* Free possible externally stored fields */
741 for (ulint i = 0; i < upd_get_n_fields(node->update); i++) {
742
743 const upd_field_t* ufield
744 = upd_get_nth_field(node->update, i);
745
746 if (dfield_is_ext(&ufield->new_val)) {
747 trx_rseg_t* rseg;
748 buf_block_t* block;
749 ulint internal_offset;
750 byte* data_field;
751 dict_index_t* index;
752 ibool is_insert;
753 ulint rseg_id;
754 ulint page_no;
755 ulint offset;
756 mtr_t mtr;
757
758 /* We use the fact that new_val points to
759 undo_rec and get thus the offset of
760 dfield data inside the undo record. Then we
761 can calculate from node->roll_ptr the file
762 address of the new_val data */
763
764 internal_offset
765 = ((const byte*)
766 dfield_get_data(&ufield->new_val))
767 - undo_rec;
768
769 ut_a(internal_offset < UNIV_PAGE_SIZE);
770
771 trx_undo_decode_roll_ptr(node->roll_ptr,
772 &is_insert, &rseg_id,
773 &page_no, &offset);
774
775 /* If table is temp then it can't have its undo log
776 residing in rollback segment with REDO log enabled. */
777 bool is_redo_rseg =
778 dict_table_is_temporary(node->table)
779 ? false : true;
780 rseg = trx_sys_get_nth_rseg(
781 trx_sys, rseg_id, is_redo_rseg);
782
783 ut_a(rseg != NULL);
784 ut_a(rseg->id == rseg_id);
785
786 mtr_start(&mtr);
787
788 /* We have to acquire an SX-latch to the clustered
789 index tree (exclude other tree changes) */
790
791 index = dict_table_get_first_index(node->table);
792 mtr_sx_lock(dict_index_get_lock(index), &mtr);
793
794 mtr.set_named_space(index->space);
795
796 /* NOTE: we must also acquire an X-latch to the
797 root page of the tree. We will need it when we
798 free pages from the tree. If the tree is of height 1,
799 the tree X-latch does NOT protect the root page,
800 because it is also a leaf page. Since we will have a
801 latch on an undo log page, we would break the
802 latching order if we would only later latch the
803 root page of such a tree! */
804
805 btr_root_get(index, &mtr);
806
807 block = buf_page_get(
808 page_id_t(rseg->space, page_no),
809 univ_page_size, RW_X_LATCH, &mtr);
810
811 buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
812
813 data_field = buf_block_get_frame(block)
814 + offset + internal_offset;
815
816 ut_a(dfield_get_len(&ufield->new_val)
817 >= BTR_EXTERN_FIELD_REF_SIZE);
818 btr_free_externally_stored_field(
819 index,
820 data_field + dfield_get_len(&ufield->new_val)
821 - BTR_EXTERN_FIELD_REF_SIZE,
822 NULL, NULL, NULL, 0, false, &mtr);
823 mtr_commit(&mtr);
824 }
825 }
826 }
827
828 #ifdef UNIV_DEBUG
829 # define row_purge_upd_exist_or_extern(thr,node,undo_rec) \
830 row_purge_upd_exist_or_extern_func(thr,node,undo_rec)
831 #else /* UNIV_DEBUG */
832 # define row_purge_upd_exist_or_extern(thr,node,undo_rec) \
833 row_purge_upd_exist_or_extern_func(node,undo_rec)
834 #endif /* UNIV_DEBUG */
835
836 /***********************************************************//**
837 Parses the row reference and other info in a modify undo log record.
838 @return true if purge operation required */
839 static
840 bool
row_purge_parse_undo_rec(purge_node_t * node,trx_undo_rec_t * undo_rec,bool * updated_extern,que_thr_t * thr)841 row_purge_parse_undo_rec(
842 /*=====================*/
843 purge_node_t* node, /*!< in: row undo node */
844 trx_undo_rec_t* undo_rec, /*!< in: record to purge */
845 bool* updated_extern, /*!< out: true if an externally
846 stored field was updated */
847 que_thr_t* thr) /*!< in: query thread */
848 {
849 dict_index_t* clust_index;
850 byte* ptr;
851 trx_t* trx;
852 undo_no_t undo_no;
853 table_id_t table_id;
854 trx_id_t trx_id;
855 roll_ptr_t roll_ptr;
856 ulint info_bits;
857 ulint type;
858
859 ut_ad(node != NULL);
860 ut_ad(thr != NULL);
861
862 ptr = trx_undo_rec_get_pars(
863 undo_rec, &type, &node->cmpl_info,
864 updated_extern, &undo_no, &table_id);
865
866 node->rec_type = type;
867
868 if (type == TRX_UNDO_UPD_DEL_REC && !*updated_extern) {
869
870 return(false);
871 }
872
873 ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
874 &info_bits);
875 node->table = NULL;
876 node->trx_id = trx_id;
877
878 /* Prevent DROP TABLE etc. from running when we are doing the purge
879 for this row */
880
881 try_again:
882 rw_lock_s_lock_inline(dict_operation_lock, 0, __FILE__, __LINE__);
883
884 node->table = dict_table_open_on_id(
885 table_id, FALSE, DICT_TABLE_OP_NORMAL);
886
887 if (node->table == NULL) {
888 /* The table has been dropped: no need to do purge */
889 goto err_exit;
890 }
891
892 if (fil_space_is_being_truncated(node->table->space)) {
893
894 #if UNIV_DEBUG
895 ib::info() << "Record with space id "
896 << node->table->space
897 << " belongs to table which is being truncated"
898 << " therefore skipping this undo record.";
899 #endif
900 ut_ad(dict_table_is_file_per_table(node->table));
901 dict_table_close(node->table, FALSE, FALSE);
902 node->table = NULL;
903 goto err_exit;
904 }
905
906 if (node->table->n_v_cols && !node->table->vc_templ
907 && dict_table_has_indexed_v_cols(node->table)) {
908 /* Need server fully up for virtual column computation */
909 if (!mysqld_server_started) {
910
911 dict_table_close(node->table, FALSE, FALSE);
912 rw_lock_s_unlock(dict_operation_lock);
913 if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
914 return(false);
915 }
916 os_thread_sleep(1000000);
917 goto try_again;
918 }
919
920 /* Initialize the template for the table */
921 innobase_init_vc_templ(node->table);
922 }
923
924 /* Disable purging for temp-tables as they are short-lived
925 and no point in re-organzing such short lived tables */
926 if (dict_table_is_temporary(node->table)) {
927 goto close_exit;
928 }
929
930 if (node->table->ibd_file_missing) {
931 /* We skip purge of missing .ibd files */
932
933 dict_table_close(node->table, FALSE, FALSE);
934
935 node->table = NULL;
936
937 goto err_exit;
938 }
939
940 clust_index = dict_table_get_first_index(node->table);
941
942 if (clust_index == NULL
943 || dict_index_is_corrupted(clust_index)) {
944 /* The table was corrupt in the data dictionary.
945 dict_set_corrupted() works on an index, and
946 we do not have an index to call it with. */
947 close_exit:
948 dict_table_close(node->table, FALSE, FALSE);
949 err_exit:
950 rw_lock_s_unlock(dict_operation_lock);
951 return(false);
952 }
953
954 if (type == TRX_UNDO_UPD_EXIST_REC
955 && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)
956 && !*updated_extern) {
957
958 /* Purge requires no changes to indexes: we may return */
959 goto close_exit;
960 }
961
962 ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
963 node->heap);
964
965 trx = thr_get_trx(thr);
966
967 ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
968 roll_ptr, info_bits, trx,
969 node->heap, &(node->update));
970
971 /* Read to the partial row the fields that occur in indexes */
972
973 if (!(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
974 ptr = trx_undo_rec_get_partial_row(
975 ptr, clust_index, &node->row,
976 type == TRX_UNDO_UPD_DEL_REC,
977 node->heap);
978 }
979
980 return(true);
981 }
982
983 /***********************************************************//**
984 Purges the parsed record.
985 @return true if purged, false if skipped */
986 static MY_ATTRIBUTE((nonnull, warn_unused_result))
987 bool
row_purge_record_func(purge_node_t * node,trx_undo_rec_t * undo_rec,const que_thr_t * thr,bool updated_extern)988 row_purge_record_func(
989 /*==================*/
990 purge_node_t* node, /*!< in: row purge node */
991 trx_undo_rec_t* undo_rec, /*!< in: record to purge */
992 #ifdef UNIV_DEBUG
993 const que_thr_t*thr, /*!< in: query thread */
994 #endif /* UNIV_DEBUG */
995 bool updated_extern) /*!< in: whether external columns
996 were updated */
997 {
998 dict_index_t* clust_index;
999 bool purged = true;
1000
1001 ut_ad(!node->found_clust);
1002
1003 clust_index = dict_table_get_first_index(node->table);
1004
1005 node->index = dict_table_get_next_index(clust_index);
1006 ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr));
1007
1008 switch (node->rec_type) {
1009 case TRX_UNDO_DEL_MARK_REC:
1010 purged = row_purge_del_mark(node);
1011 if (!purged) {
1012 break;
1013 }
1014 MONITOR_INC(MONITOR_N_DEL_ROW_PURGE);
1015 break;
1016 default:
1017 if (!updated_extern) {
1018 break;
1019 }
1020 /* fall through */
1021 case TRX_UNDO_UPD_EXIST_REC:
1022 row_purge_upd_exist_or_extern(thr, node, undo_rec);
1023 MONITOR_INC(MONITOR_N_UPD_EXIST_EXTERN);
1024 break;
1025 }
1026
1027 if (node->found_clust) {
1028 btr_pcur_close(&node->pcur);
1029 node->found_clust = FALSE;
1030 }
1031
1032 if (node->table != NULL) {
1033 dict_table_close(node->table, FALSE, FALSE);
1034 node->table = NULL;
1035 }
1036
1037 return(purged);
1038 }
1039
/* Call wrapper for row_purge_record_func(). The function declares its
query thread parameter only under UNIV_DEBUG, so that argument is
forwarded in debug builds and dropped otherwise. */
#ifndef UNIV_DEBUG
# define row_purge_record(purge_node,rec,query_thr,ext_updated)	\
	row_purge_record_func(purge_node,rec,ext_updated)
#else /* !UNIV_DEBUG */
# define row_purge_record(purge_node,rec,query_thr,ext_updated)	\
	row_purge_record_func(purge_node,rec,query_thr,ext_updated)
#endif /* !UNIV_DEBUG */
1047
1048 /***********************************************************//**
1049 Fetches an undo log record and does the purge for the recorded operation.
1050 If none left, or the current purge completed, returns the control to the
1051 parent node, which is always a query thread node. */
1052 static MY_ATTRIBUTE((nonnull))
1053 void
row_purge(purge_node_t * node,trx_undo_rec_t * undo_rec,que_thr_t * thr)1054 row_purge(
1055 /*======*/
1056 purge_node_t* node, /*!< in: row purge node */
1057 trx_undo_rec_t* undo_rec, /*!< in: record to purge */
1058 que_thr_t* thr) /*!< in: query thread */
1059 {
1060 if (undo_rec != &trx_purge_dummy_rec) {
1061 bool updated_extern;
1062
1063 while (row_purge_parse_undo_rec(
1064 node, undo_rec, &updated_extern, thr)) {
1065
1066 bool purged = row_purge_record(
1067 node, undo_rec, thr, updated_extern);
1068
1069 rw_lock_s_unlock(dict_operation_lock);
1070
1071 if (purged
1072 || srv_shutdown_state != SRV_SHUTDOWN_NONE) {
1073 return;
1074 }
1075
1076 /* Retry the purge in a second. */
1077 os_thread_sleep(1000000);
1078 }
1079 }
1080 }
1081
1082 /***********************************************************//**
1083 Reset the purge query thread. */
1084 UNIV_INLINE
1085 void
row_purge_end(que_thr_t * thr)1086 row_purge_end(
1087 /*==========*/
1088 que_thr_t* thr) /*!< in: query thread */
1089 {
1090 purge_node_t* node;
1091
1092 ut_ad(thr);
1093
1094 node = static_cast<purge_node_t*>(thr->run_node);
1095
1096 ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
1097
1098 thr->run_node = que_node_get_parent(node);
1099
1100 node->undo_recs = NULL;
1101
1102 node->done = TRUE;
1103
1104 ut_a(thr->run_node != NULL);
1105
1106 mem_heap_empty(node->heap);
1107 }
1108
1109 /***********************************************************//**
1110 Does the purge operation for a single undo log record. This is a high-level
1111 function used in an SQL execution graph.
1112 @return query thread to run next or NULL */
1113 que_thr_t*
row_purge_step(que_thr_t * thr)1114 row_purge_step(
1115 /*===========*/
1116 que_thr_t* thr) /*!< in: query thread */
1117 {
1118 purge_node_t* node;
1119
1120 ut_ad(thr);
1121
1122 node = static_cast<purge_node_t*>(thr->run_node);
1123
1124 node->table = NULL;
1125 node->row = NULL;
1126 node->ref = NULL;
1127 node->index = NULL;
1128 node->update = NULL;
1129 node->found_clust = FALSE;
1130 node->rec_type = ULINT_UNDEFINED;
1131 node->cmpl_info = ULINT_UNDEFINED;
1132
1133 ut_a(!node->done);
1134
1135 ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
1136
1137 if (!(node->undo_recs == NULL || ib_vector_is_empty(node->undo_recs))) {
1138 trx_purge_rec_t*purge_rec;
1139
1140 purge_rec = static_cast<trx_purge_rec_t*>(
1141 ib_vector_pop(node->undo_recs));
1142
1143 node->roll_ptr = purge_rec->roll_ptr;
1144
1145 row_purge(node, purge_rec->undo_rec, thr);
1146
1147 if (ib_vector_is_empty(node->undo_recs)) {
1148 row_purge_end(thr);
1149 } else {
1150 thr->run_node = node;
1151 }
1152 } else {
1153 row_purge_end(thr);
1154 }
1155
1156 return(thr);
1157 }
1158
#ifdef UNIV_DEBUG
/***********************************************************//**
Validate the persistent cursor. The purge node has two references
to the clustered index record - one via the ref member, and the
other via the persistent cursor. These two references must match
each other if the found_clust flag is set. The check is skipped, and
true returned, when found_clust is not set, when index is NULL or of
type DICT_FTS, or when the cursor has no stored old position.
@return true if the stored copy of persistent cursor is consistent
with the ref member.*/
bool
purge_node_t::validate_pcur()
{
	/* The tests are evaluated left to right, so index is only
	dereferenced after the NULL check. */
	if (!found_clust
	    || index == NULL
	    || index->type == DICT_FTS
	    || !pcur.old_stored) {
		return(true);
	}

	dict_index_t*	stored_index = pcur.btr_cur.index;

	ulint*	rec_offsets = rec_get_offsets(
		pcur.old_rec, stored_index, NULL, pcur.old_n_fields, &heap);

	/* Here we are comparing the purge ref record and the stored initial
	part in persistent cursor. Both cases we store n_uniq fields of the
	cluster index and so it is fine to do the comparison. We note this
	dependency here as pcur and ref belong to different modules. */
	if (cmp_dtuple_rec(ref, pcur.old_rec, rec_offsets) == 0) {
		return(true);
	}

	/* Mismatch: report both references. */
	ib::error() << "Purge node pcur validation failed";
	ib::error() << rec_printer(ref).str();
	ib::error() << rec_printer(pcur.old_rec, rec_offsets).str();

	return(false);
}
#endif /* UNIV_DEBUG */
1207