1 /*****************************************************************************
2
3 Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2017, 2021, MariaDB Corporation.
5
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17
18 *****************************************************************************/
19
20 /**************************************************//**
21 @file row/row0umod.cc
22 Undo modify of a row
23
24 Created 2/27/1997 Heikki Tuuri
25 *******************************************************/
26
27 #include "row0umod.h"
28 #include "dict0dict.h"
29 #include "dict0stats.h"
30 #include "dict0boot.h"
31 #include "trx0undo.h"
32 #include "trx0roll.h"
33 #include "trx0purge.h"
34 #include "btr0btr.h"
35 #include "mach0data.h"
36 #include "ibuf0ibuf.h"
37 #include "row0undo.h"
38 #include "row0vers.h"
39 #include "row0log.h"
40 #include "trx0trx.h"
41 #include "trx0rec.h"
42 #include "row0row.h"
43 #include "row0upd.h"
44 #include "que0que.h"
45 #include "log0log.h"
46
47 /* Considerations on undoing a modify operation.
48 (1) Undoing a delete marking: all index records should be found. Some of
49 them may have delete mark already FALSE, if the delete mark operation was
50 stopped underway, or if the undo operation ended prematurely because of a
51 system crash.
52 (2) Undoing an update of a delete unmarked record: the newer version of
53 an updated secondary index entry should be removed if no prior version
54 of the clustered index record requires its existence. Otherwise, it should
55 be delete marked.
56 (3) Undoing an update of a delete marked record. In this kind of update a
57 delete marked clustered index record was delete unmarked and possibly also
58 some of its fields were changed. Now, it is possible that the delete marked
59 version has become obsolete at the time the undo is started. */
60
61 /*************************************************************************
IMPORTANT NOTE: Any operation that generates redo MUST check that there
is enough space in the redo log before that operation. This is
done by calling log_free_check(). The reason for checking the
availability of the redo log space before the start of the operation is
that we MUST not hold any synchronization objects when performing the
check.
68 If you make a change in this module make sure that no codepath is
69 introduced where a call to log_free_check() is bypassed. */
70
/***********************************************************//**
Undoes a modify in a clustered index record.
@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_undo_mod_clust_low(
/*===================*/
	undo_node_t*	node,	/*!< in: row undo node */
	rec_offs**	offsets,/*!< out: rec_get_offsets() on the record */
	mem_heap_t**	offsets_heap,
				/*!< in/out: memory heap that can be emptied */
	mem_heap_t*	heap,	/*!< in/out: memory heap */
	const dtuple_t**rebuilt_old_pk,
				/*!< out: row_log_table_get_pk()
				before the update, or NULL if
				the table is not being rebuilt online or
				the PRIMARY KEY definition does not change */
	byte*	sys,		/*!< out: DB_TRX_ID, DB_ROLL_PTR
				for row_log_table_delete() */
	que_thr_t*	thr,	/*!< in: query thread */
	mtr_t*	mtr,		/*!< in: mtr; must be committed before
				latching any further pages */
	ulint	mode)		/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
	btr_pcur_t*	pcur;
	btr_cur_t*	btr_cur;
	dberr_t		err;
#ifdef UNIV_DEBUG
	ibool		success;
#endif /* UNIV_DEBUG */

	pcur = &node->pcur;
	btr_cur = btr_pcur_get_btr_cur(pcur);

	/* Reposition the persistent cursor on the clustered index record
	being rolled back; the stored position must still be restorable. */
#ifdef UNIV_DEBUG
	success =
#endif /* UNIV_DEBUG */
	btr_pcur_restore_position(mode, pcur, mtr);

	ut_ad(success);
	/* Only the rolling-back transaction may own the uncommitted
	change (temporary tables are private to the connection). */
	ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur),
			     btr_cur_get_index(btr_cur))
	      == thr_get_trx(thr)->id
	      || btr_cur_get_index(btr_cur)->table->is_temporary());
	ut_ad(node->ref != &trx_undo_metadata
	      || node->update->info_bits == REC_INFO_METADATA_ADD
	      || node->update->info_bits == REC_INFO_METADATA_ALTER);

	if (mode != BTR_MODIFY_LEAF
	    && dict_index_is_online_ddl(btr_cur_get_index(btr_cur))) {
		/* The table is being rebuilt online: remember the old
		PRIMARY KEY value (and DB_TRX_ID,DB_ROLL_PTR in sys) so
		that the caller can write the change to the row log. */
		*rebuilt_old_pk = row_log_table_get_pk(
			btr_cur_get_rec(btr_cur),
			btr_cur_get_index(btr_cur), NULL, sys, &heap);
	} else {
		*rebuilt_old_pk = NULL;
	}

	if (mode != BTR_MODIFY_TREE) {
		ut_ad((mode & ulint(~BTR_ALREADY_S_LATCHED))
		      == BTR_MODIFY_LEAF);

		err = btr_cur_optimistic_update(
			BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG
			| BTR_KEEP_SYS_FLAG,
			btr_cur, offsets, offsets_heap,
			node->update, node->cmpl_info,
			thr, thr_get_trx(thr)->id, mtr);
		/* Rolling back the instant ALTER TABLE metadata record
		must not succeed on the optimistic (leaf-only) path. */
		ut_ad(err != DB_SUCCESS || node->ref != &trx_undo_metadata);
	} else {
		big_rec_t*	dummy_big_rec;

		err = btr_cur_pessimistic_update(
			BTR_NO_LOCKING_FLAG
			| BTR_NO_UNDO_LOG_FLAG
			| BTR_KEEP_SYS_FLAG,
			btr_cur, offsets, offsets_heap, heap,
			&dummy_big_rec, node->update,
			node->cmpl_info, thr, thr_get_trx(thr)->id, mtr);

		ut_a(!dummy_big_rec);

		static const byte
			INFIMUM[8] = {'i','n','f','i','m','u','m',0},
			SUPREMUM[8] = {'s','u','p','r','e','m','u','m'};

		if (err == DB_SUCCESS
		    && node->ref == &trx_undo_metadata
		    && btr_cur_get_index(btr_cur)->table->instant
		    && node->update->info_bits == REC_INFO_METADATA_ADD) {
			/* We just rolled back the metadata record of
			instant ADD COLUMN. Repair the infimum/supremum
			strings in the root page if they do not contain
			the expected bytes. NOTE(review): presumably this
			compensates for a historical instant-ALTER bug
			that overwrote these bytes — confirm against the
			server's commit history. */
			if (page_t* root = btr_root_get(
				    btr_cur_get_index(btr_cur), mtr)) {
				byte* infimum;
				byte *supremum;
				if (page_is_comp(root)) {
					infimum = PAGE_NEW_INFIMUM + root;
					supremum = PAGE_NEW_SUPREMUM + root;
				} else {
					infimum = PAGE_OLD_INFIMUM + root;
					supremum = PAGE_OLD_SUPREMUM + root;
				}

				/* Either both are intact or both are not. */
				ut_ad(!memcmp(infimum, INFIMUM, 8)
				      == !memcmp(supremum, SUPREMUM, 8));

				if (memcmp(infimum, INFIMUM, 8)) {
					mlog_write_string(infimum, INFIMUM,
							  8, mtr);
					mlog_write_string(supremum, SUPREMUM,
							  8, mtr);
				}
			}
		}
	}

	if (err == DB_SUCCESS
	    && btr_cur_get_index(btr_cur)->table->id == DICT_COLUMNS_ID) {
		/* This is rolling back an UPDATE or DELETE on SYS_COLUMNS.
		If it was part of an instant ALTER TABLE operation, we
		must evict the table definition, so that it can be
		reloaded after the dictionary operation has been
		completed. At this point, any corresponding operation
		to the metadata record will have been rolled back. */
		const dfield_t&	table_id = *dtuple_get_nth_field(node->row, 0);
		ut_ad(dfield_get_len(&table_id) == 8);
		node->trx->evict_table(mach_read_from_8(static_cast<byte*>(
							table_id.data)));
	}

	return(err);
}
201
202 /** Get the byte offset of the DB_TRX_ID column
203 @param[in] rec clustered index record
204 @param[in] index clustered index
205 @return the byte offset of DB_TRX_ID, from the start of rec */
row_trx_id_offset(const rec_t * rec,const dict_index_t * index)206 static ulint row_trx_id_offset(const rec_t* rec, const dict_index_t* index)
207 {
208 ut_ad(index->n_uniq <= MAX_REF_PARTS);
209 ulint trx_id_offset = index->trx_id_offset;
210 if (!trx_id_offset) {
211 /* Reserve enough offsets for the PRIMARY KEY and 2 columns
212 so that we can access DB_TRX_ID, DB_ROLL_PTR. */
213 rec_offs offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2];
214 rec_offs_init(offsets_);
215 mem_heap_t* heap = NULL;
216 const ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1;
217 rec_offs* offsets = rec_get_offsets(rec, index, offsets_,
218 index->n_core_fields,
219 trx_id_pos + 1, &heap);
220 ut_ad(!heap);
221 ulint len;
222 trx_id_offset = rec_get_nth_field_offs(
223 offsets, trx_id_pos, &len);
224 ut_ad(len == DATA_TRX_ID_LEN);
225 }
226
227 return trx_id_offset;
228 }
229
/** Determine if rollback must execute a purge-like operation.
@param[in,out]	node	row undo
@param[in,out]	mtr	mini-transaction
@return whether the record should be purged */
static bool row_undo_mod_must_purge(undo_node_t* node, mtr_t* mtr)
{
	ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
	ut_ad(!node->table->is_temporary());

	btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&node->pcur);
	ut_ad(btr_cur->index->is_primary());
	DEBUG_SYNC_C("rollback_purge_clust");

	/* The S-latch on purge_sys.latch is registered in the
	mini-transaction and remains held after this function
	returns; it is released when the caller commits mtr. It
	protects the purge_sys.view read below. */
	mtr->s_lock(&purge_sys.latch, __FILE__, __LINE__);

	if (!purge_sys.view.changes_visible(node->new_trx_id,
					    node->table->name)) {
		/* Some active read view might still need the history:
		the delete-marked record must not be removed yet. */
		return false;
	}

	const rec_t* rec = btr_cur_get_rec(btr_cur);

	/* Purge only if DB_TRX_ID is still the one this rollback wrote;
	otherwise the record has been modified since. */
	return trx_read_trx_id(rec + row_trx_id_offset(rec, btr_cur->index))
		== node->new_trx_id;
}
255
/***********************************************************//**
Undoes a modify in a clustered index record. Sets also the node state for the
next round of undo.
@return DB_SUCCESS or error code: we may run out of file space */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_undo_mod_clust(
/*===============*/
	undo_node_t*	node,	/*!< in: row undo node */
	que_thr_t*	thr)	/*!< in: query thread */
{
	btr_pcur_t*	pcur;
	mtr_t		mtr;
	dberr_t		err;
	dict_index_t*	index;
	bool		online;

	ut_ad(thr_get_trx(thr) == node->trx);
	ut_ad(node->trx->dict_operation_lock_mode);
	ut_ad(node->trx->in_rollback);
	ut_ad(rw_lock_own_flagged(&dict_sys.latch,
				  RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));

	/* Check for free redo log space before taking any page
	latches (see the IMPORTANT NOTE at the top of this file). */
	log_free_check();
	pcur = &node->pcur;
	index = btr_cur_get_index(btr_pcur_get_btr_cur(pcur));
	ut_ad(index->is_primary());

	mtr.start();
	if (index->table->is_temporary()) {
		/* Changes to temporary tables are never redo-logged. */
		mtr.set_log_mode(MTR_LOG_NO_REDO);
	} else {
		index->set_modified(mtr);
		ut_ad(lock_table_has_locks(index->table));
	}

	online = dict_index_is_online_ddl(index);
	if (online) {
		ut_ad(node->trx->dict_operation_lock_mode != RW_X_LATCH);
		mtr_s_lock_index(index, &mtr);
	}

	mem_heap_t*	heap = mem_heap_create(1024);
	mem_heap_t*	offsets_heap = NULL;
	rec_offs*	offsets = NULL;
	const dtuple_t*	rebuilt_old_pk;
	byte		sys[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN];

	/* Try optimistic processing of the record, keeping changes within
	the index page */

	err = row_undo_mod_clust_low(node, &offsets, &offsets_heap,
				     heap, &rebuilt_old_pk, sys,
				     thr, &mtr, online
				     ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
				     : BTR_MODIFY_LEAF);

	if (err != DB_SUCCESS) {
		btr_pcur_commit_specify_mtr(pcur, &mtr);

		/* We may have to modify tree structure: do a pessimistic
		descent down the index tree */

		mtr.start();
		if (index->table->is_temporary()) {
			mtr.set_log_mode(MTR_LOG_NO_REDO);
		} else {
			index->set_modified(mtr);
		}

		err = row_undo_mod_clust_low(
			node, &offsets, &offsets_heap,
			heap, &rebuilt_old_pk, sys,
			thr, &mtr, BTR_MODIFY_TREE);
		ut_ad(err == DB_SUCCESS || err == DB_OUT_OF_FILE_SPACE);
	}

	/* Online rebuild cannot be initiated while we are holding
	dict_sys.latch and index->lock. (It can be aborted.) */
	ut_ad(online || !dict_index_is_online_ddl(index));

	if (err == DB_SUCCESS && online) {
		/* Write the rollback to the online table-rebuild log so
		that the rebuilt table copy will reflect it as well. */

		ut_ad(rw_lock_own_flagged(
				&index->lock,
				RW_LOCK_FLAG_S | RW_LOCK_FLAG_X
				| RW_LOCK_FLAG_SX));

		switch (node->rec_type) {
		case TRX_UNDO_DEL_MARK_REC:
			/* Undid a delete-mark: the row exists again. */
			row_log_table_insert(
				btr_pcur_get_rec(pcur), index, offsets);
			break;
		case TRX_UNDO_UPD_EXIST_REC:
			row_log_table_update(
				btr_pcur_get_rec(pcur), index, offsets,
				rebuilt_old_pk);
			break;
		case TRX_UNDO_UPD_DEL_REC:
			/* Undid an update of a delete-marked record:
			the row is delete-marked again. */
			row_log_table_delete(
				btr_pcur_get_rec(pcur), index, offsets, sys);
			break;
		default:
			ut_ad(0);
			break;
		}
	}

	/**
	 * when scrubbing, and records gets cleared,
	 * the transaction id is not present afterwards.
	 * this is safe as: since the record is on free-list
	 * it can be reallocated at any time after this mtr-commits
	 * which is just below
	 */
	ut_ad(srv_immediate_scrub_data_uncompressed
	      || row_get_rec_trx_id(btr_pcur_get_rec(pcur), index, offsets)
	      == node->new_trx_id);

	btr_pcur_commit_specify_mtr(pcur, &mtr);
	DEBUG_SYNC_C("rollback_undo_pk");

	if (err != DB_SUCCESS) {
		goto func_exit;
	}

	/* FIXME: Perform the below operations in the above
	mini-transaction when possible. */

	if (node->rec_type == TRX_UNDO_UPD_DEL_REC) {
		/* In delete-marked records, DB_TRX_ID must
		always refer to an existing update_undo log record. */
		ut_ad(node->new_trx_id);

		mtr.start();
		if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr)) {
			goto mtr_commit_exit;
		}

		if (index->table->is_temporary()) {
			mtr.set_log_mode(MTR_LOG_NO_REDO);
		} else {
			/* row_undo_mod_must_purge() S-latches
			purge_sys.latch in mtr; released on commit. */
			if (!row_undo_mod_must_purge(node, &mtr)) {
				goto mtr_commit_exit;
			}
			index->set_modified(mtr);
		}

		ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur),
					   dict_table_is_comp(node->table)));
		/* First try to remove the record within the leaf page. */
		if (btr_cur_optimistic_delete(&pcur->btr_cur, 0, &mtr)) {
			goto mtr_commit_exit;
		}

		btr_pcur_commit_specify_mtr(pcur, &mtr);

		/* The optimistic delete did not fit in the leaf page;
		retry with latches that allow restructuring the tree. */
		mtr.start();
		if (!btr_pcur_restore_position(
			    BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
			    pcur, &mtr)) {
			goto mtr_commit_exit;
		}

		if (index->table->is_temporary()) {
			mtr.set_log_mode(MTR_LOG_NO_REDO);
		} else {
			if (!row_undo_mod_must_purge(node, &mtr)) {
				goto mtr_commit_exit;
			}
			index->set_modified(mtr);
		}

		ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur),
					   dict_table_is_comp(node->table)));

		/* This operation is analogous to purge, we can free
		also inherited externally stored fields. We can also
		assume that the record was complete (including BLOBs),
		because it had been delete-marked after it had been
		completely inserted. Therefore, we are passing
		rollback=false, just like purge does. */
		btr_cur_pessimistic_delete(&err, FALSE, &pcur->btr_cur, 0,
					   false, &mtr);
		ut_ad(err == DB_SUCCESS
		      || err == DB_OUT_OF_FILE_SPACE);
	} else if (!index->table->is_temporary() && node->new_trx_id) {
		/* We rolled back a record so that it still exists.
		We must reset the DB_TRX_ID if the history is no
		longer accessible by any active read view. */

		mtr.start();
		if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr)) {
			goto mtr_commit_exit;
		}
		rec_t* rec = btr_pcur_get_rec(pcur);
		/* S-latch purge_sys.latch (released on mtr commit)
		to protect the read view check below. */
		mtr.s_lock(&purge_sys.latch, __FILE__, __LINE__);
		if (!purge_sys.view.changes_visible(node->new_trx_id,
						    node->table->name)) {
			goto mtr_commit_exit;
		}

		ulint trx_id_offset = index->trx_id_offset;
		ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1;
		/* Reserve enough offsets for the PRIMARY KEY and
		2 columns so that we can access DB_TRX_ID, DB_ROLL_PTR. */
		rec_offs offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2];
		if (trx_id_offset) {
#ifdef UNIV_DEBUG
			ut_ad(rec_offs_validate(NULL, index, offsets));
			if (buf_block_get_page_zip(
				    btr_pcur_get_block(&node->pcur))) {
				/* Below, page_zip_write_trx_id_and_roll_ptr()
				needs offsets to access DB_TRX_ID,DB_ROLL_PTR.
				We already computed offsets for possibly
				another record in the clustered index.
				Because the PRIMARY KEY is fixed-length,
				the offsets for the PRIMARY KEY and
				DB_TRX_ID,DB_ROLL_PTR are still valid.
				Silence the rec_offs_validate() assertion. */
				rec_offs_make_valid(rec, index, true, offsets);
			}
#endif
		} else if (rec_is_metadata(rec, *index)) {
			ut_ad(!buf_block_get_page_zip(btr_pcur_get_block(
							      &node->pcur)));
			/* Metadata record: sum the fixed lengths of the
			fields that precede DB_TRX_ID. */
			for (unsigned i = index->first_user_field(); i--; ) {
				trx_id_offset += index->fields[i].fixed_len;
			}
		} else {
			ut_ad(index->n_uniq <= MAX_REF_PARTS);
			rec_offs_init(offsets_);
			offsets = rec_get_offsets(rec, index, offsets_,
						  index->n_core_fields,
						  trx_id_pos + 2, &heap);
			ulint len;
			trx_id_offset = rec_get_nth_field_offs(
				offsets, trx_id_pos, &len);
			ut_ad(len == DATA_TRX_ID_LEN);
		}

		if (trx_read_trx_id(rec + trx_id_offset) == node->new_trx_id) {
			ut_ad(!rec_get_deleted_flag(
				      rec, dict_table_is_comp(node->table))
			      || rec_is_alter_metadata(rec, *index));
			index->set_modified(mtr);
			/* Reset DB_TRX_ID to 0 (and flag DB_ROLL_PTR as
			a fresh insert in the compressed-page variant). */
			if (page_zip_des_t* page_zip = buf_block_get_page_zip(
				    btr_pcur_get_block(&node->pcur))) {
				page_zip_write_trx_id_and_roll_ptr(
					page_zip, rec, offsets, trx_id_pos,
					0, 1ULL << ROLL_PTR_INSERT_FLAG_POS,
					&mtr);
			} else {
				mlog_write_string(rec + trx_id_offset,
						  reset_trx_id,
						  sizeof reset_trx_id, &mtr);
			}
		}
	} else {
		goto func_exit;
	}

mtr_commit_exit:
	btr_pcur_commit_specify_mtr(pcur, &mtr);

func_exit:
	if (offsets_heap) {
		mem_heap_free(offsets_heap);
	}
	mem_heap_free(heap);
	return(err);
}
527
/***********************************************************//**
Delete marks or removes a secondary index entry if found.
@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_undo_mod_del_mark_or_remove_sec_low(
/*====================================*/
	undo_node_t*	node,	/*!< in: row undo node */
	que_thr_t*	thr,	/*!< in: query thread */
	dict_index_t*	index,	/*!< in: index */
	dtuple_t*	entry,	/*!< in: index entry */
	ulint		mode)	/*!< in: latch mode BTR_MODIFY_LEAF or
				BTR_MODIFY_TREE */
{
	btr_pcur_t		pcur;
	btr_cur_t*		btr_cur;
	ibool			success;
	dberr_t			err	= DB_SUCCESS;
	mtr_t			mtr;
	mtr_t			mtr_vers;
	row_search_result	search_result;
	const bool		modify_leaf = mode == BTR_MODIFY_LEAF;

	row_mtr_start(&mtr, index, !modify_leaf);

	if (!index->is_committed()) {
		/* The index->online_status may change if the index is
		or was being created online, but not committed yet. It
		is protected by index->lock. */
		if (modify_leaf) {
			mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
			mtr_s_lock_index(index, &mtr);
		} else {
			ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE));
			mtr_sx_lock_index(index, &mtr);
		}

		if (row_log_online_op_try(index, entry, 0)) {
			/* The operation was buffered in the online index
			creation log; nothing to do in the tree itself. */
			goto func_exit_no_pcur;
		}
	} else {
		/* For secondary indexes,
		index->online_status==ONLINE_INDEX_COMPLETE if
		index->is_committed(). */
		ut_ad(!dict_index_is_online_ddl(index));
	}

	btr_cur = btr_pcur_get_btr_cur(&pcur);

	if (dict_index_is_spatial(index)) {
		if (modify_leaf) {
			btr_cur->thr = thr;
			mode |= BTR_RTREE_DELETE_MARK;
		}
		mode |= BTR_RTREE_UNDO_INS;
	}

	search_result = row_search_index_entry(index, entry, mode,
					       &pcur, &mtr);

	switch (UNIV_EXPECT(search_result, ROW_FOUND)) {
	case ROW_NOT_FOUND:
		/* In crash recovery, the secondary index record may
		be missing if the UPDATE did not have time to insert
		the secondary index records before the crash. When we
		are undoing that UPDATE in crash recovery, the record
		may be missing.

		In normal processing, if an update ends in a deadlock
		before it has inserted all updated secondary index
		records, then the undo will not find those records. */
		goto func_exit;
	case ROW_FOUND:
		break;
	case ROW_BUFFERED:
	case ROW_NOT_DELETED_REF:
		/* These are invalid outcomes, because the mode passed
		to row_search_index_entry() did not include any of the
		flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
		ut_error;
	}

	/* We should remove the index record if no prior version of the row,
	which cannot be purged yet, requires its existence. If some requires,
	we should delete mark the record. */

	/* Use a second mini-transaction to inspect the clustered index
	record and its older versions while holding the secondary index
	page latch in mtr. */
	mtr_vers.start();

	success = btr_pcur_restore_position(BTR_SEARCH_LEAF, &(node->pcur),
					    &mtr_vers);
	ut_a(success);

	/* For temporary table, we can skip to check older version of
	clustered index entry, because there is no MVCC or purge. */
	if (node->table->is_temporary()
	    || row_vers_old_has_index_entry(
		    false, btr_pcur_get_rec(&node->pcur),
		    &mtr_vers, index, entry, 0, 0)) {
		/* An older version still needs the entry:
		only delete-mark it. */
		err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
						   btr_cur, TRUE, thr, &mtr);
		ut_ad(err == DB_SUCCESS);
	} else {
		/* Remove the index record */

		if (dict_index_is_spatial(index)) {
			rec_t*	rec = btr_pcur_get_rec(&pcur);
			if (rec_get_deleted_flag(rec,
						 dict_table_is_comp(index->table))) {
				ib::error() << "Record found in index "
					<< index->name << " is deleted marked"
					" on rollback update.";
				ut_ad(0);
			}
		}

		if (modify_leaf) {
			/* Report DB_FAIL so the caller retries with
			BTR_MODIFY_TREE if the leaf-only delete fails. */
			err = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
				? DB_SUCCESS : DB_FAIL;
		} else {
			/* Passing rollback=false,
			because we are deleting a secondary index record:
			the distinction only matters when deleting a
			record that contains externally stored columns. */
			ut_ad(!index->is_primary());
			btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
						   false, &mtr);

			/* The delete operation may fail if we have little
			file space left: TODO: easiest to crash the database
			and restart with more file space */
		}
	}

	btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers);

func_exit:
	btr_pcur_close(&pcur);
func_exit_no_pcur:
	mtr_commit(&mtr);

	return(err);
}
670
671 /***********************************************************//**
672 Delete marks or removes a secondary index entry if found.
673 NOTE that if we updated the fields of a delete-marked secondary index record
674 so that alphabetically they stayed the same, e.g., 'abc' -> 'aBc', we cannot
675 return to the original values because we do not know them. But this should
676 not cause problems because in row0sel.cc, in queries we always retrieve the
677 clustered index record or an earlier version of it, if the secondary index
678 record through which we do the search is delete-marked.
679 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
680 static MY_ATTRIBUTE((nonnull, warn_unused_result))
681 dberr_t
row_undo_mod_del_mark_or_remove_sec(undo_node_t * node,que_thr_t * thr,dict_index_t * index,dtuple_t * entry)682 row_undo_mod_del_mark_or_remove_sec(
683 /*================================*/
684 undo_node_t* node, /*!< in: row undo node */
685 que_thr_t* thr, /*!< in: query thread */
686 dict_index_t* index, /*!< in: index */
687 dtuple_t* entry) /*!< in: index entry */
688 {
689 dberr_t err;
690
691 err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
692 entry, BTR_MODIFY_LEAF);
693 if (err == DB_SUCCESS) {
694
695 return(err);
696 }
697
698 err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
699 entry, BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE);
700 return(err);
701 }
702
/***********************************************************//**
Delete unmarks a secondary index entry which must be found. It might not be
delete-marked at the moment, but it does not harm to unmark it anyway. We also
need to update the fields of the secondary index record if we updated its
fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'.
@retval DB_SUCCESS on success
@retval DB_FAIL if BTR_MODIFY_TREE should be tried
@retval DB_OUT_OF_FILE_SPACE when running out of tablespace
@retval DB_DUPLICATE_KEY if the value was missing
	and an insert would lead to a duplicate exists */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_undo_mod_del_unmark_sec_and_undo_update(
/*========================================*/
	ulint		mode,	/*!< in: search mode: BTR_MODIFY_LEAF or
				BTR_MODIFY_TREE */
	que_thr_t*	thr,	/*!< in: query thread */
	dict_index_t*	index,	/*!< in: index */
	dtuple_t*	entry)	/*!< in: index entry */
{
	btr_pcur_t		pcur;
	btr_cur_t*		btr_cur		= btr_pcur_get_btr_cur(&pcur);
	upd_t*			update;
	dberr_t			err		= DB_SUCCESS;
	big_rec_t*		dummy_big_rec;
	mtr_t			mtr;
	trx_t*			trx		= thr_get_trx(thr);
	const ulint		flags
		= BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG;
	row_search_result	search_result;
	ulint			orig_mode = mode;

	ut_ad(trx->id != 0);

	if (dict_index_is_spatial(index)) {
		/* FIXME: Currently we do a 2-pass search for the undo
		due to avoid undel-mark a wrong rec in rolling back in
		partial update. Later, we could log some info in
		secondary index updates to avoid this. */
		ut_ad(mode & BTR_MODIFY_LEAF);
		mode |= BTR_RTREE_DELETE_MARK;
	}

try_again:
	row_mtr_start(&mtr, index, !(mode & BTR_MODIFY_LEAF));

	if (!index->is_committed()) {
		/* The index->online_status may change if the index is
		or was being created online, but not committed yet. It
		is protected by index->lock. */
		if (mode == BTR_MODIFY_LEAF) {
			mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
			mtr_s_lock_index(index, &mtr);
		} else {
			ut_ad(mode == BTR_MODIFY_TREE);
			mtr_sx_lock_index(index, &mtr);
		}

		if (row_log_online_op_try(index, entry, trx->id)) {
			/* The operation was buffered in the online
			index creation log. */
			goto func_exit_no_pcur;
		}
	} else {
		/* For secondary indexes,
		index->online_status==ONLINE_INDEX_COMPLETE if
		index->is_committed(). */
		ut_ad(!dict_index_is_online_ddl(index));
	}

	btr_cur->thr = thr;

	search_result = row_search_index_entry(index, entry, mode,
					       &pcur, &mtr);

	switch (search_result) {
		/* Case-local variables used by the ROW_FOUND branch. */
		mem_heap_t*	heap;
		mem_heap_t*	offsets_heap;
		rec_offs*	offsets;
	case ROW_BUFFERED:
	case ROW_NOT_DELETED_REF:
		/* These are invalid outcomes, because the mode passed
		to row_search_index_entry() did not include any of the
		flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
		ut_error;
	case ROW_NOT_FOUND:
		/* For spatial index, if first search didn't find an
		undel-marked rec, try to find a del-marked rec. */
		if (dict_index_is_spatial(index) && btr_cur->rtr_info->fd_del) {
			if (mode != orig_mode) {
				mode = orig_mode;
				btr_pcur_close(&pcur);
				mtr_commit(&mtr);
				goto try_again;
			}
		}

		if (index->is_committed()) {
			/* During online secondary index creation, it
			is possible that MySQL is waiting for a
			meta-data lock upgrade before invoking
			ha_innobase::commit_inplace_alter_table()
			while this ROLLBACK is executing. InnoDB has
			finished building the index, but it does not
			yet exist in MySQL. In this case, we suppress
			the printout to the error log. */
			ib::warn() << "Record in index " << index->name
				<< " of table " << index->table->name
				<< " was not found on rollback, trying to"
				" insert: " << *entry
				<< " at: " << rec_index_print(
					btr_cur_get_rec(btr_cur), index);
		}

		if (btr_cur->up_match >= dict_index_get_n_unique(index)
		    || btr_cur->low_match >= dict_index_get_n_unique(index)) {
			if (index->is_committed()) {
				ib::warn() << "Record in index " << index->name
					<< " was not found on rollback, and"
					" a duplicate exists";
			}
			err = DB_DUPLICATE_KEY;
			break;
		}

		/* Insert the missing record that we were trying to
		delete-unmark. */
		big_rec_t*	big_rec;
		rec_t*		insert_rec;
		offsets = NULL;
		offsets_heap = NULL;

		err = btr_cur_optimistic_insert(
			flags, btr_cur, &offsets, &offsets_heap,
			entry, &insert_rec, &big_rec,
			0, thr, &mtr);
		ut_ad(!big_rec);

		if (err == DB_FAIL && mode == BTR_MODIFY_TREE) {
			err = btr_cur_pessimistic_insert(
				flags, btr_cur,
				&offsets, &offsets_heap,
				entry, &insert_rec, &big_rec,
				0, thr, &mtr);
			/* There are no off-page columns in
			secondary indexes. */
			ut_ad(!big_rec);
		}

		if (err == DB_SUCCESS) {
			page_update_max_trx_id(
				btr_cur_get_block(btr_cur),
				btr_cur_get_page_zip(btr_cur),
				trx->id, &mtr);
		}

		if (offsets_heap) {
			mem_heap_free(offsets_heap);
		}

		break;
	case ROW_FOUND:
		/* Clear the delete mark, then apply any field changes
		needed to restore the pre-update secondary index entry. */
		err = btr_cur_del_mark_set_sec_rec(
			BTR_NO_LOCKING_FLAG,
			btr_cur, FALSE, thr, &mtr);

		ut_a(err == DB_SUCCESS);
		heap = mem_heap_create(
			sizeof(upd_t)
			+ dtuple_get_n_fields(entry) * sizeof(upd_field_t));
		offsets_heap = NULL;
		offsets = rec_get_offsets(
			btr_cur_get_rec(btr_cur),
			index, nullptr, index->n_core_fields, ULINT_UNDEFINED,
			&offsets_heap);
		update = row_upd_build_sec_rec_difference_binary(
			btr_cur_get_rec(btr_cur), index, offsets, entry, heap);
		if (upd_get_n_fields(update) == 0) {

			/* Do nothing */

		} else if (mode != BTR_MODIFY_TREE) {
			/* Try an optimistic updating of the record, keeping
			changes within the page */

			/* TODO: pass offsets, not &offsets */
			err = btr_cur_optimistic_update(
				flags, btr_cur, &offsets, &offsets_heap,
				update, 0, thr, thr_get_trx(thr)->id, &mtr);
			switch (err) {
			case DB_OVERFLOW:
			case DB_UNDERFLOW:
			case DB_ZIP_OVERFLOW:
				/* Signal the caller to retry with
				BTR_MODIFY_TREE. */
				err = DB_FAIL;
				/* fall through */
			default:
				break;
			}
		} else {
			err = btr_cur_pessimistic_update(
				flags, btr_cur, &offsets, &offsets_heap,
				heap, &dummy_big_rec,
				update, 0, thr, thr_get_trx(thr)->id, &mtr);
			ut_a(!dummy_big_rec);
		}

		mem_heap_free(heap);
		mem_heap_free(offsets_heap);
	}

	btr_pcur_close(&pcur);
func_exit_no_pcur:
	mtr_commit(&mtr);

	return(err);
}
916
917 /***********************************************************//**
918 Flags a secondary index corrupted. */
919 static MY_ATTRIBUTE((nonnull))
920 void
row_undo_mod_sec_flag_corrupted(trx_t * trx,dict_index_t * index)921 row_undo_mod_sec_flag_corrupted(
922 /*============================*/
923 trx_t* trx, /*!< in/out: transaction */
924 dict_index_t* index) /*!< in: secondary index */
925 {
926 ut_ad(!dict_index_is_clust(index));
927
928 switch (trx->dict_operation_lock_mode) {
929 case RW_S_LATCH:
930 /* Because row_undo() is holding an S-latch
931 on the data dictionary during normal rollback,
932 we can only mark the index corrupted in the
933 data dictionary cache. TODO: fix this somehow.*/
934 mutex_enter(&dict_sys.mutex);
935 dict_set_corrupted_index_cache_only(index);
936 mutex_exit(&dict_sys.mutex);
937 break;
938 default:
939 ut_ad(0);
940 /* fall through */
941 case RW_X_LATCH:
942 /* This should be the rollback of a data dictionary
943 transaction. */
944 dict_set_corrupted(index, trx, "rollback");
945 }
946 }
947
/***********************************************************//**
Undoes a modify in secondary indexes when undo record type is UPD_DEL.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_undo_mod_upd_del_sec(
/*=====================*/
	undo_node_t*	node,	/*!< in: row undo node */
	que_thr_t*	thr)	/*!< in: query thread */
{
	mem_heap_t*	heap;
	dberr_t		err	= DB_SUCCESS;

	ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
	ut_ad(!node->undo_row);

	heap = mem_heap_create(1024);

	/* Walk all uncorrupted indexes of the table, delete-marking or
	removing the secondary index entry in each. */
	while (node->index != NULL) {
		dict_index_t*	index	= node->index;
		dtuple_t*	entry;

		if (index->type & DICT_FTS) {
			/* Fulltext indexes are maintained separately;
			skip them here. */
			dict_table_next_uncorrupted_index(node->index);
			continue;
		}

		/* During online index creation,
		HA_ALTER_INPLACE_COPY_NO_LOCK or HA_ALTER_INPLACE_NOCOPY_NO_LOCK
		should guarantee that any active transaction has not modified
		indexed columns such that col->ord_part was 0 at the
		time when the undo log record was written. When we get
		to roll back an undo log entry TRX_UNDO_DEL_MARK_REC,
		it should always cover all affected indexes. */
		entry = row_build_index_entry(
			node->row, node->ext, index, heap);

		if (UNIV_UNLIKELY(!entry)) {
			/* The database must have crashed after
			inserting a clustered index record but before
			writing all the externally stored columns of
			that record. Because secondary index entries
			are inserted after the clustered index record,
			we may assume that the secondary index record
			does not exist. However, this situation may
			only occur during the rollback of incomplete
			transactions. */
			ut_a(thr_get_trx(thr) == trx_roll_crash_recv_trx);
		} else {
			err = row_undo_mod_del_mark_or_remove_sec(
				node, thr, index, entry);

			if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
				/* Stop at the first failure; the caller
				handles the error. */
				break;
			}
		}

		/* Reuse the heap for the next index entry. */
		mem_heap_empty(heap);
		dict_table_next_uncorrupted_index(node->index);
	}

	mem_heap_free(heap);

	return(err);
}
1014
1015 /***********************************************************//**
1016 Undoes a modify in secondary indexes when undo record type is DEL_MARK.
1017 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
1018 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1019 dberr_t
row_undo_mod_del_mark_sec(undo_node_t * node,que_thr_t * thr)1020 row_undo_mod_del_mark_sec(
1021 /*======================*/
1022 undo_node_t* node, /*!< in: row undo node */
1023 que_thr_t* thr) /*!< in: query thread */
1024 {
1025 mem_heap_t* heap;
1026 dberr_t err = DB_SUCCESS;
1027
1028 ut_ad(!node->undo_row);
1029
1030 heap = mem_heap_create(1024);
1031
1032 while (node->index != NULL) {
1033 dict_index_t* index = node->index;
1034 dtuple_t* entry;
1035
1036 if (index->type == DICT_FTS) {
1037 dict_table_next_uncorrupted_index(node->index);
1038 continue;
1039 }
1040
1041 /* During online index creation,
1042 HA_ALTER_INPLACE_COPY_NO_LOCK or HA_ALTER_INPLACE_NOCOPY_NO_LOCK
1043 should guarantee that any active transaction has not modified
1044 indexed columns such that col->ord_part was 0 at the
1045 time when the undo log record was written. When we get
1046 to roll back an undo log entry TRX_UNDO_DEL_MARK_REC,
1047 it should always cover all affected indexes. */
1048 entry = row_build_index_entry(
1049 node->row, node->ext, index, heap);
1050
1051 ut_a(entry);
1052
1053 err = row_undo_mod_del_unmark_sec_and_undo_update(
1054 BTR_MODIFY_LEAF, thr, index, entry);
1055 if (err == DB_FAIL) {
1056 err = row_undo_mod_del_unmark_sec_and_undo_update(
1057 BTR_MODIFY_TREE, thr, index, entry);
1058 }
1059
1060 if (err == DB_DUPLICATE_KEY) {
1061 row_undo_mod_sec_flag_corrupted(
1062 thr_get_trx(thr), index);
1063 err = DB_SUCCESS;
1064 /* Do not return any error to the caller. The
1065 duplicate will be reported by ALTER TABLE or
1066 CREATE UNIQUE INDEX. Unfortunately we cannot
1067 report the duplicate key value to the DDL
1068 thread, because the altered_table object is
1069 private to its call stack. */
1070 } else if (err != DB_SUCCESS) {
1071 break;
1072 }
1073
1074 mem_heap_empty(heap);
1075 dict_table_next_uncorrupted_index(node->index);
1076 }
1077
1078 mem_heap_free(heap);
1079
1080 return(err);
1081 }
1082
1083 /***********************************************************//**
1084 Undoes a modify in secondary indexes when undo record type is UPD_EXIST.
1085 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
1086 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1087 dberr_t
row_undo_mod_upd_exist_sec(undo_node_t * node,que_thr_t * thr)1088 row_undo_mod_upd_exist_sec(
1089 /*=======================*/
1090 undo_node_t* node, /*!< in: row undo node */
1091 que_thr_t* thr) /*!< in: query thread */
1092 {
1093 mem_heap_t* heap;
1094 dberr_t err = DB_SUCCESS;
1095
1096 if (node->index == NULL
1097 || ((node->cmpl_info & UPD_NODE_NO_ORD_CHANGE))) {
1098 /* No change in secondary indexes */
1099
1100 return(err);
1101 }
1102
1103 heap = mem_heap_create(1024);
1104
1105
1106 while (node->index != NULL) {
1107 dict_index_t* index = node->index;
1108 dtuple_t* entry;
1109
1110 if (dict_index_is_spatial(index)) {
1111 if (!row_upd_changes_ord_field_binary_func(
1112 index, node->update,
1113 #ifdef UNIV_DEBUG
1114 thr,
1115 #endif /* UNIV_DEBUG */
1116 node->row,
1117 node->ext, ROW_BUILD_FOR_UNDO)) {
1118 dict_table_next_uncorrupted_index(node->index);
1119 continue;
1120 }
1121 } else {
1122 if (index->type == DICT_FTS
1123 || !row_upd_changes_ord_field_binary(index,
1124 node->update,
1125 thr, node->row,
1126 node->ext)) {
1127 dict_table_next_uncorrupted_index(node->index);
1128 continue;
1129 }
1130 }
1131
1132 /* Build the newest version of the index entry */
1133 entry = row_build_index_entry(node->row, node->ext,
1134 index, heap);
1135 if (UNIV_UNLIKELY(!entry)) {
1136 /* The server must have crashed in
1137 row_upd_clust_rec_by_insert() before
1138 the updated externally stored columns (BLOBs)
1139 of the new clustered index entry were written. */
1140
1141 /* The table must be in DYNAMIC or COMPRESSED
1142 format. REDUNDANT and COMPACT formats
1143 store a local 768-byte prefix of each
1144 externally stored column. */
1145 ut_a(dict_table_has_atomic_blobs(index->table));
1146
1147 /* This is only legitimate when
1148 rolling back an incomplete transaction
1149 after crash recovery. */
1150 ut_a(thr_get_trx(thr)->is_recovered);
1151
1152 /* The server must have crashed before
1153 completing the insert of the new
1154 clustered index entry and before
1155 inserting to the secondary indexes.
1156 Because node->row was not yet written
1157 to this index, we can ignore it. But
1158 we must restore node->undo_row. */
1159 } else {
1160 /* NOTE that if we updated the fields of a
1161 delete-marked secondary index record so that
1162 alphabetically they stayed the same, e.g.,
1163 'abc' -> 'aBc', we cannot return to the
1164 original values because we do not know them.
1165 But this should not cause problems because
1166 in row0sel.cc, in queries we always retrieve
1167 the clustered index record or an earlier
1168 version of it, if the secondary index record
1169 through which we do the search is
1170 delete-marked. */
1171
1172 err = row_undo_mod_del_mark_or_remove_sec(
1173 node, thr, index, entry);
1174 if (err != DB_SUCCESS) {
1175 break;
1176 }
1177 }
1178
1179 mem_heap_empty(heap);
1180 /* We may have to update the delete mark in the
1181 secondary index record of the previous version of
1182 the row. We also need to update the fields of
1183 the secondary index record if we updated its fields
1184 but alphabetically they stayed the same, e.g.,
1185 'abc' -> 'aBc'. */
1186 if (dict_index_is_spatial(index)) {
1187 entry = row_build_index_entry_low(node->undo_row,
1188 node->undo_ext,
1189 index, heap,
1190 ROW_BUILD_FOR_UNDO);
1191 } else {
1192 entry = row_build_index_entry(node->undo_row,
1193 node->undo_ext,
1194 index, heap);
1195 }
1196
1197 ut_a(entry);
1198
1199 err = row_undo_mod_del_unmark_sec_and_undo_update(
1200 BTR_MODIFY_LEAF, thr, index, entry);
1201 if (err == DB_FAIL) {
1202 err = row_undo_mod_del_unmark_sec_and_undo_update(
1203 BTR_MODIFY_TREE, thr, index, entry);
1204 }
1205
1206 if (err == DB_DUPLICATE_KEY) {
1207 row_undo_mod_sec_flag_corrupted(
1208 thr_get_trx(thr), index);
1209 err = DB_SUCCESS;
1210 } else if (err != DB_SUCCESS) {
1211 break;
1212 }
1213
1214 mem_heap_empty(heap);
1215 dict_table_next_uncorrupted_index(node->index);
1216 }
1217
1218 mem_heap_free(heap);
1219
1220 return(err);
1221 }
1222
/** Parse an update undo record.
@param[in,out]	node		row rollback state
@param[in]	dict_locked	whether the data dictionary cache is locked
@return true if the undo record was parsed and the row to roll back
was positioned; false if there is nothing to roll back for this record
(missing or unaccessible table, or the row was not found) */
static bool row_undo_mod_parse_undo_rec(undo_node_t* node, bool dict_locked)
{
	dict_index_t*	clust_index;
	byte*		ptr;
	undo_no_t	undo_no;
	table_id_t	table_id;
	trx_id_t	trx_id;
	roll_ptr_t	roll_ptr;
	ulint		info_bits;
	ulint		type;
	ulint		cmpl_info;
	bool		dummy_extern;

	ut_ad(node->state == UNDO_UPDATE_PERSISTENT
	      || node->state == UNDO_UPDATE_TEMPORARY);
	ut_ad(node->trx->in_rollback);
	ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr));

	/* Read the undo record header: type, compiler info flags,
	undo number and the id of the table it belongs to. */
	ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
				    &dummy_extern, &undo_no, &table_id);
	node->rec_type = type;

	if (node->state == UNDO_UPDATE_PERSISTENT) {
		node->table = dict_table_open_on_id(table_id, dict_locked,
						    DICT_TABLE_OP_NORMAL);
	} else if (!dict_locked) {
		/* Temporary tables are looked up in the cache under
		dict_sys.mutex, which we must acquire ourselves. */
		mutex_enter(&dict_sys.mutex);
		node->table = dict_sys.get_temporary_table(table_id);
		mutex_exit(&dict_sys.mutex);
	} else {
		node->table = dict_sys.get_temporary_table(table_id);
	}

	if (!node->table) {
		return false;
	}

	ut_ad(!node->table->skip_alter_undo);

	if (UNIV_UNLIKELY(!fil_table_accessible(node->table))) {
close_table:
		/* Normally, tables should not disappear or become
		unaccessible during ROLLBACK, because they should be
		protected by InnoDB table locks. Corruption could be
		a valid exception.

		FIXME: When running out of temporary tablespace, it
		would probably be better to just drop all temporary
		tables (and temporary undo log records) of the current
		connection, instead of doing this rollback. */
		dict_table_close(node->table, dict_locked, FALSE);
		node->table = NULL;
		return false;
	}

	clust_index = dict_table_get_first_index(node->table);

	/* Read the system columns (DB_TRX_ID, DB_ROLL_PTR, info bits)
	that were stored before the update. */
	ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
					       &info_bits);

	/* Read the PRIMARY KEY value of the record to roll back. */
	ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
				       node->heap);

	/* Reconstruct the update vector to be applied in rollback. */
	ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
					     roll_ptr, info_bits,
					     node->heap, &(node->update));
	node->new_trx_id = trx_id;
	node->cmpl_info = cmpl_info;
	ut_ad(!node->ref->info_bits);

	if (node->update->info_bits & REC_INFO_MIN_REC_FLAG) {
		if ((node->update->info_bits & ~REC_INFO_DELETED_FLAG)
		    != REC_INFO_MIN_REC_FLAG) {
			ut_ad(!"wrong info_bits in undo log record");
			goto close_table;
		}
		/* This must be an undo log record for a subsequent
		instant ALTER TABLE, extending the metadata record. */
		ut_ad(clust_index->is_instant());
		ut_ad(clust_index->table->instant
		      || !(node->update->info_bits & REC_INFO_DELETED_FLAG));
		node->ref = &trx_undo_metadata;
		node->update->info_bits = (node->update->info_bits
					   & REC_INFO_DELETED_FLAG)
			? REC_INFO_METADATA_ALTER
			: REC_INFO_METADATA_ADD;
	}

	if (!row_undo_search_clust_to_pcur(node)) {
		/* As long as this rolling-back transaction exists,
		the PRIMARY KEY value pointed to by the undo log
		record should exist.

		However, if InnoDB is killed during a rollback, or
		shut down during the rollback of recovered
		transactions, then after restart we may try to roll
		back some of the same undo log records again, because
		trx_roll_try_truncate() is not being invoked after
		every undo log record.

		It is also possible that the record
		was not modified yet (the DB_ROLL_PTR does not match
		node->roll_ptr) and thus there is nothing to roll back.

		btr_cur_upd_lock_and_undo() only writes the undo log
		record after successfully acquiring an exclusive lock
		on the the clustered index record. That lock will not
		be released before the transaction is committed or
		fully rolled back. (Exception: if the server was
		killed, restarted, and shut down again before the
		rollback of the recovered transaction was completed,
		it is possible that the transaction was partially
		rolled back and locks released.) */
		goto close_table;
	}

	/* Extract indexed virtual columns from undo log */
	if (node->ref != &trx_undo_metadata && node->table->n_v_cols) {
		row_upd_replace_vcol(node->row, node->table,
				     node->update, false, node->undo_row,
				     (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)
				     ? NULL : ptr);
	}

	return true;
}
1352
1353 /***********************************************************//**
1354 Undoes a modify operation on a row of a table.
1355 @return DB_SUCCESS or error code */
1356 dberr_t
row_undo_mod(undo_node_t * node,que_thr_t * thr)1357 row_undo_mod(
1358 /*=========*/
1359 undo_node_t* node, /*!< in: row undo node */
1360 que_thr_t* thr) /*!< in: query thread */
1361 {
1362 dberr_t err;
1363 ut_ad(thr_get_trx(thr) == node->trx);
1364 const bool dict_locked = node->trx->dict_operation_lock_mode
1365 == RW_X_LATCH;
1366
1367 if (!row_undo_mod_parse_undo_rec(node, dict_locked)) {
1368 return DB_SUCCESS;
1369 }
1370
1371 node->index = dict_table_get_first_index(node->table);
1372 ut_ad(dict_index_is_clust(node->index));
1373
1374 if (node->ref->info_bits) {
1375 ut_ad(node->ref->is_metadata());
1376 goto rollback_clust;
1377 }
1378
1379 /* Skip the clustered index (the first index) */
1380 node->index = dict_table_get_next_index(node->index);
1381
1382 /* Skip all corrupted secondary index */
1383 dict_table_skip_corrupt_index(node->index);
1384
1385 switch (node->rec_type) {
1386 case TRX_UNDO_UPD_EXIST_REC:
1387 err = row_undo_mod_upd_exist_sec(node, thr);
1388 break;
1389 case TRX_UNDO_DEL_MARK_REC:
1390 err = row_undo_mod_del_mark_sec(node, thr);
1391 break;
1392 case TRX_UNDO_UPD_DEL_REC:
1393 err = row_undo_mod_upd_del_sec(node, thr);
1394 break;
1395 default:
1396 ut_error;
1397 err = DB_ERROR;
1398 }
1399
1400 if (err == DB_SUCCESS) {
1401 rollback_clust:
1402 err = row_undo_mod_clust(node, thr);
1403
1404 bool update_statistics
1405 = !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE);
1406
1407 if (err == DB_SUCCESS && node->table->stat_initialized) {
1408 switch (node->rec_type) {
1409 case TRX_UNDO_UPD_EXIST_REC:
1410 break;
1411 case TRX_UNDO_DEL_MARK_REC:
1412 dict_table_n_rows_inc(node->table);
1413 update_statistics = update_statistics
1414 || !srv_stats_include_delete_marked;
1415 break;
1416 case TRX_UNDO_UPD_DEL_REC:
1417 dict_table_n_rows_dec(node->table);
1418 update_statistics = update_statistics
1419 || !srv_stats_include_delete_marked;
1420 break;
1421 }
1422
1423 /* Do not attempt to update statistics when
1424 executing ROLLBACK in the InnoDB SQL
1425 interpreter, because in that case we would
1426 already be holding dict_sys.mutex, which
1427 would be acquired when updating statistics. */
1428 if (update_statistics && !dict_locked) {
1429 dict_stats_update_if_needed(node->table,
1430 *node->trx);
1431 } else {
1432 node->table->stat_modified_counter++;
1433 }
1434 }
1435 }
1436
1437 dict_table_close(node->table, dict_locked, FALSE);
1438
1439 node->table = NULL;
1440
1441 return(err);
1442 }
1443