1 /*****************************************************************************
2
3 Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation. The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file row/row0umod.cc
29 Undo modify of a row
30
31 Created 2/27/1997 Heikki Tuuri
32 *******************************************************/
33
34 #include "row0umod.h"
35
36 #ifdef UNIV_NONINL
37 #include "row0umod.ic"
38 #endif
39
40 #include "dict0dict.h"
41 #include "dict0boot.h"
42 #include "trx0undo.h"
43 #include "trx0roll.h"
44 #include "btr0btr.h"
45 #include "mach0data.h"
46 #include "row0undo.h"
47 #include "row0vers.h"
48 #include "row0log.h"
49 #include "trx0trx.h"
50 #include "trx0rec.h"
51 #include "row0row.h"
52 #include "row0upd.h"
53 #include "que0que.h"
54 #include "log0log.h"
55
56 /* Considerations on undoing a modify operation.
57 (1) Undoing a delete marking: all index records should be found. Some of
58 them may have delete mark already FALSE, if the delete mark operation was
59 stopped underway, or if the undo operation ended prematurely because of a
60 system crash.
61 (2) Undoing an update of a delete unmarked record: the newer version of
62 an updated secondary index entry should be removed if no prior version
63 of the clustered index record requires its existence. Otherwise, it should
64 be delete marked.
65 (3) Undoing an update of a delete marked record. In this kind of update a
66 delete marked clustered index record was delete unmarked and possibly also
67 some of its fields were changed. Now, it is possible that the delete marked
68 version has become obsolete at the time the undo is started. */
69
/*************************************************************************
IMPORTANT NOTE: Any operation that generates redo MUST check that there
is enough space in the redo log before starting that operation. This is
done by calling log_free_check(). The reason for checking the
availability of the redo log space before the start of the operation is
that we MUST not hold any synchronization objects when performing the
check.
If you make a change in this module make sure that no codepath is
introduced where a call to log_free_check() is bypassed. */
79
80 /***********************************************************//**
81 Undoes a modify in a clustered index record.
82 @return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
83 static MY_ATTRIBUTE((nonnull, warn_unused_result))
84 dberr_t
row_undo_mod_clust_low(undo_node_t * node,ulint ** offsets,mem_heap_t ** offsets_heap,mem_heap_t * heap,const dtuple_t ** rebuilt_old_pk,byte * sys,que_thr_t * thr,mtr_t * mtr,ulint mode)85 row_undo_mod_clust_low(
86 /*===================*/
87 undo_node_t* node, /*!< in: row undo node */
88 ulint** offsets,/*!< out: rec_get_offsets() on the record */
89 mem_heap_t** offsets_heap,
90 /*!< in/out: memory heap that can be emptied */
91 mem_heap_t* heap, /*!< in/out: memory heap */
92 const dtuple_t**rebuilt_old_pk,
93 /*!< out: row_log_table_get_pk()
94 before the update, or NULL if
95 the table is not being rebuilt online or
96 the PRIMARY KEY definition does not change */
97 byte* sys, /*!< out: DB_TRX_ID, DB_ROLL_PTR
98 for row_log_table_delete() */
99 que_thr_t* thr, /*!< in: query thread */
100 mtr_t* mtr, /*!< in: mtr; must be committed before
101 latching any further pages */
102 ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
103 {
104 btr_pcur_t* pcur;
105 btr_cur_t* btr_cur;
106 dberr_t err;
107 #ifdef UNIV_DEBUG
108 ibool success;
109 #endif /* UNIV_DEBUG */
110
111 pcur = &node->pcur;
112 btr_cur = btr_pcur_get_btr_cur(pcur);
113
114 #ifdef UNIV_DEBUG
115 success =
116 #endif /* UNIV_DEBUG */
117 btr_pcur_restore_position(mode, pcur, mtr);
118
119 ut_ad(success);
120 ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur),
121 btr_cur_get_index(btr_cur))
122 == thr_get_trx(thr)->id);
123
124 if (mode != BTR_MODIFY_LEAF
125 && dict_index_is_online_ddl(btr_cur_get_index(btr_cur))) {
126 *rebuilt_old_pk = row_log_table_get_pk(
127 btr_cur_get_rec(btr_cur),
128 btr_cur_get_index(btr_cur), NULL, sys, &heap);
129 } else {
130 *rebuilt_old_pk = NULL;
131 }
132
133 if (mode != BTR_MODIFY_TREE) {
134 ut_ad((mode & ~BTR_ALREADY_S_LATCHED) == BTR_MODIFY_LEAF);
135
136 err = btr_cur_optimistic_update(
137 BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG
138 | BTR_KEEP_SYS_FLAG,
139 btr_cur, offsets, offsets_heap,
140 node->update, node->cmpl_info,
141 thr, thr_get_trx(thr)->id, mtr);
142 } else {
143 big_rec_t* dummy_big_rec;
144
145 err = btr_cur_pessimistic_update(
146 BTR_NO_LOCKING_FLAG
147 | BTR_NO_UNDO_LOG_FLAG
148 | BTR_KEEP_SYS_FLAG,
149 btr_cur, offsets, offsets_heap, heap,
150 &dummy_big_rec, node->update,
151 node->cmpl_info, thr, thr_get_trx(thr)->id, mtr);
152
153 ut_a(!dummy_big_rec);
154 }
155
156 return(err);
157 }
158
159 /***********************************************************//**
160 Purges a clustered index record after undo if possible.
161 This is attempted when the record was inserted by updating a
162 delete-marked record and there no longer exist transactions
163 that would see the delete-marked record.
164 @return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
165 static MY_ATTRIBUTE((nonnull, warn_unused_result))
166 dberr_t
row_undo_mod_remove_clust_low(undo_node_t * node,que_thr_t * thr,mtr_t * mtr,ulint mode)167 row_undo_mod_remove_clust_low(
168 /*==========================*/
169 undo_node_t* node, /*!< in: row undo node */
170 que_thr_t* thr, /*!< in: query thread */
171 mtr_t* mtr, /*!< in/out: mini-transaction */
172 ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
173 {
174 btr_cur_t* btr_cur;
175 dberr_t err;
176 ulint trx_id_offset;
177
178 ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
179
180 /* Find out if the record has been purged already
181 or if we can remove it. */
182
183 if (!btr_pcur_restore_position(mode, &node->pcur, mtr)
184 || row_vers_must_preserve_del_marked(node->new_trx_id, mtr)) {
185
186 return(DB_SUCCESS);
187 }
188
189 btr_cur = btr_pcur_get_btr_cur(&node->pcur);
190
191 trx_id_offset = btr_cur_get_index(btr_cur)->trx_id_offset;
192
193 if (!trx_id_offset) {
194 mem_heap_t* heap = NULL;
195 ulint trx_id_col;
196 const ulint* offsets;
197 ulint len;
198
199 trx_id_col = dict_index_get_sys_col_pos(
200 btr_cur_get_index(btr_cur), DATA_TRX_ID);
201 ut_ad(trx_id_col > 0);
202 ut_ad(trx_id_col != ULINT_UNDEFINED);
203
204 offsets = rec_get_offsets(
205 btr_cur_get_rec(btr_cur), btr_cur_get_index(btr_cur),
206 NULL, trx_id_col + 1, &heap);
207
208 trx_id_offset = rec_get_nth_field_offs(
209 offsets, trx_id_col, &len);
210 ut_ad(len == DATA_TRX_ID_LEN);
211 mem_heap_free(heap);
212 }
213
214 if (trx_read_trx_id(btr_cur_get_rec(btr_cur) + trx_id_offset)
215 != node->new_trx_id) {
216 /* The record must have been purged and then replaced
217 with a different one. */
218 return(DB_SUCCESS);
219 }
220
221 /* We are about to remove an old, delete-marked version of the
222 record that may have been delete-marked by a different transaction
223 than the rolling-back one. */
224 ut_ad(rec_get_deleted_flag(btr_cur_get_rec(btr_cur),
225 dict_table_is_comp(node->table)));
226
227 if (mode == BTR_MODIFY_LEAF) {
228 err = btr_cur_optimistic_delete(btr_cur, 0, mtr)
229 ? DB_SUCCESS
230 : DB_FAIL;
231 } else {
232 ut_ad(mode == BTR_MODIFY_TREE);
233
234 /* This operation is analogous to purge, we can free also
235 inherited externally stored fields */
236
237 btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
238 thr_is_recv(thr)
239 ? RB_RECOVERY_PURGE_REC
240 : RB_NONE, mtr);
241
242 /* The delete operation may fail if we have little
243 file space left: TODO: easiest to crash the database
244 and restart with more file space */
245 }
246
247 return(err);
248 }
249
250 /***********************************************************//**
251 Undoes a modify in a clustered index record. Sets also the node state for the
252 next round of undo.
253 @return DB_SUCCESS or error code: we may run out of file space */
254 static MY_ATTRIBUTE((nonnull, warn_unused_result))
255 dberr_t
row_undo_mod_clust(undo_node_t * node,que_thr_t * thr)256 row_undo_mod_clust(
257 /*===============*/
258 undo_node_t* node, /*!< in: row undo node */
259 que_thr_t* thr) /*!< in: query thread */
260 {
261 btr_pcur_t* pcur;
262 mtr_t mtr;
263 dberr_t err;
264 dict_index_t* index;
265 bool online;
266
267 ut_ad(thr_get_trx(thr) == node->trx);
268 ut_ad(node->trx->dict_operation_lock_mode);
269 #ifdef UNIV_SYNC_DEBUG
270 ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED)
271 || rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
272 #endif /* UNIV_SYNC_DEBUG */
273
274 log_free_check();
275 pcur = &node->pcur;
276 index = btr_cur_get_index(btr_pcur_get_btr_cur(pcur));
277
278 mtr_start(&mtr);
279
280 online = dict_index_is_online_ddl(index);
281 if (online) {
282 ut_ad(node->trx->dict_operation_lock_mode != RW_X_LATCH);
283 mtr_s_lock(dict_index_get_lock(index), &mtr);
284 }
285
286 mem_heap_t* heap = mem_heap_create(1024);
287 mem_heap_t* offsets_heap = NULL;
288 ulint* offsets = NULL;
289 const dtuple_t* rebuilt_old_pk;
290 byte sys[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN];
291
292 /* Try optimistic processing of the record, keeping changes within
293 the index page */
294
295 err = row_undo_mod_clust_low(node, &offsets, &offsets_heap,
296 heap, &rebuilt_old_pk, sys,
297 thr, &mtr, online
298 ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
299 : BTR_MODIFY_LEAF);
300
301 if (err != DB_SUCCESS) {
302 btr_pcur_commit_specify_mtr(pcur, &mtr);
303
304 /* We may have to modify tree structure: do a pessimistic
305 descent down the index tree */
306
307 mtr_start(&mtr);
308
309 err = row_undo_mod_clust_low(
310 node, &offsets, &offsets_heap,
311 heap, &rebuilt_old_pk, sys,
312 thr, &mtr, BTR_MODIFY_TREE);
313 ut_ad(err == DB_SUCCESS || err == DB_OUT_OF_FILE_SPACE);
314 }
315
316 /* Online rebuild cannot be initiated while we are holding
317 dict_operation_lock and index->lock. (It can be aborted.) */
318 ut_ad(online || !dict_index_is_online_ddl(index));
319
320 if (err == DB_SUCCESS && online) {
321 #ifdef UNIV_SYNC_DEBUG
322 ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED)
323 || rw_lock_own(&index->lock, RW_LOCK_EX));
324 #endif /* UNIV_SYNC_DEBUG */
325 switch (node->rec_type) {
326 case TRX_UNDO_DEL_MARK_REC:
327 row_log_table_insert(
328 btr_pcur_get_rec(pcur), index, offsets);
329 break;
330 case TRX_UNDO_UPD_EXIST_REC:
331 row_log_table_update(
332 btr_pcur_get_rec(pcur), index, offsets,
333 rebuilt_old_pk);
334 break;
335 case TRX_UNDO_UPD_DEL_REC:
336 row_log_table_delete(
337 btr_pcur_get_rec(pcur), index, offsets, sys);
338 break;
339 default:
340 ut_ad(0);
341 break;
342 }
343 }
344
345 ut_ad(rec_get_trx_id(btr_pcur_get_rec(pcur), index)
346 == node->new_trx_id);
347
348 btr_pcur_commit_specify_mtr(pcur, &mtr);
349
350 if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_UPD_DEL_REC) {
351
352 mtr_start(&mtr);
353
354 /* It is not necessary to call row_log_table,
355 because the record is delete-marked and would thus
356 be omitted from the rebuilt copy of the table. */
357 err = row_undo_mod_remove_clust_low(
358 node, thr, &mtr, BTR_MODIFY_LEAF);
359 if (err != DB_SUCCESS) {
360 btr_pcur_commit_specify_mtr(pcur, &mtr);
361
362 /* We may have to modify tree structure: do a
363 pessimistic descent down the index tree */
364
365 mtr_start(&mtr);
366
367 err = row_undo_mod_remove_clust_low(node, thr, &mtr,
368 BTR_MODIFY_TREE);
369
370 ut_ad(err == DB_SUCCESS
371 || err == DB_OUT_OF_FILE_SPACE);
372 }
373
374 btr_pcur_commit_specify_mtr(pcur, &mtr);
375 }
376
377 node->state = UNDO_NODE_FETCH_NEXT;
378
379 trx_undo_rec_release(node->trx, node->undo_no);
380
381 if (offsets_heap) {
382 mem_heap_free(offsets_heap);
383 }
384 mem_heap_free(heap);
385 return(err);
386 }
387
388 /***********************************************************//**
389 Delete marks or removes a secondary index entry if found.
390 @return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
391 static MY_ATTRIBUTE((nonnull, warn_unused_result))
392 dberr_t
row_undo_mod_del_mark_or_remove_sec_low(undo_node_t * node,que_thr_t * thr,dict_index_t * index,dtuple_t * entry,ulint mode)393 row_undo_mod_del_mark_or_remove_sec_low(
394 /*====================================*/
395 undo_node_t* node, /*!< in: row undo node */
396 que_thr_t* thr, /*!< in: query thread */
397 dict_index_t* index, /*!< in: index */
398 dtuple_t* entry, /*!< in: index entry */
399 ulint mode) /*!< in: latch mode BTR_MODIFY_LEAF or
400 BTR_MODIFY_TREE */
401 {
402 btr_pcur_t pcur;
403 btr_cur_t* btr_cur;
404 ibool success;
405 ibool old_has;
406 dberr_t err = DB_SUCCESS;
407 mtr_t mtr;
408 mtr_t mtr_vers;
409 enum row_search_result search_result;
410
411 log_free_check();
412 mtr_start(&mtr);
413
414 if (*index->name == TEMP_INDEX_PREFIX) {
415 /* The index->online_status may change if the
416 index->name starts with TEMP_INDEX_PREFIX (meaning
417 that the index is or was being created online). It is
418 protected by index->lock. */
419 if (mode == BTR_MODIFY_LEAF) {
420 mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
421 mtr_s_lock(dict_index_get_lock(index), &mtr);
422 } else {
423 ut_ad(mode == BTR_MODIFY_TREE);
424 mtr_x_lock(dict_index_get_lock(index), &mtr);
425 }
426
427 if (row_log_online_op_try(index, entry, 0)) {
428 goto func_exit_no_pcur;
429 }
430 } else {
431 /* For secondary indexes,
432 index->online_status==ONLINE_INDEX_CREATION unless
433 index->name starts with TEMP_INDEX_PREFIX. */
434 ut_ad(!dict_index_is_online_ddl(index));
435 }
436
437 btr_cur = btr_pcur_get_btr_cur(&pcur);
438
439 search_result = row_search_index_entry(index, entry, mode,
440 &pcur, &mtr);
441
442 switch (UNIV_EXPECT(search_result, ROW_FOUND)) {
443 case ROW_NOT_FOUND:
444 /* In crash recovery, the secondary index record may
445 be missing if the UPDATE did not have time to insert
446 the secondary index records before the crash. When we
447 are undoing that UPDATE in crash recovery, the record
448 may be missing.
449
450 In normal processing, if an update ends in a deadlock
451 before it has inserted all updated secondary index
452 records, then the undo will not find those records. */
453 goto func_exit;
454 case ROW_FOUND:
455 break;
456 case ROW_BUFFERED:
457 case ROW_NOT_DELETED_REF:
458 /* These are invalid outcomes, because the mode passed
459 to row_search_index_entry() did not include any of the
460 flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
461 ut_error;
462 }
463
464 /* We should remove the index record if no prior version of the row,
465 which cannot be purged yet, requires its existence. If some requires,
466 we should delete mark the record. */
467
468 mtr_start(&mtr_vers);
469
470 success = btr_pcur_restore_position(BTR_SEARCH_LEAF, &(node->pcur),
471 &mtr_vers);
472 ut_a(success);
473
474 old_has = row_vers_old_has_index_entry(FALSE,
475 btr_pcur_get_rec(&(node->pcur)),
476 &mtr_vers, index, entry);
477 if (old_has) {
478 err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
479 btr_cur, TRUE, thr, &mtr);
480 ut_ad(err == DB_SUCCESS);
481 } else {
482 /* Remove the index record */
483
484 if (mode != BTR_MODIFY_TREE) {
485 success = btr_cur_optimistic_delete(btr_cur, 0, &mtr);
486 if (success) {
487 err = DB_SUCCESS;
488 } else {
489 err = DB_FAIL;
490 }
491 } else {
492 /* No need to distinguish RB_RECOVERY_PURGE here,
493 because we are deleting a secondary index record:
494 the distinction between RB_NORMAL and
495 RB_RECOVERY_PURGE only matters when deleting a
496 record that contains externally stored
497 columns. */
498 ut_ad(!dict_index_is_clust(index));
499 btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
500 RB_NORMAL, &mtr);
501
502 /* The delete operation may fail if we have little
503 file space left: TODO: easiest to crash the database
504 and restart with more file space */
505 }
506 }
507
508 btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers);
509
510 func_exit:
511 btr_pcur_close(&pcur);
512 func_exit_no_pcur:
513 mtr_commit(&mtr);
514
515 return(err);
516 }
517
518 /***********************************************************//**
519 Delete marks or removes a secondary index entry if found.
520 NOTE that if we updated the fields of a delete-marked secondary index record
521 so that alphabetically they stayed the same, e.g., 'abc' -> 'aBc', we cannot
522 return to the original values because we do not know them. But this should
523 not cause problems because in row0sel.cc, in queries we always retrieve the
524 clustered index record or an earlier version of it, if the secondary index
525 record through which we do the search is delete-marked.
526 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
527 static MY_ATTRIBUTE((nonnull, warn_unused_result))
528 dberr_t
row_undo_mod_del_mark_or_remove_sec(undo_node_t * node,que_thr_t * thr,dict_index_t * index,dtuple_t * entry)529 row_undo_mod_del_mark_or_remove_sec(
530 /*================================*/
531 undo_node_t* node, /*!< in: row undo node */
532 que_thr_t* thr, /*!< in: query thread */
533 dict_index_t* index, /*!< in: index */
534 dtuple_t* entry) /*!< in: index entry */
535 {
536 dberr_t err;
537
538 err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
539 entry, BTR_MODIFY_LEAF);
540 if (err == DB_SUCCESS) {
541
542 return(err);
543 }
544
545 err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
546 entry, BTR_MODIFY_TREE);
547 return(err);
548 }
549
550 /***********************************************************//**
551 Delete unmarks a secondary index entry which must be found. It might not be
552 delete-marked at the moment, but it does not harm to unmark it anyway. We also
553 need to update the fields of the secondary index record if we updated its
554 fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'.
555 @retval DB_SUCCESS on success
556 @retval DB_FAIL if BTR_MODIFY_TREE should be tried
557 @retval DB_OUT_OF_FILE_SPACE when running out of tablespace
558 @retval DB_DUPLICATE_KEY if the value was missing
559 and an insert would lead to a duplicate exists */
560 static MY_ATTRIBUTE((nonnull, warn_unused_result))
561 dberr_t
row_undo_mod_del_unmark_sec_and_undo_update(ulint mode,que_thr_t * thr,dict_index_t * index,dtuple_t * entry)562 row_undo_mod_del_unmark_sec_and_undo_update(
563 /*========================================*/
564 ulint mode, /*!< in: search mode: BTR_MODIFY_LEAF or
565 BTR_MODIFY_TREE */
566 que_thr_t* thr, /*!< in: query thread */
567 dict_index_t* index, /*!< in: index */
568 dtuple_t* entry) /*!< in: index entry */
569 {
570 btr_pcur_t pcur;
571 btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
572 upd_t* update;
573 dberr_t err = DB_SUCCESS;
574 big_rec_t* dummy_big_rec;
575 mtr_t mtr;
576 trx_t* trx = thr_get_trx(thr);
577 const ulint flags
578 = BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG;
579 enum row_search_result search_result;
580
581 ut_ad(trx->id);
582
583 log_free_check();
584 mtr_start(&mtr);
585
586 if (*index->name == TEMP_INDEX_PREFIX) {
587 /* The index->online_status may change if the
588 index->name starts with TEMP_INDEX_PREFIX (meaning
589 that the index is or was being created online). It is
590 protected by index->lock. */
591 if (mode == BTR_MODIFY_LEAF) {
592 mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
593 mtr_s_lock(dict_index_get_lock(index), &mtr);
594 } else {
595 ut_ad(mode == BTR_MODIFY_TREE);
596 mtr_x_lock(dict_index_get_lock(index), &mtr);
597 }
598
599 if (row_log_online_op_try(index, entry, trx->id)) {
600 goto func_exit_no_pcur;
601 }
602 } else {
603 /* For secondary indexes,
604 index->online_status==ONLINE_INDEX_CREATION unless
605 index->name starts with TEMP_INDEX_PREFIX. */
606 ut_ad(!dict_index_is_online_ddl(index));
607 }
608
609 search_result = row_search_index_entry(index, entry, mode,
610 &pcur, &mtr);
611
612 switch (search_result) {
613 mem_heap_t* heap;
614 mem_heap_t* offsets_heap;
615 ulint* offsets;
616 case ROW_BUFFERED:
617 case ROW_NOT_DELETED_REF:
618 /* These are invalid outcomes, because the mode passed
619 to row_search_index_entry() did not include any of the
620 flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
621 ut_error;
622 case ROW_NOT_FOUND:
623 if (*index->name != TEMP_INDEX_PREFIX) {
624 /* During online secondary index creation, it
625 is possible that MySQL is waiting for a
626 meta-data lock upgrade before invoking
627 ha_innobase::commit_inplace_alter_table()
628 while this ROLLBACK is executing. InnoDB has
629 finished building the index, but it does not
630 yet exist in MySQL. In this case, we suppress
631 the printout to the error log. */
632 fputs("InnoDB: error in sec index entry del undo in\n"
633 "InnoDB: ", stderr);
634 dict_index_name_print(stderr, trx, index);
635 fputs("\n"
636 "InnoDB: tuple ", stderr);
637 dtuple_print(stderr, entry);
638 fputs("\n"
639 "InnoDB: record ", stderr);
640 rec_print(stderr, btr_pcur_get_rec(&pcur), index);
641 putc('\n', stderr);
642 trx_print(stderr, trx, 0);
643 fputs("\n"
644 "InnoDB: Submit a detailed bug report"
645 " to http://bugs.mysql.com\n", stderr);
646
647 ib_logf(IB_LOG_LEVEL_WARN,
648 "record in index %s was not found"
649 " on rollback, trying to insert",
650 index->name);
651 }
652
653 if (btr_cur->up_match >= dict_index_get_n_unique(index)
654 || btr_cur->low_match >= dict_index_get_n_unique(index)) {
655 if (*index->name != TEMP_INDEX_PREFIX) {
656 ib_logf(IB_LOG_LEVEL_WARN,
657 "record in index %s was not found on"
658 " rollback, and a duplicate exists",
659 index->name);
660 }
661 err = DB_DUPLICATE_KEY;
662 break;
663 }
664
665 /* Insert the missing record that we were trying to
666 delete-unmark. */
667 big_rec_t* big_rec;
668 rec_t* insert_rec;
669 offsets = NULL;
670 offsets_heap = NULL;
671
672 err = btr_cur_optimistic_insert(
673 flags, btr_cur, &offsets, &offsets_heap,
674 entry, &insert_rec, &big_rec,
675 0, thr, &mtr);
676 ut_ad(!big_rec);
677
678 if (err == DB_FAIL && mode == BTR_MODIFY_TREE) {
679 err = btr_cur_pessimistic_insert(
680 flags, btr_cur,
681 &offsets, &offsets_heap,
682 entry, &insert_rec, &big_rec,
683 0, thr, &mtr);
684 /* There are no off-page columns in
685 secondary indexes. */
686 ut_ad(!big_rec);
687 }
688
689 if (err == DB_SUCCESS) {
690 page_update_max_trx_id(
691 btr_cur_get_block(btr_cur),
692 btr_cur_get_page_zip(btr_cur),
693 trx->id, &mtr);
694 }
695
696 if (offsets_heap) {
697 mem_heap_free(offsets_heap);
698 }
699
700 break;
701 case ROW_FOUND:
702 err = btr_cur_del_mark_set_sec_rec(
703 BTR_NO_LOCKING_FLAG,
704 btr_cur, FALSE, thr, &mtr);
705 ut_a(err == DB_SUCCESS);
706 heap = mem_heap_create(
707 sizeof(upd_t)
708 + dtuple_get_n_fields(entry) * sizeof(upd_field_t));
709 offsets_heap = NULL;
710 offsets = rec_get_offsets(
711 btr_cur_get_rec(btr_cur),
712 index, NULL, ULINT_UNDEFINED, &offsets_heap);
713 update = row_upd_build_sec_rec_difference_binary(
714 btr_cur_get_rec(btr_cur), index, offsets, entry, heap);
715 if (upd_get_n_fields(update) == 0) {
716
717 /* Do nothing */
718
719 } else if (mode != BTR_MODIFY_TREE) {
720 /* Try an optimistic updating of the record, keeping
721 changes within the page */
722
723 /* TODO: pass offsets, not &offsets */
724 err = btr_cur_optimistic_update(
725 flags, btr_cur, &offsets, &offsets_heap,
726 update, 0, thr, thr_get_trx(thr)->id, &mtr);
727 switch (err) {
728 case DB_OVERFLOW:
729 case DB_UNDERFLOW:
730 case DB_ZIP_OVERFLOW:
731 err = DB_FAIL;
732 default:
733 break;
734 }
735 } else {
736 err = btr_cur_pessimistic_update(
737 flags, btr_cur, &offsets, &offsets_heap,
738 heap, &dummy_big_rec,
739 update, 0, thr, thr_get_trx(thr)->id, &mtr);
740 ut_a(!dummy_big_rec);
741 }
742
743 mem_heap_free(heap);
744 mem_heap_free(offsets_heap);
745 }
746
747 btr_pcur_close(&pcur);
748 func_exit_no_pcur:
749 mtr_commit(&mtr);
750
751 return(err);
752 }
753
754 /***********************************************************//**
755 Flags a secondary index corrupted. */
756 static MY_ATTRIBUTE((nonnull))
757 void
row_undo_mod_sec_flag_corrupted(trx_t * trx,dict_index_t * index)758 row_undo_mod_sec_flag_corrupted(
759 /*============================*/
760 trx_t* trx, /*!< in/out: transaction */
761 dict_index_t* index) /*!< in: secondary index */
762 {
763 ut_ad(!dict_index_is_clust(index));
764
765 switch (trx->dict_operation_lock_mode) {
766 case RW_S_LATCH:
767 /* Because row_undo() is holding an S-latch
768 on the data dictionary during normal rollback,
769 we can only mark the index corrupted in the
770 data dictionary cache. TODO: fix this somehow.*/
771 mutex_enter(&dict_sys->mutex);
772 dict_set_corrupted_index_cache_only(index, index->table);
773 mutex_exit(&dict_sys->mutex);
774 break;
775 default:
776 ut_ad(0);
777 /* fall through */
778 case RW_X_LATCH:
779 /* This should be the rollback of a data dictionary
780 transaction. */
781 dict_set_corrupted(index, trx, "rollback");
782 }
783 }
784
785 /***********************************************************//**
786 Undoes a modify in secondary indexes when undo record type is UPD_DEL.
787 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
788 static MY_ATTRIBUTE((nonnull, warn_unused_result))
789 dberr_t
row_undo_mod_upd_del_sec(undo_node_t * node,que_thr_t * thr)790 row_undo_mod_upd_del_sec(
791 /*=====================*/
792 undo_node_t* node, /*!< in: row undo node */
793 que_thr_t* thr) /*!< in: query thread */
794 {
795 mem_heap_t* heap;
796 dberr_t err = DB_SUCCESS;
797
798 ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
799 ut_ad(!node->undo_row);
800
801 heap = mem_heap_create(1024);
802
803 while (node->index != NULL) {
804 dict_index_t* index = node->index;
805 dtuple_t* entry;
806
807 if (index->type & DICT_FTS) {
808 dict_table_next_uncorrupted_index(node->index);
809 continue;
810 }
811
812 /* During online index creation,
813 HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE should
814 guarantee that any active transaction has not modified
815 indexed columns such that col->ord_part was 0 at the
816 time when the undo log record was written. When we get
817 to roll back an undo log entry TRX_UNDO_DEL_MARK_REC,
818 it should always cover all affected indexes. */
819 entry = row_build_index_entry(
820 node->row, node->ext, index, heap);
821
822 if (UNIV_UNLIKELY(!entry)) {
823 /* The database must have crashed after
824 inserting a clustered index record but before
825 writing all the externally stored columns of
826 that record. Because secondary index entries
827 are inserted after the clustered index record,
828 we may assume that the secondary index record
829 does not exist. However, this situation may
830 only occur during the rollback of incomplete
831 transactions. */
832 ut_a(thr_is_recv(thr));
833 } else {
834 err = row_undo_mod_del_mark_or_remove_sec(
835 node, thr, index, entry);
836
837 if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
838
839 break;
840 }
841 }
842
843 mem_heap_empty(heap);
844 dict_table_next_uncorrupted_index(node->index);
845 }
846
847 mem_heap_free(heap);
848
849 return(err);
850 }
851
852 /***********************************************************//**
853 Undoes a modify in secondary indexes when undo record type is DEL_MARK.
854 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
855 static MY_ATTRIBUTE((nonnull, warn_unused_result))
856 dberr_t
row_undo_mod_del_mark_sec(undo_node_t * node,que_thr_t * thr)857 row_undo_mod_del_mark_sec(
858 /*======================*/
859 undo_node_t* node, /*!< in: row undo node */
860 que_thr_t* thr) /*!< in: query thread */
861 {
862 mem_heap_t* heap;
863 dberr_t err = DB_SUCCESS;
864
865 ut_ad(!node->undo_row);
866
867 heap = mem_heap_create(1024);
868
869 while (node->index != NULL) {
870 dict_index_t* index = node->index;
871 dtuple_t* entry;
872
873 if (index->type == DICT_FTS) {
874 dict_table_next_uncorrupted_index(node->index);
875 continue;
876 }
877
878 /* During online index creation,
879 HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE should
880 guarantee that any active transaction has not modified
881 indexed columns such that col->ord_part was 0 at the
882 time when the undo log record was written. When we get
883 to roll back an undo log entry TRX_UNDO_DEL_MARK_REC,
884 it should always cover all affected indexes. */
885 entry = row_build_index_entry(
886 node->row, node->ext, index, heap);
887
888 ut_a(entry);
889
890 err = row_undo_mod_del_unmark_sec_and_undo_update(
891 BTR_MODIFY_LEAF, thr, index, entry);
892 if (err == DB_FAIL) {
893 err = row_undo_mod_del_unmark_sec_and_undo_update(
894 BTR_MODIFY_TREE, thr, index, entry);
895 }
896
897 if (err == DB_DUPLICATE_KEY) {
898 row_undo_mod_sec_flag_corrupted(
899 thr_get_trx(thr), index);
900 err = DB_SUCCESS;
901 /* Do not return any error to the caller. The
902 duplicate will be reported by ALTER TABLE or
903 CREATE UNIQUE INDEX. Unfortunately we cannot
904 report the duplicate key value to the DDL
905 thread, because the altered_table object is
906 private to its call stack. */
907 } else if (err != DB_SUCCESS) {
908 break;
909 }
910
911 mem_heap_empty(heap);
912 dict_table_next_uncorrupted_index(node->index);
913 }
914
915 mem_heap_free(heap);
916
917 return(err);
918 }
919
920 /***********************************************************//**
921 Undoes a modify in secondary indexes when undo record type is UPD_EXIST.
922 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
923 static MY_ATTRIBUTE((nonnull, warn_unused_result))
924 dberr_t
row_undo_mod_upd_exist_sec(undo_node_t * node,que_thr_t * thr)925 row_undo_mod_upd_exist_sec(
926 /*=======================*/
927 undo_node_t* node, /*!< in: row undo node */
928 que_thr_t* thr) /*!< in: query thread */
929 {
930 mem_heap_t* heap;
931 dberr_t err = DB_SUCCESS;
932
933 if (node->index == NULL
934 || ((node->cmpl_info & UPD_NODE_NO_ORD_CHANGE))) {
935 /* No change in secondary indexes */
936
937 return(err);
938 }
939
940 heap = mem_heap_create(1024);
941
942 while (node->index != NULL) {
943 dict_index_t* index = node->index;
944 dtuple_t* entry;
945
946 if (index->type == DICT_FTS
947 || !row_upd_changes_ord_field_binary(
948 index, node->update, thr, node->row, node->ext)) {
949 dict_table_next_uncorrupted_index(node->index);
950 continue;
951 }
952
953 /* Build the newest version of the index entry */
954 entry = row_build_index_entry(node->row, node->ext,
955 index, heap);
956 if (UNIV_UNLIKELY(!entry)) {
957 /* The server must have crashed in
958 row_upd_clust_rec_by_insert() before
959 the updated externally stored columns (BLOBs)
960 of the new clustered index entry were written. */
961
962 /* The table must be in DYNAMIC or COMPRESSED
963 format. REDUNDANT and COMPACT formats
964 store a local 768-byte prefix of each
965 externally stored column. */
966 ut_a(dict_table_get_format(index->table)
967 >= UNIV_FORMAT_B);
968
969 /* This is only legitimate when
970 rolling back an incomplete transaction
971 after crash recovery. */
972 ut_a(thr_get_trx(thr)->is_recovered);
973
974 /* The server must have crashed before
975 completing the insert of the new
976 clustered index entry and before
977 inserting to the secondary indexes.
978 Because node->row was not yet written
979 to this index, we can ignore it. But
980 we must restore node->undo_row. */
981 } else {
982 /* NOTE that if we updated the fields of a
983 delete-marked secondary index record so that
984 alphabetically they stayed the same, e.g.,
985 'abc' -> 'aBc', we cannot return to the
986 original values because we do not know them.
987 But this should not cause problems because
988 in row0sel.cc, in queries we always retrieve
989 the clustered index record or an earlier
990 version of it, if the secondary index record
991 through which we do the search is
992 delete-marked. */
993
994 err = row_undo_mod_del_mark_or_remove_sec(
995 node, thr, index, entry);
996 if (err != DB_SUCCESS) {
997 break;
998 }
999 }
1000
1001 mem_heap_empty(heap);
1002 /* We may have to update the delete mark in the
1003 secondary index record of the previous version of
1004 the row. We also need to update the fields of
1005 the secondary index record if we updated its fields
1006 but alphabetically they stayed the same, e.g.,
1007 'abc' -> 'aBc'. */
1008 entry = row_build_index_entry(node->undo_row,
1009 node->undo_ext,
1010 index, heap);
1011 ut_a(entry);
1012
1013 err = row_undo_mod_del_unmark_sec_and_undo_update(
1014 BTR_MODIFY_LEAF, thr, index, entry);
1015 if (err == DB_FAIL) {
1016 err = row_undo_mod_del_unmark_sec_and_undo_update(
1017 BTR_MODIFY_TREE, thr, index, entry);
1018 }
1019
1020 if (err == DB_DUPLICATE_KEY) {
1021 row_undo_mod_sec_flag_corrupted(
1022 thr_get_trx(thr), index);
1023 err = DB_SUCCESS;
1024 } else if (err != DB_SUCCESS) {
1025 break;
1026 }
1027
1028 mem_heap_empty(heap);
1029 dict_table_next_uncorrupted_index(node->index);
1030 }
1031
1032 mem_heap_free(heap);
1033
1034 return(err);
1035 }
1036
1037 /***********************************************************//**
1038 Parses the row reference and other info in a modify undo log record. */
1039 static MY_ATTRIBUTE((nonnull))
1040 void
row_undo_mod_parse_undo_rec(undo_node_t * node,ibool dict_locked)1041 row_undo_mod_parse_undo_rec(
1042 /*========================*/
1043 undo_node_t* node, /*!< in: row undo node */
1044 ibool dict_locked) /*!< in: TRUE if own dict_sys->mutex */
1045 {
1046 dict_index_t* clust_index;
1047 byte* ptr;
1048 undo_no_t undo_no;
1049 table_id_t table_id;
1050 trx_id_t trx_id;
1051 roll_ptr_t roll_ptr;
1052 ulint info_bits;
1053 ulint type;
1054 ulint cmpl_info;
1055 bool dummy_extern;
1056
1057 ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
1058 &dummy_extern, &undo_no, &table_id);
1059 node->rec_type = type;
1060
1061 node->table = dict_table_open_on_id(
1062 table_id, dict_locked, DICT_TABLE_OP_NORMAL);
1063
1064 /* TODO: other fixes associated with DROP TABLE + rollback in the
1065 same table by another user */
1066
1067 if (node->table == NULL) {
1068 /* Table was dropped */
1069 return;
1070 }
1071
1072 if (node->table->ibd_file_missing) {
1073 dict_table_close(node->table, dict_locked, FALSE);
1074
1075 /* We skip undo operations to missing .ibd files */
1076 node->table = NULL;
1077
1078 return;
1079 }
1080
1081 clust_index = dict_table_get_first_index(node->table);
1082
1083 ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
1084 &info_bits);
1085
1086 ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
1087 node->heap);
1088
1089 trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
1090 roll_ptr, info_bits, node->trx,
1091 node->heap, &(node->update));
1092 node->new_trx_id = trx_id;
1093 node->cmpl_info = cmpl_info;
1094
1095 if (!row_undo_search_clust_to_pcur(node)) {
1096
1097 dict_table_close(node->table, dict_locked, FALSE);
1098
1099 node->table = NULL;
1100 }
1101 }
1102
1103 /***********************************************************//**
1104 Undoes a modify operation on a row of a table.
1105 @return DB_SUCCESS or error code */
1106 UNIV_INTERN
1107 dberr_t
row_undo_mod(undo_node_t * node,que_thr_t * thr)1108 row_undo_mod(
1109 /*=========*/
1110 undo_node_t* node, /*!< in: row undo node */
1111 que_thr_t* thr) /*!< in: query thread */
1112 {
1113 dberr_t err;
1114 ibool dict_locked;
1115
1116 ut_ad(node != NULL);
1117 ut_ad(thr != NULL);
1118 ut_ad(node->state == UNDO_NODE_MODIFY);
1119
1120 dict_locked = thr_get_trx(thr)->dict_operation_lock_mode == RW_X_LATCH;
1121
1122 ut_ad(thr_get_trx(thr) == node->trx);
1123
1124 row_undo_mod_parse_undo_rec(node, dict_locked);
1125
1126 if (node->table == NULL) {
1127 /* It is already undone, or will be undone by another query
1128 thread, or table was dropped */
1129
1130 trx_undo_rec_release(node->trx, node->undo_no);
1131 node->state = UNDO_NODE_FETCH_NEXT;
1132
1133 return(DB_SUCCESS);
1134 }
1135
1136 node->index = dict_table_get_first_index(node->table);
1137 ut_ad(dict_index_is_clust(node->index));
1138 /* Skip the clustered index (the first index) */
1139 node->index = dict_table_get_next_index(node->index);
1140
1141 /* Skip all corrupted secondary index */
1142 dict_table_skip_corrupt_index(node->index);
1143
1144 switch (node->rec_type) {
1145 case TRX_UNDO_UPD_EXIST_REC:
1146 err = row_undo_mod_upd_exist_sec(node, thr);
1147 break;
1148 case TRX_UNDO_DEL_MARK_REC:
1149 err = row_undo_mod_del_mark_sec(node, thr);
1150 break;
1151 case TRX_UNDO_UPD_DEL_REC:
1152 err = row_undo_mod_upd_del_sec(node, thr);
1153 break;
1154 default:
1155 ut_error;
1156 err = DB_ERROR;
1157 }
1158
1159 if (err == DB_SUCCESS) {
1160
1161 err = row_undo_mod_clust(node, thr);
1162 }
1163
1164 dict_table_close(node->table, dict_locked, FALSE);
1165
1166 node->table = NULL;
1167
1168 return(err);
1169 }
1170