1 /*****************************************************************************
2
3 Copyright (c) 1997, 2021, Oracle and/or its affiliates.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation. The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file row/row0uins.cc
29 Fresh insert undo
30
31 Created 2/25/1997 Heikki Tuuri
32 *******************************************************/
33
34 #include "row0uins.h"
35
36 #ifdef UNIV_NONINL
37 #include "row0uins.ic"
38 #endif
39
40 #include "dict0dict.h"
41 #include "dict0boot.h"
42 #include "dict0crea.h"
43 #include "trx0undo.h"
44 #include "trx0roll.h"
45 #include "btr0btr.h"
46 #include "mach0data.h"
47 #include "row0undo.h"
48 #include "row0vers.h"
49 #include "row0log.h"
50 #include "trx0trx.h"
51 #include "trx0rec.h"
52 #include "row0row.h"
53 #include "row0upd.h"
54 #include "que0que.h"
55 #include "ibuf0ibuf.h"
56 #include "log0log.h"
57 #include "fil0fil.h"
/*************************************************************************
IMPORTANT NOTE: Any operation that generates redo MUST check that there
is enough space in the redo log before starting that operation. This is
done by calling log_free_check(). The reason for checking the
availability of the redo log space before the start of the operation is
that we MUST NOT hold any synchronization objects when performing the
check.
If you make a change in this module make sure that no codepath is
introduced where a call to log_free_check() is bypassed. */
67
68 /***************************************************************//**
69 Removes a clustered index record. The pcur in node was positioned on the
70 record, now it is detached.
71 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
72 static MY_ATTRIBUTE((nonnull, warn_unused_result))
73 dberr_t
row_undo_ins_remove_clust_rec(undo_node_t * node)74 row_undo_ins_remove_clust_rec(
75 /*==========================*/
76 undo_node_t* node) /*!< in: undo node */
77 {
78 btr_cur_t* btr_cur;
79 ibool success;
80 dberr_t err;
81 ulint n_tries = 0;
82 mtr_t mtr;
83 dict_index_t* index = node->pcur.btr_cur.index;
84 bool online;
85
86 ut_ad(dict_index_is_clust(index));
87 ut_ad(node->trx->in_rollback);
88
89 mtr_start(&mtr);
90 mtr.set_named_space(index->space);
91 dict_disable_redo_if_temporary(index->table, &mtr);
92
93 /* This is similar to row_undo_mod_clust(). The DDL thread may
94 already have copied this row from the log to the new table.
95 We must log the removal, so that the row will be correctly
96 purged. However, we can log the removal out of sync with the
97 B-tree modification. */
98
99 online = dict_index_is_online_ddl(index);
100 if (online) {
101 ut_ad(node->trx->dict_operation_lock_mode
102 != RW_X_LATCH);
103 ut_ad(node->table->id != DICT_INDEXES_ID);
104 mtr_s_lock(dict_index_get_lock(index), &mtr);
105 }
106
107 success = btr_pcur_restore_position(
108 online
109 ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
110 : BTR_MODIFY_LEAF, &node->pcur, &mtr);
111 ut_a(success);
112
113 btr_cur = btr_pcur_get_btr_cur(&node->pcur);
114
115 ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), btr_cur->index)
116 == node->trx->id);
117 ut_ad(!rec_get_deleted_flag(
118 btr_cur_get_rec(btr_cur),
119 dict_table_is_comp(btr_cur->index->table)));
120
121 if (online && dict_index_is_online_ddl(index)) {
122 const rec_t* rec = btr_cur_get_rec(btr_cur);
123 mem_heap_t* heap = NULL;
124 const ulint* offsets = rec_get_offsets(
125 rec, index, NULL, ULINT_UNDEFINED, &heap);
126 row_log_table_delete(rec, node->row, index, offsets, NULL);
127 mem_heap_free(heap);
128 }
129
130 if (node->table->id == DICT_INDEXES_ID) {
131
132 ut_ad(!online);
133 ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH);
134
135 dict_drop_index_tree(
136 btr_pcur_get_rec(&node->pcur), &(node->pcur), &mtr);
137
138 mtr_commit(&mtr);
139
140 mtr_start(&mtr);
141
142 success = btr_pcur_restore_position(
143 BTR_MODIFY_LEAF, &node->pcur, &mtr);
144 ut_a(success);
145 }
146
147 row_convert_impl_to_expl_if_needed(btr_cur, node);
148
149 if (btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {
150 err = DB_SUCCESS;
151 goto func_exit;
152 }
153
154 btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
155 retry:
156 /* If did not succeed, try pessimistic descent to tree */
157 mtr_start(&mtr);
158 mtr.set_named_space(index->space);
159 dict_disable_redo_if_temporary(index->table, &mtr);
160
161 success = btr_pcur_restore_position(
162 BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
163 &node->pcur, &mtr);
164 ut_a(success);
165
166 btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0, true, &mtr);
167
168 /* The delete operation may fail if we have little
169 file space left: TODO: easiest to crash the database
170 and restart with more file space */
171
172 if (err == DB_OUT_OF_FILE_SPACE
173 && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
174
175 btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
176
177 n_tries++;
178
179 os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
180
181 goto retry;
182 }
183
184 func_exit:
185 btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
186
187 return(err);
188 }
189
190 /***************************************************************//**
191 Removes a secondary index entry if found.
192 @return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
193 static MY_ATTRIBUTE((nonnull, warn_unused_result))
194 dberr_t
row_undo_ins_remove_sec_low(ulint mode,dict_index_t * index,dtuple_t * entry,que_thr_t * thr,undo_node_t * node)195 row_undo_ins_remove_sec_low(
196 /*========================*/
197 ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
198 depending on whether we wish optimistic or
199 pessimistic descent down the index tree */
200 dict_index_t* index, /*!< in: index */
201 dtuple_t* entry, /*!< in: index entry to remove */
202 que_thr_t* thr, /*!< in: query thread */
203 undo_node_t* node) /*!< in: undo node */
204 {
205 btr_pcur_t pcur;
206 btr_cur_t* btr_cur;
207 dberr_t err = DB_SUCCESS;
208 mtr_t mtr;
209 enum row_search_result search_result;
210 ibool modify_leaf = false;
211 ulint rec_deleted;
212
213 log_free_check();
214
215 mtr_start(&mtr);
216 mtr.set_named_space(index->space);
217 dict_disable_redo_if_temporary(index->table, &mtr);
218
219 if (mode == BTR_MODIFY_LEAF) {
220 mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
221 mtr_s_lock(dict_index_get_lock(index), &mtr);
222 modify_leaf = true;
223 } else {
224 ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE));
225 mtr_sx_lock(dict_index_get_lock(index), &mtr);
226 }
227
228 if (row_log_online_op_try(index, entry, 0)) {
229 goto func_exit_no_pcur;
230 }
231
232 if (dict_index_is_spatial(index)) {
233 if (mode & BTR_MODIFY_LEAF) {
234 mode |= BTR_RTREE_DELETE_MARK;
235 }
236 btr_pcur_get_btr_cur(&pcur)->thr = thr;
237 mode |= BTR_RTREE_UNDO_INS;
238 }
239
240 search_result = row_search_index_entry(index, entry, mode,
241 &pcur, &mtr);
242
243 switch (search_result) {
244 case ROW_NOT_FOUND:
245 goto func_exit;
246 case ROW_FOUND:
247 break;
248
249 case ROW_BUFFERED:
250 case ROW_NOT_DELETED_REF:
251 /* These are invalid outcomes, because the mode passed
252 to row_search_index_entry() did not include any of the
253 flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
254 ut_error;
255 }
256
257 rec_deleted = rec_get_deleted_flag(btr_pcur_get_rec(&pcur),
258 dict_table_is_comp(index->table));
259
260 if (search_result == ROW_FOUND && dict_index_is_spatial(index)) {
261 if(rec_deleted) {
262 ib::error() << "Record found in index " << index->name
263 << " is deleted marked on insert rollback.";
264 }
265 }
266
267 btr_cur = btr_pcur_get_btr_cur(&pcur);
268
269 if (rec_deleted == 0) {
270 /* This record is not delete marked and has an implicit
271 lock on it. For delete marked record, INSERT has not
272 modified it yet and we don't have implicit lock on it.
273 We must convert to explicit if and only if we have
274 implicit lock on the record.*/
275 row_convert_impl_to_expl_if_needed(btr_cur, node);
276 }
277
278 if (modify_leaf) {
279 err = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
280 ? DB_SUCCESS : DB_FAIL;
281 } else {
282 /* Passing rollback=false here, because we are
283 deleting a secondary index record: the distinction
284 only matters when deleting a record that contains
285 externally stored columns. */
286 ut_ad(!dict_index_is_clust(index));
287 btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
288 false, &mtr);
289 }
290 func_exit:
291 btr_pcur_close(&pcur);
292 func_exit_no_pcur:
293 mtr_commit(&mtr);
294
295 return(err);
296 }
297
298 /***************************************************************//**
299 Removes a secondary index entry from the index if found. Tries first
300 optimistic, then pessimistic descent down the tree.
301 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
302 static MY_ATTRIBUTE((nonnull, warn_unused_result))
303 dberr_t
row_undo_ins_remove_sec(dict_index_t * index,dtuple_t * entry,que_thr_t * thr,undo_node_t * node)304 row_undo_ins_remove_sec(
305 /*====================*/
306 dict_index_t* index, /*!< in: index */
307 dtuple_t* entry, /*!< in: index entry to insert */
308 que_thr_t* thr, /*!< in: query thread */
309 undo_node_t* node)
310 {
311 dberr_t err;
312 ulint n_tries = 0;
313
314 /* Try first optimistic descent to the B-tree */
315
316 err = row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry, thr, node);
317
318 if (err == DB_SUCCESS) {
319
320 return(err);
321 }
322
323 /* Try then pessimistic descent to the B-tree */
324 retry:
325 err = row_undo_ins_remove_sec_low(
326 BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
327 index, entry, thr, node);
328
329 /* The delete operation may fail if we have little
330 file space left: TODO: easiest to crash the database
331 and restart with more file space */
332
333 if (err != DB_SUCCESS && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
334
335 n_tries++;
336
337 os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
338
339 goto retry;
340 }
341
342 return(err);
343 }
344
345 /***********************************************************//**
346 Parses the row reference and other info in a fresh insert undo record. */
347 static
348 void
row_undo_ins_parse_undo_rec(undo_node_t * node,ibool dict_locked)349 row_undo_ins_parse_undo_rec(
350 /*========================*/
351 undo_node_t* node, /*!< in/out: row undo node */
352 ibool dict_locked) /*!< in: TRUE if own dict_sys->mutex */
353 {
354 dict_index_t* clust_index;
355 byte* ptr;
356 undo_no_t undo_no;
357 table_id_t table_id;
358 ulint type;
359 ulint dummy;
360 bool dummy_extern;
361
362 ut_ad(node);
363
364 ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy,
365 &dummy_extern, &undo_no, &table_id);
366 ut_ad(type == TRX_UNDO_INSERT_REC);
367 node->rec_type = type;
368
369 node->update = NULL;
370 node->table = dict_table_open_on_id(
371 table_id, dict_locked, DICT_TABLE_OP_NORMAL);
372
373 /* Skip the UNDO if we can't find the table or the .ibd file. */
374 if (UNIV_UNLIKELY(node->table == NULL)) {
375 } else if (UNIV_UNLIKELY(node->table->file_unreadable)) {
376 close_table:
377 dict_table_close(node->table, dict_locked, FALSE);
378 node->table = NULL;
379 } else if (fil_space_is_being_truncated(node->table->space)) {
380
381 dict_table_close(node->table, dict_locked, FALSE);
382 node->table = NULL;
383 } else {
384 clust_index = dict_table_get_first_index(node->table);
385
386 if (clust_index != NULL) {
387 ptr = trx_undo_rec_get_row_ref(
388 ptr, clust_index, &node->ref, node->heap);
389
390 if (!row_undo_search_clust_to_pcur(node)) {
391 goto close_table;
392 }
393 if (node->table->n_v_cols) {
394 trx_undo_read_v_cols(node->table, ptr,
395 node->row, false, NULL);
396 }
397
398 } else {
399 ib::warn() << "Table " << node->table->name
400 << " has no indexes,"
401 " ignoring the table";
402 goto close_table;
403 }
404 }
405 }
406
407 /***************************************************************//**
408 Removes secondary index records.
409 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
410 static MY_ATTRIBUTE((nonnull, warn_unused_result))
411 dberr_t
row_undo_ins_remove_sec_rec(undo_node_t * node,que_thr_t * thr)412 row_undo_ins_remove_sec_rec(
413 /*========================*/
414 undo_node_t* node, /*!< in/out: row undo node */
415 que_thr_t* thr) /*!< in: query thread */
416 {
417 dberr_t err = DB_SUCCESS;
418 dict_index_t* index = node->index;
419 mem_heap_t* heap;
420
421 heap = mem_heap_create(1024);
422
423 while (index != NULL) {
424 dtuple_t* entry;
425
426 if (index->type & DICT_FTS) {
427 dict_table_next_uncorrupted_index(index);
428 continue;
429 }
430
431 /* An insert undo record TRX_UNDO_INSERT_REC will
432 always contain all fields of the index. It does not
433 matter if any indexes were created afterwards; all
434 index entries can be reconstructed from the row. */
435 entry = row_build_index_entry(
436 node->row, node->ext, index, heap);
437 if (UNIV_UNLIKELY(!entry)) {
438 /* The database must have crashed after
439 inserting a clustered index record but before
440 writing all the externally stored columns of
441 that record, or a statement is being rolled
442 back because an error occurred while storing
443 off-page columns.
444
445 Because secondary index entries are inserted
446 after the clustered index record, we may
447 assume that the secondary index record does
448 not exist. */
449 } else {
450 err = row_undo_ins_remove_sec(index, entry, thr, node);
451
452 if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
453 goto func_exit;
454 }
455 }
456
457 mem_heap_empty(heap);
458 dict_table_next_uncorrupted_index(index);
459 }
460
461 func_exit:
462 node->index = index;
463 mem_heap_free(heap);
464 return(err);
465 }
466
467 /***********************************************************//**
468 Undoes a fresh insert of a row to a table. A fresh insert means that
469 the same clustered index unique key did not have any record, even delete
470 marked, at the time of the insert. InnoDB is eager in a rollback:
471 if it figures out that an index record will be removed in the purge
472 anyway, it will remove it in the rollback.
473 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
474 dberr_t
row_undo_ins(undo_node_t * node,que_thr_t * thr)475 row_undo_ins(
476 /*=========*/
477 undo_node_t* node, /*!< in: row undo node */
478 que_thr_t* thr) /*!< in: query thread */
479 {
480 dberr_t err;
481 ibool dict_locked;
482
483 ut_ad(node->state == UNDO_NODE_INSERT);
484 ut_ad(node->trx->in_rollback);
485 ut_ad(trx_undo_roll_ptr_is_insert(node->roll_ptr));
486
487 dict_locked = node->trx->dict_operation_lock_mode == RW_X_LATCH;
488
489 row_undo_ins_parse_undo_rec(node, dict_locked);
490
491 if (node->table == NULL) {
492 return(DB_SUCCESS);
493 }
494
495 /* Iterate over all the indexes and undo the insert.*/
496
497 node->index = dict_table_get_first_index(node->table);
498 ut_ad(dict_index_is_clust(node->index));
499 /* Skip the clustered index (the first index) */
500 node->index = dict_table_get_next_index(node->index);
501
502 dict_table_skip_corrupt_index(node->index);
503
504 err = row_undo_ins_remove_sec_rec(node, thr);
505
506 if (err == DB_SUCCESS) {
507
508 log_free_check();
509
510 if (node->table->id == DICT_INDEXES_ID) {
511
512 if (!dict_locked) {
513 mutex_enter(&dict_sys->mutex);
514 }
515 }
516
517 // FIXME: We need to update the dict_index_t::space and
518 // page number fields too.
519 err = row_undo_ins_remove_clust_rec(node);
520
521 if (node->table->id == DICT_INDEXES_ID
522 && !dict_locked) {
523
524 mutex_exit(&dict_sys->mutex);
525 }
526 }
527
528 dict_table_close(node->table, dict_locked, FALSE);
529
530 node->table = NULL;
531
532 return(err);
533 }
534