1 /*****************************************************************************
2 
3 Copyright (c) 1997, 2021, Oracle and/or its affiliates.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file row/row0uins.cc
29 Fresh insert undo
30 
31 Created 2/25/1997 Heikki Tuuri
32 *******************************************************/
33 
34 #include "row0uins.h"
35 
36 #ifdef UNIV_NONINL
37 #include "row0uins.ic"
38 #endif
39 
40 #include "dict0dict.h"
41 #include "dict0boot.h"
42 #include "dict0crea.h"
43 #include "trx0undo.h"
44 #include "trx0roll.h"
45 #include "btr0btr.h"
46 #include "mach0data.h"
47 #include "row0undo.h"
48 #include "row0vers.h"
49 #include "row0log.h"
50 #include "trx0trx.h"
51 #include "trx0rec.h"
52 #include "row0row.h"
53 #include "row0upd.h"
54 #include "que0que.h"
55 #include "ibuf0ibuf.h"
56 #include "log0log.h"
57 #include "fil0fil.h"
/*************************************************************************
IMPORTANT NOTE: Any operation that generates redo MUST check that there
is enough space in the redo log before starting that operation. This is
done by calling log_free_check(). The reason for checking the
availability of the redo log space before the start of the operation is
that we MUST NOT hold any synchronization objects when performing the
check.
If you make a change in this module make sure that no codepath is
introduced where a call to log_free_check() is bypassed. */
67 
68 /***************************************************************//**
69 Removes a clustered index record. The pcur in node was positioned on the
70 record, now it is detached.
71 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
72 static  MY_ATTRIBUTE((nonnull, warn_unused_result))
73 dberr_t
row_undo_ins_remove_clust_rec(undo_node_t * node)74 row_undo_ins_remove_clust_rec(
75 /*==========================*/
76 	undo_node_t*	node)	/*!< in: undo node */
77 {
78 	btr_cur_t*	btr_cur;
79 	ibool		success;
80 	dberr_t		err;
81 	ulint		n_tries	= 0;
82 	mtr_t		mtr;
83 	dict_index_t*	index	= node->pcur.btr_cur.index;
84 	bool		online;
85 
86 	ut_ad(dict_index_is_clust(index));
87 	ut_ad(node->trx->in_rollback);
88 
89 	mtr_start(&mtr);
90 	mtr.set_named_space(index->space);
91 	dict_disable_redo_if_temporary(index->table, &mtr);
92 
93 	/* This is similar to row_undo_mod_clust(). The DDL thread may
94 	already have copied this row from the log to the new table.
95 	We must log the removal, so that the row will be correctly
96 	purged. However, we can log the removal out of sync with the
97 	B-tree modification. */
98 
99 	online = dict_index_is_online_ddl(index);
100 	if (online) {
101 		ut_ad(node->trx->dict_operation_lock_mode
102 		      != RW_X_LATCH);
103 		ut_ad(node->table->id != DICT_INDEXES_ID);
104 		mtr_s_lock(dict_index_get_lock(index), &mtr);
105 	}
106 
107 	success = btr_pcur_restore_position(
108 		online
109 		? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
110 		: BTR_MODIFY_LEAF, &node->pcur, &mtr);
111 	ut_a(success);
112 
113 	btr_cur = btr_pcur_get_btr_cur(&node->pcur);
114 
115 	ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), btr_cur->index)
116 	      == node->trx->id);
117 	ut_ad(!rec_get_deleted_flag(
118 		      btr_cur_get_rec(btr_cur),
119 		      dict_table_is_comp(btr_cur->index->table)));
120 
121 	if (online && dict_index_is_online_ddl(index)) {
122 		const rec_t*	rec	= btr_cur_get_rec(btr_cur);
123 		mem_heap_t*	heap	= NULL;
124 		const ulint*	offsets	= rec_get_offsets(
125 			rec, index, NULL, ULINT_UNDEFINED, &heap);
126 		row_log_table_delete(rec, node->row, index, offsets, NULL);
127 		mem_heap_free(heap);
128 	}
129 
130 	if (node->table->id == DICT_INDEXES_ID) {
131 
132 		ut_ad(!online);
133 		ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH);
134 
135 		dict_drop_index_tree(
136 			btr_pcur_get_rec(&node->pcur), &(node->pcur), &mtr);
137 
138 		mtr_commit(&mtr);
139 
140 		mtr_start(&mtr);
141 
142 		success = btr_pcur_restore_position(
143 			BTR_MODIFY_LEAF, &node->pcur, &mtr);
144 		ut_a(success);
145 	}
146 
147 	row_convert_impl_to_expl_if_needed(btr_cur, node);
148 
149 	if (btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {
150 		err = DB_SUCCESS;
151 		goto func_exit;
152 	}
153 
154 	btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
155 retry:
156 	/* If did not succeed, try pessimistic descent to tree */
157 	mtr_start(&mtr);
158 	mtr.set_named_space(index->space);
159 	dict_disable_redo_if_temporary(index->table, &mtr);
160 
161 	success = btr_pcur_restore_position(
162 			BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
163 			&node->pcur, &mtr);
164 	ut_a(success);
165 
166 	btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0, true, &mtr);
167 
168 	/* The delete operation may fail if we have little
169 	file space left: TODO: easiest to crash the database
170 	and restart with more file space */
171 
172 	if (err == DB_OUT_OF_FILE_SPACE
173 	    && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
174 
175 		btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
176 
177 		n_tries++;
178 
179 		os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
180 
181 		goto retry;
182 	}
183 
184 func_exit:
185 	btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
186 
187 	return(err);
188 }
189 
190 /***************************************************************//**
191 Removes a secondary index entry if found.
192 @return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
193 static MY_ATTRIBUTE((nonnull, warn_unused_result))
194 dberr_t
row_undo_ins_remove_sec_low(ulint mode,dict_index_t * index,dtuple_t * entry,que_thr_t * thr,undo_node_t * node)195 row_undo_ins_remove_sec_low(
196 /*========================*/
197 	ulint		mode,	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
198 				depending on whether we wish optimistic or
199 				pessimistic descent down the index tree */
200 	dict_index_t*	index,	/*!< in: index */
201 	dtuple_t*	entry,	/*!< in: index entry to remove */
202 	que_thr_t*	thr,	/*!< in: query thread */
203 	undo_node_t*	node)	/*!< in: undo node */
204 {
205 	btr_pcur_t		pcur;
206 	btr_cur_t*		btr_cur;
207 	dberr_t			err	= DB_SUCCESS;
208 	mtr_t			mtr;
209 	enum row_search_result	search_result;
210 	ibool			modify_leaf = false;
211 	ulint			rec_deleted;
212 
213 	log_free_check();
214 
215 	mtr_start(&mtr);
216 	mtr.set_named_space(index->space);
217 	dict_disable_redo_if_temporary(index->table, &mtr);
218 
219 	if (mode == BTR_MODIFY_LEAF) {
220 		mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
221 		mtr_s_lock(dict_index_get_lock(index), &mtr);
222 		modify_leaf = true;
223 	} else {
224 		ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE));
225 		mtr_sx_lock(dict_index_get_lock(index), &mtr);
226 	}
227 
228 	if (row_log_online_op_try(index, entry, 0)) {
229 		goto func_exit_no_pcur;
230 	}
231 
232 	if (dict_index_is_spatial(index)) {
233 		if (mode & BTR_MODIFY_LEAF) {
234 			mode |= BTR_RTREE_DELETE_MARK;
235 		}
236 		btr_pcur_get_btr_cur(&pcur)->thr = thr;
237 		mode |= BTR_RTREE_UNDO_INS;
238 	}
239 
240 	search_result = row_search_index_entry(index, entry, mode,
241 					       &pcur, &mtr);
242 
243 	switch (search_result) {
244 	case ROW_NOT_FOUND:
245 		goto func_exit;
246 	case ROW_FOUND:
247 		break;
248 
249 	case ROW_BUFFERED:
250 	case ROW_NOT_DELETED_REF:
251 		/* These are invalid outcomes, because the mode passed
252 		to row_search_index_entry() did not include any of the
253 		flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
254 		ut_error;
255 	}
256 
257 	rec_deleted = rec_get_deleted_flag(btr_pcur_get_rec(&pcur),
258 					   dict_table_is_comp(index->table));
259 
260 	if (search_result == ROW_FOUND && dict_index_is_spatial(index)) {
261 		if(rec_deleted) {
262 			ib::error() << "Record found in index " << index->name
263 				<< " is deleted marked on insert rollback.";
264 		}
265 	}
266 
267 	btr_cur = btr_pcur_get_btr_cur(&pcur);
268 
269 	if (rec_deleted == 0) {
270 		/* This record is not delete marked and has an implicit
271 		lock on it. For delete marked record, INSERT has not
272 		modified it yet and we don't have implicit lock on it.
273 		We must convert to explicit if and only if we have
274 		implicit lock on the record.*/
275 		row_convert_impl_to_expl_if_needed(btr_cur, node);
276 	}
277 
278 	if (modify_leaf) {
279 		err = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
280 			? DB_SUCCESS : DB_FAIL;
281 	} else {
282 		/* Passing rollback=false here, because we are
283 		deleting a secondary index record: the distinction
284 		only matters when deleting a record that contains
285 		externally stored columns. */
286 		ut_ad(!dict_index_is_clust(index));
287 		btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
288 					   false, &mtr);
289 	}
290 func_exit:
291 	btr_pcur_close(&pcur);
292 func_exit_no_pcur:
293 	mtr_commit(&mtr);
294 
295 	return(err);
296 }
297 
298 /***************************************************************//**
299 Removes a secondary index entry from the index if found. Tries first
300 optimistic, then pessimistic descent down the tree.
301 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
302 static MY_ATTRIBUTE((nonnull, warn_unused_result))
303 dberr_t
row_undo_ins_remove_sec(dict_index_t * index,dtuple_t * entry,que_thr_t * thr,undo_node_t * node)304 row_undo_ins_remove_sec(
305 /*====================*/
306 	dict_index_t*	index,	/*!< in: index */
307 	dtuple_t*	entry,	/*!< in: index entry to insert */
308 	que_thr_t*	thr,	/*!< in: query thread */
309 	undo_node_t*	node)
310 {
311 	dberr_t	err;
312 	ulint	n_tries	= 0;
313 
314 	/* Try first optimistic descent to the B-tree */
315 
316 	err = row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry, thr, node);
317 
318 	if (err == DB_SUCCESS) {
319 
320 		return(err);
321 	}
322 
323 	/* Try then pessimistic descent to the B-tree */
324 retry:
325 	err = row_undo_ins_remove_sec_low(
326 		BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
327 		index, entry, thr, node);
328 
329 	/* The delete operation may fail if we have little
330 	file space left: TODO: easiest to crash the database
331 	and restart with more file space */
332 
333 	if (err != DB_SUCCESS && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
334 
335 		n_tries++;
336 
337 		os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
338 
339 		goto retry;
340 	}
341 
342 	return(err);
343 }
344 
345 /***********************************************************//**
346 Parses the row reference and other info in a fresh insert undo record. */
347 static
348 void
row_undo_ins_parse_undo_rec(undo_node_t * node,ibool dict_locked)349 row_undo_ins_parse_undo_rec(
350 /*========================*/
351 	undo_node_t*	node,		/*!< in/out: row undo node */
352 	ibool		dict_locked)	/*!< in: TRUE if own dict_sys->mutex */
353 {
354 	dict_index_t*	clust_index;
355 	byte*		ptr;
356 	undo_no_t	undo_no;
357 	table_id_t	table_id;
358 	ulint		type;
359 	ulint		dummy;
360 	bool		dummy_extern;
361 
362 	ut_ad(node);
363 
364 	ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy,
365 				    &dummy_extern, &undo_no, &table_id);
366 	ut_ad(type == TRX_UNDO_INSERT_REC);
367 	node->rec_type = type;
368 
369 	node->update = NULL;
370 	node->table = dict_table_open_on_id(
371 		table_id, dict_locked, DICT_TABLE_OP_NORMAL);
372 
373 	/* Skip the UNDO if we can't find the table or the .ibd file. */
374 	if (UNIV_UNLIKELY(node->table == NULL)) {
375 	} else if (UNIV_UNLIKELY(node->table->file_unreadable)) {
376 close_table:
377 		dict_table_close(node->table, dict_locked, FALSE);
378 		node->table = NULL;
379 	} else if (fil_space_is_being_truncated(node->table->space)) {
380 
381 		dict_table_close(node->table, dict_locked, FALSE);
382 		node->table = NULL;
383 	} else {
384 		clust_index = dict_table_get_first_index(node->table);
385 
386 		if (clust_index != NULL) {
387 			ptr = trx_undo_rec_get_row_ref(
388 				ptr, clust_index, &node->ref, node->heap);
389 
390 			if (!row_undo_search_clust_to_pcur(node)) {
391 				goto close_table;
392 			}
393 			if (node->table->n_v_cols) {
394 				trx_undo_read_v_cols(node->table, ptr,
395 						     node->row, false, NULL);
396 			}
397 
398 		} else {
399 			ib::warn() << "Table " << node->table->name
400 				 << " has no indexes,"
401 				" ignoring the table";
402 			goto close_table;
403 		}
404 	}
405 }
406 
407 /***************************************************************//**
408 Removes secondary index records.
409 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
410 static MY_ATTRIBUTE((nonnull, warn_unused_result))
411 dberr_t
row_undo_ins_remove_sec_rec(undo_node_t * node,que_thr_t * thr)412 row_undo_ins_remove_sec_rec(
413 /*========================*/
414 	undo_node_t*	node,	/*!< in/out: row undo node */
415 	que_thr_t*	thr)	/*!< in: query thread */
416 {
417 	dberr_t		err	= DB_SUCCESS;
418 	dict_index_t*	index	= node->index;
419 	mem_heap_t*	heap;
420 
421 	heap = mem_heap_create(1024);
422 
423 	while (index != NULL) {
424 		dtuple_t*	entry;
425 
426 		if (index->type & DICT_FTS) {
427 			dict_table_next_uncorrupted_index(index);
428 			continue;
429 		}
430 
431 		/* An insert undo record TRX_UNDO_INSERT_REC will
432 		always contain all fields of the index. It does not
433 		matter if any indexes were created afterwards; all
434 		index entries can be reconstructed from the row. */
435 		entry = row_build_index_entry(
436 			node->row, node->ext, index, heap);
437 		if (UNIV_UNLIKELY(!entry)) {
438 			/* The database must have crashed after
439 			inserting a clustered index record but before
440 			writing all the externally stored columns of
441 			that record, or a statement is being rolled
442 			back because an error occurred while storing
443 			off-page columns.
444 
445 			Because secondary index entries are inserted
446 			after the clustered index record, we may
447 			assume that the secondary index record does
448 			not exist. */
449 		} else {
450 			err = row_undo_ins_remove_sec(index, entry, thr, node);
451 
452 			if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
453 				goto func_exit;
454 			}
455 		}
456 
457 		mem_heap_empty(heap);
458 		dict_table_next_uncorrupted_index(index);
459 	}
460 
461 func_exit:
462 	node->index = index;
463 	mem_heap_free(heap);
464 	return(err);
465 }
466 
467 /***********************************************************//**
468 Undoes a fresh insert of a row to a table. A fresh insert means that
469 the same clustered index unique key did not have any record, even delete
470 marked, at the time of the insert.  InnoDB is eager in a rollback:
471 if it figures out that an index record will be removed in the purge
472 anyway, it will remove it in the rollback.
473 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
474 dberr_t
row_undo_ins(undo_node_t * node,que_thr_t * thr)475 row_undo_ins(
476 /*=========*/
477 	undo_node_t*	node,	/*!< in: row undo node */
478 	que_thr_t*	thr)	/*!< in: query thread */
479 {
480 	dberr_t	err;
481 	ibool	dict_locked;
482 
483 	ut_ad(node->state == UNDO_NODE_INSERT);
484 	ut_ad(node->trx->in_rollback);
485 	ut_ad(trx_undo_roll_ptr_is_insert(node->roll_ptr));
486 
487 	dict_locked = node->trx->dict_operation_lock_mode == RW_X_LATCH;
488 
489 	row_undo_ins_parse_undo_rec(node, dict_locked);
490 
491 	if (node->table == NULL) {
492 		return(DB_SUCCESS);
493 	}
494 
495 	/* Iterate over all the indexes and undo the insert.*/
496 
497 	node->index = dict_table_get_first_index(node->table);
498 	ut_ad(dict_index_is_clust(node->index));
499 	/* Skip the clustered index (the first index) */
500 	node->index = dict_table_get_next_index(node->index);
501 
502 	dict_table_skip_corrupt_index(node->index);
503 
504 	err = row_undo_ins_remove_sec_rec(node, thr);
505 
506 	if (err == DB_SUCCESS) {
507 
508 		log_free_check();
509 
510 		if (node->table->id == DICT_INDEXES_ID) {
511 
512 			if (!dict_locked) {
513 				mutex_enter(&dict_sys->mutex);
514 			}
515 		}
516 
517 		// FIXME: We need to update the dict_index_t::space and
518 		// page number fields too.
519 		err = row_undo_ins_remove_clust_rec(node);
520 
521 		if (node->table->id == DICT_INDEXES_ID
522 		    && !dict_locked) {
523 
524 			mutex_exit(&dict_sys->mutex);
525 		}
526 	}
527 
528 	dict_table_close(node->table, dict_locked, FALSE);
529 
530 	node->table = NULL;
531 
532 	return(err);
533 }
534