1 /*****************************************************************************
2 
3 Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file row/row0umod.cc
29 Undo modify of a row
30 
31 Created 2/27/1997 Heikki Tuuri
32 *******************************************************/
33 
34 #include "row0umod.h"
35 
36 #ifdef UNIV_NONINL
37 #include "row0umod.ic"
38 #endif
39 
40 #include "dict0dict.h"
41 #include "dict0boot.h"
42 #include "trx0undo.h"
43 #include "trx0roll.h"
44 #include "btr0btr.h"
45 #include "mach0data.h"
46 #include "row0undo.h"
47 #include "row0vers.h"
48 #include "row0log.h"
49 #include "trx0trx.h"
50 #include "trx0rec.h"
51 #include "row0row.h"
52 #include "row0upd.h"
53 #include "que0que.h"
54 #include "log0log.h"
55 
56 /* Considerations on undoing a modify operation.
57 (1) Undoing a delete marking: all index records should be found. Some of
58 them may have delete mark already FALSE, if the delete mark operation was
59 stopped underway, or if the undo operation ended prematurely because of a
60 system crash.
61 (2) Undoing an update of a delete unmarked record: the newer version of
62 an updated secondary index entry should be removed if no prior version
63 of the clustered index record requires its existence. Otherwise, it should
64 be delete marked.
65 (3) Undoing an update of a delete marked record. In this kind of update a
66 delete marked clustered index record was delete unmarked and possibly also
67 some of its fields were changed. Now, it is possible that the delete marked
68 version has become obsolete at the time the undo is started. */
69 
/*************************************************************************
IMPORTANT NOTE: Any operation that generates redo MUST check that there
is enough space in the redo log before starting that operation. This is
done by calling log_free_check(). The reason for checking the
availability of the redo log space before the start of the operation is
that we MUST not hold any synchronization objects when performing the
check.
If you make a change in this module make sure that no codepath is
introduced where a call to log_free_check() is bypassed. */
79 
80 /***********************************************************//**
81 Undoes a modify in a clustered index record.
82 @return	DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
83 static MY_ATTRIBUTE((nonnull, warn_unused_result))
84 dberr_t
row_undo_mod_clust_low(undo_node_t * node,ulint ** offsets,mem_heap_t ** offsets_heap,mem_heap_t * heap,const dtuple_t ** rebuilt_old_pk,byte * sys,que_thr_t * thr,mtr_t * mtr,ulint mode)85 row_undo_mod_clust_low(
86 /*===================*/
87 	undo_node_t*	node,	/*!< in: row undo node */
88 	ulint**		offsets,/*!< out: rec_get_offsets() on the record */
89 	mem_heap_t**	offsets_heap,
90 				/*!< in/out: memory heap that can be emptied */
91 	mem_heap_t*	heap,	/*!< in/out: memory heap */
92 	const dtuple_t**rebuilt_old_pk,
93 				/*!< out: row_log_table_get_pk()
94 				before the update, or NULL if
95 				the table is not being rebuilt online or
96 				the PRIMARY KEY definition does not change */
97 	byte*		sys,	/*!< out: DB_TRX_ID, DB_ROLL_PTR
98 				for row_log_table_delete() */
99 	que_thr_t*	thr,	/*!< in: query thread */
100 	mtr_t*		mtr,	/*!< in: mtr; must be committed before
101 				latching any further pages */
102 	ulint		mode)	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
103 {
104 	btr_pcur_t*	pcur;
105 	btr_cur_t*	btr_cur;
106 	dberr_t		err;
107 #ifdef UNIV_DEBUG
108 	ibool		success;
109 #endif /* UNIV_DEBUG */
110 
111 	pcur = &node->pcur;
112 	btr_cur = btr_pcur_get_btr_cur(pcur);
113 
114 #ifdef UNIV_DEBUG
115 	success =
116 #endif /* UNIV_DEBUG */
117 	btr_pcur_restore_position(mode, pcur, mtr);
118 
119 	ut_ad(success);
120 	ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur),
121 			     btr_cur_get_index(btr_cur))
122 	      == thr_get_trx(thr)->id);
123 
124 	if (mode != BTR_MODIFY_LEAF
125 	    && dict_index_is_online_ddl(btr_cur_get_index(btr_cur))) {
126 		*rebuilt_old_pk = row_log_table_get_pk(
127 			btr_cur_get_rec(btr_cur),
128 			btr_cur_get_index(btr_cur), NULL, sys, &heap);
129 	} else {
130 		*rebuilt_old_pk = NULL;
131 	}
132 
133 	if (mode != BTR_MODIFY_TREE) {
134 		ut_ad((mode & ~BTR_ALREADY_S_LATCHED) == BTR_MODIFY_LEAF);
135 
136 		err = btr_cur_optimistic_update(
137 			BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG
138 			| BTR_KEEP_SYS_FLAG,
139 			btr_cur, offsets, offsets_heap,
140 			node->update, node->cmpl_info,
141 			thr, thr_get_trx(thr)->id, mtr);
142 	} else {
143 		big_rec_t*	dummy_big_rec;
144 
145 		err = btr_cur_pessimistic_update(
146 			BTR_NO_LOCKING_FLAG
147 			| BTR_NO_UNDO_LOG_FLAG
148 			| BTR_KEEP_SYS_FLAG,
149 			btr_cur, offsets, offsets_heap, heap,
150 			&dummy_big_rec, node->update,
151 			node->cmpl_info, thr, thr_get_trx(thr)->id, mtr);
152 
153 		ut_a(!dummy_big_rec);
154 	}
155 
156 	return(err);
157 }
158 
159 /***********************************************************//**
160 Purges a clustered index record after undo if possible.
161 This is attempted when the record was inserted by updating a
162 delete-marked record and there no longer exist transactions
163 that would see the delete-marked record.
164 @return	DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
165 static MY_ATTRIBUTE((nonnull, warn_unused_result))
166 dberr_t
row_undo_mod_remove_clust_low(undo_node_t * node,que_thr_t * thr,mtr_t * mtr,ulint mode)167 row_undo_mod_remove_clust_low(
168 /*==========================*/
169 	undo_node_t*	node,	/*!< in: row undo node */
170 	que_thr_t*	thr,	/*!< in: query thread */
171 	mtr_t*		mtr,	/*!< in/out: mini-transaction */
172 	ulint		mode)	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
173 {
174 	btr_cur_t*	btr_cur;
175 	dberr_t		err;
176 	ulint		trx_id_offset;
177 
178 	ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
179 
180 	/* Find out if the record has been purged already
181 	or if we can remove it. */
182 
183 	if (!btr_pcur_restore_position(mode, &node->pcur, mtr)
184 	    || row_vers_must_preserve_del_marked(node->new_trx_id, mtr)) {
185 
186 		return(DB_SUCCESS);
187 	}
188 
189 	btr_cur = btr_pcur_get_btr_cur(&node->pcur);
190 
191 	trx_id_offset = btr_cur_get_index(btr_cur)->trx_id_offset;
192 
193 	if (!trx_id_offset) {
194 		mem_heap_t*	heap	= NULL;
195 		ulint		trx_id_col;
196 		const ulint*	offsets;
197 		ulint		len;
198 
199 		trx_id_col = dict_index_get_sys_col_pos(
200 			btr_cur_get_index(btr_cur), DATA_TRX_ID);
201 		ut_ad(trx_id_col > 0);
202 		ut_ad(trx_id_col != ULINT_UNDEFINED);
203 
204 		offsets = rec_get_offsets(
205 			btr_cur_get_rec(btr_cur), btr_cur_get_index(btr_cur),
206 			NULL, trx_id_col + 1, &heap);
207 
208 		trx_id_offset = rec_get_nth_field_offs(
209 			offsets, trx_id_col, &len);
210 		ut_ad(len == DATA_TRX_ID_LEN);
211 		mem_heap_free(heap);
212 	}
213 
214 	if (trx_read_trx_id(btr_cur_get_rec(btr_cur) + trx_id_offset)
215 	    != node->new_trx_id) {
216 		/* The record must have been purged and then replaced
217 		with a different one. */
218 		return(DB_SUCCESS);
219 	}
220 
221 	/* We are about to remove an old, delete-marked version of the
222 	record that may have been delete-marked by a different transaction
223 	than the rolling-back one. */
224 	ut_ad(rec_get_deleted_flag(btr_cur_get_rec(btr_cur),
225 				   dict_table_is_comp(node->table)));
226 
227 	if (mode == BTR_MODIFY_LEAF) {
228 		err = btr_cur_optimistic_delete(btr_cur, 0, mtr)
229 			? DB_SUCCESS
230 			: DB_FAIL;
231 	} else {
232 		ut_ad(mode == BTR_MODIFY_TREE);
233 
234 		/* This operation is analogous to purge, we can free also
235 		inherited externally stored fields */
236 
237 		btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
238 					   thr_is_recv(thr)
239 					   ? RB_RECOVERY_PURGE_REC
240 					   : RB_NONE, mtr);
241 
242 		/* The delete operation may fail if we have little
243 		file space left: TODO: easiest to crash the database
244 		and restart with more file space */
245 	}
246 
247 	return(err);
248 }
249 
250 /***********************************************************//**
251 Undoes a modify in a clustered index record. Sets also the node state for the
252 next round of undo.
253 @return	DB_SUCCESS or error code: we may run out of file space */
254 static MY_ATTRIBUTE((nonnull, warn_unused_result))
255 dberr_t
row_undo_mod_clust(undo_node_t * node,que_thr_t * thr)256 row_undo_mod_clust(
257 /*===============*/
258 	undo_node_t*	node,	/*!< in: row undo node */
259 	que_thr_t*	thr)	/*!< in: query thread */
260 {
261 	btr_pcur_t*	pcur;
262 	mtr_t		mtr;
263 	dberr_t		err;
264 	dict_index_t*	index;
265 	bool		online;
266 
267 	ut_ad(thr_get_trx(thr) == node->trx);
268 	ut_ad(node->trx->dict_operation_lock_mode);
269 #ifdef UNIV_SYNC_DEBUG
270 	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED)
271 	      || rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
272 #endif /* UNIV_SYNC_DEBUG */
273 
274 	log_free_check();
275 	pcur = &node->pcur;
276 	index = btr_cur_get_index(btr_pcur_get_btr_cur(pcur));
277 
278 	mtr_start(&mtr);
279 
280 	online = dict_index_is_online_ddl(index);
281 	if (online) {
282 		ut_ad(node->trx->dict_operation_lock_mode != RW_X_LATCH);
283 		mtr_s_lock(dict_index_get_lock(index), &mtr);
284 	}
285 
286 	mem_heap_t*	heap		= mem_heap_create(1024);
287 	mem_heap_t*	offsets_heap	= NULL;
288 	ulint*		offsets		= NULL;
289 	const dtuple_t*	rebuilt_old_pk;
290 	byte		sys[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN];
291 
292 	/* Try optimistic processing of the record, keeping changes within
293 	the index page */
294 
295 	err = row_undo_mod_clust_low(node, &offsets, &offsets_heap,
296 				     heap, &rebuilt_old_pk, sys,
297 				     thr, &mtr, online
298 				     ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
299 				     : BTR_MODIFY_LEAF);
300 
301 	if (err != DB_SUCCESS) {
302 		btr_pcur_commit_specify_mtr(pcur, &mtr);
303 
304 		/* We may have to modify tree structure: do a pessimistic
305 		descent down the index tree */
306 
307 		mtr_start(&mtr);
308 
309 		err = row_undo_mod_clust_low(
310 			node, &offsets, &offsets_heap,
311 			heap, &rebuilt_old_pk, sys,
312 			thr, &mtr, BTR_MODIFY_TREE);
313 		ut_ad(err == DB_SUCCESS || err == DB_OUT_OF_FILE_SPACE);
314 	}
315 
316 	/* Online rebuild cannot be initiated while we are holding
317 	dict_operation_lock and index->lock. (It can be aborted.) */
318 	ut_ad(online || !dict_index_is_online_ddl(index));
319 
320 	if (err == DB_SUCCESS && online) {
321 #ifdef UNIV_SYNC_DEBUG
322 		ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED)
323 		      || rw_lock_own(&index->lock, RW_LOCK_EX));
324 #endif /* UNIV_SYNC_DEBUG */
325 		switch (node->rec_type) {
326 		case TRX_UNDO_DEL_MARK_REC:
327 			row_log_table_insert(
328 				btr_pcur_get_rec(pcur), index, offsets);
329 			break;
330 		case TRX_UNDO_UPD_EXIST_REC:
331 			row_log_table_update(
332 				btr_pcur_get_rec(pcur), index, offsets,
333 				rebuilt_old_pk);
334 			break;
335 		case TRX_UNDO_UPD_DEL_REC:
336 			row_log_table_delete(
337 				btr_pcur_get_rec(pcur), index, offsets, sys);
338 			break;
339 		default:
340 			ut_ad(0);
341 			break;
342 		}
343 	}
344 
345 	ut_ad(rec_get_trx_id(btr_pcur_get_rec(pcur), index)
346 	      == node->new_trx_id);
347 
348 	btr_pcur_commit_specify_mtr(pcur, &mtr);
349 
350 	if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_UPD_DEL_REC) {
351 
352 		mtr_start(&mtr);
353 
354 		/* It is not necessary to call row_log_table,
355 		because the record is delete-marked and would thus
356 		be omitted from the rebuilt copy of the table. */
357 		err = row_undo_mod_remove_clust_low(
358 			node, thr, &mtr, BTR_MODIFY_LEAF);
359 		if (err != DB_SUCCESS) {
360 			btr_pcur_commit_specify_mtr(pcur, &mtr);
361 
362 			/* We may have to modify tree structure: do a
363 			pessimistic descent down the index tree */
364 
365 			mtr_start(&mtr);
366 
367 			err = row_undo_mod_remove_clust_low(node, thr, &mtr,
368 							    BTR_MODIFY_TREE);
369 
370 			ut_ad(err == DB_SUCCESS
371 			      || err == DB_OUT_OF_FILE_SPACE);
372 		}
373 
374 		btr_pcur_commit_specify_mtr(pcur, &mtr);
375 	}
376 
377 	node->state = UNDO_NODE_FETCH_NEXT;
378 
379 	trx_undo_rec_release(node->trx, node->undo_no);
380 
381 	if (offsets_heap) {
382 		mem_heap_free(offsets_heap);
383 	}
384 	mem_heap_free(heap);
385 	return(err);
386 }
387 
388 /***********************************************************//**
389 Delete marks or removes a secondary index entry if found.
390 @return	DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
391 static MY_ATTRIBUTE((nonnull, warn_unused_result))
392 dberr_t
row_undo_mod_del_mark_or_remove_sec_low(undo_node_t * node,que_thr_t * thr,dict_index_t * index,dtuple_t * entry,ulint mode)393 row_undo_mod_del_mark_or_remove_sec_low(
394 /*====================================*/
395 	undo_node_t*	node,	/*!< in: row undo node */
396 	que_thr_t*	thr,	/*!< in: query thread */
397 	dict_index_t*	index,	/*!< in: index */
398 	dtuple_t*	entry,	/*!< in: index entry */
399 	ulint		mode)	/*!< in: latch mode BTR_MODIFY_LEAF or
400 				BTR_MODIFY_TREE */
401 {
402 	btr_pcur_t		pcur;
403 	btr_cur_t*		btr_cur;
404 	ibool			success;
405 	ibool			old_has;
406 	dberr_t			err	= DB_SUCCESS;
407 	mtr_t			mtr;
408 	mtr_t			mtr_vers;
409 	enum row_search_result	search_result;
410 
411 	log_free_check();
412 	mtr_start(&mtr);
413 
414 	if (*index->name == TEMP_INDEX_PREFIX) {
415 		/* The index->online_status may change if the
416 		index->name starts with TEMP_INDEX_PREFIX (meaning
417 		that the index is or was being created online). It is
418 		protected by index->lock. */
419 		if (mode == BTR_MODIFY_LEAF) {
420 			mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
421 			mtr_s_lock(dict_index_get_lock(index), &mtr);
422 		} else {
423 			ut_ad(mode == BTR_MODIFY_TREE);
424 			mtr_x_lock(dict_index_get_lock(index), &mtr);
425 		}
426 
427 		if (row_log_online_op_try(index, entry, 0)) {
428 			goto func_exit_no_pcur;
429 		}
430 	} else {
431 		/* For secondary indexes,
432 		index->online_status==ONLINE_INDEX_CREATION unless
433 		index->name starts with TEMP_INDEX_PREFIX. */
434 		ut_ad(!dict_index_is_online_ddl(index));
435 	}
436 
437 	btr_cur = btr_pcur_get_btr_cur(&pcur);
438 
439 	search_result = row_search_index_entry(index, entry, mode,
440 					       &pcur, &mtr);
441 
442 	switch (UNIV_EXPECT(search_result, ROW_FOUND)) {
443 	case ROW_NOT_FOUND:
444 		/* In crash recovery, the secondary index record may
445 		be missing if the UPDATE did not have time to insert
446 		the secondary index records before the crash.  When we
447 		are undoing that UPDATE in crash recovery, the record
448 		may be missing.
449 
450 		In normal processing, if an update ends in a deadlock
451 		before it has inserted all updated secondary index
452 		records, then the undo will not find those records. */
453 		goto func_exit;
454 	case ROW_FOUND:
455 		break;
456 	case ROW_BUFFERED:
457 	case ROW_NOT_DELETED_REF:
458 		/* These are invalid outcomes, because the mode passed
459 		to row_search_index_entry() did not include any of the
460 		flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
461 		ut_error;
462 	}
463 
464 	/* We should remove the index record if no prior version of the row,
465 	which cannot be purged yet, requires its existence. If some requires,
466 	we should delete mark the record. */
467 
468 	mtr_start(&mtr_vers);
469 
470 	success = btr_pcur_restore_position(BTR_SEARCH_LEAF, &(node->pcur),
471 					    &mtr_vers);
472 	ut_a(success);
473 
474 	old_has = row_vers_old_has_index_entry(FALSE,
475 					       btr_pcur_get_rec(&(node->pcur)),
476 					       &mtr_vers, index, entry);
477 	if (old_has) {
478 		err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
479 						   btr_cur, TRUE, thr, &mtr);
480 		ut_ad(err == DB_SUCCESS);
481 	} else {
482 		/* Remove the index record */
483 
484 		if (mode != BTR_MODIFY_TREE) {
485 			success = btr_cur_optimistic_delete(btr_cur, 0, &mtr);
486 			if (success) {
487 				err = DB_SUCCESS;
488 			} else {
489 				err = DB_FAIL;
490 			}
491 		} else {
492 			/* No need to distinguish RB_RECOVERY_PURGE here,
493 			because we are deleting a secondary index record:
494 			the distinction between RB_NORMAL and
495 			RB_RECOVERY_PURGE only matters when deleting a
496 			record that contains externally stored
497 			columns. */
498 			ut_ad(!dict_index_is_clust(index));
499 			btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
500 						   RB_NORMAL, &mtr);
501 
502 			/* The delete operation may fail if we have little
503 			file space left: TODO: easiest to crash the database
504 			and restart with more file space */
505 		}
506 	}
507 
508 	btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers);
509 
510 func_exit:
511 	btr_pcur_close(&pcur);
512 func_exit_no_pcur:
513 	mtr_commit(&mtr);
514 
515 	return(err);
516 }
517 
518 /***********************************************************//**
519 Delete marks or removes a secondary index entry if found.
520 NOTE that if we updated the fields of a delete-marked secondary index record
521 so that alphabetically they stayed the same, e.g., 'abc' -> 'aBc', we cannot
522 return to the original values because we do not know them. But this should
523 not cause problems because in row0sel.cc, in queries we always retrieve the
524 clustered index record or an earlier version of it, if the secondary index
525 record through which we do the search is delete-marked.
526 @return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
527 static MY_ATTRIBUTE((nonnull, warn_unused_result))
528 dberr_t
row_undo_mod_del_mark_or_remove_sec(undo_node_t * node,que_thr_t * thr,dict_index_t * index,dtuple_t * entry)529 row_undo_mod_del_mark_or_remove_sec(
530 /*================================*/
531 	undo_node_t*	node,	/*!< in: row undo node */
532 	que_thr_t*	thr,	/*!< in: query thread */
533 	dict_index_t*	index,	/*!< in: index */
534 	dtuple_t*	entry)	/*!< in: index entry */
535 {
536 	dberr_t	err;
537 
538 	err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
539 						      entry, BTR_MODIFY_LEAF);
540 	if (err == DB_SUCCESS) {
541 
542 		return(err);
543 	}
544 
545 	err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
546 						      entry, BTR_MODIFY_TREE);
547 	return(err);
548 }
549 
550 /***********************************************************//**
551 Delete unmarks a secondary index entry which must be found. It might not be
552 delete-marked at the moment, but it does not harm to unmark it anyway. We also
553 need to update the fields of the secondary index record if we updated its
554 fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'.
555 @retval	DB_SUCCESS on success
556 @retval	DB_FAIL if BTR_MODIFY_TREE should be tried
557 @retval	DB_OUT_OF_FILE_SPACE when running out of tablespace
558 @retval	DB_DUPLICATE_KEY if the value was missing
559 	and an insert would lead to a duplicate exists */
560 static MY_ATTRIBUTE((nonnull, warn_unused_result))
561 dberr_t
row_undo_mod_del_unmark_sec_and_undo_update(ulint mode,que_thr_t * thr,dict_index_t * index,dtuple_t * entry)562 row_undo_mod_del_unmark_sec_and_undo_update(
563 /*========================================*/
564 	ulint		mode,	/*!< in: search mode: BTR_MODIFY_LEAF or
565 				BTR_MODIFY_TREE */
566 	que_thr_t*	thr,	/*!< in: query thread */
567 	dict_index_t*	index,	/*!< in: index */
568 	dtuple_t*	entry)	/*!< in: index entry */
569 {
570 	btr_pcur_t		pcur;
571 	btr_cur_t*		btr_cur		= btr_pcur_get_btr_cur(&pcur);
572 	upd_t*			update;
573 	dberr_t			err		= DB_SUCCESS;
574 	big_rec_t*		dummy_big_rec;
575 	mtr_t			mtr;
576 	trx_t*			trx		= thr_get_trx(thr);
577 	const ulint		flags
578 		= BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG;
579 	enum row_search_result	search_result;
580 
581 	ut_ad(trx->id);
582 
583 	log_free_check();
584 	mtr_start(&mtr);
585 
586 	if (*index->name == TEMP_INDEX_PREFIX) {
587 		/* The index->online_status may change if the
588 		index->name starts with TEMP_INDEX_PREFIX (meaning
589 		that the index is or was being created online). It is
590 		protected by index->lock. */
591 		if (mode == BTR_MODIFY_LEAF) {
592 			mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
593 			mtr_s_lock(dict_index_get_lock(index), &mtr);
594 		} else {
595 			ut_ad(mode == BTR_MODIFY_TREE);
596 			mtr_x_lock(dict_index_get_lock(index), &mtr);
597 		}
598 
599 		if (row_log_online_op_try(index, entry, trx->id)) {
600 			goto func_exit_no_pcur;
601 		}
602 	} else {
603 		/* For secondary indexes,
604 		index->online_status==ONLINE_INDEX_CREATION unless
605 		index->name starts with TEMP_INDEX_PREFIX. */
606 		ut_ad(!dict_index_is_online_ddl(index));
607 	}
608 
609 	search_result = row_search_index_entry(index, entry, mode,
610 					       &pcur, &mtr);
611 
612 	switch (search_result) {
613 		mem_heap_t*	heap;
614 		mem_heap_t*	offsets_heap;
615 		ulint*		offsets;
616 	case ROW_BUFFERED:
617 	case ROW_NOT_DELETED_REF:
618 		/* These are invalid outcomes, because the mode passed
619 		to row_search_index_entry() did not include any of the
620 		flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
621 		ut_error;
622 	case ROW_NOT_FOUND:
623 		if (*index->name != TEMP_INDEX_PREFIX) {
624 			/* During online secondary index creation, it
625 			is possible that MySQL is waiting for a
626 			meta-data lock upgrade before invoking
627 			ha_innobase::commit_inplace_alter_table()
628 			while this ROLLBACK is executing. InnoDB has
629 			finished building the index, but it does not
630 			yet exist in MySQL. In this case, we suppress
631 			the printout to the error log. */
632 			fputs("InnoDB: error in sec index entry del undo in\n"
633 			      "InnoDB: ", stderr);
634 			dict_index_name_print(stderr, trx, index);
635 			fputs("\n"
636 			      "InnoDB: tuple ", stderr);
637 			dtuple_print(stderr, entry);
638 			fputs("\n"
639 			      "InnoDB: record ", stderr);
640 			rec_print(stderr, btr_pcur_get_rec(&pcur), index);
641 			putc('\n', stderr);
642 			trx_print(stderr, trx, 0);
643 			fputs("\n"
644 			      "InnoDB: Submit a detailed bug report"
645 			      " to http://bugs.mysql.com\n", stderr);
646 
647 			ib_logf(IB_LOG_LEVEL_WARN,
648 				"record in index %s was not found"
649 				" on rollback, trying to insert",
650 				index->name);
651 		}
652 
653 		if (btr_cur->up_match >= dict_index_get_n_unique(index)
654 		    || btr_cur->low_match >= dict_index_get_n_unique(index)) {
655 			if (*index->name != TEMP_INDEX_PREFIX) {
656 				ib_logf(IB_LOG_LEVEL_WARN,
657 					"record in index %s was not found on"
658 					" rollback, and a duplicate exists",
659 					index->name);
660 			}
661 			err = DB_DUPLICATE_KEY;
662 			break;
663 		}
664 
665 		/* Insert the missing record that we were trying to
666 		delete-unmark. */
667 		big_rec_t*	big_rec;
668 		rec_t*		insert_rec;
669 		offsets = NULL;
670 		offsets_heap = NULL;
671 
672 		err = btr_cur_optimistic_insert(
673 			flags, btr_cur, &offsets, &offsets_heap,
674 			entry, &insert_rec, &big_rec,
675 			0, thr, &mtr);
676 		ut_ad(!big_rec);
677 
678 		if (err == DB_FAIL && mode == BTR_MODIFY_TREE) {
679 			err = btr_cur_pessimistic_insert(
680 				flags, btr_cur,
681 				&offsets, &offsets_heap,
682 				entry, &insert_rec, &big_rec,
683 				0, thr, &mtr);
684 			/* There are no off-page columns in
685 			secondary indexes. */
686 			ut_ad(!big_rec);
687 		}
688 
689 		if (err == DB_SUCCESS) {
690 			page_update_max_trx_id(
691 				btr_cur_get_block(btr_cur),
692 				btr_cur_get_page_zip(btr_cur),
693 				trx->id, &mtr);
694 		}
695 
696 		if (offsets_heap) {
697 			mem_heap_free(offsets_heap);
698 		}
699 
700 		break;
701 	case ROW_FOUND:
702 		err = btr_cur_del_mark_set_sec_rec(
703 			BTR_NO_LOCKING_FLAG,
704 			btr_cur, FALSE, thr, &mtr);
705 		ut_a(err == DB_SUCCESS);
706 		heap = mem_heap_create(
707 			sizeof(upd_t)
708 			+ dtuple_get_n_fields(entry) * sizeof(upd_field_t));
709 		offsets_heap = NULL;
710 		offsets = rec_get_offsets(
711 			btr_cur_get_rec(btr_cur),
712 			index, NULL, ULINT_UNDEFINED, &offsets_heap);
713 		update = row_upd_build_sec_rec_difference_binary(
714 			btr_cur_get_rec(btr_cur), index, offsets, entry, heap);
715 		if (upd_get_n_fields(update) == 0) {
716 
717 			/* Do nothing */
718 
719 		} else if (mode != BTR_MODIFY_TREE) {
720 			/* Try an optimistic updating of the record, keeping
721 			changes within the page */
722 
723 			/* TODO: pass offsets, not &offsets */
724 			err = btr_cur_optimistic_update(
725 				flags, btr_cur, &offsets, &offsets_heap,
726 				update, 0, thr, thr_get_trx(thr)->id, &mtr);
727 			switch (err) {
728 			case DB_OVERFLOW:
729 			case DB_UNDERFLOW:
730 			case DB_ZIP_OVERFLOW:
731 				err = DB_FAIL;
732 			default:
733 				break;
734 			}
735 		} else {
736 			err = btr_cur_pessimistic_update(
737 				flags, btr_cur, &offsets, &offsets_heap,
738 				heap, &dummy_big_rec,
739 				update, 0, thr, thr_get_trx(thr)->id, &mtr);
740 			ut_a(!dummy_big_rec);
741 		}
742 
743 		mem_heap_free(heap);
744 		mem_heap_free(offsets_heap);
745 	}
746 
747 	btr_pcur_close(&pcur);
748 func_exit_no_pcur:
749 	mtr_commit(&mtr);
750 
751 	return(err);
752 }
753 
754 /***********************************************************//**
755 Flags a secondary index corrupted. */
756 static MY_ATTRIBUTE((nonnull))
757 void
row_undo_mod_sec_flag_corrupted(trx_t * trx,dict_index_t * index)758 row_undo_mod_sec_flag_corrupted(
759 /*============================*/
760 	trx_t*		trx,	/*!< in/out: transaction */
761 	dict_index_t*	index)	/*!< in: secondary index */
762 {
763 	ut_ad(!dict_index_is_clust(index));
764 
765 	switch (trx->dict_operation_lock_mode) {
766 	case RW_S_LATCH:
767 		/* Because row_undo() is holding an S-latch
768 		on the data dictionary during normal rollback,
769 		we can only mark the index corrupted in the
770 		data dictionary cache. TODO: fix this somehow.*/
771 		mutex_enter(&dict_sys->mutex);
772 		dict_set_corrupted_index_cache_only(index, index->table);
773 		mutex_exit(&dict_sys->mutex);
774 		break;
775 	default:
776 		ut_ad(0);
777 		/* fall through */
778 	case RW_X_LATCH:
779 		/* This should be the rollback of a data dictionary
780 		transaction. */
781 		dict_set_corrupted(index, trx, "rollback");
782 	}
783 }
784 
785 /***********************************************************//**
786 Undoes a modify in secondary indexes when undo record type is UPD_DEL.
787 @return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
788 static MY_ATTRIBUTE((nonnull, warn_unused_result))
789 dberr_t
row_undo_mod_upd_del_sec(undo_node_t * node,que_thr_t * thr)790 row_undo_mod_upd_del_sec(
791 /*=====================*/
792 	undo_node_t*	node,	/*!< in: row undo node */
793 	que_thr_t*	thr)	/*!< in: query thread */
794 {
795 	mem_heap_t*	heap;
796 	dberr_t		err	= DB_SUCCESS;
797 
798 	ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
799 	ut_ad(!node->undo_row);
800 
801 	heap = mem_heap_create(1024);
802 
803 	while (node->index != NULL) {
804 		dict_index_t*	index	= node->index;
805 		dtuple_t*	entry;
806 
807 		if (index->type & DICT_FTS) {
808 			dict_table_next_uncorrupted_index(node->index);
809 			continue;
810 		}
811 
812 		/* During online index creation,
813 		HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE should
814 		guarantee that any active transaction has not modified
815 		indexed columns such that col->ord_part was 0 at the
816 		time when the undo log record was written. When we get
817 		to roll back an undo log entry TRX_UNDO_DEL_MARK_REC,
818 		it should always cover all affected indexes. */
819 		entry = row_build_index_entry(
820 			node->row, node->ext, index, heap);
821 
822 		if (UNIV_UNLIKELY(!entry)) {
823 			/* The database must have crashed after
824 			inserting a clustered index record but before
825 			writing all the externally stored columns of
826 			that record.  Because secondary index entries
827 			are inserted after the clustered index record,
828 			we may assume that the secondary index record
829 			does not exist.  However, this situation may
830 			only occur during the rollback of incomplete
831 			transactions. */
832 			ut_a(thr_is_recv(thr));
833 		} else {
834 			err = row_undo_mod_del_mark_or_remove_sec(
835 				node, thr, index, entry);
836 
837 			if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
838 
839 				break;
840 			}
841 		}
842 
843 		mem_heap_empty(heap);
844 		dict_table_next_uncorrupted_index(node->index);
845 	}
846 
847 	mem_heap_free(heap);
848 
849 	return(err);
850 }
851 
852 /***********************************************************//**
853 Undoes a modify in secondary indexes when undo record type is DEL_MARK.
854 @return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
855 static MY_ATTRIBUTE((nonnull, warn_unused_result))
856 dberr_t
row_undo_mod_del_mark_sec(undo_node_t * node,que_thr_t * thr)857 row_undo_mod_del_mark_sec(
858 /*======================*/
859 	undo_node_t*	node,	/*!< in: row undo node */
860 	que_thr_t*	thr)	/*!< in: query thread */
861 {
862 	mem_heap_t*	heap;
863 	dberr_t		err	= DB_SUCCESS;
864 
865 	ut_ad(!node->undo_row);
866 
867 	heap = mem_heap_create(1024);
868 
869 	while (node->index != NULL) {
870 		dict_index_t*	index	= node->index;
871 		dtuple_t*	entry;
872 
873 		if (index->type == DICT_FTS) {
874 			dict_table_next_uncorrupted_index(node->index);
875 			continue;
876 		}
877 
878 		/* During online index creation,
879 		HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE should
880 		guarantee that any active transaction has not modified
881 		indexed columns such that col->ord_part was 0 at the
882 		time when the undo log record was written. When we get
883 		to roll back an undo log entry TRX_UNDO_DEL_MARK_REC,
884 		it should always cover all affected indexes. */
885 		entry = row_build_index_entry(
886 			node->row, node->ext, index, heap);
887 
888 		ut_a(entry);
889 
890 		err = row_undo_mod_del_unmark_sec_and_undo_update(
891 			BTR_MODIFY_LEAF, thr, index, entry);
892 		if (err == DB_FAIL) {
893 			err = row_undo_mod_del_unmark_sec_and_undo_update(
894 				BTR_MODIFY_TREE, thr, index, entry);
895 		}
896 
897 		if (err == DB_DUPLICATE_KEY) {
898 			row_undo_mod_sec_flag_corrupted(
899 				thr_get_trx(thr), index);
900 			err = DB_SUCCESS;
901 			/* Do not return any error to the caller. The
902 			duplicate will be reported by ALTER TABLE or
903 			CREATE UNIQUE INDEX. Unfortunately we cannot
904 			report the duplicate key value to the DDL
905 			thread, because the altered_table object is
906 			private to its call stack. */
907 		} else if (err != DB_SUCCESS) {
908 			break;
909 		}
910 
911 		mem_heap_empty(heap);
912 		dict_table_next_uncorrupted_index(node->index);
913 	}
914 
915 	mem_heap_free(heap);
916 
917 	return(err);
918 }
919 
920 /***********************************************************//**
921 Undoes a modify in secondary indexes when undo record type is UPD_EXIST.
922 @return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
923 static MY_ATTRIBUTE((nonnull, warn_unused_result))
924 dberr_t
row_undo_mod_upd_exist_sec(undo_node_t * node,que_thr_t * thr)925 row_undo_mod_upd_exist_sec(
926 /*=======================*/
927 	undo_node_t*	node,	/*!< in: row undo node */
928 	que_thr_t*	thr)	/*!< in: query thread */
929 {
930 	mem_heap_t*	heap;
931 	dberr_t		err	= DB_SUCCESS;
932 
933 	if (node->index == NULL
934 	    || ((node->cmpl_info & UPD_NODE_NO_ORD_CHANGE))) {
935 		/* No change in secondary indexes */
936 
937 		return(err);
938 	}
939 
940 	heap = mem_heap_create(1024);
941 
942 	while (node->index != NULL) {
943 		dict_index_t*	index	= node->index;
944 		dtuple_t*	entry;
945 
946 		if (index->type == DICT_FTS
947 		    || !row_upd_changes_ord_field_binary(
948 			index, node->update, thr, node->row, node->ext)) {
949 			dict_table_next_uncorrupted_index(node->index);
950 			continue;
951 		}
952 
953 		/* Build the newest version of the index entry */
954 		entry = row_build_index_entry(node->row, node->ext,
955 					      index, heap);
956 		if (UNIV_UNLIKELY(!entry)) {
957 			/* The server must have crashed in
958 			row_upd_clust_rec_by_insert() before
959 			the updated externally stored columns (BLOBs)
960 			of the new clustered index entry were written. */
961 
962 			/* The table must be in DYNAMIC or COMPRESSED
963 			format.  REDUNDANT and COMPACT formats
964 			store a local 768-byte prefix of each
965 			externally stored column. */
966 			ut_a(dict_table_get_format(index->table)
967 			     >= UNIV_FORMAT_B);
968 
969 			/* This is only legitimate when
970 			rolling back an incomplete transaction
971 			after crash recovery. */
972 			ut_a(thr_get_trx(thr)->is_recovered);
973 
974 			/* The server must have crashed before
975 			completing the insert of the new
976 			clustered index entry and before
977 			inserting to the secondary indexes.
978 			Because node->row was not yet written
979 			to this index, we can ignore it.  But
980 			we must restore node->undo_row. */
981 		} else {
982 			/* NOTE that if we updated the fields of a
983 			delete-marked secondary index record so that
984 			alphabetically they stayed the same, e.g.,
985 			'abc' -> 'aBc', we cannot return to the
986 			original values because we do not know them.
987 			But this should not cause problems because
988 			in row0sel.cc, in queries we always retrieve
989 			the clustered index record or an earlier
990 			version of it, if the secondary index record
991 			through which we do the search is
992 			delete-marked. */
993 
994 			err = row_undo_mod_del_mark_or_remove_sec(
995 				node, thr, index, entry);
996 			if (err != DB_SUCCESS) {
997 				break;
998 			}
999 		}
1000 
1001 		mem_heap_empty(heap);
1002 		/* We may have to update the delete mark in the
1003 		secondary index record of the previous version of
1004 		the row. We also need to update the fields of
1005 		the secondary index record if we updated its fields
1006 		but alphabetically they stayed the same, e.g.,
1007 		'abc' -> 'aBc'. */
1008 		entry = row_build_index_entry(node->undo_row,
1009 					      node->undo_ext,
1010 					      index, heap);
1011 		ut_a(entry);
1012 
1013 		err = row_undo_mod_del_unmark_sec_and_undo_update(
1014 			BTR_MODIFY_LEAF, thr, index, entry);
1015 		if (err == DB_FAIL) {
1016 			err = row_undo_mod_del_unmark_sec_and_undo_update(
1017 				BTR_MODIFY_TREE, thr, index, entry);
1018 		}
1019 
1020 		if (err == DB_DUPLICATE_KEY) {
1021 			row_undo_mod_sec_flag_corrupted(
1022 				thr_get_trx(thr), index);
1023 			err = DB_SUCCESS;
1024 		} else if (err != DB_SUCCESS) {
1025 			break;
1026 		}
1027 
1028 		mem_heap_empty(heap);
1029 		dict_table_next_uncorrupted_index(node->index);
1030 	}
1031 
1032 	mem_heap_free(heap);
1033 
1034 	return(err);
1035 }
1036 
1037 /***********************************************************//**
1038 Parses the row reference and other info in a modify undo log record. */
1039 static MY_ATTRIBUTE((nonnull))
1040 void
row_undo_mod_parse_undo_rec(undo_node_t * node,ibool dict_locked)1041 row_undo_mod_parse_undo_rec(
1042 /*========================*/
1043 	undo_node_t*	node,		/*!< in: row undo node */
1044 	ibool		dict_locked)	/*!< in: TRUE if own dict_sys->mutex */
1045 {
1046 	dict_index_t*	clust_index;
1047 	byte*		ptr;
1048 	undo_no_t	undo_no;
1049 	table_id_t	table_id;
1050 	trx_id_t	trx_id;
1051 	roll_ptr_t	roll_ptr;
1052 	ulint		info_bits;
1053 	ulint		type;
1054 	ulint		cmpl_info;
1055 	bool		dummy_extern;
1056 
1057 	ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
1058 				    &dummy_extern, &undo_no, &table_id);
1059 	node->rec_type = type;
1060 
1061 	node->table = dict_table_open_on_id(
1062 		table_id, dict_locked, DICT_TABLE_OP_NORMAL);
1063 
1064 	/* TODO: other fixes associated with DROP TABLE + rollback in the
1065 	same table by another user */
1066 
1067 	if (node->table == NULL) {
1068 		/* Table was dropped */
1069 		return;
1070 	}
1071 
1072 	if (node->table->ibd_file_missing) {
1073 		dict_table_close(node->table, dict_locked, FALSE);
1074 
1075 		/* We skip undo operations to missing .ibd files */
1076 		node->table = NULL;
1077 
1078 		return;
1079 	}
1080 
1081 	clust_index = dict_table_get_first_index(node->table);
1082 
1083 	ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
1084 					       &info_bits);
1085 
1086 	ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
1087 				       node->heap);
1088 
1089 	trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
1090 				       roll_ptr, info_bits, node->trx,
1091 				       node->heap, &(node->update));
1092 	node->new_trx_id = trx_id;
1093 	node->cmpl_info = cmpl_info;
1094 
1095 	if (!row_undo_search_clust_to_pcur(node)) {
1096 
1097 		dict_table_close(node->table, dict_locked, FALSE);
1098 
1099 		node->table = NULL;
1100 	}
1101 }
1102 
1103 /***********************************************************//**
1104 Undoes a modify operation on a row of a table.
1105 @return	DB_SUCCESS or error code */
1106 UNIV_INTERN
1107 dberr_t
row_undo_mod(undo_node_t * node,que_thr_t * thr)1108 row_undo_mod(
1109 /*=========*/
1110 	undo_node_t*	node,	/*!< in: row undo node */
1111 	que_thr_t*	thr)	/*!< in: query thread */
1112 {
1113 	dberr_t	err;
1114 	ibool	dict_locked;
1115 
1116 	ut_ad(node != NULL);
1117 	ut_ad(thr != NULL);
1118 	ut_ad(node->state == UNDO_NODE_MODIFY);
1119 
1120 	dict_locked = thr_get_trx(thr)->dict_operation_lock_mode == RW_X_LATCH;
1121 
1122 	ut_ad(thr_get_trx(thr) == node->trx);
1123 
1124 	row_undo_mod_parse_undo_rec(node, dict_locked);
1125 
1126 	if (node->table == NULL) {
1127 		/* It is already undone, or will be undone by another query
1128 		thread, or table was dropped */
1129 
1130 		trx_undo_rec_release(node->trx, node->undo_no);
1131 		node->state = UNDO_NODE_FETCH_NEXT;
1132 
1133 		return(DB_SUCCESS);
1134 	}
1135 
1136 	node->index = dict_table_get_first_index(node->table);
1137 	ut_ad(dict_index_is_clust(node->index));
1138 	/* Skip the clustered index (the first index) */
1139 	node->index = dict_table_get_next_index(node->index);
1140 
1141 	/* Skip all corrupted secondary index */
1142 	dict_table_skip_corrupt_index(node->index);
1143 
1144 	switch (node->rec_type) {
1145 	case TRX_UNDO_UPD_EXIST_REC:
1146 		err = row_undo_mod_upd_exist_sec(node, thr);
1147 		break;
1148 	case TRX_UNDO_DEL_MARK_REC:
1149 		err = row_undo_mod_del_mark_sec(node, thr);
1150 		break;
1151 	case TRX_UNDO_UPD_DEL_REC:
1152 		err = row_undo_mod_upd_del_sec(node, thr);
1153 		break;
1154 	default:
1155 		ut_error;
1156 		err = DB_ERROR;
1157 	}
1158 
1159 	if (err == DB_SUCCESS) {
1160 
1161 		err = row_undo_mod_clust(node, thr);
1162 	}
1163 
1164 	dict_table_close(node->table, dict_locked, FALSE);
1165 
1166 	node->table = NULL;
1167 
1168 	return(err);
1169 }
1170