1 /*****************************************************************************
2 
3 Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2017, 2021, MariaDB Corporation.
5 
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9 
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17 
18 *****************************************************************************/
19 
20 /**************************************************//**
21 @file row/row0umod.cc
22 Undo modify of a row
23 
24 Created 2/27/1997 Heikki Tuuri
25 *******************************************************/
26 
27 #include "row0umod.h"
28 #include "dict0dict.h"
29 #include "dict0stats.h"
30 #include "dict0boot.h"
31 #include "trx0undo.h"
32 #include "trx0roll.h"
33 #include "trx0purge.h"
34 #include "btr0btr.h"
35 #include "mach0data.h"
36 #include "ibuf0ibuf.h"
37 #include "row0undo.h"
38 #include "row0vers.h"
39 #include "row0log.h"
40 #include "trx0trx.h"
41 #include "trx0rec.h"
42 #include "row0row.h"
43 #include "row0upd.h"
44 #include "que0que.h"
45 #include "log0log.h"
46 
47 /* Considerations on undoing a modify operation.
48 (1) Undoing a delete marking: all index records should be found. Some of
49 them may already have their delete mark set to FALSE, if the delete-marking
50 operation was interrupted partway, or if the undo operation ended
51 prematurely because of a system crash.
52 (2) Undoing an update of a delete unmarked record: the newer version of
53 an updated secondary index entry should be removed if no prior version
54 of the clustered index record requires its existence. Otherwise, it should
55 be delete marked.
56 (3) Undoing an update of a delete marked record. In this kind of update a
57 delete marked clustered index record was delete unmarked and possibly also
58 some of its fields were changed. Now, it is possible that the delete marked
59 version has become obsolete at the time the undo is started. */
60 
61 /*************************************************************************
62 IMPORTANT NOTE: Any operation that generates redo MUST check that there
63 is enough space in the redo log before performing that operation. This is
64 done by calling log_free_check(). The reason for checking the
65 availability of the redo log space before the start of the operation is
66 that we MUST not hold any synchronization objects when performing the
67 check.
68 If you make a change in this module, make sure that no codepath is
69 introduced where a call to log_free_check() is bypassed. */
70 
71 /***********************************************************//**
72 Undoes a modify in a clustered index record.
73 @return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
74 static MY_ATTRIBUTE((nonnull, warn_unused_result))
75 dberr_t
76 row_undo_mod_clust_low(
77 /*===================*/
78 	undo_node_t*	node,	/*!< in: row undo node */
79 	rec_offs**	offsets,/*!< out: rec_get_offsets() on the record */
80 	mem_heap_t**	offsets_heap,
81 				/*!< in/out: memory heap that can be emptied */
82 	mem_heap_t*	heap,	/*!< in/out: memory heap */
83 	const dtuple_t**rebuilt_old_pk,
84 				/*!< out: row_log_table_get_pk()
85 				before the update, or NULL if
86 				the table is not being rebuilt online or
87 				the PRIMARY KEY definition does not change */
88 	byte*		sys,	/*!< out: DB_TRX_ID, DB_ROLL_PTR
89 				for row_log_table_delete() */
90 	que_thr_t*	thr,	/*!< in: query thread */
91 	mtr_t*		mtr,	/*!< in: mtr; must be committed before
92 				latching any further pages */
93 	ulint		mode)	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
94 {
95 	btr_pcur_t*	pcur;
96 	btr_cur_t*	btr_cur;
97 	dberr_t		err;
98 #ifdef UNIV_DEBUG
99 	ibool		success;
100 #endif /* UNIV_DEBUG */
101 
102 	pcur = &node->pcur;
103 	btr_cur = btr_pcur_get_btr_cur(pcur);
104 
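	/* Restore the persistent cursor position on the clustered index
	record. The restore is expected to succeed (see the assertion
	below): the record cannot have been purged, because the changes
	of the rolling-back transaction are not yet committed and thus
	not visible to purge. */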
105 #ifdef UNIV_DEBUG
106 	success =
107 #endif /* UNIV_DEBUG */
108 	btr_pcur_restore_position(mode, pcur, mtr);
109 
110 	ut_ad(success);
111 	ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur),
112 			     btr_cur_get_index(btr_cur))
113 	      == thr_get_trx(thr)->id
114 	      || btr_cur_get_index(btr_cur)->table->is_temporary());
115 	ut_ad(node->ref != &trx_undo_metadata
116 	      || node->update->info_bits == REC_INFO_METADATA_ADD
117 	      || node->update->info_bits == REC_INFO_METADATA_ALTER);
118 
119 	if (mode != BTR_MODIFY_LEAF
120 	    && dict_index_is_online_ddl(btr_cur_get_index(btr_cur))) {
121 		*rebuilt_old_pk = row_log_table_get_pk(
122 			btr_cur_get_rec(btr_cur),
123 			btr_cur_get_index(btr_cur), NULL, sys, &heap);
124 	} else {
125 		*rebuilt_old_pk = NULL;
126 	}
127 
128 	if (mode != BTR_MODIFY_TREE) {
129 		ut_ad((mode & ulint(~BTR_ALREADY_S_LATCHED))
130 		      == BTR_MODIFY_LEAF);
131 
132 		err = btr_cur_optimistic_update(
133 			BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG
134 			| BTR_KEEP_SYS_FLAG,
135 			btr_cur, offsets, offsets_heap,
136 			node->update, node->cmpl_info,
137 			thr, thr_get_trx(thr)->id, mtr);
138 		ut_ad(err != DB_SUCCESS || node->ref != &trx_undo_metadata);
139 	} else {
140 		big_rec_t*	dummy_big_rec;
141 
142 		err = btr_cur_pessimistic_update(
143 			BTR_NO_LOCKING_FLAG
144 			| BTR_NO_UNDO_LOG_FLAG
145 			| BTR_KEEP_SYS_FLAG,
146 			btr_cur, offsets, offsets_heap, heap,
147 			&dummy_big_rec, node->update,
148 			node->cmpl_info, thr, thr_get_trx(thr)->id, mtr);
149 
150 		ut_a(!dummy_big_rec);
151 
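		/* Canonical contents of the page infimum and supremum
		record headers. The block below restores these strings in
		the root page when rolling back the metadata record of an
		instant ADD COLUMN, in case they had been overwritten. */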
152 		static const byte
153 			INFIMUM[8] = {'i','n','f','i','m','u','m',0},
154 			SUPREMUM[8] = {'s','u','p','r','e','m','u','m'};
155 
156 		if (err == DB_SUCCESS
157 		    && node->ref == &trx_undo_metadata
158 		    && btr_cur_get_index(btr_cur)->table->instant
159 		    && node->update->info_bits == REC_INFO_METADATA_ADD) {
160 			if (page_t* root = btr_root_get(
161 				    btr_cur_get_index(btr_cur), mtr)) {
162 				byte* infimum;
163 				byte *supremum;
164 				if (page_is_comp(root)) {
165 					infimum = PAGE_NEW_INFIMUM + root;
166 					supremum = PAGE_NEW_SUPREMUM + root;
167 				} else {
168 					infimum = PAGE_OLD_INFIMUM + root;
169 					supremum = PAGE_OLD_SUPREMUM + root;
170 				}
171 
172 				ut_ad(!memcmp(infimum, INFIMUM, 8)
173 				      == !memcmp(supremum, SUPREMUM, 8));
174 
175 				if (memcmp(infimum, INFIMUM, 8)) {
176 					mlog_write_string(infimum, INFIMUM,
177 							  8, mtr);
178 					mlog_write_string(supremum, SUPREMUM,
179 							  8, mtr);
180 				}
181 			}
182 		}
183 	}
184 
185 	if (err == DB_SUCCESS
186 	    && btr_cur_get_index(btr_cur)->table->id == DICT_COLUMNS_ID) {
187 		/* This is rolling back an UPDATE or DELETE on SYS_COLUMNS.
188 		If it was part of an instant ALTER TABLE operation, we
189 		must evict the table definition, so that it can be
190 		reloaded after the dictionary operation has been
191 		completed. At this point, any corresponding operation
192 		to the metadata record will have been rolled back. */
193 		const dfield_t& table_id = *dtuple_get_nth_field(node->row, 0);
194 		ut_ad(dfield_get_len(&table_id) == 8);
195 		node->trx->evict_table(mach_read_from_8(static_cast<byte*>(
196 					table_id.data)));
197 	}
198 
199 	return(err);
200 }
201 
202 /** Get the byte offset of the DB_TRX_ID column
203 @param[in]	rec	clustered index record
204 @param[in]	index	clustered index
205 @return	the byte offset of DB_TRX_ID, from the start of rec */
206 static ulint row_trx_id_offset(const rec_t* rec, const dict_index_t* index)
207 {
208 	ut_ad(index->n_uniq <= MAX_REF_PARTS);
209 	ulint trx_id_offset = index->trx_id_offset;
210 	if (!trx_id_offset) {
211 		/* Reserve enough offsets for the PRIMARY KEY and 2 columns
212 		so that we can access DB_TRX_ID, DB_ROLL_PTR. */
213 		rec_offs offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2];
214 		rec_offs_init(offsets_);
215 		mem_heap_t* heap = NULL;
216 		const ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1;
217 		rec_offs* offsets = rec_get_offsets(rec, index, offsets_,
218 						    index->n_core_fields,
219 						    trx_id_pos + 1, &heap);
220 		ut_ad(!heap);
221 		ulint len;
222 		trx_id_offset = rec_get_nth_field_offs(
223 			offsets, trx_id_pos, &len);
224 		ut_ad(len == DATA_TRX_ID_LEN);
225 	}
226 
227 	return trx_id_offset;
228 }
229 
230 /** Determine if rollback must execute a purge-like operation.
231 @param[in,out]	node	row undo
232 @param[in,out]	mtr	mini-transaction
233 @return	whether the record should be purged */
234 static bool row_undo_mod_must_purge(undo_node_t* node, mtr_t* mtr)
235 {
236 	ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
237 	ut_ad(!node->table->is_temporary());
238 
239 	btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&node->pcur);
240 	ut_ad(btr_cur->index->is_primary());
241 	DEBUG_SYNC_C("rollback_purge_clust");
242 
243 	mtr->s_lock(&purge_sys.latch, __FILE__, __LINE__);
244 
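	/* The record may be removed only if the delete-marking by
	new_trx_id is already visible to the purge view; otherwise some
	read view may still need the delete-marked version, and rollback
	must leave the record for purge to remove later. */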
245 	if (!purge_sys.view.changes_visible(node->new_trx_id,
246 					    node->table->name)) {
247 		return false;
248 	}
249 
250 	const rec_t* rec = btr_cur_get_rec(btr_cur);
251 
252 	return trx_read_trx_id(rec + row_trx_id_offset(rec, btr_cur->index))
253 		== node->new_trx_id;
254 }
255 
256 /***********************************************************//**
257 Undoes a modify in a clustered index record. Sets also the node state for the
258 next round of undo.
259 @return DB_SUCCESS or error code: we may run out of file space */
260 static MY_ATTRIBUTE((nonnull, warn_unused_result))
261 dberr_t
262 row_undo_mod_clust(
263 /*===============*/
264 	undo_node_t*	node,	/*!< in: row undo node */
265 	que_thr_t*	thr)	/*!< in: query thread */
266 {
267 	btr_pcur_t*	pcur;
268 	mtr_t		mtr;
269 	dberr_t		err;
270 	dict_index_t*	index;
271 	bool		online;
272 
273 	ut_ad(thr_get_trx(thr) == node->trx);
274 	ut_ad(node->trx->dict_operation_lock_mode);
275 	ut_ad(node->trx->in_rollback);
276 	ut_ad(rw_lock_own_flagged(&dict_sys.latch,
277 				  RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
278 
279 	log_free_check();
280 	pcur = &node->pcur;
281 	index = btr_cur_get_index(btr_pcur_get_btr_cur(pcur));
282 	ut_ad(index->is_primary());
283 
284 	mtr.start();
285 	if (index->table->is_temporary()) {
286 		mtr.set_log_mode(MTR_LOG_NO_REDO);
287 	} else {
288 		index->set_modified(mtr);
289 		ut_ad(lock_table_has_locks(index->table));
290 	}
291 
292 	online = dict_index_is_online_ddl(index);
293 	if (online) {
294 		ut_ad(node->trx->dict_operation_lock_mode != RW_X_LATCH);
295 		mtr_s_lock_index(index, &mtr);
296 	}
297 
298 	mem_heap_t*	heap		= mem_heap_create(1024);
299 	mem_heap_t*	offsets_heap	= NULL;
300 	rec_offs*	offsets		= NULL;
301 	const dtuple_t*	rebuilt_old_pk;
302 	byte		sys[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN];
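	/* rebuilt_old_pk and sys are only filled in when the table is
	being rebuilt online; they are consumed by the row_log_table_*()
	calls further below. */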
303 
304 	/* Try optimistic processing of the record, keeping changes within
305 	the index page */
306 
307 	err = row_undo_mod_clust_low(node, &offsets, &offsets_heap,
308 				     heap, &rebuilt_old_pk, sys,
309 				     thr, &mtr, online
310 				     ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
311 				     : BTR_MODIFY_LEAF);
312 
313 	if (err != DB_SUCCESS) {
314 		btr_pcur_commit_specify_mtr(pcur, &mtr);
315 
316 		/* We may have to modify tree structure: do a pessimistic
317 		descent down the index tree */
318 
319 		mtr.start();
320 		if (index->table->is_temporary()) {
321 			mtr.set_log_mode(MTR_LOG_NO_REDO);
322 		} else {
323 			index->set_modified(mtr);
324 		}
325 
326 		err = row_undo_mod_clust_low(
327 			node, &offsets, &offsets_heap,
328 			heap, &rebuilt_old_pk, sys,
329 			thr, &mtr, BTR_MODIFY_TREE);
330 		ut_ad(err == DB_SUCCESS || err == DB_OUT_OF_FILE_SPACE);
331 	}
332 
333 	/* Online rebuild cannot be initiated while we are holding
334 	dict_sys.latch and index->lock. (It can be aborted.) */
335 	ut_ad(online || !dict_index_is_online_ddl(index));
336 
337 	if (err == DB_SUCCESS && online) {
338 
339 		ut_ad(rw_lock_own_flagged(
340 				&index->lock,
341 				RW_LOCK_FLAG_S | RW_LOCK_FLAG_X
342 				| RW_LOCK_FLAG_SX));
343 
344 		switch (node->rec_type) {
345 		case TRX_UNDO_DEL_MARK_REC:
346 			row_log_table_insert(
347 				btr_pcur_get_rec(pcur), index, offsets);
348 			break;
349 		case TRX_UNDO_UPD_EXIST_REC:
350 			row_log_table_update(
351 				btr_pcur_get_rec(pcur), index, offsets,
352 				rebuilt_old_pk);
353 			break;
354 		case TRX_UNDO_UPD_DEL_REC:
355 			row_log_table_delete(
356 				btr_pcur_get_rec(pcur), index, offsets, sys);
357 			break;
358 		default:
359 			ut_ad(0);
360 			break;
361 		}
362 	}
363 
364 	/* When scrubbing, and a record gets cleared,
365 	the transaction id is no longer present
366 	afterwards. This is safe, because once the
367 	record is on the free list, it can be
368 	reallocated at any time after the
369 	mini-transaction commit that follows just
370 	below. */
371 	ut_ad(srv_immediate_scrub_data_uncompressed
372 	      || row_get_rec_trx_id(btr_pcur_get_rec(pcur), index, offsets)
373 	      == node->new_trx_id);
374 
375 	btr_pcur_commit_specify_mtr(pcur, &mtr);
376 	DEBUG_SYNC_C("rollback_undo_pk");
377 
378 	if (err != DB_SUCCESS) {
379 		goto func_exit;
380 	}
381 
382 	/* FIXME: Perform the below operations in the above
383 	mini-transaction when possible. */
384 
385 	if (node->rec_type == TRX_UNDO_UPD_DEL_REC) {
386 		/* In delete-marked records, DB_TRX_ID must
387 		always refer to an existing update_undo log record. */
388 		ut_ad(node->new_trx_id);
389 
390 		mtr.start();
391 		if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr)) {
392 			goto mtr_commit_exit;
393 		}
394 
395 		if (index->table->is_temporary()) {
396 			mtr.set_log_mode(MTR_LOG_NO_REDO);
397 		} else {
398 			if (!row_undo_mod_must_purge(node, &mtr)) {
399 				goto mtr_commit_exit;
400 			}
401 			index->set_modified(mtr);
402 		}
403 
404 		ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur),
405 					   dict_table_is_comp(node->table)));
406 		if (btr_cur_optimistic_delete(&pcur->btr_cur, 0, &mtr)) {
407 			goto mtr_commit_exit;
408 		}
409 
410 		btr_pcur_commit_specify_mtr(pcur, &mtr);
411 
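		/* The optimistic (leaf-page-only) delete did not succeed;
		retry with a pessimistic delete, which may modify the
		B-tree structure. */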
412 		mtr.start();
413 		if (!btr_pcur_restore_position(
414 			    BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
415 			    pcur, &mtr)) {
416 			goto mtr_commit_exit;
417 		}
418 
419 		if (index->table->is_temporary()) {
420 			mtr.set_log_mode(MTR_LOG_NO_REDO);
421 		} else {
422 			if (!row_undo_mod_must_purge(node, &mtr)) {
423 				goto mtr_commit_exit;
424 			}
425 			index->set_modified(mtr);
426 		}
427 
428 		ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur),
429 					   dict_table_is_comp(node->table)));
430 
431 		/* This operation is analogous to purge, we can free
432 		also inherited externally stored fields. We can also
433 		assume that the record was complete (including BLOBs),
434 		because it had been delete-marked after it had been
435 		completely inserted. Therefore, we are passing
436 		rollback=false, just like purge does. */
437 		btr_cur_pessimistic_delete(&err, FALSE, &pcur->btr_cur, 0,
438 					   false, &mtr);
439 		ut_ad(err == DB_SUCCESS
440 		      || err == DB_OUT_OF_FILE_SPACE);
441 	} else if (!index->table->is_temporary() && node->new_trx_id) {
442 		/* We rolled back a record so that it still exists.
443 		We must reset the DB_TRX_ID if the history is no
444 		longer accessible by any active read view. */
445 
446 		mtr.start();
447 		if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr)) {
448 			goto mtr_commit_exit;
449 		}
450 		rec_t* rec = btr_pcur_get_rec(pcur);
451 		mtr.s_lock(&purge_sys.latch, __FILE__, __LINE__);
452 		if (!purge_sys.view.changes_visible(node->new_trx_id,
453 						   node->table->name)) {
454 			goto mtr_commit_exit;
455 		}
456 
457 		ulint trx_id_offset = index->trx_id_offset;
458 		ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1;
459 		/* Reserve enough offsets for the PRIMARY KEY and
460 		2 columns so that we can access DB_TRX_ID, DB_ROLL_PTR. */
461 		rec_offs offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2];
462 		if (trx_id_offset) {
463 #ifdef UNIV_DEBUG
464 			ut_ad(rec_offs_validate(NULL, index, offsets));
465 			if (buf_block_get_page_zip(
466 				    btr_pcur_get_block(&node->pcur))) {
467 				/* Below, page_zip_write_trx_id_and_roll_ptr()
468 				needs offsets to access DB_TRX_ID,DB_ROLL_PTR.
469 				We already computed offsets for possibly
470 				another record in the clustered index.
471 				Because the PRIMARY KEY is fixed-length,
472 				the offsets for the PRIMARY KEY and
473 				DB_TRX_ID,DB_ROLL_PTR are still valid.
474 				Silence the rec_offs_validate() assertion. */
475 				rec_offs_make_valid(rec, index, true, offsets);
476 			}
477 #endif
478 		} else if (rec_is_metadata(rec, *index)) {
479 			ut_ad(!buf_block_get_page_zip(btr_pcur_get_block(
480 							      &node->pcur)));
481 			for (unsigned i = index->first_user_field(); i--; ) {
482 				trx_id_offset += index->fields[i].fixed_len;
483 			}
484 		} else {
485 			ut_ad(index->n_uniq <= MAX_REF_PARTS);
486 			rec_offs_init(offsets_);
487 			offsets = rec_get_offsets(rec, index, offsets_,
488 						  index->n_core_fields,
489 						  trx_id_pos + 2, &heap);
490 			ulint len;
491 			trx_id_offset = rec_get_nth_field_offs(
492 				offsets, trx_id_pos, &len);
493 			ut_ad(len == DATA_TRX_ID_LEN);
494 		}
495 
496 		if (trx_read_trx_id(rec + trx_id_offset) == node->new_trx_id) {
497 			ut_ad(!rec_get_deleted_flag(
498 				      rec, dict_table_is_comp(node->table))
499 			      || rec_is_alter_metadata(rec, *index));
500 			index->set_modified(mtr);
501 			if (page_zip_des_t* page_zip = buf_block_get_page_zip(
502 				    btr_pcur_get_block(&node->pcur))) {
503 				page_zip_write_trx_id_and_roll_ptr(
504 					page_zip, rec, offsets, trx_id_pos,
505 					0, 1ULL << ROLL_PTR_INSERT_FLAG_POS,
506 					&mtr);
507 			} else {
508 				mlog_write_string(rec + trx_id_offset,
509 						  reset_trx_id,
510 						  sizeof reset_trx_id, &mtr);
511 			}
512 		}
513 	} else {
514 		goto func_exit;
515 	}
516 
517 mtr_commit_exit:
518 	btr_pcur_commit_specify_mtr(pcur, &mtr);
519 
520 func_exit:
521 	if (offsets_heap) {
522 		mem_heap_free(offsets_heap);
523 	}
524 	mem_heap_free(heap);
525 	return(err);
526 }
527 
528 /***********************************************************//**
529 Delete marks or removes a secondary index entry if found.
530 @return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
531 static MY_ATTRIBUTE((nonnull, warn_unused_result))
532 dberr_t
533 row_undo_mod_del_mark_or_remove_sec_low(
534 /*====================================*/
535 	undo_node_t*	node,	/*!< in: row undo node */
536 	que_thr_t*	thr,	/*!< in: query thread */
537 	dict_index_t*	index,	/*!< in: index */
538 	dtuple_t*	entry,	/*!< in: index entry */
539 	ulint		mode)	/*!< in: latch mode BTR_MODIFY_LEAF or
540 				BTR_MODIFY_TREE */
541 {
542 	btr_pcur_t		pcur;
543 	btr_cur_t*		btr_cur;
544 	ibool			success;
545 	dberr_t			err	= DB_SUCCESS;
546 	mtr_t			mtr;
547 	mtr_t			mtr_vers;
548 	row_search_result	search_result;
549 	const bool		modify_leaf = mode == BTR_MODIFY_LEAF;
550 
551 	row_mtr_start(&mtr, index, !modify_leaf);
552 
553 	if (!index->is_committed()) {
554 		/* The index->online_status may change if the index is
555 		or was being created online, but not committed yet. It
556 		is protected by index->lock. */
557 		if (modify_leaf) {
558 			mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
559 			mtr_s_lock_index(index, &mtr);
560 		} else {
561 			ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE));
562 			mtr_sx_lock_index(index, &mtr);
563 		}
564 
565 		if (row_log_online_op_try(index, entry, 0)) {
566 			goto func_exit_no_pcur;
567 		}
568 	} else {
569 		/* For secondary indexes,
570 		index->online_status==ONLINE_INDEX_COMPLETE if
571 		index->is_committed(). */
572 		ut_ad(!dict_index_is_online_ddl(index));
573 	}
574 
575 	btr_cur = btr_pcur_get_btr_cur(&pcur);
576 
577 	if (dict_index_is_spatial(index)) {
578 		if (modify_leaf) {
579 			btr_cur->thr = thr;
580 			mode |= BTR_RTREE_DELETE_MARK;
581 		}
582 		mode |= BTR_RTREE_UNDO_INS;
583 	}
584 
585 	search_result = row_search_index_entry(index, entry, mode,
586 					       &pcur, &mtr);
587 
588 	switch (UNIV_EXPECT(search_result, ROW_FOUND)) {
589 	case ROW_NOT_FOUND:
590 		/* In crash recovery, the secondary index record may
591 		be missing if the UPDATE did not have time to insert
592 		the secondary index records before the crash.  When we
593 		are undoing that UPDATE in crash recovery, the record
594 		may be missing.
595 
596 		In normal processing, if an update ends in a deadlock
597 		before it has inserted all updated secondary index
598 		records, then the undo will not find those records. */
599 		goto func_exit;
600 	case ROW_FOUND:
601 		break;
602 	case ROW_BUFFERED:
603 	case ROW_NOT_DELETED_REF:
604 		/* These are invalid outcomes, because the mode passed
605 		to row_search_index_entry() did not include any of the
606 		flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
607 		ut_error;
608 	}
609 
610 	/* We should remove the index record if no prior version of the row,
611 	which cannot be purged yet, requires its existence. If such a version
612 	exists, we should delete-mark the record instead. */
613 
614 	mtr_vers.start();
615 
616 	success = btr_pcur_restore_position(BTR_SEARCH_LEAF, &(node->pcur),
617 					    &mtr_vers);
618 	ut_a(success);
619 
620 	/* For a temporary table, we can skip checking older versions of
621 	the clustered index entry, because there is no MVCC or purge. */
622 	if (node->table->is_temporary()
623 	    || row_vers_old_has_index_entry(
624 		    false, btr_pcur_get_rec(&node->pcur),
625 		    &mtr_vers, index, entry, 0, 0)) {
626 		err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
627 						   btr_cur, TRUE, thr, &mtr);
628 		ut_ad(err == DB_SUCCESS);
629 	} else {
630 		/* Remove the index record */
631 
632 		if (dict_index_is_spatial(index)) {
633 			rec_t*	rec = btr_pcur_get_rec(&pcur);
634 			if (rec_get_deleted_flag(rec,
635 						 dict_table_is_comp(index->table))) {
636 				ib::error() << "Record found in index "
637 					<< index->name << " is deleted marked"
638 					" on rollback update.";
639 				ut_ad(0);
640 			}
641 		}
642 
643 		if (modify_leaf) {
644 			err = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
645 				? DB_SUCCESS : DB_FAIL;
646 		} else {
647 			/* Passing rollback=false,
648 			because we are deleting a secondary index record:
649 			the distinction only matters when deleting a
650 			record that contains externally stored columns. */
651 			ut_ad(!index->is_primary());
652 			btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
653 						   false, &mtr);
654 
655 			/* The delete operation may fail if we have little
656 			file space left: TODO: easiest to crash the database
657 			and restart with more file space */
658 		}
659 	}
660 
661 	btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers);
662 
663 func_exit:
664 	btr_pcur_close(&pcur);
665 func_exit_no_pcur:
666 	mtr_commit(&mtr);
667 
668 	return(err);
669 }
670 
671 /***********************************************************//**
672 Delete marks or removes a secondary index entry if found.
673 NOTE that if we updated the fields of a delete-marked secondary index record
674 so that alphabetically they stayed the same, e.g., 'abc' -> 'aBc', we cannot
675 return to the original values because we do not know them. But this should
676 not cause problems because in row0sel.cc, in queries we always retrieve the
677 clustered index record or an earlier version of it, if the secondary index
678 record through which we do the search is delete-marked.
679 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
680 static MY_ATTRIBUTE((nonnull, warn_unused_result))
681 dberr_t
682 row_undo_mod_del_mark_or_remove_sec(
683 /*================================*/
684 	undo_node_t*	node,	/*!< in: row undo node */
685 	que_thr_t*	thr,	/*!< in: query thread */
686 	dict_index_t*	index,	/*!< in: index */
687 	dtuple_t*	entry)	/*!< in: index entry */
688 {
689 	dberr_t	err;
690 
691 	err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
692 						      entry, BTR_MODIFY_LEAF);
693 	if (err == DB_SUCCESS) {
694 
695 		return(err);
696 	}
697 
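	/* The optimistic (leaf-only) attempt did not succeed; retry with
	a pessimistic descent that is allowed to modify the tree. */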
698 	err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
699 		entry, BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE);
700 	return(err);
701 }
702 
703 /***********************************************************//**
704 Delete unmarks a secondary index entry which must be found. It might not be
705 delete-marked at the moment, but it does not harm to unmark it anyway. We also
706 need to update the fields of the secondary index record if we updated its
707 fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'.
708 @retval DB_SUCCESS on success
709 @retval DB_FAIL if BTR_MODIFY_TREE should be tried
710 @retval DB_OUT_OF_FILE_SPACE when running out of tablespace
711 @retval DB_DUPLICATE_KEY if the value was missing
712 	and an insert would lead to a duplicate
713 static MY_ATTRIBUTE((nonnull, warn_unused_result))
714 dberr_t
715 row_undo_mod_del_unmark_sec_and_undo_update(
716 /*========================================*/
717 	ulint		mode,	/*!< in: search mode: BTR_MODIFY_LEAF or
718 				BTR_MODIFY_TREE */
719 	que_thr_t*	thr,	/*!< in: query thread */
720 	dict_index_t*	index,	/*!< in: index */
721 	dtuple_t*	entry)	/*!< in: index entry */
722 {
723 	btr_pcur_t		pcur;
724 	btr_cur_t*		btr_cur		= btr_pcur_get_btr_cur(&pcur);
725 	upd_t*			update;
726 	dberr_t			err		= DB_SUCCESS;
727 	big_rec_t*		dummy_big_rec;
728 	mtr_t			mtr;
729 	trx_t*			trx		= thr_get_trx(thr);
730 	const ulint		flags
731 		= BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG;
732 	row_search_result	search_result;
733 	ulint			orig_mode = mode;
734 
735 	ut_ad(trx->id != 0);
736 
737 	if (dict_index_is_spatial(index)) {
738 		/* FIXME: Currently we do a 2-pass search for the undo,
739 		to avoid delete-unmarking a wrong record when rolling
740 		back a partial update.  Later, we could log some info in
741 		secondary index updates to avoid this. */
742 		ut_ad(mode & BTR_MODIFY_LEAF);
743 		mode |= BTR_RTREE_DELETE_MARK;
744 	}
745 
746 try_again:
747 	row_mtr_start(&mtr, index, !(mode & BTR_MODIFY_LEAF));
748 
749 	if (!index->is_committed()) {
750 		/* The index->online_status may change if the index is
751 		or was being created online, but not committed yet. It
752 		is protected by index->lock. */
753 		if (mode == BTR_MODIFY_LEAF) {
754 			mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
755 			mtr_s_lock_index(index, &mtr);
756 		} else {
757 			ut_ad(mode == BTR_MODIFY_TREE);
758 			mtr_sx_lock_index(index, &mtr);
759 		}
760 
761 		if (row_log_online_op_try(index, entry, trx->id)) {
762 			goto func_exit_no_pcur;
763 		}
764 	} else {
765 		/* For secondary indexes,
766 		index->online_status==ONLINE_INDEX_COMPLETE if
767 		index->is_committed(). */
768 		ut_ad(!dict_index_is_online_ddl(index));
769 	}
770 
771 	btr_cur->thr = thr;
772 
773 	search_result = row_search_index_entry(index, entry, mode,
774 					       &pcur, &mtr);
775 
776 	switch (search_result) {
777 		mem_heap_t*	heap;
778 		mem_heap_t*	offsets_heap;
779 		rec_offs*	offsets;
780 	case ROW_BUFFERED:
781 	case ROW_NOT_DELETED_REF:
782 		/* These are invalid outcomes, because the mode passed
783 		to row_search_index_entry() did not include any of the
784 		flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
785 		ut_error;
786 	case ROW_NOT_FOUND:
787 		/* For spatial index, if first search didn't find an
788 		undel-marked rec, try to find a del-marked rec. */
789 		if (dict_index_is_spatial(index) && btr_cur->rtr_info->fd_del) {
790 			if (mode != orig_mode) {
791 				mode = orig_mode;
792 				btr_pcur_close(&pcur);
793 				mtr_commit(&mtr);
794 				goto try_again;
795 			}
796 		}
797 
798 		if (index->is_committed()) {
799 			/* During online secondary index creation, it
800 			is possible that MySQL is waiting for a
801 			meta-data lock upgrade before invoking
802 			ha_innobase::commit_inplace_alter_table()
803 			while this ROLLBACK is executing. InnoDB has
804 			finished building the index, but it does not
805 			yet exist in MySQL. In this case, we suppress
806 			the printout to the error log. */
807 			ib::warn() << "Record in index " << index->name
808 				<< " of table " << index->table->name
809 				<< " was not found on rollback, trying to"
810 				" insert: " << *entry
811 				<< " at: " << rec_index_print(
812 					btr_cur_get_rec(btr_cur), index);
813 		}
814 
815 		if (btr_cur->up_match >= dict_index_get_n_unique(index)
816 		    || btr_cur->low_match >= dict_index_get_n_unique(index)) {
817 			if (index->is_committed()) {
818 				ib::warn() << "Record in index " << index->name
819 					<< " was not found on rollback, and"
820 					" a duplicate exists";
821 			}
822 			err = DB_DUPLICATE_KEY;
823 			break;
824 		}
825 
826 		/* Insert the missing record that we were trying to
827 		delete-unmark. */
828 		big_rec_t*	big_rec;
829 		rec_t*		insert_rec;
830 		offsets = NULL;
831 		offsets_heap = NULL;
832 
833 		err = btr_cur_optimistic_insert(
834 			flags, btr_cur, &offsets, &offsets_heap,
835 			entry, &insert_rec, &big_rec,
836 			0, thr, &mtr);
837 		ut_ad(!big_rec);
838 
839 		if (err == DB_FAIL && mode == BTR_MODIFY_TREE) {
840 			err = btr_cur_pessimistic_insert(
841 				flags, btr_cur,
842 				&offsets, &offsets_heap,
843 				entry, &insert_rec, &big_rec,
844 				0, thr, &mtr);
845 			/* There are no off-page columns in
846 			secondary indexes. */
847 			ut_ad(!big_rec);
848 		}
849 
850 		if (err == DB_SUCCESS) {
851 			page_update_max_trx_id(
852 				btr_cur_get_block(btr_cur),
853 				btr_cur_get_page_zip(btr_cur),
854 				trx->id, &mtr);
855 		}
856 
857 		if (offsets_heap) {
858 			mem_heap_free(offsets_heap);
859 		}
860 
861 		break;
862 	case ROW_FOUND:
863 		err = btr_cur_del_mark_set_sec_rec(
864 			BTR_NO_LOCKING_FLAG,
865 			btr_cur, FALSE, thr, &mtr);
866 
867 		ut_a(err == DB_SUCCESS);
868 		heap = mem_heap_create(
869 			sizeof(upd_t)
870 			+ dtuple_get_n_fields(entry) * sizeof(upd_field_t));
871 		offsets_heap = NULL;
872 		offsets = rec_get_offsets(
873 			btr_cur_get_rec(btr_cur),
874 			index, nullptr, index->n_core_fields, ULINT_UNDEFINED,
875 			&offsets_heap);
876 		update = row_upd_build_sec_rec_difference_binary(
877 			btr_cur_get_rec(btr_cur), index, offsets, entry, heap);
878 		if (upd_get_n_fields(update) == 0) {
879 
880 			/* Do nothing */
881 
882 		} else if (mode != BTR_MODIFY_TREE) {
883 			/* Try an optimistic updating of the record, keeping
884 			changes within the page */
885 
886 			/* TODO: pass offsets, not &offsets */
887 			err = btr_cur_optimistic_update(
888 				flags, btr_cur, &offsets, &offsets_heap,
889 				update, 0, thr, thr_get_trx(thr)->id, &mtr);
890 			switch (err) {
891 			case DB_OVERFLOW:
892 			case DB_UNDERFLOW:
893 			case DB_ZIP_OVERFLOW:
894 				err = DB_FAIL;
895 			default:
896 				break;
897 			}
898 		} else {
899 			err = btr_cur_pessimistic_update(
900 				flags, btr_cur, &offsets, &offsets_heap,
901 				heap, &dummy_big_rec,
902 				update, 0, thr, thr_get_trx(thr)->id, &mtr);
903 			ut_a(!dummy_big_rec);
904 		}
905 
906 		mem_heap_free(heap);
907 		mem_heap_free(offsets_heap);
908 	}
909 
910 	btr_pcur_close(&pcur);
911 func_exit_no_pcur:
912 	mtr_commit(&mtr);
913 
914 	return(err);
915 }
916 
917 /***********************************************************//**
918 Flags a secondary index corrupted. */
919 static MY_ATTRIBUTE((nonnull))
920 void
921 row_undo_mod_sec_flag_corrupted(
922 /*============================*/
923 	trx_t*		trx,	/*!< in/out: transaction */
924 	dict_index_t*	index)	/*!< in: secondary index */
925 {
926 	ut_ad(!dict_index_is_clust(index));
927 
928 	switch (trx->dict_operation_lock_mode) {
929 	case RW_S_LATCH:
930 		/* Because row_undo() is holding an S-latch
931 		on the data dictionary during normal rollback,
932 		we can only mark the index corrupted in the
933 		data dictionary cache. TODO: fix this somehow.*/
934 		mutex_enter(&dict_sys.mutex);
935 		dict_set_corrupted_index_cache_only(index);
936 		mutex_exit(&dict_sys.mutex);
937 		break;
938 	default:
939 		ut_ad(0);
940 		/* fall through */
941 	case RW_X_LATCH:
942 		/* This should be the rollback of a data dictionary
943 		transaction. */
944 		dict_set_corrupted(index, trx, "rollback");
945 	}
946 }
947 
948 /***********************************************************//**
949 Undoes a modify in secondary indexes when undo record type is UPD_DEL.
950 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
951 static MY_ATTRIBUTE((nonnull, warn_unused_result))
952 dberr_t
953 row_undo_mod_upd_del_sec(
954 /*=====================*/
955 	undo_node_t*	node,	/*!< in: row undo node */
956 	que_thr_t*	thr)	/*!< in: query thread */
957 {
958 	mem_heap_t*	heap;
959 	dberr_t		err	= DB_SUCCESS;
960 
961 	ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
962 	ut_ad(!node->undo_row);
963 
964 	heap = mem_heap_create(1024);
965 
966 	while (node->index != NULL) {
967 		dict_index_t*	index	= node->index;
968 		dtuple_t*	entry;
969 
970 		if (index->type & DICT_FTS) {
971 			dict_table_next_uncorrupted_index(node->index);
972 			continue;
973 		}
974 
975 		/* During online index creation,
976 		HA_ALTER_INPLACE_COPY_NO_LOCK or HA_ALTER_INPLACE_NOCOPY_NO_LOCK
977 		should guarantee that any active transaction has not modified
978 		indexed columns such that col->ord_part was 0 at the
979 		time when the undo log record was written. When we get
980 		to roll back an undo log entry TRX_UNDO_DEL_MARK_REC,
981 		it should always cover all affected indexes. */
982 		entry = row_build_index_entry(
983 			node->row, node->ext, index, heap);
984 
985 		if (UNIV_UNLIKELY(!entry)) {
986 			/* The database must have crashed after
987 			inserting a clustered index record but before
988 			writing all the externally stored columns of
989 			that record.  Because secondary index entries
990 			are inserted after the clustered index record,
991 			we may assume that the secondary index record
992 			does not exist.  However, this situation may
993 			only occur during the rollback of incomplete
994 			transactions. */
995 			ut_a(thr_get_trx(thr) == trx_roll_crash_recv_trx);
996 		} else {
997 			err = row_undo_mod_del_mark_or_remove_sec(
998 				node, thr, index, entry);
999 
1000 			if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
1001 
1002 				break;
1003 			}
1004 		}
1005 
1006 		mem_heap_empty(heap);
1007 		dict_table_next_uncorrupted_index(node->index);
1008 	}
1009 
1010 	mem_heap_free(heap);
1011 
1012 	return(err);
1013 }
1014 
1015 /***********************************************************//**
1016 Undoes a modify in secondary indexes when undo record type is DEL_MARK.
1017 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
1018 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1019 dberr_t
1020 row_undo_mod_del_mark_sec(
1021 /*======================*/
1022 	undo_node_t*	node,	/*!< in: row undo node */
1023 	que_thr_t*	thr)	/*!< in: query thread */
1024 {
1025 	mem_heap_t*	heap;
1026 	dberr_t		err	= DB_SUCCESS;
1027 
1028 	ut_ad(!node->undo_row);
1029 
1030 	heap = mem_heap_create(1024);
1031 
1032 	while (node->index != NULL) {
1033 		dict_index_t*	index	= node->index;
1034 		dtuple_t*	entry;
1035 
1036 		if (index->type == DICT_FTS) {
1037 			dict_table_next_uncorrupted_index(node->index);
1038 			continue;
1039 		}
1040 
1041 		/* During online index creation,
1042 		HA_ALTER_INPLACE_COPY_NO_LOCK or HA_ALTER_INPLACE_NOCOPY_NO_LOCK
1043 		should guarantee that any active transaction has not modified
1044 		indexed columns such that col->ord_part was 0 at the
1045 		time when the undo log record was written. When we get
1046 		to roll back an undo log entry TRX_UNDO_DEL_MARK_REC,
1047 		it should always cover all affected indexes. */
1048 		entry = row_build_index_entry(
1049 			node->row, node->ext, index, heap);
1050 
1051 		ut_a(entry);
1052 
1053 		err = row_undo_mod_del_unmark_sec_and_undo_update(
1054 			BTR_MODIFY_LEAF, thr, index, entry);
1055 		if (err == DB_FAIL) {
1056 			err = row_undo_mod_del_unmark_sec_and_undo_update(
1057 				BTR_MODIFY_TREE, thr, index, entry);
1058 		}
1059 
1060 		if (err == DB_DUPLICATE_KEY) {
1061 			row_undo_mod_sec_flag_corrupted(
1062 				thr_get_trx(thr), index);
1063 			err = DB_SUCCESS;
1064 			/* Do not return any error to the caller. The
1065 			duplicate will be reported by ALTER TABLE or
1066 			CREATE UNIQUE INDEX. Unfortunately we cannot
1067 			report the duplicate key value to the DDL
1068 			thread, because the altered_table object is
1069 			private to its call stack. */
1070 		} else if (err != DB_SUCCESS) {
1071 			break;
1072 		}
1073 
1074 		mem_heap_empty(heap);
1075 		dict_table_next_uncorrupted_index(node->index);
1076 	}
1077 
1078 	mem_heap_free(heap);
1079 
1080 	return(err);
1081 }
1082 
1083 /***********************************************************//**
1084 Undoes a modify in secondary indexes when undo record type is UPD_EXIST.
1085 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
1086 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1087 dberr_t
1088 row_undo_mod_upd_exist_sec(
1089 /*=======================*/
1090 	undo_node_t*	node,	/*!< in: row undo node */
1091 	que_thr_t*	thr)	/*!< in: query thread */
1092 {
1093 	mem_heap_t*	heap;
1094 	dberr_t		err	= DB_SUCCESS;
1095 
1096 	if (node->index == NULL
1097 	    || ((node->cmpl_info & UPD_NODE_NO_ORD_CHANGE))) {
1098 		/* No change in secondary indexes */
1099 
1100 		return(err);
1101 	}
1102 
1103 	heap = mem_heap_create(1024);
1104 
1105 
1106 	while (node->index != NULL) {
1107 		dict_index_t*	index	= node->index;
1108 		dtuple_t*	entry;
1109 
1110 		if (dict_index_is_spatial(index)) {
1111 			if (!row_upd_changes_ord_field_binary_func(
1112 				index, node->update,
1113 #ifdef UNIV_DEBUG
1114 				thr,
1115 #endif /* UNIV_DEBUG */
1116 				node->row,
1117 				node->ext, ROW_BUILD_FOR_UNDO)) {
1118 				dict_table_next_uncorrupted_index(node->index);
1119 				continue;
1120 			}
1121 		} else {
1122 			if (index->type == DICT_FTS
1123 			    || !row_upd_changes_ord_field_binary(index,
1124 								 node->update,
1125 								 thr, node->row,
1126 								 node->ext)) {
1127 				dict_table_next_uncorrupted_index(node->index);
1128 				continue;
1129 			}
1130 		}
1131 
1132 		/* Build the newest version of the index entry */
1133 		entry = row_build_index_entry(node->row, node->ext,
1134 					      index, heap);
1135 		if (UNIV_UNLIKELY(!entry)) {
1136 			/* The server must have crashed in
1137 			row_upd_clust_rec_by_insert() before
1138 			the updated externally stored columns (BLOBs)
1139 			of the new clustered index entry were written. */
1140 
1141 			/* The table must be in DYNAMIC or COMPRESSED
1142 			format.  REDUNDANT and COMPACT formats
1143 			store a local 768-byte prefix of each
1144 			externally stored column. */
1145 			ut_a(dict_table_has_atomic_blobs(index->table));
1146 
1147 			/* This is only legitimate when
1148 			rolling back an incomplete transaction
1149 			after crash recovery. */
1150 			ut_a(thr_get_trx(thr)->is_recovered);
1151 
1152 			/* The server must have crashed before
1153 			completing the insert of the new
1154 			clustered index entry and before
1155 			inserting to the secondary indexes.
1156 			Because node->row was not yet written
1157 			to this index, we can ignore it.  But
1158 			we must restore node->undo_row. */
1159 		} else {
1160 			/* NOTE that if we updated the fields of a
1161 			delete-marked secondary index record so that
1162 			alphabetically they stayed the same, e.g.,
1163 			'abc' -> 'aBc', we cannot return to the
1164 			original values because we do not know them.
1165 			But this should not cause problems because
1166 			in row0sel.cc, in queries we always retrieve
1167 			the clustered index record or an earlier
1168 			version of it, if the secondary index record
1169 			through which we do the search is
1170 			delete-marked. */
1171 
1172 			err = row_undo_mod_del_mark_or_remove_sec(
1173 				node, thr, index, entry);
1174 			if (err != DB_SUCCESS) {
1175 				break;
1176 			}
1177 		}
1178 
1179 		mem_heap_empty(heap);
1180 		/* We may have to update the delete mark in the
1181 		secondary index record of the previous version of
1182 		the row. We also need to update the fields of
1183 		the secondary index record if we updated its fields
1184 		but alphabetically they stayed the same, e.g.,
1185 		'abc' -> 'aBc'. */
1186 		if (dict_index_is_spatial(index)) {
1187 			entry = row_build_index_entry_low(node->undo_row,
1188 							  node->undo_ext,
1189 							  index, heap,
1190 							  ROW_BUILD_FOR_UNDO);
1191 		} else {
1192 			entry = row_build_index_entry(node->undo_row,
1193 						      node->undo_ext,
1194 						      index, heap);
1195 		}
1196 
1197 		ut_a(entry);
1198 
1199 		err = row_undo_mod_del_unmark_sec_and_undo_update(
1200 			BTR_MODIFY_LEAF, thr, index, entry);
1201 		if (err == DB_FAIL) {
1202 			err = row_undo_mod_del_unmark_sec_and_undo_update(
1203 				BTR_MODIFY_TREE, thr, index, entry);
1204 		}
1205 
1206 		if (err == DB_DUPLICATE_KEY) {
1207 			row_undo_mod_sec_flag_corrupted(
1208 				thr_get_trx(thr), index);
1209 			err = DB_SUCCESS;
1210 		} else if (err != DB_SUCCESS) {
1211 			break;
1212 		}
1213 
1214 		mem_heap_empty(heap);
1215 		dict_table_next_uncorrupted_index(node->index);
1216 	}
1217 
1218 	mem_heap_free(heap);
1219 
1220 	return(err);
1221 }
1222 
1223 /** Parse an update undo record.
1224 @param[in,out]	node		row rollback state
1225 @param[in]	dict_locked	whether the data dictionary cache is locked */
1226 static bool row_undo_mod_parse_undo_rec(undo_node_t* node, bool dict_locked)
1227 {
1228 	dict_index_t*	clust_index;
1229 	byte*		ptr;
1230 	undo_no_t	undo_no;
1231 	table_id_t	table_id;
1232 	trx_id_t	trx_id;
1233 	roll_ptr_t	roll_ptr;
1234 	ulint		info_bits;
1235 	ulint		type;
1236 	ulint		cmpl_info;
1237 	bool		dummy_extern;
1238 
1239 	ut_ad(node->state == UNDO_UPDATE_PERSISTENT
1240 	      || node->state == UNDO_UPDATE_TEMPORARY);
1241 	ut_ad(node->trx->in_rollback);
1242 	ut_ad(!trx_undo_roll_ptr_is_insert(node->roll_ptr));
1243 
1244 	ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
1245 				    &dummy_extern, &undo_no, &table_id);
1246 	node->rec_type = type;
1247 
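	/* Look up the table. A persistent table is opened via the shared
	dictionary cache; a temporary table is private to this connection
	and is looked up under dict_sys.mutex unless the caller already
	holds the dictionary lock. */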
1248 	if (node->state == UNDO_UPDATE_PERSISTENT) {
1249 		node->table = dict_table_open_on_id(table_id, dict_locked,
1250 						    DICT_TABLE_OP_NORMAL);
1251 	} else if (!dict_locked) {
1252 		mutex_enter(&dict_sys.mutex);
1253 		node->table = dict_sys.get_temporary_table(table_id);
1254 		mutex_exit(&dict_sys.mutex);
1255 	} else {
1256 		node->table = dict_sys.get_temporary_table(table_id);
1257 	}
1258 
1259 	if (!node->table) {
1260 		return false;
1261 	}
1262 
1263 	ut_ad(!node->table->skip_alter_undo);
1264 
1265 	if (UNIV_UNLIKELY(!fil_table_accessible(node->table))) {
1266 close_table:
1267 		/* Normally, tables should not disappear or become
1268 		inaccessible during ROLLBACK, because they should be
1269 		protected by InnoDB table locks. Corruption could be
1270 		a valid exception.
1271 
1272 		FIXME: When running out of temporary tablespace, it
1273 		would probably be better to just drop all temporary
1274 		tables (and temporary undo log records) of the current
1275 		connection, instead of doing this rollback. */
1276 		dict_table_close(node->table, dict_locked, FALSE);
1277 		node->table = NULL;
1278 		return false;
1279 	}
1280 
1281 	clust_index = dict_table_get_first_index(node->table);
1282 
1283 	ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
1284 					       &info_bits);
1285 
1286 	ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
1287 				       node->heap);
1288 
1289 	ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
1290 				       roll_ptr, info_bits,
1291 				       node->heap, &(node->update));
1292 	node->new_trx_id = trx_id;
1293 	node->cmpl_info = cmpl_info;
1294 	ut_ad(!node->ref->info_bits);
1295 
1296 	if (node->update->info_bits & REC_INFO_MIN_REC_FLAG) {
1297 		if ((node->update->info_bits & ~REC_INFO_DELETED_FLAG)
1298 		    != REC_INFO_MIN_REC_FLAG) {
1299 			ut_ad(!"wrong info_bits in undo log record");
1300 			goto close_table;
1301 		}
1302 		/* This must be an undo log record for a subsequent
1303 		instant ALTER TABLE, extending the metadata record. */
1304 		ut_ad(clust_index->is_instant());
1305 		ut_ad(clust_index->table->instant
1306 		      || !(node->update->info_bits & REC_INFO_DELETED_FLAG));
1307 		node->ref = &trx_undo_metadata;
1308 		node->update->info_bits = (node->update->info_bits
1309 					   & REC_INFO_DELETED_FLAG)
1310 			? REC_INFO_METADATA_ALTER
1311 			: REC_INFO_METADATA_ADD;
1312 	}
1313 
1314 	if (!row_undo_search_clust_to_pcur(node)) {
1315 		/* As long as this rolling-back transaction exists,
1316 		the PRIMARY KEY value pointed to by the undo log
1317 		record should exist.
1318 
1319 		However, if InnoDB is killed during a rollback, or
1320 		shut down during the rollback of recovered
1321 		transactions, then after restart we may try to roll
1322 		back some of the same undo log records again, because
1323 		trx_roll_try_truncate() is not being invoked after
1324 		every undo log record.
1325 
1326 		It is also possible that the record
1327 		was not modified yet (the DB_ROLL_PTR does not match
1328 		node->roll_ptr) and thus there is nothing to roll back.
1329 
1330 		btr_cur_upd_lock_and_undo() only writes the undo log
1331 		record after successfully acquiring an exclusive lock
1332 		on the clustered index record. That lock will not
1333 		be released before the transaction is committed or
1334 		fully rolled back. (Exception: if the server was
1335 		killed, restarted, and shut down again before the
1336 		rollback of the recovered transaction was completed,
1337 		it is possible that the transaction was partially
1338 		rolled back and locks released.) */
1339 		goto close_table;
1340 	}
1341 
1342 	/* Extract indexed virtual columns from undo log */
1343 	if (node->ref != &trx_undo_metadata && node->table->n_v_cols) {
1344 		row_upd_replace_vcol(node->row, node->table,
1345 				     node->update, false, node->undo_row,
1346 				     (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)
1347 					? NULL : ptr);
1348 	}
1349 
1350 	return true;
1351 }
1352 
1353 /***********************************************************//**
1354 Undoes a modify operation on a row of a table.
1355 @return DB_SUCCESS or error code */
1356 dberr_t
1357 row_undo_mod(
1358 /*=========*/
1359 	undo_node_t*	node,	/*!< in: row undo node */
1360 	que_thr_t*	thr)	/*!< in: query thread */
1361 {
1362 	dberr_t	err;
1363 	ut_ad(thr_get_trx(thr) == node->trx);
1364 	const bool dict_locked = node->trx->dict_operation_lock_mode
1365 		== RW_X_LATCH;
1366 
1367 	if (!row_undo_mod_parse_undo_rec(node, dict_locked)) {
1368 		return DB_SUCCESS;
1369 	}
1370 
1371 	node->index = dict_table_get_first_index(node->table);
1372 	ut_ad(dict_index_is_clust(node->index));
1373 
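	/* A nonzero info_bits in the key means that this undo record
	concerns the hidden metadata record of instant ALTER TABLE. Such
	a record exists only in the clustered index, so the secondary
	index handling below is skipped. */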
1374 	if (node->ref->info_bits) {
1375 		ut_ad(node->ref->is_metadata());
1376 		goto rollback_clust;
1377 	}
1378 
1379 	/* Skip the clustered index (the first index) */
1380 	node->index = dict_table_get_next_index(node->index);
1381 
1382 	/* Skip all corrupted secondary index */
1383 	dict_table_skip_corrupt_index(node->index);
1384 
1385 	switch (node->rec_type) {
1386 	case TRX_UNDO_UPD_EXIST_REC:
1387 		err = row_undo_mod_upd_exist_sec(node, thr);
1388 		break;
1389 	case TRX_UNDO_DEL_MARK_REC:
1390 		err = row_undo_mod_del_mark_sec(node, thr);
1391 		break;
1392 	case TRX_UNDO_UPD_DEL_REC:
1393 		err = row_undo_mod_upd_del_sec(node, thr);
1394 		break;
1395 	default:
1396 		ut_error;
1397 		err = DB_ERROR;
1398 	}
1399 
1400 	if (err == DB_SUCCESS) {
1401 rollback_clust:
1402 		err = row_undo_mod_clust(node, thr);
1403 
1404 		bool update_statistics
1405 			= !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE);
1406 
1407 		if (err == DB_SUCCESS && node->table->stat_initialized) {
1408 			switch (node->rec_type) {
1409 			case TRX_UNDO_UPD_EXIST_REC:
1410 				break;
1411 			case TRX_UNDO_DEL_MARK_REC:
1412 				dict_table_n_rows_inc(node->table);
1413 				update_statistics = update_statistics
1414 					|| !srv_stats_include_delete_marked;
1415 				break;
1416 			case TRX_UNDO_UPD_DEL_REC:
1417 				dict_table_n_rows_dec(node->table);
1418 				update_statistics = update_statistics
1419 					|| !srv_stats_include_delete_marked;
1420 				break;
1421 			}
1422 
1423 			/* Do not attempt to update statistics when
1424 			executing ROLLBACK in the InnoDB SQL
1425 			interpreter, because in that case we would
1426 			already be holding dict_sys.mutex, which
1427 			would be acquired when updating statistics. */
1428 			if (update_statistics && !dict_locked) {
1429 				dict_stats_update_if_needed(node->table,
1430 							    *node->trx);
1431 			} else {
1432 				node->table->stat_modified_counter++;
1433 			}
1434 		}
1435 	}
1436 
1437 	dict_table_close(node->table, dict_locked, FALSE);
1438 
1439 	node->table = NULL;
1440 
1441 	return(err);
1442 }
1443