1 /*****************************************************************************
2 
3 Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file row/row0vers.cc
29 Row versions
30 
31 Created 2/6/1997 Heikki Tuuri
32 *******************************************************/
33 
34 #include "row0vers.h"
35 
36 #ifdef UNIV_NONINL
37 #include "row0vers.ic"
38 #endif
39 
40 #include "dict0dict.h"
41 #include "dict0boot.h"
42 #include "btr0btr.h"
43 #include "mach0data.h"
44 #include "trx0rseg.h"
45 #include "trx0trx.h"
46 #include "trx0roll.h"
47 #include "trx0undo.h"
48 #include "trx0purge.h"
49 #include "trx0rec.h"
50 #include "que0que.h"
51 #include "row0row.h"
52 #include "row0upd.h"
53 #include "rem0cmp.h"
54 #include "read0read.h"
55 #include "lock0lock.h"
56 
57 /*****************************************************************//**
58 Finds out if an active transaction has inserted or modified a secondary
59 index record.
60 @return 0 if committed, else the active transaction id;
61 NOTE that this function can return false positives but never false
62 negatives. The caller must confirm all positive results by calling
63 trx_is_active() while holding lock_sys->mutex. */
64 UNIV_INLINE
65 trx_id_t
row_vers_impl_x_locked_low(const rec_t * clust_rec,dict_index_t * clust_index,const rec_t * rec,dict_index_t * index,const ulint * offsets,mtr_t * mtr)66 row_vers_impl_x_locked_low(
67 /*=======================*/
68 	const rec_t*	clust_rec,	/*!< in: clustered index record */
69 	dict_index_t*	clust_index,	/*!< in: the clustered index */
70 	const rec_t*	rec,		/*!< in: secondary index record */
71 	dict_index_t*	index,		/*!< in: the secondary index */
72 	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
73 	mtr_t*		mtr)		/*!< in/out: mini-transaction */
74 {
75 	trx_id_t	trx_id;
76 	ibool		corrupt;
77 	ulint		comp;
78 	ulint		rec_del;
79 	const rec_t*	version;
80 	rec_t*		prev_version = NULL;
81 	ulint*		clust_offsets;
82 	mem_heap_t*	heap;
83 
84 	DBUG_ENTER("row_vers_impl_x_locked_low");
85 
86 	ut_ad(rec_offs_validate(rec, index, offsets));
87 
88 	heap = mem_heap_create(1024);
89 
90 	clust_offsets = rec_get_offsets(
91 		clust_rec, clust_index, NULL, ULINT_UNDEFINED, &heap);
92 
93 	trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets);
94 	corrupt = FALSE;
95 
96 	if (!trx_rw_is_active(trx_id, &corrupt)) {
97 		/* The transaction that modified or inserted clust_rec is no
98 		longer active, or it is corrupt: no implicit lock on rec */
99 		if (corrupt) {
100 			lock_report_trx_id_insanity(
101 				trx_id, clust_rec, clust_index, clust_offsets,
102 				trx_sys_get_max_trx_id());
103 		}
104 		mem_heap_free(heap);
105 		DBUG_RETURN(0);
106 	}
107 
108 	comp = page_rec_is_comp(rec);
109 	ut_ad(index->table == clust_index->table);
110 	ut_ad(!!comp == dict_table_is_comp(index->table));
111 	ut_ad(!comp == !page_rec_is_comp(clust_rec));
112 
113 	rec_del = rec_get_deleted_flag(rec, comp);
114 
115 	/* We look up if some earlier version, which was modified by
116 	the trx_id transaction, of the clustered index record would
117 	require rec to be in a different state (delete marked or
118 	unmarked, or have different field values, or not existing). If
119 	there is such a version, then rec was modified by the trx_id
120 	transaction, and it has an implicit x-lock on rec. Note that
121 	if clust_rec itself would require rec to be in a different
122 	state, then the trx_id transaction has not yet had time to
123 	modify rec, and does not necessarily have an implicit x-lock
124 	on rec. */
125 
126 	for (version = clust_rec;; version = prev_version) {
127 		row_ext_t*	ext;
128 		const dtuple_t*	row;
129 		dtuple_t*	entry;
130 		ulint		vers_del;
131 		trx_id_t	prev_trx_id;
132 		mem_heap_t*	old_heap = heap;
133 
134 		/* We keep the semaphore in mtr on the clust_rec page, so
135 		that no other transaction can update it and get an
136 		implicit x-lock on rec until mtr_commit(mtr). */
137 
138 		heap = mem_heap_create(1024);
139 
140 		trx_undo_prev_version_build(
141 			clust_rec, mtr, version, clust_index, clust_offsets,
142 			heap, &prev_version);
143 
144 		/* The oldest visible clustered index version must not be
145 		delete-marked, because we never start a transaction by
146 		inserting a delete-marked record. */
147 		ut_ad(prev_version
148 		      || !rec_get_deleted_flag(version, comp)
149 		      || !trx_rw_is_active(trx_id, NULL));
150 
151 		/* Free version and clust_offsets. */
152 		mem_heap_free(old_heap);
153 
154 		if (prev_version == NULL) {
155 
156 			/* We reached the oldest visible version without
157 			finding an older version of clust_rec that would
158 			match the secondary index record.  If the secondary
159 			index record is not delete marked, then clust_rec
160 			is considered the correct match of the secondary
161 			index record and hence holds the implicit lock. */
162 
163 			if (rec_del) {
164 				/* The secondary index record is del marked.
165 				So, the implicit lock holder of clust_rec
166 				did not modify the secondary index record yet,
167 				and is not holding an implicit lock on it.
168 
169 				This assumes that whenever a row is inserted
170 				or updated, the leaf page record always is
171 				created with a clear delete-mark flag.
172 				(We never insert a delete-marked record.) */
173 				trx_id = 0;
174 			}
175 
176 			break;
177 		}
178 
179 		clust_offsets = rec_get_offsets(
180 			prev_version, clust_index, NULL, ULINT_UNDEFINED,
181 			&heap);
182 
183 		vers_del = rec_get_deleted_flag(prev_version, comp);
184 
185 		prev_trx_id = row_get_rec_trx_id(prev_version, clust_index,
186 						 clust_offsets);
187 
188 		/* The stack of versions is locked by mtr.  Thus, it
189 		is safe to fetch the prefixes for externally stored
190 		columns. */
191 
192 		row = row_build(ROW_COPY_POINTERS, clust_index, prev_version,
193 				clust_offsets,
194 				NULL, NULL, NULL, &ext, heap);
195 
196 		entry = row_build_index_entry(row, ext, index, heap);
197 
198 		/* entry may be NULL if a record was inserted in place
199 		of a deleted record, and the BLOB pointers of the new
200 		record were not initialized yet.  But in that case,
201 		prev_version should be NULL. */
202 
203 		ut_a(entry != NULL);
204 
205 		/* If we get here, we know that the trx_id transaction
206 		modified prev_version. Let us check if prev_version
207 		would require rec to be in a different state. */
208 
209 		/* The previous version of clust_rec must be
210 		accessible, because clust_rec was not a fresh insert.
211 		There is no guarantee that the transaction is still
212 		active. */
213 
214 		/* We check if entry and rec are identified in the alphabetical
215 		ordering */
216 
217 		if (!trx_rw_is_active(trx_id, &corrupt)) {
218 			/* Transaction no longer active: no implicit
219 			x-lock. This situation should only be possible
220 			because we are not holding lock_sys->mutex. */
221 			ut_ad(!lock_mutex_own());
222 			if (corrupt) {
223 				lock_report_trx_id_insanity(
224 					trx_id,
225 					prev_version, clust_index,
226 					clust_offsets,
227 					trx_sys_get_max_trx_id());
228 			}
229 			trx_id = 0;
230 			break;
231 		} else if (0 == cmp_dtuple_rec(entry, rec, offsets)) {
232 			/* The delete marks of rec and prev_version should be
233 			equal for rec to be in the state required by
234 			prev_version */
235 
236 			if (rec_del != vers_del) {
237 
238 				break;
239 			}
240 
241 			/* It is possible that the row was updated so that the
242 			secondary index record remained the same in
243 			alphabetical ordering, but the field values changed
244 			still. For example, 'abc' -> 'ABC'. Check also that. */
245 
246 			dtuple_set_types_binary(
247 				entry, dtuple_get_n_fields(entry));
248 
249 			if (0 != cmp_dtuple_rec(entry, rec, offsets)) {
250 
251 				break;
252 			}
253 
254 		} else if (!rec_del) {
255 			/* The delete mark should be set in rec for it to be
256 			in the state required by prev_version */
257 
258 			break;
259 		}
260 
261 		if (trx_id != prev_trx_id) {
262 			/* prev_version was the first version modified by
263 			the trx_id transaction: no implicit x-lock */
264 
265 			trx_id = 0;
266 			break;
267 		}
268 	}
269 
270 	DBUG_PRINT("info", ("Implicit lock is held by trx:%lu",
271 		static_cast<unsigned long>(trx_id)));
272 
273 	mem_heap_free(heap);
274 	DBUG_RETURN(trx_id);
275 }
276 
277 /*****************************************************************//**
278 Finds out if an active transaction has inserted or modified a secondary
279 index record.
280 @return 0 if committed, else the active transaction id;
281 NOTE that this function can return false positives but never false
282 negatives. The caller must confirm all positive results by calling
283 trx_is_active() while holding lock_sys->mutex. */
284 UNIV_INTERN
285 trx_id_t
row_vers_impl_x_locked(const rec_t * rec,dict_index_t * index,const ulint * offsets)286 row_vers_impl_x_locked(
287 /*===================*/
288 	const rec_t*	rec,	/*!< in: record in a secondary index */
289 	dict_index_t*	index,	/*!< in: the secondary index */
290 	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
291 {
292 	dict_index_t*	clust_index;
293 	const rec_t*	clust_rec;
294 	trx_id_t	trx_id;
295 	mtr_t		mtr;
296 
297 	ut_ad(!lock_mutex_own());
298 	ut_ad(!mutex_own(&trx_sys->mutex));
299 
300 	mtr_start(&mtr);
301 
302 	/* Search for the clustered index record. The latch on the
303 	page of clust_rec locks the top of the stack of versions. The
304 	bottom of the version stack is not locked; oldest versions may
305 	disappear by the fact that transactions may be committed and
306 	collected by the purge. This is not a problem, because we are
307 	only interested in active transactions. */
308 
309 	clust_rec = row_get_clust_rec(
310 		BTR_SEARCH_LEAF, rec, index, &clust_index, &mtr);
311 
312 	if (UNIV_UNLIKELY(!clust_rec)) {
313 		/* In a rare case it is possible that no clust rec is found
314 		for a secondary index record: if in row0umod.cc
315 		row_undo_mod_remove_clust_low() we have already removed the
316 		clust rec, while purge is still cleaning and removing
317 		secondary index records associated with earlier versions of
318 		the clustered index record. In that case there cannot be
319 		any implicit lock on the secondary index record, because
320 		an active transaction which has modified the secondary index
321 		record has also modified the clustered index record. And in
322 		a rollback we always undo the modifications to secondary index
323 		records before the clustered index record. */
324 
325 		trx_id = 0;
326 	} else {
327 		trx_id = row_vers_impl_x_locked_low(
328 			clust_rec, clust_index, rec, index, offsets, &mtr);
329 	}
330 
331 	mtr_commit(&mtr);
332 
333 	return(trx_id);
334 }
335 
336 /*****************************************************************//**
337 Finds out if we must preserve a delete marked earlier version of a clustered
338 index record, because it is >= the purge view.
339 @return	TRUE if earlier version should be preserved */
340 UNIV_INTERN
341 ibool
row_vers_must_preserve_del_marked(trx_id_t trx_id,mtr_t * mtr)342 row_vers_must_preserve_del_marked(
343 /*==============================*/
344 	trx_id_t	trx_id,	/*!< in: transaction id in the version */
345 	mtr_t*		mtr)	/*!< in: mtr holding the latch on the
346 				clustered index record; it will also
347 				hold the latch on purge_view */
348 {
349 #ifdef UNIV_SYNC_DEBUG
350 	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
351 #endif /* UNIV_SYNC_DEBUG */
352 
353 	mtr_s_lock(&(purge_sys->latch), mtr);
354 
355 	return(!read_view_sees_trx_id(purge_sys->view, trx_id));
356 }
357 
358 /*****************************************************************//**
359 Finds out if a version of the record, where the version >= the current
360 purge view, should have ientry as its secondary index entry. We check
361 if there is any not delete marked version of the record where the trx
362 id >= purge view, and the secondary index entry and ientry are identified in
363 the alphabetical ordering; exactly in this case we return TRUE.
364 @return	TRUE if earlier version should have */
365 UNIV_INTERN
366 ibool
row_vers_old_has_index_entry(ibool also_curr,const rec_t * rec,mtr_t * mtr,dict_index_t * index,const dtuple_t * ientry)367 row_vers_old_has_index_entry(
368 /*=========================*/
369 	ibool		also_curr,/*!< in: TRUE if also rec is included in the
370 				versions to search; otherwise only versions
371 				prior to it are searched */
372 	const rec_t*	rec,	/*!< in: record in the clustered index; the
373 				caller must have a latch on the page */
374 	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec; it will
375 				also hold the latch on purge_view */
376 	dict_index_t*	index,	/*!< in: the secondary index */
377 	const dtuple_t*	ientry)	/*!< in: the secondary index entry */
378 {
379 	const rec_t*	version;
380 	rec_t*		prev_version;
381 	dict_index_t*	clust_index;
382 	ulint*		clust_offsets;
383 	mem_heap_t*	heap;
384 	mem_heap_t*	heap2;
385 	const dtuple_t*	row;
386 	const dtuple_t*	entry;
387 	ulint		comp;
388 
389 	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
390 	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
391 #ifdef UNIV_SYNC_DEBUG
392 	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
393 #endif /* UNIV_SYNC_DEBUG */
394 
395 	clust_index = dict_table_get_first_index(index->table);
396 
397 	comp = page_rec_is_comp(rec);
398 	ut_ad(!dict_table_is_comp(index->table) == !comp);
399 	heap = mem_heap_create(1024);
400 	clust_offsets = rec_get_offsets(rec, clust_index, NULL,
401 					ULINT_UNDEFINED, &heap);
402 
403 	if (also_curr && !rec_get_deleted_flag(rec, comp)) {
404 		row_ext_t*	ext;
405 
406 		/* The top of the stack of versions is locked by the
407 		mtr holding a latch on the page containing the
408 		clustered index record. The bottom of the stack is
409 		locked by the fact that the purge_sys->view must
410 		'overtake' any read view of an active transaction.
411 		Thus, it is safe to fetch the prefixes for
412 		externally stored columns. */
413 		row = row_build(ROW_COPY_POINTERS, clust_index,
414 				rec, clust_offsets,
415 				NULL, NULL, NULL, &ext, heap);
416 		entry = row_build_index_entry(row, ext, index, heap);
417 
418 		/* If entry == NULL, the record contains unset BLOB
419 		pointers.  This must be a freshly inserted record.  If
420 		this is called from
421 		row_purge_remove_sec_if_poss_low(), the thread will
422 		hold latches on the clustered index and the secondary
423 		index.  Because the insert works in three steps:
424 
425 			(1) insert the record to clustered index
426 			(2) store the BLOBs and update BLOB pointers
427 			(3) insert records to secondary indexes
428 
429 		the purge thread can safely ignore freshly inserted
430 		records and delete the secondary index record.  The
431 		thread that inserted the new record will be inserting
432 		the secondary index records. */
433 
434 		/* NOTE that we cannot do the comparison as binary
435 		fields because the row is maybe being modified so that
436 		the clustered index record has already been updated to
437 		a different binary value in a char field, but the
438 		collation identifies the old and new value anyway! */
439 		if (entry && !dtuple_coll_cmp(ientry, entry)) {
440 
441 			mem_heap_free(heap);
442 
443 			return(TRUE);
444 		}
445 	}
446 
447 	version = rec;
448 
449 	for (;;) {
450 		heap2 = heap;
451 		heap = mem_heap_create(1024);
452 		trx_undo_prev_version_build(rec, mtr, version,
453 					    clust_index, clust_offsets,
454 					    heap, &prev_version);
455 		mem_heap_free(heap2); /* free version and clust_offsets */
456 
457 		if (!prev_version) {
458 			/* Versions end here */
459 
460 			mem_heap_free(heap);
461 
462 			return(FALSE);
463 		}
464 
465 		clust_offsets = rec_get_offsets(prev_version, clust_index,
466 						NULL, ULINT_UNDEFINED, &heap);
467 
468 		if (!rec_get_deleted_flag(prev_version, comp)) {
469 			row_ext_t*	ext;
470 
471 			/* The stack of versions is locked by mtr.
472 			Thus, it is safe to fetch the prefixes for
473 			externally stored columns. */
474 			row = row_build(ROW_COPY_POINTERS, clust_index,
475 					prev_version, clust_offsets,
476 					NULL, NULL, NULL, &ext, heap);
477 			entry = row_build_index_entry(row, ext, index, heap);
478 
479 			/* If entry == NULL, the record contains unset
480 			BLOB pointers.  This must be a freshly
481 			inserted record that we can safely ignore.
482 			For the justification, see the comments after
483 			the previous row_build_index_entry() call. */
484 
485 			/* NOTE that we cannot do the comparison as binary
486 			fields because maybe the secondary index record has
487 			already been updated to a different binary value in
488 			a char field, but the collation identifies the old
489 			and new value anyway! */
490 
491 			if (entry && !dtuple_coll_cmp(ientry, entry)) {
492 
493 				mem_heap_free(heap);
494 
495 				return(TRUE);
496 			}
497 		}
498 
499 		version = prev_version;
500 	}
501 }
502 
503 /*****************************************************************//**
504 Constructs the version of a clustered index record which a consistent
505 read should see. We assume that the trx id stored in rec is such that
506 the consistent read should not see rec in its present version.
507 @return	DB_SUCCESS or DB_MISSING_HISTORY */
508 UNIV_INTERN
509 dberr_t
row_vers_build_for_consistent_read(const rec_t * rec,mtr_t * mtr,dict_index_t * index,ulint ** offsets,read_view_t * view,mem_heap_t ** offset_heap,mem_heap_t * in_heap,rec_t ** old_vers)510 row_vers_build_for_consistent_read(
511 /*===============================*/
512 	const rec_t*	rec,	/*!< in: record in a clustered index; the
513 				caller must have a latch on the page; this
514 				latch locks the top of the stack of versions
515 				of this records */
516 	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec */
517 	dict_index_t*	index,	/*!< in: the clustered index */
518 	ulint**		offsets,/*!< in/out: offsets returned by
519 				rec_get_offsets(rec, index) */
520 	read_view_t*	view,	/*!< in: the consistent read view */
521 	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
522 				the offsets are allocated */
523 	mem_heap_t*	in_heap,/*!< in: memory heap from which the memory for
524 				*old_vers is allocated; memory for possible
525 				intermediate versions is allocated and freed
526 				locally within the function */
527 	rec_t**		old_vers)/*!< out, own: old version, or NULL
528 				if the history is missing or the record
529 				does not exist in the view, that is,
530 				it was freshly inserted afterwards */
531 {
532 	const rec_t*	version;
533 	rec_t*		prev_version;
534 	trx_id_t	trx_id;
535 	mem_heap_t*	heap		= NULL;
536 	byte*		buf;
537 	dberr_t		err;
538 
539 	ut_ad(dict_index_is_clust(index));
540 	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
541 	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
542 #ifdef UNIV_SYNC_DEBUG
543 	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
544 #endif /* UNIV_SYNC_DEBUG */
545 
546 	ut_ad(rec_offs_validate(rec, index, *offsets));
547 
548 	trx_id = row_get_rec_trx_id(rec, index, *offsets);
549 
550 	ut_ad(!read_view_sees_trx_id(view, trx_id));
551 
552 	version = rec;
553 
554 	for (;;) {
555 		mem_heap_t*	heap2	= heap;
556 		trx_undo_rec_t* undo_rec;
557 		roll_ptr_t	roll_ptr;
558 		undo_no_t	undo_no;
559 		heap = mem_heap_create(1024);
560 
561 		/* If we have high-granularity consistent read view and
562 		creating transaction of the view is the same as trx_id in
563 		the record we see this record only in the case when
564 		undo_no of the record is < undo_no in the view. */
565 
566 		if (view->type == VIEW_HIGH_GRANULARITY
567 		    && view->creator_trx_id == trx_id) {
568 
569 			roll_ptr = row_get_rec_roll_ptr(version, index,
570 							*offsets);
571 			undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
572 			undo_no = trx_undo_rec_get_undo_no(undo_rec);
573 			mem_heap_empty(heap);
574 
575 			if (view->undo_no > undo_no) {
576 				/* The view already sees this version: we can
577 				copy it to in_heap and return */
578 
579 #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
580 				ut_a(!rec_offs_any_null_extern(
581 					     version, *offsets));
582 #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
583 
584 				buf = static_cast<byte*>(mem_heap_alloc(
585 					in_heap, rec_offs_size(*offsets)));
586 
587 				*old_vers = rec_copy(buf, version, *offsets);
588 				rec_offs_make_valid(*old_vers, index,
589 						    *offsets);
590 				err = DB_SUCCESS;
591 				break;
592 			}
593 		}
594 
595 		err = trx_undo_prev_version_build(rec, mtr, version, index,
596 						  *offsets, heap,
597 						  &prev_version)
598 			? DB_SUCCESS : DB_MISSING_HISTORY;
599 		if (heap2) {
600 			mem_heap_free(heap2); /* free version */
601 		}
602 
603 		if (prev_version == NULL) {
604 			/* It was a freshly inserted version */
605 			*old_vers = NULL;
606 			break;
607 		}
608 
609 		*offsets = rec_get_offsets(prev_version, index, *offsets,
610 					   ULINT_UNDEFINED, offset_heap);
611 
612 #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
613 		ut_a(!rec_offs_any_null_extern(prev_version, *offsets));
614 #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
615 
616 		trx_id = row_get_rec_trx_id(prev_version, index, *offsets);
617 
618 		if (read_view_sees_trx_id(view, trx_id)) {
619 
620 			/* The view already sees this version: we can copy
621 			it to in_heap and return */
622 
623 			buf = static_cast<byte*>(
624 				mem_heap_alloc(
625 					in_heap, rec_offs_size(*offsets)));
626 
627 			*old_vers = rec_copy(buf, prev_version, *offsets);
628 			rec_offs_make_valid(*old_vers, index, *offsets);
629 			break;
630 		}
631 
632 		version = prev_version;
633 	}/* for (;;) */
634 
635 	mem_heap_free(heap);
636 
637 	return(err);
638 }
639 
640 /*****************************************************************//**
641 Constructs the last committed version of a clustered index record,
642 which should be seen by a semi-consistent read. */
643 UNIV_INTERN
644 void
row_vers_build_for_semi_consistent_read(const rec_t * rec,mtr_t * mtr,dict_index_t * index,ulint ** offsets,mem_heap_t ** offset_heap,mem_heap_t * in_heap,const rec_t ** old_vers)645 row_vers_build_for_semi_consistent_read(
646 /*====================================*/
647 	const rec_t*	rec,	/*!< in: record in a clustered index; the
648 				caller must have a latch on the page; this
649 				latch locks the top of the stack of versions
650 				of this records */
651 	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec */
652 	dict_index_t*	index,	/*!< in: the clustered index */
653 	ulint**		offsets,/*!< in/out: offsets returned by
654 				rec_get_offsets(rec, index) */
655 	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
656 				the offsets are allocated */
657 	mem_heap_t*	in_heap,/*!< in: memory heap from which the memory for
658 				*old_vers is allocated; memory for possible
659 				intermediate versions is allocated and freed
660 				locally within the function */
661 	const rec_t**	old_vers)/*!< out: rec, old version, or NULL if the
662 				record does not exist in the view, that is,
663 				it was freshly inserted afterwards */
664 {
665 	const rec_t*	version;
666 	mem_heap_t*	heap		= NULL;
667 	byte*		buf;
668 	trx_id_t	rec_trx_id	= 0;
669 
670 	ut_ad(dict_index_is_clust(index));
671 	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
672 	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
673 #ifdef UNIV_SYNC_DEBUG
674 	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
675 #endif /* UNIV_SYNC_DEBUG */
676 
677 	ut_ad(rec_offs_validate(rec, index, *offsets));
678 
679 	version = rec;
680 
681 	for (;;) {
682 		trx_id_t*	version_trx_descr;
683 		mem_heap_t*	heap2;
684 		rec_t*		prev_version;
685 		trx_id_t	version_trx_id;
686 
687 		version_trx_id = row_get_rec_trx_id(version, index, *offsets);
688 		if (rec == version) {
689 			rec_trx_id = version_trx_id;
690 		}
691 
692 		mutex_enter(&trx_sys->mutex);
693 		version_trx_descr = trx_find_descriptor(trx_sys->descriptors,
694 							trx_sys->descr_n_used,
695 							version_trx_id);
696 		/* Because version_trx is a read-write transaction,
697 		its state cannot change from or to NOT_STARTED while
698 		we are holding the trx_sys->mutex.  It may change from
699 		ACTIVE to PREPARED or COMMITTED. */
700 		mutex_exit(&trx_sys->mutex);
701 
702 		if (!version_trx_descr) {
703 committed_version_trx:
704 			/* We found a version that belongs to a
705 			committed transaction: return it. */
706 
707 #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
708 			ut_a(!rec_offs_any_null_extern(version, *offsets));
709 #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
710 
711 			if (rec == version) {
712 				*old_vers = rec;
713 				break;
714 			}
715 
716 			/* We assume that a rolled-back transaction stays in
717 			TRX_STATE_ACTIVE state until all the changes have been
718 			rolled back and the transaction is removed from
719 			the global list of transactions. */
720 
721 			if (rec_trx_id == version_trx_id) {
722 				/* The transaction was committed while
723 				we searched for earlier versions.
724 				Return the current version as a
725 				semi-consistent read. */
726 
727 				version = rec;
728 				*offsets = rec_get_offsets(version,
729 							   index, *offsets,
730 							   ULINT_UNDEFINED,
731 							   offset_heap);
732 			}
733 
734 			buf = static_cast<byte*>(
735 				mem_heap_alloc(
736 					in_heap, rec_offs_size(*offsets)));
737 
738 			*old_vers = rec_copy(buf, version, *offsets);
739 			rec_offs_make_valid(*old_vers, index, *offsets);
740 			break;
741 		}
742 
743 		DEBUG_SYNC_C("after_row_vers_check_trx_active");
744 
745 		heap2 = heap;
746 		heap = mem_heap_create(1024);
747 
748 		if (!trx_undo_prev_version_build(rec, mtr, version, index,
749 						 *offsets, heap,
750 						 &prev_version)) {
751 			mem_heap_free(heap);
752 			heap = heap2;
753 			heap2 = NULL;
754 			goto committed_version_trx;
755 		}
756 
757 		if (heap2) {
758 			mem_heap_free(heap2); /* free version */
759 		}
760 
761 		if (prev_version == NULL) {
762 			/* It was a freshly inserted version */
763 			*old_vers = NULL;
764 			break;
765 		}
766 
767 		version = prev_version;
768 		*offsets = rec_get_offsets(version, index, *offsets,
769 					   ULINT_UNDEFINED, offset_heap);
770 #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
771 		ut_a(!rec_offs_any_null_extern(version, *offsets));
772 #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
773 	}/* for (;;) */
774 
775 	if (heap) {
776 		mem_heap_free(heap);
777 	}
778 }
779