1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file read/read0read.cc
29 Cursor read
30 
31 Created 2/16/1997 Heikki Tuuri
32 *******************************************************/
33 
34 #include "read0read.h"
35 
36 #ifdef UNIV_NONINL
37 #include "read0read.ic"
38 #endif
39 
40 #include "srv0srv.h"
41 #include "trx0sys.h"
42 
43 /*
44 -------------------------------------------------------------------------------
45 FACT A: Cursor read view on a secondary index sees only committed versions
46 -------
47 of the records in the secondary index or those versions of rows created
48 by transaction which created a cursor before cursor was created even
49 if transaction which created the cursor has changed that clustered index page.
50 
51 PROOF: We must show that read goes always to the clustered index record
52 to see that record is visible in the cursor read view. Consider e.g.
53 following table and SQL-clauses:
54 
55 create table t1(a int not null, b int, primary key(a), index(b));
56 insert into t1 values (1,1),(2,2);
57 commit;
58 
59 Now consider that we have a cursor for a query
60 
61 select b from t1 where b >= 1;
62 
63 This query will use secondary key on the table t1. Now after the first fetch
64 on this cursor if we do a update:
65 
66 update t1 set b = 5 where b = 2;
67 
68 Now second fetch of the cursor should not see record (2,5) instead it should
69 see record (2,2).
70 
71 We also should show that if we have delete t1 where b = 5; we still
72 can see record (2,2).
73 
74 When we access a secondary key record maximum transaction id is fetched
75 from this record and this trx_id is compared to up_limit_id in the view.
76 If trx_id in the record is greater or equal than up_limit_id in the view
77 cluster record is accessed.  Because trx_id of the creating
78 transaction is stored when this view was created to the list of
79 trx_ids not seen by this read view previous version of the
80 record is requested to be built. This is build using clustered record.
81 If the secondary key record is delete-marked, its corresponding
82 clustered record can be already be purged only if records
83 trx_id < low_limit_no. Purge can't remove any record deleted by a
84 transaction which was active when cursor was created. But, we still
85 may have a deleted secondary key record but no clustered record. But,
86 this is not a problem because this case is handled in
87 row_sel_get_clust_rec() function which is called
88 whenever we note that this read view does not see trx_id in the
89 record. Thus, we see correct version. Q. E. D.
90 
91 -------------------------------------------------------------------------------
92 FACT B: Cursor read view on a clustered index sees only committed versions
93 -------
94 of the records in the clustered index or those versions of rows created
95 by transaction which created a cursor before cursor was created even
96 if transaction which created the cursor has changed that clustered index page.
97 
98 PROOF:  Consider e.g.following table and SQL-clauses:
99 
100 create table t1(a int not null, b int, primary key(a));
101 insert into t1 values (1),(2);
102 commit;
103 
104 Now consider that we have a cursor for a query
105 
106 select a from t1 where a >= 1;
107 
108 This query will use clustered key on the table t1. Now after the first fetch
109 on this cursor if we do a update:
110 
111 update t1 set a = 5 where a = 2;
112 
113 Now second fetch of the cursor should not see record (5) instead it should
114 see record (2).
115 
116 We also should show that if we have execute delete t1 where a = 5; after
117 the cursor is opened we still can see record (2).
118 
119 When accessing clustered record we always check if this read view sees
120 trx_id stored to clustered record. By default we don't see any changes
121 if record trx_id >= low_limit_id i.e. change was made transaction
122 which started after transaction which created the cursor. If row
123 was changed by the future transaction a previous version of the
124 clustered record is created. Thus we see only committed version in
125 this case. We see all changes made by committed transactions i.e.
126 record trx_id < up_limit_id. In this case we don't need to do anything,
127 we already see correct version of the record. We don't see any changes
128 made by active transaction except creating transaction. We have stored
129 trx_id of creating transaction to list of trx_ids when this view was
130 created. Thus we can easily see if this record was changed by the
131 creating transaction. Because we already have clustered record we can
132 access roll_ptr. Using this roll_ptr we can fetch undo record.
133 We can now check that undo_no of the undo record is less than undo_no of the
134 trancaction which created a view when cursor was created. We see this
135 clustered record only in case when record undo_no is less than undo_no
136 in the view. If this is not true we build based on undo_rec previous
137 version of the record. This record is found because purge can't remove
138 records accessed by active transaction. Thus we see correct version. Q. E. D.
139 -------------------------------------------------------------------------------
140 FACT C: Purge does not remove any delete-marked row that is visible
141 -------
142 in any cursor read view.
143 
144 PROOF: We know that:
145  1: Currently active read views in trx_sys_t::view_list are ordered by
146     read_view_t::low_limit_no in descending order, that is,
147     newest read view first.
148 
149  2: Purge clones the oldest read view and uses that to determine whether there
150     are any active transactions that can see the to be purged records.
151 
152 Therefore any joining or active transaction will not have a view older
153 than the purge view, according to 1.
154 
155 When purge needs to remove a delete-marked row from a secondary index,
156 it will first check that the DB_TRX_ID value of the corresponding
157 record in the clustered index is older than the purge view. It will
158 also check if there is a newer version of the row (clustered index
159 record) that is not delete-marked in the secondary index. If such a
160 row exists and is collation-equal to the delete-marked secondary index
161 record then purge will not remove the secondary index record.
162 
163 Delete-marked clustered index records will be removed by
164 row_purge_remove_clust_if_poss(), unless the clustered index record
165 (and its DB_ROLL_PTR) has been updated. Every new version of the
166 clustered index record will update DB_ROLL_PTR, pointing to a new UNDO
167 log entry that allows the old version to be reconstructed. The
168 DB_ROLL_PTR in the oldest remaining version in the old-version chain
169 may be pointing to garbage (an undo log record discarded by purge),
170 but it will never be dereferenced, because the purge view is older
171 than any active transaction.
172 
173 For details see: row_vers_old_has_index_entry() and row_purge_poss_sec()
174 
175 Some additional issues:
176 
177 What if trx_sys->view_list == NULL and some transaction T1 and Purge both
178 try to open read_view at same time. Only one can acquire trx_sys->mutex.
179 In which order will the views be opened? Should it matter? If no, why?
180 
181 The order does not matter. No new transactions can be created and no running
182 transaction can commit or rollback (or free views).
183 */
184 
185 /*********************************************************************//**
186 Creates a read view object.
187 @return	own: read view struct */
188 UNIV_INLINE
189 read_view_t*
read_view_create_low(ulint n,mem_heap_t * heap)190 read_view_create_low(
191 /*=================*/
192 	ulint		n,	/*!< in: number of cells in the trx_ids array */
193 	mem_heap_t*	heap)	/*!< in: memory heap from which allocated */
194 {
195 	read_view_t*	view;
196 
197 	view = static_cast<read_view_t*>(
198 		mem_heap_alloc(
199 			heap, sizeof(*view) + n * sizeof(*view->trx_ids)));
200 
201 	view->n_trx_ids = n;
202 	view->trx_ids = (trx_id_t*) &view[1];
203 
204 	return(view);
205 }
206 
207 /*********************************************************************//**
208 Clones a read view object. This function will allocate space for two read
209 views contiguously, one identical in size and content as @param view (starting
210 at returned pointer) and another view immediately following the trx_ids array.
211 The second view will have space for an extra trx_id_t element.
212 @return	read view struct */
213 UNIV_INLINE
214 read_view_t*
read_view_clone(const read_view_t * view,mem_heap_t * heap)215 read_view_clone(
216 /*============*/
217 	const read_view_t*	view,	/*!< in: view to clone */
218 	mem_heap_t*		heap)	/*!< in: memory heap
219 					from which allocated */
220 {
221 	ulint		sz;
222 	read_view_t*	clone;
223 	read_view_t*	new_view;
224 
225 	ut_ad(mutex_own(&trx_sys->mutex));
226 
227 	/* Allocate space for two views. */
228 
229 	sz = sizeof(*view) + view->n_trx_ids * sizeof(*view->trx_ids);
230 
231 	/* Add an extra trx_id_t slot for the new view. */
232 
233 	clone = static_cast<read_view_t*>(
234 		mem_heap_alloc(heap, (sz * 2) + sizeof(trx_id_t)));
235 
236 	/* Only the contents of the old view are important, the new view
237 	will be created from this and so we don't copy that across. */
238 
239 	memcpy(clone, view, sz);
240 
241 	clone->trx_ids = (trx_id_t*) &clone[1];
242 
243 	new_view = (read_view_t*) &clone->trx_ids[clone->n_trx_ids];
244 	new_view->trx_ids = (trx_id_t*) &new_view[1];
245 	new_view->n_trx_ids = clone->n_trx_ids + 1;
246 
247 	ut_a(new_view->n_trx_ids == view->n_trx_ids + 1);
248 
249 	return(clone);
250 }
251 
252 /*********************************************************************//**
253 Insert the view in the proper order into the trx_sys->view_list. The
254 read view list is ordered by read_view_t::low_limit_no in descending order. */
255 static
256 void
read_view_add(read_view_t * view)257 read_view_add(
258 /*==========*/
259 	read_view_t*	view)		/*!< in: view to add to */
260 {
261 	read_view_t*	elem;
262 	read_view_t*	prev_elem;
263 
264 	ut_ad(mutex_own(&trx_sys->mutex));
265 	ut_ad(read_view_validate(view));
266 
267 	/* Find the correct slot for insertion. */
268 	for (elem = UT_LIST_GET_FIRST(trx_sys->view_list), prev_elem = NULL;
269 	     elem != NULL && view->low_limit_no < elem->low_limit_no;
270 	     prev_elem = elem, elem = UT_LIST_GET_NEXT(view_list, elem)) {
271 		/* No op */
272 	}
273 
274 	if (prev_elem == NULL) {
275 		UT_LIST_ADD_FIRST(view_list, trx_sys->view_list, view);
276 	} else {
277 		UT_LIST_INSERT_AFTER(
278 			view_list, trx_sys->view_list, prev_elem, view);
279 	}
280 
281 	ut_ad(read_view_list_validate());
282 }
283 
284 /** Functor to create thew view trx_ids array. */
285 struct	CreateView {
286 
CreateViewCreateView287 	CreateView(read_view_t*	view)
288 		: m_view(view)
289 	{
290 		  m_n_trx = m_view->n_trx_ids;
291 		  m_view->n_trx_ids = 0;
292 	}
293 
operator ()CreateView294 	void	operator()(const trx_t* trx)
295 	{
296 		ut_ad(mutex_own(&trx_sys->mutex));
297 		ut_ad(trx->in_rw_trx_list);
298 
299 		/* trx->state cannot change from or to NOT_STARTED
300 		while we are holding the trx_sys->mutex. It may change
301 		from ACTIVE to PREPARED or COMMITTED. */
302 
303 		if (trx->id != m_view->creator_trx_id
304 		    && !trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) {
305 
306 			ut_ad(m_n_trx > m_view->n_trx_ids);
307 
308 			m_view->trx_ids[m_view->n_trx_ids++] = trx->id;
309 
310 			/* NOTE that a transaction whose trx number is <
311 			trx_sys->max_trx_id can still be active, if it is
312 			in the middle of its commit! Note that when a
313 			transaction starts, we initialize trx->no to
314 			TRX_ID_MAX. */
315 
316 			/* trx->no is protected by trx_sys->mutex, which
317 			we are holding. It is assigned by trx_commit()
318 			before lock_trx_release_locks() assigns
319 			trx->state = TRX_STATE_COMMITTED_IN_MEMORY. */
320 
321 			if (m_view->low_limit_no > trx->no) {
322 				m_view->low_limit_no = trx->no;
323 			}
324 		}
325 	}
326 
327 	read_view_t*	m_view;
328 	ulint		m_n_trx;
329 };
330 
331 /*********************************************************************//**
332 Opens a read view where exactly the transactions serialized before this
333 point in time are seen in the view.
334 @return	own: read view struct */
335 static
336 read_view_t*
read_view_open_now_low(trx_id_t cr_trx_id,mem_heap_t * heap)337 read_view_open_now_low(
338 /*===================*/
339 	trx_id_t	cr_trx_id,	/*!< in: trx_id of creating
340 					transaction, or 0 used in purge */
341 	mem_heap_t*	heap)		/*!< in: memory heap from which
342 					allocated */
343 {
344 	read_view_t*	view;
345 	ulint		n_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
346 
347 	ut_ad(mutex_own(&trx_sys->mutex));
348 
349 	view = read_view_create_low(n_trx, heap);
350 
351 	view->undo_no = 0;
352 	view->type = VIEW_NORMAL;
353 	view->creator_trx_id = cr_trx_id;
354 
355 	/* No future transactions should be visible in the view */
356 
357 	view->low_limit_no = trx_sys->max_trx_id;
358 	view->low_limit_id = view->low_limit_no;
359 
360 	/* No active transaction should be visible, except cr_trx */
361 
362 	ut_list_map(trx_sys->rw_trx_list, &trx_t::trx_list, CreateView(view));
363 
364 	if (view->n_trx_ids > 0) {
365 		/* The last active transaction has the smallest id: */
366 		view->up_limit_id = view->trx_ids[view->n_trx_ids - 1];
367 	} else {
368 		view->up_limit_id = view->low_limit_id;
369 	}
370 
371 	/* Purge views are not added to the view list. */
372 	if (cr_trx_id > 0) {
373 		read_view_add(view);
374 	}
375 
376 	return(view);
377 }
378 
379 /*********************************************************************//**
380 Opens a read view where exactly the transactions serialized before this
381 point in time are seen in the view.
382 @return	own: read view struct */
383 UNIV_INTERN
384 read_view_t*
read_view_open_now(trx_id_t cr_trx_id,mem_heap_t * heap)385 read_view_open_now(
386 /*===============*/
387 	trx_id_t	cr_trx_id,	/*!< in: trx_id of creating
388 					transaction, or 0 used in purge */
389 	mem_heap_t*	heap)		/*!< in: memory heap from which
390 					allocated */
391 {
392 	read_view_t*	view;
393 
394 	mutex_enter(&trx_sys->mutex);
395 
396 	view = read_view_open_now_low(cr_trx_id, heap);
397 
398 	mutex_exit(&trx_sys->mutex);
399 
400 	return(view);
401 }
402 
403 /*********************************************************************//**
404 Makes a copy of the oldest existing read view, with the exception that also
405 the creating trx of the oldest view is set as not visible in the 'copied'
406 view. Opens a new view if no views currently exist. The view must be closed
407 with ..._close. This is used in purge.
408 @return	own: read view struct */
409 UNIV_INTERN
410 read_view_t*
read_view_purge_open(mem_heap_t * heap)411 read_view_purge_open(
412 /*=================*/
413 	mem_heap_t*	heap)		/*!< in: memory heap from which
414 					allocated */
415 {
416 	ulint		i;
417 	read_view_t*	view;
418 	read_view_t*	oldest_view;
419 	trx_id_t	creator_trx_id;
420 	ulint		insert_done	= 0;
421 
422 	mutex_enter(&trx_sys->mutex);
423 
424 	oldest_view = UT_LIST_GET_LAST(trx_sys->view_list);
425 
426 	if (oldest_view == NULL) {
427 
428 		view = read_view_open_now_low(0, heap);
429 
430 		mutex_exit(&trx_sys->mutex);
431 
432 		return(view);
433 	}
434 
435 	/* Allocate space for both views, the oldest and the new purge view. */
436 
437 	oldest_view = read_view_clone(oldest_view, heap);
438 
439 	ut_ad(read_view_validate(oldest_view));
440 
441 	mutex_exit(&trx_sys->mutex);
442 
443 	ut_a(oldest_view->creator_trx_id > 0);
444 	creator_trx_id = oldest_view->creator_trx_id;
445 
446 	view = (read_view_t*) &oldest_view->trx_ids[oldest_view->n_trx_ids];
447 
448 	/* Add the creator transaction id in the trx_ids array in the
449 	correct slot. */
450 
451 	for (i = 0; i < oldest_view->n_trx_ids; ++i) {
452 		trx_id_t	id;
453 
454 		id = oldest_view->trx_ids[i - insert_done];
455 
456 		if (insert_done == 0 && creator_trx_id > id) {
457 			id = creator_trx_id;
458 			insert_done = 1;
459 		}
460 
461 		view->trx_ids[i] = id;
462 	}
463 
464 	if (insert_done == 0) {
465 		view->trx_ids[i] = creator_trx_id;
466 	} else {
467 		ut_a(i > 0);
468 		view->trx_ids[i] = oldest_view->trx_ids[i - 1];
469 	}
470 
471 	view->creator_trx_id = 0;
472 
473 	view->low_limit_no = oldest_view->low_limit_no;
474 	view->low_limit_id = oldest_view->low_limit_id;
475 
476 	if (view->n_trx_ids > 0) {
477 		/* The last active transaction has the smallest id: */
478 
479 		view->up_limit_id = view->trx_ids[view->n_trx_ids - 1];
480 	} else {
481 		view->up_limit_id = oldest_view->up_limit_id;
482 	}
483 
484 	return(view);
485 }
486 
487 /*********************************************************************//**
488 Closes a consistent read view for MySQL. This function is called at an SQL
489 statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
490 UNIV_INTERN
491 void
read_view_close_for_mysql(trx_t * trx)492 read_view_close_for_mysql(
493 /*======================*/
494 	trx_t*		trx)	/*!< in: trx which has a read view */
495 {
496 	ut_a(trx->global_read_view);
497 
498 	read_view_remove(trx->global_read_view, false);
499 
500 	mem_heap_empty(trx->global_read_view_heap);
501 
502 	trx->read_view = NULL;
503 	trx->global_read_view = NULL;
504 }
505 
506 /*********************************************************************//**
507 Prints a read view to stderr. */
508 UNIV_INTERN
509 void
read_view_print(const read_view_t * view)510 read_view_print(
511 /*============*/
512 	const read_view_t*	view)	/*!< in: read view */
513 {
514 	ulint	n_ids;
515 	ulint	i;
516 
517 	if (view->type == VIEW_HIGH_GRANULARITY) {
518 		fprintf(stderr,
519 			"High-granularity read view undo_n:o " TRX_ID_FMT "\n",
520 			view->undo_no);
521 	} else {
522 		fprintf(stderr, "Normal read view\n");
523 	}
524 
525 	fprintf(stderr, "Read view low limit trx n:o " TRX_ID_FMT "\n",
526 		view->low_limit_no);
527 
528 	fprintf(stderr, "Read view up limit trx id " TRX_ID_FMT "\n",
529 		view->up_limit_id);
530 
531 	fprintf(stderr, "Read view low limit trx id " TRX_ID_FMT "\n",
532 		view->low_limit_id);
533 
534 	fprintf(stderr, "Read view individually stored trx ids:\n");
535 
536 	n_ids = view->n_trx_ids;
537 
538 	for (i = 0; i < n_ids; i++) {
539 		fprintf(stderr, "Read view trx id " TRX_ID_FMT "\n",
540 			view->trx_ids[i]);
541 	}
542 }
543 
544 /*********************************************************************//**
545 Create a high-granularity consistent cursor view for mysql to be used
546 in cursors. In this consistent read view modifications done by the
547 creating transaction after the cursor is created or future transactions
548 are not visible. */
549 UNIV_INTERN
550 cursor_view_t*
read_cursor_view_create_for_mysql(trx_t * cr_trx)551 read_cursor_view_create_for_mysql(
552 /*==============================*/
553 	trx_t*		cr_trx)	/*!< in: trx where cursor view is created */
554 {
555 	read_view_t*	view;
556 	mem_heap_t*	heap;
557 	ulint		n_trx;
558 	cursor_view_t*	curview;
559 
560 	/* Use larger heap than in trx_create when creating a read_view
561 	because cursors are quite long. */
562 
563 	heap = mem_heap_create(512);
564 
565 	curview = (cursor_view_t*) mem_heap_alloc(heap, sizeof(*curview));
566 
567 	curview->heap = heap;
568 
569 	/* Drop cursor tables from consideration when evaluating the
570 	need of auto-commit */
571 
572 	curview->n_mysql_tables_in_use = cr_trx->n_mysql_tables_in_use;
573 
574 	cr_trx->n_mysql_tables_in_use = 0;
575 
576 	mutex_enter(&trx_sys->mutex);
577 
578 	n_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
579 
580 	curview->read_view = read_view_create_low(n_trx, curview->heap);
581 
582 	view = curview->read_view;
583 	view->undo_no = cr_trx->undo_no;
584 	view->type = VIEW_HIGH_GRANULARITY;
585 	view->creator_trx_id = UINT64_UNDEFINED;
586 
587 	/* No future transactions should be visible in the view */
588 
589 	view->low_limit_no = trx_sys->max_trx_id;
590 	view->low_limit_id = view->low_limit_no;
591 
592 	/* No active transaction should be visible */
593 
594 	ut_list_map(trx_sys->rw_trx_list, &trx_t::trx_list, CreateView(view));
595 
596 	view->creator_trx_id = cr_trx->id;
597 
598 	if (view->n_trx_ids > 0) {
599 		/* The last active transaction has the smallest id: */
600 
601 		view->up_limit_id = view->trx_ids[view->n_trx_ids - 1];
602 	} else {
603 		view->up_limit_id = view->low_limit_id;
604 	}
605 
606 	read_view_add(view);
607 
608 	mutex_exit(&trx_sys->mutex);
609 
610 	return(curview);
611 }
612 
613 /*********************************************************************//**
614 Close a given consistent cursor view for mysql and restore global read view
615 back to a transaction read view. */
616 UNIV_INTERN
617 void
read_cursor_view_close_for_mysql(trx_t * trx,cursor_view_t * curview)618 read_cursor_view_close_for_mysql(
619 /*=============================*/
620 	trx_t*		trx,	/*!< in: trx */
621 	cursor_view_t*	curview)/*!< in: cursor view to be closed */
622 {
623 	ut_a(curview);
624 	ut_a(curview->read_view);
625 	ut_a(curview->heap);
626 
627 	/* Add cursor's tables to the global count of active tables that
628 	belong to this transaction */
629 	trx->n_mysql_tables_in_use += curview->n_mysql_tables_in_use;
630 
631 	read_view_remove(curview->read_view, false);
632 
633 	trx->read_view = trx->global_read_view;
634 
635 	mem_heap_free(curview->heap);
636 }
637 
638 /*********************************************************************//**
639 This function sets a given consistent cursor view to a transaction
640 read view if given consistent cursor view is not NULL. Otherwise, function
641 restores a global read view to a transaction read view. */
642 UNIV_INTERN
643 void
read_cursor_set_for_mysql(trx_t * trx,cursor_view_t * curview)644 read_cursor_set_for_mysql(
645 /*======================*/
646 	trx_t*		trx,	/*!< in: transaction where cursor is set */
647 	cursor_view_t*	curview)/*!< in: consistent cursor view to be set */
648 {
649 	ut_a(trx);
650 
651 	mutex_enter(&trx_sys->mutex);
652 
653 	if (UNIV_LIKELY(curview != NULL)) {
654 		trx->read_view = curview->read_view;
655 	} else {
656 		trx->read_view = trx->global_read_view;
657 	}
658 
659 	ut_ad(read_view_validate(trx->read_view));
660 
661 	mutex_exit(&trx_sys->mutex);
662 }
663