1 /*****************************************************************************
2
3 Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation. The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file read/read0read.cc
29 Cursor read
30
31 Created 2/16/1997 Heikki Tuuri
32 *******************************************************/
33
34 #include "read0read.h"
35
36 #ifdef UNIV_NONINL
37 #include "read0read.ic"
38 #endif
39
40 #include "srv0srv.h"
41 #include "trx0sys.h"
42
43 /*
44 -------------------------------------------------------------------------------
45 FACT A: Cursor read view on a secondary index sees only committed versions
46 -------
47 of the records in the secondary index or those versions of rows created
48 by transaction which created a cursor before cursor was created even
49 if transaction which created the cursor has changed that clustered index page.
50
51 PROOF: We must show that read goes always to the clustered index record
52 to see that record is visible in the cursor read view. Consider e.g.
53 following table and SQL-clauses:
54
55 create table t1(a int not null, b int, primary key(a), index(b));
56 insert into t1 values (1,1),(2,2);
57 commit;
58
59 Now consider that we have a cursor for a query
60
61 select b from t1 where b >= 1;
62
63 This query will use secondary key on the table t1. Now after the first fetch
64 on this cursor if we do a update:
65
66 update t1 set b = 5 where b = 2;
67
68 Now second fetch of the cursor should not see record (2,5) instead it should
69 see record (2,2).
70
71 We also should show that if we have delete t1 where b = 5; we still
72 can see record (2,2).
73
74 When we access a secondary key record maximum transaction id is fetched
75 from this record and this trx_id is compared to up_limit_id in the view.
76 If trx_id in the record is greater or equal than up_limit_id in the view
77 cluster record is accessed. Because trx_id of the creating
78 transaction is stored when this view was created to the list of
79 trx_ids not seen by this read view previous version of the
80 record is requested to be built. This is build using clustered record.
81 If the secondary key record is delete-marked, its corresponding
82 clustered record can be already be purged only if records
83 trx_id < low_limit_no. Purge can't remove any record deleted by a
84 transaction which was active when cursor was created. But, we still
85 may have a deleted secondary key record but no clustered record. But,
86 this is not a problem because this case is handled in
87 row_sel_get_clust_rec() function which is called
88 whenever we note that this read view does not see trx_id in the
89 record. Thus, we see correct version. Q. E. D.
90
91 -------------------------------------------------------------------------------
92 FACT B: Cursor read view on a clustered index sees only committed versions
93 -------
94 of the records in the clustered index or those versions of rows created
95 by transaction which created a cursor before cursor was created even
96 if transaction which created the cursor has changed that clustered index page.
97
98 PROOF: Consider e.g.following table and SQL-clauses:
99
100 create table t1(a int not null, b int, primary key(a));
101 insert into t1 values (1),(2);
102 commit;
103
104 Now consider that we have a cursor for a query
105
106 select a from t1 where a >= 1;
107
108 This query will use clustered key on the table t1. Now after the first fetch
109 on this cursor if we do a update:
110
111 update t1 set a = 5 where a = 2;
112
113 Now second fetch of the cursor should not see record (5) instead it should
114 see record (2).
115
116 We also should show that if we have execute delete t1 where a = 5; after
117 the cursor is opened we still can see record (2).
118
119 When accessing clustered record we always check if this read view sees
120 trx_id stored to clustered record. By default we don't see any changes
121 if record trx_id >= low_limit_id i.e. change was made transaction
122 which started after transaction which created the cursor. If row
123 was changed by the future transaction a previous version of the
124 clustered record is created. Thus we see only committed version in
125 this case. We see all changes made by committed transactions i.e.
126 record trx_id < up_limit_id. In this case we don't need to do anything,
127 we already see correct version of the record. We don't see any changes
128 made by active transaction except creating transaction. We have stored
129 trx_id of creating transaction to list of trx_ids when this view was
130 created. Thus we can easily see if this record was changed by the
131 creating transaction. Because we already have clustered record we can
132 access roll_ptr. Using this roll_ptr we can fetch undo record.
133 We can now check that undo_no of the undo record is less than undo_no of the
134 trancaction which created a view when cursor was created. We see this
135 clustered record only in case when record undo_no is less than undo_no
136 in the view. If this is not true we build based on undo_rec previous
137 version of the record. This record is found because purge can't remove
138 records accessed by active transaction. Thus we see correct version. Q. E. D.
139 -------------------------------------------------------------------------------
140 FACT C: Purge does not remove any delete-marked row that is visible
141 -------
142 in any cursor read view.
143
144 PROOF: We know that:
145 1: Currently active read views in trx_sys_t::view_list are ordered by
146 read_view_t::low_limit_no in descending order, that is,
147 newest read view first.
148
149 2: Purge clones the oldest read view and uses that to determine whether there
150 are any active transactions that can see the to be purged records.
151
152 Therefore any joining or active transaction will not have a view older
153 than the purge view, according to 1.
154
155 When purge needs to remove a delete-marked row from a secondary index,
156 it will first check that the DB_TRX_ID value of the corresponding
157 record in the clustered index is older than the purge view. It will
158 also check if there is a newer version of the row (clustered index
159 record) that is not delete-marked in the secondary index. If such a
160 row exists and is collation-equal to the delete-marked secondary index
161 record then purge will not remove the secondary index record.
162
163 Delete-marked clustered index records will be removed by
164 row_purge_remove_clust_if_poss(), unless the clustered index record
165 (and its DB_ROLL_PTR) has been updated. Every new version of the
166 clustered index record will update DB_ROLL_PTR, pointing to a new UNDO
167 log entry that allows the old version to be reconstructed. The
168 DB_ROLL_PTR in the oldest remaining version in the old-version chain
169 may be pointing to garbage (an undo log record discarded by purge),
170 but it will never be dereferenced, because the purge view is older
171 than any active transaction.
172
173 For details see: row_vers_old_has_index_entry() and row_purge_poss_sec()
174
175 Some additional issues:
176
177 What if trx_sys->view_list == NULL and some transaction T1 and Purge both
178 try to open read_view at same time. Only one can acquire trx_sys->mutex.
179 In which order will the views be opened? Should it matter? If no, why?
180
181 The order does not matter. No new transactions can be created and no running
182 transaction can commit or rollback (or free views).
183 */
184
185 /*********************************************************************//**
186 Creates a read view object.
187 @return own: read view struct */
188 UNIV_INLINE
189 read_view_t*
read_view_create_low(ulint n,mem_heap_t * heap)190 read_view_create_low(
191 /*=================*/
192 ulint n, /*!< in: number of cells in the trx_ids array */
193 mem_heap_t* heap) /*!< in: memory heap from which allocated */
194 {
195 read_view_t* view;
196
197 view = static_cast<read_view_t*>(
198 mem_heap_alloc(
199 heap, sizeof(*view) + n * sizeof(*view->trx_ids)));
200
201 view->n_trx_ids = n;
202 view->trx_ids = (trx_id_t*) &view[1];
203
204 return(view);
205 }
206
207 /*********************************************************************//**
208 Clones a read view object. This function will allocate space for two read
209 views contiguously, one identical in size and content as @param view (starting
210 at returned pointer) and another view immediately following the trx_ids array.
211 The second view will have space for an extra trx_id_t element.
212 @return read view struct */
213 UNIV_INLINE
214 read_view_t*
read_view_clone(const read_view_t * view,mem_heap_t * heap)215 read_view_clone(
216 /*============*/
217 const read_view_t* view, /*!< in: view to clone */
218 mem_heap_t* heap) /*!< in: memory heap
219 from which allocated */
220 {
221 ulint sz;
222 read_view_t* clone;
223 read_view_t* new_view;
224
225 ut_ad(mutex_own(&trx_sys->mutex));
226
227 /* Allocate space for two views. */
228
229 sz = sizeof(*view) + view->n_trx_ids * sizeof(*view->trx_ids);
230
231 /* Add an extra trx_id_t slot for the new view. */
232
233 clone = static_cast<read_view_t*>(
234 mem_heap_alloc(heap, (sz * 2) + sizeof(trx_id_t)));
235
236 /* Only the contents of the old view are important, the new view
237 will be created from this and so we don't copy that across. */
238
239 memcpy(clone, view, sz);
240
241 clone->trx_ids = (trx_id_t*) &clone[1];
242
243 new_view = (read_view_t*) &clone->trx_ids[clone->n_trx_ids];
244 new_view->trx_ids = (trx_id_t*) &new_view[1];
245 new_view->n_trx_ids = clone->n_trx_ids + 1;
246
247 ut_a(new_view->n_trx_ids == view->n_trx_ids + 1);
248
249 return(clone);
250 }
251
252 /*********************************************************************//**
253 Insert the view in the proper order into the trx_sys->view_list. The
254 read view list is ordered by read_view_t::low_limit_no in descending order. */
255 static
256 void
read_view_add(read_view_t * view)257 read_view_add(
258 /*==========*/
259 read_view_t* view) /*!< in: view to add to */
260 {
261 read_view_t* elem;
262 read_view_t* prev_elem;
263
264 ut_ad(mutex_own(&trx_sys->mutex));
265 ut_ad(read_view_validate(view));
266
267 /* Find the correct slot for insertion. */
268 for (elem = UT_LIST_GET_FIRST(trx_sys->view_list), prev_elem = NULL;
269 elem != NULL && view->low_limit_no < elem->low_limit_no;
270 prev_elem = elem, elem = UT_LIST_GET_NEXT(view_list, elem)) {
271 /* No op */
272 }
273
274 if (prev_elem == NULL) {
275 UT_LIST_ADD_FIRST(view_list, trx_sys->view_list, view);
276 } else {
277 UT_LIST_INSERT_AFTER(
278 view_list, trx_sys->view_list, prev_elem, view);
279 }
280
281 ut_ad(read_view_list_validate());
282 }
283
284 /** Functor to create thew view trx_ids array. */
285 struct CreateView {
286
CreateViewCreateView287 CreateView(read_view_t* view)
288 : m_view(view)
289 {
290 m_n_trx = m_view->n_trx_ids;
291 m_view->n_trx_ids = 0;
292 }
293
operator ()CreateView294 void operator()(const trx_t* trx)
295 {
296 ut_ad(mutex_own(&trx_sys->mutex));
297 ut_ad(trx->in_rw_trx_list);
298
299 /* trx->state cannot change from or to NOT_STARTED
300 while we are holding the trx_sys->mutex. It may change
301 from ACTIVE to PREPARED or COMMITTED. */
302
303 if (trx->id != m_view->creator_trx_id
304 && !trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) {
305
306 ut_ad(m_n_trx > m_view->n_trx_ids);
307
308 m_view->trx_ids[m_view->n_trx_ids++] = trx->id;
309
310 /* NOTE that a transaction whose trx number is <
311 trx_sys->max_trx_id can still be active, if it is
312 in the middle of its commit! Note that when a
313 transaction starts, we initialize trx->no to
314 TRX_ID_MAX. */
315
316 /* trx->no is protected by trx_sys->mutex, which
317 we are holding. It is assigned by trx_commit()
318 before lock_trx_release_locks() assigns
319 trx->state = TRX_STATE_COMMITTED_IN_MEMORY. */
320
321 if (m_view->low_limit_no > trx->no) {
322 m_view->low_limit_no = trx->no;
323 }
324 }
325 }
326
327 read_view_t* m_view;
328 ulint m_n_trx;
329 };
330
331 /*********************************************************************//**
332 Opens a read view where exactly the transactions serialized before this
333 point in time are seen in the view.
334 @return own: read view struct */
335 static
336 read_view_t*
read_view_open_now_low(trx_id_t cr_trx_id,mem_heap_t * heap)337 read_view_open_now_low(
338 /*===================*/
339 trx_id_t cr_trx_id, /*!< in: trx_id of creating
340 transaction, or 0 used in purge */
341 mem_heap_t* heap) /*!< in: memory heap from which
342 allocated */
343 {
344 read_view_t* view;
345 ulint n_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
346
347 ut_ad(mutex_own(&trx_sys->mutex));
348
349 view = read_view_create_low(n_trx, heap);
350
351 view->undo_no = 0;
352 view->type = VIEW_NORMAL;
353 view->creator_trx_id = cr_trx_id;
354
355 /* No future transactions should be visible in the view */
356
357 view->low_limit_no = trx_sys->max_trx_id;
358 view->low_limit_id = view->low_limit_no;
359
360 /* No active transaction should be visible, except cr_trx */
361
362 ut_list_map(trx_sys->rw_trx_list, &trx_t::trx_list, CreateView(view));
363
364 if (view->n_trx_ids > 0) {
365 /* The last active transaction has the smallest id: */
366 view->up_limit_id = view->trx_ids[view->n_trx_ids - 1];
367 } else {
368 view->up_limit_id = view->low_limit_id;
369 }
370
371 /* Purge views are not added to the view list. */
372 if (cr_trx_id > 0) {
373 read_view_add(view);
374 }
375
376 return(view);
377 }
378
379 /*********************************************************************//**
380 Opens a read view where exactly the transactions serialized before this
381 point in time are seen in the view.
382 @return own: read view struct */
383 UNIV_INTERN
384 read_view_t*
read_view_open_now(trx_id_t cr_trx_id,mem_heap_t * heap)385 read_view_open_now(
386 /*===============*/
387 trx_id_t cr_trx_id, /*!< in: trx_id of creating
388 transaction, or 0 used in purge */
389 mem_heap_t* heap) /*!< in: memory heap from which
390 allocated */
391 {
392 read_view_t* view;
393
394 mutex_enter(&trx_sys->mutex);
395
396 view = read_view_open_now_low(cr_trx_id, heap);
397
398 mutex_exit(&trx_sys->mutex);
399
400 return(view);
401 }
402
403 /*********************************************************************//**
404 Makes a copy of the oldest existing read view, with the exception that also
405 the creating trx of the oldest view is set as not visible in the 'copied'
406 view. Opens a new view if no views currently exist. The view must be closed
407 with ..._close. This is used in purge.
408 @return own: read view struct */
409 UNIV_INTERN
410 read_view_t*
read_view_purge_open(mem_heap_t * heap)411 read_view_purge_open(
412 /*=================*/
413 mem_heap_t* heap) /*!< in: memory heap from which
414 allocated */
415 {
416 ulint i;
417 read_view_t* view;
418 read_view_t* oldest_view;
419 trx_id_t creator_trx_id;
420 ulint insert_done = 0;
421
422 mutex_enter(&trx_sys->mutex);
423
424 oldest_view = UT_LIST_GET_LAST(trx_sys->view_list);
425
426 if (oldest_view == NULL) {
427
428 view = read_view_open_now_low(0, heap);
429
430 mutex_exit(&trx_sys->mutex);
431
432 return(view);
433 }
434
435 /* Allocate space for both views, the oldest and the new purge view. */
436
437 oldest_view = read_view_clone(oldest_view, heap);
438
439 ut_ad(read_view_validate(oldest_view));
440
441 mutex_exit(&trx_sys->mutex);
442
443 ut_a(oldest_view->creator_trx_id > 0);
444 creator_trx_id = oldest_view->creator_trx_id;
445
446 view = (read_view_t*) &oldest_view->trx_ids[oldest_view->n_trx_ids];
447
448 /* Add the creator transaction id in the trx_ids array in the
449 correct slot. */
450
451 for (i = 0; i < oldest_view->n_trx_ids; ++i) {
452 trx_id_t id;
453
454 id = oldest_view->trx_ids[i - insert_done];
455
456 if (insert_done == 0 && creator_trx_id > id) {
457 id = creator_trx_id;
458 insert_done = 1;
459 }
460
461 view->trx_ids[i] = id;
462 }
463
464 if (insert_done == 0) {
465 view->trx_ids[i] = creator_trx_id;
466 } else {
467 ut_a(i > 0);
468 view->trx_ids[i] = oldest_view->trx_ids[i - 1];
469 }
470
471 view->creator_trx_id = 0;
472
473 view->low_limit_no = oldest_view->low_limit_no;
474 view->low_limit_id = oldest_view->low_limit_id;
475
476 if (view->n_trx_ids > 0) {
477 /* The last active transaction has the smallest id: */
478
479 view->up_limit_id = view->trx_ids[view->n_trx_ids - 1];
480 } else {
481 view->up_limit_id = oldest_view->up_limit_id;
482 }
483
484 return(view);
485 }
486
487 /*********************************************************************//**
488 Closes a consistent read view for MySQL. This function is called at an SQL
489 statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
490 UNIV_INTERN
491 void
read_view_close_for_mysql(trx_t * trx)492 read_view_close_for_mysql(
493 /*======================*/
494 trx_t* trx) /*!< in: trx which has a read view */
495 {
496 ut_a(trx->global_read_view);
497
498 read_view_remove(trx->global_read_view, false);
499
500 mem_heap_empty(trx->global_read_view_heap);
501
502 trx->read_view = NULL;
503 trx->global_read_view = NULL;
504 }
505
506 /*********************************************************************//**
507 Prints a read view to stderr. */
508 UNIV_INTERN
509 void
read_view_print(const read_view_t * view)510 read_view_print(
511 /*============*/
512 const read_view_t* view) /*!< in: read view */
513 {
514 ulint n_ids;
515 ulint i;
516
517 if (view->type == VIEW_HIGH_GRANULARITY) {
518 fprintf(stderr,
519 "High-granularity read view undo_n:o " TRX_ID_FMT "\n",
520 view->undo_no);
521 } else {
522 fprintf(stderr, "Normal read view\n");
523 }
524
525 fprintf(stderr, "Read view low limit trx n:o " TRX_ID_FMT "\n",
526 view->low_limit_no);
527
528 fprintf(stderr, "Read view up limit trx id " TRX_ID_FMT "\n",
529 view->up_limit_id);
530
531 fprintf(stderr, "Read view low limit trx id " TRX_ID_FMT "\n",
532 view->low_limit_id);
533
534 fprintf(stderr, "Read view individually stored trx ids:\n");
535
536 n_ids = view->n_trx_ids;
537
538 for (i = 0; i < n_ids; i++) {
539 fprintf(stderr, "Read view trx id " TRX_ID_FMT "\n",
540 view->trx_ids[i]);
541 }
542 }
543
544 /*********************************************************************//**
545 Create a high-granularity consistent cursor view for mysql to be used
546 in cursors. In this consistent read view modifications done by the
547 creating transaction after the cursor is created or future transactions
548 are not visible. */
549 UNIV_INTERN
550 cursor_view_t*
read_cursor_view_create_for_mysql(trx_t * cr_trx)551 read_cursor_view_create_for_mysql(
552 /*==============================*/
553 trx_t* cr_trx) /*!< in: trx where cursor view is created */
554 {
555 read_view_t* view;
556 mem_heap_t* heap;
557 ulint n_trx;
558 cursor_view_t* curview;
559
560 /* Use larger heap than in trx_create when creating a read_view
561 because cursors are quite long. */
562
563 heap = mem_heap_create(512);
564
565 curview = (cursor_view_t*) mem_heap_alloc(heap, sizeof(*curview));
566
567 curview->heap = heap;
568
569 /* Drop cursor tables from consideration when evaluating the
570 need of auto-commit */
571
572 curview->n_mysql_tables_in_use = cr_trx->n_mysql_tables_in_use;
573
574 cr_trx->n_mysql_tables_in_use = 0;
575
576 mutex_enter(&trx_sys->mutex);
577
578 n_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
579
580 curview->read_view = read_view_create_low(n_trx, curview->heap);
581
582 view = curview->read_view;
583 view->undo_no = cr_trx->undo_no;
584 view->type = VIEW_HIGH_GRANULARITY;
585 view->creator_trx_id = UINT64_UNDEFINED;
586
587 /* No future transactions should be visible in the view */
588
589 view->low_limit_no = trx_sys->max_trx_id;
590 view->low_limit_id = view->low_limit_no;
591
592 /* No active transaction should be visible */
593
594 ut_list_map(trx_sys->rw_trx_list, &trx_t::trx_list, CreateView(view));
595
596 view->creator_trx_id = cr_trx->id;
597
598 if (view->n_trx_ids > 0) {
599 /* The last active transaction has the smallest id: */
600
601 view->up_limit_id = view->trx_ids[view->n_trx_ids - 1];
602 } else {
603 view->up_limit_id = view->low_limit_id;
604 }
605
606 read_view_add(view);
607
608 mutex_exit(&trx_sys->mutex);
609
610 return(curview);
611 }
612
613 /*********************************************************************//**
614 Close a given consistent cursor view for mysql and restore global read view
615 back to a transaction read view. */
616 UNIV_INTERN
617 void
read_cursor_view_close_for_mysql(trx_t * trx,cursor_view_t * curview)618 read_cursor_view_close_for_mysql(
619 /*=============================*/
620 trx_t* trx, /*!< in: trx */
621 cursor_view_t* curview)/*!< in: cursor view to be closed */
622 {
623 ut_a(curview);
624 ut_a(curview->read_view);
625 ut_a(curview->heap);
626
627 /* Add cursor's tables to the global count of active tables that
628 belong to this transaction */
629 trx->n_mysql_tables_in_use += curview->n_mysql_tables_in_use;
630
631 read_view_remove(curview->read_view, false);
632
633 trx->read_view = trx->global_read_view;
634
635 mem_heap_free(curview->heap);
636 }
637
638 /*********************************************************************//**
639 This function sets a given consistent cursor view to a transaction
640 read view if given consistent cursor view is not NULL. Otherwise, function
641 restores a global read view to a transaction read view. */
642 UNIV_INTERN
643 void
read_cursor_set_for_mysql(trx_t * trx,cursor_view_t * curview)644 read_cursor_set_for_mysql(
645 /*======================*/
646 trx_t* trx, /*!< in: transaction where cursor is set */
647 cursor_view_t* curview)/*!< in: consistent cursor view to be set */
648 {
649 ut_a(trx);
650
651 mutex_enter(&trx_sys->mutex);
652
653 if (UNIV_LIKELY(curview != NULL)) {
654 trx->read_view = curview->read_view;
655 } else {
656 trx->read_view = trx->global_read_view;
657 }
658
659 ut_ad(read_view_validate(trx->read_view));
660
661 mutex_exit(&trx_sys->mutex);
662 }
663