1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file btr/btr0pcur.cc
29 The index tree persistent cursor
30 
31 Created 2/23/1996 Heikki Tuuri
32 *******************************************************/
33 
34 #include "btr0pcur.h"
35 
36 #ifdef UNIV_NONINL
37 #include "btr0pcur.ic"
38 #endif
39 
40 #include "ut0byte.h"
41 #include "rem0cmp.h"
42 #include "trx0trx.h"
43 
44 /**************************************************************//**
45 Allocates memory for a persistent cursor object and initializes the cursor.
46 @return	own: persistent cursor */
47 UNIV_INTERN
48 btr_pcur_t*
btr_pcur_create_for_mysql(void)49 btr_pcur_create_for_mysql(void)
50 /*============================*/
51 {
52 	btr_pcur_t*	pcur;
53 
54 	pcur = (btr_pcur_t*) mem_alloc(sizeof(btr_pcur_t));
55 
56 	pcur->btr_cur.index = NULL;
57 	btr_pcur_init(pcur);
58 
59 	return(pcur);
60 }
61 
62 /**************************************************************//**
63 Resets a persistent cursor object, freeing ::old_rec_buf if it is
64 allocated and resetting the other members to their initial values. */
65 UNIV_INTERN
66 void
btr_pcur_reset(btr_pcur_t * cursor)67 btr_pcur_reset(
68 /*===========*/
69 	btr_pcur_t*	cursor)	/*!< in, out: persistent cursor */
70 {
71 	if (cursor->old_rec_buf != NULL) {
72 
73 		mem_free(cursor->old_rec_buf);
74 
75 		cursor->old_rec_buf = NULL;
76 	}
77 
78 	cursor->btr_cur.index = NULL;
79 	cursor->btr_cur.page_cur.rec = NULL;
80 	cursor->old_rec = NULL;
81 	cursor->old_n_fields = 0;
82 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
83 
84 	cursor->latch_mode = BTR_NO_LATCHES;
85 	cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
86 }
87 
88 /**************************************************************//**
89 Frees the memory for a persistent cursor object. */
90 UNIV_INTERN
91 void
btr_pcur_free_for_mysql(btr_pcur_t * cursor)92 btr_pcur_free_for_mysql(
93 /*====================*/
94 	btr_pcur_t*	cursor)	/*!< in, own: persistent cursor */
95 {
96 	btr_pcur_reset(cursor);
97 	mem_free(cursor);
98 }
99 
100 /**************************************************************//**
101 The position of the cursor is stored by taking an initial segment of the
102 record the cursor is positioned on, before, or after, and copying it to the
103 cursor data structure, or just setting a flag if the cursor id before the
104 first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
105 page where the cursor is positioned must not be empty if the index tree is
106 not totally empty! */
107 UNIV_INTERN
108 void
btr_pcur_store_position(btr_pcur_t * cursor,mtr_t * mtr)109 btr_pcur_store_position(
110 /*====================*/
111 	btr_pcur_t*	cursor, /*!< in: persistent cursor */
112 	mtr_t*		mtr)	/*!< in: mtr */
113 {
114 	page_cur_t*	page_cursor;
115 	buf_block_t*	block;
116 	rec_t*		rec;
117 	dict_index_t*	index;
118 	page_t*		page;
119 	ulint		offs;
120 
121 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
122 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
123 
124 	block = btr_pcur_get_block(cursor);
125 	index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));
126 
127 	page_cursor = btr_pcur_get_page_cur(cursor);
128 
129 	rec = page_cur_get_rec(page_cursor);
130 	page = page_align(rec);
131 	offs = page_offset(rec);
132 
133 	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_S_FIX)
134 	      || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
135 
136 	if (page_is_empty(page)) {
137 		/* It must be an empty index tree; NOTE that in this case
138 		we do not store the modify_clock, but always do a search
139 		if we restore the cursor position */
140 
141 		ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
142 		ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
143 		ut_ad(page_is_leaf(page));
144 		ut_ad(page_get_page_no(page) == index->page);
145 
146 		cursor->old_stored = BTR_PCUR_OLD_STORED;
147 
148 		if (page_rec_is_supremum_low(offs)) {
149 
150 			cursor->rel_pos = BTR_PCUR_AFTER_LAST_IN_TREE;
151 		} else {
152 			cursor->rel_pos = BTR_PCUR_BEFORE_FIRST_IN_TREE;
153 		}
154 
155 		return;
156 	}
157 
158 	if (page_rec_is_supremum_low(offs)) {
159 
160 		rec = page_rec_get_prev(rec);
161 
162 		cursor->rel_pos = BTR_PCUR_AFTER;
163 
164 	} else if (page_rec_is_infimum_low(offs)) {
165 
166 		rec = page_rec_get_next(rec);
167 
168 		cursor->rel_pos = BTR_PCUR_BEFORE;
169 	} else {
170 		cursor->rel_pos = BTR_PCUR_ON;
171 	}
172 
173 	cursor->old_stored = BTR_PCUR_OLD_STORED;
174 	cursor->old_rec = dict_index_copy_rec_order_prefix(
175 		index, rec, &cursor->old_n_fields,
176 		&cursor->old_rec_buf, &cursor->buf_size);
177 
178 	cursor->block_when_stored = block;
179 	cursor->modify_clock = buf_block_get_modify_clock(block);
180 }
181 
182 /**************************************************************//**
183 Copies the stored position of a pcur to another pcur. */
184 UNIV_INTERN
185 void
btr_pcur_copy_stored_position(btr_pcur_t * pcur_receive,btr_pcur_t * pcur_donate)186 btr_pcur_copy_stored_position(
187 /*==========================*/
188 	btr_pcur_t*	pcur_receive,	/*!< in: pcur which will receive the
189 					position info */
190 	btr_pcur_t*	pcur_donate)	/*!< in: pcur from which the info is
191 					copied */
192 {
193 	if (pcur_receive->old_rec_buf) {
194 		mem_free(pcur_receive->old_rec_buf);
195 	}
196 
197 	ut_memcpy(pcur_receive, pcur_donate, sizeof(btr_pcur_t));
198 
199 	if (pcur_donate->old_rec_buf) {
200 
201 		pcur_receive->old_rec_buf = (byte*)
202 			mem_alloc(pcur_donate->buf_size);
203 
204 		ut_memcpy(pcur_receive->old_rec_buf, pcur_donate->old_rec_buf,
205 			  pcur_donate->buf_size);
206 		pcur_receive->old_rec = pcur_receive->old_rec_buf
207 			+ (pcur_donate->old_rec - pcur_donate->old_rec_buf);
208 	}
209 
210 	pcur_receive->old_n_fields = pcur_donate->old_n_fields;
211 }
212 
213 /**************************************************************//**
214 Restores the stored position of a persistent cursor bufferfixing the page and
215 obtaining the specified latches. If the cursor position was saved when the
216 (1) cursor was positioned on a user record: this function restores the position
217 to the last record LESS OR EQUAL to the stored record;
218 (2) cursor was positioned on a page infimum record: restores the position to
219 the last record LESS than the user record which was the successor of the page
220 infimum;
221 (3) cursor was positioned on the page supremum: restores to the first record
222 GREATER than the user record which was the predecessor of the supremum.
223 (4) cursor was positioned before the first or after the last in an empty tree:
224 restores to before first or after the last in the tree.
225 @return TRUE if the cursor position was stored when it was on a user
226 record and it can be restored on a user record whose ordering fields
227 are identical to the ones of the original user record */
228 UNIV_INTERN
229 ibool
btr_pcur_restore_position_func(ulint latch_mode,btr_pcur_t * cursor,const char * file,ulint line,mtr_t * mtr)230 btr_pcur_restore_position_func(
231 /*===========================*/
232 	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
233 	btr_pcur_t*	cursor,		/*!< in: detached persistent cursor */
234 	const char*	file,		/*!< in: file name */
235 	ulint		line,		/*!< in: line where called */
236 	mtr_t*		mtr)		/*!< in: mtr */
237 {
238 	dict_index_t*	index;
239 	dtuple_t*	tuple;
240 	ulint		mode;
241 	ulint		old_mode;
242 	mem_heap_t*	heap;
243 
244 	ut_ad(mtr);
245 	ut_ad(mtr->state == MTR_ACTIVE);
246 	ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED);
247 	ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED
248 	      || cursor->pos_state == BTR_PCUR_IS_POSITIONED);
249 
250 	index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));
251 
252 	if (UNIV_UNLIKELY
253 	    (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
254 	     || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) {
255 
256 		/* In these cases we do not try an optimistic restoration,
257 		but always do a search */
258 
259 		btr_cur_open_at_index_side(
260 			cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE,
261 			index, latch_mode,
262 			btr_pcur_get_btr_cur(cursor), 0, mtr);
263 
264 		cursor->latch_mode = latch_mode;
265 		cursor->pos_state = BTR_PCUR_IS_POSITIONED;
266 		cursor->block_when_stored = btr_pcur_get_block(cursor);
267 
268 		return(FALSE);
269 	}
270 
271 	ut_a(cursor->old_rec);
272 	ut_a(cursor->old_n_fields);
273 
274 	if (UNIV_LIKELY(latch_mode == BTR_SEARCH_LEAF)
275 	    || UNIV_LIKELY(latch_mode == BTR_MODIFY_LEAF)) {
276 		/* Try optimistic restoration. */
277 
278 		if (buf_page_optimistic_get(latch_mode,
279 					    cursor->block_when_stored,
280 					    cursor->modify_clock,
281 					    file, line, mtr)) {
282 			cursor->pos_state = BTR_PCUR_IS_POSITIONED;
283 			cursor->latch_mode = latch_mode;
284 
285 			buf_block_dbg_add_level(
286 				btr_pcur_get_block(cursor),
287 				dict_index_is_ibuf(index)
288 				? SYNC_IBUF_TREE_NODE : SYNC_TREE_NODE);
289 
290 			if (cursor->rel_pos == BTR_PCUR_ON) {
291 #ifdef UNIV_DEBUG
292 				const rec_t*	rec;
293 				const ulint*	offsets1;
294 				const ulint*	offsets2;
295 				rec = btr_pcur_get_rec(cursor);
296 
297 				heap = mem_heap_create(256);
298 				offsets1 = rec_get_offsets(
299 					cursor->old_rec, index, NULL,
300 					cursor->old_n_fields, &heap);
301 				offsets2 = rec_get_offsets(
302 					rec, index, NULL,
303 					cursor->old_n_fields, &heap);
304 
305 				ut_ad(!cmp_rec_rec(cursor->old_rec,
306 						   rec, offsets1, offsets2,
307 						   index));
308 				mem_heap_free(heap);
309 #endif /* UNIV_DEBUG */
310 				return(TRUE);
311 			}
312 			/* This is the same record as stored,
313 			may need to be adjusted for BTR_PCUR_BEFORE/AFTER,
314 			depending on search mode and direction. */
315 			if (btr_pcur_is_on_user_rec(cursor)) {
316 				cursor->pos_state
317 					= BTR_PCUR_IS_POSITIONED_OPTIMISTIC;
318 			}
319 			return(FALSE);
320 		}
321 	}
322 
323 	/* If optimistic restoration did not succeed, open the cursor anew */
324 
325 	heap = mem_heap_create(256);
326 
327 	tuple = dict_index_build_data_tuple(index, cursor->old_rec,
328 					    cursor->old_n_fields, heap);
329 
330 	/* Save the old search mode of the cursor */
331 	old_mode = cursor->search_mode;
332 
333 	switch (cursor->rel_pos) {
334 	case BTR_PCUR_ON:
335 		mode = PAGE_CUR_LE;
336 		break;
337 	case BTR_PCUR_AFTER:
338 		mode = PAGE_CUR_G;
339 		break;
340 	case BTR_PCUR_BEFORE:
341 		mode = PAGE_CUR_L;
342 		break;
343 	default:
344 		ut_error;
345 		mode = 0;
346 	}
347 
348 	btr_pcur_open_with_no_init_func(index, tuple, mode, latch_mode,
349 					cursor, 0, file, line, mtr);
350 
351 	/* Restore the old search mode */
352 	cursor->search_mode = old_mode;
353 
354 	switch (cursor->rel_pos) {
355 	case BTR_PCUR_ON:
356 		if (btr_pcur_is_on_user_rec(cursor)
357 		    && !cmp_dtuple_rec(
358 			    tuple, btr_pcur_get_rec(cursor),
359 			    rec_get_offsets(btr_pcur_get_rec(cursor),
360 					    index, NULL,
361 					    ULINT_UNDEFINED, &heap))) {
362 
363 			/* We have to store the NEW value for
364 			the modify clock, since the cursor can
365 			now be on a different page! But we can
366 			retain the value of old_rec */
367 
368 			cursor->block_when_stored =
369 				btr_pcur_get_block(cursor);
370 			cursor->modify_clock =
371 				buf_block_get_modify_clock(
372 					cursor->block_when_stored);
373 			cursor->old_stored = BTR_PCUR_OLD_STORED;
374 
375 			mem_heap_free(heap);
376 
377 			return(TRUE);
378 		}
379 #ifdef UNIV_DEBUG
380 		/* fall through */
381 	case BTR_PCUR_BEFORE:
382 	case BTR_PCUR_AFTER:
383 		break;
384 	default:
385 		ut_error;
386 #endif /* UNIV_DEBUG */
387 	}
388 
389 	mem_heap_free(heap);
390 
391 	/* We have to store new position information, modify_clock etc.,
392 	to the cursor because it can now be on a different page, the record
393 	under it may have been removed, etc. */
394 
395 	btr_pcur_store_position(cursor, mtr);
396 
397 	return(FALSE);
398 }
399 
400 /*********************************************************//**
401 Moves the persistent cursor to the first record on the next page. Releases the
402 latch on the current page, and bufferunfixes it. Note that there must not be
403 modifications on the current page, as then the x-latch can be released only in
404 mtr_commit. */
405 UNIV_INTERN
406 void
btr_pcur_move_to_next_page(btr_pcur_t * cursor,mtr_t * mtr)407 btr_pcur_move_to_next_page(
408 /*=======================*/
409 	btr_pcur_t*	cursor,	/*!< in: persistent cursor; must be on the
410 				last record of the current page */
411 	mtr_t*		mtr)	/*!< in: mtr */
412 {
413 	ulint		next_page_no;
414 	ulint		space;
415 	ulint		zip_size;
416 	page_t*		page;
417 	buf_block_t*	next_block;
418 	page_t*		next_page;
419 
420 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
421 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
422 	ut_ad(btr_pcur_is_after_last_on_page(cursor));
423 
424 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
425 
426 	page = btr_pcur_get_page(cursor);
427 	next_page_no = btr_page_get_next(page, mtr);
428 	space = buf_block_get_space(btr_pcur_get_block(cursor));
429 	zip_size = buf_block_get_zip_size(btr_pcur_get_block(cursor));
430 
431 	ut_ad(next_page_no != FIL_NULL);
432 
433 	next_block = btr_block_get(space, zip_size, next_page_no,
434 				   cursor->latch_mode,
435 				   btr_pcur_get_btr_cur(cursor)->index, mtr);
436 	next_page = buf_block_get_frame(next_block);
437 #ifdef UNIV_BTR_DEBUG
438 	ut_a(page_is_comp(next_page) == page_is_comp(page));
439 	ut_a(btr_page_get_prev(next_page, mtr)
440 	     == buf_block_get_page_no(btr_pcur_get_block(cursor)));
441 #endif /* UNIV_BTR_DEBUG */
442 	next_block->check_index_page_at_flush = TRUE;
443 
444 	btr_leaf_page_release(btr_pcur_get_block(cursor),
445 			      cursor->latch_mode, mtr);
446 
447 	page_cur_set_before_first(next_block, btr_pcur_get_page_cur(cursor));
448 
449 	page_check_dir(next_page);
450 }
451 
452 /*********************************************************//**
453 Moves the persistent cursor backward if it is on the first record of the page.
454 Commits mtr. Note that to prevent a possible deadlock, the operation
455 first stores the position of the cursor, commits mtr, acquires the necessary
456 latches and restores the cursor position again before returning. The
457 alphabetical position of the cursor is guaranteed to be sensible on
458 return, but it may happen that the cursor is not positioned on the last
459 record of any page, because the structure of the tree may have changed
460 during the time when the cursor had no latches. */
461 UNIV_INTERN
462 void
btr_pcur_move_backward_from_page(btr_pcur_t * cursor,mtr_t * mtr)463 btr_pcur_move_backward_from_page(
464 /*=============================*/
465 	btr_pcur_t*	cursor,	/*!< in: persistent cursor, must be on the first
466 				record of the current page */
467 	mtr_t*		mtr)	/*!< in: mtr */
468 {
469 	ulint		prev_page_no;
470 	page_t*		page;
471 	buf_block_t*	prev_block;
472 	ulint		latch_mode;
473 	ulint		latch_mode2;
474 
475 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
476 	ut_ad(btr_pcur_is_before_first_on_page(cursor));
477 	ut_ad(!btr_pcur_is_before_first_in_tree(cursor, mtr));
478 
479 	latch_mode = cursor->latch_mode;
480 
481 	if (latch_mode == BTR_SEARCH_LEAF) {
482 
483 		latch_mode2 = BTR_SEARCH_PREV;
484 
485 	} else if (latch_mode == BTR_MODIFY_LEAF) {
486 
487 		latch_mode2 = BTR_MODIFY_PREV;
488 	} else {
489 		latch_mode2 = 0; /* To eliminate compiler warning */
490 		ut_error;
491 	}
492 
493 	btr_pcur_store_position(cursor, mtr);
494 
495 	mtr_commit(mtr);
496 
497 	mtr_start(mtr);
498 
499 	btr_pcur_restore_position(latch_mode2, cursor, mtr);
500 
501 	page = btr_pcur_get_page(cursor);
502 
503 	prev_page_no = btr_page_get_prev(page, mtr);
504 
505 	if (prev_page_no == FIL_NULL) {
506 	} else if (btr_pcur_is_before_first_on_page(cursor)) {
507 
508 		prev_block = btr_pcur_get_btr_cur(cursor)->left_block;
509 
510 		btr_leaf_page_release(btr_pcur_get_block(cursor),
511 				      latch_mode, mtr);
512 
513 		page_cur_set_after_last(prev_block,
514 					btr_pcur_get_page_cur(cursor));
515 	} else {
516 
517 		/* The repositioned cursor did not end on an infimum record on
518 		a page. Cursor repositioning acquired a latch also on the
519 		previous page, but we do not need the latch: release it. */
520 
521 		prev_block = btr_pcur_get_btr_cur(cursor)->left_block;
522 
523 		btr_leaf_page_release(prev_block, latch_mode, mtr);
524 	}
525 
526 	cursor->latch_mode = latch_mode;
527 
528 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
529 }
530 
531 /*********************************************************//**
532 Moves the persistent cursor to the previous record in the tree. If no records
533 are left, the cursor stays 'before first in tree'.
534 @return	TRUE if the cursor was not before first in tree */
535 UNIV_INTERN
536 ibool
btr_pcur_move_to_prev(btr_pcur_t * cursor,mtr_t * mtr)537 btr_pcur_move_to_prev(
538 /*==================*/
539 	btr_pcur_t*	cursor,	/*!< in: persistent cursor; NOTE that the
540 				function may release the page latch */
541 	mtr_t*		mtr)	/*!< in: mtr */
542 {
543 	ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
544 	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
545 
546 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
547 
548 	if (btr_pcur_is_before_first_on_page(cursor)) {
549 
550 		if (btr_pcur_is_before_first_in_tree(cursor, mtr)) {
551 
552 			return(FALSE);
553 		}
554 
555 		btr_pcur_move_backward_from_page(cursor, mtr);
556 
557 		return(TRUE);
558 	}
559 
560 	btr_pcur_move_to_prev_on_page(cursor);
561 
562 	return(TRUE);
563 }
564 
565 /**************************************************************//**
566 If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
567 user record satisfying the search condition, in the case PAGE_CUR_L or
568 PAGE_CUR_LE, on the last user record. If no such user record exists, then
569 in the first case sets the cursor after last in tree, and in the latter case
570 before first in tree. The latching mode must be BTR_SEARCH_LEAF or
571 BTR_MODIFY_LEAF. */
572 UNIV_INTERN
573 void
btr_pcur_open_on_user_rec_func(dict_index_t * index,const dtuple_t * tuple,ulint mode,ulint latch_mode,btr_pcur_t * cursor,const char * file,ulint line,mtr_t * mtr)574 btr_pcur_open_on_user_rec_func(
575 /*===========================*/
576 	dict_index_t*	index,		/*!< in: index */
577 	const dtuple_t*	tuple,		/*!< in: tuple on which search done */
578 	ulint		mode,		/*!< in: PAGE_CUR_L, ... */
579 	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF or
580 					BTR_MODIFY_LEAF */
581 	btr_pcur_t*	cursor,		/*!< in: memory buffer for persistent
582 					cursor */
583 	const char*	file,		/*!< in: file name */
584 	ulint		line,		/*!< in: line where called */
585 	mtr_t*		mtr)		/*!< in: mtr */
586 {
587 	btr_pcur_open_low(index, 0, tuple, mode, latch_mode, cursor,
588 			  file, line, mtr);
589 
590 	if ((mode == PAGE_CUR_GE) || (mode == PAGE_CUR_G)) {
591 
592 		if (btr_pcur_is_after_last_on_page(cursor)) {
593 
594 			btr_pcur_move_to_next_user_rec(cursor, mtr);
595 		}
596 	} else {
597 		ut_ad((mode == PAGE_CUR_LE) || (mode == PAGE_CUR_L));
598 
599 		/* Not implemented yet */
600 
601 		ut_error;
602 	}
603 }
604