1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc.,
15 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 *****************************************************************************/
18 
19 /**************************************************//**
20 @file trx/trx0purge.c
21 Purge old versions
22 
23 Created 3/26/1996 Heikki Tuuri
24 *******************************************************/
25 
26 #include "trx0purge.h"
27 
28 #ifdef UNIV_NONINL
29 #include "trx0purge.ic"
30 #endif
31 
32 #include "fsp0fsp.h"
33 #include "mach0data.h"
34 #include "mtr0log.h"
35 #include "trx0rseg.h"
36 #include "trx0trx.h"
37 #include "trx0roll.h"
38 #include "read0read.h"
39 #include "fut0fut.h"
40 #include "que0que.h"
41 #include "row0purge.h"
42 #include "row0upd.h"
43 #include "trx0rec.h"
44 #include "srv0srv.h"
45 #include "os0thread.h"
46 
/** The global data structure coordinating a purge. It is NULL until
trx_purge_sys_create() allocates it and is reset to NULL by
trx_purge_sys_close(). */
UNIV_INTERN trx_purge_t*	purge_sys = NULL;

/** A dummy undo record used as a return value when we have a whole undo log
which needs no purge: trx_purge_get_next_rec() returns the address of this
object in that case. */
UNIV_INTERN trx_undo_rec_t	trx_purge_dummy_rec;

#ifdef UNIV_PFS_RWLOCK
/* Key to register trx_purge_latch (purge_sys->latch) with performance
schema */
UNIV_INTERN mysql_pfs_key_t	trx_purge_latch_key;
#endif /* UNIV_PFS_RWLOCK */

#ifdef UNIV_PFS_MUTEX
/* Key to register purge_sys_bh_mutex (purge_sys->bh_mutex) with performance
schema */
UNIV_INTERN mysql_pfs_key_t	purge_sys_bh_mutex_key;
#endif /* UNIV_PFS_MUTEX */

#ifdef UNIV_DEBUG
/* Debug-build only flag. It is not referenced in this part of the file.
NOTE(review): judging by the name it limits purge to updating its view;
confirm against the code that reads it. */
UNIV_INTERN my_bool		srv_purge_view_update_only_debug;
#endif /* UNIV_DEBUG */
67 
68 /*****************************************************************//**
69 Checks if trx_id is >= purge_view: then it is guaranteed that its update
70 undo log still exists in the system.
71 @return TRUE if is sure that it is preserved, also if the function
72 returns FALSE, it is possible that the undo log still exists in the
73 system */
74 UNIV_INTERN
75 ibool
trx_purge_update_undo_must_exist(trx_id_t trx_id)76 trx_purge_update_undo_must_exist(
77 /*=============================*/
78 	trx_id_t	trx_id)	/*!< in: transaction id */
79 {
80 #ifdef UNIV_SYNC_DEBUG
81 	ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
82 #endif /* UNIV_SYNC_DEBUG */
83 
84 	if (!read_view_sees_trx_id(purge_sys->view, trx_id)) {
85 
86 		return(TRUE);
87 	}
88 
89 	return(FALSE);
90 }
91 
92 /*=================== PURGE RECORD ARRAY =============================*/
93 
94 /*******************************************************************//**
95 Stores info of an undo log record during a purge.
96 @return	pointer to the storage cell */
97 static
98 trx_undo_inf_t*
trx_purge_arr_store_info(trx_id_t trx_no,undo_no_t undo_no)99 trx_purge_arr_store_info(
100 /*=====================*/
101 	trx_id_t	trx_no,	/*!< in: transaction number */
102 	undo_no_t	undo_no)/*!< in: undo number */
103 {
104 	trx_undo_inf_t*	cell;
105 	trx_undo_arr_t*	arr;
106 	ulint		i;
107 
108 	arr = purge_sys->arr;
109 
110 	for (i = 0;; i++) {
111 		cell = trx_undo_arr_get_nth_info(arr, i);
112 
113 		if (!(cell->in_use)) {
114 			/* Not in use, we may store here */
115 			cell->undo_no = undo_no;
116 			cell->trx_no = trx_no;
117 			cell->in_use = TRUE;
118 
119 			arr->n_used++;
120 
121 			return(cell);
122 		}
123 	}
124 }
125 
126 /*******************************************************************//**
127 Removes info of an undo log record during a purge. */
128 UNIV_INLINE
129 void
trx_purge_arr_remove_info(trx_undo_inf_t * cell)130 trx_purge_arr_remove_info(
131 /*======================*/
132 	trx_undo_inf_t*	cell)	/*!< in: pointer to the storage cell */
133 {
134 	trx_undo_arr_t*	arr;
135 
136 	arr = purge_sys->arr;
137 
138 	cell->in_use = FALSE;
139 
140 	ut_ad(arr->n_used > 0);
141 
142 	arr->n_used--;
143 }
144 
145 /*******************************************************************//**
146 Gets the biggest pair of a trx number and an undo number in a purge array. */
147 static
148 void
trx_purge_arr_get_biggest(trx_undo_arr_t * arr,trx_id_t * trx_no,undo_no_t * undo_no)149 trx_purge_arr_get_biggest(
150 /*======================*/
151 	trx_undo_arr_t*	arr,	/*!< in: purge array */
152 	trx_id_t*	trx_no,	/*!< out: transaction number: 0
153 				if array is empty */
154 	undo_no_t*	undo_no)/*!< out: undo number */
155 {
156 	trx_undo_inf_t*	cell;
157 	trx_id_t	pair_trx_no;
158 	undo_no_t	pair_undo_no;
159 	ulint		i;
160 	ulint		n;
161 
162 	n = arr->n_used;
163 	pair_trx_no = 0;
164 	pair_undo_no = 0;
165 
166 	if (n) {
167 		for (i = 0;; i++) {
168 			cell = trx_undo_arr_get_nth_info(arr, i);
169 
170 			if (!cell->in_use) {
171 				continue;
172 			}
173 
174 			if ((cell->trx_no > pair_trx_no)
175 			    || ((cell->trx_no == pair_trx_no)
176 				&& cell->undo_no >= pair_undo_no)) {
177 
178 				pair_trx_no = cell->trx_no;
179 				pair_undo_no = cell->undo_no;
180 			}
181 
182 			if (!--n) {
183 				break;
184 			}
185 		}
186 	}
187 
188 	*trx_no = pair_trx_no;
189 	*undo_no = pair_undo_no;
190 }
191 
192 /****************************************************************//**
193 Builds a purge 'query' graph. The actual purge is performed by executing
194 this query graph.
195 @return	own: the query graph */
196 static
197 que_t*
trx_purge_graph_build(void)198 trx_purge_graph_build(void)
199 /*=======================*/
200 {
201 	mem_heap_t*	heap;
202 	que_fork_t*	fork;
203 	que_thr_t*	thr;
204 	/*	que_thr_t*	thr2; */
205 
206 	heap = mem_heap_create(512);
207 	fork = que_fork_create(NULL, NULL, QUE_FORK_PURGE, heap);
208 	fork->trx = purge_sys->trx;
209 
210 	thr = que_thr_create(fork, heap);
211 
212 	thr->child = row_purge_node_create(thr, heap);
213 
214 	/*	thr2 = que_thr_create(fork, fork, heap);
215 
216 	thr2->child = row_purge_node_create(fork, thr2, heap);	 */
217 
218 	return(fork);
219 }
220 
221 /********************************************************************//**
222 Creates the global purge system control structure and inits the history
223 mutex. */
224 UNIV_INTERN
225 void
trx_purge_sys_create(ib_bh_t * ib_bh)226 trx_purge_sys_create(
227 /*=================*/
228 	ib_bh_t*	ib_bh)	/*!< in, own: UNDO log min binary heap */
229 {
230 	ut_ad(mutex_own(&kernel_mutex));
231 
232 	purge_sys = mem_zalloc(sizeof(trx_purge_t));
233 
234 	/* Take ownership of ib_bh, we are responsible for freeing it. */
235 	purge_sys->ib_bh = ib_bh;
236 	purge_sys->state = TRX_STOP_PURGE;
237 
238 	purge_sys->n_pages_handled = 0;
239 
240 	purge_sys->purge_trx_no = 0;
241 	purge_sys->purge_undo_no = 0;
242 	purge_sys->next_stored = FALSE;
243 	ut_d(purge_sys->done_trx_no = 0);
244 
245 	rw_lock_create(trx_purge_latch_key,
246 		       &purge_sys->latch, SYNC_PURGE_LATCH);
247 
248 	mutex_create(
249 		purge_sys_bh_mutex_key, &purge_sys->bh_mutex,
250 		SYNC_PURGE_QUEUE);
251 
252 	purge_sys->heap = mem_heap_create(256);
253 
254 	purge_sys->arr = trx_undo_arr_create();
255 
256 	purge_sys->sess = sess_open();
257 
258 	purge_sys->trx = purge_sys->sess->trx;
259 
260 	purge_sys->trx->is_purge = 1;
261 
262 	ut_a(trx_start_low(purge_sys->trx, ULINT_UNDEFINED));
263 
264 	purge_sys->query = trx_purge_graph_build();
265 
266 	purge_sys->view = read_view_oldest_copy_or_open_new(0,
267 							    purge_sys->heap);
268 }
269 
270 /************************************************************************
271 Frees the global purge system control structure. */
272 UNIV_INTERN
273 void
trx_purge_sys_close(void)274 trx_purge_sys_close(void)
275 /*======================*/
276 {
277 	ut_ad(!mutex_own(&kernel_mutex));
278 
279 	que_graph_free(purge_sys->query);
280 
281 	ut_a(purge_sys->sess->trx->is_purge);
282 	purge_sys->sess->trx->conc_state = TRX_NOT_STARTED;
283 	sess_close(purge_sys->sess);
284 	purge_sys->sess = NULL;
285 
286 	if (purge_sys->view != NULL) {
287 		/* Because acquiring the kernel mutex is a pre-condition
288 		of read_view_close(). We don't really need it here. */
289 		mutex_enter(&kernel_mutex);
290 
291 		read_view_close(purge_sys->view);
292 		purge_sys->view = NULL;
293 
294 		mutex_exit(&kernel_mutex);
295 	}
296 
297 	trx_undo_arr_free(purge_sys->arr);
298 
299 	rw_lock_free(&purge_sys->latch);
300 	mutex_free(&purge_sys->bh_mutex);
301 
302 	mem_heap_free(purge_sys->heap);
303 
304 	ib_bh_free(purge_sys->ib_bh);
305 
306 	mem_free(purge_sys);
307 
308 	purge_sys = NULL;
309 }
310 
311 /*================ UNDO LOG HISTORY LIST =============================*/
312 
313 /********************************************************************//**
314 Adds the update undo log as the first log in the history list. Removes the
315 update undo log segment from the rseg slot if it is too big for reuse. */
316 UNIV_INTERN
317 void
trx_purge_add_update_undo_to_history(trx_t * trx,page_t * undo_page,mtr_t * mtr)318 trx_purge_add_update_undo_to_history(
319 /*=================================*/
320 	trx_t*	trx,		/*!< in: transaction */
321 	page_t*	undo_page,	/*!< in: update undo log header page,
322 				x-latched */
323 	mtr_t*	mtr)		/*!< in: mtr */
324 {
325 	trx_undo_t*	undo;
326 	trx_rsegf_t*	rseg_header;
327 	trx_ulogf_t*	undo_header;
328 
329 	undo = trx->update_undo;
330 
331 	ut_ad(undo);
332 
333 	ut_ad(mutex_own(&undo->rseg->mutex));
334 
335 	rseg_header = trx_rsegf_get(
336 		undo->rseg->space, undo->rseg->zip_size, undo->rseg->page_no,
337 		mtr);
338 
339 	undo_header = undo_page + undo->hdr_offset;
340 	/* Add the log as the first in the history list */
341 
342 	if (undo->state != TRX_UNDO_CACHED) {
343 		ulint		hist_size;
344 #ifdef UNIV_DEBUG
345 		trx_usegf_t*	seg_header = undo_page + TRX_UNDO_SEG_HDR;
346 #endif /* UNIV_DEBUG */
347 
348 		/* The undo log segment will not be reused */
349 
350 		if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) {
351 			fprintf(stderr,
352 				"InnoDB: Error: undo->id is %lu\n",
353 				(ulong) undo->id);
354 			ut_error;
355 		}
356 
357 		trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr);
358 
359 		hist_size = mtr_read_ulint(
360 			rseg_header + TRX_RSEG_HISTORY_SIZE, MLOG_4BYTES, mtr);
361 
362 		ut_ad(undo->size == flst_get_len(
363 			      seg_header + TRX_UNDO_PAGE_LIST, mtr));
364 
365 		mlog_write_ulint(
366 			rseg_header + TRX_RSEG_HISTORY_SIZE,
367 			hist_size + undo->size, MLOG_4BYTES, mtr);
368 	}
369 
370 	flst_add_first(
371 		rseg_header + TRX_RSEG_HISTORY,
372 		undo_header + TRX_UNDO_HISTORY_NODE, mtr);
373 
374 	/* Write the trx number to the undo log header */
375 
376 	mlog_write_ull(undo_header + TRX_UNDO_TRX_NO, trx->no, mtr);
377 
378 	/* Write information about delete markings to the undo log header */
379 
380 	if (!undo->del_marks) {
381 		mlog_write_ulint(
382 			undo_header + TRX_UNDO_DEL_MARKS, FALSE,
383 			MLOG_2BYTES, mtr);
384 	}
385 
386 	if (undo->rseg->last_page_no == FIL_NULL) {
387 		undo->rseg->last_trx_no = trx->no;
388 		undo->rseg->last_offset = undo->hdr_offset;
389 		undo->rseg->last_page_no = undo->hdr_page_no;
390 		undo->rseg->last_del_marks = undo->del_marks;
391 
392 		/* FIXME: Add a bin heap validate function to check that
393 		the rseg exists. */
394 	}
395 
396 	mutex_enter(&kernel_mutex);
397 	trx_sys->rseg_history_len++;
398 	mutex_exit(&kernel_mutex);
399 
400 	if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) {
401 		/* Inform the purge thread that there is work to do. */
402 		srv_wake_purge_thread_if_not_active();
403 	}
404 }
405 
406 /**********************************************************************//**
407 Frees an undo log segment which is in the history list. Cuts the end of the
408 history list at the youngest undo log in this segment. */
409 static
410 void
trx_purge_free_segment(trx_rseg_t * rseg,fil_addr_t hdr_addr,ulint n_removed_logs)411 trx_purge_free_segment(
412 /*===================*/
413 	trx_rseg_t*	rseg,		/*!< in: rollback segment */
414 	fil_addr_t	hdr_addr,	/*!< in: the file address of log_hdr */
415 	ulint		n_removed_logs)	/*!< in: count of how many undo logs we
416 					will cut off from the end of the
417 					history list */
418 {
419 	page_t*		undo_page;
420 	trx_rsegf_t*	rseg_hdr;
421 	trx_ulogf_t*	log_hdr;
422 	trx_usegf_t*	seg_hdr;
423 	ibool		freed;
424 	ulint		seg_size;
425 	ulint		hist_size;
426 	ibool		marked		= FALSE;
427 	mtr_t		mtr;
428 
429 	/*	fputs("Freeing an update undo log segment\n", stderr); */
430 
431 loop:
432 	mtr_start(&mtr);
433 	mutex_enter(&(rseg->mutex));
434 
435 	rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
436 				 rseg->page_no, &mtr);
437 
438 	undo_page = trx_undo_page_get(rseg->space, rseg->zip_size,
439 				      hdr_addr.page, &mtr);
440 	seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
441 	log_hdr = undo_page + hdr_addr.boffset;
442 
443 	/* Mark the last undo log totally purged, so that if the system
444 	crashes, the tail of the undo log will not get accessed again. The
445 	list of pages in the undo log tail gets inconsistent during the
446 	freeing of the segment, and therefore purge should not try to access
447 	them again. */
448 
449 	if (!marked) {
450 		mlog_write_ulint(log_hdr + TRX_UNDO_DEL_MARKS, FALSE,
451 				 MLOG_2BYTES, &mtr);
452 		marked = TRUE;
453 	}
454 
455 	freed = fseg_free_step_not_header(seg_hdr + TRX_UNDO_FSEG_HEADER,
456 					  &mtr);
457 	if (!freed) {
458 		mutex_exit(&(rseg->mutex));
459 		mtr_commit(&mtr);
460 
461 		goto loop;
462 	}
463 
464 	/* The page list may now be inconsistent, but the length field
465 	stored in the list base node tells us how big it was before we
466 	started the freeing. */
467 
468 	seg_size = flst_get_len(seg_hdr + TRX_UNDO_PAGE_LIST, &mtr);
469 
470 	/* We may free the undo log segment header page; it must be freed
471 	within the same mtr as the undo log header is removed from the
472 	history list: otherwise, in case of a database crash, the segment
473 	could become inaccessible garbage in the file space. */
474 
475 	flst_cut_end(rseg_hdr + TRX_RSEG_HISTORY,
476 		     log_hdr + TRX_UNDO_HISTORY_NODE, n_removed_logs, &mtr);
477 
478 	mutex_enter(&kernel_mutex);
479 	ut_ad(trx_sys->rseg_history_len >= n_removed_logs);
480 	trx_sys->rseg_history_len -= n_removed_logs;
481 	mutex_exit(&kernel_mutex);
482 
483 	freed = FALSE;
484 
485 	while (!freed) {
486 		/* Here we assume that a file segment with just the header
487 		page can be freed in a few steps, so that the buffer pool
488 		is not flooded with bufferfixed pages: see the note in
489 		fsp0fsp.c. */
490 
491 		freed = fseg_free_step(seg_hdr + TRX_UNDO_FSEG_HEADER,
492 				       &mtr);
493 	}
494 
495 	hist_size = mtr_read_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE,
496 				   MLOG_4BYTES, &mtr);
497 	ut_ad(hist_size >= seg_size);
498 
499 	mlog_write_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE,
500 			 hist_size - seg_size, MLOG_4BYTES, &mtr);
501 
502 	ut_ad(rseg->curr_size >= seg_size);
503 
504 	rseg->curr_size -= seg_size;
505 
506 	mutex_exit(&(rseg->mutex));
507 
508 	mtr_commit(&mtr);
509 }
510 
511 /********************************************************************//**
512 Removes unnecessary history data from a rollback segment. */
513 static
514 void
trx_purge_truncate_rseg_history(trx_rseg_t * rseg,trx_id_t limit_trx_no,undo_no_t limit_undo_no)515 trx_purge_truncate_rseg_history(
516 /*============================*/
517 	trx_rseg_t*	rseg,		/*!< in: rollback segment */
518 	trx_id_t	limit_trx_no,	/*!< in: remove update undo logs whose
519 					trx number is < limit_trx_no */
520 	undo_no_t	limit_undo_no)	/*!< in: if transaction number is equal
521 					to limit_trx_no, truncate undo records
522 					with undo number < limit_undo_no */
523 {
524 	fil_addr_t	hdr_addr;
525 	fil_addr_t	prev_hdr_addr;
526 	trx_rsegf_t*	rseg_hdr;
527 	page_t*		undo_page;
528 	trx_ulogf_t*	log_hdr;
529 	trx_usegf_t*	seg_hdr;
530 	ulint		n_removed_logs	= 0;
531 	mtr_t		mtr;
532 	trx_id_t	undo_trx_no;
533 
534 	mtr_start(&mtr);
535 	mutex_enter(&(rseg->mutex));
536 
537 	rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
538 				 rseg->page_no, &mtr);
539 
540 	hdr_addr = trx_purge_get_log_from_hist(
541 		flst_get_last(rseg_hdr + TRX_RSEG_HISTORY, &mtr));
542 loop:
543 	if (hdr_addr.page == FIL_NULL) {
544 
545 		mutex_exit(&(rseg->mutex));
546 
547 		mtr_commit(&mtr);
548 
549 		return;
550 	}
551 
552 	undo_page = trx_undo_page_get(rseg->space, rseg->zip_size,
553 				      hdr_addr.page, &mtr);
554 
555 	log_hdr = undo_page + hdr_addr.boffset;
556 	undo_trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);
557 
558 	if (undo_trx_no >= limit_trx_no) {
559 		if (undo_trx_no == limit_trx_no) {
560 			trx_undo_truncate_start(rseg, rseg->space,
561 						hdr_addr.page,
562 						hdr_addr.boffset,
563 						limit_undo_no);
564 		}
565 
566 		mutex_enter(&kernel_mutex);
567 		ut_a(trx_sys->rseg_history_len >= n_removed_logs);
568 		trx_sys->rseg_history_len -= n_removed_logs;
569 		mutex_exit(&kernel_mutex);
570 
571 		flst_truncate_end(rseg_hdr + TRX_RSEG_HISTORY,
572 				  log_hdr + TRX_UNDO_HISTORY_NODE,
573 				  n_removed_logs, &mtr);
574 
575 		mutex_exit(&(rseg->mutex));
576 		mtr_commit(&mtr);
577 
578 		return;
579 	}
580 
581 	prev_hdr_addr = trx_purge_get_log_from_hist(
582 		flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr));
583 	n_removed_logs++;
584 
585 	seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
586 
587 	if ((mach_read_from_2(seg_hdr + TRX_UNDO_STATE) == TRX_UNDO_TO_PURGE)
588 	    && (mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG) == 0)) {
589 
590 		/* We can free the whole log segment */
591 
592 		mutex_exit(&(rseg->mutex));
593 		mtr_commit(&mtr);
594 
595 		trx_purge_free_segment(rseg, hdr_addr, n_removed_logs);
596 
597 		n_removed_logs = 0;
598 	} else {
599 		mutex_exit(&(rseg->mutex));
600 		mtr_commit(&mtr);
601 	}
602 
603 	mtr_start(&mtr);
604 	mutex_enter(&(rseg->mutex));
605 
606 	rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
607 				 rseg->page_no, &mtr);
608 
609 	hdr_addr = prev_hdr_addr;
610 
611 	goto loop;
612 }
613 
614 /********************************************************************//**
615 Removes unnecessary history data from rollback segments. NOTE that when this
616 function is called, the caller must not have any latches on undo log pages! */
617 static
618 void
trx_purge_truncate_history(void)619 trx_purge_truncate_history(void)
620 /*============================*/
621 {
622 	trx_rseg_t*	rseg;
623 	trx_id_t	limit_trx_no;
624 	undo_no_t	limit_undo_no;
625 
626 	trx_purge_arr_get_biggest(
627 		purge_sys->arr, &limit_trx_no, &limit_undo_no);
628 
629 	if (limit_trx_no == 0) {
630 
631 		limit_trx_no = purge_sys->purge_trx_no;
632 		limit_undo_no = purge_sys->purge_undo_no;
633 	}
634 
635 	/* We play safe and set the truncate limit at most to the purge view
636 	low_limit number, though this is not necessary */
637 
638 	if (limit_trx_no >= purge_sys->view->low_limit_no) {
639 		limit_trx_no = purge_sys->view->low_limit_no;
640 		limit_undo_no = 0;
641 	}
642 
643 	ut_ad(limit_trx_no <= purge_sys->view->low_limit_no);
644 
645 	for (rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
646 	     rseg != NULL;
647 	     rseg = UT_LIST_GET_NEXT(rseg_list, rseg)) {
648 
649 		trx_purge_truncate_rseg_history(
650 			rseg, limit_trx_no, limit_undo_no);
651 	}
652 }
653 
654 /********************************************************************//**
655 Does a truncate if the purge array is empty. NOTE that when this function is
656 called, the caller must not have any latches on undo log pages! */
657 UNIV_INLINE
658 void
trx_purge_truncate_if_arr_empty(void)659 trx_purge_truncate_if_arr_empty(void)
660 /*=================================*/
661 {
662 	static ulint	count;
663 
664 #ifdef UNIV_DEBUG
665 	if (purge_sys->arr->n_used == 0) {
666 		purge_sys->done_trx_no = purge_sys->purge_trx_no;
667 	}
668 #endif /* UNIV_DEBUG */
669 
670 	if (!(++count % TRX_SYS_N_RSEGS) && purge_sys->arr->n_used == 0) {
671 
672 		trx_purge_truncate_history();
673 	}
674 }
675 
676 /***********************************************************************//**
677 Updates the last not yet purged history log info in rseg when we have purged
678 a whole undo log. Advances also purge_sys->purge_trx_no past the purged log. */
679 static
680 void
trx_purge_rseg_get_next_history_log(trx_rseg_t * rseg)681 trx_purge_rseg_get_next_history_log(
682 /*================================*/
683 	trx_rseg_t*	rseg)	/*!< in: rollback segment */
684 {
685 	page_t*		undo_page;
686 	trx_ulogf_t*	log_hdr;
687 	fil_addr_t	prev_log_addr;
688 	trx_id_t	trx_no;
689 	ibool		del_marks;
690 	mtr_t		mtr;
691 	rseg_queue_t	rseg_queue;
692 	const void*	ptr;
693 
694 	mutex_enter(&(rseg->mutex));
695 
696 	ut_a(rseg->last_page_no != FIL_NULL);
697 
698 	purge_sys->purge_trx_no = rseg->last_trx_no + 1;
699 	purge_sys->purge_undo_no = 0;
700 	purge_sys->next_stored = FALSE;
701 
702 	mtr_start(&mtr);
703 
704 	undo_page = trx_undo_page_get_s_latched(
705 		rseg->space, rseg->zip_size, rseg->last_page_no, &mtr);
706 
707 	log_hdr = undo_page + rseg->last_offset;
708 
709 	/* Increase the purge page count by one for every handled log */
710 
711 	purge_sys->n_pages_handled++;
712 
713 	prev_log_addr = trx_purge_get_log_from_hist(
714 		flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr));
715 
716 	if (prev_log_addr.page == FIL_NULL) {
717 		/* No logs left in the history list */
718 
719 		rseg->last_page_no = FIL_NULL;
720 
721 		mutex_exit(&(rseg->mutex));
722 		mtr_commit(&mtr);
723 
724 #ifdef UNIV_DEBUG
725 		mutex_enter(&kernel_mutex);
726 
727 		/* Add debug code to track history list corruption reported
728 		on the MySQL mailing list on Nov 9, 2004. The fut0lst.c
729 		file-based list was corrupt. The prev node pointer was
730 		FIL_NULL, even though the list length was over 8 million nodes!
731 		We assume that purge truncates the history list in large
732 		size pieces, and if we here reach the head of the list, the
733 		list cannot be longer than 2000 000 undo logs now. */
734 
735 		if (trx_sys->rseg_history_len > 2000000) {
736 			ut_print_timestamp(stderr);
737 			fprintf(stderr,
738 				" InnoDB: Warning: purge reached the"
739 				" head of the history list,\n"
740 				"InnoDB: but its length is still"
741 				" reported as %lu!."
742 				" This can happen becasue a long"
743 				" running transaction is  withholding"
744 				" purging of undo logs or a read"
745 				" view is open. Please try to commit"
746 				" the long running transaction.",
747 				(ulong) trx_sys->rseg_history_len);
748 		}
749 
750 		mutex_exit(&kernel_mutex);
751 #endif
752 		return;
753 	}
754 
755 	mutex_exit(&(rseg->mutex));
756 	mtr_commit(&mtr);
757 
758 	/* Read the trx number and del marks from the previous log header */
759 	mtr_start(&mtr);
760 
761 	log_hdr = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
762 					      prev_log_addr.page, &mtr)
763 		+ prev_log_addr.boffset;
764 
765 	trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);
766 
767 	del_marks = mach_read_from_2(log_hdr + TRX_UNDO_DEL_MARKS);
768 
769 	mtr_commit(&mtr);
770 
771 	mutex_enter(&(rseg->mutex));
772 
773 	rseg->last_page_no = prev_log_addr.page;
774 	rseg->last_offset = prev_log_addr.boffset;
775 	rseg->last_trx_no = trx_no;
776 	rseg->last_del_marks = del_marks;
777 
778 	rseg_queue.rseg = rseg;
779 	rseg_queue.trx_no = rseg->last_trx_no;
780 
781 	/* Purge can also produce events, however these are already ordered
782 	in the rollback segment and any user generated event will be greater
783 	than the events that Purge produces. ie. Purge can never produce
784 	events from an empty rollback segment. */
785 
786 	mutex_enter(&purge_sys->bh_mutex);
787 
788 	ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
789 	ut_a(ptr != NULL);
790 
791 	mutex_exit(&purge_sys->bh_mutex);
792 
793 	mutex_exit(&(rseg->mutex));
794 }
795 
796 /***********************************************************************//**
797 Chooses the rollback segment with the smallest trx_id.
798 @return zip_size if log is for a compressed table, ULINT_UNDEFINED if
799 	no rollback segments to purge, 0 for non compressed tables. */
800 static
801 ulint
trx_purge_get_rseg_with_min_trx_id(trx_purge_t * purge_sys)802 trx_purge_get_rseg_with_min_trx_id(
803 /*===============================*/
804 	trx_purge_t*	purge_sys)		/*!< in/out: purge instance */
805 
806 {
807 	ulint		zip_size = 0;
808 
809 	mutex_enter(&purge_sys->bh_mutex);
810 
811 	/* Only purge consumes events from the binary heap, user
812 	threads only produce the events. */
813 
814 	if (!ib_bh_is_empty(purge_sys->ib_bh)) {
815 		trx_rseg_t*	rseg;
816 
817 		rseg = ((rseg_queue_t*) ib_bh_first(purge_sys->ib_bh))->rseg;
818 		ib_bh_pop(purge_sys->ib_bh);
819 
820 		mutex_exit(&purge_sys->bh_mutex);
821 
822 		purge_sys->rseg = rseg;
823 	} else {
824 		mutex_exit(&purge_sys->bh_mutex);
825 
826 		purge_sys->rseg = NULL;
827 
828 		return(ULINT_UNDEFINED);
829 	}
830 
831 	ut_a(purge_sys->rseg != NULL);
832 
833 	mutex_enter(&purge_sys->rseg->mutex);
834 
835 	ut_a(purge_sys->rseg->last_page_no != FIL_NULL);
836 
837 	/* We assume in purge of externally stored fields
838 	that space id == 0 */
839 	ut_a(purge_sys->rseg->space == 0);
840 
841 	zip_size = purge_sys->rseg->zip_size;
842 
843 	ut_a(purge_sys->purge_trx_no <= purge_sys->rseg->last_trx_no);
844 
845 	purge_sys->purge_trx_no = purge_sys->rseg->last_trx_no;
846 
847 	purge_sys->hdr_offset = purge_sys->rseg->last_offset;
848 
849 	purge_sys->hdr_page_no = purge_sys->rseg->last_page_no;
850 
851 	mutex_exit(&purge_sys->rseg->mutex);
852 
853 	return(zip_size);
854 }
855 
856 /***********************************************************************//**
857 Position the purge sys "iterator" on the undo record to use for purging. */
858 static
859 void
trx_purge_read_undo_rec(trx_purge_t * purge_sys,ulint zip_size)860 trx_purge_read_undo_rec(
861 /*====================*/
862 	trx_purge_t*	purge_sys,		/*!< in/out: purge instance */
863 	ulint		zip_size)		/*!< in: block size or 0 */
864 {
865 	ulint		page_no;
866 	ulint		offset = 0;
867 	ib_uint64_t	undo_no = 0;
868 
869 	purge_sys->hdr_offset = purge_sys->rseg->last_offset;
870 	page_no = purge_sys->hdr_page_no = purge_sys->rseg->last_page_no;
871 
872 	if (purge_sys->rseg->last_del_marks) {
873 		mtr_t		mtr;
874 		trx_undo_rec_t*	undo_rec;
875 
876 		mtr_start(&mtr);
877 
878 		undo_rec = trx_undo_get_first_rec(
879 			0 /* System space id */, zip_size,
880 			purge_sys->hdr_page_no,
881 			purge_sys->hdr_offset, RW_S_LATCH, &mtr);
882 
883 		if (undo_rec != NULL) {
884 			offset = page_offset(undo_rec);
885 			undo_no = trx_undo_rec_get_undo_no(undo_rec);
886 			page_no = page_get_page_no(page_align(undo_rec));
887 		}
888 
889 		mtr_commit(&mtr);
890 	}
891 
892 	purge_sys->offset = offset;
893 	purge_sys->page_no = page_no;
894 	purge_sys->purge_undo_no = undo_no;
895 
896 	purge_sys->next_stored = TRUE;
897 }
898 
899 /***********************************************************************//**
900 Chooses the next undo log to purge and updates the info in purge_sys. This
901 function is used to initialize purge_sys when the next record to purge is
902 not known, and also to update the purge system info on the next record when
903 purge has handled the whole undo log for a transaction. */
904 static
905 void
trx_purge_choose_next_log(void)906 trx_purge_choose_next_log(void)
907 /*===========================*/
908 {
909 	ulint		zip_size;
910 
911 	ut_ad(purge_sys->next_stored == FALSE);
912 
913 	zip_size = trx_purge_get_rseg_with_min_trx_id(purge_sys);
914 
915 	if (purge_sys->rseg != NULL) {
916 
917 		trx_purge_read_undo_rec(purge_sys, zip_size);
918 	} else {
919 		/* There is nothing to do yet. */
920 		os_thread_yield();
921 	}
922 }
923 
924 /***********************************************************************//**
925 Gets the next record to purge and updates the info in the purge system.
926 @return	copy of an undo log record or pointer to the dummy undo log record */
927 static
928 trx_undo_rec_t*
trx_purge_get_next_rec(mem_heap_t * heap)929 trx_purge_get_next_rec(
930 /*===================*/
931 	mem_heap_t*	heap)	/*!< in: memory heap where copied */
932 {
933 	trx_undo_rec_t*	rec;
934 	trx_undo_rec_t*	rec_copy;
935 	trx_undo_rec_t*	rec2;
936 	trx_undo_rec_t*	next_rec;
937 	page_t*		undo_page;
938 	page_t*		page;
939 	ulint		offset;
940 	ulint		page_no;
941 	ulint		space;
942 	ulint		zip_size;
943 	ulint		type;
944 	ulint		cmpl_info;
945 	mtr_t		mtr;
946 
947 	ut_ad(purge_sys->next_stored);
948 
949 	space = purge_sys->rseg->space;
950 	zip_size = purge_sys->rseg->zip_size;
951 	page_no = purge_sys->page_no;
952 	offset = purge_sys->offset;
953 
954 	if (offset == 0) {
955 		/* It is the dummy undo log record, which means that there is
956 		no need to purge this undo log */
957 
958 		trx_purge_rseg_get_next_history_log(purge_sys->rseg);
959 
960 		/* Look for the next undo log and record to purge */
961 
962 		trx_purge_choose_next_log();
963 
964 		return(&trx_purge_dummy_rec);
965 	}
966 
967 	mtr_start(&mtr);
968 
969 	undo_page = trx_undo_page_get_s_latched(space, zip_size, page_no, &mtr);
970 
971 	rec = undo_page + offset;
972 
973 	rec2 = rec;
974 
975 	for (;;) {
976 		/* Try first to find the next record which requires a purge
977 		operation from the same page of the same undo log */
978 
979 		next_rec = trx_undo_page_get_next_rec(
980 			rec2, purge_sys->hdr_page_no, purge_sys->hdr_offset);
981 
982 		if (next_rec == NULL) {
983 			rec2 = trx_undo_get_next_rec(
984 				rec2, purge_sys->hdr_page_no,
985 				purge_sys->hdr_offset, &mtr);
986 			break;
987 		}
988 
989 		rec2 = next_rec;
990 
991 		type = trx_undo_rec_get_type(rec2);
992 
993 		if (type == TRX_UNDO_DEL_MARK_REC) {
994 
995 			break;
996 		}
997 
998 		cmpl_info = trx_undo_rec_get_cmpl_info(rec2);
999 
1000 		if (trx_undo_rec_get_extern_storage(rec2)) {
1001 			break;
1002 		}
1003 
1004 		if ((type == TRX_UNDO_UPD_EXIST_REC)
1005 		    && !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
1006 			break;
1007 		}
1008 	}
1009 
1010 	if (rec2 == NULL) {
1011 		mtr_commit(&mtr);
1012 
1013 		trx_purge_rseg_get_next_history_log(purge_sys->rseg);
1014 
1015 		/* Look for the next undo log and record to purge */
1016 
1017 		trx_purge_choose_next_log();
1018 
1019 		mtr_start(&mtr);
1020 
1021 		undo_page = trx_undo_page_get_s_latched(space, zip_size,
1022 							page_no, &mtr);
1023 
1024 		rec = undo_page + offset;
1025 	} else {
1026 		page = page_align(rec2);
1027 
1028 		purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec2);
1029 		purge_sys->page_no = page_get_page_no(page);
1030 		purge_sys->offset = rec2 - page;
1031 
1032 		if (undo_page != page) {
1033 			/* We advance to a new page of the undo log: */
1034 			purge_sys->n_pages_handled++;
1035 		}
1036 	}
1037 
1038 	rec_copy = trx_undo_rec_copy(rec, heap);
1039 
1040 	mtr_commit(&mtr);
1041 
1042 	return(rec_copy);
1043 }
1044 
1045 /********************************************************************//**
1046 Fetches the next undo log record from the history list to purge. It must be
1047 released with the corresponding release function.
@return copy of an undo log record or pointer to trx_purge_dummy_rec,
if the whole undo log can be skipped in purge; NULL if none left */
1050 UNIV_INTERN
1051 trx_undo_rec_t*
trx_purge_fetch_next_rec(roll_ptr_t * roll_ptr,trx_undo_inf_t ** cell,mem_heap_t * heap)1052 trx_purge_fetch_next_rec(
1053 /*=====================*/
1054 	roll_ptr_t*	roll_ptr,/*!< out: roll pointer to undo record */
1055 	trx_undo_inf_t** cell,	/*!< out: storage cell for the record in the
1056 				purge array */
1057 	mem_heap_t*	heap)	/*!< in: memory heap where copied */
1058 {
1059 	trx_undo_rec_t*	undo_rec;
1060 
1061 
1062 	if (purge_sys->state == TRX_STOP_PURGE) {
1063 		trx_purge_truncate_if_arr_empty();
1064 
1065 		return(NULL);
1066 	} else if (!purge_sys->next_stored) {
1067 		trx_purge_choose_next_log();
1068 
1069 		if (!purge_sys->next_stored) {
1070 			purge_sys->state = TRX_STOP_PURGE;
1071 
1072 			trx_purge_truncate_if_arr_empty();
1073 
1074 			if (srv_print_thread_releases) {
1075 				fprintf(stderr,
1076 					"Purge: No logs left in the"
1077 					" history list; pages handled %lu\n",
1078 					(ulong) purge_sys->n_pages_handled);
1079 			}
1080 
1081 			return(NULL);
1082 		}
1083 	}
1084 
1085 	if (purge_sys->n_pages_handled >= purge_sys->handle_limit) {
1086 
1087 		purge_sys->state = TRX_STOP_PURGE;
1088 
1089 		trx_purge_truncate_if_arr_empty();
1090 
1091 		return(NULL);
1092 	} else if (purge_sys->purge_trx_no >= purge_sys->view->low_limit_no) {
1093 		purge_sys->state = TRX_STOP_PURGE;
1094 
1095 		trx_purge_truncate_if_arr_empty();
1096 
1097 		return(NULL);
1098 	}
1099 
1100 	/* fprintf(stderr, "Thread %lu purging trx %llu undo record %llu\n",
1101 	os_thread_get_curr_id(),
1102 	(ullint) purge_sys->purge_trx_no,
1103 	(ullint) purge_sys->purge_undo_no); */
1104 
1105 
1106 	*roll_ptr = trx_undo_build_roll_ptr(
1107 		FALSE, (purge_sys->rseg)->id, purge_sys->page_no,
1108 		purge_sys->offset);
1109 
1110 	*cell = trx_purge_arr_store_info(
1111 		purge_sys->purge_trx_no, purge_sys->purge_undo_no);
1112 
1113 	ut_ad(purge_sys->purge_trx_no < purge_sys->view->low_limit_no);
1114 
1115 	/* The following call will advance the stored values of purge_trx_no
1116 	and purge_undo_no, therefore we had to store them first */
1117 
1118 	undo_rec = trx_purge_get_next_rec(heap);
1119 
1120 	return(undo_rec);
1121 }
1122 
1123 /*******************************************************************//**
1124 Releases a reserved purge undo record. */
1125 UNIV_INTERN
1126 void
trx_purge_rec_release(trx_undo_inf_t * cell)1127 trx_purge_rec_release(
1128 /*==================*/
1129 	trx_undo_inf_t*	cell)	/*!< in: storage cell */
1130 {
1131 	trx_purge_arr_remove_info(cell);
1132 }
1133 
1134 /*******************************************************************//**
1135 This function runs a purge batch.
1136 @return	number of undo log pages handled in the batch */
1137 UNIV_INTERN
1138 ulint
trx_purge(ulint limit)1139 trx_purge(
1140 /*======*/
1141 	ulint	limit)		/*!< in: the maximum number of records to
1142 				purge in one batch */
1143 {
1144 	que_thr_t*	thr;
1145 	ulint		old_pages_handled;
1146 
1147 	ut_a(purge_sys->trx->n_active_thrs == 0);
1148 
1149 	rw_lock_x_lock(&purge_sys->latch);
1150 
1151 	mutex_enter(&kernel_mutex);
1152 
1153 	/* Close and free the old purge view */
1154 
1155 	read_view_close(purge_sys->view);
1156 	purge_sys->view = NULL;
1157 	mem_heap_empty(purge_sys->heap);
1158 
1159 	/* Determine how much data manipulation language (DML) statements
1160 	need to be delayed in order to reduce the lagging of the purge
1161 	thread. */
1162 	srv_dml_needed_delay = 0; /* in microseconds; default: no delay */
1163 
1164 	/* If we cannot advance the 'purge view' because of an old
1165 	'consistent read view', then the DML statements cannot be delayed.
1166 	Also, srv_max_purge_lag <= 0 means 'infinity'. */
1167 	if (srv_max_purge_lag > 0
1168 	    && !UT_LIST_GET_LAST(trx_sys->view_list)) {
1169 		float	ratio = (float) trx_sys->rseg_history_len
1170 			/ srv_max_purge_lag;
1171 		if (ratio > ULINT_MAX / 10000) {
1172 			/* Avoid overflow: maximum delay is 4295 seconds */
1173 			srv_dml_needed_delay = ULINT_MAX;
1174 		} else if (ratio > 1) {
1175 			/* If the history list length exceeds the
1176 			innodb_max_purge_lag, the
1177 			data manipulation statements are delayed
1178 			by at least 5000 microseconds. */
1179 			srv_dml_needed_delay = (ulint) ((ratio - .5) * 10000);
1180 		}
1181 	}
1182 
1183 	purge_sys->view = read_view_oldest_copy_or_open_new(
1184 		0, purge_sys->heap);
1185 
1186 	mutex_exit(&kernel_mutex);
1187 
1188 	rw_lock_x_unlock(&(purge_sys->latch));
1189 
1190 #ifdef UNIV_DEBUG
1191 	if (srv_purge_view_update_only_debug) {
1192 		return(0);
1193 	}
1194 #endif
1195 
1196 	purge_sys->state = TRX_PURGE_ON;
1197 
1198 	purge_sys->handle_limit = purge_sys->n_pages_handled + limit;
1199 
1200 	old_pages_handled = purge_sys->n_pages_handled;
1201 
1202 
1203 	mutex_enter(&kernel_mutex);
1204 
1205 	thr = que_fork_start_command(purge_sys->query);
1206 
1207 	ut_ad(thr);
1208 
1209 	mutex_exit(&kernel_mutex);
1210 
1211 	if (srv_print_thread_releases) {
1212 
1213 		fputs("Starting purge\n", stderr);
1214 	}
1215 
1216 	que_run_threads(thr);
1217 
1218 	if (srv_print_thread_releases) {
1219 
1220 		fprintf(stderr,
1221 			"Purge ends; pages handled %lu\n",
1222 			(ulong) purge_sys->n_pages_handled);
1223 	}
1224 
1225 	return((ulint) (purge_sys->n_pages_handled - old_pages_handled));
1226 }
1227 
1228 /******************************************************************//**
1229 Prints information of the purge system to stderr. */
1230 UNIV_INTERN
1231 void
trx_purge_sys_print(void)1232 trx_purge_sys_print(void)
1233 /*=====================*/
1234 {
1235 	fprintf(stderr, "InnoDB: Purge system view:\n");
1236 	read_view_print(purge_sys->view);
1237 
1238 	fprintf(stderr, "InnoDB: Purge trx n:o " TRX_ID_FMT
1239 		", undo n:o " TRX_ID_FMT "\n",
1240 		(ullint) purge_sys->purge_trx_no,
1241 		(ullint) purge_sys->purge_undo_no);
1242 	fprintf(stderr,
1243 		"InnoDB: Purge next stored %lu, page_no %lu, offset %lu,\n"
1244 		"InnoDB: Purge hdr_page_no %lu, hdr_offset %lu\n",
1245 		(ulong) purge_sys->next_stored,
1246 		(ulong) purge_sys->page_no,
1247 		(ulong) purge_sys->offset,
1248 		(ulong) purge_sys->hdr_page_no,
1249 		(ulong) purge_sys->hdr_offset);
1250 }
1251