1 /*****************************************************************************
2 
3 Copyright (c) 2011, 2017, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file buf/buf0dump.cc
29 Implements a buffer pool dump/load.
30 
31 Created April 08, 2011 Vasil Dimov
32 *******************************************************/
33 
34 #include "univ.i"
35 
36 #include <stdarg.h> /* va_* */
37 #include <string.h> /* strerror() */
38 
39 #include "buf0buf.h" /* srv_buf_pool_instances */
40 #include "buf0dump.h"
41 #include "db0err.h"
42 #include "dict0dict.h" /* dict_operation_lock */
43 #include "os0file.h" /* OS_FILE_MAX_PATH */
44 #include "os0sync.h" /* os_event* */
45 #include "os0thread.h" /* os_thread_* */
46 #include "srv0srv.h" /* srv_fast_shutdown, srv_buf_dump* */
47 #include "srv0start.h" /* srv_shutdown_state */
48 #include "sync0rw.h" /* rw_lock_s_lock() */
49 #include "ut0byte.h" /* ut_ull_create() */
50 #include "ut0sort.h" /* UT_SORT_FUNCTION_BODY */
51 
/** Severity of a dump/load status message. buf_dump_status() and
buf_load_status() always store the message in the corresponding status
variable; for STATUS_NOTICE and STATUS_ERR they also print it to stderr. */
enum status_severity {
	STATUS_INFO,	/*!< stored in the status variable only */
	STATUS_NOTICE,	/*!< stored and also printed to stderr */
	STATUS_ERR	/*!< stored and also printed to stderr */
};
57 
/** Evaluates to non-zero when srv_shutdown_state is anything other than
SRV_SHUTDOWN_NONE. The comparison is wrapped in UNIV_UNLIKELY(). */
#define SHUTTING_DOWN() \
	(UNIV_UNLIKELY(srv_shutdown_state != SRV_SHUTDOWN_NONE))
60 
61 /* Flags that tell the buffer pool dump/load thread which action should it
62 take after being waked up. */
63 static ibool	buf_dump_should_start = FALSE;
64 static ibool	buf_load_should_start = FALSE;
65 
66 static ibool	buf_load_abort_flag = FALSE;
67 
68 /* Used to temporary store dump info in order to avoid IO while holding
69 buffer pool LRU list mutex during dump and also to sort the contents of the
70 dump before reading the pages from disk during load.
71 We store the space id in the high 32 bits and page no in low 32 bits. */
72 typedef ib_uint64_t	buf_dump_t;
73 
74 /* Aux macros to create buf_dump_t and to extract space and page from it */
75 #define BUF_DUMP_CREATE(space, page)	ut_ull_create(space, page)
76 #define BUF_DUMP_SPACE(a)		((ulint) ((a) >> 32))
77 #define BUF_DUMP_PAGE(a)		((ulint) ((a) & 0xFFFFFFFFUL))
78 
79 /*****************************************************************//**
80 Wakes up the buffer pool dump/load thread and instructs it to start
81 a dump. This function is called by MySQL code via buffer_pool_dump_now()
82 and it should return immediately because the whole MySQL is frozen during
83 its execution. */
84 UNIV_INTERN
85 void
buf_dump_start()86 buf_dump_start()
87 /*============*/
88 {
89 	buf_dump_should_start = TRUE;
90 	os_event_set(srv_buf_dump_event);
91 }
92 
93 /*****************************************************************//**
94 Wakes up the buffer pool dump/load thread and instructs it to start
95 a load. This function is called by MySQL code via buffer_pool_load_now()
96 and it should return immediately because the whole MySQL is frozen during
97 its execution. */
98 UNIV_INTERN
99 void
buf_load_start()100 buf_load_start()
101 /*============*/
102 {
103 	buf_load_should_start = TRUE;
104 	os_event_set(srv_buf_dump_event);
105 }
106 
107 /*****************************************************************//**
108 Sets the global variable that feeds MySQL's innodb_buffer_pool_dump_status
109 to the specified string. The format and the following parameters are the
110 same as the ones used for printf(3). The value of this variable can be
111 retrieved by:
112 SELECT variable_value FROM information_schema.global_status WHERE
113 variable_name = 'INNODB_BUFFER_POOL_DUMP_STATUS';
114 or by:
115 SHOW STATUS LIKE 'innodb_buffer_pool_dump_status'; */
116 static MY_ATTRIBUTE((nonnull, format(printf, 2, 3)))
117 void
buf_dump_status(enum status_severity severity,const char * fmt,...)118 buf_dump_status(
119 /*============*/
120 	enum status_severity	severity,/*!< in: status severity */
121 	const char*		fmt,	/*!< in: format */
122 	...)				/*!< in: extra parameters according
123 					to fmt */
124 {
125 	va_list	ap;
126 
127 	va_start(ap, fmt);
128 
129 	ut_vsnprintf(
130 		export_vars.innodb_buffer_pool_dump_status,
131 		sizeof(export_vars.innodb_buffer_pool_dump_status),
132 		fmt, ap);
133 
134 	if (severity == STATUS_NOTICE || severity == STATUS_ERR) {
135 		ut_print_timestamp(stderr);
136 		fprintf(stderr, " InnoDB: %s\n",
137 			export_vars.innodb_buffer_pool_dump_status);
138 	}
139 
140 	va_end(ap);
141 }
142 
143 /*****************************************************************//**
144 Sets the global variable that feeds MySQL's innodb_buffer_pool_load_status
145 to the specified string. The format and the following parameters are the
146 same as the ones used for printf(3). The value of this variable can be
147 retrieved by:
148 SELECT variable_value FROM information_schema.global_status WHERE
149 variable_name = 'INNODB_BUFFER_POOL_LOAD_STATUS';
150 or by:
151 SHOW STATUS LIKE 'innodb_buffer_pool_load_status'; */
152 static MY_ATTRIBUTE((nonnull, format(printf, 2, 3)))
153 void
buf_load_status(enum status_severity severity,const char * fmt,...)154 buf_load_status(
155 /*============*/
156 	enum status_severity	severity,/*!< in: status severity */
157 	const char*	fmt,	/*!< in: format */
158 	...)			/*!< in: extra parameters according to fmt */
159 {
160 	va_list	ap;
161 
162 	va_start(ap, fmt);
163 
164 	ut_vsnprintf(
165 		export_vars.innodb_buffer_pool_load_status,
166 		sizeof(export_vars.innodb_buffer_pool_load_status),
167 		fmt, ap);
168 
169 	if (severity == STATUS_NOTICE || severity == STATUS_ERR) {
170 		ut_print_timestamp(stderr);
171 		fprintf(stderr, " InnoDB: %s\n",
172 			export_vars.innodb_buffer_pool_load_status);
173 	}
174 
175 	va_end(ap);
176 }
177 
178 /** Returns the directory path where the buffer pool dump file will be created.
179 @return directory path */
180 static
181 const char*
get_buf_dump_dir()182 get_buf_dump_dir()
183 {
184 	const char*	dump_dir;
185 
186 	/* The dump file should be created in the default data directory if
187 	innodb_data_home_dir is set as an empty string. */
188 	if (strcmp(srv_data_home, "") == 0) {
189 		dump_dir = fil_path_to_mysql_datadir;
190 	} else {
191 		dump_dir = srv_data_home;
192 	}
193 
194 	return(dump_dir);
195 }
196 
197 /*****************************************************************//**
198 Perform a buffer pool dump into the file specified by
199 innodb_buffer_pool_filename. If any errors occur then the value of
200 innodb_buffer_pool_dump_status will be set accordingly, see buf_dump_status().
201 The dump filename can be specified by (relative to srv_data_home):
202 SET GLOBAL innodb_buffer_pool_filename='filename'; */
203 static
204 void
buf_dump(ibool obey_shutdown)205 buf_dump(
206 /*=====*/
207 	ibool	obey_shutdown)	/*!< in: quit if we are in a shutting down
208 				state */
209 {
210 #define SHOULD_QUIT()	(SHUTTING_DOWN() && obey_shutdown)
211 
212 	static const char format_name[]= "%s.incomplete";
213 	char	full_filename[OS_FILE_MAX_PATH];
214 	char	tmp_filename[OS_FILE_MAX_PATH + sizeof(format_name)];
215 	char	now[32];
216 	FILE*	f;
217 	ulint	i;
218 	int	ret;
219 
220 	ut_snprintf(full_filename, sizeof(full_filename),
221 		    "%s%c%s", get_buf_dump_dir(), SRV_PATH_SEPARATOR,
222 		    srv_buf_dump_filename);
223 
224 	ut_snprintf(tmp_filename, sizeof(tmp_filename),
225 		    format_name, full_filename);
226 
227 	buf_dump_status(STATUS_NOTICE, "Dumping buffer pool(s) to %s",
228 			full_filename);
229 
230 	f = fopen(tmp_filename, "w");
231 	if (f == NULL) {
232 		buf_dump_status(STATUS_ERR,
233 				"Cannot open '%s' for writing: %s",
234 				tmp_filename, strerror(errno));
235 		return;
236 	}
237 	/* else */
238 
239 	/* walk through each buffer pool */
240 	for (i = 0; i < srv_buf_pool_instances && !SHOULD_QUIT(); i++) {
241 		buf_pool_t*		buf_pool;
242 		const buf_page_t*	bpage;
243 		buf_dump_t*		dump;
244 		ulint			n_pages;
245 		ulint			j;
246 
247 		buf_pool = buf_pool_from_array(i);
248 
249 		/* obtain buf_pool LRU list mutex before allocate, since
250 		UT_LIST_GET_LEN(buf_pool->LRU) could change */
251 		mutex_enter(&buf_pool->LRU_list_mutex);
252 
253 		n_pages = UT_LIST_GET_LEN(buf_pool->LRU);
254 
255 		/* skip empty buffer pools */
256 		if (n_pages == 0) {
257 			mutex_exit(&buf_pool->LRU_list_mutex);
258 			continue;
259 		}
260 
261 		dump = static_cast<buf_dump_t*>(
262 			ut_malloc(n_pages * sizeof(*dump))) ;
263 
264 		if (dump == NULL) {
265 			mutex_exit(&buf_pool->LRU_list_mutex);
266 			fclose(f);
267 			buf_dump_status(STATUS_ERR,
268 					"Cannot allocate " ULINTPF " bytes: %s",
269 					(ulint) (n_pages * sizeof(*dump)),
270 					strerror(errno));
271 			/* leave tmp_filename to exist */
272 			return;
273 		}
274 
275 		for (bpage = UT_LIST_GET_LAST(buf_pool->LRU), j = 0;
276 		     bpage != NULL;
277 		     bpage = UT_LIST_GET_PREV(LRU, bpage), j++) {
278 
279 			ut_a(buf_page_in_file(bpage));
280 
281 			dump[j] = BUF_DUMP_CREATE(buf_page_get_space(bpage),
282 						  buf_page_get_page_no(bpage));
283 		}
284 
285 		ut_a(j == n_pages);
286 
287 		mutex_exit(&buf_pool->LRU_list_mutex);
288 
289 		for (j = 0; j < n_pages && !SHOULD_QUIT(); j++) {
290 			ret = fprintf(f, ULINTPF "," ULINTPF "\n",
291 				      BUF_DUMP_SPACE(dump[j]),
292 				      BUF_DUMP_PAGE(dump[j]));
293 			if (ret < 0) {
294 				ut_free(dump);
295 				fclose(f);
296 				buf_dump_status(STATUS_ERR,
297 						"Cannot write to '%s': %s",
298 						tmp_filename, strerror(errno));
299 				/* leave tmp_filename to exist */
300 				return;
301 			}
302 
303 			if (j % 128 == 0) {
304 				buf_dump_status(
305 					STATUS_INFO,
306 					"Dumping buffer pool "
307 					ULINTPF "/" ULINTPF ", "
308 					"page " ULINTPF "/" ULINTPF,
309 					i + 1, srv_buf_pool_instances,
310 					j + 1, n_pages);
311 			}
312 		}
313 
314 		ut_free(dump);
315 	}
316 
317 	ret = fclose(f);
318 	if (ret != 0) {
319 		buf_dump_status(STATUS_ERR,
320 				"Cannot close '%s': %s",
321 				tmp_filename, strerror(errno));
322 		return;
323 	}
324 	/* else */
325 
326 	ret = unlink(full_filename);
327 	if (ret != 0 && errno != ENOENT) {
328 		buf_dump_status(STATUS_ERR,
329 				"Cannot delete '%s': %s",
330 				full_filename, strerror(errno));
331 		/* leave tmp_filename to exist */
332 		return;
333 	}
334 	/* else */
335 
336 	ret = rename(tmp_filename, full_filename);
337 	if (ret != 0) {
338 		buf_dump_status(STATUS_ERR,
339 				"Cannot rename '%s' to '%s': %s",
340 				tmp_filename, full_filename,
341 				strerror(errno));
342 		/* leave tmp_filename to exist */
343 		return;
344 	}
345 	/* else */
346 
347 	/* success */
348 
349 	ut_sprintf_timestamp(now);
350 
351 	buf_dump_status(STATUS_NOTICE,
352 			"Buffer pool(s) dump completed at %s", now);
353 }
354 
355 /*****************************************************************//**
356 Compare two buffer pool dump entries, used to sort the dump on
357 space_no,page_no before loading in order to increase the chance for
358 sequential IO.
359 @return -1/0/1 if entry 1 is smaller/equal/bigger than entry 2 */
360 static
361 lint
buf_dump_cmp(const buf_dump_t d1,const buf_dump_t d2)362 buf_dump_cmp(
363 /*=========*/
364 	const buf_dump_t	d1,	/*!< in: buffer pool dump entry 1 */
365 	const buf_dump_t	d2)	/*!< in: buffer pool dump entry 2 */
366 {
367 	if (d1 < d2) {
368 		return(-1);
369 	} else if (d1 == d2) {
370 		return(0);
371 	} else {
372 		return(1);
373 	}
374 }
375 
376 /*****************************************************************//**
377 Sort a buffer pool dump on space_no, page_no. */
378 static
379 void
buf_dump_sort(buf_dump_t * dump,buf_dump_t * tmp,ulint low,ulint high)380 buf_dump_sort(
381 /*==========*/
382 	buf_dump_t*	dump,	/*!< in/out: buffer pool dump to sort */
383 	buf_dump_t*	tmp,	/*!< in/out: temp storage */
384 	ulint		low,	/*!< in: lowest index (inclusive) */
385 	ulint		high)	/*!< in: highest index (non-inclusive) */
386 {
387 	UT_SORT_FUNCTION_BODY(buf_dump_sort, dump, tmp, low, high,
388 			      buf_dump_cmp);
389 }
390 
391 /*****************************************************************//**
392 Perform a buffer pool load from the file specified by
393 innodb_buffer_pool_filename. If any errors occur then the value of
394 innodb_buffer_pool_load_status will be set accordingly, see buf_load_status().
395 The dump filename can be specified by (relative to srv_data_home):
396 SET GLOBAL innodb_buffer_pool_filename='filename'; */
397 static
398 void
buf_load()399 buf_load()
400 /*======*/
401 {
402 	char		full_filename[OS_FILE_MAX_PATH];
403 	char		now[32];
404 	FILE*		f;
405 	buf_dump_t*	dump;
406 	buf_dump_t*	dump_tmp;
407 	ulint		dump_n;
408 	ulint		total_buffer_pools_pages;
409 	ulint		i;
410 	ulint		space_id;
411 	ulint		page_no;
412 	int		fscanf_ret;
413 
414 	/* Ignore any leftovers from before */
415 	buf_load_abort_flag = FALSE;
416 
417 	ut_snprintf(full_filename, sizeof(full_filename),
418 		    "%s%c%s", get_buf_dump_dir(), SRV_PATH_SEPARATOR,
419 		    srv_buf_dump_filename);
420 
421 	buf_load_status(STATUS_NOTICE,
422 			"Loading buffer pool(s) from %s", full_filename);
423 
424 	f = fopen(full_filename, "r");
425 	if (f == NULL) {
426 		buf_load_status(STATUS_ERR,
427 				"Cannot open '%s' for reading: %s",
428 				full_filename, strerror(errno));
429 		return;
430 	}
431 	/* else */
432 
433 	/* First scan the file to estimate how many entries are in it.
434 	This file is tiny (approx 500KB per 1GB buffer pool), reading it
435 	two times is fine. */
436 	dump_n = 0;
437 	while (fscanf(f, ULINTPF "," ULINTPF, &space_id, &page_no) == 2
438 	       && !SHUTTING_DOWN()) {
439 		dump_n++;
440 	}
441 
442 	if (!SHUTTING_DOWN() && !feof(f)) {
443 		/* fscanf() returned != 2 */
444 		const char*	what;
445 		if (ferror(f)) {
446 			what = "reading";
447 		} else {
448 			what = "parsing";
449 		}
450 		fclose(f);
451 		buf_load_status(STATUS_ERR, "Error %s '%s', "
452 				"unable to load buffer pool (stage 1)",
453 				what, full_filename);
454 		return;
455 	}
456 
457 	/* If dump is larger than the buffer pool(s), then we ignore the
458 	extra trailing. This could happen if a dump is made, then buffer
459 	pool is shrunk and then load it attempted. */
460 	total_buffer_pools_pages = buf_pool_get_n_pages()
461 		* srv_buf_pool_instances;
462 	if (dump_n > total_buffer_pools_pages) {
463 		dump_n = total_buffer_pools_pages;
464 	}
465 
466 	dump = static_cast<buf_dump_t*>(ut_malloc(dump_n * sizeof(*dump)));
467 
468 	if (dump == NULL) {
469 		fclose(f);
470 		buf_load_status(STATUS_ERR,
471 				"Cannot allocate " ULINTPF " bytes: %s",
472 				(ulint) (dump_n * sizeof(*dump)),
473 				strerror(errno));
474 		return;
475 	}
476 
477 	dump_tmp = static_cast<buf_dump_t*>(
478 		ut_malloc(dump_n * sizeof(*dump_tmp)));
479 
480 	if (dump_tmp == NULL) {
481 		ut_free(dump);
482 		fclose(f);
483 		buf_load_status(STATUS_ERR,
484 				"Cannot allocate " ULINTPF " bytes: %s",
485 				(ulint) (dump_n * sizeof(*dump_tmp)),
486 				strerror(errno));
487 		return;
488 	}
489 
490 	rewind(f);
491 
492 	for (i = 0; i < dump_n && !SHUTTING_DOWN(); i++) {
493 		fscanf_ret = fscanf(f, ULINTPF "," ULINTPF,
494 				    &space_id, &page_no);
495 
496 		if (fscanf_ret != 2) {
497 			if (feof(f)) {
498 				break;
499 			}
500 			/* else */
501 
502 			ut_free(dump);
503 			ut_free(dump_tmp);
504 			fclose(f);
505 			buf_load_status(STATUS_ERR,
506 					"Error parsing '%s', unable "
507 					"to load buffer pool (stage 2)",
508 					full_filename);
509 			return;
510 		}
511 
512 		if (space_id > ULINT32_MASK || page_no > ULINT32_MASK) {
513 			ut_free(dump);
514 			ut_free(dump_tmp);
515 			fclose(f);
516 			buf_load_status(STATUS_ERR,
517 					"Error parsing '%s': bogus "
518 					"space,page " ULINTPF "," ULINTPF
519 					" at line " ULINTPF ", "
520 					"unable to load buffer pool",
521 					full_filename,
522 					space_id, page_no,
523 					i);
524 			return;
525 		}
526 
527 		dump[i] = BUF_DUMP_CREATE(space_id, page_no);
528 	}
529 
530 	/* Set dump_n to the actual number of initialized elements,
531 	i could be smaller than dump_n here if the file got truncated after
532 	we read it the first time. */
533 	dump_n = i;
534 
535 	fclose(f);
536 
537 	if (dump_n == 0) {
538 		ut_free(dump);
539 		ut_sprintf_timestamp(now);
540 		buf_load_status(STATUS_NOTICE,
541 				"Buffer pool(s) load completed at %s "
542 				"(%s was empty)", now, full_filename);
543 		return;
544 	}
545 
546 	if (!SHUTTING_DOWN()) {
547 		buf_dump_sort(dump, dump_tmp, 0, dump_n);
548 	}
549 
550 	ut_free(dump_tmp);
551 
552 	for (i = 0; i < dump_n && !SHUTTING_DOWN(); i++) {
553 
554 		buf_read_page_async(BUF_DUMP_SPACE(dump[i]),
555 				    BUF_DUMP_PAGE(dump[i]));
556 
557 		if (i % 64 == 63) {
558 			os_aio_simulated_wake_handler_threads();
559 		}
560 
561 		if (i % 128 == 0) {
562 			buf_load_status(STATUS_INFO,
563 					"Loaded " ULINTPF "/" ULINTPF " pages",
564 					i + 1, dump_n);
565 		}
566 
567 		if (buf_load_abort_flag) {
568 			buf_load_abort_flag = FALSE;
569 			ut_free(dump);
570 			buf_load_status(
571 				STATUS_NOTICE,
572 				"Buffer pool(s) load aborted on request");
573 			return;
574 		}
575 	}
576 
577 	ut_free(dump);
578 
579 	ut_sprintf_timestamp(now);
580 
581 	buf_load_status(STATUS_NOTICE,
582 			"Buffer pool(s) load completed at %s", now);
583 }
584 
585 /*****************************************************************//**
586 Aborts a currently running buffer pool load. This function is called by
587 MySQL code via buffer_pool_load_abort() and it should return immediately
588 because the whole MySQL is frozen during its execution. */
589 UNIV_INTERN
590 void
buf_load_abort()591 buf_load_abort()
592 /*============*/
593 {
594 	buf_load_abort_flag = TRUE;
595 }
596 
597 /*****************************************************************//**
598 This is the main thread for buffer pool dump/load. It waits for an
599 event and when waked up either performs a dump or load and sleeps
600 again.
601 @return this function does not return, it calls os_thread_exit() */
602 extern "C" UNIV_INTERN
603 os_thread_ret_t
DECLARE_THREAD(buf_dump_thread)604 DECLARE_THREAD(buf_dump_thread)(
605 /*============================*/
606 	void*	arg MY_ATTRIBUTE((unused)))	/*!< in: a dummy parameter
607 						required by os_thread_create */
608 {
609 	my_thread_init();
610 	ut_ad(!srv_read_only_mode);
611 
612 	srv_buf_dump_thread_active = TRUE;
613 
614 	buf_dump_status(STATUS_INFO, "not started");
615 	buf_load_status(STATUS_INFO, "not started");
616 
617 	if (srv_buffer_pool_load_at_startup) {
618 		buf_load();
619 	}
620 
621 	while (!SHUTTING_DOWN()) {
622 
623 		os_event_wait(srv_buf_dump_event);
624 
625 		if (buf_dump_should_start) {
626 			buf_dump_should_start = FALSE;
627 			buf_dump(TRUE /* quit on shutdown */);
628 		}
629 
630 		if (buf_load_should_start) {
631 			buf_load_should_start = FALSE;
632 			buf_load();
633 		}
634 
635 		os_event_reset(srv_buf_dump_event);
636 	}
637 
638 	if (srv_buffer_pool_dump_at_shutdown && srv_fast_shutdown != 2) {
639 		buf_dump(FALSE /* ignore shutdown down flag,
640 		keep going even if we are in a shutdown state */);
641 	}
642 
643 	srv_buf_dump_thread_active = FALSE;
644 
645 	my_thread_end();
646 	/* We count the number of threads in os_thread_exit(). A created
647 	thread should always use that to exit and not use return() to exit. */
648 	os_thread_exit(NULL);
649 
650 	OS_THREAD_DUMMY_RETURN;
651 }
652