1 /*****************************************************************************
2 
3 Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file fsp/fsp0space.cc
29 Multi file, shared, system tablespace implementation.
30 
31 Created 2012-11-16 by Sunny Bains as srv/srv0space.cc
32 Refactored 2013-7-26 by Kevin Lewis
33 *******************************************************/
34 
35 #include "ha_prototypes.h"
36 
37 #include "fsp0sysspace.h"
38 #include "srv0start.h"
39 #include "trx0sys.h"
40 #ifndef UNIV_HOTBACKUP
41 #include "dict0load.h"
42 #include "mem0mem.h"
43 #include "os0file.h"
44 #include "row0mysql.h"
45 #include "ut0new.h"
46 
47 /** The server header file is included to access opt_initialize global variable.
48 If server passes the option for create/open DB to SE, we should remove such
49 direct reference to server header and global variable */
50 #include "mysqld.h"
51 #else
52 my_bool opt_initialize = 0;
53 #endif /* !UNIV_HOTBACKUP */
54 
/** The control info of the system tablespace. */
SysTablespace srv_sys_space;

/** The control info of a temporary table shared tablespace. */
SysTablespace srv_tmp_space;

/** If the last data file is auto-extended, we add this many pages to it
at a time. We have to make this public because it is a config variable. */
ulong sys_tablespace_auto_extend_increment;

#ifdef UNIV_DEBUG
/** Controls whether the extra debug checks for the temporary tablespace
are skipped. The default is true, i.e. the checks are disabled.
This variable is not exposed to the end user; it is kept as a variable so
that a developer can enable the checks while debugging. */
bool srv_skip_temp_table_checks_debug = true;
#endif /* UNIV_DEBUG */
72 
73 /** Convert a numeric string that optionally ends in G or M or K,
74     to a number containing megabytes.
75 @param[in]	str	String with a quantity in bytes
76 @param[out]	megs	The number in megabytes
77 @return next character in string */
78 char*
parse_units(char * ptr,ulint * megs)79 SysTablespace::parse_units(
80 	char*	ptr,
81 	ulint*	megs)
82 {
83 	char*		endp;
84 
85 	*megs = strtoul(ptr, &endp, 10);
86 
87 	ptr = endp;
88 
89 	switch (*ptr) {
90 	case 'G': case 'g':
91 		*megs *= 1024;
92 		/* fall through */
93 	case 'M': case 'm':
94 		++ptr;
95 		break;
96 	case 'K': case 'k':
97 		*megs /= 1024;
98 		++ptr;
99 		break;
100 	default:
101 		*megs /= 1024 * 1024;
102 		break;
103 	}
104 
105 	return(ptr);
106 }
107 
/** Parse the input params and populate member variables.
The specification is a ';' separated list of data file definitions of
the form filepath:size[K|M|G], optionally followed by "newraw" or "raw".
The specification may end with ":autoextend", optionally followed by
":max:size[K|M|G]".
The string is parsed twice: the first pass only validates the syntax and
counts the files, the second pass stores the values in m_files,
m_auto_extend_last_file and m_last_file_size_max.
@param[in]	filepath_spec	data file specification to parse
@param[in]	supports_raw	true if the tablespace supports raw devices
@param[in]	filenames_only	if true, everything up to and including
				the last OS_PATH_SEPARATOR is dropped from
				each file path before it is stored
@return true on success parse */
bool
SysTablespace::parse_params(
	const char*	filepath_spec,
	bool		supports_raw,
	bool		filenames_only)
{
	char*	filepath;
	ulint	size;
	char*	input_str;
	ulint	n_files = 0;

	ut_ad(m_last_file_size_max == 0);
	ut_ad(!m_auto_extend_last_file);

	/* Work on a private copy: the second pass writes '\0' terminators
	into the string. Every return path below frees this copy. */
	char*	new_str = mem_strdup(filepath_spec);
	char*	str = new_str;

	input_str = str;

	/*---------------------- PASS 1 ---------------------------*/
	/* First calculate the number of data files and check syntax:
	filepath:size[K |M | G];filepath:size[K |M | G]... .
	Note that a Windows path may contain a drive name and a ':'. */
	while (*str != '\0') {
		filepath = str;

		/* Advance to the ':' that separates the path from the size.
		A ':' followed by '\\', '/' or ':' is treated as part of
		the path and skipped. */
		while ((*str != ':' && *str != '\0')
		       || (*str == ':'
			   && (*(str + 1) == '\\' || *(str + 1) == '/'
			       || *(str + 1) == ':'))) {
			str++;
		}

		/* No size separator found for this file. */
		if (*str == '\0') {
			ut_free(new_str);

			ib::error()
				<< "syntax error in file path or size"
				" specified is less than 1 megabyte";
			return(false);
		}

		str++;

		str = parse_units(str, &size);

		if (0 == strncmp(str, ":autoextend",
				 (sizeof ":autoextend") - 1)) {

			str += (sizeof ":autoextend") - 1;

			if (0 == strncmp(str, ":max:",
					 (sizeof ":max:") - 1)) {

				str += (sizeof ":max:") - 1;

				/* NOTE(review): this overwrites the file
				size parsed above with the max size, so the
				size == 0 check below tests the max size for
				such a file - confirm this is intended. */
				str = parse_units(str, &size);
			}

			/* ":autoextend" is only accepted at the very end
			of the specification. */
			if (*str != '\0') {
				ut_free(new_str);
				ib::error()
					<< "syntax error in file path or"
					<< " size specified is less than"
					<< " 1 megabyte";
				return(false);
			}
		}

		/* "newraw": only the leading "new" is consumed here, the
		"raw" check below consumes the rest. */
		if (::strlen(str) >= 6
		    && *str == 'n'
		    && *(str + 1) == 'e'
		    && *(str + 2) == 'w') {

			if (!supports_raw) {
				ib::error()
					<< "Tablespace doesn't support raw"
					" devices";
				ut_free(new_str);
				return(false);
			}

			str += 3;
		}

		if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
			str += 3;

			if (!supports_raw) {
				ib::error()
					<< "Tablespace doesn't support raw"
					" devices";
				ut_free(new_str);
				return(false);
			}
		}

		/* parse_units() yields 0 for anything below one megabyte
		and for a missing number. */
		if (size == 0) {

			ut_free(new_str);

			ib::error()
				<< "syntax error in file path or size"
				" specified is less than 1 megabyte";

			return(false);
		}

		++n_files;

		/* A definition must be followed by ';' or by the end of
		the specification. */
		if (*str == ';') {
			str++;
		} else if (*str != '\0') {
			ut_free(new_str);

			ib::error()
				<< "syntax error in file path or size"
				" specified is less than 1 megabyte";
			return(false);
		}
	}

	if (n_files == 0) {

		/* filepath_spec must contain at least one data file
		definition */

		ut_free(new_str);

		ib::error()
			<< "syntax error in file path or size specified"
			" is less than 1 megabyte";

		return(false);
	}

	/*---------------------- PASS 2 ---------------------------*/
	/* Then store the actual values to our arrays */
	str = input_str;
	ulint order = 0;

	while (*str != '\0') {
		filepath = str;

		/* XtraBackup needs only file names on prepare */
		if (filenames_only) {
			char*	p;

			/* Within the current definition, restart the path
			after each path separator, which leaves only the
			last path component. */
			for (p = str; *p && *p != ';'; p++) {
				if (*p == OS_PATH_SEPARATOR) {
					str = filepath = p + 1;
				}
			}
		}

		/* Note that we must step over the ':' in a Windows filepath;
		a Windows path normally looks like C:\ibdata\ibdata1:1G, but
		a Windows raw partition may have a specification like
		\\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */

		while ((*str != ':' && *str != '\0')
		       || (*str == ':'
			   && (*(str + 1) == '\\' || *(str + 1) == '/'
			       || *(str + 1) == ':'))) {
			str++;
		}

		if (*str == ':') {
			/* Make filepath a null-terminated string */
			*str = '\0';
			str++;
		}

		str = parse_units(str, &size);

		if (0 == strncmp(str, ":autoextend",
				 (sizeof ":autoextend") - 1)) {

			m_auto_extend_last_file = true;

			str += (sizeof ":autoextend") - 1;

			if (0 == strncmp(str, ":max:",
					 (sizeof ":max:") - 1)) {

				str += (sizeof ":max:") - 1;

				str = parse_units(str, &m_last_file_size_max);
			}

			if (*str != '\0') {
				ut_free(new_str);
				ib::error() << "syntax error in file path or"
					" size specified is less than 1"
					" megabyte";
				return(false);
			}
		}

		/* Register the data file; "order" is its position in the
		specification, starting from 0. */
		m_files.push_back(Datafile(filepath, flags(), size, order));
		Datafile* datafile = &m_files.back();
		datafile->make_filepath(path(), filepath, NO_EXT);

		if (::strlen(str) >= 6
		    && *str == 'n'
		    && *(str + 1) == 'e'
		    && *(str + 2) == 'w') {

			/* Pass 1 has already rejected raw devices when
			they are not supported. */
			ut_a(supports_raw);

			str += 3;

			/* Initialize new raw device only during initialize */
			m_files.back().m_type =
			opt_initialize ? SRV_NEW_RAW : SRV_OLD_RAW;
		}

		if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {

			ut_a(supports_raw);

			str += 3;

			/* Initialize new raw device only during initialize */
			if (m_files.back().m_type == SRV_NOT_RAW) {
				m_files.back().m_type =
				opt_initialize ? SRV_NEW_RAW : SRV_OLD_RAW;
			}
		}

		if (*str == ';') {
			++str;
		}
		order++;
	}

	/* Both passes must have seen the same number of files. */
	ut_ad(n_files == ulint(m_files.size()));

	ut_free(new_str);

	return(true);
}
354 
355 /** Frees the memory allocated by the parse method. */
356 void
shutdown()357 SysTablespace::shutdown()
358 {
359 	Tablespace::shutdown();
360 
361 	m_auto_extend_last_file = 0;
362 	m_last_file_size_max = 0;
363 	m_created_new_raw = 0;
364 	m_is_tablespace_full = false;
365 	m_sanity_checks_done = false;
366 }
367 
368 /** Verify the size of the physical file.
369 @param[in]	file	data file object
370 @return DB_SUCCESS if OK else error code. */
371 dberr_t
check_size(Datafile & file)372 SysTablespace::check_size(
373 	Datafile&	file)
374 {
375 	os_offset_t	size = os_file_get_size(file.m_handle);
376 	ut_a(size != (os_offset_t) -1);
377 
378 	/* Under some error conditions like disk full scenarios
379 	or file size reaching filesystem limit the data file
380 	could contain an incomplete extent at the end. When we
381 	extend a data file and if some failure happens, then
382 	also the data file could contain an incomplete extent.
383 	So we need to round the size downward to a  megabyte.*/
384 
385 	ulint	rounded_size_pages = get_pages_from_size(size);
386 
387 	/* If last file */
388 	if (&file == &m_files.back() && m_auto_extend_last_file) {
389 
390 		if (file.m_size > rounded_size_pages
391 		    || (m_last_file_size_max > 0
392 			&& m_last_file_size_max < rounded_size_pages)) {
393 			ib::error() << "The Auto-extending " << name()
394 				<< " data file '" << file.filepath() << "' is"
395 				" of a different size " << rounded_size_pages
396 				<< " pages (rounded down to MB) than specified"
397 				" in the .cnf file: initial " << file.m_size
398 				<< " pages, max " << m_last_file_size_max
399 				<< " (relevant if non-zero) pages!";
400 			return(DB_ERROR);
401 		}
402 
403 		file.m_size = rounded_size_pages;
404 	}
405 
406 	if (rounded_size_pages != file.m_size) {
407 		ib::error() << "The " << name() << " data file '"
408 			<< file.filepath() << "' is of a different size "
409 			<< rounded_size_pages << " pages (rounded down to MB)"
410 			" than the " << file.m_size << " pages specified in"
411 			" the .cnf file!";
412 		return(DB_ERROR);
413 	}
414 
415 	return(DB_SUCCESS);
416 }
417 
418 /** Set the size of the file.
419 @param[in]	file	data file object
420 @return DB_SUCCESS or error code */
421 dberr_t
set_size(Datafile & file)422 SysTablespace::set_size(
423 	Datafile&	file)
424 {
425 	ut_a(!srv_read_only_mode || m_ignore_read_only);
426 
427 	/* We created the data file and now write it full of zeros */
428 	ib::info() << "Setting file '" << file.filepath() << "' size to "
429 		<< (file.m_size >> (20 - UNIV_PAGE_SIZE_SHIFT)) << " MB."
430 		" Physically writing the file full; Please wait ...";
431 
432 	bool	success = os_file_set_size(
433 		file.m_filepath, file.m_handle,
434 		static_cast<os_offset_t>(file.m_size << UNIV_PAGE_SIZE_SHIFT),
435 		m_ignore_read_only ? false : srv_read_only_mode);
436 
437 	if (success) {
438 		ib::info() << "File '" << file.filepath() << "' size is now "
439 			<< (file.m_size >> (20 - UNIV_PAGE_SIZE_SHIFT))
440 			<< " MB.";
441 	} else {
442 		ib::error() << "Could not set the file size of '"
443 			<< file.filepath() << "'. Probably out of disk space";
444 
445 		return(DB_ERROR);
446 	}
447 
448 	return(DB_SUCCESS);
449 }
450 
451 /** Create a data file.
452 @param[in]	file	data file object
453 @return DB_SUCCESS or error code */
454 dberr_t
create_file(Datafile & file)455 SysTablespace::create_file(
456 	Datafile&	file)
457 {
458 	dberr_t	err = DB_SUCCESS;
459 
460 	ut_a(!file.m_exists);
461 	ut_a(!srv_read_only_mode || m_ignore_read_only);
462 
463 	switch (file.m_type) {
464 	case SRV_NEW_RAW:
465 
466 		/* The partition is opened, not created; then it is
467 		written over */
468 		m_created_new_raw = true;
469 
470 		/* Fall through. */
471 
472 	case SRV_OLD_RAW:
473 
474 		srv_start_raw_disk_in_use = TRUE;
475 
476 		/* Fall through. */
477 
478 	case SRV_NOT_RAW:
479 		err = file.open_or_create(
480 			m_ignore_read_only ? false : srv_read_only_mode);
481 		break;
482 	}
483 
484 
485 	if (err == DB_SUCCESS && file.m_type != SRV_OLD_RAW) {
486 		err = set_size(file);
487 	}
488 
489 	return(err);
490 }
491 
492 /** Open a data file.
493 @param[in]	file	data file object
494 @return DB_SUCCESS or error code */
495 dberr_t
open_file(Datafile & file)496 SysTablespace::open_file(
497 	Datafile&	file)
498 {
499 	dberr_t	err = DB_SUCCESS;
500 
501 	ut_a(file.m_exists);
502 
503 	switch (file.m_type) {
504 	case SRV_NEW_RAW:
505 		/* The partition is opened, not created; then it is
506 		written over */
507 		m_created_new_raw = true;
508 
509 		/* Fall through */
510 
511 	case SRV_OLD_RAW:
512 		srv_start_raw_disk_in_use = TRUE;
513 
514 		if (srv_read_only_mode && !m_ignore_read_only) {
515 			ib::error() << "Can't open a raw device '"
516 				<< file.m_filepath << "' when"
517 				" --innodb-read-only is set";
518 
519 			return(DB_ERROR);
520 		}
521 
522 		/* Fall through */
523 
524 	case SRV_NOT_RAW:
525 		err = file.open_or_create(
526 			m_ignore_read_only ? false : srv_read_only_mode);
527 
528 		if (err != DB_SUCCESS) {
529 			return(err);
530 		}
531 		break;
532 	}
533 
534 	switch (file.m_type) {
535 	case SRV_NEW_RAW:
536 		/* Set file size for new raw device. */
537 		err = set_size(file);
538 		break;
539 
540 	case SRV_NOT_RAW:
541 		/* Check file size for existing file. */
542 		err = check_size(file);
543 		break;
544 
545 	case SRV_OLD_RAW:
546 		err = DB_SUCCESS;
547 		break;
548 
549 	}
550 
551 	if (err != DB_SUCCESS) {
552 		file.close();
553 	}
554 
555 	return(err);
556 }
557 
558 #ifndef UNIV_HOTBACKUP
559 /** Check the tablespace header for this tablespace.
560 @param[out]	flushed_lsn	the value of FIL_PAGE_FILE_FLUSH_LSN
561 @return DB_SUCCESS or error code */
562 dberr_t
read_lsn_and_check_flags(lsn_t * flushed_lsn)563 SysTablespace::read_lsn_and_check_flags(lsn_t* flushed_lsn)
564 {
565 	dberr_t	err;
566 
567 	/* Only relevant for the system tablespace. */
568 	ut_ad(space_id() == TRX_SYS_SPACE);
569 
570 	files_t::iterator it = m_files.begin();
571 
572 	ut_a(it->m_exists);
573 
574 	if (it->m_handle.m_file == OS_FILE_CLOSED) {
575 
576 		err = it->open_or_create(
577 			m_ignore_read_only ?  false : srv_read_only_mode);
578 
579 		if (err != DB_SUCCESS) {
580 			return(err);
581 		}
582 	}
583 
584 	err = it->read_first_page(
585 		m_ignore_read_only ?  false : srv_read_only_mode);
586 
587 	if (err != DB_SUCCESS) {
588 		return(err);
589 	}
590 
591 	ut_a(it->order() == 0);
592 
593 	/* XtraBackup never loads corrupted pages from
594 	the doublewrite buffer */
595 	buf_dblwr_init_or_load_pages(it->handle(), it->filepath(), false);
596 
597 	/* Check the contents of the first page of the
598 	first datafile. */
599 	for (int retry = 0; retry < 2; ++retry) {
600 
601 		err = it->validate_first_page(flushed_lsn, false);
602 
603 		if (err != DB_SUCCESS
604 		    && (retry == 1
605 			/* never restore from doublewrite */
606 #if 0
607 			|| it->restore_from_doublewrite(0) != DB_SUCCESS
608 #endif
609 		    )) {
610 
611 			it->close();
612 
613 			return(err);
614 		}
615 	}
616 
617 	/* Make sure the tablespace space ID matches the
618 	space ID on the first page of the first datafile. */
619 	if (space_id() != it->m_space_id) {
620 
621 		ib::error()
622 			<< "The " << name() << " data file '" << it->name()
623 			<< "' has the wrong space ID. It should be "
624 			<< space_id() << ", but " << it->m_space_id
625 			<< " was found";
626 
627 		it->close();
628 
629 		return(err);
630 	}
631 
632 	it->close();
633 
634 	return(DB_SUCCESS);
635 }
636 #endif /* !UNIV_HOTBACKUP */
637 /** Check if a file can be opened in the correct mode.
638 @param[in]	file	data file object
639 @param[out]	reason	exact reason if file_status check failed.
640 @return DB_SUCCESS or error code. */
641 dberr_t
check_file_status(const Datafile & file,file_status_t & reason)642 SysTablespace::check_file_status(
643 	const Datafile&		file,
644 	file_status_t&		reason)
645 {
646 	os_file_stat_t	stat;
647 
648 	memset(&stat, 0x0, sizeof(stat));
649 
650 	dberr_t	err = os_file_get_status(
651 		file.m_filepath, &stat, true,
652 		m_ignore_read_only ? false : srv_read_only_mode);
653 
654 	reason = FILE_STATUS_VOID;
655 	/* File exists but we can't read the rw-permission settings. */
656 	switch (err) {
657 	case DB_FAIL:
658 		ib::error() << "os_file_get_status() failed on '"
659 			<< file.filepath()
660 			<< "'. Can't determine file permissions";
661 		err = DB_ERROR;
662 		reason = FILE_STATUS_RW_PERMISSION_ERROR;
663 		break;
664 
665 	case DB_SUCCESS:
666 
667 		/* Note: stat.rw_perm is only valid for "regular" files */
668 
669 		if (stat.type == OS_FILE_TYPE_FILE) {
670 
671 			if (!stat.rw_perm) {
672 				const char	*p = (!srv_read_only_mode
673 						      || m_ignore_read_only)
674 						     ? "writable"
675 						     : "readable";
676 
677 				ib::error() << "The " << name() << " data file"
678 					<< " '" << file.name() << "' must be "
679 					<< p;
680 
681 				err = DB_ERROR;
682 				reason = FILE_STATUS_READ_WRITE_ERROR;
683 			}
684 
685 		} else {
686 			/* Not a regular file, bail out. */
687 			ib::error() << "The " << name() << " data file '"
688 				<< file.name() << "' is not a regular"
689 				" InnoDB data file.";
690 
691 			err = DB_ERROR;
692 			reason = FILE_STATUS_NOT_REGULAR_FILE_ERROR;
693 		}
694 		break;
695 
696 	case DB_NOT_FOUND:
697 		break;
698 
699 	default:
700 		ut_ad(0);
701 	}
702 
703 	return(err);
704 }
705 
706 /** Note that the data file was not found.
707 @param[in]	file		data file object
708 @param[out]	create_new_db	true if a new instance to be created
709 @return DB_SUCESS or error code */
710 dberr_t
file_not_found(Datafile & file,bool * create_new_db)711 SysTablespace::file_not_found(
712 	Datafile&	file,
713 	bool*	create_new_db)
714 {
715 	file.m_exists = false;
716 
717 	if (srv_read_only_mode && !m_ignore_read_only) {
718 		ib::error() << "Can't create file '" << file.filepath()
719 			<< "' when --innodb-read-only is set";
720 
721 		return(DB_ERROR);
722 
723 	} else if (&file == &m_files.front()) {
724 
725 		/* First data file. */
726 		ut_a(!*create_new_db);
727 		*create_new_db = TRUE;
728 
729 		if (space_id() == TRX_SYS_SPACE) {
730 			ib::info() << "The first " << name() << " data file '"
731 				<< file.name() << "' did not exist."
732 				" A new tablespace will be created!";
733 		}
734 
735 	} else {
736 		ib::info() << "Need to create a new " << name()
737 			<< " data file '" << file.name() << "'.";
738 	}
739 
740 	/* Set the file create mode. */
741 	switch (file.m_type) {
742 	case SRV_NOT_RAW:
743 		file.set_open_flags(OS_FILE_CREATE);
744 		break;
745 
746 	case SRV_NEW_RAW:
747 	case SRV_OLD_RAW:
748 		file.set_open_flags(OS_FILE_OPEN_RAW);
749 		break;
750 	}
751 
752 	return(DB_SUCCESS);
753 }
754 
755 /** Note that the data file was found.
756 @param[in,out]	file	data file object
757 @return true if a new instance to be created */
758 bool
file_found(Datafile & file)759 SysTablespace::file_found(
760 	Datafile&	file)
761 {
762 	/* Note that the file exists and can be opened
763 	in the appropriate mode. */
764 	file.m_exists = true;
765 
766 	/* Set the file open mode */
767 	switch (file.m_type) {
768 	case SRV_NOT_RAW:
769 		file.set_open_flags(
770 			&file == &m_files.front()
771 			? OS_FILE_OPEN_RETRY : OS_FILE_OPEN);
772 		break;
773 
774 	case SRV_NEW_RAW:
775 	case SRV_OLD_RAW:
776 		file.set_open_flags(OS_FILE_OPEN_RAW);
777 		break;
778 	}
779 
780 	/* Need to create the system tablespace for new raw device. */
781 	return(file.m_type == SRV_NEW_RAW);
782 }
783 #ifndef UNIV_HOTBACKUP
784 /** Check the data file specification.
785 @param[out] create_new_db	true if a new database is to be created
786 @param[in] min_expected_size	Minimum expected tablespace size in bytes
787 @return DB_SUCCESS if all OK else error code */
788 dberr_t
check_file_spec(bool * create_new_db,ulint min_expected_size)789 SysTablespace::check_file_spec(
790 	bool*	create_new_db,
791 	ulint	min_expected_size)
792 {
793 	*create_new_db = FALSE;
794 
795 	if (m_files.size() >= 1000) {
796 		ib::error() << "There must be < 1000 data files in "
797 			<< name() << " but " << m_files.size() << " have been"
798 			" defined.";
799 
800 		return(DB_ERROR);
801 	}
802 
803 	if (get_sum_of_sizes() < min_expected_size / UNIV_PAGE_SIZE) {
804 
805 		ib::error() << "Tablespace size must be at least "
806 			<< min_expected_size / (1024 * 1024) << " MB";
807 
808 		return(DB_ERROR);
809 	}
810 
811 	dberr_t	err = DB_SUCCESS;
812 
813 	ut_a(!m_files.empty());
814 
815 	/* If there is more than one data file and the last data file
816 	doesn't exist, that is OK. We allow adding of new data files. */
817 
818 	files_t::iterator	begin = m_files.begin();
819 	files_t::iterator	end = m_files.end();
820 
821 	for (files_t::iterator it = begin; it != end; ++it) {
822 
823 		file_status_t reason_if_failed;
824 		err = check_file_status(*it, reason_if_failed);
825 
826 		if (err == DB_NOT_FOUND) {
827 
828 			err = file_not_found(*it, create_new_db);
829 
830 			if (err != DB_SUCCESS) {
831 				break;
832 			}
833 
834 		} else if (err != DB_SUCCESS) {
835 			if (reason_if_failed == FILE_STATUS_READ_WRITE_ERROR) {
836 				const char*	p = (!srv_read_only_mode
837 						     || m_ignore_read_only)
838 						    ? "writable" : "readable";
839 				ib::error() << "The " << name() << " data file"
840 					<< " '" << it->name() << "' must be "
841 					<< p;
842 			}
843 
844 			ut_a(err != DB_FAIL);
845 			break;
846 
847 		} else if (*create_new_db) {
848 			ib::error() << "The " << name() << " data file '"
849 				<< begin->m_name << "' was not found but"
850 				" one of the other data files '" << it->m_name
851 				<< "' exists.";
852 
853 			err = DB_ERROR;
854 			break;
855 
856 		} else {
857 			*create_new_db = file_found(*it);
858 		}
859 	}
860 
861 	/* We assume doublewirte blocks in the first data file. */
862 	if (err == DB_SUCCESS && *create_new_db
863 	    && begin->m_size < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 3) {
864 		ib::error() << "The " << name() << " data file "
865 			<< "'" << begin->name() << "' must be at least "
866 			<< TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 3 * UNIV_PAGE_SIZE
867 			/ (1024 * 1024) << " MB";
868 
869 		err = DB_ERROR;
870 	}
871 
872 	return(err);
873 }
874 
/** Open or create the data files
Every data file is first opened (if it exists) or created. Unless a new
database is being created, the header page of the first data file is
then validated. Finally the handles are closed and the tablespace and
its files are registered in the fil_system cache.
@param[in]  is_temp		whether this is a temporary tablespace
@param[in]  create_new_db	whether we are creating a new database
@param[out] sum_new_sizes	sum of sizes of the new files added;
				may be NULL
@param[out] flush_lsn		FIL_PAGE_FILE_FLUSH_LSN of first file;
				may be NULL, in which case the header
				page is not validated
@return DB_SUCCESS or error code */
dberr_t
SysTablespace::open_or_create(
	bool	is_temp,
	bool	create_new_db,
	ulint*	sum_new_sizes,
	lsn_t*	flush_lsn)
{
	dberr_t		err	= DB_SUCCESS;
	fil_space_t*	space	= NULL;

	ut_ad(!m_files.empty());

	if (sum_new_sizes) {
		*sum_new_sizes = 0;
	}

	files_t::iterator	begin = m_files.begin();
	files_t::iterator	end = m_files.end();

	ut_ad(begin->order() == 0);

	/* Step 1: open or create each data file. */
	for (files_t::iterator it = begin; it != end; ++it) {

		if (it->m_exists) {
			err = open_file(*it);

			/* For new raw device increment new size. */
			if (sum_new_sizes && it->m_type == SRV_NEW_RAW) {

				*sum_new_sizes += it->m_size;
			}

		} else {
			err = create_file(*it);

			if (sum_new_sizes) {
				*sum_new_sizes += it->m_size;
			}

			/* Set the correct open flags now that we have
			successfully created the file. */
			if (err == DB_SUCCESS) {
				/* We ignore new_db OUT parameter here
				as the information is known at this stage */
				file_found(*it);
			}
		}

		if (err != DB_SUCCESS) {
			return(err);
		}

#if !defined(NO_FALLOCATE) && defined(UNIV_LINUX)
		/* Note: This should really be per node and not per
		tablespace because a tablespace can contain multiple
		files (nodes). The implication is that all files of
		the tablespace should be on the same medium. */

		if (fil_fusionio_enable_atomic_write(it->m_handle)) {

			if (srv_use_doublewrite_buf) {
				ib::info() << "FusionIO atomic IO enabled,"
					" disabling the double write buffer";

				srv_use_doublewrite_buf = false;
			}

			it->m_atomic_write = true;
		} else {
			it->m_atomic_write = false;
		}
#else
		it->m_atomic_write = false;
#endif /* !NO_FALLOCATE && UNIV_LINUX*/
	}

	/* Step 2: validate the header page of an existing tablespace. */
	if (!create_new_db && flush_lsn) {
		/* Validate the header page in the first datafile
		and read LSNs from the others. */
		err = read_lsn_and_check_flags(flush_lsn);
		if (err != DB_SUCCESS) {
			return(err);
		}
	}

	/* Step 3: Close the current handles, add space and file info to
	the fil_system cache and the Data Dictionary, and re-open them
	in the fil_system cache so that they stay open until shutdown. */
	ulint	node_counter = 0;
	for (files_t::iterator it = begin; it != end; ++it) {
		it->close();
		it->m_exists = true;

		if (it == begin) {
			/* First data file. */

			/* Create the tablespace entry for the multi-file
			tablespace in the tablespace manager. */
			/* NOTE(review): the result is not checked before
			it is passed to fil_node_create() below - confirm
			whether fil_space_create() can return NULL here. */
			space = fil_space_create(
				name(), space_id(), flags(), is_temp
				? FIL_TYPE_TEMPORARY : FIL_TYPE_TABLESPACE);
		}

		ut_a(fil_validate());

		/* Only the last file gets a max size other than its own
		size: m_last_file_size_max, or ULINT_MAX when that
		is zero. */
		ulint	max_size = (++node_counter == m_files.size()
				    ? (m_last_file_size_max == 0
				       ? ULINT_MAX
				       : m_last_file_size_max)
				    : it->m_size);

		/* Add the datafile to the fil_system cache. */
		if (!fil_node_create(
			    it->m_filepath, it->m_size,
			    space, it->m_type != SRV_NOT_RAW,
			    it->m_atomic_write, max_size)) {

			err = DB_ERROR;
			break;
		}
	}

	return(err);
}
1005 #endif /* UNIV_HOTBACKUP */
1006 /** Normalize the file size, convert from megabytes to number of pages. */
1007 void
normalize()1008 SysTablespace::normalize()
1009 {
1010 	files_t::iterator	end = m_files.end();
1011 
1012 	for (files_t::iterator it = m_files.begin(); it != end; ++it) {
1013 
1014 		it->m_size *= (1024 * 1024) / UNIV_PAGE_SIZE;
1015 	}
1016 
1017 	m_last_file_size_max *= (1024 * 1024) / UNIV_PAGE_SIZE;
1018 }
1019 
1020 
1021 /**
1022 @return next increment size */
1023 ulint
get_increment() const1024 SysTablespace::get_increment() const
1025 {
1026 	ulint	increment;
1027 
1028 	if (m_last_file_size_max == 0) {
1029 		increment = get_autoextend_increment();
1030 	} else {
1031 
1032 		if (!is_valid_size()) {
1033 			ib::error() << "The last data file in " << name()
1034 				<< " has a size of " << last_file_size()
1035 				<< " but the max size allowed is "
1036 				<< m_last_file_size_max;
1037 		}
1038 
1039 		increment = m_last_file_size_max - last_file_size();
1040 	}
1041 
1042 	if (increment > get_autoextend_increment()) {
1043 		increment = get_autoextend_increment();
1044 	}
1045 
1046 	return(increment);
1047 }
1048 
1049 
1050 /**
1051 @return true if configured to use raw devices */
1052 bool
has_raw_device()1053 SysTablespace::has_raw_device()
1054 {
1055 	files_t::iterator	end = m_files.end();
1056 
1057 	for (files_t::iterator it = m_files.begin(); it != end; ++it) {
1058 
1059 		if (it->is_raw_device()) {
1060 			return(true);
1061 		}
1062 	}
1063 
1064 	return(false);
1065 }
1066