1 /*****************************************************************************
2 
3 Copyright (c) 2013, 2021, Oracle and/or its affiliates.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
/**************************************************//**
@file fsp/fsp0sysspace.cc
Multi file, shared, system tablespace implementation.

Created 2012-11-16 by Sunny Bains as srv/srv0space.cc
Refactored 2013-7-26 by Kevin Lewis
*******************************************************/
34 
35 #include "ha_prototypes.h"
36 
37 #include "fsp0sysspace.h"
38 #include "srv0start.h"
39 #include "trx0sys.h"
40 #ifndef UNIV_HOTBACKUP
41 #include "dict0load.h"
42 #include "mem0mem.h"
43 #include "os0file.h"
44 #include "row0mysql.h"
45 #include "ut0new.h"
46 
47 /** The server header file is included to access opt_initialize global variable.
48 If server passes the option for create/open DB to SE, we should remove such
49 direct reference to server header and global variable */
50 #include "mysqld.h"
51 #else
52 my_bool opt_initialize = 0;
53 #endif /* !UNIV_HOTBACKUP */
54 
55 /** The control info of the system tablespace. */
56 SysTablespace srv_sys_space;
57 
58 /** The control info of a temporary table shared tablespace. */
59 SysTablespace srv_tmp_space;
60 
61 /** If the last data file is auto-extended, we add this many pages to it
62 at a time. We have to make this public because it is a config variable. */
63 ulong sys_tablespace_auto_extend_increment;
64 
65 #ifdef UNIV_DEBUG
/** Controls whether extra debug checks are done for the temporary tablespace.
Default = true, that is, such checks are skipped.
This variable is not exposed to the end user; it is kept as a variable so
that a developer can enable the checks while debugging. */
70 bool srv_skip_temp_table_checks_debug = true;
71 #endif /* UNIV_DEBUG */
72 
73 /** Convert a numeric string that optionally ends in G or M or K,
74     to a number containing megabytes.
75 @param[in]	str	String with a quantity in bytes
76 @param[out]	megs	The number in megabytes
77 @return next character in string */
78 char*
parse_units(char * ptr,ulint * megs)79 SysTablespace::parse_units(
80 	char*	ptr,
81 	ulint*	megs)
82 {
83 	char*		endp;
84 
85 	*megs = strtoul(ptr, &endp, 10);
86 
87 	ptr = endp;
88 
89 	switch (*ptr) {
90 	case 'G': case 'g':
91 		*megs *= 1024;
92 		/* fall through */
93 	case 'M': case 'm':
94 		++ptr;
95 		break;
96 	case 'K': case 'k':
97 		*megs /= 1024;
98 		++ptr;
99 		break;
100 	default:
101 		*megs /= 1024 * 1024;
102 		break;
103 	}
104 
105 	return(ptr);
106 }
107 
108 /** Parse the input params and populate member variables.
109 @param[in]	filepath	path to data files
110 @param[in]	supports_raw	true if the tablespace supports raw devices
111 @return true on success parse */
112 bool
parse_params(const char * filepath_spec,bool supports_raw)113 SysTablespace::parse_params(
114 	const char*	filepath_spec,
115 	bool		supports_raw)
116 {
117 	char*	filepath;
118 	ulint	size;
119 	char*	input_str;
120 	ulint	n_files = 0;
121 
122 	ut_ad(m_last_file_size_max == 0);
123 	ut_ad(!m_auto_extend_last_file);
124 
125 	char*	new_str = mem_strdup(filepath_spec);
126 	char*	str = new_str;
127 
128 	input_str = str;
129 
130 	/*---------------------- PASS 1 ---------------------------*/
131 	/* First calculate the number of data files and check syntax:
132 	filepath:size[K |M | G];filepath:size[K |M | G]... .
133 	Note that a Windows path may contain a drive name and a ':'. */
134 	while (*str != '\0') {
135 		filepath = str;
136 
137 		while ((*str != ':' && *str != '\0')
138 		       || (*str == ':'
139 			   && (*(str + 1) == '\\' || *(str + 1) == '/'
140 			       || *(str + 1) == ':'))) {
141 			str++;
142 		}
143 
144 		if (*str == '\0') {
145 			ut_free(new_str);
146 
147 			ib::error()
148 				<< "syntax error in file path or size"
149 				" specified is less than 1 megabyte";
150 			return(false);
151 		}
152 
153 		str++;
154 
155 		str = parse_units(str, &size);
156 
157 		if (0 == strncmp(str, ":autoextend",
158 				 (sizeof ":autoextend") - 1)) {
159 
160 			str += (sizeof ":autoextend") - 1;
161 
162 			if (0 == strncmp(str, ":max:",
163 					 (sizeof ":max:") - 1)) {
164 
165 				str += (sizeof ":max:") - 1;
166 
167 				str = parse_units(str, &size);
168 			}
169 
170 			if (*str != '\0') {
171 				ut_free(new_str);
172 				ib::error()
173 					<< "syntax error in file path or"
174 					<< " size specified is less than"
175 					<< " 1 megabyte";
176 				return(false);
177 			}
178 		}
179 
180 		if (::strlen(str) >= 6
181 		    && *str == 'n'
182 		    && *(str + 1) == 'e'
183 		    && *(str + 2) == 'w') {
184 
185 			if (!supports_raw) {
186 				ib::error()
187 					<< "Tablespace doesn't support raw"
188 					" devices";
189 				ut_free(new_str);
190 				return(false);
191 			}
192 
193 			str += 3;
194 		}
195 
196 		if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
197 			str += 3;
198 
199 			if (!supports_raw) {
200 				ib::error()
201 					<< "Tablespace doesn't support raw"
202 					" devices";
203 				ut_free(new_str);
204 				return(false);
205 			}
206 		}
207 
208 		if (size == 0) {
209 
210 			ut_free(new_str);
211 
212 			ib::error()
213 				<< "syntax error in file path or size"
214 				" specified is less than 1 megabyte";
215 
216 			return(false);
217 		}
218 
219 		++n_files;
220 
221 		if (*str == ';') {
222 			str++;
223 		} else if (*str != '\0') {
224 			ut_free(new_str);
225 
226 			ib::error()
227 				<< "syntax error in file path or size"
228 				" specified is less than 1 megabyte";
229 			return(false);
230 		}
231 	}
232 
233 	if (n_files == 0) {
234 
235 		/* filepath_spec must contain at least one data file
236 		definition */
237 
238 		ut_free(new_str);
239 
240 		ib::error()
241 			<< "syntax error in file path or size specified"
242 			" is less than 1 megabyte";
243 
244 		return(false);
245 	}
246 
247 	/*---------------------- PASS 2 ---------------------------*/
248 	/* Then store the actual values to our arrays */
249 	str = input_str;
250 	ulint order = 0;
251 
252 	while (*str != '\0') {
253 		filepath = str;
254 
255 		/* Note that we must step over the ':' in a Windows filepath;
256 		a Windows path normally looks like C:\ibdata\ibdata1:1G, but
257 		a Windows raw partition may have a specification like
258 		\\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */
259 
260 		while ((*str != ':' && *str != '\0')
261 		       || (*str == ':'
262 			   && (*(str + 1) == '\\' || *(str + 1) == '/'
263 			       || *(str + 1) == ':'))) {
264 			str++;
265 		}
266 
267 		if (*str == ':') {
268 			/* Make filepath a null-terminated string */
269 			*str = '\0';
270 			str++;
271 		}
272 
273 		str = parse_units(str, &size);
274 
275 		if (0 == strncmp(str, ":autoextend",
276 				 (sizeof ":autoextend") - 1)) {
277 
278 			m_auto_extend_last_file = true;
279 
280 			str += (sizeof ":autoextend") - 1;
281 
282 			if (0 == strncmp(str, ":max:",
283 					 (sizeof ":max:") - 1)) {
284 
285 				str += (sizeof ":max:") - 1;
286 
287 				str = parse_units(str, &m_last_file_size_max);
288 			}
289 
290 			if (*str != '\0') {
291 				ut_free(new_str);
292 				ib::error() << "syntax error in file path or"
293 					" size specified is less than 1"
294 					" megabyte";
295 				return(false);
296 			}
297 		}
298 
299 		m_files.push_back(Datafile(filepath, flags(), size, order));
300 		Datafile* datafile = &m_files.back();
301 		datafile->make_filepath(path(), filepath, NO_EXT);
302 
303 		if (::strlen(str) >= 6
304 		    && *str == 'n'
305 		    && *(str + 1) == 'e'
306 		    && *(str + 2) == 'w') {
307 
308 			ut_a(supports_raw);
309 
310 			str += 3;
311 
312 			/* Initialize new raw device only during initialize */
313 			m_files.back().m_type =
314 			opt_initialize ? SRV_NEW_RAW : SRV_OLD_RAW;
315 		}
316 
317 		if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
318 
319 			ut_a(supports_raw);
320 
321 			str += 3;
322 
323 			/* Initialize new raw device only during initialize */
324 			if (m_files.back().m_type == SRV_NOT_RAW) {
325 				m_files.back().m_type =
326 				opt_initialize ? SRV_NEW_RAW : SRV_OLD_RAW;
327 			}
328 		}
329 
330 		if (*str == ';') {
331 			++str;
332 		}
333 		order++;
334 	}
335 
336 	ut_ad(n_files == ulint(m_files.size()));
337 
338 	ut_free(new_str);
339 
340 	return(true);
341 }
342 
343 /** Frees the memory allocated by the parse method. */
344 void
shutdown()345 SysTablespace::shutdown()
346 {
347 	Tablespace::shutdown();
348 
349 	m_auto_extend_last_file = 0;
350 	m_last_file_size_max = 0;
351 	m_created_new_raw = 0;
352 	m_is_tablespace_full = false;
353 	m_sanity_checks_done = false;
354 }
355 
356 /** Verify the size of the physical file.
357 @param[in]	file	data file object
358 @return DB_SUCCESS if OK else error code. */
359 dberr_t
check_size(Datafile & file)360 SysTablespace::check_size(
361 	Datafile&	file)
362 {
363 	os_offset_t	size = os_file_get_size(file.m_handle);
364 	ut_a(size != (os_offset_t) -1);
365 
366 	/* Under some error conditions like disk full scenarios
367 	or file size reaching filesystem limit the data file
368 	could contain an incomplete extent at the end. When we
369 	extend a data file and if some failure happens, then
370 	also the data file could contain an incomplete extent.
371 	So we need to round the size downward to a  megabyte.*/
372 
373 	ulint	rounded_size_pages = get_pages_from_size(size);
374 
375 	/* If last file */
376 	if (&file == &m_files.back() && m_auto_extend_last_file) {
377 
378 		if (file.m_size > rounded_size_pages
379 		    || (m_last_file_size_max > 0
380 			&& m_last_file_size_max < rounded_size_pages)) {
381 			ib::error() << "The Auto-extending " << name()
382 				<< " data file '" << file.filepath() << "' is"
383 				" of a different size " << rounded_size_pages
384 				<< " pages (rounded down to MB) than specified"
385 				" in the .cnf file: initial " << file.m_size
386 				<< " pages, max " << m_last_file_size_max
387 				<< " (relevant if non-zero) pages!";
388 			return(DB_ERROR);
389 		}
390 
391 		file.m_size = rounded_size_pages;
392 	}
393 
394 	if (rounded_size_pages != file.m_size) {
395 		ib::error() << "The " << name() << " data file '"
396 			<< file.filepath() << "' is of a different size "
397 			<< rounded_size_pages << " pages (rounded down to MB)"
398 			" than the " << file.m_size << " pages specified in"
399 			" the .cnf file!";
400 		return(DB_ERROR);
401 	}
402 
403 	return(DB_SUCCESS);
404 }
405 
406 /** Set the size of the file.
407 @param[in]	file	data file object
408 @return DB_SUCCESS or error code */
409 dberr_t
set_size(Datafile & file)410 SysTablespace::set_size(
411 	Datafile&	file)
412 {
413 	ut_a(!srv_read_only_mode || m_ignore_read_only);
414 
415 	/* We created the data file and now write it full of zeros */
416 	ib::info() << "Setting file '" << file.filepath() << "' size to "
417 		<< (file.m_size >> (20 - UNIV_PAGE_SIZE_SHIFT)) << " MB."
418 		" Physically writing the file full; Please wait ...";
419 
420 	bool	success = os_file_set_size(
421 		file.m_filepath, file.m_handle,
422 		static_cast<os_offset_t>(file.m_size << UNIV_PAGE_SIZE_SHIFT),
423 		m_ignore_read_only ? false : srv_read_only_mode);
424 
425 	if (success) {
426 		ib::info() << "File '" << file.filepath() << "' size is now "
427 			<< (file.m_size >> (20 - UNIV_PAGE_SIZE_SHIFT))
428 			<< " MB.";
429 	} else {
430 		ib::error() << "Could not set the file size of '"
431 			<< file.filepath() << "'. Probably out of disk space";
432 
433 		return(DB_ERROR);
434 	}
435 
436 	return(DB_SUCCESS);
437 }
438 
439 /** Create a data file.
440 @param[in]	file	data file object
441 @return DB_SUCCESS or error code */
442 dberr_t
create_file(Datafile & file)443 SysTablespace::create_file(
444 	Datafile&	file)
445 {
446 	dberr_t	err = DB_SUCCESS;
447 
448 	ut_a(!file.m_exists);
449 	ut_a(!srv_read_only_mode || m_ignore_read_only);
450 
451 	switch (file.m_type) {
452 	case SRV_NEW_RAW:
453 
454 		/* The partition is opened, not created; then it is
455 		written over */
456 		m_created_new_raw = true;
457 
458 		/* Fall through. */
459 
460 	case SRV_OLD_RAW:
461 
462 		srv_start_raw_disk_in_use = TRUE;
463 
464 		/* Fall through. */
465 
466 	case SRV_NOT_RAW:
467 		err = file.open_or_create(
468 			m_ignore_read_only ? false : srv_read_only_mode);
469 		break;
470 	}
471 
472 
473 	if (err == DB_SUCCESS && file.m_type != SRV_OLD_RAW) {
474 		err = set_size(file);
475 	}
476 
477 	return(err);
478 }
479 
480 /** Open a data file.
481 @param[in]	file	data file object
482 @return DB_SUCCESS or error code */
483 dberr_t
open_file(Datafile & file)484 SysTablespace::open_file(
485 	Datafile&	file)
486 {
487 	dberr_t	err = DB_SUCCESS;
488 
489 	ut_a(file.m_exists);
490 
491 	switch (file.m_type) {
492 	case SRV_NEW_RAW:
493 		/* The partition is opened, not created; then it is
494 		written over */
495 		m_created_new_raw = true;
496 
497 		/* Fall through */
498 
499 	case SRV_OLD_RAW:
500 		srv_start_raw_disk_in_use = TRUE;
501 
502 		if (srv_read_only_mode && !m_ignore_read_only) {
503 			ib::error() << "Can't open a raw device '"
504 				<< file.m_filepath << "' when"
505 				" --innodb-read-only is set";
506 
507 			return(DB_ERROR);
508 		}
509 
510 		/* Fall through */
511 
512 	case SRV_NOT_RAW:
513 		err = file.open_or_create(
514 			m_ignore_read_only ? false : srv_read_only_mode);
515 
516 		if (err != DB_SUCCESS) {
517 			return(err);
518 		}
519 		break;
520 	}
521 
522 	switch (file.m_type) {
523 	case SRV_NEW_RAW:
524 		/* Set file size for new raw device. */
525 		err = set_size(file);
526 		break;
527 
528 	case SRV_NOT_RAW:
529 		/* Check file size for existing file. */
530 		err = check_size(file);
531 		break;
532 
533 	case SRV_OLD_RAW:
534 		err = DB_SUCCESS;
535 		break;
536 
537 	}
538 
539 	if (err != DB_SUCCESS) {
540 		file.close();
541 	}
542 
543 	return(err);
544 }
545 
546 #ifndef UNIV_HOTBACKUP
547 /** Check the tablespace header for this tablespace.
548 @param[out]	flushed_lsn	the value of FIL_PAGE_FILE_FLUSH_LSN
549 @return DB_SUCCESS or error code */
550 dberr_t
read_lsn_and_check_flags(lsn_t * flushed_lsn)551 SysTablespace::read_lsn_and_check_flags(lsn_t* flushed_lsn)
552 {
553 	dberr_t	err;
554 
555 	/* Only relevant for the system tablespace. */
556 	ut_ad(space_id() == TRX_SYS_SPACE);
557 
558 	files_t::iterator it = m_files.begin();
559 
560 	ut_a(it->m_exists);
561 
562 	if (it->m_handle.m_file == OS_FILE_CLOSED) {
563 
564 		err = it->open_or_create(
565 			m_ignore_read_only ?  false : srv_read_only_mode);
566 
567 		if (err != DB_SUCCESS) {
568 			return(err);
569 		}
570 	}
571 
572 	err = it->read_first_page(
573 		m_ignore_read_only ?  false : srv_read_only_mode);
574 
575 	if (err != DB_SUCCESS) {
576 		return(err);
577 	}
578 
579 	ut_a(it->order() == 0);
580 
581 
582 	buf_dblwr_init_or_load_pages(it->handle(), it->filepath());
583 
584 	/* Check the contents of the first page of the
585 	first datafile. */
586 	for (int retry = 0; retry < 2; ++retry) {
587 
588 		err = it->validate_first_page(flushed_lsn, false);
589 
590 		if (err != DB_SUCCESS
591 		    && (retry == 1
592 			|| it->restore_from_doublewrite(0) != DB_SUCCESS)) {
593 
594 			it->close();
595 
596 			return(err);
597 		}
598 	}
599 
600 	/* Make sure the tablespace space ID matches the
601 	space ID on the first page of the first datafile. */
602 	if (space_id() != it->m_space_id) {
603 
604 		ib::error()
605 			<< "The " << name() << " data file '" << it->name()
606 			<< "' has the wrong space ID. It should be "
607 			<< space_id() << ", but " << it->m_space_id
608 			<< " was found";
609 
610 		it->close();
611 
612 		return(err);
613 	}
614 
615 	it->close();
616 
617 	return(DB_SUCCESS);
618 }
619 #endif /* !UNIV_HOTBACKUP */
620 /** Check if a file can be opened in the correct mode.
621 @param[in]	file	data file object
622 @param[out]	reason	exact reason if file_status check failed.
623 @return DB_SUCCESS or error code. */
624 dberr_t
check_file_status(const Datafile & file,file_status_t & reason)625 SysTablespace::check_file_status(
626 	const Datafile&		file,
627 	file_status_t&		reason)
628 {
629 	os_file_stat_t	stat;
630 
631 	memset(&stat, 0x0, sizeof(stat));
632 
633 	dberr_t	err = os_file_get_status(
634 		file.m_filepath, &stat, true,
635 		m_ignore_read_only ? false : srv_read_only_mode);
636 
637 	reason = FILE_STATUS_VOID;
638 	/* File exists but we can't read the rw-permission settings. */
639 	switch (err) {
640 	case DB_FAIL:
641 		ib::error() << "os_file_get_status() failed on '"
642 			<< file.filepath()
643 			<< "'. Can't determine file permissions";
644 		err = DB_ERROR;
645 		reason = FILE_STATUS_RW_PERMISSION_ERROR;
646 		break;
647 
648 	case DB_SUCCESS:
649 
650 		/* Note: stat.rw_perm is only valid for "regular" files */
651 
652 		if (stat.type == OS_FILE_TYPE_FILE) {
653 
654 			if (!stat.rw_perm) {
655 				const char	*p = (!srv_read_only_mode
656 						      || m_ignore_read_only)
657 						     ? "writable"
658 						     : "readable";
659 
660 				ib::error() << "The " << name() << " data file"
661 					<< " '" << file.name() << "' must be "
662 					<< p;
663 
664 				err = DB_ERROR;
665 				reason = FILE_STATUS_READ_WRITE_ERROR;
666 			}
667 
668 		} else {
669 			/* Not a regular file, bail out. */
670 			ib::error() << "The " << name() << " data file '"
671 				<< file.name() << "' is not a regular"
672 				" InnoDB data file.";
673 
674 			err = DB_ERROR;
675 			reason = FILE_STATUS_NOT_REGULAR_FILE_ERROR;
676 		}
677 		break;
678 
679 	case DB_NOT_FOUND:
680 		break;
681 
682 	default:
683 		ut_ad(0);
684 	}
685 
686 	return(err);
687 }
688 
689 /** Note that the data file was not found.
690 @param[in]	file		data file object
691 @param[out]	create_new_db	true if a new instance to be created
692 @return DB_SUCESS or error code */
693 dberr_t
file_not_found(Datafile & file,bool * create_new_db)694 SysTablespace::file_not_found(
695 	Datafile&	file,
696 	bool*	create_new_db)
697 {
698 	file.m_exists = false;
699 
700 	if (srv_read_only_mode && !m_ignore_read_only) {
701 		ib::error() << "Can't create file '" << file.filepath()
702 			<< "' when --innodb-read-only is set";
703 
704 		return(DB_ERROR);
705 
706 	} else if (&file == &m_files.front()) {
707 
708 		/* First data file. */
709 		ut_a(!*create_new_db);
710 		*create_new_db = TRUE;
711 
712 		if (space_id() == TRX_SYS_SPACE) {
713 			ib::info() << "The first " << name() << " data file '"
714 				<< file.name() << "' did not exist."
715 				" A new tablespace will be created!";
716 		}
717 
718 	} else {
719 		ib::info() << "Need to create a new " << name()
720 			<< " data file '" << file.name() << "'.";
721 	}
722 
723 	/* Set the file create mode. */
724 	switch (file.m_type) {
725 	case SRV_NOT_RAW:
726 		file.set_open_flags(OS_FILE_CREATE);
727 		break;
728 
729 	case SRV_NEW_RAW:
730 	case SRV_OLD_RAW:
731 		file.set_open_flags(OS_FILE_OPEN_RAW);
732 		break;
733 	}
734 
735 	return(DB_SUCCESS);
736 }
737 
738 /** Note that the data file was found.
739 @param[in,out]	file	data file object
740 @return true if a new instance to be created */
741 bool
file_found(Datafile & file)742 SysTablespace::file_found(
743 	Datafile&	file)
744 {
745 	/* Note that the file exists and can be opened
746 	in the appropriate mode. */
747 	file.m_exists = true;
748 
749 	/* Set the file open mode */
750 	switch (file.m_type) {
751 	case SRV_NOT_RAW:
752 		file.set_open_flags(
753 			&file == &m_files.front()
754 			? OS_FILE_OPEN_RETRY : OS_FILE_OPEN);
755 		break;
756 
757 	case SRV_NEW_RAW:
758 	case SRV_OLD_RAW:
759 		file.set_open_flags(OS_FILE_OPEN_RAW);
760 		break;
761 	}
762 
763 	/* Need to create the system tablespace for new raw device. */
764 	return(file.m_type == SRV_NEW_RAW);
765 }
766 #ifndef UNIV_HOTBACKUP
767 /** Check the data file specification.
768 @param[out] create_new_db	true if a new database is to be created
769 @param[in] min_expected_size	Minimum expected tablespace size in bytes
770 @return DB_SUCCESS if all OK else error code */
771 dberr_t
check_file_spec(bool * create_new_db,ulint min_expected_size)772 SysTablespace::check_file_spec(
773 	bool*	create_new_db,
774 	ulint	min_expected_size)
775 {
776 	*create_new_db = FALSE;
777 
778 	if (m_files.size() >= 1000) {
779 		ib::error() << "There must be < 1000 data files in "
780 			<< name() << " but " << m_files.size() << " have been"
781 			" defined.";
782 
783 		return(DB_ERROR);
784 	}
785 
786 	if (get_sum_of_sizes() < min_expected_size / UNIV_PAGE_SIZE) {
787 
788 		ib::error() << "Tablespace size must be at least "
789 			<< min_expected_size / (1024 * 1024) << " MB";
790 
791 		return(DB_ERROR);
792 	}
793 
794 	dberr_t	err = DB_SUCCESS;
795 
796 	ut_a(!m_files.empty());
797 
798 	/* If there is more than one data file and the last data file
799 	doesn't exist, that is OK. We allow adding of new data files. */
800 
801 	files_t::iterator	begin = m_files.begin();
802 	files_t::iterator	end = m_files.end();
803 
804 	for (files_t::iterator it = begin; it != end; ++it) {
805 
806 		file_status_t reason_if_failed;
807 		err = check_file_status(*it, reason_if_failed);
808 
809 		if (err == DB_NOT_FOUND) {
810 
811 			err = file_not_found(*it, create_new_db);
812 
813 			if (err != DB_SUCCESS) {
814 				break;
815 			}
816 
817 		} else if (err != DB_SUCCESS) {
818 			if (reason_if_failed == FILE_STATUS_READ_WRITE_ERROR) {
819 				const char*	p = (!srv_read_only_mode
820 						     || m_ignore_read_only)
821 						    ? "writable" : "readable";
822 				ib::error() << "The " << name() << " data file"
823 					<< " '" << it->name() << "' must be "
824 					<< p;
825 			}
826 
827 			ut_a(err != DB_FAIL);
828 			break;
829 
830 		} else if (*create_new_db) {
831 			ib::error() << "The " << name() << " data file '"
832 				<< begin->m_name << "' was not found but"
833 				" one of the other data files '" << it->m_name
834 				<< "' exists.";
835 
836 			err = DB_ERROR;
837 			break;
838 
839 		} else {
840 			*create_new_db = file_found(*it);
841 		}
842 	}
843 
844 	/* We assume doublewirte blocks in the first data file. */
845 	if (err == DB_SUCCESS && *create_new_db
846 	    && begin->m_size < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 3) {
847 		ib::error() << "The " << name() << " data file "
848 			<< "'" << begin->name() << "' must be at least "
849 			<< TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 3 * UNIV_PAGE_SIZE
850 			/ (1024 * 1024) << " MB";
851 
852 		err = DB_ERROR;
853 	}
854 
855 	return(err);
856 }
857 
858 /** Open or create the data files
859 @param[in]  is_temp		whether this is a temporary tablespace
860 @param[in]  create_new_db	whether we are creating a new database
861 @param[out] sum_new_sizes	sum of sizes of the new files added
862 @param[out] flush_lsn		FIL_PAGE_FILE_FLUSH_LSN of first file
863 @return DB_SUCCESS or error code */
864 dberr_t
open_or_create(bool is_temp,bool create_new_db,ulint * sum_new_sizes,lsn_t * flush_lsn)865 SysTablespace::open_or_create(
866 	bool	is_temp,
867 	bool	create_new_db,
868 	ulint*	sum_new_sizes,
869 	lsn_t*	flush_lsn)
870 {
871 	dberr_t		err	= DB_SUCCESS;
872 	fil_space_t*	space	= NULL;
873 
874 	ut_ad(!m_files.empty());
875 
876 	if (sum_new_sizes) {
877 		*sum_new_sizes = 0;
878 	}
879 
880 	files_t::iterator	begin = m_files.begin();
881 	files_t::iterator	end = m_files.end();
882 
883 	ut_ad(begin->order() == 0);
884 
885 	for (files_t::iterator it = begin; it != end; ++it) {
886 
887 		if (it->m_exists) {
888 			err = open_file(*it);
889 
890 			/* For new raw device increment new size. */
891 			if (sum_new_sizes && it->m_type == SRV_NEW_RAW) {
892 
893 				*sum_new_sizes += it->m_size;
894 			}
895 
896 		} else {
897 			err = create_file(*it);
898 
899 			if (sum_new_sizes) {
900 				*sum_new_sizes += it->m_size;
901 			}
902 
903 			/* Set the correct open flags now that we have
904 			successfully created the file. */
905 			if (err == DB_SUCCESS) {
906 				/* We ignore new_db OUT parameter here
907 				as the information is known at this stage */
908 				file_found(*it);
909 			}
910 		}
911 
912 		if (err != DB_SUCCESS) {
913 			return(err);
914 		}
915 
916 #if !defined(NO_FALLOCATE) && defined(UNIV_LINUX)
917 		/* Note: This should really be per node and not per
918 		tablespace because a tablespace can contain multiple
919 		files (nodes). The implication is that all files of
920 		the tablespace should be on the same medium. */
921 
922 		if (fil_fusionio_enable_atomic_write(it->m_handle)) {
923 
924 			if (srv_use_doublewrite_buf) {
925 				ib::info() << "FusionIO atomic IO enabled,"
926 					" disabling the double write buffer";
927 
928 				srv_use_doublewrite_buf = false;
929 			}
930 
931 			it->m_atomic_write = true;
932 		} else {
933 			it->m_atomic_write = false;
934 		}
935 #else
936 		it->m_atomic_write = false;
937 #endif /* !NO_FALLOCATE && UNIV_LINUX*/
938 	}
939 
940 	if (!create_new_db && flush_lsn) {
941 		/* Validate the header page in the first datafile
942 		and read LSNs fom the others. */
943 		err = read_lsn_and_check_flags(flush_lsn);
944 		if (err != DB_SUCCESS) {
945 			return(err);
946 		}
947 	}
948 
949 	/* Close the curent handles, add space and file info to the
950 	fil_system cache and the Data Dictionary, and re-open them
951 	in file_system cache so that they stay open until shutdown. */
952 	ulint	node_counter = 0;
953 	for (files_t::iterator it = begin; it != end; ++it) {
954 		it->close();
955 		it->m_exists = true;
956 
957 		if (it == begin) {
958 			/* First data file. */
959 
960 			/* Create the tablespace entry for the multi-file
961 			tablespace in the tablespace manager. */
962 			space = fil_space_create(
963 				name(), space_id(), flags(), is_temp
964 				? FIL_TYPE_TEMPORARY : FIL_TYPE_TABLESPACE);
965 		}
966 
967 		ut_a(fil_validate());
968 
969 		ulint	max_size = (++node_counter == m_files.size()
970 				    ? (m_last_file_size_max == 0
971 				       ? ULINT_MAX
972 				       : m_last_file_size_max)
973 				    : it->m_size);
974 
975 		/* Add the datafile to the fil_system cache. */
976 		if (!fil_node_create(
977 			    it->m_filepath, it->m_size,
978 			    space, it->m_type != SRV_NOT_RAW,
979 			    it->m_atomic_write, max_size)) {
980 
981 			err = DB_ERROR;
982 			break;
983 		}
984 	}
985 
986 	return(err);
987 }
988 #endif /* UNIV_HOTBACKUP */
989 /** Normalize the file size, convert from megabytes to number of pages. */
990 void
normalize()991 SysTablespace::normalize()
992 {
993 	files_t::iterator	end = m_files.end();
994 
995 	for (files_t::iterator it = m_files.begin(); it != end; ++it) {
996 
997 		it->m_size *= (1024 * 1024) / UNIV_PAGE_SIZE;
998 	}
999 
1000 	m_last_file_size_max *= (1024 * 1024) / UNIV_PAGE_SIZE;
1001 }
1002 
1003 
1004 /**
1005 @return next increment size */
1006 ulint
get_increment() const1007 SysTablespace::get_increment() const
1008 {
1009 	ulint	increment;
1010 
1011 	if (m_last_file_size_max == 0) {
1012 		increment = get_autoextend_increment();
1013 	} else {
1014 
1015 		if (!is_valid_size()) {
1016 			ib::error() << "The last data file in " << name()
1017 				<< " has a size of " << last_file_size()
1018 				<< " but the max size allowed is "
1019 				<< m_last_file_size_max;
1020 		}
1021 
1022 		increment = m_last_file_size_max - last_file_size();
1023 	}
1024 
1025 	if (increment > get_autoextend_increment()) {
1026 		increment = get_autoextend_increment();
1027 	}
1028 
1029 	return(increment);
1030 }
1031 
1032 
1033 /**
1034 @return true if configured to use raw devices */
1035 bool
has_raw_device()1036 SysTablespace::has_raw_device()
1037 {
1038 	files_t::iterator	end = m_files.end();
1039 
1040 	for (files_t::iterator it = m_files.begin(); it != end; ++it) {
1041 
1042 		if (it->is_raw_device()) {
1043 			return(true);
1044 		}
1045 	}
1046 
1047 	return(false);
1048 }
1049